extract_artist_track 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2de6a47213e726e3b252c3462a9b460965ad40af
4
+ data.tar.gz: 8b2ec412d56d01bd71ed8e73ed84080445e48894
5
+ SHA512:
6
+ metadata.gz: 695b24b0ce207a1a612d3b35f0262ca155f7e45064b5fa929f23ff97050510460b9c8444ff546749243f38b3854ae438ba91ba6382926ad624d2a19a5bf18407
7
+ data.tar.gz: 91977dc18cffcf2c589313d7ce49108ac6258629e3e3e08040b216ae3ddcfa8bcf82d19bce91267db6d7859f388027549e3c80a5f283918d3ebaed7d7abd75f8
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ .ruby-version
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in extract_artist_track.gemspec
4
+ gemspec
@@ -0,0 +1,38 @@
1
+ # ExtractArtistTrack [![CircleCI](https://circleci.com/gh/6/extract_artist_track.svg?style=svg)](https://circleci.com/gh/6/extract_artist_track)
2
+
3
+ Ruby port of [get-artist-title](https://github.com/goto-bus-stop/get-artist-title) for getting the song artist and track from a freeform string, e.g. a YouTube video title.
4
+
5
+ ```ruby
6
+ parser = ExtractArtistTrack::Parser.new(title: "Madeon - The City (Official Video).mp4")
7
+ parser.artist
8
+ => "Madeon"
9
+ parser.track
10
+ => "The City"
11
+
12
+ parser = ExtractArtistTrack::Parser.new(title: "[MV] Perfume「ねぇ」")
13
+ parser.artist
14
+ => "Perfume"
15
+ parser.track
16
+ => "ねぇ"
17
+ ```
18
+
19
+
20
+ ## Installation
21
+
22
+ Add this line to your application's Gemfile:
23
+
24
+ ```ruby
25
+ gem 'extract_artist_track'
26
+ ```
27
+
28
+ And then execute:
29
+
30
+ $ bundle
31
+
32
+
33
+ ## Development
34
+
35
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
36
+
37
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
38
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "extract_artist_track"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,3 @@
1
+ machine:
2
+ ruby:
3
+ version: 2.3.0
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be read while the gem is
# being built from a source checkout.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'extract_artist_track/version'

Gem::Specification.new do |spec|
  spec.name          = "extract_artist_track"
  spec.version       = ExtractArtistTrack::VERSION
  spec.authors       = ["Peter Graham"]

  spec.summary       = %q{Extract artist and track from YouTube video title.}
  spec.description   = %q{Extract artist and track from a freeform YouTube video title.}
  spec.homepage      = "https://github.com/6/extract_artist_track"

  # Package every git-tracked file except the test suites.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Pessimistic constraint instead of an exact pin: an exact runtime pin in a
  # library forces a resolver conflict in any application that already uses a
  # different mime-types 3.x release.
  spec.add_dependency "mime-types", "~> 3.1"
  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "rspec-its", "~> 1.2"
  spec.add_development_dependency "rspec_junit_formatter", "0.2.3"
end
@@ -0,0 +1,38 @@
1
+ require "ostruct"
2
+ require "mime/types/full"
3
+
4
+ require "extract_artist_track/version"
5
+ require "extract_artist_track/splitter"
6
+ require "extract_artist_track/cleaner"
7
+ require "extract_artist_track/parser"
8
+
9
module ExtractArtistTrack
  # Quote pairs recognised around a track title. Each entry is a
  # two-character string: index 0 is the opening mark, index 1 the closing
  # mark. Frozen so the shared list cannot be mutated by accident.
  QUOTES = [
    '“”',
    '""',
    "''",
    '『』',
    '「」',
    '【】',
    '‹›',
    '«»',
  ].freeze

  # Strings that may separate the artist from the track. The splitter tries
  # them in the order listed and uses the first one found in the title, so
  # padded and longer forms come before their bare single-character variants.
  SEPARATORS = [
    ' -- ',
    '--',
    ' - ',
    ' – ',
    ' — ',
    ' _ ',
    '-',
    '–',
    '—',
    ':',
    '|',
    '///',
    ' / ',
    '_',
    '/',
  ].freeze
end
@@ -0,0 +1,73 @@
1
module ExtractArtistTrack
  # Regex-based helpers that strip non-essential "fluff" (file extensions,
  # MV/PV markers, "official video", quality tags, surrounding quotes, ...)
  # from titles, artist names and track names.
  #
  # All methods are class methods. Apart from +file_extension_regex+ the
  # helper methods are publicly callable: a bare +private+ keyword does not
  # apply to +def self.+ methods, so none is used here.
  class Cleaner
    # Separator/whitespace/quote characters trimmed from the start of a value.
    LEADING_JUNK = /^[\/\s,:;~\-–_"]+/
    # Separator/whitespace/quote characters trimmed from the end of a value.
    TRAILING_JUNK = /[\/\s,:;~\-–_"]+$/

    # Prepares a raw title for splitting.
    #
    # @param title [Object] raw title
    # @return [String, nil] title without a trailing audio/video file
    #   extension and without common fluff; nil when +title+ is not a String
    def self.clean_title(title)
      return unless title.is_a?(String)

      clean_fluff(clean_file_extension(title))
    end

    # Cleans the artist half of a split title.
    #
    # @param artist [String] raw artist fragment
    # @return [String] artist without fluff, six-digit date stamps,
    #   leading/trailing separator characters and leading quotes
    def self.clean_artist(artist)
      cleaned = clean_fluff(artist.strip)
        .gsub(/\s*[0-1][0-9][0-1][0-9][0-3][0-9]\s*/, '') # date formats ex. 130624
        .gsub(LEADING_JUNK, '')                           # trim starting white chars and dash
        .gsub(TRAILING_JUNK, '')                          # trim trailing white chars and dash
      clean_quotes(cleaned)
    end

    # Cleans the track half of a split title.
    #
    # @param track [String] raw track fragment
    # @return [String] track without fluff, markers such as "**NEW**",
    #   "video clip" or "(live)", empty bracket leftovers, surrounding quotes
    #   and leading/trailing separator characters
    def self.clean_track(track)
      cleaned = clean_fluff(track.strip)
        .gsub(/\s*\*+\s?\S+\s?\*+$/, '')              # **NEW**
        .gsub(/\s*video\s*clip/i, '')                 # video clip
        .gsub(/\s+\(?live\)?$/i, '')                  # live
        .gsub(/\(\s*\)/, '')                          # Leftovers after e.g. (official video)
        .gsub(/\[\s*\]/, '')                          # Leftovers after e.g. [1080p]
        .gsub(/【\s*】/, '')                           # Leftovers after e.g. 【MV】
        .gsub(/^(|.*\s)"(.*)"(\s.*|)$/, '\2')         # Artist - The new "Track title" featuring someone
        .gsub(/^(|.*\s)'(.*)'(\s.*|)$/, '\2')         # 'Track title'
        .gsub(LEADING_JUNK, '')                       # trim starting white chars and dash
        .gsub(TRAILING_JUNK, '')                      # trim trailing white chars and dash
      clean_quotes(cleaned)
    end

    # Removes a trailing audio/video file extension (case-insensitive).
    #
    # @param string [String]
    # @return [String]
    def self.clean_file_extension(string)
      string.gsub(file_extension_regex, '')
    end

    # Regex matching ".<ext>" at the end of a line for every extension the
    # MIME registry lists under audio/* or video/*. Built once and memoized:
    # scanning the registry on every call is needlessly expensive. Extensions
    # are escaped so any regex metacharacter in them is matched literally.
    def self.file_extension_regex
      @file_extension_regex ||= begin
        extensions = MIME::Types[/^(audio|video)/].flat_map(&:extensions).uniq
        alternatives = extensions.map { |ext| Regexp.escape(ext) }.join("|")
        /\.(?:#{alternatives})$/i
      end
    end
    private_class_method :file_extension_regex

    # Remove various versions of "MV" and "PV" markers
    #
    # The two bare-marker patterns require a word boundary so they only match
    # a standalone marker; without it they cut into ordinary words
    # ("DJ MVP" -> "DJP", "UMV Crew" -> "UCrew").
    #
    # @param string [String]
    # @return [String]
    def self.clean_mvpv(string)
      string
        .gsub(/\s*\[\s*([PM]\/?V)\s*\]/, '')      # [MV] or [M/V]
        .gsub(/\s*\(\s*([PM]\/?V)\s*\)/, '')      # (MV) or (M/V)
        .gsub(/\s*【\s*([PM]\/?V)\s*】/, '')       # 【MV】 or 【M/V】
        .gsub(/[\s\-–_]+([PM]\/?V)\b\s*/, '')     # standalone MV or M/V after a separator
        .gsub(/\b([PM]\/?V)[\s\-–_]+/, '')        # standalone MV or M/V before a separator
    end

    # Strips generic fluff shared by titles, artists and tracks.
    #
    # @param string [String]
    # @return [String]
    def self.clean_fluff(string)
      clean_mvpv(string)
        .gsub(/\s*\[[^\]]+\]$/, '')                           # [whatever] at the end
        .gsub(/^\s*\[[^\]]+\]\s*/, '')                        # [whatever] at the start
        .gsub(/\s*\([^\)]*\bver(\.|sion)?\s*\)$/i, '')        # (whatever version)
        .gsub(/\s*[a-z]*\s*\bver(\.|sion)?$/i, '')            # ver. and 1 word before (no parens)
        .gsub(/\s*(of+icial\s*)?(music\s*)?video/i, '')       # (official)? (music)? video
        .gsub(/\s*(ALBUM TRACK\s*)?(album track\s*)/i, '')    # (ALBUM TRACK)
        .gsub(/\s*\(\s*of+icial\s*\)/i, '')                   # (official)
        .gsub(/\s*\(\s*[0-9]{4}\s*\)/i, '')                   # (1999)
        .gsub(/\s+\(\s*(HD|HQ)\s*\)$/, '')                    # HD (HQ)
        .gsub(/[\s\-–_]+(HD|HQ)\s*$/, '')                     # HD (HQ)
        .gsub(/(\s*[-~_\/]\s*)?\b(with\s+)?lyrics\s*/i, '')   # lyrics / with lyrics
        .gsub(/\(\s*(with\s+)?lyrics\s*\)\s*/i, '')           # (lyrics) / (with lyrics)
    end

    # Unwraps a quoted segment at the start of a line for every pair in
    # QUOTES, keeping the quoted text followed by a single space, then strips
    # the result.
    #
    # @param string [String]
    # @return [String]
    def self.clean_quotes(string)
      QUOTES.reduce(string) do |str, pair|
        opening = Regexp.escape(pair[0])
        closing = Regexp.escape(pair[1])
        str.gsub(/^#{opening}(.*?)#{closing}\s*/, '\1 ')
      end.strip
    end
  end
end
@@ -0,0 +1,29 @@
1
module ExtractArtistTrack
  # Entry point of the gem: takes a freeform title and exposes the cleaned
  # artist and track extracted from it. Both values are computed on first
  # access and cached, including a nil result.
  class Parser
    # @param title [Object] raw title; it is passed through
    #   Cleaner.clean_title before being stored
    def initialize(title:)
      @title = Cleaner.clean_title(title)
    end

    # @return [String, nil] cleaned artist, or nil when the cleaned title is
    #   nil/empty or the splitter produced no artist
    def artist
      unless defined?(@artist)
        raw_artist = invalid? ? nil : splitter.artist
        @artist = raw_artist.nil? ? nil : Cleaner.clean_artist(raw_artist)
      end
      @artist
    end

    # @return [String, nil] cleaned track, or nil when the cleaned title is
    #   nil/empty or the splitter produced no track
    def track
      unless defined?(@track)
        raw_track = invalid? ? nil : splitter.track
        @track = raw_track.nil? ? nil : Cleaner.clean_track(raw_track)
      end
      @track
    end

    private

    # True when there is no usable title to split.
    def invalid?
      @title.nil? || @title.empty?
    end

    # Splitter for the cleaned title, built on first use.
    def splitter
      @splitter ||= Splitter.new(title: @title)
    end
  end
end
@@ -0,0 +1,41 @@
1
+ module ExtractArtistTrack
2
+ class Splitter
3
+ attr_reader :title, :artist, :track
4
+ def initialize(title:)
5
+ @title = title
6
+ return if title.nil? || title.empty?
7
+ split = split_by_separator
8
+ split ||= split_by_quotes
9
+ @artist = split&.artist
10
+ @track = split&.track
11
+ end
12
+
13
+ def split_by_separator
14
+ SEPARATORS.each do |separator|
15
+ index = title.index(separator)
16
+ if index
17
+ return OpenStruct.new({
18
+ artist: title[0..index-1],
19
+ track: title[index..-1],
20
+ })
21
+ end
22
+ end
23
+ nil
24
+ end
25
+
26
+ def split_by_quotes
27
+ regexes = QUOTES.map { |set| %r{#{set[0]}(.*?)#{set[1]}} }
28
+ regexes.each do |regex|
29
+ str = title.gsub(regex) { |match| " #{match} " }
30
+ index = regex =~ str
31
+ if index
32
+ return OpenStruct.new({
33
+ artist: str[0..index-1],
34
+ track: str[index..-1],
35
+ })
36
+ end
37
+ end
38
+ nil
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,3 @@
1
module ExtractArtistTrack
  # Gem version string; frozen so the shared constant cannot be mutated.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: extract_artist_track
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Peter Graham
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-09-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mime-types
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: '3.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: '3.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.12'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.12'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec-its
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.2'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec_junit_formatter
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 0.2.3
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 0.2.3
97
+ description: Extract artist and track from a freeform YouTube video title.
98
+ email:
99
+ executables: []
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - ".gitignore"
104
+ - ".rspec"
105
+ - ".travis.yml"
106
+ - Gemfile
107
+ - README.md
108
+ - Rakefile
109
+ - bin/console
110
+ - bin/setup
111
+ - circle.yml
112
+ - extract_artist_track.gemspec
113
+ - lib/extract_artist_track.rb
114
+ - lib/extract_artist_track/cleaner.rb
115
+ - lib/extract_artist_track/parser.rb
116
+ - lib/extract_artist_track/splitter.rb
117
+ - lib/extract_artist_track/version.rb
118
+ homepage: https://github.com/6/extract_artist_track
119
+ licenses: []
120
+ metadata: {}
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ version: '0'
135
+ requirements: []
136
+ rubyforge_project:
137
+ rubygems_version: 2.5.1
138
+ signing_key:
139
+ specification_version: 4
140
+ summary: Extract artist and track from YouTube video title.
141
+ test_files: []