extract_artist_track 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2de6a47213e726e3b252c3462a9b460965ad40af
4
+ data.tar.gz: 8b2ec412d56d01bd71ed8e73ed84080445e48894
5
+ SHA512:
6
+ metadata.gz: 695b24b0ce207a1a612d3b35f0262ca155f7e45064b5fa929f23ff97050510460b9c8444ff546749243f38b3854ae438ba91ba6382926ad624d2a19a5bf18407
7
+ data.tar.gz: 91977dc18cffcf2c589313d7ce49108ac6258629e3e3e08040b216ae3ddcfa8bcf82d19bce91267db6d7859f388027549e3c80a5f283918d3ebaed7d7abd75f8
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ .ruby-version
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in extract_artist_track.gemspec
4
+ gemspec
@@ -0,0 +1,38 @@
1
+ # ExtractArtistTrack [![CircleCI](https://circleci.com/gh/6/extract_artist_track.svg?style=svg)](https://circleci.com/gh/6/extract_artist_track)
2
+
3
+ Ruby port of [get-artist-title](https://github.com/goto-bus-stop/get-artist-title) for getting the song artist and track from a freeform string, e.g. a YouTube video title.
4
+
5
+ ```ruby
6
+ parser = ExtractArtistTrack::Parser.new(title: "Madeon - The City (Official Video).mp4")
7
+ parser.artist
8
+ => "Madeon"
9
+ parser.track
10
+ => "The City"
11
+
12
+ parser = ExtractArtistTrack::Parser.new(title: "[MV] Perfume「ねぇ」")
13
+ parser.artist
14
+ => "Perfume"
15
+ parser.track
16
+ => "ねぇ"
17
+ ```
18
+
19
+
20
+ ## Installation
21
+
22
+ Add this line to your application's Gemfile:
23
+
24
+ ```ruby
25
+ gem 'extract_artist_track'
26
+ ```
27
+
28
+ And then execute:
29
+
30
+ $ bundle
31
+
32
+
33
+ ## Development
34
+
35
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
36
+
37
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
38
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "extract_artist_track"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,3 @@
1
+ machine:
2
+ ruby:
3
+ version: 2.3.0
@@ -0,0 +1,26 @@
1
# coding: utf-8
# Gem specification for extract_artist_track.

# Put ./lib on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'extract_artist_track/version'

Gem::Specification.new do |spec|
  spec.name          = "extract_artist_track"
  spec.version       = ExtractArtistTrack::VERSION
  spec.authors       = ["Peter Graham"]

  spec.summary       = %q{Extract artist and track from YouTube video title.}
  spec.description   = %q{Extract artist and track from a freeform YouTube video title.}
  spec.homepage      = "https://github.com/6/extract_artist_track"

  # The library uses the safe-navigation operator (`&.`), which needs Ruby 2.3+.
  spec.required_ruby_version = ">= 2.3.0"

  # Package every git-tracked file except test/spec/feature directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Pessimistic constraint (>= 3.1, < 4.0) instead of an exact pin, so that
  # applications depending on a newer 3.x mime-types can still resolve.
  spec.add_dependency "mime-types", "~> 3.1"
  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "rspec-its", "~> 1.2"
  spec.add_development_dependency "rspec_junit_formatter", "0.2.3"
end
@@ -0,0 +1,38 @@
1
+ require "ostruct"
2
+ require "mime/types/full"
3
+
4
+ require "extract_artist_track/version"
5
+ require "extract_artist_track/splitter"
6
+ require "extract_artist_track/cleaner"
7
+ require "extract_artist_track/parser"
8
+
9
module ExtractArtistTrack
  # Quote pairs that may wrap a track title. Each entry is a two-character
  # string: the opening quote followed by the closing quote (consumers read
  # them as `set[0]` / `set[1]`). Frozen so the shared list cannot be mutated.
  QUOTES = [
    '“”',
    '""',
    "''",
    '『』',
    '「」',
    '【】',
    '‹›',
    '«»',
  ].freeze

  # Artist/track separators. Order matters: the splitter walks this list from
  # top to bottom and uses the first separator that occurs in the title, so the
  # space-padded and multi-character forms come before their bare counterparts.
  SEPARATORS = [
    ' -- ',
    '--',
    ' - ',
    ' – ',
    ' — ',
    ' _ ',
    '-',
    '–',
    '—',
    ':',
    '|',
    '///',
    ' / ',
    '_',
    '/',
  ].freeze
end
@@ -0,0 +1,73 @@
1
module ExtractArtistTrack
  # Stateless string scrubbers. They strip file extensions, MV/PV markers,
  # "official video"-style fluff, stray punctuation and wrapping quotes from a
  # raw title and from the artist/track fragments split out of it.
  class Cleaner
    # Runs of whitespace, slashes, dashes and similar punctuation at the start
    # or end of a line (`^` / `$` are line anchors in Ruby).
    LEADING_JUNK = /^[\/\s,:;~\-–_"]+/.freeze
    TRAILING_JUNK = /[\/\s,:;~\-–_"]+$/.freeze

    # Cleans a full, unsplit title.
    #
    # @param title [Object] the raw title
    # @return [String, nil] the title without audio/video file extension and
    #   fluff, or nil when +title+ is not a String
    def self.clean_title(title)
      return unless title.is_a?(String)
      clean_fluff(clean_file_extension(title))
    end

    # Cleans the artist fragment of a split title.
    #
    # @param artist [String]
    # @return [String]
    def self.clean_artist(artist)
      cleaned = clean_fluff(artist.strip)
        .gsub(/\s*[0-1][0-9][0-1][0-9][0-3][0-9]\s*/, '') # date formats ex. 130624
        .gsub(LEADING_JUNK, '')                           # trim starting white chars and dash
        .gsub(TRAILING_JUNK, '')                          # trim trailing white chars and dash
      clean_quotes(cleaned)
    end

    # Cleans the track fragment of a split title.
    #
    # @param track [String]
    # @return [String]
    def self.clean_track(track)
      cleaned = clean_fluff(track.strip)
        .gsub(/\s*\*+\s?\S+\s?\*+$/, '')          # **NEW**
        .gsub(/\s*video\s*clip/i, '')             # video clip
        .gsub(/\s+\(?live\)?$/i, '')              # live
        .gsub(/\(\s*\)/, '')                      # Leftovers after e.g. (official video)
        .gsub(/\[\s*\]/, '')                      # Leftovers after e.g. [1080p]
        .gsub(/【\s*】/, '')                      # Leftovers after e.g. 【MV】
        .gsub(/^(|.*\s)"(.*)"(\s.*|)$/, '\2')     # Artist - The new "Track title" featuring someone
        .gsub(/^(|.*\s)'(.*)'(\s.*|)$/, '\2')     # 'Track title'
        .gsub(LEADING_JUNK, '')                   # trim starting white chars and dash
        .gsub(TRAILING_JUNK, '')                  # trim trailing white chars and dash
      clean_quotes(cleaned)
    end

    # The helpers below are internal. They stay publicly callable because the
    # bare `private` that used to precede them never applied to `def self.`
    # methods; hiding them now could break existing callers.

    # Removes a trailing audio/video file extension (case-insensitive).
    def self.clean_file_extension(string)
      string.gsub(file_extensions_regex, '')
    end

    # Regex matching ".<ext>" at end of line for every extension registered
    # under an audio/* or video/* MIME type. Built once and cached, because
    # scanning the MIME registry on every call is needlessly expensive.
    # Extensions are escaped via Regexp.union before interpolation.
    def self.file_extensions_regex
      @file_extensions_regex ||= begin
        extensions = MIME::Types[/^(audio|video)/].flat_map(&:extensions).uniq
        /\.(?:#{Regexp.union(extensions).source})$/i
      end
    end

    # Remove various versions of "MV" and "PV" markers.
    #
    # The two bare-marker rules only fire when the marker is a standalone token
    # (not glued to an ASCII letter or digit), so words such as "MVP" or "EMV"
    # are left intact.
    def self.clean_mvpv(string)
      string
        .gsub(/\s*\[\s*([PM]\/?V)\s*\]/, '')                # [MV] or [M/V]
        .gsub(/\s*\(\s*([PM]\/?V)\s*\)/, '')                # (MV) or (M/V)
        .gsub(/\s*【\s*([PM]\/?V)\s*】/, '')                # 【MV】 or 【M/V】
        .gsub(/[\s\-–_]+([PM]\/?V)(?![A-Za-z0-9])\s*/, '')  # MV or M/V after a space/dash
        .gsub(/(?<![A-Za-z0-9])([PM]\/?V)[\s\-–_]+/, '')    # MV or M/V before a space/dash
    end

    # Strips generic noise: bracketed tags, version notes, "official video",
    # years, HD/HQ and lyrics markers.
    # NOTE(review): the "video" rule is not word-bounded, so it also removes
    # "video" inside longer words; left as-is to preserve existing results.
    def self.clean_fluff(string)
      clean_mvpv(string)
        .gsub(/\s*\[[^\]]+\]$/, '')                           # [whatever] at the end
        .gsub(/^\s*\[[^\]]+\]\s*/, '')                        # [whatever] at the start
        .gsub(/\s*\([^\)]*\bver(\.|sion)?\s*\)$/i, '')        # (whatever version)
        .gsub(/\s*[a-z]*\s*\bver(\.|sion)?$/i, '')            # ver. and 1 word before (no parens)
        .gsub(/\s*(of+icial\s*)?(music\s*)?video/i, '')       # (official)? (music)? video
        .gsub(/\s*(ALBUM TRACK\s*)?(album track\s*)/i, '')    # (ALBUM TRACK)
        .gsub(/\s*\(\s*of+icial\s*\)/i, '')                   # (official)
        .gsub(/\s*\(\s*[0-9]{4}\s*\)/i, '')                   # (1999)
        .gsub(/\s+\(\s*(HD|HQ)\s*\)$/, '')                    # (HD) / (HQ)
        .gsub(/[\s\-–_]+(HD|HQ)\s*$/, '')                     # HD / HQ
        .gsub(/(\s*[-~_\/]\s*)?\b(with\s+)?lyrics\s*/i, '')   # lyrics / with lyrics
        .gsub(/\(\s*(with\s+)?lyrics\s*\)\s*/i, '')           # (lyrics) / (with lyrics)
    end

    # Unwraps text that starts a line inside one of the QUOTES pairs, then
    # strips surrounding whitespace.
    def self.clean_quotes(string)
      QUOTES.reduce(string) do |str, set|
        str.gsub(%r{^#{set[0]}(.*?)#{set[1]}\s*}, '\1 ')
      end.strip
    end
  end
end
@@ -0,0 +1,29 @@
1
module ExtractArtistTrack
  # Public entry point: takes a freeform title and lazily exposes the cleaned
  # artist and track extracted from it.
  class Parser
    # @param title [Object] raw title; Cleaner.clean_title decides what is usable
    def initialize(title:)
      @title = Cleaner.clean_title(title)
    end

    # @return [String, nil] the cleaned artist, or nil when the title is
    #   unusable or no artist could be split out. Computed once, then cached
    #   (a nil result is cached too).
    def artist
      return @artist if defined?(@artist)
      raw = invalid? ? nil : splitter.artist
      @artist = raw.nil? ? nil : Cleaner.clean_artist(raw)
    end

    # @return [String, nil] the cleaned track, or nil when the title is
    #   unusable or no track could be split out. Computed once, then cached
    #   (a nil result is cached too).
    def track
      return @track if defined?(@track)
      raw = invalid? ? nil : splitter.track
      @track = raw.nil? ? nil : Cleaner.clean_track(raw)
    end

    private

    # True when the cleaned title is nil or empty.
    def invalid?
      @title.to_s.empty?
    end

    # Splitter for the cleaned title, built on first use.
    def splitter
      @splitter ||= Splitter.new(title: @title)
    end
  end
end
@@ -0,0 +1,41 @@
1
module ExtractArtistTrack
  # Splits a title into raw (uncleaned) artist and track fragments, first by a
  # known separator and, failing that, around a quoted section.
  class Splitter
    attr_reader :title, :artist, :track

    # @param title [String, nil] the title to split; nil or empty leaves
    #   +artist+ and +track+ as nil
    def initialize(title:)
      @title = title
      return if title.nil? || title.empty?
      split = split_by_separator || split_by_quotes
      @artist = split&.artist
      @track = split&.track
    end

    # Splits on the first SEPARATORS entry (in list order) found in the title.
    #
    # The artist is everything before the separator and the track everything
    # after it; the separator itself is dropped. A separator at position 0
    # yields an empty artist.
    #
    # @return [OpenStruct, nil] responds to +artist+ and +track+; nil when no
    #   separator occurs
    def split_by_separator
      SEPARATORS.each do |separator|
        index = title.index(separator)
        next unless index
        return OpenStruct.new(
          artist: title[0, index],
          track: title[(index + separator.length)..-1]
        )
      end
      nil
    end

    # Splits around the first QUOTES pair (in list order) found in the title:
    # text before the quoted section is the artist, the quoted section and
    # anything after it is the track. Quoted sections are padded with spaces
    # before splitting.
    #
    # @return [OpenStruct, nil] responds to +artist+ and +track+; nil when no
    #   quoted section occurs
    def split_by_quotes
      QUOTES.each do |set|
        regex = %r{#{set[0]}(.*?)#{set[1]}}
        padded = title.gsub(regex) { |match| " #{match} " }
        index = regex =~ padded
        next unless index
        return OpenStruct.new(
          artist: padded[0, index],
          track: padded[index..-1]
        )
      end
      nil
    end
  end
end
@@ -0,0 +1,3 @@
1
module ExtractArtistTrack
  # Gem version string; frozen so it cannot be mutated in place at runtime.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: extract_artist_track
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Peter Graham
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-09-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mime-types
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: '3.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: '3.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.12'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.12'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec-its
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.2'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec_junit_formatter
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 0.2.3
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 0.2.3
97
+ description: Extract artist and track from a freeform YouTube video title.
98
+ email:
99
+ executables: []
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - ".gitignore"
104
+ - ".rspec"
105
+ - ".travis.yml"
106
+ - Gemfile
107
+ - README.md
108
+ - Rakefile
109
+ - bin/console
110
+ - bin/setup
111
+ - circle.yml
112
+ - extract_artist_track.gemspec
113
+ - lib/extract_artist_track.rb
114
+ - lib/extract_artist_track/cleaner.rb
115
+ - lib/extract_artist_track/parser.rb
116
+ - lib/extract_artist_track/splitter.rb
117
+ - lib/extract_artist_track/version.rb
118
+ homepage: https://github.com/6/extract_artist_track
119
+ licenses: []
120
+ metadata: {}
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ version: '0'
135
+ requirements: []
136
+ rubyforge_project:
137
+ rubygems_version: 2.5.1
138
+ signing_key:
139
+ specification_version: 4
140
+ summary: Extract artist and track from YouTube video title.
141
+ test_files: []