extract_artist_track 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/README.md +38 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/circle.yml +3 -0
- data/extract_artist_track.gemspec +26 -0
- data/lib/extract_artist_track.rb +38 -0
- data/lib/extract_artist_track/cleaner.rb +73 -0
- data/lib/extract_artist_track/parser.rb +29 -0
- data/lib/extract_artist_track/splitter.rb +41 -0
- data/lib/extract_artist_track/version.rb +3 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2de6a47213e726e3b252c3462a9b460965ad40af
|
4
|
+
data.tar.gz: 8b2ec412d56d01bd71ed8e73ed84080445e48894
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 695b24b0ce207a1a612d3b35f0262ca155f7e45064b5fa929f23ff97050510460b9c8444ff546749243f38b3854ae438ba91ba6382926ad624d2a19a5bf18407
|
7
|
+
data.tar.gz: 91977dc18cffcf2c589313d7ce49108ac6258629e3e3e08040b216ae3ddcfa8bcf82d19bce91267db6d7859f388027549e3c80a5f283918d3ebaed7d7abd75f8
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# ExtractArtistTrack [![CircleCI](https://circleci.com/gh/6/extract_artist_track.svg?style=svg)](https://circleci.com/gh/6/extract_artist_track)
|
2
|
+
|
3
|
+
Ruby port of [get-artist-title](https://github.com/goto-bus-stop/get-artist-title) for getting the song artist and track from a freeform string, e.g. a YouTube video title.
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
parser = ExtractArtistTrack::Parser.new(title: "Madeon - The City (Official Video).mp4")
|
7
|
+
parser.artist
|
8
|
+
=> "Madeon"
|
9
|
+
parser.track
|
10
|
+
=> "The City"
|
11
|
+
|
12
|
+
parser = ExtractArtistTrack::Parser.new(title: "[MV] Perfume「ねぇ」")
|
13
|
+
parser.artist
|
14
|
+
=> "Perfume"
|
15
|
+
parser.track
|
16
|
+
=> "ねぇ"
|
17
|
+
```
|
18
|
+
|
19
|
+
|
20
|
+
## Installation
|
21
|
+
|
22
|
+
Add this line to your application's Gemfile:
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
gem 'extract_artist_track'
|
26
|
+
```
|
27
|
+
|
28
|
+
And then execute:
|
29
|
+
|
30
|
+
$ bundle
|
31
|
+
|
32
|
+
|
33
|
+
## Development
|
34
|
+
|
35
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
36
|
+
|
37
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
38
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: starts IRB with the gem already loaded.
require "bundler/setup"
require "extract_artist_track"
require "irb"

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier.
#
# A different console can be used instead. For Pry, add pry to the Gemfile
# and replace the IRB lines with:
#   require "pry"
#   Pry.start

IRB.start
|
data/bin/setup
ADDED
data/circle.yml
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for extract_artist_track.

# Make lib/ loadable so the version constant can be read without installing.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'extract_artist_track/version'

Gem::Specification.new do |spec|
  spec.name          = "extract_artist_track"
  spec.version       = ExtractArtistTrack::VERSION
  spec.authors       = ["Peter Graham"]

  spec.summary       = %q{Extract artist and track from YouTube video title.}
  spec.description   = %q{Extract artist and track from a freeform YouTube video title.}
  spec.homepage      = "https://github.com/6/extract_artist_track"

  # Package every file tracked by git except test directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Pessimistic constraint (>= 3.1, < 4.0) instead of an exact pin: an exact
  # runtime pin in a library makes it unresolvable alongside any application
  # or gem that needs a different mime-types 3.x release.
  spec.add_dependency "mime-types", "~> 3.1"
  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "rspec-its", "~> 1.2"
  spec.add_development_dependency "rspec_junit_formatter", "0.2.3"
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require "ostruct"
|
2
|
+
require "mime/types/full"
|
3
|
+
|
4
|
+
require "extract_artist_track/version"
|
5
|
+
require "extract_artist_track/splitter"
|
6
|
+
require "extract_artist_track/cleaner"
|
7
|
+
require "extract_artist_track/parser"
|
8
|
+
|
9
|
+
module ExtractArtistTrack
  # Quote pairs recognised around a track name. Each entry is a two-character
  # string: the opening quote at index 0 and the closing quote at index 1.
  # Frozen (array and elements) so the shared constant cannot be mutated.
  QUOTES = [
    '“”',
    '""',
    "''",
    '『』',
    '「」',
    '【】',
    '‹›',
    '«»',
  ].map(&:freeze).freeze

  # Strings that may separate the artist from the track. Order is priority:
  # the splitter walks this list front to back and uses the first entry that
  # occurs in the title, so longer / space-padded forms come before the bare
  # single-character ones.
  SEPARATORS = [
    ' -- ',
    '--',
    ' - ',
    ' – ',
    ' — ',
    ' _ ',
    '-',
    '–',
    '—',
    ':',
    '|',
    '///',
    ' / ',
    '_',
    '/',
  ].map(&:freeze).freeze
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module ExtractArtistTrack
  # Strips common noise from video titles and from the artist / track
  # fragments obtained by splitting a title: media file extensions, MV/PV
  # tags, "official video", quality markers, lyrics markers, leftover
  # brackets, surrounding punctuation and quotes.
  class Cleaner
    # Cleans a complete title before it is split into artist and track.
    # Returns nil when +title+ is not a String.
    def self.clean_title(title)
      return unless title.is_a?(String)

      clean_fluff(clean_file_extension(title))
    end

    # Cleans the artist fragment of a split title and returns the bare name.
    def self.clean_artist(artist)
      artist = clean_fluff(artist.strip)
        .gsub(/\s*[0-1][0-9][0-1][0-9][0-3][0-9]\s*/, '') # date formats ex. 130624
        .gsub(/^[\/\s,:;~\-–_"]+/, '') # trim starting white chars and dash
        .gsub(/[\/\s,:;~\-–_"]+$/, '') # trim trailing white chars and dash
      clean_quotes(artist)
    end

    # Cleans the track fragment of a split title and returns the bare name.
    def self.clean_track(track)
      track = clean_fluff(track.strip)
        .gsub(/\s*\*+\s?\S+\s?\*+$/, '') # **NEW**
        .gsub(/\s*video\s*clip/i, '') # video clip
        .gsub(/\s+\(?live\)?$/i, '') # live
        .gsub(/\(\s*\)/, '') # Leftovers after e.g. (official video)
        .gsub(/\[\s*\]/, '') # Leftovers after e.g. [1080p]
        .gsub(/【\s*】/, '') # Leftovers after e.g. 【MV】
        .gsub(/^(|.*\s)"(.*)"(\s.*|)$/, '\2') # Artist - The new "Track title" featuring someone
        .gsub(/^(|.*\s)'(.*)'(\s.*|)$/, '\2') # 'Track title'
        .gsub(/^[\/\s,:;~\-–_"]+/, '') # trim starting white chars and dash
        .gsub(/[\/\s,:;~\-–_"]+$/, '') # trim trailing white chars and dash
      clean_quotes(track)
    end

    # The methods below are implementation helpers. A bare `private` keyword
    # has no effect on `def self.` methods, so they have always been publicly
    # callable; that visibility is kept to avoid breaking existing callers.

    # Removes a trailing audio/video file extension (e.g. ".mp4").
    def self.clean_file_extension(string)
      string.gsub(file_extension_regex, '')
    end

    # Case-insensitive regex matching any audio/video extension known to the
    # MIME registry at the end of a line. Built once and memoized because it
    # requires a scan of the whole registry; extensions are escaped so they
    # are always matched literally.
    def self.file_extension_regex
      @file_extension_regex ||= begin
        extensions = MIME::Types[/^(audio|video)/].flat_map(&:extensions).uniq
        # `.source` (not the Regexp itself) so the outer /i flag still applies.
        /\.(#{Regexp.union(extensions).source})$/i
      end
    end

    # Remove various versions of "MV" and "PV" markers
    def self.clean_mvpv(string)
      string
        .gsub(/\s*\[\s*([PM]\/?V)\s*\]/, '') # [MV] or [M/V]
        .gsub(/\s*\(\s*([PM]\/?V)\s*\)/, '') # (MV) or (M/V)
        .gsub(/\s*【\s*([PM]\/?V)\s*】/, '') # 【MV】 or 【M/V】
        # The two bare forms are anchored: unanchored they also ate text in
        # the middle of a title ("Artist - MV Track" became "ArtistTrack").
        .gsub(/[\s\-–_]+([PM]\/?V)\s*$/, '') # MV or M/V at the end
        .gsub(/^([PM]\/?V)[\s\-–_]+/, '') # MV or M/V at the start
    end

    # Removes generic decorations: bracketed tags, version notes, "official
    # (music) video", "album track", years, HD/HQ and lyrics markers.
    def self.clean_fluff(string)
      clean_mvpv(string)
        .gsub(/\s*\[[^\]]+\]$/, '') # [whatever] at the end
        .gsub(/^\s*\[[^\]]+\]\s*/, '') # [whatever] at the start
        .gsub(/\s*\([^\)]*\bver(\.|sion)?\s*\)$/i, '') # (whatever version)
        .gsub(/\s*[a-z]*\s*\bver(\.|sion)?$/i, '') # ver. and 1 word before (no parens)
        .gsub(/\s*(of+icial\s*)?(music\s*)?video/i, '') # (official)? (music)? video
        .gsub(/\s*(ALBUM TRACK\s*)?(album track\s*)/i, '') # (ALBUM TRACK)
        .gsub(/\s*\(\s*of+icial\s*\)/i, '') # (official)
        .gsub(/\s*\(\s*[0-9]{4}\s*\)/i, '') # (1999)
        .gsub(/\s+\(\s*(HD|HQ)\s*\)$/, '') # HD (HQ)
        .gsub(/[\s\-–_]+(HD|HQ)\s*$/, '') # HD (HQ)
        .gsub(/(\s*[-~_\/]\s*)?\b(with\s+)?lyrics\s*/i, '') # lyrics / with lyrics
        .gsub(/\(\s*(with\s+)?lyrics\s*\)\s*/i, '') # (lyrics) / (with lyrics)
    end

    # Unwraps a leading quoted section for every known quote pair, keeping
    # the quoted text and whatever follows it, then strips outer whitespace.
    def self.clean_quotes(string)
      regexes = QUOTES.map do |set|
        /^#{Regexp.escape(set[0])}(.*?)#{Regexp.escape(set[1])}\s*/
      end
      regexes.reduce(string) { |str, regex| str.gsub(regex, '\1 ') }.strip
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ExtractArtistTrack
  # Entry point of the gem: takes a freeform title and exposes the cleaned
  # artist and track extracted from it. Both values are computed on first
  # access and cached, including a nil result.
  class Parser
    # The title is cleaned once up front; everything else works on that.
    def initialize(title:)
      @title = Cleaner.clean_title(title)
    end

    # Cleaned artist name, or nil when the title is unusable or could not be
    # split.
    def artist
      return @artist if defined?(@artist)

      raw_artist = splitter.artist unless invalid?
      @artist = raw_artist.nil? ? nil : Cleaner.clean_artist(raw_artist)
    end

    # Cleaned track name, or nil when the title is unusable or could not be
    # split.
    def track
      return @track if defined?(@track)

      raw_track = splitter.track unless invalid?
      @track = raw_track.nil? ? nil : Cleaner.clean_track(raw_track)
    end

    private

    # True when cleaning left nothing to parse (nil or empty title).
    def invalid?
      @title.to_s.empty?
    end

    # Splitter for the cleaned title, created on first use.
    def splitter
      return @splitter if @splitter

      @splitter = Splitter.new(title: @title)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module ExtractArtistTrack
  # Splits a title into raw (uncleaned) artist and track parts, first by a
  # known separator and, failing that, by a quoted section. +artist+ and
  # +track+ are nil when the title is nil/empty or cannot be split.
  class Splitter
    attr_reader :title, :artist, :track

    def initialize(title:)
      @title = title
      return if title.nil? || title.empty?

      split = split_by_separator || split_by_quotes
      @artist = split&.artist
      @track = split&.track
    end

    # Splits at the first occurrence of the highest-priority separator (the
    # first entry of SEPARATORS, in list order, that occurs in the title).
    # Returns an OpenStruct with +artist+ (text before the separator) and
    # +track+ (text after it), or nil when no separator is present.
    def split_by_separator
      return nil if title.nil? || title.empty?

      SEPARATORS.each do |separator|
        index = title.index(separator)
        next unless index

        return OpenStruct.new(
          # Exclusive range: a separator at index 0 yields an empty artist
          # rather than the whole title (0..index-1 would be 0..-1).
          artist: title[0...index],
          # Skip past the separator so it never leaks into the track.
          track: title[(index + separator.length)..-1]
        )
      end
      nil
    end

    # Splits at the first quoted section, for the first quote pair of QUOTES
    # that occurs in the title. The quoted section is padded with spaces
    # first. Returns an OpenStruct with +artist+ (text before the quote) and
    # +track+ (the quoted section, quotes included, and everything after it),
    # or nil when no quoted section is present.
    def split_by_quotes
      return nil if title.nil? || title.empty?

      QUOTES.each do |set|
        regex = /#{Regexp.escape(set[0])}(.*?)#{Regexp.escape(set[1])}/
        padded = title.gsub(regex) { |match| " #{match} " }
        index = padded =~ regex
        next unless index

        return OpenStruct.new(
          artist: padded[0...index],
          track: padded[index..-1]
        )
      end
      nil
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: extract_artist_track
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Peter Graham
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-09-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mime-types
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.1'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.12'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.12'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec-its
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.2'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.2'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec_junit_formatter
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.2.3
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.2.3
|
97
|
+
description: Extract artist and track from a freeform YouTube video title.
|
98
|
+
email:
|
99
|
+
executables: []
|
100
|
+
extensions: []
|
101
|
+
extra_rdoc_files: []
|
102
|
+
files:
|
103
|
+
- ".gitignore"
|
104
|
+
- ".rspec"
|
105
|
+
- ".travis.yml"
|
106
|
+
- Gemfile
|
107
|
+
- README.md
|
108
|
+
- Rakefile
|
109
|
+
- bin/console
|
110
|
+
- bin/setup
|
111
|
+
- circle.yml
|
112
|
+
- extract_artist_track.gemspec
|
113
|
+
- lib/extract_artist_track.rb
|
114
|
+
- lib/extract_artist_track/cleaner.rb
|
115
|
+
- lib/extract_artist_track/parser.rb
|
116
|
+
- lib/extract_artist_track/splitter.rb
|
117
|
+
- lib/extract_artist_track/version.rb
|
118
|
+
homepage: https://github.com/6/extract_artist_track
|
119
|
+
licenses: []
|
120
|
+
metadata: {}
|
121
|
+
post_install_message:
|
122
|
+
rdoc_options: []
|
123
|
+
require_paths:
|
124
|
+
- lib
|
125
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
requirements: []
|
136
|
+
rubyforge_project:
|
137
|
+
rubygems_version: 2.5.1
|
138
|
+
signing_key:
|
139
|
+
specification_version: 4
|
140
|
+
summary: Extract artist and track from YouTube video title.
|
141
|
+
test_files: []
|