metal_archives 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +3 -0
- data/LICENSE +0 -0
- data/README.md +88 -0
- data/Rakefile +14 -0
- data/lib/metal_archives.rb +22 -0
- data/lib/metal_archives/configuration.rb +81 -0
- data/lib/metal_archives/error.rb +35 -0
- data/lib/metal_archives/http_client.rb +75 -0
- data/lib/metal_archives/models/artist.rb +216 -0
- data/lib/metal_archives/models/band.rb +291 -0
- data/lib/metal_archives/models/base_model.rb +153 -0
- data/lib/metal_archives/models/label.rb +112 -0
- data/lib/metal_archives/models/range.rb +58 -0
- data/lib/metal_archives/parsers/artist.rb +103 -0
- data/lib/metal_archives/parsers/band.rb +160 -0
- data/lib/metal_archives/parsers/label.rb +68 -0
- data/lib/metal_archives/parsers/parser_helper.rb +79 -0
- data/lib/metal_archives/version.rb +6 -0
- data/metal_archives.gemspec +28 -0
- data/test/base_model_test.rb +87 -0
- data/test/configuration_test.rb +57 -0
- data/test/parser_helper_test.rb +37 -0
- data/test/property/artist_property_test.rb +43 -0
- data/test/property/band_property_test.rb +94 -0
- data/test/query/artist_query_test.rb +44 -0
- data/test/query/band_query_test.rb +84 -0
- data/test/range_test.rb +41 -0
- data/test/test_helper.rb +26 -0
- metadata +214 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
module MetalArchives
  ##
  # Range which can start and/or end with +nil+
  #
  class Range
    include Comparable

    ##
    # Begin- and endpoint of range
    #
    attr_accessor :begin, :end

    ##
    # Create a new range
    #
    # [+_begin+]
    #     Start of range
    #
    #     Default: +nil+
    #
    # [+_end+]
    #     End of range
    #
    #     Default: +nil+
    #
    def initialize(_begin = nil, _end = nil)
      @begin = _begin
      @end = _end
    end

    ##
    # Whether start of range is present
    #
    def begin?
      !!@begin
    end

    ##
    # Whether end of range is present
    #
    def end?
      !!@end
    end

    ##
    # Comparison operator
    #
    # Ranges are ordered by their begin point first; the end point is only
    # consulted when both begin points compare as equal.
    #
    # Returns +-1+, +0+ or +1+, or +nil+ when the two objects cannot be
    # compared (+other+ does not expose +begin+ and +end+, or the endpoints
    # themselves are not comparable with each other).
    #
    def <=>(other)
      # Returning nil (instead of raising NoMethodError) is the contract
      # Comparable expects, and keeps `range == anything` safe.
      return nil unless other.respond_to?(:begin) && other.respond_to?(:end)

      comparison = self.begin <=> other.begin
      # comparison may be nil here, hence no Integer#zero?
      return comparison unless comparison == 0

      self.end <=> other.end
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'date'
|
3
|
+
require 'countries'
|
4
|
+
|
5
|
+
module MetalArchives
  module Parsers
    ##
    # Artist parser
    #
    class Artist # :nodoc:
      class << self
        ##
        # Map attributes to MA attributes
        #
        # Returns +Hash+ with the keys +:query+ and +:iDisplayStart+
        #
        # [+query+]
        #     +Hash+; +:name+ defaults to an empty string and
        #     +:iDisplayStart+ defaults to +0+
        #
        def map_params(query)
          {
            :query => query[:name] || '',

            :iDisplayStart => query[:iDisplayStart] || 0
          }
        end

        ##
        # Parse an artist page
        #
        # Returns +Hash+ of the properties found in the <tt>#member_info</tt>
        # definition lists, plus +:aliases+. Fields whose value is +N/A+ are
        # left out.
        #
        # Raises +Errors::ParserError+ on an unknown field label or gender.
        #
        # [+response+]
        #     HTML document, handed to +Nokogiri::HTML+
        #
        def parse_html(response)
          props = {}
          doc = Nokogiri::HTML(response)

          doc.css('#member_info dl').each do |dl|
            dl.css('dt').each do |dt|
              value = dt.next_element

              # `next` (not `break`): an N/A field must only skip itself,
              # not abort parsing of the remaining fields.
              case dt.content.strip
              when 'Real/full name:'
                props[:name] = value.content.strip
              when 'Age:'
                next if value.content == 'N/A'
                date = value.content.gsub(/ [0-9]* \(born ([^\)]*)\)/, '\1')
                props[:date_of_birth] = Date.parse date
              when 'R.I.P.:'
                next if value.content == 'N/A'
                props[:date_of_death] = Date.parse value.content
              when 'Died of:'
                # Comparison, not assignment: the node must not be overwritten
                next if value.content == 'N/A'
                props[:cause_of_death] = value.content
              when 'Place of origin:'
                next if value.content == 'N/A'
                props[:country] = ISO3166::Country.find_country_by_name(value.css('a').first.content)
                # Text outside the country link, with parentheses removed
                location = value.xpath('text()').map { |x| x.content }.join('').strip.gsub(/[()]/, '')
                props[:location] = location unless location.empty?
              when 'Gender:'
                next if value.content == 'N/A'
                case value.content
                when 'Male'
                  props[:gender] = :male
                when 'Female'
                  props[:gender] = :female
                else
                  raise Errors::ParserError, "Unknown gender: #{value.content}"
                end
              else
                raise Errors::ParserError, "Unknown token: #{dt.content}"
              end
            end
          end

          # The displayed name counts as an alias when it differs from the
          # real/full name
          props[:aliases] = []
          alt = doc.css('.band_member_name').first.content
          props[:aliases] << alt unless props[:name] == alt

          props
        end

        ##
        # Parse an artist's links page
        #
        # Returns +Array+ of +Hash+ with the keys +:url+, +:type+ and +:title+
        #
        # [+response+]
        #     HTML document, handed to +Nokogiri::HTML+
        #
        def parse_links_html(response)
          links = []

          doc = Nokogiri::HTML(response)

          # Default to official links
          type = :official

          doc.css('#linksTablemain tr').each do |row|
            if row['id'] =~ /^header_/
              # Header rows switch the type for all rows that follow
              type = row['id'].gsub(/^header_/, '').downcase.to_sym
            else
              a = row.css('td a').first
              # Rows without an anchor carry no link
              next if a.nil?

              links << {
                :url => a['href'],
                :type => type,
                :title => a.content
              }
            end
          end

          links
        end
      end
    end
  end
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'date'
|
3
|
+
require 'countries'
|
4
|
+
|
5
|
+
module MetalArchives
|
6
|
+
module Parsers
|
7
|
+
##
|
8
|
+
# Band parser
|
9
|
+
#
|
10
|
+
class Band # :nodoc:
|
11
|
+
class << self
|
12
|
+
##
|
13
|
+
# Map attributes to MA attributes
|
14
|
+
#
|
15
|
+
# Returns +Hash+
|
16
|
+
#
|
17
|
+
# [+params+]
|
18
|
+
# +Hash+
|
19
|
+
#
|
20
|
+
def map_params(query)
|
21
|
+
params = {
|
22
|
+
:bandName => query[:name] || '',
|
23
|
+
:exactBandMatch => (!!query[:exact] ? 1 : 0),
|
24
|
+
:genre => query[:genre] || '',
|
25
|
+
:yearCreationFrom => (query[:year] and query[:year].begin ? query[:year].begin.year : '') || '',
|
26
|
+
:yearCreationTo => (query[:year] and query[:year].end ? query[:year].end.year : '') || '',
|
27
|
+
:bandNotes => query[:comment] || '',
|
28
|
+
:status => map_status(query[:status]),
|
29
|
+
:themes => query[:lyrical_themes] || '',
|
30
|
+
:location => query[:location] || '',
|
31
|
+
:bandLabelName => query[:label] || '',
|
32
|
+
:indieLabelBand => (!!query[:independent] ? 1 : 0),
|
33
|
+
|
34
|
+
:iDisplayStart => query[:iDisplayStart] || 0
|
35
|
+
}
|
36
|
+
|
37
|
+
params[:country] = []
|
38
|
+
Array(query[:country]).each do |country|
|
39
|
+
params[:country] << (country.is_a?(ISO3166::Country) ? country.alpha2 : (country || ''))
|
40
|
+
end
|
41
|
+
params[:country] = params[:country].first if (params[:country].size == 1)
|
42
|
+
|
43
|
+
params
|
44
|
+
end
|
45
|
+
|
46
|
+
def parse_html(response)
|
47
|
+
props = {}
|
48
|
+
doc = Nokogiri::HTML response
|
49
|
+
|
50
|
+
props[:name] = doc.css('#band_info .band_name a').first.content
|
51
|
+
|
52
|
+
props[:aliases] = []
|
53
|
+
props[:logo] = doc.css('.band_name_img img').first.attr('src') unless doc.css('.band_name_img').empty?
|
54
|
+
props[:photo] = doc.css('.band_img img').first.attr('src') unless doc.css('.band_img').empty?
|
55
|
+
|
56
|
+
doc.css('#band_stats dl').each do |dl|
|
57
|
+
dl.search('dt').each do |dt|
|
58
|
+
case dt.content
|
59
|
+
when 'Country of origin:'
|
60
|
+
props[:country] = ISO3166::Country.find_country_by_name dt.next_element.css('a').first.content
|
61
|
+
when 'Location:'
|
62
|
+
break if dt.next_element.content == 'N/A'
|
63
|
+
props[:location] = dt.next_element.content
|
64
|
+
when 'Status:'
|
65
|
+
props[:status] = dt.next_element.content.downcase.gsub(/ /, '_').to_sym
|
66
|
+
when 'Formed in:'
|
67
|
+
break if dt.next_element.content == 'N/A'
|
68
|
+
props[:date_formed] = Date.new dt.next_element.content.to_i
|
69
|
+
when 'Genre:'
|
70
|
+
break if dt.next_element.content == 'N/A'
|
71
|
+
props[:genres] = ParserHelper.parse_genre dt.next_element.content
|
72
|
+
when 'Lyrical themes:'
|
73
|
+
props[:lyrical_themes] = []
|
74
|
+
break if dt.next_element.content == 'N/A'
|
75
|
+
dt.next_element.content.split(',').each do |theme|
|
76
|
+
t = theme.split.map(&:capitalize)
|
77
|
+
t.delete '(early)'
|
78
|
+
t.delete '(later)'
|
79
|
+
props[:lyrical_themes] << t.join(' ')
|
80
|
+
end
|
81
|
+
when /(Current|Last) label:/
|
82
|
+
props[:independent] = (dt.next_element.content == 'Unsigned/independent')
|
83
|
+
# TODO
|
84
|
+
when 'Years active:'
|
85
|
+
break if dt.next_element.content == 'N/A'
|
86
|
+
props[:date_active] = []
|
87
|
+
dt.next_element.content.split(',').each do |range|
|
88
|
+
# Aliases
|
89
|
+
range.scan(/\(as ([^)]*)\)/).each { |name| props[:aliases] << name.first }
|
90
|
+
# Ranges
|
91
|
+
r = range.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-')
|
92
|
+
date_start = (r.first == '?' ? nil : Date.new(r.first.to_i))
|
93
|
+
date_end = (r.last == '?' or r.last == 'present' ? nil : Date.new(r.first.to_i))
|
94
|
+
props[:date_active] << Range.new(date_start, date_end)
|
95
|
+
end
|
96
|
+
else
|
97
|
+
raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
props
|
103
|
+
end
|
104
|
+
|
105
|
+
def parse_similar_bands_html(response)
|
106
|
+
similar = []
|
107
|
+
|
108
|
+
doc = Nokogiri::HTML response
|
109
|
+
doc.css('#artist_list tbody tr').each do |row|
|
110
|
+
similar << {
|
111
|
+
:band => MetalArchives::Band.new(:id => row.css('td a').first['href'].split('/').last.to_i),
|
112
|
+
:score => row.css('td').last.content.strip
|
113
|
+
}
|
114
|
+
end
|
115
|
+
|
116
|
+
similar
|
117
|
+
end
|
118
|
+
|
119
|
+
def parse_related_links_html(response)
|
120
|
+
links = []
|
121
|
+
|
122
|
+
doc = Nokogiri::HTML response
|
123
|
+
doc.css('#linksTableOfficial td a').each do |a|
|
124
|
+
links << {
|
125
|
+
:url => a['href'],
|
126
|
+
:type => :official,
|
127
|
+
:title => a.content
|
128
|
+
}
|
129
|
+
end
|
130
|
+
doc.css('#linksTableOfficial_merchandise td a').each do |a|
|
131
|
+
links << {
|
132
|
+
:url => a['href'],
|
133
|
+
:type => :merchandise,
|
134
|
+
:title => a.content
|
135
|
+
}
|
136
|
+
end
|
137
|
+
|
138
|
+
links
|
139
|
+
end
|
140
|
+
|
141
|
+
private
|
142
|
+
def map_status(status)
|
143
|
+
s = {
|
144
|
+
nil => '',
|
145
|
+
:active => 'Active',
|
146
|
+
:split_up => 'Split-up',
|
147
|
+
:on_hold => 'On hold',
|
148
|
+
:unknown => 'Unknown',
|
149
|
+
:changed_name => 'Changed name',
|
150
|
+
:disputed => 'Disputed'
|
151
|
+
}
|
152
|
+
|
153
|
+
raise MetalArchives::Errors::ParserError, "Unknown status: #{status}" unless s[status]
|
154
|
+
|
155
|
+
s[status]
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module MetalArchives
  module Parsers
    ##
    # Label parser
    #
    class Label # :nodoc:
      class << self
        ##
        # Build the URL of a label page
        #
        # Returns +String+
        #
        # [+params+]
        #     +Hash+ with the keys +:name+ and +:id+; both are interpolated
        #     into the URL as-is
        #
        def find_endpoint(params)
          "http://www.metal-archives.com/labels/#{params[:name]}/#{params[:id]}"
        end

        ##
        # Parse a label page
        #
        # Returns +Hash+ of the properties found on the page. Fields whose
        # value is +N/A+ are left out.
        #
        # Raises +RuntimeError+ on an unknown field label.
        #
        # [+response+]
        #     HTML document, handed to +Nokogiri::HTML+
        #
        def parse(response)
          props = {}
          doc = Nokogiri::HTML(response)

          props[:name] = doc.css('#label_info .label_name').first.content

          props[:contact] = doc.css('#label_contact a').map do |contact|
            {
              :title => contact.content,
              :content => contact.attr(:href)
            }
          end

          doc.css('#label_info dl').each do |dl|
            dl.search('dt').each do |dt|
              value = dt.next_element

              # `next` (not `break`): an N/A field must only skip itself,
              # not abort parsing of the remaining fields.
              case dt.content
              when 'Address:'
                next if value.content == 'N/A'
                props[:address] = value.content
              when 'Country:'
                next if value.content == 'N/A'
                props[:country] = ParserHelper.parse_country value.css('a').first.content
              when 'Phone number:'
                next if value.content == 'N/A'
                props[:phone] = value.content
              when 'Status:'
                props[:status] = value.content.downcase.gsub(/ /, '_').to_sym
              when 'Specialised in:'
                next if value.content == 'N/A'
                props[:specializations] = ParserHelper.parse_genre value.content
              when 'Founding date :'
                next if value.content == 'N/A'
                props[:date_founded] = Date.new value.content.to_i
              when 'Sub-labels:'
                # TODO
              when 'Online shopping:'
                # Any value other than Yes/No leaves the property unset
                case value.content
                when 'Yes'
                  props[:online_shopping] = true
                when 'No'
                  props[:online_shopping] = false
                end
              else
                raise "Unknown token: #{dt.content}"
              end
            end
          end

          props
        end
      end
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'countries'
|
3
|
+
|
4
|
+
module MetalArchives
  ##
  # Mapping layer from and to MA Web Service
  #
  module Parsers # :nodoc:
    ##
    # Parsing utilities
    #
    class ParserHelper # :nodoc:
      class << self
        ##
        # Parse a country
        #
        # Returns the result of +ISO3166::Country.find_country_by_name+
        #
        # [+input+]
        #     Country name
        #
        def parse_country(input)
          ISO3166::Country.find_country_by_name(input)
        end

        ##
        # Opinionated parsing of genres
        #
        # Returns an +Array+ of +String+ without duplicates
        #
        # The following components are omitted:
        # - Metal
        # - (early)
        # - (later)
        #
        # All genres are capitalized.
        #
        # For examples on how genres are parsed, refer to +ParserHelperTest::test_parse_genre+
        #
        # [+input+]
        #     Comma-separated +String+ of genres
        #
        def parse_genre(input)
          ignored = ['(early)', '(later)', 'metal']
          genres = []

          # Split fields
          input.split(',').each do |genre|
            ##
            # Start with a single empty genre string. Split the genre by spaces
            # and process each component. A component without a slash is
            # appended to every genre string built so far. A component with
            # slashes multiplies the genre strings: one copy of each per
            # alternative, with that alternative appended.
            #
            # 'Traditional Heavy/Power Metal' => ['Traditional Heavy', 'Traditional Power']
            # 'Traditional/Classical Heavy/Power Metal' => [
            #   'Traditional Heavy', 'Classical Heavy',
            #   'Traditional Power', 'Classical Power']
            #
            combinations = ['']

            genre.downcase.split.reject { |word| ignored.include? word }.each do |word|
              alternatives = word.split('/').reject(&:empty?)
              # A token made up of slashes only carries no genre
              next if alternatives.empty?

              # Already-built prefixes are kept as-is: every word was
              # capitalized when it was appended, and re-capitalizing the
              # whole string would lower-case all words but the first.
              combinations = alternatives.flat_map do |alternative|
                combinations.map do |prefix|
                  prefix.empty? ? alternative.capitalize : "#{prefix} #{alternative.capitalize}"
                end
              end
            end

            # Fields that consisted of omitted components only yield nothing
            genres.concat(combinations.reject(&:empty?))
          end

          genres.uniq
        end
      end
    end
  end
end
|