metal_archives 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +3 -0
- data/LICENSE +0 -0
- data/README.md +88 -0
- data/Rakefile +14 -0
- data/lib/metal_archives.rb +22 -0
- data/lib/metal_archives/configuration.rb +81 -0
- data/lib/metal_archives/error.rb +35 -0
- data/lib/metal_archives/http_client.rb +75 -0
- data/lib/metal_archives/models/artist.rb +216 -0
- data/lib/metal_archives/models/band.rb +291 -0
- data/lib/metal_archives/models/base_model.rb +153 -0
- data/lib/metal_archives/models/label.rb +112 -0
- data/lib/metal_archives/models/range.rb +58 -0
- data/lib/metal_archives/parsers/artist.rb +103 -0
- data/lib/metal_archives/parsers/band.rb +160 -0
- data/lib/metal_archives/parsers/label.rb +68 -0
- data/lib/metal_archives/parsers/parser_helper.rb +79 -0
- data/lib/metal_archives/version.rb +6 -0
- data/metal_archives.gemspec +28 -0
- data/test/base_model_test.rb +87 -0
- data/test/configuration_test.rb +57 -0
- data/test/parser_helper_test.rb +37 -0
- data/test/property/artist_property_test.rb +43 -0
- data/test/property/band_property_test.rb +94 -0
- data/test/query/artist_query_test.rb +44 -0
- data/test/query/band_query_test.rb +84 -0
- data/test/range_test.rb +41 -0
- data/test/test_helper.rb +26 -0
- metadata +214 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
module MetalArchives
  ##
  # Range which can start and/or end with +nil+
  #
  # Ranges are ordered by their begin point first and, when the begin points
  # compare equal, by their end point.
  #
  class Range
    include Comparable

    ##
    # Begin- and endpoint of range
    #
    attr_accessor :begin, :end

    ##
    # Create a new range
    #
    # [+_begin+]
    #     Start of range
    #
    #     Default: +nil+
    #
    # [+_end+]
    #     End of range
    #
    #     Default: +nil+
    #
    def initialize(_begin = nil, _end = nil)
      @begin = _begin
      @end = _end
    end

    ##
    # Whether start of range is present
    #
    # Returns +true+ unless the begin point is +nil+ or +false+
    #
    def begin?
      !!@begin
    end

    ##
    # Whether end of range is present
    #
    # Returns +true+ unless the end point is +nil+ or +false+
    #
    def end?
      !!@end
    end

    ##
    # Comparison operator
    #
    # Compares the begin points and falls back to the end points when the
    # begin points are equal.
    #
    # Returns the result of the underlying +<=>+ call, or +nil+ when
    # +other+ does not expose +begin+ and +end+ or when the compared
    # endpoints are themselves not comparable (e.g. +nil+ against a date).
    #
    # [+other+]
    #     Object responding to +begin+ and +end+
    #
    def <=>(other)
      # Comparable expects nil (not an exception) for incomparable operands,
      # otherwise `range == nil` raises NoMethodError.
      return nil unless other.respond_to?(:begin) && other.respond_to?(:end)

      by_begin = self.begin <=> other.begin

      by_begin == 0 ? (self.end <=> other.end) : by_begin
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'date'
|
3
|
+
require 'countries'
|
4
|
+
|
5
|
+
module MetalArchives
|
6
|
+
module Parsers
|
7
|
+
##
|
8
|
+
# Artist parser
|
9
|
+
#
|
10
|
+
class Artist # :nodoc:
  class << self
    ##
    # Map attributes to MA attributes
    #
    # Returns +Hash+
    #
    # [+query+]
    #     +Hash+; the +:name+ and +:iDisplayStart+ keys are read, missing
    #     values default to an empty string and +0+ respectively
    #
    def map_params(query)
      {
        :query => query[:name] || '',

        :iDisplayStart => query[:iDisplayStart] || 0
      }
    end

    ##
    # Parse the artist page
    #
    # Returns +Hash+ of properties. Fields whose value is 'N/A' are left
    # out; +:aliases+ is always present.
    #
    # Raises a parser error on unknown field labels or gender values.
    #
    # [+response+]
    #     Document handed to +Nokogiri::HTML+
    #
    def parse_html(response)
      props = {}
      doc = Nokogiri::HTML(response)

      doc.css('#member_info dl').each do |dl|
        dl.css('dt').each do |dt|
          dd = dt.next_element

          # A field without a value ('N/A') only skips that field (next);
          # the remaining fields of the list must still be processed.
          case dt.content.strip
          when 'Real/full name:'
            props[:name] = dd.content.strip
          when 'Age:'
            next if dd.content == 'N/A'

            # Keep only the date inside "(born ...)"
            date = dd.content.gsub(/ [0-9]* \(born ([^\)]*)\)/, '\1')
            props[:date_of_birth] = Date.parse date
          when 'R.I.P.:'
            next if dd.content == 'N/A'

            props[:date_of_death] = Date.parse dd.content
          when 'Died of:'
            # Comparison, not assignment: assigning would overwrite the
            # node content and always skip the cause of death.
            next if dd.content == 'N/A'

            props[:cause_of_death] = dd.content
          when 'Place of origin:'
            next if dd.content == 'N/A'

            props[:country] = ISO3166::Country.find_country_by_name(dd.css('a').first.content)

            # Text directly inside the element (outside the country link),
            # with parentheses removed
            location = dd.xpath('text()').map { |x| x.content }.join('').strip.gsub(/[()]/, '')
            props[:location] = location unless location.empty?
          when 'Gender:'
            next if dd.content == 'N/A'

            case dd.content
            when 'Male'
              props[:gender] = :male
            when 'Female'
              props[:gender] = :female
            else
              raise MetalArchives::Errors::ParserError, "Unknown gender: #{dd.content}"
            end
          else
            raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
          end
        end
      end

      # The displayed name counts as an alias when it differs from the
      # real/full name
      props[:aliases] = []
      alt = doc.css('.band_member_name').first.content
      props[:aliases] << alt unless props[:name] == alt

      props
    end

    ##
    # Parse the artist links table
    #
    # Returns +Array+ of +Hash+ with +:url+, +:type+ and +:title+ keys.
    # The type starts as +:official+ and changes at every row whose id
    # starts with 'header_'.
    #
    # [+response+]
    #     Document handed to +Nokogiri::HTML+
    #
    def parse_links_html(response)
      links = []

      doc = Nokogiri::HTML(response)

      # Default to official links
      type = :official

      doc.css('#linksTablemain tr').each do |row|
        if row['id'] =~ /^header_/
          type = row['id'].gsub(/^header_/, '').downcase.to_sym
        else
          a = row.css('td a').first
          # Rows without a link carry no information; skip instead of crashing
          next unless a

          links << {
            :url => a['href'],
            :type => type,
            :title => a.content
          }
        end
      end

      links
    end
  end
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'date'
|
3
|
+
require 'countries'
|
4
|
+
|
5
|
+
module MetalArchives
|
6
|
+
module Parsers
|
7
|
+
##
|
8
|
+
# Band parser
|
9
|
+
#
|
10
|
+
class Band # :nodoc:
  class << self
    ##
    # Map attributes to MA attributes
    #
    # Returns +Hash+
    #
    # Raises a parser error when +query[:status]+ is not a known status.
    #
    # [+query+]
    #     +Hash+; +:year+, when given, must respond to +begin+ and +end+,
    #     +:country+ may be a single value or an +Array+ of
    #     +ISO3166::Country+ and/or raw values
    #
    def map_params(query)
      year = query[:year]

      params = {
        :bandName => query[:name] || '',
        :exactBandMatch => (query[:exact] ? 1 : 0),
        :genre => query[:genre] || '',
        :yearCreationFrom => (year && year.begin ? year.begin.year : ''),
        :yearCreationTo => (year && year.end ? year.end.year : ''),
        :bandNotes => query[:comment] || '',
        :status => map_status(query[:status]),
        :themes => query[:lyrical_themes] || '',
        :location => query[:location] || '',
        :bandLabelName => query[:label] || '',
        :indieLabelBand => (query[:independent] ? 1 : 0),

        :iDisplayStart => query[:iDisplayStart] || 0
      }

      countries = Array(query[:country]).map do |country|
        country.is_a?(ISO3166::Country) ? country.alpha2 : (country || '')
      end
      # A single country is passed as a scalar, anything else as an array
      params[:country] = (countries.size == 1 ? countries.first : countries)

      params
    end

    ##
    # Parse the band page
    #
    # Returns +Hash+ of properties. Fields whose value is 'N/A' are left
    # out (except +:lyrical_themes+, which is then an empty +Array+).
    #
    # Raises a parser error on unknown field labels.
    #
    # [+response+]
    #     Document handed to +Nokogiri::HTML+
    #
    def parse_html(response)
      props = {}
      doc = Nokogiri::HTML(response)

      props[:name] = doc.css('#band_info .band_name a').first.content

      props[:aliases] = []
      props[:logo] = doc.css('.band_name_img img').first.attr('src') unless doc.css('.band_name_img').empty?
      props[:photo] = doc.css('.band_img img').first.attr('src') unless doc.css('.band_img').empty?

      doc.css('#band_stats dl').each do |dl|
        dl.search('dt').each do |dt|
          dd = dt.next_element

          # A field without a value ('N/A') only skips that field (next);
          # the remaining fields of the list must still be processed.
          case dt.content
          when 'Country of origin:'
            props[:country] = ISO3166::Country.find_country_by_name(dd.css('a').first.content)
          when 'Location:'
            next if dd.content == 'N/A'

            props[:location] = dd.content
          when 'Status:'
            props[:status] = dd.content.downcase.gsub(/ /, '_').to_sym
          when 'Formed in:'
            next if dd.content == 'N/A'

            props[:date_formed] = Date.new dd.content.to_i
          when 'Genre:'
            next if dd.content == 'N/A'

            props[:genres] = ParserHelper.parse_genre dd.content
          when 'Lyrical themes:'
            props[:lyrical_themes] = []
            next if dd.content == 'N/A'

            dd.content.split(',').each do |theme|
              words = theme.split.map(&:capitalize) - ['(early)', '(later)']
              props[:lyrical_themes] << words.join(' ')
            end
          when /(Current|Last) label:/
            props[:independent] = (dd.content == 'Unsigned/independent')
            # TODO
          when 'Years active:'
            next if dd.content == 'N/A'

            props[:date_active] = []
            dd.content.split(',').each do |period|
              # Aliases: names given as "(as <name>)"
              period.scan(/\(as ([^)]*)\)/).each { |name| props[:aliases] << name.first }

              # Ranges: "<start>-<end>" where '?' marks an unknown year
              # and 'present' an open end
              years = period.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-')
              date_start = (years.first == '?' ? nil : Date.new(years.first.to_i))
              date_end = (['?', 'present'].include?(years.last) ? nil : Date.new(years.last.to_i))

              props[:date_active] << MetalArchives::Range.new(date_start, date_end)
            end
          else
            raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
          end
        end
      end

      props
    end

    ##
    # Parse the similar bands table
    #
    # Returns +Array+ of +Hash+ with a +:band+ (built from the numeric id
    # at the end of the row's first link) and a +:score+ (+String+, the
    # stripped content of the row's last cell)
    #
    # [+response+]
    #     Document handed to +Nokogiri::HTML+
    #
    def parse_similar_bands_html(response)
      similar = []

      doc = Nokogiri::HTML(response)
      doc.css('#artist_list tbody tr').each do |row|
        link = row.css('td a').first
        # Rows without a band link cannot be mapped to a band; skip them
        next unless link

        similar << {
          :band => MetalArchives::Band.new(:id => link['href'].split('/').last.to_i),
          :score => row.css('td').last.content.strip
        }
      end

      similar
    end

    ##
    # Parse the related links tables
    #
    # Returns +Array+ of +Hash+ with +:url+, +:type+ (+:official+ or
    # +:merchandise+) and +:title+ keys, official links first
    #
    # [+response+]
    #     Document handed to +Nokogiri::HTML+
    #
    def parse_related_links_html(response)
      links = []

      doc = Nokogiri::HTML(response)

      [
        ['#linksTableOfficial td a', :official],
        ['#linksTableOfficial_merchandise td a', :merchandise]
      ].each do |selector, type|
        doc.css(selector).each do |a|
          links << {
            :url => a['href'],
            :type => type,
            :title => a.content
          }
        end
      end

      links
    end

    private

    ##
    # Map a status symbol (or +nil+) to the MA status string
    #
    # Raises a parser error for unknown statuses.
    #
    def map_status(status)
      statuses = {
        nil => '',
        :active => 'Active',
        :split_up => 'Split-up',
        :on_hold => 'On hold',
        :unknown => 'Unknown',
        :changed_name => 'Changed name',
        :disputed => 'Disputed'
      }

      statuses.fetch(status) do
        raise MetalArchives::Errors::ParserError, "Unknown status: #{status}"
      end
    end
  end
end
|
159
|
+
end
|
160
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module MetalArchives
|
5
|
+
module Parsers
|
6
|
+
##
|
7
|
+
# Label parser
|
8
|
+
#
|
9
|
+
class Label # :nodoc:
  class << self
    ##
    # Build the URL of a label page
    #
    # Returns +String+
    #
    # [+params+]
    #     +Hash+ with +:name+ and +:id+ keys; both are interpolated into
    #     the URL as-is
    #
    def find_endpoint(params)
      "http://www.metal-archives.com/labels/#{params[:name]}/#{params[:id]}"
    end

    ##
    # Parse the label page
    #
    # Returns +Hash+ of properties. +:name+ and +:contact+ are always
    # present; fields whose value is 'N/A' are left out.
    #
    # Raises +RuntimeError+ on unknown field labels.
    #
    # [+response+]
    #     Document handed to +Nokogiri::HTML+
    #
    def parse(response)
      props = {}
      doc = Nokogiri::HTML(response)

      props[:name] = doc.css('#label_info .label_name').first.content

      props[:contact] = doc.css('#label_contact a').map do |contact|
        {
          :title => contact.content,
          :content => contact.attr(:href)
        }
      end

      doc.css('#label_info dl').each do |dl|
        dl.search('dt').each do |dt|
          dd = dt.next_element

          # A field without a value ('N/A') only skips that field (next);
          # the remaining fields of the list must still be processed.
          case dt.content
          when 'Address:'
            next if dd.content == 'N/A'

            props[:address] = dd.content
          when 'Country:'
            next if dd.content == 'N/A'

            props[:country] = ParserHelper.parse_country dd.css('a').first.content
          when 'Phone number:'
            next if dd.content == 'N/A'

            props[:phone] = dd.content
          when 'Status:'
            props[:status] = dd.content.downcase.gsub(/ /, '_').to_sym
          when 'Specialised in:'
            next if dd.content == 'N/A'

            props[:specializations] = ParserHelper.parse_genre dd.content
          when 'Founding date :'
            next if dd.content == 'N/A'

            props[:date_founded] = Date.new dd.content.to_i
          when 'Sub-labels:'
            # TODO
          when 'Online shopping:'
            # Any value other than 'Yes'/'No' leaves the property unset
            case dd.content
            when 'Yes'
              props[:online_shopping] = true
            when 'No'
              props[:online_shopping] = false
            end
          else
            raise "Unknown token: #{dt.content}"
          end
        end
      end

      props
    end
  end
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'countries'
|
3
|
+
|
4
|
+
module MetalArchives
|
5
|
+
##
|
6
|
+
# Mapping layer from and to MA Web Service
|
7
|
+
#
|
8
|
+
module Parsers # :nodoc:
|
9
|
+
##
|
10
|
+
# Parsing utilities
|
11
|
+
#
|
12
|
+
class ParserHelper # :nodoc:
  class << self
    ##
    # Parse a country
    #
    # Returns the result of +ISO3166::Country.find_country_by_name+
    #
    # [+input+]
    #     Country name
    #
    def parse_country(input)
      ISO3166::Country.find_country_by_name(input)
    end

    ##
    # Opinionated parsing of genres
    #
    # Returns an +Array+ of +String+ without duplicates
    #
    # The following components are omitted:
    # - Metal
    # - (early)
    # - (later)
    #
    # All genres are capitalized.
    #
    # The input is split on commas. Every comma separated entry is split
    # on whitespace; a word containing slashes stands for several
    # alternatives and the entry is expanded into one genre per
    # alternative:
    #
    #   'Traditional Heavy/Power Metal' => ['Traditional Heavy', 'Traditional Power']
    #   'Traditional/Classical Heavy/Power Metal' => [
    #     'Traditional Heavy', 'Classical Heavy',
    #     'Traditional Power', 'Classical Power']
    #   'Black/Death/Thrash Metal' => ['Black', 'Death', 'Thrash']
    #
    # Entries that consist solely of omitted components yield no genre.
    #
    # [+input+]
    #     +String+ of comma separated genres
    #
    def parse_genre(input)
      omitted = ['(early)', '(later)', 'metal']

      genres = input.split(',').flat_map do |genre|
        words = genre.downcase.split.reject { |word| omitted.include? word }

        # Start with a single empty genre string and append every word to
        # all genre strings built so far, once per slash alternative.
        words.inject(['']) do |prefixes, word|
          alternatives = word.split('/').reject(&:empty?).map(&:capitalize)
          # A word made of slashes only contributes nothing
          next prefixes if alternatives.empty?

          alternatives.flat_map do |alternative|
            # Prefixes are already capitalized word by word; capitalizing
            # them again would lower-case every word but the first.
            prefixes.map { |prefix| prefix.empty? ? alternative : "#{prefix} #{alternative}" }
          end
        end
      end

      genres.reject(&:empty?).uniq
    end
  end
end
|
78
|
+
end
|
79
|
+
end
|