metal_archives 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/lib/metal_archives.rb +1 -1
- data/lib/metal_archives/configuration.rb +2 -1
- data/lib/metal_archives/http_client.rb +6 -1
- data/lib/metal_archives/models/artist.rb +48 -30
- data/lib/metal_archives/models/band.rb +65 -41
- data/lib/metal_archives/models/base_model.rb +29 -18
- data/lib/metal_archives/parsers/artist.rb +40 -19
- data/lib/metal_archives/parsers/band.rb +56 -20
- data/lib/metal_archives/parsers/{parser_helper.rb → parser.rb} +12 -3
- data/lib/metal_archives/version.rb +1 -1
- data/metal_archives.gemspec +10 -10
- data/test/parser_test.rb +37 -0
- data/test/query/artist_query_test.rb +1 -1
- data/test/query/band_query_test.rb +1 -1
- metadata +19 -79
- data/test/parser_helper_test.rb +0 -37
@@ -1,8 +1,8 @@
|
|
1
1
|
module MetalArchives
|
2
2
|
##
|
3
|
-
#
|
3
|
+
# Abstract model class
|
4
4
|
#
|
5
|
-
class BaseModel
|
5
|
+
class BaseModel
|
6
6
|
##
|
7
7
|
# Generic shallow copy constructor
|
8
8
|
#
|
@@ -21,21 +21,32 @@ module MetalArchives
|
|
21
21
|
obj.instance_of? self.class and self.id == obj.id
|
22
22
|
end
|
23
23
|
|
24
|
+
##
|
25
|
+
# Fetch, parse and load the data
|
26
|
+
#
|
27
|
+
# [Raises]
|
28
|
+
# - rdoc-ref:MetalArchives::Errors::InvalidIDError when no id
|
29
|
+
# - rdoc-ref:MetalArchives::Errors::APIError when receiving a status code >= 400 (except 404)
|
30
|
+
#
|
31
|
+
def load!
|
32
|
+
raise Errors::InvalidIDError, 'no id present' unless !!id
|
33
|
+
|
34
|
+
# Use constructor to set attributes
|
35
|
+
initialize assemble
|
36
|
+
end
|
37
|
+
|
24
38
|
protected
|
25
39
|
##
|
26
|
-
#
|
40
|
+
# Fetch the data and assemble the model
|
27
41
|
#
|
28
|
-
#
|
29
|
-
# Raises rdoc-ref:MetalArchives::Errors::NotImplementedError when no :assemble method is implemented
|
30
|
-
# Raises rdoc-ref:MetalArchives::Errors::APIError when receiving a status code >= 400 (except 404)
|
42
|
+
# Override this method
|
31
43
|
#
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
initialize assemble
|
44
|
+
# [Raises]
|
45
|
+
# - rdoc-ref:MetalArchives::Errors::InvalidIDError when no or invalid id
|
46
|
+
# - rdoc-ref:MetalArchives::Errors::APIError when receiving a status code >= 400 (except 404)
|
47
|
+
#
|
48
|
+
def assemble
|
49
|
+
raise Errors::NotImplementedError, 'method :assemble not implemented'
|
39
50
|
end
|
40
51
|
|
41
52
|
class << self
|
@@ -66,13 +77,13 @@ module MetalArchives
|
|
66
77
|
|
67
78
|
# property
|
68
79
|
define_method(name) do
|
69
|
-
|
80
|
+
load! unless instance_variable_defined?("@#{name}") or name == :id
|
70
81
|
instance_variable_get("@#{name}")
|
71
82
|
end
|
72
83
|
|
73
84
|
# property?
|
74
85
|
define_method("#{name}?") do
|
75
|
-
|
86
|
+
load! unless instance_variable_defined?("@#{name}") or name == :id
|
76
87
|
|
77
88
|
property = instance_variable_get("@#{name}")
|
78
89
|
property.respond_to?(:empty?) ? !property.empty? : !!property
|
@@ -116,13 +127,13 @@ module MetalArchives
|
|
116
127
|
|
117
128
|
# property
|
118
129
|
define_method(name) do
|
119
|
-
|
130
|
+
load! unless instance_variable_defined?("@#{name}")
|
120
131
|
instance_variable_get("@#{name}")
|
121
132
|
end
|
122
133
|
|
123
134
|
# property?
|
124
135
|
define_method("#{name}?") do
|
125
|
-
|
136
|
+
load! unless instance_variable_defined?("@#{name}")
|
126
137
|
|
127
138
|
property = instance_variable_get("@#{name}")
|
128
139
|
property.respond_to?(:empty?) ? !property.empty? : !!property
|
@@ -145,7 +156,7 @@ module MetalArchives
|
|
145
156
|
end
|
146
157
|
|
147
158
|
##
|
148
|
-
# Defines a model boolean property. This method is an alias for
|
159
|
+
# Defines a model boolean property. This method is an alias for <tt>enum name, :values => [true, false]</tt>
|
149
160
|
#
|
150
161
|
# [+name+]
|
151
162
|
# Name of the property
|
@@ -7,7 +7,7 @@ module Parsers
|
|
7
7
|
##
|
8
8
|
# Artist parser
|
9
9
|
#
|
10
|
-
class Artist # :nodoc:
|
10
|
+
class Artist < Parser # :nodoc:
|
11
11
|
class << self
|
12
12
|
##
|
13
13
|
# Map attributes to MA attributes
|
@@ -19,47 +19,52 @@ module Parsers
|
|
19
19
|
#
|
20
20
|
def map_params(query)
|
21
21
|
params = {
|
22
|
-
:query => query[:name] || ''
|
23
|
-
|
24
|
-
:iDisplayStart => query[:iDisplayStart] || 0
|
22
|
+
:query => query[:name] || ''
|
25
23
|
}
|
26
24
|
|
27
25
|
params
|
28
26
|
end
|
29
27
|
|
28
|
+
##
|
29
|
+
# Parse main HTML page
|
30
|
+
#
|
31
|
+
# Returns +Hash+
|
32
|
+
#
|
33
|
+
# [Raises]
|
34
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
35
|
+
#
|
30
36
|
def parse_html(response)
|
31
37
|
props = {}
|
32
38
|
doc = Nokogiri::HTML response
|
33
39
|
|
34
40
|
doc.css('#member_info dl').each do |dl|
|
35
41
|
dl.css('dt').each do |dt|
|
36
|
-
|
42
|
+
content = sanitize(dt.next_element.content)
|
43
|
+
|
44
|
+
next if content == 'N/A'
|
45
|
+
|
46
|
+
case sanitize(dt.content)
|
37
47
|
when 'Real/full name:'
|
38
|
-
props[:name] =
|
48
|
+
props[:name] = content
|
39
49
|
when 'Age:'
|
40
|
-
|
41
|
-
date = dt.next_element.content.gsub(/ [0-9]* \(born ([^\)]*)\)/, '\1')
|
50
|
+
date = content.gsub(/ [0-9]* \(born ([^\)]*)\)/, '\1')
|
42
51
|
props[:date_of_birth] = Date.parse date
|
43
52
|
when 'R.I.P.:'
|
44
|
-
|
45
|
-
props[:date_of_death] = Date.parse dt.next_element.content
|
53
|
+
props[:date_of_death] = Date.parse content
|
46
54
|
when 'Died of:'
|
47
|
-
|
48
|
-
props[:cause_of_death] = dt.next_element.content
|
55
|
+
props[:cause_of_death] = content
|
49
56
|
when 'Place of origin:'
|
50
|
-
|
51
|
-
props[:country] = ISO3166::Country.find_country_by_name(dt.next_element.css('a').first.content)
|
57
|
+
props[:country] = ISO3166::Country.find_country_by_name(sanitize(dt.next_element.css('a').first.content))
|
52
58
|
location = dt.next_element.xpath('text()').map { |x| x.content }.join('').strip.gsub(/[()]/, '')
|
53
59
|
props[:location] = location unless location.empty?
|
54
60
|
when 'Gender:'
|
55
|
-
|
56
|
-
case dt.next_element.content
|
61
|
+
case content
|
57
62
|
when 'Male'
|
58
63
|
props[:gender] = :male
|
59
64
|
when 'Female'
|
60
65
|
props[:gender] = :female
|
61
66
|
else
|
62
|
-
raise Errors::ParserError, "Unknown gender: #{
|
67
|
+
raise Errors::ParserError, "Unknown gender: #{content}"
|
63
68
|
end
|
64
69
|
else
|
65
70
|
raise Errors::ParserError, "Unknown token: #{dt.content}"
|
@@ -68,14 +73,23 @@ module Parsers
|
|
68
73
|
end
|
69
74
|
|
70
75
|
props[:aliases] = []
|
71
|
-
alt = doc.css('.band_member_name').first.content
|
76
|
+
alt = sanitize doc.css('.band_member_name').first.content
|
72
77
|
props[:aliases] << alt unless props[:name] == alt
|
73
78
|
|
74
79
|
props
|
75
80
|
rescue => e
|
76
|
-
|
81
|
+
e.backtrace.each { |b| MetalArchives::config.logger.error b }
|
82
|
+
raise Errors::ParserError, e
|
77
83
|
end
|
78
84
|
|
85
|
+
##
|
86
|
+
# Parse links HTML page
|
87
|
+
#
|
88
|
+
# Returns +Array+ of +Hash+
|
89
|
+
#
|
90
|
+
# [Raises]
|
91
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
92
|
+
#
|
79
93
|
def parse_links_html(response)
|
80
94
|
links = []
|
81
95
|
|
@@ -89,6 +103,10 @@ module Parsers
|
|
89
103
|
type = row['id'].gsub(/^header_/, '').downcase.to_sym
|
90
104
|
else
|
91
105
|
a = row.css('td a').first
|
106
|
+
|
107
|
+
# No links have been added yet
|
108
|
+
next unless a
|
109
|
+
|
92
110
|
links << {
|
93
111
|
:url => a['href'],
|
94
112
|
:type => type,
|
@@ -98,6 +116,9 @@ module Parsers
|
|
98
116
|
end
|
99
117
|
|
100
118
|
links
|
119
|
+
rescue => e
|
120
|
+
e.backtrace.each { |b| MetalArchives::config.logger.error b }
|
121
|
+
raise Errors::ParserError, e
|
101
122
|
end
|
102
123
|
end
|
103
124
|
end
|
@@ -7,7 +7,7 @@ module Parsers
|
|
7
7
|
##
|
8
8
|
# Band parser
|
9
9
|
#
|
10
|
-
class Band # :nodoc:
|
10
|
+
class Band < Parser # :nodoc:
|
11
11
|
class << self
|
12
12
|
##
|
13
13
|
# Map attributes to MA attributes
|
@@ -29,9 +29,7 @@ module Parsers
|
|
29
29
|
:themes => query[:lyrical_themes] || '',
|
30
30
|
:location => query[:location] || '',
|
31
31
|
:bandLabelName => query[:label] || '',
|
32
|
-
:indieLabelBand => (!!query[:independent] ? 1 : 0)
|
33
|
-
|
34
|
-
:iDisplayStart => query[:iDisplayStart] || 0
|
32
|
+
:indieLabelBand => (!!query[:independent] ? 1 : 0)
|
35
33
|
}
|
36
34
|
|
37
35
|
params[:country] = []
|
@@ -43,11 +41,19 @@ module Parsers
|
|
43
41
|
params
|
44
42
|
end
|
45
43
|
|
44
|
+
##
|
45
|
+
# Parse main HTML page
|
46
|
+
#
|
47
|
+
# Returns +Hash+
|
48
|
+
#
|
49
|
+
# [Raises]
|
50
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
51
|
+
#
|
46
52
|
def parse_html(response)
|
47
53
|
props = {}
|
48
54
|
doc = Nokogiri::HTML response
|
49
55
|
|
50
|
-
props[:name] = doc.css('#band_info .band_name a').first.content
|
56
|
+
props[:name] = sanitize doc.css('#band_info .band_name a').first.content
|
51
57
|
|
52
58
|
props[:aliases] = []
|
53
59
|
props[:logo] = doc.css('.band_name_img img').first.attr('src') unless doc.css('.band_name_img').empty?
|
@@ -55,43 +61,42 @@ module Parsers
|
|
55
61
|
|
56
62
|
doc.css('#band_stats dl').each do |dl|
|
57
63
|
dl.search('dt').each do |dt|
|
64
|
+
content = sanitize(dt.next_element.content)
|
65
|
+
|
66
|
+
next if content == 'N/A'
|
67
|
+
|
58
68
|
case dt.content
|
59
69
|
when 'Country of origin:'
|
60
|
-
props[:country] = ISO3166::Country.find_country_by_name dt.next_element.css('a').first.content
|
70
|
+
props[:country] = ISO3166::Country.find_country_by_name sanitize(dt.next_element.css('a').first.content)
|
61
71
|
when 'Location:'
|
62
|
-
|
63
|
-
props[:location] = dt.next_element.content
|
72
|
+
props[:location] = content
|
64
73
|
when 'Status:'
|
65
|
-
props[:status] =
|
74
|
+
props[:status] = content.downcase.gsub(/ /, '_').to_sym
|
66
75
|
when 'Formed in:'
|
67
|
-
|
68
|
-
props[:date_formed] = Date.new dt.next_element.content.to_i
|
76
|
+
props[:date_formed] = Date.new content.to_i
|
69
77
|
when 'Genre:'
|
70
|
-
|
71
|
-
props[:genres] = ParserHelper.parse_genre dt.next_element.content
|
78
|
+
props[:genres] = parse_genre content
|
72
79
|
when 'Lyrical themes:'
|
73
80
|
props[:lyrical_themes] = []
|
74
|
-
|
75
|
-
dt.next_element.content.split(',').each do |theme|
|
81
|
+
content.split(',').each do |theme|
|
76
82
|
t = theme.split.map(&:capitalize)
|
77
83
|
t.delete '(early)'
|
78
84
|
t.delete '(later)'
|
79
85
|
props[:lyrical_themes] << t.join(' ')
|
80
86
|
end
|
81
87
|
when /(Current|Last) label:/
|
82
|
-
props[:independent] = (
|
88
|
+
props[:independent] = (content == 'Unsigned/independent')
|
83
89
|
# TODO
|
84
90
|
when 'Years active:'
|
85
|
-
break if dt.next_element.content == 'N/A'
|
86
91
|
props[:date_active] = []
|
87
|
-
|
92
|
+
content.split(',').each do |range|
|
88
93
|
# Aliases
|
89
94
|
range.scan(/\(as ([^)]*)\)/).each { |name| props[:aliases] << name.first }
|
90
95
|
# Ranges
|
91
96
|
r = range.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-')
|
92
97
|
date_start = (r.first == '?' ? nil : Date.new(r.first.to_i))
|
93
98
|
date_end = (r.last == '?' or r.last == 'present' ? nil : Date.new(r.first.to_i))
|
94
|
-
props[:date_active] << Range.new(date_start, date_end)
|
99
|
+
props[:date_active] << MetalArchives::Range.new(date_start, date_end)
|
95
100
|
end
|
96
101
|
else
|
97
102
|
raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
|
@@ -101,9 +106,18 @@ module Parsers
|
|
101
106
|
|
102
107
|
props
|
103
108
|
rescue => e
|
104
|
-
|
109
|
+
e.backtrace.each { |b| MetalArchives::config.logger.error b }
|
110
|
+
raise Errors::ParserError, e
|
105
111
|
end
|
106
112
|
|
113
|
+
##
|
114
|
+
# Parse similar bands HTML page
|
115
|
+
#
|
116
|
+
# Returns +Array+
|
117
|
+
#
|
118
|
+
# [Raises]
|
119
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
120
|
+
#
|
107
121
|
def parse_similar_bands_html(response)
|
108
122
|
similar = []
|
109
123
|
|
@@ -116,8 +130,19 @@ module Parsers
|
|
116
130
|
end
|
117
131
|
|
118
132
|
similar
|
133
|
+
rescue => e
|
134
|
+
e.backtrace.each { |b| MetalArchives::config.logger.error b }
|
135
|
+
raise Errors::ParserError, e
|
119
136
|
end
|
120
137
|
|
138
|
+
##
|
139
|
+
# Parse related links HTML page
|
140
|
+
#
|
141
|
+
# Returns +Array+
|
142
|
+
#
|
143
|
+
# [Raises]
|
144
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
145
|
+
#
|
121
146
|
def parse_related_links_html(response)
|
122
147
|
links = []
|
123
148
|
|
@@ -138,9 +163,20 @@ module Parsers
|
|
138
163
|
end
|
139
164
|
|
140
165
|
links
|
166
|
+
rescue => e
|
167
|
+
e.backtrace.each { |b| MetalArchives::config.logger.error b }
|
168
|
+
raise Errors::ParserError, e
|
141
169
|
end
|
142
170
|
|
143
171
|
private
|
172
|
+
##
|
173
|
+
# Map MA band status
|
174
|
+
#
|
175
|
+
# Returns +Symbol+
|
176
|
+
#
|
177
|
+
# [Raises]
|
178
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
179
|
+
#
|
144
180
|
def map_status(status)
|
145
181
|
s = {
|
146
182
|
nil => '',
|
@@ -7,9 +7,9 @@ module MetalArchives
|
|
7
7
|
#
|
8
8
|
module Parsers # :nodoc:
|
9
9
|
##
|
10
|
-
#
|
10
|
+
# Parser base class
|
11
11
|
#
|
12
|
-
class
|
12
|
+
class Parser
|
13
13
|
class << self
|
14
14
|
##
|
15
15
|
# Parse a country
|
@@ -20,6 +20,15 @@ module Parsers # :nodoc:
|
|
20
20
|
ISO3166::Country.find_country_by_name (input)
|
21
21
|
end
|
22
22
|
|
23
|
+
##
|
24
|
+
# Sanitize a string
|
25
|
+
#
|
26
|
+
# Returns +String+
|
27
|
+
#
|
28
|
+
def sanitize(input)
|
29
|
+
input.gsub(/^"/, '').gsub(/"$/, '').strip
|
30
|
+
end
|
31
|
+
|
23
32
|
##
|
24
33
|
# Opinionated parsing of genres
|
25
34
|
#
|
@@ -32,7 +41,7 @@ module Parsers # :nodoc:
|
|
32
41
|
#
|
33
42
|
# All genres are capitalized.
|
34
43
|
#
|
35
|
-
# For examples on how genres are parsed, refer to +
|
44
|
+
# For examples on how genres are parsed, refer to +ParserTest#test_parse_genre+
|
36
45
|
#
|
37
46
|
def parse_genre(input)
|
38
47
|
genres = []
|
data/metal_archives.gemspec
CHANGED
@@ -14,15 +14,15 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.version = MetalArchives::VERSION
|
15
15
|
gem.license = "MIT"
|
16
16
|
|
17
|
-
gem.add_development_dependency 'byebug', '~> 9.0
|
18
|
-
gem.add_development_dependency 'rake', '~> 11.
|
19
|
-
gem.add_development_dependency 'rdoc', '~> 5.0
|
20
|
-
gem.add_development_dependency 'test-unit', '~> 3.
|
21
|
-
gem.add_development_dependency 'activesupport', '~> 5.0
|
17
|
+
gem.add_development_dependency 'byebug', '~> 9.0'
|
18
|
+
gem.add_development_dependency 'rake', '~> 11.0'
|
19
|
+
gem.add_development_dependency 'rdoc', '~> 5.0'
|
20
|
+
gem.add_development_dependency 'test-unit', '~> 3.0'
|
21
|
+
gem.add_development_dependency 'activesupport', '~> 5.0'
|
22
22
|
|
23
|
-
gem.add_dependency 'faraday', '~> 0.
|
24
|
-
gem.add_dependency 'faraday-http-cache', '~>
|
25
|
-
gem.add_dependency 'faraday_throttler', '~> 0.0.3'
|
26
|
-
gem.add_dependency 'nokogiri', '~> 1.6.8
|
27
|
-
gem.add_dependency 'countries', '~> 1.2.5'
|
23
|
+
gem.add_dependency 'faraday', '~> 0.9'
|
24
|
+
gem.add_dependency 'faraday-http-cache', '~> 2.0'
|
25
|
+
gem.add_dependency 'faraday_throttler', '~> 0.0.3'
|
26
|
+
gem.add_dependency 'nokogiri', '~> 1.6.8'
|
27
|
+
gem.add_dependency 'countries', '~> 1.2.5'
|
28
28
|
end
|
data/test/parser_test.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
require 'date'
|
4
|
+
require 'countries'
|
5
|
+
|
6
|
+
require 'metal_archives/parsers/parser.rb'
|
7
|
+
|
8
|
+
class ParserTest < Test::Unit::TestCase
|
9
|
+
def test_parse_country
|
10
|
+
assert_equal ISO3166::Country['US'], MetalArchives::Parsers::Parser.parse_country('United States')
|
11
|
+
assert_equal ISO3166::Country['DE'], MetalArchives::Parsers::Parser.parse_country('Germany')
|
12
|
+
assert_equal ISO3166::Country['BE'], MetalArchives::Parsers::Parser.parse_country('Belgium')
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_parse_genre
|
16
|
+
assert_equal ['Black', 'Death', 'Power'].sort,
|
17
|
+
MetalArchives::Parsers::Parser.parse_genre('Death, Power, Black').sort
|
18
|
+
|
19
|
+
assert_equal ['Black', 'Death', 'Power'].sort,
|
20
|
+
MetalArchives::Parsers::Parser.parse_genre('Death, Power, Black').sort
|
21
|
+
|
22
|
+
assert_equal ['Black', 'Death', 'Heavy', 'Power'].sort,
|
23
|
+
MetalArchives::Parsers::Parser.parse_genre('Death (early), Heavy/Power Metal, Black (later)').sort
|
24
|
+
|
25
|
+
assert_equal ['Death', 'Power'].sort,
|
26
|
+
MetalArchives::Parsers::Parser.parse_genre(' Death , Power Metal, Power, Power').sort
|
27
|
+
|
28
|
+
assert_equal ['Heavy Power', 'Speed Power'].sort,
|
29
|
+
MetalArchives::Parsers::Parser.parse_genre('Heavy/Speed Power Metal').sort
|
30
|
+
|
31
|
+
assert_equal ['Traditional Heavy', 'Traditional Power'].sort,
|
32
|
+
MetalArchives::Parsers::Parser.parse_genre('Traditional Heavy/Power Metal').sort
|
33
|
+
|
34
|
+
assert_equal ['Traditional Heavy', 'Traditional Power', 'Classical Heavy', 'Classical Power'].sort,
|
35
|
+
MetalArchives::Parsers::Parser.parse_genre('Traditional/Classical Heavy/Power Metal').sort
|
36
|
+
end
|
37
|
+
end
|