metal_archives 0.8.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +59 -7
- data/.rspec +1 -0
- data/.rubocop.yml +14 -0
- data/.travis.yml +11 -0
- data/Gemfile +2 -0
- data/{LICENSE → LICENSE.md} +0 -0
- data/README.md +77 -9
- data/Rakefile +5 -3
- data/lib/metal_archives.rb +8 -0
- data/lib/metal_archives/configuration.rb +28 -7
- data/lib/metal_archives/error.rb +37 -30
- data/lib/metal_archives/http_client.rb +21 -42
- data/lib/metal_archives/middleware/headers.rb +38 -0
- data/lib/metal_archives/middleware/rewrite_endpoint.rb +38 -0
- data/lib/metal_archives/models/artist.rb +51 -65
- data/lib/metal_archives/models/band.rb +41 -39
- data/lib/metal_archives/models/base_model.rb +88 -59
- data/lib/metal_archives/models/label.rb +7 -6
- data/lib/metal_archives/parsers/artist.rb +110 -99
- data/lib/metal_archives/parsers/band.rb +168 -156
- data/lib/metal_archives/parsers/label.rb +54 -52
- data/lib/metal_archives/parsers/parser.rb +73 -71
- data/lib/metal_archives/utils/collection.rb +7 -1
- data/lib/metal_archives/utils/lru_cache.rb +11 -4
- data/lib/metal_archives/utils/nil_date.rb +54 -0
- data/lib/metal_archives/utils/range.rb +16 -8
- data/lib/metal_archives/version.rb +3 -1
- data/metal_archives.gemspec +21 -11
- data/spec/configuration_spec.rb +101 -0
- data/spec/factories/artist_factory.rb +37 -0
- data/spec/factories/band_factory.rb +60 -0
- data/spec/factories/nil_date_factory.rb +9 -0
- data/spec/factories/range_factory.rb +8 -0
- data/spec/models/artist_spec.rb +142 -0
- data/spec/models/band_spec.rb +179 -0
- data/spec/models/base_model_spec.rb +217 -0
- data/spec/parser_spec.rb +19 -0
- data/spec/spec_helper.rb +111 -0
- data/spec/support/factory_girl.rb +5 -0
- data/spec/support/metal_archives.rb +26 -0
- data/spec/utils/collection_spec.rb +72 -0
- data/spec/utils/lru_cache_spec.rb +53 -0
- data/spec/utils/nil_date_spec.rb +98 -0
- data/spec/utils/range_spec.rb +62 -0
- metadata +142 -57
- data/test/base_model_test.rb +0 -111
- data/test/configuration_test.rb +0 -57
- data/test/parser_test.rb +0 -37
- data/test/property/artist_property_test.rb +0 -43
- data/test/property/band_property_test.rb +0 -94
- data/test/query/artist_query_test.rb +0 -109
- data/test/query/band_query_test.rb +0 -152
- data/test/test_helper.rb +0 -25
- data/test/utils/collection_test.rb +0 -51
- data/test/utils/lru_cache_test.rb +0 -22
- data/test/utils/range_test.rb +0 -42
@@ -1,182 +1,194 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'json'
|
2
3
|
require 'date'
|
3
4
|
require 'countries'
|
4
5
|
|
5
6
|
module MetalArchives
|
6
|
-
module Parsers
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
7
|
+
module Parsers
|
8
|
+
##
|
9
|
+
# Band parser
|
10
|
+
#
|
11
|
+
class Band < Parser # :nodoc:
|
12
|
+
class << self
|
13
|
+
##
|
14
|
+
# Map attributes to MA attributes
|
15
|
+
#
|
16
|
+
# Returns +Hash+
|
17
|
+
#
|
18
|
+
# [+params+]
|
19
|
+
# +Hash+
|
20
|
+
#
|
21
|
+
def map_params(query)
|
22
|
+
params = {
|
23
|
+
:bandName => query[:name] || '',
|
24
|
+
:exactBandMatch => (!!query[:exact] ? 1 : 0),
|
25
|
+
:genre => query[:genre] || '',
|
26
|
+
:yearCreationFrom => (query[:year] && query[:year].begin ? query[:year].begin.year : '') || '',
|
27
|
+
:yearCreationTo => (query[:year] && query[:year].end ? query[:year].end.year : '') || '',
|
28
|
+
:bandNotes => query[:comment] || '',
|
29
|
+
:status => map_status(query[:status]),
|
30
|
+
:themes => query[:lyrical_themes] || '',
|
31
|
+
:location => query[:location] || '',
|
32
|
+
:bandLabelName => query[:label] || '',
|
33
|
+
:indieLabelBand => (!!query[:independent] ? 1 : 0)
|
34
|
+
}
|
35
|
+
|
36
|
+
params[:country] = []
|
37
|
+
Array(query[:country]).each do |country|
|
38
|
+
params[:country] << (country.is_a?(ISO3166::Country) ? country.alpha2 : (country || ''))
|
39
|
+
end
|
40
|
+
params[:country] = params[:country].first if params[:country].size == 1
|
41
|
+
|
42
|
+
params
|
38
43
|
end
|
39
|
-
params[:country] = params[:country].first if (params[:country].size == 1)
|
40
44
|
|
41
|
-
|
42
|
-
|
45
|
+
##
|
46
|
+
# Parse main HTML page
|
47
|
+
#
|
48
|
+
# Returns +Hash+
|
49
|
+
#
|
50
|
+
# [Raises]
|
51
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
52
|
+
#
|
53
|
+
def parse_html(response)
|
54
|
+
props = {}
|
55
|
+
doc = Nokogiri::HTML response
|
43
56
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
57
|
+
props[:name] = sanitize doc.css('#band_info .band_name a').first.content
|
58
|
+
|
59
|
+
props[:aliases] = []
|
60
|
+
|
61
|
+
# Logo
|
62
|
+
unless doc.css('.band_name_img').empty?
|
63
|
+
logo_uri = URI doc.css('.band_name_img img').first.attr('src')
|
64
|
+
props[:logo] = Middleware::RewriteEndpoint.rewrite logo_uri
|
65
|
+
end
|
66
|
+
|
67
|
+
# Photo
|
68
|
+
unless doc.css('.band_img').empty?
|
69
|
+
photo_uri = URI doc.css('.band_img img').first.attr('src')
|
70
|
+
props[:photo] = Middleware::RewriteEndpoint.rewrite photo_uri
|
71
|
+
end
|
72
|
+
|
73
|
+
doc.css('#band_stats dl').each do |dl|
|
74
|
+
dl.search('dt').each do |dt|
|
75
|
+
content = sanitize(dt.next_element.content)
|
76
|
+
|
77
|
+
next if content == 'N/A'
|
78
|
+
|
79
|
+
case dt.content
|
80
|
+
when 'Country of origin:'
|
81
|
+
props[:country] = ISO3166::Country.find_country_by_name sanitize(dt.next_element.css('a').first.content)
|
82
|
+
when 'Location:'
|
83
|
+
props[:location] = content
|
84
|
+
when 'Status:'
|
85
|
+
props[:status] = content.downcase.tr(' ', '_').to_sym
|
86
|
+
when 'Formed in:'
|
87
|
+
props[:date_formed] = Date.new content.to_i
|
88
|
+
when 'Genre:'
|
89
|
+
props[:genres] = parse_genre content
|
90
|
+
when 'Lyrical themes:'
|
91
|
+
props[:lyrical_themes] = []
|
92
|
+
content.split(',').each do |theme|
|
93
|
+
t = theme.split.map(&:capitalize)
|
94
|
+
t.delete '(early)'
|
95
|
+
t.delete '(later)'
|
96
|
+
props[:lyrical_themes] << t.join(' ')
|
97
|
+
end
|
98
|
+
when /(Current|Last) label:/
|
99
|
+
props[:independent] = (content == 'Unsigned/independent')
|
100
|
+
# TODO
|
101
|
+
when 'Years active:'
|
102
|
+
props[:date_active] = []
|
103
|
+
content.split(',').each do |range|
|
104
|
+
# Aliases
|
105
|
+
range.scan(/\(as ([^)]*)\)/).each { |name| props[:aliases] << name.first }
|
106
|
+
# Ranges
|
107
|
+
r = range.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-')
|
108
|
+
date_start = (r.first == '?' ? nil : Date.new(r.first.to_i))
|
109
|
+
date_end = (r.last ==( '?') || r.last == 'present' ? nil : Date.new(r.first.to_i))
|
110
|
+
props[:date_active] << MetalArchives::Range.new(date_start, date_end)
|
111
|
+
end
|
112
|
+
else
|
113
|
+
raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
|
100
114
|
end
|
101
|
-
else
|
102
|
-
raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
|
103
115
|
end
|
104
116
|
end
|
117
|
+
|
118
|
+
props
|
119
|
+
rescue => e
|
120
|
+
e.backtrace.each { |b| MetalArchives.config.logger.error b }
|
121
|
+
raise Errors::ParserError, e
|
105
122
|
end
|
106
123
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
124
|
+
##
|
125
|
+
# Parse similar bands HTML page
|
126
|
+
#
|
127
|
+
# Returns +Hash+
|
128
|
+
#
|
129
|
+
# [Raises]
|
130
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
131
|
+
#
|
132
|
+
def parse_similar_bands_html(response)
|
133
|
+
similar = []
|
112
134
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
# Returns +Hash+
|
117
|
-
#
|
118
|
-
# [Raises]
|
119
|
-
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
120
|
-
#
|
121
|
-
def parse_similar_bands_html(response)
|
122
|
-
similar = []
|
123
|
-
|
124
|
-
doc = Nokogiri::HTML response
|
125
|
-
doc.css('#artist_list tbody tr').each do |row|
|
126
|
-
similar << {
|
135
|
+
doc = Nokogiri::HTML response
|
136
|
+
doc.css('#artist_list tbody tr').each do |row|
|
137
|
+
similar << {
|
127
138
|
:band => MetalArchives::Band.new(:id => row.css('td a').first['href'].split('/').last.to_i),
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
similar
|
133
|
-
rescue => e
|
134
|
-
e.backtrace.each { |b| MetalArchives::config.logger.error b }
|
135
|
-
raise Errors::ParserError, e
|
136
|
-
end
|
139
|
+
:score => row.css('td').last.content.strip
|
140
|
+
}
|
141
|
+
end
|
137
142
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
#
|
143
|
-
# [Raises]
|
144
|
-
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
145
|
-
#
|
146
|
-
def parse_related_links_html(response)
|
147
|
-
links = []
|
148
|
-
|
149
|
-
doc = Nokogiri::HTML response
|
150
|
-
doc.css('#linksTableOfficial td a').each do |a|
|
151
|
-
links << {
|
152
|
-
:url => a['href'],
|
153
|
-
:type => :official,
|
154
|
-
:title => a.content
|
155
|
-
}
|
143
|
+
similar
|
144
|
+
rescue => e
|
145
|
+
e.backtrace.each { |b| MetalArchives.config.logger.error b }
|
146
|
+
raise Errors::ParserError, e
|
156
147
|
end
|
157
|
-
doc.css('#linksTableOfficial_merchandise td a').each do |a|
|
158
|
-
links << {
|
159
|
-
:url => a['href'],
|
160
|
-
:type => :merchandise,
|
161
|
-
:title => a.content
|
162
|
-
}
|
163
|
-
end
|
164
|
-
|
165
|
-
links
|
166
|
-
rescue => e
|
167
|
-
e.backtrace.each { |b| MetalArchives::config.logger.error b }
|
168
|
-
raise Errors::ParserError, e
|
169
|
-
end
|
170
148
|
|
171
|
-
private
|
172
149
|
##
|
173
|
-
#
|
150
|
+
# Parse related links HTML page
|
174
151
|
#
|
175
|
-
# Returns +
|
152
|
+
# Returns +Hash+
|
176
153
|
#
|
177
154
|
# [Raises]
|
178
155
|
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
179
156
|
#
|
157
|
+
def parse_related_links_html(response)
|
158
|
+
links = []
|
159
|
+
|
160
|
+
doc = Nokogiri::HTML response
|
161
|
+
doc.css('#linksTableOfficial td a').each do |a|
|
162
|
+
links << {
|
163
|
+
:url => a['href'],
|
164
|
+
:type => :official,
|
165
|
+
:title => a.content
|
166
|
+
}
|
167
|
+
end
|
168
|
+
doc.css('#linksTableOfficial_merchandise td a').each do |a|
|
169
|
+
links << {
|
170
|
+
:url => a['href'],
|
171
|
+
:type => :merchandise,
|
172
|
+
:title => a.content
|
173
|
+
}
|
174
|
+
end
|
175
|
+
|
176
|
+
links
|
177
|
+
rescue => e
|
178
|
+
e.backtrace.each { |b| MetalArchives.config.logger.error b }
|
179
|
+
raise Errors::ParserError, e
|
180
|
+
end
|
181
|
+
|
182
|
+
private
|
183
|
+
|
184
|
+
##
|
185
|
+
# Map MA band status
|
186
|
+
#
|
187
|
+
# Returns +Symbol+
|
188
|
+
#
|
189
|
+
# [Raises]
|
190
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
191
|
+
#
|
180
192
|
def map_status(status)
|
181
193
|
s = {
|
182
194
|
nil => '',
|
@@ -192,7 +204,7 @@ module Parsers
|
|
192
204
|
|
193
205
|
s[status]
|
194
206
|
end
|
207
|
+
end
|
195
208
|
end
|
196
209
|
end
|
197
210
|
end
|
198
|
-
end
|
@@ -1,68 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'date'
|
2
4
|
require 'nokogiri'
|
3
5
|
|
4
6
|
module MetalArchives
|
5
|
-
module Parsers
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
7
|
+
module Parsers
|
8
|
+
##
|
9
|
+
# Label parser
|
10
|
+
#
|
11
|
+
class Label # :nodoc:
|
12
|
+
class << self
|
13
|
+
def find_endpoint(params)
|
14
|
+
"#{MetalArchives.config.default_endpoint}labels/#{params[:name]}/#{params[:id]}"
|
15
|
+
end
|
14
16
|
|
15
|
-
|
16
|
-
|
17
|
-
|
17
|
+
def parse(response)
|
18
|
+
props = {}
|
19
|
+
doc = Nokogiri::HTML(response)
|
18
20
|
|
19
|
-
|
21
|
+
props[:name] = doc.css('#label_info .label_name').first.content
|
20
22
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
props[:contact] = []
|
24
|
+
doc.css('#label_contact a').each do |contact|
|
25
|
+
props[:contact] << {
|
26
|
+
:title => contact.content,
|
27
|
+
:content => contact.attr(:href)
|
28
|
+
}
|
29
|
+
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
31
|
+
doc.css('#label_info dl').each do |dl|
|
32
|
+
dl.search('dt').each do |dt|
|
33
|
+
case dt.content
|
34
|
+
when 'Address:'
|
35
|
+
break if dt.next_element.content == 'N/A'
|
36
|
+
props[:address] = dt.next_element.content
|
37
|
+
when 'Country:'
|
38
|
+
break if dt.next_element.content == 'N/A'
|
39
|
+
props[:country] = ParserHelper.parse_country dt.next_element.css('a').first.content
|
40
|
+
when 'Phone number:'
|
41
|
+
break if dt.next_element.content == 'N/A'
|
42
|
+
props[:phone] = dt.next_element.content
|
43
|
+
when 'Status:'
|
44
|
+
props[:status] = dt.next_element.content.downcase.tr(' ', '_').to_sym
|
45
|
+
when 'Specialised in:'
|
46
|
+
break if dt.next_element.content == 'N/A'
|
47
|
+
props[:specializations] = ParserHelper.parse_genre dt.next_element.content
|
48
|
+
when 'Founding date :'
|
49
|
+
break if dt.next_element.content == 'N/A'
|
50
|
+
props[:date_founded] = Date.new dt.next_element.content.to_i
|
51
|
+
when 'Sub-labels:'
|
52
|
+
# TODO
|
53
|
+
when 'Online shopping:'
|
54
|
+
if dt.next_element.content == 'Yes'
|
55
|
+
props[:online_shopping] = true
|
56
|
+
elsif dt.next_element.content == 'No'
|
57
|
+
props[:online_shopping] = false
|
58
|
+
end
|
59
|
+
else
|
60
|
+
raise "Unknown token: #{dt.content}"
|
56
61
|
end
|
57
|
-
else
|
58
|
-
raise "Unknown token: #{dt.content}"
|
59
62
|
end
|
60
63
|
end
|
61
|
-
end
|
62
64
|
|
63
|
-
|
65
|
+
props
|
66
|
+
end
|
64
67
|
end
|
65
68
|
end
|
66
69
|
end
|
67
70
|
end
|
68
|
-
end
|