metal_archives 0.8.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +59 -7
- data/.rspec +1 -0
- data/.rubocop.yml +14 -0
- data/.travis.yml +11 -0
- data/Gemfile +2 -0
- data/{LICENSE → LICENSE.md} +0 -0
- data/README.md +77 -9
- data/Rakefile +5 -3
- data/lib/metal_archives.rb +8 -0
- data/lib/metal_archives/configuration.rb +28 -7
- data/lib/metal_archives/error.rb +37 -30
- data/lib/metal_archives/http_client.rb +21 -42
- data/lib/metal_archives/middleware/headers.rb +38 -0
- data/lib/metal_archives/middleware/rewrite_endpoint.rb +38 -0
- data/lib/metal_archives/models/artist.rb +51 -65
- data/lib/metal_archives/models/band.rb +41 -39
- data/lib/metal_archives/models/base_model.rb +88 -59
- data/lib/metal_archives/models/label.rb +7 -6
- data/lib/metal_archives/parsers/artist.rb +110 -99
- data/lib/metal_archives/parsers/band.rb +168 -156
- data/lib/metal_archives/parsers/label.rb +54 -52
- data/lib/metal_archives/parsers/parser.rb +73 -71
- data/lib/metal_archives/utils/collection.rb +7 -1
- data/lib/metal_archives/utils/lru_cache.rb +11 -4
- data/lib/metal_archives/utils/nil_date.rb +54 -0
- data/lib/metal_archives/utils/range.rb +16 -8
- data/lib/metal_archives/version.rb +3 -1
- data/metal_archives.gemspec +21 -11
- data/spec/configuration_spec.rb +101 -0
- data/spec/factories/artist_factory.rb +37 -0
- data/spec/factories/band_factory.rb +60 -0
- data/spec/factories/nil_date_factory.rb +9 -0
- data/spec/factories/range_factory.rb +8 -0
- data/spec/models/artist_spec.rb +142 -0
- data/spec/models/band_spec.rb +179 -0
- data/spec/models/base_model_spec.rb +217 -0
- data/spec/parser_spec.rb +19 -0
- data/spec/spec_helper.rb +111 -0
- data/spec/support/factory_girl.rb +5 -0
- data/spec/support/metal_archives.rb +26 -0
- data/spec/utils/collection_spec.rb +72 -0
- data/spec/utils/lru_cache_spec.rb +53 -0
- data/spec/utils/nil_date_spec.rb +98 -0
- data/spec/utils/range_spec.rb +62 -0
- metadata +142 -57
- data/test/base_model_test.rb +0 -111
- data/test/configuration_test.rb +0 -57
- data/test/parser_test.rb +0 -37
- data/test/property/artist_property_test.rb +0 -43
- data/test/property/band_property_test.rb +0 -94
- data/test/query/artist_query_test.rb +0 -109
- data/test/query/band_query_test.rb +0 -152
- data/test/test_helper.rb +0 -25
- data/test/utils/collection_test.rb +0 -51
- data/test/utils/lru_cache_test.rb +0 -22
- data/test/utils/range_test.rb +0 -42
@@ -1,182 +1,194 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'json'
|
2
3
|
require 'date'
|
3
4
|
require 'countries'
|
4
5
|
|
5
6
|
module MetalArchives
|
6
|
-
module Parsers
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
7
|
+
module Parsers
|
8
|
+
##
|
9
|
+
# Band parser
|
10
|
+
#
|
11
|
+
class Band < Parser # :nodoc:
|
12
|
+
class << self
|
13
|
+
##
# Translate a search query into the parameter names used by the MA search form
#
# Returns +Hash+
#
# [+query+]
#     +Hash+ of search criteria. The keys read are +:name+, +:exact+, +:genre+,
#     +:year+, +:comment+, +:status+, +:lyrical_themes+, +:location+, +:label+,
#     +:independent+ and +:country+. Missing values are sent as empty strings.
#
def map_params(query)
  # Year of a range boundary, or '' when the boundary (or its year) is absent
  year_of = ->(boundary) { (boundary ? boundary.year : '') || '' }
  # The search form expects 1/0 rather than true/false
  flag = ->(value) { value ? 1 : 0 }

  years = query[:year]

  mapped = {
    :bandName => query[:name] || '',
    :exactBandMatch => flag.call(query[:exact]),
    :genre => query[:genre] || '',
    :yearCreationFrom => year_of.call(years && years.begin),
    :yearCreationTo => year_of.call(years && years.end),
    :bandNotes => query[:comment] || '',
    :status => map_status(query[:status]),
    :themes => query[:lyrical_themes] || '',
    :location => query[:location] || '',
    :bandLabelName => query[:label] || '',
    :indieLabelBand => flag.call(query[:independent])
  }

  # Countries may be given as ISO3166::Country objects or as plain values
  countries = Array(query[:country]).map do |entry|
    entry.is_a?(ISO3166::Country) ? entry.alpha2 : (entry || '')
  end

  # A single country is sent as a scalar, anything else as a list
  mapped[:country] = countries.size == 1 ? countries.first : countries

  mapped
end
|
39
|
-
params[:country] = params[:country].first if (params[:country].size == 1)
|
40
44
|
|
41
|
-
|
42
|
-
|
45
|
+
##
# Parse main HTML page
#
# Returns +Hash+ of band properties. Depending on what the page contains, the
# keys set are +:name+, +:aliases+, +:logo+, +:photo+, +:country+, +:location+,
# +:status+, +:date_formed+, +:genres+, +:lyrical_themes+, +:independent+ and
# +:date_active+.
#
# [+response+]
#     HTML of the band page, passed to +Nokogiri::HTML+
#
# [Raises]
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
#
def parse_html(response)
  props = {}
  doc = Nokogiri::HTML response

  props[:name] = sanitize doc.css('#band_info .band_name a').first.content

  # Filled in below from the "(as ...)" annotations of 'Years active:'
  props[:aliases] = []

  # Logo
  unless doc.css('.band_name_img').empty?
    logo_uri = URI doc.css('.band_name_img img').first.attr('src')
    props[:logo] = Middleware::RewriteEndpoint.rewrite logo_uri
  end

  # Photo
  unless doc.css('.band_img').empty?
    photo_uri = URI doc.css('.band_img img').first.attr('src')
    props[:photo] = Middleware::RewriteEndpoint.rewrite photo_uri
  end

  doc.css('#band_stats dl').each do |dl|
    dl.search('dt').each do |dt|
      # The value belonging to a <dt> is the element that follows it
      content = sanitize(dt.next_element.content)

      # Skip properties the page reports as not available
      next if content == 'N/A'

      case dt.content
      when 'Country of origin:'
        props[:country] = ISO3166::Country.find_country_by_name sanitize(dt.next_element.css('a').first.content)
      when 'Location:'
        props[:location] = content
      when 'Status:'
        props[:status] = content.downcase.tr(' ', '_').to_sym
      when 'Formed in:'
        props[:date_formed] = Date.new content.to_i
      when 'Genre:'
        props[:genres] = parse_genre content
      when 'Lyrical themes:'
        props[:lyrical_themes] = []
        content.split(',').each do |theme|
          t = theme.split.map(&:capitalize)
          # Drop the period qualifiers, keep only the theme itself
          t.delete '(early)'
          t.delete '(later)'
          props[:lyrical_themes] << t.join(' ')
        end
      when /(Current|Last) label:/
        props[:independent] = (content == 'Unsigned/independent')
        # TODO
      when 'Years active:'
        props[:date_active] = []
        content.split(',').each do |range|
          # Aliases
          range.scan(/\(as ([^)]*)\)/).each { |name| props[:aliases] << name.first }
          # Ranges
          r = range.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-')
          date_start = (r.first == '?' ? nil : Date.new(r.first.to_i))
          # The end of the range is its last token; '?' and 'present' leave it open
          date_end = (r.last == '?' || r.last == 'present' ? nil : Date.new(r.last.to_i))
          props[:date_active] << MetalArchives::Range.new(date_start, date_end)
        end
      else
        raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
      end
    end
  end

  props
rescue => e
  # Log the backtrace, then surface every failure as a ParserError
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
  raise Errors::ParserError, e
end
|
106
123
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
124
|
+
##
# Parse similar bands HTML page
#
# Returns +Array+ of +Hash+es, one per table row, each with a +:band+
# (rdoc-ref:MetalArchives::Band built from the id in the row's link) and a
# +:score+ (stripped text of the row's last cell)
#
# [+response+]
#     HTML of the similar bands page, passed to +Nokogiri::HTML+
#
# [Raises]
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
#
def parse_similar_bands_html(response)
  rows = Nokogiri::HTML(response).css('#artist_list tbody tr')

  rows.map do |row|
    # The band id is the last path segment of the row's first link
    band_id = row.css('td a').first['href'].split('/').last.to_i

    {
      :band => MetalArchives::Band.new(:id => band_id),
      :score => row.css('td').last.content.strip
    }
  end
rescue => e
  e.backtrace.each { |line| MetalArchives.config.logger.error line }
  raise Errors::ParserError, e
end
|
157
|
-
doc.css('#linksTableOfficial_merchandise td a').each do |a|
|
158
|
-
links << {
|
159
|
-
:url => a['href'],
|
160
|
-
:type => :merchandise,
|
161
|
-
:title => a.content
|
162
|
-
}
|
163
|
-
end
|
164
|
-
|
165
|
-
links
|
166
|
-
rescue => e
|
167
|
-
e.backtrace.each { |b| MetalArchives::config.logger.error b }
|
168
|
-
raise Errors::ParserError, e
|
169
|
-
end
|
170
148
|
|
171
|
-
private
|
172
149
|
##
# Parse related links HTML page
#
# Returns +Array+ of +Hash+es with a +:url+, +:type+ (+:official+ or
# +:merchandise+) and +:title+ key; official links come first
#
# [+response+]
#     HTML of the related links page, passed to +Nokogiri::HTML+
#
# [Raises]
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
#
def parse_related_links_html(response)
  doc = Nokogiri::HTML response

  # Anchor selector of each links table, paired with the type assigned to its links
  tables = {
    '#linksTableOfficial td a' => :official,
    '#linksTableOfficial_merchandise td a' => :merchandise
  }

  tables.flat_map do |selector, type|
    doc.css(selector).map do |anchor|
      {
        :url => anchor['href'],
        :type => type,
        :title => anchor.content
      }
    end
  end
rescue => e
  e.backtrace.each { |line| MetalArchives.config.logger.error line }
  raise Errors::ParserError, e
end
|
181
|
+
|
182
|
+
private
|
183
|
+
|
184
|
+
##
|
185
|
+
# Map MA band status
|
186
|
+
#
|
187
|
+
# Returns +Symbol+
|
188
|
+
#
|
189
|
+
# [Raises]
|
190
|
+
# - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
|
191
|
+
#
|
180
192
|
def map_status(status)
|
181
193
|
s = {
|
182
194
|
nil => '',
|
@@ -192,7 +204,7 @@ module Parsers
|
|
192
204
|
|
193
205
|
s[status]
|
194
206
|
end
|
207
|
+
end
|
195
208
|
end
|
196
209
|
end
|
197
210
|
end
|
198
|
-
end
|
@@ -1,68 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'date'
|
2
4
|
require 'nokogiri'
|
3
5
|
|
4
6
|
module MetalArchives
|
5
|
-
module Parsers
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
7
|
+
module Parsers
|
8
|
+
##
# Label parser
#
class Label # :nodoc:
  class << self
    ##
    # Build the URL of a label page
    #
    # Returns +String+
    #
    # [+params+]
    #     +Hash+ with a +:name+ and an +:id+ key
    #
    def find_endpoint(params)
      "#{MetalArchives.config.default_endpoint}labels/#{params[:name]}/#{params[:id]}"
    end

    ##
    # Parse label HTML page
    #
    # Returns +Hash+ of label properties. +:name+ and +:contact+ are always
    # set; +:address+, +:country+, +:phone+, +:status+, +:specializations+,
    # +:date_founded+ and +:online_shopping+ are set when the page provides them.
    #
    # [+response+]
    #     HTML of the label page, passed to +Nokogiri::HTML+
    #
    # [Raises]
    # - +RuntimeError+ when the page contains an unknown property
    #
    def parse(response)
      props = {}
      doc = Nokogiri::HTML(response)

      props[:name] = doc.css('#label_info .label_name').first.content

      props[:contact] = []
      doc.css('#label_contact a').each do |contact|
        props[:contact] << {
          :title => contact.content,
          :content => contact.attr(:href)
        }
      end

      doc.css('#label_info dl').each do |dl|
        dl.search('dt').each do |dt|
          # A property reported as 'N/A' is skipped with +next+ so that the
          # remaining <dt> entries of the same list are still parsed
          case dt.content
          when 'Address:'
            next if dt.next_element.content == 'N/A'
            props[:address] = dt.next_element.content
          when 'Country:'
            next if dt.next_element.content == 'N/A'
            # NOTE(review): ParserHelper is not defined in the visible source - confirm it is still available
            props[:country] = ParserHelper.parse_country dt.next_element.css('a').first.content
          when 'Phone number:'
            next if dt.next_element.content == 'N/A'
            props[:phone] = dt.next_element.content
          when 'Status:'
            props[:status] = dt.next_element.content.downcase.tr(' ', '_').to_sym
          when 'Specialised in:'
            next if dt.next_element.content == 'N/A'
            props[:specializations] = ParserHelper.parse_genre dt.next_element.content
          when 'Founding date :'
            next if dt.next_element.content == 'N/A'
            props[:date_founded] = Date.new dt.next_element.content.to_i
          when 'Sub-labels:'
            # TODO
          when 'Online shopping:'
            # Any value other than 'Yes'/'No' leaves the property unset
            if dt.next_element.content == 'Yes'
              props[:online_shopping] = true
            elsif dt.next_element.content == 'No'
              props[:online_shopping] = false
            end
          else
            raise "Unknown token: #{dt.content}"
          end
        end
      end

      props
    end
  end
end
|
66
69
|
end
|
67
70
|
end
|
68
|
-
end
|