metal_archives 2.1.1 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +93 -0
  3. data/.gitignore +6 -6
  4. data/.overcommit.yml +35 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +69 -6
  7. data/CHANGELOG.md +29 -0
  8. data/Gemfile +1 -1
  9. data/LICENSE.md +17 -4
  10. data/README.md +65 -86
  11. data/Rakefile +8 -7
  12. data/bin/console +38 -0
  13. data/bin/setup +8 -0
  14. data/config/inflections.rb +7 -0
  15. data/config/initializers/.keep +0 -0
  16. data/docker-compose.yml +23 -0
  17. data/lib/metal_archives.rb +82 -25
  18. data/lib/metal_archives/cache/base.rb +40 -0
  19. data/lib/metal_archives/cache/memory.rb +68 -0
  20. data/lib/metal_archives/cache/null.rb +22 -0
  21. data/lib/metal_archives/cache/redis.rb +49 -0
  22. data/lib/metal_archives/{utils/collection.rb → collection.rb} +3 -5
  23. data/lib/metal_archives/configuration.rb +33 -50
  24. data/lib/metal_archives/{error.rb → errors.rb} +9 -1
  25. data/lib/metal_archives/http_client.rb +45 -44
  26. data/lib/metal_archives/models/artist.rb +90 -45
  27. data/lib/metal_archives/models/band.rb +80 -55
  28. data/lib/metal_archives/models/base.rb +218 -0
  29. data/lib/metal_archives/models/label.rb +14 -15
  30. data/lib/metal_archives/models/release.rb +349 -0
  31. data/lib/metal_archives/parsers/artist.rb +86 -50
  32. data/lib/metal_archives/parsers/band.rb +155 -88
  33. data/lib/metal_archives/parsers/base.rb +14 -0
  34. data/lib/metal_archives/parsers/country.rb +21 -0
  35. data/lib/metal_archives/parsers/date.rb +31 -0
  36. data/lib/metal_archives/parsers/genre.rb +67 -0
  37. data/lib/metal_archives/parsers/label.rb +39 -31
  38. data/lib/metal_archives/parsers/parser.rb +16 -63
  39. data/lib/metal_archives/parsers/release.rb +242 -0
  40. data/lib/metal_archives/parsers/year.rb +29 -0
  41. data/lib/metal_archives/version.rb +12 -1
  42. data/metal_archives.env.example +10 -0
  43. data/metal_archives.gemspec +43 -28
  44. data/nginx/default.conf +60 -0
  45. metadata +181 -72
  46. data/.travis.yml +0 -12
  47. data/lib/metal_archives/middleware/cache_check.rb +0 -20
  48. data/lib/metal_archives/middleware/encoding.rb +0 -16
  49. data/lib/metal_archives/middleware/headers.rb +0 -38
  50. data/lib/metal_archives/middleware/rewrite_endpoint.rb +0 -38
  51. data/lib/metal_archives/models/base_model.rb +0 -215
  52. data/lib/metal_archives/utils/lru_cache.rb +0 -61
  53. data/lib/metal_archives/utils/nil_date.rb +0 -99
  54. data/lib/metal_archives/utils/range.rb +0 -66
  55. data/spec/configuration_spec.rb +0 -96
  56. data/spec/factories/artist_factory.rb +0 -37
  57. data/spec/factories/band_factory.rb +0 -60
  58. data/spec/factories/nil_date_factory.rb +0 -9
  59. data/spec/factories/range_factory.rb +0 -8
  60. data/spec/models/artist_spec.rb +0 -138
  61. data/spec/models/band_spec.rb +0 -164
  62. data/spec/models/base_model_spec.rb +0 -219
  63. data/spec/parser_spec.rb +0 -19
  64. data/spec/spec_helper.rb +0 -111
  65. data/spec/support/factory_girl.rb +0 -5
  66. data/spec/support/metal_archives.rb +0 -33
  67. data/spec/utils/collection_spec.rb +0 -72
  68. data/spec/utils/lru_cache_spec.rb +0 -53
  69. data/spec/utils/nil_date_spec.rb +0 -156
  70. data/spec/utils/range_spec.rb +0 -62
@@ -1,10 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'json'
4
- require 'date'
5
- require 'countries'
6
-
7
- require 'metal_archives/middleware/rewrite_endpoint'
3
+ require "json"
4
+ require "date"
5
+ require "countries"
8
6
 
9
7
  module MetalArchives
10
8
  module Parsers
@@ -22,11 +20,9 @@ module MetalArchives
22
20
  # +Hash+
23
21
  #
24
22
  def map_params(query)
25
- params = {
26
- :query => query[:name] || ''
23
+ {
24
+ query: query[:name] || "",
27
25
  }
28
-
29
- params
30
26
  end
31
27
 
32
28
  ##
@@ -38,50 +34,56 @@ module MetalArchives
38
34
  # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
39
35
  #
40
36
  def parse_html(response)
41
- props = {}
37
+ # Set default props
38
+ props = {
39
+ name: nil,
40
+ aliases: [],
41
+
42
+ date_of_birth: nil,
43
+ date_of_death: nil,
44
+ cause_of_death: nil,
45
+ gender: nil,
46
+
47
+ country: nil,
48
+ location: nil,
49
+
50
+ photo: nil,
51
+
52
+ bands: [],
53
+ }
54
+
42
55
  doc = Nokogiri::HTML response
43
56
 
44
57
  # Photo
45
- unless doc.css('.member_img').empty?
46
- photo_uri = URI doc.css('.member_img img').first.attr('src')
47
- props[:photo] = Middleware::RewriteEndpoint.rewrite photo_uri
58
+ unless doc.css(".member_img").empty?
59
+ photo_uri = URI doc.css(".member_img img").first.attr("src")
60
+ props[:photo] = rewrite(photo_uri)
48
61
  end
49
62
 
50
- doc.css('#member_info dl').each do |dl|
51
- dl.css('dt').each do |dt|
63
+ doc.css("#member_info dl").each do |dl|
64
+ dl.css("dt").each do |dt|
52
65
  content = sanitize(dt.next_element.content)
53
66
 
54
- next if content == 'N/A'
67
+ next if content == "N/A"
55
68
 
56
69
  case sanitize(dt.content)
57
- when 'Real/full name:'
70
+ when "Real/full name:"
58
71
  props[:name] = content
59
- when 'Age:'
60
- date = content.strip.gsub(/[0-9]* *\(born ([^\)]*)\)/, '\1')
61
- begin
62
- props[:date_of_birth] = NilDate.parse date
63
- rescue MetalArchives::Errors::ArgumentError => e
64
- dob = Date.parse date
65
- props[:date_of_birth] = NilDate.new dob.year, dob.month, dob.day
66
- end
67
- when 'R.I.P.:'
68
- begin
69
- dod = Date.parse content
70
- props[:date_of_death] = NilDate.new dod.year, dod.month, dod.day
71
- rescue ArgumentError => e
72
- props[:date_of_death] = NilDate.parse content
73
- end
74
- when 'Died of:'
72
+ when "Age:"
73
+ props[:date_of_birth] = Parsers::Date.parse(content.strip.gsub(/[0-9]* *\(born ([^)]*)\)/, '\1'))
74
+ when "R.I.P.:"
75
+ props[:date_of_death] = Parsers::Date.parse(content)
76
+ when "Died of:"
75
77
  props[:cause_of_death] = content
76
- when 'Place of origin:'
77
- props[:country] = ISO3166::Country.find_country_by_name(sanitize(dt.next_element.css('a').first.content))
78
- location = dt.next_element.xpath('text()').map(&:content).join('').strip.gsub(/[()]/, '')
78
+ when "Place of origin:"
79
+ props[:country] = Country.parse(sanitize(dt.next_element.css("a").first.content))
80
+ location = dt.next_element.xpath("text()").map(&:content).join.strip.gsub(/[()]/, "")
79
81
  props[:location] = location unless location.empty?
80
- when 'Gender:'
82
+ when "Gender:"
81
83
  case content
82
- when 'Male'
84
+ when "Male"
83
85
  props[:gender] = :male
84
- when 'Female'
86
+ when "Female"
85
87
  props[:gender] = :female
86
88
  else
87
89
  raise Errors::ParserError, "Unknown gender: #{content}"
@@ -93,12 +95,46 @@ module MetalArchives
93
95
  end
94
96
 
95
97
  # Aliases
96
- props[:aliases] = []
97
- alt = sanitize doc.css('.band_member_name').first.content
98
+ alt = sanitize doc.css(".band_member_name").first.content
98
99
  props[:aliases] << alt unless props[:name] == alt
99
100
 
101
+ # Active bands
102
+ proc = proc do |row|
103
+ link = row.css("h3 a")
104
+
105
+ name, id = nil
106
+
107
+ if link.any?
108
+ # Band name contains a link
109
+ id = Integer(link.attr("href").text.gsub(%r(^.*/([^/#]*)#.*$), '\1'))
110
+ else
111
+ # Band name does not contain a link
112
+ name = sanitize row.css("h3").text
113
+ end
114
+
115
+ r = row.css(".member_in_band_role")
116
+
117
+ range = Parsers::Year.parse(r.xpath("text()").map(&:content).join.strip.gsub(/[\n\r\t]/, "").gsub(/.*\((.*)\)/, '\1'))
118
+ role = sanitize r.css("strong").first.content
119
+
120
+ {
121
+ id: id,
122
+ name: name,
123
+ years_active: range,
124
+ role: role,
125
+ }.compact
126
+ end
127
+
128
+ doc.css("#artist_tab_active .member_in_band").each do |row|
129
+ props[:bands] << proc.call(row).merge(active: true)
130
+ end
131
+
132
+ doc.css("#artist_tab_past .member_in_band").each do |row|
133
+ props[:bands] << proc.call(row).merge(active: false)
134
+ end
135
+
100
136
  props
101
- rescue => e
137
+ rescue StandardError => e
102
138
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
103
139
  raise Errors::ParserError, e
104
140
  end
@@ -119,25 +155,25 @@ module MetalArchives
119
155
  # Default to official links
120
156
  type = :official
121
157
 
122
- doc.css('#linksTablemain tr').each do |row|
123
- if row['id'].match /^header_/
124
- type = row['id'].gsub(/^header_/, '').downcase.to_sym
158
+ doc.css("#linksTablemain tr").each do |row|
159
+ if /^header_/.match?(row["id"])
160
+ type = row["id"].gsub(/^header_/, "").downcase.to_sym
125
161
  else
126
- a = row.css('td a').first
162
+ a = row.css("td a").first
127
163
 
128
164
  # No links have been added yet
129
165
  next unless a
130
166
 
131
167
  links << {
132
- :url => a['href'],
133
- :type => type,
134
- :title => a.content
168
+ url: a["href"],
169
+ type: type,
170
+ title: a.content,
135
171
  }
136
172
  end
137
173
  end
138
174
 
139
175
  links
140
- rescue => e
176
+ rescue StandardError => e
141
177
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
142
178
  raise Errors::ParserError, e
143
179
  end
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
- require 'json'
3
- require 'date'
4
- require 'countries'
2
+
3
+ require "json"
4
+ require "date"
5
+ require "countries"
5
6
 
6
7
  module MetalArchives
7
8
  module Parsers
@@ -20,22 +21,22 @@ module MetalArchives
20
21
  #
21
22
  def map_params(query)
22
23
  params = {
23
- :bandName => query[:name] || '',
24
- :exactBandMatch => (!!query[:exact] ? 1 : 0),
25
- :genre => query[:genre] || '',
26
- :yearCreationFrom => (query[:year] && query[:year].begin ? query[:year].begin.year : '') || '',
27
- :yearCreationTo => (query[:year] && query[:year].end ? query[:year].end.year : '') || '',
28
- :bandNotes => query[:comment] || '',
29
- :status => map_status(query[:status]),
30
- :themes => query[:lyrical_themes] || '',
31
- :location => query[:location] || '',
32
- :bandLabelName => query[:label] || '',
33
- :indieLabelBand => (!!query[:independent] ? 1 : 0)
24
+ bandName: query[:name] || "",
25
+ exactBandMatch: (query[:exact] ? 1 : 0),
26
+ genre: query[:genre] || "",
27
+ yearCreationFrom: query[:year]&.begin || "",
28
+ yearCreationTo: query[:year]&.end || "",
29
+ bandNotes: query[:comment] || "",
30
+ status: map_status(query[:status]),
31
+ themes: query[:lyrical_themes] || "",
32
+ location: query[:location] || "",
33
+ bandLabelName: query[:label] || "",
34
+ indieLabelBand: (query[:independent] ? 1 : 0),
34
35
  }
35
36
 
36
37
  params[:country] = []
37
38
  Array(query[:country]).each do |country|
38
- params[:country] << (country.is_a?(ISO3166::Country) ? country.alpha2 : (country || ''))
39
+ params[:country] << (country.is_a?(ISO3166::Country) ? country.alpha2 : (country || ""))
39
40
  end
40
41
  params[:country] = params[:country].first if params[:country].size == 1
41
42
 
@@ -51,77 +52,120 @@ module MetalArchives
51
52
  # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
52
53
  #
53
54
  def parse_html(response)
54
- props = {}
55
- doc = Nokogiri::HTML response
55
+ # Set default props
56
+ props = {
57
+ name: nil,
58
+ aliases: [],
59
+
60
+ logo: nil,
61
+ photo: nil,
62
+
63
+ country: nil,
64
+ location: nil,
65
+
66
+ status: nil,
67
+ date_formed: nil,
68
+ years_active: [],
69
+ independent: nil,
56
70
 
57
- props[:name] = sanitize doc.css('#band_info .band_name a').first.content
71
+ genres: [],
72
+ lyrical_themes: [],
58
73
 
59
- props[:aliases] = []
74
+ members: [],
75
+ }
76
+
77
+ doc = Nokogiri::HTML response
78
+
79
+ props[:name] = sanitize doc.css("#band_info .band_name a").first.content
60
80
 
61
81
  # Logo
62
- unless doc.css('.band_name_img').empty?
63
- logo_uri = URI doc.css('.band_name_img img').first.attr('src')
64
- props[:logo] = Middleware::RewriteEndpoint.rewrite logo_uri
82
+ unless doc.css(".band_name_img").empty?
83
+ logo_uri = URI doc.css(".band_name_img img").first.attr("src")
84
+ props[:logo] = rewrite(logo_uri)
65
85
  end
66
86
 
67
87
  # Photo
68
- unless doc.css('.band_img').empty?
69
- photo_uri = URI doc.css('.band_img img').first.attr('src')
70
- props[:photo] = Middleware::RewriteEndpoint.rewrite photo_uri
88
+ unless doc.css(".band_img").empty?
89
+ photo_uri = URI doc.css(".band_img img").first.attr("src")
90
+ props[:photo] = rewrite(photo_uri)
71
91
  end
72
92
 
73
- doc.css('#band_stats dl').each do |dl|
74
- dl.search('dt').each do |dt|
93
+ doc.css("#band_stats dl").each do |dl|
94
+ dl.search("dt").each do |dt|
75
95
  content = sanitize(dt.next_element.content)
76
96
 
77
- next if content == 'N/A'
97
+ next if content == "N/A"
78
98
 
79
99
  case dt.content
80
- when 'Country of origin:'
81
- props[:country] = ISO3166::Country.find_country_by_name sanitize(dt.next_element.css('a').first.content)
82
- when 'Location:'
100
+ when "Country of origin:"
101
+ props[:country] = Country.parse(sanitize(dt.next_element.css("a").first.content))
102
+ when "Location:"
83
103
  props[:location] = content
84
- when 'Status:'
85
- props[:status] = content.downcase.tr(' ', '_').to_sym
86
- when 'Formed in:'
87
- begin
88
- dof = Date.parse content
89
- props[:date_formed] = NilDate.new dof.year, dof.month, dof.day
90
- rescue ArgumentError => e
91
- props[:date_formed] = NilDate.parse content
92
- end
93
- when 'Genre:'
94
- props[:genres] = parse_genre content
95
- when 'Lyrical themes:'
96
- props[:lyrical_themes] = []
97
- content.split(',').each do |theme|
104
+ when "Status:"
105
+ props[:status] = content.downcase.tr(" -", "_").to_sym
106
+ when "Formed in:"
107
+ props[:date_formed] = Parsers::Date.parse(content)
108
+ when "Genre:"
109
+ props[:genres] = Parsers::Genre.parse(content)
110
+ when "Lyrical themes:"
111
+ content.split(",").each do |theme|
98
112
  t = theme.split.map(&:capitalize)
99
- t.delete '(early)'
100
- t.delete '(later)'
101
- props[:lyrical_themes] << t.join(' ')
113
+ t.delete "(early)"
114
+ t.delete "(later)"
115
+ props[:lyrical_themes] << t.join(" ")
102
116
  end
103
117
  when /(Current|Last) label:/
104
- props[:independent] = (content == 'Unsigned/independent')
105
- # TODO
106
- when 'Years active:'
107
- props[:date_active] = []
108
- content.split(',').each do |range|
118
+ props[:independent] = (content == "Unsigned/independent")
119
+ # TODO: label
120
+ when "Years active:"
121
+ content.split(",").each do |range|
109
122
  # Aliases
110
123
  range.scan(/\(as ([^)]*)\)/).each { |name| props[:aliases] << name.first }
111
124
  # Ranges
112
- r = range.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-')
113
- date_start = (r.first == '?' ? nil : Date.new(r.first.to_i))
114
- date_end = (r.last ==( '?') || r.last == 'present' ? nil : Date.new(r.first.to_i))
115
- props[:date_active] << MetalArchives::Range.new(date_start, date_end)
125
+ props[:years_active] << Parsers::Year.parse(range.gsub(/ *\(as ([^)]*)\) */, ""))
116
126
  end
117
127
  else
118
- raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
128
+ raise Errors::ParserError, "Unknown token: #{dt.content}"
119
129
  end
120
130
  end
121
131
  end
122
132
 
133
+ # Members
134
+ proc = proc do |row|
135
+ link = row.css("a")
136
+
137
+ if link.any?
138
+ # Artist name contains a link
139
+ id = Integer(link.attr("href").text.split("/").last)
140
+ name = sanitize link.text
141
+ else
142
+ # Artist name does not contain a link
143
+ name = sanitize row.css("h3").text
144
+ end
145
+
146
+ r = row.css("td").last.text
147
+ role, range = r.match(/(.*)\(([^(]*)\)/).captures
148
+
149
+ range = Parsers::Year.parse(range)
150
+
151
+ {
152
+ id: id,
153
+ name: name,
154
+ years_active: range,
155
+ role: sanitize(role),
156
+ }.compact
157
+ end
158
+
159
+ doc.css("#band_tab_members_current .lineupRow").each do |row|
160
+ props[:members] << proc.call(row).merge(current: true)
161
+ end
162
+
163
+ doc.css("#band_tab_members_past .lineupRow").each do |row|
164
+ props[:members] << proc.call(row).merge(current: false)
165
+ end
166
+
123
167
  props
124
- rescue => e
168
+ rescue StandardError => e
125
169
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
126
170
  raise Errors::ParserError, e
127
171
  end
@@ -138,15 +182,16 @@ module MetalArchives
138
182
  similar = []
139
183
 
140
184
  doc = Nokogiri::HTML response
141
- doc.css('#artist_list tbody tr').each do |row|
185
+ doc.css("#artist_list tbody tr").each do |row|
142
186
  similar << {
143
- :band => MetalArchives::Band.new(:id => row.css('td a').first['href'].split('/').last.to_i),
144
- :score => row.css('td').last.content.strip
187
+ id: row.css("td a").first["href"].split("/").last.to_i,
188
+ score: row.css("td").last.content.strip,
145
189
  }
146
190
  end
147
191
 
148
192
  similar
149
- rescue => e
193
+ rescue StandardError => e
194
+ MetalArchives.config.logger e.message
150
195
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
151
196
  raise Errors::ParserError, e
152
197
  end
@@ -163,49 +208,71 @@ module MetalArchives
163
208
  links = []
164
209
 
165
210
  doc = Nokogiri::HTML response
166
- doc.css('#linksTableOfficial td a').each do |a|
211
+ doc.css("#linksTableOfficial td a").each do |a|
167
212
  links << {
168
- :url => a['href'],
169
- :type => :official,
170
- :title => a.content
213
+ url: a["href"],
214
+ type: :official,
215
+ title: a.content,
171
216
  }
172
217
  end
173
- doc.css('#linksTableOfficial_merchandise td a').each do |a|
218
+ doc.css("#linksTableOfficial_merchandise td a").each do |a|
174
219
  links << {
175
- :url => a['href'],
176
- :type => :merchandise,
177
- :title => a.content
220
+ url: a["href"],
221
+ type: :merchandise,
222
+ title: a.content,
178
223
  }
179
224
  end
180
225
 
181
226
  links
182
- rescue => e
227
+ rescue StandardError => e
228
+ e.backtrace.each { |b| MetalArchives.config.logger.error b }
229
+ raise Errors::ParserError, e
230
+ end
231
+
232
+ ##
233
+ # Parse releases HTML page
234
+ #
235
+ # Returns +Array+
236
+ # [Raises]
237
+ # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
238
+ #
239
+ def parse_releases_html(response)
240
+ releases = []
241
+
242
+ doc = Nokogiri::HTML response
243
+ doc.css("tbody tr td:first a").each do |a|
244
+ id = a["href"].split("/").last.to_i
245
+ releases << MetalArchives::Release.find(id)
246
+ end
247
+
248
+ releases
249
+ rescue StandardError => e
183
250
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
184
251
  raise Errors::ParserError, e
185
252
  end
186
253
 
187
254
  private
188
255
 
189
- ##
190
- # Map MA band status
191
- #
192
- # Returns +Symbol+
193
- #
194
- # [Raises]
195
- # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
196
- #
256
+ ##
257
+ # Map MA band status
258
+ #
259
+ # Returns +Symbol+
260
+ #
261
+ # [Raises]
262
+ # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
263
+ #
197
264
  def map_status(status)
198
265
  s = {
199
- nil => '',
200
- :active => 'Active',
201
- :split_up => 'Split-up',
202
- :on_hold => 'On hold',
203
- :unknown => 'Unknown',
204
- :changed_name => 'Changed name',
205
- :disputed => 'Disputed'
266
+ nil => "",
267
+ :active => "Active",
268
+ :split_up => "Split-up",
269
+ :on_hold => "On hold",
270
+ :unknown => "Unknown",
271
+ :changed_name => "Changed name",
272
+ :disputed => "Disputed",
206
273
  }
207
274
 
208
- raise MetalArchives::Errors::ParserError, "Unknown status: #{status}" unless s[status]
275
+ raise Errors::ParserError, "Unknown status: #{status}" unless s[status]
209
276
 
210
277
  s[status]
211
278
  end