metal_archives 2.2.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +93 -0
  3. data/.gitignore +6 -6
  4. data/.overcommit.yml +35 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +66 -6
  7. data/CHANGELOG.md +33 -0
  8. data/Gemfile +1 -1
  9. data/LICENSE.md +17 -4
  10. data/README.md +65 -86
  11. data/Rakefile +8 -7
  12. data/bin/console +38 -0
  13. data/bin/setup +8 -0
  14. data/config/inflections.rb +7 -0
  15. data/config/initializers/.keep +0 -0
  16. data/docker-compose.yml +23 -0
  17. data/lib/metal_archives.rb +82 -27
  18. data/lib/metal_archives/cache/base.rb +40 -0
  19. data/lib/metal_archives/cache/memory.rb +68 -0
  20. data/lib/metal_archives/cache/null.rb +22 -0
  21. data/lib/metal_archives/cache/redis.rb +49 -0
  22. data/lib/metal_archives/{utils/collection.rb → collection.rb} +3 -5
  23. data/lib/metal_archives/configuration.rb +33 -50
  24. data/lib/metal_archives/{error.rb → errors.rb} +9 -1
  25. data/lib/metal_archives/http_client.rb +45 -44
  26. data/lib/metal_archives/models/artist.rb +90 -45
  27. data/lib/metal_archives/models/band.rb +77 -52
  28. data/lib/metal_archives/models/base.rb +225 -0
  29. data/lib/metal_archives/models/label.rb +14 -15
  30. data/lib/metal_archives/models/release.rb +25 -29
  31. data/lib/metal_archives/parsers/artist.rb +86 -50
  32. data/lib/metal_archives/parsers/band.rb +155 -88
  33. data/lib/metal_archives/parsers/base.rb +14 -0
  34. data/lib/metal_archives/parsers/country.rb +21 -0
  35. data/lib/metal_archives/parsers/date.rb +31 -0
  36. data/lib/metal_archives/parsers/genre.rb +67 -0
  37. data/lib/metal_archives/parsers/label.rb +39 -31
  38. data/lib/metal_archives/parsers/parser.rb +18 -63
  39. data/lib/metal_archives/parsers/release.rb +98 -89
  40. data/lib/metal_archives/parsers/year.rb +31 -0
  41. data/lib/metal_archives/version.rb +12 -1
  42. data/metal_archives.env.example +10 -0
  43. data/metal_archives.gemspec +43 -28
  44. data/nginx/default.conf +60 -0
  45. metadata +179 -74
  46. data/.travis.yml +0 -12
  47. data/lib/metal_archives/middleware/cache_check.rb +0 -20
  48. data/lib/metal_archives/middleware/encoding.rb +0 -16
  49. data/lib/metal_archives/middleware/headers.rb +0 -38
  50. data/lib/metal_archives/middleware/rewrite_endpoint.rb +0 -38
  51. data/lib/metal_archives/models/base_model.rb +0 -215
  52. data/lib/metal_archives/utils/lru_cache.rb +0 -61
  53. data/lib/metal_archives/utils/nil_date.rb +0 -99
  54. data/lib/metal_archives/utils/range.rb +0 -66
  55. data/spec/configuration_spec.rb +0 -96
  56. data/spec/factories/artist_factory.rb +0 -37
  57. data/spec/factories/band_factory.rb +0 -60
  58. data/spec/factories/nil_date_factory.rb +0 -9
  59. data/spec/factories/range_factory.rb +0 -8
  60. data/spec/models/artist_spec.rb +0 -138
  61. data/spec/models/band_spec.rb +0 -164
  62. data/spec/models/base_model_spec.rb +0 -219
  63. data/spec/models/release_spec.rb +0 -133
  64. data/spec/parser_spec.rb +0 -19
  65. data/spec/spec_helper.rb +0 -111
  66. data/spec/support/factory_girl.rb +0 -5
  67. data/spec/support/metal_archives.rb +0 -33
  68. data/spec/utils/collection_spec.rb +0 -72
  69. data/spec/utils/lru_cache_spec.rb +0 -53
  70. data/spec/utils/nil_date_spec.rb +0 -156
  71. data/spec/utils/range_spec.rb +0 -62
@@ -1,10 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'json'
4
- require 'date'
5
- require 'countries'
6
-
7
- require 'metal_archives/middleware/rewrite_endpoint'
3
+ require "json"
4
+ require "date"
5
+ require "countries"
8
6
 
9
7
  module MetalArchives
10
8
  module Parsers
@@ -22,11 +20,9 @@ module MetalArchives
22
20
  # +Hash+
23
21
  #
24
22
  def map_params(query)
25
- params = {
26
- :query => query[:name] || ''
23
+ {
24
+ query: query[:name] || "",
27
25
  }
28
-
29
- params
30
26
  end
31
27
 
32
28
  ##
@@ -38,50 +34,56 @@ module MetalArchives
38
34
  # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
39
35
  #
40
36
  def parse_html(response)
41
- props = {}
37
+ # Set default props
38
+ props = {
39
+ name: nil,
40
+ aliases: [],
41
+
42
+ date_of_birth: nil,
43
+ date_of_death: nil,
44
+ cause_of_death: nil,
45
+ gender: nil,
46
+
47
+ country: nil,
48
+ location: nil,
49
+
50
+ photo: nil,
51
+
52
+ bands: [],
53
+ }
54
+
42
55
  doc = Nokogiri::HTML response
43
56
 
44
57
  # Photo
45
- unless doc.css('.member_img').empty?
46
- photo_uri = URI doc.css('.member_img img').first.attr('src')
47
- props[:photo] = Middleware::RewriteEndpoint.rewrite photo_uri
58
+ unless doc.css(".member_img").empty?
59
+ photo_uri = URI doc.css(".member_img img").first.attr("src")
60
+ props[:photo] = rewrite(photo_uri)
48
61
  end
49
62
 
50
- doc.css('#member_info dl').each do |dl|
51
- dl.css('dt').each do |dt|
63
+ doc.css("#member_info dl").each do |dl|
64
+ dl.css("dt").each do |dt|
52
65
  content = sanitize(dt.next_element.content)
53
66
 
54
- next if content == 'N/A'
67
+ next if content == "N/A"
55
68
 
56
69
  case sanitize(dt.content)
57
- when 'Real/full name:'
70
+ when "Real/full name:"
58
71
  props[:name] = content
59
- when 'Age:'
60
- date = content.strip.gsub(/[0-9]* *\(born ([^\)]*)\)/, '\1')
61
- begin
62
- props[:date_of_birth] = NilDate.parse date
63
- rescue MetalArchives::Errors::ArgumentError => e
64
- dob = Date.parse date
65
- props[:date_of_birth] = NilDate.new dob.year, dob.month, dob.day
66
- end
67
- when 'R.I.P.:'
68
- begin
69
- dod = Date.parse content
70
- props[:date_of_death] = NilDate.new dod.year, dod.month, dod.day
71
- rescue ArgumentError => e
72
- props[:date_of_death] = NilDate.parse content
73
- end
74
- when 'Died of:'
72
+ when "Age:"
73
+ props[:date_of_birth] = Parsers::Date.parse(content.strip.gsub(/[0-9]* *\(born ([^)]*)\)/, '\1'))
74
+ when "R.I.P.:"
75
+ props[:date_of_death] = Parsers::Date.parse(content)
76
+ when "Died of:"
75
77
  props[:cause_of_death] = content
76
- when 'Place of origin:'
77
- props[:country] = ISO3166::Country.find_country_by_name(sanitize(dt.next_element.css('a').first.content))
78
- location = dt.next_element.xpath('text()').map(&:content).join('').strip.gsub(/[()]/, '')
78
+ when "Place of origin:"
79
+ props[:country] = Country.parse(sanitize(dt.next_element.css("a").first.content))
80
+ location = dt.next_element.xpath("text()").map(&:content).join.strip.gsub(/[()]/, "")
79
81
  props[:location] = location unless location.empty?
80
- when 'Gender:'
82
+ when "Gender:"
81
83
  case content
82
- when 'Male'
84
+ when "Male"
83
85
  props[:gender] = :male
84
- when 'Female'
86
+ when "Female"
85
87
  props[:gender] = :female
86
88
  else
87
89
  raise Errors::ParserError, "Unknown gender: #{content}"
@@ -93,12 +95,46 @@ module MetalArchives
93
95
  end
94
96
 
95
97
  # Aliases
96
- props[:aliases] = []
97
- alt = sanitize doc.css('.band_member_name').first.content
98
+ alt = sanitize doc.css(".band_member_name").first.content
98
99
  props[:aliases] << alt unless props[:name] == alt
99
100
 
101
+ # Active bands
102
+ proc = proc do |row|
103
+ link = row.css("h3 a")
104
+
105
+ name, id = nil
106
+
107
+ if link.any?
108
+ # Band name contains a link
109
+ id = Integer(link.attr("href").text.gsub(%r(^.*/([^/#]*)#.*$), '\1'))
110
+ else
111
+ # Band name does not contain a link
112
+ name = sanitize row.css("h3").text
113
+ end
114
+
115
+ r = row.css(".member_in_band_role")
116
+
117
+ range = Parsers::Year.parse(r.xpath("text()").map(&:content).join.strip.gsub(/[\n\r\t]/, "").gsub(/.*\((.*)\)/, '\1'))
118
+ role = sanitize r.css("strong")&.first&.content
119
+
120
+ {
121
+ id: id,
122
+ name: name,
123
+ years_active: range,
124
+ role: role,
125
+ }.compact
126
+ end
127
+
128
+ doc.css("#artist_tab_active .member_in_band").each do |row|
129
+ props[:bands] << proc.call(row).merge(active: true)
130
+ end
131
+
132
+ doc.css("#artist_tab_past .member_in_band").each do |row|
133
+ props[:bands] << proc.call(row).merge(active: false)
134
+ end
135
+
100
136
  props
101
- rescue => e
137
+ rescue StandardError => e
102
138
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
103
139
  raise Errors::ParserError, e
104
140
  end
@@ -119,25 +155,25 @@ module MetalArchives
119
155
  # Default to official links
120
156
  type = :official
121
157
 
122
- doc.css('#linksTablemain tr').each do |row|
123
- if row['id'].match /^header_/
124
- type = row['id'].gsub(/^header_/, '').downcase.to_sym
158
+ doc.css("#linksTablemain tr").each do |row|
159
+ if /^header_/.match?(row["id"])
160
+ type = row["id"].gsub(/^header_/, "").downcase.to_sym
125
161
  else
126
- a = row.css('td a').first
162
+ a = row.css("td a").first
127
163
 
128
164
  # No links have been added yet
129
165
  next unless a
130
166
 
131
167
  links << {
132
- :url => a['href'],
133
- :type => type,
134
- :title => a.content
168
+ url: a["href"],
169
+ type: type,
170
+ title: a.content,
135
171
  }
136
172
  end
137
173
  end
138
174
 
139
175
  links
140
- rescue => e
176
+ rescue StandardError => e
141
177
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
142
178
  raise Errors::ParserError, e
143
179
  end
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
- require 'json'
3
- require 'date'
4
- require 'countries'
2
+
3
+ require "json"
4
+ require "date"
5
+ require "countries"
5
6
 
6
7
  module MetalArchives
7
8
  module Parsers
@@ -20,22 +21,22 @@ module MetalArchives
20
21
  #
21
22
  def map_params(query)
22
23
  params = {
23
- :bandName => query[:name] || '',
24
- :exactBandMatch => (!!query[:exact] ? 1 : 0),
25
- :genre => query[:genre] || '',
26
- :yearCreationFrom => (query[:year] && query[:year].begin ? query[:year].begin.year : '') || '',
27
- :yearCreationTo => (query[:year] && query[:year].end ? query[:year].end.year : '') || '',
28
- :bandNotes => query[:comment] || '',
29
- :status => map_status(query[:status]),
30
- :themes => query[:lyrical_themes] || '',
31
- :location => query[:location] || '',
32
- :bandLabelName => query[:label] || '',
33
- :indieLabelBand => (!!query[:independent] ? 1 : 0)
24
+ bandName: query[:name] || "",
25
+ exactBandMatch: (query[:exact] ? 1 : 0),
26
+ genre: query[:genre] || "",
27
+ yearCreationFrom: query[:year]&.begin || "",
28
+ yearCreationTo: query[:year]&.end || "",
29
+ bandNotes: query[:comment] || "",
30
+ status: map_status(query[:status]),
31
+ themes: query[:lyrical_themes] || "",
32
+ location: query[:location] || "",
33
+ bandLabelName: query[:label] || "",
34
+ indieLabelBand: (query[:independent] ? 1 : 0),
34
35
  }
35
36
 
36
37
  params[:country] = []
37
38
  Array(query[:country]).each do |country|
38
- params[:country] << (country.is_a?(ISO3166::Country) ? country.alpha2 : (country || ''))
39
+ params[:country] << (country.is_a?(ISO3166::Country) ? country.alpha2 : (country || ""))
39
40
  end
40
41
  params[:country] = params[:country].first if params[:country].size == 1
41
42
 
@@ -51,77 +52,120 @@ module MetalArchives
51
52
  # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
52
53
  #
53
54
  def parse_html(response)
54
- props = {}
55
- doc = Nokogiri::HTML response
55
+ # Set default props
56
+ props = {
57
+ name: nil,
58
+ aliases: [],
59
+
60
+ logo: nil,
61
+ photo: nil,
62
+
63
+ country: nil,
64
+ location: nil,
65
+
66
+ status: nil,
67
+ date_formed: nil,
68
+ years_active: [],
69
+ independent: nil,
56
70
 
57
- props[:name] = sanitize doc.css('#band_info .band_name a').first.content
71
+ genres: [],
72
+ lyrical_themes: [],
58
73
 
59
- props[:aliases] = []
74
+ members: [],
75
+ }
76
+
77
+ doc = Nokogiri::HTML response
78
+
79
+ props[:name] = sanitize doc.css("#band_info .band_name a").first.content
60
80
 
61
81
  # Logo
62
- unless doc.css('.band_name_img').empty?
63
- logo_uri = URI doc.css('.band_name_img img').first.attr('src')
64
- props[:logo] = Middleware::RewriteEndpoint.rewrite logo_uri
82
+ unless doc.css(".band_name_img").empty?
83
+ logo_uri = URI doc.css(".band_name_img img").first.attr("src")
84
+ props[:logo] = rewrite(logo_uri)
65
85
  end
66
86
 
67
87
  # Photo
68
- unless doc.css('.band_img').empty?
69
- photo_uri = URI doc.css('.band_img img').first.attr('src')
70
- props[:photo] = Middleware::RewriteEndpoint.rewrite photo_uri
88
+ unless doc.css(".band_img").empty?
89
+ photo_uri = URI doc.css(".band_img img").first.attr("src")
90
+ props[:photo] = rewrite(photo_uri)
71
91
  end
72
92
 
73
- doc.css('#band_stats dl').each do |dl|
74
- dl.search('dt').each do |dt|
93
+ doc.css("#band_stats dl").each do |dl|
94
+ dl.search("dt").each do |dt|
75
95
  content = sanitize(dt.next_element.content)
76
96
 
77
- next if content == 'N/A'
97
+ next if content == "N/A"
78
98
 
79
99
  case dt.content
80
- when 'Country of origin:'
81
- props[:country] = ISO3166::Country.find_country_by_name sanitize(dt.next_element.css('a').first.content)
82
- when 'Location:'
100
+ when "Country of origin:"
101
+ props[:country] = Country.parse(sanitize(dt.next_element.css("a").first.content))
102
+ when "Location:"
83
103
  props[:location] = content
84
- when 'Status:'
85
- props[:status] = content.downcase.tr(' ', '_').to_sym
86
- when 'Formed in:'
87
- begin
88
- dof = Date.parse content
89
- props[:date_formed] = NilDate.new dof.year, dof.month, dof.day
90
- rescue ArgumentError => e
91
- props[:date_formed] = NilDate.parse content
92
- end
93
- when 'Genre:'
94
- props[:genres] = parse_genre content
95
- when 'Lyrical themes:'
96
- props[:lyrical_themes] = []
97
- content.split(',').each do |theme|
104
+ when "Status:"
105
+ props[:status] = content.downcase.tr(" -", "_").to_sym
106
+ when "Formed in:"
107
+ props[:date_formed] = Parsers::Date.parse(content)
108
+ when "Genre:"
109
+ props[:genres] = Parsers::Genre.parse(content)
110
+ when "Lyrical themes:"
111
+ content.split(",").each do |theme|
98
112
  t = theme.split.map(&:capitalize)
99
- t.delete '(early)'
100
- t.delete '(later)'
101
- props[:lyrical_themes] << t.join(' ')
113
+ t.delete "(early)"
114
+ t.delete "(later)"
115
+ props[:lyrical_themes] << t.join(" ")
102
116
  end
103
117
  when /(Current|Last) label:/
104
- props[:independent] = (content == 'Unsigned/independent')
105
- # TODO
106
- when 'Years active:'
107
- props[:date_active] = []
108
- content.split(',').each do |range|
118
+ props[:independent] = (content == "Unsigned/independent")
119
+ # TODO: label
120
+ when "Years active:"
121
+ content.split(",").each do |range|
109
122
  # Aliases
110
123
  range.scan(/\(as ([^)]*)\)/).each { |name| props[:aliases] << name.first }
111
124
  # Ranges
112
- r = range.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-')
113
- date_start = (r.first == '?' ? nil : Date.new(r.first.to_i))
114
- date_end = (r.last ==( '?') || r.last == 'present' ? nil : Date.new(r.first.to_i))
115
- props[:date_active] << MetalArchives::Range.new(date_start, date_end)
125
+ props[:years_active] << Parsers::Year.parse(range.gsub(/ *\(as ([^)]*)\) */, ""))
116
126
  end
117
127
  else
118
- raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
128
+ raise Errors::ParserError, "Unknown token: #{dt.content}"
119
129
  end
120
130
  end
121
131
  end
122
132
 
133
+ # Members
134
+ proc = proc do |row|
135
+ link = row.css("a")
136
+
137
+ if link.any?
138
+ # Artist name contains a link
139
+ id = Integer(link.attr("href").text.split("/").last)
140
+ name = sanitize link.text
141
+ else
142
+ # Artist name does not contain a link
143
+ name = sanitize row.css("h3").text
144
+ end
145
+
146
+ r = row.css("td").last.text
147
+ role, range = r.match(/(.*)\(([^(]*)\)/)&.captures
148
+
149
+ range = Parsers::Year.parse(range)
150
+
151
+ {
152
+ id: id,
153
+ name: name,
154
+ years_active: range,
155
+ role: sanitize(role),
156
+ }.compact
157
+ end
158
+
159
+ doc.css("#band_tab_members_current .lineupRow").each do |row|
160
+ props[:members] << proc.call(row).merge(current: true)
161
+ end
162
+
163
+ doc.css("#band_tab_members_past .lineupRow").each do |row|
164
+ props[:members] << proc.call(row).merge(current: false)
165
+ end
166
+
123
167
  props
124
- rescue => e
168
+ rescue StandardError => e
125
169
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
126
170
  raise Errors::ParserError, e
127
171
  end
@@ -138,15 +182,16 @@ module MetalArchives
138
182
  similar = []
139
183
 
140
184
  doc = Nokogiri::HTML response
141
- doc.css('#artist_list tbody tr').each do |row|
185
+ doc.css("#artist_list tbody tr").each do |row|
142
186
  similar << {
143
- :band => MetalArchives::Band.new(:id => row.css('td a').first['href'].split('/').last.to_i),
144
- :score => row.css('td').last.content.strip
187
+ id: row.css("td a").first["href"].split("/").last.to_i,
188
+ score: row.css("td").last.content.strip,
145
189
  }
146
190
  end
147
191
 
148
192
  similar
149
- rescue => e
193
+ rescue StandardError => e
194
+ MetalArchives.config.logger e.message
150
195
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
151
196
  raise Errors::ParserError, e
152
197
  end
@@ -163,49 +208,71 @@ module MetalArchives
163
208
  links = []
164
209
 
165
210
  doc = Nokogiri::HTML response
166
- doc.css('#linksTableOfficial td a').each do |a|
211
+ doc.css("#linksTableOfficial td a").each do |a|
167
212
  links << {
168
- :url => a['href'],
169
- :type => :official,
170
- :title => a.content
213
+ url: a["href"],
214
+ type: :official,
215
+ title: a.content,
171
216
  }
172
217
  end
173
- doc.css('#linksTableOfficial_merchandise td a').each do |a|
218
+ doc.css("#linksTableOfficial_merchandise td a").each do |a|
174
219
  links << {
175
- :url => a['href'],
176
- :type => :merchandise,
177
- :title => a.content
220
+ url: a["href"],
221
+ type: :merchandise,
222
+ title: a.content,
178
223
  }
179
224
  end
180
225
 
181
226
  links
182
- rescue => e
227
+ rescue StandardError => e
228
+ e.backtrace.each { |b| MetalArchives.config.logger.error b }
229
+ raise Errors::ParserError, e
230
+ end
231
+
232
+ ##
233
+ # Parse releases HTML page
234
+ #
235
+ # Returns +Array+
236
+ # [Raises]
237
+ # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
238
+ #
239
+ def parse_releases_html(response)
240
+ releases = []
241
+
242
+ doc = Nokogiri::HTML response
243
+ doc.css("tbody tr td:first a").each do |a|
244
+ id = a["href"].split("/").last.to_i
245
+ releases << MetalArchives::Release.find(id)
246
+ end
247
+
248
+ releases
249
+ rescue StandardError => e
183
250
  e.backtrace.each { |b| MetalArchives.config.logger.error b }
184
251
  raise Errors::ParserError, e
185
252
  end
186
253
 
187
254
  private
188
255
 
189
- ##
190
- # Map MA band status
191
- #
192
- # Returns +Symbol+
193
- #
194
- # [Raises]
195
- # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
196
- #
256
+ ##
257
+ # Map MA band status
258
+ #
259
+ # Returns +Symbol+
260
+ #
261
+ # [Raises]
262
+ # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
263
+ #
197
264
  def map_status(status)
198
265
  s = {
199
- nil => '',
200
- :active => 'Active',
201
- :split_up => 'Split-up',
202
- :on_hold => 'On hold',
203
- :unknown => 'Unknown',
204
- :changed_name => 'Changed name',
205
- :disputed => 'Disputed'
266
+ nil => "",
267
+ :active => "Active",
268
+ :split_up => "Split-up",
269
+ :on_hold => "On hold",
270
+ :unknown => "Unknown",
271
+ :changed_name => "Changed name",
272
+ :disputed => "Disputed",
206
273
  }
207
274
 
208
- raise MetalArchives::Errors::ParserError, "Unknown status: #{status}" unless s[status]
275
+ raise Errors::ParserError, "Unknown status: #{status}" unless s[status]
209
276
 
210
277
  s[status]
211
278
  end