metal_archives 2.1.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +93 -0
  3. data/.gitignore +6 -6
  4. data/.overcommit.yml +35 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +69 -6
  7. data/CHANGELOG.md +29 -0
  8. data/Gemfile +1 -1
  9. data/LICENSE.md +17 -4
  10. data/README.md +65 -86
  11. data/Rakefile +8 -7
  12. data/bin/console +38 -0
  13. data/bin/setup +8 -0
  14. data/config/inflections.rb +7 -0
  15. data/config/initializers/.keep +0 -0
  16. data/docker-compose.yml +23 -0
  17. data/lib/metal_archives.rb +82 -25
  18. data/lib/metal_archives/cache/base.rb +40 -0
  19. data/lib/metal_archives/cache/memory.rb +68 -0
  20. data/lib/metal_archives/cache/null.rb +22 -0
  21. data/lib/metal_archives/cache/redis.rb +49 -0
  22. data/lib/metal_archives/{utils/collection.rb → collection.rb} +3 -5
  23. data/lib/metal_archives/configuration.rb +33 -50
  24. data/lib/metal_archives/{error.rb → errors.rb} +9 -1
  25. data/lib/metal_archives/http_client.rb +45 -44
  26. data/lib/metal_archives/models/artist.rb +90 -45
  27. data/lib/metal_archives/models/band.rb +80 -55
  28. data/lib/metal_archives/models/base.rb +218 -0
  29. data/lib/metal_archives/models/label.rb +14 -15
  30. data/lib/metal_archives/models/release.rb +349 -0
  31. data/lib/metal_archives/parsers/artist.rb +86 -50
  32. data/lib/metal_archives/parsers/band.rb +155 -88
  33. data/lib/metal_archives/parsers/base.rb +14 -0
  34. data/lib/metal_archives/parsers/country.rb +21 -0
  35. data/lib/metal_archives/parsers/date.rb +31 -0
  36. data/lib/metal_archives/parsers/genre.rb +67 -0
  37. data/lib/metal_archives/parsers/label.rb +39 -31
  38. data/lib/metal_archives/parsers/parser.rb +16 -63
  39. data/lib/metal_archives/parsers/release.rb +242 -0
  40. data/lib/metal_archives/parsers/year.rb +29 -0
  41. data/lib/metal_archives/version.rb +12 -1
  42. data/metal_archives.env.example +10 -0
  43. data/metal_archives.gemspec +43 -28
  44. data/nginx/default.conf +60 -0
  45. metadata +181 -72
  46. data/.travis.yml +0 -12
  47. data/lib/metal_archives/middleware/cache_check.rb +0 -20
  48. data/lib/metal_archives/middleware/encoding.rb +0 -16
  49. data/lib/metal_archives/middleware/headers.rb +0 -38
  50. data/lib/metal_archives/middleware/rewrite_endpoint.rb +0 -38
  51. data/lib/metal_archives/models/base_model.rb +0 -215
  52. data/lib/metal_archives/utils/lru_cache.rb +0 -61
  53. data/lib/metal_archives/utils/nil_date.rb +0 -99
  54. data/lib/metal_archives/utils/range.rb +0 -66
  55. data/spec/configuration_spec.rb +0 -96
  56. data/spec/factories/artist_factory.rb +0 -37
  57. data/spec/factories/band_factory.rb +0 -60
  58. data/spec/factories/nil_date_factory.rb +0 -9
  59. data/spec/factories/range_factory.rb +0 -8
  60. data/spec/models/artist_spec.rb +0 -138
  61. data/spec/models/band_spec.rb +0 -164
  62. data/spec/models/base_model_spec.rb +0 -219
  63. data/spec/parser_spec.rb +0 -19
  64. data/spec/spec_helper.rb +0 -111
  65. data/spec/support/factory_girl.rb +0 -5
  66. data/spec/support/metal_archives.rb +0 -33
  67. data/spec/utils/collection_spec.rb +0 -72
  68. data/spec/utils/lru_cache_spec.rb +0 -53
  69. data/spec/utils/nil_date_spec.rb +0 -156
  70. data/spec/utils/range_spec.rb +0 -62
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MetalArchives
4
+ module Parsers
5
+ ##
6
+ # Abstract base class for parsers: subclasses override +.parse+
7
+ # (the default implementation below always raises +Errors::NotImplementedError+)
8
+ class Base
9
+ def self.parse(_input)
10
+ raise Errors::NotImplementedError, "method .parse not implemented"
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "countries"
4
+
5
+ module MetalArchives
6
+ module Parsers
7
+ ##
8
+ # Country parser
9
+ #
10
+ class Country < Base
11
+ ##
12
+ # Parse a country by name
13
+ #
14
+ # Returns whatever +ISO3166::Country.find_country_by_name+ returns for +input+
15
+ # (an +ISO3166::Country+; presumably +nil+ when nothing matches - TODO confirm)
16
+ def self.parse(input)
17
+ ISO3166::Country.find_country_by_name(input)
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MetalArchives
4
+ module Parsers
5
+ ##
6
+ # Date parser
7
+ #
8
+ class Date < Base
9
+ ##
10
+ # Parse a date; when +::Date.parse+ raises +::Date::Error+, rebuild it from the non-zero "-"-separated integer parts
11
+ #
12
+ # Returns +Date+, or +nil+ when no non-zero part is left or a +TypeError+ is rescued
13
+ #
14
+ def self.parse(input)
15
+ ::Date.parse(input)
16
+ rescue ::Date::Error
17
+ components = input # fallback: collect the numeric parts of the string
18
+ .split("-")
19
+ .map(&:to_i)
20
+ .reject(&:zero?) # drop zero parts (presumably placeholders for an unknown month/day)
21
+ .compact # NOTE(review): String#to_i never returns nil, so this looks like a no-op
22
+
23
+ return if components.empty?
24
+
25
+ ::Date.new(*components) # NOTE(review): a zero in the middle (e.g. 2001-00-15) shifts later parts left - confirm intended
26
+ rescue TypeError
27
+ nil
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MetalArchives
4
+ module Parsers
5
+ ##
6
+ # Genre parser
7
+ #
8
+ class Genre < Base
9
+ SUFFIXES = %w((early) (later) metal).freeze # tokens dropped from every genre (compared after downcasing)
10
+
11
+ ##
12
+ # Opinionated parsing of genres
13
+ #
14
+ # Returns an +Array+ of +String+ with duplicates removed
15
+ #
16
+ # The following components are omitted:
17
+ # - Metal
18
+ # - (early)
19
+ # - (later)
20
+ #
21
+ # All genres are capitalized.
22
+ #
23
+ # For examples on how genres are parsed, refer to +genre_spec.rb+
24
+ #
25
+ def self.parse(input)
26
+ genres = []
27
+ # Split comma-separated fields; each field is expanded independently
28
+ input.split(",").each do |genre|
29
+ ##
30
+ # Start with a single empty genre string. Split the genre by spaces
31
+ # and process each component. If a component does not have a slash,
32
+ # concatenate it to all genre strings present in +temp+. If it does
33
+ # have a slash present, duplicate all genre strings, and concatenate
34
+ # the first component (before the slash) to the first half, and the
35
+ # last component to the last half. +temp+ now has an array of genre
36
+ # combinations.
37
+ #
38
+ # 'Traditional Heavy/Power Metal' => ['Traditional Heavy', 'Traditional Power']
39
+ # 'Traditional/Classical Heavy/Power Metal' => [
40
+ # 'Traditional Heavy', 'Traditional Power',
41
+ # 'Classical Heavy', 'Classical Power']
42
+ #
43
+ temp = [""]
44
+
45
+ genre.downcase.split.reject { |g| SUFFIXES.include? g }.each do |g|
46
+ if g.include? "/"
47
+ # Duplicate all WIP genres
48
+ temp2 = temp.dup
49
+
50
+ # Assign first and last components to temp and temp2 respectively (NOTE(review): with more than one slash, e.g. a/b/c, the middle part is dropped - confirm intended)
51
+ split = g.split "/"
52
+ temp.map! { |t| t.empty? ? split.first.capitalize : "#{t.capitalize} #{split.first.capitalize}" }
53
+ temp2.map! { |t| t.empty? ? split.last.capitalize : "#{t.capitalize} #{split.last.capitalize}" }
54
+
55
+ # Add both genre trees
56
+ temp += temp2
57
+ else
58
+ temp.map! { |t| t.empty? ? g.capitalize : "#{t.capitalize} #{g.capitalize}" }
59
+ end
60
+ end
61
+ genres += temp
62
+ end
63
+ genres.uniq
64
+ end
65
+ end
66
+ end
67
+ end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'date'
4
- require 'nokogiri'
3
+ require "date"
4
+ require "nokogiri"
5
5
 
6
6
  module MetalArchives
7
7
  module Parsers
@@ -11,53 +11,61 @@ module MetalArchives
11
11
  class Label # :nodoc:
12
12
  class << self
13
13
  def find_endpoint(params)
14
- "#{MetalArchives.config.default_endpoint}labels/#{params[:name]}/#{params[:id]}"
14
+ "#{MetalArchives.config.endpoint}labels/#{params[:name]}/#{params[:id]}"
15
15
  end
16
16
 
17
17
  def parse(response)
18
- props = {}
18
+ # Set default props
19
+ props = {
20
+ name: nil,
21
+ contact: [],
22
+ address: nil,
23
+ country: nil,
24
+ phone: nil,
25
+ status: nil,
26
+ specialization: [],
27
+ date_founded: nil,
28
+
29
+ online_shopping: nil,
30
+ }
31
+
19
32
  doc = Nokogiri::HTML(response)
20
33
 
21
- props[:name] = doc.css('#label_info .label_name').first.content
34
+ props[:name] = doc.css("#label_info .label_name").first.content
22
35
 
23
- props[:contact] = []
24
- doc.css('#label_contact a').each do |contact|
36
+ doc.css("#label_contact a").each do |contact|
25
37
  props[:contact] << {
26
- :title => contact.content,
27
- :content => contact.attr(:href)
38
+ title: contact.content,
39
+ content: contact.attr(:href),
28
40
  }
29
41
  end
30
42
 
31
- doc.css('#label_info dl').each do |dl|
32
- dl.search('dt').each do |dt|
43
+ doc.css("#label_info dl").each do |dl|
44
+ dl.search("dt").each do |dt|
33
45
  content = sanitize(dt.next_element.content)
34
46
 
35
- next if content == 'N/A'
47
+ next if content == "N/A"
36
48
 
37
49
  case sanitize(dt.content)
38
- when 'Address:'
50
+ when "Address:"
39
51
  props[:address] = content
40
- when 'Country:'
41
- props[:country] = ParserHelper.parse_country css('a').first.content
42
- when 'Phone number:'
52
+ when "Country:"
53
+ props[:country] = Country.parse(css("a").first.content)
54
+ when "Phone number:"
43
55
  props[:phone] = content
44
- when 'Status:'
45
- props[:status] = content.downcase.tr(' ', '_').to_sym
46
- when 'Specialised in:'
47
- props[:specializations] = ParserHelper.parse_genre content
48
- when 'Founding date :'
49
- begin
50
- dof = Date.parse content
51
- props[:date_founded] = NilDate.new dof.year, dof.month, dof.day
52
- rescue ArgumentError => e
53
- props[:date_founded] = NilDate.parse content
54
- end
55
- when 'Sub-labels:'
56
+ when "Status:"
57
+ props[:status] = content.downcase.tr(" ", "_").to_sym
58
+ when "Specialised in:"
59
+ props[:specializations] = Parsers::Genre.parse(content)
60
+ when "Founding date :"
61
+ props[:date_founded] = Parsers::Date.parse(content)
62
+ when "Sub-labels:"
56
63
  # TODO
57
- when 'Online shopping:'
58
- if content == 'Yes'
64
+ when "Online shopping:"
65
+ case content
66
+ when "Yes"
59
67
  props[:online_shopping] = true
60
- elsif content == 'No'
68
+ when "No"
61
69
  props[:online_shopping] = false
62
70
  end
63
71
  else
@@ -1,88 +1,41 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'date'
4
- require 'countries'
3
+ require "date"
5
4
 
6
5
  module MetalArchives
7
- ##
8
- # Mapping layer from and to MA Web Service
9
- #
10
- module Parsers # :nodoc:
6
+ module Parsers
11
7
  ##
12
8
  # Parser base class
13
9
  #
14
10
  class Parser
15
11
  class << self
16
- ##
17
- # Parse a country
18
- #
19
- # Returns +ISO3166::Country+
20
- #
21
- def parse_country(input)
22
- ISO3166::Country.find_country_by_name input
23
- end
24
-
25
12
  ##
26
13
  # Sanitize a string
27
14
  #
28
15
  # Return +String+
29
16
  #
30
17
  def sanitize(input)
31
- input.gsub(/^"/, '').gsub(/"$/, '').strip
18
+ input
19
+ .gsub(/^"/, "")
20
+ .gsub(/"$/, "")
21
+ .gsub(/[[:space:]]/, " ")
22
+ .strip
32
23
  end
33
24
 
34
25
  ##
35
- # Opinionated parsing of genres
36
- #
37
- # Returns an +Array+ of +String+
38
- #
39
- # The following components are omitted:
40
- # - Metal
41
- # - (early)
42
- # - (later)
43
- #
44
- # All genres are capitalized.
26
+ # Rewrite a URL so that it uses the host and scheme of the configured endpoint
45
27
  #
46
- # For examples on how genres are parsed, refer to +ParserTest#test_parse_genre+
28
+ # Return +String+ (+input+ is returned unchanged when no endpoint is configured)
47
29
  #
48
- def parse_genre(input)
49
- genres = []
50
- # Split fields
51
- input.split(',').each do |genre|
52
- ##
53
- # Start with a single empty genre string. Split the genre by spaces
54
- # and process each component. If a component does not have a slash,
55
- # concatenate it to all genre strings present in +temp+. If it does
56
- # have a slash present, duplicate all genre strings, and concatenate
57
- # the first component (before the slash) to the first half, and the
58
- # last component to the last half. +temp+ now has an array of genre
59
- # combinations.
60
- #
61
- # 'Traditional Heavy/Power Metal' => ['Traditional Heavy', 'Traditional Power']
62
- # 'Traditional/Classical Heavy/Power Metal' => [
63
- # 'Traditional Heavy', 'Traditional Power',
64
- # 'Classical Heavy', 'Classical Power']
65
- #
66
- temp = ['']
67
- genre.downcase.split.reject { |g| ['(early)', '(later)', 'metal'].include? g }.each do |g|
68
- if g.include? '/'
69
- # Duplicate all WIP genres
70
- temp2 = temp.dup
30
+ def rewrite(input)
31
+ return input unless MetalArchives.config.endpoint
71
32
 
72
- # Assign first and last components to temp and temp2 respectively
73
- split = g.split '/'
74
- temp.map! { |t| t.empty? ? split.first.capitalize : "#{t.capitalize} #{split.first.capitalize}" }
75
- temp2.map! { |t| t.empty? ? split.last.capitalize : "#{t.capitalize} #{split.last.capitalize}" }
33
+ endpoint = URI(MetalArchives.config.endpoint)
76
34
 
77
- # Add both genre trees
78
- temp += temp2
79
- else
80
- temp.map! { |t| t.empty? ? g.capitalize : "#{t.capitalize} #{g.capitalize}" }
81
- end
82
- end
83
- genres += temp
84
- end
85
- genres.uniq
35
+ URI(input)
36
+ .tap { |u| u.host = endpoint.host }
37
+ .tap { |u| u.scheme = endpoint.scheme }
38
+ .to_s
86
39
  end
87
40
  end
88
41
  end
@@ -0,0 +1,242 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "date"
5
+
6
+ module MetalArchives
7
+ module Parsers
8
+ ##
9
+ # Release parser
10
+ #
11
+ class Release < Parser # :nodoc:
12
+ class << self
13
+ TYPE_TO_QUERY = { # release type symbol => numeric value used for releaseType (see map_types)
14
+ full_length: 1,
15
+ live: 2,
16
+ demo: 3,
17
+ single: 4,
18
+ ep: 5,
19
+ video: 6,
20
+ boxed_set: 7,
21
+ split: 8,
22
+ compilation: 10,
23
+ split_video: 12,
24
+ collaboration: 13,
25
+ }.freeze
26
+
27
+ TYPE_TO_SYM = { # type label read from the release page => release type symbol (see map_type)
28
+ "Full-length" => :full_length,
29
+ "Live album" => :live,
30
+ "Demo" => :demo,
31
+ "Single" => :single,
32
+ "EP" => :ep,
33
+ "Video" => :video,
34
+ "Boxed set" => :boxed_set,
35
+ "Split" => :split,
36
+ "Compilation" => :compilation,
37
+ "Split video" => :split_video,
38
+ "Collaboration" => :collaboration,
39
+ }.freeze
40
+
41
+ FORMAT_TO_QUERY = { # format symbol => string used for releaseFormat (see map_formats)
42
+ cd: "CD",
43
+ cassette: "Cassette",
44
+ vinyl: "Vinyl*", # trailing asterisk is presumably a wildcard understood by MA - TODO confirm
45
+ vhs: "VHS",
46
+ dvd: "DVD",
47
+ "2dvd": "2DVD",
48
+ digital: "Digital",
49
+ blu_ray: "Blu-ray*",
50
+ other: "Other",
51
+ unknown: "Unknown",
52
+ }.freeze
53
+
54
+ FORMAT_TO_SYM = { # exact format label => symbol; labels containing CD, vinyl or Blu-ray are caught by regexp in map_format before this lookup
55
+ "CD" => :cd,
56
+ "Cassette" => :cassette,
57
+ "VHS" => :vhs,
58
+ "DVD" => :dvd,
59
+ "2DVD" => :"2dvd",
60
+ "Digital" => :digital,
61
+ "Other" => :other,
62
+ "Unknown" => :unknown,
63
+ }.freeze
64
+
65
+ ##
66
+ # Map query attributes to MA attributes
67
+ #
68
+ # Returns +Hash+; absent values default to an empty +String+, except +releaseType+
69
+ # and +releaseFormat+, which are +nil+ when +:types+ / +:formats+ are absent
70
+ # [+query+]
71
+ # +Hash+ with +Symbol+ keys (+:band_name+, +:title+, +:country+, +:types+, +:formats+, ...)
72
+ #
73
+ def map_params(query)
74
+ {
75
+ bandName: query[:band_name] || "",
76
+ releaseTitle: query[:title] || "",
77
+ releaseYearFrom: query[:from_year] || "",
78
+ releaseMonthFrom: query[:from_month] || "",
79
+ releaseYearTo: query[:to_year] || "",
80
+ releaseMonthTo: query[:to_month] || "",
81
+ country: map_countries(query[:country]) || "",
82
+ location: query[:location] || "",
83
+ releaseLabelName: query[:label_name] || "",
84
+ releaseCatalogNumber: query[:catalog_id] || "",
85
+ releaseIdentifiers: query[:identifier] || "",
86
+ releaseRecordingInfo: query[:recording_info] || "",
87
+ releaseDescription: query[:version_description] || "",
88
+ releaseNotes: query[:notes] || "",
89
+ genre: query[:genre] || "",
90
+ releaseType: map_types(query[:types]),
91
+ releaseFormat: map_formats(query[:formats]),
92
+ }
93
+ end
94
+
95
+ ##
96
+ # Parse the main HTML page of a release
97
+ #
98
+ # Returns +Hash+ with the keys +:title+, +:type+, +:date_released+, +:catalog_id+, +:identifier+, +:version_description+, +:format+ and +:limitation+
99
+ #
100
+ # [Raises]
101
+ # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed (any +StandardError+ is logged and re-raised as this error). Please report this error.
102
+ #
103
+ def parse_html(response)
104
+ # Set default props (keys not assigned from the page stay +nil+)
105
+ props = {
106
+ title: nil,
107
+ type: nil,
108
+ date_released: nil,
109
+ catalog_id: nil,
110
+ identifier: nil,
111
+ version_description: nil,
112
+ format: nil,
113
+ limitation: nil,
114
+ }
115
+
116
+ doc = Nokogiri::HTML response
117
+
118
+ props[:title] = sanitize doc.css("#album_info .album_name a").first.content
119
+
120
+ doc.css("#album_info dl").each do |dl|
121
+ dl.search("dt").each do |dt|
122
+ content = sanitize dt.next_element.content
123
+
124
+ next if content == "N/A"
125
+
126
+ case sanitize(dt.content)
127
+ when "Type:"
128
+ props[:type] = map_type content
129
+ when "Release date:"
130
+ props[:date_released] = Parsers::Date.parse(content)
131
+ when "Catalog ID:"
132
+ props[:catalog_id] = content
133
+ when "Identifier:"
134
+ props[:identifier] = content
135
+ when "Version desc.:"
136
+ props[:version_description] = content
137
+ when "Label:"
138
+ # TODO: label
139
+ when "Format:"
140
+ props[:format] = map_format content
141
+ when "Limitation:"
142
+ props[:limitation] = content.to_i # String#to_i: content without a leading number becomes 0
143
+ when "Reviews:"
144
+ next if content == "None yet"
145
+ # TODO: reviews
146
+ else
147
+ raise Errors::ParserError, "Unknown token: #{dt.content}"
148
+ end
149
+ end
150
+ end
151
+
152
+ props
153
+ rescue StandardError => e # log every backtrace line, then wrap the error in ParserError
154
+ e.backtrace.each { |b| MetalArchives.config.logger.error b }
155
+ raise Errors::ParserError, e
156
+ end
157
+
158
+ private
159
+
160
+ ##
161
+ # Map countries to query parameters
162
+ #
163
+ # Returns +Array+ with the +alpha2+ value of every element, or +nil+ when +countries+ is +nil+
164
+ #
165
+ # [+countries+]
166
+ # collection of objects responding to +alpha2+ (presumably +ISO3166::Country+ - TODO confirm)
167
+ #
168
+ def map_countries(countries)
169
+ countries&.map(&:alpha2)
170
+ end
171
+
172
+ ##
173
+ # Map MA release types to query parameters
174
+ #
175
+ # Returns +Array+ of +Integer+ (looked up in +TYPE_TO_QUERY+), or +nil+ when +type_syms+ is +nil+ or +false+
176
+ # Raises +Errors::ParserError+ for a type that is missing from +TYPE_TO_QUERY+
177
+ # [+type_syms+]
178
+ # +Array+ containing one or more +Symbol+, see rdoc-ref:Release.type
179
+ #
180
+ def map_types(type_syms)
181
+ return unless type_syms
182
+
183
+ types = []
184
+ type_syms.each do |type|
185
+ raise Errors::ParserError, "Unknown type: #{type}" unless TYPE_TO_QUERY[type]
186
+
187
+ types << TYPE_TO_QUERY[type]
188
+ end
189
+
190
+ types
191
+ end
192
+
193
+ ##
194
+ # Map an MA release type label (a key of +TYPE_TO_SYM+) to +Symbol+
195
+ #
196
+ # Returns +Symbol+, see rdoc-ref:Release.type
197
+ # Raises +Errors::ParserError+ when +type+ is not a key of +TYPE_TO_SYM+
198
+ def map_type(type)
199
+ raise Errors::ParserError, "Unknown type: #{type}" unless TYPE_TO_SYM[type]
200
+
201
+ TYPE_TO_SYM[type]
202
+ end
203
+
204
+ ##
205
+ # Map MA release formats to query parameters
206
+ #
207
+ # Returns +Array+ of +String+ (looked up in +FORMAT_TO_QUERY+), or +nil+ when +format_syms+ is +nil+ or +false+
208
+ # Raises +Errors::ParserError+ for a format that is missing from +FORMAT_TO_QUERY+
209
+ # [+format_syms+]
210
+ # +Array+ containing one or more +Symbol+, see rdoc-ref:Release.format
211
+ #
212
+ def map_formats(format_syms)
213
+ return unless format_syms
214
+
215
+ formats = []
216
+ format_syms.each do |format|
217
+ raise Errors::ParserError, "Unknown format: #{format}" unless FORMAT_TO_QUERY[format]
218
+
219
+ formats << FORMAT_TO_QUERY[format]
220
+ end
221
+
222
+ formats
223
+ end
224
+
225
+ ##
226
+ # Map MA release format label to +Symbol+
227
+ # Labels containing CD, vinyl or Blu-ray are matched by regexp first; anything else must be an exact key of +FORMAT_TO_SYM+
228
+ # Returns +Symbol+, see rdoc-ref:Release.format
229
+ # Raises +Errors::ParserError+ when +format+ matches none of the above
230
+ def map_format(format)
231
+ return :cd if /CD/.match?(format) # substring match: any label containing CD maps to :cd
232
+ return :vinyl if /[Vv]inyl/.match?(format)
233
+ return :blu_ray if /[Bb]lu.?[Rr]ay/.match?(format)
234
+
235
+ raise Errors::ParserError, "Unknown format: #{format}" unless FORMAT_TO_SYM[format]
236
+
237
+ FORMAT_TO_SYM[format]
238
+ end
239
+ end
240
+ end
241
+ end
242
+ end