metal_archives 2.1.1 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yml +93 -0
  3. data/.gitignore +6 -6
  4. data/.overcommit.yml +35 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +69 -6
  7. data/CHANGELOG.md +29 -0
  8. data/Gemfile +1 -1
  9. data/LICENSE.md +17 -4
  10. data/README.md +65 -86
  11. data/Rakefile +8 -7
  12. data/bin/console +38 -0
  13. data/bin/setup +8 -0
  14. data/config/inflections.rb +7 -0
  15. data/config/initializers/.keep +0 -0
  16. data/docker-compose.yml +23 -0
  17. data/lib/metal_archives.rb +82 -25
  18. data/lib/metal_archives/cache/base.rb +40 -0
  19. data/lib/metal_archives/cache/memory.rb +68 -0
  20. data/lib/metal_archives/cache/null.rb +22 -0
  21. data/lib/metal_archives/cache/redis.rb +49 -0
  22. data/lib/metal_archives/{utils/collection.rb → collection.rb} +3 -5
  23. data/lib/metal_archives/configuration.rb +33 -50
  24. data/lib/metal_archives/{error.rb → errors.rb} +9 -1
  25. data/lib/metal_archives/http_client.rb +45 -44
  26. data/lib/metal_archives/models/artist.rb +90 -45
  27. data/lib/metal_archives/models/band.rb +80 -55
  28. data/lib/metal_archives/models/base.rb +218 -0
  29. data/lib/metal_archives/models/label.rb +14 -15
  30. data/lib/metal_archives/models/release.rb +349 -0
  31. data/lib/metal_archives/parsers/artist.rb +86 -50
  32. data/lib/metal_archives/parsers/band.rb +155 -88
  33. data/lib/metal_archives/parsers/base.rb +14 -0
  34. data/lib/metal_archives/parsers/country.rb +21 -0
  35. data/lib/metal_archives/parsers/date.rb +31 -0
  36. data/lib/metal_archives/parsers/genre.rb +67 -0
  37. data/lib/metal_archives/parsers/label.rb +39 -31
  38. data/lib/metal_archives/parsers/parser.rb +16 -63
  39. data/lib/metal_archives/parsers/release.rb +242 -0
  40. data/lib/metal_archives/parsers/year.rb +29 -0
  41. data/lib/metal_archives/version.rb +12 -1
  42. data/metal_archives.env.example +10 -0
  43. data/metal_archives.gemspec +43 -28
  44. data/nginx/default.conf +60 -0
  45. metadata +181 -72
  46. data/.travis.yml +0 -12
  47. data/lib/metal_archives/middleware/cache_check.rb +0 -20
  48. data/lib/metal_archives/middleware/encoding.rb +0 -16
  49. data/lib/metal_archives/middleware/headers.rb +0 -38
  50. data/lib/metal_archives/middleware/rewrite_endpoint.rb +0 -38
  51. data/lib/metal_archives/models/base_model.rb +0 -215
  52. data/lib/metal_archives/utils/lru_cache.rb +0 -61
  53. data/lib/metal_archives/utils/nil_date.rb +0 -99
  54. data/lib/metal_archives/utils/range.rb +0 -66
  55. data/spec/configuration_spec.rb +0 -96
  56. data/spec/factories/artist_factory.rb +0 -37
  57. data/spec/factories/band_factory.rb +0 -60
  58. data/spec/factories/nil_date_factory.rb +0 -9
  59. data/spec/factories/range_factory.rb +0 -8
  60. data/spec/models/artist_spec.rb +0 -138
  61. data/spec/models/band_spec.rb +0 -164
  62. data/spec/models/base_model_spec.rb +0 -219
  63. data/spec/parser_spec.rb +0 -19
  64. data/spec/spec_helper.rb +0 -111
  65. data/spec/support/factory_girl.rb +0 -5
  66. data/spec/support/metal_archives.rb +0 -33
  67. data/spec/utils/collection_spec.rb +0 -72
  68. data/spec/utils/lru_cache_spec.rb +0 -53
  69. data/spec/utils/nil_date_spec.rb +0 -156
  70. data/spec/utils/range_spec.rb +0 -62
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MetalArchives
4
+ module Parsers
5
+ ##
6
+ # Abstract base class
7
+ #
8
+ class Base
9
+ def self.parse(_input)
10
+ raise Errors::NotImplementedError, "method .parse not implemented"
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "countries"
4
+
5
+ module MetalArchives
6
+ module Parsers
7
+ ##
8
+ # Country parser
9
+ #
10
+ class Country < Base
11
+ ##
12
+ # Parse a country
13
+ #
14
+ # Returns +ISO3166::Country+
15
+ #
16
+ def self.parse(input)
17
+ ISO3166::Country.find_country_by_name(input)
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MetalArchives
4
+ module Parsers
5
+ ##
6
+ # Date parser
7
+ #
8
+ class Date < Base
9
+ ##
10
+ # Parse a date
11
+ #
12
+ # Returns +Date+, or +nil+ when +input+ has no non-zero numeric components or a +TypeError+ is raised while parsing
13
+ #
14
+ def self.parse(input)
15
+ ::Date.parse(input)
16
+ rescue ::Date::Error
17
+ components = input
18
+ .split("-")
19
+ .map(&:to_i)
20
+ .reject(&:zero?)
21
+ .compact
22
+
23
+ return if components.empty?
24
+
25
+ ::Date.new(*components)
26
+ rescue TypeError
27
+ nil
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MetalArchives
4
+ module Parsers
5
+ ##
6
+ # Genre parser
7
+ #
8
+ class Genre < Base
9
+ SUFFIXES = %w((early) (later) metal).freeze
10
+
11
+ ##
12
+ # Opinionated parsing of genres
13
+ #
14
+ # Returns an +Array+ of +String+
15
+ #
16
+ # The following components are omitted:
17
+ # - Metal
18
+ # - (early)
19
+ # - (later)
20
+ #
21
+ # All genres are capitalized.
22
+ #
23
+ # For examples on how genres are parsed, refer to +genre_spec.rb+
24
+ #
25
+ def self.parse(input)
26
+ genres = []
27
+ # Split fields
28
+ input.split(",").each do |genre|
29
+ ##
30
+ # Start with a single empty genre string. Split the genre by spaces
31
+ # and process each component. If a component does not have a slash,
32
+ # concatenate it to all genre strings present in +temp+. If it does
33
+ # have a slash present, duplicate all genre strings, and concatenate
34
+ # the first component (before the slash) to the first half, and the
35
+ # last component to the last half. +temp+ now has an array of genre
36
+ # combinations.
37
+ #
38
+ # 'Traditional Heavy/Power Metal' => ['Traditional Heavy', 'Traditional Power']
39
+ # 'Traditional/Classical Heavy/Power Metal' => [
40
+ # 'Traditional Heavy', 'Traditional Power',
41
+ # 'Classical Heavy', 'Classical Power']
42
+ #
43
+ temp = [""]
44
+
45
+ genre.downcase.split.reject { |g| SUFFIXES.include? g }.each do |g|
46
+ if g.include? "/"
47
+ # Duplicate all WIP genres
48
+ temp2 = temp.dup
49
+
50
+ # Assign first and last components to temp and temp2 respectively
51
+ split = g.split "/"
52
+ temp.map! { |t| t.empty? ? split.first.capitalize : "#{t.capitalize} #{split.first.capitalize}" }
53
+ temp2.map! { |t| t.empty? ? split.last.capitalize : "#{t.capitalize} #{split.last.capitalize}" }
54
+
55
+ # Add both genre trees
56
+ temp += temp2
57
+ else
58
+ temp.map! { |t| t.empty? ? g.capitalize : "#{t.capitalize} #{g.capitalize}" }
59
+ end
60
+ end
61
+ genres += temp
62
+ end
63
+ genres.uniq
64
+ end
65
+ end
66
+ end
67
+ end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'date'
4
- require 'nokogiri'
3
+ require "date"
4
+ require "nokogiri"
5
5
 
6
6
  module MetalArchives
7
7
  module Parsers
@@ -11,53 +11,61 @@ module MetalArchives
11
11
  class Label # :nodoc:
12
12
  class << self
13
13
  def find_endpoint(params)
14
- "#{MetalArchives.config.default_endpoint}labels/#{params[:name]}/#{params[:id]}"
14
+ "#{MetalArchives.config.endpoint}labels/#{params[:name]}/#{params[:id]}"
15
15
  end
16
16
 
17
17
  def parse(response)
18
- props = {}
18
+ # Set default props
19
+ props = {
20
+ name: nil,
21
+ contact: [],
22
+ address: nil,
23
+ country: nil,
24
+ phone: nil,
25
+ status: nil,
26
+ specialization: [],
27
+ date_founded: nil,
28
+
29
+ online_shopping: nil,
30
+ }
31
+
19
32
  doc = Nokogiri::HTML(response)
20
33
 
21
- props[:name] = doc.css('#label_info .label_name').first.content
34
+ props[:name] = doc.css("#label_info .label_name").first.content
22
35
 
23
- props[:contact] = []
24
- doc.css('#label_contact a').each do |contact|
36
+ doc.css("#label_contact a").each do |contact|
25
37
  props[:contact] << {
26
- :title => contact.content,
27
- :content => contact.attr(:href)
38
+ title: contact.content,
39
+ content: contact.attr(:href),
28
40
  }
29
41
  end
30
42
 
31
- doc.css('#label_info dl').each do |dl|
32
- dl.search('dt').each do |dt|
43
+ doc.css("#label_info dl").each do |dl|
44
+ dl.search("dt").each do |dt|
33
45
  content = sanitize(dt.next_element.content)
34
46
 
35
- next if content == 'N/A'
47
+ next if content == "N/A"
36
48
 
37
49
  case sanitize(dt.content)
38
- when 'Address:'
50
+ when "Address:"
39
51
  props[:address] = content
40
- when 'Country:'
41
- props[:country] = ParserHelper.parse_country css('a').first.content
42
- when 'Phone number:'
52
+ when "Country:"
53
+ props[:country] = Country.parse(css("a").first.content)
54
+ when "Phone number:"
43
55
  props[:phone] = content
44
- when 'Status:'
45
- props[:status] = content.downcase.tr(' ', '_').to_sym
46
- when 'Specialised in:'
47
- props[:specializations] = ParserHelper.parse_genre content
48
- when 'Founding date :'
49
- begin
50
- dof = Date.parse content
51
- props[:date_founded] = NilDate.new dof.year, dof.month, dof.day
52
- rescue ArgumentError => e
53
- props[:date_founded] = NilDate.parse content
54
- end
55
- when 'Sub-labels:'
56
+ when "Status:"
57
+ props[:status] = content.downcase.tr(" ", "_").to_sym
58
+ when "Specialised in:"
59
+ props[:specializations] = Parsers::Genre.parse(content)
60
+ when "Founding date :"
61
+ props[:date_founded] = Parsers::Date.parse(content)
62
+ when "Sub-labels:"
56
63
  # TODO
57
- when 'Online shopping:'
58
- if content == 'Yes'
64
+ when "Online shopping:"
65
+ case content
66
+ when "Yes"
59
67
  props[:online_shopping] = true
60
- elsif content == 'No'
68
+ when "No"
61
69
  props[:online_shopping] = false
62
70
  end
63
71
  else
@@ -1,88 +1,41 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'date'
4
- require 'countries'
3
+ require "date"
5
4
 
6
5
  module MetalArchives
7
- ##
8
- # Mapping layer from and to MA Web Service
9
- #
10
- module Parsers # :nodoc:
6
+ module Parsers
11
7
  ##
12
8
  # Parser base class
13
9
  #
14
10
  class Parser
15
11
  class << self
16
- ##
17
- # Parse a country
18
- #
19
- # Returns +ISO3166::Country+
20
- #
21
- def parse_country(input)
22
- ISO3166::Country.find_country_by_name input
23
- end
24
-
25
12
  ##
26
13
  # Sanitize a string
27
14
  #
28
15
  # Return +String+
29
16
  #
30
17
  def sanitize(input)
31
- input.gsub(/^"/, '').gsub(/"$/, '').strip
18
+ input
19
+ .gsub(/^"/, "")
20
+ .gsub(/"$/, "")
21
+ .gsub(/[[:space:]]/, " ")
22
+ .strip
32
23
  end
33
24
 
34
25
  ##
35
- # Opinionated parsing of genres
36
- #
37
- # Returns an +Array+ of +String+
38
- #
39
- # The following components are omitted:
40
- # - Metal
41
- # - (early)
42
- # - (later)
43
- #
44
- # All genres are capitalized.
26
+ # Rewrite a URL
45
27
  #
46
- # For examples on how genres are parsed, refer to +ParserTest#test_parse_genre+
28
+ # Returns the rewritten URL as a +String+, or +input+ unchanged when no endpoint is configured
47
29
  #
48
- def parse_genre(input)
49
- genres = []
50
- # Split fields
51
- input.split(',').each do |genre|
52
- ##
53
- # Start with a single empty genre string. Split the genre by spaces
54
- # and process each component. If a component does not have a slash,
55
- # concatenate it to all genre strings present in +temp+. If it does
56
- # have a slash present, duplicate all genre strings, and concatenate
57
- # the first component (before the slash) to the first half, and the
58
- # last component to the last half. +temp+ now has an array of genre
59
- # combinations.
60
- #
61
- # 'Traditional Heavy/Power Metal' => ['Traditional Heavy', 'Traditional Power']
62
- # 'Traditional/Classical Heavy/Power Metal' => [
63
- # 'Traditional Heavy', 'Traditional Power',
64
- # 'Classical Heavy', 'Classical Power']
65
- #
66
- temp = ['']
67
- genre.downcase.split.reject { |g| ['(early)', '(later)', 'metal'].include? g }.each do |g|
68
- if g.include? '/'
69
- # Duplicate all WIP genres
70
- temp2 = temp.dup
30
+ def rewrite(input)
31
+ return input unless MetalArchives.config.endpoint
71
32
 
72
- # Assign first and last components to temp and temp2 respectively
73
- split = g.split '/'
74
- temp.map! { |t| t.empty? ? split.first.capitalize : "#{t.capitalize} #{split.first.capitalize}" }
75
- temp2.map! { |t| t.empty? ? split.last.capitalize : "#{t.capitalize} #{split.last.capitalize}" }
33
+ endpoint = URI(MetalArchives.config.endpoint)
76
34
 
77
- # Add both genre trees
78
- temp += temp2
79
- else
80
- temp.map! { |t| t.empty? ? g.capitalize : "#{t.capitalize} #{g.capitalize}" }
81
- end
82
- end
83
- genres += temp
84
- end
85
- genres.uniq
35
+ URI(input)
36
+ .tap { |u| u.host = endpoint.host }
37
+ .tap { |u| u.scheme = endpoint.scheme }
38
+ .to_s
86
39
  end
87
40
  end
88
41
  end
@@ -0,0 +1,242 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "date"
5
+
6
+ module MetalArchives
7
+ module Parsers
8
+ ##
9
+ # Release parser
10
+ #
11
+ class Release < Parser # :nodoc:
12
+ class << self
13
+ TYPE_TO_QUERY = {
14
+ full_length: 1,
15
+ live: 2,
16
+ demo: 3,
17
+ single: 4,
18
+ ep: 5,
19
+ video: 6,
20
+ boxed_set: 7,
21
+ split: 8,
22
+ compilation: 10,
23
+ split_video: 12,
24
+ collaboration: 13,
25
+ }.freeze
26
+
27
+ TYPE_TO_SYM = {
28
+ "Full-length" => :full_length,
29
+ "Live album" => :live,
30
+ "Demo" => :demo,
31
+ "Single" => :single,
32
+ "EP" => :ep,
33
+ "Video" => :video,
34
+ "Boxed set" => :boxed_set,
35
+ "Split" => :split,
36
+ "Compilation" => :compilation,
37
+ "Split video" => :split_video,
38
+ "Collaboration" => :collaboration,
39
+ }.freeze
40
+
41
+ FORMAT_TO_QUERY = {
42
+ cd: "CD",
43
+ cassette: "Cassette",
44
+ vinyl: "Vinyl*",
45
+ vhs: "VHS",
46
+ dvd: "DVD",
47
+ "2dvd": "2DVD",
48
+ digital: "Digital",
49
+ blu_ray: "Blu-ray*",
50
+ other: "Other",
51
+ unknown: "Unknown",
52
+ }.freeze
53
+
54
+ FORMAT_TO_SYM = {
55
+ "CD" => :cd,
56
+ "Cassette" => :cassette,
57
+ "VHS" => :vhs,
58
+ "DVD" => :dvd,
59
+ "2DVD" => :"2dvd",
60
+ "Digital" => :digital,
61
+ "Other" => :other,
62
+ "Unknown" => :unknown,
63
+ }.freeze
64
+
65
+ ##
66
+ # Map attributes to MA attributes
67
+ #
68
+ # Returns +Hash+
69
+ #
70
+ # [+query+]
71
+ # +Hash+
72
+ #
73
+ def map_params(query)
74
+ {
75
+ bandName: query[:band_name] || "",
76
+ releaseTitle: query[:title] || "",
77
+ releaseYearFrom: query[:from_year] || "",
78
+ releaseMonthFrom: query[:from_month] || "",
79
+ releaseYearTo: query[:to_year] || "",
80
+ releaseMonthTo: query[:to_month] || "",
81
+ country: map_countries(query[:country]) || "",
82
+ location: query[:location] || "",
83
+ releaseLabelName: query[:label_name] || "",
84
+ releaseCatalogNumber: query[:catalog_id] || "",
85
+ releaseIdentifiers: query[:identifier] || "",
86
+ releaseRecordingInfo: query[:recording_info] || "",
87
+ releaseDescription: query[:version_description] || "",
88
+ releaseNotes: query[:notes] || "",
89
+ genre: query[:genre] || "",
90
+ releaseType: map_types(query[:types]),
91
+ releaseFormat: map_formats(query[:formats]),
92
+ }
93
+ end
94
+
95
+ ##
96
+ # Parse main HTML page
97
+ #
98
+ # Returns +Hash+
99
+ #
100
+ # [Raises]
101
+ # - rdoc-ref:MetalArchives::Errors::ParserError when parsing failed. Please report this error.
102
+ #
103
+ def parse_html(response)
104
+ # Set default props
105
+ props = {
106
+ title: nil,
107
+ type: nil,
108
+ date_released: nil,
109
+ catalog_id: nil,
110
+ identifier: nil,
111
+ version_description: nil,
112
+ format: nil,
113
+ limitation: nil,
114
+ }
115
+
116
+ doc = Nokogiri::HTML response
117
+
118
+ props[:title] = sanitize doc.css("#album_info .album_name a").first.content
119
+
120
+ doc.css("#album_info dl").each do |dl|
121
+ dl.search("dt").each do |dt|
122
+ content = sanitize dt.next_element.content
123
+
124
+ next if content == "N/A"
125
+
126
+ case sanitize(dt.content)
127
+ when "Type:"
128
+ props[:type] = map_type content
129
+ when "Release date:"
130
+ props[:date_released] = Parsers::Date.parse(content)
131
+ when "Catalog ID:"
132
+ props[:catalog_id] = content
133
+ when "Identifier:"
134
+ props[:identifier] = content
135
+ when "Version desc.:"
136
+ props[:version_description] = content
137
+ when "Label:"
138
+ # TODO: label
139
+ when "Format:"
140
+ props[:format] = map_format content
141
+ when "Limitation:"
142
+ props[:limitation] = content.to_i
143
+ when "Reviews:"
144
+ next if content == "None yet"
145
+ # TODO: reviews
146
+ else
147
+ raise Errors::ParserError, "Unknown token: #{dt.content}"
148
+ end
149
+ end
150
+ end
151
+
152
+ props
153
+ rescue StandardError => e
154
+ e.backtrace.each { |b| MetalArchives.config.logger.error b }
155
+ raise Errors::ParserError, e
156
+ end
157
+
158
+ private
159
+
160
+ ##
161
+ # Map MA countries to query parameters
162
+ #
163
+ # Returns +Array+ of the +alpha2+ value of each country, or +nil+ when +countries+ is +nil+
164
+ #
165
+ # [+countries+]
166
+ # +Array+ of objects responding to +alpha2+ (presumably +ISO3166::Country+)
167
+ #
168
+ def map_countries(countries)
169
+ countries&.map(&:alpha2)
170
+ end
171
+
172
+ ##
173
+ # Map MA release type to query parameters
174
+ #
175
+ # Returns +Array+ of +Integer+
176
+ #
177
+ # [+type_syms+]
178
+ # +Array+ containing one or more +Symbol+, see rdoc-ref:Release.type
179
+ #
180
+ def map_types(type_syms)
181
+ return unless type_syms
182
+
183
+ types = []
184
+ type_syms.each do |type|
185
+ raise Errors::ParserError, "Unknown type: #{type}" unless TYPE_TO_QUERY[type]
186
+
187
+ types << TYPE_TO_QUERY[type]
188
+ end
189
+
190
+ types
191
+ end
192
+
193
+ ##
194
+ # Map MA release type to +Symbol+
195
+ #
196
+ # Returns +Symbol+, see rdoc-ref:Release.type
197
+ #
198
+ def map_type(type)
199
+ raise Errors::ParserError, "Unknown type: #{type}" unless TYPE_TO_SYM[type]
200
+
201
+ TYPE_TO_SYM[type]
202
+ end
203
+
204
+ ##
205
+ # Map MA release format to query parameters
206
+ #
207
+ # Returns +Array+ of +String+, or +nil+ when +format_syms+ is +nil+
208
+ #
209
+ # [+format_syms+]
210
+ # +Array+ containing one or more +Symbol+, see rdoc-ref:Release.format
211
+ #
212
+ def map_formats(format_syms)
213
+ return unless format_syms
214
+
215
+ formats = []
216
+ format_syms.each do |format|
217
+ raise Errors::ParserError, "Unknown format: #{format}" unless FORMAT_TO_QUERY[format]
218
+
219
+ formats << FORMAT_TO_QUERY[format]
220
+ end
221
+
222
+ formats
223
+ end
224
+
225
+ ##
226
+ # Map MA release format to +Symbol+
227
+ #
228
+ # Returns +Symbol+, see rdoc-ref:Release.format
229
+ #
230
+ def map_format(format)
231
+ return :cd if /CD/.match?(format)
232
+ return :vinyl if /[Vv]inyl/.match?(format)
233
+ return :blu_ray if /[Bb]lu.?[Rr]ay/.match?(format)
234
+
235
+ raise Errors::ParserError, "Unknown format: #{format}" unless FORMAT_TO_SYM[format]
236
+
237
+ FORMAT_TO_SYM[format]
238
+ end
239
+ end
240
+ end
241
+ end
242
+ end