metal_archives 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
module MetalArchives
  ##
  # Range which can start and/or end with +nil+
  #
  # Ranges are ordered by their begin point first and by their end point
  # second (see #<=>).
  #
  class Range
    include Comparable

    ##
    # Begin- and endpoint of range
    #
    attr_accessor :begin, :end

    ##
    # Create a new range
    #
    # [+_begin+]
    #     Start of range
    #
    #     Default: +nil+
    #
    # [+_end+]
    #     End of range
    #
    #     Default: +nil+
    #
    def initialize(_begin = nil, _end = nil)
      @begin = _begin
      @end = _end
    end

    ##
    # Whether start of range is present
    #
    # Returns +true+ unless the begin point is +nil+ or +false+
    #
    def begin?
      !!@begin
    end

    ##
    # Whether end of range is present
    #
    # Returns +true+ unless the end point is +nil+ or +false+
    #
    def end?
      !!@end
    end

    ##
    # Comparison operator
    #
    # Compares the begin points and, when those are equal, the end points.
    #
    # Returns the result of the endpoints' own +<=>+, or +nil+ when +other+
    # does not expose +begin+ and +end+, or when the endpoints themselves
    # cannot be compared.
    #
    # [+other+]
    #     Object to compare with
    #
    def <=>(other)
      # Comparable#== is built on top of this method; returning nil for
      # foreign objects keeps `range == nil` from raising NoMethodError.
      return nil unless other.respond_to?(:begin) && other.respond_to?(:end)

      # `self.` is mandatory: bare `begin` and `end` are Ruby keywords.
      comparison = self.begin <=> other.begin

      # comparison may be nil (incomparable begin points), hence no #zero?
      return comparison unless comparison == 0

      self.end <=> other.end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require 'json'
2
+ require 'date'
3
+ require 'countries'
4
+
5
+ module MetalArchives
6
+ module Parsers
7
+ ##
8
+ # Artist parser
9
+ #
10
class Artist # :nodoc:
  class << self
    ##
    # Map attributes to MA attributes
    #
    # Returns +Hash+
    #
    # [+query+]
    #     +Hash+; +:name+ and +:iDisplayStart+ are read, missing values
    #     default to +''+ and +0+ respectively
    #
    def map_params(query)
      {
        query: query[:name] || '',
        iDisplayStart: query[:iDisplayStart] || 0
      }
    end

    ##
    # Parse the artist page
    #
    # Returns +Hash+ with (depending on what the page contains) the keys
    # +:name+, +:date_of_birth+, +:date_of_death+, +:cause_of_death+,
    # +:country+, +:location+, +:gender+ and +:aliases+
    #
    # Raises +MetalArchives::Errors::ParserError+ on an unknown field label
    # or an unknown gender
    #
    # [+response+]
    #     HTML document, handed to +Nokogiri::HTML+
    #
    def parse_html(response)
      props = {}
      doc = Nokogiri::HTML(response)

      doc.css('#member_info dl').each do |dl|
        dl.css('dt').each do |dt|
          # The value of a field lives in the element following its label
          dd = dt.next_element

          case dt.content.strip
          when 'Real/full name:'
            props[:name] = dd.content.strip
          when 'Age:'
            # `next`, not `break`: an absent value must only skip this
            # field, not every remaining field of the list
            next if dd.content == 'N/A'

            # Keep only the date captured from the '(born ...)' part
            date = dd.content.gsub(/ [0-9]* \(born ([^\)]*)\)/, '\1')
            props[:date_of_birth] = Date.parse(date)
          when 'R.I.P.:'
            next if dd.content == 'N/A'

            props[:date_of_death] = Date.parse(dd.content)
          when 'Died of:'
            # Comparison, not assignment
            next if dd.content == 'N/A'

            props[:cause_of_death] = dd.content
          when 'Place of origin:'
            next if dd.content == 'N/A'

            props[:country] = ISO3166::Country.find_country_by_name(dd.css('a').first.content)
            # Text outside the country link, with parentheses removed
            location = dd.xpath('text()').map { |text| text.content }.join('').strip.gsub(/[()]/, '')
            props[:location] = location unless location.empty?
          when 'Gender:'
            next if dd.content == 'N/A'

            props[:gender] = parse_gender(dd.content)
          else
            raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
          end
        end
      end

      # The displayed name is an alias when it differs from the real name
      props[:aliases] = []
      stage_name = doc.css('.band_member_name').first
      alt = stage_name.content if stage_name
      props[:aliases] << alt if alt && alt != props[:name]

      props
    end

    ##
    # Parse the links page
    #
    # Returns +Array+ of +Hash+ with keys +:url+, +:type+ and +:title+
    #
    # [+response+]
    #     HTML document, handed to +Nokogiri::HTML+
    #
    def parse_links_html(response)
      links = []
      doc = Nokogiri::HTML(response)

      # Default to official links
      type = :official

      doc.css('#linksTablemain tr').each do |row|
        id = row['id']

        if id && id =~ /^header_/
          # Header rows switch the type for all rows that follow
          type = id.gsub(/^header_/, '').downcase.to_sym
        else
          a = row.css('td a').first
          # Rows without an anchor carry no link
          next unless a

          links << {
            url: a['href'],
            type: type,
            title: a.content
          }
        end
      end

      links
    end

    private

    ##
    # Map a gender label onto +:male+ or +:female+
    #
    # Raises +MetalArchives::Errors::ParserError+ on any other label
    #
    def parse_gender(label)
      case label
      when 'Male'
        :male
      when 'Female'
        :female
      else
        raise MetalArchives::Errors::ParserError, "Unknown gender: #{label}"
      end
    end
  end
end
102
+ end
103
+ end
@@ -0,0 +1,160 @@
1
+ require 'json'
2
+ require 'date'
3
+ require 'countries'
4
+
5
+ module MetalArchives
6
+ module Parsers
7
+ ##
8
+ # Band parser
9
+ #
10
class Band # :nodoc:
  # Query status => value expected by the MA search form
  STATUS_LABELS = {
    nil => '',
    :active => 'Active',
    :split_up => 'Split-up',
    :on_hold => 'On hold',
    :unknown => 'Unknown',
    :changed_name => 'Changed name',
    :disputed => 'Disputed'
  }.freeze

  class << self
    ##
    # Map attributes to MA attributes
    #
    # Returns +Hash+
    #
    # Raises +MetalArchives::Errors::ParserError+ when +:status+ is unknown
    #
    # [+query+]
    #     +Hash+; +:year+ is expected to respond to +begin+ and +end+,
    #     +:country+ may be a single value or an +Array+ of
    #     +ISO3166::Country+ or +String+
    #
    def map_params(query)
      year = query[:year]

      params = {
        bandName: query[:name] || '',
        exactBandMatch: (query[:exact] ? 1 : 0),
        genre: query[:genre] || '',
        yearCreationFrom: (year && year.begin ? year.begin.year : ''),
        yearCreationTo: (year && year.end ? year.end.year : ''),
        bandNotes: query[:comment] || '',
        status: map_status(query[:status]),
        themes: query[:lyrical_themes] || '',
        location: query[:location] || '',
        bandLabelName: query[:label] || '',
        indieLabelBand: (query[:independent] ? 1 : 0),

        iDisplayStart: query[:iDisplayStart] || 0
      }

      countries = Array(query[:country]).map do |country|
        country.is_a?(ISO3166::Country) ? country.alpha2 : (country || '')
      end
      # A single country is passed as a scalar, several (or none) as an array
      params[:country] = countries.size == 1 ? countries.first : countries

      params
    end

    ##
    # Parse the band page
    #
    # Returns +Hash+ with +:name+ and +:aliases+, plus (depending on what
    # the page contains) +:logo+, +:photo+, +:country+, +:location+,
    # +:status+, +:date_formed+, +:genres+, +:lyrical_themes+,
    # +:independent+ and +:date_active+
    #
    # Raises +MetalArchives::Errors::ParserError+ on an unknown field label
    #
    # [+response+]
    #     HTML document, handed to +Nokogiri::HTML+
    #
    def parse_html(response)
      props = {}
      doc = Nokogiri::HTML(response)

      props[:name] = doc.css('#band_info .band_name a').first.content
      props[:aliases] = []

      logo = doc.css('.band_name_img img').first
      props[:logo] = logo.attr('src') if logo
      photo = doc.css('.band_img img').first
      props[:photo] = photo.attr('src') if photo

      doc.css('#band_stats dl').each do |dl|
        dl.search('dt').each do |dt|
          # The value of a field lives in the element following its label
          dd = dt.next_element

          case dt.content
          when 'Country of origin:'
            props[:country] = ISO3166::Country.find_country_by_name(dd.css('a').first.content)
          when 'Location:'
            # `next`, not `break`: an absent value must only skip this
            # field, not every remaining field of the list
            next if dd.content == 'N/A'

            props[:location] = dd.content
          when 'Status:'
            # Spaces and hyphens both become underscores so that
            # 'Split-up' yields :split_up, the key used by STATUS_LABELS
            props[:status] = dd.content.downcase.gsub(/[ -]/, '_').to_sym
          when 'Formed in:'
            next if dd.content == 'N/A'

            props[:date_formed] = Date.new(dd.content.to_i)
          when 'Genre:'
            next if dd.content == 'N/A'

            props[:genres] = ParserHelper.parse_genre(dd.content)
          when 'Lyrical themes:'
            props[:lyrical_themes] = []
            next if dd.content == 'N/A'

            props[:lyrical_themes].concat(parse_lyrical_themes(dd.content))
          when /(Current|Last) label:/
            props[:independent] = (dd.content == 'Unsigned/independent')
            # TODO
          when 'Years active:'
            next if dd.content == 'N/A'

            props[:date_active] = parse_years_active(dd.content, props[:aliases])
          else
            raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
          end
        end
      end

      props
    end

    ##
    # Parse the similar bands page
    #
    # Returns +Array+ of +Hash+ with keys +:band+ (+MetalArchives::Band+
    # built from the id at the end of the link) and +:score+ (+String+)
    #
    # [+response+]
    #     HTML document, handed to +Nokogiri::HTML+
    #
    def parse_similar_bands_html(response)
      similar = []

      doc = Nokogiri::HTML(response)
      doc.css('#artist_list tbody tr').each do |row|
        link = row.css('td a').first
        # Rows without a band link carry no band
        next unless link

        similar << {
          band: MetalArchives::Band.new(:id => link['href'].split('/').last.to_i),
          score: row.css('td').last.content.strip
        }
      end

      similar
    end

    ##
    # Parse the related links page
    #
    # Returns +Array+ of +Hash+ with keys +:url+, +:type+ (+:official+ or
    # +:merchandise+) and +:title+
    #
    # [+response+]
    #     HTML document, handed to +Nokogiri::HTML+
    #
    def parse_related_links_html(response)
      doc = Nokogiri::HTML(response)

      extract_links(doc, '#linksTableOfficial td a', :official) +
        extract_links(doc, '#linksTableOfficial_merchandise td a', :merchandise)
    end

    private

    ##
    # Map a status symbol onto its MA form value
    #
    # Raises +MetalArchives::Errors::ParserError+ when the status is unknown
    #
    def map_status(status)
      STATUS_LABELS.fetch(status) do
        raise MetalArchives::Errors::ParserError, "Unknown status: #{status}"
      end
    end

    ##
    # Split a comma separated theme list; every word is capitalized and
    # the '(early)' / '(later)' markers are dropped
    #
    def parse_lyrical_themes(content)
      content.split(',').map do |theme|
        words = theme.split.map(&:capitalize)
        words.delete '(early)'
        words.delete '(later)'
        words.join(' ')
      end
    end

    ##
    # Parse a comma separated list of 'start-end' year periods into
    # +MetalArchives::Range+ objects. Names found in '(as ...)' annotations
    # are appended to +aliases+.
    #
    def parse_years_active(content, aliases)
      periods = []

      content.split(',').each do |period|
        # Aliases
        period.scan(/\(as ([^)]*)\)/).each do |name|
          aliases << name.first unless aliases.include?(name.first)
        end

        # Ranges
        years = period.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-').map(&:strip)
        next if years.empty?

        # '?' marks an unknown year, 'present' an open end
        date_start = years.first == '?' ? nil : Date.new(years.first.to_i)
        date_end = ['?', 'present'].include?(years.last) ? nil : Date.new(years.last.to_i)

        periods << MetalArchives::Range.new(date_start, date_end)
      end

      periods
    end

    ##
    # Collect all anchors matching +selector+ as link hashes of +type+
    #
    def extract_links(doc, selector, type)
      doc.css(selector).map do |a|
        {
          url: a['href'],
          type: type,
          title: a.content
        }
      end
    end
  end
end
159
+ end
160
+ end
@@ -0,0 +1,68 @@
1
+ require 'date'
2
+ require 'nokogiri'
3
+
4
+ module MetalArchives
5
+ module Parsers
6
+ ##
7
+ # Label parser
8
+ #
9
class Label # :nodoc:
  class << self
    ##
    # Build the URL of a label page
    #
    # Returns +String+
    #
    # [+params+]
    #     +Hash+ with +:name+ and +:id+; both are interpolated as-is
    #
    def find_endpoint(params)
      "http://www.metal-archives.com/labels/#{params[:name]}/#{params[:id]}"
    end

    ##
    # Parse the label page
    #
    # Returns +Hash+ with +:name+ and +:contact+, plus (depending on what
    # the page contains) +:address+, +:country+, +:phone+, +:status+,
    # +:specializations+, +:date_founded+ and +:online_shopping+
    #
    # Raises +RuntimeError+ on an unknown field label
    #
    # [+response+]
    #     HTML document, handed to +Nokogiri::HTML+
    #
    def parse(response)
      props = {}
      doc = Nokogiri::HTML(response)

      props[:name] = doc.css('#label_info .label_name').first.content

      props[:contact] = doc.css('#label_contact a').map do |contact|
        {
          title: contact.content,
          content: contact.attr(:href)
        }
      end

      doc.css('#label_info dl').each do |dl|
        dl.search('dt').each do |dt|
          # The value of a field lives in the element following its label
          dd = dt.next_element

          case dt.content
          when 'Address:'
            # `next`, not `break`: an absent value must only skip this
            # field, not every remaining field of the list
            next if dd.content == 'N/A'

            props[:address] = dd.content
          when 'Country:'
            next if dd.content == 'N/A'

            props[:country] = ParserHelper.parse_country(dd.css('a').first.content)
          when 'Phone number:'
            next if dd.content == 'N/A'

            props[:phone] = dd.content
          when 'Status:'
            props[:status] = dd.content.downcase.gsub(/ /, '_').to_sym
          when 'Specialised in:'
            next if dd.content == 'N/A'

            props[:specializations] = ParserHelper.parse_genre(dd.content)
          when 'Founding date :'
            next if dd.content == 'N/A'

            props[:date_founded] = Date.new(dd.content.to_i)
          when 'Sub-labels:'
            # TODO
          when 'Online shopping:'
            # Any value other than 'Yes' / 'No' leaves the key unset
            case dd.content
            when 'Yes'
              props[:online_shopping] = true
            when 'No'
              props[:online_shopping] = false
            end
          else
            # NOTE(review): the other parsers raise
            # MetalArchives::Errors::ParserError here; the generic error is
            # kept so the exception type seen by callers does not change.
            raise "Unknown token: #{dt.content}"
          end
        end
      end

      props
    end
  end
end
67
+ end
68
+ end
@@ -0,0 +1,79 @@
1
+ require 'date'
2
+ require 'countries'
3
+
4
+ module MetalArchives
5
+ ##
6
+ # Mapping layer from and to MA Web Service
7
+ #
8
+ module Parsers # :nodoc:
9
+ ##
10
+ # Parsing utilities
11
+ #
12
class ParserHelper # :nodoc:
  # Lower-cased genre components that are dropped while parsing
  IGNORED_GENRE_WORDS = ['(early)', '(later)', 'metal'].freeze

  class << self
    ##
    # Parse a country
    #
    # Returns the result of +ISO3166::Country.find_country_by_name+
    #
    # [+input+]
    #     Country name
    #
    def parse_country(input)
      ISO3166::Country.find_country_by_name(input)
    end

    ##
    # Opinionated parsing of genres
    #
    # Returns an +Array+ of +String+, without duplicates and without empty
    # entries
    #
    # The following components are omitted:
    # - Metal
    # - (early)
    # - (later)
    #
    # Every word of every genre is capitalized.
    #
    # The input is split on commas. Each field is split on whitespace, and
    # every word is appended to all genre strings built so far. A word
    # containing slashes offers several alternatives: the genre strings
    # are multiplied, one copy per alternative.
    #
    #   'Traditional Heavy/Power Metal' => ['Traditional Heavy', 'Traditional Power']
    #   'Heavy/Power/Speed Metal' => ['Heavy', 'Power', 'Speed']
    #   'Traditional/Classical Heavy/Power Metal' => [
    #     'Traditional Heavy', 'Classical Heavy',
    #     'Traditional Power', 'Classical Power']
    #
    # [+input+]
    #     Comma separated genre description; +nil+ is treated as empty
    #
    def parse_genre(input)
      genres = input.to_s.split(',').flat_map do |genre|
        words = genre.downcase.split.reject { |word| IGNORED_GENRE_WORDS.include?(word) }

        # Start with a single empty genre string and grow it word by word
        words.reduce(['']) do |prefixes, word|
          options = word.split('/').reject(&:empty?).map(&:capitalize)
          # A word made of slashes only contributes nothing
          next prefixes if options.empty?

          # Alternatives on the outside, prefixes on the inside, so all
          # combinations with the first alternative come first
          options.flat_map do |option|
            prefixes.map { |prefix| prefix.empty? ? option : "#{prefix} #{option}" }
          end
        end
      end

      # A field made of ignored words only leaves an empty genre string
      genres.reject(&:empty?).uniq
    end
  end
end
78
+ end
79
+ end