metal_archives 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,58 @@
1
module MetalArchives
  ##
  # Range which can start and/or end with +nil+
  #
  # A +nil+ endpoint means that side of the range is unknown/open.
  #
  class Range
    include Comparable

    ##
    # Begin- and endpoint of range
    #
    attr_accessor :begin, :end

    ##
    # Create a new range
    #
    # [+_begin+]
    #     Start of range
    #
    #     Default: +nil+
    #
    # [+_end+]
    #     End of range
    #
    #     Default: +nil+
    #
    def initialize(_begin = nil, _end = nil)
      @begin = _begin
      @end = _end
    end

    ##
    # Whether start of range is present
    #
    # Returns +true+ or +false+
    #
    def begin?
      @begin ? true : false
    end

    ##
    # Whether end of range is present
    #
    # Returns +true+ or +false+
    #
    def end?
      @end ? true : false
    end

    ##
    # Comparison operator
    #
    # Ranges are ordered by their begin point first, and by their end point
    # when the begin points are equal.
    #
    # Returns -1, 0 or 1, or +nil+ when the two objects cannot be compared
    # (+other+ does not expose +begin+ and +end+, or the endpoints themselves
    # are not comparable, e.g. a +nil+ endpoint against a non-+nil+ one).
    #
    def <=>(other)
      # Follow the Comparable contract: incomparable objects yield nil
      # instead of raising NoMethodError (which would also break ==).
      return nil unless other.respond_to?(:begin) && other.respond_to?(:end)

      comparison = self.begin <=> other.begin
      return comparison unless comparison == 0

      self.end <=> other.end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require 'json'
2
+ require 'date'
3
+ require 'countries'
4
+
5
module MetalArchives
  module Parsers
    ##
    # Artist parser
    #
    class Artist # :nodoc:
      class << self
        ##
        # Map attributes to MA attributes
        #
        # Returns +Hash+
        #
        # [+query+]
        #     +Hash+ with optional +:name+ and +:iDisplayStart+ keys
        #
        def map_params(query)
          {
            :query => query[:name] || '',

            :iDisplayStart => query[:iDisplayStart] || 0
          }
        end

        ##
        # Parse an artist page
        #
        # Returns +Hash+ of artist properties. Fields whose value is 'N/A'
        # are left out of the result.
        #
        # Raises +Errors::ParserError+ on an unknown field label or gender.
        #
        # [+response+]
        #     HTML of the artist page
        #
        def parse_html(response)
          props = {}
          doc = Nokogiri::HTML response

          doc.css('#member_info dl').each do |dl|
            dl.css('dt').each do |dt|
              # The value of a field lives in the element following its <dt>
              dd = dt.next_element

              case dt.content.strip
              when 'Real/full name:'
                props[:name] = dd.content.strip
              when 'Age:'
                # 'next' (not 'break'): an absent value must only skip this
                # field, not the remaining fields of the list.
                next if dd.content == 'N/A'
                date = dd.content.gsub(/ [0-9]* \(born ([^\)]*)\)/, '\1')
                props[:date_of_birth] = Date.parse date
              when 'R.I.P.:'
                next if dd.content == 'N/A'
                props[:date_of_death] = Date.parse dd.content
              when 'Died of:'
                # Comparison, not assignment: assigning would overwrite the
                # node content and always skip the field.
                next if dd.content == 'N/A'
                props[:cause_of_death] = dd.content
              when 'Place of origin:'
                next if dd.content == 'N/A'
                props[:country] = ISO3166::Country.find_country_by_name(dd.css('a').first.content)
                # The location is the bare text around the country link,
                # wrapped in parentheses
                location = dd.xpath('text()').map { |x| x.content }.join('').strip.gsub(/[()]/, '')
                props[:location] = location unless location.empty?
              when 'Gender:'
                next if dd.content == 'N/A'
                case dd.content
                when 'Male'
                  props[:gender] = :male
                when 'Female'
                  props[:gender] = :female
                else
                  raise Errors::ParserError, "Unknown gender: #{dd.content}"
                end
              else
                raise Errors::ParserError, "Unknown token: #{dt.content}"
              end
            end
          end

          # The displayed name is an alias when it differs from the real name
          props[:aliases] = []
          alt = doc.css('.band_member_name').first.content
          props[:aliases] << alt unless props[:name] == alt

          props
        end

        ##
        # Parse the links tab of an artist page
        #
        # Returns +Array+ of +Hash+ with +:url+, +:type+ and +:title+ keys.
        # Rows without a link are skipped.
        #
        # [+response+]
        #     HTML of the links tab
        #
        def parse_links_html(response)
          links = []

          doc = Nokogiri::HTML response

          # Default to official links
          type = :official

          doc.css('#linksTablemain tr').each do |row|
            if row['id'] =~ /^header_/
              # Header rows switch the type for all rows that follow
              type = row['id'].gsub(/^header_/, '').downcase.to_sym
            else
              a = row.css('td a').first

              # Row carries no link (e.g. nothing has been added yet)
              next unless a

              links << {
                :url => a['href'],
                :type => type,
                :title => a.content
              }
            end
          end

          links
        end
      end
    end
  end
end
@@ -0,0 +1,160 @@
1
+ require 'json'
2
+ require 'date'
3
+ require 'countries'
4
+
5
module MetalArchives
  module Parsers
    ##
    # Band parser
    #
    class Band # :nodoc:
      class << self
        ##
        # Map attributes to MA attributes
        #
        # Returns +Hash+
        #
        # [+query+]
        #     +Hash+ of search attributes. +:year+ is expected to respond to
        #     +begin+ and +end+, +:country+ may be a single value or a list.
        #
        # Raises +MetalArchives::Errors::ParserError+ on an unknown +:status+.
        #
        def map_params(query)
          year = query[:year]

          params = {
            :bandName => query[:name] || '',
            :exactBandMatch => (query[:exact] ? 1 : 0),
            :genre => query[:genre] || '',
            :yearCreationFrom => (year && year.begin && year.begin.year) || '',
            :yearCreationTo => (year && year.end && year.end.year) || '',
            :bandNotes => query[:comment] || '',
            :status => map_status(query[:status]),
            :themes => query[:lyrical_themes] || '',
            :location => query[:location] || '',
            :bandLabelName => query[:label] || '',
            :indieLabelBand => (query[:independent] ? 1 : 0),

            :iDisplayStart => query[:iDisplayStart] || 0
          }

          countries = Array(query[:country]).map do |country|
            country.is_a?(ISO3166::Country) ? country.alpha2 : (country || '')
          end
          # A single country is sent as a scalar, several as a list
          params[:country] = (countries.size == 1 ? countries.first : countries)

          params
        end

        ##
        # Parse a band page
        #
        # Returns +Hash+ of band properties. Fields whose value is 'N/A' are
        # left out of the result (lyrical themes become an empty list).
        #
        # Raises +MetalArchives::Errors::ParserError+ on an unknown field label.
        #
        # [+response+]
        #     HTML of the band page
        #
        def parse_html(response)
          props = {}
          doc = Nokogiri::HTML response

          props[:name] = doc.css('#band_info .band_name a').first.content

          props[:aliases] = []
          props[:logo] = doc.css('.band_name_img img').first.attr('src') unless doc.css('.band_name_img').empty?
          props[:photo] = doc.css('.band_img img').first.attr('src') unless doc.css('.band_img').empty?

          doc.css('#band_stats dl').each do |dl|
            dl.search('dt').each do |dt|
              # The value of a field lives in the element following its <dt>
              dd = dt.next_element

              case dt.content
              when 'Country of origin:'
                props[:country] = ISO3166::Country.find_country_by_name dd.css('a').first.content
              when 'Location:'
                # 'next' (not 'break'): an absent value must only skip this
                # field, not the remaining fields of the list.
                next if dd.content == 'N/A'
                props[:location] = dd.content
              when 'Status:'
                props[:status] = dd.content.downcase.gsub(/ /, '_').to_sym
              when 'Formed in:'
                next if dd.content == 'N/A'
                props[:date_formed] = Date.new dd.content.to_i
              when 'Genre:'
                next if dd.content == 'N/A'
                props[:genres] = ParserHelper.parse_genre dd.content
              when 'Lyrical themes:'
                props[:lyrical_themes] = []
                next if dd.content == 'N/A'
                dd.content.split(',').each do |theme|
                  words = theme.split.map(&:capitalize)
                  words.delete '(early)'
                  words.delete '(later)'
                  props[:lyrical_themes] << words.join(' ')
                end
              when /(Current|Last) label:/
                props[:independent] = (dd.content == 'Unsigned/independent')
                # TODO
              when 'Years active:'
                next if dd.content == 'N/A'
                props[:date_active] = []
                dd.content.split(',').each do |range|
                  # Aliases
                  range.scan(/\(as ([^)]*)\)/).each { |name| props[:aliases] << name.first }
                  # Ranges
                  r = range.gsub(/ *\(as ([^)]*)\) */, '').strip.split('-')
                  date_start = (r.first == '?' ? nil : Date.new(r.first.to_i))
                  # Parenthesised so both markers map to nil, and the end
                  # year is taken from the last component.
                  date_end = ((r.last == '?' || r.last == 'present') ? nil : Date.new(r.last.to_i))
                  # Explicitly the library's nil-tolerant range, not ::Range
                  props[:date_active] << MetalArchives::Range.new(date_start, date_end)
                end
              else
                raise MetalArchives::Errors::ParserError, "Unknown token: #{dt.content}"
              end
            end
          end

          props
        end

        ##
        # Parse the similar bands tab
        #
        # Returns +Array+ of +Hash+ with +:band+ (+MetalArchives::Band+ built
        # from the id at the end of the row's first link) and +:score+ (the
        # stripped text of the row's last cell).
        #
        # [+response+]
        #     HTML of the similar bands tab
        #
        def parse_similar_bands_html(response)
          doc = Nokogiri::HTML response

          doc.css('#artist_list tbody tr').map do |row|
            band_id = row.css('td a').first['href'].split('/').last.to_i

            {
              :band => MetalArchives::Band.new(:id => band_id),
              :score => row.css('td').last.content.strip
            }
          end
        end

        ##
        # Parse the related links tab
        #
        # Returns +Array+ of +Hash+ with +:url+, +:type+ and +:title+ keys,
        # official links first, merchandise links after.
        #
        # [+response+]
        #     HTML of the related links tab
        #
        def parse_related_links_html(response)
          links = []

          doc = Nokogiri::HTML response

          [
            ['#linksTableOfficial td a', :official],
            ['#linksTableOfficial_merchandise td a', :merchandise]
          ].each do |selector, type|
            doc.css(selector).each do |a|
              links << {
                :url => a['href'],
                :type => type,
                :title => a.content
              }
            end
          end

          links
        end

        private

        ##
        # Map a status symbol to the value expected by the MA search form
        #
        # Returns +String+ (empty for a +nil+ status)
        #
        # Raises +MetalArchives::Errors::ParserError+ on an unknown status.
        #
        def map_status(status)
          statuses = {
            nil => '',
            :active => 'Active',
            :split_up => 'Split-up',
            :on_hold => 'On hold',
            :unknown => 'Unknown',
            :changed_name => 'Changed name',
            :disputed => 'Disputed'
          }

          statuses.fetch(status) do
            raise MetalArchives::Errors::ParserError, "Unknown status: #{status}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'date'
2
+ require 'nokogiri'
3
+
4
module MetalArchives
  module Parsers
    ##
    # Label parser
    #
    class Label # :nodoc:
      class << self
        ##
        # Build the URL of a label page
        #
        # Returns +String+
        #
        # [+params+]
        #     +Hash+ with +:name+ and +:id+ keys
        #
        def find_endpoint(params)
          "http://www.metal-archives.com/labels/#{params[:name]}/#{params[:id]}"
        end

        ##
        # Parse a label page
        #
        # Returns +Hash+ of label properties. Fields whose value is 'N/A' are
        # left out of the result.
        #
        # Raises +RuntimeError+ on an unknown field label.
        #
        # [+response+]
        #     HTML of the label page
        #
        def parse(response)
          props = {}
          doc = Nokogiri::HTML(response)

          props[:name] = doc.css('#label_info .label_name').first.content

          props[:contact] = doc.css('#label_contact a').map do |contact|
            {
              :title => contact.content,
              :content => contact.attr(:href)
            }
          end

          doc.css('#label_info dl').each do |dl|
            dl.search('dt').each do |dt|
              # The value of a field lives in the element following its <dt>
              dd = dt.next_element

              case dt.content
              when 'Address:'
                # 'next' (not 'break'): an absent value must only skip this
                # field, not the remaining fields of the list.
                next if dd.content == 'N/A'
                props[:address] = dd.content
              when 'Country:'
                next if dd.content == 'N/A'
                props[:country] = ParserHelper.parse_country dd.css('a').first.content
              when 'Phone number:'
                next if dd.content == 'N/A'
                props[:phone] = dd.content
              when 'Status:'
                props[:status] = dd.content.downcase.gsub(/ /, '_').to_sym
              when 'Specialised in:'
                next if dd.content == 'N/A'
                props[:specializations] = ParserHelper.parse_genre dd.content
              when 'Founding date :'
                next if dd.content == 'N/A'
                props[:date_founded] = Date.new dd.content.to_i
              when 'Sub-labels:'
                # TODO
              when 'Online shopping:'
                # Any value other than 'Yes'/'No' leaves the property unset
                case dd.content
                when 'Yes'
                  props[:online_shopping] = true
                when 'No'
                  props[:online_shopping] = false
                end
              else
                # NOTE(review): raises a plain RuntimeError; kept as-is so
                # existing rescue clauses of callers keep working.
                raise "Unknown token: #{dt.content}"
              end
            end
          end

          props
        end
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
+ require 'date'
2
+ require 'countries'
3
+
4
module MetalArchives
  ##
  # Mapping layer from and to MA Web Service
  #
  module Parsers # :nodoc:
    ##
    # Parsing utilities
    #
    class ParserHelper # :nodoc:
      class << self
        ##
        # Parse a country
        #
        # Returns +ISO3166::Country+
        #
        # [+input+]
        #     Country name
        #
        def parse_country(input)
          ISO3166::Country.find_country_by_name(input)
        end

        ##
        # Opinionated parsing of genres
        #
        # Returns an +Array+ of +String+
        #
        # The following components are omitted:
        # - Metal
        # - (early)
        # - (later)
        #
        # All genres are capitalized, duplicates are removed and fields that
        # end up empty (e.g. a lone 'Metal') are dropped.
        #
        # For examples on how genres are parsed, refer to +ParserHelperTest::test_parse_genre+
        #
        # [+input+]
        #     Comma separated genre +String+
        #
        def parse_genre(input)
          ignored = ['(early)', '(later)', 'metal']
          genres = []

          # Split fields
          input.split(',').each do |genre|
            ##
            # Start with a single empty genre string. Split the genre by spaces
            # and process each component. A component is split on slashes into
            # its alternatives (a component without a slash has exactly one).
            # Every alternative is appended to every genre string built so
            # far, so +combinations+ ends up holding all genre combinations.
            #
            # 'Traditional Heavy/Power Metal' => ['Traditional Heavy', 'Traditional Power']
            # 'Traditional/Classical Heavy/Power Metal' => [
            #   'Traditional Heavy', 'Classical Heavy',
            #   'Traditional Power', 'Classical Power']
            # 'Heavy/Power/Speed Metal' => ['Heavy', 'Power', 'Speed']
            #
            combinations = ['']

            genre.downcase.split.each do |component|
              next if ignored.include? component

              alternatives = component.split('/').reject(&:empty?).map(&:capitalize)
              next if alternatives.empty?

              # Prefixes are already capitalized word by word; capitalizing
              # them again would lowercase every word but the first.
              combinations = alternatives.flat_map do |alternative|
                combinations.map do |prefix|
                  prefix.empty? ? alternative : "#{prefix} #{alternative}"
                end
              end
            end

            genres.concat(combinations.reject(&:empty?))
          end

          genres.uniq
        end
      end
    end
  end
end