metal-archives 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem 'mechanize'
4
+
5
+ group :development do
6
+ gem "rspec", "~> 2.3.0"
7
+ gem "bundler", "~> 1.0.0"
8
+ gem "jeweler", "~> 1.5.2"
9
+ gem "rcov", ">= 0"
10
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,32 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.2)
5
+ git (1.2.5)
6
+ jeweler (1.5.2)
7
+ bundler (~> 1.0.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ mechanize (1.0.0)
11
+ nokogiri (>= 1.2.1)
12
+ nokogiri (1.4.4)
13
+ rake (0.8.7)
14
+ rcov (0.9.9)
15
+ rspec (2.3.0)
16
+ rspec-core (~> 2.3.0)
17
+ rspec-expectations (~> 2.3.0)
18
+ rspec-mocks (~> 2.3.0)
19
+ rspec-core (2.3.1)
20
+ rspec-expectations (2.3.0)
21
+ diff-lcs (~> 1.1.2)
22
+ rspec-mocks (2.3.0)
23
+
24
+ PLATFORMS
25
+ ruby
26
+
27
+ DEPENDENCIES
28
+ bundler (~> 1.0.0)
29
+ jeweler (~> 1.5.2)
30
+ mechanize
31
+ rcov
32
+ rspec (~> 2.3.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Danny Olson
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,46 @@
1
+ = metal-archives
2
+
3
+ metal-archives provides an interface to search for album releases for a specific year, defaulting to the current one if none is provided.
4
+
5
+ Here is a quick and dirty example:
6
+
7
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
8
+ $LOAD_PATH.unshift "./metal-archives/lib"
9
+ require 'mechanize'
10
+ require 'metal-archives'
11
+
12
+ agent = MetalArchives::Agent.new
13
+ links = []
14
+
15
+ agent.paginated_result_links.each do |search_result|
16
+ link = agent.album_links_from_url(search_result)
17
+ if link.nil?
18
+ puts "\nThrew an exception so exit"
19
+ break
20
+ else
21
+ print '.'
22
+ links << link
23
+ end
24
+ end
25
+ links.flatten!
26
+ puts "DONE: #{links.size}"
27
+
28
+ if links.size >= 1
29
+ puts "album information for the first result: #{agent.album_from_url(links.first).inspect}"
30
+ puts "album information for the last result: #{agent.album_from_url(links.last).inspect}"
31
+ end
32
+
33
+ == Contributing to metal-archives
34
+
35
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
36
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
37
+ * Fork the project
38
+ * Start a feature/bugfix branch
39
+ * Commit and push until you are happy with your contribution
40
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
41
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or changing them is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
42
+
43
+ == Copyright
44
+
45
+ Copyright (c) 2011 Danny Olson. See LICENSE.txt for
46
+ further details.
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+
12
+ require 'jeweler'
13
+ Jeweler::Tasks.new do |gem|
14
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
+ gem.name = "metal-archives"
16
+ gem.homepage = "http://github.com/dbolson/metal-archives"
17
+ gem.license = "MIT"
18
+ gem.summary = %Q{metal-archives provides a way to search http://metal-archives.org/ for album release information.}
19
+ gem.description = %Q{metal-archives provides an interface to search for album releases for a specific year, defaulting to the current one if none is provided.}
20
+ gem.email = "dbolson@gmail.com"
21
+ gem.authors = ["Danny Olson"]
22
+ # Include your dependencies below. Runtime dependencies are required when using your gem,
23
+ # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
24
+ # gem.add_runtime_dependency 'jabber4r', '> 0.1'
25
+ # gem.add_development_dependency 'rspec', '> 1.2.3'
26
+ gem.add_runtime_dependency 'mechanize', '~> 1.0.0'
27
+ end
28
+ Jeweler::RubygemsDotOrgTasks.new
29
+
30
+ require 'rspec/core'
31
+ require 'rspec/core/rake_task'
32
+ RSpec::Core::RakeTask.new(:spec) do |spec|
33
+ spec.pattern = FileList['spec/**/*_spec.rb']
34
+ end
35
+
36
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
37
+ spec.pattern = 'spec/**/*_spec.rb'
38
+ spec.rcov = true
39
+ end
40
+
41
+ task :default => :spec
42
+
43
+ require 'rake/rdoctask'
44
+ Rake::RDocTask.new do |rdoc|
45
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
46
+
47
+ rdoc.rdoc_dir = 'rdoc'
48
+ rdoc.title = "metal-archives #{version}"
49
+ rdoc.rdoc_files.include('README*')
50
+ rdoc.rdoc_files.include('lib/**/*.rb')
51
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,159 @@
1
+ require 'mechanize'
2
+
3
require 'date' # Date::MONTHNAMES and Date.new are used below; load them explicitly

# Scrapes album release information from metal-archives.com search and album pages.
module MetalArchives
  SITE_URL = 'http://metal-archives.com'

  # An agent accesses the website and holds the HTML source.
  #
  # Error convention: public methods never raise StandardError to the caller.
  # They print a message to stdout and return nil (paginated_result_links
  # returns the links collected so far instead). Only StandardError is rescued,
  # so Ctrl-C (Interrupt) and exit (SystemExit) still propagate.
  class Agent
    # Pages are re-read with this encoding so foreign characters survive.
    PAGE_ENCODING = 'iso-8859-1'

    # Creates the underlying Mechanize agent. A failure is reported on stdout
    # and leaves the agent unset, in which case the other public methods take
    # their failure path.
    def initialize
      @agent = Mechanize.new
    rescue StandardError => e
      puts "\nError accessing metal-archives.com on initialization: #{e}"
    end

    # Goes straight to the search results page for the given year.
    #
    # year - release year to search for (defaults to the current year).
    #
    # Returns the fetched page, or nil if the request failed.
    def search_by_year(year=Time.now.year)
      @agent.get("#{SITE_URL}/advanced.php?release_year=#{year}")
    rescue StandardError => e
      puts "\nError accessing metal-archives.com's search results page: #{e}"
      nil
    end

    # Finds all the links to the search results pages as they are paginated.
    #
    # year - release year to search for (defaults to the current year).
    #
    # Returns an Array of hrefs. The first page is always included because it
    # is not rendered as a link; if the search page cannot be fetched or
    # parsed, only the links gathered up to that point are returned.
    def paginated_result_links(year=Time.now.year)
      links = ["/advanced.php?release_year=#{year}"] # need the first page because it's not a link
      page = search_by_year(year)
      return links if page.nil? # search_by_year has already reported the failure

      begin
        page.search('body table:nth-child(2n) tr:first-child a').each do |link|
          links << link['href']
        end
      rescue StandardError => e
        puts "\nError accessing metal-archives.com's paginated result links: #{e}"
      end
      links
    end

    # Finds all the links to the albums on a given search results page.
    #
    # url - path of a search results page, appended directly to SITE_URL.
    #
    # Returns an Array of album hrefs, or nil if the page could not be
    # fetched or parsed.
    def album_links_from_url(url)
      page = fetch_page(SITE_URL + url)
      page.search('body table:nth-child(2n) tr td:nth-child(3n) a').map { |link| link['href'] }
    rescue StandardError => e
      puts "\nError accessing metal-archives.com's album links from url: #{e}"
      nil
    end

    # Finds the following fields on an album's page:
    #   album name
    #   band name
    #   album's record label
    #   album's release date
    #   album's release type (full-length, demo, split, DVD, etc.)
    #
    # url - path of the album page, appended to SITE_URL after a '/'.
    #
    # Returns a Hash with the keys :album, :band, :label, :release_date,
    # :release_type and :url, or nil if the page could not be fetched or did
    # not have the expected layout.
    def album_from_url(url)
      page = fetch_page(SITE_URL + '/' + url)
      band_and_album = page.search('body table tr:first-child .tt').text

      # these fields can be in one of the following forms, so we need to find the specific fields appropriately:
      #   "\n\t\t", "Demo", ", Nazgûl Distro & Prod.", "", "2011", "\t\t\t"
      #   "\n\t\t", "Demo", ", Deific Mourning", "", "\n\n\t\tJanuary ", "2011", "\t\t\t"
      #   "Full-length", ", ARX Productions", "", "\n\n\t\tFebruary 25th, ", "2011", "\t\t\t"
      album_fields = page.search('body table:nth-child(2n) tr:first-child > td:first-child').first.children

      {
        :album => album_from_content(band_and_album),
        :band => band_from_content(band_and_album),
        :label => label_from_content(album_fields),
        :release_date => release_date_from_content(album_fields),
        :release_type => release_type_from_content(album_fields),
        :url => url
      }
    rescue StandardError => e
      puts "\nError accessing metal-archives.com's album information: #{e}"
      nil
    end

    private

    # Fetches the given absolute URL and switches the page to PAGE_ENCODING
    # when it reports a different one (needed for foreign characters).
    def fetch_page(url)
      page = @agent.get(url)
      page.encoding = PAGE_ENCODING if page && page.encoding != PAGE_ENCODING
      page
    end

    # The band and album fields are together ("Band - Album"), so we need to
    # split them apart. Only the first " - " separates them, so album titles
    # that contain " - " themselves are kept whole. Returns "" when there is
    # no separator.
    def album_from_content(content)
      content.split(' - ', 2)[1].to_s.strip
    end

    # The band and album fields are together ("Band - Album"), so we need to
    # split them apart. Returns "" for empty content.
    def band_from_content(content)
      content.split(' - ', 2)[0].to_s.strip
    end

    # The label will probably always have ", " in front, so we need to get rid of that but also allow
    # just the text if it does not have this string.
    def label_from_content(content)
      label = content[2].text
      label[/,\s(.+)/, 1] || label
    end

    # The date can be in one of the following forms:
    #   year              => "December 31st, <year>"
    #   month, year       => last day of that month, e.g. "January 31st, 2011"
    #   month, day, year  => returned as found, e.g. "February 25th, 2011"
    def release_date_from_content(content)
      # only have year
      return "December 31st, #{content[4].text}".strip unless content.size == 7

      date = "#{content[4].text}#{content[5].text}"
      parts = date.split(' ')
      if parts.size == 2 # only have month and year
        month = Date::MONTHNAMES.index(parts[0])
        if month # leave the text untouched when the first word is not a month name
          last_day = Date.new(parts[1].to_i, month, -1) # day -1 is the last day of the month
          date = "#{last_day.strftime('%B')} #{MetalArchives.ordinalize(last_day.day)}, #{last_day.year}"
        end
      end
      date.strip
    end

    # Finds the release type in the assumed spot.
    def release_type_from_content(content)
      content[1].text
    end
  end

  # Taken from Rails active_support/core_ext/string/inflections.rb but copied here so the
  # entire library isn't needed for this one method.
  #
  # number - an Integer or numeric String.
  #
  # Returns the number followed by its English ordinal suffix, e.g. "1st", "12th", "23rd".
  def self.ordinalize(number)
    value = number.to_i
    return "#{number}th" if (11..13).include?(value % 100)

    case value % 10
    when 1 then "#{number}st"
    when 2 then "#{number}nd"
    when 3 then "#{number}rd"
    else "#{number}th"
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{metal-archives}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Danny Olson"]
12
+ s.date = %q{2011-02-11}
13
+ s.description = %q{metal-archives provides an interface to search for album releases for a specific year, defaulting to the current one if none is provided.}
14
+ s.email = %q{dbolson@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "lib/metal-archives.rb",
29
+ "metal-archives.gemspec",
30
+ "spec/html/album_result.html",
31
+ "spec/html/album_result2.html",
32
+ "spec/html/album_result3.html",
33
+ "spec/html/search_results.html",
34
+ "spec/metal-archives_spec.rb",
35
+ "spec/spec_helper.rb"
36
+ ]
37
+ s.homepage = %q{http://github.com/dbolson/metal-archives}
38
+ s.licenses = ["MIT"]
39
+ s.require_paths = ["lib"]
40
+ s.rubygems_version = %q{1.3.7}
41
+ s.summary = %q{metal-archives provides a way to search http://metal-archives.org/ for album release information.}
42
+ s.test_files = [
43
+ "spec/metal-archives_spec.rb",
44
+ "spec/spec_helper.rb"
45
+ ]
46
+
47
+ if s.respond_to? :specification_version then
48
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
49
+ s.specification_version = 3
50
+
51
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
52
+ s.add_runtime_dependency(%q<mechanize>, [">= 0"])
53
+ s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
54
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
55
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
56
+ s.add_development_dependency(%q<rcov>, [">= 0"])
57
+ s.add_runtime_dependency(%q<mechanize>, ["~> 1.0.0"])
58
+ else
59
+ s.add_dependency(%q<mechanize>, [">= 0"])
60
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
61
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
62
+ s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
63
+ s.add_dependency(%q<rcov>, [">= 0"])
64
+ s.add_dependency(%q<mechanize>, ["~> 1.0.0"])
65
+ end
66
+ else
67
+ s.add_dependency(%q<mechanize>, [">= 0"])
68
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
69
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
70
+ s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
71
+ s.add_dependency(%q<rcov>, [">= 0"])
72
+ s.add_dependency(%q<mechanize>, ["~> 1.0.0"])
73
+ end
74
+ end
75
+
@@ -0,0 +1,123 @@
1
+ <html>
2
+ <head>
3
+ <title>Encyclopaedia Metallum - A Tree - Fn-2+Fn-1=Fn</title>
4
+ <meta name="keywords" content="A Tree Fn-2+Fn-1=Fn Uruk-Hai - Vocals and all instruments ({\link Nefertum}, {\link Omnia Malis Est}, {\link Infernal Angels}, Tavern-Hell) Depressive Black Metal/Rock Italy Nazg&ucirc;l Distro &amp; Prod. album cover information discography lyrics links reviews">
5
+ <meta name="description" content="A Tree - Fn-2+Fn-1=Fn - songs/tracklisting, lyrics, details, reviews">
6
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
7
+ <link rel="stylesheet" type="text/css" href="/css/basic.css" />
8
+ <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
9
+ <script language="Javascript" type="text/Javascript">
10
+ <!--
11
+ var flags = new Array();
12
+ function toggle(id) {
13
+ if (flags[id] == undefined) {
14
+ flags[id] = false;
15
+ }
16
+ var list = document.getElementById(id);
17
+ if (flags[id]) {
18
+ list.style.display = 'none';
19
+ } else {
20
+ list.style.display = 'block';
21
+ }
22
+ flags[id] = !flags[id];
23
+ return false;
24
+ }
25
+
26
+ function openLyrics(x)
27
+ {
28
+ var popup;
29
+ popup = window.open("viewlyrics.php?id="+x, "Lyrics", "width=500,height=500,resizable,scrollbars,top=50");
30
+ }
31
+
32
+ // -->
33
+ </script>
34
+
35
+ <!--[if IE]>
36
+ <script type="text/javascript" src="/js/utils.js"></script>
37
+ <script type="text/javascript">
38
+ function resizeCover() {
39
+ resizeImage("coverArt", 250, 250);
40
+ }
41
+
42
+ window.onload = resizeCover;
43
+ </script>
44
+ <![endif]-->
45
+ </head>
46
+
47
+ <body>
48
+ <center>
49
+ <table border='3' bordercolor='#551521' cellspacing='3' cellpadding='4'>
50
+ <tr><td colspan='2' class='tt'>A Tree - Fn-2+Fn-1=Fn </td></tr></table><br>
51
+ <table>
52
+ <tr> <td valign="top">
53
+ <b>Demo</b>, Nazgûl Distro & Prod.<br>
54
+
55
+ <b>2011</b> </td>
56
+ <td valign="top">
57
+ <table cellpadding="4">
58
+ <tr>
59
+ <td valign="top">[<a href='newreview.php?id=295756'>add/edit review</a>] </td>
60
+ <td valign="top">
61
+
62
+ </td>
63
+ </tr>
64
+ </table>
65
+ </td>
66
+ </tr>
67
+ <tr><td colspan="2" class="trt" height="2"></td></tr>
68
+ <tr><td colspan="2" height="6"></td></tr>
69
+ <tr>
70
+ <td valign="top"><table cellpadding="2">
71
+ <tr><td>1.</td><td>Evolution Begins</td><td align='center'></td><td align='center' nowrap='true'></tr><tr><td>2.</td><td>Among Trees</td><td align='center'></td><td align='center' nowrap='true'></tr><tr><td>3.</td><td>The Deal Between Nature and Myself</td><td align='center'></td><td align='center' nowrap='true'></tr><tr><td>4.</td><td>Math(er) Nature</td><td align='center'></td><td align='center' nowrap='true'></tr><tr><td>5.</td><td>Empty Words as Empty we Are</td><td align='center'></td><td align='center' nowrap='true'></tr><tr><td>6.</td><td>Rainroom (Katatonia Cover)</td><td align='center'></td><td align='center' nowrap='true'></tr> </table></td>
72
+
73
+ <td valign="top">
74
+ <a href="http://www.metal-archives.com/images/2/9/5/7/295756.jpg">
75
+ <img border='0' src='http://www.metal-archives.com/images/2/9/5/7/295756.jpg' alt="Fn-2+Fn-1=Fn cover (Click to see larger picture)" title="Fn-2+Fn-1=Fn cover (Click to see larger picture)" id="coverArt" class="coverArt">
76
+ </a>
77
+ <!--[if IE]><script type="text/javascript">document.getElementById("coverArt").style.visibility = "hidden";</script><![endif]-->
78
+ </td>
79
+ </tr>
80
+ <tr><td colspan="2" class="trt" height="2"></td></tr>
81
+ <tr><td colspan="2">Buy from...</td></tr>
82
+
83
+ <tr><td colspan="2"><ul style="padding-bottom:0px; margin-bottom:0px;">
84
+ <li>
85
+ <a target="_blank" href="/redirect.php?pid=29&rid=295756">search on eBay</a>&nbsp;&nbsp; more... <a href="more" onclick="return toggle('subEbay');">>></a><br />
86
+ <ul id="subEbay" style="display: none; margin-top: 0px; margin-bottom: 0px;">
87
+ <li><a target="_blank" href="/redirect.php?pid=39&rid=295756">search on Half.com</a></li>
88
+ <li><a target="_blank" href="/redirect.php?pid=23&rid=295756">eBay Canada</a></li>
89
+
90
+ <li><a target="_blank" href="/redirect.php?pid=24&rid=295756">eBay France</a></li>
91
+ <li><a target="_blank" href="/redirect.php?pid=25&rid=295756">eBay UK</a></li>
92
+ <li><a target="_blank" href="/redirect.php?pid=37&rid=295756">eBay Spain</a></li>
93
+ <li><a target="_blank" href="/redirect.php?pid=35&rid=295756">eBay Belgium</a></li>
94
+ <li><a target="_blank" href="/redirect.php?pid=26&rid=295756">eBay Netherlands</a></li>
95
+ <li><a target="_blank" href="/redirect.php?pid=27&rid=295756">eBay Italy</a></li>
96
+
97
+ <li><a target="_blank" href="/redirect.php?pid=28&rid=295756">eBay Australia</a></li>
98
+ </ul>
99
+ </li>
100
+ <li><a target="_blank" href="/redirect.php?pid=7&rid=295756">search on Gemm.com</a></li>
101
+ </ul>
102
+ </td></tr>
103
+ </table>
104
+ <br><br>
105
+ <a href="band.php?id=3540321251">Back to A Tree's page</a>
106
+
107
+ </center>
108
+ <!--
109
+ script type="text/javascript">
110
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
111
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
112
+ </script>
113
+ -->
114
+ <script type="text/javascript" src="http://www.google-analytics.com/ga.js"></script>
115
+ <script type="text/javascript">
116
+ try {
117
+ var pageTracker = _gat._getTracker("UA-4046749-1");
118
+ pageTracker._initData();
119
+ pageTracker._trackPageview();
120
+ }
121
+ catch(e) {}
122
+ </script></body>
123
+ </html>