bc_crawler 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +252 -0
- data/Rakefile +3 -0
- data/bc_crawler.gemspec +26 -0
- data/lib/bc_crawler.rb +11 -0
- data/lib/bc_crawler/helper.rb +7 -0
- data/lib/bc_crawler/main.rb +45 -0
- data/lib/bc_crawler/release.rb +87 -0
- data/lib/bc_crawler/track.rb +30 -0
- data/lib/bc_crawler/version.rb +3 -0
- data/spec/bc_crawler_spec.rb +30 -0
- data/spec/spec_helper.rb +1 -0
- data/tasks/rspec.rake +3 -0
- metadata +133 -0
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.bundle
|
19
|
+
*.so
|
20
|
+
*.o
|
21
|
+
*.a
|
22
|
+
mkmf.log
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Mario Schuettel
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,252 @@
|
|
1
|
+
# BcCrawler
|
2
|
+
|
3
|
+
A simple Ruby Gem to crawl bandcamp.com sites. It will load information about the artist/label/band, their releases (albums) and all tracks.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'bc_crawler'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install bc_crawler
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
### Crawl an artist/label/band
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'bc_crawler'
|
25
|
+
|
26
|
+
main = BcCrawler::Main.new('https://amandapalmer.bandcamp.com/')
|
27
|
+
=> URL: https://amandapalmer.bandcamp.com/
|
28
|
+
|
29
|
+
main.releases.first
|
30
|
+
=> URL : https://amandapalmer.bandcamp.com//album/an-evening-with-neil-gaiman-and-amanda-palmer
|
31
|
+
Data :
|
32
|
+
```
|
33
|
+
|
34
|
+
Initially, the data attribute is empty, because only the "main"-page has been crawled.
|
35
|
+
|
36
|
+
### Crawl a release
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
main.releases.first.crawl
|
40
|
+
|
41
|
+
main.releases.first
|
42
|
+
=> URL : https://amandapalmer.bandcamp.com//album/an-evening-with-neil-gaiman-and-amanda-palmer
|
43
|
+
Data : { Hash }
|
44
|
+
```
|
45
|
+
|
46
|
+
### Crawl all releases from an artist/label/band at once
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
main.crawl
|
50
|
+
# Crawling https://amandapalmer.bandcamp.com//album/an-evening-with-neil-gaiman-and-amanda-palmer
|
51
|
+
# Crawling https://amandapalmer.bandcamp.com//album/theatre-is-evil-2
|
52
|
+
# Crawling https://amandapalmer.bandcamp.com//album/amanda-palmer-goes-down-under
|
53
|
+
# Crawling https://amandapalmer.bandcamp.com//album/amanda-palmer-performs-the-popular-hits-of-radiohead-on-her-magical-ukulele
|
54
|
+
# Crawling https://amandapalmer.bandcamp.com//album/nighty-night
|
55
|
+
# Crawling https://amandapalmer.bandcamp.com//album/who-killed-amanda-palmer
|
56
|
+
# Crawling https://amandapalmer.bandcamp.com//album/who-killed-amanda-palmer-alternate-tracks
|
57
|
+
# Crawling https://amandapalmer.bandcamp.com//album/map-of-tasmania-the-remix-project
|
58
|
+
# Crawling https://amandapalmer.bandcamp.com//album/7-series-part-3
|
59
|
+
```
|
60
|
+
|
61
|
+
Certain information about releases and tracks can directly be accessed by attributes.
|
62
|
+
|
63
|
+
### Release information
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
release = main.releases.first
|
67
|
+
|
68
|
+
release.artist
|
69
|
+
=> "Neil Gaiman and Amanda Palmer"
|
70
|
+
|
71
|
+
release.band_id
|
72
|
+
=> 3463798201
|
73
|
+
|
74
|
+
release.type
|
75
|
+
=> "album"
|
76
|
+
|
77
|
+
release.title
|
78
|
+
=> "An Evening With Neil Gaiman and Amanda Palmer"
|
79
|
+
|
80
|
+
release.id # "Release ID"
|
81
|
+
=> 3510389344
|
82
|
+
|
83
|
+
release.release_date
|
84
|
+
=> "19 Nov 2013 00:00:00 GMT"
|
85
|
+
|
86
|
+
release.featured_track_id
|
87
|
+
=> 658956410
|
88
|
+
|
89
|
+
release.about
|
90
|
+
=> nil
|
91
|
+
|
92
|
+
release.credits
|
93
|
+
=> nil
|
94
|
+
|
95
|
+
release.art_fullsize_url
|
96
|
+
=> "https://f1.bcbits.com/img/a3489132960_10.jpg"
|
97
|
+
|
98
|
+
release.art_thumb_url
|
99
|
+
=> "https://f1.bcbits.com/img/a3489132960_3.jpg"
|
100
|
+
|
101
|
+
release.art_id
|
102
|
+
=> nil
|
103
|
+
|
104
|
+
release.has_audio
|
105
|
+
=> true
|
106
|
+
|
107
|
+
release.purchase_url
|
108
|
+
=> nil
|
109
|
+
```
|
110
|
+
|
111
|
+
A release holds one or more tracks in an array. Each track has the following attributes:
|
112
|
+
|
113
|
+
### Track information
|
114
|
+
```ruby
|
115
|
+
random_track = release.tracks.sample
|
116
|
+
|
117
|
+
random_track.id # "Track ID"
|
118
|
+
=> 658956410
|
119
|
+
|
120
|
+
random_track.track_num
|
121
|
+
=> 32
|
122
|
+
|
123
|
+
random_track.title
|
124
|
+
=> "Judy Blume"
|
125
|
+
|
126
|
+
random_track.duration
|
127
|
+
=> 395.093
|
128
|
+
|
129
|
+
random_track.url
|
130
|
+
=> "https://amandapalmer.bandcamp.com//track/judy-blume-2"
|
131
|
+
|
132
|
+
random_track.is_downloadable
|
133
|
+
=> true
|
134
|
+
|
135
|
+
random_track.streaming
|
136
|
+
=> 1
|
137
|
+
|
138
|
+
random_track.file
|
139
|
+
=> {"mp3-128"=>"http://popplers5.bandcamp.com/download/track?enc=mp3-128&fsig=6667d236f0f0128472b2d505feb8f43a&id=658956410&stream=1&ts=1417597933.0"}
|
140
|
+
|
141
|
+
random_track.is_draft
|
142
|
+
=> false
|
143
|
+
|
144
|
+
random_track.title_link
|
145
|
+
=> "/track/judy-blume-2"
|
146
|
+
```
|
147
|
+
|
148
|
+
|
149
|
+
If the information above is not enough, you can access the entire data object from Bandcamp through the release.data attribute.
|
150
|
+
|
151
|
+
release.data structure
|
152
|
+
```JSON
|
153
|
+
{
|
154
|
+
"artFullsizeUrl": "https://f1.bcbits.com/img/a3489132960_10.jpg",
|
155
|
+
"artThumbURL": "https://f1.bcbits.com/img/a3489132960_3.jpg",
|
156
|
+
"current": {
|
157
|
+
"is_set_price": null,
|
158
|
+
"purchase_title": null,
|
159
|
+
"minimum_price_nonzero": 10,
|
160
|
+
"killed": null,
|
161
|
+
"publish_date": "07 Nov 2013 15:27:37 GMT",
|
162
|
+
"mod_date": "22 Nov 2013 20:01:15 GMT",
|
163
|
+
"art_id": 3489132960,
|
164
|
+
"minimum_price": 10,
|
165
|
+
"featured_track_id": 658956410,
|
166
|
+
"auto_repriced": null,
|
167
|
+
"require_email": null,
|
168
|
+
"download_pref": 2,
|
169
|
+
"title": "An Evening With Neil Gaiman and Amanda Palmer",
|
170
|
+
"new_desc_format": 1,
|
171
|
+
"about": null,
|
172
|
+
"require_email_0": null,
|
173
|
+
"private": null,
|
174
|
+
"artist": "Neil Gaiman and Amanda Palmer",
|
175
|
+
"id": 3510389344,
|
176
|
+
"band_id": 3463798201,
|
177
|
+
"credits": null,
|
178
|
+
"upc": null,
|
179
|
+
"set_price": 7,
|
180
|
+
"new_date": "07 Nov 2013 14:50:34 GMT",
|
181
|
+
"type": "album",
|
182
|
+
"purchase_url": null,
|
183
|
+
"release_date": "19 Nov 2013 00:00:00 GMT",
|
184
|
+
"download_desc_id": null
|
185
|
+
},
|
186
|
+
"hasAudio": true,
|
187
|
+
"trackinfo": [
|
188
|
+
"(all tracks go here... see 'trackinfo')"
|
189
|
+
],
|
190
|
+
"url": "http://amandapalmer.bandcamp.com/album/an-evening-with-neil-gaiman-and-amanda-palmer"
|
191
|
+
}
|
192
|
+
```
|
193
|
+
|
194
|
+
Assuming you want the "minimum_price" of a release
|
195
|
+
```ruby
|
196
|
+
release.data['current']['minimum_price']
|
197
|
+
=> 10.0
|
198
|
+
```
|
199
|
+
|
200
|
+
The "trackinfo" in release.data looks like this
|
201
|
+
```JSON
|
202
|
+
{
|
203
|
+
"video_poster_url": null,
|
204
|
+
"is_draft": false,
|
205
|
+
"title_link": "/track/my-last-landlady-3",
|
206
|
+
"download_tooltip": "",
|
207
|
+
"video_caption": null,
|
208
|
+
"has_lyrics": false,
|
209
|
+
"sizeof_lyrics": 0,
|
210
|
+
"duration": 391.821,
|
211
|
+
"license_type": 1,
|
212
|
+
"video_featured": null,
|
213
|
+
"has_info": false,
|
214
|
+
"title": "My Last Landlady",
|
215
|
+
"video_source_type": null,
|
216
|
+
"track_num": 1,
|
217
|
+
"private": null,
|
218
|
+
"alt_link": null,
|
219
|
+
"video_id": null,
|
220
|
+
"is_downloadable": false,
|
221
|
+
"video_source_id": null,
|
222
|
+
"lyrics": null,
|
223
|
+
"album_preorder": false,
|
224
|
+
"id": 1844797083,
|
225
|
+
"encoding_error": null,
|
226
|
+
"has_free_download": null,
|
227
|
+
"video_mobile_url": null,
|
228
|
+
"streaming": 1,
|
229
|
+
"unreleased_track": false,
|
230
|
+
"file": {
|
231
|
+
"mp3-128": "http://popplers5.bandcamp.com/download/track?enc=mp3-128&fsig=25ddaa2b8fa8a008562e4e0c6efc2eff&id=1844797083&stream=1&ts=1417597933.0"
|
232
|
+
},
|
233
|
+
"encoding_pending": null,
|
234
|
+
"free_album_download": false,
|
235
|
+
"encodings_id": 3584714018
|
236
|
+
}
|
237
|
+
```
|
238
|
+
|
239
|
+
Assuming you want to know if the first track of a release "has_lyrics":
|
240
|
+
|
241
|
+
```ruby
|
242
|
+
release.data['trackinfo'][0]['has_lyrics']
|
243
|
+
=> false
|
244
|
+
```
|
245
|
+
|
246
|
+
## Contributing
|
247
|
+
|
248
|
+
1. Fork it ( https://github.com/[my-github-username]/bc_crawler/fork )
|
249
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
250
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
251
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
252
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bc_crawler.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for bc_crawler. The version number is read from
# lib/bc_crawler/version.rb, so lib/ is put on the load path first.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'bc_crawler/version'

Gem::Specification.new do |gem|
  gem.name        = 'bc_crawler'
  gem.version     = BcCrawler::VERSION
  gem.authors     = ['Mario Schuettel']
  gem.email       = ['github@lxxxvi.ch']
  gem.summary     = 'Crawl Bandcamp Sites'
  gem.description = 'Allows to crawl bandcamp sites, including release and track information'
  gem.homepage    = ''
  gem.license     = 'MIT'

  # Package exactly the files tracked by git (NUL-separated listing).
  tracked_files = `git ls-files -z`.split("\x0")

  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Tooling needed only while developing the gem.
  gem.add_development_dependency 'bundler', '~> 1.6'
  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'rspec'

  # Needed at runtime to parse the release data.
  gem.add_runtime_dependency 'json', '>= 1.8.1'
end
|
data/lib/bc_crawler.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# BcCrawler (Bandcamp Crawler) can be used to fetch release data
|
2
|
+
# from a given artist, band or label on bandcamp.com.
|
3
|
+
# It will fetch the main information such as band name, release name,
|
4
|
+
# track name, track duration, track number, etc.
|
5
|
+
|
6
|
+
module BcCrawler
  # Represents the main page of one artist/label/band on bandcamp.com.
  #
  # Constructing an instance downloads the page at +url+ and creates one
  # (not yet crawled) Release for every distinct "/album/..." link found in it.
  class Main
    attr_accessor :releases, :url

    # Matches an anchor that targets an album page; captures the album slug.
    ALBUM_LINK = %r{<a href="/album/(.*?)"}

    # url - String address of the main page, with or without a trailing slash.
    #
    # Performs one HTTP request. Errors raised while parsing or opening the
    # URL propagate to the caller.
    def initialize(url)
      @url = url

      # TODO: implement single tracks, that are not assigned to an album, but directly to the artist
      @releases = release_paths(fetch_html).map do |path|
        BcCrawler::Release.new(release_url(path))
      end
    end

    # Fetch the information of every release found on the main page.
    # Each release performs its own HTTP request. Returns the releases.
    def crawl
      @releases.each(&:crawl)
    end

    def to_s
      "URL : #{ @url }\n" \
      "Number of releases : #{ @releases.count }\n"
    end

    private

    # Download the main page. The URL is parsed first and opened through the
    # resulting URI object, so a caller-supplied string can never be treated
    # as a local file name or a "|command" pipe the way Kernel#open would.
    def fetch_html
      URI.parse(@url).open.read
    end

    # All distinct "/album/..." paths linked from +html+, in page order.
    def release_paths(html)
      html.scan(ALBUM_LINK).map { |captures| "/album/#{ captures.first }" }.uniq
    end

    # Join the base URL and an absolute +path+ without doubling the slash
    # when the base URL already ends in "/".
    def release_url(path)
      "#{ @url.chomp('/') }#{ path }"
    end
  end
end
|
45
|
+
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module BcCrawler

  # One release (album) page on bandcamp.com.
  #
  # A new instance only knows its URL; call #crawl to download the page and
  # populate the attributes, #data and #tracks.
  class Release

    attr_reader :art_fullsize_url, :art_thumb_url, :art_id, :about, :featured_track_id,
                :credits, :artist, :purchase_url, :band_id, :id, :release_date,
                :type, :title, :tracks, :has_audio, :url, :html, :data

    def initialize(url)
      @url = url
      @tracks = []
    end

    # Scan the HTML for the JavaScript snippet that assigns a variable named
    # "TralbumData". It contains all information about the release (and its
    # tracks), but is a JavaScript object literal, so the requested nodes are
    # picked out line by line and rewritten into a valid JSON object.
    #
    # By default, only the main nodes in TralbumData are crawled. There are more nodes available.
    #
    #   nodes = %w(album_is_preorder album_release_date artFullsizeUrl artist artThumbURL
    #              current defaultPrice featured_track_id FREE freeDownloadPage hasAudio
    #              id initial_track_num is_preorder item_type last_subscription_item
    #              maxPrice minPrice packages PAID playing_from preorder_count trackinfo url)
    #
    # nodes - Array of node names to extract. A node that is not present on
    #         the page is skipped, so its key is simply missing from #data.
    #
    # Performs one HTTP request and prints a progress line to stdout.
    # Returns the Array of tracks. Raises RuntimeError when the page contains
    # no TralbumData assignment, JSON::ParserError when a node is not valid JSON.
    def crawl(nodes = %w(artFullsizeUrl artThumbURL current hasAudio trackinfo url))
      puts "Crawling #{@url}"
      @nodes = nodes

      @html = fetch_html
      @data = JSON.parse("{ #{ json_nodes(tralbum_source(@html)).join(', ') } }")

      # Finally, we load the release info
      load_release_info
    end

    # Assign some of the main information to instance variables
    # TODO: make ALL information available as instance variables
    def load_release_info
      # "current" is absent when the caller crawled a custom node list without it.
      current = @data['current'] || {}

      @art_fullsize_url  = @data['artFullsizeUrl']
      @art_thumb_url     = @data['artThumbURL']
      @has_audio         = @data['hasAudio']
      @art_id            = current['art_id']
      @about             = current['about']
      @featured_track_id = current['featured_track_id']
      @credits           = current['credits']
      @artist            = current['artist']
      @purchase_url      = current['purchase_url']
      @band_id           = current['band_id']
      @id                = current['id']
      @release_date      = current['release_date']
      @type              = current['type']
      @title             = current['title']
      load_track_info
    end

    # Tracks have their own class. The list is rebuilt from scratch so that
    # crawling the same release twice does not duplicate its tracks.
    def load_track_info
      @tracks = (@data['trackinfo'] || []).map { |track| Track.new(self, track) }
    end

    def to_s
      "URL : #{ @url }\n" \
      "Artist : #{ @artist }\n" \
      "Release title : #{ @title }\n" \
      "Number of tracks : #{ @tracks.count }\n" \
      "#{ '(use .crawl method to fetch the missing data)' if @artist.nil? }\n"
    end

    private

    # Download the release page. Going through URI.parse keeps a
    # caller-supplied string from being opened as a local file or pipe.
    def fetch_html
      URI.parse(@url).open.read
    end

    # Body of the "var TralbumData = { ... };" object literal found in +html+,
    # with tabs removed and the '"a" + "b"' string concatenation (used in the
    # "url" node) collapsed into a single string literal.
    def tralbum_source(html)
      body = html[/var TralbumData = \{(.*?)\};/m, 1]
      raise "TralbumData not found on #{ @url }" if body.nil?

      body.gsub("\t", '').gsub('" + "', '')
    end

    # One '"node": value' JSON fragment per requested node found in +js_content+.
    def json_nodes(js_content)
      @nodes.map do |node|
        key  = Regexp.escape(node)
        line = js_content[/^ *#{ key } *:.*$/]
        next if line.nil?

        # Quote only the leading key; the value may contain the node name too.
        line.sub(/\A *#{ key } *:/) { "\"#{ node }\":" }
            .sub(/\s*,\s*\z/, '') # drop the trailing comma of the JS literal
      end.compact
    end
  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module BcCrawler
  # A single track of a release, built from one entry of the release's
  # "trackinfo" data.
  class Track

    attr_reader :duration, :track_num, :is_downloadable, :streaming,
                :is_draft, :id, :title_link, :file, :title, :url

    # release - the owning release; only its #url is read here.
    # track   - Hash with the track's entry from "trackinfo" (String keys).
    def initialize(release, track)
      @release         = release
      @duration        = track['duration']
      @track_num       = track['track_num']
      @is_downloadable = track['is_downloadable']
      @streaming       = track['streaming']
      @is_draft        = track['is_draft']
      @id              = track['id']
      @title_link      = track['title_link']
      @file            = track['file']
      @title           = track['title']
      @url             = absolute_url(track['title_link'])
    end

    def to_s
      "URL : #{ @url }\n" \
      "Track number : #{ @track_num }\n" \
      "Track name : #{ @title }\n" \
      "Duration : #{ @duration }\n"
    end

    private

    # Absolute URL of the track page: the release's base URL followed by
    # +title_link+. The base URL ends in "/" and the link starts with "/",
    # so one of the two slashes is dropped to avoid ".com//track/...".
    def absolute_url(title_link)
      base = BcCrawler::Helper.get_base_url(@release.url).to_s
      link = title_link.to_s
      base = base.chomp('/') if link.start_with?('/')
      "#{ base }#{ link }"
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Integration specs for the crawler. Apart from the base-url example, these
# examples request pages from bandcamp.com.
describe BcCrawler do

  before(:all) do
    @test_release_url = 'http://amandapalmer.bandcamp.com/album/amanda-palmer-performs-the-popular-hits-of-radiohead-on-her-magical-ukulele'
  end

  it 'returns the base url' do
    expect(
      BcCrawler::Helper.get_base_url('https://abc.bandcamp.com/album/of-the-year')
    ).to eq('https://abc.bandcamp.com/')
  end

  it 'crawls the main page' do
    release_count = BcCrawler::Main.new('http://amandapalmer.bandcamp.com/').releases.count
    expect(release_count).to be > 0
  end

  it 'crawls the release page' do
    release = BcCrawler::Release.new(@test_release_url)
    release.crawl
    expect(release.title).to eq('Amanda Palmer Performs The Popular Hits Of Radiohead On Her Magical Ukulele')
  end

  it 'stores the trackinfo' do
    release = BcCrawler::Release.new(@test_release_url)
    release.crawl
    first_track = release.tracks.first
    expect(first_track.track_num).to be == 1
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bc_crawler'
|
data/tasks/rspec.rake
ADDED
metadata
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bc_crawler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.4
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Mario Schuettel
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-01-03 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.6'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.6'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: json
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.8.1
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 1.8.1
|
78
|
+
description: Allows to crawl bandcamp sites, including release and track information
|
79
|
+
email:
|
80
|
+
- github@lxxxvi.ch
|
81
|
+
executables: []
|
82
|
+
extensions: []
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- Gemfile
|
87
|
+
- LICENSE.txt
|
88
|
+
- README.md
|
89
|
+
- Rakefile
|
90
|
+
- bc_crawler.gemspec
|
91
|
+
- lib/bc_crawler.rb
|
92
|
+
- lib/bc_crawler/helper.rb
|
93
|
+
- lib/bc_crawler/main.rb
|
94
|
+
- lib/bc_crawler/release.rb
|
95
|
+
- lib/bc_crawler/track.rb
|
96
|
+
- lib/bc_crawler/version.rb
|
97
|
+
- spec/bc_crawler_spec.rb
|
98
|
+
- spec/spec_helper.rb
|
99
|
+
- tasks/rspec.rake
|
100
|
+
homepage: ''
|
101
|
+
licenses:
|
102
|
+
- MIT
|
103
|
+
post_install_message:
|
104
|
+
rdoc_options: []
|
105
|
+
require_paths:
|
106
|
+
- lib
|
107
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
none: false
|
109
|
+
requirements:
|
110
|
+
- - ! '>='
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '0'
|
113
|
+
segments:
|
114
|
+
- 0
|
115
|
+
hash: 4069188658231555620
|
116
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
118
|
+
requirements:
|
119
|
+
- - ! '>='
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
segments:
|
123
|
+
- 0
|
124
|
+
hash: 4069188658231555620
|
125
|
+
requirements: []
|
126
|
+
rubyforge_project:
|
127
|
+
rubygems_version: 1.8.24
|
128
|
+
signing_key:
|
129
|
+
specification_version: 3
|
130
|
+
summary: Crawl Bandcamp Sites
|
131
|
+
test_files:
|
132
|
+
- spec/bc_crawler_spec.rb
|
133
|
+
- spec/spec_helper.rb
|