torrent_crawler 0.0.0
- data/.document +5 -0
- data/.gitignore +4 -0
- data/LICENSE +20 -0
- data/README.md +23 -0
- data/Rakefile +31 -0
- data/VERSION +1 -0
- data/lib/crawlers/base.rb +51 -0
- data/lib/crawlers/linux_tracker.rb +60 -0
- data/lib/crawlers/mininova.rb +60 -0
- data/lib/torrent_crawler.rb +19 -0
- data/lib/torrent_crawler/torrent.rb +61 -0
- data/spec/crawlers/base_spec.rb +19 -0
- data/spec/crawlers/linux_tracker_spec.rb +68 -0
- data/spec/crawlers/mininova_spec.rb +70 -0
- data/spec/file_fixtures/linux_tracker/details.html +1296 -0
- data/spec/file_fixtures/linux_tracker/index.html +1896 -0
- data/spec/file_fixtures/mininova/details_det.html +239 -0
- data/spec/file_fixtures/mininova/details_tor.html +268 -0
- data/spec/file_fixtures/mininova/index.html +173 -0
- data/spec/meta_tracker/torrent_spec.rb +25 -0
- data/spec/meta_tracker_spec.rb +4 -0
- data/spec/spec_helper.rb +17 -0
- data/torrent_crawler.gemspec +71 -0
- metadata +110 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
+Copyright (c) 2009 rspeicher
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,23 @@
+# torrent_crawler
+
+Crawl multiple torrent sites.
+
+## Default Crawlers
+
+The included crawlers act as more of an example than anything else. Unless you
+really want to know which Linux ISOs just got released.
+
+* [LinuxTracker](http://linuxtracker.org)
+* [Mininova](http://mininova.org)
+
+## Note on Patches/Pull Requests
+
+* Fork
+* Code
+* Commit
+* Push
+* Pull Request
+
+## Copyright
+
+Copyright (c) 2010 Robert Speicher. See LICENSE for details.
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
+require 'rubygems'
+require 'rake'
+
+begin
+  require 'jeweler'
+  Jeweler::Tasks.new do |gem|
+    gem.name = "torrent_crawler"
+    gem.summary = %Q{Crawl multiple torrent sites}
+    gem.description = %Q{Crawl multiple torrent sites.}
+    gem.email = "rspeicher@gmail.com"
+    gem.homepage = "http://github.com/tsigo/torrent_crawler"
+    gem.authors = ["rspeicher"]
+    gem.add_development_dependency "rspec", "~> 2.0.0"
+  end
+  Jeweler::GemcutterTasks.new
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+end
+
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new(:spec)
+task :default => :spec
+
+begin
+  require 'yard'
+  YARD::Rake::YardocTask.new
+rescue LoadError
+  task :yardoc do
+    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
+  end
+end
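The Rakefile wires up jeweler for gem packaging, RSpec for the test suite, and YARD for documentation. A short usage sketch, assuming those development dependencies are installed:

# Common invocations for the tasks defined in the Rakefile above:
#
#   rake spec     # run the RSpec suite (also the default task)
#   rake yardoc   # generate YARD documentation
#
# Jeweler additionally provides its usual gem housekeeping tasks (build,
# install, version bumping); see the jeweler documentation for the full list.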
data/VERSION
ADDED
@@ -0,0 +1 @@
+0.0.0
data/lib/crawlers/base.rb
ADDED
@@ -0,0 +1,51 @@
+module TorrentCrawler
+  module Crawlers
+    class Base
+      attr_accessor :results
+
+      def initialize
+        @results = []
+      end
+
+      def headers
+        {
+          'User-Agent' => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3",
+          'Accept-Language' => 'en-us,en;q=0.5',
+          'Referer' => index_url
+        }
+      end
+
+      def index_url
+        raise BadCrawlerError, "A method named index_url has not been implemented in this Crawler class"
+      end
+
+      def detail_url
+        raise BadCrawlerError, "A method named detail_url has not been implemented in this Crawler class"
+      end
+
+      def index(last_seen = nil)
+        raise BadCrawlerError, "A method named index has not been implemented in this Crawler class"
+      end
+
+      def detail(tracker_id)
+        raise BadCrawlerError, "A method named detail has not been implemented in this Crawler class"
+      end
+
+      def result(&block)
+        torrent = TorrentCrawler::Torrent.new
+        torrent.tracker_key = self.tracker_key
+
+        yield torrent
+      end
+
+      def tracker_key
+        self.class.to_s.gsub(/::/, '/').
+          gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
+          gsub(/([a-z\d])([A-Z])/, '\1_\2').
+          tr("-", "_").
+          downcase.
+          gsub(/.*\/([^\/]+)$/, '\1')
+      end
+    end
+  end
+end
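Crawlers::Base defines the contract for site-specific crawlers: subclasses override index_url, detail_url, index, and detail, build torrents through the result helper (which pre-fills tracker_key), and inherit a tracker_key derived by underscoring the class name. A minimal sketch of a custom crawler, where ExampleTracker and its URLs are hypothetical:

module TorrentCrawler::Crawlers
  class ExampleTracker < Base
    def index_url
      "http://tracker.example.com/browse"                  # hypothetical URL
    end

    def detail_url(tracker_id)
      "http://tracker.example.com/torrents/#{tracker_id}"  # hypothetical URL
    end

    def index(last_seen = nil)
      # A real crawler would fetch and parse index_url with Nokogiri, as the
      # bundled crawlers below do, pushing each parsed torrent onto `results`.
      results
    end

    def detail(tracker_id)
      result do |torrent|
        torrent.tracker_id = tracker_id
        torrent.title      = "Example torrent"
        torrent
      end
    end
  end
end

TorrentCrawler::Crawlers::ExampleTracker.new.tracker_key  # => "example_tracker"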
data/lib/crawlers/linux_tracker.rb
ADDED
@@ -0,0 +1,60 @@
+module TorrentCrawler::Crawlers
+  class LinuxTracker < Base
+    def index_url
+      "http://linuxtracker.org/"
+    end
+
+    def detail_url(tracker_id)
+      "http://linuxtracker.org/index.php?page=torrent-details&id=#{tracker_id}"
+    end
+
+    def index(last_seen = nil)
+      doc = Nokogiri::HTML(open(self.index_url, self.headers))
+      doc.css('#rightcol :nth-child(4) table > tr').each do |tr|
+        next if tr.css(':nth-child(2) a').first.nil?
+
+        result do |torrent|
+          torrent.tracker_id = tr.css(':nth-child(2) a').first['href'].gsub(/.*id=([a-z0-9]+).*/, '\1')
+
+          return results if torrent.tracker_id == last_seen
+
+          torrent.hash = torrent.tracker_id
+          torrent.title = tr.css(':nth-child(2) a').first.text.strip
+          # torrent.uploader
+          torrent.size = tr.css(':nth-child(5)').first.text.strip
+          # torrent.files
+          torrent.seeders = tr.css(':nth-child(6)').first.text.strip
+          torrent.leechers = tr.css(':nth-child(7)').first.text.strip
+          torrent.snatches = tr.css(':nth-child(8)').first.text.strip
+          torrent.snatches = '0' if torrent.snatches == '---'
+          torrent.uploaded_at = Time.now
+
+          results << torrent
+        end
+      end
+
+      results
+    end
+
+    def detail(tracker_id)
+      doc = Nokogiri::HTML(open(self.detail_url(tracker_id), self.headers))
+
+      result do |torrent|
+        torrent.tracker_id = tracker_id
+        torrent.hash = tracker_id
+        torrent.title = doc.css('tr:nth-child(1) .row1:nth-child(2)').first.text.strip
+        torrent.uploader = doc.css('tr:nth-child(16) a').first.text.strip
+        torrent.size = doc.css('tr:nth-child(13) .row1').first.text.strip
+        torrent.files = doc.css('tr:nth-child(14) .row1').first.text.gsub(/.*(\d+) files?.*/im, '\1')
+        torrent.seeders = doc.css('tr:nth-child(19) .row1').first.text.gsub(/.*Seeds: (\d+).*/, '\1')
+        torrent.leechers = doc.css('tr:nth-child(19) .row1').first.text.gsub(/.*Leechers: (\d+).*/, '\1')
+        torrent.snatches = doc.css('tr:nth-child(18) .row1').first.text.gsub(/[^\d]+/, '')
+        torrent.uploaded_at = Time.now # TODO: Parse site's value?
+
+        torrent.tags << torrent.uploader
+
+        torrent
+      end
+    end
+  end
+end
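A usage sketch for the LinuxTracker crawler (network access assumed; the id shown is a placeholder). Passing the most recently seen tracker_id back into index makes the next crawl stop early once it reaches already-known torrents:

require 'torrent_crawler'

crawler  = TorrentCrawler::Crawlers::LinuxTracker.new
torrents = crawler.index           # torrents listed on the front page
latest   = torrents.first.tracker_id

# Later: only torrents newer than `latest` are returned.
fresh = crawler.index(latest)

# Individual detail pages can be fetched by tracker id.
detail = crawler.detail(latest)
detail.seeders                     # => e.g. "2" (counts come back as strings)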
data/lib/crawlers/mininova.rb
ADDED
@@ -0,0 +1,60 @@
+module TorrentCrawler::Crawlers
+  class Mininova < Base
+    def index_url
+      "http://www.mininova.org/"
+    end
+
+    def detail_url(tracker_id)
+      "http://www.mininova.org/det/#{tracker_id}"
+    end
+
+    def index(last_seen = nil)
+      doc = Nokogiri::HTML(open(self.index_url, self.headers))
+      doc.css('table.maintable:nth-child(2) tr').each do |tr|
+        next if tr.css(':nth-child(2) a').first.nil?
+
+        result do |torrent|
+          torrent.tracker_id = tr.css('td:nth-child(2) a').first['href'].gsub(%r{^.*/get/(\d+)/?$}, '\1')
+
+          return results if torrent.tracker_id == last_seen
+
+          # torrent.hash
+          torrent.title = tr.css('td:nth-child(2) a:nth-child(2)').first.text.strip
+          # torrent.uploader
+          torrent.size = tr.css('td:nth-child(3)').first.text.strip
+          # torrent.files
+          torrent.seeders = tr.css('td:nth-child(4)').first.text.strip
+          torrent.leechers = tr.css('td:nth-child(5)').first.text.strip
+          # torrent.snatches
+          torrent.uploaded_at = Time.now
+
+          torrent.tags << tr.css('td:nth-child(1)').first.text.strip
+          torrent.tags << tr.css('td:nth-child(2) small strong').first.text.strip
+
+          results << torrent
+        end
+      end
+
+      results
+    end
+
+    def detail(tracker_id)
+      doc = Nokogiri::HTML(open(self.detail_url(tracker_id), headers))
+
+      result do |torrent|
+        torrent.tracker_id = tracker_id
+        torrent.hash = doc.css('#torrentdetails p:nth-child(2)').first.text.strip.gsub(/Info hash:\s*(.*)/, '\1')
+        torrent.title = doc.css('h1').first.text.gsub(/Details of (.*)/, '\1')
+        # torrent.uploader
+        torrent.size = doc.css('#torrentdetails p:nth-child(6)').first.text.strip.gsub(/(.*) in \d+ files?/, '\1')
+        torrent.files = doc.css('#torrentdetails p:nth-child(6)').first.text.strip.gsub(/.*(\d+) files?$/m, '\1')
+        # torrent.seeders
+        # torrent.leechers
+        # torrent.snatches
+        torrent.uploaded_at = DateTime.parse(doc.css('#torrentdetails p:nth-child(3)').first.text.strip.gsub(/Added on:\s*(.*)/, '\1'))
+
+        torrent
+      end
+    end
+  end
+end
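Mininova's index rows carry no info hash, uploader, or snatch count, so those fields are only filled in (where the site exposes them) by a follow-up detail request. A sketch, with illustrative output only:

crawler = TorrentCrawler::Crawlers::Mininova.new

crawler.index.each do |summary|
  full = crawler.detail(summary.tracker_id)
  # summary.tags holds the category and sub-category scraped from the index,
  # e.g. ["Music", "Hip Hop"]; full.hash holds the info hash from the detail page.
  puts "#{full.title} [#{full.hash}] - #{summary.tags.join(' / ')}"
end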
data/lib/torrent_crawler.rb
ADDED
@@ -0,0 +1,19 @@
+require 'date'
+require 'open-uri'
+require 'nokogiri'
+
+module TorrentCrawler
+  module Crawlers
+    # Raised when a crawler class fails to implement a required method
+    class BadCrawlerError < NoMethodError; end
+
+    # Raised when a crawler fails to authenticate with the tracker
+    class AuthenticationError < RuntimeError; end
+  end
+end
+
+require 'torrent_crawler/torrent'
+
+require 'crawlers/base'
+require 'crawlers/linux_tracker'
+require 'crawlers/mininova'
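Requiring 'torrent_crawler' pulls in open-uri, Nokogiri, the Torrent model, and the bundled crawlers, and defines the library's two exception classes. A sketch of handling them; note that AuthenticationError is provided for crawlers that must log in, although neither bundled crawler raises it:

require 'torrent_crawler'

begin
  TorrentCrawler::Crawlers::Base.new.index
rescue TorrentCrawler::Crawlers::BadCrawlerError => e
  warn "Crawler is missing a required method: #{e.message}"
rescue TorrentCrawler::Crawlers::AuthenticationError => e
  warn "Could not authenticate with the tracker: #{e.message}"
end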
data/lib/torrent_crawler/torrent.rb
ADDED
@@ -0,0 +1,61 @@
+module TorrentCrawler
+  class Torrent
+    attr_accessor :tracker_key
+
+    attr_accessor :tracker_id
+
+    def id
+      tracker_id
+    end
+
+    def id=(value)
+      self.tracker_id = value # assign the attribute; a bare `tracker_id = value` would only set a local variable
+    end
+
+    attr_accessor :hash
+
+    attr_accessor :title
+
+    attr_accessor :uploader
+
+    def size
+      @size ||= 0
+    end
+
+    def size=(value)
+      if value.respond_to? :downcase
+        if value =~ /^([0-9\.]+)(.*)$/
+          value = $1.strip.to_f
+          unit = $2.strip.gsub(/[^A-Za-z]/, '')
+
+          case unit
+          when /tb/i
+            value *= 1024 * 1024 * 1024
+          when /gb/i
+            value *= 1024 * 1024
+          when /mb/i, /megabytes?/i
+            value *= 1024
+          end
+
+          @size = value.to_i
+        end
+      else
+        @size = value.to_i
+      end
+    end
+
+    attr_accessor :files
+
+    attr_accessor :seeders
+
+    attr_accessor :leechers
+
+    attr_accessor :snatches
+
+    attr_accessor :uploaded_at
+
+    def tags
+      @tags ||= []
+    end
+  end
+end
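Torrent#size= normalizes human-readable size strings down to kilobyte counts (TB, GB, and MB values are multiplied accordingly), while non-string values are stored unchanged; this is consistent with the specs below expecting 174_080 for a roughly 170 MB listing. A small sketch of that behaviour:

t = TorrentCrawler::Torrent.new
t.size = "170 MB"   # string with a unit: 170 * 1024               => 174_080
t.size = "1.5 GB"   # fractional values work too: 1.5 * 1024 * 1024 => 1_572_864
t.size = 2048       # numeric values are stored as-is
t.size              # => 2048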
data/spec/crawlers/base_spec.rb
ADDED
@@ -0,0 +1,19 @@
+require 'spec_helper'
+
+
+module TorrentCrawler::Crawlers
+  class TestCrawler < Base
+  end
+
+  describe Base, "#index" do
+    it "should raise BadCrawlerError when not implemented" do
+      expect { TestCrawler.new.index }.to raise_error(BadCrawlerError, /index has not been implemented/)
+    end
+  end
+
+  describe Base, "#tracker_key" do
+    it "should provide a default tracker_key" do
+      TestCrawler.new.tracker_key.should eql('test_crawler')
+    end
+  end
+end
data/spec/crawlers/linux_tracker_spec.rb
ADDED
@@ -0,0 +1,68 @@
+require 'spec_helper'
+
+module TorrentCrawler::Crawlers
+  describe LinuxTracker do
+    subject { LinuxTracker.new }
+
+    it "should implement index_url" do
+      expect { subject.index_url }.to_not raise_error
+    end
+
+    it "should implement detail_url" do
+      expect { subject.detail_url('abcd') }.to_not raise_error
+    end
+  end
+
+  describe LinuxTracker, "#index" do
+    before do
+      crawler = LinuxTracker.new
+      FakeWeb.register_uri(:get, crawler.index_url, :body => file_fixture('linux_tracker/index.html'))
+      results = crawler.index
+      @subject = results[0]
+    end
+    subject { @subject }
+
+    its(:tracker_key) { should eql('linux_tracker') }
+    its(:tracker_id) { should eql('c35157e2d773fcde76e0b3ae441752f01c82bcd8') }
+    its(:hash) { should eql('c35157e2d773fcde76e0b3ae441752f01c82bcd8') }
+    its(:title) { should eql('parabola 2010 10 01 netinstall x86 64') }
+    its(:uploader) { should be_nil }
+    its(:size) { should eql(174_080) }
+    its(:files) { should be_nil }
+    its(:seeders) { should eql('1') }
+    its(:leechers) { should eql('0') }
+    its(:snatches) { should eql('0') }
+    its(:uploaded_at) { should_not be_nil }
+  end
+
+  describe LinuxTracker, "#index with last_seen" do
+    it "should stop crawling when it hits a previously-seen id" do
+      crawler = LinuxTracker.new
+      FakeWeb.register_uri(:get, crawler.index_url, :body => file_fixture('linux_tracker/index.html'))
+      results = crawler.index('cd63c50078bae05b27195159508be0787f09d002')
+      results.size.should eql(2)
+    end
+  end
+
+  describe LinuxTracker, "#detail" do
+    before do
+      crawler = LinuxTracker.new
+      FakeWeb.register_uri(:get, crawler.detail_url('c35157e2d773fcde76e0b3ae441752f01c82bcd8'), :body => file_fixture('linux_tracker/details.html'))
+      @subject = crawler.detail('c35157e2d773fcde76e0b3ae441752f01c82bcd8')
+    end
+    subject { @subject }
+
+    its(:tracker_key) { should eql('linux_tracker') }
+    its(:tracker_id) { should eql('c35157e2d773fcde76e0b3ae441752f01c82bcd8') }
+    its(:hash) { should eql('c35157e2d773fcde76e0b3ae441752f01c82bcd8') }
+    its(:title) { should eql('parabola-2010.10.01-netinstall-x86_64') }
+    its(:uploader) { should eql('youknowwho') }
+    its(:size) { should eql(174_080) }
+    its(:files) { should eql('1') }
+    its(:seeders) { should eql('2') }
+    its(:leechers) { should eql('0') }
+    its(:snatches) { should eql('0') }
+    its(:uploaded_at) { should_not be_nil }
+    its(:tags) { should_not eql([]) }
+  end
+end
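These specs stub HTTP with FakeWeb and feed saved pages in through a file_fixture helper. That helper lives in spec/spec_helper.rb, whose body is not shown in this diff; a helper with the behaviour the specs rely on would look roughly like this (a hypothetical reconstruction, not the gem's actual code):

# Hypothetical sketch of the fixture helper used above.
def file_fixture(path)
  File.read(File.join(File.dirname(__FILE__), 'file_fixtures', path))
end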
data/spec/crawlers/mininova_spec.rb
ADDED
@@ -0,0 +1,70 @@
+require 'spec_helper'
+
+module TorrentCrawler::Crawlers
+  describe Mininova do
+    subject { Mininova.new }
+
+    it "should implement index_url" do
+      expect { subject.index_url }.to_not raise_error
+    end
+
+    it "should implement detail_url" do
+      expect { subject.detail_url('abcd') }.to_not raise_error
+    end
+  end
+
+  describe Mininova, "#index" do
+    before do
+      crawler = Mininova.new
+      FakeWeb.register_uri(:get, crawler.index_url, :body => file_fixture('mininova/index.html'))
+      results = crawler.index
+      @subject = results[0]
+    end
+    subject { @subject }
+
+    its(:tracker_key) { should eql('mininova') }
+    its(:tracker_id) { should eql('13195739') }
+    its(:hash) { should be_nil }
+    its(:title) { should eql(%{Nikki McKnight aka Nix "Sky's the Limit"}) }
+    its(:uploader) { should be_nil }
+    its(:size) { should eql(16_404) }
+    its(:files) { should be_nil }
+    its(:seeders) { should eql('1') }
+    its(:leechers) { should eql('0') }
+    its(:snatches) { should be_nil }
+    its(:uploaded_at) { should_not be_nil }
+    its(:tags) { should include('Music') }
+    its(:tags) { should include('Hip Hop') }
+  end
+
+  describe Mininova, "#index with last_seen" do
+    it "should stop crawling when it hits a previously-seen id" do
+      crawler = Mininova.new
+      FakeWeb.register_uri(:get, crawler.index_url, :body => file_fixture('mininova/index.html'))
+      results = crawler.index('13195736')
+      results.size.should eql(2)
+    end
+  end
+
+  describe Mininova, "#detail" do
+    before do
+      crawler = Mininova.new
+      FakeWeb.register_uri(:get, crawler.detail_url('13195739'), :body => file_fixture('mininova/details_det.html'))
+      @subject = crawler.detail('13195739')
+    end
+    subject { @subject }
+
+    its(:tracker_key) { should eql('mininova') }
+    its(:tracker_id) { should eql('13195739') }
+    its(:hash) { should eql('757c4ac2d2aee458aa53847a0bd24b7946efff3b') }
+    its(:title) { should eql(%{Nikki McKnight aka Nix "Sky's the Limit"}) }
+    its(:uploader) { should be_nil }
+    its(:size) { should eql(16_404) }
+    its(:files) { should eql('2') }
+    its(:seeders) { should be_nil }
+    its(:leechers) { should be_nil }
+    its(:snatches) { should be_nil }
+    its(:uploaded_at) { should_not be_nil }
+    its(:tags) { should eql([]) }
+  end
+end