rubyretriever 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rr +2 -2
- data/lib/retriever/version.rb +1 -1
- data/spec/link_spec.rb +2 -2
- data/spec/page_spec.rb +4 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0aa827221b6c3034f4463c376b29e47b740580e6
|
4
|
+
data.tar.gz: c800c5820d62e45c140dea2d94140a3a9636aeff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fba8ef21412309bdfe3435caf8fe4ec01d197cce5cb1698fc9536bc8127bcd9c45d51c6a908bb642c5bfb3ff9caca1e73f98c78669036dc94c38704412a0461a
|
7
|
+
data.tar.gz: 092058e59d4c591be1d5ceab99dfa4219f86929897d2d1ab25859d03bf918622f4289ab8bca5fff4e87d6b9af228af80ac129deec1122e18d2f33db38544dea6
|
data/bin/rr
CHANGED
@@ -63,7 +63,7 @@ ARGV.each do|q|
|
|
63
63
|
puts "### Performing File Harvest" if options[:fileharvest]
|
64
64
|
puts "### Searching for file extension: #{options[:fileharvest]} pages" if (options[:fileharvest])
|
65
65
|
puts "### Performing SEO Scrape" if options[:seo]
|
66
|
-
puts "###
|
66
|
+
puts "### Writing output to filename: #{options[:filename]}" if options[:filename]
|
67
67
|
puts "### Being verbose"
|
68
68
|
puts "### Stopping after #{options[:maxpages]} pages"
|
69
69
|
end
|
@@ -73,4 +73,4 @@ ARGV.each do|q|
|
|
73
73
|
puts "### [RubyRetriever] is done."
|
74
74
|
puts "###############################"
|
75
75
|
puts
|
76
|
-
end
|
76
|
+
end
|
data/lib/retriever/version.rb
CHANGED
data/spec/link_spec.rb
CHANGED
@@ -35,7 +35,7 @@ SOURCE
|
|
35
35
|
<a href='http://www.cnet.com/products/gadgets'>gadgets2</a>
|
36
36
|
SOURCE
|
37
37
|
|
38
|
-
expect(links).to
|
38
|
+
expect(links.size).to eq(1)
|
39
39
|
end
|
40
40
|
|
41
41
|
it "adds a protocol to urls missing them (www.)" do
|
@@ -52,7 +52,7 @@ SOURCE
|
|
52
52
|
<a href='http://www.cnet.com/products/gadgets/' data-vanity-rewritten='true'></a>
|
53
53
|
SOURCE
|
54
54
|
|
55
|
-
expect(links).to
|
55
|
+
expect(links.size).to eq(1)
|
56
56
|
end
|
57
57
|
|
58
58
|
it "returns relative urls with full path based on hostname" do
|
data/spec/page_spec.rb
CHANGED
@@ -16,7 +16,7 @@ describe "Page" do
|
|
16
16
|
<a href='http://www.yahoo.com/test/'>yahoo</a>
|
17
17
|
SOURCE
|
18
18
|
|
19
|
-
expect(links).to
|
19
|
+
expect(links.size).to eq(4)
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -28,7 +28,7 @@ SOURCE
|
|
28
28
|
<a href='http://www.yahoo.com/test/'>yahoo</a>
|
29
29
|
SOURCE
|
30
30
|
|
31
|
-
expect(links).to
|
31
|
+
expect(links.size).to eq(1)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
@@ -38,7 +38,7 @@ SOURCE
|
|
38
38
|
@source = (<<SOURCE).strip
|
39
39
|
<link rel='stylesheet' id='gforms_reset_css-css' href='http://www.cnet.com/wp-content/plugins/gravityforms/css/formreset.css?ver=1.7.12' type='text/css' media='all' />
|
40
40
|
SOURCE
|
41
|
-
expect(links).to
|
41
|
+
expect(links.size).to eq(0)
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
@@ -50,7 +50,7 @@ SOURCE
|
|
50
50
|
http://www.google.com
|
51
51
|
<a href='/test.html'>test</a>
|
52
52
|
SOURCE
|
53
|
-
expect(links).to
|
53
|
+
expect(links.size).to eq(1)
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|