rcrawler 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -3
- data/README.md +8 -0
- data/bin/rcrawler +6 -0
- data/lib/rcrawler.rb +3 -0
- data/lib/rcrawler/async.rb +0 -1
- data/lib/rcrawler/cli.rb +19 -0
- data/lib/rcrawler/version.rb +1 -1
- data/rcrawler.gemspec +1 -0
- data/spec/rcrawler/async_spec.rb +8 -2
- data/spec/rcrawler/cli_spec.rb +24 -0
- data/spec/rcrawler_spec.rb +5 -0
- metadata +23 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56f84a572fdd53fcb80d5473e4e47a4a549a4d85
|
4
|
+
data.tar.gz: 410785518581055a9cc5cdb264027b6d2157661b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 387532aebc322463c4415b8d7fb79c545dea09b49737822df9e1590a50199237f01680ae1e78759ba17376c4d03bfbee0dc75a4a5e8d5c0261b5d313cedfcb31
|
7
|
+
data.tar.gz: abae8d6cb3ae5ff50f81905875fe6d269300108dae7b058c12c8693c3b5264a96a4211cd73d1ba362decd0d0126a3d7ecb4629ff5b0a153e3a222b993badc821
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -79,6 +79,14 @@ RCrawler.async do
|
|
79
79
|
end
|
80
80
|
end
|
81
81
|
```
|
82
|
+
|
83
|
+
#### Command
|
84
|
+
|
85
|
+
% rcrawler help
|
86
|
+
Commands:
|
87
|
+
rcrawler help [COMMAND] # Describe available commands or one specific command
|
88
|
+
rcrawler sc http://example.com # Get a screen shot
|
89
|
+
|
82
90
|
## Contributing
|
83
91
|
|
84
92
|
1. Fork it
|
data/bin/rcrawler
ADDED
data/lib/rcrawler.rb
CHANGED
@@ -5,8 +5,11 @@ require "capybara/dsl"
|
|
5
5
|
require "capybara-webkit"
|
6
6
|
require "nokogiri"
|
7
7
|
require "headless"
|
8
|
+
require "timeout"
|
9
|
+
require "thor"
|
8
10
|
|
9
11
|
require "rcrawler/version"
|
12
|
+
require "rcrawler/cli"
|
10
13
|
require "rcrawler/configuration"
|
11
14
|
require "rcrawler/driver"
|
12
15
|
require "rcrawler/crawl"
|
data/lib/rcrawler/async.rb
CHANGED
data/lib/rcrawler/cli.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
module RCrawler
|
6
|
+
class CLI < Thor
|
7
|
+
desc "sc http://example.com", "Get a screen shot"
|
8
|
+
long_desc <<-LONGDESC
|
9
|
+
`sc` is get a screen shot. Save to current directory if filename not specified.
|
10
|
+
LONGDESC
|
11
|
+
option :output, aliases: "-o", type: :string, desc: "Output filename."
|
12
|
+
def sc(url)
|
13
|
+
filename = options.fetch("output", "#{Digest::SHA1.hexdigest(url)}.png")
|
14
|
+
RCrawler.crawl do
|
15
|
+
screenshot(url, filename)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/rcrawler/version.rb
CHANGED
data/rcrawler.gemspec
CHANGED
data/spec/rcrawler/async_spec.rb
CHANGED
@@ -50,7 +50,13 @@ describe RCrawler::Async do
|
|
50
50
|
|
51
51
|
it "exec_crawl method should be called" do
|
52
52
|
async.crawl {}
|
53
|
-
expect{async.send(:create_thread)}.not_to raise_error
|
53
|
+
expect{async.send(:create_thread).join}.not_to raise_error
|
54
|
+
end
|
55
|
+
|
56
|
+
it "exception should be generated if timeout" do
|
57
|
+
Timeout.should_receive(:timeout).and_raise(Timeout::Error)
|
58
|
+
async.instance_eval {@queue.push Proc.new{}}
|
59
|
+
expect{async.send(:create_thread).join}.to raise_error(Timeout::Error)
|
54
60
|
end
|
55
61
|
end
|
56
62
|
|
@@ -59,7 +65,7 @@ describe RCrawler::Async do
|
|
59
65
|
mock = double("crawl mock")
|
60
66
|
mock.should_receive(:instance_eval)
|
61
67
|
RCrawler::Crawl.should_receive(:new).and_return(mock)
|
62
|
-
expect
|
68
|
+
expect{async.send(:exec_crawl, Proc.new{})}.not_to raise_error
|
63
69
|
end
|
64
70
|
end
|
65
71
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe RCrawler::CLI do
|
6
|
+
describe "#sc" do
|
7
|
+
let(:url) {"http://example.com"}
|
8
|
+
it "default filename should be a hashed url" do
|
9
|
+
args = ["sc", url]
|
10
|
+
RCrawler::Driver.any_instance.should_receive(:screenshot).with(url, "#{Digest::SHA1.hexdigest(url)}.png")
|
11
|
+
expect {
|
12
|
+
RCrawler::CLI.start(args)
|
13
|
+
}.not_to raise_error
|
14
|
+
end
|
15
|
+
|
16
|
+
it "output filename should be a specified filename" do
|
17
|
+
args = ["sc", url, "-o", "example.png"]
|
18
|
+
RCrawler::Driver.any_instance.should_receive(:screenshot).with(url, "example.png")
|
19
|
+
expect {
|
20
|
+
RCrawler::CLI.start(args)
|
21
|
+
}.not_to raise_error
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/spec/rcrawler_spec.rb
CHANGED
@@ -18,6 +18,11 @@ describe RCrawler do
|
|
18
18
|
mock.should_receive(:instance_eval)
|
19
19
|
RCrawler.crawl {}
|
20
20
|
end
|
21
|
+
|
22
|
+
it "exception should be generated if timeout" do
|
23
|
+
Timeout.should_receive(:timeout).and_raise(Timeout::Error)
|
24
|
+
expect{RCrawler.crawl {}}.to raise_error(Timeout::Error)
|
25
|
+
end
|
21
26
|
end
|
22
27
|
|
23
28
|
describe ".async" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rcrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- i2bskn
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: thor
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: capybara
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -111,7 +125,8 @@ dependencies:
|
|
111
125
|
description: The wrapper of capybara for crawler
|
112
126
|
email:
|
113
127
|
- i2bskn@gmail.com
|
114
|
-
executables:
|
128
|
+
executables:
|
129
|
+
- rcrawler
|
115
130
|
extensions: []
|
116
131
|
extra_rdoc_files: []
|
117
132
|
files:
|
@@ -122,14 +137,17 @@ files:
|
|
122
137
|
- LICENSE.txt
|
123
138
|
- README.md
|
124
139
|
- Rakefile
|
140
|
+
- bin/rcrawler
|
125
141
|
- lib/rcrawler.rb
|
126
142
|
- lib/rcrawler/async.rb
|
143
|
+
- lib/rcrawler/cli.rb
|
127
144
|
- lib/rcrawler/configuration.rb
|
128
145
|
- lib/rcrawler/crawl.rb
|
129
146
|
- lib/rcrawler/driver.rb
|
130
147
|
- lib/rcrawler/version.rb
|
131
148
|
- rcrawler.gemspec
|
132
149
|
- spec/rcrawler/async_spec.rb
|
150
|
+
- spec/rcrawler/cli_spec.rb
|
133
151
|
- spec/rcrawler/configuration_spec.rb
|
134
152
|
- spec/rcrawler/crawl_spec.rb
|
135
153
|
- spec/rcrawler/driver_spec.rb
|
@@ -155,12 +173,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
173
|
version: '0'
|
156
174
|
requirements: []
|
157
175
|
rubyforge_project:
|
158
|
-
rubygems_version: 2.0.
|
176
|
+
rubygems_version: 2.0.2
|
159
177
|
signing_key:
|
160
178
|
specification_version: 4
|
161
179
|
summary: The wrapper of capybara for crawler
|
162
180
|
test_files:
|
163
181
|
- spec/rcrawler/async_spec.rb
|
182
|
+
- spec/rcrawler/cli_spec.rb
|
164
183
|
- spec/rcrawler/configuration_spec.rb
|
165
184
|
- spec/rcrawler/crawl_spec.rb
|
166
185
|
- spec/rcrawler/driver_spec.rb
|