rcrawler 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +0 -3
- data/README.md +8 -0
- data/bin/rcrawler +6 -0
- data/lib/rcrawler.rb +3 -0
- data/lib/rcrawler/async.rb +0 -1
- data/lib/rcrawler/cli.rb +19 -0
- data/lib/rcrawler/version.rb +1 -1
- data/rcrawler.gemspec +1 -0
- data/spec/rcrawler/async_spec.rb +8 -2
- data/spec/rcrawler/cli_spec.rb +24 -0
- data/spec/rcrawler_spec.rb +5 -0
- metadata +23 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56f84a572fdd53fcb80d5473e4e47a4a549a4d85
|
4
|
+
data.tar.gz: 410785518581055a9cc5cdb264027b6d2157661b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 387532aebc322463c4415b8d7fb79c545dea09b49737822df9e1590a50199237f01680ae1e78759ba17376c4d03bfbee0dc75a4a5e8d5c0261b5d313cedfcb31
|
7
|
+
data.tar.gz: abae8d6cb3ae5ff50f81905875fe6d269300108dae7b058c12c8693c3b5264a96a4211cd73d1ba362decd0d0126a3d7ecb4629ff5b0a153e3a222b993badc821
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -79,6 +79,14 @@ RCrawler.async do
|
|
79
79
|
end
|
80
80
|
end
|
81
81
|
```
|
82
|
+
|
83
|
+
#### Command
|
84
|
+
|
85
|
+
% rcrawler help
|
86
|
+
Commands:
|
87
|
+
rcrawler help [COMMAND] # Describe available commands or one specific command
|
88
|
+
rcrawler sc http://example.com # Get screen shot
|
89
|
+
|
82
90
|
## Contributing
|
83
91
|
|
84
92
|
1. Fork it
|
data/bin/rcrawler
ADDED
data/lib/rcrawler.rb
CHANGED
@@ -5,8 +5,11 @@ require "capybara/dsl"
|
|
5
5
|
require "capybara-webkit"
|
6
6
|
require "nokogiri"
|
7
7
|
require "headless"
|
8
|
+
require "timeout"
|
9
|
+
require "thor"
|
8
10
|
|
9
11
|
require "rcrawler/version"
|
12
|
+
require "rcrawler/cli"
|
10
13
|
require "rcrawler/configuration"
|
11
14
|
require "rcrawler/driver"
|
12
15
|
require "rcrawler/crawl"
|
data/lib/rcrawler/async.rb
CHANGED
data/lib/rcrawler/cli.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
module RCrawler
|
6
|
+
class CLI < Thor
|
7
|
+
desc "sc http://example.com", "Get a screen shot"
|
8
|
+
long_desc <<-LONGDESC
|
9
|
+
`sc` is get a screen shot. Save to current directory if filename not specified.
|
10
|
+
LONGDESC
|
11
|
+
option :output, aliases: "-o", type: :string, desc: "Output filename."
|
12
|
+
def sc(url)
|
13
|
+
filename = options.fetch("output", "#{Digest::SHA1.hexdigest(url)}.png")
|
14
|
+
RCrawler.crawl do
|
15
|
+
screenshot(url, filename)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/rcrawler/version.rb
CHANGED
data/rcrawler.gemspec
CHANGED
data/spec/rcrawler/async_spec.rb
CHANGED
@@ -50,7 +50,13 @@ describe RCrawler::Async do
|
|
50
50
|
|
51
51
|
it "exec_crawl method should be called" do
|
52
52
|
async.crawl {}
|
53
|
-
expect{async.send(:create_thread)}.not_to raise_error
|
53
|
+
expect{async.send(:create_thread).join}.not_to raise_error
|
54
|
+
end
|
55
|
+
|
56
|
+
it "exception should be generated if timeout" do
|
57
|
+
Timeout.should_receive(:timeout).and_raise(Timeout::Error)
|
58
|
+
async.instance_eval {@queue.push Proc.new{}}
|
59
|
+
expect{async.send(:create_thread).join}.to raise_error(Timeout::Error)
|
54
60
|
end
|
55
61
|
end
|
56
62
|
|
@@ -59,7 +65,7 @@ describe RCrawler::Async do
|
|
59
65
|
mock = double("crawl mock")
|
60
66
|
mock.should_receive(:instance_eval)
|
61
67
|
RCrawler::Crawl.should_receive(:new).and_return(mock)
|
62
|
-
expect
|
68
|
+
expect{async.send(:exec_crawl, Proc.new{})}.not_to raise_error
|
63
69
|
end
|
64
70
|
end
|
65
71
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe RCrawler::CLI do
|
6
|
+
describe "#sc" do
|
7
|
+
let(:url) {"http://example.com"}
|
8
|
+
it "default filename should be a hashed url" do
|
9
|
+
args = ["sc", url]
|
10
|
+
RCrawler::Driver.any_instance.should_receive(:screenshot).with(url, "#{Digest::SHA1.hexdigest(url)}.png")
|
11
|
+
expect {
|
12
|
+
RCrawler::CLI.start(args)
|
13
|
+
}.not_to raise_error
|
14
|
+
end
|
15
|
+
|
16
|
+
it "output filename should be a specified filename" do
|
17
|
+
args = ["sc", url, "-o", "example.png"]
|
18
|
+
RCrawler::Driver.any_instance.should_receive(:screenshot).with(url, "example.png")
|
19
|
+
expect {
|
20
|
+
RCrawler::CLI.start(args)
|
21
|
+
}.not_to raise_error
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/spec/rcrawler_spec.rb
CHANGED
@@ -18,6 +18,11 @@ describe RCrawler do
|
|
18
18
|
mock.should_receive(:instance_eval)
|
19
19
|
RCrawler.crawl {}
|
20
20
|
end
|
21
|
+
|
22
|
+
it "exception should be generated if timeout" do
|
23
|
+
Timeout.should_receive(:timeout).and_raise(Timeout::Error)
|
24
|
+
expect{RCrawler.crawl {}}.to raise_error(Timeout::Error)
|
25
|
+
end
|
21
26
|
end
|
22
27
|
|
23
28
|
describe ".async" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rcrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- i2bskn
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: thor
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: capybara
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -111,7 +125,8 @@ dependencies:
|
|
111
125
|
description: The wrapper of capybara for crawler
|
112
126
|
email:
|
113
127
|
- i2bskn@gmail.com
|
114
|
-
executables:
|
128
|
+
executables:
|
129
|
+
- rcrawler
|
115
130
|
extensions: []
|
116
131
|
extra_rdoc_files: []
|
117
132
|
files:
|
@@ -122,14 +137,17 @@ files:
|
|
122
137
|
- LICENSE.txt
|
123
138
|
- README.md
|
124
139
|
- Rakefile
|
140
|
+
- bin/rcrawler
|
125
141
|
- lib/rcrawler.rb
|
126
142
|
- lib/rcrawler/async.rb
|
143
|
+
- lib/rcrawler/cli.rb
|
127
144
|
- lib/rcrawler/configuration.rb
|
128
145
|
- lib/rcrawler/crawl.rb
|
129
146
|
- lib/rcrawler/driver.rb
|
130
147
|
- lib/rcrawler/version.rb
|
131
148
|
- rcrawler.gemspec
|
132
149
|
- spec/rcrawler/async_spec.rb
|
150
|
+
- spec/rcrawler/cli_spec.rb
|
133
151
|
- spec/rcrawler/configuration_spec.rb
|
134
152
|
- spec/rcrawler/crawl_spec.rb
|
135
153
|
- spec/rcrawler/driver_spec.rb
|
@@ -155,12 +173,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
173
|
version: '0'
|
156
174
|
requirements: []
|
157
175
|
rubyforge_project:
|
158
|
-
rubygems_version: 2.0.
|
176
|
+
rubygems_version: 2.0.2
|
159
177
|
signing_key:
|
160
178
|
specification_version: 4
|
161
179
|
summary: The wrapper of capybara for crawler
|
162
180
|
test_files:
|
163
181
|
- spec/rcrawler/async_spec.rb
|
182
|
+
- spec/rcrawler/cli_spec.rb
|
164
183
|
- spec/rcrawler/configuration_spec.rb
|
165
184
|
- spec/rcrawler/crawl_spec.rb
|
166
185
|
- spec/rcrawler/driver_spec.rb
|