scrape 0.2 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +12 -1
- data/bin/scrape +1 -2
- data/lib/scrape/cli.rb +18 -9
- data/lib/scrape/version.rb +1 -1
- data/test/unit/cli_test.rb +43 -0
- metadata +2 -1
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -36,9 +36,20 @@ Simply install the gem
|
|
36
36
|
gem install scrape
|
37
37
|
```
|
38
38
|
|
39
|
+
Or you can download the source by cloning the repository:
|
40
|
+
|
41
|
+
```
|
42
|
+
git clone https://github.com/evilmarty/scrape.git
|
43
|
+
```
|
44
|
+
|
45
|
+
## Contribute
|
46
|
+
|
47
|
+
Please fork the repository and make a pull request on GitHub.
|
48
|
+
|
49
|
+
If you discover an issue, please [lodge it](https://github.com/evilmarty/scrape/issues).
|
50
|
+
|
39
51
|
## TODO
|
40
52
|
|
41
53
|
* Fix bugs
|
42
|
-
* Add support for Robots.txt
|
43
54
|
* Depth limiting
|
44
55
|
* Better docs
|
data/bin/scrape
CHANGED
data/lib/scrape/cli.rb
CHANGED
@@ -1,24 +1,26 @@
|
|
1
1
|
require "optparse"
|
2
|
+
require "scrape"
|
2
3
|
|
3
4
|
class Scrape::CLI
|
4
|
-
attr_reader :command, :
|
5
|
+
attr_reader :command, :app, :options
|
5
6
|
|
6
|
-
def initialize command,
|
7
|
-
@command
|
8
|
-
|
7
|
+
def initialize command, argv = ""
|
8
|
+
@command = command
|
9
|
+
@options = {:file => File.join(Dir.pwd, 'Scrapefile'), :ignore_robots_txt => false}
|
9
10
|
|
10
|
-
def run argv
|
11
|
-
options = {:file => File.join(pwd, 'Scrapefile')}
|
12
11
|
opts = OptionParser.new do |opts|
|
13
12
|
opts.banner = "Scrape #{Scrape::VERSION} - Usage: #{command} [options]"
|
14
13
|
opts.separator ""
|
15
14
|
opts.separator "Specific options:"
|
16
15
|
|
17
16
|
opts.on "-f", "--scrapefile [FILE]", "Use FILE as scrapefile" do |file|
|
18
|
-
options[:file] = File.expand_path file
|
17
|
+
@options[:file] = File.expand_path file.strip
|
19
18
|
end
|
20
19
|
opts.on "-i", "--ignore-robots-txt", "Ignore robots.txt" do
|
21
|
-
options[:ignore_robots_txt] = true
|
20
|
+
@options[:ignore_robots_txt] = true
|
21
|
+
end
|
22
|
+
opts.on "-u", "--user-agent [AGENT]", "Change the user agent" do |agent|
|
23
|
+
Scrape.user_agent = agent.strip
|
22
24
|
end
|
23
25
|
opts.on_tail "-h", "--help", "Show this message" do
|
24
26
|
puts opts
|
@@ -31,8 +33,15 @@ class Scrape::CLI
|
|
31
33
|
end
|
32
34
|
opts.parse argv
|
33
35
|
|
34
|
-
Scrape::Application.new
|
36
|
+
@app = Scrape::Application.new options[:file], options
|
37
|
+
end
|
35
38
|
|
39
|
+
def run
|
40
|
+
app.run
|
41
|
+
exit
|
42
|
+
rescue SystemExit, Interrupt
|
43
|
+
puts ""
|
44
|
+
exit
|
36
45
|
rescue Scrape::FileNotFound
|
37
46
|
puts "#{command} aborted!"
|
38
47
|
puts "No Scrapefile found"
|
data/lib/scrape/version.rb
CHANGED
data/test/unit/cli_test.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
require "scrape/cli"
|
3
|
+
|
4
|
+
class CLITest < Scrape::TestCase
|
5
|
+
test "should use default file when none specified" do
|
6
|
+
cli = Scrape::CLI.new "test", ""
|
7
|
+
assert_equal File.join(Dir.pwd, 'Scrapefile'), cli.options[:file]
|
8
|
+
end
|
9
|
+
|
10
|
+
test "should use the specified file" do
|
11
|
+
cli = Scrape::CLI.new "test", "-f /tmp/test1.scrape"
|
12
|
+
assert_equal "/tmp/test1.scrape", cli.options[:file]
|
13
|
+
end
|
14
|
+
|
15
|
+
test "should not ignore robots.txt file when not specified" do
|
16
|
+
cli = Scrape::CLI.new "test", ""
|
17
|
+
assert_equal false, cli.options[:ignore_robots_txt]
|
18
|
+
end
|
19
|
+
|
20
|
+
test "should ignore robots.txt file when specified" do
|
21
|
+
cli = Scrape::CLI.new "test", "-i"
|
22
|
+
assert_equal true, cli.options[:ignore_robots_txt]
|
23
|
+
end
|
24
|
+
|
25
|
+
test "should set the user agent when specified" do
|
26
|
+
user_agent = Scrape.user_agent
|
27
|
+
cli = Scrape::CLI.new "test", "-u Test"
|
28
|
+
assert_equal "Test", Scrape.user_agent
|
29
|
+
Scrape.user_agent = user_agent
|
30
|
+
end
|
31
|
+
|
32
|
+
test "should exit when help is displayed" do
|
33
|
+
assert_raises SystemExit do
|
34
|
+
capture_io{ Scrape::CLI.new "test", "-h" }
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
test "should exit when version is displayed" do
|
39
|
+
assert_raises SystemExit do
|
40
|
+
capture_io{ Scrape::CLI.new "test", "-v" }
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -77,6 +77,7 @@ files:
|
|
77
77
|
- test/support/test3.scrape
|
78
78
|
- test/test_helper.rb
|
79
79
|
- test/unit/application_test.rb
|
80
|
+
- test/unit/cli_test.rb
|
80
81
|
- test/unit/default_loader_test.rb
|
81
82
|
- test/unit/match_test.rb
|
82
83
|
- test/unit/robots_txt_rules_test.rb
|