scrape 0.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +12 -1
- data/bin/scrape +1 -2
- data/lib/scrape/cli.rb +18 -9
- data/lib/scrape/version.rb +1 -1
- data/test/unit/cli_test.rb +43 -0
- metadata +2 -1
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -36,9 +36,20 @@ Simply install the gem
|
|
36
36
|
gem install scrape
|
37
37
|
```
|
38
38
|
|
39
|
+
or you can download the source by cloning the repository
|
40
|
+
|
41
|
+
```
|
42
|
+
git clone https://github.com/evilmarty/scrape.git
|
43
|
+
```
|
44
|
+
|
45
|
+
## Contribute
|
46
|
+
|
47
|
+
Please fork the repository and make a pull request on GitHub.
|
48
|
+
|
49
|
+
If you discover an issue, please [lodge it](https://github.com/evilmarty/scrape/issues).
|
50
|
+
|
39
51
|
## TODO
|
40
52
|
|
41
53
|
* Fix bugs
|
42
|
-
* Add support for Robots.txt
|
43
54
|
* Depth limiting
|
44
55
|
* Better docs
|
data/bin/scrape
CHANGED
data/lib/scrape/cli.rb
CHANGED
@@ -1,24 +1,26 @@
|
|
1
1
|
require "optparse"
|
2
|
+
require "scrape"
|
2
3
|
|
3
4
|
class Scrape::CLI
|
4
|
-
attr_reader :command, :
|
5
|
+
attr_reader :command, :app, :options
|
5
6
|
|
6
|
-
def initialize command,
|
7
|
-
@command
|
8
|
-
|
7
|
+
def initialize command, argv = ""
|
8
|
+
@command = command
|
9
|
+
@options = {:file => File.join(Dir.pwd, 'Scrapefile'), :ignore_robots_txt => false}
|
9
10
|
|
10
|
-
def run argv
|
11
|
-
options = {:file => File.join(pwd, 'Scrapefile')}
|
12
11
|
opts = OptionParser.new do |opts|
|
13
12
|
opts.banner = "Scrape #{Scrape::VERSION} - Usage: #{command} [options]"
|
14
13
|
opts.separator ""
|
15
14
|
opts.separator "Specific options:"
|
16
15
|
|
17
16
|
opts.on "-f", "--scrapefile [FILE]", "Use FILE as scrapefile" do |file|
|
18
|
-
options[:file] = File.expand_path file
|
17
|
+
@options[:file] = File.expand_path file.strip
|
19
18
|
end
|
20
19
|
opts.on "-i", "--ignore-robots-txt", "Ignore robots.txt" do
|
21
|
-
options[:ignore_robots_txt] = true
|
20
|
+
@options[:ignore_robots_txt] = true
|
21
|
+
end
|
22
|
+
opts.on "-u", "--user-agent [AGENT]", "Change the user agent" do |agent|
|
23
|
+
Scrape.user_agent = agent.strip
|
22
24
|
end
|
23
25
|
opts.on_tail "-h", "--help", "Show this message" do
|
24
26
|
puts opts
|
@@ -31,8 +33,15 @@ class Scrape::CLI
|
|
31
33
|
end
|
32
34
|
opts.parse argv
|
33
35
|
|
34
|
-
Scrape::Application.new
|
36
|
+
@app = Scrape::Application.new options[:file], options
|
37
|
+
end
|
35
38
|
|
39
|
+
def run
|
40
|
+
app.run
|
41
|
+
exit
|
42
|
+
rescue SystemExit, Interrupt
|
43
|
+
puts ""
|
44
|
+
exit
|
36
45
|
rescue Scrape::FileNotFound
|
37
46
|
puts "#{command} aborted!"
|
38
47
|
puts "No Scrapefile found"
|
data/lib/scrape/version.rb
CHANGED
data/test/unit/cli_test.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
require "scrape/cli"
|
3
|
+
|
4
|
+
class CLITest < Scrape::TestCase
|
5
|
+
test "should use default file when none specified" do
|
6
|
+
cli = Scrape::CLI.new "test", ""
|
7
|
+
assert_equal File.join(Dir.pwd, 'Scrapefile'), cli.options[:file]
|
8
|
+
end
|
9
|
+
|
10
|
+
test "should use the specified file" do
|
11
|
+
cli = Scrape::CLI.new "test", "-f /tmp/test1.scrape"
|
12
|
+
assert_equal "/tmp/test1.scrape", cli.options[:file]
|
13
|
+
end
|
14
|
+
|
15
|
+
test "should not ignore robots.txt file when not specified" do
|
16
|
+
cli = Scrape::CLI.new "test", ""
|
17
|
+
assert_equal false, cli.options[:ignore_robots_txt]
|
18
|
+
end
|
19
|
+
|
20
|
+
test "should ignore robots.txt file when specified" do
|
21
|
+
cli = Scrape::CLI.new "test", "-i"
|
22
|
+
assert_equal true, cli.options[:ignore_robots_txt]
|
23
|
+
end
|
24
|
+
|
25
|
+
test "should set the user agent when specified" do
|
26
|
+
user_agent = Scrape.user_agent
|
27
|
+
cli = Scrape::CLI.new "test", "-u Test"
|
28
|
+
assert_equal "Test", Scrape.user_agent
|
29
|
+
Scrape.user_agent = user_agent
|
30
|
+
end
|
31
|
+
|
32
|
+
test "should exit when help is displayed" do
|
33
|
+
assert_raises SystemExit do
|
34
|
+
capture_io{ Scrape::CLI.new "test", "-h" }
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
test "should exit when version is displayed" do
|
39
|
+
assert_raises SystemExit do
|
40
|
+
capture_io{ Scrape::CLI.new "test", "-v" }
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -77,6 +77,7 @@ files:
|
|
77
77
|
- test/support/test3.scrape
|
78
78
|
- test/test_helper.rb
|
79
79
|
- test/unit/application_test.rb
|
80
|
+
- test/unit/cli_test.rb
|
80
81
|
- test/unit/default_loader_test.rb
|
81
82
|
- test/unit/match_test.rb
|
82
83
|
- test/unit/robots_txt_rules_test.rb
|