scrape 0.2 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scrape (0.2)
4
+ scrape (0.2.1)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
data/README.md CHANGED
@@ -36,9 +36,20 @@ Simply install the gem
36
36
  gem install scrape
37
37
  ```
38
38
 
39
+ Or you can download the source by cloning the repository:
40
+
41
+ ```
42
+ git clone https://github.com/evilmarty/scrape.git
43
+ ```
44
+
45
+ ## Contribute
46
+
47
+ Please fork the repository and make a pull request on GitHub.
48
+
49
+ If you discover an issue, please [lodge it](https://github.com/evilmarty/scrape/issues).
50
+
39
51
  ## TODO
40
52
 
41
53
  * Fix bugs
42
- * Add support for Robots.txt
43
54
  * Depth limiting
44
55
  * Better docs
data/bin/scrape CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  $: << File.expand_path('../../lib', __FILE__)
4
4
 
5
- require "scrape"
6
5
  require "scrape/cli"
7
6
 
8
- Scrape::CLI.new(File.basename($0)).run(ARGV)
7
+ Scrape::CLI.new(File.basename($0), ARGV).run
data/lib/scrape/cli.rb CHANGED
@@ -1,24 +1,26 @@
1
1
  require "optparse"
2
+ require "scrape"
2
3
 
3
4
  class Scrape::CLI
4
- attr_reader :command, :pwd
5
+ attr_reader :command, :app, :options
5
6
 
6
- def initialize command, pwd = Dir.pwd
7
- @command, @pwd = command, pwd
8
- end
7
+ def initialize command, argv = ""
8
+ @command = command
9
+ @options = {:file => File.join(Dir.pwd, 'Scrapefile'), :ignore_robots_txt => false}
9
10
 
10
- def run argv
11
- options = {:file => File.join(pwd, 'Scrapefile')}
12
11
  opts = OptionParser.new do |opts|
13
12
  opts.banner = "Scrape #{Scrape::VERSION} - Usage: #{command} [options]"
14
13
  opts.separator ""
15
14
  opts.separator "Specific options:"
16
15
 
17
16
  opts.on "-f", "--scrapefile [FILE]", "Use FILE as scrapefile" do |file|
18
- options[:file] = File.expand_path file
17
+ @options[:file] = File.expand_path file.strip
19
18
  end
20
19
  opts.on "-i", "--ignore-robots-txt", "Ignore robots.txt" do
21
- options[:ignore_robots_txt] = true
20
+ @options[:ignore_robots_txt] = true
21
+ end
22
+ opts.on "-u", "--user-agent [AGENT]", "Change the user agent" do |agent|
23
+ Scrape.user_agent = agent.strip
22
24
  end
23
25
  opts.on_tail "-h", "--help", "Show this message" do
24
26
  puts opts
@@ -31,8 +33,15 @@ class Scrape::CLI
31
33
  end
32
34
  opts.parse argv
33
35
 
34
- Scrape::Application.new(options.delete(:file), options).run
36
+ @app = Scrape::Application.new options[:file], options
37
+ end
35
38
 
39
+ def run
40
+ app.run
41
+ exit
42
+ rescue SystemExit, Interrupt
43
+ puts ""
44
+ exit
36
45
  rescue Scrape::FileNotFound
37
46
  puts "#{command} aborted!"
38
47
  puts "No Scrapefile found"
@@ -1,3 +1,3 @@
1
1
  module Scrape
2
- VERSION = '0.2' unless defined? ::Scrape::VERSION
2
+ VERSION = '0.2.1' unless defined? ::Scrape::VERSION
3
3
  end
@@ -0,0 +1,43 @@
1
+ require "test_helper"
2
+ require "scrape/cli"
3
+
4
+ class CLITest < Scrape::TestCase
5
+ test "should use default file when none specified" do
6
+ cli = Scrape::CLI.new "test", ""
7
+ assert_equal File.join(Dir.pwd, 'Scrapefile'), cli.options[:file]
8
+ end
9
+
10
+ test "should use the specified file" do
11
+ cli = Scrape::CLI.new "test", "-f /tmp/test1.scrape"
12
+ assert_equal "/tmp/test1.scrape", cli.options[:file]
13
+ end
14
+
15
+ test "should not ignore robots.txt file when not specified" do
16
+ cli = Scrape::CLI.new "test", ""
17
+ assert_equal false, cli.options[:ignore_robots_txt]
18
+ end
19
+
20
+ test "should ignore robots.txt file when specified" do
21
+ cli = Scrape::CLI.new "test", "-i"
22
+ assert_equal true, cli.options[:ignore_robots_txt]
23
+ end
24
+
25
+ test "should set the user agent when specified" do
26
+ user_agent = Scrape.user_agent
27
+ cli = Scrape::CLI.new "test", "-u Test"
28
+ assert_equal "Test", Scrape.user_agent
29
+ Scrape.user_agent = user_agent
30
+ end
31
+
32
+ test "should exit when help is displayed" do
33
+ assert_raises SystemExit do
34
+ capture_io{ Scrape::CLI.new "test", "-h" }
35
+ end
36
+ end
37
+
38
+ test "should exit when version is displayed" do
39
+ assert_raises SystemExit do
40
+ capture_io{ Scrape::CLI.new "test", "-v" }
41
+ end
42
+ end
43
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrape
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -77,6 +77,7 @@ files:
77
77
  - test/support/test3.scrape
78
78
  - test/test_helper.rb
79
79
  - test/unit/application_test.rb
80
+ - test/unit/cli_test.rb
80
81
  - test/unit/default_loader_test.rb
81
82
  - test/unit/match_test.rb
82
83
  - test/unit/robots_txt_rules_test.rb