scrape 0.2 → 0.2.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scrape (0.2)
4
+ scrape (0.2.1)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
data/README.md CHANGED
@@ -36,9 +36,20 @@ Simply install the gem
36
36
  gem install scrape
37
37
  ```
38
38
 
39
+ or you can download the source by cloning the repository
40
+
41
+ ```
42
+ git clone https://github.com/evilmarty/scrape.git
43
+ ```
44
+
45
+ ## Contribute
46
+
47
+ Please fork the repository and make a pull request on Github.
48
+
49
+ If you discover an issue please [lodge it](https://github.com/evilmarty/scrape/issues).
50
+
39
51
  ## TODO
40
52
 
41
53
  * Fix bugs
42
- * Add support for Robots.txt
43
54
  * Depth limiting
44
55
  * Better docs
data/bin/scrape CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  $: << File.expand_path('../../lib', __FILE__)
4
4
 
5
- require "scrape"
6
5
  require "scrape/cli"
7
6
 
8
- Scrape::CLI.new(File.basename($0)).run(ARGV)
7
+ Scrape::CLI.new(File.basename($0), ARGV).run
data/lib/scrape/cli.rb CHANGED
@@ -1,24 +1,26 @@
1
1
  require "optparse"
2
+ require "scrape"
2
3
 
3
4
  class Scrape::CLI
4
- attr_reader :command, :pwd
5
+ attr_reader :command, :app, :options
5
6
 
6
- def initialize command, pwd = Dir.pwd
7
- @command, @pwd = command, pwd
8
- end
7
+ def initialize command, argv = ""
8
+ @command = command
9
+ @options = {:file => File.join(Dir.pwd, 'Scrapefile'), :ignore_robots_txt => false}
9
10
 
10
- def run argv
11
- options = {:file => File.join(pwd, 'Scrapefile')}
12
11
  opts = OptionParser.new do |opts|
13
12
  opts.banner = "Scrape #{Scrape::VERSION} - Usage: #{command} [options]"
14
13
  opts.separator ""
15
14
  opts.separator "Specific options:"
16
15
 
17
16
  opts.on "-f", "--scrapefile [FILE]", "Use FILE as scrapefile" do |file|
18
- options[:file] = File.expand_path file
17
+ @options[:file] = File.expand_path file.strip
19
18
  end
20
19
  opts.on "-i", "--ignore-robots-txt", "Ignore robots.txt" do
21
- options[:ignore_robots_txt] = true
20
+ @options[:ignore_robots_txt] = true
21
+ end
22
+ opts.on "-u", "--user-agent [AGENT]", "Change the user agent" do |agent|
23
+ Scrape.user_agent = agent.strip
22
24
  end
23
25
  opts.on_tail "-h", "--help", "Show this message" do
24
26
  puts opts
@@ -31,8 +33,15 @@ class Scrape::CLI
31
33
  end
32
34
  opts.parse argv
33
35
 
34
- Scrape::Application.new(options.delete(:file), options).run
36
+ @app = Scrape::Application.new options[:file], options
37
+ end
35
38
 
39
+ def run
40
+ app.run
41
+ exit
42
+ rescue SystemExit, Interrupt
43
+ puts ""
44
+ exit
36
45
  rescue Scrape::FileNotFound
37
46
  puts "#{command} aborted!"
38
47
  puts "No Scrapefile found"
@@ -1,3 +1,3 @@
1
1
  module Scrape
2
- VERSION = '0.2' unless defined? ::Scrape::VERSION
2
+ VERSION = '0.2.1' unless defined? ::Scrape::VERSION
3
3
  end
data/test/unit/cli_test.rb ADDED
@@ -0,0 +1,43 @@
1
+ require "test_helper"
2
+ require "scrape/cli"
3
+
4
+ class CLITest < Scrape::TestCase
5
+ test "should use default file when none specified" do
6
+ cli = Scrape::CLI.new "test", ""
7
+ assert_equal File.join(Dir.pwd, 'Scrapefile'), cli.options[:file]
8
+ end
9
+
10
+ test "should use the specified file" do
11
+ cli = Scrape::CLI.new "test", "-f /tmp/test1.scrape"
12
+ assert_equal "/tmp/test1.scrape", cli.options[:file]
13
+ end
14
+
15
+ test "should not ignore robots.txt file when not specified" do
16
+ cli = Scrape::CLI.new "test", ""
17
+ assert_equal false, cli.options[:ignore_robots_txt]
18
+ end
19
+
20
+ test "should ignore robots.txt file when specified" do
21
+ cli = Scrape::CLI.new "test", "-i"
22
+ assert_equal true, cli.options[:ignore_robots_txt]
23
+ end
24
+
25
+ test "should set the user agent when specified" do
26
+ user_agent = Scrape.user_agent
27
+ cli = Scrape::CLI.new "test", "-u Test"
28
+ assert_equal "Test", Scrape.user_agent
29
+ Scrape.user_agent = user_agent
30
+ end
31
+
32
+ test "should exit when help is displayed" do
33
+ assert_raises SystemExit do
34
+ capture_io{ Scrape::CLI.new "test", "-h" }
35
+ end
36
+ end
37
+
38
+ test "should exit when version is displayed" do
39
+ assert_raises SystemExit do
40
+ capture_io{ Scrape::CLI.new "test", "-v" }
41
+ end
42
+ end
43
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrape
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -77,6 +77,7 @@ files:
77
77
  - test/support/test3.scrape
78
78
  - test/test_helper.rb
79
79
  - test/unit/application_test.rb
80
+ - test/unit/cli_test.rb
80
81
  - test/unit/default_loader_test.rb
81
82
  - test/unit/match_test.rb
82
83
  - test/unit/robots_txt_rules_test.rb