botch 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/README.md +45 -0
- data/Rakefile +7 -0
- data/botch.gemspec +16 -0
- data/lib/botch.rb +5 -0
- data/lib/botch/base.rb +204 -0
- data/lib/botch/clients/abstract_client.rb +15 -0
- data/lib/botch/clients/faraday_client.rb +28 -0
- data/lib/botch/clients/mechanize_client.rb +36 -0
- data/spec/botch_spec.rb +67 -0
- data/spec/faraday_spec.rb +57 -0
- data/spec/mechanize_spec.rb +57 -0
- data/spec/spec_helper.rb +39 -0
- metadata +115 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8ef71ba47deb27d75990a88721b652a6974b0ac4
|
4
|
+
data.tar.gz: 6e2db20439b70a7f34285b44ee79246cf88ccf05
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f7d6b4d3cd2f94ff018fb70be5d8e71237f52bc82404167aafd34f08b4f473a830b3cda4d52aefe792d65bef5659fa3df0054b2e9d9554d62da2c77b1d39ec9c
|
7
|
+
data.tar.gz: 09725780996179fdd914675a15f06bcf792819f02e0a20bcc5a9403bfcde08a859e8ff8536a5f1cba006c2d49dfce23630249a2e540da0c4237fe712a29bd2fa
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Botch
|
2
|
+
|
3
|
+
Botch is a simple DSL for quickly creating web crawlers.
|
4
|
+
|
5
|
+
Inspired by Sinatra.
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
require 'botch'
|
11
|
+
require 'kconv'
|
12
|
+
|
13
|
+
class SampleBotch < Botch::Base
|
14
|
+
set :user_agent, "SampleBotch"
|
15
|
+
|
16
|
+
filter(:all) { status == 200 }
|
17
|
+
rule(:all) { |response| body.toutf8 }
|
18
|
+
end
|
19
|
+
|
20
|
+
if $0 == __FILE__
|
21
|
+
SampleBotch.run("http://namusyaka.info/") do |response|
|
22
|
+
puts response
|
23
|
+
end
|
24
|
+
end
|
25
|
+
```
|
26
|
+
|
27
|
+
## TODO
|
28
|
+
|
29
|
+
- RSpec
|
30
|
+
- GET/POST method
|
31
|
+
- Documentation
|
32
|
+
- Classic style
|
33
|
+
|
34
|
+
## Contributing to Botch
|
35
|
+
|
36
|
+
1. fork the project.
|
37
|
+
2. create your feature branch. (`git checkout -b my-feature`)
|
38
|
+
3. commit your changes. (`git commit -am 'commit message.'`)
|
39
|
+
4. push to the branch. (`git push origin my-feature`)
|
40
|
+
5. send pull request.
|
41
|
+
|
42
|
+
## License
|
43
|
+
|
44
|
+
MIT
|
45
|
+
|
data/Rakefile
ADDED
data/botch.gemspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Gem specification for botch.
# Loads lib/botch so that Botch::VERSION can be referenced below.
require File.expand_path("../lib/botch", __FILE__)

Gem::Specification.new "botch", Botch::VERSION do |s|
  s.description = "Botch is a DSL for quickly creating web crawlers. Inspired by Sinatra."
  s.summary     = "A DSL for web crawlers."
  s.authors     = ["namusyaka"]
  s.email       = "namusyaka@gmail.com"
  s.homepage    = "https://github.com/namusyaka/botch"
  # The README declares the project as MIT licensed.
  s.license     = "MIT"
  # Package everything tracked by git except .gitignore itself.
  s.files       = `git ls-files`.split("\n") - %w(.gitignore)
  s.test_files  = s.files.select { |path| path =~ /^spec\/.*_spec\.rb/ }

  s.add_dependency "faraday"
  s.add_dependency "mechanize"
  s.add_development_dependency "rspec"
  s.add_development_dependency "fakeweb", ["~> 1.3"]
end
|
data/lib/botch.rb
ADDED
data/lib/botch/base.rb
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
require 'rubygems' unless defined?(Gem)
|
2
|
+
require 'faraday'
|
3
|
+
require 'mechanize'
|
4
|
+
|
5
|
+
%w(
|
6
|
+
clients/abstract_client
|
7
|
+
clients/faraday_client
|
8
|
+
clients/mechanize_client
|
9
|
+
).each{ |path| require File.expand_path("../#{path}", __FILE__) }
|
10
|
+
|
11
|
+
module Botch
|
12
|
+
# Ordered collection of labelled route entries.
#
# Every entry is a Hash holding :label and :block plus whatever options were
# given to #add; #inject consults the optional :map entry to decide whether an
# entry applies to a URL.
class Route
  attr_accessor :routes

  def initialize
    @routes = []
  end

  # Registers +block+ under +label+.
  #
  # When an entry for +label+ already exists only its block (and label) are
  # replaced; the options stored with the first registration are kept.
  # Otherwise a new entry is appended.
  #
  # @param label   [Object] identifier of the route
  # @param options [Hash]   extra entry data, e.g. :map => Regexp or String
  # @raise [ArgumentError] when no block is given
  def add(label, options = {}, &block)
    raise ArgumentError, "a block is required" unless block_given?

    position = index(label)
    if position
      route = @routes[position]
      route[:block] = block
      route[:label] = label
    else
      # Store a copy: writing :block/:label into the caller's hash would make
      # two routes registered with the same options object alias each other.
      @routes << options.merge(:block => block, :label => label)
    end
  end

  # Removes every entry whose label equals +label+.
  def del(label)
    @routes.delete_if { |route| route[:label] == label }
  end

  # @return [Boolean] true when an entry for +label+ is registered
  def exist?(label)
    !index(label).nil?
  end

  alias :exists? :exist?

  # @return [Integer, nil] position of the first entry matching +label+.
  #   The stored label is compared with ===, as in the original implementation.
  def index(label)
    @routes.index { |route| route[:label] === label }
  end

  # Returns the entries that apply to +url+, in registration order.
  def inject(url)
    @routes.select { |route| map_validation(url, route[:map]) }
  end

  private

  # A Regexp map must match the URL, a String map must be contained in it;
  # any other map (including nil, i.e. no :map option) always applies.
  def map_validation(url, map)
    case map
    when Regexp then url =~ map
    when String then url.include?(map)
    else true
    end
  end
end
|
64
|
+
|
65
|
+
# Defines Filter and Rule as empty subclasses of Route. Note that const_set is
# called on Object, so both become top-level constants rather than being
# namespaced as Botch::Filter and Botch::Rule.
%w( Filter Rule ).each { |klass| Object.const_set(klass, Class.new(Route)) }
|
66
|
+
|
67
|
+
# Base class for crawlers. Subclasses declare filters and rules with the
# class-level DSL (set / filter / rule / helpers) and start crawling with .run.
#
# Filter, rule and run blocks are turned into methods bound to one shared
# instance of the class (see .instance), so inside them +header+, +body+ and
# +status+ refer to the response currently being processed.
class Base
  # Per-response state exposed through readers; restored to these values once
  # a URL has been processed.
  DEFAULT_INSTANCE_VARIABLES = { :header => nil, :body => nil, :status => nil }.freeze
  attr_reader(*DEFAULT_INSTANCE_VARIABLES.keys)

  def initialize
    @header = @body = @status = nil
  end

  # The following readers delegate to the class so that blocks evaluated on
  # the instance can reach the class-level configuration.

  def client
    self.class.client
  end

  def options
    self.class.options
  end

  def settings
    self.class.settings
  end

  class << self
    # Class variable of Base: the same registry is shared by Base and all of
    # its subclasses, so routes registered through one crawler class are
    # visible to every other one.
    @@routes = { :filter => Filter.new, :rule => Rule.new }

    attr_reader :client

    # The single instance on which all route and run blocks are evaluated.
    def instance
      @instance ||= self.new
    end

    # Adds helper methods: evaluates +block+ in the class body and/or includes
    # the given modules.
    def helpers(*extensions, &block)
      class_eval(&block) if block_given?
      include(*extensions) if extensions.any?
    end

    # Stores a configuration value. Resolution order:
    # 1. a class-level writer named "#{key}=" (e.g. client=) is called;
    # 2. a key already present in settings is overwritten there;
    # 3. anything else is stored in options, which is handed to the client on
    #    every request.
    def set(key, value = nil)
      return __send__("#{key}=", value) if respond_to?("#{key}=")

      key_symbol = key.to_sym
      return settings[key_symbol] = value if settings.has_key?(key_symbol)

      options[key_symbol] = value
    end

    # Registers +block+ as a route of the given type (:filter or :rule).
    # The block is compiled into a method bound to the shared instance so it
    # runs with that instance as self.
    def route(type, label, options = {}, &block)
      bound_method = generate_method("#{type} #{label}", &block).bind(instance)
      wrapper = generate_wrapper(&bound_method)

      @@routes[type.to_sym].add(label, options, &wrapper)
    end

    def filter(label, options = {}, &block)
      route(:filter, label, options, &block)
    end

    def rule(label, options = {}, &block)
      route(:rule, label, options, &block)
    end

    # Wraps +method+ in a proc that tolerates being called with the current
    # response: zero-arity methods are invoked without arguments, all others
    # receive the arguments unchanged.
    #
    # The arguments are collected with *args so that a response which is an
    # Array or a Hash (e.g. the return value of a previous rule) is passed on
    # as one object instead of being exploded into several arguments.
    def generate_wrapper(&method)
      if method.arity != 0
        proc { |*args| method.call(*args) }
      else
        proc { |*_args| method.call }
      end
    end

    # Turns +block+ into an UnboundMethod of this class by defining it under
    # +method_name+, grabbing it, and removing the temporary definition again.
    def generate_method(method_name, &block)
      define_method(method_name, &block)
      unbound_method = instance_method(method_name)
      remove_method(method_name)
      unbound_method
    end

    # Restores settings to their defaults and returns the fresh settings hash.
    def reset!
      @settings = nil
      settings
    end

    # Fetches every URL and returns an array with one entry per URL.
    #
    # For each URL the matching filters are evaluated first; when the
    # :disabled_invalid setting is truthy and a filter fails, the URL is
    # skipped and its entry is nil. Otherwise the matching rules transform the
    # response in sequence and, if a block was given, the block receives the
    # result and the filter verdict (+valid+ is nil when no filter matched);
    # the block's return value becomes the entry.
    def run(*urls, &block)
      if block_given?
        bound_method = generate_method(:main_unbound_method, &block).bind(instance)
        # Adapt the call to however many parameters the block declares.
        block = case bound_method.arity
                when 2 then proc { |r, v| bound_method.call(r, v) }
                when 1 then proc { |r, _v| bound_method.call(r) }
                else        proc { |_r, _v| bound_method.call }
                end
      end
      set_default_options! unless self.client

      urls.map do |url|
        begin
          filters = @@routes[:filter].inject(url)
          rules   = @@routes[:rule].inject(url)
          response = self.client.get(url, options)

          set_instance_variables(:header => response[:header],
                                 :body   => response[:body],
                                 :status => response[:status])

          response = response[:response]

          unless filters.empty?
            valid = filters.map { |_filter| _filter[:block].call(response) }.all?
            next if settings[:disabled_invalid] && !valid
          end

          unless rules.empty?
            # Each rule receives the previous rule's result; a nil/false
            # result falls back to the raw client response.
            response = rules.inject(nil) do |result, _rule|
              _rule[:block].call(result || response)
            end
          end

          response = block.call(response, valid) if block_given?
          response
        ensure
          # Clear per-response state even when the URL was skipped via `next`
          # or a block raised, so nothing stale leaks into later processing.
          set_instance_variables(DEFAULT_INSTANCE_VARIABLES)
        end
      end
    end

    # Selects the HTTP client by name (:faraday or :mechanize).
    # Unknown names are ignored and leave the current client untouched.
    def client=(name)
      @client = Client.const_get("#{name.to_s.capitalize}Client").new(settings) if clients.include?(name)
    end

    def settings
      @settings ||= { :disabled_invalid => false }
    end

    def options
      @options ||= {}
    end

    private

    def set_instance_variables(pairs = {})
      pairs.each_pair { |name, value| instance.instance_variable_set("@#{name}".to_sym, value) }
    end

    # Names accepted by client=.
    def clients
      @_clients ||= [:faraday, :mechanize]
    end

    # Used by run when no client has been configured explicitly.
    def set_default_options!
      self.client = :faraday
    end
  end
end
|
204
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Botch
  module Client
    # HTTP client backed by a Faraday connection.
    class FaradayClient < AbstractClient
      # NOTE(review): +settings+ is forwarded to Faraday.new unchanged. The
      # caller in Botch::Base passes Botch's own settings hash (which contains
      # :disabled_invalid) -- confirm the installed Faraday version tolerates
      # option keys it does not know.
      def initialize(settings = {})
        @client = :faraday
        @handler = Faraday.new(settings) do |builder|
          # Request middleware first, adapter last: the adapter performs the
          # actual HTTP call, so anything registered after it never sees the
          # outgoing request.
          builder.request :url_encoded
          builder.adapter :net_http
        end
      end

      # Performs a GET request. Every entry of +options+ is set as a request
      # header on the connection before the request is sent.
      #
      # @return [Hash] see #parse_response
      def get(url, options)
        options.each_pair { |key, value| @handler.headers[key] = value }
        response = @handler.get(url)
        parse_response(response)
      end

      # Normalizes a Faraday response into the hash consumed by Botch::Base:
      # :status, :header, :body and the raw :response object.
      def parse_response(response)
        {
          :status   => response.status,
          :header   => response.headers,
          :body     => response.body,
          :response => response
        }
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Botch
  module Client
    # Stand-in for a Mechanize page when the server answered with an error
    # status. Exposes the same readers (#code, #header, #body) that
    # MechanizeClient#parse_response calls on a page.
    class MechanizeResponseError
      attr_accessor :code, :header, :body
      # The exception this object was built from.
      attr_reader :response

      def initialize(response_error)
        @code     = response_error.response_code
        @body     = ""
        @header   = Mechanize::Headers.new
        @response = response_error
      end
    end

    # HTTP client backed by a Mechanize agent.
    class MechanizeClient < AbstractClient
      def initialize(options = {})
        @client = :mechanize
        @handler = Mechanize.new
      end

      # Performs a GET request, applying options[:user_agent] when present.
      #
      # HTTP error statuses, which Mechanize reports by raising
      # Mechanize::ResponseCodeError, are converted into a
      # MechanizeResponseError so they flow through the normal result hash.
      # Any other failure (DNS, timeouts, ...) is left to propagate: such
      # errors carry no response code, and wrapping them would only replace
      # the real error with a NoMethodError.
      #
      # @return [Hash] see #parse_response
      def get(url, options = {})
        @handler.user_agent = options[:user_agent] if options[:user_agent]
        mechanize_page = begin
          @handler.get(url)
        rescue Mechanize::ResponseCodeError => error
          MechanizeResponseError.new(error)
        end
        parse_response(mechanize_page)
      end

      # Normalizes a page (or MechanizeResponseError) into the hash consumed
      # by Botch::Base: :header, :status (as Integer), :body and the raw
      # :response object.
      def parse_response(response)
        {
          :header   => response.header,
          :status   => response.code.to_i,
          :body     => response.body,
          :response => response
        }
      end
    end
  end
end
|
data/spec/botch_spec.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Specs for the Botch::Base DSL: default client, set (settings vs. options)
# and instance-variable sharing between filter and rule blocks.
#
# Boolean expectations use eq(true) instead of be_true, which RSpec 3 removed;
# eq works on both RSpec 2 and 3.
module Botch
  describe do
    it "Should have a version." do
      expect(Botch.const_defined?("VERSION")).to eq(true)
    end
  end

  describe Base do
    before(:all) do
      @fake = Fake.new
      class SampleBotch < Botch::Base; end
      # Running once without configuration makes Base pick its default client.
      SampleBotch.run(@fake.url) {}
    end

    it 'Default client should be faraday.' do
      expect(SampleBotch.client).to be_an_instance_of(Botch::Client::FaradayClient)
    end

    describe "settings and options" do
      before(:all) do
        class SampleBotch < Botch::Base
          set :user_agent, "SampleBotch User-Agent"
          set :client, :mechanize
          set :disabled_invalid, true
          set :original_option, "foobar"
        end
        @options  = SampleBotch.options
        @settings = SampleBotch.settings
      end

      it "Original options should be stored in options." do
        expect(@options[:original_option]).to eq("foobar")
      end

      it ":user_agent should be stored in options." do
        expect(@options[:user_agent]).to eq("SampleBotch User-Agent")
      end

      it ":disabled_invalid should be stored in settings." do
        expect(@settings[:disabled_invalid]).to eq(true)
      end

      it "Client setter should be valid." do
        expect(SampleBotch.client).to be_an_instance_of(Botch::Client::MechanizeClient)
      end
    end

    describe "instance variable" do
      before(:all) do
        class SampleBotch < Botch::Base
          set :user_agent, "SampleBotch User-Agent"
          set :disabled_invalid, false

          # The filter stores a value that the rule reads back.
          filter(:all) { @test = "test" }
          rule(:all) { @test }
        end
      end

      it "should be able to use instance variable." do
        expect(SampleBotch.run(@fake.url)[0]).to eq("test")
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Specs for crawling through the Faraday client against FakeWeb-registered
# pages answering 200, 404 and 500.
#
# Boolean expectations use eq(true)/eq(false) instead of be_true/be_false,
# which RSpec 3 removed; eq works on both RSpec 2 and 3.
module Botch
  describe Client::FaradayClient do
    before(:all) do
      FakeWeb.clean_registry
      @fakes = []
      @fakes << Fake.new("/", :status => [200, "OK"], :content_type => "text/html")
      @fakes << Fake.new("/test1", :status => [404, "Not Found"], :content_type => "text/html")
      @fakes << Fake.new("/test2", :status => [500, "Internal Server Error"], :content_type => "text/html")

      class SampleBotch < Botch::Base
        set :user_agent, "SampleBotch User-Agent"
        set :client, :faraday
        set :disabled_invalid, nil
        filter(:all) { status == 200 }
        rule(:all) { status }
      end
    end

    it 'client should be faraday if set :faraday to :client.' do
      expect(SampleBotch.run(@fakes[0].url) { client }[0]).to be_an_instance_of(Botch::Client::FaradayClient)
    end

    it 'helpers should return valid data.' do
      result = SampleBotch.run(@fakes[0].url) do
        { :status => status, :header => header, :body => body }
      end
      result = result[0]
      expect(result[:status]).to eq(200)
      expect(result[:header]).to be_an_instance_of(Faraday::Utils::Headers)
      expect(result[:body]).to be_an_instance_of(String)
    end

    it 'block argument of #rule should replace last expression.' do
      result = SampleBotch.run(*@fakes.map(&:url))
      expect(result[0]).to eq(200)
      expect(result[1]).to eq(404)
      expect(result[2]).to eq(500)
    end

    it 'block argument of #run should replace last expression.' do
      result = SampleBotch.run(*@fakes.map(&:url)) { "Foo" }
      expect(result[0]).to eq("Foo")
      expect(result[1]).to eq("Foo")
      expect(result[2]).to eq("Foo")
    end

    it 'the second argument should be boolean.' do
      result = SampleBotch.run(*@fakes.map(&:url)) { |response, valid| valid }
      expect(result[0]).to eq(true)
      expect(result[1]).to eq(false)
      expect(result[2]).to eq(false)
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Specs for crawling through the Mechanize client against FakeWeb-registered
# pages answering 200, 404 and 500.
#
# Boolean expectations use eq(true)/eq(false) instead of be_true/be_false,
# which RSpec 3 removed; eq works on both RSpec 2 and 3.
module Botch
  describe Client::MechanizeClient do
    before(:all) do
      FakeWeb.clean_registry
      @fakes = []
      @fakes << Fake.new("/", :status => [200, "OK"], :content_type => "text/html")
      @fakes << Fake.new("/test1", :status => [404, "Not Found"], :content_type => "text/html")
      @fakes << Fake.new("/test2", :status => [500, "Internal Server Error"], :content_type => "text/html")

      class SampleBotch < Botch::Base
        set :user_agent, "SampleBotch User-Agent"
        set :client, :mechanize
        set :disabled_invalid, nil
        filter(:all) { status == 200 }
        rule(:all) { status }
      end
    end

    it 'client should be mechanize if set :mechanize to :client.' do
      expect(SampleBotch.run(@fakes[0].url) { client }[0]).to be_an_instance_of(Botch::Client::MechanizeClient)
    end

    it 'helpers should return valid data.' do
      result = SampleBotch.run(@fakes[0].url) do
        { :status => status, :header => header, :body => body }
      end
      result = result[0]
      expect(result[:status]).to eq(200)
      expect(result[:header]).to be_an_instance_of(Mechanize::Headers)
      expect(result[:body]).to be_an_instance_of(String)
    end

    it 'block argument of #rule should replace last expression.' do
      result = SampleBotch.run(*@fakes.map(&:url))
      expect(result[0]).to eq(200)
      expect(result[1]).to eq(404)
      expect(result[2]).to eq(500)
    end

    it 'block argument of #run should replace last expression.' do
      result = SampleBotch.run(*@fakes.map(&:url)) { "Foo" }
      expect(result[0]).to eq("Foo")
      expect(result[1]).to eq("Foo")
      expect(result[2]).to eq("Foo")
    end

    it 'the second argument should be boolean.' do
      result = SampleBotch.run(*@fakes.map(&:url)) { |response, valid| valid }
      expect(result[0]).to eq(true)
      expect(result[1]).to eq(false)
      expect(result[2]).to eq(false)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'botch'
|
2
|
+
require 'fakeweb'
|
3
|
+
|
4
|
+
FakeWeb.allow_net_connect = false
|
5
|
+
|
6
|
+
module Botch
  SPEC_DOMAIN = 'example.com'

  # Test double for a web page: registers a canned GET response with FakeWeb
  # on construction.
  class Fake
    # @param path    [String] path of the page below SPEC_DOMAIN
    # @param options [Hash]   :scheme (default "http"),
    #                         :content_type (default "text/html"),
    #                         :status (default [200, "OK"])
    def initialize(path = "/", options = {})
      @path = path
      @scheme = options[:scheme] || "http"
      @content_type = options[:content_type] || "text/html"
      @status = options[:status] || [200, "OK"]
      add_to_fakeweb
    end

    # Absolute URL the fake page is registered under.
    def url
      @scheme + "://" + SPEC_DOMAIN + @path
    end

    # HTML served for this page; built lazily on first access.
    def body
      @body ||= <<-HTML
<html>
<head>
<title>Fake page #{@path}</title>
</head>
<body>
</body>
</html>
      HTML
    end

    def add_to_fakeweb
      # Go through the #body reader: @body is still nil at this point because
      # it is only assigned lazily, so reading the ivar would register nil.
      options = { :body => body, :content_type => @content_type, :status => @status }
      FakeWeb.register_uri(:get, url, options)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: botch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- namusyaka
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-07-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: faraday
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mechanize
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: fakeweb
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.3'
|
69
|
+
description: Botch is a DSL for quickly creating web crawlers. Inspired by Sinatra.
|
70
|
+
email: namusyaka@gmail.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- Gemfile
|
76
|
+
- README.md
|
77
|
+
- Rakefile
|
78
|
+
- botch.gemspec
|
79
|
+
- lib/botch.rb
|
80
|
+
- lib/botch/base.rb
|
81
|
+
- lib/botch/clients/abstract_client.rb
|
82
|
+
- lib/botch/clients/faraday_client.rb
|
83
|
+
- lib/botch/clients/mechanize_client.rb
|
84
|
+
- spec/botch_spec.rb
|
85
|
+
- spec/faraday_spec.rb
|
86
|
+
- spec/mechanize_spec.rb
|
87
|
+
- spec/spec_helper.rb
|
88
|
+
homepage: https://github.com/namusyaka/botch
|
89
|
+
licenses: []
|
90
|
+
metadata: {}
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - '>='
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubyforge_project:
|
107
|
+
rubygems_version: 2.0.2
|
108
|
+
signing_key:
|
109
|
+
specification_version: 4
|
110
|
+
summary: A DSL for web clawler.
|
111
|
+
test_files:
|
112
|
+
- spec/botch_spec.rb
|
113
|
+
- spec/faraday_spec.rb
|
114
|
+
- spec/mechanize_spec.rb
|
115
|
+
has_rdoc:
|