botch 0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -4
- data/botch.gemspec +1 -0
- data/lib/botch.rb +1 -1
- data/lib/botch/base.rb +24 -14
- data/lib/botch/clients/abstract_client.rb +3 -0
- data/lib/botch/clients/faraday_client.rb +7 -1
- data/lib/botch/clients/mechanize_client.rb +22 -0
- data/spec/faraday_spec.rb +14 -0
- data/spec/mechanize_spec.rb +14 -0
- data/spec/spec_helper.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0607caf61a586fda5284bb88ef4500301099c6f4
|
4
|
+
data.tar.gz: aa588e07e5bb953d824d9c204047598176e8cb4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fcb46fa77f9aa1d5d2f42826461824d66339e93c728609192253eb874c9e44552a57b85a146134ac635ff67dc43db638587a881b8505cc6edaaf9835aa5189b3
|
7
|
+
data.tar.gz: 316efe1f6faeccf5852aed2e9314e70df28b51a1d0f84597cbd45e3d7b05e4fcdf9b9da9cc9df23ac355b881cd695fc64626a88a735fb9cbf50575e03250c593
|
data/README.md
CHANGED
@@ -4,6 +4,16 @@ Botch is a simple DSL for quickly creating web crawlers.
|
|
4
4
|
|
5
5
|
Inspired by Sinatra.
|
6
6
|
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
add this line to your Gemfile.
|
10
|
+
|
11
|
+
`gem 'botch'`
|
12
|
+
|
13
|
+
or
|
14
|
+
|
15
|
+
`$ gem install botch`
|
16
|
+
|
7
17
|
## Usage
|
8
18
|
|
9
19
|
```ruby
|
@@ -13,12 +23,17 @@ require 'kconv'
|
|
13
23
|
class SampleBotch < Botch::Base
|
14
24
|
set :user_agent, "SampleBotch"
|
15
25
|
|
16
|
-
filter
|
17
|
-
|
26
|
+
filter :example, :map => "example.com" do
|
27
|
+
status == 200
|
28
|
+
end
|
29
|
+
|
30
|
+
rule :example, :map => /example\.com/ do
|
31
|
+
body.toutf8
|
32
|
+
end
|
18
33
|
end
|
19
34
|
|
20
35
|
if $0 == __FILE__
|
21
|
-
SampleBotch.run("http://
|
36
|
+
SampleBotch.run("http://example.com/") do |response|
|
22
37
|
puts response
|
23
38
|
end
|
24
39
|
end
|
@@ -27,7 +42,6 @@ end
|
|
27
42
|
## TODO
|
28
43
|
|
29
44
|
- RSpec
|
30
|
-
- GET/POST method
|
31
45
|
- Documentation
|
32
46
|
- Classic style
|
33
47
|
|
data/botch.gemspec
CHANGED
@@ -8,6 +8,7 @@ Gem::Specification.new "botch", Botch::VERSION do |s|
|
|
8
8
|
s.homepage = "https://github.com/namusyaka/botch"
|
9
9
|
s.files = `git ls-files`.split("\n") - %w(.gitignore)
|
10
10
|
s.test_files = s.files.select { |path| path =~ /^spec\/.*_spec\.rb/ }
|
11
|
+
s.license = "MIT"
|
11
12
|
|
12
13
|
s.add_dependency "faraday"
|
13
14
|
s.add_dependency "mechanize"
|
data/lib/botch.rb
CHANGED
data/lib/botch/base.rb
CHANGED
@@ -3,10 +3,10 @@ require 'faraday'
|
|
3
3
|
require 'mechanize'
|
4
4
|
|
5
5
|
%w(
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
).each{ |path| require File.expand_path("
|
6
|
+
abstract
|
7
|
+
faraday
|
8
|
+
mechanize
|
9
|
+
).each{ |path| require File.expand_path("../clients/#{path}_client", __FILE__) }
|
10
10
|
|
11
11
|
module Botch
|
12
12
|
class Route
|
@@ -134,24 +134,29 @@ module Botch
|
|
134
134
|
unbound_method
|
135
135
|
end
|
136
136
|
|
137
|
+
def generate_main_block(&block)
|
138
|
+
unbound_method = generate_method(:main_unbound_method, &block).bind(instance)
|
139
|
+
case unbound_method.arity
|
140
|
+
when 2 then proc{|r,v| unbound_method.call(r,v) }
|
141
|
+
when 1 then proc{|r,v| unbound_method.call(r) }
|
142
|
+
else proc{|r,v| unbound_method.call }
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
137
146
|
def reset!
|
138
147
|
settings = {}
|
139
148
|
end
|
140
149
|
|
141
|
-
def
|
142
|
-
|
143
|
-
unbound_method = generate_method(:main_unbound_method, &block).bind(instance)
|
144
|
-
block = case unbound_method.arity
|
145
|
-
when 2 then proc{|r,v| unbound_method.call(r, v) }
|
146
|
-
when 1 then proc{|r,v| unbound_method.call(r) }
|
147
|
-
else proc{|r,v| unbound_method.call }
|
148
|
-
end
|
149
|
-
end
|
150
|
+
def request(method, *urls, &block)
|
151
|
+
|
150
152
|
set_default_options! unless self.client
|
153
|
+
raise ArgumentError unless self.client.respond_to?(method)
|
154
|
+
|
155
|
+
block = generate_main_block(&block) if block_given?
|
151
156
|
|
152
157
|
urls.map do |url|
|
153
158
|
filters, rules = @@routes.map{ |k, v| v.inject(url) }
|
154
|
-
response = self.client.
|
159
|
+
response = self.client.send(method, url, options)
|
155
160
|
|
156
161
|
set_instance_variables(:header => response[:header],
|
157
162
|
:body => response[:body],
|
@@ -174,6 +179,11 @@ module Botch
|
|
174
179
|
end
|
175
180
|
end
|
176
181
|
|
182
|
+
def get(*urls, &block); request(:get, *urls, &block); end
|
183
|
+
def post(*urls, &block); request(:post, *urls, &block); end
|
184
|
+
|
185
|
+
alias :run :get
|
186
|
+
|
177
187
|
def client=(name)
|
178
188
|
@client = Client.const_get("#{name.to_s.capitalize}Client").new(settings) if clients.include?(name)
|
179
189
|
end
|
@@ -9,12 +9,18 @@ module Botch
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
def get(url, options)
|
12
|
+
def get(url, options = {})
|
13
13
|
options.each_pair{ |key, value| @handler.headers[key] = value }
|
14
14
|
response = @handler.get(url)
|
15
15
|
parse_response(response)
|
16
16
|
end
|
17
17
|
|
18
|
+
def post(url, options = {})
|
19
|
+
options.each_pair{ |key, value| @handler.headers[key] = value }
|
20
|
+
response = @handler.post(url)
|
21
|
+
parse_response(response)
|
22
|
+
end
|
23
|
+
|
18
24
|
def parse_response(response)
|
19
25
|
result = {}
|
20
26
|
result[:status] = response.status
|
@@ -23,6 +23,15 @@ module Botch
|
|
23
23
|
parse_response(mechanize_page)
|
24
24
|
end
|
25
25
|
|
26
|
+
def post(url, options = {})
|
27
|
+
@handler.user_agent = options[:user_agent] if options[:user_agent]
|
28
|
+
url, query = serialize_url(url)
|
29
|
+
mechanize_page = @handler.post(url, query) rescue MechanizeResponseError.new($!)
|
30
|
+
parse_response(mechanize_page)
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
26
35
|
def parse_response(response)
|
27
36
|
result = {}
|
28
37
|
result[:header] = response.header
|
@@ -31,6 +40,19 @@ module Botch
|
|
31
40
|
result[:response] = response
|
32
41
|
result
|
33
42
|
end
|
43
|
+
|
44
|
+
def serialize_url(url)
|
45
|
+
uri = URI.parse(url)
|
46
|
+
serializable_url = []
|
47
|
+
serializable_url[0] = "#{uri.scheme}://#{uri.host}#{uri.path}"
|
48
|
+
serializable_url[1] = uri.query.split(/&/).map do |pair|
|
49
|
+
pair = pair.split(/=/)
|
50
|
+
pair << "" if pair.length == 1
|
51
|
+
pair
|
52
|
+
end
|
53
|
+
serializable_url[1] = Hash[*serializable_url[1].flatten]
|
54
|
+
serializable_url
|
55
|
+
end
|
34
56
|
end
|
35
57
|
end
|
36
58
|
end
|
data/spec/faraday_spec.rb
CHANGED
@@ -53,5 +53,19 @@ module Botch
|
|
53
53
|
expect(result[1]).to be_false
|
54
54
|
expect(result[2]).to be_false
|
55
55
|
end
|
56
|
+
|
57
|
+
it 'shoud be able to use get method.' do
|
58
|
+
result = SampleBotch.get(*@fakes.map(&:url)){ |response, valid| valid }
|
59
|
+
expect(result[0]).to be_true
|
60
|
+
expect(result[1]).to be_false
|
61
|
+
expect(result[2]).to be_false
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'shoud be able to use post method.' do
|
65
|
+
result = SampleBotch.run(*@fakes.map(&:url)){ |response, valid| valid }
|
66
|
+
expect(result[0]).to be_true
|
67
|
+
expect(result[1]).to be_false
|
68
|
+
expect(result[2]).to be_false
|
69
|
+
end
|
56
70
|
end
|
57
71
|
end
|
data/spec/mechanize_spec.rb
CHANGED
@@ -53,5 +53,19 @@ module Botch
|
|
53
53
|
expect(result[1]).to be_false
|
54
54
|
expect(result[2]).to be_false
|
55
55
|
end
|
56
|
+
|
57
|
+
it 'shoud be able to use get method.' do
|
58
|
+
result = SampleBotch.get(*@fakes.map(&:url)){ |response, valid| valid }
|
59
|
+
expect(result[0]).to be_true
|
60
|
+
expect(result[1]).to be_false
|
61
|
+
expect(result[2]).to be_false
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'shoud be able to use post method.' do
|
65
|
+
result = SampleBotch.run(*@fakes.map(&:url)){ |response, valid| valid }
|
66
|
+
expect(result[0]).to be_true
|
67
|
+
expect(result[1]).to be_false
|
68
|
+
expect(result[2]).to be_false
|
69
|
+
end
|
56
70
|
end
|
57
71
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: botch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- namusyaka
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -86,7 +86,8 @@ files:
|
|
86
86
|
- spec/mechanize_spec.rb
|
87
87
|
- spec/spec_helper.rb
|
88
88
|
homepage: https://github.com/namusyaka/botch
|
89
|
-
licenses:
|
89
|
+
licenses:
|
90
|
+
- MIT
|
90
91
|
metadata: {}
|
91
92
|
post_install_message:
|
92
93
|
rdoc_options: []
|