botch 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +16 -0
- data/README.md +12 -1
- data/botch.gemspec +1 -1
- data/lib/botch.rb +0 -4
- data/lib/botch/base.rb +15 -10
- data/lib/botch/version.rb +3 -0
- data/spec/botch_spec.rb +1 -7
- data/spec/filter_spec.rb +29 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0fe3e67b5d6e95407e12c4faf9f869c225b904ff
|
4
|
+
data.tar.gz: dccfb3b75e385bf8c1b1ac2e898bd719c60fe747
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c271d30d2d725f552cdb0f347959962557a7892de5525aeb2c53a30e7f35c5a0de6ad61d18f66792011f4d9a80a5346e1670dfa0111f2bf531af23deea4a96d5
|
7
|
+
data.tar.gz: e76498ef673c6a0bfd09fb9c7935512aabb2417589b4cd22ea22568bc335721e23d84fcc00112e863c11bb63d56f1ce7610e7db6e2b3b011ddc24d54575d97f7
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
# Botch
|
2
2
|
|
3
|
+
[Build Status](https://travis-ci.org/namusyaka/botch)
|
4
|
+
|
3
5
|
Botch is a simple DSL for quickly creating web crawlers.
|
4
6
|
|
5
7
|
Inspired by Sinatra.
|
6
8
|
|
9
|
+
[Japanese](https://gist.github.com/namusyaka/6001467)
|
10
|
+
|
7
11
|
## Installation
|
8
12
|
|
9
13
|
add this line to your Gemfile.
|
@@ -19,16 +23,23 @@ or
|
|
19
23
|
```ruby
|
20
24
|
require 'lib/botch'
|
21
25
|
require 'kconv'
|
26
|
+
require 'rack'
|
22
27
|
|
23
28
|
class SampleBotch < Botch::Base
|
24
29
|
set :user_agent, "SampleBotch"
|
25
30
|
|
31
|
+
helpers do
|
32
|
+
def h(str)
|
33
|
+
Rack::Utils.escape_html(str)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
26
37
|
filter :example, :map => "example.com" do
|
27
38
|
status == 200
|
28
39
|
end
|
29
40
|
|
30
41
|
rule :example, :map => /example\.com/ do
|
31
|
-
body.toutf8
|
42
|
+
h(body.toutf8)
|
32
43
|
end
|
33
44
|
end
|
34
45
|
|
data/botch.gemspec
CHANGED
data/lib/botch.rb
CHANGED
data/lib/botch/base.rb
CHANGED
@@ -14,7 +14,6 @@ module Botch
|
|
14
14
|
|
15
15
|
def initialize
|
16
16
|
@routes = []
|
17
|
-
self
|
18
17
|
end
|
19
18
|
|
20
19
|
def add(label, options = {}, &block)
|
@@ -54,11 +53,11 @@ module Botch
|
|
54
53
|
private
|
55
54
|
|
56
55
|
def map_validation(url, map)
|
57
|
-
case map
|
58
|
-
when
|
59
|
-
when
|
60
|
-
when
|
61
|
-
else
|
56
|
+
case map
|
57
|
+
when Regexp then url =~ map
|
58
|
+
when String then url.include?(map)
|
59
|
+
when NilClass then true
|
60
|
+
else nil
|
62
61
|
end
|
63
62
|
end
|
64
63
|
end
|
@@ -144,13 +143,18 @@ module Botch
|
|
144
143
|
end
|
145
144
|
end
|
146
145
|
|
146
|
+
def reset
|
147
|
+
@@routes = { :filter => Filter.new, :rule => Rule.new }
|
148
|
+
end
|
149
|
+
|
147
150
|
def reset!
|
151
|
+
reset
|
148
152
|
settings = {}
|
149
153
|
end
|
150
154
|
|
151
155
|
def request(method, *urls, &block)
|
152
156
|
|
153
|
-
set_default_options
|
157
|
+
set_default_options unless self.client
|
154
158
|
raise ArgumentError unless self.client.respond_to?(method)
|
155
159
|
|
156
160
|
block = generate_main_block(&block) if block_given?
|
@@ -159,11 +163,12 @@ module Botch
|
|
159
163
|
filters, rules = @@routes.map{ |k, v| v.inject(url) }
|
160
164
|
response = self.client.send(method, url, options)
|
161
165
|
|
162
|
-
set_instance_variables
|
166
|
+
set_instance_variables :header => response[:header],
|
163
167
|
:body => response[:body],
|
164
|
-
:status => response[:status]
|
168
|
+
:status => response[:status]
|
165
169
|
|
166
170
|
response = response[:response]
|
171
|
+
valid = true
|
167
172
|
|
168
173
|
unless filters.empty?
|
169
174
|
valid = filters.map{ |_filter| _filter[:block].call(response) }.all?
|
@@ -207,7 +212,7 @@ module Botch
|
|
207
212
|
@_clients ||= [:faraday, :mechanize]
|
208
213
|
end
|
209
214
|
|
210
|
-
def set_default_options
|
215
|
+
def set_default_options
|
211
216
|
self.client = :faraday
|
212
217
|
end
|
213
218
|
end
|
data/spec/botch_spec.rb
CHANGED
@@ -2,12 +2,6 @@ $:.unshift(File.dirname(__FILE__))
|
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
4
|
module Botch
|
5
|
-
describe do
|
6
|
-
it "Should have a version." do
|
7
|
-
expect(Botch.const_defined?("VERSION")).to be_true
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
5
|
describe Base do
|
12
6
|
before(:all) do
|
13
7
|
@fake = Fake.new
|
@@ -43,7 +37,7 @@ module Botch
|
|
43
37
|
expect(@settings[:disabled_invalid]).to be_true
|
44
38
|
end
|
45
39
|
|
46
|
-
it "
|
40
|
+
it "client should be a Client::Mechanize instance." do
|
47
41
|
expect(SampleBotch.client).to be_an_instance_of(Botch::Client::MechanizeClient)
|
48
42
|
end
|
49
43
|
end
|
data/spec/filter_spec.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
module Botch
|
5
|
+
describe Filter do
|
6
|
+
before(:each) do
|
7
|
+
FakeWeb.clean_registry
|
8
|
+
@fakes = []
|
9
|
+
@fakes << Fake.new("/", :status => [200, "OK"], :content_type => "text/html")
|
10
|
+
@fakes << Fake.new("/test1", :status => [404, "Not Found"], :content_type => "text/html")
|
11
|
+
@fakes << Fake.new("/test2", :status => [500, "Internal Server Error"], :content_type => "text/html")
|
12
|
+
|
13
|
+
class SampleBotch < Botch::Base
|
14
|
+
reset!
|
15
|
+
set :client, :faraday
|
16
|
+
filter :test1, :map => /test1|test2/ do
|
17
|
+
status == 404
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it "filter should decide a valid in reference to return value of filter's block." do
|
23
|
+
result = SampleBotch.run(*@fakes.map(&:url)) {|response, valid| valid }
|
24
|
+
expect(result[0]).to be_true
|
25
|
+
expect(result[1]).to be_true
|
26
|
+
expect(result[2]).to be_false
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: botch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- namusyaka
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -72,6 +72,7 @@ executables: []
|
|
72
72
|
extensions: []
|
73
73
|
extra_rdoc_files: []
|
74
74
|
files:
|
75
|
+
- .travis.yml
|
75
76
|
- Gemfile
|
76
77
|
- README.md
|
77
78
|
- Rakefile
|
@@ -81,8 +82,10 @@ files:
|
|
81
82
|
- lib/botch/clients/abstract_client.rb
|
82
83
|
- lib/botch/clients/faraday_client.rb
|
83
84
|
- lib/botch/clients/mechanize_client.rb
|
85
|
+
- lib/botch/version.rb
|
84
86
|
- spec/botch_spec.rb
|
85
87
|
- spec/faraday_spec.rb
|
88
|
+
- spec/filter_spec.rb
|
86
89
|
- spec/mechanize_spec.rb
|
87
90
|
- spec/spec_helper.rb
|
88
91
|
homepage: https://github.com/namusyaka/botch
|
@@ -112,5 +115,6 @@ summary: A DSL for web clawler.
|
|
112
115
|
test_files:
|
113
116
|
- spec/botch_spec.rb
|
114
117
|
- spec/faraday_spec.rb
|
118
|
+
- spec/filter_spec.rb
|
115
119
|
- spec/mechanize_spec.rb
|
116
120
|
has_rdoc:
|