crabfarm 0.7.2 → 0.7.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/crabfarm.rb +0 -2
- data/lib/crabfarm/base_navigator.rb +0 -5
- data/lib/crabfarm/context.rb +1 -15
- data/lib/crabfarm/live/context.rb +0 -2
- data/lib/crabfarm/modes/publisher.rb +49 -34
- data/lib/crabfarm/support/phantom_runner.rb +1 -0
- data/lib/crabfarm/version.rb +1 -1
- metadata +2 -3
- data/lib/crabfarm/http_client.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2df1fe8ddac13f58aebe2e0f99b2654e58ecc73e
|
4
|
+
data.tar.gz: 10243091f0a47c4af76cbd76ccf46a4d114c5d7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 134313f90f076d08a2360cd0fca4f1920109bf871b49caa9843c222c860d42eeb6f8e463d4c8f0632370cf23f95546bbb65e9968fc19477ae225a15fe7eb98dd
|
7
|
+
data.tar.gz: 08db898418eb68af12a18658f21448fc92499ab1675b8092877d650ca039a81a2369a57330d760b8460d503029851660b25dc9ac18199489f7852c0141884e3d
|
data/lib/crabfarm.rb
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
require "logger"
|
2
2
|
require "forwardable"
|
3
|
-
require "net/http"
|
4
3
|
require "active_support/inflector"
|
5
4
|
|
6
5
|
require "crabfarm/version"
|
7
6
|
require "crabfarm/errors"
|
8
7
|
require "crabfarm/configuration"
|
9
8
|
require "crabfarm/driver_pool"
|
10
|
-
require "crabfarm/http_client"
|
11
9
|
require "crabfarm/state_store"
|
12
10
|
require "crabfarm/context"
|
13
11
|
require "crabfarm/transition_service"
|
@@ -13,7 +13,6 @@ module Crabfarm
|
|
13
13
|
|
14
14
|
attr_reader :params
|
15
15
|
|
16
|
-
def_delegators '@context', :http
|
17
16
|
def_delegators '@context.store', :get, :fetch
|
18
17
|
|
19
18
|
def initialize(_context, _params)
|
@@ -31,10 +30,6 @@ module Crabfarm
|
|
31
30
|
@context.pool.driver(_name)
|
32
31
|
end
|
33
32
|
|
34
|
-
def download(_url)
|
35
|
-
@context.http.get(_url).body
|
36
|
-
end
|
37
|
-
|
38
33
|
def run
|
39
34
|
raise NotImplementedError.new
|
40
35
|
end
|
data/lib/crabfarm/context.rb
CHANGED
@@ -2,7 +2,7 @@ module Crabfarm
|
|
2
2
|
class Context
|
3
3
|
extend Forwardable
|
4
4
|
|
5
|
-
attr_accessor :pool, :store
|
5
|
+
attr_accessor :pool, :store
|
6
6
|
|
7
7
|
def initialize
|
8
8
|
@store = StateStore.new
|
@@ -34,7 +34,6 @@ module Crabfarm
|
|
34
34
|
def load_services
|
35
35
|
init_browser_adapter
|
36
36
|
init_driver_pool
|
37
|
-
init_http_client
|
38
37
|
end
|
39
38
|
|
40
39
|
def reset_services
|
@@ -43,7 +42,6 @@ module Crabfarm
|
|
43
42
|
end
|
44
43
|
|
45
44
|
def unload_services
|
46
|
-
release_http_client
|
47
45
|
release_driver_pool
|
48
46
|
release_browser_adapter
|
49
47
|
end
|
@@ -69,22 +67,10 @@ module Crabfarm
|
|
69
67
|
@pool = nil
|
70
68
|
end
|
71
69
|
|
72
|
-
def init_http_client
|
73
|
-
@http = build_http_client proxy if @http.nil?
|
74
|
-
end
|
75
|
-
|
76
70
|
def build_browser_adapter(_proxy)
|
77
71
|
Strategies.load(:browser, config.browser).new _proxy
|
78
72
|
end
|
79
73
|
|
80
|
-
def build_http_client(_proxy)
|
81
|
-
HttpClient.new _proxy
|
82
|
-
end
|
83
|
-
|
84
|
-
def release_http_client
|
85
|
-
@http = nil
|
86
|
-
end
|
87
|
-
|
88
74
|
def proxy
|
89
75
|
config.proxy
|
90
76
|
end
|
@@ -21,8 +21,6 @@ module Crabfarm
|
|
21
21
|
return BrowserAdapter.new @manager
|
22
22
|
end
|
23
23
|
|
24
|
-
# TODO: override build_http_client, i would like to tap into the http requests and show downloaded data in viewer
|
25
|
-
|
26
24
|
class BrowserAdapter < Crabfarm::Adapters::Browser::Base
|
27
25
|
|
28
26
|
def initialize(_manager)
|
@@ -8,6 +8,8 @@ require 'base64'
|
|
8
8
|
require 'rainbow'
|
9
9
|
require 'rainbow/ext/string'
|
10
10
|
require 'digest/sha1'
|
11
|
+
require 'net/http'
|
12
|
+
require 'crabfarm/utils/console'
|
11
13
|
|
12
14
|
module Crabfarm
|
13
15
|
module Modes
|
@@ -22,12 +24,11 @@ module Crabfarm
|
|
22
24
|
@options = _options
|
23
25
|
|
24
26
|
load_config
|
25
|
-
return unless dry_run or check_credentials
|
26
|
-
detect_git_repo
|
27
|
+
return unless dry_run? or check_credentials
|
27
28
|
|
28
|
-
if
|
29
|
-
if
|
30
|
-
|
29
|
+
if !unsafe? and detect_git_repo
|
30
|
+
if is_tree_dirty?
|
31
|
+
console.warning "Aborting: Your working copy has uncommited changes! Use the --unsafe option to force."
|
31
32
|
return
|
32
33
|
end
|
33
34
|
load_files_from_git
|
@@ -39,16 +40,23 @@ module Crabfarm
|
|
39
40
|
compress_package
|
40
41
|
generate_signature
|
41
42
|
|
42
|
-
|
43
|
+
build_payload
|
44
|
+
send_package if not dry_run? and ensure_valid_remote
|
45
|
+
|
46
|
+
@payload
|
43
47
|
end
|
44
48
|
|
45
49
|
private
|
46
50
|
|
47
|
-
def
|
51
|
+
def verbose?
|
52
|
+
@options.fetch(:verbose, true)
|
53
|
+
end
|
54
|
+
|
55
|
+
def dry_run?
|
48
56
|
@options.fetch(:dry, false)
|
49
57
|
end
|
50
58
|
|
51
|
-
def unsafe
|
59
|
+
def unsafe?
|
52
60
|
@options.fetch(:unsafe, false)
|
53
61
|
end
|
54
62
|
|
@@ -77,7 +85,7 @@ module Crabfarm
|
|
77
85
|
|
78
86
|
def ensure_valid_remote
|
79
87
|
if @url.nil?
|
80
|
-
@url =
|
88
|
+
@url = console.question 'Enter default remote for crawler'
|
81
89
|
return false unless validate_remote @url
|
82
90
|
@local_config['remote'] = @url
|
83
91
|
save_local_config
|
@@ -89,15 +97,15 @@ module Crabfarm
|
|
89
97
|
|
90
98
|
def validate_remote(_url)
|
91
99
|
return true if /^[\w\-]+\/[\w\-]+$/i === _url
|
92
|
-
|
100
|
+
console.error "Invalid remote syntax: #{_url}"
|
93
101
|
return false
|
94
102
|
end
|
95
103
|
|
96
104
|
def check_credentials
|
97
105
|
if @token.nil?
|
98
|
-
|
99
|
-
email =
|
100
|
-
password =
|
106
|
+
console.info 'No credential data found, please identify yourself'
|
107
|
+
email = console.question 'Enter your crabfarm.io email'
|
108
|
+
password = console.question 'Enter your crabfarm.io password'
|
101
109
|
|
102
110
|
resp = send_request Net::HTTP::Post, 'api/tokens', {
|
103
111
|
'email' => email,
|
@@ -110,9 +118,9 @@ module Crabfarm
|
|
110
118
|
@home_config['token'] = @token
|
111
119
|
save_home_config
|
112
120
|
when Net::HTTPUnauthorized
|
113
|
-
|
121
|
+
console.error "The provided credentials are invalid!"
|
114
122
|
else
|
115
|
-
|
123
|
+
console.error "Unknown error when asking for token!"
|
116
124
|
end
|
117
125
|
end
|
118
126
|
|
@@ -148,7 +156,7 @@ module Crabfarm
|
|
148
156
|
if File.exists? File.join(git_path, '.git')
|
149
157
|
@git = Git.open git_path
|
150
158
|
@rel_path = if path_to_git.count > 0 then File.join(*path_to_git.reverse!) else nil end
|
151
|
-
return
|
159
|
+
return true
|
152
160
|
else
|
153
161
|
path_to_git << File.basename(git_path)
|
154
162
|
git_path = File.expand_path('..', git_path)
|
@@ -156,16 +164,13 @@ module Crabfarm
|
|
156
164
|
end
|
157
165
|
|
158
166
|
@git = nil
|
159
|
-
|
160
|
-
|
161
|
-
def inside_git_repo?
|
162
|
-
not @git.nil?
|
167
|
+
return false
|
163
168
|
end
|
164
169
|
|
165
170
|
def load_files_from_git
|
166
171
|
@git.chdir do
|
167
172
|
@ref = @git.log.first.sha
|
168
|
-
|
173
|
+
console.result "Packaging files from current HEAD (#{@ref}):" if verbose?
|
169
174
|
entries = @git.gtree(@ref).full_tree.map(&:split)
|
170
175
|
entries = entries.select { |e| e[1] == 'blob' }
|
171
176
|
|
@@ -184,10 +189,12 @@ module Crabfarm
|
|
184
189
|
end
|
185
190
|
|
186
191
|
def load_files_from_fs
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
192
|
+
console.result "Packaging files (no version control)" if verbose?
|
193
|
+
Dir.chdir(@crawler_path) do
|
194
|
+
@file_list = Dir[*@include].map do |path|
|
195
|
+
full_path = File.join(@crawler_path, path)
|
196
|
+
[path, File.stat(full_path).mode, File.read(full_path)]
|
197
|
+
end
|
191
198
|
end
|
192
199
|
@ref = "filesystem"
|
193
200
|
end
|
@@ -196,7 +203,7 @@ module Crabfarm
|
|
196
203
|
@package = StringIO.new("")
|
197
204
|
Gem::Package::TarWriter.new(@package) do |tar|
|
198
205
|
@file_list.each do |f|
|
199
|
-
|
206
|
+
console.info "+ #{f[0]} - #{f[1]}" if verbose?
|
200
207
|
path, mode, contents = f
|
201
208
|
tar.add_file(path, mode) { |tf| tf.write contents }
|
202
209
|
end
|
@@ -214,26 +221,30 @@ module Crabfarm
|
|
214
221
|
|
215
222
|
def generate_signature
|
216
223
|
@signature = Digest::SHA1.hexdigest @package.string
|
217
|
-
|
224
|
+
console.info "Package SHA1: #{@signature}" if verbose?
|
218
225
|
end
|
219
226
|
|
220
|
-
def
|
221
|
-
|
227
|
+
def build_payload
|
228
|
+
@payload = {
|
222
229
|
"repo" => Base64.encode64(@cpackage.string),
|
223
230
|
"sha" => @signature,
|
224
231
|
"ref" => @ref
|
225
|
-
}
|
232
|
+
}
|
233
|
+
end
|
234
|
+
|
235
|
+
def send_package
|
236
|
+
resp = send_request(Net::HTTP::Put, "api/bots/#{@url}", @payload)
|
226
237
|
|
227
238
|
case resp
|
228
239
|
when Net::HTTPSuccess
|
229
240
|
sha = JSON.parse(resp.body)['sha']
|
230
|
-
|
241
|
+
console.result "#{@url} updated!"
|
231
242
|
when Net::HTTPUnauthorized
|
232
|
-
|
243
|
+
console.error "You are not authorized to update crawler: #{@url}"
|
233
244
|
when Net::HTTPNotFound
|
234
|
-
|
245
|
+
console.error "Crawler not found: #{@url}"
|
235
246
|
else
|
236
|
-
|
247
|
+
console.error "Unknown error when updating crawler information!"
|
237
248
|
end
|
238
249
|
end
|
239
250
|
|
@@ -249,6 +260,10 @@ module Crabfarm
|
|
249
260
|
end
|
250
261
|
end
|
251
262
|
|
263
|
+
def console
|
264
|
+
Crabfarm::Utils::Console
|
265
|
+
end
|
266
|
+
|
252
267
|
end
|
253
268
|
end
|
254
269
|
end
|
data/lib/crabfarm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crabfarm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -500,7 +500,6 @@ files:
|
|
500
500
|
- lib/crabfarm/factories/reducer.rb
|
501
501
|
- lib/crabfarm/factories/snapshot_reducer.rb
|
502
502
|
- lib/crabfarm/forked_navigator.rb
|
503
|
-
- lib/crabfarm/http_client.rb
|
504
503
|
- lib/crabfarm/live/context.rb
|
505
504
|
- lib/crabfarm/live/controller.rb
|
506
505
|
- lib/crabfarm/live/interactable.rb
|
data/lib/crabfarm/http_client.rb
DELETED
@@ -1,97 +0,0 @@
|
|
1
|
-
require "uri"
|
2
|
-
|
3
|
-
module Crabfarm
|
4
|
-
class HttpClient
|
5
|
-
|
6
|
-
class HttpRequestError < StandardError
|
7
|
-
extend Forwardable
|
8
|
-
|
9
|
-
def_delegators :@response, :code, :body
|
10
|
-
|
11
|
-
attr_reader :response
|
12
|
-
|
13
|
-
def initialize(_response)
|
14
|
-
@response = _response
|
15
|
-
super _response.message
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
class MaximumRedirectsError < StandardError
|
20
|
-
def initialize
|
21
|
-
super 'Redirection loop detected!'
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
attr_reader :proxy_addr, :proxy_port
|
26
|
-
|
27
|
-
def initialize(_proxy=nil)
|
28
|
-
if _proxy.nil?
|
29
|
-
@proxy_addr = nil
|
30
|
-
@proxy_port = nil
|
31
|
-
else
|
32
|
-
@proxy_addr, @proxy_port = _proxy.split ':'
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def get(_url, _query={}, _headers={})
|
37
|
-
uri = URI _url
|
38
|
-
perform_request Net::HTTP::Get, uri, _headers
|
39
|
-
end
|
40
|
-
|
41
|
-
def post(_url, _data, _headers={})
|
42
|
-
perform_request Net::HTTP::Post, URI(_url), _headers do |req|
|
43
|
-
req.body = prepare_data(_data)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def put(_url, _data, _headers={})
|
48
|
-
perform_request Net::HTTP::Put, URI(_url), _headers do |req|
|
49
|
-
req.body = prepare_data(_data)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
def delete(_url)
|
54
|
-
perform_request Net::HTTP::Delete, URI(_url), _headers
|
55
|
-
end
|
56
|
-
|
57
|
-
private
|
58
|
-
|
59
|
-
def perform_request(_req_type, _uri, _headers, _limit=10)
|
60
|
-
|
61
|
-
raise MaximumRedirectsError.new if _limit == 0
|
62
|
-
|
63
|
-
request = _req_type.new(_uri.request_uri.empty? ? '/' : _uri.request_uri)
|
64
|
-
_headers.keys.each { |k| request[k] = _headers[k] }
|
65
|
-
yield request if block_given?
|
66
|
-
|
67
|
-
response = build_client(_uri).request request
|
68
|
-
|
69
|
-
case response
|
70
|
-
when Net::HTTPSuccess then
|
71
|
-
response
|
72
|
-
when Net::HTTPRedirection then
|
73
|
-
location = response['location']
|
74
|
-
perform_request(_req_type, URI.parse(location), _headers, _limit - 1)
|
75
|
-
else
|
76
|
-
handle_error_response response
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
def build_client(uri)
|
81
|
-
client = Net::HTTP.new uri.host, uri.port || 80, proxy_addr, proxy_port
|
82
|
-
client.use_ssl = true if uri.scheme == 'https'
|
83
|
-
client.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
84
|
-
client
|
85
|
-
end
|
86
|
-
|
87
|
-
def handle_error_response(_response)
|
88
|
-
raise HttpRequestError.new _response
|
89
|
-
end
|
90
|
-
|
91
|
-
def prepare_data(_data)
|
92
|
-
if _data.is_a? Hash
|
93
|
-
_data.keys.map { |k| "#{k}=#{_data[k]}" }.join '&'
|
94
|
-
else _data end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|