crabfarm 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/crabfarm.rb +0 -2
- data/lib/crabfarm/base_navigator.rb +0 -5
- data/lib/crabfarm/context.rb +1 -15
- data/lib/crabfarm/live/context.rb +0 -2
- data/lib/crabfarm/modes/publisher.rb +49 -34
- data/lib/crabfarm/support/phantom_runner.rb +1 -0
- data/lib/crabfarm/version.rb +1 -1
- metadata +2 -3
- data/lib/crabfarm/http_client.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2df1fe8ddac13f58aebe2e0f99b2654e58ecc73e
|
4
|
+
data.tar.gz: 10243091f0a47c4af76cbd76ccf46a4d114c5d7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 134313f90f076d08a2360cd0fca4f1920109bf871b49caa9843c222c860d42eeb6f8e463d4c8f0632370cf23f95546bbb65e9968fc19477ae225a15fe7eb98dd
|
7
|
+
data.tar.gz: 08db898418eb68af12a18658f21448fc92499ab1675b8092877d650ca039a81a2369a57330d760b8460d503029851660b25dc9ac18199489f7852c0141884e3d
|
data/lib/crabfarm.rb
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
require "logger"
|
2
2
|
require "forwardable"
|
3
|
-
require "net/http"
|
4
3
|
require "active_support/inflector"
|
5
4
|
|
6
5
|
require "crabfarm/version"
|
7
6
|
require "crabfarm/errors"
|
8
7
|
require "crabfarm/configuration"
|
9
8
|
require "crabfarm/driver_pool"
|
10
|
-
require "crabfarm/http_client"
|
11
9
|
require "crabfarm/state_store"
|
12
10
|
require "crabfarm/context"
|
13
11
|
require "crabfarm/transition_service"
|
@@ -13,7 +13,6 @@ module Crabfarm
|
|
13
13
|
|
14
14
|
attr_reader :params
|
15
15
|
|
16
|
-
def_delegators '@context', :http
|
17
16
|
def_delegators '@context.store', :get, :fetch
|
18
17
|
|
19
18
|
def initialize(_context, _params)
|
@@ -31,10 +30,6 @@ module Crabfarm
|
|
31
30
|
@context.pool.driver(_name)
|
32
31
|
end
|
33
32
|
|
34
|
-
def download(_url)
|
35
|
-
@context.http.get(_url).body
|
36
|
-
end
|
37
|
-
|
38
33
|
def run
|
39
34
|
raise NotImplementedError.new
|
40
35
|
end
|
data/lib/crabfarm/context.rb
CHANGED
@@ -2,7 +2,7 @@ module Crabfarm
|
|
2
2
|
class Context
|
3
3
|
extend Forwardable
|
4
4
|
|
5
|
-
attr_accessor :pool, :store
|
5
|
+
attr_accessor :pool, :store
|
6
6
|
|
7
7
|
def initialize
|
8
8
|
@store = StateStore.new
|
@@ -34,7 +34,6 @@ module Crabfarm
|
|
34
34
|
def load_services
|
35
35
|
init_browser_adapter
|
36
36
|
init_driver_pool
|
37
|
-
init_http_client
|
38
37
|
end
|
39
38
|
|
40
39
|
def reset_services
|
@@ -43,7 +42,6 @@ module Crabfarm
|
|
43
42
|
end
|
44
43
|
|
45
44
|
def unload_services
|
46
|
-
release_http_client
|
47
45
|
release_driver_pool
|
48
46
|
release_browser_adapter
|
49
47
|
end
|
@@ -69,22 +67,10 @@ module Crabfarm
|
|
69
67
|
@pool = nil
|
70
68
|
end
|
71
69
|
|
72
|
-
def init_http_client
|
73
|
-
@http = build_http_client proxy if @http.nil?
|
74
|
-
end
|
75
|
-
|
76
70
|
def build_browser_adapter(_proxy)
|
77
71
|
Strategies.load(:browser, config.browser).new _proxy
|
78
72
|
end
|
79
73
|
|
80
|
-
def build_http_client(_proxy)
|
81
|
-
HttpClient.new _proxy
|
82
|
-
end
|
83
|
-
|
84
|
-
def release_http_client
|
85
|
-
@http = nil
|
86
|
-
end
|
87
|
-
|
88
74
|
def proxy
|
89
75
|
config.proxy
|
90
76
|
end
|
@@ -21,8 +21,6 @@ module Crabfarm
|
|
21
21
|
return BrowserAdapter.new @manager
|
22
22
|
end
|
23
23
|
|
24
|
-
# TODO: override build_http_client, i would like to tap into the http requests and show downloaded data in viewer
|
25
|
-
|
26
24
|
class BrowserAdapter < Crabfarm::Adapters::Browser::Base
|
27
25
|
|
28
26
|
def initialize(_manager)
|
@@ -8,6 +8,8 @@ require 'base64'
|
|
8
8
|
require 'rainbow'
|
9
9
|
require 'rainbow/ext/string'
|
10
10
|
require 'digest/sha1'
|
11
|
+
require 'net/http'
|
12
|
+
require 'crabfarm/utils/console'
|
11
13
|
|
12
14
|
module Crabfarm
|
13
15
|
module Modes
|
@@ -22,12 +24,11 @@ module Crabfarm
|
|
22
24
|
@options = _options
|
23
25
|
|
24
26
|
load_config
|
25
|
-
return unless dry_run or check_credentials
|
26
|
-
detect_git_repo
|
27
|
+
return unless dry_run? or check_credentials
|
27
28
|
|
28
|
-
if
|
29
|
-
if
|
30
|
-
|
29
|
+
if !unsafe? and detect_git_repo
|
30
|
+
if is_tree_dirty?
|
31
|
+
console.warning "Aborting: Your working copy has uncommited changes! Use the --unsafe option to force."
|
31
32
|
return
|
32
33
|
end
|
33
34
|
load_files_from_git
|
@@ -39,16 +40,23 @@ module Crabfarm
|
|
39
40
|
compress_package
|
40
41
|
generate_signature
|
41
42
|
|
42
|
-
|
43
|
+
build_payload
|
44
|
+
send_package if not dry_run? and ensure_valid_remote
|
45
|
+
|
46
|
+
@payload
|
43
47
|
end
|
44
48
|
|
45
49
|
private
|
46
50
|
|
47
|
-
def
|
51
|
+
def verbose?
|
52
|
+
@options.fetch(:verbose, true)
|
53
|
+
end
|
54
|
+
|
55
|
+
def dry_run?
|
48
56
|
@options.fetch(:dry, false)
|
49
57
|
end
|
50
58
|
|
51
|
-
def unsafe
|
59
|
+
def unsafe?
|
52
60
|
@options.fetch(:unsafe, false)
|
53
61
|
end
|
54
62
|
|
@@ -77,7 +85,7 @@ module Crabfarm
|
|
77
85
|
|
78
86
|
def ensure_valid_remote
|
79
87
|
if @url.nil?
|
80
|
-
@url =
|
88
|
+
@url = console.question 'Enter default remote for crawler'
|
81
89
|
return false unless validate_remote @url
|
82
90
|
@local_config['remote'] = @url
|
83
91
|
save_local_config
|
@@ -89,15 +97,15 @@ module Crabfarm
|
|
89
97
|
|
90
98
|
def validate_remote(_url)
|
91
99
|
return true if /^[\w\-]+\/[\w\-]+$/i === _url
|
92
|
-
|
100
|
+
console.error "Invalid remote syntax: #{_url}"
|
93
101
|
return false
|
94
102
|
end
|
95
103
|
|
96
104
|
def check_credentials
|
97
105
|
if @token.nil?
|
98
|
-
|
99
|
-
email =
|
100
|
-
password =
|
106
|
+
console.info 'No credential data found, please identify yourself'
|
107
|
+
email = console.question 'Enter your crabfarm.io email'
|
108
|
+
password = console.question 'Enter your crabfarm.io password'
|
101
109
|
|
102
110
|
resp = send_request Net::HTTP::Post, 'api/tokens', {
|
103
111
|
'email' => email,
|
@@ -110,9 +118,9 @@ module Crabfarm
|
|
110
118
|
@home_config['token'] = @token
|
111
119
|
save_home_config
|
112
120
|
when Net::HTTPUnauthorized
|
113
|
-
|
121
|
+
console.error "The provided credentials are invalid!"
|
114
122
|
else
|
115
|
-
|
123
|
+
console.error "Unknown error when asking for token!"
|
116
124
|
end
|
117
125
|
end
|
118
126
|
|
@@ -148,7 +156,7 @@ module Crabfarm
|
|
148
156
|
if File.exists? File.join(git_path, '.git')
|
149
157
|
@git = Git.open git_path
|
150
158
|
@rel_path = if path_to_git.count > 0 then File.join(*path_to_git.reverse!) else nil end
|
151
|
-
return
|
159
|
+
return true
|
152
160
|
else
|
153
161
|
path_to_git << File.basename(git_path)
|
154
162
|
git_path = File.expand_path('..', git_path)
|
@@ -156,16 +164,13 @@ module Crabfarm
|
|
156
164
|
end
|
157
165
|
|
158
166
|
@git = nil
|
159
|
-
|
160
|
-
|
161
|
-
def inside_git_repo?
|
162
|
-
not @git.nil?
|
167
|
+
return false
|
163
168
|
end
|
164
169
|
|
165
170
|
def load_files_from_git
|
166
171
|
@git.chdir do
|
167
172
|
@ref = @git.log.first.sha
|
168
|
-
|
173
|
+
console.result "Packaging files from current HEAD (#{@ref}):" if verbose?
|
169
174
|
entries = @git.gtree(@ref).full_tree.map(&:split)
|
170
175
|
entries = entries.select { |e| e[1] == 'blob' }
|
171
176
|
|
@@ -184,10 +189,12 @@ module Crabfarm
|
|
184
189
|
end
|
185
190
|
|
186
191
|
def load_files_from_fs
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
192
|
+
console.result "Packaging files (no version control)" if verbose?
|
193
|
+
Dir.chdir(@crawler_path) do
|
194
|
+
@file_list = Dir[*@include].map do |path|
|
195
|
+
full_path = File.join(@crawler_path, path)
|
196
|
+
[path, File.stat(full_path).mode, File.read(full_path)]
|
197
|
+
end
|
191
198
|
end
|
192
199
|
@ref = "filesystem"
|
193
200
|
end
|
@@ -196,7 +203,7 @@ module Crabfarm
|
|
196
203
|
@package = StringIO.new("")
|
197
204
|
Gem::Package::TarWriter.new(@package) do |tar|
|
198
205
|
@file_list.each do |f|
|
199
|
-
|
206
|
+
console.info "+ #{f[0]} - #{f[1]}" if verbose?
|
200
207
|
path, mode, contents = f
|
201
208
|
tar.add_file(path, mode) { |tf| tf.write contents }
|
202
209
|
end
|
@@ -214,26 +221,30 @@ module Crabfarm
|
|
214
221
|
|
215
222
|
def generate_signature
|
216
223
|
@signature = Digest::SHA1.hexdigest @package.string
|
217
|
-
|
224
|
+
console.info "Package SHA1: #{@signature}" if verbose?
|
218
225
|
end
|
219
226
|
|
220
|
-
def
|
221
|
-
|
227
|
+
def build_payload
|
228
|
+
@payload = {
|
222
229
|
"repo" => Base64.encode64(@cpackage.string),
|
223
230
|
"sha" => @signature,
|
224
231
|
"ref" => @ref
|
225
|
-
}
|
232
|
+
}
|
233
|
+
end
|
234
|
+
|
235
|
+
def send_package
|
236
|
+
resp = send_request(Net::HTTP::Put, "api/bots/#{@url}", @payload)
|
226
237
|
|
227
238
|
case resp
|
228
239
|
when Net::HTTPSuccess
|
229
240
|
sha = JSON.parse(resp.body)['sha']
|
230
|
-
|
241
|
+
console.result "#{@url} updated!"
|
231
242
|
when Net::HTTPUnauthorized
|
232
|
-
|
243
|
+
console.error "You are not authorized to update crawler: #{@url}"
|
233
244
|
when Net::HTTPNotFound
|
234
|
-
|
245
|
+
console.error "Crawler not found: #{@url}"
|
235
246
|
else
|
236
|
-
|
247
|
+
console.error "Unknown error when updating crawler information!"
|
237
248
|
end
|
238
249
|
end
|
239
250
|
|
@@ -249,6 +260,10 @@ module Crabfarm
|
|
249
260
|
end
|
250
261
|
end
|
251
262
|
|
263
|
+
def console
|
264
|
+
Crabfarm::Utils::Console
|
265
|
+
end
|
266
|
+
|
252
267
|
end
|
253
268
|
end
|
254
269
|
end
|
data/lib/crabfarm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crabfarm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -500,7 +500,6 @@ files:
|
|
500
500
|
- lib/crabfarm/factories/reducer.rb
|
501
501
|
- lib/crabfarm/factories/snapshot_reducer.rb
|
502
502
|
- lib/crabfarm/forked_navigator.rb
|
503
|
-
- lib/crabfarm/http_client.rb
|
504
503
|
- lib/crabfarm/live/context.rb
|
505
504
|
- lib/crabfarm/live/controller.rb
|
506
505
|
- lib/crabfarm/live/interactable.rb
|
data/lib/crabfarm/http_client.rb
DELETED
@@ -1,97 +0,0 @@
|
|
1
|
-
require "uri"
|
2
|
-
|
3
|
-
module Crabfarm
|
4
|
-
class HttpClient
|
5
|
-
|
6
|
-
class HttpRequestError < StandardError
|
7
|
-
extend Forwardable
|
8
|
-
|
9
|
-
def_delegators :@response, :code, :body
|
10
|
-
|
11
|
-
attr_reader :response
|
12
|
-
|
13
|
-
def initialize(_response)
|
14
|
-
@response = _response
|
15
|
-
super _response.message
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
class MaximumRedirectsError < StandardError
|
20
|
-
def initialize
|
21
|
-
super 'Redirection loop detected!'
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
attr_reader :proxy_addr, :proxy_port
|
26
|
-
|
27
|
-
def initialize(_proxy=nil)
|
28
|
-
if _proxy.nil?
|
29
|
-
@proxy_addr = nil
|
30
|
-
@proxy_port = nil
|
31
|
-
else
|
32
|
-
@proxy_addr, @proxy_port = _proxy.split ':'
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def get(_url, _query={}, _headers={})
|
37
|
-
uri = URI _url
|
38
|
-
perform_request Net::HTTP::Get, uri, _headers
|
39
|
-
end
|
40
|
-
|
41
|
-
def post(_url, _data, _headers={})
|
42
|
-
perform_request Net::HTTP::Post, URI(_url), _headers do |req|
|
43
|
-
req.body = prepare_data(_data)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def put(_url, _data, _headers={})
|
48
|
-
perform_request Net::HTTP::Put, URI(_url), _headers do |req|
|
49
|
-
req.body = prepare_data(_data)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
def delete(_url)
|
54
|
-
perform_request Net::HTTP::Delete, URI(_url), _headers
|
55
|
-
end
|
56
|
-
|
57
|
-
private
|
58
|
-
|
59
|
-
def perform_request(_req_type, _uri, _headers, _limit=10)
|
60
|
-
|
61
|
-
raise MaximumRedirectsError.new if _limit == 0
|
62
|
-
|
63
|
-
request = _req_type.new(_uri.request_uri.empty? ? '/' : _uri.request_uri)
|
64
|
-
_headers.keys.each { |k| request[k] = _headers[k] }
|
65
|
-
yield request if block_given?
|
66
|
-
|
67
|
-
response = build_client(_uri).request request
|
68
|
-
|
69
|
-
case response
|
70
|
-
when Net::HTTPSuccess then
|
71
|
-
response
|
72
|
-
when Net::HTTPRedirection then
|
73
|
-
location = response['location']
|
74
|
-
perform_request(_req_type, URI.parse(location), _headers, _limit - 1)
|
75
|
-
else
|
76
|
-
handle_error_response response
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
def build_client(uri)
|
81
|
-
client = Net::HTTP.new uri.host, uri.port || 80, proxy_addr, proxy_port
|
82
|
-
client.use_ssl = true if uri.scheme == 'https'
|
83
|
-
client.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
84
|
-
client
|
85
|
-
end
|
86
|
-
|
87
|
-
def handle_error_response(_response)
|
88
|
-
raise HttpRequestError.new _response
|
89
|
-
end
|
90
|
-
|
91
|
-
def prepare_data(_data)
|
92
|
-
if _data.is_a? Hash
|
93
|
-
_data.keys.map { |k| "#{k}=#{_data[k]}" }.join '&'
|
94
|
-
else _data end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|