cangrejo 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5d9cb5f49f576488055b6f9e9164bbfb6af271b4
4
- data.tar.gz: af63078d5ff257624f736e463ab96e8df5be5871
3
+ metadata.gz: 654c44f4ed8700ac8dbb214f8fd3006540ee31cb
4
+ data.tar.gz: cdf70051e44adbee2ca3291061ddd2ac71155571
5
5
  SHA512:
6
- metadata.gz: 1efc55a238a31a5962e11a41122ec066f1955129a65cd44f4c840f6ce39e4b40325a59f16ab3810cbdf68da8ef382dad617dbfca4f8c3b25cb9163d7c8d2d793
7
- data.tar.gz: eede15bb49b8efb34b51520e71fa83fea70819d0768e719a88a36b166626fbfda07fcb923ac41bf17202156037608ffb6a890f5fda5e5702731398824e34309b
6
+ metadata.gz: 847580c48eb860475720e652e7ee83dbb66f91aa7c88c2a15635296f0aa9471b7d63b67209a60b532a7ed421331aa8fc98b5c3721e68e6b913a2bc1edaf98b5d
7
+ data.tar.gz: e42eda249f094f9eca98b8a67f2f1bf77de681fe4c20e8538872ec46977a8cd5f255b9c96d2907cbc0dc72baddb71eb8d1664df1d30a92caa2d2238c66ff6984
@@ -4,4 +4,23 @@ module Cangrejo
4
4
 
5
5
  class ConfigurationError < Error; end
6
6
 
7
+ class LaunchTimeout < Error
8
+ def initialize(_msg)
9
+ super "Timed out trying to start crawler"
10
+ end
11
+ end
12
+
13
+ class CrawlerError < Error
14
+
15
+ def initialize(_msg, _backtrace)
16
+ super _msg
17
+ @original_bt = _backtrace
18
+ end
19
+
20
+ def set_backtrace(_backtrace)
21
+ super @original_bt + _backtrace
22
+ end
23
+
24
+ end
25
+
7
26
  end
@@ -1,41 +1,94 @@
1
1
  require "cangrejo/restclient/request_extensions"
2
2
  require "cangrejo/restclient/json_resource"
3
- require "cangrejo/support/launcher"
3
+ require "childprocess"
4
4
 
5
5
  module Cangrejo
6
6
  module Modes
7
7
  class Local
8
8
 
9
+ attr_reader :process, :path
10
+
9
11
  def initialize(_path)
10
12
  @path = _path
11
13
  end
12
14
 
13
15
  def setup
14
- init_launcher
16
+ select_socket_file
17
+ start_process
18
+ wait_for_socket
15
19
  init_rest_client
16
20
  end
17
21
 
18
22
  def release
19
- @launcher.kill unless @launcher.nil?
23
+ process.stop unless process.nil?
20
24
  end
21
25
 
22
26
  private
23
27
 
24
- def init_launcher
25
- @launcher = Support::Launcher.new @path, timeout: launch_timeout, argv: launch_cmd_arguments
26
- @launcher.launch
28
+ def cmd_enviroment
29
+ {
30
+ 'BUNDLE_GEMFILE' => gem_path
31
+ }
32
+ end
33
+
34
+ def cmd_arguments
35
+ [
36
+ '--no-reload'
37
+ ]
27
38
  end
28
39
 
29
40
  def launch_timeout
30
41
  5.0
31
42
  end
32
43
 
33
- def launch_cmd_arguments
34
- ['--no-reload']
44
+ def start_process
45
+ @process = prepare_process
46
+ @process.start
47
+ end
48
+
49
+ def prepare_process
50
+ cmd = [ "bin/crabfarm", "s", "--host=#{host}" ]
51
+ cmd += cmd_arguments
52
+
53
+ puts cmd.join(' ')
54
+
55
+ cp = ChildProcess.build(*cmd)
56
+ cp.environment.merge! cmd_enviroment
57
+ cp.cwd = @path
58
+ cp.leader = true
59
+ cp.io.inherit!
60
+
61
+ return cp
62
+ end
63
+
64
+ def wait_for_socket
65
+ Timeout::timeout(launch_timeout, LaunchTimeout) do
66
+ # TODO: detect if the process crashes before timeout
67
+ sleep 0.1 while not File.exist? @socket_file
68
+ end
35
69
  end
36
70
 
37
71
  def init_rest_client
38
- RestClient::JsonResource.new Net::SocketUri.new(@launcher.host, '/api/state')
72
+ RestClient::JsonResource.new Net::SocketUri.new(host, '/api/state')
73
+ end
74
+
75
+ def select_socket_file
76
+ @socket_file = random_filename while @socket_file.nil? or File.exist? @socket_file
77
+ end
78
+
79
+ def random_filename
80
+ File.join(Cangrejo.config.temp_path, "csocket-#{Random.rand(1000000)}.sock")
81
+ end
82
+
83
+ def gem_path
84
+ File.join(@path, 'Gemfile')
85
+ end
86
+
87
+ def host
88
+ # TODO: add posibility to use ports instead of unix sockets, it would also
89
+ # be nice to have a mechanism where the loaded process reports the port it
90
+ # binded to.
91
+ "unix://#{@socket_file}"
39
92
  end
40
93
 
41
94
  end
@@ -53,16 +53,22 @@ module Cangrejo
53
53
 
54
54
  params = add_timestamp(_params)
55
55
 
56
- while_times_out do
57
- @rest.put(name: _state, params: params, wait: WAIT_STEP)
58
- @state_name = _state
59
- @state_params = _params
56
+ begin
57
+ while_times_out do
58
+ @rest.put(name: _state, params: params, wait: WAIT_STEP)
59
+ @state_name = _state
60
+ @state_params = _params
61
+ end
62
+ rescue RestClient::InternalServerError => exc
63
+ raise unwrap_packed_exception exc
60
64
  end
61
65
 
62
66
  wrap_response_doc
63
67
  self
64
68
  end
65
69
 
70
+ alias :navigate :crawl
71
+
66
72
  def release
67
73
  @mode.release
68
74
  @rest = nil
@@ -107,5 +113,14 @@ module Cangrejo
107
113
  end
108
114
  end
109
115
 
116
+ def unwrap_packed_exception(_exc)
117
+ begin
118
+ data = JSON.parse _exc.http_body
119
+ Cangrejo::CrawlerError.new data['exception'], data['backtrace']
120
+ rescue
121
+ _exc
122
+ end
123
+ end
124
+
110
125
  end
111
126
  end
@@ -1,3 +1,3 @@
1
1
  module Cangrejo
2
- VERSION = "0.1.5"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,92 +1,183 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cangrejo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-04 00:00:00.000000000 Z
11
+ date: 2015-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.7.2
19
+ version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.7.2
26
+ version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: git
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '1.2'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '1.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: childprocess
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.5'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.5'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: bundler
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
- - - ~>
59
+ - - "~>"
46
60
  - !ruby/object:Gem::Version
47
61
  version: '1.6'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
- - - ~>
66
+ - - "~>"
53
67
  - !ruby/object:Gem::Version
54
68
  version: '1.6'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
- - - '>='
73
+ - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '0'
75
+ version: '10.4'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
- - - '>='
80
+ - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '0'
82
+ version: '10.4'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
- - - '>='
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec-nc
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.2'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.2'
111
+ - !ruby/object:Gem::Dependency
112
+ name: guard
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.11'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.11'
125
+ - !ruby/object:Gem::Dependency
126
+ name: guard-rspec
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '4.5'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '4.5'
139
+ - !ruby/object:Gem::Dependency
140
+ name: terminal-notifier-guard
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
74
144
  - !ruby/object:Gem::Version
75
- version: '0'
145
+ version: '1.6'
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: 1.6.1
76
149
  type: :development
77
150
  prerelease: false
78
151
  version_requirements: !ruby/object:Gem::Requirement
79
152
  requirements:
80
- - - '>='
153
+ - - "~>"
154
+ - !ruby/object:Gem::Version
155
+ version: '1.6'
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: 1.6.1
159
+ - !ruby/object:Gem::Dependency
160
+ name: sys-proctable
161
+ requirement: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
81
164
  - !ruby/object:Gem::Version
82
- version: '0'
83
- description:
165
+ version: '0.9'
166
+ type: :development
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '0.9'
173
+ description: Cangrejo lets you consume crabfarm crawlers using a simple DSL
84
174
  email:
85
175
  - ignacio@platan.us
86
176
  executables: []
87
177
  extensions: []
88
178
  extra_rdoc_files: []
89
179
  files:
180
+ - lib/cangrejo.rb
90
181
  - lib/cangrejo/configurator.rb
91
182
  - lib/cangrejo/errors.rb
92
183
  - lib/cangrejo/modes/git.rb
@@ -97,10 +188,8 @@ files:
97
188
  - lib/cangrejo/restclient/json_resource.rb
98
189
  - lib/cangrejo/restclient/request_extensions.rb
99
190
  - lib/cangrejo/session.rb
100
- - lib/cangrejo/support/launcher.rb
101
191
  - lib/cangrejo/version.rb
102
- - lib/cangrejo.rb
103
- homepage: ''
192
+ homepage: https://github.com/platanus/cangrejo-gem
104
193
  licenses:
105
194
  - MIT
106
195
  metadata: {}
@@ -110,19 +199,18 @@ require_paths:
110
199
  - lib
111
200
  required_ruby_version: !ruby/object:Gem::Requirement
112
201
  requirements:
113
- - - '>='
202
+ - - ">="
114
203
  - !ruby/object:Gem::Version
115
204
  version: '0'
116
205
  required_rubygems_version: !ruby/object:Gem::Requirement
117
206
  requirements:
118
- - - '>='
207
+ - - ">="
119
208
  - !ruby/object:Gem::Version
120
209
  version: '0'
121
210
  requirements: []
122
211
  rubyforge_project:
123
- rubygems_version: 2.0.14
212
+ rubygems_version: 2.4.5
124
213
  signing_key:
125
214
  specification_version: 4
126
215
  summary: Crabfarm client for ruby
127
216
  test_files: []
128
- has_rdoc:
@@ -1,79 +0,0 @@
1
- require 'timeout'
2
-
3
- module Cangrejo
4
- module Support
5
- class Launcher
6
-
7
- class LaunchTimeout < Cangrejo::Error
8
- def initialize(_msg)
9
- super "Timed out trying to start crawler"
10
- end
11
- end
12
-
13
- SPAWN_TIMEOUT = 5
14
- KILL_TIMEOUT = 5
15
-
16
- def initialize(_path, _options={})
17
- @path = _path
18
- @timeout = _options.fetch(:timeout, SPAWN_TIMEOUT)
19
- @argv = _options.fetch(:argv, [])
20
- select_socket_file
21
- end
22
-
23
- def host
24
- "unix://#{@socket_file}"
25
- end
26
-
27
- def launch
28
- gem_path = File.join(@path, 'Gemfile')
29
- # TODO: for some reason, the gemfile path must be specified here, maybe because of rbenv?
30
- @pid = Process.spawn({ 'BUNDLE_GEMFILE' => gem_path }, "bin/crabfarm s --host=#{host} #{@argv.join(' ')}", chdir: @path, pgroup: true)
31
- wait_for_socket
32
- end
33
-
34
- def kill
35
- safe_kill @pid unless @pid.nil?
36
- end
37
-
38
- private
39
-
40
- def select_socket_file
41
- @socket_file = random_filename while @socket_file.nil? or File.exist? @socket_file
42
- end
43
-
44
- def random_filename
45
- File.join(Cangrejo.config.temp_path, "csocket-#{Random.rand(1000000)}.sock")
46
- end
47
-
48
- def wait_for_socket
49
- Timeout::timeout(@timeout, LaunchTimeout) do
50
- # TODO: detect if the process crashes before timeout
51
- sleep 0.1 while not File.exist? @socket_file
52
- end
53
- end
54
-
55
- def safe_kill _pid
56
- begin
57
- Timeout.timeout(KILL_TIMEOUT) do
58
- Process.kill "INT", _pid
59
- Process.wait _pid
60
- end
61
- rescue Timeout::Error
62
- ensure
63
- ensure_dead _pid
64
- end
65
- end
66
-
67
- def ensure_dead _pid
68
- begin
69
- # Kill the entire process group to make sure childs aren't left hanging around
70
- Process.kill(-9, _pid)
71
- Process.wait _pid
72
- rescue
73
- nil
74
- end
75
- end
76
-
77
- end
78
- end
79
- end