cangrejo 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5d9cb5f49f576488055b6f9e9164bbfb6af271b4
4
- data.tar.gz: af63078d5ff257624f736e463ab96e8df5be5871
3
+ metadata.gz: 654c44f4ed8700ac8dbb214f8fd3006540ee31cb
4
+ data.tar.gz: cdf70051e44adbee2ca3291061ddd2ac71155571
5
5
  SHA512:
6
- metadata.gz: 1efc55a238a31a5962e11a41122ec066f1955129a65cd44f4c840f6ce39e4b40325a59f16ab3810cbdf68da8ef382dad617dbfca4f8c3b25cb9163d7c8d2d793
7
- data.tar.gz: eede15bb49b8efb34b51520e71fa83fea70819d0768e719a88a36b166626fbfda07fcb923ac41bf17202156037608ffb6a890f5fda5e5702731398824e34309b
6
+ metadata.gz: 847580c48eb860475720e652e7ee83dbb66f91aa7c88c2a15635296f0aa9471b7d63b67209a60b532a7ed421331aa8fc98b5c3721e68e6b913a2bc1edaf98b5d
7
+ data.tar.gz: e42eda249f094f9eca98b8a67f2f1bf77de681fe4c20e8538872ec46977a8cd5f255b9c96d2907cbc0dc72baddb71eb8d1664df1d30a92caa2d2238c66ff6984
@@ -4,4 +4,23 @@ module Cangrejo
4
4
 
5
5
  class ConfigurationError < Error; end
6
6
 
7
+ class LaunchTimeout < Error
8
+ def initialize(_msg)
9
+ super "Timed out trying to start crawler"
10
+ end
11
+ end
12
+
13
+ class CrawlerError < Error
14
+
15
+ def initialize(_msg, _backtrace)
16
+ super _msg
17
+ @original_bt = _backtrace
18
+ end
19
+
20
+ def set_backtrace(_backtrace)
21
+ super @original_bt + _backtrace
22
+ end
23
+
24
+ end
25
+
7
26
  end
@@ -1,41 +1,94 @@
1
1
  require "cangrejo/restclient/request_extensions"
2
2
  require "cangrejo/restclient/json_resource"
3
- require "cangrejo/support/launcher"
3
+ require "childprocess"
4
4
 
5
5
  module Cangrejo
6
6
  module Modes
7
7
  class Local
8
8
 
9
+ attr_reader :process, :path
10
+
9
11
  def initialize(_path)
10
12
  @path = _path
11
13
  end
12
14
 
13
15
  def setup
14
- init_launcher
16
+ select_socket_file
17
+ start_process
18
+ wait_for_socket
15
19
  init_rest_client
16
20
  end
17
21
 
18
22
  def release
19
- @launcher.kill unless @launcher.nil?
23
+ process.stop unless process.nil?
20
24
  end
21
25
 
22
26
  private
23
27
 
24
- def init_launcher
25
- @launcher = Support::Launcher.new @path, timeout: launch_timeout, argv: launch_cmd_arguments
26
- @launcher.launch
28
+ def cmd_enviroment
29
+ {
30
+ 'BUNDLE_GEMFILE' => gem_path
31
+ }
32
+ end
33
+
34
+ def cmd_arguments
35
+ [
36
+ '--no-reload'
37
+ ]
27
38
  end
28
39
 
29
40
  def launch_timeout
30
41
  5.0
31
42
  end
32
43
 
33
- def launch_cmd_arguments
34
- ['--no-reload']
44
+ def start_process
45
+ @process = prepare_process
46
+ @process.start
47
+ end
48
+
49
+ def prepare_process
50
+ cmd = [ "bin/crabfarm", "s", "--host=#{host}" ]
51
+ cmd += cmd_arguments
52
+
53
+ puts cmd.join(' ')
54
+
55
+ cp = ChildProcess.build(*cmd)
56
+ cp.environment.merge! cmd_enviroment
57
+ cp.cwd = @path
58
+ cp.leader = true
59
+ cp.io.inherit!
60
+
61
+ return cp
62
+ end
63
+
64
+ def wait_for_socket
65
+ Timeout::timeout(launch_timeout, LaunchTimeout) do
66
+ # TODO: detect if the process crashes before timeout
67
+ sleep 0.1 while not File.exist? @socket_file
68
+ end
35
69
  end
36
70
 
37
71
  def init_rest_client
38
- RestClient::JsonResource.new Net::SocketUri.new(@launcher.host, '/api/state')
72
+ RestClient::JsonResource.new Net::SocketUri.new(host, '/api/state')
73
+ end
74
+
75
+ def select_socket_file
76
+ @socket_file = random_filename while @socket_file.nil? or File.exist? @socket_file
77
+ end
78
+
79
+ def random_filename
80
+ File.join(Cangrejo.config.temp_path, "csocket-#{Random.rand(1000000)}.sock")
81
+ end
82
+
83
+ def gem_path
84
+ File.join(@path, 'Gemfile')
85
+ end
86
+
87
+ def host
88
+ # TODO: add posibility to use ports instead of unix sockets, it would also
89
+ # be nice to have a mechanism where the loaded process reports the port it
90
+ # binded to.
91
+ "unix://#{@socket_file}"
39
92
  end
40
93
 
41
94
  end
@@ -53,16 +53,22 @@ module Cangrejo
53
53
 
54
54
  params = add_timestamp(_params)
55
55
 
56
- while_times_out do
57
- @rest.put(name: _state, params: params, wait: WAIT_STEP)
58
- @state_name = _state
59
- @state_params = _params
56
+ begin
57
+ while_times_out do
58
+ @rest.put(name: _state, params: params, wait: WAIT_STEP)
59
+ @state_name = _state
60
+ @state_params = _params
61
+ end
62
+ rescue RestClient::InternalServerError => exc
63
+ raise unwrap_packed_exception exc
60
64
  end
61
65
 
62
66
  wrap_response_doc
63
67
  self
64
68
  end
65
69
 
70
+ alias :navigate :crawl
71
+
66
72
  def release
67
73
  @mode.release
68
74
  @rest = nil
@@ -107,5 +113,14 @@ module Cangrejo
107
113
  end
108
114
  end
109
115
 
116
+ def unwrap_packed_exception(_exc)
117
+ begin
118
+ data = JSON.parse _exc.http_body
119
+ Cangrejo::CrawlerError.new data['exception'], data['backtrace']
120
+ rescue
121
+ _exc
122
+ end
123
+ end
124
+
110
125
  end
111
126
  end
@@ -1,3 +1,3 @@
1
1
  module Cangrejo
2
- VERSION = "0.1.5"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,92 +1,183 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cangrejo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-04 00:00:00.000000000 Z
11
+ date: 2015-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.7.2
19
+ version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.7.2
26
+ version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: git
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '1.2'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '1.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: childprocess
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.5'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.5'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: bundler
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
- - - ~>
59
+ - - "~>"
46
60
  - !ruby/object:Gem::Version
47
61
  version: '1.6'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
- - - ~>
66
+ - - "~>"
53
67
  - !ruby/object:Gem::Version
54
68
  version: '1.6'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
- - - '>='
73
+ - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '0'
75
+ version: '10.4'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
- - - '>='
80
+ - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '0'
82
+ version: '10.4'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
- - - '>='
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec-nc
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.2'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.2'
111
+ - !ruby/object:Gem::Dependency
112
+ name: guard
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.11'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.11'
125
+ - !ruby/object:Gem::Dependency
126
+ name: guard-rspec
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '4.5'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '4.5'
139
+ - !ruby/object:Gem::Dependency
140
+ name: terminal-notifier-guard
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
74
144
  - !ruby/object:Gem::Version
75
- version: '0'
145
+ version: '1.6'
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: 1.6.1
76
149
  type: :development
77
150
  prerelease: false
78
151
  version_requirements: !ruby/object:Gem::Requirement
79
152
  requirements:
80
- - - '>='
153
+ - - "~>"
154
+ - !ruby/object:Gem::Version
155
+ version: '1.6'
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: 1.6.1
159
+ - !ruby/object:Gem::Dependency
160
+ name: sys-proctable
161
+ requirement: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
81
164
  - !ruby/object:Gem::Version
82
- version: '0'
83
- description:
165
+ version: '0.9'
166
+ type: :development
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '0.9'
173
+ description: Cangrejo lets you consume crabfarm crawlers using a simple DSL
84
174
  email:
85
175
  - ignacio@platan.us
86
176
  executables: []
87
177
  extensions: []
88
178
  extra_rdoc_files: []
89
179
  files:
180
+ - lib/cangrejo.rb
90
181
  - lib/cangrejo/configurator.rb
91
182
  - lib/cangrejo/errors.rb
92
183
  - lib/cangrejo/modes/git.rb
@@ -97,10 +188,8 @@ files:
97
188
  - lib/cangrejo/restclient/json_resource.rb
98
189
  - lib/cangrejo/restclient/request_extensions.rb
99
190
  - lib/cangrejo/session.rb
100
- - lib/cangrejo/support/launcher.rb
101
191
  - lib/cangrejo/version.rb
102
- - lib/cangrejo.rb
103
- homepage: ''
192
+ homepage: https://github.com/platanus/cangrejo-gem
104
193
  licenses:
105
194
  - MIT
106
195
  metadata: {}
@@ -110,19 +199,18 @@ require_paths:
110
199
  - lib
111
200
  required_ruby_version: !ruby/object:Gem::Requirement
112
201
  requirements:
113
- - - '>='
202
+ - - ">="
114
203
  - !ruby/object:Gem::Version
115
204
  version: '0'
116
205
  required_rubygems_version: !ruby/object:Gem::Requirement
117
206
  requirements:
118
- - - '>='
207
+ - - ">="
119
208
  - !ruby/object:Gem::Version
120
209
  version: '0'
121
210
  requirements: []
122
211
  rubyforge_project:
123
- rubygems_version: 2.0.14
212
+ rubygems_version: 2.4.5
124
213
  signing_key:
125
214
  specification_version: 4
126
215
  summary: Crabfarm client for ruby
127
216
  test_files: []
128
- has_rdoc:
@@ -1,79 +0,0 @@
1
- require 'timeout'
2
-
3
- module Cangrejo
4
- module Support
5
- class Launcher
6
-
7
- class LaunchTimeout < Cangrejo::Error
8
- def initialize(_msg)
9
- super "Timed out trying to start crawler"
10
- end
11
- end
12
-
13
- SPAWN_TIMEOUT = 5
14
- KILL_TIMEOUT = 5
15
-
16
- def initialize(_path, _options={})
17
- @path = _path
18
- @timeout = _options.fetch(:timeout, SPAWN_TIMEOUT)
19
- @argv = _options.fetch(:argv, [])
20
- select_socket_file
21
- end
22
-
23
- def host
24
- "unix://#{@socket_file}"
25
- end
26
-
27
- def launch
28
- gem_path = File.join(@path, 'Gemfile')
29
- # TODO: for some reason, the gemfile path must be specified here, maybe because of rbenv?
30
- @pid = Process.spawn({ 'BUNDLE_GEMFILE' => gem_path }, "bin/crabfarm s --host=#{host} #{@argv.join(' ')}", chdir: @path, pgroup: true)
31
- wait_for_socket
32
- end
33
-
34
- def kill
35
- safe_kill @pid unless @pid.nil?
36
- end
37
-
38
- private
39
-
40
- def select_socket_file
41
- @socket_file = random_filename while @socket_file.nil? or File.exist? @socket_file
42
- end
43
-
44
- def random_filename
45
- File.join(Cangrejo.config.temp_path, "csocket-#{Random.rand(1000000)}.sock")
46
- end
47
-
48
- def wait_for_socket
49
- Timeout::timeout(@timeout, LaunchTimeout) do
50
- # TODO: detect if the process crashes before timeout
51
- sleep 0.1 while not File.exist? @socket_file
52
- end
53
- end
54
-
55
- def safe_kill _pid
56
- begin
57
- Timeout.timeout(KILL_TIMEOUT) do
58
- Process.kill "INT", _pid
59
- Process.wait _pid
60
- end
61
- rescue Timeout::Error
62
- ensure
63
- ensure_dead _pid
64
- end
65
- end
66
-
67
- def ensure_dead _pid
68
- begin
69
- # Kill the entire process group to make sure childs aren't left hanging around
70
- Process.kill(-9, _pid)
71
- Process.wait _pid
72
- rescue
73
- nil
74
- end
75
- end
76
-
77
- end
78
- end
79
- end