cangrejo 0.0.12 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cangrejo/configurator.rb +6 -3
- data/lib/cangrejo/modes/remote.rb +14 -6
- data/lib/cangrejo/restclient/json_resource.rb +8 -0
- data/lib/cangrejo/session.rb +26 -5
- data/lib/cangrejo/version.rb +1 -1
- data/lib/cangrejo.rb +6 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b442485758a976c42a6d7d5ff6682a47f5ba4ff
|
4
|
+
data.tar.gz: b5dfee2e538ec781b3ce44867ff763e2e3b1d033
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 014f5bf8f5f5d56e8afdff1d0ffbf21a4a2cc13c22ff9b34875af4f08f11c3450a85a79efd46ca5495155dddad1dc0101c9c177bb1448b71969aaa32889fc81b
|
7
|
+
data.tar.gz: b3119281d804c14fda96bfbdb23f9a488a45f76f72738aacca5be53b90691524400239132ecb948a47ed3167c714396ef6c4c9a842a0898b8ca0233f7c3a9b0a
|
@@ -4,8 +4,7 @@ module Cangrejo
|
|
4
4
|
[
|
5
5
|
:crabfarm_host,
|
6
6
|
:crawler_cache_path,
|
7
|
-
:temp_path
|
8
|
-
:hold_by_default
|
7
|
+
:temp_path
|
9
8
|
]
|
10
9
|
.each do |name|
|
11
10
|
define_method "set_#{name}" do |value|
|
@@ -17,7 +16,11 @@ module Cangrejo
|
|
17
16
|
@config = _config
|
18
17
|
end
|
19
18
|
|
20
|
-
def
|
19
|
+
def set_crawler_setup(_options)
|
20
|
+
@config.crawlers[nil] = _options
|
21
|
+
end
|
22
|
+
|
23
|
+
def set_crawler_setup_for(_name, _options)
|
21
24
|
@config.crawlers[_name] = _options
|
22
25
|
end
|
23
26
|
|
@@ -4,22 +4,30 @@ module Cangrejo
|
|
4
4
|
module Modes
|
5
5
|
class Remote
|
6
6
|
|
7
|
-
def initialize(
|
8
|
-
@
|
7
|
+
def initialize(_remote)
|
8
|
+
@remote = _remote
|
9
9
|
end
|
10
10
|
|
11
11
|
def setup
|
12
|
-
|
13
|
-
sessions.post({}.to_json)
|
14
|
-
return prepare_resource "api/sessions/#{sessions.id}"
|
12
|
+
@session = create_session
|
15
13
|
end
|
16
14
|
|
17
15
|
def release
|
18
|
-
|
16
|
+
@session.put({ status: 'finished' }) unless @session.nil?
|
17
|
+
@session = nil
|
19
18
|
end
|
20
19
|
|
21
20
|
private
|
22
21
|
|
22
|
+
def session_collection
|
23
|
+
@collection ||= prepare_resource "api/bots/#{@remote}/sessions"
|
24
|
+
end
|
25
|
+
|
26
|
+
def create_session
|
27
|
+
new_session_id = session_collection.post({}.to_json).id
|
28
|
+
prepare_resource "api/sessions/#{new_session_id}"
|
29
|
+
end
|
30
|
+
|
23
31
|
def prepare_resource(_path)
|
24
32
|
RestClient::JsonResource.new URI.join(remote_host, _path).to_s
|
25
33
|
end
|
@@ -8,16 +8,24 @@ module RestClient
|
|
8
8
|
additional_headers['Accept'] = 'json'
|
9
9
|
r = super additional_headers, &block
|
10
10
|
@state = JSON.parse r
|
11
|
+
self
|
11
12
|
end
|
12
13
|
|
13
14
|
def post(payload, additional_headers={}, &block)
|
14
15
|
r = super payload.to_json, decorate_headers(additional_headers), &block
|
15
16
|
@state = JSON.parse r
|
17
|
+
self
|
16
18
|
end
|
17
19
|
|
18
20
|
def put(payload, additional_headers={}, &block)
|
19
21
|
r = super payload.to_json, decorate_headers(additional_headers), &block
|
20
22
|
@state = JSON.parse r
|
23
|
+
self
|
24
|
+
end
|
25
|
+
|
26
|
+
def delete(additional_headers={}, &block)
|
27
|
+
super decorate_headers(additional_headers), &block
|
28
|
+
self
|
21
29
|
end
|
22
30
|
|
23
31
|
private
|
data/lib/cangrejo/session.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'date'
|
1
2
|
require "ostruct"
|
2
3
|
|
3
4
|
module Cangrejo
|
@@ -7,11 +8,30 @@ module Cangrejo
|
|
7
8
|
|
8
9
|
attr_reader :doc, :state_name, :state_params
|
9
10
|
|
10
|
-
def
|
11
|
-
|
12
|
-
|
11
|
+
def self.connect _name_or_config
|
12
|
+
session = Session.new _name_or_config
|
13
|
+
begin
|
14
|
+
session.start
|
15
|
+
yield session
|
16
|
+
ensure
|
17
|
+
session.release
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def initialize(_name_or_config)
|
22
|
+
options = if _name_or_config.is_a? Hash
|
23
|
+
@name = nil
|
24
|
+
_name_or_config
|
25
|
+
else
|
26
|
+
@name = _name_or_config
|
27
|
+
Cangrejo.config.crawlers.fetch(_name, {})
|
28
|
+
end
|
29
|
+
|
13
30
|
select_mode options
|
14
|
-
|
31
|
+
end
|
32
|
+
|
33
|
+
def started?
|
34
|
+
not @rest.nil?
|
15
35
|
end
|
16
36
|
|
17
37
|
def start
|
@@ -45,6 +65,7 @@ module Cangrejo
|
|
45
65
|
|
46
66
|
def release
|
47
67
|
@mode.release
|
68
|
+
@rest = nil
|
48
69
|
self
|
49
70
|
end
|
50
71
|
|
@@ -82,7 +103,7 @@ module Cangrejo
|
|
82
103
|
Modes::Git.new _options[:git_remote], _options[:git_commit], _options[:relative_path], @name
|
83
104
|
else
|
84
105
|
require "cangrejo/modes/remote"
|
85
|
-
Modes::Remote.new @name
|
106
|
+
Modes::Remote.new _options.fetch(:remote, @name)
|
86
107
|
end
|
87
108
|
end
|
88
109
|
|
data/lib/cangrejo/version.rb
CHANGED
data/lib/cangrejo.rb
CHANGED
@@ -9,7 +9,6 @@ module Cangrejo
|
|
9
9
|
crabfarm_host: 'http://api.crabfarm.io',
|
10
10
|
crawler_cache_path: 'tmp/crawler_cache',
|
11
11
|
temp_path: 'tmp',
|
12
|
-
hold_by_default: false,
|
13
12
|
crawlers: Hash.new
|
14
13
|
})
|
15
14
|
|
@@ -20,4 +19,10 @@ module Cangrejo
|
|
20
19
|
def self.configure
|
21
20
|
yield Configurator.new @@config
|
22
21
|
end
|
22
|
+
|
23
|
+
def self.connect _name_or_config=nil, &_block
|
24
|
+
_name_or_config = config[:crawlers].values.first if _name_or_config.nil?
|
25
|
+
Session.connect _name_or_config, &_block
|
26
|
+
end
|
27
|
+
|
23
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cangrejo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.12
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - '>='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
description:
|
70
84
|
email:
|
71
85
|
- ignacio@platan.us
|