cangrejo 0.0.12 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cangrejo/configurator.rb +6 -3
- data/lib/cangrejo/modes/remote.rb +14 -6
- data/lib/cangrejo/restclient/json_resource.rb +8 -0
- data/lib/cangrejo/session.rb +26 -5
- data/lib/cangrejo/version.rb +1 -1
- data/lib/cangrejo.rb +6 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b442485758a976c42a6d7d5ff6682a47f5ba4ff
|
4
|
+
data.tar.gz: b5dfee2e538ec781b3ce44867ff763e2e3b1d033
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 014f5bf8f5f5d56e8afdff1d0ffbf21a4a2cc13c22ff9b34875af4f08f11c3450a85a79efd46ca5495155dddad1dc0101c9c177bb1448b71969aaa32889fc81b
|
7
|
+
data.tar.gz: b3119281d804c14fda96bfbdb23f9a488a45f76f72738aacca5be53b90691524400239132ecb948a47ed3167c714396ef6c4c9a842a0898b8ca0233f7c3a9b0a
|
@@ -4,8 +4,7 @@ module Cangrejo
|
|
4
4
|
[
|
5
5
|
:crabfarm_host,
|
6
6
|
:crawler_cache_path,
|
7
|
-
:temp_path
|
8
|
-
:hold_by_default
|
7
|
+
:temp_path
|
9
8
|
]
|
10
9
|
.each do |name|
|
11
10
|
define_method "set_#{name}" do |value|
|
@@ -17,7 +16,11 @@ module Cangrejo
|
|
17
16
|
@config = _config
|
18
17
|
end
|
19
18
|
|
20
|
-
def
|
19
|
+
def set_crawler_setup(_options)
|
20
|
+
@config.crawlers[nil] = _options
|
21
|
+
end
|
22
|
+
|
23
|
+
def set_crawler_setup_for(_name, _options)
|
21
24
|
@config.crawlers[_name] = _options
|
22
25
|
end
|
23
26
|
|
@@ -4,22 +4,30 @@ module Cangrejo
|
|
4
4
|
module Modes
|
5
5
|
class Remote
|
6
6
|
|
7
|
-
def initialize(
|
8
|
-
@
|
7
|
+
def initialize(_remote)
|
8
|
+
@remote = _remote
|
9
9
|
end
|
10
10
|
|
11
11
|
def setup
|
12
|
-
|
13
|
-
sessions.post({}.to_json)
|
14
|
-
return prepare_resource "api/sessions/#{sessions.id}"
|
12
|
+
@session = create_session
|
15
13
|
end
|
16
14
|
|
17
15
|
def release
|
18
|
-
|
16
|
+
@session.put({ status: 'finished' }) unless @session.nil?
|
17
|
+
@session = nil
|
19
18
|
end
|
20
19
|
|
21
20
|
private
|
22
21
|
|
22
|
+
def session_collection
|
23
|
+
@collection ||= prepare_resource "api/bots/#{@remote}/sessions"
|
24
|
+
end
|
25
|
+
|
26
|
+
def create_session
|
27
|
+
new_session_id = session_collection.post({}.to_json).id
|
28
|
+
prepare_resource "api/sessions/#{new_session_id}"
|
29
|
+
end
|
30
|
+
|
23
31
|
def prepare_resource(_path)
|
24
32
|
RestClient::JsonResource.new URI.join(remote_host, _path).to_s
|
25
33
|
end
|
@@ -8,16 +8,24 @@ module RestClient
|
|
8
8
|
additional_headers['Accept'] = 'json'
|
9
9
|
r = super additional_headers, &block
|
10
10
|
@state = JSON.parse r
|
11
|
+
self
|
11
12
|
end
|
12
13
|
|
13
14
|
def post(payload, additional_headers={}, &block)
|
14
15
|
r = super payload.to_json, decorate_headers(additional_headers), &block
|
15
16
|
@state = JSON.parse r
|
17
|
+
self
|
16
18
|
end
|
17
19
|
|
18
20
|
def put(payload, additional_headers={}, &block)
|
19
21
|
r = super payload.to_json, decorate_headers(additional_headers), &block
|
20
22
|
@state = JSON.parse r
|
23
|
+
self
|
24
|
+
end
|
25
|
+
|
26
|
+
def delete(additional_headers={}, &block)
|
27
|
+
super decorate_headers(additional_headers), &block
|
28
|
+
self
|
21
29
|
end
|
22
30
|
|
23
31
|
private
|
data/lib/cangrejo/session.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'date'
|
1
2
|
require "ostruct"
|
2
3
|
|
3
4
|
module Cangrejo
|
@@ -7,11 +8,30 @@ module Cangrejo
|
|
7
8
|
|
8
9
|
attr_reader :doc, :state_name, :state_params
|
9
10
|
|
10
|
-
def
|
11
|
-
|
12
|
-
|
11
|
+
def self.connect _name_or_config
|
12
|
+
session = Session.new _name_or_config
|
13
|
+
begin
|
14
|
+
session.start
|
15
|
+
yield session
|
16
|
+
ensure
|
17
|
+
session.release
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def initialize(_name_or_config)
|
22
|
+
options = if _name_or_config.is_a? Hash
|
23
|
+
@name = nil
|
24
|
+
_name_or_config
|
25
|
+
else
|
26
|
+
@name = _name_or_config
|
27
|
+
Cangrejo.config.crawlers.fetch(_name, {})
|
28
|
+
end
|
29
|
+
|
13
30
|
select_mode options
|
14
|
-
|
31
|
+
end
|
32
|
+
|
33
|
+
def started?
|
34
|
+
not @rest.nil?
|
15
35
|
end
|
16
36
|
|
17
37
|
def start
|
@@ -45,6 +65,7 @@ module Cangrejo
|
|
45
65
|
|
46
66
|
def release
|
47
67
|
@mode.release
|
68
|
+
@rest = nil
|
48
69
|
self
|
49
70
|
end
|
50
71
|
|
@@ -82,7 +103,7 @@ module Cangrejo
|
|
82
103
|
Modes::Git.new _options[:git_remote], _options[:git_commit], _options[:relative_path], @name
|
83
104
|
else
|
84
105
|
require "cangrejo/modes/remote"
|
85
|
-
Modes::Remote.new @name
|
106
|
+
Modes::Remote.new _options.fetch(:remote, @name)
|
86
107
|
end
|
87
108
|
end
|
88
109
|
|
data/lib/cangrejo/version.rb
CHANGED
data/lib/cangrejo.rb
CHANGED
@@ -9,7 +9,6 @@ module Cangrejo
|
|
9
9
|
crabfarm_host: 'http://api.crabfarm.io',
|
10
10
|
crawler_cache_path: 'tmp/crawler_cache',
|
11
11
|
temp_path: 'tmp',
|
12
|
-
hold_by_default: false,
|
13
12
|
crawlers: Hash.new
|
14
13
|
})
|
15
14
|
|
@@ -20,4 +19,10 @@ module Cangrejo
|
|
20
19
|
def self.configure
|
21
20
|
yield Configurator.new @@config
|
22
21
|
end
|
22
|
+
|
23
|
+
def self.connect _name_or_config=nil, &_block
|
24
|
+
_name_or_config = config[:crawlers].values.first if _name_or_config.nil?
|
25
|
+
Session.connect _name_or_config, &_block
|
26
|
+
end
|
27
|
+
|
23
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cangrejo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.12
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - '>='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
description:
|
70
84
|
email:
|
71
85
|
- ignacio@platan.us
|