turbot-runner 0.0.17 → 0.0.18
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/turbot_runner.rb +17 -7
- data/lib/turbot_runner/version.rb +1 -1
- metadata +27 -16
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OGMxNGNmYmMwNGQ4ZDQ2MDkzOGQ4NmQ3NTI5NzFjY2U4ZWQ4Y2ZlNQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MjJkOTI5NmU3MjViYWFhZjlhZjJlMmZmNzIzZGI2Mzc5YTA2NWZmNw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NDNmZDcwMGVmMDk4ZjM2ZDM0OWQ5ODgxYzJlNjRkMTIxNTk5NDZlMDc4ZGFh
|
10
|
+
OGUwNGIwOWMwOGY3ODNjYmE4ODE2NGYzMjJlM2M0NmU4MDE1YmEyZTE5NDU2
|
11
|
+
MWM1M2FkN2Y0MmRiODgyNzMwYzBkMjczYzEwYjJmOWIxMjIyNDU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NmQzMDIwNTI3YWU2M2NhMjJiN2NlYThiMWVlNTkyM2E1MWE0MGM3MTlmZDVk
|
14
|
+
ZDFjNDcwZTI3ZDllOTJjYWRmNjc0OTg2OWFkZmZjM2RjYjhhM2Y0ZDMzYzMw
|
15
|
+
M2U2MmU4Yzg1YzgwMWRmN2QxZjgyNjZmNWE4MmFmMTNlMDBhYWQ=
|
data/lib/turbot_runner.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
require 'active_support/core_ext/hash/slice'
|
2
|
+
require 'active_support/core_ext/object/to_query'
|
1
3
|
require 'json'
|
2
4
|
require 'open3'
|
5
|
+
require 'set'
|
3
6
|
require 'timeout'
|
4
7
|
|
5
8
|
module TurbotRunner
|
@@ -25,11 +28,11 @@ module TurbotRunner
|
|
25
28
|
@status = :initialized
|
26
29
|
@interrupted = false
|
27
30
|
@schemas = {}
|
31
|
+
@seen_uids = Set.new
|
28
32
|
end
|
29
33
|
|
30
34
|
def run(opts={})
|
31
35
|
@status = :running
|
32
|
-
validation_required = opts[:validate] || true
|
33
36
|
|
34
37
|
command = "#{interpreter_for(scraper_file)} #{scraper_file}"
|
35
38
|
data_type = @config['data_type']
|
@@ -67,6 +70,10 @@ module TurbotRunner
|
|
67
70
|
runner.send_line(line)
|
68
71
|
line1 = runner.get_next_line
|
69
72
|
|
73
|
+
# A transformer should output an empty line if it doesn't make
|
74
|
+
# sense to transform a record.
|
75
|
+
next if line1.strip.empty?
|
76
|
+
|
70
77
|
begin
|
71
78
|
record1 = JSON.parse(line1)
|
72
79
|
rescue JSON::ParserError
|
@@ -136,14 +143,17 @@ module TurbotRunner
|
|
136
143
|
|
137
144
|
if messages.empty?
|
138
145
|
identifying_fields = identifying_fields_for_data_type(data_type)
|
146
|
+
identifying_hash = record.slice(*identifying_fields)
|
139
147
|
|
140
|
-
|
141
|
-
identifying_fields.each do |k|
|
142
|
-
hash[k] = record[k] if record.has_key?(k)
|
143
|
-
end
|
144
|
-
|
145
|
-
if hash.empty?
|
148
|
+
if identifying_hash.empty?
|
146
149
|
messages << "Missing attributes for identifying fields: #{identifying_fields.join(', ')}"
|
150
|
+
else
|
151
|
+
record_uid = Digest::SHA1.hexdigest(identifying_hash.to_query)
|
152
|
+
if @seen_uids.include?(record_uid)
|
153
|
+
messages << "Values for identifying fields must be unique. There has already been a record with: #{identifying_hash.to_json}"
|
154
|
+
else
|
155
|
+
@seen_uids << record_uid
|
156
|
+
end
|
147
157
|
end
|
148
158
|
end
|
149
159
|
|
metadata
CHANGED
@@ -1,20 +1,32 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.17
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.18
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- OpenCorporates
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-16 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 4.1.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 4.1.0
|
14
27
|
- !ruby/object:Gem::Dependency
|
15
28
|
name: json-schema
|
16
29
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
30
|
requirements:
|
19
31
|
- - '='
|
20
32
|
- !ruby/object:Gem::Version
|
@@ -22,7 +34,6 @@ dependencies:
|
|
22
34
|
type: :runtime
|
23
35
|
prerelease: false
|
24
36
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
37
|
requirements:
|
27
38
|
- - '='
|
28
39
|
- !ruby/object:Gem::Version
|
@@ -37,13 +48,6 @@ files:
|
|
37
48
|
- lib/prerun.rb
|
38
49
|
- lib/turbot_runner.rb
|
39
50
|
- lib/turbot_runner/version.rb
|
40
|
-
- spec/dummy-bot-python/manifest.json
|
41
|
-
- spec/dummy-bot-python/scraper.py
|
42
|
-
- spec/dummy-bot-python/transformer.py
|
43
|
-
- spec/dummy-bot-ruby/manifest.json
|
44
|
-
- spec/dummy-bot-ruby/scraper.rb
|
45
|
-
- spec/dummy-bot-ruby/transformer.rb
|
46
|
-
- spec/turbot_runner_spec.rb
|
47
51
|
- schema/schemas/company-schema.json
|
48
52
|
- schema/schemas/financial-payment-schema.json
|
49
53
|
- schema/schemas/includes/address.json
|
@@ -64,29 +68,36 @@ files:
|
|
64
68
|
- schema/schemas/share-parcel-schema.json
|
65
69
|
- schema/schemas/simple-licence-schema.json
|
66
70
|
- schema/schemas/subsidiary-relationship-schema.json
|
71
|
+
- spec/dummy-bot-python/manifest.json
|
72
|
+
- spec/dummy-bot-python/scraper.py
|
73
|
+
- spec/dummy-bot-python/transformer.py
|
74
|
+
- spec/dummy-bot-ruby/manifest.json
|
75
|
+
- spec/dummy-bot-ruby/scraper.rb
|
76
|
+
- spec/dummy-bot-ruby/transformer.rb
|
77
|
+
- spec/turbot_runner_spec.rb
|
67
78
|
homepage: http://turbot.opencorporates.com/
|
68
79
|
licenses:
|
69
80
|
- MIT
|
81
|
+
metadata: {}
|
70
82
|
post_install_message:
|
71
83
|
rdoc_options: []
|
72
84
|
require_paths:
|
73
85
|
- lib
|
74
86
|
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
-
none: false
|
76
87
|
requirements:
|
77
88
|
- - ! '>='
|
78
89
|
- !ruby/object:Gem::Version
|
79
90
|
version: 1.9.2
|
80
91
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
92
|
requirements:
|
83
93
|
- - ! '>='
|
84
94
|
- !ruby/object:Gem::Version
|
85
95
|
version: '0'
|
86
96
|
requirements: []
|
87
97
|
rubyforge_project:
|
88
|
-
rubygems_version:
|
98
|
+
rubygems_version: 2.2.2
|
89
99
|
signing_key:
|
90
|
-
specification_version:
|
100
|
+
specification_version: 4
|
91
101
|
summary: Utilities for running bots with Turbot
|
92
102
|
test_files: []
|
103
|
+
has_rdoc:
|