turbot-runner 0.0.17 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/turbot_runner.rb +17 -7
- data/lib/turbot_runner/version.rb +1 -1
- metadata +27 -16
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OGMxNGNmYmMwNGQ4ZDQ2MDkzOGQ4NmQ3NTI5NzFjY2U4ZWQ4Y2ZlNQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MjJkOTI5NmU3MjViYWFhZjlhZjJlMmZmNzIzZGI2Mzc5YTA2NWZmNw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NDNmZDcwMGVmMDk4ZjM2ZDM0OWQ5ODgxYzJlNjRkMTIxNTk5NDZlMDc4ZGFh
|
10
|
+
OGUwNGIwOWMwOGY3ODNjYmE4ODE2NGYzMjJlM2M0NmU4MDE1YmEyZTE5NDU2
|
11
|
+
MWM1M2FkN2Y0MmRiODgyNzMwYzBkMjczYzEwYjJmOWIxMjIyNDU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NmQzMDIwNTI3YWU2M2NhMjJiN2NlYThiMWVlNTkyM2E1MWE0MGM3MTlmZDVk
|
14
|
+
ZDFjNDcwZTI3ZDllOTJjYWRmNjc0OTg2OWFkZmZjM2RjYjhhM2Y0ZDMzYzMw
|
15
|
+
M2U2MmU4Yzg1YzgwMWRmN2QxZjgyNjZmNWE4MmFmMTNlMDBhYWQ=
|
data/lib/turbot_runner.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
require 'active_support/core_ext/hash/slice'
|
2
|
+
require 'active_support/core_ext/object/to_query'
|
1
3
|
require 'json'
|
2
4
|
require 'open3'
|
5
|
+
require 'set'
|
3
6
|
require 'timeout'
|
4
7
|
|
5
8
|
module TurbotRunner
|
@@ -25,11 +28,11 @@ module TurbotRunner
|
|
25
28
|
@status = :initialized
|
26
29
|
@interrupted = false
|
27
30
|
@schemas = {}
|
31
|
+
@seen_uids = Set.new
|
28
32
|
end
|
29
33
|
|
30
34
|
def run(opts={})
|
31
35
|
@status = :running
|
32
|
-
validation_required = opts[:validate] || true
|
33
36
|
|
34
37
|
command = "#{interpreter_for(scraper_file)} #{scraper_file}"
|
35
38
|
data_type = @config['data_type']
|
@@ -67,6 +70,10 @@ module TurbotRunner
|
|
67
70
|
runner.send_line(line)
|
68
71
|
line1 = runner.get_next_line
|
69
72
|
|
73
|
+
# A transformer should output an empty line if it doesn't make
|
74
|
+
# sense to transform a record.
|
75
|
+
next if line1.strip.empty?
|
76
|
+
|
70
77
|
begin
|
71
78
|
record1 = JSON.parse(line1)
|
72
79
|
rescue JSON::ParserError
|
@@ -136,14 +143,17 @@ module TurbotRunner
|
|
136
143
|
|
137
144
|
if messages.empty?
|
138
145
|
identifying_fields = identifying_fields_for_data_type(data_type)
|
146
|
+
identifying_hash = record.slice(*identifying_fields)
|
139
147
|
|
140
|
-
|
141
|
-
identifying_fields.each do |k|
|
142
|
-
hash[k] = record[k] if record.has_key?(k)
|
143
|
-
end
|
144
|
-
|
145
|
-
if hash.empty?
|
148
|
+
if identifying_hash.empty?
|
146
149
|
messages << "Missing attributes for identifying fields: #{identifying_fields.join(', ')}"
|
150
|
+
else
|
151
|
+
record_uid = Digest::SHA1.hexdigest(identifying_hash.to_query)
|
152
|
+
if @seen_uids.include?(record_uid)
|
153
|
+
messages << "Values for identifying fields must be unique. There has already been a record with: #{identifying_hash.to_json}"
|
154
|
+
else
|
155
|
+
@seen_uids << record_uid
|
156
|
+
end
|
147
157
|
end
|
148
158
|
end
|
149
159
|
|
metadata
CHANGED
@@ -1,20 +1,32 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.17
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.18
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- OpenCorporates
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-16 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 4.1.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 4.1.0
|
14
27
|
- !ruby/object:Gem::Dependency
|
15
28
|
name: json-schema
|
16
29
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
30
|
requirements:
|
19
31
|
- - '='
|
20
32
|
- !ruby/object:Gem::Version
|
@@ -22,7 +34,6 @@ dependencies:
|
|
22
34
|
type: :runtime
|
23
35
|
prerelease: false
|
24
36
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
37
|
requirements:
|
27
38
|
- - '='
|
28
39
|
- !ruby/object:Gem::Version
|
@@ -37,13 +48,6 @@ files:
|
|
37
48
|
- lib/prerun.rb
|
38
49
|
- lib/turbot_runner.rb
|
39
50
|
- lib/turbot_runner/version.rb
|
40
|
-
- spec/dummy-bot-python/manifest.json
|
41
|
-
- spec/dummy-bot-python/scraper.py
|
42
|
-
- spec/dummy-bot-python/transformer.py
|
43
|
-
- spec/dummy-bot-ruby/manifest.json
|
44
|
-
- spec/dummy-bot-ruby/scraper.rb
|
45
|
-
- spec/dummy-bot-ruby/transformer.rb
|
46
|
-
- spec/turbot_runner_spec.rb
|
47
51
|
- schema/schemas/company-schema.json
|
48
52
|
- schema/schemas/financial-payment-schema.json
|
49
53
|
- schema/schemas/includes/address.json
|
@@ -64,29 +68,36 @@ files:
|
|
64
68
|
- schema/schemas/share-parcel-schema.json
|
65
69
|
- schema/schemas/simple-licence-schema.json
|
66
70
|
- schema/schemas/subsidiary-relationship-schema.json
|
71
|
+
- spec/dummy-bot-python/manifest.json
|
72
|
+
- spec/dummy-bot-python/scraper.py
|
73
|
+
- spec/dummy-bot-python/transformer.py
|
74
|
+
- spec/dummy-bot-ruby/manifest.json
|
75
|
+
- spec/dummy-bot-ruby/scraper.rb
|
76
|
+
- spec/dummy-bot-ruby/transformer.rb
|
77
|
+
- spec/turbot_runner_spec.rb
|
67
78
|
homepage: http://turbot.opencorporates.com/
|
68
79
|
licenses:
|
69
80
|
- MIT
|
81
|
+
metadata: {}
|
70
82
|
post_install_message:
|
71
83
|
rdoc_options: []
|
72
84
|
require_paths:
|
73
85
|
- lib
|
74
86
|
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
-
none: false
|
76
87
|
requirements:
|
77
88
|
- - ! '>='
|
78
89
|
- !ruby/object:Gem::Version
|
79
90
|
version: 1.9.2
|
80
91
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
92
|
requirements:
|
83
93
|
- - ! '>='
|
84
94
|
- !ruby/object:Gem::Version
|
85
95
|
version: '0'
|
86
96
|
requirements: []
|
87
97
|
rubyforge_project:
|
88
|
-
rubygems_version:
|
98
|
+
rubygems_version: 2.2.2
|
89
99
|
signing_key:
|
90
|
-
specification_version:
|
100
|
+
specification_version: 4
|
91
101
|
summary: Utilities for running bots with Turbot
|
92
102
|
test_files: []
|
103
|
+
has_rdoc:
|