turbot-runner 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGMxNGNmYmMwNGQ4ZDQ2MDkzOGQ4NmQ3NTI5NzFjY2U4ZWQ4Y2ZlNQ==
5
+ data.tar.gz: !binary |-
6
+ MjJkOTI5NmU3MjViYWFhZjlhZjJlMmZmNzIzZGI2Mzc5YTA2NWZmNw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NDNmZDcwMGVmMDk4ZjM2ZDM0OWQ5ODgxYzJlNjRkMTIxNTk5NDZlMDc4ZGFh
10
+ OGUwNGIwOWMwOGY3ODNjYmE4ODE2NGYzMjJlM2M0NmU4MDE1YmEyZTE5NDU2
11
+ MWM1M2FkN2Y0MmRiODgyNzMwYzBkMjczYzEwYjJmOWIxMjIyNDU=
12
+ data.tar.gz: !binary |-
13
+ NmQzMDIwNTI3YWU2M2NhMjJiN2NlYThiMWVlNTkyM2E1MWE0MGM3MTlmZDVk
14
+ ZDFjNDcwZTI3ZDllOTJjYWRmNjc0OTg2OWFkZmZjM2RjYjhhM2Y0ZDMzYzMw
15
+ M2U2MmU4Yzg1YzgwMWRmN2QxZjgyNjZmNWE4MmFmMTNlMDBhYWQ=
data/lib/turbot_runner.rb CHANGED
@@ -1,5 +1,8 @@
1
+ require 'active_support/core_ext/hash/slice'
2
+ require 'active_support/core_ext/object/to_query'
1
3
  require 'json'
2
4
  require 'open3'
5
+ require 'set'
3
6
  require 'timeout'
4
7
 
5
8
  module TurbotRunner
@@ -25,11 +28,11 @@ module TurbotRunner
25
28
  @status = :initialized
26
29
  @interrupted = false
27
30
  @schemas = {}
31
+ @seen_uids = Set.new
28
32
  end
29
33
 
30
34
  def run(opts={})
31
35
  @status = :running
32
- validation_required = opts[:validate] || true
33
36
 
34
37
  command = "#{interpreter_for(scraper_file)} #{scraper_file}"
35
38
  data_type = @config['data_type']
@@ -67,6 +70,10 @@ module TurbotRunner
67
70
  runner.send_line(line)
68
71
  line1 = runner.get_next_line
69
72
 
73
+ # A transformer should output an empty line if it doesn't make
74
+ # sense to transform a record.
75
+ next if line1.strip.empty?
76
+
70
77
  begin
71
78
  record1 = JSON.parse(line1)
72
79
  rescue JSON::ParserError
@@ -136,14 +143,17 @@ module TurbotRunner
136
143
 
137
144
  if messages.empty?
138
145
  identifying_fields = identifying_fields_for_data_type(data_type)
146
+ identifying_hash = record.slice(*identifying_fields)
139
147
 
140
- hash = Hash.new
141
- identifying_fields.each do |k|
142
- hash[k] = record[k] if record.has_key?(k)
143
- end
144
-
145
- if hash.empty?
148
+ if identifying_hash.empty?
146
149
  messages << "Missing attributes for identifying fields: #{identifying_fields.join(', ')}"
150
+ else
151
+ record_uid = Digest::SHA1.hexdigest(identifying_hash.to_query)
152
+ if @seen_uids.include?(record_uid)
153
+ messages << "Values for identifying fields must be unique. There has already been a record with: #{identifying_hash.to_json}"
154
+ else
155
+ @seen_uids << record_uid
156
+ end
147
157
  end
148
158
  end
149
159
 
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.0.17'
2
+ VERSION = '0.0.18'
3
3
  end
metadata CHANGED
@@ -1,20 +1,32 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
5
- prerelease:
4
+ version: 0.0.18
6
5
  platform: ruby
7
6
  authors:
8
7
  - OpenCorporates
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-07-15 00:00:00.000000000 Z
11
+ date: 2014-07-16 00:00:00.000000000 Z
13
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 4.1.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 4.1.0
14
27
  - !ruby/object:Gem::Dependency
15
28
  name: json-schema
16
29
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
30
  requirements:
19
31
  - - '='
20
32
  - !ruby/object:Gem::Version
@@ -22,7 +34,6 @@ dependencies:
22
34
  type: :runtime
23
35
  prerelease: false
24
36
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
37
  requirements:
27
38
  - - '='
28
39
  - !ruby/object:Gem::Version
@@ -37,13 +48,6 @@ files:
37
48
  - lib/prerun.rb
38
49
  - lib/turbot_runner.rb
39
50
  - lib/turbot_runner/version.rb
40
- - spec/dummy-bot-python/manifest.json
41
- - spec/dummy-bot-python/scraper.py
42
- - spec/dummy-bot-python/transformer.py
43
- - spec/dummy-bot-ruby/manifest.json
44
- - spec/dummy-bot-ruby/scraper.rb
45
- - spec/dummy-bot-ruby/transformer.rb
46
- - spec/turbot_runner_spec.rb
47
51
  - schema/schemas/company-schema.json
48
52
  - schema/schemas/financial-payment-schema.json
49
53
  - schema/schemas/includes/address.json
@@ -64,29 +68,36 @@ files:
64
68
  - schema/schemas/share-parcel-schema.json
65
69
  - schema/schemas/simple-licence-schema.json
66
70
  - schema/schemas/subsidiary-relationship-schema.json
71
+ - spec/dummy-bot-python/manifest.json
72
+ - spec/dummy-bot-python/scraper.py
73
+ - spec/dummy-bot-python/transformer.py
74
+ - spec/dummy-bot-ruby/manifest.json
75
+ - spec/dummy-bot-ruby/scraper.rb
76
+ - spec/dummy-bot-ruby/transformer.rb
77
+ - spec/turbot_runner_spec.rb
67
78
  homepage: http://turbot.opencorporates.com/
68
79
  licenses:
69
80
  - MIT
81
+ metadata: {}
70
82
  post_install_message:
71
83
  rdoc_options: []
72
84
  require_paths:
73
85
  - lib
74
86
  required_ruby_version: !ruby/object:Gem::Requirement
75
- none: false
76
87
  requirements:
77
88
  - - ! '>='
78
89
  - !ruby/object:Gem::Version
79
90
  version: 1.9.2
80
91
  required_rubygems_version: !ruby/object:Gem::Requirement
81
- none: false
82
92
  requirements:
83
93
  - - ! '>='
84
94
  - !ruby/object:Gem::Version
85
95
  version: '0'
86
96
  requirements: []
87
97
  rubyforge_project:
88
- rubygems_version: 1.8.23
98
+ rubygems_version: 2.2.2
89
99
  signing_key:
90
- specification_version: 3
100
+ specification_version: 4
91
101
  summary: Utilities for running bots with Turbot
92
102
  test_files: []
103
+ has_rdoc: