turbot-runner 0.0.17 → 0.0.18

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGMxNGNmYmMwNGQ4ZDQ2MDkzOGQ4NmQ3NTI5NzFjY2U4ZWQ4Y2ZlNQ==
5
+ data.tar.gz: !binary |-
6
+ MjJkOTI5NmU3MjViYWFhZjlhZjJlMmZmNzIzZGI2Mzc5YTA2NWZmNw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NDNmZDcwMGVmMDk4ZjM2ZDM0OWQ5ODgxYzJlNjRkMTIxNTk5NDZlMDc4ZGFh
10
+ OGUwNGIwOWMwOGY3ODNjYmE4ODE2NGYzMjJlM2M0NmU4MDE1YmEyZTE5NDU2
11
+ MWM1M2FkN2Y0MmRiODgyNzMwYzBkMjczYzEwYjJmOWIxMjIyNDU=
12
+ data.tar.gz: !binary |-
13
+ NmQzMDIwNTI3YWU2M2NhMjJiN2NlYThiMWVlNTkyM2E1MWE0MGM3MTlmZDVk
14
+ ZDFjNDcwZTI3ZDllOTJjYWRmNjc0OTg2OWFkZmZjM2RjYjhhM2Y0ZDMzYzMw
15
+ M2U2MmU4Yzg1YzgwMWRmN2QxZjgyNjZmNWE4MmFmMTNlMDBhYWQ=
data/lib/turbot_runner.rb CHANGED
@@ -1,5 +1,8 @@
1
+ require 'active_support/core_ext/hash/slice'
2
+ require 'active_support/core_ext/object/to_query'
1
3
  require 'json'
2
4
  require 'open3'
5
+ require 'set'
3
6
  require 'timeout'
4
7
 
5
8
  module TurbotRunner
@@ -25,11 +28,11 @@ module TurbotRunner
25
28
  @status = :initialized
26
29
  @interrupted = false
27
30
  @schemas = {}
31
+ @seen_uids = Set.new
28
32
  end
29
33
 
30
34
  def run(opts={})
31
35
  @status = :running
32
- validation_required = opts[:validate] || true
33
36
 
34
37
  command = "#{interpreter_for(scraper_file)} #{scraper_file}"
35
38
  data_type = @config['data_type']
@@ -67,6 +70,10 @@ module TurbotRunner
67
70
  runner.send_line(line)
68
71
  line1 = runner.get_next_line
69
72
 
73
+ # A transformer should output an empty line if it doesn't make
74
+ # sense to transform a record.
75
+ next if line1.strip.empty?
76
+
70
77
  begin
71
78
  record1 = JSON.parse(line1)
72
79
  rescue JSON::ParserError
@@ -136,14 +143,17 @@ module TurbotRunner
136
143
 
137
144
  if messages.empty?
138
145
  identifying_fields = identifying_fields_for_data_type(data_type)
146
+ identifying_hash = record.slice(*identifying_fields)
139
147
 
140
- hash = Hash.new
141
- identifying_fields.each do |k|
142
- hash[k] = record[k] if record.has_key?(k)
143
- end
144
-
145
- if hash.empty?
148
+ if identifying_hash.empty?
146
149
  messages << "Missing attributes for identifying fields: #{identifying_fields.join(', ')}"
150
+ else
151
+ record_uid = Digest::SHA1.hexdigest(identifying_hash.to_query)
152
+ if @seen_uids.include?(record_uid)
153
+ messages << "Values for identifying fields must be unique. There has already been a record with: #{identifying_hash.to_json}"
154
+ else
155
+ @seen_uids << record_uid
156
+ end
147
157
  end
148
158
  end
149
159
 
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.0.17'
2
+ VERSION = '0.0.18'
3
3
  end
metadata CHANGED
@@ -1,20 +1,32 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
5
- prerelease:
4
+ version: 0.0.18
6
5
  platform: ruby
7
6
  authors:
8
7
  - OpenCorporates
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-07-15 00:00:00.000000000 Z
11
+ date: 2014-07-16 00:00:00.000000000 Z
13
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 4.1.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 4.1.0
14
27
  - !ruby/object:Gem::Dependency
15
28
  name: json-schema
16
29
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
30
  requirements:
19
31
  - - '='
20
32
  - !ruby/object:Gem::Version
@@ -22,7 +34,6 @@ dependencies:
22
34
  type: :runtime
23
35
  prerelease: false
24
36
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
37
  requirements:
27
38
  - - '='
28
39
  - !ruby/object:Gem::Version
@@ -37,13 +48,6 @@ files:
37
48
  - lib/prerun.rb
38
49
  - lib/turbot_runner.rb
39
50
  - lib/turbot_runner/version.rb
40
- - spec/dummy-bot-python/manifest.json
41
- - spec/dummy-bot-python/scraper.py
42
- - spec/dummy-bot-python/transformer.py
43
- - spec/dummy-bot-ruby/manifest.json
44
- - spec/dummy-bot-ruby/scraper.rb
45
- - spec/dummy-bot-ruby/transformer.rb
46
- - spec/turbot_runner_spec.rb
47
51
  - schema/schemas/company-schema.json
48
52
  - schema/schemas/financial-payment-schema.json
49
53
  - schema/schemas/includes/address.json
@@ -64,29 +68,36 @@ files:
64
68
  - schema/schemas/share-parcel-schema.json
65
69
  - schema/schemas/simple-licence-schema.json
66
70
  - schema/schemas/subsidiary-relationship-schema.json
71
+ - spec/dummy-bot-python/manifest.json
72
+ - spec/dummy-bot-python/scraper.py
73
+ - spec/dummy-bot-python/transformer.py
74
+ - spec/dummy-bot-ruby/manifest.json
75
+ - spec/dummy-bot-ruby/scraper.rb
76
+ - spec/dummy-bot-ruby/transformer.rb
77
+ - spec/turbot_runner_spec.rb
67
78
  homepage: http://turbot.opencorporates.com/
68
79
  licenses:
69
80
  - MIT
81
+ metadata: {}
70
82
  post_install_message:
71
83
  rdoc_options: []
72
84
  require_paths:
73
85
  - lib
74
86
  required_ruby_version: !ruby/object:Gem::Requirement
75
- none: false
76
87
  requirements:
77
88
  - - ! '>='
78
89
  - !ruby/object:Gem::Version
79
90
  version: 1.9.2
80
91
  required_rubygems_version: !ruby/object:Gem::Requirement
81
- none: false
82
92
  requirements:
83
93
  - - ! '>='
84
94
  - !ruby/object:Gem::Version
85
95
  version: '0'
86
96
  requirements: []
87
97
  rubyforge_project:
88
- rubygems_version: 1.8.23
98
+ rubygems_version: 2.2.2
89
99
  signing_key:
90
- specification_version: 3
100
+ specification_version: 4
91
101
  summary: Utilities for running bots with Turbot
92
102
  test_files: []
103
+ has_rdoc: