datacatalog-importer 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/datacatalog-importer.gemspec +2 -2
- data/lib/pusher.rb +39 -26
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.7
|
1
|
+
0.1.8
|
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{datacatalog-importer}
|
8
|
-
s.version = "0.1.7"
|
8
|
+
s.version = "0.1.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["David James"]
|
12
|
-
s.date = %q{2010-03-
|
12
|
+
s.date = %q{2010-03-16}
|
13
13
|
s.description = %q{This framework makes it easier to write importers for the National Data Catalog.}
|
14
14
|
s.email = %q{djames@sunlightfoundation.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/pusher.rb
CHANGED
@@ -7,6 +7,10 @@ module DataCatalog
|
|
7
7
|
include DataCatalog::Importer::Shared
|
8
8
|
|
9
9
|
REQUIRED = %w(api_key base_uri cache_folder)
|
10
|
+
|
11
|
+
# These keys should not be passed along directly; they need to be
|
12
|
+
# examined so that real ID's can be passed along instead.
|
13
|
+
LOOKUP_KEYS = [:organization, :downloads]
|
10
14
|
|
11
15
|
def initialize(options)
|
12
16
|
REQUIRED.each do |r|
|
@@ -92,23 +96,24 @@ module DataCatalog
|
|
92
96
|
end
|
93
97
|
|
94
98
|
def find_organization_by(field, name)
|
95
|
-
|
99
|
+
verify_one_result(field, name, "Organization") do
|
100
|
+
DataCatalog::Organization.all(field => name)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def verify_one_result(field, name, model)
|
105
|
+
docs = yield
|
96
106
|
n = docs.length
|
97
107
|
case n
|
98
|
-
when 0
|
99
|
-
|
100
|
-
|
101
|
-
docs[0]
|
102
|
-
else
|
103
|
-
multiple_matches("Organization", { field => name }, n)
|
108
|
+
when 0 then nil
|
109
|
+
when 1 then docs[0]
|
110
|
+
else multiple_matches(model, { field => name }, n)
|
104
111
|
end
|
105
112
|
end
|
106
113
|
|
107
114
|
# Important: do not modify data
|
108
115
|
def create_or_update_source(data)
|
109
|
-
data = data.reject
|
110
|
-
[:organization, :downloads].include?(key)
|
111
|
-
end
|
116
|
+
data = data.reject { |k, v| LOOKUP_KEYS.include?(k) }
|
112
117
|
docs = DataCatalog::Source.all(:url => data[:url])
|
113
118
|
n = docs.length
|
114
119
|
case n
|
@@ -157,21 +162,29 @@ module DataCatalog
|
|
157
162
|
#
|
158
163
|
# Note: modifies data (that is why I use the !)
|
159
164
|
def link_to_existing_organization!(data, organization_id_key)
|
160
|
-
|
161
|
-
raise "Could not find :organization key" unless
|
162
|
-
name = organization_hash[:name]
|
163
|
-
url = organization_hash[:url]
|
165
|
+
hash = data.delete(:organization)
|
166
|
+
raise "Could not find :organization key" unless hash
|
164
167
|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
168
|
+
keys = [:url, :home_url, :name]
|
169
|
+
unless hash.any? { |key, value| keys.include?(key) }
|
170
|
+
raise "Need #{keys.join(' or ')} to lookup an organization"
|
171
|
+
end
|
172
|
+
|
173
|
+
attempts = []
|
174
|
+
organization = nil
|
175
|
+
keys.each do |key|
|
176
|
+
value = hash[key]
|
177
|
+
organization = if value
|
178
|
+
attempts << "#{key}:#{value}"
|
179
|
+
find_organization_by(key, value)
|
180
|
+
end
|
181
|
+
break if organization
|
169
182
|
end
|
170
183
|
|
171
184
|
if organization
|
172
185
|
data[organization_id_key] = organization.id
|
173
186
|
else
|
174
|
-
puts "
|
187
|
+
puts "- Could not find organization with #{attempts.join(' or ')}"
|
175
188
|
end
|
176
189
|
true # return value not important
|
177
190
|
end
|
@@ -179,8 +192,8 @@ module DataCatalog
|
|
179
192
|
# ---
|
180
193
|
|
181
194
|
def create_failed(model, text, data, error)
|
182
|
-
puts "
|
183
|
-
puts "
|
195
|
+
puts "- Failed. #{error}"
|
196
|
+
puts " Uploading Report to API."
|
184
197
|
DataCatalog::Report.create({
|
185
198
|
:status => "new",
|
186
199
|
:text => "Cannot create #{model} : #{text}",
|
@@ -193,8 +206,8 @@ module DataCatalog
|
|
193
206
|
end
|
194
207
|
|
195
208
|
def update_failed(model, data, id, error)
|
196
|
-
puts "
|
197
|
-
puts "
|
209
|
+
puts "- Failed. #{error}"
|
210
|
+
puts " Uploading Report to API."
|
198
211
|
DataCatalog::Report.create({
|
199
212
|
:status => "new",
|
200
213
|
:text => "Cannot update #{model} of id : #{id}",
|
@@ -207,13 +220,13 @@ module DataCatalog
|
|
207
220
|
end
|
208
221
|
|
209
222
|
def multiple_matches(model, data, n)
|
210
|
-
puts "
|
211
|
-
puts "
|
223
|
+
puts "- Failed. #{n} matches for #{model}."
|
224
|
+
puts " Uploading Report to API."
|
212
225
|
DataCatalog::Report.create({
|
213
226
|
:status => "new",
|
214
227
|
:text => "Multiple matches for url : #{data[:url]}",
|
215
228
|
:object => {
|
216
|
-
:error => "Cannot automatically update #{model}since there " +
|
229
|
+
:error => "Cannot automatically update #{model} since there " +
|
217
230
|
"are #{n} matches for url : #{data[:url]}",
|
218
231
|
:params => data,
|
219
232
|
},
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 7
|
9
|
-
version: 0.1.7
|
8
|
+
- 8
|
9
|
+
version: 0.1.8
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David James
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-03-
|
17
|
+
date: 2010-03-16 00:00:00 -04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|