chimps 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +3 -9
- data/Gemfile.lock +14 -10
- data/README.rdoc +146 -240
- data/Rakefile +4 -33
- data/VERSION +1 -1
- data/lib/chimps/config.rb +35 -21
- data/lib/chimps/{utils/error.rb → error.rb} +1 -12
- data/lib/chimps/query_request.rb +67 -0
- data/lib/chimps/request.rb +82 -108
- data/lib/chimps/response.rb +62 -22
- data/lib/chimps/utils/typewriter.rb +90 -0
- data/lib/chimps/utils/uses_curl.rb +22 -12
- data/lib/chimps/utils.rb +50 -6
- data/lib/chimps/workflows/download.rb +72 -0
- data/lib/chimps/workflows/upload.rb +113 -0
- data/lib/chimps.rb +12 -12
- data/spec/chimps/query_request_spec.rb +44 -0
- data/spec/chimps/request_spec.rb +92 -0
- data/spec/chimps/response_spec.rb +0 -1
- data/spec/chimps/workflows/download_spec.rb +48 -0
- data/spec/spec_helper.rb +2 -19
- metadata +46 -91
- data/.document +0 -5
- data/.gitignore +0 -32
- data/CHANGELOG.textile +0 -4
- data/bin/chimps +0 -5
- data/lib/chimps/cli.rb +0 -28
- data/lib/chimps/commands/base.rb +0 -65
- data/lib/chimps/commands/batch.rb +0 -40
- data/lib/chimps/commands/create.rb +0 -31
- data/lib/chimps/commands/destroy.rb +0 -26
- data/lib/chimps/commands/download.rb +0 -46
- data/lib/chimps/commands/help.rb +0 -100
- data/lib/chimps/commands/list.rb +0 -41
- data/lib/chimps/commands/query.rb +0 -82
- data/lib/chimps/commands/search.rb +0 -48
- data/lib/chimps/commands/show.rb +0 -30
- data/lib/chimps/commands/test.rb +0 -39
- data/lib/chimps/commands/update.rb +0 -34
- data/lib/chimps/commands/upload.rb +0 -50
- data/lib/chimps/commands.rb +0 -125
- data/lib/chimps/typewriter.rb +0 -349
- data/lib/chimps/utils/log.rb +0 -48
- data/lib/chimps/utils/uses_model.rb +0 -34
- data/lib/chimps/utils/uses_yaml_data.rb +0 -93
- data/lib/chimps/workflows/batch.rb +0 -127
- data/lib/chimps/workflows/downloader.rb +0 -102
- data/lib/chimps/workflows/up.rb +0 -149
- data/lib/chimps/workflows/upload/bundler.rb +0 -249
- data/lib/chimps/workflows/upload/notifier.rb +0 -59
- data/lib/chimps/workflows/upload/token.rb +0 -77
- data/lib/chimps/workflows/upload/uploader.rb +0 -51
- data/lib/chimps/workflows.rb +0 -12
- data/spec/chimps/typewriter_spec.rb +0 -114
- data/spec/chimps/workflows/upload/bundler_spec.rb +0 -75
- data/spec/chimps/workflows/upload/token_spec.rb +0 -6
data/lib/chimps/typewriter.rb
DELETED
@@ -1,349 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
|
3
|
-
# Responses from Infochimps (once parsed from the original JSON or
|
4
|
-
# YAML) consist of nested hashes:
|
5
|
-
#
|
6
|
-
# { 'dataset' => {
|
7
|
-
# 'title' => 'My dataset',
|
8
|
-
# 'description' => 'An amazing dataset which...',
|
9
|
-
# ...
|
10
|
-
# 'sources' => {
|
11
|
-
# 'source' => {
|
12
|
-
# 'title' => 'Trustworthy Source'
|
13
|
-
# ...
|
14
|
-
# },
|
15
|
-
# 'source' => {..},
|
16
|
-
# ...
|
17
|
-
# }
|
18
|
-
# },
|
19
|
-
# ...
|
20
|
-
# }
|
21
|
-
#
|
22
|
-
# This class utilizes a typewriter and a team of trained chimpanzees
|
23
|
-
# to create pretty, line-oriented output from these hashes.
|
24
|
-
class Typewriter < Array
|
25
|
-
|
26
|
-
# The response that this Typewriter will print.
|
27
|
-
attr_accessor :response
|
28
|
-
|
29
|
-
# Widths of columns as determined by the maximum number of
|
30
|
-
# characters in any row.
|
31
|
-
attr_accessor :column_widths
|
32
|
-
|
33
|
-
# Fields to print for each resource. Given as humanized names,
|
34
|
-
# will be automatically converted to key names.
|
35
|
-
RESOURCE_FIELDS = ["ID", "Cached Slug", "Updated At", "Title"]
|
36
|
-
|
37
|
-
# String to insert between fields in output.
|
38
|
-
FIELD_SEPARATOR = " "
|
39
|
-
|
40
|
-
# Return a Typewriter to print +response+.
|
41
|
-
#
|
42
|
-
# @param [Chimps::Response] response
|
43
|
-
# @return [Chimps::Typewriter]
|
44
|
-
# Build a Typewriter for +response+ and immediately accumulate the
# lines that will later be printed.
#
# @param [Chimps::Response] response the response to print
# @param [Hash] options
# @option options [true, nil] :skip_column_names leave out header rows
# @return [Chimps::Typewriter]
def initialize(response, options = {})
  super()
  @skip_column_names = options[:skip_column_names]
  @column_widths     = []
  @response          = response
  accumulate(response)
end
|
51
|
-
|
52
|
-
# Skip printing column names (header rows)?
|
53
|
-
#
|
54
|
-
# @return [true, nil]
|
55
|
-
# Whether header rows (column names) should be left out of the output.
#
# @return [true, nil] the +:skip_column_names+ option stored by the constructor
def skip_column_names?
  return @skip_column_names
end
|
58
|
-
|
59
|
-
# Print the accumulated lines in this Typewriter to the given
|
60
|
-
# +output+ (defaults to <tt>$stdout</tt>).
|
61
|
-
#
|
62
|
-
# Will first calculate appropriate column widths for any
|
63
|
-
# Array-like lines.
|
64
|
-
#
|
65
|
-
# @param [#puts] output
|
66
|
-
# Write every accumulated line to +output+.
#
# Column widths are computed first; Array lines are then padded and
# joined into a single string, while any other line is written as is.
#
# @param [#puts] output destination, defaults to <tt>$stdout</tt>
def print(output = $stdout)
  calculate_column_widths!
  each do |line|
    output.puts(line.is_a?(Array) ? pad_and_join(line) : line)
  end
end
|
76
|
-
|
77
|
-
# Accumulate lines to print from +obj+.
|
78
|
-
#
|
79
|
-
# If +obj+ is a string then it will be accumulated as a single
|
80
|
-
# line to print.
|
81
|
-
#
|
82
|
-
# If +obj+ is an Array then each element will be passed to
|
83
|
-
# Chimps::Typewriter#accumulate.
|
84
|
-
#
|
85
|
-
# If +obj+ is a Hash then each key will be mapped to a method
|
86
|
-
# <tt>accumulate_KEY</tt> and the corresponding value passed in.
|
87
|
-
# This method is responsible for accumulating lines to print.
|
88
|
-
#
|
89
|
-
# @param [Array, Hash, String] obj
|
90
|
-
# Accumulate lines to print from +obj+.
#
# * A String is accumulated as a single line.
# * An Array has each element passed back to #accumulate.
# * A Hash is dispatched key by key: plural resource names go to
#   #accumulate_listing, singular ones to #accumulate_resource, and
#   +errors+, +batch+, +search+, +api_account+ and +message+ go to the
#   matching <tt>accumulate_KEY</tt> method.  An +error+ key is ignored.
#   The Symbol keys +:array+ and +:string+ are handled as a listing and
#   a single line respectively.
#
# @param [Array, Hash, String] obj
# @raise [PrintingError] on an unrecognized Hash key or an unprintable object
def accumulate(obj)
  case obj
  when Hash
    obj.each_pair do |resource_name, resource_data|
      key = resource_name.to_s
      if %w[datasets sources licenses].include?(key)
        accumulate_listing(resource_data)
      elsif %w[dataset source license].include?(key)
        accumulate_resource(resource_name, resource_data)
      elsif %w[errors batch search api_account message].include?(key)
        # Only names from the fixed list above ever reach #send.
        # ('message' used to have a second, unreachable branch below
        # this one; it has been removed.)
        send("accumulate_#{key}", resource_data)
      elsif key == 'error'
        nil # deliberately produces no output
      elsif :array == resource_name # constructed by Chimps::Response
        accumulate_listing(resource_data)
      elsif :string == resource_name # constructed by Chimps::Response
        self << resource_data
      else
        $stderr.puts resource_data.inspect if Chimps.verbose?
        raise PrintingError.new("Unrecognized resource type `#{resource_name}'.")
      end
    end
  when Array
    obj.each { |element| accumulate(element) }
  when String
    self << obj
  else
    raise PrintingError.new("Cannot print a #{obj.class}")
  end
end
|
122
|
-
|
123
|
-
protected
|
124
|
-
|
125
|
-
# Loop through the accumulated lines, finding the maximum widths
|
126
|
-
# of each element in each Array-like line.
|
127
|
-
# Walk the accumulated lines and record, for each column position, the
# widest value (by <tt>to_s.size</tt>) seen in any Array line.
#
# Non-Array lines are skipped: plain strings are never aligned.
def calculate_column_widths!
  each do |line|
    next unless line.is_a?(Array) # don't try to align strings
    line.each_with_index do |value, field|
      # first time this column is seen: start it at width zero
      column_widths << 0 unless column_widths[field]
      width = value.to_s.size
      column_widths[field] = width if width > column_widths[field]
    end
  end
end
|
141
|
-
|
142
|
-
# Return a string with +values+ joined by FIELD_SEPARATOR each
|
143
|
-
# padded to the corresponding maximum column size.
|
144
|
-
#
|
145
|
-
# Must have called Chimps::Typewriter#calculate_column_widths!
|
146
|
-
# first.
|
147
|
-
#
|
148
|
-
# @param [Array] values
|
149
|
-
# @return [String]
|
150
|
-
def pad_and_join values
|
151
|
-
returning([]) do |padded_values|
|
152
|
-
values.each_with_index do |value, field|
|
153
|
-
max_width = column_widths[field]
|
154
|
-
value_width = value.to_s.size
|
155
|
-
padded_values << value.to_s + (' ' * (max_width - value_width))
|
156
|
-
end
|
157
|
-
end.join(FIELD_SEPARATOR)
|
158
|
-
end
|
159
|
-
|
160
|
-
# Accumulate lines for the given +resource_name+ from the given
|
161
|
-
# +resource_data+.
|
162
|
-
#
|
163
|
-
# Fields to accumulate in each line are set in
|
164
|
-
# Chimps::Typewriter::RESOURCE_FIELDS.
|
165
|
-
#
|
166
|
-
# The structure of the response for a resource looks like:
|
167
|
-
#
|
168
|
-
# {
|
169
|
-
# 'dataset' => {
|
170
|
-
# 'id' => 39293,
|
171
|
-
# 'title' => 'My Awesome Dataset',
|
172
|
-
# ...
|
173
|
-
# }
|
174
|
-
# }
|
175
|
-
#
|
176
|
-
# The key is +resource_name+ and the value is +resource_data+.
|
177
|
-
#
|
178
|
-
# @param [String] resource_name
|
179
|
-
# @param [Hash] resource_data
|
180
|
-
# Accumulate one Array line for a single resource.
#
# Each humanized name in RESOURCE_FIELDS is converted to a key
# (downcased, spaces replaced by underscores) and looked up in
# +resource_data+.  +resource_name+ is not used here.
#
# @param [String] resource_name
# @param [Hash] resource_data
def accumulate_resource(resource_name, resource_data)
  row = self.class::RESOURCE_FIELDS.map do |field_name|
    key = field_name.downcase.tr(' ', '_')
    resource_data[key]
  end
  self << row
end
|
183
|
-
|
184
|
-
# Accumulate lines for each of the +resources+, all of the given
|
185
|
-
# +type+.
|
186
|
-
#
|
187
|
-
# The structure of the response for a listing looks like:
|
188
|
-
#
|
189
|
-
# {
|
190
|
-
# 'datasets' => [
|
191
|
-
# {
|
192
|
-
# 'dataset' => {
|
193
|
-
# 'id' => 39293,
|
194
|
-
# 'title' => 'My Awesome Dataset',
|
195
|
-
# ...
|
196
|
-
# },
|
197
|
-
# },
|
198
|
-
# {
|
199
|
-
# 'dataset' => {
|
200
|
-
# 'id' => 28998,
|
201
|
-
# 'title' => 'My Other Awesome Dataset',
|
202
|
-
# ...
|
203
|
-
# },
|
204
|
-
# },
|
205
|
-
# ...
|
206
|
-
# ]
|
207
|
-
# }
|
208
|
-
#
|
209
|
-
# The value is +resources+.
|
210
|
-
#
|
211
|
-
# @param [Array<Hash>] resources
|
212
|
-
# Accumulate a header row (unless column names are skipped) followed by
# the lines for each entry of +resources+.  Does nothing when
# +resources+ is blank.
#
# @param [Array<Hash>] resources
def accumulate_listing(resources)
  unless resources.blank?
    self << self.class::RESOURCE_FIELDS unless skip_column_names?
    resources.each { |resource| accumulate(resource) }
  end
end
|
217
|
-
|
218
|
-
# Accumulate lines for each of the error messages in +errors+.
|
219
|
-
#
|
220
|
-
# The structure of the response looks like
|
221
|
-
#
|
222
|
-
# {
|
223
|
-
# 'errors' => [
|
224
|
-
# "A title is required.",
|
225
|
-
# "A description is required.",
|
226
|
-
# ...
|
227
|
-
# ]
|
228
|
-
# }
|
229
|
-
#
|
230
|
-
# The value is +errors+.
|
231
|
-
#
|
232
|
-
# @param [Array] errors
|
233
|
-
# Accumulate one line per entry in +errors+.
#
# @param [Array] errors
def accumulate_errors(errors)
  errors.each { |error| push(error) }
end
|
238
|
-
|
239
|
-
# Accumulate a line for the given +message+.
|
240
|
-
#
|
241
|
-
# The structure of the response from the Infochimps Query API on
|
242
|
-
# an error is:
|
243
|
-
#
|
244
|
-
# {
|
245
|
-
# 'message' => "The error message returned"
|
246
|
-
# }
|
247
|
-
#
|
248
|
-
# The value is +message+.
|
249
|
-
#
|
250
|
-
# @param [String] message
|
251
|
-
# Accumulate +message+ as a single line.
#
# @param [String] message
def accumulate_message(message)
  push(message)
end
|
254
|
-
|
255
|
-
# Accumulate lines for each of the batch responses in +batch+.
|
256
|
-
#
|
257
|
-
# The structure of the response looks like
|
258
|
-
#
|
259
|
-
# {
|
260
|
-
# 'batch' => [
|
261
|
-
# {
|
262
|
-
# 'status' => 'created',
|
263
|
-
# 'resource' => {
|
264
|
-
# 'dataset' => {
|
265
|
-
# 'id' => 39293,
|
266
|
-
# 'title' => "My Awesome Dataset",
|
267
|
-
# ...
|
268
|
-
# },
|
269
|
-
# },
|
270
|
-
# 'errors' => nil,
|
271
|
-
# 'local_paths' => [...] # this is totally optional
|
272
|
-
# },
|
273
|
-
# {
|
274
|
-
# 'status' => 'invalid',
|
275
|
-
# 'errors' => [
|
276
|
-
# "A title is required.",
|
277
|
-
# "A description is required."
|
278
|
-
# ]
|
279
|
-
# },
|
280
|
-
# ...
|
281
|
-
# ]
|
282
|
-
# }
|
283
|
-
#
|
284
|
-
# The value is +batch+.
|
285
|
-
# Accumulate one Array line per response in +batch+, preceded by a
# header row unless column names are skipped.
#
# A response with a +resource+ and no errors yields
# <tt>[status, resource_type, id]</tt>; any other response yields
# <tt>[status, nil, nil]</tt> followed by its error messages.
#
# @param [Array<Hash>] batch
def accumulate_batch(batch)
  self << ["Status", "Resource", "ID", "Errors"] unless skip_column_names?
  batch.each do |response|
    status   = response['status']
    errors   = response['errors']
    resource = response['resource']
    if resource && errors.blank?
      resource_type = resource.keys.first
      self << [status, resource_type, resource[resource_type]['id']]
    else
      # Array() copes with a response that carries neither a resource
      # nor errors (nil) and with a single error String; a bare
      # `+ errors' raised TypeError in both cases.
      self << ([status, nil, nil] + Array(errors))
    end
  end
end
|
300
|
-
|
301
|
-
# Accumulate lines for the results in +search+.
|
302
|
-
#
|
303
|
-
# The structure of the response looks like
|
304
|
-
#
|
305
|
-
# {
|
306
|
-
# 'search' => {
|
307
|
-
# 'results' => [
|
308
|
-
# { 'dataset' => {...} },
|
309
|
-
# { 'dataset' => {...} },
|
310
|
-
# ...
|
311
|
-
# ]
|
312
|
-
#
|
313
|
-
# }
|
314
|
-
# }
|
315
|
-
#
|
316
|
-
# The value keyed to +search+ is +search+.
|
317
|
-
# Accumulate a header row (unless column names are skipped) and the
# lines for every entry under <tt>search['results']</tt>.  Does nothing
# when there are no results.
#
# @param [Hash] search
def accumulate_search(search)
  results = search['results']
  return if results.blank?
  self << self.class::RESOURCE_FIELDS unless skip_column_names?
  results.each { |resource| accumulate(resource) }
end
|
322
|
-
|
323
|
-
# Accumulate lines for the +api_account+.
|
324
|
-
#
|
325
|
-
# The structure of the response looks like
|
326
|
-
#
|
327
|
-
# { 'api_account' => {
|
328
|
-
# 'api_key' => ...,
|
329
|
-
# 'owner' => {
|
330
|
-
# 'username' => 'Infochimps',
|
331
|
-
# ...
|
332
|
-
# },
|
333
|
-
# 'updated_at' => ...,
|
334
|
-
# ...
|
335
|
-
# }
|
336
|
-
# }
|
337
|
-
#
|
338
|
-
# The value is +api_account+
|
339
|
-
# Accumulate three descriptive lines (username, API key, last update)
# for +api_account+.
#
# The key is read from <tt>'apikey'</tt> (what this method has always
# used) and falls back to <tt>'api_key'</tt> (the name shown in the
# documented response structure).
# NOTE(review): confirm which key the API actually returns.
#
# @param [Hash] api_account
def accumulate_api_account(api_account)
  # FIXME this is sort of ugly...
  owner   = api_account['owner'] || {} # tolerate a response without an owner
  api_key = api_account['apikey'] || api_account['api_key']
  self << "USERNAME: #{owner['username']}"
  self << "API KEY: #{api_key}"
  self << "LAST UPDATED: #{api_account['updated_at']}"
end
|
345
|
-
|
346
|
-
end
|
347
|
-
|
348
|
-
end
|
349
|
-
|
data/lib/chimps/utils/log.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
|
3
|
-
# The Chimps logger. Set via Chimps::Config[:log] and defaults
|
4
|
-
# to $stdout.
|
5
|
-
#
|
6
|
-
# @return [Logger]
|
7
|
-
# The Chimps logger, created through Log.new_logger on first use and
# then reused.
#
# @return [Logger]
def self.log
  @log = Log.new_logger unless @log
  @log
end
|
10
|
-
|
11
|
-
# Set the Chimps logger.
|
12
|
-
#
|
13
|
-
# @param [Logger] new_log
|
14
|
-
# Replace the Chimps logger.
#
# @param [Logger] new_log
def self.log=(new_log)
  @log = new_log
end
|
17
|
-
|
18
|
-
# Module for initializing the Chimps logger from configuration
|
19
|
-
# settings.
|
20
|
-
module Log
|
21
|
-
|
22
|
-
# Initialize a new Logger instance with the log level set by
|
23
|
-
# Chimps.verbose?
|
24
|
-
#
|
25
|
-
# @return [Logger]
|
26
|
-
# Create a Logger writing to +log_file+, named "Chimps", at level INFO
# when Chimps.verbose? is true and WARN otherwise.
#
# @return [Logger]
def self.new_logger
  require 'logger'
  logger          = Logger.new(log_file)
  logger.progname = "Chimps"
  logger.level    = Chimps.verbose? ? Logger::INFO : Logger::WARN
  logger
end
|
33
|
-
|
34
|
-
# Return either the path to the log file in Chimps::Config[:log]
|
35
|
-
# or $stdout if the path is blank or equal to `-'.
|
36
|
-
#
|
37
|
-
# @return [String, $stdout] the path to the log or $stdout
|
38
|
-
# Where the logger should write.
#
# Returns <tt>$stdout</tt> when Chimps::Config[:log] is unset, blank
# (empty or whitespace only), or equal to `-' once stripped; otherwise
# returns the configured value unchanged.  A blank value used to be
# returned as a path.
#
# @return [String, $stdout] the path to the log or $stdout
def self.log_file
  path = Chimps::Config[:log]
  return $stdout unless path
  stripped = path.strip
  return $stdout if stripped.empty? || stripped == '-'
  path
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
|
data/lib/chimps/utils/uses_model.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Utils
|
3
|
-
module UsesModel
|
4
|
-
|
5
|
-
# The model name stored under <tt>config[:model]</tt>.
def model
  return config[:model]
end
|
8
|
-
|
9
|
-
# The plural form of +model+: a trailing `y' becomes `ies', anything
# else just gets an `s' appended.
#
# The `y' branch used to slice <tt>model[1..-1]</tt>, which dropped the
# first character and kept the `y' ("category" => "ategoryies"); it now
# drops the trailing `y' instead.
#
# @return [String]
def plural_model
  name = model
  if name[-1, 1] == 'y' # last character, as a String on any Ruby version
    name[0..-2] + 'ies'
  else
    name + 's'
  end
end
|
16
|
-
|
17
|
-
# The first command-line argument, used as the ID or handle of the
# resource to act on.
#
# @return [String]
# @raise [CLIError] when the first argument is blank
def model_identifier
  identifier = config.argv.first
  if identifier.blank?
    raise CLIError.new("Must provide an ID or URL-escaped handle as the first argument")
  end
  identifier
end
|
21
|
-
|
22
|
-
# Path for the model's collection: the plural model name with a
# <tt>.json</tt> extension.
#
# @return [String]
def models_path
  plural_model.to_s + ".json"
end
|
25
|
-
|
26
|
-
# Path for one resource: the plural model name, a slash, the model
# identifier, and a <tt>.json</tt> extension.
#
# @return [String]
def model_path
  [plural_model, "/", model_identifier, ".json"].join
end
|
29
|
-
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
|
data/lib/chimps/utils/uses_yaml_data.rb
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Utils
|
3
|
-
module UsesYamlData
|
4
|
-
|
5
|
-
# Whether YAML file arguments on the command line should be ignored.
# Always +false+ here; presumably overridden by including classes.
def ignore_yaml_files_on_command_line
  return false
end
|
8
|
-
# Whether the first command-line argument should be skipped when
# collecting data.  Always +false+ here; presumably overridden by
# including classes.
def ignore_first_arg_on_command_line
  return false
end
|
11
|
-
|
12
|
-
# The merged data from STDIN, the data file and the command line, in
# that order.  Computed once and memoized; an empty Hash when no source
# supplied anything.
def data
  return @data if @data
  sources = data_from_stdin + data_from_file + data_from_command_line
  @data = merge_all(*sources) || {}
end
|
15
|
-
|
16
|
-
protected
|
17
|
-
|
18
|
-
# Combine +objs+ into a single Array or Hash.
#
# +nil+ entries are dropped first.  Arrays are concatenated in order;
# Hashes are merged in order, so later objects take precedence.
# Returns +nil+ when nothing is left to merge -- raising in that case
# is left to the caller.
#
# The container type is decided from the class itself rather than by
# instantiating it, so a scalar document (e.g. an Integer, which has no
# +new+) raises the intended CLIError instead of NoMethodError, and the
# message names the offending class.
#
# @param [Array<Array, Hash, nil>] objs
# @return [Array, Hash, nil]
# @raise [CLIError] if the objects are of different classes or are
#   neither Arrays nor Hashes
def merge_all(*objs)
  objs = objs.compact
  return if objs.empty? # raising an error here is left to the caller
  klasses = objs.map { |obj| obj.class }.uniq
  raise CLIError.new("Mismatched YAML data types -- Hashes can only be combined with Hashes, Arrays with Arrays") if klasses.size > 1
  klass = klasses.first
  if klass <= Array
    # greater precedence at the end so iterate in order
    objs.inject([]) { |merged, obj| merged.concat(obj) }
  elsif klass <= Hash
    # greater precedence at the end so iterate in order
    objs.inject({}) { |merged, obj| merged.merge!(obj) }
  else
    raise CLIError.new("Incompatible YAML data type #{klass} -- can only combine Hashes and Arrays")
  end
end
|
42
|
-
|
43
|
-
# Collect <tt>name=value</tt> arguments from the command line.
#
# Every matching argument becomes its own single-pair Hash with the
# name downcased and symbolized and the value stripped.  Arguments that
# don't match are skipped, as is the first argument when
# +ignore_first_arg_on_command_line+ is true.
#
# @return [Array<Hash>]
def params_from_command_line
  params = []
  config.argv.each_with_index do |arg, index|
    next if index == 0 && ignore_first_arg_on_command_line
    next unless arg =~ /^(\w+) *=(.*)$/
    params << { $1.downcase.to_sym => $2.strip } # always a hash
  end
  params
end
|
53
|
-
|
54
|
-
# Load every command-line argument that is not a <tt>name=value</tt>
# pair as a YAML file.
#
# The first argument is skipped when +ignore_first_arg_on_command_line+
# is true.  Files are read with YAML.load_file, which closes the file
# itself and matches +data_from_file+; the previous
# <tt>YAML.load(open(path))</tt> left the handle open.
#
# @return [Array<Hash, Array>] one parsed document per file
# @raise [CLIError] if a path does not exist
def yaml_files_from_command_line
  documents = []
  config.argv.each_with_index do |arg, index|
    next if index == 0 && ignore_first_arg_on_command_line
    next if arg =~ /^(\w+) *=(.*)$/
    path = File.expand_path(arg)
    raise CLIError.new("No such path #{path}") unless File.exist?(path)
    documents << YAML.load_file(path) # either a hash or an array
  end
  documents
end
|
65
|
-
|
66
|
-
# Data gathered from the command line: just the <tt>name=value</tt>
# params when YAML files are ignored, otherwise the YAML file contents
# followed by the params.
#
# @return [Array]
def data_from_command_line
  return params_from_command_line if ignore_yaml_files_on_command_line
  yaml_files_from_command_line + params_from_command_line
end
|
73
|
-
|
74
|
-
# Data loaded from the YAML file named by <tt>config[:data_file]</tt>.
#
# @return [Array] a one-element Array holding the parsed file, or
#   +[nil]+ when no data file is configured
def data_from_file
  path = config[:data_file]
  return [nil] unless path
  [YAML.load_file(File.expand_path(path))]
end
|
77
|
-
|
78
|
-
# Every YAML document found on <tt>$stdin</tt>.
#
# @return [Array] the documents, or +[nil]+ when <tt>$stdin.stat</tt>
#   reports a size of zero
def data_from_stdin
  return [nil] unless $stdin.stat.size > 0
  documents = []
  YAML.load_stream($stdin).each { |document| documents << document }
  documents
end
|
86
|
-
|
87
|
-
# Insist that some data was supplied.
#
# @raise [CLIError] when +data+ is not present
def ensure_data_is_present!
  return if data.present?
  raise CLIError.new("Must provide some data to send, either on the command line, from an input file, or by piping to STDIN.  Try `chimps help #{name}'")
end
|
90
|
-
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
data/lib/chimps/workflows/batch.rb
DELETED
@@ -1,127 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Workflows
|
3
|
-
|
4
|
-
# A class for performing batch updates/uploads to Infochimps.
|
5
|
-
#
|
6
|
-
# It works by taking YAML data describing many updates and
|
7
|
-
# performing a single batch API request with this data.
|
8
|
-
#
|
9
|
-
# The batch response is then parsed and analyzed and (given
|
10
|
-
# success or fearlessness) any necessary uploads are performed.
|
11
|
-
#
|
12
|
-
# Examples of the input data format can be found in the
|
13
|
-
# <tt>/examples</tt> directory of the Chimps distribution.
|
14
|
-
class BatchUpdater
|
15
|
-
|
16
|
-
# The data sent as a bulk update.
|
17
|
-
attr_reader :data
|
18
|
-
|
19
|
-
# The batch update response
|
20
|
-
attr_reader :batch_response
|
21
|
-
|
22
|
-
# The output file to store the bulk update response.
|
23
|
-
attr_reader :output_path
|
24
|
-
|
25
|
-
# Whether to upload even if there were errors on update.
|
26
|
-
attr_reader :upload_even_if_errors
|
27
|
-
|
28
|
-
# The data format to annotate the upload with.
|
29
|
-
#
|
30
|
-
# Chimps will try to guess if this isn't given.
|
31
|
-
attr_reader :fmt
|
32
|
-
|
33
|
-
# Create a new BatchUpdater with the given +data+ and +options+.
|
34
|
-
#
|
35
|
-
# The intermediate batch response can be saved at a file named
|
36
|
-
# by <tt>:output_path</tt>, though this isn't necessary.
|
37
|
-
#
|
38
|
-
# @param [Array] data an array of resource updates
|
39
|
-
# @param [Hash] options
|
40
|
-
# @option options [String] output_path path to store the batch response
|
41
|
-
# @option options [true, false] upload_even_if_errors whether to continue uploading in the presence of errors on update
|
42
|
-
# @option options [String] fmt the data format to annotate each upload with (see `chimps upload')
|
43
|
-
# @return [Chimps::Workflows::BatchUpdater]
|
44
|
-
# Create a new BatchUpdater for +data+.
#
# @param [Array] data an array of resource updates
# @param [Hash] options
# @option options [String] :output_path path to store the batch response
# @option options [true, false] :upload_even_if_errors keep uploading after update errors
# @option options [String] :fmt data format to annotate each upload with
# @return [Chimps::Workflows::BatchUpdater]
def initialize(data, options = {})
  @fmt                   = options[:fmt]
  @upload_even_if_errors = options[:upload_even_if_errors]
  @output_path           = options[:output_path]
  @data                  = data
end
|
50
|
-
|
51
|
-
# The path to submit batch update requests.
|
52
|
-
#
|
53
|
-
# @return [String]
|
54
|
-
# The path batch update requests are submitted to.
#
# @return [String]
def batch_path
  return "batch.json"
end
|
57
|
-
|
58
|
-
# Perform this batch update followed by the batch upload.
|
59
|
-
# Run the whole workflow: the batch update first, then the batch
# upload.
def execute!
  batch_update!
  return batch_upload!
end
|
63
|
-
|
64
|
-
# Perform the batch update.
|
65
|
-
# POST +data+ as a single authenticated batch request, remember the
# response, write its body to +output_path+ when one was given, and
# print it.
def batch_update!
  request         = Request.new(batch_path, :data => { :batch => data }, :authenticate => true)
  @batch_response = request.post
  if output_path
    File.open(output_path, 'w') { |f| f.puts batch_response.body }
  end
  batch_response.print
end
|
70
|
-
|
71
|
-
# Were any of the updates performed during the batch update
|
72
|
-
# errors?
|
73
|
-
#
|
74
|
-
# @return [true, false]
|
75
|
-
# Did any entry of the batch response come back with a status other
# than 'created' or 'updated'?
#
# @return [true, false]
def error?
  batch_response['batch'].any? do |response|
    !['created', 'updated'].include?(response['status'])
  end
end
|
82
|
-
|
83
|
-
# Did all of the updates performed in the batch update succeed?
|
84
|
-
#
|
85
|
-
# @return [true, false]
|
86
|
-
# Did every update in the batch succeed?  The negation of #error?.
#
# @return [true, false]
def success?
  error? ? false : true
end
|
89
|
-
|
90
|
-
# Perform the batch upload.
|
91
|
-
#
|
92
|
-
# Will bail if the batch update had an error unless
|
93
|
-
# Chimps::Workflows::BatchUpdater#upload_even_if_errors returns
|
94
|
-
# true.
|
95
|
-
# Upload the local paths of every dataset returned by
# +dataset_ids_and_local_paths+, one Uploader run per dataset.
#
# Does nothing when the batch update had errors, unless
# +upload_even_if_errors+ is set -- in which case a warning goes to
# <tt>$stderr</tt> and the uploads proceed.
def batch_upload!
  succeeded = success?
  return unless succeeded || upload_even_if_errors
  unless succeeded
    $stderr.puts("WARNING: continuing with uploads even though there were errors")
  end
  dataset_ids_and_local_paths.each do |id, local_paths|
    uploader = Chimps::Workflows::Uploader.new(:dataset => id, :local_paths => local_paths, :fmt => fmt)
    uploader.execute!
  end
end
|
102
|
-
|
103
|
-
protected
|
104
|
-
# Iterate through the batch response and return tuples
|
105
|
-
# consisting of an ID and an array of local paths to upload.
|
106
|
-
#
|
107
|
-
# Only datasets which were successfully created/updated,
|
108
|
-
# returned an ID, and had local_paths defined in the original
|
109
|
-
# batch update will be output.
|
110
|
-
#
|
111
|
-
# @return [Array<Array>]
|
112
|
-
# Pair each uploadable dataset's ID with its local paths.
#
# A batch entry is kept only when its status is 'created' or 'updated',
# its resource is a dataset, the dataset has an ID, and the entry
# carries +local_paths+.
#
# @return [Array<Array>] <tt>[id, local_paths]</tt> tuples
def dataset_ids_and_local_paths
  pairs = []
  batch_response['batch'].each do |response|
    next unless %w[created updated].include?(response['status']) # skip errors
    dataset = response['resource']['dataset']
    next unless dataset                                           # skip unless it's a dataset
    id          = dataset['id']
    local_paths = response['local_paths']
    next if id.blank? || local_paths.blank?                       # need both an ID and paths
    pairs << [id, local_paths]
  end
  pairs
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|