chimps 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/Gemfile +3 -9
  2. data/Gemfile.lock +14 -10
  3. data/README.rdoc +146 -240
  4. data/Rakefile +4 -33
  5. data/VERSION +1 -1
  6. data/lib/chimps/config.rb +35 -21
  7. data/lib/chimps/{utils/error.rb → error.rb} +1 -12
  8. data/lib/chimps/query_request.rb +67 -0
  9. data/lib/chimps/request.rb +82 -108
  10. data/lib/chimps/response.rb +62 -22
  11. data/lib/chimps/utils/typewriter.rb +90 -0
  12. data/lib/chimps/utils/uses_curl.rb +22 -12
  13. data/lib/chimps/utils.rb +50 -6
  14. data/lib/chimps/workflows/download.rb +72 -0
  15. data/lib/chimps/workflows/upload.rb +113 -0
  16. data/lib/chimps.rb +12 -12
  17. data/spec/chimps/query_request_spec.rb +44 -0
  18. data/spec/chimps/request_spec.rb +92 -0
  19. data/spec/chimps/response_spec.rb +0 -1
  20. data/spec/chimps/workflows/download_spec.rb +48 -0
  21. data/spec/spec_helper.rb +2 -19
  22. metadata +46 -91
  23. data/.document +0 -5
  24. data/.gitignore +0 -32
  25. data/CHANGELOG.textile +0 -4
  26. data/bin/chimps +0 -5
  27. data/lib/chimps/cli.rb +0 -28
  28. data/lib/chimps/commands/base.rb +0 -65
  29. data/lib/chimps/commands/batch.rb +0 -40
  30. data/lib/chimps/commands/create.rb +0 -31
  31. data/lib/chimps/commands/destroy.rb +0 -26
  32. data/lib/chimps/commands/download.rb +0 -46
  33. data/lib/chimps/commands/help.rb +0 -100
  34. data/lib/chimps/commands/list.rb +0 -41
  35. data/lib/chimps/commands/query.rb +0 -82
  36. data/lib/chimps/commands/search.rb +0 -48
  37. data/lib/chimps/commands/show.rb +0 -30
  38. data/lib/chimps/commands/test.rb +0 -39
  39. data/lib/chimps/commands/update.rb +0 -34
  40. data/lib/chimps/commands/upload.rb +0 -50
  41. data/lib/chimps/commands.rb +0 -125
  42. data/lib/chimps/typewriter.rb +0 -349
  43. data/lib/chimps/utils/log.rb +0 -48
  44. data/lib/chimps/utils/uses_model.rb +0 -34
  45. data/lib/chimps/utils/uses_yaml_data.rb +0 -93
  46. data/lib/chimps/workflows/batch.rb +0 -127
  47. data/lib/chimps/workflows/downloader.rb +0 -102
  48. data/lib/chimps/workflows/up.rb +0 -149
  49. data/lib/chimps/workflows/upload/bundler.rb +0 -249
  50. data/lib/chimps/workflows/upload/notifier.rb +0 -59
  51. data/lib/chimps/workflows/upload/token.rb +0 -77
  52. data/lib/chimps/workflows/upload/uploader.rb +0 -51
  53. data/lib/chimps/workflows.rb +0 -12
  54. data/spec/chimps/typewriter_spec.rb +0 -114
  55. data/spec/chimps/workflows/upload/bundler_spec.rb +0 -75
  56. data/spec/chimps/workflows/upload/token_spec.rb +0 -6
@@ -1,349 +0,0 @@
1
- module Chimps
2
-
3
- # Responses from Infochimps (once parsed from the original JSON or
4
- # YAML) consist of nested hashes:
5
- #
6
- # { 'dataset' => {
7
- # 'title' => 'My dataset',
8
- # 'description' => 'An amazing dataset which...',
9
- # ...
10
- # 'sources' => {
11
- # 'source' => {
12
- # 'title' => 'Trustworthy Source'
13
- # ...
14
- # },
15
- # 'source' => {..},
16
- # ...
17
- # }
18
- # },
19
- # ...
20
- # }
21
- #
22
- # This class utilizes a typewriter and a team of trained chimpanzees
23
- # to create pretty, line-oriented output from these hashes.
24
- class Typewriter < Array
25
-
26
- # The response that this Typewriter will print.
27
- attr_accessor :response
28
-
29
- # Widths of columns as determined by the maximum number of
30
- # characters in any row.
31
- attr_accessor :column_widths
32
-
33
- # Fields to print for each resource. Given as humanized names,
34
- # will be automatically converted to key names.
35
- RESOURCE_FIELDS = ["ID", "Cached Slug", "Updated At", "Title"]
36
-
37
- # String to insert between fields in output.
38
- FIELD_SEPARATOR = " "
39
-
40
- # Return a Typewriter to print +data+.
41
- #
42
- # @param [Chimps::Response] response
43
- # @return [Chimps::Typewriter]
44
- def initialize response, options={}
45
- super()
46
- @response = response
47
- @column_widths = []
48
- @skip_column_names = options[:skip_column_names]
49
- accumulate(response)
50
- end
51
-
52
- # Skip printing column names and emit only values?
53
- #
54
- # @return [true, nil]
55
- def skip_column_names?
56
- @skip_column_names
57
- end
58
-
59
- # Print the accumulated lines in this Typewriter to the given
60
- # +output+ (defaults to <tt>$stdout</tt>).
61
- #
62
- # Will first calculate appropriate column widths for any
63
- # Array-like lines.
64
- #
65
- # @param [#puts] output
66
- def print output=$stdout
67
- calculate_column_widths!
68
- each do |line|
69
- if line.is_a?(Array)
70
- output.puts pad_and_join(line)
71
- else
72
- output.puts line
73
- end
74
- end
75
- end
76
-
77
- # Accumulate lines to print from +obj+.
78
- #
79
- # If +obj+ is a string then it will be accumulated as a single
80
- # line to print.
81
- #
82
- # If +obj+ is an Array then each element will be passed to
83
- # Chimps::Typewriter#accumulate.
84
- #
85
- # If +obj+ is a Hash then each key will be mapped to a method
86
- # <tt>accumulate_KEY</tt> and the corresponding value passed in.
87
- # This method is responsible for accumulating lines to print.
88
- #
89
- # @param [Array, Hash, String] obj
90
- def accumulate obj
91
- case obj
92
- when Hash
93
- obj.each_pair do |resource_name, resource_data|
94
- case
95
- when %w[datasets sources licenses].include?(resource_name.to_s)
96
- accumulate_listing(resource_data)
97
- when %w[dataset source license].include?(resource_name.to_s)
98
- accumulate_resource(resource_name, resource_data)
99
- when %w[errors batch search api_account message].include?(resource_name.to_s)
100
- send("accumulate_#{resource_name}", resource_data)
101
- when %w[message].include?(resource_name.to_s)
102
- self << [resource_data]
103
- when %w[error].include?(resource_name.to_s)
104
- nil
105
- when :array == resource_name # constructed by Chimps::Response
106
- accumulate_listing(resource_data)
107
- when :string == resource_name # constructed by Chimps::Response
108
- self << obj[:string]
109
- else
110
- $stderr.puts resource_data.inspect if Chimps.verbose?
111
- raise PrintingError.new("Unrecognized resource type `#{resource_name}'.")
112
- end
113
- end
114
- when Array
115
- obj.each { |element| accumulate(element) }
116
- when String
117
- self << obj
118
- else
119
- raise PrintingError.new("Cannot print a #{obj.class}")
120
- end
121
- end
122
-
123
- protected
124
-
125
- # Loop through the accumulated lines, finding the maximum widths
126
- # of each element in each Array-like line.
127
- def calculate_column_widths!
128
- each do |line|
129
- next unless line.is_a?(Array) # don't try to align strings
130
- line.each_with_index do |value, field|
131
- current_max_width = column_widths[field]
132
- unless current_max_width
133
- current_max_width = 0
134
- column_widths << current_max_width
135
- end
136
- value_size = value.to_s.size
137
- column_widths[field] = value_size if value_size > current_max_width
138
- end
139
- end
140
- end
141
-
142
- # Return a string with +values+ joined by FIELD_SEPARATOR each
143
- # padded to the corresponding maximum column size.
144
- #
145
- # Must have called Chimps::Typewriter#calculate_column_widths!
146
- # first.
147
- #
148
- # @param [Array] values
149
- # @return [String]
150
- def pad_and_join values
151
- returning([]) do |padded_values|
152
- values.each_with_index do |value, field|
153
- max_width = column_widths[field]
154
- value_width = value.to_s.size
155
- padded_values << value.to_s + (' ' * (max_width - value_width))
156
- end
157
- end.join(FIELD_SEPARATOR)
158
- end
159
-
160
- # Accumulate lines for the given +resource_name+ from the given
161
- # +resource_data+.
162
- #
163
- # Fields to accumulate in each line are set in
164
- # Chimps::Typewriter::RESOURCE_FIELDS.
165
- #
166
- # The structure of the response for a resource looks like:
167
- #
168
- # {
169
- # 'dataset' => {
170
- # 'id' => 39293,
171
- # 'title' => 'My Awesome Dataset',
172
- # ...
173
- # }
174
- # }
175
- #
176
- # The key is +resource_name+ and the value is +resource_data+.
177
- #
178
- # @param [String] resource_name
179
- # @param [Hash] resource_data
180
- def accumulate_resource resource_name, resource_data
181
- self << self.class::RESOURCE_FIELDS.map { |field_name| resource_data[field_name.downcase.tr(' ', '_')] }
182
- end
183
-
184
- # Accumulate lines for each of the +resources+, all of the given
185
- # +type+.
186
- #
187
- # The structure of the response for a listing looks like:
188
- #
189
- # {
190
- # 'datasets' => [
191
- # {
192
- # 'dataset' => {
193
- # 'id' => 39293,
194
- # 'title' => 'My Awesome Dataset',
195
- # ...
196
- # },
197
- # },
198
- # {
199
- # 'dataset' => {
200
- # 'id' => 28998,
201
- # 'title' => 'My Other Awesome Dataset',
202
- # ...
203
- # },
204
- # },
205
- # ...
206
- # ]
207
- # }
208
- #
209
- # The value is +resources+.
210
- #
211
- # @param [Array<Hash>] resources
212
- def accumulate_listing resources
213
- return if resources.blank?
214
- self << self.class::RESOURCE_FIELDS unless skip_column_names?
215
- resources.each { |resource| accumulate(resource) }
216
- end
217
-
218
- # Accumulate lines for each of the error messages in +errors+.
219
- #
220
- # The structure of the response looks like
221
- #
222
- # {
223
- # 'errors' => [
224
- # "A title is required.",
225
- # "A description is required.",
226
- # ...
227
- # ]
228
- # }
229
- #
230
- # The value is +errors+.
231
- #
232
- # @param [Array] errors
233
- def accumulate_errors errors
234
- errors.each do |error|
235
- self << error
236
- end
237
- end
238
-
239
- # Accumulate a line for the given +message+.
240
- #
241
- # The structure of the response from the Infochimps Query API on
242
- # an error is:
243
- #
244
- # {
245
- # 'message' => "The error message returned"
246
- # }
247
- #
248
- # The value is +message+.
249
- #
250
- # @param [String] message
251
- def accumulate_message message
252
- self << message
253
- end
254
-
255
- # Accumulate lines for each of the batch responses in +batch+.
256
- #
257
- # The structure of the response looks like
258
- #
259
- # {
260
- # 'batch' => [
261
- # {
262
- # 'status' => 'created',
263
- # 'resource' => {
264
- # 'dataset' => {
265
- # 'id' => 39293,
266
- # 'title' => "My Awesome Dataset",
267
- # ...
268
- # },
269
- # },
270
- # 'errors' => nil,
271
- # 'local_paths' => [...] # this is totally optional
272
- # },
273
- # {
274
- # 'status' => 'invalid',
275
- # 'errors' => [
276
- # "A title is required.",
277
- # "A description is required."
278
- # ]
279
- # },
280
- # ...
281
- # ]
282
- # }
283
- #
284
- # The value is +batch+.
285
- def accumulate_batch batch
286
- self << ["Status", "Resource", "ID", "Errors"] unless skip_column_names?
287
- batch.each do |response|
288
- status = response['status']
289
- errors = response['errors']
290
- if response['resource'] && errors.blank?
291
- resource_type = response['resource'].keys.first
292
- resource = response['resource'][resource_type]
293
- id = resource['id']
294
- self << [status, resource_type, id]
295
- else
296
- self << ([status, nil, nil] + errors)
297
- end
298
- end
299
- end
300
-
301
- # Accumulate lines for the results in +search+.
302
- #
303
- # The structure of the response looks like
304
- #
305
- # {
306
- # 'search' => {
307
- # 'results' => [
308
- # { 'dataset' => {...} },
309
- # { 'dataset' => {...} },
310
- # ...
311
- # ]
312
- #
313
- # }
314
- # }
315
- #
316
- # The value keyed to +search+ is +search+.
317
- def accumulate_search search
318
- return if search['results'].blank?
319
- self << self.class::RESOURCE_FIELDS unless skip_column_names?
320
- search['results'].each { |resource| accumulate(resource) }
321
- end
322
-
323
- # Accumulate lines for the +api_account+.
324
- #
325
- # The structure of the response looks like
326
- #
327
- # { 'api_account' => {
328
- # 'api_key' => ...,
329
- # 'owner' => {
330
- # 'username' => 'Infochimps',
331
- # ...
332
- # },
333
- # 'updated_at' => ...,
334
- # ...
335
- # }
336
- # }
337
- #
338
- # The value is +api_account+
339
- def accumulate_api_account api_account
340
- # FIXME this is sort of ugly...
341
- self << "USERNAME: #{api_account['owner']['username']}"
342
- self << "API KEY: #{api_account['apikey']}"
343
- self << "LAST UPDATED: #{api_account['updated_at']}"
344
- end
345
-
346
- end
347
-
348
- end
349
-
@@ -1,48 +0,0 @@
1
- module Chimps
2
-
3
- # The Chimps logger. Set via Chimps::Config[:log] and defaults
4
- # to $stdout.
5
- #
6
- # @return [Logger]
7
- def self.log
8
- @log ||= Log.new_logger
9
- end
10
-
11
- # Set the Chimps logger.
12
- #
13
- # @param [Logger] new_log
14
- def self.log= new_log
15
- @log = new_log
16
- end
17
-
18
- # Module for initializing the Chimps logger from configuration
19
- # settings.
20
- module Log
21
-
22
- # Initialize a new Logger instance with the log level set by
23
- # Chimps.verbose?
24
- #
25
- # @return [Logger]
26
- def self.new_logger
27
- require 'logger'
28
- returning(Logger.new(log_file)) do |log|
29
- log.progname = "Chimps"
30
- log.level = Chimps.verbose? ? Logger::INFO : Logger::WARN
31
- end
32
- end
33
-
34
- # Return either the path to the log file in Chimps::Config[:log]
35
- # or $stdout if the path is blank or equal to `-'.
36
- #
37
- # @return [String, $stdout] the path to the log or $stdout
38
- def self.log_file
39
- if Chimps::Config[:log]
40
- Chimps::Config[:log].strip == '-' ? $stdout : Chimps::Config[:log]
41
- else
42
- $stdout
43
- end
44
- end
45
- end
46
- end
47
-
48
-
@@ -1,34 +0,0 @@
1
- module Chimps
2
- module Utils
3
- module UsesModel
4
-
5
- def model
6
- config[:model]
7
- end
8
-
9
- def plural_model
10
- if model[-1].chr == 'y'
11
- model[1..-1] + 'ies'
12
- else
13
- model + 's'
14
- end
15
- end
16
-
17
- def model_identifier
18
- raise CLIError.new("Must provide an ID or URL-escaped handle as the first argument") if config.argv.first.blank?
19
- config.argv.first
20
- end
21
-
22
- def models_path
23
- "#{plural_model}.json"
24
- end
25
-
26
- def model_path
27
- "#{plural_model}/#{model_identifier}.json"
28
- end
29
-
30
- end
31
- end
32
- end
33
-
34
-
@@ -1,93 +0,0 @@
1
- module Chimps
2
- module Utils
3
- module UsesYamlData
4
-
5
- def ignore_yaml_files_on_command_line
6
- false
7
- end
8
- def ignore_first_arg_on_command_line
9
- false
10
- end
11
-
12
- def data
13
- @data ||= merge_all(*(data_from_stdin + data_from_file + data_from_command_line)) || {}
14
- end
15
-
16
- protected
17
-
18
- def merge_all *objs
19
- objs.compact!
20
- return if objs.blank? # raising an error here is left to the caller
21
- klasses = objs.map(&:class).uniq
22
- raise CLIError.new("Mismatched YAML data types -- Hashes can only be combined with Hashes, Arrays with Arrays") if klasses.size > 1
23
- data_type = klasses.first.new
24
- case data_type
25
- when Array
26
- # greater precedence at the end so iterate in order
27
- returning([]) do |d|
28
- objs.each do |obj|
29
- d.concat(obj)
30
- end
31
- end
32
- when Hash
33
- # greater precedence at the end so iterate in order
34
- returning({}) do |d|
35
- objs.each do |obj|
36
- d.merge!(obj)
37
- end
38
- end
39
- else raise CLIError.new("Incompatible YAML data type #{data_type} -- can only combine Hashes and Arrays")
40
- end
41
- end
42
-
43
- def params_from_command_line
44
- returning([]) do |d|
45
- config.argv.each_with_index do |arg, index|
46
- next if index == 0 && ignore_first_arg_on_command_line
47
- next unless arg =~ /^(\w+) *=(.*)$/
48
- name, value = $1.downcase.to_sym, $2.strip
49
- d << { name => value } # always a hash
50
- end
51
- end
52
- end
53
-
54
- def yaml_files_from_command_line
55
- returning([]) do |d|
56
- config.argv.each_with_index do |arg, index|
57
- next if index == 0 && ignore_first_arg_on_command_line
58
- next if arg =~ /^(\w+) *=(.*)$/
59
- path = File.expand_path(arg)
60
- raise CLIError.new("No such path #{path}") unless File.exist?(path)
61
- d << YAML.load(open(path)) # either a hash or an array
62
- end
63
- end
64
- end
65
-
66
- def data_from_command_line
67
- if ignore_yaml_files_on_command_line
68
- params_from_command_line
69
- else
70
- yaml_files_from_command_line + params_from_command_line
71
- end
72
- end
73
-
74
- def data_from_file
75
- [config[:data_file] ? YAML.load_file(File.expand_path(config[:data_file])) : nil]
76
- end
77
-
78
- def data_from_stdin
79
- return [nil] unless $stdin.stat.size > 0
80
- returning([]) do |d|
81
- YAML.load_stream($stdin).each do |document|
82
- d << document
83
- end
84
- end
85
- end
86
-
87
- def ensure_data_is_present!
88
- raise CLIError.new("Must provide some data to send, either on the command line, from an input file, or by piping to STDIN. Try `chimps help #{name}'") unless data.present?
89
- end
90
-
91
- end
92
- end
93
- end
@@ -1,127 +0,0 @@
1
- module Chimps
2
- module Workflows
3
-
4
- # A class for performing batch updates/uploads to Infochimps.
5
- #
6
- # It works by taking YAML data describing many updates and
7
- # performing a single batch API request with this data.
8
- #
9
- # The batch response is then parsed and analyzed and (given
10
- # success or fearlessness) any necessary uploads are performed.
11
- #
12
- # Examples of the input data format can be found in the
13
- # <tt>/examples</tt> directory of the Chimps distribution.
14
- class BatchUpdater
15
-
16
- # The data to be sent as a bulk update.
17
- attr_reader :data
18
-
19
- # The batch update response
20
- attr_reader :batch_response
21
-
22
- # The output file to store the bulk update response.
23
- attr_reader :output_path
24
-
25
- # Whether to upload even if there were errors on update.
26
- attr_reader :upload_even_if_errors
27
-
28
- # The data format to annotate the upload with.
29
- #
30
- # Chimps will try to guess if this isn't given.
31
- attr_reader :fmt
32
-
33
- # Create a new BatchUpdater with the given +data+ and +options+.
34
- #
35
- # The intermediate batch response can be saved at a file named
36
- # by <tt>:output_path</tt>, though this isn't necessary.
37
- #
38
- # @param [Array] data an array of resource updates
39
- # @param [Hash] options
40
- # @option options [String] output_path path to store the batch response
41
- # @option options [true, false] upload_even_if_errors whether to continue uploading in the presence of errors on update
42
- # @option options [String] fmt the data format to annotate each upload with (see `chimps upload')
43
- # @return [Chimps::Workflows::BatchUpdater]
44
- def initialize data, options={}
45
- @data = data
46
- @output_path = options[:output_path]
47
- @upload_even_if_errors = options[:upload_even_if_errors]
48
- @fmt = options[:fmt]
49
- end
50
-
51
- # The path to submit batch update requests.
52
- #
53
- # @return [String]
54
- def batch_path
55
- "batch.json"
56
- end
57
-
58
- # Perform this batch update followed by the batch upload.
59
- def execute!
60
- batch_update!
61
- batch_upload!
62
- end
63
-
64
- # Perform the batch update.
65
- def batch_update!
66
- @batch_response = Request.new(batch_path, :data => { :batch => data }, :authenticate => true).post
67
- File.open(output_path, 'w') { |f| f.puts batch_response.body } if output_path
68
- batch_response.print
69
- end
70
-
71
- # Were any of the updates performed during the batch update
72
- # errors?
73
- #
74
- # @return [true, false]
75
- def error?
76
- batch_response['batch'].each do |response|
77
- status = response['status']
78
- return true unless ['created', 'updated'].include?(status)
79
- end
80
- false
81
- end
82
-
83
- # Did all of the updates performed in the batch update succeed?
84
- #
85
- # @return [true, false]
86
- def success?
87
- ! error?
88
- end
89
-
90
- # Perform the batch upload.
91
- #
92
- # Will bail if the batch update had an error unless
93
- # Chimps::Workflows::BatchUpdater#upload_even_if_errors returns
94
- # true.
95
- def batch_upload!
96
- return unless success? || upload_even_if_errors
97
- $stderr.puts("WARNING: continuing with uploads even though there were errors") unless success?
98
- dataset_ids_and_local_paths.each do |id, local_paths|
99
- Chimps::Workflows::Uploader.new(:dataset => id, :local_paths => local_paths, :fmt => fmt).execute!
100
- end
101
- end
102
-
103
- protected
104
- # Iterate through the batch response and return tuples
105
- # consisting of an ID and an array of local paths to upload.
106
- #
107
- # Only datasets which were successfully created/updated,
108
- # returned an ID, and had local_paths defined in the original
109
- # batch update will be output.
110
- #
111
- # @return [Array<Array>]
112
- def dataset_ids_and_local_paths
113
- batch_response['batch'].map do |response|
114
- status = response['status']
115
- next unless (status == 'created' || status == 'updated') # skip errors
116
- next unless dataset = response['resource']['dataset'] # skip unless it's a dataset
117
- id = dataset['id']
118
- next if id.blank? # skip unless it has an ID
119
- local_paths = response['local_paths']
120
- next if local_paths.blank? # skip unless local_paths were defined
121
- [id, local_paths]
122
- end.compact
123
- end
124
- end
125
- end
126
- end
127
-