chimps 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. data/Gemfile +3 -9
  2. data/Gemfile.lock +14 -10
  3. data/README.rdoc +146 -240
  4. data/Rakefile +4 -33
  5. data/VERSION +1 -1
  6. data/lib/chimps/config.rb +35 -21
  7. data/lib/chimps/{utils/error.rb → error.rb} +1 -12
  8. data/lib/chimps/query_request.rb +67 -0
  9. data/lib/chimps/request.rb +82 -108
  10. data/lib/chimps/response.rb +62 -22
  11. data/lib/chimps/utils/typewriter.rb +90 -0
  12. data/lib/chimps/utils/uses_curl.rb +22 -12
  13. data/lib/chimps/utils.rb +50 -6
  14. data/lib/chimps/workflows/download.rb +72 -0
  15. data/lib/chimps/workflows/upload.rb +113 -0
  16. data/lib/chimps.rb +12 -12
  17. data/spec/chimps/query_request_spec.rb +44 -0
  18. data/spec/chimps/request_spec.rb +92 -0
  19. data/spec/chimps/response_spec.rb +0 -1
  20. data/spec/chimps/workflows/download_spec.rb +48 -0
  21. data/spec/spec_helper.rb +2 -19
  22. metadata +46 -91
  23. data/.document +0 -5
  24. data/.gitignore +0 -32
  25. data/CHANGELOG.textile +0 -4
  26. data/bin/chimps +0 -5
  27. data/lib/chimps/cli.rb +0 -28
  28. data/lib/chimps/commands/base.rb +0 -65
  29. data/lib/chimps/commands/batch.rb +0 -40
  30. data/lib/chimps/commands/create.rb +0 -31
  31. data/lib/chimps/commands/destroy.rb +0 -26
  32. data/lib/chimps/commands/download.rb +0 -46
  33. data/lib/chimps/commands/help.rb +0 -100
  34. data/lib/chimps/commands/list.rb +0 -41
  35. data/lib/chimps/commands/query.rb +0 -82
  36. data/lib/chimps/commands/search.rb +0 -48
  37. data/lib/chimps/commands/show.rb +0 -30
  38. data/lib/chimps/commands/test.rb +0 -39
  39. data/lib/chimps/commands/update.rb +0 -34
  40. data/lib/chimps/commands/upload.rb +0 -50
  41. data/lib/chimps/commands.rb +0 -125
  42. data/lib/chimps/typewriter.rb +0 -349
  43. data/lib/chimps/utils/log.rb +0 -48
  44. data/lib/chimps/utils/uses_model.rb +0 -34
  45. data/lib/chimps/utils/uses_yaml_data.rb +0 -93
  46. data/lib/chimps/workflows/batch.rb +0 -127
  47. data/lib/chimps/workflows/downloader.rb +0 -102
  48. data/lib/chimps/workflows/up.rb +0 -149
  49. data/lib/chimps/workflows/upload/bundler.rb +0 -249
  50. data/lib/chimps/workflows/upload/notifier.rb +0 -59
  51. data/lib/chimps/workflows/upload/token.rb +0 -77
  52. data/lib/chimps/workflows/upload/uploader.rb +0 -51
  53. data/lib/chimps/workflows.rb +0 -12
  54. data/spec/chimps/typewriter_spec.rb +0 -114
  55. data/spec/chimps/workflows/upload/bundler_spec.rb +0 -75
  56. data/spec/chimps/workflows/upload/token_spec.rb +0 -6
data/lib/chimps/typewriter.rb (deleted)
@@ -1,349 +0,0 @@
- module Chimps
-
- # Responses from Infochimps (once parsed from the original JSON or
- # YAML) consist of nested hashes:
- #
- # { 'dataset' => {
- # 'title' => 'My dataset',
- # 'description' => 'An amazing dataset which...',
- # ...
- # 'sources' => {
- # 'source' => {
- # 'title' => 'Trustworthy Source'
- # ...
- # },
- # 'source' => {..},
- # ...
- # }
- # },
- # ...
- # }
- #
- # This class utilizes a typewriter and a team of trained chimpanizes
- # to create pretty, line-oriented output from these hashes.
- class Typewriter < Array
-
- # The response that this Typewriter will print.
- attr_accessor :response
-
- # Widths of columns as determined by the maximum number of
- # characters in any row.
- attr_accessor :column_widths
-
- # Fields to print for each resource. Given as humanized names,
- # will be automatically converted to key names.
- RESOURCE_FIELDS = ["ID", "Cached Slug", "Updated At", "Title"]
-
- # String to insert between fields in output.
- FIELD_SEPARATOR = " "
-
- # Return a Typewriter to print +data+.
- #
- # @param [Chimps::Response] response
- # @return [Chimps::Typewriter]
- def initialize response, options={}
- super()
- @response = response
- @column_widths = []
- @skip_column_names = options[:skip_column_names]
- accumulate(response)
- end
-
- # Print column names as well as values?
- #
- # @return [true, nil]
- def skip_column_names?
- @skip_column_names
- end
-
- # Print the accumulated lines in this Typewriter to the given
- # +output+ (defaults to <tt>$stdout</tt>).
- #
- # Will first calculate appropriate column widths for any
- # Array-like lines.
- #
- # @param [#puts] output
- def print output=$stdout
- calculate_column_widths!
- each do |line|
- if line.is_a?(Array)
- output.puts pad_and_join(line)
- else
- output.puts line
- end
- end
- end
-
- # Accumulate lines to print from +obj+.
- #
- # If +obj+ is a string then it will be accumulated as a single
- # line to print.
- #
- # If +obj+ is an Array then each element will be passed to
- # Chimps::Typewriter#accumulate.
- #
- # If +obj+ is a Hash then each key will be mapped to a method
- # <tt>accumulate_KEY</tt> and the corresponding value passed in.
- # This method is responsible for accumulating lines to print.
- #
- # @param [Array, Hash, String] obj
- def accumulate obj
- case obj
- when Hash
- obj.each_pair do |resource_name, resource_data|
- case
- when %w[datasets sources licenses].include?(resource_name.to_s)
- accumulate_listing(resource_data)
- when %w[dataset source license].include?(resource_name.to_s)
- accumulate_resource(resource_name, resource_data)
- when %w[errors batch search api_account message].include?(resource_name.to_s)
- send("accumulate_#{resource_name}", resource_data)
- when %w[message].include?(resource_name.to_s)
- self << [resource_data]
- when %w[error].include?(resource_name.to_s)
- nil
- when :array == resource_name # constructed by Chimps::Response
- accumulate_listing(resource_data)
- when :string == resource_name # constructed by Chimps::Response
- self << obj[:string]
- else
- $stderr.puts resource_data.inspect if Chimps.verbose?
- raise PrintingError.new("Unrecognized resource type `#{resource_name}'.")
- end
- end
- when Array
- obj.each { |element| accumulate(element) }
- when String
- self << obj
- else
- raise PrintingError.new("Cannot print a #{obj.class}")
- end
- end
-
- protected
-
- # Loop through the accumulated lines, finding the maximum widths
- # of each element in each Array-like line.
- def calculate_column_widths!
- each do |line|
- next unless line.is_a?(Array) # don't try to align strings
- line.each_with_index do |value, field|
- current_max_width = column_widths[field]
- unless current_max_width
- current_max_width = 0
- column_widths << current_max_width
- end
- value_size = value.to_s.size
- column_widths[field] = value_size if value_size > current_max_width
- end
- end
- end
-
- # Return a string with +values+ joined by FIELD_SEPARATOR each
- # padded to the corresponding maximum column size.
- #
- # Must have called Chimps::Typewriter#calculate_column_widths!
- # first.
- #
- # @param [Array] values
- # @return [String]
- def pad_and_join values
- returning([]) do |padded_values|
- values.each_with_index do |value, field|
- max_width = column_widths[field]
- value_width = value.to_s.size
- padded_values << value.to_s + (' ' * (max_width - value_width))
- end
- end.join(FIELD_SEPARATOR)
- end
-
- # Accumulate lines for the given +resource_name+ from the given
- # +resource_data+.
- #
- # Fields to accumulate in each line are set in
- # Chimps::Typewriter::RESOURCE_FIELDS.
- #
- # The structure of the response for a resource looks like:
- #
- # {
- # 'dataset' => {
- # 'id' => 39293,
- # 'title' => 'My Awesome Dataset',
- # ...
- # }
- # }
- #
- # The key is +resource_name+ and the value is +resource_data+.
- #
- # @param [String] resource_name
- # @param [Hash] resource_data
- def accumulate_resource resource_name, resource_data
- self << self.class::RESOURCE_FIELDS.map { |field_name| resource_data[field_name.downcase.tr(' ', '_')] }
- end
-
- # Accumulate lines for each of the +resources+, all of the given
- # +type+.
- #
- # The structure of the response for a listing looks like:
- #
- # {
- # 'datasets' => [
- # {
- # 'dataset' => {
- # 'id' => 39293,
- # 'title' => 'My Awesome Dataset',
- # ...
- # },
- # },
- # {
- # 'dataset' => {
- # 'id' => 28998,
- # 'title' => 'My Other Awesome Dataset',
- # ...
- # },
- # },
- # ...
- # ]
- # }
- #
- # The value is +resources+.
- #
- # @param [Array<Hash>] resources
- def accumulate_listing resources
- return if resources.blank?
- self << self.class::RESOURCE_FIELDS unless skip_column_names?
- resources.each { |resource| accumulate(resource) }
- end
-
- # Accumulate lines for each of the error messages in +errors+.
- #
- # The structure of the response looks like
- #
- # {
- # 'errors' => [
- # "A title is required.",
- # "A description is required.",
- # ...
- # ]
- # }
- #
- # The value is +errors+.
- #
- # @param [Array] errors
- def accumulate_errors errors
- errors.each do |error|
- self << error
- end
- end
-
- # Accumulate a line for the given +message+.
- #
- # The structure of the response from the Infochimps Query API on
- # an error is:
- #
- # {
- # 'message' => "The error message returned"
- # }
- #
- # The value is +message+.
- #
- # @param [String] message
- def accumulate_message message
- self << message
- end
-
- # Accumulate lines for each of the batch responses in +batch+.
- #
- # The structure of the response looks like
- #
- # {
- # 'batch' => [
- # {
- # 'status' => 'created',
- # 'resource' => {
- # 'dataset' => {
- # 'id' => 39293,
- # 'title' => "My Awesome Dataset",
- # ...
- # },
- # },
- # 'errors' => nil,
- # 'local_paths' => [...] # this is totally optional
- # },
- # {
- # 'status' => 'invalid',
- # 'errors' => [
- # "A title is required.",
- # "A description is required."
- # ]
- # },
- # ...
- # ]
- # }
- #
- # The value is +batch+.
- def accumulate_batch batch
- self << ["Status", "Resource", "ID", "Errors"] unless skip_column_names?
- batch.each do |response|
- status = response['status']
- errors = response['errors']
- if response['resource'] && errors.blank?
- resource_type = response['resource'].keys.first
- resource = response['resource'][resource_type]
- id = resource['id']
- self << [status, resource_type, id]
- else
- self << ([status, nil, nil] + errors)
- end
- end
- end
-
- # Accumulate lines for the results in +search+.
- #
- # The structure of the response looks like
- #
- # {
- # 'search' => {
- # 'results' => [
- # { 'dataset' => {...} },
- # { 'dataset' => {...} },
- # ...
- # ]
- #
- # }
- # }
- #
- # The value keyed to +search+ is +search+.
- def accumulate_search search
- return if search['results'].blank?
- self << self.class::RESOURCE_FIELDS unless skip_column_names?
- search['results'].each { |resource| accumulate(resource) }
- end
-
- # Accumulate lines for the +api_account+.
- #
- # The structure of the response looks like
- #
- # { 'api_account' => {
- # 'api_key' => ...,
- # 'owner' => {
- # 'username' => 'Infochimps',
- # ...
- # },
- # 'updated_at' => ...,
- # ...
- # }
- # }
- #
- # The value is +api_account+
- def accumulate_api_account api_account
- # FIXME this is sort of ugly...
- self << "USERNAME: #{api_account['owner']['username']}"
- self << "API KEY: #{api_account['apikey']}"
- self << "LAST UPDATED: #{api_account['updated_at']}"
- end
-
- end
-
- end
-
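The Typewriter removed above turned a parsed response's nested hashes into aligned, line-oriented output; a slimmer replacement ships in data/lib/chimps/utils/typewriter.rb. A minimal usage sketch based only on the signatures in this hunk, assuming `response` is an already-parsed Chimps::Response:

    # `response` is assumed to be a parsed Chimps::Response (nested hashes as shown above)
    typewriter = Chimps::Typewriter.new(response, :skip_column_names => false)
    typewriter.print($stdout)   # pads Array-like lines to column widths, then writes each line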
data/lib/chimps/utils/log.rb (deleted)
@@ -1,48 +0,0 @@
- module Chimps
-
- # The Chimps logger. Set via Chimps::Config[:log] and defaults
- # to $stdout.
- #
- # @return [Logger]
- def self.log
- @log ||= Log.new_logger
- end
-
- # Set the Chimps logger.
- #
- # @param [Logger] new_log
- def self.log= new_log
- @log = new_log
- end
-
- # Module for initializing the Chimps logger from configuration
- # settings.
- module Log
-
- # Initialize a new Logger instance with the log level set by
- # Chimps.verbose?
- #
- # @return [Logger]
- def self.new_logger
- require 'logger'
- returning(Logger.new(log_file)) do |log|
- log.progname = "Chimps"
- log.level = Chimps.verbose? ? Logger::INFO : Logger::WARN
- end
- end
-
- # Return either the path to the log file in Chimps::Config[:log]
- # or $stdout if the path is blank or equal to `-'.
- #
- # @return [String, $stdout] the path to the log or $stdout
- def self.log_file
- if Chimps::Config[:log]
- Chimps::Config[:log].strip == '-' ? $stdout : Chimps::Config[:log]
- else
- $stdout
- end
- end
- end
- end
-
-
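The removed logging helpers built a plain Logger from Chimps::Config[:log], treating a missing value or `-' as "log to $stdout". A brief sketch of how they were driven, assuming the configuration behavior shown above:

    require 'logger'

    Chimps::Config[:log] = '-'               # '-' (or unset) meant log to $stdout
    Chimps.log.info('fetching dataset...')   # lazily builds a Logger with progname "Chimps"
    Chimps.log = Logger.new('chimps.log')    # or swap in any Logger of your own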
data/lib/chimps/utils/uses_model.rb (deleted)
@@ -1,34 +0,0 @@
- module Chimps
- module Utils
- module UsesModel
-
- def model
- config[:model]
- end
-
- def plural_model
- if model[-1].chr == 'y'
- model[1..-1] + 'ies'
- else
- model + 's'
- end
- end
-
- def model_identifier
- raise CLIError.new("Must provide an ID or URL-escaped handle as the first argument") if config.argv.first.blank?
- config.argv.first
- end
-
- def models_path
- "#{plural_model}.json"
- end
-
- def model_path
- "#{plural_model}/#{model_identifier}.json"
- end
-
- end
- end
- end
-
-
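UsesModel derived API paths from the configured model name and the first command-line argument. An illustration only, with assumed values config[:model] = 'dataset' and a first argument of '39293':

    plural_model   # => "datasets"
    models_path    # => "datasets.json"          (collection path)
    model_path     # => "datasets/39293.json"    (single-record path)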
data/lib/chimps/utils/uses_yaml_data.rb (deleted)
@@ -1,93 +0,0 @@
- module Chimps
- module Utils
- module UsesYamlData
-
- def ignore_yaml_files_on_command_line
- false
- end
- def ignore_first_arg_on_command_line
- false
- end
-
- def data
- @data ||= merge_all(*(data_from_stdin + data_from_file + data_from_command_line)) || {}
- end
-
- protected
-
- def merge_all *objs
- objs.compact!
- return if objs.blank? # raising an error here is left to the caller
- klasses = objs.map(&:class).uniq
- raise CLIError.new("Mismatched YAML data types -- Hashes can only be combined with Hashes, Arrays with Arrays") if klasses.size > 1
- data_type = klasses.first.new
- case data_type
- when Array
- # greater precedence at the end so iterate in order
- returning([]) do |d|
- objs.each do |obj|
- d.concat(obj)
- end
- end
- when Hash
- # greater precedence at the end so iterate in order
- returning({}) do |d|
- objs.each do |obj|
- d.merge!(obj)
- end
- end
- else raise CLIError.new("Incompatible YAML data type #{data_type} -- can only combine Hashes and Arrays")
- end
- end
-
- def params_from_command_line
- returning([]) do |d|
- config.argv.each_with_index do |arg, index|
- next if index == 0 && ignore_first_arg_on_command_line
- next unless arg =~ /^(\w+) *=(.*)$/
- name, value = $1.downcase.to_sym, $2.strip
- d << { name => value } # always a hash
- end
- end
- end
-
- def yaml_files_from_command_line
- returning([]) do |d|
- config.argv.each_with_index do |arg, index|
- next if index == 0 && ignore_first_arg_on_command_line
- next if arg =~ /^(\w+) *=(.*)$/
- path = File.expand_path(arg)
- raise CLIError.new("No such path #{path}") unless File.exist?(path)
- d << YAML.load(open(path)) # either a hash or an array
- end
- end
- end
-
- def data_from_command_line
- if ignore_yaml_files_on_command_line
- params_from_command_line
- else
- yaml_files_from_command_line + params_from_command_line
- end
- end
-
- def data_from_file
- [config[:data_file] ? YAML.load_file(File.expand_path(config[:data_file])) : nil]
- end
-
- def data_from_stdin
- return [nil] unless $stdin.stat.size > 0
- returning([]) do |d|
- YAML.load_stream($stdin).each do |document|
- d << document
- end
- end
- end
-
- def ensure_data_is_present!
- raise CLIError.new("Must provide some data to send, either on the command line, from an input file, or by piping to STDIN. Try `chimps help #{name}'") unless data.present?
- end
-
- end
- end
- end
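UsesYamlData merged YAML documents piped to STDIN, a configured data file (config[:data_file]), YAML files named on the command line, and key=value arguments, with later sources taking precedence. A sketch of merge_all as written above (hashes merge, arrays concatenate, mixing the two raises):

    merge_all({ 'title' => 'Old' }, { 'title' => 'New', 'protected' => true })
    # => { 'title' => 'New', 'protected' => true }
    merge_all([{ 'dataset' => {} }], [{ 'source' => {} }])
    # => [{ 'dataset' => {} }, { 'source' => {} }]
    merge_all({ 'a' => 1 }, [2])   # raises CLIError: mismatched YAML data types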
data/lib/chimps/workflows/batch.rb (deleted)
@@ -1,127 +0,0 @@
- module Chimps
- module Workflows
-
- # A class for performing batch updates/uploads to Infochimps.
- #
- # It works by taking YAML data describing many updates and
- # performing a single batch API request with this data.
- #
- # The batch response is then parsed and analyzed and (given
- # success or fearlessness) any necessary uploads are performed.
- #
- # Examples of the input data format can be found in the
- # <tt>/examples</tt> directory of the Chimps distribution.
- class BatchUpdater
-
- # The data used sent as a bulk update.
- attr_reader :data
-
- # The batch update response
- attr_reader :batch_response
-
- # The output file to store the bulk update response.
- attr_reader :output_path
-
- # Whether to upload even if there were errors on update.
- attr_reader :upload_even_if_errors
-
- # The data format to annotate the upload with.
- #
- # Chimps will try to guess if this isn't given.
- attr_reader :fmt
-
- # Create a new BatchUpdater with the given +data+ and +options+.
- #
- # The intermediate batch response can be saved at a file named
- # by <tt>:output_path</tt>, though this isn't necessary.
- #
- # @param [Array] data an array of resource updates
- # @param [Hash] options
- # @option options [String] output_path path to store the batch response
- # @option options [true, false] upload_even_if_errors whether to continue uploading in the presence of errors on update
- # @option options [String] fmt the data format to annotate each upload with (see `chimps upload')
- # @return [Chimps::Workflows::BatchUpdater]
- def initialize data, options={}
- @data = data
- @output_path = options[:output_path]
- @upload_even_if_errors = options[:upload_even_if_errors]
- @fmt = options[:fmt]
- end
-
- # The path to submit batch update requests.
- #
- # @return [String]
- def batch_path
- "batch.json"
- end
-
- # Perform this batch update followed by the batch upload.
- def execute!
- batch_update!
- batch_upload!
- end
-
- # Perform the batch update.
- def batch_update!
- @batch_response = Request.new(batch_path, :data => { :batch => data }, :authenticate => true).post
- File.open(output_path, 'w') { |f| f.puts batch_response.body } if output_path
- batch_response.print
- end
-
- # Were any of the updates performed during the batch update
- # errors?
- #
- # @return [true, false]
- def error?
- batch_response['batch'].each do |response|
- status = response['status']
- return true unless ['created', 'updated'].include?(status)
- end
- false
- end
-
- # Did all of the updates performed in the batch update succeed?
- #
- # @return [true, false]
- def success?
- ! error?
- end
-
- # Perform the batch upload.
- #
- # Will bail if the batch update had an error unless
- # Chimps::Workflows::BatchUpdater#upload_even_if_errors returns
- # true.
- def batch_upload!
- return unless success? || upload_even_if_errors
- $stderr.puts("WARNING: continuing with uploads even though there were errors") unless success?
- dataset_ids_and_local_paths.each do |id, local_paths|
- Chimps::Workflows::Uploader.new(:dataset => id, :local_paths => local_paths, :fmt => fmt).execute!
- end
- end
-
- protected
- # Iterate through the batch response and return tuples
- # consisting of an ID and an array of of local paths to upload.
- #
- # Only datasets which were successfully created/updated,
- # returned an ID, and had local_paths defined in the original
- # batch update will be output.
- #
- # @return [Array<Array>]
- def dataset_ids_and_local_paths
- batch_response['batch'].map do |response|
- status = response['status']
- next unless (status == 'created' || status == 'updated') # skip errors
- next unless dataset = response['resource']['dataset'] # skip unless it's a dataset
- id = dataset['id']
- next if id.blank? # skip unless it has an ID
- local_paths = response['local_paths']
- next if local_paths.blank? # skip unless local_paths were defined
- [id, local_paths]
- end.compact
- end
- end
- end
- end
-
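BatchUpdater POSTed the whole update set to batch.json and then uploaded files for every dataset that came back created or updated with an id and local_paths. A minimal sketch based on the constructor and execute! above; the file name and option values are placeholders:

    updates = YAML.load_file('examples/batch.yaml')        # an Array of resource updates
    Chimps::Workflows::BatchUpdater.new(updates,
      :output_path           => 'batch_response.json',     # optional: save the raw batch response
      :upload_even_if_errors => false,
      :fmt                   => 'csv'                      # format annotation passed to each upload
    ).execute!                                             # batch_update! then batch_upload!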