chimps 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/.gitignore +17 -0
  2. data/LICENSE +674 -0
  3. data/README.rdoc +48 -0
  4. data/VERSION +1 -0
  5. data/bin/chimps +4 -0
  6. data/examples/batch.yaml +69 -0
  7. data/lib/chimps/cli.rb +102 -0
  8. data/lib/chimps/commands/base.rb +107 -0
  9. data/lib/chimps/commands/batch.rb +68 -0
  10. data/lib/chimps/commands/create.rb +33 -0
  11. data/lib/chimps/commands/destroy.rb +28 -0
  12. data/lib/chimps/commands/download.rb +76 -0
  13. data/lib/chimps/commands/help.rb +89 -0
  14. data/lib/chimps/commands/list.rb +54 -0
  15. data/lib/chimps/commands/query.rb +59 -0
  16. data/lib/chimps/commands/search.rb +59 -0
  17. data/lib/chimps/commands/show.rb +32 -0
  18. data/lib/chimps/commands/test.rb +40 -0
  19. data/lib/chimps/commands/update.rb +33 -0
  20. data/lib/chimps/commands/upload.rb +63 -0
  21. data/lib/chimps/commands.rb +46 -0
  22. data/lib/chimps/config.rb +57 -0
  23. data/lib/chimps/request.rb +302 -0
  24. data/lib/chimps/response.rb +146 -0
  25. data/lib/chimps/typewriter.rb +326 -0
  26. data/lib/chimps/utils/error.rb +40 -0
  27. data/lib/chimps/utils/extensions.rb +109 -0
  28. data/lib/chimps/utils/uses_curl.rb +26 -0
  29. data/lib/chimps/utils/uses_model.rb +51 -0
  30. data/lib/chimps/utils/uses_yaml_data.rb +94 -0
  31. data/lib/chimps/utils.rb +11 -0
  32. data/lib/chimps/workflows/batch.rb +127 -0
  33. data/lib/chimps/workflows/downloader.rb +102 -0
  34. data/lib/chimps/workflows/uploader.rb +238 -0
  35. data/lib/chimps/workflows.rb +11 -0
  36. data/lib/chimps.rb +22 -0
  37. data/spec/chimps/cli_spec.rb +22 -0
  38. data/spec/chimps/commands/base_spec.rb +25 -0
  39. data/spec/chimps/commands/list_spec.rb +25 -0
  40. data/spec/chimps/response_spec.rb +8 -0
  41. data/spec/chimps/typewriter_spec.rb +114 -0
  42. data/spec/spec_helper.rb +17 -0
  43. data/spec/support/custom_matchers.rb +6 -0
  44. metadata +133 -0
@@ -0,0 +1,146 @@
1
module Chimps

  # A class to wrap responses from the Infochimps API.
  #
  # A Response is a Hash: once the body has been parsed, a Hash body
  # is merged into the Response itself, an Array body is stored under
  # the <tt>:array</tt> key and a String body under <tt>:string</tt>.
  class Response < Hash

    # Error message used when a body was received but could not be
    # parsed and no other error message is known.
    MALFORMED_MESSAGE = "Response was received but was malformed"

    # The response body.
    attr_reader :body

    # The error message for this response, if it was an error.
    #
    # This is actually generated within RestClient from the HTTP
    # status code and attached to the response.  It is passed in when
    # initializing a Chimps::Response by a Chimps::Request.
    attr_reader :error

    # Return a response built from a String with the
    # RestClient::Response module mixed-in.
    #
    # If <tt>:error</tt> is passed then this response is considered
    # an error with the given message.
    #
    # @param [String, #to_i, #headers] body
    # @param [Hash] options
    # @option options [String] error the error message
    # @return [Chimps::Response]
    # @raise [Chimps::ParseError] if the body cannot be parsed
    def initialize body, options={}
      super()
      @body  = body
      @error = options[:error]
      parse!
    end

    # The HTTP status code of the response.
    #
    # @return [Integer]
    def code
      @code ||= body.to_i
    end

    # The HTTP headers of the response.
    #
    # @return [Hash]
    def headers
      @headers ||= body.headers
    end

    # The <tt>Content-type</tt> of the response.
    #
    # Will return <tt>:yaml</tt> or <tt>:json</tt> if possible, else
    # just the raw <tt>Content-type</tt>.
    #
    # @return [Symbol, String]
    def content_type
      @content_type ||= case headers[:content_type]
                        when /json/ then :json
                        when /yaml/ then :yaml
                        else headers[:content_type]
                        end
    end

    # Parse the response from Infochimps and store the result in this
    # Hash.
    #
    # Parsed values that are not a Hash, Array or String are ignored.
    #
    # @raise [Chimps::ParseError] if the body cannot be parsed
    def parse!
      data = parse_response_body
      case data
        # hack...sometimes we get back an array instead of a
        # hash...should change the API at Chimps end
      when Hash   then merge!(data)
      when Array  then self[:array]  = data # see Chimps::Typewriter#accumulate
      when String then self[:string] = data
      end
    end

    # Was this response a success?
    #
    # @return [true, false]
    def success?
      ! error?
    end

    # Was this response an error?
    #
    # @return [true, false]
    def error?
      !! @error
    end

    # Print this response.
    #
    # Will also print a diagnostic line if Chimps is verbose or this
    # response was an error.
    #
    # @param [Hash] options
    # @option options [true, nil] skip_column_names (nil) Don't print column names in output.
    def print options={}
      puts diagnostic_line if Chimps.verbose? || error?
      Typewriter.new(self, options).print
    end

    protected

    # Construct and return a line of diagnostic information on this
    # response: the status code followed by either "SUCCESS" or the
    # error message.
    #
    # Interpolation is used so that an error that is not a String
    # cannot raise a TypeError here.
    #
    # @return [String]
    def diagnostic_line
      "#{code} -- #{success? ? 'SUCCESS' : error}"
    end

    # Raise a Chimps::ParseError, optionally including the response
    # body in the error message if Chimps is verbose.
    #
    # If no error message has been recorded yet, the response is
    # marked as malformed first so the diagnostic line does not claim
    # "SUCCESS" for a body that could not be parsed.  An error message
    # passed in at construction time is kept.
    #
    # @raise [Chimps::ParseError] always
    def parse_error!
      @error ||= MALFORMED_MESSAGE
      message = Chimps.verbose? ? "#{diagnostic_line}\n\n#{body}" : diagnostic_line
      raise ParseError.new(message)
    end

    # Parse the body of this response using the YAML or JSON libraries
    # into a Ruby data structure.
    #
    # An empty body or the literal <tt>null</tt> yields an empty Hash.
    #
    # @return [Hash, Array, String]
    # @raise [Chimps::ParseError] if the body is malformed
    def parse_response_body
      return {} if body.blank? || body == 'null'
      if content_type == :yaml
        require 'yaml'
        begin
          # YAML.load builds Ruby objects; YAML.parse would only
          # return the YAML node tree.
          #
          # NOTE(review): the body comes from the network.  On Ruby
          # versions where YAML.load is not restricted to basic types,
          # consider a safe loader here.
          YAML.load(body)
        rescue StandardError
          # The exception class raised for malformed YAML depends on
          # the YAML engine in use, so rescue broadly around this
          # single call.
          parse_error!
        end
      else
        require 'json'
        begin
          JSON.parse(body)
        rescue JSON::ParserError
          parse_error!
        end
      end
    end

  end
end
146
+
@@ -0,0 +1,326 @@
1
module Chimps

  # Responses from Infochimps (once parsed from the original JSON or
  # YAML) consist of nested hashes:
  #
  #   { 'dataset' => {
  #       'title'       => 'My dataset',
  #       'description' => 'An amazing dataset which...',
  #       ...
  #       'sources'     => {
  #         'source' => {
  #           'title' => 'Trustworthy Source'
  #           ...
  #         },
  #         'source' => {..},
  #         ...
  #       }
  #     },
  #     ...
  #   }
  #
  # This class utilizes a typewriter and a team of trained chimpanzees
  # to create pretty, line-oriented output from these hashes.
  #
  # A Typewriter is an Array of lines.  Each line is either a String
  # (printed as is) or an Array of column values (padded so that
  # columns line up across all Array lines).
  class Typewriter < Array

    # The response that this Typewriter will print.
    attr_accessor :response

    # Widths of columns as determined by the maximum number of
    # characters in any row.
    attr_accessor :column_widths

    # Fields to print for each resource.  Given as humanized names,
    # will be automatically converted to key names.
    RESOURCE_FIELDS = ["ID", "Cached Slug", "Updated At", "Title"]

    # String to insert between fields in output.
    FIELD_SEPARATOR = " "

    # Return a Typewriter to print +response+.
    #
    # The lines to print are accumulated immediately.
    #
    # @param [Chimps::Response] response
    # @param [Hash] options
    # @option options [true, nil] skip_column_names (nil) Don't print column names in output.
    # @return [Chimps::Typewriter]
    # @raise [Chimps::PrintingError] if the response contains data that cannot be printed
    def initialize response, options={}
      super()
      @response          = response
      @column_widths     = []
      @skip_column_names = options[:skip_column_names]
      accumulate(response)
    end

    # Should column names be left out of the output?
    #
    # @return [true, nil]
    def skip_column_names?
      @skip_column_names
    end

    # Print the accumulated lines in this Typewriter.
    #
    # Will first calculate appropriate column widths for any
    # Array-like lines.
    def print
      calculate_column_widths!
      each do |line|
        if line.is_a?(Array)
          puts pad_and_join(line)
        else
          puts line
        end
      end
    end

    # Accumulate lines to print from +obj+.
    #
    # If +obj+ is a string then it will be accumulated as a single
    # line to print.
    #
    # If +obj+ is an Array then each element will be passed to
    # Chimps::Typewriter#accumulate.
    #
    # If +obj+ is a Hash then each key selects the method that is
    # responsible for accumulating lines from the corresponding
    # value.
    #
    # @param [Array, Hash, String] obj
    # @raise [Chimps::PrintingError] if a Hash key is not a recognized
    #   resource type or +obj+ is not a Hash, Array or String
    def accumulate obj
      case obj
      when Hash
        obj.each_pair do |resource_name, resource_data|
          case
          when %w[datasets sources licenses].include?(resource_name.to_s)
            accumulate_listing(resource_data)
          when %w[dataset source license].include?(resource_name.to_s)
            accumulate_resource(resource_name, resource_data)
          when %w[errors batch search api_account].include?(resource_name.to_s)
            # Safe to dispatch dynamically: the name was just checked
            # against the fixed list above.
            send("accumulate_#{resource_name}", resource_data)
          when :array == resource_name # constructed by Chimps::Response
            accumulate_listing(resource_data)
          when :string == resource_name # constructed by Chimps::Response
            self << obj[:string]
          else
            $stderr.puts resource_data.inspect if Chimps.verbose?
            raise PrintingError.new("Unrecognized resource type `#{resource_name}'.")
          end
        end
      when Array
        obj.each { |element| accumulate(element) }
      when String
        self << obj
      else
        raise PrintingError.new("Cannot print a #{obj.class}")
      end
    end

    protected

    # Loop through the accumulated lines, finding the maximum widths
    # of each element in each Array-like line.
    def calculate_column_widths!
      each do |line|
        next unless line.is_a?(Array) # don't try to align strings
        line.each_with_index do |value, field|
          known_width = column_widths[field] || 0
          column_widths[field] = [known_width, value.to_s.size].max
        end
      end
    end

    # Return a string with +values+ joined by FIELD_SEPARATOR each
    # padded to the corresponding maximum column size.
    #
    # Chimps::Typewriter#calculate_column_widths! should have been
    # called first; a column without a known width is left unpadded.
    #
    # @param [Array] values
    # @return [String]
    def pad_and_join values
      padded_values = []
      values.each_with_index do |value, field|
        padded_values << value.to_s.ljust(column_widths[field] || 0)
      end
      padded_values.join(FIELD_SEPARATOR)
    end

    # Accumulate lines for the given +resource_name+ from the given
    # +resource_data+.
    #
    # Fields to accumulate in each line are set in
    # Chimps::Typewriter::RESOURCE_FIELDS.
    #
    # The structure of the response for a resource looks like:
    #
    #   {
    #     'dataset' => {
    #       'id'    => 39293,
    #       'title' => 'My Awesome Dataset',
    #       ...
    #     }
    #   }
    #
    # The key is +resource_name+ and the value is +resource_data+.
    #
    # @param [String] resource_name
    # @param [Hash] resource_data
    def accumulate_resource resource_name, resource_data
      # "Cached Slug" => "cached_slug"
      self << self.class::RESOURCE_FIELDS.map { |field_name| resource_data[field_name.downcase.tr(' ', '_')] }
    end

    # Accumulate lines for each of the +resources+.
    #
    # The structure of the response for a listing looks like:
    #
    #   {
    #     'datasets' => [
    #       {
    #         'dataset' => {
    #           'id'    => 39293,
    #           'title' => 'My Awesome Dataset',
    #           ...
    #         },
    #       },
    #       {
    #         'dataset' => {
    #           'id'    => 28998,
    #           'title' => 'My Other Awesome Dataset',
    #           ...
    #         },
    #       },
    #       ...
    #     ]
    #   }
    #
    # The value is +resources+.
    #
    # @param [Array<Hash>] resources
    def accumulate_listing resources
      return if resources.blank?
      self << self.class::RESOURCE_FIELDS unless skip_column_names?
      resources.each { |resource| accumulate(resource) }
    end

    # Accumulate lines for each of the error messages in +errors+.
    #
    # The structure of the response looks like
    #
    #   {
    #     'errors' => [
    #       "A title is required.",
    #       "A description is required.",
    #       ...
    #     ]
    #   }
    #
    # The value is +errors+.  A missing value accumulates nothing and
    # a single message is accumulated as one line.
    #
    # @param [Array, String, nil] errors
    def accumulate_errors errors
      Array(errors).each do |error|
        self << error
      end
    end

    # Accumulate lines for each of the batch responses in +batch+.
    #
    # The structure of the response looks like
    #
    #   {
    #     'batch' => [
    #       {
    #         'status'   => 'created',
    #         'resource' => {
    #           'dataset' => {
    #             'id'    => 39293,
    #             'title' => "My Awesome Dataset",
    #             ...
    #           },
    #         },
    #         'errors'      => nil,
    #         'local_paths' => [...] # this is totally optional
    #       },
    #       {
    #         'status' => 'invalid',
    #         'errors' => [
    #           "A title is required.",
    #           "A description is required."
    #         ]
    #       },
    #       ...
    #     ]
    #   }
    #
    # The value is +batch+.
    #
    # @param [Array<Hash>] batch
    def accumulate_batch batch
      self << ["Status", "Resource", "ID", "Errors"] unless skip_column_names?
      batch.each do |response|
        status = response['status']
        errors = response['errors']
        if response['resource'] && errors.blank?
          resource_type = response['resource'].keys.first
          resource      = response['resource'][resource_type]
          id            = resource['id']
          self << [status, resource_type, id]
        else
          # Array() keeps this from raising when an entry has neither
          # a resource nor errors, or when errors is a single String.
          self << ([status, nil, nil] + Array(errors))
        end
      end
    end

    # Accumulate lines for the results in +search+.
    #
    # The structure of the response looks like
    #
    #   {
    #     'search' => {
    #       'results' => [
    #         { 'dataset' => {...} },
    #         { 'dataset' => {...} },
    #         ...
    #       ]
    #     }
    #   }
    #
    # The value keyed to +search+ is +search+.
    #
    # @param [Hash] search
    def accumulate_search search
      return if search['results'].blank?
      self << self.class::RESOURCE_FIELDS unless skip_column_names?
      search['results'].each { |resource| accumulate(resource) }
    end

    # Accumulate lines for the +api_account+.
    #
    # The structure of the response looks like
    #
    #   { 'api_account' => {
    #       'api_key' => ...,
    #       'owner'   => {
    #         'username' => 'Infochimps',
    #         ...
    #       },
    #       'updated_at' => ...,
    #       ...
    #     }
    #   }
    #
    # The value is +api_account+.  A missing +owner+ prints an empty
    # username instead of raising.
    #
    # @param [Hash] api_account
    def accumulate_api_account api_account
      # FIXME this is sort of ugly...
      owner = api_account['owner'] || {}
      self << "USERNAME: #{owner['username']}"
      self << "API KEY: #{api_account['api_key']}"
      self << "LAST UPDATED: #{api_account['updated_at']}"
    end

  end

end
326
+
@@ -0,0 +1,40 @@
1
module Chimps
  # Root of the Chimps exception hierarchy.  Every exception defined
  # here descends from Chimps::Error, so rescuing it catches them all.
  class Error < StandardError; end

  # The user supplied bad input on the command line.
  class CLIError < Error; end

  # No API credentials were specified, or the server rejected the
  # credentials that were given.
  #
  # Roughly corresponds to HTTP status code 401.
  class AuthenticationError < Error; end

  # The response from the Infochimps server was unexpected or
  # missing.
  #
  # Roughly corresponds to HTTP status code 5xx.
  class ServerError < Error; end

  # IMW failed to properly package the files to upload.
  class PackagingError < Error; end

  # Something went wrong while uploading to S3 or while notifying
  # Infochimps of the new package.
  class UploadError < Error; end

  # A subclass failed to implement a required method.
  class NotImplementedError < Error; end

  # The response from Infochimps was not well-formed or was
  # unexpected.
  class ParseError < Error; end

  # Chimps encountered response data it does not know how to pretty
  # print.
  class PrintingError < Error; end
end
40
+
@@ -0,0 +1,109 @@
1
# Yield +obj+ to the block and then return +obj+ itself, whatever
# the block evaluates to.
def returning obj
  obj.tap { |subject| yield subject }
end
5
+
6
class String
  # Ruby 1.9 introduces an inherit argument for Module#const_get and
  # #const_defined? and changes their default behavior, so pick the
  # implementation that matches the running interpreter.
  if Module.method(:const_get).arity == 1
    # Look up the constant named by this string:
    #
    #   "Module".constantize     # => Module
    #   "Test::Unit".constantize # => Test::Unit
    #
    # The lookup always starts at Object, whether or not the name
    # begins with "::".  No lexical context is taken into account:
    #
    #   C = 'outside'
    #   module M
    #     C = 'inside'
    #     C               # => 'inside'
    #     "C".constantize # => 'outside', same as ::C
    #   end
    #
    # NameError is raised when the name is not in CamelCase or the
    # constant is unknown.
    def constantize
      segments = split('::')
      # Drop the empty leading segment produced by a "::" prefix.
      segments.shift if segments.empty? || segments.first.empty?

      segments.inject(Object) do |scope, segment|
        if scope.const_defined?(segment)
          scope.const_get(segment)
        else
          scope.const_missing(segment)
        end
      end
    end
  else
    # Look up the constant named by this string, starting at Object
    # and descending one "::"-separated segment at a time.  Inherited
    # constants are not considered at any step.
    def constantize
      segments = split('::')
      # Drop the empty leading segment produced by a "::" prefix.
      segments.shift if segments.empty? || segments.first.empty?

      segments.inject(Object) do |scope, segment|
        scope.const_get(segment, false) || scope.const_missing(segment)
      end
    end
  end

end
51
+
52
class Object
  # Is this object blank?
  #
  # An object that responds to +empty?+ is blank when it is empty;
  # any other object is blank when it is falsy.
  #
  # This simplifies
  #
  #   if !address.nil? && !address.empty?
  #
  # to
  #
  #   if !address.blank?
  def blank?
    return empty? if respond_to?(:empty?)
    !self
  end

  # Is this object present, i.e. not blank?
  def present?
    blank? ? false : true
  end
end
72
+
73
class NilClass #:nodoc:
  # +nil+ is always blank.
  def blank?; true; end
end
78
+
79
class FalseClass #:nodoc:
  # +false+ is always blank.
  def blank?; true; end
end
84
+
85
class TrueClass #:nodoc:
  # +true+ is never blank.
  def blank?; false; end
end
90
+
91
class Array #:nodoc:
  # An Array is blank exactly when it is empty.
  alias blank? empty?
end
94
+
95
class Hash #:nodoc:
  # A Hash is blank exactly when it is empty.
  alias blank? empty?
end
98
+
99
class String #:nodoc:
  # A String is blank when it contains no non-whitespace character.
  def blank?
    (self =~ /\S/).nil?
  end
end
104
+
105
class Numeric #:nodoc:
  # A number is never blank, not even zero.
  def blank?; false; end
end
@@ -0,0 +1,26 @@
1
module Chimps
  module Utils

    # A module which defines methods to interface with +curl+ via a
    # system call.
    module UsesCurl

      # Ask the system's +which+ command where +curl+ is.
      #
      # @return [String] the output of <tt>which curl</tt> with the
      #   trailing newline removed
      def curl
        located = %x(which curl)
        located.chomp
      end

      # FIXME right now curl is the default but it really shouldn't be...
      # def define_curl_options
      #   on_tail("-c", "--curl", "Use curl instead of Ruby to upload package (faster)") do |c|
      #     @curl = c
      #   end
      # end

      # Should this use curl?
      # def curl?
      #   @curl
      # end

    end
  end
end
@@ -0,0 +1,51 @@
1
module Chimps
  module Utils

    # Methods for working with the name of a resource ("model").
    #
    # These methods read the +MODELS+ constant of the including class
    # (an Array of model names, the first being the default) and call
    # its +argv+ and +on_tail+ methods.
    module UsesModel

      # The model in use; defaults to the first entry of +MODELS+.
      #
      # @return [String]
      def model
        @model ||= self.class::MODELS.first
      end

      # The plural form of the model name: a trailing "y" becomes
      # "ies", otherwise an "s" is appended.
      #
      #   "dataset"  => "datasets"
      #   "category" => "categories"
      #
      # @return [String]
      def plural_model
        if model.end_with?('y')
          # Drop the trailing "y", not the leading character.
          "#{model[0..-2]}ies"
        else
          "#{model}s"
        end
      end

      # The identifier of the model to act on, taken from the first
      # argument.
      #
      # @return [String]
      # @raise [Chimps::CLIError] if the first argument is blank
      def model_identifier
        raise CLIError.new("Must provide an ID or URL-escaped handle as the first argument") if argv.first.blank?
        argv.first
      end

      # The path for the collection of models, e.g. "datasets.json".
      #
      # @return [String]
      def models_path
        "#{plural_model}.json"
      end

      # The path for a single model, e.g. "datasets/39293.json".
      #
      # @return [String]
      # @raise [Chimps::CLIError] if no identifier was given
      def model_path
        "#{plural_model}/#{model_identifier}.json"
      end

      # Set the model to use.
      #
      # @param [String] model one of the entries of +MODELS+
      # @raise [Chimps::CLIError] if +model+ is not in +MODELS+
      def model= model
        raise CLIError.new("Invalid model: #{model}. Must be one of #{models_string}") unless self.class::MODELS.include?(model)
        @model = model
      end

      # A human-readable list of the allowed models, marking the first
      # as the default, e.g. "dataset (default), source, or license".
      #
      # The "or" is only added when there is more than one model.
      #
      # @return [String]
      def models_string
        parts     = self.class::MODELS.dup
        parts[0]  = "#{parts.first} (default)"
        parts[-1] = "or #{parts.last}" if parts.size > 1
        parts.join(', ')
      end

      # Define the <tt>-m</tt>/<tt>--model</tt> command-line option,
      # which sets the model.
      def define_model_option
        on_tail("-m", "--model MODEL", "Use a different resource, one of: #{models_string}") do |m|
          self.model = m
        end
      end
    end
  end
end
50
+
51
+