chimps 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/.gitignore +17 -0
  2. data/LICENSE +674 -0
  3. data/README.rdoc +48 -0
  4. data/VERSION +1 -0
  5. data/bin/chimps +4 -0
  6. data/examples/batch.yaml +69 -0
  7. data/lib/chimps/cli.rb +102 -0
  8. data/lib/chimps/commands/base.rb +107 -0
  9. data/lib/chimps/commands/batch.rb +68 -0
  10. data/lib/chimps/commands/create.rb +33 -0
  11. data/lib/chimps/commands/destroy.rb +28 -0
  12. data/lib/chimps/commands/download.rb +76 -0
  13. data/lib/chimps/commands/help.rb +89 -0
  14. data/lib/chimps/commands/list.rb +54 -0
  15. data/lib/chimps/commands/query.rb +59 -0
  16. data/lib/chimps/commands/search.rb +59 -0
  17. data/lib/chimps/commands/show.rb +32 -0
  18. data/lib/chimps/commands/test.rb +40 -0
  19. data/lib/chimps/commands/update.rb +33 -0
  20. data/lib/chimps/commands/upload.rb +63 -0
  21. data/lib/chimps/commands.rb +46 -0
  22. data/lib/chimps/config.rb +57 -0
  23. data/lib/chimps/request.rb +302 -0
  24. data/lib/chimps/response.rb +146 -0
  25. data/lib/chimps/typewriter.rb +326 -0
  26. data/lib/chimps/utils/error.rb +40 -0
  27. data/lib/chimps/utils/extensions.rb +109 -0
  28. data/lib/chimps/utils/uses_curl.rb +26 -0
  29. data/lib/chimps/utils/uses_model.rb +51 -0
  30. data/lib/chimps/utils/uses_yaml_data.rb +94 -0
  31. data/lib/chimps/utils.rb +11 -0
  32. data/lib/chimps/workflows/batch.rb +127 -0
  33. data/lib/chimps/workflows/downloader.rb +102 -0
  34. data/lib/chimps/workflows/uploader.rb +238 -0
  35. data/lib/chimps/workflows.rb +11 -0
  36. data/lib/chimps.rb +22 -0
  37. data/spec/chimps/cli_spec.rb +22 -0
  38. data/spec/chimps/commands/base_spec.rb +25 -0
  39. data/spec/chimps/commands/list_spec.rb +25 -0
  40. data/spec/chimps/response_spec.rb +8 -0
  41. data/spec/chimps/typewriter_spec.rb +114 -0
  42. data/spec/spec_helper.rb +17 -0
  43. data/spec/support/custom_matchers.rb +6 -0
  44. metadata +133 -0
@@ -0,0 +1,146 @@
1
+ module Chimps
2
+
3
+ # A class to wrap responses from the Infochimps API.
4
+ class Response < Hash
5
+
6
+ # The response body.
7
+ attr_reader :body
8
+
9
+ # The error message for this response, if it was an error.
10
+ #
11
+ # This is actually generated within RestClient from the HTTP
12
+ # status code and attached to the response. It is passed in when
13
+ # initializing a Chimps::Response by a Chimps::Request.
14
+ attr_reader :error
15
+
16
+ # Return a response built from a String with the
17
+ # RestClient::Response module mixed-in.
18
+ #
19
+ # If <tt>:error</tt> is passed then this response is considered
20
+ # an error with the given message.
21
+ #
22
+ # @param [String, #to_i, #headers] body
23
+ # @param [Hash] options
24
+ # @option options [String] error the error message
25
+ # @return [Chimps::Response]
26
+ def initialize body, options={}
27
+ super()
28
+ @body = body
29
+ @error = options[:error]
30
+ parse!
31
+ end
32
+
33
+ # The HTTP status code of the response.
34
+ #
35
+ # @return [Integer]
36
+ def code
37
+ @code ||= body.to_i
38
+ end
39
+
40
+ # The HTTP headers of the response.
41
+ #
42
+ # @return [Hash]
43
+ def headers
44
+ @headers ||= body.headers
45
+ end
46
+
47
+ # The <tt>Content-type</tt> of the response.
48
+ #
49
+ # Will return <tt>:yaml</tt> or <tt>:json</tt> if possible, else
50
+ # just the raw <tt>Content-type</tt>.
51
+ #
52
+ # @return [Symbol, String]
53
+ def content_type
54
+ @content_type ||= case headers[:content_type]
55
+ when /json/ then :json
56
+ when /yaml/ then :yaml
57
+ else headers[:content_type]
58
+ end
59
+ end
60
+
61
+ # Parse the response from Infochimps.
62
+ def parse!
63
+ data = parse_response_body
64
+ case data
65
+ # hack...sometimes we get back an array instead of a
66
+ # hash...should change the API at Chimps end
67
+ when Hash then merge!(data)
68
+ when Array then self[:array] = data # see Chimps::Typewriter#accumulate
69
+ when String then self[:string] = data
70
+ end
71
+ end
72
+
73
+ # Was this response a success?
74
+ #
75
+ # @return [true, false]
76
+ def success?
77
+ ! error?
78
+ end
79
+
80
+ # Was this response an error??
81
+ #
82
+ # @return [true, false]
83
+ def error?
84
+ !! @error
85
+ end
86
+
87
+ # Print this response.
88
+ #
89
+ # Will also print a diagnostic line if Chimps is verbose or this
90
+ # response was an error.
91
+ #
92
+ # @param [Hash] options
93
+ # @option options [true, nil] skip_column_names (nil) Don't print column names in output.
94
+ def print options={}
95
+ puts diagnostic_line if Chimps.verbose? || error?
96
+ Typewriter.new(self, options).print
97
+ end
98
+
99
+ protected
100
+
101
+ # Construct and return a line of diagnostic information on this
102
+ # response.
103
+ #
104
+ # @return [String]
105
def diagnostic_line
  # Interpolation (rather than String#+) is used so that an error
  # which is not a String -- a Symbol or an exception object, say --
  # is converted with #to_s instead of raising a TypeError while
  # the diagnostic is being built.
  "#{code} -- #{success? ? 'SUCCESS' : error}"
end
110
+
111
+ # Raise a Chimps::ParseError, optionally including the response
112
+ # body in the error message if Chimps is verbose.
113
+ def parse_error!
114
+ message = Chimps.verbose? ? "#{diagnostic_line}\n\n#{body}" : diagnostic_line
115
+ raise ParseError.new(message)
116
+ end
117
+
118
+ # Parse the body of this response using the YAML or JSON libraries
119
+ # into a Ruby data structure.
120
+ #
121
+ # @return [Hash, Array, String]
122
def parse_response_body
  # Nothing to parse for an empty body or a literal JSON null.
  return {} if body.blank? || body == 'null'
  if content_type == :yaml
    require 'yaml'
    begin
      # YAML.load returns plain Ruby objects (Hash, Array, String),
      # which is what parse! dispatches on.  YAML.parse only builds
      # a syntax-tree node, which parse! would silently ignore.
      #
      # NOTE(review): on Rubies where YAML.load is not safe by
      # default this can instantiate arbitrary objects from the
      # server's response -- consider YAML.safe_load if the API's
      # YAML is known to contain only basic types.
      YAML.load(body)
    rescue YAML::ParseError
      parse_error!
    rescue ArgumentError
      # YAML has been seen to raise ArgumentError on malformed
      # input; record a readable message before re-raising.
      @error = "Response was received but was malformed"
      parse_error!
    end
  else
    # Anything that is not YAML is treated as JSON.
    require 'json'
    begin
      JSON.parse(body)
    rescue JSON::ParserError
      parse_error!
    end
  end
end
143
+
144
+ end
145
+ end
146
+
@@ -0,0 +1,326 @@
1
+ module Chimps
2
+
3
+ # Responses from Infochimps (once parsed from the original JSON or
4
+ # YAML) consist of nested hashes:
5
+ #
6
+ # { 'dataset' => {
7
+ # 'title' => 'My dataset',
8
+ # 'description' => 'An amazing dataset which...',
9
+ # ...
10
+ # 'sources' => {
11
+ # 'source' => {
12
+ # 'title' => 'Trustworthy Source'
13
+ # ...
14
+ # },
15
+ # 'source' => {..},
16
+ # ...
17
+ # }
18
+ # },
19
+ # ...
20
+ # }
21
+ #
22
+ # This class utilizes a typewriter and a team of trained chimpanzees
23
+ # to create pretty, line-oriented output from these hashes.
24
+ class Typewriter < Array
25
+
26
+ # The response that this Typewriter will print.
27
+ attr_accessor :response
28
+
29
+ # Widths of columns as determined by the maximum number of
30
+ # characters in any row.
31
+ attr_accessor :column_widths
32
+
33
+ # Fields to print for each resource. Given as humanized names,
34
+ # will be automatically converted to key names.
35
+ RESOURCE_FIELDS = ["ID", "Cached Slug", "Updated At", "Title"]
36
+
37
+ # String to insert between fields in output.
38
+ FIELD_SEPARATOR = " "
39
+
40
+ # Return a Typewriter to print +response+.
41
+ #
42
+ # @param [Chimps::Response] response
43
+ # @return [Chimps::Typewriter]
44
+ def initialize response, options={}
45
+ super()
46
+ @response = response
47
+ @column_widths = []
48
+ @skip_column_names = options[:skip_column_names]
49
+ accumulate(response)
50
+ end
51
+
52
+ # Skip printing column names in the output?
53
+ #
54
+ # @return [true, nil]
55
+ def skip_column_names?
56
+ @skip_column_names
57
+ end
58
+
59
+ # Print the accumulated lines in this Typewriter.
60
+ #
61
+ # Will first calculate appropriate column widths for any
62
+ # Array-like lines.
63
+ def print
64
+ calculate_column_widths!
65
+ each do |line|
66
+ if line.is_a?(Array)
67
+ puts pad_and_join(line)
68
+ else
69
+ puts line
70
+ end
71
+ end
72
+ end
73
+
74
+ # Accumulate lines to print from +obj+.
75
+ #
76
+ # If +obj+ is a string then it will be accumulated as a single
77
+ # line to print.
78
+ #
79
+ # If +obj+ is an Array then each element will be passed to
80
+ # Chimps::Typewriter#accumulate.
81
+ #
82
+ # If +obj+ is a Hash then each key will be mapped to a method
83
+ # <tt>accumulate_KEY</tt> and the corresponding value passed in.
84
+ # This method is responsible for accumulating lines to print.
85
+ #
86
+ # @param [Array, Hash, String] obj
87
+ def accumulate obj
88
+ case obj
89
+ when Hash
90
+ obj.each_pair do |resource_name, resource_data|
91
+ case
92
+ when %w[datasets sources licenses].include?(resource_name.to_s)
93
+ accumulate_listing(resource_data)
94
+ when %w[dataset source license].include?(resource_name.to_s)
95
+ accumulate_resource(resource_name, resource_data)
96
+ when %w[errors batch search api_account].include?(resource_name.to_s)
97
+ send("accumulate_#{resource_name}", resource_data)
98
+ when :array == resource_name # constructed by Chimps::Response
99
+ accumulate_listing(resource_data)
100
+ when :string == resource_name # constructed by Chimps::Response
101
+ self << obj[:string]
102
+ else
103
+ $stderr.puts resource_data.inspect if Chimps.verbose?
104
+ raise PrintingError.new("Unrecognized resource type `#{resource_name}'.")
105
+ end
106
+ end
107
+ when Array
108
+ obj.each { |element| accumulate(element) }
109
+ when String
110
+ self << obj
111
+ else
112
+ raise PrintingError.new("Cannot print a #{obj.class}")
113
+ end
114
+ end
115
+
116
+ protected
117
+
118
+ # Loop through the accumulated lines, finding the maximum widths
119
+ # of each element in each Array-like line.
120
+ def calculate_column_widths!
121
+ each do |line|
122
+ next unless line.is_a?(Array) # don't try to align strings
123
+ line.each_with_index do |value, field|
124
+ current_max_width = column_widths[field]
125
+ unless current_max_width
126
+ current_max_width = 0
127
+ column_widths << current_max_width
128
+ end
129
+ value_size = value.to_s.size
130
+ column_widths[field] = value_size if value_size > current_max_width
131
+ end
132
+ end
133
+ end
134
+
135
+ # Return a string with +values+ joined by FIELD_SEPARATOR each
136
+ # padded to the corresponding maximum column size.
137
+ #
138
+ # Must have called Chimps::Typewriter#calculate_column_widths!
139
+ # first.
140
+ #
141
+ # @param [Array] values
142
+ # @return [String]
143
def pad_and_join values
  padded_values = []
  values.each_with_index do |value, field|
    # A column with no recorded width (widths not calculated yet)
    # is treated as width 0, i.e. left unpadded, rather than
    # raising on nil arithmetic.
    width = column_widths[field].to_i
    # ljust right-pads with spaces up to +width+ and returns the
    # text unchanged when it is already that wide.
    padded_values << value.to_s.ljust(width)
  end
  padded_values.join(FIELD_SEPARATOR)
end
152
+
153
+ # Accumulate lines for the given +resource_name+ from the given
154
+ # +resource_data+.
155
+ #
156
+ # Fields to accumulate in each line are set in
157
+ # Chimps::Typewriter::RESOURCE_FIELDS.
158
+ #
159
+ # The structure of the response for a resource looks like:
160
+ #
161
+ # {
162
+ # 'dataset' => {
163
+ # 'id' => 39293,
164
+ # 'title' => 'My Awesome Dataset',
165
+ # ...
166
+ # }
167
+ # }
168
+ #
169
+ # The key is +resource_name+ and the value is +resource_data+.
170
+ #
171
+ # @param [String] resource_name
172
+ # @param [Hash] resource_data
173
+ def accumulate_resource resource_name, resource_data
174
+ self << self.class::RESOURCE_FIELDS.map { |field_name| resource_data[field_name.downcase.tr(' ', '_')] }
175
+ end
176
+
177
+ # Accumulate lines for each of the +resources+, all of the given
178
+ # +type+.
179
+ #
180
+ # The structure of the response for a listing looks like:
181
+ #
182
+ # {
183
+ # 'datasets' => [
184
+ # {
185
+ # 'dataset' => {
186
+ # 'id' => 39293,
187
+ # 'title' => 'My Awesome Dataset',
188
+ # ...
189
+ # },
190
+ # },
191
+ # {
192
+ # 'dataset' => {
193
+ # 'id' => 28998,
194
+ # 'title' => 'My Other Awesome Dataset',
195
+ # ...
196
+ # },
197
+ # },
198
+ # ...
199
+ # ]
200
+ # }
201
+ #
202
+ # The value is +resources+.
203
+ #
204
+ # @param [Array<Hash>] resources
205
+ def accumulate_listing resources
206
+ return if resources.blank?
207
+ self << self.class::RESOURCE_FIELDS unless skip_column_names?
208
+ resources.each { |resource| accumulate(resource) }
209
+ end
210
+
211
+ # Accumulate lines for each of the error messages in +errors+.
212
+ #
213
+ # The structure of the response looks like
214
+ #
215
+ # {
216
+ # 'errors' => [
217
+ # "A title is required.",
218
+ # "A description is required.",
219
+ # ...
220
+ # ]
221
+ # }
222
+ #
223
+ # The value is +errors+.
224
+ #
225
+ # @param [Array] errors
226
+ def accumulate_errors errors
227
+ errors.each do |error|
228
+ self << error
229
+ end
230
+ end
231
+
232
+ # Accumulate lines for each of the batch responses in +batch+.
233
+ #
234
+ # The structure of the response looks like
235
+ #
236
+ # {
237
+ # 'batch' => [
238
+ # {
239
+ # 'status' => 'created',
240
+ # 'resource' => {
241
+ # 'dataset' => {
242
+ # 'id' => 39293,
243
+ # 'title' => "My Awesome Dataset",
244
+ # ...
245
+ # },
246
+ # },
247
+ # 'errors' => nil,
248
+ # 'local_paths' => [...] # this is totally optional
249
+ # },
250
+ # {
251
+ # 'status' => 'invalid',
252
+ # 'errors' => [
253
+ # "A title is required.",
254
+ # "A description is required."
255
+ # ]
256
+ # },
257
+ # ...
258
+ # ]
259
+ # }
260
+ #
261
+ # The value is +batch+.
262
def accumulate_batch batch
  self << ["Status", "Resource", "ID", "Errors"] unless skip_column_names?
  batch.each do |response|
    status   = response['status']
    resource = response['resource']
    # 'errors' may be nil or absent; coerce it so it can always be
    # appended to the row below without raising a TypeError.
    errors   = Array(response['errors'])
    if resource && errors.empty?
      # The resource hash has a single key naming its type, e.g.
      # { 'dataset' => { 'id' => ... } }.
      resource_type = resource.keys.first
      self << [status, resource_type, resource[resource_type]['id']]
    else
      # No usable resource: leave Resource and ID blank and list
      # each error message in its own trailing column.
      self << ([status, nil, nil] + errors)
    end
  end
end
277
+
278
+ # Accumulate lines for the results in +search+.
279
+ #
280
+ # The structure of the response looks like
281
+ #
282
+ # {
283
+ # 'search' => {
284
+ # 'results' => [
285
+ # { 'dataset' => {...} },
286
+ # { 'dataset' => {...} },
287
+ # ...
288
+ # ]
289
+ #
290
+ # }
291
+ # }
292
+ #
293
+ # The value keyed to +search+ is +search+.
294
+ def accumulate_search search
295
+ return if search['results'].blank?
296
+ self << self.class::RESOURCE_FIELDS unless skip_column_names?
297
+ search['results'].each { |resource| accumulate(resource) }
298
+ end
299
+
300
+ # Accumulate lines for the +api_account+.
301
+ #
302
+ # The structure of the response looks like
303
+ #
304
+ # { 'api_account' => {
305
+ # 'api_key' => ...,
306
+ # 'owner' => {
307
+ # 'username' => 'Infochimps',
308
+ # ...
309
+ # },
310
+ # 'updated_at' => ...,
311
+ # ...
312
+ # }
313
+ # }
314
+ #
315
+ # The value is +api_account+
316
+ def accumulate_api_account api_account
317
+ # FIXME this is sort of ugly...
318
+ self << "USERNAME: #{api_account['owner']['username']}"
319
+ self << "API KEY: #{api_account['api_key']}"
320
+ self << "LAST UPDATED: #{api_account['updated_at']}"
321
+ end
322
+
323
+ end
324
+
325
+ end
326
+
@@ -0,0 +1,40 @@
1
+ module Chimps
2
+ # Base exception class for Chimps. All Chimps exceptions are
3
+ # subclasses of Chimps::Error so they can be easily caught.
4
+ Error = Class.new(StandardError)
5
+
6
+ # Raised when the user provides bad input on the command line.
7
+ CLIError = Class.new(Error)
8
+
9
+ # Raised when the user hasn't specified any API credentials or the
10
+ # server rejects the user's API credentials.
11
+ #
12
+ # Roughly corresponds to HTTP status code 401.
13
+ AuthenticationError = Class.new(Error)
14
+
15
+ # Raised when the Infochimps server response is unexpected or
16
+ # missing.
17
+ #
18
+ # Roughly corresponds to HTTP status code 5xx.
19
+ ServerError = Class.new(Error)
20
+
21
+ # Raised when IMW fails to properly package files to upload.
22
+ PackagingError = Class.new(Error)
23
+
24
+ # Raised when there is an error in uploading to S3 or in notifying
25
+ # Infochimps of the new package.
26
+ UploadError = Class.new(Error)
27
+
28
+ # Raised when a subclass fails to implement required
29
+ # methods.
30
+ NotImplementedError = Class.new(Error)
31
+
32
+ # Raised when the response from Infochimps isn't well-formed or is
33
+ # unexpected.
34
+ ParseError = Class.new(Error)
35
+
36
+ # Raised when Chimps encounters response data it doesn't know how to
37
+ # pretty print.
38
+ PrintingError = Class.new(Error)
39
+ end
40
+
@@ -0,0 +1,109 @@
1
+ def returning obj
2
+ yield obj
3
+ obj
4
+ end
5
+
6
+ class String
7
+ # Ruby 1.9 introduces an inherit argument for Module#const_get and
8
+ # #const_defined? and changes their default behavior.
9
+ if Module.method(:const_get).arity == 1
10
+ # Tries to find a constant with the name specified in the argument string:
11
+ #
12
+ # "Module".constantize # => Module
13
+ # "Test::Unit".constantize # => Test::Unit
14
+ #
15
+ # The name is assumed to be the one of a top-level constant, no matter whether
16
+ # it starts with "::" or not. No lexical context is taken into account:
17
+ #
18
+ # C = 'outside'
19
+ # module M
20
+ # C = 'inside'
21
+ # C # => 'inside'
22
+ # "C".constantize # => 'outside', same as ::C
23
+ # end
24
+ #
25
+ # NameError is raised when the name is not in CamelCase or the constant is
26
+ # unknown.
27
+ def constantize
28
+ names = split('::')
29
+ names.shift if names.empty? || names.first.empty?
30
+
31
+ constant = Object
32
+ names.each do |name|
33
+ constant = constant.const_defined?(name) ? constant.const_get(name) : constant.const_missing(name)
34
+ end
35
+ constant
36
+ end
37
+ else
38
+ def constantize
39
+ names = split('::')
40
+ names.shift if names.empty? || names.first.empty?
41
+
42
+ constant = Object
43
+ names.each do |name|
44
+ constant = constant.const_get(name, false) || constant.const_missing(name)
45
+ end
46
+ constant
47
+ end
48
+ end
49
+
50
+ end
51
+
52
+ class Object
53
+ # An object is blank if it's false, empty, or a whitespace string.
54
+ # For example, "", " ", +nil+, [], and {} are blank.
55
+ #
56
+ # This simplifies
57
+ #
58
+ # if !address.nil? && !address.empty?
59
+ #
60
+ # to
61
+ #
62
+ # if !address.blank?
63
+ def blank?
64
+ respond_to?(:empty?) ? empty? : !self
65
+ end
66
+
67
+ # An object is present if it's not blank.
68
+ def present?
69
+ !blank?
70
+ end
71
+ end
72
+
73
+ class NilClass #:nodoc:
74
+ def blank?
75
+ true
76
+ end
77
+ end
78
+
79
+ class FalseClass #:nodoc:
80
+ def blank?
81
+ true
82
+ end
83
+ end
84
+
85
+ class TrueClass #:nodoc:
86
+ def blank?
87
+ false
88
+ end
89
+ end
90
+
91
+ class Array #:nodoc:
92
+ alias_method :blank?, :empty?
93
+ end
94
+
95
+ class Hash #:nodoc:
96
+ alias_method :blank?, :empty?
97
+ end
98
+
99
+ class String #:nodoc:
100
+ def blank?
101
+ self !~ /\S/
102
+ end
103
+ end
104
+
105
+ class Numeric #:nodoc:
106
+ def blank?
107
+ false
108
+ end
109
+ end
@@ -0,0 +1,26 @@
1
+ module Chimps
2
+ module Utils
3
+
4
+ # A module which defines methods to interface with +curl+ via a
5
+ # system call.
6
+ module UsesCurl
7
+
8
+ def curl
9
+ `which curl`.chomp
10
+ end
11
+
12
+ # FIXME right now curl is the default but it really shouldn't be...
13
+ # def define_curl_options
14
+ # on_tail("-c", "--curl", "Use curl instead of Ruby to upload package (faster)") do |c|
15
+ # @curl = c
16
+ # end
17
+ # end
18
+
19
+ # Should this use curl?
20
+ # def curl?
21
+ # @curl
22
+ # end
23
+
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,51 @@
1
+ module Chimps
2
+ module Utils
3
+ module UsesModel
4
+
5
+ def model
6
+ @model ||= self.class::MODELS.first
7
+ end
8
+
9
# Return the plural form of the current model name.
#
# A name ending in 'y' has that 'y' replaced by 'ies'
# ('category' => 'categories'); any other name simply gets an 's'
# appended ('dataset' => 'datasets').
#
# @return [String]
def plural_model
  singular = model
  # String#[-1, 1] yields a one-character String on every Ruby
  # version, so no Integer-to-character conversion is needed.
  if singular[-1, 1] == 'y'
    # Drop the trailing 'y' (not the leading character) before
    # appending 'ies'.
    singular[0..-2] + 'ies'
  else
    singular + 's'
  end
end
16
+
17
+ def model_identifier
18
+ raise CLIError.new("Must provide an ID or URL-escaped handle as the first argument") if argv.first.blank?
19
+ argv.first
20
+ end
21
+
22
+ def models_path
23
+ "#{plural_model}.json"
24
+ end
25
+
26
+ def model_path
27
+ "#{plural_model}/#{model_identifier}.json"
28
+ end
29
+
30
+ def model= model
31
+ raise CLIError.new("Invalid model: #{model}. Must be one of #{models_string}") unless self.class::MODELS.include?(model)
32
+ @model = model
33
+ end
34
+
35
# Return a human-readable, comma-separated list of the models this
# class supports, marking the first as the default and prefixing
# the last with "or", e.g. "dataset (default), source, or license".
#
# @return [String]
def models_string
  # Work on a copy so the MODELS constant itself is never modified.
  parts = self.class::MODELS.dup
  return "" if parts.empty?
  parts[0] = "#{parts.first} (default)"
  # With a single model the first and last entries are the same
  # element; prefixing it with "or" would yield "or x (default)".
  parts[-1] = "or #{parts.last}" if parts.size > 1
  parts.join(', ')
end
41
+
42
+ def define_model_option
43
+ on_tail("-m", "--model MODEL", "Use a different resource, one of: #{models_string}") do |m|
44
+ self.model= m
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+
51
+