RubyGems - chimps - Versions diffs - 0.1.0 - Mend

chimps 0.1.0

Files changed (44) hide show

data/.gitignore +17 -0
data/LICENSE +674 -0
data/README.rdoc +48 -0
data/VERSION +1 -0
data/bin/chimps +4 -0
data/examples/batch.yaml +69 -0
data/lib/chimps/cli.rb +102 -0
data/lib/chimps/commands/base.rb +107 -0
data/lib/chimps/commands/batch.rb +68 -0
data/lib/chimps/commands/create.rb +33 -0
data/lib/chimps/commands/destroy.rb +28 -0
data/lib/chimps/commands/download.rb +76 -0
data/lib/chimps/commands/help.rb +89 -0
data/lib/chimps/commands/list.rb +54 -0
data/lib/chimps/commands/query.rb +59 -0
data/lib/chimps/commands/search.rb +59 -0
data/lib/chimps/commands/show.rb +32 -0
data/lib/chimps/commands/test.rb +40 -0
data/lib/chimps/commands/update.rb +33 -0
data/lib/chimps/commands/upload.rb +63 -0
data/lib/chimps/commands.rb +46 -0
data/lib/chimps/config.rb +57 -0
data/lib/chimps/request.rb +302 -0
data/lib/chimps/response.rb +146 -0
data/lib/chimps/typewriter.rb +326 -0
data/lib/chimps/utils/error.rb +40 -0
data/lib/chimps/utils/extensions.rb +109 -0
data/lib/chimps/utils/uses_curl.rb +26 -0
data/lib/chimps/utils/uses_model.rb +51 -0
data/lib/chimps/utils/uses_yaml_data.rb +94 -0
data/lib/chimps/utils.rb +11 -0
data/lib/chimps/workflows/batch.rb +127 -0
data/lib/chimps/workflows/downloader.rb +102 -0
data/lib/chimps/workflows/uploader.rb +238 -0
data/lib/chimps/workflows.rb +11 -0
data/lib/chimps.rb +22 -0
data/spec/chimps/cli_spec.rb +22 -0
data/spec/chimps/commands/base_spec.rb +25 -0
data/spec/chimps/commands/list_spec.rb +25 -0
data/spec/chimps/response_spec.rb +8 -0
data/spec/chimps/typewriter_spec.rb +114 -0
data/spec/spec_helper.rb +17 -0
data/spec/support/custom_matchers.rb +6 -0
metadata +133 -0

data/lib/chimps/response.rb ADDED Viewed

@@ -0,0 +1,146 @@
module Chimps

  # A class to wrap responses from the Infochimps API.
  #
  # A Response is a Hash: the parsed body is merged into the response
  # itself when it is a Hash, stored under <tt>:array</tt> when it is
  # an Array and under <tt>:string</tt> when it is a String.
  class Response < Hash

    # Error message recorded when the body cannot be parsed.
    MALFORMED_MESSAGE = "Response was received but was malformed"

    # The response body.
    attr_reader :body

    # The error message for this response, if it was an error.
    #
    # This is actually generated within RestClient from the HTTP
    # status code and attached to the response.  It is passed in when
    # initializing a Chimps::Response by a Chimps::Request.
    attr_reader :error

    # Return a response built from a String with the
    # RestClient::Response module mixed-in.
    #
    # If <tt>:error</tt> is passed then this response is considered
    # an error with the given message.
    #
    # The body is parsed immediately, so a Chimps::ParseError may be
    # raised from here.
    #
    # @param [String, #to_i, #headers] body
    # @param [Hash] options
    # @option options [String] error the error message
    # @return [Chimps::Response]
    def initialize body, options={}
      super()
      @body  = body
      @error = options[:error]
      parse!
    end

    # The HTTP status code of the response.
    #
    # @return [Integer]
    def code
      @code ||= body.to_i
    end

    # The HTTP headers of the response.
    #
    # @return [Hash]
    def headers
      @headers ||= body.headers
    end

    # The <tt>Content-type</tt> of the response.
    #
    # Will return <tt>:yaml</tt> or <tt>:json</tt> if possible, else
    # just the raw <tt>Content-type</tt>.
    #
    # @return [Symbol, String]
    def content_type
      @content_type ||= case headers[:content_type]
                        when /json/ then :json
                        when /yaml/ then :yaml
                        else headers[:content_type]
                        end
    end

    # Parse the response from Infochimps and store the result in this
    # Hash.
    def parse!
      data = parse_response_body
      case data
        # hack...sometimes we get back an array instead of a
        # hash...should change the API at Chimps end
      when Hash   then merge!(data)
      when Array  then self[:array]  = data # see Chimps::Typewriter#accumulate
      when String then self[:string] = data
      end
    end

    # Was this response a success?
    #
    # @return [true, false]
    def success?
      ! error?
    end

    # Was this response an error?
    #
    # @return [true, false]
    def error?
      !! @error
    end

    # Print this response.
    #
    # Will also print a diagnostic line if Chimps is verbose or this
    # response was an error.
    #
    # @param [Hash] options
    # @option options [true, nil] skip_column_names (nil) Don't print column names in output.
    def print options={}
      puts diagnostic_line if Chimps.verbose? || error?
      Typewriter.new(self, options).print
    end

    protected

    # Construct and return a line of diagnostic information on this
    # response.
    #
    # @return [String]
    def diagnostic_line
      "#{code} -- #{success? ? 'SUCCESS' : error}"
    end

    # Raise a Chimps::ParseError, optionally including the response
    # body in the error message if Chimps is verbose.
    #
    # Marks the response as an error first (unless it already carries
    # an error message) so the diagnostic line never claims "SUCCESS"
    # for a body that could not be parsed.
    #
    # @raise [Chimps::ParseError] always
    def parse_error!
      @error ||= MALFORMED_MESSAGE
      message = Chimps.verbose? ? "#{diagnostic_line}\n\n#{body}" : diagnostic_line
      raise ParseError.new(message)
    end

    # The exception classes that signal malformed YAML.  Which ones
    # exist depends on the YAML engine that is loaded, so only the
    # constants that are actually defined are returned.
    #
    # @return [Array<Class>]
    def yaml_parse_errors
      errors = [ArgumentError] # some YAML engines raise ArgumentError on malformed input
      errors << YAML::ParseError   if defined?(YAML::ParseError)
      errors << Psych::SyntaxError if defined?(Psych::SyntaxError)
      errors
    end

    # Parse the body of this response using the YAML or JSON libraries
    # into a Ruby data structure.
    #
    # An empty body or the literal <tt>null</tt> yields an empty Hash.
    # Anything that is not YAML is treated as JSON.
    #
    # @return [Hash, Array, String]
    # @raise [Chimps::ParseError] if the body is malformed
    def parse_response_body
      return {} if body.blank? || body == 'null'
      if content_type == :yaml
        require 'yaml'
        begin
          # YAML.load builds Ruby objects; YAML.parse would only return
          # a syntax-tree node.
          # NOTE(review): the body comes from the network -- consider
          # YAML.safe_load where the Ruby version provides it.
          YAML.load(body)
        rescue *yaml_parse_errors
          parse_error!
        end
      else
        require 'json'
        begin
          JSON.parse(body)
        rescue JSON::ParserError
          parse_error!
        end
      end
    end
  end
end

data/lib/chimps/typewriter.rb ADDED Viewed

@@ -0,0 +1,326 @@
module Chimps

  # Responses from Infochimps (once parsed from the original JSON or
  # YAML) consist of nested hashes:
  #
  #   { 'dataset' => {
  #                    'title'       => 'My dataset',
  #                    'description' => 'An amazing dataset which...',
  #                    ...
  #                    'sources' => {
  #                                  'source' => {
  #                                                'title' => 'Trustworthy Source'
  #                                                ...
  #                                              },
  #                                  'source' => {..},
  #                                  ...
  #                                  }
  #                   },
  #     ...
  #   }
  #
  # This class utilizes a typewriter and a team of trained chimpanzees
  # to create pretty, line-oriented output from these hashes.
  #
  # A Typewriter is an Array of lines.  Each line is either a String
  # (printed as-is) or an Array of fields (printed as aligned columns).
  class Typewriter < Array

    # The response that this Typewriter will print.
    attr_accessor :response

    # Widths of columns as determined by the maximum number of
    # characters in any row.
    attr_accessor :column_widths

    # Fields to print for each resource.  Given as humanized names,
    # will be automatically converted to key names.
    RESOURCE_FIELDS = ["ID", "Cached Slug", "Updated At", "Title"]

    # String to insert between fields in output.
    FIELD_SEPARATOR = "    "

    # Return a Typewriter to print +response+.
    #
    # @param [Chimps::Response] response
    # @param [Hash] options
    # @option options [true, nil] skip_column_names (nil) Don't print column names in output.
    # @return [Chimps::Typewriter]
    def initialize response, options={}
      super()
      @response          = response
      @column_widths     = []
      @skip_column_names = options[:skip_column_names]
      accumulate(response)
    end

    # Skip printing column names and print only values?
    #
    # @return [true, nil]
    def skip_column_names?
      @skip_column_names
    end

    # Print the accumulated lines in this Typewriter.
    #
    # Will first calculate appropriate column widths for any
    # Array-like lines.
    def print
      calculate_column_widths!
      each do |line|
        if line.is_a?(Array)
          puts pad_and_join(line)
        else
          puts line
        end
      end
    end

    # Accumulate lines to print from +obj+.
    #
    # If +obj+ is a string then it will be accumulated as a single
    # line to print.
    #
    # If +obj+ is an Array then each element will be passed to
    # Chimps::Typewriter#accumulate.
    #
    # If +obj+ is a Hash then each key selects the method used to
    # accumulate lines from the corresponding value.
    #
    # @param [Array, Hash, String] obj
    # @raise [Chimps::PrintingError] if +obj+ or one of its keys is
    #   not something this class knows how to print
    def accumulate obj
      case obj
      when Hash
        obj.each_pair do |resource_name, resource_data|
          name = resource_name.to_s
          case
          when %w[datasets sources licenses].include?(name)
            accumulate_listing(resource_data)
          when %w[dataset source license].include?(name)
            accumulate_resource(resource_name, resource_data)
          when %w[errors batch search api_account].include?(name)
            send("accumulate_#{resource_name}", resource_data)
          when :array  == resource_name         # constructed by Chimps::Response
            accumulate_listing(resource_data)
          when :string == resource_name         # constructed by Chimps::Response
            self << resource_data
          else
            $stderr.puts resource_data.inspect if Chimps.verbose?
            raise PrintingError.new("Unrecognized resource type `#{resource_name}'.")
          end
        end
      when Array
        obj.each { |element| accumulate(element) }
      when String
        self << obj
      else
        raise PrintingError.new("Cannot print a #{obj.class}")
      end
    end

    protected

    # Loop through the accumulated lines, finding the maximum widths
    # of each element in each Array-like line.
    def calculate_column_widths!
      each do |line|
        next unless line.is_a?(Array) # don't try to align strings
        line.each_with_index do |value, field|
          width   = value.to_s.size
          current = column_widths[field]
          column_widths[field] = width if current.nil? || width > current
        end
      end
    end

    # Return a string with +values+ joined by FIELD_SEPARATOR each
    # padded to the corresponding maximum column size.
    #
    # Must have called Chimps::Typewriter#calculate_column_widths!
    # first.
    #
    # @param [Array] values
    # @return [String]
    def pad_and_join values
      padded_values = []
      values.each_with_index do |value, field|
        padded_values << value.to_s.ljust(column_widths[field])
      end
      padded_values.join(FIELD_SEPARATOR)
    end

    # Accumulate lines for the given +resource_name+ from the given
    # +resource_data+.
    #
    # Fields to accumulate in each line are set in
    # Chimps::Typewriter::RESOURCE_FIELDS.
    #
    # The structure of the response for a resource looks like:
    #
    #   {
    #     'dataset' => {
    #                    'id'    => 39293,
    #                    'title' => 'My Awesome Dataset',
    #                    ...
    #                  }
    #   }
    #
    # The key is +resource_name+ and the value is +resource_data+.
    #
    # @param [String] resource_name
    # @param [Hash] resource_data
    def accumulate_resource resource_name, resource_data
      # "Cached Slug" -> "cached_slug"
      self << self.class::RESOURCE_FIELDS.map { |field_name| resource_data[field_name.downcase.tr(' ', '_')] }
    end

    # Accumulate lines for each of the +resources+, all of the given
    # +type+.
    #
    # The structure of the response for a listing looks like:
    #
    #   {
    #     'datasets' => [
    #                     {
    #                       'dataset' => {
    #                                    'id'    => 39293,
    #                                    'title' => 'My Awesome Dataset',
    #                                    ...
    #                                    },
    #                     },
    #                     {
    #                       'dataset' => {
    #                                    'id'    => 28998,
    #                                    'title' => 'My Other Awesome Dataset',
    #                                    ...
    #                                    },
    #                     },
    #                     ...
    #                   ]
    #   }
    #
    # The value is +resources+.
    #
    # @param [Array<Hash>] resources
    def accumulate_listing resources
      return if resources.blank?
      self << self.class::RESOURCE_FIELDS unless skip_column_names?
      resources.each { |resource| accumulate(resource) }
    end

    # Accumulate lines for each of the error messages in +errors+.
    #
    # The structure of the response looks like
    #
    #   {
    #     'errors' => [
    #                   "A title is required.",
    #                   "A description is required.",
    #                   ...
    #                 ]
    #   }
    #
    # The value is +errors+.
    #
    # @param [Array] errors
    def accumulate_errors errors
      errors.each do |error|
        self << error
      end
    end

    # Accumulate lines for each of the batch responses in +batch+.
    #
    # The structure of the response looks like
    #
    #   {
    #     'batch' => [
    #                  {
    #                    'status'   => 'created',
    #                    'resource' => {
    #                                   'dataset' => {
    #                                                  'id'    => 39293,
    #                                                  'title' => "My Awesome Dataset",
    #                                                  ...
    #                                                },
    #                                 },
    #                    'errors' => nil,
    #                    'local_paths' => [...] # this is totally optional
    #                  },
    #                  {
    #                    'status'  => 'invalid',
    #                    'errors' => [
    #                                  "A title is required.",
    #                                  "A description is required."
    #                                ]
    #                  },
    #                  ...
    #                ]
    #   }
    #
    # The value is +batch+.
    #
    # @param [Array<Hash>] batch
    def accumulate_batch batch
      self << ["Status", "Resource", "ID", "Errors"] unless skip_column_names?
      batch.each do |response|
        status = response['status']
        errors = response['errors']
        if response['resource'] && errors.blank?
          resource_type = response['resource'].keys.first
          resource      = response['resource'][resource_type]
          id            = resource['id']
          self << [status, resource_type, id]
        else
          # Array() tolerates an item that carries neither a resource
          # nor any errors (errors == nil).
          self << ([status, nil, nil] + Array(errors))
        end
      end
    end

    # Accumulate lines for the results in +search+.
    #
    # The structure of the response looks like
    #
    #   {
    #     'search' => {
    #                   'results' => [
    #                                  { 'dataset' => {...} },
    #                                  { 'dataset' => {...} },
    #                                  ...
    #                                ]
    #
    #                 }
    #   }
    #
    # The value keyed to +search+ is +search+.
    #
    # @param [Hash] search
    def accumulate_search search
      return if search['results'].blank?
      self << self.class::RESOURCE_FIELDS unless skip_column_names?
      search['results'].each { |resource| accumulate(resource) }
    end

    # Accumulate lines for the +api_account+.
    #
    # The structure of the response looks like
    #
    #   { 'api_account' => {
    #                        'api_key' => ...,
    #                        'owner'   => {
    #                                       'username' => 'Infochimps',
    #                                       ...
    #                                     },
    #                        'updated_at' => ...,
    #                        ...
    #                      }
    #   }
    #
    # The value is +api_account+
    #
    # @param [Hash] api_account
    def accumulate_api_account api_account
      # FIXME this is sort of ugly...
      self << "USERNAME:     #{api_account['owner']['username']}"
      self << "API KEY:      #{api_account['api_key']}"
      self << "LAST UPDATED: #{api_account['updated_at']}"
    end
  end
end

data/lib/chimps/utils/error.rb ADDED Viewed

@@ -0,0 +1,40 @@
module Chimps

  # Root of the Chimps exception hierarchy.  Every exception below
  # inherits from Chimps::Error, so rescuing it catches them all.
  class Error < StandardError; end

  # The user supplied bad input on the command line.
  class CLIError < Error; end

  # No API credentials were specified, or the server rejected the
  # credentials that were given.
  #
  # Roughly corresponds to HTTP status code 401.
  class AuthenticationError < Error; end

  # The Infochimps server response was unexpected or missing.
  #
  # Roughly corresponds to HTTP status code 5xx.
  class ServerError < Error; end

  # IMW failed to properly package the files to upload.
  class PackagingError < Error; end

  # Uploading to S3, or notifying Infochimps of the new package,
  # failed.
  class UploadError < Error; end

  # A subclass failed to implement a required method.
  class NotImplementedError < Error; end

  # The response from Infochimps was not well-formed or was
  # unexpected.
  class ParseError < Error; end

  # Chimps encountered response data it does not know how to pretty
  # print.
  class PrintingError < Error; end
end

data/lib/chimps/utils/extensions.rb ADDED Viewed

@@ -0,0 +1,109 @@
# Yield +obj+ to the given block, then hand +obj+ itself back (the
# block's own result is discarded).  Handy for building up an object
# in place:
#
#   returning([]) { |list| list << 1 }  # => [1]
def returning obj
  obj.tap { |subject| yield subject }
end
class String

  # Ruby 1.9 introduces an inherit argument for Module#const_get and
  # #const_defined? and changes their default behavior.
  if Module.method(:const_get).arity == 1

    # Tries to find a constant with the name specified in the argument string:
    #
    #   "Module".constantize     # => Module
    #   "Test::Unit".constantize # => Test::Unit
    #
    # The name is assumed to be the one of a top-level constant, no matter whether
    # it starts with "::" or not. No lexical context is taken into account:
    #
    #   C = 'outside'
    #   module M
    #     C = 'inside'
    #     C               # => 'inside'
    #     "C".constantize # => 'outside', same as ::C
    #   end
    #
    # NameError is raised when the name is not in CamelCase or the constant is
    # unknown.
    def constantize
      names = split('::')
      names.shift if names.empty? || names.first.empty? # drop the empty piece left by a leading "::"
      names.inject(Object) do |constant, name|
        constant.const_defined?(name) ? constant.const_get(name) : constant.const_missing(name)
      end
    end
  else

    # Same contract as the one-argument variant above, for Rubies whose
    # Module#const_get and #const_defined? accept the inherit flag.
    #
    # Whether a constant exists is decided with const_defined? rather
    # than by the truthiness of its value, so a constant that holds
    # +nil+ or +false+ is returned instead of being reported missing.
    def constantize
      names = split('::')
      names.shift if names.empty? || names.first.empty? # drop the empty piece left by a leading "::"
      names.inject(Object) do |constant, name|
        if constant.const_defined?(name, false)
          constant.const_get(name, false)
        else
          constant.const_missing(name)
        end
      end
    end
  end
end
class Object

  # Is this object blank?
  #
  # Objects that respond to +empty?+ are blank exactly when they are
  # empty; every other object is blank only when it is falsy.  So +nil+,
  # +false+, [] and {} are all blank.
  #
  # This turns
  #
  #   if !address.nil? && !address.empty?
  #
  # into
  #
  #   if !address.blank?
  #
  # @return [true, false]
  def blank?
    return empty? if respond_to?(:empty?)
    !self
  end

  # Is this object present, i.e. not blank?
  #
  # @return [true, false]
  def present?
    blank? ? false : true
  end
end
class NilClass #:nodoc:
  # +nil+ is falsy, hence always blank.
  #
  # @return [true]
  def blank?
    !self
  end
end
class FalseClass #:nodoc:
  # +false+ is falsy, hence always blank.
  #
  # @return [true]
  def blank?
    !self
  end
end
class TrueClass #:nodoc:
  # +true+ is truthy, hence never blank.
  #
  # @return [false]
  def blank?
    !self
  end
end
class Array #:nodoc:
  # An Array is blank exactly when it is empty.
  alias blank? empty?
end
class Hash #:nodoc:
  # A Hash is blank exactly when it is empty.
  alias blank? empty?
end
class String #:nodoc:
  # A String is blank when it contains no non-whitespace character,
  # which covers both "" and "   ".
  #
  # @return [true, false]
  def blank?
    (self =~ /\S/).nil?
  end
end
class Numeric #:nodoc:
  # A number is never blank -- not even zero.
  #
  # @return [false]
  def blank?() false end
end

data/lib/chimps/utils/uses_curl.rb ADDED Viewed

@@ -0,0 +1,26 @@
module Chimps
  module Utils

    # Mixin giving access to the +curl+ program through a system call.
    module UsesCurl

      # Ask the shell where +curl+ lives.
      #
      # @return [String] the output of <tt>which curl</tt> without its
      #   trailing newline
      def curl
        %x(which curl).chomp
      end

      # FIXME right now curl is the default but it really shouldn't be...
      # def define_curl_options
      #   on_tail("-c", "--curl", "Use curl instead of Ruby to upload package (faster)") do |c|
      #     @curl = c
      #   end
      # end

      # Should this use curl?
      # def curl?
      #   @curl
      # end
    end
  end
end

data/lib/chimps/utils/uses_model.rb ADDED Viewed

@@ -0,0 +1,51 @@
module Chimps
  module Utils

    # Mixin for commands that operate on one kind of resource (a
    # "model").
    #
    # The including class must define a +MODELS+ constant (an Array of
    # model names, the first being the default).  It must also provide
    # +argv+ for #model_identifier and +on_tail+ for
    # #define_model_option.
    module UsesModel

      # The model in use; defaults to the first entry of +MODELS+.
      #
      # @return [String]
      def model
        @model ||= self.class::MODELS.first
      end

      # The plural form of the model name, as used in request paths.
      #
      # A trailing consonant + "y" becomes "ies" ("category" ->
      # "categories"); anything else just gains an "s".
      #
      # @return [String]
      def plural_model
        if model =~ /[^aeiou]y\z/
          model[0..-2] + 'ies'
        else
          model + 's'
        end
      end

      # The identifier of the model instance to act on, taken from the
      # first command-line argument.
      #
      # @return [String]
      # @raise [Chimps::CLIError] if no identifier was given
      def model_identifier
        raise CLIError.new("Must provide an ID or URL-escaped handle as the first argument") if argv.first.blank?
        argv.first
      end

      # Path to the collection of models.
      #
      # @return [String]
      def models_path
        "#{plural_model}.json"
      end

      # Path to the single model named by #model_identifier.
      #
      # @return [String]
      def model_path
        "#{plural_model}/#{model_identifier}.json"
      end

      # Choose the model to use.
      #
      # @param [String] model
      # @raise [Chimps::CLIError] if +model+ is not listed in +MODELS+
      def model= model
        raise CLIError.new("Invalid model: #{model}.  Must be one of #{models_string}") unless self.class::MODELS.include?(model)
        @model = model
      end

      # A human-readable list of the available models, e.g.
      # "dataset (default), source, or license".
      #
      # @return [String]
      def models_string
        parts     = self.class::MODELS.dup # never modify the constant itself
        parts[0]  = "#{parts.first} (default)"
        # "or" only makes sense when there is more than one choice
        parts[-1] = "or #{parts.last}" if parts.size > 1
        parts.join(', ')
      end

      # Define the <tt>-m/--model</tt> command-line option.
      def define_model_option
        on_tail("-m", "--model MODEL", "Use a different resource, one of: #{models_string}") do |m|
          self.model= m
        end
      end
    end
  end
end