chimps 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/Gemfile +3 -9
  2. data/Gemfile.lock +14 -10
  3. data/README.rdoc +146 -240
  4. data/Rakefile +4 -33
  5. data/VERSION +1 -1
  6. data/lib/chimps/config.rb +35 -21
  7. data/lib/chimps/{utils/error.rb → error.rb} +1 -12
  8. data/lib/chimps/query_request.rb +67 -0
  9. data/lib/chimps/request.rb +82 -108
  10. data/lib/chimps/response.rb +62 -22
  11. data/lib/chimps/utils/typewriter.rb +90 -0
  12. data/lib/chimps/utils/uses_curl.rb +22 -12
  13. data/lib/chimps/utils.rb +50 -6
  14. data/lib/chimps/workflows/download.rb +72 -0
  15. data/lib/chimps/workflows/upload.rb +113 -0
  16. data/lib/chimps.rb +12 -12
  17. data/spec/chimps/query_request_spec.rb +44 -0
  18. data/spec/chimps/request_spec.rb +92 -0
  19. data/spec/chimps/response_spec.rb +0 -1
  20. data/spec/chimps/workflows/download_spec.rb +48 -0
  21. data/spec/spec_helper.rb +2 -19
  22. metadata +46 -91
  23. data/.document +0 -5
  24. data/.gitignore +0 -32
  25. data/CHANGELOG.textile +0 -4
  26. data/bin/chimps +0 -5
  27. data/lib/chimps/cli.rb +0 -28
  28. data/lib/chimps/commands/base.rb +0 -65
  29. data/lib/chimps/commands/batch.rb +0 -40
  30. data/lib/chimps/commands/create.rb +0 -31
  31. data/lib/chimps/commands/destroy.rb +0 -26
  32. data/lib/chimps/commands/download.rb +0 -46
  33. data/lib/chimps/commands/help.rb +0 -100
  34. data/lib/chimps/commands/list.rb +0 -41
  35. data/lib/chimps/commands/query.rb +0 -82
  36. data/lib/chimps/commands/search.rb +0 -48
  37. data/lib/chimps/commands/show.rb +0 -30
  38. data/lib/chimps/commands/test.rb +0 -39
  39. data/lib/chimps/commands/update.rb +0 -34
  40. data/lib/chimps/commands/upload.rb +0 -50
  41. data/lib/chimps/commands.rb +0 -125
  42. data/lib/chimps/typewriter.rb +0 -349
  43. data/lib/chimps/utils/log.rb +0 -48
  44. data/lib/chimps/utils/uses_model.rb +0 -34
  45. data/lib/chimps/utils/uses_yaml_data.rb +0 -93
  46. data/lib/chimps/workflows/batch.rb +0 -127
  47. data/lib/chimps/workflows/downloader.rb +0 -102
  48. data/lib/chimps/workflows/up.rb +0 -149
  49. data/lib/chimps/workflows/upload/bundler.rb +0 -249
  50. data/lib/chimps/workflows/upload/notifier.rb +0 -59
  51. data/lib/chimps/workflows/upload/token.rb +0 -77
  52. data/lib/chimps/workflows/upload/uploader.rb +0 -51
  53. data/lib/chimps/workflows.rb +0 -12
  54. data/spec/chimps/typewriter_spec.rb +0 -114
  55. data/spec/chimps/workflows/upload/bundler_spec.rb +0 -75
  56. data/spec/chimps/workflows/upload/token_spec.rb +0 -6
@@ -1,349 +0,0 @@
1
- module Chimps
2
-
3
- # Responses from Infochimps (once parsed from the original JSON or
4
- # YAML) consist of nested hashes:
5
- #
6
- # { 'dataset' => {
7
- # 'title' => 'My dataset',
8
- # 'description' => 'An amazing dataset which...',
9
- # ...
10
- # 'sources' => {
11
- # 'source' => {
12
- # 'title' => 'Trustworthy Source'
13
- # ...
14
- # },
15
- # 'source' => {..},
16
- # ...
17
- # }
18
- # },
19
- # ...
20
- # }
21
- #
22
- # This class utilizes a typewriter and a team of trained chimpanzees
23
- # to create pretty, line-oriented output from these hashes.
24
- class Typewriter < Array
25
-
26
- # The response that this Typewriter will print.
27
- attr_accessor :response
28
-
29
- # Widths of columns as determined by the maximum number of
30
- # characters in any row.
31
- attr_accessor :column_widths
32
-
33
- # Fields to print for each resource. Given as humanized names,
34
- # will be automatically converted to key names.
35
- RESOURCE_FIELDS = ["ID", "Cached Slug", "Updated At", "Title"]
36
-
37
- # String to insert between fields in output.
38
- FIELD_SEPARATOR = " "
39
-
40
- # Return a Typewriter to print +data+.
41
- #
42
- # @param [Chimps::Response] response
43
- # @return [Chimps::Typewriter]
44
- def initialize response, options={}
45
- super()
46
- @response = response
47
- @column_widths = []
48
- @skip_column_names = options[:skip_column_names]
49
- accumulate(response)
50
- end
51
-
52
- # Skip printing column names and emit only values?
53
- #
54
- # @return [true, nil]
55
- def skip_column_names?
56
- @skip_column_names
57
- end
58
-
59
- # Print the accumulated lines in this Typewriter to the given
60
- # +output+ (defaults to <tt>$stdout</tt>).
61
- #
62
- # Will first calculate appropriate column widths for any
63
- # Array-like lines.
64
- #
65
- # @param [#puts] output
66
- def print output=$stdout
67
- calculate_column_widths!
68
- each do |line|
69
- if line.is_a?(Array)
70
- output.puts pad_and_join(line)
71
- else
72
- output.puts line
73
- end
74
- end
75
- end
76
-
77
- # Accumulate lines to print from +obj+.
78
- #
79
- # If +obj+ is a string then it will be accumulated as a single
80
- # line to print.
81
- #
82
- # If +obj+ is an Array then each element will be passed to
83
- # Chimps::Typewriter#accumulate.
84
- #
85
- # If +obj+ is a Hash then each key will be mapped to a method
86
- # <tt>accumulate_KEY</tt> and the corresponding value passed in.
87
- # This method is responsible for accumulating lines to print.
88
- #
89
- # @param [Array, Hash, String] obj
90
- def accumulate obj
91
- case obj
92
- when Hash
93
- obj.each_pair do |resource_name, resource_data|
94
- case
95
- when %w[datasets sources licenses].include?(resource_name.to_s)
96
- accumulate_listing(resource_data)
97
- when %w[dataset source license].include?(resource_name.to_s)
98
- accumulate_resource(resource_name, resource_data)
99
- when %w[errors batch search api_account message].include?(resource_name.to_s)
100
- send("accumulate_#{resource_name}", resource_data)
101
- when %w[message].include?(resource_name.to_s)
102
- self << [resource_data]
103
- when %w[error].include?(resource_name.to_s)
104
- nil
105
- when :array == resource_name # constructed by Chimps::Response
106
- accumulate_listing(resource_data)
107
- when :string == resource_name # constructed by Chimps::Response
108
- self << obj[:string]
109
- else
110
- $stderr.puts resource_data.inspect if Chimps.verbose?
111
- raise PrintingError.new("Unrecognized resource type `#{resource_name}'.")
112
- end
113
- end
114
- when Array
115
- obj.each { |element| accumulate(element) }
116
- when String
117
- self << obj
118
- else
119
- raise PrintingError.new("Cannot print a #{obj.class}")
120
- end
121
- end
122
-
123
- protected
124
-
125
- # Loop through the accumulated lines, finding the maximum widths
126
- # of each element in each Array-like line.
127
- def calculate_column_widths!
128
- each do |line|
129
- next unless line.is_a?(Array) # don't try to align strings
130
- line.each_with_index do |value, field|
131
- current_max_width = column_widths[field]
132
- unless current_max_width
133
- current_max_width = 0
134
- column_widths << current_max_width
135
- end
136
- value_size = value.to_s.size
137
- column_widths[field] = value_size if value_size > current_max_width
138
- end
139
- end
140
- end
141
-
142
- # Return a string with +values+ joined by FIELD_SEPARATOR each
143
- # padded to the corresponding maximum column size.
144
- #
145
- # Must have called Chimps::Typewriter#calculate_column_widths!
146
- # first.
147
- #
148
- # @param [Array] values
149
- # @return [String]
150
- def pad_and_join values
151
- returning([]) do |padded_values|
152
- values.each_with_index do |value, field|
153
- max_width = column_widths[field]
154
- value_width = value.to_s.size
155
- padded_values << value.to_s + (' ' * (max_width - value_width))
156
- end
157
- end.join(FIELD_SEPARATOR)
158
- end
159
-
160
- # Accumulate lines for the given +resource_name+ from the given
161
- # +resource_data+.
162
- #
163
- # Fields to accumulate in each line are set in
164
- # Chimps::Typewriter::RESOURCE_FIELDS.
165
- #
166
- # The structure of the response for a resource looks like:
167
- #
168
- # {
169
- # 'dataset' => {
170
- # 'id' => 39293,
171
- # 'title' => 'My Awesome Dataset',
172
- # ...
173
- # }
174
- # }
175
- #
176
- # The key is +resource_name+ and the value is +resource_data+.
177
- #
178
- # @param [String] resource_name
179
- # @param [Hash] resource_data
180
- def accumulate_resource resource_name, resource_data
181
- self << self.class::RESOURCE_FIELDS.map { |field_name| resource_data[field_name.downcase.tr(' ', '_')] }
182
- end
183
-
184
- # Accumulate lines for each of the +resources+, all of the given
185
- # +type+.
186
- #
187
- # The structure of the response for a listing looks like:
188
- #
189
- # {
190
- # 'datasets' => [
191
- # {
192
- # 'dataset' => {
193
- # 'id' => 39293,
194
- # 'title' => 'My Awesome Dataset',
195
- # ...
196
- # },
197
- # },
198
- # {
199
- # 'dataset' => {
200
- # 'id' => 28998,
201
- # 'title' => 'My Other Awesome Dataset',
202
- # ...
203
- # },
204
- # },
205
- # ...
206
- # ]
207
- # }
208
- #
209
- # The value is +resources+.
210
- #
211
- # @param [Array<Hash>] resources
212
- def accumulate_listing resources
213
- return if resources.blank?
214
- self << self.class::RESOURCE_FIELDS unless skip_column_names?
215
- resources.each { |resource| accumulate(resource) }
216
- end
217
-
218
- # Accumulate lines for each of the error messages in +errors+.
219
- #
220
- # The structure of the response looks like
221
- #
222
- # {
223
- # 'errors' => [
224
- # "A title is required.",
225
- # "A description is required.",
226
- # ...
227
- # ]
228
- # }
229
- #
230
- # The value is +errors+.
231
- #
232
- # @param [Array] errors
233
- def accumulate_errors errors
234
- errors.each do |error|
235
- self << error
236
- end
237
- end
238
-
239
- # Accumulate a line for the given +message+.
240
- #
241
- # The structure of the response from the Infochimps Query API on
242
- # an error is:
243
- #
244
- # {
245
- # 'message' => "The error message returned"
246
- # }
247
- #
248
- # The value is +message+.
249
- #
250
- # @param [String] message
251
- def accumulate_message message
252
- self << message
253
- end
254
-
255
- # Accumulate lines for each of the batch responses in +batch+.
256
- #
257
- # The structure of the response looks like
258
- #
259
- # {
260
- # 'batch' => [
261
- # {
262
- # 'status' => 'created',
263
- # 'resource' => {
264
- # 'dataset' => {
265
- # 'id' => 39293,
266
- # 'title' => "My Awesome Dataset",
267
- # ...
268
- # },
269
- # },
270
- # 'errors' => nil,
271
- # 'local_paths' => [...] # this is totally optional
272
- # },
273
- # {
274
- # 'status' => 'invalid',
275
- # 'errors' => [
276
- # "A title is required.",
277
- # "A description is required."
278
- # ]
279
- # },
280
- # ...
281
- # ]
282
- # }
283
- #
284
- # The value is +batch+.
285
- def accumulate_batch batch
286
- self << ["Status", "Resource", "ID", "Errors"] unless skip_column_names?
287
- batch.each do |response|
288
- status = response['status']
289
- errors = response['errors']
290
- if response['resource'] && errors.blank?
291
- resource_type = response['resource'].keys.first
292
- resource = response['resource'][resource_type]
293
- id = resource['id']
294
- self << [status, resource_type, id]
295
- else
296
- self << ([status, nil, nil] + errors)
297
- end
298
- end
299
- end
300
-
301
- # Accumulate lines for the results in +search+.
302
- #
303
- # The structure of the response looks like
304
- #
305
- # {
306
- # 'search' => {
307
- # 'results' => [
308
- # { 'dataset' => {...} },
309
- # { 'dataset' => {...} },
310
- # ...
311
- # ]
312
- #
313
- # }
314
- # }
315
- #
316
- # The value keyed to +search+ is +search+.
317
- def accumulate_search search
318
- return if search['results'].blank?
319
- self << self.class::RESOURCE_FIELDS unless skip_column_names?
320
- search['results'].each { |resource| accumulate(resource) }
321
- end
322
-
323
- # Accumulate lines for the +api_account+.
324
- #
325
- # The structure of the response looks like
326
- #
327
- # { 'api_account' => {
328
- # 'api_key' => ...,
329
- # 'owner' => {
330
- # 'username' => 'Infochimps',
331
- # ...
332
- # },
333
- # 'updated_at' => ...,
334
- # ...
335
- # }
336
- # }
337
- #
338
- # The value is +api_account+
339
- def accumulate_api_account api_account
340
- # FIXME this is sort of ugly...
341
- self << "USERNAME: #{api_account['owner']['username']}"
342
- self << "API KEY: #{api_account['apikey']}"
343
- self << "LAST UPDATED: #{api_account['updated_at']}"
344
- end
345
-
346
- end
347
-
348
- end
349
-
@@ -1,48 +0,0 @@
1
- module Chimps
2
-
3
- # The Chimps logger. Set via Chimps::Config[:log] and defaults
4
- # to $stdout.
5
- #
6
- # @return [Logger]
7
- def self.log
8
- @log ||= Log.new_logger
9
- end
10
-
11
- # Set the Chimps logger.
12
- #
13
- # @param [Logger] new_log
14
- def self.log= new_log
15
- @log = new_log
16
- end
17
-
18
- # Module for initializing the Chimps logger from configuration
19
- # settings.
20
- module Log
21
-
22
- # Initialize a new Logger instance with the log level set by
23
- # Chimps.verbose?
24
- #
25
- # @return [Logger]
26
- def self.new_logger
27
- require 'logger'
28
- returning(Logger.new(log_file)) do |log|
29
- log.progname = "Chimps"
30
- log.level = Chimps.verbose? ? Logger::INFO : Logger::WARN
31
- end
32
- end
33
-
34
- # Return either the path to the log file in Chimps::Config[:log]
35
- # or $stdout if the path is blank or equal to `-'.
36
- #
37
- # @return [String, $stdout] the path to the log or $stdout
38
- def self.log_file
39
- if Chimps::Config[:log]
40
- Chimps::Config[:log].strip == '-' ? $stdout : Chimps::Config[:log]
41
- else
42
- $stdout
43
- end
44
- end
45
- end
46
- end
47
-
48
-
@@ -1,34 +0,0 @@
1
- module Chimps
2
- module Utils
3
- module UsesModel
4
-
5
- def model
6
- config[:model]
7
- end
8
-
9
- def plural_model
10
- if model[-1].chr == 'y'
11
- model[1..-1] + 'ies'
12
- else
13
- model + 's'
14
- end
15
- end
16
-
17
- def model_identifier
18
- raise CLIError.new("Must provide an ID or URL-escaped handle as the first argument") if config.argv.first.blank?
19
- config.argv.first
20
- end
21
-
22
- def models_path
23
- "#{plural_model}.json"
24
- end
25
-
26
- def model_path
27
- "#{plural_model}/#{model_identifier}.json"
28
- end
29
-
30
- end
31
- end
32
- end
33
-
34
-
@@ -1,93 +0,0 @@
1
- module Chimps
2
- module Utils
3
- module UsesYamlData
4
-
5
- def ignore_yaml_files_on_command_line
6
- false
7
- end
8
- def ignore_first_arg_on_command_line
9
- false
10
- end
11
-
12
- def data
13
- @data ||= merge_all(*(data_from_stdin + data_from_file + data_from_command_line)) || {}
14
- end
15
-
16
- protected
17
-
18
- def merge_all *objs
19
- objs.compact!
20
- return if objs.blank? # raising an error here is left to the caller
21
- klasses = objs.map(&:class).uniq
22
- raise CLIError.new("Mismatched YAML data types -- Hashes can only be combined with Hashes, Arrays with Arrays") if klasses.size > 1
23
- data_type = klasses.first.new
24
- case data_type
25
- when Array
26
- # greater precedence at the end so iterate in order
27
- returning([]) do |d|
28
- objs.each do |obj|
29
- d.concat(obj)
30
- end
31
- end
32
- when Hash
33
- # greater precedence at the end so iterate in order
34
- returning({}) do |d|
35
- objs.each do |obj|
36
- d.merge!(obj)
37
- end
38
- end
39
- else raise CLIError.new("Incompatible YAML data type #{data_type} -- can only combine Hashes and Arrays")
40
- end
41
- end
42
-
43
- def params_from_command_line
44
- returning([]) do |d|
45
- config.argv.each_with_index do |arg, index|
46
- next if index == 0 && ignore_first_arg_on_command_line
47
- next unless arg =~ /^(\w+) *=(.*)$/
48
- name, value = $1.downcase.to_sym, $2.strip
49
- d << { name => value } # always a hash
50
- end
51
- end
52
- end
53
-
54
- def yaml_files_from_command_line
55
- returning([]) do |d|
56
- config.argv.each_with_index do |arg, index|
57
- next if index == 0 && ignore_first_arg_on_command_line
58
- next if arg =~ /^(\w+) *=(.*)$/
59
- path = File.expand_path(arg)
60
- raise CLIError.new("No such path #{path}") unless File.exist?(path)
61
- d << YAML.load(open(path)) # either a hash or an array
62
- end
63
- end
64
- end
65
-
66
- def data_from_command_line
67
- if ignore_yaml_files_on_command_line
68
- params_from_command_line
69
- else
70
- yaml_files_from_command_line + params_from_command_line
71
- end
72
- end
73
-
74
- def data_from_file
75
- [config[:data_file] ? YAML.load_file(File.expand_path(config[:data_file])) : nil]
76
- end
77
-
78
- def data_from_stdin
79
- return [nil] unless $stdin.stat.size > 0
80
- returning([]) do |d|
81
- YAML.load_stream($stdin).each do |document|
82
- d << document
83
- end
84
- end
85
- end
86
-
87
- def ensure_data_is_present!
88
- raise CLIError.new("Must provide some data to send, either on the command line, from an input file, or by piping to STDIN. Try `chimps help #{name}'") unless data.present?
89
- end
90
-
91
- end
92
- end
93
- end
@@ -1,127 +0,0 @@
1
- module Chimps
2
- module Workflows
3
-
4
- # A class for performing batch updates/uploads to Infochimps.
5
- #
6
- # It works by taking YAML data describing many updates and
7
- # performing a single batch API request with this data.
8
- #
9
- # The batch response is then parsed and analyzed and (given
10
- # success or fearlessness) any necessary uploads are performed.
11
- #
12
- # Examples of the input data format can be found in the
13
- # <tt>/examples</tt> directory of the Chimps distribution.
14
- class BatchUpdater
15
-
16
- # The data to be sent as a bulk update.
17
- attr_reader :data
18
-
19
- # The batch update response
20
- attr_reader :batch_response
21
-
22
- # The output file to store the bulk update response.
23
- attr_reader :output_path
24
-
25
- # Whether to upload even if there were errors on update.
26
- attr_reader :upload_even_if_errors
27
-
28
- # The data format to annotate the upload with.
29
- #
30
- # Chimps will try to guess if this isn't given.
31
- attr_reader :fmt
32
-
33
- # Create a new BatchUpdater with the given +data+ and +options+.
34
- #
35
- # The intermediate batch response can be saved at a file named
36
- # by <tt>:output_path</tt>, though this isn't necessary.
37
- #
38
- # @param [Array] data an array of resource updates
39
- # @param [Hash] options
40
- # @option options [String] output_path path to store the batch response
41
- # @option options [true, false] upload_even_if_errors whether to continue uploading in the presence of errors on update
42
- # @option options [String] fmt the data format to annotate each upload with (see `chimps upload')
43
- # @return [Chimps::Workflows::BatchUpdater]
44
- def initialize data, options={}
45
- @data = data
46
- @output_path = options[:output_path]
47
- @upload_even_if_errors = options[:upload_even_if_errors]
48
- @fmt = options[:fmt]
49
- end
50
-
51
- # The path to submit batch update requests.
52
- #
53
- # @return [String]
54
- def batch_path
55
- "batch.json"
56
- end
57
-
58
- # Perform this batch update followed by the batch upload.
59
- def execute!
60
- batch_update!
61
- batch_upload!
62
- end
63
-
64
- # Perform the batch update.
65
- def batch_update!
66
- @batch_response = Request.new(batch_path, :data => { :batch => data }, :authenticate => true).post
67
- File.open(output_path, 'w') { |f| f.puts batch_response.body } if output_path
68
- batch_response.print
69
- end
70
-
71
- # Were any of the updates performed during the batch update
72
- # errors?
73
- #
74
- # @return [true, false]
75
- def error?
76
- batch_response['batch'].each do |response|
77
- status = response['status']
78
- return true unless ['created', 'updated'].include?(status)
79
- end
80
- false
81
- end
82
-
83
- # Did all of the updates performed in the batch update succeed?
84
- #
85
- # @return [true, false]
86
- def success?
87
- ! error?
88
- end
89
-
90
- # Perform the batch upload.
91
- #
92
- # Will bail if the batch update had an error unless
93
- # Chimps::Workflows::BatchUpdater#upload_even_if_errors returns
94
- # true.
95
- def batch_upload!
96
- return unless success? || upload_even_if_errors
97
- $stderr.puts("WARNING: continuing with uploads even though there were errors") unless success?
98
- dataset_ids_and_local_paths.each do |id, local_paths|
99
- Chimps::Workflows::Uploader.new(:dataset => id, :local_paths => local_paths, :fmt => fmt).execute!
100
- end
101
- end
102
-
103
- protected
104
- # Iterate through the batch response and return tuples
105
- # consisting of an ID and an array of local paths to upload.
106
- #
107
- # Only datasets which were successfully created/updated,
108
- # returned an ID, and had local_paths defined in the original
109
- # batch update will be output.
110
- #
111
- # @return [Array<Array>]
112
- def dataset_ids_and_local_paths
113
- batch_response['batch'].map do |response|
114
- status = response['status']
115
- next unless (status == 'created' || status == 'updated') # skip errors
116
- next unless dataset = response['resource']['dataset'] # skip unless it's a dataset
117
- id = dataset['id']
118
- next if id.blank? # skip unless it has an ID
119
- local_paths = response['local_paths']
120
- next if local_paths.blank? # skip unless local_paths were defined
121
- [id, local_paths]
122
- end.compact
123
- end
124
- end
125
- end
126
- end
127
-