RubyGems - chimps - Versions diffs - 0.2.2 → 0.3.0 - Mend

chimps 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

data/Gemfile +3 -9
data/Gemfile.lock +14 -10
data/README.rdoc +146 -240
data/Rakefile +4 -33
data/VERSION +1 -1
data/lib/chimps/config.rb +35 -21
data/lib/chimps/{utils/error.rb → error.rb} +1 -12
data/lib/chimps/query_request.rb +67 -0
data/lib/chimps/request.rb +82 -108
data/lib/chimps/response.rb +62 -22
data/lib/chimps/utils/typewriter.rb +90 -0
data/lib/chimps/utils/uses_curl.rb +22 -12
data/lib/chimps/utils.rb +50 -6
data/lib/chimps/workflows/download.rb +72 -0
data/lib/chimps/workflows/upload.rb +113 -0
data/lib/chimps.rb +12 -12
data/spec/chimps/query_request_spec.rb +44 -0
data/spec/chimps/request_spec.rb +92 -0
data/spec/chimps/response_spec.rb +0 -1
data/spec/chimps/workflows/download_spec.rb +48 -0
data/spec/spec_helper.rb +2 -19
metadata +46 -91
data/.document +0 -5
data/.gitignore +0 -32
data/CHANGELOG.textile +0 -4
data/bin/chimps +0 -5
data/lib/chimps/cli.rb +0 -28
data/lib/chimps/commands/base.rb +0 -65
data/lib/chimps/commands/batch.rb +0 -40
data/lib/chimps/commands/create.rb +0 -31
data/lib/chimps/commands/destroy.rb +0 -26
data/lib/chimps/commands/download.rb +0 -46
data/lib/chimps/commands/help.rb +0 -100
data/lib/chimps/commands/list.rb +0 -41
data/lib/chimps/commands/query.rb +0 -82
data/lib/chimps/commands/search.rb +0 -48
data/lib/chimps/commands/show.rb +0 -30
data/lib/chimps/commands/test.rb +0 -39
data/lib/chimps/commands/update.rb +0 -34
data/lib/chimps/commands/upload.rb +0 -50
data/lib/chimps/commands.rb +0 -125
data/lib/chimps/typewriter.rb +0 -349
data/lib/chimps/utils/log.rb +0 -48
data/lib/chimps/utils/uses_model.rb +0 -34
data/lib/chimps/utils/uses_yaml_data.rb +0 -93
data/lib/chimps/workflows/batch.rb +0 -127
data/lib/chimps/workflows/downloader.rb +0 -102
data/lib/chimps/workflows/up.rb +0 -149
data/lib/chimps/workflows/upload/bundler.rb +0 -249
data/lib/chimps/workflows/upload/notifier.rb +0 -59
data/lib/chimps/workflows/upload/token.rb +0 -77
data/lib/chimps/workflows/upload/uploader.rb +0 -51
data/lib/chimps/workflows.rb +0 -12
data/spec/chimps/typewriter_spec.rb +0 -114
data/spec/chimps/workflows/upload/bundler_spec.rb +0 -75
data/spec/chimps/workflows/upload/token_spec.rb +0 -6

data/lib/chimps/utils/typewriter.rb ADDED Viewed

@@ -0,0 +1,90 @@
+module Chimps
+  module Utils
+    # There are two Chimpanzees using a typewriter.  One of them presses
+    # most of the keys and writes to $stdout.  The other only hits the
+    # spacebar and writes to $stderr.  He's crazy.
+    #
+    # These two Chimps together manage to line everything up just right.
+    #
+    # A Typewriter is an Array of rows; each row is an Array of the
+    # entries obtained by splitting a response body first on the row
+    # separator and then on the column separator.
+    class Typewriter < Array
+      # The response that this Typewriter will print.
+      attr_accessor :response
+      # Widths of columns as determined by the maximum number of
+      # characters in any row.  A Hash keyed by column index.
+      attr_accessor :column_widths
+      # Separates rows.
+      attr_accessor :row_separator
+      # Separates columns.
+      attr_accessor :column_separator
+      # Default row separator
+      ROW_SEPARATOR = "\n"
+      # Default column separator
+      COLUMN_SEPARATOR = "\t"
+      # Number of extra padding spaces added to the width of every
+      # column when printing.
+      #
+      # FIXME
+      #
+      # @return [Integer]
+      def spacer
+        2
+      end
+      # Return a Typewriter to print +response+.
+      #
+      # The body of +response+ is accumulated into rows immediately.
+      #
+      # @param [Chimps::Response] response
+      # @param [Hash] options
+      # @option options [String] :row_separator (ROW_SEPARATOR)
+      # @option options [String] :column_separator (COLUMN_SEPARATOR)
+      # @return [Chimps::Utils::Typewriter]
+      def initialize response, options={}
+        super()
+        @response             = response
+        @column_widths        = {}
+        self.row_separator    = (options[:row_separator]    || ROW_SEPARATOR)
+        self.column_separator = (options[:column_separator] || COLUMN_SEPARATOR)
+        accumulate(response)
+      end
+      # Print the accumulated lines in this Typewriter.
+      #
+      # Each entry is padded out to the width recorded for its column in
+      # +column_widths+ plus +spacer+ so that the columns line up.
+      #
+      # The spaces are written to $stderr and the rest of the characters
+      # to $stdout, with a newline written to $stdout after every row.
+      #
+      # NOTE(review): the original comment says this lets you pipe the
+      # output into other processes and preserve the TSV structure, but
+      # no column separator is written to $stdout here and rows always
+      # end in "\n" rather than +row_separator+ -- confirm the intent.
+      def print
+        $stdout.sync = true ; $stderr.sync = true
+        each do |row|
+          row.each_with_index do |entry, field|
+            $stdout.write entry
+            max_width = column_widths[field] + spacer
+            unless entry.size >= max_width
+              num_spaces = max_width - entry.size
+              pad = " " * num_spaces
+              $stderr.write(pad)
+            end
+          end
+          $stdout.write "\n"
+        end
+      end
+      # Accumulate lines to print from the body of +response+.
+      #
+      # The stripped body is split into lines on +row_separator+ and
+      # each line into entries on +column_separator+.  Updates internal
+      # width counters as it accumulates.
+      #
+      # @param [#body] response
+      def accumulate response
+        response.body.strip.split(row_separator).each do |line|
+          self << [].tap do |row|
+            line.split(column_separator).each_with_index do |entry, field|
+              column_widths[field] = entry.size if entry.size > (column_widths[field] || 0)
+              row << entry
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/chimps/utils/uses_curl.rb CHANGED Viewed

@@ -5,22 +5,32 @@ module Chimps
     # system call.
     module UsesCurl
-      def curl
+      # Output of `which curl` with the trailing newline removed.
+      #
+      # @return [String]
+      def curl_program
         `which curl`.chomp
       end
-      # FIXME right now curl is the default but it really shouldn't be...
-      # def define_curl_options
-      #   on_tail("-c", "--curl", "Use curl instead of Ruby to upload package (faster)") do |c|
-      #     @curl = c
-      #   end
-      # end
-      # Should this use curl?
-      # def curl?
-      #   @curl
-      # end
+      # Curl invocations (specifically those that do S3 HTTP POST) are
+      # sometimes sensitive about the order of parameters.  Instead of
+      # a Hash we therefore take an Array of pairs here.
+      #
+      # @param [Array<Array<String>>] array
+      # @return [String]
+      def curl_params params
+        params.map do |param, value|
+          "-F #{param}='#{value}'"
+        end.join(' ')
+      end
+      # Build a curl command line for +url+, log it at info level and
+      # run it with Kernel#system.
+      #
+      # The progress meter is silenced with <tt>-s -S</tt> unless
+      # Chimps.verbose? is true.
+      #
+      # NOTE(review): the output path is interpolated into the command
+      # without quoting and +url+ is only wrapped in single quotes --
+      # confirm that callers never pass values containing spaces or
+      # quotes.
+      #
+      # @param [String] url
+      # @param [Hash] options
+      # @option options [String] :method ("GET") passed to <tt>-X</tt>
+      # @option options [String] :output ('/dev/null') passed to <tt>-o</tt>
+      # @option options [Array<Array<String>>] :params ([]) form fields, see curl_params
+      # @return [true, false, nil] the return value of Kernel#system
+      def curl url, options={}
+        options = {:method => "GET", :output => '/dev/null', :params => []}.merge(options)
+        progress_meter = Chimps.verbose? ? '' : '-s -S'
+        command = "#{curl_program} #{progress_meter} -X #{options[:method]} -o #{options[:output]}"
+        command += " #{curl_params(options[:params])}" unless options[:params].empty?
+        command += " '#{url}'"
+        Chimps.log.info(command)
+        system(command)
+      end
     end
   end
 end

data/lib/chimps/utils.rb CHANGED Viewed

@@ -1,13 +1,57 @@
 require 'chimps/config'
-require 'chimps/utils/extensions'
-require 'chimps/utils/error'
-require 'chimps/utils/log'
+# require 'chimps/utils/extensions'
+require 'chimps/error'
 module Chimps
   module Utils
-    autoload :UsesCurl,     'chimps/utils/uses_curl'
-    autoload :UsesModel,    'chimps/utils/uses_model'
-    autoload :UsesYamlData, 'chimps/utils/uses_yaml_data'
+    autoload :UsesCurl,   'chimps/utils/uses_curl'
+    autoload :Typewriter, 'chimps/utils/typewriter'
+  end
+  # The Chimps logger.  Set via Chimps.config[:log] and defaults
+  # to $stdout.
+  #
+  # Built by Log.new_logger on first use and memoized afterwards.
+  #
+  # @return [Logger]
+  def self.log
+    @log ||= Log.new_logger
+  end
+  # Set the Chimps logger, replacing whatever Chimps.log had memoized.
+  #
+  # @param [Logger] new_log
+  def self.log= new_log
+    @log = new_log
   end
+  # Module for initializing the Chimps logger from configuration
+  # settings.
+  module Log
+    # Initialize a new Logger instance with the log level set by
+    # Chimps.verbose?
+    #
+    # @return [Logger]
+    def self.new_logger
+      require 'logger'
+      Logger.new(log_file).tap do |log|
+        log.progname = "Chimps"
+        log.level    = Chimps.verbose? ? Logger::INFO : Logger::WARN
+      end
+    end
+    # Return either the path to the log file in Chimps.config[:log]
+    # or $stdout if the path is blank or equal to `-'.
+    #
+    # @return [String, $stdout] the path to the log or $stdout
+    def self.log_file
+      if Chimps.config[:log]
+        Chimps.config[:log].strip == '-' ? $stdout : Chimps.config[:log]
+      else
+        $stdout
+      end
+    end
+  end
 end

data/lib/chimps/workflows/download.rb ADDED Viewed

@@ -0,0 +1,72 @@
+module Chimps
+  # A download is composed of an initial POST request which obtains a
+  # signed and expiring token from Infochimps followed by a GET to the
+  # URL provided in the token.
+  class Download
+    # The slug or (ID) of the dataset to download
+    attr_accessor :dataset
+    # Provides the use of curl to download the file.
+    include Chimps::Utils::UsesCurl
+    # Create a new download for the dataset named by the given slug or
+    # ID.
+    #
+    # @param [String] dataset
+    def initialize dataset
+      self.dataset = dataset
+    end
+    # Download data to +path+.
+    #
+    # If +path+ is a directory then the resulting file will be put
+    # there with a basename taken from the last path segment of
+    # +signed_url+ (without its query string).
+    # Otherwise it will be placed at +path+ itself.
+    #
+    # NOTE(review): this was documented as returning the Integer exit
+    # code of curl, but the value is whatever +curl+ returns; the
+    # UsesCurl#curl shown in this diff ends with Kernel#system, which
+    # returns true, false or nil -- confirm what callers expect.
+    #
+    # @param [String] path
+    # @return [Object] the return value of +curl+
+    def download path
+      if File.directory?(path)
+        basename = File.basename(signed_url).split('?').first
+        path     = File.join(path, basename)
+      end
+      curl signed_url, :output => path
+    end
+    # The request for obtaining a download token from Infochimps.
+    #
+    # Memoized; signed only if possible.
+    #
+    # @return [Chimps::Request]
+    def token_request
+      @token_request ||= Request.new("/datasets/#{dataset}/downloads", :sign_if_possible => true)
+    end
+    # A download token from Infochimps containing a signed URL from
+    # which data can be downloaded.
+    #
+    # Obtained by POSTing +token_request+ and memoized.  Redirects
+    # (301, 302, 307) are followed, a 200 is returned, and any other
+    # response code raises.
+    #
+    # @return [Chimps::Response]
+    # @raise [Chimps::Error] if the response code is not one of the above
+    def token
+      @token ||= token_request.post do |response, request, result, &block|
+        case response.code
+        when 301, 302, 307
+          response.follow_redirection(request, result, &block)
+        when 200
+          response.return!(request, result, &block)
+        else
+          raise Error.new("Could not obtain download token from Infochimps")
+        end
+      end
+    end
+    # Return the signed URL as parsed from the download token.
+    #
+    # @return [String] the token's signed URL
+    # @raise [Chimps::Error] if the token has no
+    #   <tt>download_token</tt> Hash with a <tt>signed_url</tt> entry
+    def signed_url
+      token.parse
+      raise Error.new("Malformed download token received from Infochimps") unless token['download_token'].is_a?(Hash) && token['download_token']['signed_url']
+      token['download_token']['signed_url']
+    end
+  end
+end

data/lib/chimps/workflows/upload.rb ADDED Viewed

@@ -0,0 +1,113 @@
+module Chimps
+  # An upload at Infochimps is a process attached to a dataset which
+  # carries a state.
+  #
+  # A dataset typically does not have an "upload" associated with it
+  # but anyone authorized to update the dataset can *create* an upload
+  # for it.  This upload object is empty by default.  You can submit
+  # files or links to upload.  When you're done you can submit the
+  # entire upload for processing.  You can view the status of the
+  # upload at any time.
+  class Upload
+    # The slug or (ID) of the dataset to upload for
+    attr_accessor :slug
+    # Gives the ability to use curl to upload local files.
+    include Chimps::Utils::UsesCurl
+    # Create a new Upload for the dataset with the given +slug+ or ID.
+    #
+    # @return [Chimps::Upload]
+    def initialize slug
+      self.slug = slug
+    end
+    # Show this upload.
+    #
+    # @return [Chimps::Response]
+    def show
+      follow_redirects_on :get, "/datasets/#{slug}/upload.yaml"
+    end
+    # Create this upload on Infochimps.
+    #
+    # @return [Chimps::Response]
+    def create
+      follow_redirects_on :post, "/datasets/#{slug}/upload.json", :body => true do |response, request, result, &block|
+        if response.code == 409
+          response              # upload already exists
+        else
+          response.return!(request, result, &block)
+        end
+      end
+    end
+    def update params={}
+      follow_redirects_on :put, "/datasets/#{slug}/upload.json", params
+    end
+    def upload_files *paths
+      paths.map { |p| File.expand_path(p) }.each do |path|
+        upload_file(upload_token)
+      end
+    end
+    def upload_token
+      follow_redirects_on :get, "/datasets/#{slug}/upload.json", :query => { :token => true }
+    end
+    def upload_file path, token
+      token.parse
+      p token
+      raise UploadError.new("#{path} does not exist")                          unless File.exist?(path)
+      raise UploadError.new("#{path} is a directory -- can only upload files") if File.directory?(path)
+      params = %w[AWSAccessKeyId acl key policy success_action_status signature].map do |param|
+        [param, token[param]]
+      end
+      params << ['file', '@' + path] # this is how you tell curl to upload a file
+      Chimps.log.info("Uploading #{path} for dataset #{slug}")
+      curl token['url'], :method => "POST", :params => params
+    end
+    def remove_files *uuids
+      follow_redirects_on :put, "/datasets/#{slug}/upload.json", :body => { :upload => { :remove_files => uuids }}
+    end
+    def create_links *links
+      follow_redirects_on :put, "/datasets/#{slug}/upload.json", :body => { :upload => { :add_links    => links }}
+    end
+    def remove_links *uuids
+      follow_redirects_on :put, "/datasets/#{slug}/upload.json", :body => { :upload => { :remove_links => uuids }}
+    end
+    def start
+      follow_redirects_on :put, "/datasets/#{slug}/upload.json", :query => { :submit => true }
+    end
+    def destroy
+      follow_redirects_on :delete, "/datasets/#{slug}/upload.json"
+    end
+    def restart
+      follow_redirects_on :delete, "/datasets/#{slug}/upload.json", :query => { :restart => true }
+    end
+    def follow_redirects_on method, url, options={}, &block
+      Request.new(url, {:sign => true}.merge(options)).send(method) do |response, request, result, &block|
+        if [301, 302, 307].include?(response.code)
+          response.follow_redirection(request, result, &block)
+        else
+          if response.code != 200 && block_given?
+            response.return!(request, result, &block)
+          else
+            response.return!(request, result)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/chimps.rb CHANGED Viewed

@@ -1,29 +1,29 @@
 require 'rubygems'
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path('../Gemfile', File.dirname(__FILE__))
 require 'bundler/setup'
+require 'chimps/config'
 require 'chimps/utils'
-# The Chimps module implements a Ruby-based command-line interface to
-# the Infochimps data repository.
+# The Chimps module provides classes that make it easy to make
+# requests to Infochimps.
 #
 # Using this tool you can search, download, edit, and upload data and
 # metadata to and from Infochimps.
 module Chimps
-  autoload :Config,       'chimps/config'
-  autoload :CLI,          'chimps/cli'
-  autoload :Command,      'chimps/commands/base'
-  autoload :Commands,     'chimps/commands'
   autoload :Request,      'chimps/request'
-  autoload :QueryRequest, 'chimps/request'
+  autoload :QueryRequest, 'chimps/query_request'
   autoload :Response,     'chimps/response'
-  autoload :Typewriter,   'chimps/typewriter'
-  autoload :Workflows,    'chimps/workflows'
+  autoload :Download,     'chimps/workflows/download'
+  autoload :Upload,       'chimps/workflows/upload'
   # Load and resolve configuration.
+  #
+  # Reads the file named by config[:site_config] and then the one
+  # named by config[:config] (each only if set and present on disk),
+  # resolves the configuration, and copies config[:site] into
+  # config[:dataset] when only the former is set.
+  #
+  # @return [true]
   def self.boot!
-    Config.read Config[:site_config] if Config[:site_config] && File.exist?(Config[:site_config])
-    Config.read Config[:config]      if Config[:config]      && File.exist?(Config[:config])
-    Config.resolve!
+    config.read config[:site_config] if config[:site_config] && File.exist?(config[:site_config])
+    config.read config[:config]      if config[:config]      && File.exist?(config[:config])
+    config.resolve!
+    config[:dataset] = config[:site] if (! config[:dataset]) && config[:site] # backwards compatibility
+    true
   end
 end

data/spec/chimps/query_request_spec.rb ADDED Viewed

@@ -0,0 +1,44 @@
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+# Specs for Chimps::QueryRequest: building the base URL and the query
+# string.
+describe Chimps::QueryRequest do
+  # Point the query configuration at a fake host and key before each example.
+  before do
+    Chimps.config[:query][:host]  = 'http://qubar.com'
+    Chimps.config[:query][:key]   = 'spec_key'
+  end
+  describe "generating the base URL with query string" do
+    it "should join the path to the Infochimps query host" do
+      Chimps::QueryRequest.new('/path/to/something').base_url.should == 'http://qubar.com/path/to/something'
+    end
+    it "should generate the same base URL regardless of whether the path has a leading '/' or not" do
+      Chimps::QueryRequest.new('/path/to/something').base_url.should == Chimps::QueryRequest.new('path/to/something').base_url
+    end
+  end
+  describe "generating the query string" do
+    it "should add apikey and requested_at params by default" do
+      qs = Chimps::QueryRequest.new('/path/to/something').query_string
+      qs.should     include('apikey')
+      qs.should     include('requested_at')
+      qs.should_not include('signature')
+    end
+    it "should properly URL encode the query string it generates" do
+      Chimps::QueryRequest.new('/path/to/something', :query_params => {:foo => 'bar baz'}).query_string.should include('foo=bar%20baz')
+    end
+    it "should raise an error if no credentials are available" do
+      # The before hook sets the key again for the next example.
+      Chimps.config[:query][:key] = nil
+      lambda { Chimps::QueryRequest.new('/path/to/something').query_string }.should raise_error(Chimps::AuthenticationError)
+    end
+    it "should allow setting a raw query string" do
+      Chimps::QueryRequest.new('/path/to/something', :query => 'foo=bar', :raw => true).query_string.should == 'foo=bar'
+    end
+  end
+end

data/spec/chimps/request_spec.rb ADDED Viewed

@@ -0,0 +1,92 @@
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+# Specs for Chimps::Request: building the base URL, the query string
+# (including signing) and the request body, and making a request.
+describe Chimps::Request do
+  # Point the dataset configuration at a fake host with fake
+  # credentials before each example.
+  before do
+    Chimps.config[:dataset][:host]   = 'http://foobar.com'
+    Chimps.config[:dataset][:key]    = 'spec_key'
+    Chimps.config[:dataset][:secret] = 'secret'
+  end
+  describe "generating the base URL with query string" do
+    it "should join the path to the Infochimps site host" do
+      Chimps::Request.new('/path/to/something').base_url.should == 'http://foobar.com/path/to/something'
+    end
+    it "should generate the same base URL regardless of whether the path has a leading '/' or not" do
+      Chimps::Request.new('/path/to/something').base_url.should == Chimps::Request.new('path/to/something').base_url
+    end
+  end
+  describe "generating the query string" do
+    it "should generate no query string by default" do
+      Chimps::Request.new('/path/to/something').query_string.should_not include('?')
+    end
+    it "should encode a Hash of query string parameters when given" do
+      Chimps::Request.new('/path/to/something', :query_params => {:foo => 'bar', :fuzz => 'booz'}).query_string.should == 'foo=bar&fuzz=booz'
+    end
+    it "should properly URL encode the query string it generates" do
+      Chimps::Request.new('/path/to/something', :query_params => {:foo => 'bar baz'}).query_string.should == 'foo=bar%20baz'
+    end
+    it "should sign the URL it generates if asked to" do
+      qs = Chimps::Request.new('/path/to/something', :sign => true).query_string
+      qs.should include('apikey')
+      qs.should include('requested_at')
+      qs.should include('signature')
+    end
+    it "should raise an error if asked to sign and no credentials are available" do
+      Chimps.config[:dataset][:key] = nil
+      lambda { Chimps::Request.new('/path/to/something', :sign => true).query_string }.should raise_error(Chimps::AuthenticationError)
+    end
+    it "should not raise an error if asked to sign_if_possible and no credentials are avialable" do
+      Chimps.config[:dataset][:key] = nil
+      lambda { Chimps::Request.new('/path/to/something', :sign_if_possible => true).query_string }.should_not raise_error(Chimps::AuthenticationError)
+    end
+    it "should allow setting a raw query string" do
+      Chimps::Request.new('/path/to/something', :query => 'foo=bar', :raw => true).query_string.should == 'foo=bar'
+    end
+  end
+  describe "generating the request body" do
+    it "should have no body by default" do
+      Chimps::Request.new('/path/to/something').body.should be_blank
+    end
+    it "should encode a Hash of parameters when given" do
+      Chimps::Request.new('/path/to/something', :body => { :foo => 'bar' }).encoded_body.should == '{"foo":"bar"}'
+    end
+    it "should sign the body when it exists" do
+      request = Chimps::Request.new('/path/to/something', :body => { :foo => 'bar'}, :sign => true)
+      request.should_receive(:sign).with('{"foo":"bar"}')
+      request.query_string
+    end
+    it "should allow setting a raw body" do
+      Chimps::Request.new('/path/to/something', :body => '{"foo": "bar"}', :raw => true).encoded_body.should == '{"foo": "bar"}'
+    end
+  end
+  # NOTE(review): nothing is stubbed in these two examples, so they
+  # presumably make real HTTP requests to the configured hosts and
+  # depend on network access -- confirm.
+  describe "making a request" do
+    it "should swallow low-level networking errors" do
+      Chimps::Request.new('/some/made/up/path').get.code.should == 404
+    end
+    it "should swallow application-level errors" do
+      Chimps.config[:dataset][:host]   = 'http://www.infochimps.com'
+      Chimps::Request.new('/some/made/up/path').get.code.should == 404
+    end
+  end
+end

data/spec/chimps/response_spec.rb CHANGED Viewed

@@ -1,5 +1,4 @@
 require File.join(File.dirname(__FILE__), '../spec_helper')
-require 'restclient'
 describe Chimps::Response do

data/spec/chimps/workflows/download_spec.rb ADDED Viewed

@@ -0,0 +1,48 @@
+require File.expand_path('../../spec_helper', File.dirname(__FILE__))
+# Specs for Chimps::Download: choosing the output path for a download
+# and extracting the signed URL from a download token.
+describe Chimps::Download do
+  before do
+    @download = Chimps::Download.new('foobar')
+  end
+  describe "downloading" do
+    # signed_url is stubbed and curl is mocked, so no request is made here.
+    before do
+      @basename   = "data.tar.gz"
+      @signed_url = "http://bucket.aws.amazon.com/path/to/#{@basename}?this=is&aFake=SignedURL"
+      @download.stub!(:signed_url).and_return(@signed_url)
+    end
+    it "should write to a sensibly named file when given a directory" do
+      @download.should_receive(:curl).with(@signed_url, { :output => File.join('/tmp', @basename) })
+      @download.download('/tmp')
+    end
+    it "should write to a path when given a path" do
+      @download.should_receive(:curl).with(@signed_url, { :output => '/wukka/wukka.tar.gz' })
+      @download.download('/wukka/wukka.tar.gz')
+    end
+  end
+  describe "extracting a signed URL from a download token" do
+    # A plain Hash with a stubbed #parse stands in for the token response.
+    before do
+      @token = {}
+      @token.stub!(:parse)
+      @download.stub!(:token).and_return(@token)
+    end
+    it "should raise an Error if the token doesn't have a signed URL " do
+      lambda { @download.signed_url }.should raise_error(Chimps::Error)
+      @token['download_token'] = {'foo' => 'bar'}
+      lambda { @download.signed_url }.should raise_error(Chimps::Error)
+    end
+    it "should return the signed URL from the token when present" do
+      @token['download_token'] = {'signed_url' => 'foobar'}
+      @download.signed_url.should == 'foobar'
+    end
+  end
+end

data/spec/spec_helper.rb CHANGED Viewed

@@ -4,27 +4,10 @@ CHIMPS_LIB_DIR  = File.join(CHIMPS_ROOT_DIR, 'lib')                         unle
 $: << CHIMPS_LIB_DIR
 require 'rubygems'
-require 'spec'
+require 'rspec'
 require 'chimps'
 Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
-module Chimps
-  module Test
-    TMP_DIR   = "/tmp/chimps_test" unless defined?(TMP_DIR)
-  end
-end
-Spec::Runner.configure do |config|
+RSpec.configure do |config|
   config.include Chimps::Test::CustomMatchers
-  config.before do
-    FileUtils.mkdir_p Chimps::Test::TMP_DIR
-    FileUtils.cd Chimps::Test::TMP_DIR
-  end
-  config.after do
-    FileUtils.rm_rf Chimps::Test::TMP_DIR
-  end
 end