RubyGems - chimps - Versions diffs - 0.2.2 → 0.3.0 - Mend

chimps 0.2.2 → 0.3.0

Files changed (56) hide show

data/Gemfile +3 -9
data/Gemfile.lock +14 -10
data/README.rdoc +146 -240
data/Rakefile +4 -33
data/VERSION +1 -1
data/lib/chimps/config.rb +35 -21
data/lib/chimps/{utils/error.rb → error.rb} +1 -12
data/lib/chimps/query_request.rb +67 -0
data/lib/chimps/request.rb +82 -108
data/lib/chimps/response.rb +62 -22
data/lib/chimps/utils/typewriter.rb +90 -0
data/lib/chimps/utils/uses_curl.rb +22 -12
data/lib/chimps/utils.rb +50 -6
data/lib/chimps/workflows/download.rb +72 -0
data/lib/chimps/workflows/upload.rb +113 -0
data/lib/chimps.rb +12 -12
data/spec/chimps/query_request_spec.rb +44 -0
data/spec/chimps/request_spec.rb +92 -0
data/spec/chimps/response_spec.rb +0 -1
data/spec/chimps/workflows/download_spec.rb +48 -0
data/spec/spec_helper.rb +2 -19
metadata +46 -91
data/.document +0 -5
data/.gitignore +0 -32
data/CHANGELOG.textile +0 -4
data/bin/chimps +0 -5
data/lib/chimps/cli.rb +0 -28
data/lib/chimps/commands/base.rb +0 -65
data/lib/chimps/commands/batch.rb +0 -40
data/lib/chimps/commands/create.rb +0 -31
data/lib/chimps/commands/destroy.rb +0 -26
data/lib/chimps/commands/download.rb +0 -46
data/lib/chimps/commands/help.rb +0 -100
data/lib/chimps/commands/list.rb +0 -41
data/lib/chimps/commands/query.rb +0 -82
data/lib/chimps/commands/search.rb +0 -48
data/lib/chimps/commands/show.rb +0 -30
data/lib/chimps/commands/test.rb +0 -39
data/lib/chimps/commands/update.rb +0 -34
data/lib/chimps/commands/upload.rb +0 -50
data/lib/chimps/commands.rb +0 -125
data/lib/chimps/typewriter.rb +0 -349
data/lib/chimps/utils/log.rb +0 -48
data/lib/chimps/utils/uses_model.rb +0 -34
data/lib/chimps/utils/uses_yaml_data.rb +0 -93
data/lib/chimps/workflows/batch.rb +0 -127
data/lib/chimps/workflows/downloader.rb +0 -102
data/lib/chimps/workflows/up.rb +0 -149
data/lib/chimps/workflows/upload/bundler.rb +0 -249
data/lib/chimps/workflows/upload/notifier.rb +0 -59
data/lib/chimps/workflows/upload/token.rb +0 -77
data/lib/chimps/workflows/upload/uploader.rb +0 -51
data/lib/chimps/workflows.rb +0 -12
data/spec/chimps/typewriter_spec.rb +0 -114
data/spec/chimps/workflows/upload/bundler_spec.rb +0 -75
data/spec/chimps/workflows/upload/token_spec.rb +0 -6

data/lib/chimps/workflows/downloader.rb DELETED Viewed

@@ -1,102 +0,0 @@
-module Chimps
-  module Workflows
-    # Downloads data from Infochimps by first making a request for a
-    # download token and, if granted one, proceeding to download the
-    # data.
-    #
-    # Will download the latest package for a given dataset, optionally
-    # constrained to have given data and package formats.
-    class Downloader
-      include Chimps::Utils::UsesCurl
-      # The token received from Infochimps which contains a signed URL
-      # for the download.
-      attr_reader :token
-      # The ID or handle of the dataset to download.
-      attr_reader :dataset
-      # The data format of the data to download.
-      attr_reader :fmt
-      # The package format of the data to download.
-      attr_reader :pkg_fmt
-      # Create a new Downloader with the given parameters.
-      #
-      # @param [Hash] options
-      # @option options [String, Integer] dataset the ID or handle of the dataset to download
-      # @option options [String] fmt the data format to download
-      # @option options [String] pkg_fmt the package format to download
-      # @option options [String] local_path the local path to which the data will be downloaded
-      # @return [Chimps::Workflows::Downloader]
-      def initialize options={}
-        @dataset    = options[:dataset]
-        @fmt        = options[:fmt]
-        @pkg_fmt    = options[:pkg_fmt]
-        @local_path = options[:local_path]
-      end
-      # Params to send for the token.
-      #
-      # @return [Hash]
-      def token_params
-        { :download_token => { :dataset_id => dataset, :fmt =>  fmt, :pkg_fmt => pkg_fmt} }
-      end
-      # Ask for a download token for this dataset/package.  If no token
-      # or an invalid token is obtained, raise an error.
-      def ask_for_token!
-        new_token = Request.new(download_tokens_path, :data => token_params, :sign_if_possible => true).post
-        if new_token.error?
-          new_token.print
-          raise AuthenticationError.new("Unauthorized to download dataset #{dataset}")
-        else
-          @token = new_token
-        end
-      end
-      # Path to submit download token requests to.
-      #
-      # @return [String]
-      def download_tokens_path
-        "/download_tokens"
-      end
-      # The signed, remote URL from where the data can be downloaded.
-      #
-      # @return [String]
-      def download_url
-        token['download_token']['package']['url']
-      end
-      # The local path where the downloaded data will be put.
-      #
-      # Defaults to the current directory and the default basename of
-      # the downloaded package.
-      #
-      # @return [String, nil]
-      def local_path
-        @local_path || token["download_token"]["package"]["basename"]
-      end
-      # Issue the download request.
-      #
-      # Uses +curl+ for the data transfer.
-      def download!
-        command = "#{curl} -o '#{local_path}' '#{download_url}'"
-        puts command if Chimps.verbose?
-        system(command)
-      end
-      # Ask for a token and perform the download.
-      def execute!
-        ask_for_token!
-        download!
-      end
-    end
-  end
-end

data/lib/chimps/workflows/up.rb DELETED Viewed

@@ -1,149 +0,0 @@
-module Chimps
-  module Workflows
-    # A namespace for classes which handle each step of the
-    # BundleAndUpload workflow.
-    module Upload
-      autoload :UploadToken, 'chimps/workflows/upload/token'
-      autoload :Bundler,     'chimps/workflows/upload/bundler'
-      autoload :Uploader,    'chimps/workflows/upload/uploader'
-      autoload :Notifier,    'chimps/workflows/upload/notifier'
-    end
-    # Uploads data to Infochimps by first asking for authorization,
-    # creating an archive, obtaining a token, uploading data, and
-    # notifying Infochimps.
-    #
-    # A helper object from Chimps::Workflows::Upload is delegated to
-    # for each step:
-    #
-    # - authorization & obtaining a token: Chimps::Workflows::Upload::UploadToken
-    # - creating an archive: Chimps::Workflows::Upload::Bundler
-    # - uploading data: Chimps::Workflows::Upload::Uploader
-    # - notifying Infochimps: Chimps::Workflows::Upload::Notifier
-    class Up
-      # The ID or handle of the dataset to upload data to.
-      attr_accessor :dataset
-      # An array of paths to files and directories to package into an
-      # archive.
-      attr_accessor :paths
-      # The format to annotate the upload with.
-      attr_accessor :fmt
-      # The path to the archive to create when uploading.
-      attr_accessor :archive
-      # Create a new Up workflow from the given parameters.
-      #
-      # If <tt>:fmt</tt> is provided it will be used as the data
-      # format to annotate the upload with.  If not, Chimps will try
-      # to guess.
-      #
-      # @param [Hash] options
-      # @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
-      # @option options [Array<String>] paths the paths to aggregate and upload
-      # @option options [String, IMW::Resource] archive (Chimps::Workflows::Upload::Bundler#default_archive_path) the path to the archive to create
-      # @option options [String] fmt the data format to annotate the upload with
-      def initialize options={}
-        self.dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
-        self.paths   = options[:paths]
-        self.archive = options[:archive]
-        self.fmt     = options[:fmt]
-      end
-      # Upload data to Infochimps by first asking for authorization,
-      # creating an archive, obtaining a token, uploading data, and
-      # notifying Infochimps.
-      def execute!
-        authorize_for_upload!
-        bundle!
-        ask_for_token!
-        upload!
-        notify_infochimps!
-      end
-      #
-      # == Helper Objects ==
-      #
-      # The token authorizing an upload.
-      #
-      # @return [Chimps::Workflows::Upload::UploadToken]
-      def authorization_token
-        @authorization_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset)
-      end
-      # The bundler that will aggregate data for the upload.
-      #
-      # @return [Chimps::Workflows::Upload::Bundler]
-      def bundler
-        @bundler ||= Chimps::Workflows::Upload::Bundler.new(dataset, paths, :fmt => fmt, :archive => archive)
-      end
-      # The token consumed for an upload.
-      #
-      # @return [Chimps::Workflows::Upload::UploadToken]
-      def upload_token
-        @upload_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset, :fmt => bundler.fmt, :pkg_fmt => bundler.pkg_fmt)
-      end
-      # The uploader that will actually send data to Infochimps.
-      #
-      # @return [Chimps::Workflows::Upload::Uploader]
-      def uploader
-        @uploader ||= Chimps::Workflows::Upload::Uploader.new(upload_token, bundler)
-      end
-      # The notifier that will inform Infochimps of the new data.
-      #
-      # @return [Chimps::Workflows::Upload::Notifier]
-      def notifier
-        @notifier ||= Chimps::Workflows::Upload::Notifier.new(upload_token, bundler)
-      end
-      #
-      # == Actions ==
-      #
-      # Authorize the Chimps user for this upload.
-      #
-      # Delegates to Chimps::Workflows::Upload::UploadToken
-      def authorize_for_upload!
-        authorization_token.get
-      end
-      # Bundle the data together.
-      #
-      # Delegates to Chimps::Workflows::Upload::Bundler
-      def bundle!
-        bundler.bundle!
-      end
-      # Obtain an upload token from Infochimps.
-      #
-      # Delegates to Chimps::Workflows::Upload::UploadToken
-      def ask_for_token!
-        upload_token.get
-      end
-      # Upload the data to Infochimps.
-      #
-      # Delegates to Chimps::Workflows::Upload::Uploader
-      def upload!
-        uploader.upload!
-      end
-      # Make a final POST request to Infochimps, creating the final
-      # resource.
-      #
-      # @return [Chimps::Response]
-      def notify_infochimps!
-        notifier.post
-      end
-    end
-  end
-end

data/lib/chimps/workflows/upload/bundler.rb DELETED Viewed

@@ -1,249 +0,0 @@
-module Chimps
-  module Workflows
-    module Upload
-      # Encapsulates the process of analyzing and bundling input
-      # paths.
-      class Bundler
-        #
-        # == Initialization & Attributes
-        #
-        # Instantiate a new Bundler for bundling +paths+ as a package
-        # for +dataset+.
-        #
-        # Each input path can be either a String or an IMW::Resource
-        # identifying a local or remote resource to bundle into an
-        # upload package for Infochimps (remote resources will be
-        # first copied to the local filesystem by IMW).
-        #
-        # If no format is given the format will be guessed by IMW.
-        #
-        # If no archive is given the archive path will be set to a
-        # timestamped name in the current directory, see
-        # Bundler#default_archive_path.
-        #
-        # @param [String, Integer] dataset the ID or slug of an existing Infochimps dataset
-        # @param [Array<String, IMW::Resource>] paths
-        # @param [Hash] options
-        # @option options [String] fmt the format (csv, tsv, xls, &c.) of the data being uploaded
-        # @option options [String, IMW::Resource] archive the path to the local archive to package the input paths into
-        def initialize dataset, paths, options={}
-          require_imw
-          @dataset     = dataset
-          self.paths   = paths
-          if options[:fmt]
-            self.fmt     = options[:fmt]
-          end
-          if options[:archive]
-            self.archive = options[:archive]
-          end
-        end
-        # The dataset this bundler is processing data for.
-        attr_accessor :dataset
-        # The paths this bundler is processing.
-        attr_reader :paths
-        # The resources this bundler is processing.
-        #
-        # Resources are IMW::Resource objects built from this
-        # Bundler's paths.
-        attr_reader :resources
-        # Set the paths for this Bundler.
-        #
-        # If only one input path is given and it is already an archive
-        # or a compressed file then no packaging will be attempted.
-        # Otherwise the input paths will be packaged together
-        #
-        # @param [Array<String, IMW::Resource>] new_paths
-        def paths= new_paths
-          raise PackagingError.new("Must provide at least one path to upload.") if new_paths.blank?
-          @paths, @resources = [], []
-          new_paths.each do |path|
-            resource = IMW.open(path)
-            resource.should_exist!("Cannot bundle.") if resource.is_local?
-            @paths     << path
-            @resources << resource
-          end
-          if resources.size == 1
-            potential_package = resources.first
-            if potential_package.is_local? && potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
-              self.archive = potential_package
-              @skip_packaging = true
-            end
-          end
-        end
-        # The format of the data being bundled.
-        attr_writer :fmt
-        # The format of the data being bundled.
-        #
-        # Will make a guess using IMW::Tools::Summarizer if no format
-        # is given.
-        def fmt
-          @fmt ||= summarizer.most_common_data_format
-        end
-        # The archive this bundler will build for uploading to
-        # Infochimps.
-        #
-        # @return [IMW::Resource]
-        def archive
-          return @archive if @archive
-          self.archive = default_archive_path
-          self.archive
-        end
-        # Set the path to the archive that will be built.
-        #
-        # The given +path+ must represent a compressed file or archive
-        # (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
-        # <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
-        # extension).
-        #
-        # Additionally, if multiple local paths are being packaged, the
-        # given +path+ must be an archive (not simply <tt>.bz2</tt> or
-        # <tt>.gz</tt> extensions).
-        #
-        # @param [String, IMW::Resource] path_or_obj the path or IMW::Resource object pointing to the archive to use
-        def archive= path_or_obj
-          potential_package = IMW.open(path_or_obj)
-          raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file")        unless potential_package.is_compressed? ||  potential_package.is_archive?
-          raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if     resources.size > 1               && !potential_package.is_archive?
-          @archive = potential_package
-        end
-        # Return the package format of this bundler's archive, i.e.
-        # its extension.
-        #
-        # @return [String]
-        def pkg_fmt
-          archive.extension
-        end
-        # Return the total size of the package after aggregating and
-        # packaging.
-        #
-        # @return [Integer]
-        def size
-          archive.size
-        end
-        # Return summary information about the package prepared by the
-        # bundler.
-        #
-        # @return [Hash]
-        def summary
-          summarizer.summary
-        end
-        # Bundle the data for this bundler together.
-        def bundle!
-          return if skip_packaging?
-          result = archiver.package(archive.path)
-          raise PackagingError.new("Unable to package files for upload.  Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
-          archiver.clean!
-        end
-        #
-        # == Helper Objects ==
-        #
-        # The IMW::Tools::Archiver responsible for packaging files
-        # into a local archive.
-        #
-        # @return [IMW::Tools::Archiver]
-        def archiver
-          @archiver ||= IMW::Tools::Archiver.new(archive.name, paths_to_bundle)
-        end
-        # Return the summarizer responsible for summarizing data on this
-        # upload.
-        #
-        # @return [IMW::Tools::Summarizer]
-        def summarizer
-          @summarizer ||= IMW::Tools::Summarizer.new(resources)
-        end
-        # Should the packaging step be skipped?
-        #
-        # This will happen if only one local input path was provided and
-        # it exists and is a compressed file or archive.
-        #
-        # @return [true, false]
-        def skip_packaging?
-          !! @skip_packaging
-        end
-        #
-        # == Paths & URLs ==
-        #
-        # The default path to the archive that will be built.
-        #
-        # Defaults to a file in the current directory named after the
-        # +dataset+'s ID or handle and the current time.  The package
-        # format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
-        # size, see
-        # Chimps::Workflows::Upload::Bundler#default_archive_extension.
-        #
-        # @return [String]
-        def default_archive_path
-          # in current working directory...
-          "chimps_#{dataset}-#{Time.now.strftime(Chimps::Config[:timestamp_format])}.#{default_archive_extension}"
-        end
-        # Returns <tt>zip</tt> if the data is less than 500 MB in size and
-        # <tt>tar.bz2</tt> otherwise.
-        #
-        # @return ['tar.bz2', 'zip']
-        def default_archive_extension
-          summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
-        end
-        # The URL to the <tt>README-infochimps</tt> file on Infochimps'
-        # servers.
-        #
-        # @return [String]
-        def readme_url
-          File.join(Chimps::Config[:site][:host], "/README-infochimps")
-        end
-        # The URL to the ICSS file for this dataset on Infochimps
-        # servers
-        def icss_url
-          File.join(Chimps::Config[:site][:host], "datasets", "#{dataset}.yaml")
-        end
-        # Both the local paths and remote paths to package.
-        #
-        # @return [Array<String>]
-        def paths_to_bundle
-          paths + [readme_url, icss_url]
-        end
-        protected
-        # Require IMW and match the IMW logger to the Chimps logger.
-        def require_imw
-          begin
-            require 'imw'
-            IMW.log = Chimps.log
-            IMW.verbose = Chimps.verbose?
-          rescue LoadError
-            raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
-          end
-        end
-      end
-    end
-  end
-end

data/lib/chimps/workflows/upload/notifier.rb DELETED Viewed

@@ -1,59 +0,0 @@
-module Chimps
-  module Workflows
-    module Upload
-      # Encapsulates the process of notifying Infochimps of new data
-      # that's already been uploaded.
-      class Notifier
-        # The response from Infochimps to the request to create a
-        # package.
-        attr_accessor :response
-        # The upload token used for the upload.
-        attr_accessor :token
-        # The bundler responsible for the upload.
-        attr_accessor :bundler
-        def initialize token, bundler
-          self.token   = token
-          self.bundler = bundler
-        end
-        # The path on Infochimps to submit package creation requests
-        # to.
-        #
-        # @return [String]
-        def path
-          "/datasets/#{bundler.dataset}/packages.json"
-        end
-        # Information about the uploaded data to pass to Infochimps
-        # when notifying.
-        #
-        # @return [Hash]
-        def data
-          { :package => {:fmt => token['fmt'], :pkg_size => bundler.size, :pkg_fmt => bundler.pkg_fmt, :raw_summary => bundler.summary, :token_timestamp => token['timestamp'] } }
-        end
-        # Make a request to notify Infochimps of the new data.
-        #
-        # @return [Chimps::Response]
-        def post
-          @response = Request.new(path, :signed => true, :data => data).post
-          if response.error?
-            response.print
-            raise UploadError.new("Unable to notify Infochimps of newly uploaded data.")
-          end
-          response
-        end
-      end
-    end
-  end
-end

data/lib/chimps/workflows/upload/token.rb DELETED Viewed

@@ -1,77 +0,0 @@
-module Chimps
-  module Workflows
-    module Upload
-      # Encapsulates the process of obtaining an upload token for a
-      # dataset from Infochimps.
-      class UploadToken
-        # The ID or slug of the dataset for which to obtain an upload
-        # token.
-        attr_accessor :dataset
-        # The format (csv, xls, tsv, &c.) of the data in the upload.
-        attr_accessor :fmt
-        # The package format (zip, tar.bz2, &c.)  of the data in the
-        # upload.
-        attr_accessor :pkg_fmt
-        # The response from Infochimps to the request for an upload
-        # token.
-        attr_accessor :response
-        # Instantiate a new UploadToken for the given +dataset+ with
-        # the given +fmt+ and +pkg_fmt+.
-        #
-        # @param [String,Integer] dataset the ID or slug of the dataset to upload data for
-        # @param [Hash] options
-        # @option options [String] fmt the data format (csv, xls, tsv, &c.) of the data
-        # @option options [String] pkg_fmt the package format (zip, tar.bz2, tar.gz, &c.) of the data
-        def initialize dataset, options={}
-          @dataset = dataset
-          @fmt     = options[:fmt]
-          @pkg_fmt = options[:pkg_fmt]
-        end
-        # Delegate slicing to the returned response.
-        def [] param
-          response && response[param]
-        end
-        # The path on Infochimps to submit upload token requests to.
-        #
-        # @return [String]
-        def path
-          "/datasets/#{dataset}/packages/new.json"
-        end
-        # Parameters passed to Infochimps to request an upload token.
-        #
-        # @return [Hash]
-        def params
-          { :package => { :fmt => fmt, :pkg_fmt => pkg_fmt } }
-        end
-        # Make the request to get an upload token from Infochimps
-        def get
-          @response = Request.new(path, :params => params, :signed => true).get
-          if response.error?
-            response.print
-            raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
-          end
-        end
-        # Parses the 'url' property of the response from Infochimps to
-        # determine the bucket name.
-        #
-        # @return [String]
-        def bucket
-          File.basename(response['url'])
-        end
-      end
-    end
-  end
-end

data/lib/chimps/workflows/upload/uploader.rb DELETED Viewed

@@ -1,51 +0,0 @@
-module Chimps
-  module Workflows
-    module Upload
-      # Encapsulates the process of uploading a package to Infochimps.
-      class Uploader
-        include Chimps::Utils::UsesCurl
-        # The token consumed when uploading.
-        attr_accessor :token
-        # The bundler from which to glean information about the upload.
-        attr_accessor :bundler
-        # Instantiate a new Uploader which will consume the given
-        # +token+ and upload data from the given +bundler+.
-        #
-        # @param [Chimps::Workflows::Upload::UploadToken] token
-        # @param [Chimps::Workflows::Upload::Bundler] bundler
-        def initialize token, bundler
-          self.token   = token
-          self.bundler = bundler
-        end
-        # Return a string built from the granted upload token that can
-        # be fed to +curl+ in order to authenticate with and upload to
-        # Amazon.
-        #
-        # @return [String]
-        def upload_data
-          data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
-          data << ["-F file=@#{bundler.archive.path}"]
-          data.join(' ')
-        end
-        # Upload the data.
-        #
-        # Uses +curl+ for the transfer.
-        def upload!
-          progress_meter = Chimps.verbose? ? '' : '-s -S'
-          command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
-          puts command if Chimps.verbose?
-          raise UploadError.new("Failed to upload #{bundler.archive.path} to Infochimps") unless system(command)
-        end
-      end
-    end
-  end
-end

data/lib/chimps/workflows.rb DELETED Viewed

@@ -1,12 +0,0 @@
-module Chimps
-  # A module defining classes to handle complex workflows between the
-  # local machine and Infochimps' servers.
-  module Workflows
-    autoload :Upload,       'chimps/workflows/up'
-    autoload :Up,           'chimps/workflows/up'
-    autoload :Downloader,   'chimps/workflows/downloader'
-    autoload :BatchUpdater, 'chimps/workflows/batch'
-  end
-end