RubyGems - chimps - Versions diffs - 0.1.4 → 0.1.5 - Mend

chimps 0.1.4 → 0.1.5

Files changed (16) hide show

data/VERSION +1 -1
data/bin/chimps +1 -0
data/lib/chimps/commands/base.rb +6 -9
data/lib/chimps/commands/upload.rb +9 -3
data/lib/chimps/config.rb +32 -3
data/lib/chimps/workflows/up.rb +149 -0
data/lib/chimps/workflows/upload/bundler.rb +248 -0
data/lib/chimps/workflows/upload/notifier.rb +59 -0
data/lib/chimps/workflows/upload/token.rb +77 -0
data/lib/chimps/workflows/upload/uploader.rb +51 -0
data/lib/chimps/workflows.rb +2 -1
data/spec/chimps/workflows/upload/bundler_spec.rb +75 -0
data/spec/chimps/workflows/upload/token_spec.rb +6 -0
data/spec/spec_helper.rb +15 -2
metadata +11 -3
data/lib/chimps/workflows/uploader.rb +0 -267

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.1.4
1	+ 0.1.5

data/bin/chimps CHANGED Viewed

@@ -1,4 +1,5 @@
 #!/usr/bin/env ruby
+$:.unshift(File.dirname(__FILE__) + '/../lib') unless $:.include?(File.dirname(__FILE__) + '/../lib')
 require "chimps"
 require 'chimps/cli'
 Chimps::CLI.execute!(ARGV.dup) if $0 == __FILE__

data/lib/chimps/commands/base.rb CHANGED Viewed

@@ -39,7 +39,7 @@ module Chimps
       @argv = argv
       run_options_definers
       parse_command_line!
-      resolve_options!
+      Chimps.boot!
     end
     # The name of this command, including the
@@ -69,14 +69,6 @@ module Chimps
       end
     end
-    # Ensure that certain options (verbosity, log file) that can be
-    # passed on the command-line override those stored in a
-    # configuration file (if present).
-    def resolve_options!
-      Chimps::Config.load       # load defaults from config file
-      Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
-    end
     # Run all methods beginning with +define+ and ending with +option+
     # or +options+.
     #
@@ -107,6 +99,11 @@ module Chimps
       on("-l", "--log-file PATH", "Use the given path to log Chimps output (`-' is interpreted as $stdout).") do |path|
         Chimps::COMMAND_LINE_OPTIONS[:log_file] = path # don't expand_path as it might be a `-'
       end
+      on("-q", "--skip-plugins", "Don't load plugins from Chimps::CONFIG[:plugins] directory.") do |bool|
+        Chimps::CONFIG[:skip_plugins] = true
+      end
     end
     # Run this command.

data/lib/chimps/commands/upload.rb CHANGED Viewed

@@ -16,6 +16,12 @@ sensible name in the current directory but can also be customized.
 If the only file to be packaged is already a package (.zip, .tar,
 .tar.gz, &c.) then it will not be packaged again.
+Supplied paths are allowed to be remote files so someting like
+  chimps upload my-dataset path/to/local/file.txt http://my-site.com/path/to/remote/file.txt
+will work.
 EOF
       # The path to the archive
@@ -34,10 +40,10 @@ EOF
         argv.first
       end
-      # A list of local paths to upload.
+      # A list of paths to upload.
       #
       # @return [Array<String>]
-      def local_paths
+      def paths
         raise CLIError.new("Must provide some paths to upload") if argv.length < 2
         argv[1..-1]
       end
@@ -55,7 +61,7 @@ EOF
       # Upload the data.
       def execute!
-        Chimps::Workflows::Uploader.new(:dataset => dataset, :archive => archive, :local_paths => local_paths, :fmt => fmt).execute!
+        Chimps::Workflows::Up.new(:dataset => dataset, :archive => archive, :paths => paths, :fmt => fmt).execute!.print
       end
     end
   end

data/lib/chimps/config.rb CHANGED Viewed

@@ -1,5 +1,12 @@
 module Chimps
+  # Load all configuration, load plugins, and resolve options.
+  def self.boot!
+    Chimps::Config.load
+    Chimps::Config.load_plugins
+    Chimps::Config.resolve_options!
+  end
   # Options that can be overridden by the command-line.
   COMMAND_LINE_OPTIONS = {
     :identity_file    => File.expand_path(ENV["CHIMPS_RC"] || "~/.chimps"),
@@ -16,7 +23,8 @@ module Chimps
     :site => {
       :host => ENV["CHIMPS_HOST"]       || 'http://infochimps.org'
     },
-    :timestamp_format => "%Y-%m-%d_%H-%M-%S"
+    :timestamp_format => "%Y-%m-%d_%H-%M-%S",
+    :plugins => ["/usr/local/share/chimps"]
   }
   # Is Chimps in verbose mode?
@@ -35,7 +43,14 @@ module Chimps
   # Defines methods to load the Chimps configuration.
   module Config
+    # Ensure that certain options (verbosity, log file) that can be
+    # passed on the command-line override those stored in a
+    # configuration file (if present).
+    def self.resolve_options!
+      Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
+    end
     # The root of the Chimps source base.
     #
     # @return [String]
@@ -43,6 +58,17 @@ module Chimps
       File.expand_path File.join(File.dirname(__FILE__), '../..')
     end
+    # Require all ruby files in the directory
+    # Chimps::CONFIG[:plugins].
+    def self.load_plugins
+      return if Chimps::CONFIG[:skip_plugins]
+      plugin_dirs = Chimps::CONFIG[:plugins]
+      return if plugin_dirs.blank?
+      plugin_dirs.each do |dir|
+        Dir[File.expand_path(dir) + "/*.rb"].each { |plugin| require plugin }
+      end
+    end
     # Load the configuration settings from the configuration/identity
     # file.
     def self.load
@@ -50,8 +76,11 @@ module Chimps
       if File.exist?(COMMAND_LINE_OPTIONS[:identity_file])
         require 'yaml'
         YAML.load_file(COMMAND_LINE_OPTIONS[:identity_file]).each_pair do |key, value|
-          if value.is_a?(Hash) && CONFIG.include?(key)
+          case
+          when value.is_a?(Hash) && CONFIG.include?(key)
             CONFIG[key].merge!(value)
+          when value.is_a?(Array) && CONFIG.include?(key)
+            CONFIG[key] += value
           else
             CONFIG[key] = value
           end

data/lib/chimps/workflows/up.rb ADDED Viewed

@@ -0,0 +1,149 @@
+module Chimps
+  module Workflows
+    # A namespace for classes which handle each step of the
+    # BundleAndUpload workflow.
+    module Upload
+      autoload :UploadToken, 'chimps/workflows/upload/token'
+      autoload :Bundler,     'chimps/workflows/upload/bundler'
+      autoload :Uploader,    'chimps/workflows/upload/uploader'
+      autoload :Notifier,    'chimps/workflows/upload/notifier'
+    end
+    # Uploads data to Infochimps by first asking for authorization,
+    # creating an archive, obtaining a token, uploading data, and
+    # notifying Infochimps.
+    #
+    # A helper object from Chimps::Workflows::Upload is delegated to
+    # for each step:
+    #
+    # - authorization & obtaining a token: Chimps::Workflows::Upload::UploadToken
+    # - creating an archive: Chimps::Workflows::Upload::Bundler
+    # - uploading data: Chimps::Workflows::Upload::Uploader
+    # - notifying Infochimps: Chimps::Workflows::Upload::Notifier
+    class Up
+      # The ID or handle of the dataset to upload data to.
+      attr_accessor :dataset
+      # An array of paths to files and directories to package into an
+      # archive.
+      attr_accessor :paths
+      # The format to annotate the upload with.
+      attr_accessor :fmt
+      # The path to the archive to create when uploading.
+      attr_accessor :archive
+      # Create a new Up workflow from the given parameters.
+      #
+      # If <tt>:fmt</tt> is provided it will be used as the data
+      # format to annotate the upload with.  If not, Chimps will try
+      # to guess.
+      #
+      # @param [Hash] options
+      # @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
+      # @option options [Array<String>] paths the paths to aggregate and upload
+      # @option options [String, IMW::Resource] archive (IMW::Workflows::Downloader#default_archive_path) the path to the archive to create
+      # @option options [String] fmt the data format to annotate the upload with
+      def initialize options={}
+        self.dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
+        self.paths   = options[:paths]
+        self.archive = options[:archive]
+        self.fmt     = options[:fmt]
+      end
+      # Upload data to Infochimps by first asking for authorization,
+      # creating an archive, obtaining a token, uploading data, and
+      # notifying Infochimps.
+      def execute!
+        authorize_for_upload!
+        bundle!
+        ask_for_token!
+        upload!
+        notify_infochimps!
+      end
+      #
+      # == Helper Objects ==
+      #
+      # The token authorizing an upload.
+      #
+      # @return [Chimps::Workflows::Upload::UploadToken]
+      def authorization_token
+        @authorization_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset)
+      end
+      # The bundler that will aggregate data for the upload.
+      #
+      # @return [Chimps::Workflows::Upload::Bundler]
+      def bundler
+        @bundler ||= Chimps::Workflows::Upload::Bundler.new(dataset, paths, :fmt => fmt, :archive => archive)
+      end
+      # The token consumed for an upload.
+      #
+      # @return [Chimps::Workflows::Upload::UploadToken]
+      def upload_token
+        @upload_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset, :fmt => bundler.fmt, :pkg_fmt => bundler.pkg_fmt)
+      end
+      # The uploader that will actually send data to Infochimps.
+      #
+      # @return [Chimps::Workflows::Upload::Uploader]
+      def uploader
+        @uploader ||= Chimps::Workflows::Upload::Uploader.new(upload_token, bundler)
+      end
+      # The notifier that will inform Infochimps of the new data.
+      #
+      # @return [Chimps::Workflows::Upload::Notifier]
+      def notifier
+        @notifier ||= Chimps::Workflows::Upload::Notifier.new(upload_token, bundler)
+      end
+      #
+      # == Actions ==
+      #
+      # Authorize the Chimps user for this upload.
+      #
+      # Delegates to Chimps::Workflows::Upload::UploadToken
+      def authorize_for_upload!
+        authorization_token.get
+      end
+      # Bundle the data together.
+      #
+      # Delegates to Chimps::Workflows::Upload::Bundler
+      def bundle!
+        bundler.bundle!
+      end
+      # Obtain an upload token from Infochimps.
+      #
+      # Delegates to Chimps::Workflows::Upload::UploadToken
+      def ask_for_token!
+        upload_token.get
+      end
+      # Upload the data to Infochimps.
+      #
+      # Delegates to Chimps::Workflows::Upload::Uploader
+      def upload!
+        uploader.upload!
+      end
+      # Make a final POST request to Infochimps, creating the final
+      # resource.
+      #
+      # @return [Chimps::Response]
+      def notify_infochimps!
+        notifier.post
+      end
+    end
+  end
+end

data/lib/chimps/workflows/upload/bundler.rb ADDED Viewed

@@ -0,0 +1,248 @@
+module Chimps
+  module Workflows
+    module Upload
+      # Encapsulates the process of analyzing and bundling input
+      # paths.
+      class Bundler
+        #
+        # == Initialization & Attributes
+        #
+        # Instantiate a new Bundler for bundling +paths+ as a package
+        # for +dataset+.
+        #
+        # Each input path can be either a String or an IMW::Resource
+        # identifying a local or remote resource to bundle into an
+        # upload package for Infochimps (remote resources will be
+        # first copied to the local filesystem by IMW).
+        #
+        # If no format is given the format will be guessed by IMW.
+        #
+        # If no archive is given the archive path will be set to a
+        # timestamped name in the current directory, see
+        # Bundler#default_archive_path.
+        #
+        # @param [String, Integer] dataset the ID or slug of an existing Infochimps dataset
+        # @param [Array<String, IMW::Resource>] paths
+        # @param [Hash] options
+        # @option options [String] fmt the format (csv, tsv, xls, &c.) of the data being uploaded
+        # @option options [String, IMW::Resource] archive the path to the local archive to package the input paths into
+        def initialize dataset, paths, options={}
+          require_imw
+          @dataset     = dataset
+          self.paths   = paths
+          if options[:fmt]
+            self.fmt     = options[:fmt]
+          end
+          if options[:archive]
+            self.archive = options[:archive]
+          end
+        end
+        # The dataset this bundler is processing data for.
+        attr_accessor :dataset
+        # The paths this bundler is processing.
+        attr_reader :paths
+        # The resources this bundler is processing.
+        #
+        # Resources are IMW::Resource objects built from this
+        # Bundler's paths.
+        attr_reader :resources
+        # Set the paths for this Bundler.
+        #
+        # If only one input path is given and it is already an archive
+        # or a compressed file then no packaging will be attempted.
+        # Otherwise the input paths will be packaged together
+        #
+        # @param [Array<String, IMW::Resource>] new_paths
+        def paths= new_paths
+          raise PackagingError.new("Must provide at least one path to upload.") if new_paths.blank?
+          @paths, @resources = [], []
+          new_paths.each do |path|
+            resource = IMW.open(path)
+            resource.should_exist!("Cannot bundle.") if resource.is_local?
+            @paths     << path
+            @resources << resource
+          end
+          if resources.size == 1
+            potential_package = resources.first
+            if potential_package.is_local? && potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
+              self.archive = potential_package
+              @skip_packaging = true
+            end
+          end
+        end
+        # The format of the data being bundled.
+        attr_writer :fmt
+        # The format of the data being bundled.
+        #
+        # Will make a guess using IMW::Tools::Summarizer if no format
+        # is given.
+        def fmt
+          @fmt ||= summarizer.most_common_data_format
+        end
+        # The archive this bundler will build for uploading to
+        # Infochimps.
+        #
+        # @return [IMW::Resource]
+        def archive
+          return @archive if @archive
+          self.archive = default_archive_path
+          self.archive
+        end
+        # Set the path to the archive that will be built.
+        #
+        # The given +path+ must represent a compressed file or archive
+        # (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
+        # <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
+        # extension).
+        #
+        # Additionally, if multiple local paths are being packaged, the
+        # given +path+ must be an archive (not simply <tt>.bz2</tt> or
+        # <tt>.gz</tt> extensions).
+        #
+        # @param [String, IMW::Resource] path_or_obj the path or IMW::Resource object pointing to the archive to use
+        def archive= path_or_obj
+          potential_package = IMW.open(path_or_obj)
+          raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file")        unless potential_package.is_compressed? ||  potential_package.is_archive?
+          raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if     resources.size > 1               && !potential_package.is_archive?
+          @archive = potential_package
+        end
+        # Return the package format of this bundler's archive, i.e. -
+        # its extension.
+        #
+        # @return [String]
+        def pkg_fmt
+          archive.extension
+        end
+        # Return the total size of the package after aggregating and
+        # packaging.
+        #
+        # @return [Integer]
+        def size
+          archive.size
+        end
+        # Return summary information about the package prepared by the
+        # bundler.
+        #
+        # @return [Hash]
+        def summary
+          summarizer.summary
+        end
+        # Bundle the data for this bundler together.
+        def bundle!
+          return if skip_packaging?
+          result = archiver.package(archive.path)
+          raise PackagingError.new("Unable to package files for upload.  Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
+          archiver.clean!
+        end
+        #
+        # == Helper Objects ==
+        #
+        # The IMW::Tools::Archiver responsible for packaging files
+        # into a local archive.
+        #
+        # @return [IMW::Tools::Archiver]
+        def archiver
+          @archiver ||= IMW::Tools::Archiver.new(archive.name, paths_to_bundle)
+        end
+        # Return the summarizer responsible for summarizing data on this
+        # upload.
+        #
+        # @return [IMW::Tools::Summarizer]
+        def summarizer
+          @summarizer ||= IMW::Tools::Summarizer.new(resources)
+        end
+        # Should the packaging step be skipped?
+        #
+        # This will happen if only one local input path was provided and
+        # it exists and is a compressed file or archive.
+        #
+        # @return [true, false]
+        def skip_packaging?
+          !! @skip_packaging
+        end
+        #
+        # == Paths & URLs ==
+        #
+        # The default path to the archive that will be built.
+        #
+        # Defaults to a file in the current directory named after the
+        # +dataset+'s ID or handle and the current time.  The package
+        # format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
+        # size, see
+        # Bundler#default_archive_extension.
+        #
+        # @return [String]
+        def default_archive_path
+          # in current working directory...
+          "chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
+        end
+        # The default extension is <tt>zip</tt> if the data is less than 500 MB in size and
+        # <tt>tar.bz2</tt> otherwise.
+        #
+        # @return ['tar.bz2', 'zip']
+        def default_archive_extension
+          summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
+        end
+        # The URL to the <tt>README-infochimps</tt> file on Infochimps'
+        # servers.
+        #
+        # @return [String]
+        def readme_url
+          File.join(Chimps::CONFIG[:site][:host], "/README-infochimps")
+        end
+        # The URL to the ICSS file for this dataset on Infochimps
+        # servers
+        def icss_url
+          File.join(Chimps::CONFIG[:site][:host], "datasets", "#{dataset}.yaml")
+        end
+        # Both the local paths and remote paths to package.
+        #
+        # @return [Array<String>]
+        def paths_to_bundle
+          paths + [readme_url, icss_url]
+        end
+        protected
+        # Require IMW and match the IMW logger to the Chimps logger.
+        def require_imw
+          begin
+            require 'imw'
+          rescue LoadError
+            raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
+          end
+          IMW.verbose = Chimps.verbose?
+        end
+      end
+    end
+  end
+end

data/lib/chimps/workflows/upload/notifier.rb ADDED Viewed

@@ -0,0 +1,59 @@
+module Chimps
+  module Workflows
+    module Upload
+      # Encapsulates the process of notifying Infochimps of new data
+      # that's already been uploaded.
+      class Notifier
+        # The response from Infochimps to the request to create a
+        # package.
+        attr_accessor :response
+        # The upload token used for the upload.
+        attr_accessor :token
+        # The bundler responsible for the upload.
+        attr_accessor :bundler
+        def initialize token, bundler
+          self.token   = token
+          self.bundler = bundler
+        end
+        # The path on Infochimps to submit package creation requests
+        # to.
+        #
+        # @return [String]
+        def path
+          "/datasets/#{bundler.dataset}/packages.json"
+        end
+        # Information about the uploaded data to pass to Infochimps
+        # when notifying.
+        #
+        # @return [Hash]
+        def data
+          { :package => {:fmt => token['fmt'], :pkg_size => bundler.size, :pkg_fmt => bundler.pkg_fmt, :summary => bundler.summary, :token_timestamp => token['timestamp'] } }
+        end
+        # Make a request to notify Infochimps of the new data.
+        #
+        # @return [Chimps::Response]
+        def post
+          @response = Request.new(path, :signed => true, :data => data).post
+          if response.error?
+            response.print
+            raise UploadError.new("Unable to notify Infochimps of newly uploaded data.")
+          end
+          response
+        end
+      end
+    end
+  end
+end

data/lib/chimps/workflows/upload/token.rb ADDED Viewed

@@ -0,0 +1,77 @@
+module Chimps
+  module Workflows
+    module Upload
+      # Encapsulates the process of obtaining an upload token for a
+      # dataset from Infochimps.
+      class UploadToken
+        # The ID or slug of the dataset for which to obtain an upload
+        # token.
+        attr_accessor :dataset
+        # The format (csv, xls, tsv, &c.) of the data in the upload.
+        attr_accessor :fmt
+        # The package format (zip, tar.bz2, &c.)  of the data in the
+        # upload.
+        attr_accessor :pkg_fmt
+        # The response from Infochimps to the request for an upload
+        # token.
+        attr_accessor :response
+        # Instantiate a new UploadToken for the given +dataset+ with
+        # the given +fmt+ and +pkg_fmt+.
+        #
+        # @param [String,Integer] dataset the ID or slug of the dataset to upload data for
+        # @param [String] fmt the data format (csv, xls, tsv, &c.) of the data
+        # @param [String] pkg_fmt the package format (zip, tar.bz2, tar.gz, &c.) of the data
+        def initialize dataset, options={}
+          @dataset = dataset
+          @fmt     = options[:fmt]
+          @pkg_fmt = options[:pkg_fmt]
+        end
+        # Delegate slicing to the returned response.
+        def [] param
+          response && response[param]
+        end
+        # The path on Infochimps to submit upload token requests to.
+        #
+        # @return [String]
+        def path
+          "/datasets/#{dataset}/packages/new.json"
+        end
+        # Parameters passed to Infochimps to request an upload token.
+        #
+        # @return [Hash]
+        def params
+          { :package => { :fmt => fmt, :pkg_fmt => pkg_fmt } }
+        end
+        # Make the request to get an upload token from Infochimps
+        def get
+          @response = Request.new(path, :params => params, :signed => true).get
+          if response.error?
+            response.print
+            raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
+          end
+        end
+        # Parses the 'url' property of the response from Infochimps to
+        # determine the bucket name.
+        #
+        # @return [String]
+        def bucket
+          File.basename(response['url'])
+        end
+      end
+    end
+  end
+end

data/lib/chimps/workflows/upload/uploader.rb ADDED Viewed

@@ -0,0 +1,51 @@
+module Chimps
+  module Workflows
+    module Upload
+      # Encapsulates the process of uploading a package to Infochimps.
+      class Uploader
+        include Chimps::Utils::UsesCurl
+        # The token consumed when uploading.
+        attr_accessor :token
+        # The bundler from which to glean information about the upload.
+        attr_accessor :bundler
+        # Instantiate a new Uploader which will consume the given
+        # +token+ and upload data from the given +bundler+.
+        #
+        # @param [Chimps::Workflows::Upload::UploadToken] token
+        # @param [Chimps::Workflows::Upload::Bundler] bundler
+        def initialize token, bundler
+          self.token   = token
+          self.bundler = bundler
+        end
+        # Return a string built from the granted upload token that can
+        # be fed to +curl+ in order to authenticate with and upload to
+        # Amazon.
+        #
+        # @return [String]
+        def upload_data
+          data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
+          data << ["-F file=@#{bundler.archive.path}"]
+          data.join(' ')
+        end
+        # Upload the data.
+        #
+        # Uses +curl+ for the transfer.
+        def upload!
+          progress_meter = Chimps.verbose? ? '' : '-s -S'
+          command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
+          puts command if Chimps.verbose?
+          raise UploadError.new("Failed to upload #{bundler.archive.path} to Infochimps") unless system(command)
+        end
+      end
+    end
+  end
+end

data/lib/chimps/workflows.rb CHANGED Viewed

@@ -3,7 +3,8 @@ module Chimps
   # A module defining classes to handle complex workflows between the
   # local machine and Infochimps' servers.
   module Workflows
-    autoload :Uploader,     'chimps/workflows/uploader'
+    autoload :Upload,       'chimps/workflows/up'
+    autoload :Up,           'chimps/workflows/up'
     autoload :Downloader,   'chimps/workflows/downloader'
     autoload :BatchUpdater, 'chimps/workflows/batch'
   end

data/spec/chimps/workflows/upload/bundler_spec.rb ADDED Viewed

@@ -0,0 +1,75 @@
+require File.join(File.dirname(__FILE__), '../../../spec_helper')
+describe Chimps::Workflows::Upload::Bundler do
+  before do
+    @dataset             = 'foobar'
+    @extant_path         = File.expand_path("extant_file.txt")
+    @non_extant_path     = File.expand_path("non_extant_file.txt")
+    @archive_path        = File.expand_path("archive.tar.bz2")
+    @extant_archive_path = File.expand_path("extant_archive.tar.bz2")
+    File.open(@extant_path, 'w') { |f| f.write("some content") }
+    File.open(@extant_archive_path, 'w') { |f| f.write("some, admittedly not very tar.bz2'ish, content") }
+  end
+  describe "setting the format of a bundle of input paths" do
+    it "should accept a format when given" do
+      bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :fmt => 'foobar')
+      bundler.fmt.should == 'foobar'
+    end
+    it "should guess a format when one isn't given" do
+      bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path])
+      bundler.fmt.should == 'txt'
+    end
+  end
+  describe "setting the archive from a bundle of input paths" do
+    it "should automatically set the archive path when given no other information" do
+      bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path])
+      File.basename(bundler.archive.path).should =~ /^chimps_/
+    end
+    it "should use a valid archive path when given one" do
+      bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :archive => 'foo.tar.bz2')
+      File.basename(bundler.archive.path).should == 'foo.tar.bz2'
+    end
+    it "should raise an error when given a non-package or compressed-file archive path" do
+      lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :archive => 'foo.txt') }.should raise_error(Chimps::PackagingError)
+    end
+    it "should raise an error when given a compressed-file archive path with multiple input paths" do
+      lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path, @extant_archive_path], :archive => 'foo.bz2') }.should raise_error(Chimps::PackagingError)
+    end
+  end
+  describe "processing input paths" do
+    it "should raise an error when no paths are given" do
+      lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, []) }.should raise_error(Chimps::PackagingError)
+    end
+    it "should raise an error when given a local path which doesn't exist" do
+      lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path, @non_extant_path]) }.should raise_error(IMW::PathError)
+    end
+    it "should set its archive path and skip packaging when passed a single, extant archive path" do
+      bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_archive_path])
+      bundler.skip_packaging?.should be_true
+      bundler.archive.path.should == @extant_archive_path
+    end
+    it "should prefer the explicitly passed in archive path to the implicitly seleced archive path when passed a 1-path input array consisting of an archive as well as the :archive option" do
+      bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_archive_path], :archive => "foo.tar.bz2")
+      File.basename(bundler.archive.path).should == 'foo.tar.bz2'
+    end
+  end
+end

data/spec/chimps/workflows/upload/token_spec.rb ADDED Viewed

@@ -0,0 +1,6 @@
+require File.join(File.dirname(__FILE__), '../../../spec_helper')
+describe Chimps::Workflows::Upload::UploadToken do
+end

data/spec/spec_helper.rb CHANGED Viewed

@@ -9,9 +9,22 @@ require 'chimps'
 Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
+module Chimps
+  module Test
+    TMP_DIR   = "/tmp/chimps_test" unless defined?(TMP_DIR)
+  end
+end
 Spec::Runner.configure do |config|
   config.include Chimps::Test::CustomMatchers
-end
+  config.before do
+    FileUtils.mkdir_p Chimps::Test::TMP_DIR
+    FileUtils.cd Chimps::Test::TMP_DIR
+  end
+  config.after do
+    FileUtils.rm_rf Chimps::Test::TMP_DIR
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: chimps
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - Dhruv Bansal
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-06-09 00:00:00 -05:00
+date: 2010-06-15 00:00:00 -05:00
 default_executable: chimps
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -112,12 +112,18 @@ files:
 - lib/chimps/workflows.rb
 - lib/chimps/workflows/batch.rb
 - lib/chimps/workflows/downloader.rb
-- lib/chimps/workflows/uploader.rb
+- lib/chimps/workflows/up.rb
+- lib/chimps/workflows/upload/bundler.rb
+- lib/chimps/workflows/upload/notifier.rb
+- lib/chimps/workflows/upload/token.rb
+- lib/chimps/workflows/upload/uploader.rb
 - spec/chimps/cli_spec.rb
 - spec/chimps/commands/base_spec.rb
 - spec/chimps/commands/list_spec.rb
 - spec/chimps/response_spec.rb
 - spec/chimps/typewriter_spec.rb
+- spec/chimps/workflows/upload/bundler_spec.rb
+- spec/chimps/workflows/upload/token_spec.rb
 - spec/spec_helper.rb
 - spec/support/custom_matchers.rb
 has_rdoc: true
@@ -150,6 +156,8 @@ specification_version: 3
 summary: Chimps! is a Ruby wrapper and command-line interface for the Infochimps APIs (http://infochimps.org/api, http://api.infochimps.com)
 test_files:
 - spec/spec_helper.rb
+- spec/chimps/workflows/upload/bundler_spec.rb
+- spec/chimps/workflows/upload/token_spec.rb
 - spec/chimps/commands/base_spec.rb
 - spec/chimps/commands/list_spec.rb
 - spec/chimps/typewriter_spec.rb

data/lib/chimps/workflows/uploader.rb DELETED Viewed

@@ -1,267 +0,0 @@
-module Chimps
-  module Workflows
-    # Uploads data to Infochimps by first asking for authorization,
-    # creating an archive, obtaining a token, uploading data, and
-    # notifying Infochimps.
-    class Uploader
-      include Chimps::Utils::UsesCurl
-      # The ID or handle of the dataset to upload data to.
-      attr_reader :dataset
-      # An array of paths to local files and directories to package
-      # into an archive.
-      attr_reader :local_paths
-      # The format to annotate the upload with.
-      attr_reader :fmt
-      # The archive to upload.
-      attr_reader :archive
-      # The token authorizing an upload.
-      attr_reader :token
-      # Upload data to Infochimps by first asking for authorization,
-      # creating an archive, obtaining a token, uploading data, and
-      # notifying Infochimps.
-      def execute!
-        authorize_for_upload!
-        create_archive!
-        ask_for_token!
-        upload!
-        notify_infochimps!
-      end
-      # Create a new Uploader from the given parameters.
-      #
-      # If <tt>:fmt</tt> is provided it will be used as the data
-      # format to annotate the upload with.  If not, Chimps will try
-      # to guess.
-      #
-      # @param [Hash] options
-      # @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
-      # @option options [Array<String>] local_paths the local paths to bundle into an archive
-      # @option options [String, IMW::Resource] archive the path to the archive to create (defaults to Chimps::Workflows::Uploader#default_archive_path)
-      # @option options [String] fmt the data format to annotate the upload with
-      def initialize options={}
-        require_imw
-        @dataset         = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
-        self.local_paths = options[:local_paths]   # must come before self.archive=
-        self.archive     = options[:archive]
-        self.fmt         = options[:fmt]
-      end
-      # Set the local paths to upload for this dataset.
-      #
-      # If only one local path is given and it is already an archive
-      # or a compressed file then no further packaging will be done by
-      # this uploader.
-      #
-      # @param [Array<String, IMW::Resource>] paths
-      def local_paths= paths
-        raise PackagingError.new("Must provide at least one local path to upload.") if paths.blank?
-        paths.each { |path| raise PackagingError.new("Invalid path, #{path}") unless File.exist?(File.expand_path(path)) }
-        @local_paths = paths
-        if @local_paths.size == 1
-          potential_package = IMW.open(paths.first)
-          if potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
-            self.archive = potential_package
-            @skip_packaging = true
-          end
-        end
-      end
-      # Should the packaging step be skipped?
-      #
-      # This will happen if only one local input path was provided and
-      # it exists and is a compressed file or archive.
-      #
-      # @return [true, false]
-      def skip_packaging?
-        !! @skip_packaging
-      end
-      # Set the path to the archive that will be built.
-      #
-      # The given +path+ must represent a compressed file or archive
-      # (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
-      # <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
-      # extension).
-      #
-      # Additionally, if multiple local paths are being packaged, the
-      # given +path+ must be an archive (not simply <tt>.bz2</tt> or
-      # <tt>.gz</tt> extensions).
-      #
-      # @param [String, IMW::Resource] path the archive or path to use
-      def archive= path=nil
-        return @archive if @archive
-        potential_package = IMW.open(path || default_archive_path)
-        raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file")        unless potential_package.is_compressed? ||  potential_package.is_archive?
-        raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if     local_paths.size > 1             && !potential_package.is_archive?
-        @archive = potential_package
-      end
-      # Return the summarizer responsible for summarizing data on this
-      # upload.
-      #
-      # @return [IMW::Tools::Summarizer]
-      def summarizer
-        @summarizer ||= IMW::Tools::Summarizer.new(local_paths)
-      end
-      # Set the data format to annotate the upload with.
-      #
-      # If not provided, Chimps will use the Infinite Monkeywrench
-      # (IMW) to try and guess the data format.  See
-      # IMW::Tools::Summarizer for more information.
-      def fmt= new_fmt=nil
-        @fmt ||= new_fmt || summarizer.most_common_data_format
-      end
-      # The default path to the archive that will be built.
-      #
-      # Defaults to a file in the current directory named after the
-      # +dataset+'s ID or handle and the current time.  The package
-      # format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
-      # size, see
-      # Chimps::Workflows::Uploader#default_archive_extension.
-      #
-      # @return [String]
-      def default_archive_path
-        # in current working directory...
-        "chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
-      end
-      # Use <tt>zip</tt> if the data is less than 500 MB in size and
-      # <tt>tar.bz2</tt> otherwise.
-      #
-      # @return ['tar.bz2', 'zip']
-      def default_archive_extension
-        summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
-      end
-      # The URL to the <tt>README-infochimps</tt> file on Infochimps'
-      # servers.
-      #
-      # @return [String]
-      def readme_url
-        File.join(Chimps::CONFIG[:site][:host], "/README-infochimps")
-      end
-      # The URL to the ICSS file for this dataset on Infochimps
-      # servers
-      def icss_url
-        File.join(Chimps::CONFIG[:site][:host], "datasets", "#{dataset}.yaml")
-      end
-      # Both the local paths and remote paths to package.
-      #
-      # @return [Array<String>]
-      def input_paths
-        raise PackaginError.new("Must specify some local paths to package") if local_paths.blank?
-        local_paths + [readme_url, icss_url]
-      end
-      # The path on Infochimps to submit upload token requests to.
-      #
-      # @return [String]
-      def token_path
-        "/datasets/#{dataset}/packages/new.json"
-      end
-      # The path on Infochimps to submit package creation requests to.
-      #
-      # @return [String]
-      def package_creation_path
-        "/datasets/#{dataset}/packages.json"
-      end
-      # Return a hash of params for obtaining a new upload token.
-      #
-      # @return [Hash]
-      def package_params
-        { :package => { :fmt => fmt, :pkg_fmt => archive.extension } }
-      end
-      # Authorize the Chimps user for this upload.
-      def authorize_for_upload!
-        # FIXME we're actually just making a token request here...
-        ask_for_token!
-      end
-      # Obtain an upload token from Infochimps.
-      def ask_for_token!
-        new_token = Request.new(token_path, :params => package_params, :signed => true).get
-        if new_token.error?
-          new_token.print
-          raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
-        else
-          @token = new_token
-        end
-      end
-      # Build the local archive if necessary.
-      #
-      # Will not build the local archive if there was only one local
-      # input path and it was already compressed or an archive.
-      def create_archive!
-        return if skip_packaging?
-        archiver = IMW::Tools::Archiver.new(archive.name, input_paths)
-        result   = archiver.package(archive.path)
-        raise PackagingError.new("Unable to package files for upload.  Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
-        archiver.clean!
-      end
-      # Return a string built from the granted upload token that can
-      # be fed to +curl+ in order to authenticate with and upload to
-      # Amazon.
-      #
-      # @return [String]
-      def upload_data
-        data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
-        data << ["-F file=@#{archive.path}"]
-        data.join(' ')
-      end
-      # Upload the data.
-      #
-      # Uses +curl+ for the transfer.
-      def upload!
-        progress_meter = Chimps.verbose? ? '' : '-s -S'
-        command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
-        raise UploadError.new("Failed to upload #{archive.path} to Infochimps") unless IMW.system(command)
-      end
-      # Return a hash of parameters used to create a new Package at
-      # Infochimps corresponding to the upload.
-      #
-      # @return [Hash]
-      def package_data
-        { :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension, :summary => summarizer.summary, :token_timestamp => token['timestamp'] } }
-      end
-      # Make a final POST request to Infochimps, creating the final
-      # resource.
-      def notify_infochimps!
-        package_creation_response = Request.new(package_creation_path, :signed => true, :data => package_data).post
-        package_creation_response.print
-        raise UploadError.new("Unable to notify Infochimps of newly uploaded data.") if package_creation_response.error?
-      end
-      protected
-      # Require IMW and match the IMW logger to the Chimps logger.
-      def require_imw
-        begin
-          require 'imw'
-        rescue LoadError
-          raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
-        end
-        IMW.verbose = Chimps.verbose?
-      end
-    end
-  end
-end