chimps 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.4
1
+ 0.1.5
data/bin/chimps CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib') unless $:.include?(File.dirname(__FILE__) + '/../lib')
2
3
  require "chimps"
3
4
  require 'chimps/cli'
4
5
  Chimps::CLI.execute!(ARGV.dup) if $0 == __FILE__
@@ -39,7 +39,7 @@ module Chimps
39
39
  @argv = argv
40
40
  run_options_definers
41
41
  parse_command_line!
42
- resolve_options!
42
+ Chimps.boot!
43
43
  end
44
44
 
45
45
  # The name of this command, including the
@@ -69,14 +69,6 @@ module Chimps
69
69
  end
70
70
  end
71
71
 
72
- # Ensure that certain options (verbosity, log file) that can be
73
- # passed on the command-line override those stored in a
74
- # configuration file (if present).
75
- def resolve_options!
76
- Chimps::Config.load # load defaults from config file
77
- Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
78
- end
79
-
80
72
  # Run all methods beginning with +define+ and ending with +option+
81
73
  # or +options+.
82
74
  #
@@ -107,6 +99,11 @@ module Chimps
107
99
  on("-l", "--log-file PATH", "Use the given path to log Chimps output (`-' is interpreted as $stdout).") do |path|
108
100
  Chimps::COMMAND_LINE_OPTIONS[:log_file] = path # don't expand_path as it might be a `-'
109
101
  end
102
+
103
+ on("-q", "--skip-plugins", "Don't load plugins from Chimps::CONFIG[:plugins] directory.") do |bool|
104
+ Chimps::CONFIG[:skip_plugins] = true
105
+ end
106
+
110
107
  end
111
108
 
112
109
  # Run this command.
@@ -16,6 +16,12 @@ sensible name in the current directory but can also be customized.
16
16
 
17
17
  If the only file to be packaged is already a package (.zip, .tar,
18
18
  .tar.gz, &c.) then it will not be packaged again.
19
+
20
+ Supplied paths are allowed to be remote files so someting like
21
+
22
+ chimps upload my-dataset path/to/local/file.txt http://my-site.com/path/to/remote/file.txt
23
+
24
+ will work.
19
25
  EOF
20
26
 
21
27
  # The path to the archive
@@ -34,10 +40,10 @@ EOF
34
40
  argv.first
35
41
  end
36
42
 
37
- # A list of local paths to upload.
43
+ # A list of paths to upload.
38
44
  #
39
45
  # @return [Array<String>]
40
- def local_paths
46
+ def paths
41
47
  raise CLIError.new("Must provide some paths to upload") if argv.length < 2
42
48
  argv[1..-1]
43
49
  end
@@ -55,7 +61,7 @@ EOF
55
61
 
56
62
  # Upload the data.
57
63
  def execute!
58
- Chimps::Workflows::Uploader.new(:dataset => dataset, :archive => archive, :local_paths => local_paths, :fmt => fmt).execute!
64
+ Chimps::Workflows::Up.new(:dataset => dataset, :archive => archive, :paths => paths, :fmt => fmt).execute!.print
59
65
  end
60
66
  end
61
67
  end
data/lib/chimps/config.rb CHANGED
@@ -1,5 +1,12 @@
1
1
  module Chimps
2
2
 
3
+ # Load all configuration, load plugins, and resolve options.
4
+ def self.boot!
5
+ Chimps::Config.load
6
+ Chimps::Config.load_plugins
7
+ Chimps::Config.resolve_options!
8
+ end
9
+
3
10
  # Options that can be overridden by the command-line.
4
11
  COMMAND_LINE_OPTIONS = {
5
12
  :identity_file => File.expand_path(ENV["CHIMPS_RC"] || "~/.chimps"),
@@ -16,7 +23,8 @@ module Chimps
16
23
  :site => {
17
24
  :host => ENV["CHIMPS_HOST"] || 'http://infochimps.org'
18
25
  },
19
- :timestamp_format => "%Y-%m-%d_%H-%M-%S"
26
+ :timestamp_format => "%Y-%m-%d_%H-%M-%S",
27
+ :plugins => ["/usr/local/share/chimps"]
20
28
  }
21
29
 
22
30
  # Is Chimps in verbose mode?
@@ -35,7 +43,14 @@ module Chimps
35
43
 
36
44
  # Defines methods to load the Chimps configuration.
37
45
  module Config
38
-
46
+
47
+ # Ensure that certain options (verbosity, log file) that can be
48
+ # passed on the command-line override those stored in a
49
+ # configuration file (if present).
50
+ def self.resolve_options!
51
+ Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
52
+ end
53
+
39
54
  # The root of the Chimps source base.
40
55
  #
41
56
  # @return [String]
@@ -43,6 +58,17 @@ module Chimps
43
58
  File.expand_path File.join(File.dirname(__FILE__), '../..')
44
59
  end
45
60
 
61
+ # Require all ruby files in each directory listed in
62
+ # Chimps::CONFIG[:plugins].
63
+ def self.load_plugins
64
+ return if Chimps::CONFIG[:skip_plugins]
65
+ plugin_dirs = Chimps::CONFIG[:plugins]
66
+ return if plugin_dirs.blank?
67
+ plugin_dirs.each do |dir|
68
+ Dir[File.expand_path(dir) + "/*.rb"].each { |plugin| require plugin }
69
+ end
70
+ end
71
+
46
72
  # Load the configuration settings from the configuration/identity
47
73
  # file.
48
74
  def self.load
@@ -50,8 +76,11 @@ module Chimps
50
76
  if File.exist?(COMMAND_LINE_OPTIONS[:identity_file])
51
77
  require 'yaml'
52
78
  YAML.load_file(COMMAND_LINE_OPTIONS[:identity_file]).each_pair do |key, value|
53
- if value.is_a?(Hash) && CONFIG.include?(key)
79
+ case
80
+ when value.is_a?(Hash) && CONFIG.include?(key)
54
81
  CONFIG[key].merge!(value)
82
+ when value.is_a?(Array) && CONFIG.include?(key)
83
+ CONFIG[key] += value
55
84
  else
56
85
  CONFIG[key] = value
57
86
  end
@@ -0,0 +1,149 @@
1
+ module Chimps
2
+ module Workflows
3
+
4
+ # A namespace for classes which handle each step of the
5
+ # BundleAndUpload workflow.
6
+ module Upload
7
+ autoload :UploadToken, 'chimps/workflows/upload/token'
8
+ autoload :Bundler, 'chimps/workflows/upload/bundler'
9
+ autoload :Uploader, 'chimps/workflows/upload/uploader'
10
+ autoload :Notifier, 'chimps/workflows/upload/notifier'
11
+ end
12
+
13
+ # Uploads data to Infochimps by first asking for authorization,
14
+ # creating an archive, obtaining a token, uploading data, and
15
+ # notifying Infochimps.
16
+ #
17
+ # A helper object from Chimps::Workflows::Upload is delegated to
18
+ # for each step:
19
+ #
20
+ # - authorization & obtaining a token: Chimps::Workflows::Upload::UploadToken
21
+ # - creating an archive: Chimps::Workflows::Upload::Bundler
22
+ # - uploading data: Chimps::Workflows::Upload::Uploader
23
+ # - notifying Infochimps: Chimps::Workflows::Upload::Notifier
24
+ class Up
25
+
26
+ # The ID or handle of the dataset to upload data to.
27
+ attr_accessor :dataset
28
+
29
+ # An array of paths to files and directories to package into an
30
+ # archive.
31
+ attr_accessor :paths
32
+
33
+ # The format to annotate the upload with.
34
+ attr_accessor :fmt
35
+
36
+ # The path to the archive to create when uploading.
37
+ attr_accessor :archive
38
+
39
+ # Create a new Up workflow from the given parameters.
40
+ #
41
+ # If <tt>:fmt</tt> is provided it will be used as the data
42
+ # format to annotate the upload with. If not, Chimps will try
43
+ # to guess.
44
+ #
45
+ # @param [Hash] options
46
+ # @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
47
+ # @option options [Array<String>] paths the paths to aggregate and upload
48
+ # @option options [String, IMW::Resource] archive (Chimps::Workflows::Upload::Bundler#default_archive_path) the path to the archive to create
49
+ # @option options [String] fmt the data format to annotate the upload with
50
+ def initialize options={}
51
+ self.dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
52
+ self.paths = options[:paths]
53
+ self.archive = options[:archive]
54
+ self.fmt = options[:fmt]
55
+ end
56
+
57
+ # Upload data to Infochimps by first asking for authorization,
58
+ # creating an archive, obtaining a token, uploading data, and
59
+ # notifying Infochimps.
60
+ def execute!
61
+ authorize_for_upload!
62
+ bundle!
63
+ ask_for_token!
64
+ upload!
65
+ notify_infochimps!
66
+ end
67
+
68
+ #
69
+ # == Helper Objects ==
70
+ #
71
+
72
+ # The token authorizing an upload.
73
+ #
74
+ # @return [Chimps::Workflows::Upload::UploadToken]
75
+ def authorization_token
76
+ @authorization_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset)
77
+ end
78
+
79
+ # The bundler that will aggregate data for the upload.
80
+ #
81
+ # @return [Chimps::Workflows::Upload::Bundler]
82
+ def bundler
83
+ @bundler ||= Chimps::Workflows::Upload::Bundler.new(dataset, paths, :fmt => fmt, :archive => archive)
84
+ end
85
+
86
+ # The token consumed for an upload.
87
+ #
88
+ # @return [Chimps::Workflows::Upload::UploadToken]
89
+ def upload_token
90
+ @upload_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset, :fmt => bundler.fmt, :pkg_fmt => bundler.pkg_fmt)
91
+ end
92
+
93
+ # The uploader that will actually send data to Infochimps.
94
+ #
95
+ # @return [Chimps::Workflows::Upload::Uploader]
96
+ def uploader
97
+ @uploader ||= Chimps::Workflows::Upload::Uploader.new(upload_token, bundler)
98
+ end
99
+
100
+ # The notifier that will inform Infochimps of the new data.
101
+ #
102
+ # @return [Chimps::Workflows::Upload::Notifier]
103
+ def notifier
104
+ @notifier ||= Chimps::Workflows::Upload::Notifier.new(upload_token, bundler)
105
+ end
106
+
107
+ #
108
+ # == Actions ==
109
+ #
110
+
111
+ # Authorize the Chimps user for this upload.
112
+ #
113
+ # Delegates to Chimps::Workflows::Upload::UploadToken
114
+ def authorize_for_upload!
115
+ authorization_token.get
116
+ end
117
+
118
+ # Bundle the data together.
119
+ #
120
+ # Delegates to Chimps::Workflows::Upload::Bundler
121
+ def bundle!
122
+ bundler.bundle!
123
+ end
124
+
125
+ # Obtain an upload token from Infochimps.
126
+ #
127
+ # Delegates to Chimps::Workflows::Upload::UploadToken
128
+ def ask_for_token!
129
+ upload_token.get
130
+ end
131
+
132
+ # Upload the data to Infochimps.
133
+ #
134
+ # Delegates to Chimps::Workflows::Upload::Uploader
135
+ def upload!
136
+ uploader.upload!
137
+ end
138
+
139
+ # Make a final POST request to Infochimps, creating the final
140
+ # resource.
141
+ #
142
+ # @return [Chimps::Response]
143
+ def notify_infochimps!
144
+ notifier.post
145
+ end
146
+
147
+ end
148
+ end
149
+ end
@@ -0,0 +1,248 @@
1
+ module Chimps
2
+ module Workflows
3
+ module Upload
4
+
5
+ # Encapsulates the process of analyzing and bundling input
6
+ # paths.
7
+ class Bundler
8
+
9
+ #
10
+ # == Initialization & Attributes
11
+ #
12
+
13
+ # Instantiate a new Bundler for bundling +paths+ as a package
14
+ # for +dataset+.
15
+ #
16
+ # Each input path can be either a String or an IMW::Resource
17
+ # identifying a local or remote resource to bundle into an
18
+ # upload package for Infochimps (remote resources will be
19
+ # first copied to the local filesystem by IMW).
20
+ #
21
+ # If no format is given the format will be guessed by IMW.
22
+ #
23
+ # If no archive is given the archive path will be set to a
24
+ # timestamped name in the current directory, see
25
+ # Bundler#default_archive_path.
26
+ #
27
+ # @param [String, Integer] dataset the ID or slug of an existing Infochimps dataset
28
+ # @param [Array<String, IMW::Resource>] paths
29
+ # @param [Hash] options
30
+ # @option options [String] fmt the format (csv, tsv, xls, &c.) of the data being uploaded
31
+ # @option options [String, IMW::Resource] archive the path to the local archive to package the input paths into
32
+ def initialize dataset, paths, options={}
33
+ require_imw
34
+ @dataset = dataset
35
+ self.paths = paths
36
+ if options[:fmt]
37
+ self.fmt = options[:fmt]
38
+ end
39
+ if options[:archive]
40
+ self.archive = options[:archive]
41
+ end
42
+ end
43
+
44
+ # The dataset this bundler is processing data for.
45
+ attr_accessor :dataset
46
+
47
+ # The paths this bundler is processing.
48
+ attr_reader :paths
49
+
50
+ # The resources this bundler is processing.
51
+ #
52
+ # Resources are IMW::Resource objects built from this
53
+ # Bundler's paths.
54
+ attr_reader :resources
55
+
56
+ # Set the paths for this Bundler.
57
+ #
58
+ # If only one input path is given and it is already an archive
59
+ # or a compressed file then no packaging will be attempted.
60
+ # Otherwise the input paths will be packaged together
61
+ #
62
+ # @param [Array<String, IMW::Resource>] new_paths
63
+ def paths= new_paths
64
+ raise PackagingError.new("Must provide at least one path to upload.") if new_paths.blank?
65
+ @paths, @resources = [], []
66
+
67
+ new_paths.each do |path|
68
+ resource = IMW.open(path)
69
+ resource.should_exist!("Cannot bundle.") if resource.is_local?
70
+ @paths << path
71
+ @resources << resource
72
+ end
73
+
74
+ if resources.size == 1
75
+ potential_package = resources.first
76
+ if potential_package.is_local? && potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
77
+ self.archive = potential_package
78
+ @skip_packaging = true
79
+ end
80
+ end
81
+ end
82
+
83
+ # The format of the data being bundled.
84
+ attr_writer :fmt
85
+
86
+ # The format of the data being bundled.
87
+ #
88
+ # Will make a guess using IMW::Tools::Summarizer if no format
89
+ # is given.
90
+ def fmt
91
+ @fmt ||= summarizer.most_common_data_format
92
+ end
93
+
94
+ # The archive this bundler will build for uploading to
95
+ # Infochimps.
96
+ #
97
+ # @return [IMW::Resource]
98
+ def archive
99
+ return @archive if @archive
100
+ self.archive = default_archive_path
101
+ self.archive
102
+ end
103
+
104
+ # Set the path to the archive that will be built.
105
+ #
106
+ # The given +path+ must represent a compressed file or archive
107
+ # (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
108
+ # <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
109
+ # extension).
110
+ #
111
+ # Additionally, if multiple local paths are being packaged, the
112
+ # given +path+ must be an archive (not simply <tt>.bz2</tt> or
113
+ # <tt>.gz</tt> extensions).
114
+ #
115
+ # @param [String, IMW::Resource] path_or_obj the path or IMW::Resource object pointing to the archive to use
116
+ def archive= path_or_obj
117
+ potential_package = IMW.open(path_or_obj)
118
+ raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file") unless potential_package.is_compressed? || potential_package.is_archive?
119
+ raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if resources.size > 1 && !potential_package.is_archive?
120
+ @archive = potential_package
121
+ end
122
+
123
+ # Return the package format of this bundler's archive, i.e. -
124
+ # its extension.
125
+ #
126
+ # @return [String]
127
+ def pkg_fmt
128
+ archive.extension
129
+ end
130
+
131
+ # Return the total size of the package after aggregating and
132
+ # packaging.
133
+ #
134
+ # @return [Integer]
135
+ def size
136
+ archive.size
137
+ end
138
+
139
+ # Return summary information about the package prepared by the
140
+ # bundler.
141
+ #
142
+ # @return [Hash]
143
+ def summary
144
+ summarizer.summary
145
+ end
146
+
147
+ # Bundle the data for this bundler together.
148
+ def bundle!
149
+ return if skip_packaging?
150
+ result = archiver.package(archive.path)
151
+ raise PackagingError.new("Unable to package files for upload. Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
152
+ archiver.clean!
153
+ end
154
+
155
+ #
156
+ # == Helper Objects ==
157
+ #
158
+
159
+ # The IMW::Tools::Archiver responsible for packaging files
160
+ # into a local archive.
161
+ #
162
+ # @return [IMW::Tools::Archiver]
163
+ def archiver
164
+ @archiver ||= IMW::Tools::Archiver.new(archive.name, paths_to_bundle)
165
+ end
166
+
167
+ # Return the summarizer responsible for summarizing data on this
168
+ # upload.
169
+ #
170
+ # @return [IMW::Tools::Summarizer]
171
+ def summarizer
172
+ @summarizer ||= IMW::Tools::Summarizer.new(resources)
173
+ end
174
+
175
+ # Should the packaging step be skipped?
176
+ #
177
+ # This will happen if only one local input path was provided and
178
+ # it exists and is a compressed file or archive.
179
+ #
180
+ # @return [true, false]
181
+ def skip_packaging?
182
+ !! @skip_packaging
183
+ end
184
+
185
+ #
186
+ # == Paths & URLs ==
187
+ #
188
+
189
+ # The default path to the archive that will be built.
190
+ #
191
+ # Defaults to a file in the current directory named after the
192
+ # +dataset+'s ID or handle and the current time. The package
193
+ # format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
194
+ # size, see
195
+ # Chimps::Workflows::Upload::Bundler#default_archive_extension.
196
+ #
197
+ # @return [String]
198
+ def default_archive_path
199
+ # in current working directory...
200
+ "chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
201
+ end
202
+
203
+ # Use <tt>zip</tt> if the data is less than 500 MB in size and
204
+ # <tt>tar.bz2</tt> otherwise.
205
+ #
206
+ # @return ['tar.bz2', 'zip']
207
+ def default_archive_extension
208
+ summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
209
+ end
210
+
211
+ # The URL to the <tt>README-infochimps</tt> file on Infochimps'
212
+ # servers.
213
+ #
214
+ # @return [String]
215
+ def readme_url
216
+ File.join(Chimps::CONFIG[:site][:host], "/README-infochimps")
217
+ end
218
+
219
+ # The URL to the ICSS file for this dataset on Infochimps
220
+ # servers
221
+ def icss_url
222
+ File.join(Chimps::CONFIG[:site][:host], "datasets", "#{dataset}.yaml")
223
+ end
224
+
225
+ # Both the local paths and remote paths to package.
226
+ #
227
+ # @return [Array<String>]
228
+ def paths_to_bundle
229
+ paths + [readme_url, icss_url]
230
+ end
231
+
232
+ protected
233
+ # Require IMW and match the IMW logger to the Chimps logger.
234
+ def require_imw
235
+ begin
236
+ require 'imw'
237
+ rescue LoadError
238
+ raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
239
+ end
240
+ IMW.verbose = Chimps.verbose?
241
+ end
242
+
243
+ end
244
+
245
+ end
246
+ end
247
+ end
248
+
@@ -0,0 +1,59 @@
1
+ module Chimps
2
+ module Workflows
3
+ module Upload
4
+
5
+ # Encapsulates the process of notifying Infochimps of new data
6
+ # that's already been uploaded.
7
+ class Notifier
8
+
9
+ # The response from Infochimps to the request to create a
10
+ # package.
11
+ attr_accessor :response
12
+
13
+ # The upload token used for the upload.
14
+ attr_accessor :token
15
+
16
+ # The bundler responsible for the upload.
17
+ attr_accessor :bundler
18
+
19
+ def initialize token, bundler
20
+ self.token = token
21
+ self.bundler = bundler
22
+ end
23
+
24
+ # The path on Infochimps to submit package creation requests
25
+ # to.
26
+ #
27
+ # @return [String]
28
+ def path
29
+ "/datasets/#{bundler.dataset}/packages.json"
30
+ end
31
+
32
+ # Information about the uploaded data to pass to Infochimps
33
+ # when notifying.
34
+ #
35
+ # @return [Hash]
36
+ def data
37
+ { :package => {:fmt => token['fmt'], :pkg_size => bundler.size, :pkg_fmt => bundler.pkg_fmt, :summary => bundler.summary, :token_timestamp => token['timestamp'] } }
38
+ end
39
+
40
+ # Make a request to notify Infochimps of the new data.
41
+ #
42
+ # @return [Chimps::Response]
43
+ def post
44
+ @response = Request.new(path, :signed => true, :data => data).post
45
+ if response.error?
46
+ response.print
47
+ raise UploadError.new("Unable to notify Infochimps of newly uploaded data.")
48
+ end
49
+ response
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+
56
+
57
+
58
+
59
+
@@ -0,0 +1,77 @@
1
+ module Chimps
2
+ module Workflows
3
+ module Upload
4
+
5
+ # Encapsulates the process of obtaining an upload token for a
6
+ # dataset from Infochimps.
7
+ class UploadToken
8
+
9
+ # The ID or slug of the dataset for which to obtain an upload
10
+ # token.
11
+ attr_accessor :dataset
12
+
13
+ # The format (csv, xls, tsv, &c.) of the data in the upload.
14
+ attr_accessor :fmt
15
+
16
+ # The package format (zip, tar.bz2, &c.) of the data in the
17
+ # upload.
18
+ attr_accessor :pkg_fmt
19
+
20
+ # The response from Infochimps to the request for an upload
21
+ # token.
22
+ attr_accessor :response
23
+
24
+ # Instantiate a new UploadToken for the given +dataset+ with
25
+ # the given +fmt+ and +pkg_fmt+.
26
+ #
27
+ # @param [String,Integer] dataset the ID or slug of the dataset to upload data for
28
+ # @param [String] fmt the data format (csv, xls, tsv, &c.) of the data
29
+ # @param [String] pkg_fmt the package format (zip, tar.bz2, tar.gz, &c.) of the data
30
+ def initialize dataset, options={}
31
+ @dataset = dataset
32
+ @fmt = options[:fmt]
33
+ @pkg_fmt = options[:pkg_fmt]
34
+ end
35
+
36
+ # Delegate slicing to the returned response.
37
+ def [] param
38
+ response && response[param]
39
+ end
40
+
41
+ # The path on Infochimps to submit upload token requests to.
42
+ #
43
+ # @return [String]
44
+ def path
45
+ "/datasets/#{dataset}/packages/new.json"
46
+ end
47
+
48
+ # Parameters passed to Infochimps to request an upload token.
49
+ #
50
+ # @return [Hash]
51
+ def params
52
+ { :package => { :fmt => fmt, :pkg_fmt => pkg_fmt } }
53
+ end
54
+
55
+ # Make the request to get an upload token from Infochimps
56
+ def get
57
+ @response = Request.new(path, :params => params, :signed => true).get
58
+ if response.error?
59
+ response.print
60
+ raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
61
+ end
62
+ end
63
+
64
+ # Parses the 'url' property of the response from Infochimps to
65
+ # determine the bucket name.
66
+ #
67
+ # @return [String]
68
+ def bucket
69
+ File.basename(response['url'])
70
+ end
71
+
72
+ end
73
+ end
74
+ end
75
+ end
76
+
77
+
@@ -0,0 +1,51 @@
1
+ module Chimps
2
+ module Workflows
3
+ module Upload
4
+
5
+ # Encapsulates the process of uploading a package to Infochimps.
6
+ class Uploader
7
+
8
+ include Chimps::Utils::UsesCurl
9
+
10
+ # The token consumed when uploading.
11
+ attr_accessor :token
12
+
13
+ # The bundler from which to glean information about the upload.
14
+ attr_accessor :bundler
15
+
16
+ # Instantiate a new Uploader which will consume the given
17
+ # +token+ and upload data from the given +bundler+.
18
+ #
19
+ # @param [Chimps::Workflows::Upload::UploadToken] token
20
+ # @param [Chimps::Workflows::Upload::Bundler] bundler
21
+ def initialize token, bundler
22
+ self.token = token
23
+ self.bundler = bundler
24
+ end
25
+
26
+ # Return a string built from the granted upload token that can
27
+ # be fed to +curl+ in order to authenticate with and upload to
28
+ # Amazon.
29
+ #
30
+ # @return [String]
31
+ def upload_data
32
+ data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
33
+ data << ["-F file=@#{bundler.archive.path}"]
34
+ data.join(' ')
35
+ end
36
+
37
+ # Upload the data.
38
+ #
39
+ # Uses +curl+ for the transfer.
40
+ def upload!
41
+ progress_meter = Chimps.verbose? ? '' : '-s -S'
42
+ command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
43
+ puts command if Chimps.verbose?
44
+ raise UploadError.new("Failed to upload #{bundler.archive.path} to Infochimps") unless system(command)
45
+ end
46
+
47
+ end
48
+ end
49
+ end
50
+ end
51
+
@@ -3,7 +3,8 @@ module Chimps
3
3
  # A module defining classes to handle complex workflows between the
4
4
  # local machine and Infochimps' servers.
5
5
  module Workflows
6
- autoload :Uploader, 'chimps/workflows/uploader'
6
+ autoload :Upload, 'chimps/workflows/up'
7
+ autoload :Up, 'chimps/workflows/up'
7
8
  autoload :Downloader, 'chimps/workflows/downloader'
8
9
  autoload :BatchUpdater, 'chimps/workflows/batch'
9
10
  end
@@ -0,0 +1,75 @@
1
+ require File.join(File.dirname(__FILE__), '../../../spec_helper')
2
+
3
+ describe Chimps::Workflows::Upload::Bundler do
4
+
5
+ before do
6
+ @dataset = 'foobar'
7
+ @extant_path = File.expand_path("extant_file.txt")
8
+ @non_extant_path = File.expand_path("non_extant_file.txt")
9
+ @archive_path = File.expand_path("archive.tar.bz2")
10
+ @extant_archive_path = File.expand_path("extant_archive.tar.bz2")
11
+
12
+
13
+ File.open(@extant_path, 'w') { |f| f.write("some content") }
14
+ File.open(@extant_archive_path, 'w') { |f| f.write("some, admittedly not very tar.bz2'ish, content") }
15
+ end
16
+
17
+ describe "setting the format of a bundle of input paths" do
18
+ it "should accept a format when given" do
19
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :fmt => 'foobar')
20
+ bundler.fmt.should == 'foobar'
21
+ end
22
+
23
+ it "should guess a format when one isn't given" do
24
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path])
25
+ bundler.fmt.should == 'txt'
26
+ end
27
+ end
28
+
29
+ describe "setting the archive from a bundle of input paths" do
30
+
31
+ it "should automatically set the archive path when given no other information" do
32
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path])
33
+ File.basename(bundler.archive.path).should =~ /^chimps_/
34
+ end
35
+
36
+ it "should use a valid archive path when given one" do
37
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :archive => 'foo.tar.bz2')
38
+ File.basename(bundler.archive.path).should == 'foo.tar.bz2'
39
+ end
40
+
41
+ it "should raise an error when given a non-package or compressed-file archive path" do
42
+ lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :archive => 'foo.txt') }.should raise_error(Chimps::PackagingError)
43
+ end
44
+
45
+ it "should raise an error when given a compressed-file archive path with multiple input paths" do
46
+ lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path, @extant_archive_path], :archive => 'foo.bz2') }.should raise_error(Chimps::PackagingError)
47
+ end
48
+
49
+ end
50
+
51
+ describe "processing input paths" do
52
+
53
+ it "should raise an error when no paths are given" do
54
+ lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, []) }.should raise_error(Chimps::PackagingError)
55
+ end
56
+
57
+ it "should raise an error when given a local path which doesn't exist" do
58
+ lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path, @non_extant_path]) }.should raise_error(IMW::PathError)
59
+ end
60
+
61
+ it "should set its archive path and skip packaging when passed a single, extant archive path" do
62
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_archive_path])
63
+ bundler.skip_packaging?.should be_true
64
+ bundler.archive.path.should == @extant_archive_path
65
+ end
66
+
67
+ it "should prefer the explicitly passed in archive path to the implicitly seleced archive path when passed a 1-path input array consisting of an archive as well as the :archive option" do
68
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_archive_path], :archive => "foo.tar.bz2")
69
+ File.basename(bundler.archive.path).should == 'foo.tar.bz2'
70
+ end
71
+
72
+ end
73
+
74
+ end
75
+
@@ -0,0 +1,6 @@
1
+ require File.join(File.dirname(__FILE__), '../../../spec_helper')
2
+
3
+ describe Chimps::Workflows::Upload::UploadToken do
4
+
5
+ end
6
+
data/spec/spec_helper.rb CHANGED
@@ -9,9 +9,22 @@ require 'chimps'
9
9
 
10
10
  Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
11
11
 
12
+ module Chimps
13
+ module Test
14
+ TMP_DIR = "/tmp/chimps_test" unless defined?(TMP_DIR)
15
+ end
16
+ end
17
+
12
18
  Spec::Runner.configure do |config|
13
19
  config.include Chimps::Test::CustomMatchers
14
- end
15
20
 
21
+ config.before do
22
+ FileUtils.mkdir_p Chimps::Test::TMP_DIR
23
+ FileUtils.cd Chimps::Test::TMP_DIR
24
+ end
16
25
 
17
-
26
+ config.after do
27
+ FileUtils.rm_rf Chimps::Test::TMP_DIR
28
+ end
29
+
30
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chimps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dhruv Bansal
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-06-09 00:00:00 -05:00
12
+ date: 2010-06-15 00:00:00 -05:00
13
13
  default_executable: chimps
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -112,12 +112,18 @@ files:
112
112
  - lib/chimps/workflows.rb
113
113
  - lib/chimps/workflows/batch.rb
114
114
  - lib/chimps/workflows/downloader.rb
115
- - lib/chimps/workflows/uploader.rb
115
+ - lib/chimps/workflows/up.rb
116
+ - lib/chimps/workflows/upload/bundler.rb
117
+ - lib/chimps/workflows/upload/notifier.rb
118
+ - lib/chimps/workflows/upload/token.rb
119
+ - lib/chimps/workflows/upload/uploader.rb
116
120
  - spec/chimps/cli_spec.rb
117
121
  - spec/chimps/commands/base_spec.rb
118
122
  - spec/chimps/commands/list_spec.rb
119
123
  - spec/chimps/response_spec.rb
120
124
  - spec/chimps/typewriter_spec.rb
125
+ - spec/chimps/workflows/upload/bundler_spec.rb
126
+ - spec/chimps/workflows/upload/token_spec.rb
121
127
  - spec/spec_helper.rb
122
128
  - spec/support/custom_matchers.rb
123
129
  has_rdoc: true
@@ -150,6 +156,8 @@ specification_version: 3
150
156
  summary: Chimps! is a Ruby wrapper and command-line interface for the Infochimps APIs (http://infochimps.org/api, http://api.infochimps.com)
151
157
  test_files:
152
158
  - spec/spec_helper.rb
159
+ - spec/chimps/workflows/upload/bundler_spec.rb
160
+ - spec/chimps/workflows/upload/token_spec.rb
153
161
  - spec/chimps/commands/base_spec.rb
154
162
  - spec/chimps/commands/list_spec.rb
155
163
  - spec/chimps/typewriter_spec.rb
@@ -1,267 +0,0 @@
1
- module Chimps
2
- module Workflows
3
-
4
- # Uploads data to Infochimps by first asking for authorization,
5
- # creating an archive, obtaining a token, uploading data, and
6
- # notifying Infochimps.
7
- class Uploader
8
-
9
- include Chimps::Utils::UsesCurl
10
-
11
- # The ID or handle of the dataset to upload data to.
12
- attr_reader :dataset
13
-
14
- # An array of paths to local files and directories to package
15
- # into an archive.
16
- attr_reader :local_paths
17
-
18
- # The format to annotate the upload with.
19
- attr_reader :fmt
20
-
21
- # The archive to upload.
22
- attr_reader :archive
23
-
24
- # The token authorizing an upload.
25
- attr_reader :token
26
-
27
- # Upload data to Infochimps by first asking for authorization,
28
- # creating an archive, obtaining a token, uploading data, and
29
- # notifying Infochimps.
30
- def execute!
31
- authorize_for_upload!
32
- create_archive!
33
- ask_for_token!
34
- upload!
35
- notify_infochimps!
36
- end
37
-
38
- # Create a new Uploader from the given parameters.
39
- #
40
- # If <tt>:fmt</tt> is provided it will be used as the data
41
- # format to annotate the upload with. If not, Chimps will try
42
- # to guess.
43
- #
44
- # @param [Hash] options
45
- # @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
46
- # @option options [Array<String>] local_paths the local paths to bundle into an archive
47
- # @option options [String, IMW::Resource] archive the path to the archive to create (defaults to Chimps::Workflows::Uploader#default_archive_path)
48
- # @option options [String] fmt the data format to annotate the upload with
49
- def initialize options={}
50
- require_imw
51
- @dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
52
- self.local_paths = options[:local_paths] # must come before self.archive=
53
- self.archive = options[:archive]
54
- self.fmt = options[:fmt]
55
- end
56
-
57
- # Set the local paths to upload for this dataset.
58
- #
59
- # If only one local path is given and it is already an archive
60
- # or a compressed file then no further packaging will be done by
61
- # this uploader.
62
- #
63
- # @param [Array<String, IMW::Resource>] paths
64
- def local_paths= paths
65
- raise PackagingError.new("Must provide at least one local path to upload.") if paths.blank?
66
- paths.each { |path| raise PackagingError.new("Invalid path, #{path}") unless File.exist?(File.expand_path(path)) }
67
- @local_paths = paths
68
- if @local_paths.size == 1
69
- potential_package = IMW.open(paths.first)
70
- if potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
71
- self.archive = potential_package
72
- @skip_packaging = true
73
- end
74
- end
75
- end
76
-
77
- # Should the packaging step be skipped?
78
- #
79
- # This will happen if only one local input path was provided and
80
- # it exists and is a compressed file or archive.
81
- #
82
- # @return [true, false]
83
- def skip_packaging?
84
- !! @skip_packaging
85
- end
86
-
87
- # Set the path to the archive that will be built.
88
- #
89
- # The given +path+ must represent a compressed file or archive
90
- # (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
91
- # <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
92
- # extension).
93
- #
94
- # Additionally, if multiple local paths are being packaged, the
95
- # given +path+ must be an archive (not simply <tt>.bz2</tt> or
96
- # <tt>.gz</tt> extensions).
97
- #
98
- # @param [String, IMW::Resource] path the archive or path to use
99
- def archive= path=nil
100
- return @archive if @archive
101
- potential_package = IMW.open(path || default_archive_path)
102
- raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file") unless potential_package.is_compressed? || potential_package.is_archive?
103
- raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if local_paths.size > 1 && !potential_package.is_archive?
104
- @archive = potential_package
105
- end
106
-
107
- # Return the summarizer responsible for summarizing data on this
108
- # upload.
109
- #
110
- # @return [IMW::Tools::Summarizer]
111
- def summarizer
112
- @summarizer ||= IMW::Tools::Summarizer.new(local_paths)
113
- end
114
-
115
- # Set the data format to annotate the upload with.
116
- #
117
- # If not provided, Chimps will use the Infinite Monkeywrench
118
- # (IMW) to try and guess the data format. See
119
- # IMW::Tools::Summarizer for more information.
120
- def fmt= new_fmt=nil
121
- @fmt ||= new_fmt || summarizer.most_common_data_format
122
- end
123
-
124
- # The default path to the archive that will be built.
125
- #
126
- # Defaults to a file in the current directory named after the
127
- # +dataset+'s ID or handle and the current time. The package
128
- # format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
129
- # size, see
130
- # Chimps::Workflows::Uploader#default_archive_extension.
131
- #
132
- # @return [String]
133
- def default_archive_path
134
- # in current working directory...
135
- "chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
136
- end
137
-
138
- # Use <tt>zip</tt> if the data is less than 500 MB in size and
139
- # <tt>tar.bz2</tt> otherwise.
140
- #
141
- # @return ['tar.bz2', 'zip']
142
- def default_archive_extension
143
- summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
144
- end
145
-
146
- # The URL to the <tt>README-infochimps</tt> file on Infochimps'
147
- # servers.
148
- #
149
- # @return [String]
150
- def readme_url
151
- File.join(Chimps::CONFIG[:site][:host], "/README-infochimps")
152
- end
153
-
154
- # The URL to the ICSS file for this dataset on Infochimps
155
- # servers
156
- def icss_url
157
- File.join(Chimps::CONFIG[:site][:host], "datasets", "#{dataset}.yaml")
158
- end
159
-
160
- # Both the local paths and remote paths to package.
161
- #
162
- # @return [Array<String>]
163
- def input_paths
164
- raise PackaginError.new("Must specify some local paths to package") if local_paths.blank?
165
- local_paths + [readme_url, icss_url]
166
- end
167
-
168
- # The path on Infochimps to submit upload token requests to.
169
- #
170
- # @return [String]
171
- def token_path
172
- "/datasets/#{dataset}/packages/new.json"
173
- end
174
-
175
- # The path on Infochimps to submit package creation requests to.
176
- #
177
- # @return [String]
178
- def package_creation_path
179
- "/datasets/#{dataset}/packages.json"
180
- end
181
-
182
- # Return a hash of params for obtaining a new upload token.
183
- #
184
- # @return [Hash]
185
- def package_params
186
- { :package => { :fmt => fmt, :pkg_fmt => archive.extension } }
187
- end
188
-
189
- # Authorize the Chimps user for this upload.
190
- def authorize_for_upload!
191
- # FIXME we're actually just making a token request here...
192
- ask_for_token!
193
- end
194
-
195
- # Obtain an upload token from Infochimps.
196
- def ask_for_token!
197
- new_token = Request.new(token_path, :params => package_params, :signed => true).get
198
- if new_token.error?
199
- new_token.print
200
- raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
201
- else
202
- @token = new_token
203
- end
204
- end
205
-
206
- # Build the local archive if necessary.
207
- #
208
- # Will not build the local archive if there was only one local
209
- # input path and it was already compressed or an archive.
210
- def create_archive!
211
- return if skip_packaging?
212
- archiver = IMW::Tools::Archiver.new(archive.name, input_paths)
213
- result = archiver.package(archive.path)
214
- raise PackagingError.new("Unable to package files for upload. Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
215
- archiver.clean!
216
- end
217
-
218
- # Return a string built from the granted upload token that can
219
- # be fed to +curl+ in order to authenticate with and upload to
220
- # Amazon.
221
- #
222
- # @return [String]
223
- def upload_data
224
- data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
225
- data << ["-F file=@#{archive.path}"]
226
- data.join(' ')
227
- end
228
-
229
- # Upload the data.
230
- #
231
- # Uses +curl+ for the transfer.
232
- def upload!
233
- progress_meter = Chimps.verbose? ? '' : '-s -S'
234
- command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
235
- raise UploadError.new("Failed to upload #{archive.path} to Infochimps") unless IMW.system(command)
236
- end
237
-
238
- # Return a hash of parameters used to create a new Package at
239
- # Infochimps corresponding to the upload.
240
- #
241
- # @return [Hash]
242
- def package_data
243
- { :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension, :summary => summarizer.summary, :token_timestamp => token['timestamp'] } }
244
- end
245
-
246
- # Make a final POST request to Infochimps, creating the final
247
- # resource.
248
- def notify_infochimps!
249
- package_creation_response = Request.new(package_creation_path, :signed => true, :data => package_data).post
250
- package_creation_response.print
251
- raise UploadError.new("Unable to notify Infochimps of newly uploaded data.") if package_creation_response.error?
252
- end
253
-
254
- protected
255
- # Require IMW and match the IMW logger to the Chimps logger.
256
- def require_imw
257
- begin
258
- require 'imw'
259
- rescue LoadError
260
- raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
261
- end
262
- IMW.verbose = Chimps.verbose?
263
- end
264
-
265
- end
266
- end
267
- end