chimps 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.4
1
+ 0.1.5
data/bin/chimps CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib') unless $:.include?(File.dirname(__FILE__) + '/../lib')
2
3
  require "chimps"
3
4
  require 'chimps/cli'
4
5
  Chimps::CLI.execute!(ARGV.dup) if $0 == __FILE__
@@ -39,7 +39,7 @@ module Chimps
39
39
  @argv = argv
40
40
  run_options_definers
41
41
  parse_command_line!
42
- resolve_options!
42
+ Chimps.boot!
43
43
  end
44
44
 
45
45
  # The name of this command, including the
@@ -69,14 +69,6 @@ module Chimps
69
69
  end
70
70
  end
71
71
 
72
- # Ensure that certain options (verbosity, log file) that can be
73
- # passed on the command-line override those stored in a
74
- # configuration file (if present).
75
- def resolve_options!
76
- Chimps::Config.load # load defaults from config file
77
- Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
78
- end
79
-
80
72
  # Run all methods beginning with +define+ and ending with +option+
81
73
  # or +options+.
82
74
  #
@@ -107,6 +99,11 @@ module Chimps
107
99
  on("-l", "--log-file PATH", "Use the given path to log Chimps output (`-' is interpreted as $stdout).") do |path|
108
100
  Chimps::COMMAND_LINE_OPTIONS[:log_file] = path # don't expand_path as it might be a `-'
109
101
  end
102
+
103
+ on("-q", "--skip-plugins", "Don't load plugins from Chimps::CONFIG[:plugins] directory.") do |bool|
104
+ Chimps::CONFIG[:skip_plugins] = true
105
+ end
106
+
110
107
  end
111
108
 
112
109
  # Run this command.
@@ -16,6 +16,12 @@ sensible name in the current directory but can also be customized.
16
16
 
17
17
  If the only file to be packaged is already a package (.zip, .tar,
18
18
  .tar.gz, &c.) then it will not be packaged again.
19
+
20
+ Supplied paths are allowed to be remote files so someting like
21
+
22
+ chimps upload my-dataset path/to/local/file.txt http://my-site.com/path/to/remote/file.txt
23
+
24
+ will work.
19
25
  EOF
20
26
 
21
27
  # The path to the archive
@@ -34,10 +40,10 @@ EOF
34
40
  argv.first
35
41
  end
36
42
 
37
- # A list of local paths to upload.
43
+ # A list of paths to upload.
38
44
  #
39
45
  # @return [Array<String>]
40
- def local_paths
46
+ def paths
41
47
  raise CLIError.new("Must provide some paths to upload") if argv.length < 2
42
48
  argv[1..-1]
43
49
  end
@@ -55,7 +61,7 @@ EOF
55
61
 
56
62
  # Upload the data.
57
63
  def execute!
58
- Chimps::Workflows::Uploader.new(:dataset => dataset, :archive => archive, :local_paths => local_paths, :fmt => fmt).execute!
64
+ Chimps::Workflows::Up.new(:dataset => dataset, :archive => archive, :paths => paths, :fmt => fmt).execute!.print
59
65
  end
60
66
  end
61
67
  end
data/lib/chimps/config.rb CHANGED
@@ -1,5 +1,12 @@
1
1
  module Chimps
2
2
 
3
+ # Load all configuration, load plugins, and resolve options.
4
+ def self.boot!
5
+ Chimps::Config.load
6
+ Chimps::Config.load_plugins
7
+ Chimps::Config.resolve_options!
8
+ end
9
+
3
10
  # Options that can be overridden by the command-line.
4
11
  COMMAND_LINE_OPTIONS = {
5
12
  :identity_file => File.expand_path(ENV["CHIMPS_RC"] || "~/.chimps"),
@@ -16,7 +23,8 @@ module Chimps
16
23
  :site => {
17
24
  :host => ENV["CHIMPS_HOST"] || 'http://infochimps.org'
18
25
  },
19
- :timestamp_format => "%Y-%m-%d_%H-%M-%S"
26
+ :timestamp_format => "%Y-%m-%d_%H-%M-%S",
27
+ :plugins => ["/usr/local/share/chimps"]
20
28
  }
21
29
 
22
30
  # Is Chimps in verbose mode?
@@ -35,7 +43,14 @@ module Chimps
35
43
 
36
44
  # Defines methods to load the Chimps configuration.
37
45
  module Config
38
-
46
+
47
+ # Ensure that certain options (verbosity, log file) that can be
48
+ # passed on the command-line override those stored in a
49
+ # configuration file (if present).
50
+ def self.resolve_options!
51
+ Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
52
+ end
53
+
39
54
  # The root of the Chimps source base.
40
55
  #
41
56
  # @return [String]
@@ -43,6 +58,17 @@ module Chimps
43
58
  File.expand_path File.join(File.dirname(__FILE__), '../..')
44
59
  end
45
60
 
61
+ # Require all ruby files in the directory
62
+ # Chimps::CONFIG[:plugins].
63
+ def self.load_plugins
64
+ return if Chimps::CONFIG[:skip_plugins]
65
+ plugin_dirs = Chimps::CONFIG[:plugins]
66
+ return if plugin_dirs.blank?
67
+ plugin_dirs.each do |dir|
68
+ Dir[File.expand_path(dir) + "/*.rb"].each { |plugin| require plugin }
69
+ end
70
+ end
71
+
46
72
  # Load the configuration settings from the configuration/identity
47
73
  # file.
48
74
  def self.load
@@ -50,8 +76,11 @@ module Chimps
50
76
  if File.exist?(COMMAND_LINE_OPTIONS[:identity_file])
51
77
  require 'yaml'
52
78
  YAML.load_file(COMMAND_LINE_OPTIONS[:identity_file]).each_pair do |key, value|
53
- if value.is_a?(Hash) && CONFIG.include?(key)
79
+ case
80
+ when value.is_a?(Hash) && CONFIG.include?(key)
54
81
  CONFIG[key].merge!(value)
82
+ when value.is_a?(Array) && CONFIG.include?(key)
83
+ CONFIG[key] += value
55
84
  else
56
85
  CONFIG[key] = value
57
86
  end
@@ -0,0 +1,149 @@
1
+ module Chimps
2
+ module Workflows
3
+
4
+ # A namespace for classes which handle each step of the
5
+ # BundleAndUpload workflow.
6
+ module Upload
7
+ autoload :UploadToken, 'chimps/workflows/upload/token'
8
+ autoload :Bundler, 'chimps/workflows/upload/bundler'
9
+ autoload :Uploader, 'chimps/workflows/upload/uploader'
10
+ autoload :Notifier, 'chimps/workflows/upload/notifier'
11
+ end
12
+
13
+ # Uploads data to Infochimps by first asking for authorization,
14
+ # creating an archive, obtaining a token, uploading data, and
15
+ # notifying Infochimps.
16
+ #
17
+ # A helper object from Chimps::Workflows::Upload is delegated to
18
+ # for each step:
19
+ #
20
+ # - authorization & obtaining a token: Chimps::Workflows::Upload::UploadToken
21
+ # - creating an archive: Chimps::Workflows::Upload::Bundler
22
+ # - uploading data: Chimps::Workflows::Upload::Uploader
23
+ # - notifying Infochimps: Chimps::Workflows::Upload::Notifier
24
+ class Up
25
+
26
+ # The ID or handle of the dataset to upload data to.
27
+ attr_accessor :dataset
28
+
29
+ # An array of paths to files and directories to package into an
30
+ # archive.
31
+ attr_accessor :paths
32
+
33
+ # The format to annotate the upload with.
34
+ attr_accessor :fmt
35
+
36
+ # The path to the archive to create when uploading.
37
+ attr_accessor :archive
38
+
39
+ # Create a new Up workflow from the given parameters.
40
+ #
41
+ # If <tt>:fmt</tt> is provided it will be used as the data
42
+ # format to annotate the upload with. If not, Chimps will try
43
+ # to guess.
44
+ #
45
+ # @param [Hash] options
46
+ # @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
47
+ # @option options [Array<String>] paths the paths to aggregate and upload
48
+ # @option options [String, IMW::Resource] archive (IMW::Workflows::Downloader#default_archive_path) the path to the archive to create
49
+ # @option options [String] fmt the data format to annotate the upload with
50
+ def initialize options={}
51
+ self.dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
52
+ self.paths = options[:paths]
53
+ self.archive = options[:archive]
54
+ self.fmt = options[:fmt]
55
+ end
56
+
57
+ # Upload data to Infochimps by first asking for authorization,
58
+ # creating an archive, obtaining a token, uploading data, and
59
+ # notifying Infochimps.
60
+ def execute!
61
+ authorize_for_upload!
62
+ bundle!
63
+ ask_for_token!
64
+ upload!
65
+ notify_infochimps!
66
+ end
67
+
68
+ #
69
+ # == Helper Objects ==
70
+ #
71
+
72
+ # The token authorizing an upload.
73
+ #
74
+ # @return [Chimps::Workflows::Upload::UploadToken]
75
+ def authorization_token
76
+ @authorization_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset)
77
+ end
78
+
79
+ # The bundler that will aggregate data for the upload.
80
+ #
81
+ # @return [Chimps::Workflows::Upload::Bundler]
82
+ def bundler
83
+ @bundler ||= Chimps::Workflows::Upload::Bundler.new(dataset, paths, :fmt => fmt, :archive => archive)
84
+ end
85
+
86
+ # The token consumed for an upload.
87
+ #
88
+ # @return [Chimps::Workflows::Upload::UploadToken]
89
+ def upload_token
90
+ @upload_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset, :fmt => bundler.fmt, :pkg_fmt => bundler.pkg_fmt)
91
+ end
92
+
93
+ # The uploader that will actually send data to Infochimps.
94
+ #
95
+ # @return [Chimps::Workflows::Upload::Uploader]
96
+ def uploader
97
+ @uploader ||= Chimps::Workflows::Upload::Uploader.new(upload_token, bundler)
98
+ end
99
+
100
+ # The notifier that will inform Infochimps of the new data.
101
+ #
102
+ # @return [Chimps::Workflows::Upload::Notifier]
103
+ def notifier
104
+ @notifier ||= Chimps::Workflows::Upload::Notifier.new(upload_token, bundler)
105
+ end
106
+
107
+ #
108
+ # == Actions ==
109
+ #
110
+
111
+ # Authorize the Chimps user for this upload.
112
+ #
113
+ # Delegates to Chimps::Workflows::Upload::UploadToken
114
+ def authorize_for_upload!
115
+ authorization_token.get
116
+ end
117
+
118
+ # Bundle the data together.
119
+ #
120
+ # Delegates to Chimps::Workflows::Upload::Bundler
121
+ def bundle!
122
+ bundler.bundle!
123
+ end
124
+
125
+ # Obtain an upload token from Infochimps.
126
+ #
127
+ # Delegates to Chimps::Workflows::Upload::UploadToken
128
+ def ask_for_token!
129
+ upload_token.get
130
+ end
131
+
132
+ # Upload the data to Infochimps.
133
+ #
134
+ # Delegates to Chimps::Workflows::Upload::Uploader
135
+ def upload!
136
+ uploader.upload!
137
+ end
138
+
139
+ # Make a final POST request to Infochimps, creating the final
140
+ # resource.
141
+ #
142
+ # @return [Chimps::Response]
143
+ def notify_infochimps!
144
+ notifier.post
145
+ end
146
+
147
+ end
148
+ end
149
+ end
@@ -0,0 +1,248 @@
1
+ module Chimps
2
+ module Workflows
3
+ module Upload
4
+
5
+ # Encapsulates the process of analyzing and bundling input
6
+ # paths.
7
+ class Bundler
8
+
9
+ #
10
+ # == Initialization & Attributes
11
+ #
12
+
13
+ # Instantiate a new Bundler for bundling +paths+ as a package
14
+ # for +dataset+.
15
+ #
16
+ # Each input path can be either a String or an IMW::Resource
17
+ # identifying a local or remote resource to bundle into an
18
+ # upload package for Infochimps (remote resources will be
19
+ # first copied to the local filesystem by IMW).
20
+ #
21
+ # If no format is given the format will be guessed by IMW.
22
+ #
23
+ # If no archive is given the archive path will be set to a
24
+ # timestamped name in the current directory, see
25
+ # Bundler#default_archive_path.
26
+ #
27
+ # @param [String, Integer] dataset the ID or slug of an existing Infochimps dataset
28
+ # @param [Array<String, IMW::Resource>] paths
29
+ # @param [Hash] options
30
+ # @option options [String] fmt the format (csv, tsv, xls, &c.) of the data being uploaded
31
+ # @option options [String, IMW::Resource] archive the path to the local archive to package the input paths into
32
+ def initialize dataset, paths, options={}
33
+ require_imw
34
+ @dataset = dataset
35
+ self.paths = paths
36
+ if options[:fmt]
37
+ self.fmt = options[:fmt]
38
+ end
39
+ if options[:archive]
40
+ self.archive = options[:archive]
41
+ end
42
+ end
43
+
44
+ # The dataset this bundler is processing data for.
45
+ attr_accessor :dataset
46
+
47
+ # The paths this bundler is processing.
48
+ attr_reader :paths
49
+
50
+ # The resources this bundler is processing.
51
+ #
52
+ # Resources are IMW::Resource objects built from this
53
+ # Bundler's paths.
54
+ attr_reader :resources
55
+
56
+ # Set the paths for this Bundler.
57
+ #
58
+ # If only one input path is given and it is already an archive
59
+ # or a compressed file then no packaging will be attempted.
60
+ # Otherwise the input paths will be packaged together
61
+ #
62
+ # @param [Array<String, IMW::Resource>] new_paths
63
+ def paths= new_paths
64
+ raise PackagingError.new("Must provide at least one path to upload.") if new_paths.blank?
65
+ @paths, @resources = [], []
66
+
67
+ new_paths.each do |path|
68
+ resource = IMW.open(path)
69
+ resource.should_exist!("Cannot bundle.") if resource.is_local?
70
+ @paths << path
71
+ @resources << resource
72
+ end
73
+
74
+ if resources.size == 1
75
+ potential_package = resources.first
76
+ if potential_package.is_local? && potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
77
+ self.archive = potential_package
78
+ @skip_packaging = true
79
+ end
80
+ end
81
+ end
82
+
83
+ # The format of the data being bundled.
84
+ attr_writer :fmt
85
+
86
+ # The format of the data being bundled.
87
+ #
88
+ # Will make a guess using IMW::Tools::Summarizer if no format
89
+ # is given.
90
+ def fmt
91
+ @fmt ||= summarizer.most_common_data_format
92
+ end
93
+
94
+ # The archive this bundler will build for uploading to
95
+ # Infochimps.
96
+ #
97
+ # @return [IMW::Resource]
98
+ def archive
99
+ return @archive if @archive
100
+ self.archive = default_archive_path
101
+ self.archive
102
+ end
103
+
104
+ # Set the path to the archive that will be built.
105
+ #
106
+ # The given +path+ must represent a compressed file or archive
107
+ # (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
108
+ # <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
109
+ # extension).
110
+ #
111
+ # Additionally, if multiple local paths are being packaged, the
112
+ # given +path+ must be an archive (not simply <tt>.bz2</tt> or
113
+ # <tt>.gz</tt> extensions).
114
+ #
115
+ # @param [String, IMW::Resource] path_or_obj the path or IMW::Resource object pointing to the archive to use
116
+ def archive= path_or_obj
117
+ potential_package = IMW.open(path_or_obj)
118
+ raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file") unless potential_package.is_compressed? || potential_package.is_archive?
119
+ raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if resources.size > 1 && !potential_package.is_archive?
120
+ @archive = potential_package
121
+ end
122
+
123
+ # Return the package format of this bundler's archive, i.e. -
124
+ # its extension.
125
+ #
126
+ # @return [String]
127
+ def pkg_fmt
128
+ archive.extension
129
+ end
130
+
131
+ # Return the total size of the package after aggregating and
132
+ # packaging.
133
+ #
134
+ # @return [Integer]
135
+ def size
136
+ archive.size
137
+ end
138
+
139
+ # Return summary information about the package prepared by the
140
+ # bundler.
141
+ #
142
+ # @return [Hash]
143
+ def summary
144
+ summarizer.summary
145
+ end
146
+
147
+ # Bundle the data for this bundler together.
148
+ def bundle!
149
+ return if skip_packaging?
150
+ result = archiver.package(archive.path)
151
+ raise PackagingError.new("Unable to package files for upload. Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
152
+ archiver.clean!
153
+ end
154
+
155
+ #
156
+ # == Helper Objects ==
157
+ #
158
+
159
+ # The IMW::Tools::Archiver responsible for packaging files
160
+ # into a local archive.
161
+ #
162
+ # @return [IMW::Tools::Archiver]
163
+ def archiver
164
+ @archiver ||= IMW::Tools::Archiver.new(archive.name, paths_to_bundle)
165
+ end
166
+
167
+ # Return the summarizer responsible for summarizing data on this
168
+ # upload.
169
+ #
170
+ # @return [IMW::Tools::Summarizer]
171
+ def summarizer
172
+ @summarizer ||= IMW::Tools::Summarizer.new(resources)
173
+ end
174
+
175
+ # Should the packaging step be skipped?
176
+ #
177
+ # This will happen if only one local input path was provided and
178
+ # it exists and is a compressed file or archive.
179
+ #
180
+ # @return [true, false]
181
+ def skip_packaging?
182
+ !! @skip_packaging
183
+ end
184
+
185
+ #
186
+ # == Paths & URLs ==
187
+ #
188
+
189
+ # The default path to the archive that will be built.
190
+ #
191
+ # Defaults to a file in the current directory named after the
192
+ # +dataset+'s ID or handle and the current time. The package
193
+ # format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
194
+ # size, see
195
+ # Chimps::Workflows::Upload::Bundler#default_archive_extension.
196
+ #
197
+ # @return [String]
198
+ def default_archive_path
199
+ # in current working directory...
200
+ "chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
201
+ end
202
+
203
+ # Returns <tt>zip</tt> if the data is less than 500 MB in size and
204
+ # <tt>tar.bz2</tt> otherwise.
205
+ #
206
+ # @return ['tar.bz2', 'zip']
207
+ def default_archive_extension
208
+ summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
209
+ end
210
+
211
+ # The URL to the <tt>README-infochimps</tt> file on Infochimps'
212
+ # servers.
213
+ #
214
+ # @return [String]
215
+ def readme_url
216
+ File.join(Chimps::CONFIG[:site][:host], "/README-infochimps")
217
+ end
218
+
219
+ # The URL to the ICSS file for this dataset on Infochimps
220
+ # servers
221
+ def icss_url
222
+ File.join(Chimps::CONFIG[:site][:host], "datasets", "#{dataset}.yaml")
223
+ end
224
+
225
+ # Both the local paths and remote paths to package.
226
+ #
227
+ # @return [Array<String>]
228
+ def paths_to_bundle
229
+ paths + [readme_url, icss_url]
230
+ end
231
+
232
+ protected
233
+ # Require IMW and match the IMW logger to the Chimps logger.
234
+ def require_imw
235
+ begin
236
+ require 'imw'
237
+ rescue LoadError
238
+ raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
239
+ end
240
+ IMW.verbose = Chimps.verbose?
241
+ end
242
+
243
+ end
244
+
245
+ end
246
+ end
247
+ end
248
+
@@ -0,0 +1,59 @@
1
+ module Chimps
2
+ module Workflows
3
+ module Upload
4
+
5
+ # Encapsulates the process of notifying Infochimps of new data
6
+ # that's already been uploaded.
7
+ class Notifier
8
+
9
+ # The response from Infochimps to the request to create a
10
+ # package.
11
+ attr_accessor :response
12
+
13
+ # The upload token used for the upload.
14
+ attr_accessor :token
15
+
16
+ # The bundler responsible for the upload.
17
+ attr_accessor :bundler
18
+
19
+ def initialize token, bundler
20
+ self.token = token
21
+ self.bundler = bundler
22
+ end
23
+
24
+ # The path on Infochimps to submit package creation requests
25
+ # to.
26
+ #
27
+ # @return [String]
28
+ def path
29
+ "/datasets/#{bundler.dataset}/packages.json"
30
+ end
31
+
32
+ # Information about the uploaded data to pass to Infochimps
33
+ # when notifying.
34
+ #
35
+ # @return [Hash]
36
+ def data
37
+ { :package => {:fmt => token['fmt'], :pkg_size => bundler.size, :pkg_fmt => bundler.pkg_fmt, :summary => bundler.summary, :token_timestamp => token['timestamp'] } }
38
+ end
39
+
40
+ # Make a request to notify Infochimps of the new data.
41
+ #
42
+ # @return [Chimps::Response]
43
+ def post
44
+ @response = Request.new(path, :signed => true, :data => data).post
45
+ if response.error?
46
+ response.print
47
+ raise UploadError.new("Unable to notify Infochimps of newly uploaded data.")
48
+ end
49
+ response
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+
56
+
57
+
58
+
59
+
@@ -0,0 +1,77 @@
1
+ module Chimps
2
+ module Workflows
3
+ module Upload
4
+
5
+ # Encapsulates the process of obtaining an upload token for a
6
+ # dataset from Infochimps.
7
+ class UploadToken
8
+
9
+ # The ID or slug of the dataset for which to obtain an upload
10
+ # token.
11
+ attr_accessor :dataset
12
+
13
+ # The format (csv, xls, tsv, &c.) of the data in the upload.
14
+ attr_accessor :fmt
15
+
16
+ # The package format (zip, tar.bz2, &c.) of the data in the
17
+ # upload.
18
+ attr_accessor :pkg_fmt
19
+
20
+ # The response from Infochimps to the request for an upload
21
+ # token.
22
+ attr_accessor :response
23
+
24
+ # Instantiate a new UploadToken for the given +dataset+ with
25
+ # the given +fmt+ and +pkg_fmt+.
26
+ #
27
+ # @param [String,Integer] dataset the ID or slug of the dataset to upload data for
28
+ # @param [String] fmt the data format (csv, xls, tsv, &c.) of the data
29
+ # @param [String] pkg_fmt the package format (zip, tar.bz2, tar.gz, &c.) of the data
30
+ def initialize dataset, options={}
31
+ @dataset = dataset
32
+ @fmt = options[:fmt]
33
+ @pkg_fmt = options[:pkg_fmt]
34
+ end
35
+
36
+ # Delegate slicing to the returned response.
37
+ def [] param
38
+ response && response[param]
39
+ end
40
+
41
+ # The path on Infochimps to submit upload token requests to.
42
+ #
43
+ # @return [String]
44
+ def path
45
+ "/datasets/#{dataset}/packages/new.json"
46
+ end
47
+
48
+ # Parameters passed to Infochimps to request an upload token.
49
+ #
50
+ # @return [Hash]
51
+ def params
52
+ { :package => { :fmt => fmt, :pkg_fmt => pkg_fmt } }
53
+ end
54
+
55
+ # Make the request to get an upload token from Infochimps
56
+ def get
57
+ @response = Request.new(path, :params => params, :signed => true).get
58
+ if response.error?
59
+ response.print
60
+ raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
61
+ end
62
+ end
63
+
64
+ # Parses the 'url' property of the response from Infochimps to
65
+ # determine the bucket name.
66
+ #
67
+ # @return [String]
68
+ def bucket
69
+ File.basename(response['url'])
70
+ end
71
+
72
+ end
73
+ end
74
+ end
75
+ end
76
+
77
+
@@ -0,0 +1,51 @@
1
+ module Chimps
2
+ module Workflows
3
+ module Upload
4
+
5
+ # Encapsulates the process of uploading a package to Infochimps.
6
+ class Uploader
7
+
8
+ include Chimps::Utils::UsesCurl
9
+
10
+ # The token consumed when uploading.
11
+ attr_accessor :token
12
+
13
+ # The bundler from which to glean information about the upload.
14
+ attr_accessor :bundler
15
+
16
+ # Instantiate a new Uploader which will consume the given
17
+ # +token+ and upload data from the given +bundler+.
18
+ #
19
+ # @param [Chimps::Workflows::Upload::UploadToken] token
20
+ # @param [Chimps::Workflows::Upload::Bundler] bundler
21
+ def initialize token, bundler
22
+ self.token = token
23
+ self.bundler = bundler
24
+ end
25
+
26
+ # Return a string built from the granted upload token that can
27
+ # be fed to +curl+ in order to authenticate with and upload to
28
+ # Amazon.
29
+ #
30
+ # @return [String]
31
+ def upload_data
32
+ data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
33
+ data << ["-F file=@#{bundler.archive.path}"]
34
+ data.join(' ')
35
+ end
36
+
37
+ # Upload the data.
38
+ #
39
+ # Uses +curl+ for the transfer.
40
+ def upload!
41
+ progress_meter = Chimps.verbose? ? '' : '-s -S'
42
+ command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
43
+ puts command if Chimps.verbose?
44
+ raise UploadError.new("Failed to upload #{bundler.archive.path} to Infochimps") unless system(command)
45
+ end
46
+
47
+ end
48
+ end
49
+ end
50
+ end
51
+
@@ -3,7 +3,8 @@ module Chimps
3
3
  # A module defining classes to handle complex workflows between the
4
4
  # local machine and Infochimps' servers.
5
5
  module Workflows
6
- autoload :Uploader, 'chimps/workflows/uploader'
6
+ autoload :Upload, 'chimps/workflows/up'
7
+ autoload :Up, 'chimps/workflows/up'
7
8
  autoload :Downloader, 'chimps/workflows/downloader'
8
9
  autoload :BatchUpdater, 'chimps/workflows/batch'
9
10
  end
@@ -0,0 +1,75 @@
1
+ require File.join(File.dirname(__FILE__), '../../../spec_helper')
2
+
3
+ describe Chimps::Workflows::Upload::Bundler do
4
+
5
+ before do
6
+ @dataset = 'foobar'
7
+ @extant_path = File.expand_path("extant_file.txt")
8
+ @non_extant_path = File.expand_path("non_extant_file.txt")
9
+ @archive_path = File.expand_path("archive.tar.bz2")
10
+ @extant_archive_path = File.expand_path("extant_archive.tar.bz2")
11
+
12
+
13
+ File.open(@extant_path, 'w') { |f| f.write("some content") }
14
+ File.open(@extant_archive_path, 'w') { |f| f.write("some, admittedly not very tar.bz2'ish, content") }
15
+ end
16
+
17
+ describe "setting the format of a bundle of input paths" do
18
+ it "should accept a format when given" do
19
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :fmt => 'foobar')
20
+ bundler.fmt.should == 'foobar'
21
+ end
22
+
23
+ it "should guess a format when one isn't given" do
24
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path])
25
+ bundler.fmt.should == 'txt'
26
+ end
27
+ end
28
+
29
+ describe "setting the archive from a bundle of input paths" do
30
+
31
+ it "should automatically set the archive path when given no other information" do
32
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path])
33
+ File.basename(bundler.archive.path).should =~ /^chimps_/
34
+ end
35
+
36
+ it "should use a valid archive path when given one" do
37
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :archive => 'foo.tar.bz2')
38
+ File.basename(bundler.archive.path).should == 'foo.tar.bz2'
39
+ end
40
+
41
+ it "should raise an error when given a non-package or compressed-file archive path" do
42
+ lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :archive => 'foo.txt') }.should raise_error(Chimps::PackagingError)
43
+ end
44
+
45
+ it "should raise an error when given a compressed-file archive path with multiple input paths" do
46
+ lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path, @extant_archive_path], :archive => 'foo.bz2') }.should raise_error(Chimps::PackagingError)
47
+ end
48
+
49
+ end
50
+
51
+ describe "processing input paths" do
52
+
53
+ it "should raise an error when no paths are given" do
54
+ lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, []) }.should raise_error(Chimps::PackagingError)
55
+ end
56
+
57
+ it "should raise an error when given a local path which doesn't exist" do
58
+ lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path, @non_extant_path]) }.should raise_error(IMW::PathError)
59
+ end
60
+
61
+ it "should set its archive path and skip packaging when passed a single, extant archive path" do
62
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_archive_path])
63
+ bundler.skip_packaging?.should be_true
64
+ bundler.archive.path.should == @extant_archive_path
65
+ end
66
+
67
+ it "should prefer the explicitly passed in archive path to the implicitly seleced archive path when passed a 1-path input array consisting of an archive as well as the :archive option" do
68
+ bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_archive_path], :archive => "foo.tar.bz2")
69
+ File.basename(bundler.archive.path).should == 'foo.tar.bz2'
70
+ end
71
+
72
+ end
73
+
74
+ end
75
+
@@ -0,0 +1,6 @@
1
+ require File.join(File.dirname(__FILE__), '../../../spec_helper')
2
+
3
+ describe Chimps::Workflows::Upload::UploadToken do
4
+
5
+ end
6
+
data/spec/spec_helper.rb CHANGED
@@ -9,9 +9,22 @@ require 'chimps'
9
9
 
10
10
  Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
11
11
 
12
+ module Chimps
13
+ module Test
14
+ TMP_DIR = "/tmp/chimps_test" unless defined?(TMP_DIR)
15
+ end
16
+ end
17
+
12
18
  Spec::Runner.configure do |config|
13
19
  config.include Chimps::Test::CustomMatchers
14
- end
15
20
 
21
+ config.before do
22
+ FileUtils.mkdir_p Chimps::Test::TMP_DIR
23
+ FileUtils.cd Chimps::Test::TMP_DIR
24
+ end
16
25
 
17
-
26
+ config.after do
27
+ FileUtils.rm_rf Chimps::Test::TMP_DIR
28
+ end
29
+
30
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chimps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dhruv Bansal
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-06-09 00:00:00 -05:00
12
+ date: 2010-06-15 00:00:00 -05:00
13
13
  default_executable: chimps
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -112,12 +112,18 @@ files:
112
112
  - lib/chimps/workflows.rb
113
113
  - lib/chimps/workflows/batch.rb
114
114
  - lib/chimps/workflows/downloader.rb
115
- - lib/chimps/workflows/uploader.rb
115
+ - lib/chimps/workflows/up.rb
116
+ - lib/chimps/workflows/upload/bundler.rb
117
+ - lib/chimps/workflows/upload/notifier.rb
118
+ - lib/chimps/workflows/upload/token.rb
119
+ - lib/chimps/workflows/upload/uploader.rb
116
120
  - spec/chimps/cli_spec.rb
117
121
  - spec/chimps/commands/base_spec.rb
118
122
  - spec/chimps/commands/list_spec.rb
119
123
  - spec/chimps/response_spec.rb
120
124
  - spec/chimps/typewriter_spec.rb
125
+ - spec/chimps/workflows/upload/bundler_spec.rb
126
+ - spec/chimps/workflows/upload/token_spec.rb
121
127
  - spec/spec_helper.rb
122
128
  - spec/support/custom_matchers.rb
123
129
  has_rdoc: true
@@ -150,6 +156,8 @@ specification_version: 3
150
156
  summary: Chimps! is a Ruby wrapper and command-line interface for the Infochimps APIs (http://infochimps.org/api, http://api.infochimps.com)
151
157
  test_files:
152
158
  - spec/spec_helper.rb
159
+ - spec/chimps/workflows/upload/bundler_spec.rb
160
+ - spec/chimps/workflows/upload/token_spec.rb
153
161
  - spec/chimps/commands/base_spec.rb
154
162
  - spec/chimps/commands/list_spec.rb
155
163
  - spec/chimps/typewriter_spec.rb
@@ -1,267 +0,0 @@
1
- module Chimps
2
- module Workflows
3
-
4
- # Uploads data to Infochimps by first asking for authorization,
5
- # creating an archive, obtaining a token, uploading data, and
6
- # notifing Infochimps.
7
- class Uploader
8
-
9
- include Chimps::Utils::UsesCurl
10
-
11
- # The ID or handle of the dataset to download.
12
- attr_reader :dataset
13
-
14
- # An array of paths to local files and directories to package
15
- # into an archive.
16
- attr_reader :local_paths
17
-
18
- # The format to annotate the upload with.
19
- attr_reader :fmt
20
-
21
- # The archive to upload.
22
- attr_reader :archive
23
-
24
- # The token authoring an upload.
25
- attr_reader :token
26
-
27
- # Upload data to Infochimps by first asking for authorization,
28
- # creating an archive, obtaining a token, uploading data, and
29
- # notifing Infochimps.
30
- def execute!
31
- authorize_for_upload!
32
- create_archive!
33
- ask_for_token!
34
- upload!
35
- notify_infochimps!
36
- end
37
-
38
- # Create a new Uploader from the given parameters.
39
- #
40
- # If <tt>:fmt</tt> is provided it will be used as the data
41
- # format to annotate the upload with. If not, Chimps will try
42
- # to guess.
43
- #
44
- # @param [Hash] options
45
- # @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
46
- # @option options [Array<String>] local_paths the local paths to bundle into an archive
47
- # @option options [String, IMW::Resource] archive the path to the archive to create (defaults to IMW::Workflows::Downloader#default_archive_path)
48
- # @option options [String] fmt the data format to annotate the upload with
49
- def initialize options={}
50
- require_imw
51
- @dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
52
- self.local_paths = options[:local_paths] # must come before self.archive=
53
- self.archive = options[:archive]
54
- self.fmt = options[:fmt]
55
- end
56
-
57
- # Set the local paths to upload for this dataset.
58
- #
59
- # If only one local path is given and it is already an archive
60
- # or a compressed file then no further packaging will be done by
61
- # this uploader.
62
- #
63
- # @param [Array<String, IMW::Resource>] paths
64
- def local_paths= paths
65
- raise PackagingError.new("Must provide at least one local path to upload.") if paths.blank?
66
- paths.each { |path| raise PackagingError.new("Invalid path, #{path}") unless File.exist?(File.expand_path(path)) }
67
- @local_paths = paths
68
- if @local_paths.size == 1
69
- potential_package = IMW.open(paths.first)
70
- if potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
71
- self.archive = potential_package
72
- @skip_packaging = true
73
- end
74
- end
75
- end
76
-
77
- # Should the packaging step be skipped?
78
- #
79
- # This will happen if only one local input path was provided and
80
- # it exists and is a compressed file or archive.
81
- #
82
- # @return [true, false]
83
- def skip_packaging?
84
- !! @skip_packaging
85
- end
86
-
87
- # Set the path to the archive that will be built.
88
- #
89
- # The given +path+ must represent a compressed file or archive
90
- # (<tt>.tar</tt>, <tt>.tar.gz.</tt>, <tt>.tar.bz2</tt>,
91
- # <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
92
- # extension).
93
- #
94
- # Additionally, if multiple local paths are being packaged, the
95
- # given +path+ must be an archive (not simply <tt>.bz2</tt> or
96
- # <tt>.gz</tt> extensions).
97
- #
98
- # @param [String, IMW::Resource] path the archive or path to use
99
- def archive= path=nil
100
- return @archive if @archive
101
- potential_package = IMW.open(path || default_archive_path)
102
- raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file") unless potential_package.is_compressed? || potential_package.is_archive?
103
- raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if local_paths.size > 1 && !potential_package.is_archive?
104
- @archive = potential_package
105
- end
106
-
107
- # Return the summarizer responsible for summarizing data on this
108
- # upload.
109
- #
110
- # @return [IMW::Tools::Summarizer]
111
- def summarizer
112
- @summarizer ||= IMW::Tools::Summarizer.new(local_paths)
113
- end
114
-
115
- # Set the data format to annotate the upload with.
116
- #
117
- # If not provided, Chimps will use the Infinite Monkeywrench
118
- # (IMW) to try and guess the data format. See
119
- # IMW::Tools::Summarizer for more information.
120
- def fmt= new_fmt=nil
121
- @fmt ||= new_fmt || summarizer.most_common_data_format
122
- end
123
-
124
- # The default path to the archive that will be built.
125
- #
126
- # Defaults to a file in the current directory named after the
127
- # +dataset+'s ID or handle and the current time. The package
128
- # format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
129
- # size, see
130
- # Chimps::Workflows::Uploader#default_archive_extension.
131
- #
132
- # @return [String]
133
- def default_archive_path
134
- # in current working directory...
135
- "chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
136
- end
137
-
138
- # Use <tt>zip</tt> if the data is less than 500 MB in size and
139
- # <tt>tar.bz2</tt> otherwise.
140
- #
141
- # @return ['tar.bz2', 'zip']
142
- def default_archive_extension
143
- summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
144
- end
145
-
146
- # The URL to the <tt>README-infochimps</tt> file on Infochimps'
147
- # servers.
148
- #
149
- # @return [String]
150
- def readme_url
151
- File.join(Chimps::CONFIG[:site][:host], "/README-infochimps")
152
- end
153
-
154
- # The URL to the ICSS file for this dataset on Infochimps
155
- # servers
156
- def icss_url
157
- File.join(Chimps::CONFIG[:site][:host], "datasets", "#{dataset}.yaml")
158
- end
159
-
160
- # Both the local paths and remote paths to package.
161
- #
162
- # @return [Array<String>]
163
- def input_paths
164
- raise PackaginError.new("Must specify some local paths to package") if local_paths.blank?
165
- local_paths + [readme_url, icss_url]
166
- end
167
-
168
- # The path on Infochimps to submit upload token requests to.
169
- #
170
- # @return [String]
171
- def token_path
172
- "/datasets/#{dataset}/packages/new.json"
173
- end
174
-
175
- # The path on Infochimps to submit package creation requests to.
176
- #
177
- # @return [String]
178
- def package_creation_path
179
- "/datasets/#{dataset}/packages.json"
180
- end
181
-
182
- # Return a hash of params for obtaining a new upload token.
183
- #
184
- # @return [Hash]
185
- def package_params
186
- { :package => { :fmt => fmt, :pkg_fmt => archive.extension } }
187
- end
188
-
189
- # Authorize the Chimps user for this upload.
190
- def authorize_for_upload!
191
- # FIXME we're actually just making a token request here...
192
- ask_for_token!
193
- end
194
-
195
- # Obtain an upload token from Infochimps.
196
- def ask_for_token!
197
- new_token = Request.new(token_path, :params => package_params, :signed => true).get
198
- if new_token.error?
199
- new_token.print
200
- raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
201
- else
202
- @token = new_token
203
- end
204
- end
205
-
206
- # Build the local archive if necessary.
207
- #
208
- # Will not build the local archive if there was only one local
209
- # input path and it was already compressed or an archive.
210
- def create_archive!
211
- return if skip_packaging?
212
- archiver = IMW::Tools::Archiver.new(archive.name, input_paths)
213
- result = archiver.package(archive.path)
214
- raise PackagingError.new("Unable to package files for upload. Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
215
- archiver.clean!
216
- end
217
-
218
- # Return a string built from the granted upload token that can
219
- # be fed to +curl+ in order to authenticate with and upload to
220
- # Amazon.
221
- #
222
- # @return [String]
223
- def upload_data
224
- data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
225
- data << ["-F file=@#{archive.path}"]
226
- data.join(' ')
227
- end
228
-
229
- # Upload the data.
230
- #
231
- # Uses +curl+ for the transfer.
232
- def upload!
233
- progress_meter = Chimps.verbose? ? '' : '-s -S'
234
- command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
235
- raise UploadError.new("Failed to upload #{archive.path} to Infochimps") unless IMW.system(command)
236
- end
237
-
238
- # Return a hash of parameters used to create a new Package at
239
- # Infochimps corresonding to the upload.
240
- #
241
- # @return [Hash]
242
- def package_data
243
- { :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension, :summary => summarizer.summary, :token_timestamp => token['timestamp'] } }
244
- end
245
-
246
- # Make a final POST request to Infochimps, creating the final
247
- # resource.
248
- def notify_infochimps!
249
- package_creation_response = Request.new(package_creation_path, :signed => true, :data => package_data).post
250
- package_creation_response.print
251
- raise UploadError.new("Unable to notify Infochimps of newly uploaded data.") if package_creation_response.error?
252
- end
253
-
254
- protected
255
- # Require IMW and match the IMW logger to the Chimps logger.
256
- def require_imw
257
- begin
258
- require 'imw'
259
- rescue LoadError
260
- raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
261
- end
262
- IMW.verbose = Chimps.verbose?
263
- end
264
-
265
- end
266
- end
267
- end