chimps 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/chimps +1 -0
- data/lib/chimps/commands/base.rb +6 -9
- data/lib/chimps/commands/upload.rb +9 -3
- data/lib/chimps/config.rb +32 -3
- data/lib/chimps/workflows/up.rb +149 -0
- data/lib/chimps/workflows/upload/bundler.rb +248 -0
- data/lib/chimps/workflows/upload/notifier.rb +59 -0
- data/lib/chimps/workflows/upload/token.rb +77 -0
- data/lib/chimps/workflows/upload/uploader.rb +51 -0
- data/lib/chimps/workflows.rb +2 -1
- data/spec/chimps/workflows/upload/bundler_spec.rb +75 -0
- data/spec/chimps/workflows/upload/token_spec.rb +6 -0
- data/spec/spec_helper.rb +15 -2
- metadata +11 -3
- data/lib/chimps/workflows/uploader.rb +0 -267
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.5
|
data/bin/chimps
CHANGED
data/lib/chimps/commands/base.rb
CHANGED
@@ -39,7 +39,7 @@ module Chimps
|
|
39
39
|
@argv = argv
|
40
40
|
run_options_definers
|
41
41
|
parse_command_line!
|
42
|
-
|
42
|
+
Chimps.boot!
|
43
43
|
end
|
44
44
|
|
45
45
|
# The name of this command, including the
|
@@ -69,14 +69,6 @@ module Chimps
|
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
72
|
-
# Ensure that certain options (verbosity, log file) that can be
|
73
|
-
# passed on the command-line override those stored in a
|
74
|
-
# configuration file (if present).
|
75
|
-
def resolve_options!
|
76
|
-
Chimps::Config.load # load defaults from config file
|
77
|
-
Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
|
78
|
-
end
|
79
|
-
|
80
72
|
# Run all methods beginning with +define+ and ending with +option+
|
81
73
|
# or +options+.
|
82
74
|
#
|
@@ -107,6 +99,11 @@ module Chimps
|
|
107
99
|
on("-l", "--log-file PATH", "Use the given path to log Chimps output (`-' is interpreted as $stdout).") do |path|
|
108
100
|
Chimps::COMMAND_LINE_OPTIONS[:log_file] = path # don't expand_path as it might be a `-'
|
109
101
|
end
|
102
|
+
|
103
|
+
on("-q", "--skip-plugins", "Don't load plugins from Chimps::CONFIG[:plugins] directory.") do |bool|
|
104
|
+
Chimps::CONFIG[:skip_plugins] = true
|
105
|
+
end
|
106
|
+
|
110
107
|
end
|
111
108
|
|
112
109
|
# Run this command.
|
@@ -16,6 +16,12 @@ sensible name in the current directory but can also be customized.
|
|
16
16
|
|
17
17
|
If the only file to be packaged is already a package (.zip, .tar,
|
18
18
|
.tar.gz, &c.) then it will not be packaged again.
|
19
|
+
|
20
|
+
Supplied paths are allowed to be remote files so someting like
|
21
|
+
|
22
|
+
chimps upload my-dataset path/to/local/file.txt http://my-site.com/path/to/remote/file.txt
|
23
|
+
|
24
|
+
will work.
|
19
25
|
EOF
|
20
26
|
|
21
27
|
# The path to the archive
|
@@ -34,10 +40,10 @@ EOF
|
|
34
40
|
argv.first
|
35
41
|
end
|
36
42
|
|
37
|
-
# A list of
|
43
|
+
# A list of paths to upload.
|
38
44
|
#
|
39
45
|
# @return [Array<String>]
|
40
|
-
def
|
46
|
+
def paths
|
41
47
|
raise CLIError.new("Must provide some paths to upload") if argv.length < 2
|
42
48
|
argv[1..-1]
|
43
49
|
end
|
@@ -55,7 +61,7 @@ EOF
|
|
55
61
|
|
56
62
|
# Upload the data.
|
57
63
|
def execute!
|
58
|
-
Chimps::Workflows::
|
64
|
+
Chimps::Workflows::Up.new(:dataset => dataset, :archive => archive, :paths => paths, :fmt => fmt).execute!.print
|
59
65
|
end
|
60
66
|
end
|
61
67
|
end
|
data/lib/chimps/config.rb
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
module Chimps
|
2
2
|
|
3
|
+
# Load all configuration, load plugins, and resolve options.
|
4
|
+
def self.boot!
|
5
|
+
Chimps::Config.load
|
6
|
+
Chimps::Config.load_plugins
|
7
|
+
Chimps::Config.resolve_options!
|
8
|
+
end
|
9
|
+
|
3
10
|
# Options that can be overridden by the command-line.
|
4
11
|
COMMAND_LINE_OPTIONS = {
|
5
12
|
:identity_file => File.expand_path(ENV["CHIMPS_RC"] || "~/.chimps"),
|
@@ -16,7 +23,8 @@ module Chimps
|
|
16
23
|
:site => {
|
17
24
|
:host => ENV["CHIMPS_HOST"] || 'http://infochimps.org'
|
18
25
|
},
|
19
|
-
:timestamp_format => "%Y-%m-%d_%H-%M-%S"
|
26
|
+
:timestamp_format => "%Y-%m-%d_%H-%M-%S",
|
27
|
+
:plugins => ["/usr/local/share/chimps"]
|
20
28
|
}
|
21
29
|
|
22
30
|
# Is Chimps in verbose mode?
|
@@ -35,7 +43,14 @@ module Chimps
|
|
35
43
|
|
36
44
|
# Defines methods to load the Chimps configuration.
|
37
45
|
module Config
|
38
|
-
|
46
|
+
|
47
|
+
# Ensure that certain options (verbosity, log file) that can be
|
48
|
+
# passed on the command-line override those stored in a
|
49
|
+
# configuration file (if present).
|
50
|
+
def self.resolve_options!
|
51
|
+
Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
|
52
|
+
end
|
53
|
+
|
39
54
|
# The root of the Chimps source base.
|
40
55
|
#
|
41
56
|
# @return [String]
|
@@ -43,6 +58,17 @@ module Chimps
|
|
43
58
|
File.expand_path File.join(File.dirname(__FILE__), '../..')
|
44
59
|
end
|
45
60
|
|
61
|
+
# Require all ruby files in the directory
|
62
|
+
# Chimps::CONFIG[:plugins].
|
63
|
+
def self.load_plugins
|
64
|
+
return if Chimps::CONFIG[:skip_plugins]
|
65
|
+
plugin_dirs = Chimps::CONFIG[:plugins]
|
66
|
+
return if plugin_dirs.blank?
|
67
|
+
plugin_dirs.each do |dir|
|
68
|
+
Dir[File.expand_path(dir) + "/*.rb"].each { |plugin| require plugin }
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
46
72
|
# Load the configuration settings from the configuration/identity
|
47
73
|
# file.
|
48
74
|
def self.load
|
@@ -50,8 +76,11 @@ module Chimps
|
|
50
76
|
if File.exist?(COMMAND_LINE_OPTIONS[:identity_file])
|
51
77
|
require 'yaml'
|
52
78
|
YAML.load_file(COMMAND_LINE_OPTIONS[:identity_file]).each_pair do |key, value|
|
53
|
-
|
79
|
+
case
|
80
|
+
when value.is_a?(Hash) && CONFIG.include?(key)
|
54
81
|
CONFIG[key].merge!(value)
|
82
|
+
when value.is_a?(Array) && CONFIG.include?(key)
|
83
|
+
CONFIG[key] += value
|
55
84
|
else
|
56
85
|
CONFIG[key] = value
|
57
86
|
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
|
4
|
+
# A namespace for classes which handle each step of the
|
5
|
+
# BundleAndUpload workflow.
|
6
|
+
module Upload
|
7
|
+
autoload :UploadToken, 'chimps/workflows/upload/token'
|
8
|
+
autoload :Bundler, 'chimps/workflows/upload/bundler'
|
9
|
+
autoload :Uploader, 'chimps/workflows/upload/uploader'
|
10
|
+
autoload :Notifier, 'chimps/workflows/upload/notifier'
|
11
|
+
end
|
12
|
+
|
13
|
+
# Uploads data to Infochimps by first asking for authorization,
|
14
|
+
# creating an archive, obtaining a token, uploading data, and
|
15
|
+
# notifying Infochimps.
|
16
|
+
#
|
17
|
+
# A helper object from Chimps::Workflows::Upload is delegated to
|
18
|
+
# for each step:
|
19
|
+
#
|
20
|
+
# - authorization & obtaining a token: Chimps::Workflows::Upload::UploadToken
|
21
|
+
# - creating an archive: Chimps::Workflows::Upload::Bundler
|
22
|
+
# - uploading data: Chimps::Workflows::Upload::Uploader
|
23
|
+
# - notifying Infochimps: Chimps::Workflows::Upload::Notifier
|
24
|
+
class Up
|
25
|
+
|
26
|
+
# The ID or handle of the dataset to upload data to.
|
27
|
+
attr_accessor :dataset
|
28
|
+
|
29
|
+
# An array of paths to files and directories to package into an
|
30
|
+
# archive.
|
31
|
+
attr_accessor :paths
|
32
|
+
|
33
|
+
# The format to annotate the upload with.
|
34
|
+
attr_accessor :fmt
|
35
|
+
|
36
|
+
# The path to the archive to create when uploading.
|
37
|
+
attr_accessor :archive
|
38
|
+
|
39
|
+
# Create a new Uploader from the given parameters.
|
40
|
+
#
|
41
|
+
# If <tt>:fmt</tt> is provided it will be used as the data
|
42
|
+
# format to annotate the upload with. If not, Chimps will try
|
43
|
+
# to guess.
|
44
|
+
#
|
45
|
+
# @param [Hash] options
|
46
|
+
# @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
|
47
|
+
# @option options [Array<String>] paths the paths to aggregate and upload
|
48
|
+
# @option options [String, IMW::Resource] archive (Chimps::Workflows::Upload::Bundler#default_archive_path) the path to the archive to create
|
49
|
+
# @option options [String] fmt the data format to annotate the upload with
|
50
|
+
def initialize options={}
|
51
|
+
self.dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
|
52
|
+
self.paths = options[:paths]
|
53
|
+
self.archive = options[:archive]
|
54
|
+
self.fmt = options[:fmt]
|
55
|
+
end
|
56
|
+
|
57
|
+
# Upload data to Infochimps by first asking for authorization,
|
58
|
+
# creating an archive, obtaining a token, uploading data, and
|
59
|
+
# notifying Infochimps.
|
60
|
+
def execute!
|
61
|
+
authorize_for_upload!
|
62
|
+
bundle!
|
63
|
+
ask_for_token!
|
64
|
+
upload!
|
65
|
+
notify_infochimps!
|
66
|
+
end
|
67
|
+
|
68
|
+
#
|
69
|
+
# == Helper Objects ==
|
70
|
+
#
|
71
|
+
|
72
|
+
# The token authorizing an upload.
|
73
|
+
#
|
74
|
+
# @return [Chimps::Workflows::Upload::UploadToken]
|
75
|
+
def authorization_token
|
76
|
+
@authorization_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset)
|
77
|
+
end
|
78
|
+
|
79
|
+
# The bundler that will aggregate data for the upload.
|
80
|
+
#
|
81
|
+
# @return [Chimps::Workflows::Upload::Bundler]
|
82
|
+
def bundler
|
83
|
+
@bundler ||= Chimps::Workflows::Upload::Bundler.new(dataset, paths, :fmt => fmt, :archive => archive)
|
84
|
+
end
|
85
|
+
|
86
|
+
# The token consumed for an upload.
|
87
|
+
#
|
88
|
+
# @return [Chimps::Workflows::Upload::UploadToken]
|
89
|
+
def upload_token
|
90
|
+
@upload_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset, :fmt => bundler.fmt, :pkg_fmt => bundler.pkg_fmt)
|
91
|
+
end
|
92
|
+
|
93
|
+
# The uploader that will actually send data to Infochimps.
|
94
|
+
#
|
95
|
+
# @return [Chimps::Workflows::Upload::Uploader]
|
96
|
+
def uploader
|
97
|
+
@uploader ||= Chimps::Workflows::Upload::Uploader.new(upload_token, bundler)
|
98
|
+
end
|
99
|
+
|
100
|
+
# The notifier that will inform Infochimps of the new data.
|
101
|
+
#
|
102
|
+
# @return [Chimps::Workflows::Upload::Notifier]
|
103
|
+
def notifier
|
104
|
+
@notifier ||= Chimps::Workflows::Upload::Notifier.new(upload_token, bundler)
|
105
|
+
end
|
106
|
+
|
107
|
+
#
|
108
|
+
# == Actions ==
|
109
|
+
#
|
110
|
+
|
111
|
+
# Authorize the Chimps user for this upload.
|
112
|
+
#
|
113
|
+
# Delegates to Chimps::Workflows::Upload::UploadToken
|
114
|
+
def authorize_for_upload!
|
115
|
+
authorization_token.get
|
116
|
+
end
|
117
|
+
|
118
|
+
# Bundle the data together.
|
119
|
+
#
|
120
|
+
# Delegates to Chimps::Workflows::Upload::Bundler
|
121
|
+
def bundle!
|
122
|
+
bundler.bundle!
|
123
|
+
end
|
124
|
+
|
125
|
+
# Obtain an upload token from Infochimps.
|
126
|
+
#
|
127
|
+
# Delegates to Chimps::Workflows::Upload::UploadToken
|
128
|
+
def ask_for_token!
|
129
|
+
upload_token.get
|
130
|
+
end
|
131
|
+
|
132
|
+
# Upload the data to Infochimps.
|
133
|
+
#
|
134
|
+
# Delegates to Chimps::Workflows::Upload::Uploader
|
135
|
+
def upload!
|
136
|
+
uploader.upload!
|
137
|
+
end
|
138
|
+
|
139
|
+
# Make a final POST request to Infochimps, creating the final
|
140
|
+
# resource.
|
141
|
+
#
|
142
|
+
# @return [Chimps::Response]
|
143
|
+
def notify_infochimps!
|
144
|
+
notifier.post
|
145
|
+
end
|
146
|
+
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,248 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
module Upload
|
4
|
+
|
5
|
+
# Encapsulates the process of analyzing and bundling input
|
6
|
+
# paths.
|
7
|
+
class Bundler
|
8
|
+
|
9
|
+
#
|
10
|
+
# == Initialization & Attributes
|
11
|
+
#
|
12
|
+
|
13
|
+
# Instantiate a new Bundler for bundling +paths+ as a package
|
14
|
+
# for +dataset+.
|
15
|
+
#
|
16
|
+
# Each input path can be either a String or an IMW::Resource
|
17
|
+
# identifying a local or remote resource to bundle into an
|
18
|
+
# upload package for Infochimps (remote resources will be
|
19
|
+
# first copied to the local filesystem by IMW).
|
20
|
+
#
|
21
|
+
# If no format is given the format will be guessed by IMW.
|
22
|
+
#
|
23
|
+
# If no archive is given the archive path will be set to a
|
24
|
+
# timestamped name in the current directory, see
|
25
|
+
# Bundler#default_archive_path.
|
26
|
+
#
|
27
|
+
# @param [String, Integer] dataset the ID or slug of an existing Infochimps dataset
|
28
|
+
# @param [Array<String, IMW::Resource>] paths
|
29
|
+
# @param [Hash] options
|
30
|
+
# @option options [String] fmt the format (csv, tsv, xls, &c.) of the data being uploaded
|
31
|
+
# @option options [String, IMW::Resource] archive the path to the local archive to package the input paths into
|
32
|
+
def initialize dataset, paths, options={}
|
33
|
+
require_imw
|
34
|
+
@dataset = dataset
|
35
|
+
self.paths = paths
|
36
|
+
if options[:fmt]
|
37
|
+
self.fmt = options[:fmt]
|
38
|
+
end
|
39
|
+
if options[:archive]
|
40
|
+
self.archive = options[:archive]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# The dataset this bundler is processing data for.
|
45
|
+
attr_accessor :dataset
|
46
|
+
|
47
|
+
# The paths this bundler is processing.
|
48
|
+
attr_reader :paths
|
49
|
+
|
50
|
+
# The resources this bundler is processing.
|
51
|
+
#
|
52
|
+
# Resources are IMW::Resource objects built from this
|
53
|
+
# Bundler's paths.
|
54
|
+
attr_reader :resources
|
55
|
+
|
56
|
+
# Set the paths for this Bundler.
|
57
|
+
#
|
58
|
+
# If only one input path is given and it is already an archive
|
59
|
+
# or a compressed file then no packaging will be attempted.
|
60
|
+
# Otherwise the input paths will be packaged together
|
61
|
+
#
|
62
|
+
# @param [Array<String, IMW::Resource>] new_paths
|
63
|
+
def paths= new_paths
|
64
|
+
raise PackagingError.new("Must provide at least one path to upload.") if new_paths.blank?
|
65
|
+
@paths, @resources = [], []
|
66
|
+
|
67
|
+
new_paths.each do |path|
|
68
|
+
resource = IMW.open(path)
|
69
|
+
resource.should_exist!("Cannot bundle.") if resource.is_local?
|
70
|
+
@paths << path
|
71
|
+
@resources << resource
|
72
|
+
end
|
73
|
+
|
74
|
+
if resources.size == 1
|
75
|
+
potential_package = resources.first
|
76
|
+
if potential_package.is_local? && potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
|
77
|
+
self.archive = potential_package
|
78
|
+
@skip_packaging = true
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
# The format of the data being bundled.
|
84
|
+
attr_writer :fmt
|
85
|
+
|
86
|
+
# The format of the data being bundled.
|
87
|
+
#
|
88
|
+
# Will make a guess using IMW::Tools::Summarizer if no format
|
89
|
+
# is given.
|
90
|
+
def fmt
|
91
|
+
@fmt ||= summarizer.most_common_data_format
|
92
|
+
end
|
93
|
+
|
94
|
+
# The archive this bundler will build for uploading to
|
95
|
+
# Infochimps.
|
96
|
+
#
|
97
|
+
# @return [IMW::Resource]
|
98
|
+
def archive
|
99
|
+
return @archive if @archive
|
100
|
+
self.archive = default_archive_path
|
101
|
+
self.archive
|
102
|
+
end
|
103
|
+
|
104
|
+
# Set the path to the archive that will be built.
|
105
|
+
#
|
106
|
+
# The given +path+ must represent a compressed file or archive
|
107
|
+
# (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
|
108
|
+
# <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
|
109
|
+
# extension).
|
110
|
+
#
|
111
|
+
# Additionally, if multiple local paths are being packaged, the
|
112
|
+
# given +path+ must be an archive (not simply <tt>.bz2</tt> or
|
113
|
+
# <tt>.gz</tt> extensions).
|
114
|
+
#
|
115
|
+
# @param [String, IMW::Resource] path_or_obj the path or IMW::Resource object pointing to the archive to use
|
116
|
+
def archive= path_or_obj
|
117
|
+
potential_package = IMW.open(path_or_obj)
|
118
|
+
raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file") unless potential_package.is_compressed? || potential_package.is_archive?
|
119
|
+
raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if resources.size > 1 && !potential_package.is_archive?
|
120
|
+
@archive = potential_package
|
121
|
+
end
|
122
|
+
|
123
|
+
# Return the package format of this bundler's archive, i.e. -
|
124
|
+
# its extension.
|
125
|
+
#
|
126
|
+
# @return [String]
|
127
|
+
def pkg_fmt
|
128
|
+
archive.extension
|
129
|
+
end
|
130
|
+
|
131
|
+
# Return the total size of the package after aggregating and
|
132
|
+
# packaging.
|
133
|
+
#
|
134
|
+
# @return [Integer]
|
135
|
+
def size
|
136
|
+
archive.size
|
137
|
+
end
|
138
|
+
|
139
|
+
# Return summary information about the package prepared by the
|
140
|
+
# bundler.
|
141
|
+
#
|
142
|
+
# @return [Hash]
|
143
|
+
def summary
|
144
|
+
summarizer.summary
|
145
|
+
end
|
146
|
+
|
147
|
+
# Bundle the data for this bundler together.
|
148
|
+
def bundle!
|
149
|
+
return if skip_packaging?
|
150
|
+
result = archiver.package(archive.path)
|
151
|
+
raise PackagingError.new("Unable to package files for upload. Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
|
152
|
+
archiver.clean!
|
153
|
+
end
|
154
|
+
|
155
|
+
#
|
156
|
+
# == Helper Objects ==
|
157
|
+
#
|
158
|
+
|
159
|
+
# The IMW::Tools::Archiver responsible for packaging files
|
160
|
+
# into a local archive.
|
161
|
+
#
|
162
|
+
# @return [IMW::Tools::Archiver]
|
163
|
+
def archiver
|
164
|
+
@archiver ||= IMW::Tools::Archiver.new(archive.name, paths_to_bundle)
|
165
|
+
end
|
166
|
+
|
167
|
+
# Return the summarizer responsible for summarizing data on this
|
168
|
+
# upload.
|
169
|
+
#
|
170
|
+
# @return [IMW::Tools::Summarizer]
|
171
|
+
def summarizer
|
172
|
+
@summarizer ||= IMW::Tools::Summarizer.new(resources)
|
173
|
+
end
|
174
|
+
|
175
|
+
# Should the packaging step be skipped?
|
176
|
+
#
|
177
|
+
# This will happen if only one local input path was provided and
|
178
|
+
# it exists and is a compressed file or archive.
|
179
|
+
#
|
180
|
+
# @return [true, false]
|
181
|
+
def skip_packaging?
|
182
|
+
!! @skip_packaging
|
183
|
+
end
|
184
|
+
|
185
|
+
#
|
186
|
+
# == Paths & URLs ==
|
187
|
+
#
|
188
|
+
|
189
|
+
# The default path to the archive that will be built.
|
190
|
+
#
|
191
|
+
# Defaults to a file in the current directory named after the
|
192
|
+
# +dataset+'s ID or handle and the current time. The package
|
193
|
+
# format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
|
194
|
+
# size, see
|
195
|
+
# Chimps::Workflows::Upload::Bundler#default_archive_extension.
|
196
|
+
#
|
197
|
+
# @return [String]
|
198
|
+
def default_archive_path
|
199
|
+
# in current working directory...
|
200
|
+
"chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
|
201
|
+
end
|
202
|
+
|
203
|
+
# Use <tt>zip</tt> if the data is less than 500 MB in size and
|
204
|
+
# <tt>tar.bz2</tt> otherwise.
|
205
|
+
#
|
206
|
+
# @return ['tar.bz2', 'zip']
|
207
|
+
def default_archive_extension
|
208
|
+
summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
|
209
|
+
end
|
210
|
+
|
211
|
+
# The URL to the <tt>README-infochimps</tt> file on Infochimps'
|
212
|
+
# servers.
|
213
|
+
#
|
214
|
+
# @return [String]
|
215
|
+
def readme_url
|
216
|
+
File.join(Chimps::CONFIG[:site][:host], "/README-infochimps")
|
217
|
+
end
|
218
|
+
|
219
|
+
# The URL to the ICSS file for this dataset on Infochimps
|
220
|
+
# servers
|
221
|
+
def icss_url
|
222
|
+
File.join(Chimps::CONFIG[:site][:host], "datasets", "#{dataset}.yaml")
|
223
|
+
end
|
224
|
+
|
225
|
+
# Both the local paths and remote paths to package.
|
226
|
+
#
|
227
|
+
# @return [Array<String>]
|
228
|
+
def paths_to_bundle
|
229
|
+
paths + [readme_url, icss_url]
|
230
|
+
end
|
231
|
+
|
232
|
+
protected
|
233
|
+
# Require IMW and match the IMW logger to the Chimps logger.
|
234
|
+
def require_imw
|
235
|
+
begin
|
236
|
+
require 'imw'
|
237
|
+
rescue LoadError
|
238
|
+
raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
|
239
|
+
end
|
240
|
+
IMW.verbose = Chimps.verbose?
|
241
|
+
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
end
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
module Upload
|
4
|
+
|
5
|
+
# Encapsulates the process of notifying Infochimps of new data
|
6
|
+
# that's already been uploaded.
|
7
|
+
class Notifier
|
8
|
+
|
9
|
+
# The response from Infochimps to the request to create a
|
10
|
+
# package.
|
11
|
+
attr_accessor :response
|
12
|
+
|
13
|
+
# The upload token used for the upload.
|
14
|
+
attr_accessor :token
|
15
|
+
|
16
|
+
# The bundler responsible for the upload.
|
17
|
+
attr_accessor :bundler
|
18
|
+
|
19
|
+
def initialize token, bundler
|
20
|
+
self.token = token
|
21
|
+
self.bundler = bundler
|
22
|
+
end
|
23
|
+
|
24
|
+
# The path on Infochimps to submit package creation requests
|
25
|
+
# to.
|
26
|
+
#
|
27
|
+
# @return [String]
|
28
|
+
def path
|
29
|
+
"/datasets/#{bundler.dataset}/packages.json"
|
30
|
+
end
|
31
|
+
|
32
|
+
# Information about the uploaded data to pass to Infochimps
|
33
|
+
# when notifying.
|
34
|
+
#
|
35
|
+
# @return [Hash]
|
36
|
+
def data
|
37
|
+
{ :package => {:fmt => token['fmt'], :pkg_size => bundler.size, :pkg_fmt => bundler.pkg_fmt, :summary => bundler.summary, :token_timestamp => token['timestamp'] } }
|
38
|
+
end
|
39
|
+
|
40
|
+
# Make a request to notify Infochimps of the new data.
|
41
|
+
#
|
42
|
+
# @return [Chimps::Response]
|
43
|
+
def post
|
44
|
+
@response = Request.new(path, :signed => true, :data => data).post
|
45
|
+
if response.error?
|
46
|
+
response.print
|
47
|
+
raise UploadError.new("Unable to notify Infochimps of newly uploaded data.")
|
48
|
+
end
|
49
|
+
response
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
module Upload
|
4
|
+
|
5
|
+
# Encapsulates the process of obtaining an upload token for a
|
6
|
+
# dataset from Infochimps.
|
7
|
+
class UploadToken
|
8
|
+
|
9
|
+
# The ID or slug of the dataset for which to obtain an upload
|
10
|
+
# token.
|
11
|
+
attr_accessor :dataset
|
12
|
+
|
13
|
+
# The format (csv, xls, tsv, &c.) of the data in the upload.
|
14
|
+
attr_accessor :fmt
|
15
|
+
|
16
|
+
# The package format (zip, tar.bz2, &c.) of the data in the
|
17
|
+
# upload.
|
18
|
+
attr_accessor :pkg_fmt
|
19
|
+
|
20
|
+
# The response from Infochimps to the request for an upload
|
21
|
+
# token.
|
22
|
+
attr_accessor :response
|
23
|
+
|
24
|
+
# Instantiate a new UploadToken for the given +dataset+ with
|
25
|
+
# the given +fmt+ and +pkg_fmt+.
|
26
|
+
#
|
27
|
+
# @param [String,Integer] dataset the ID or slug of the dataset to upload data for
|
28
|
+
# @param [String] fmt the data format (csv, xls, tsv, &c.) of the data
|
29
|
+
# @param [String] pkg_fmt the package format (zip, tar.bz2, tar.gz, &c.) of the data
|
30
|
+
def initialize dataset, options={}
|
31
|
+
@dataset = dataset
|
32
|
+
@fmt = options[:fmt]
|
33
|
+
@pkg_fmt = options[:pkg_fmt]
|
34
|
+
end
|
35
|
+
|
36
|
+
# Delegate slicing to the returned response.
|
37
|
+
def [] param
|
38
|
+
response && response[param]
|
39
|
+
end
|
40
|
+
|
41
|
+
# The path on Infochimps to submit upload token requests to.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
def path
|
45
|
+
"/datasets/#{dataset}/packages/new.json"
|
46
|
+
end
|
47
|
+
|
48
|
+
# Parameters passed to Infochimps to request an upload token.
|
49
|
+
#
|
50
|
+
# @return [Hash]
|
51
|
+
def params
|
52
|
+
{ :package => { :fmt => fmt, :pkg_fmt => pkg_fmt } }
|
53
|
+
end
|
54
|
+
|
55
|
+
# Make the request to get an upload token from Infochimps
|
56
|
+
def get
|
57
|
+
@response = Request.new(path, :params => params, :signed => true).get
|
58
|
+
if response.error?
|
59
|
+
response.print
|
60
|
+
raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# Parses the 'url' property of the response from Infochimps to
|
65
|
+
# determine the bucket name.
|
66
|
+
#
|
67
|
+
# @return [String]
|
68
|
+
def bucket
|
69
|
+
File.basename(response['url'])
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
module Upload
|
4
|
+
|
5
|
+
# Encapsulates the process of uploading a package to Infochimps.
|
6
|
+
class Uploader
|
7
|
+
|
8
|
+
include Chimps::Utils::UsesCurl
|
9
|
+
|
10
|
+
# The token consumed when uploading.
|
11
|
+
attr_accessor :token
|
12
|
+
|
13
|
+
# The bundler from which to glean information about the upload.
|
14
|
+
attr_accessor :bundler
|
15
|
+
|
16
|
+
# Instantiate a new Uploader which will consume the given
|
17
|
+
# +token+ and upload data from the given +bundler+.
|
18
|
+
#
|
19
|
+
# @param [Chimps::Workflows::Upload::UploadToken] token
|
20
|
+
# @param [Chimps::Workflows::Upload::Bundler] bundler
|
21
|
+
def initialize token, bundler
|
22
|
+
self.token = token
|
23
|
+
self.bundler = bundler
|
24
|
+
end
|
25
|
+
|
26
|
+
# Return a string built from the granted upload token that can
|
27
|
+
# be fed to +curl+ in order to authenticate with and upload to
|
28
|
+
# Amazon.
|
29
|
+
#
|
30
|
+
# @return [String]
|
31
|
+
def upload_data
|
32
|
+
data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
|
33
|
+
data << ["-F file=@#{bundler.archive.path}"]
|
34
|
+
data.join(' ')
|
35
|
+
end
|
36
|
+
|
37
|
+
# Upload the data.
|
38
|
+
#
|
39
|
+
# Uses +curl+ for the transfer.
|
40
|
+
def upload!
|
41
|
+
progress_meter = Chimps.verbose? ? '' : '-s -S'
|
42
|
+
command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
|
43
|
+
puts command if Chimps.verbose?
|
44
|
+
raise UploadError.new("Failed to upload #{bundler.archive.path} to Infochimps") unless system(command)
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
data/lib/chimps/workflows.rb
CHANGED
@@ -3,7 +3,8 @@ module Chimps
|
|
3
3
|
# A module defining classes to handle complex workflows between the
|
4
4
|
# local machine and Infochimps' servers.
|
5
5
|
module Workflows
|
6
|
-
autoload :
|
6
|
+
autoload :Upload, 'chimps/workflows/up'
|
7
|
+
autoload :Up, 'chimps/workflows/up'
|
7
8
|
autoload :Downloader, 'chimps/workflows/downloader'
|
8
9
|
autoload :BatchUpdater, 'chimps/workflows/batch'
|
9
10
|
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../../../spec_helper')
|
2
|
+
|
3
|
+
describe Chimps::Workflows::Upload::Bundler do
|
4
|
+
|
5
|
+
before do
|
6
|
+
@dataset = 'foobar'
|
7
|
+
@extant_path = File.expand_path("extant_file.txt")
|
8
|
+
@non_extant_path = File.expand_path("non_extant_file.txt")
|
9
|
+
@archive_path = File.expand_path("archive.tar.bz2")
|
10
|
+
@extant_archive_path = File.expand_path("extant_archive.tar.bz2")
|
11
|
+
|
12
|
+
|
13
|
+
File.open(@extant_path, 'w') { |f| f.write("some content") }
|
14
|
+
File.open(@extant_archive_path, 'w') { |f| f.write("some, admittedly not very tar.bz2'ish, content") }
|
15
|
+
end
|
16
|
+
|
17
|
+
describe "setting the format of a bundle of input paths" do
|
18
|
+
it "should accept a format when given" do
|
19
|
+
bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :fmt => 'foobar')
|
20
|
+
bundler.fmt.should == 'foobar'
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should guess a format when one isn't given" do
|
24
|
+
bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path])
|
25
|
+
bundler.fmt.should == 'txt'
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe "setting the archive from a bundle of input paths" do
|
30
|
+
|
31
|
+
it "should automatically set the archive path when given no other information" do
|
32
|
+
bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path])
|
33
|
+
File.basename(bundler.archive.path).should =~ /^chimps_/
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should use a valid archive path when given one" do
|
37
|
+
bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :archive => 'foo.tar.bz2')
|
38
|
+
File.basename(bundler.archive.path).should == 'foo.tar.bz2'
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should raise an error when given a non-package or compressed-file archive path" do
|
42
|
+
lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path], :archive => 'foo.txt') }.should raise_error(Chimps::PackagingError)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should raise an error when given a compressed-file archive path with multiple input paths" do
|
46
|
+
lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path, @extant_archive_path], :archive => 'foo.bz2') }.should raise_error(Chimps::PackagingError)
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
describe "processing input paths" do
|
52
|
+
|
53
|
+
it "should raise an error when no paths are given" do
|
54
|
+
lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, []) }.should raise_error(Chimps::PackagingError)
|
55
|
+
end
|
56
|
+
|
57
|
+
it "should raise an error when given a local path which doesn't exist" do
|
58
|
+
lambda { Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_path, @non_extant_path]) }.should raise_error(IMW::PathError)
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should set its archive path and skip packaging when passed a single, extant archive path" do
|
62
|
+
bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_archive_path])
|
63
|
+
bundler.skip_packaging?.should be_true
|
64
|
+
bundler.archive.path.should == @extant_archive_path
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should prefer the explicitly passed in archive path to the implicitly seleced archive path when passed a 1-path input array consisting of an archive as well as the :archive option" do
|
68
|
+
bundler = Chimps::Workflows::Upload::Bundler.new(@dataset, [@extant_archive_path], :archive => "foo.tar.bz2")
|
69
|
+
File.basename(bundler.archive.path).should == 'foo.tar.bz2'
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
75
|
+
|
data/spec/spec_helper.rb
CHANGED
@@ -9,9 +9,22 @@ require 'chimps'
|
|
9
9
|
|
10
10
|
Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
|
11
11
|
|
12
|
+
module Chimps
|
13
|
+
module Test
|
14
|
+
TMP_DIR = "/tmp/chimps_test" unless defined?(TMP_DIR)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
12
18
|
Spec::Runner.configure do |config|
|
13
19
|
config.include Chimps::Test::CustomMatchers
|
14
|
-
end
|
15
20
|
|
21
|
+
config.before do
|
22
|
+
FileUtils.mkdir_p Chimps::Test::TMP_DIR
|
23
|
+
FileUtils.cd Chimps::Test::TMP_DIR
|
24
|
+
end
|
16
25
|
|
17
|
-
|
26
|
+
config.after do
|
27
|
+
FileUtils.rm_rf Chimps::Test::TMP_DIR
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chimps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dhruv Bansal
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-06-
|
12
|
+
date: 2010-06-15 00:00:00 -05:00
|
13
13
|
default_executable: chimps
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -112,12 +112,18 @@ files:
|
|
112
112
|
- lib/chimps/workflows.rb
|
113
113
|
- lib/chimps/workflows/batch.rb
|
114
114
|
- lib/chimps/workflows/downloader.rb
|
115
|
-
- lib/chimps/workflows/
|
115
|
+
- lib/chimps/workflows/up.rb
|
116
|
+
- lib/chimps/workflows/upload/bundler.rb
|
117
|
+
- lib/chimps/workflows/upload/notifier.rb
|
118
|
+
- lib/chimps/workflows/upload/token.rb
|
119
|
+
- lib/chimps/workflows/upload/uploader.rb
|
116
120
|
- spec/chimps/cli_spec.rb
|
117
121
|
- spec/chimps/commands/base_spec.rb
|
118
122
|
- spec/chimps/commands/list_spec.rb
|
119
123
|
- spec/chimps/response_spec.rb
|
120
124
|
- spec/chimps/typewriter_spec.rb
|
125
|
+
- spec/chimps/workflows/upload/bundler_spec.rb
|
126
|
+
- spec/chimps/workflows/upload/token_spec.rb
|
121
127
|
- spec/spec_helper.rb
|
122
128
|
- spec/support/custom_matchers.rb
|
123
129
|
has_rdoc: true
|
@@ -150,6 +156,8 @@ specification_version: 3
|
|
150
156
|
summary: Chimps! is a Ruby wrapper and command-line interface for the Infochimps APIs (http://infochimps.org/api, http://api.infochimps.com)
|
151
157
|
test_files:
|
152
158
|
- spec/spec_helper.rb
|
159
|
+
- spec/chimps/workflows/upload/bundler_spec.rb
|
160
|
+
- spec/chimps/workflows/upload/token_spec.rb
|
153
161
|
- spec/chimps/commands/base_spec.rb
|
154
162
|
- spec/chimps/commands/list_spec.rb
|
155
163
|
- spec/chimps/typewriter_spec.rb
|
@@ -1,267 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Workflows
|
3
|
-
|
4
|
-
# Uploads data to Infochimps by first asking for authorization,
|
5
|
-
# creating an archive, obtaining a token, uploading data, and
|
6
|
-
# notifying Infochimps.
|
7
|
-
class Uploader
|
8
|
-
|
9
|
-
include Chimps::Utils::UsesCurl
|
10
|
-
|
11
|
-
# The ID or handle of the dataset to upload data to.
|
12
|
-
attr_reader :dataset
|
13
|
-
|
14
|
-
# An array of paths to local files and directories to package
|
15
|
-
# into an archive.
|
16
|
-
attr_reader :local_paths
|
17
|
-
|
18
|
-
# The format to annotate the upload with.
|
19
|
-
attr_reader :fmt
|
20
|
-
|
21
|
-
# The archive to upload.
|
22
|
-
attr_reader :archive
|
23
|
-
|
24
|
-
# The token authorizing an upload.
|
25
|
-
attr_reader :token
|
26
|
-
|
27
|
-
# Upload data to Infochimps by first asking for authorization,
|
28
|
-
# creating an archive, obtaining a token, uploading data, and
|
29
|
-
# notifying Infochimps.
|
30
|
-
def execute!
|
31
|
-
authorize_for_upload!
|
32
|
-
create_archive!
|
33
|
-
ask_for_token!
|
34
|
-
upload!
|
35
|
-
notify_infochimps!
|
36
|
-
end
|
37
|
-
|
38
|
-
# Create a new Uploader from the given parameters.
|
39
|
-
#
|
40
|
-
# If <tt>:fmt</tt> is provided it will be used as the data
|
41
|
-
# format to annotate the upload with. If not, Chimps will try
|
42
|
-
# to guess.
|
43
|
-
#
|
44
|
-
# @param [Hash] options
|
45
|
-
# @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
|
46
|
-
# @option options [Array<String>] local_paths the local paths to bundle into an archive
|
47
|
-
# @option options [String, IMW::Resource] archive the path to the archive to create (defaults to Chimps::Workflows::Uploader#default_archive_path)
|
48
|
-
# @option options [String] fmt the data format to annotate the upload with
|
49
|
-
def initialize options={}
|
50
|
-
require_imw
|
51
|
-
@dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
|
52
|
-
self.local_paths = options[:local_paths] # must come before self.archive=
|
53
|
-
self.archive = options[:archive]
|
54
|
-
self.fmt = options[:fmt]
|
55
|
-
end
|
56
|
-
|
57
|
-
# Set the local paths to upload for this dataset.
|
58
|
-
#
|
59
|
-
# If only one local path is given and it is already an archive
|
60
|
-
# or a compressed file then no further packaging will be done by
|
61
|
-
# this uploader.
|
62
|
-
#
|
63
|
-
# @param [Array<String, IMW::Resource>] paths
|
64
|
-
def local_paths= paths
|
65
|
-
raise PackagingError.new("Must provide at least one local path to upload.") if paths.blank?
|
66
|
-
paths.each { |path| raise PackagingError.new("Invalid path, #{path}") unless File.exist?(File.expand_path(path)) }
|
67
|
-
@local_paths = paths
|
68
|
-
if @local_paths.size == 1
|
69
|
-
potential_package = IMW.open(paths.first)
|
70
|
-
if potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
|
71
|
-
self.archive = potential_package
|
72
|
-
@skip_packaging = true
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
# Should the packaging step be skipped?
|
78
|
-
#
|
79
|
-
# This will happen if only one local input path was provided and
|
80
|
-
# it exists and is a compressed file or archive.
|
81
|
-
#
|
82
|
-
# @return [true, false]
|
83
|
-
def skip_packaging?
|
84
|
-
!! @skip_packaging
|
85
|
-
end
|
86
|
-
|
87
|
-
# Set the path to the archive that will be built.
|
88
|
-
#
|
89
|
-
# The given +path+ must represent a compressed file or archive
|
90
|
-
# (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
|
91
|
-
# <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
|
92
|
-
# extension).
|
93
|
-
#
|
94
|
-
# Additionally, if multiple local paths are being packaged, the
|
95
|
-
# given +path+ must be an archive (not simply <tt>.bz2</tt> or
|
96
|
-
# <tt>.gz</tt> extensions).
|
97
|
-
#
|
98
|
-
# @param [String, IMW::Resource] path the archive or path to use
|
99
|
-
def archive= path=nil
|
100
|
-
return @archive if @archive
|
101
|
-
potential_package = IMW.open(path || default_archive_path)
|
102
|
-
raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file") unless potential_package.is_compressed? || potential_package.is_archive?
|
103
|
-
raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if local_paths.size > 1 && !potential_package.is_archive?
|
104
|
-
@archive = potential_package
|
105
|
-
end
|
106
|
-
|
107
|
-
# Return the summarizer responsible for summarizing data on this
|
108
|
-
# upload.
|
109
|
-
#
|
110
|
-
# @return [IMW::Tools::Summarizer]
|
111
|
-
def summarizer
|
112
|
-
@summarizer ||= IMW::Tools::Summarizer.new(local_paths)
|
113
|
-
end
|
114
|
-
|
115
|
-
# Set the data format to annotate the upload with.
|
116
|
-
#
|
117
|
-
# If not provided, Chimps will use the Infinite Monkeywrench
|
118
|
-
# (IMW) to try and guess the data format. See
|
119
|
-
# IMW::Tools::Summarizer for more information.
|
120
|
-
def fmt= new_fmt=nil
|
121
|
-
@fmt ||= new_fmt || summarizer.most_common_data_format
|
122
|
-
end
|
123
|
-
|
124
|
-
# The default path to the archive that will be built.
|
125
|
-
#
|
126
|
-
# Defaults to a file in the current directory named after the
|
127
|
-
# +dataset+'s ID or handle and the current time. The package
|
128
|
-
# format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
|
129
|
-
# size, see
|
130
|
-
# Chimps::Workflows::Uploader#default_archive_extension.
|
131
|
-
#
|
132
|
-
# @return [String]
|
133
|
-
def default_archive_path
|
134
|
-
# in current working directory...
|
135
|
-
"chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
|
136
|
-
end
|
137
|
-
|
138
|
-
# Use <tt>zip</tt> if the data is less than 500 MB in size and
|
139
|
-
# <tt>tar.bz2</tt> otherwise.
|
140
|
-
#
|
141
|
-
# @return ['tar.bz2', 'zip']
|
142
|
-
def default_archive_extension
|
143
|
-
summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
|
144
|
-
end
|
145
|
-
|
146
|
-
# The URL to the <tt>README-infochimps</tt> file on Infochimps'
|
147
|
-
# servers.
|
148
|
-
#
|
149
|
-
# @return [String]
|
150
|
-
def readme_url
|
151
|
-
File.join(Chimps::CONFIG[:site][:host], "/README-infochimps")
|
152
|
-
end
|
153
|
-
|
154
|
-
# The URL to the ICSS file for this dataset on Infochimps
|
155
|
-
# servers
|
156
|
-
def icss_url
|
157
|
-
File.join(Chimps::CONFIG[:site][:host], "datasets", "#{dataset}.yaml")
|
158
|
-
end
|
159
|
-
|
160
|
-
# Both the local paths and remote paths to package.
|
161
|
-
#
|
162
|
-
# @return [Array<String>]
|
163
|
-
def input_paths
|
164
|
-
raise PackaginError.new("Must specify some local paths to package") if local_paths.blank?
|
165
|
-
local_paths + [readme_url, icss_url]
|
166
|
-
end
|
167
|
-
|
168
|
-
# The path on Infochimps to submit upload token requests to.
|
169
|
-
#
|
170
|
-
# @return [String]
|
171
|
-
def token_path
|
172
|
-
"/datasets/#{dataset}/packages/new.json"
|
173
|
-
end
|
174
|
-
|
175
|
-
# The path on Infochimps to submit package creation requests to.
|
176
|
-
#
|
177
|
-
# @return [String]
|
178
|
-
def package_creation_path
|
179
|
-
"/datasets/#{dataset}/packages.json"
|
180
|
-
end
|
181
|
-
|
182
|
-
# Return a hash of params for obtaining a new upload token.
|
183
|
-
#
|
184
|
-
# @return [Hash]
|
185
|
-
def package_params
|
186
|
-
{ :package => { :fmt => fmt, :pkg_fmt => archive.extension } }
|
187
|
-
end
|
188
|
-
|
189
|
-
# Authorize the Chimps user for this upload.
|
190
|
-
def authorize_for_upload!
|
191
|
-
# FIXME we're actually just making a token request here...
|
192
|
-
ask_for_token!
|
193
|
-
end
|
194
|
-
|
195
|
-
# Obtain an upload token from Infochimps.
|
196
|
-
def ask_for_token!
|
197
|
-
new_token = Request.new(token_path, :params => package_params, :signed => true).get
|
198
|
-
if new_token.error?
|
199
|
-
new_token.print
|
200
|
-
raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
|
201
|
-
else
|
202
|
-
@token = new_token
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
206
|
-
# Build the local archive if necessary.
|
207
|
-
#
|
208
|
-
# Will not build the local archive if there was only one local
|
209
|
-
# input path and it was already compressed or an archive.
|
210
|
-
def create_archive!
|
211
|
-
return if skip_packaging?
|
212
|
-
archiver = IMW::Tools::Archiver.new(archive.name, input_paths)
|
213
|
-
result = archiver.package(archive.path)
|
214
|
-
raise PackagingError.new("Unable to package files for upload. Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
|
215
|
-
archiver.clean!
|
216
|
-
end
|
217
|
-
|
218
|
-
# Return a string built from the granted upload token that can
|
219
|
-
# be fed to +curl+ in order to authenticate with and upload to
|
220
|
-
# Amazon.
|
221
|
-
#
|
222
|
-
# @return [String]
|
223
|
-
def upload_data
|
224
|
-
data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
|
225
|
-
data << ["-F file=@#{archive.path}"]
|
226
|
-
data.join(' ')
|
227
|
-
end
|
228
|
-
|
229
|
-
# Upload the data.
|
230
|
-
#
|
231
|
-
# Uses +curl+ for the transfer.
|
232
|
-
def upload!
|
233
|
-
progress_meter = Chimps.verbose? ? '' : '-s -S'
|
234
|
-
command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
|
235
|
-
raise UploadError.new("Failed to upload #{archive.path} to Infochimps") unless IMW.system(command)
|
236
|
-
end
|
237
|
-
|
238
|
-
# Return a hash of parameters used to create a new Package at
|
239
|
-
# Infochimps corresponding to the upload.
|
240
|
-
#
|
241
|
-
# @return [Hash]
|
242
|
-
def package_data
|
243
|
-
{ :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension, :summary => summarizer.summary, :token_timestamp => token['timestamp'] } }
|
244
|
-
end
|
245
|
-
|
246
|
-
# Make a final POST request to Infochimps, creating the final
|
247
|
-
# resource.
|
248
|
-
def notify_infochimps!
|
249
|
-
package_creation_response = Request.new(package_creation_path, :signed => true, :data => package_data).post
|
250
|
-
package_creation_response.print
|
251
|
-
raise UploadError.new("Unable to notify Infochimps of newly uploaded data.") if package_creation_response.error?
|
252
|
-
end
|
253
|
-
|
254
|
-
protected
|
255
|
-
# Require IMW and match the IMW logger to the Chimps logger.
|
256
|
-
def require_imw
|
257
|
-
begin
|
258
|
-
require 'imw'
|
259
|
-
rescue LoadError
|
260
|
-
raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
|
261
|
-
end
|
262
|
-
IMW.verbose = Chimps.verbose?
|
263
|
-
end
|
264
|
-
|
265
|
-
end
|
266
|
-
end
|
267
|
-
end
|