chimps 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -9
- data/Gemfile.lock +14 -10
- data/README.rdoc +146 -240
- data/Rakefile +4 -33
- data/VERSION +1 -1
- data/lib/chimps/config.rb +35 -21
- data/lib/chimps/{utils/error.rb → error.rb} +1 -12
- data/lib/chimps/query_request.rb +67 -0
- data/lib/chimps/request.rb +82 -108
- data/lib/chimps/response.rb +62 -22
- data/lib/chimps/utils/typewriter.rb +90 -0
- data/lib/chimps/utils/uses_curl.rb +22 -12
- data/lib/chimps/utils.rb +50 -6
- data/lib/chimps/workflows/download.rb +72 -0
- data/lib/chimps/workflows/upload.rb +113 -0
- data/lib/chimps.rb +12 -12
- data/spec/chimps/query_request_spec.rb +44 -0
- data/spec/chimps/request_spec.rb +92 -0
- data/spec/chimps/response_spec.rb +0 -1
- data/spec/chimps/workflows/download_spec.rb +48 -0
- data/spec/spec_helper.rb +2 -19
- metadata +46 -91
- data/.document +0 -5
- data/.gitignore +0 -32
- data/CHANGELOG.textile +0 -4
- data/bin/chimps +0 -5
- data/lib/chimps/cli.rb +0 -28
- data/lib/chimps/commands/base.rb +0 -65
- data/lib/chimps/commands/batch.rb +0 -40
- data/lib/chimps/commands/create.rb +0 -31
- data/lib/chimps/commands/destroy.rb +0 -26
- data/lib/chimps/commands/download.rb +0 -46
- data/lib/chimps/commands/help.rb +0 -100
- data/lib/chimps/commands/list.rb +0 -41
- data/lib/chimps/commands/query.rb +0 -82
- data/lib/chimps/commands/search.rb +0 -48
- data/lib/chimps/commands/show.rb +0 -30
- data/lib/chimps/commands/test.rb +0 -39
- data/lib/chimps/commands/update.rb +0 -34
- data/lib/chimps/commands/upload.rb +0 -50
- data/lib/chimps/commands.rb +0 -125
- data/lib/chimps/typewriter.rb +0 -349
- data/lib/chimps/utils/log.rb +0 -48
- data/lib/chimps/utils/uses_model.rb +0 -34
- data/lib/chimps/utils/uses_yaml_data.rb +0 -93
- data/lib/chimps/workflows/batch.rb +0 -127
- data/lib/chimps/workflows/downloader.rb +0 -102
- data/lib/chimps/workflows/up.rb +0 -149
- data/lib/chimps/workflows/upload/bundler.rb +0 -249
- data/lib/chimps/workflows/upload/notifier.rb +0 -59
- data/lib/chimps/workflows/upload/token.rb +0 -77
- data/lib/chimps/workflows/upload/uploader.rb +0 -51
- data/lib/chimps/workflows.rb +0 -12
- data/spec/chimps/typewriter_spec.rb +0 -114
- data/spec/chimps/workflows/upload/bundler_spec.rb +0 -75
- data/spec/chimps/workflows/upload/token_spec.rb +0 -6
@@ -1,102 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Workflows
|
3
|
-
|
4
|
-
# Downloads data from Infochimps by first making a request for a
|
5
|
-
# download token and, if granted one, proceeding to download the
|
6
|
-
# data.
|
7
|
-
#
|
8
|
-
# Will download the latest package for a given dataset, optionally
|
9
|
-
# constrained to have given data and package formats.
|
10
|
-
class Downloader
|
11
|
-
|
12
|
-
include Chimps::Utils::UsesCurl
|
13
|
-
|
14
|
-
# The token received from Infochimps which contains a signed URL
|
15
|
-
# for the download.
|
16
|
-
attr_reader :token
|
17
|
-
|
18
|
-
# The ID or handle of the dataset to download.
|
19
|
-
attr_reader :dataset
|
20
|
-
|
21
|
-
# The data format of the data to download.
|
22
|
-
attr_reader :fmt
|
23
|
-
|
24
|
-
# The package format of the data to download.
|
25
|
-
attr_reader :pkg_fmt
|
26
|
-
|
27
|
-
# Create a new Downloader with the given parameters.
|
28
|
-
#
|
29
|
-
# @param [Hash] options
|
30
|
-
# @option options [String, Integer] dataset the ID or handle of the dataset to download
|
31
|
-
# @option options [String] fmt the data format to download
|
32
|
-
# @option options [String] pkg_fmt the package format to download
|
33
|
-
# @option options [String] local_path the local path to which the data will be downloaded
|
34
|
-
# @return [Chimps::Workflows::Downloader]
|
35
|
-
def initialize options={}
|
36
|
-
@dataset = options[:dataset]
|
37
|
-
@fmt = options[:fmt]
|
38
|
-
@pkg_fmt = options[:pkg_fmt]
|
39
|
-
@local_path = options[:local_path]
|
40
|
-
end
|
41
|
-
|
42
|
-
# Params to send for the token.
|
43
|
-
#
|
44
|
-
# @return [Hash]
|
45
|
-
def token_params
|
46
|
-
{ :download_token => { :dataset_id => dataset, :fmt => fmt, :pkg_fmt => pkg_fmt} }
|
47
|
-
end
|
48
|
-
|
49
|
-
# Ask for a download token for this dataset/package. If no or
|
50
|
-
# an invalid token is obtained, raise an error.
|
51
|
-
def ask_for_token!
|
52
|
-
new_token = Request.new(download_tokens_path, :data => token_params, :sign_if_possible => true).post
|
53
|
-
if new_token.error?
|
54
|
-
new_token.print
|
55
|
-
raise AuthenticationError.new("Unauthorized to download dataset #{dataset}")
|
56
|
-
else
|
57
|
-
@token = new_token
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# Path to submit download token requests to.
|
62
|
-
#
|
63
|
-
# @return [String]
|
64
|
-
def download_tokens_path
|
65
|
-
"/download_tokens"
|
66
|
-
end
|
67
|
-
|
68
|
-
# The signed, remote URL from where the data can be downloaded.
|
69
|
-
#
|
70
|
-
# @return [String]
|
71
|
-
def download_url
|
72
|
-
token['download_token']['package']['url']
|
73
|
-
end
|
74
|
-
|
75
|
-
# The local path where the downloaded data will be put.
|
76
|
-
#
|
77
|
-
# Defaults to the current directory and the default basename of
|
78
|
-
# the downloaded package.
|
79
|
-
#
|
80
|
-
# @return [String, nil]
|
81
|
-
def local_path
|
82
|
-
@local_path || token["download_token"]["package"]["basename"]
|
83
|
-
end
|
84
|
-
|
85
|
-
# Issue the download request.
|
86
|
-
#
|
87
|
-
# Uses +curl+ for the data transfer.
|
88
|
-
def download!
|
89
|
-
command = "#{curl} -o '#{local_path}' '#{download_url}'"
|
90
|
-
puts command if Chimps.verbose?
|
91
|
-
system(command)
|
92
|
-
end
|
93
|
-
|
94
|
-
# Ask for a token and perform the download.
|
95
|
-
def execute!
|
96
|
-
ask_for_token!
|
97
|
-
download!
|
98
|
-
end
|
99
|
-
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
data/lib/chimps/workflows/up.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Workflows
|
3
|
-
|
4
|
-
# A namespace for classes which handle each step of the
|
5
|
-
# BundleAndUpload workflow.
|
6
|
-
module Upload
|
7
|
-
autoload :UploadToken, 'chimps/workflows/upload/token'
|
8
|
-
autoload :Bundler, 'chimps/workflows/upload/bundler'
|
9
|
-
autoload :Uploader, 'chimps/workflows/upload/uploader'
|
10
|
-
autoload :Notifier, 'chimps/workflows/upload/notifier'
|
11
|
-
end
|
12
|
-
|
13
|
-
# Uploads data to Infochimps by first asking for authorization,
|
14
|
-
# creating an archive, obtaining a token, uploading data, and
|
15
|
-
# notifying Infochimps.
|
16
|
-
#
|
17
|
-
# A helper object from Chimps::Workflows::Upload is delegated to
|
18
|
-
# for each step:
|
19
|
-
#
|
20
|
-
# - authorization & obtaining a token: Chimps::Workflows::Upload::UploadToken
|
21
|
-
# - creating an archive: Chimps::Workflows::Upload::Bundler
|
22
|
-
# - uploading data: Chimps::Workflows::Upload::Uploader
|
23
|
-
# - notifying Infochimps: Chimps::Workflows::Upload::Notifier
|
24
|
-
class Up
|
25
|
-
|
26
|
-
# The ID or handle of the dataset to upload data to.
|
27
|
-
attr_accessor :dataset
|
28
|
-
|
29
|
-
# An array of paths to files and directories to package into an
|
30
|
-
# archive.
|
31
|
-
attr_accessor :paths
|
32
|
-
|
33
|
-
# The format to annotate the upload with.
|
34
|
-
attr_accessor :fmt
|
35
|
-
|
36
|
-
# The path to the archive to create when uploading.
|
37
|
-
attr_accessor :archive
|
38
|
-
|
39
|
-
# Create a new Uploader from the given parameters.
|
40
|
-
#
|
41
|
-
# If <tt>:fmt</tt> is provided it will be used as the data
|
42
|
-
# format to annotate the upload with. If not, Chimps will try
|
43
|
-
# to guess.
|
44
|
-
#
|
45
|
-
# @param [Hash] options
|
46
|
-
# @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
|
47
|
-
# @option options [Array<String>] paths the paths to aggregate and upload
|
48
|
-
# @option options [String, IMW::Resource] archive (Chimps::Workflows::Upload::Bundler#default_archive_path) the path to the archive to create
|
49
|
-
# @option options [String] fmt the data format to annotate the upload with
|
50
|
-
def initialize options={}
|
51
|
-
self.dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
|
52
|
-
self.paths = options[:paths]
|
53
|
-
self.archive = options[:archive]
|
54
|
-
self.fmt = options[:fmt]
|
55
|
-
end
|
56
|
-
|
57
|
-
# Upload data to Infochimps by first asking for authorization,
|
58
|
-
# creating an archive, obtaining a token, uploading data, and
|
59
|
-
# notifying Infochimps.
|
60
|
-
def execute!
|
61
|
-
authorize_for_upload!
|
62
|
-
bundle!
|
63
|
-
ask_for_token!
|
64
|
-
upload!
|
65
|
-
notify_infochimps!
|
66
|
-
end
|
67
|
-
|
68
|
-
#
|
69
|
-
# == Helper Objects ==
|
70
|
-
#
|
71
|
-
|
72
|
-
# The token authorizing an upload.
|
73
|
-
#
|
74
|
-
# @return [Chimps::Workflows::Upload::UploadToken]
|
75
|
-
def authorization_token
|
76
|
-
@authorization_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset)
|
77
|
-
end
|
78
|
-
|
79
|
-
# The bundler that will aggregate data for the upload.
|
80
|
-
#
|
81
|
-
# @return [Chimps::Workflows::Upload::Bundler]
|
82
|
-
def bundler
|
83
|
-
@bundler ||= Chimps::Workflows::Upload::Bundler.new(dataset, paths, :fmt => fmt, :archive => archive)
|
84
|
-
end
|
85
|
-
|
86
|
-
# The token consumed for an upload.
|
87
|
-
#
|
88
|
-
# @return [Chimps::Workflows::Upload::UploadToken]
|
89
|
-
def upload_token
|
90
|
-
@upload_token ||= Chimps::Workflows::Upload::UploadToken.new(dataset, :fmt => bundler.fmt, :pkg_fmt => bundler.pkg_fmt)
|
91
|
-
end
|
92
|
-
|
93
|
-
# The uploader that will actually send data to Infochimps.
|
94
|
-
#
|
95
|
-
# @return [Chimps::Workflows::Upload::Uploader]
|
96
|
-
def uploader
|
97
|
-
@uploader ||= Chimps::Workflows::Upload::Uploader.new(upload_token, bundler)
|
98
|
-
end
|
99
|
-
|
100
|
-
# The notifier that will inform Infochimps of the new data.
|
101
|
-
#
|
102
|
-
# @return [Chimps::Workflows::Upload::Notifier]
|
103
|
-
def notifier
|
104
|
-
@notifier ||= Chimps::Workflows::Upload::Notifier.new(upload_token, bundler)
|
105
|
-
end
|
106
|
-
|
107
|
-
#
|
108
|
-
# == Actions ==
|
109
|
-
#
|
110
|
-
|
111
|
-
# Authorize the Chimps user for this upload.
|
112
|
-
#
|
113
|
-
# Delegates to Chimps::Workflows::Upload::UploadToken
|
114
|
-
def authorize_for_upload!
|
115
|
-
authorization_token.get
|
116
|
-
end
|
117
|
-
|
118
|
-
# Bundle the data together.
|
119
|
-
#
|
120
|
-
# Delegates to Chimps::Workflows::Upload::Bundler
|
121
|
-
def bundle!
|
122
|
-
bundler.bundle!
|
123
|
-
end
|
124
|
-
|
125
|
-
# Obtain an upload token from Infochimps.
|
126
|
-
#
|
127
|
-
# Delegates to Chimps::Workflows::Upload::UploadToken
|
128
|
-
def ask_for_token!
|
129
|
-
upload_token.get
|
130
|
-
end
|
131
|
-
|
132
|
-
# Upload the data to Infochimps.
|
133
|
-
#
|
134
|
-
# Delegates to Chimps::Workflows::Upload::Uploader
|
135
|
-
def upload!
|
136
|
-
uploader.upload!
|
137
|
-
end
|
138
|
-
|
139
|
-
# Make a final POST request to Infochimps, creating the final
|
140
|
-
# resource.
|
141
|
-
#
|
142
|
-
# @return [Chimps::Response]
|
143
|
-
def notify_infochimps!
|
144
|
-
notifier.post
|
145
|
-
end
|
146
|
-
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
@@ -1,249 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Workflows
|
3
|
-
module Upload
|
4
|
-
|
5
|
-
# Encapsulates the process of analyzing and bundling input
|
6
|
-
# paths.
|
7
|
-
class Bundler
|
8
|
-
|
9
|
-
#
|
10
|
-
# == Initialization & Attributes
|
11
|
-
#
|
12
|
-
|
13
|
-
# Instantiate a new Bundler for bundling +paths+ as a package
|
14
|
-
# for +dataset+.
|
15
|
-
#
|
16
|
-
# Each input path can be either a String or an IMW::Resource
|
17
|
-
# identifying a local or remote resource to bundle into an
|
18
|
-
# upload package for Infochimps (remote resources will be
|
19
|
-
# first copied to the local filesystem by IMW).
|
20
|
-
#
|
21
|
-
# If no format is given the format will be guessed by IMW.
|
22
|
-
#
|
23
|
-
# If no archive is given the archive path will be set to a
|
24
|
-
# timestamped name in the current directory, see
|
25
|
-
# Bundler#default_archive_path.
|
26
|
-
#
|
27
|
-
# @param [String, Integer] dataset the ID or slug of an existing Infochimps dataset
|
28
|
-
# @param [Array<String, IMW::Resource>] paths
|
29
|
-
# @param [Hash] options
|
30
|
-
# @option options [String] fmt the format (csv, tsv, xls, &c.) of the data being uploaded
|
31
|
-
# @option options [String, IMW::Resource] archive the path to the local archive to package the input paths into
|
32
|
-
def initialize dataset, paths, options={}
|
33
|
-
require_imw
|
34
|
-
@dataset = dataset
|
35
|
-
self.paths = paths
|
36
|
-
if options[:fmt]
|
37
|
-
self.fmt = options[:fmt]
|
38
|
-
end
|
39
|
-
if options[:archive]
|
40
|
-
self.archive = options[:archive]
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
# The dataset this bundler is processing data for.
|
45
|
-
attr_accessor :dataset
|
46
|
-
|
47
|
-
# The paths this bundler is processing.
|
48
|
-
attr_reader :paths
|
49
|
-
|
50
|
-
# The resources this bundler is processing.
|
51
|
-
#
|
52
|
-
# Resources are IMW::Resource objects built from this
|
53
|
-
# Bundler's paths.
|
54
|
-
attr_reader :resources
|
55
|
-
|
56
|
-
# Set the paths for this Bundler.
|
57
|
-
#
|
58
|
-
# If only one input path is given and it is already an archive
|
59
|
-
# or a compressed file then no packaging will be attempted.
|
60
|
-
# Otherwise the input paths will be packaged together
|
61
|
-
#
|
62
|
-
# @param [Array<String, IMW::Resource>] new_paths
|
63
|
-
def paths= new_paths
|
64
|
-
raise PackagingError.new("Must provide at least one path to upload.") if new_paths.blank?
|
65
|
-
@paths, @resources = [], []
|
66
|
-
|
67
|
-
new_paths.each do |path|
|
68
|
-
resource = IMW.open(path)
|
69
|
-
resource.should_exist!("Cannot bundle.") if resource.is_local?
|
70
|
-
@paths << path
|
71
|
-
@resources << resource
|
72
|
-
end
|
73
|
-
|
74
|
-
if resources.size == 1
|
75
|
-
potential_package = resources.first
|
76
|
-
if potential_package.is_local? && potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
|
77
|
-
self.archive = potential_package
|
78
|
-
@skip_packaging = true
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
# The format of the data being bundled.
|
84
|
-
attr_writer :fmt
|
85
|
-
|
86
|
-
# The format of the data being bundled.
|
87
|
-
#
|
88
|
-
# Will make a guess using IMW::Tools::Summarizer if no format
|
89
|
-
# is given.
|
90
|
-
def fmt
|
91
|
-
@fmt ||= summarizer.most_common_data_format
|
92
|
-
end
|
93
|
-
|
94
|
-
# The archive this bundler will build for uploading to
|
95
|
-
# Infochimps.
|
96
|
-
#
|
97
|
-
# @return [IMW::Resource]
|
98
|
-
def archive
|
99
|
-
return @archive if @archive
|
100
|
-
self.archive = default_archive_path
|
101
|
-
self.archive
|
102
|
-
end
|
103
|
-
|
104
|
-
# Set the path to the archive that will be built.
|
105
|
-
#
|
106
|
-
# The given +path+ must represent a compressed file or archive
|
107
|
-
# (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
|
108
|
-
# <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
|
109
|
-
# extension).
|
110
|
-
#
|
111
|
-
# Additionally, if multiple local paths are being packaged, the
|
112
|
-
# given +path+ must be an archive (not simply <tt>.bz2</tt> or
|
113
|
-
# <tt>.gz</tt> extensions).
|
114
|
-
#
|
115
|
-
# @param [String, IMW::Resource] path_or_obj the path or IMW::Resource object pointing to the archive to use
|
116
|
-
def archive= path_or_obj
|
117
|
-
potential_package = IMW.open(path_or_obj)
|
118
|
-
raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file") unless potential_package.is_compressed? || potential_package.is_archive?
|
119
|
-
raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if resources.size > 1 && !potential_package.is_archive?
|
120
|
-
@archive = potential_package
|
121
|
-
end
|
122
|
-
|
123
|
-
# Return the package format of this bundler's archive, i.e. -
|
124
|
-
# its extension.
|
125
|
-
#
|
126
|
-
# @return [String]
|
127
|
-
def pkg_fmt
|
128
|
-
archive.extension
|
129
|
-
end
|
130
|
-
|
131
|
-
# Return the total size of the package after aggregating and
|
132
|
-
# packaging.
|
133
|
-
#
|
134
|
-
# @return [Integer]
|
135
|
-
def size
|
136
|
-
archive.size
|
137
|
-
end
|
138
|
-
|
139
|
-
# Return summary information about the package prepared by the
|
140
|
-
# bundler.
|
141
|
-
#
|
142
|
-
# @return [Hash]
|
143
|
-
def summary
|
144
|
-
summarizer.summary
|
145
|
-
end
|
146
|
-
|
147
|
-
# Bundle the data for this bundler together.
|
148
|
-
def bundle!
|
149
|
-
return if skip_packaging?
|
150
|
-
result = archiver.package(archive.path)
|
151
|
-
raise PackagingError.new("Unable to package files for upload. Temporary files left in #{archiver.tmp_dir}") if result.is_a?(StandardError) || (!archiver.success?)
|
152
|
-
archiver.clean!
|
153
|
-
end
|
154
|
-
|
155
|
-
#
|
156
|
-
# == Helper Objects ==
|
157
|
-
#
|
158
|
-
|
159
|
-
# The IMW::Tools::Archiver responsible for packaging files
|
160
|
-
# into a local archive.
|
161
|
-
#
|
162
|
-
# @return [IMW::Tools::Archiver]
|
163
|
-
def archiver
|
164
|
-
@archiver ||= IMW::Tools::Archiver.new(archive.name, paths_to_bundle)
|
165
|
-
end
|
166
|
-
|
167
|
-
# Return the summarizer responsible for summarizing data on this
|
168
|
-
# upload.
|
169
|
-
#
|
170
|
-
# @return [IMW::Tools::Summarizer]
|
171
|
-
def summarizer
|
172
|
-
@summarizer ||= IMW::Tools::Summarizer.new(resources)
|
173
|
-
end
|
174
|
-
|
175
|
-
# Should the packaging step be skipped?
|
176
|
-
#
|
177
|
-
# This will happen if only one local input path was provided and
|
178
|
-
# it exists and is a compressed file or archive.
|
179
|
-
#
|
180
|
-
# @return [true, false]
|
181
|
-
def skip_packaging?
|
182
|
-
!! @skip_packaging
|
183
|
-
end
|
184
|
-
|
185
|
-
#
|
186
|
-
# == Paths & URLs ==
|
187
|
-
#
|
188
|
-
|
189
|
-
# The default path to the archive that will be built.
|
190
|
-
#
|
191
|
-
# Defaults to a file in the current directory named after the
|
192
|
-
# +dataset+'s ID or handle and the current time. The package
|
193
|
-
# format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
|
194
|
-
# size, see
|
195
|
-
# Chimps::Workflows::Upload::Bundler#default_archive_extension.
|
196
|
-
#
|
197
|
-
# @return [String]
|
198
|
-
def default_archive_path
|
199
|
-
# in current working directory...
|
200
|
-
"chimps_#{dataset}-#{Time.now.strftime(Chimps::Config[:timestamp_format])}.#{default_archive_extension}"
|
201
|
-
end
|
202
|
-
|
203
|
-
# Returns <tt>zip</tt> if the data is less than 500 MB in size and
|
204
|
-
# <tt>tar.bz2</tt> otherwise.
|
205
|
-
#
|
206
|
-
# @return ['tar.bz2', 'zip']
|
207
|
-
def default_archive_extension
|
208
|
-
summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
|
209
|
-
end
|
210
|
-
|
211
|
-
# The URL to the <tt>README-infochimps</tt> file on Infochimps'
|
212
|
-
# servers.
|
213
|
-
#
|
214
|
-
# @return [String]
|
215
|
-
def readme_url
|
216
|
-
File.join(Chimps::Config[:site][:host], "/README-infochimps")
|
217
|
-
end
|
218
|
-
|
219
|
-
# The URL to the ICSS file for this dataset on Infochimps
|
220
|
-
# servers
|
221
|
-
def icss_url
|
222
|
-
File.join(Chimps::Config[:site][:host], "datasets", "#{dataset}.yaml")
|
223
|
-
end
|
224
|
-
|
225
|
-
# Both the local paths and remote paths to package.
|
226
|
-
#
|
227
|
-
# @return [Array<String>]
|
228
|
-
def paths_to_bundle
|
229
|
-
paths + [readme_url, icss_url]
|
230
|
-
end
|
231
|
-
|
232
|
-
protected
|
233
|
-
# Require IMW and match the IMW logger to the Chimps logger.
|
234
|
-
def require_imw
|
235
|
-
begin
|
236
|
-
require 'imw'
|
237
|
-
IMW.log = Chimps.log
|
238
|
-
IMW.verbose = Chimps.verbose?
|
239
|
-
rescue LoadError
|
240
|
-
raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
|
241
|
-
end
|
242
|
-
end
|
243
|
-
|
244
|
-
end
|
245
|
-
|
246
|
-
end
|
247
|
-
end
|
248
|
-
end
|
249
|
-
|
@@ -1,59 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Workflows
|
3
|
-
module Upload
|
4
|
-
|
5
|
-
# Encapsulates the process of notifying Infochimps of new data
|
6
|
-
# that's already been uploaded.
|
7
|
-
class Notifier
|
8
|
-
|
9
|
-
# The response from Infochimps to the request to create a
|
10
|
-
# package.
|
11
|
-
attr_accessor :response
|
12
|
-
|
13
|
-
# The upload token used for the upload.
|
14
|
-
attr_accessor :token
|
15
|
-
|
16
|
-
# The bundler responsible for the upload.
|
17
|
-
attr_accessor :bundler
|
18
|
-
|
19
|
-
def initialize token, bundler
|
20
|
-
self.token = token
|
21
|
-
self.bundler = bundler
|
22
|
-
end
|
23
|
-
|
24
|
-
# The path on Infochimps to submit package creation requests
|
25
|
-
# to.
|
26
|
-
#
|
27
|
-
# @return [String]
|
28
|
-
def path
|
29
|
-
"/datasets/#{bundler.dataset}/packages.json"
|
30
|
-
end
|
31
|
-
|
32
|
-
# Information about the uploaded data to pass to Infochimps
|
33
|
-
# when notifying.
|
34
|
-
#
|
35
|
-
# @return [Hash]
|
36
|
-
def data
|
37
|
-
{ :package => {:fmt => token['fmt'], :pkg_size => bundler.size, :pkg_fmt => bundler.pkg_fmt, :raw_summary => bundler.summary, :token_timestamp => token['timestamp'] } }
|
38
|
-
end
|
39
|
-
|
40
|
-
# Make a request to notify Infochimps of the new data.
|
41
|
-
#
|
42
|
-
# @return [Chimps::Response]
|
43
|
-
def post
|
44
|
-
@response = Request.new(path, :signed => true, :data => data).post
|
45
|
-
if response.error?
|
46
|
-
response.print
|
47
|
-
raise UploadError.new("Unable to notify Infochimps of newly uploaded data.")
|
48
|
-
end
|
49
|
-
response
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
@@ -1,77 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Workflows
|
3
|
-
module Upload
|
4
|
-
|
5
|
-
# Encapsulates the process of obtaining an upload token for a
|
6
|
-
# dataset from Infochimps.
|
7
|
-
class UploadToken
|
8
|
-
|
9
|
-
# The ID or slug of the dataset for which to obtain an upload
|
10
|
-
# token.
|
11
|
-
attr_accessor :dataset
|
12
|
-
|
13
|
-
# The format (csv, xls, tsv, &c.) of the data in the upload.
|
14
|
-
attr_accessor :fmt
|
15
|
-
|
16
|
-
# The package format (zip, tar.bz2, &c.) of the data in the
|
17
|
-
# upload.
|
18
|
-
attr_accessor :pkg_fmt
|
19
|
-
|
20
|
-
# The response from Infochimps to the request for an upload
|
21
|
-
# token.
|
22
|
-
attr_accessor :response
|
23
|
-
|
24
|
-
# Instantiate a new UploadToken for the given +dataset+ with
|
25
|
-
# the given +fmt+ and +pkg_fmt+.
|
26
|
-
#
|
27
|
-
# @param [String,Integer] dataset the ID or slug of the dataset to upload data for
|
28
|
-
# @param [String] fmt the data format (csv, xls, tsv, &c.) of the data
|
29
|
-
# @param [String] pkg_fmt the package format (zip, tar.bz2, tar.gz, &c.) of the data
|
30
|
-
def initialize dataset, options={}
|
31
|
-
@dataset = dataset
|
32
|
-
@fmt = options[:fmt]
|
33
|
-
@pkg_fmt = options[:pkg_fmt]
|
34
|
-
end
|
35
|
-
|
36
|
-
# Delegate slicing to the returned response.
|
37
|
-
def [] param
|
38
|
-
response && response[param]
|
39
|
-
end
|
40
|
-
|
41
|
-
# The path on Infochimps to submit upload token requests to.
|
42
|
-
#
|
43
|
-
# @return [String]
|
44
|
-
def path
|
45
|
-
"/datasets/#{dataset}/packages/new.json"
|
46
|
-
end
|
47
|
-
|
48
|
-
# Parameters passed to Infochimps to request an upload token.
|
49
|
-
#
|
50
|
-
# @return [Hash]
|
51
|
-
def params
|
52
|
-
{ :package => { :fmt => fmt, :pkg_fmt => pkg_fmt } }
|
53
|
-
end
|
54
|
-
|
55
|
-
# Make the request to get an upload token from Infochimps
|
56
|
-
def get
|
57
|
-
@response = Request.new(path, :params => params, :signed => true).get
|
58
|
-
if response.error?
|
59
|
-
response.print
|
60
|
-
raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# Parses the 'url' property of the response from Infochimps to
|
65
|
-
# determine the bucket name.
|
66
|
-
#
|
67
|
-
# @return [String]
|
68
|
-
def bucket
|
69
|
-
File.basename(response['url'])
|
70
|
-
end
|
71
|
-
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
|
@@ -1,51 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
module Workflows
|
3
|
-
module Upload
|
4
|
-
|
5
|
-
# Encapsulates the process of uploading a package to Infochimps.
|
6
|
-
class Uploader
|
7
|
-
|
8
|
-
include Chimps::Utils::UsesCurl
|
9
|
-
|
10
|
-
# The token consumed when uploading.
|
11
|
-
attr_accessor :token
|
12
|
-
|
13
|
-
# The bundler from which to glean information about the upload.
|
14
|
-
attr_accessor :bundler
|
15
|
-
|
16
|
-
# Instantiate a new Uploader which will consume the given
|
17
|
-
# +token+ and upload data from the given +bundler+.
|
18
|
-
#
|
19
|
-
# @param [Chimps::Workflows::Upload::UploadToken] token
|
20
|
-
# @param [Chimps::Workflows::Upload::Bundler] bundler
|
21
|
-
def initialize token, bundler
|
22
|
-
self.token = token
|
23
|
-
self.bundler = bundler
|
24
|
-
end
|
25
|
-
|
26
|
-
# Return a string built from the granted upload token that can
|
27
|
-
# be fed to +curl+ in order to authenticate with and upload to
|
28
|
-
# Amazon.
|
29
|
-
#
|
30
|
-
# @return [String]
|
31
|
-
def upload_data
|
32
|
-
data = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map { |param| "-F #{param}='#{token[param]}'" }
|
33
|
-
data << ["-F file=@#{bundler.archive.path}"]
|
34
|
-
data.join(' ')
|
35
|
-
end
|
36
|
-
|
37
|
-
# Upload the data.
|
38
|
-
#
|
39
|
-
# Uses +curl+ for the transfer.
|
40
|
-
def upload!
|
41
|
-
progress_meter = Chimps.verbose? ? '' : '-s -S'
|
42
|
-
command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{token['url']}"
|
43
|
-
puts command if Chimps.verbose?
|
44
|
-
raise UploadError.new("Failed to upload #{bundler.archive.path} to Infochimps") unless system(command)
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
data/lib/chimps/workflows.rb
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
module Chimps
|
2
|
-
|
3
|
-
# A module defining classes to handle complex workflows between the
|
4
|
-
# local machine and Infochimps' servers.
|
5
|
-
module Workflows
|
6
|
-
autoload :Upload, 'chimps/workflows/up'
|
7
|
-
autoload :Up, 'chimps/workflows/up'
|
8
|
-
autoload :Downloader, 'chimps/workflows/downloader'
|
9
|
-
autoload :BatchUpdater, 'chimps/workflows/batch'
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|