chimps 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/LICENSE +674 -0
- data/README.rdoc +48 -0
- data/VERSION +1 -0
- data/bin/chimps +4 -0
- data/examples/batch.yaml +69 -0
- data/lib/chimps/cli.rb +102 -0
- data/lib/chimps/commands/base.rb +107 -0
- data/lib/chimps/commands/batch.rb +68 -0
- data/lib/chimps/commands/create.rb +33 -0
- data/lib/chimps/commands/destroy.rb +28 -0
- data/lib/chimps/commands/download.rb +76 -0
- data/lib/chimps/commands/help.rb +89 -0
- data/lib/chimps/commands/list.rb +54 -0
- data/lib/chimps/commands/query.rb +59 -0
- data/lib/chimps/commands/search.rb +59 -0
- data/lib/chimps/commands/show.rb +32 -0
- data/lib/chimps/commands/test.rb +40 -0
- data/lib/chimps/commands/update.rb +33 -0
- data/lib/chimps/commands/upload.rb +63 -0
- data/lib/chimps/commands.rb +46 -0
- data/lib/chimps/config.rb +57 -0
- data/lib/chimps/request.rb +302 -0
- data/lib/chimps/response.rb +146 -0
- data/lib/chimps/typewriter.rb +326 -0
- data/lib/chimps/utils/error.rb +40 -0
- data/lib/chimps/utils/extensions.rb +109 -0
- data/lib/chimps/utils/uses_curl.rb +26 -0
- data/lib/chimps/utils/uses_model.rb +51 -0
- data/lib/chimps/utils/uses_yaml_data.rb +94 -0
- data/lib/chimps/utils.rb +11 -0
- data/lib/chimps/workflows/batch.rb +127 -0
- data/lib/chimps/workflows/downloader.rb +102 -0
- data/lib/chimps/workflows/uploader.rb +238 -0
- data/lib/chimps/workflows.rb +11 -0
- data/lib/chimps.rb +22 -0
- data/spec/chimps/cli_spec.rb +22 -0
- data/spec/chimps/commands/base_spec.rb +25 -0
- data/spec/chimps/commands/list_spec.rb +25 -0
- data/spec/chimps/response_spec.rb +8 -0
- data/spec/chimps/typewriter_spec.rb +114 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/support/custom_matchers.rb +6 -0
- metadata +133 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Utils
|
3
|
+
module UsesYamlData
|
4
|
+
|
5
|
+
IGNORE_YAML_FILES_ON_COMMAND_LINE = false
|
6
|
+
|
7
|
+
attr_reader :data_file
|
8
|
+
|
9
|
+
def data
|
10
|
+
@data ||= merge_all *(data_from_stdin + data_from_file + data_from_command_line)
|
11
|
+
end
|
12
|
+
|
13
|
+
protected
|
14
|
+
|
15
|
+
# Merge all the given objects into a single Array or Hash.
#
# +nil+ entries are discarded first.  The remaining objects must all
# be of the same class: Arrays are concatenated in order and Hashes
# are merged in order, so later objects take precedence.
#
# @param [Array<Array, Hash, nil>] objs the objects to combine
# @return [Array, Hash, nil] the combined data, or +nil+ if only +nil+ was given
# @raise [CLIError] if the objects differ in class or are neither Arrays nor Hashes
def merge_all *objs
  objs = objs.compact
  return if objs.empty? # raising an error here is left to the caller
  klasses = objs.map(&:class).uniq
  raise CLIError.new("Mismatched YAML data types -- Hashes can only be combined with Hashes, Arrays with Arrays") if klasses.size > 1
  klass = klasses.first
  # Dispatch on the class itself instead of on +klass.new+: classes
  # such as Integer or Symbol cannot be instantiated, so scalar YAML
  # would raise NoMethodError rather than the CLIError below.
  if klass <= Array
    # greater precedence at the end so iterate in order
    objs.inject([]) { |merged, obj| merged.concat(obj) }
  elsif klass <= Hash
    # greater precedence at the end so iterate in order
    objs.inject({}) { |merged, obj| merged.merge!(obj) }
  else
    raise CLIError.new("Unsuitable YAML data type #{klass} -- can only combine Hashes and Arrays")
  end
end
|
39
|
+
|
40
|
+
# Collect the <tt>key=value</tt> arguments given on the command line.
#
# Arguments which do not look like <tt>key=value</tt> are ignored.
# Keys are downcased and symbolized; values are stripped of
# surrounding whitespace.
#
# @return [Array<Hash>] one single-pair Hash per matching argument
def params_from_command_line
  argv.map do |arg|
    next unless arg =~ /^(\w+) *=(.*)$/
    { $1.downcase.to_sym => $2.strip } # always a hash
  end.compact
end
|
49
|
+
|
50
|
+
# Load every command-line argument that is not a <tt>key=value</tt>
# pair as a YAML file.
#
# @return [Array] the parsed content of each file, in argument order
# @raise [CLIError] if an argument names a path which does not exist
def yaml_files_from_command_line
  argv.reject { |arg| arg =~ /^(\w+) *=(.*)$/ }.map do |arg|
    path = File.expand_path(arg)
    raise CLIError.new("No such path #{path}") unless File.exist?(path)
    # Block form so the file handle is closed once parsing is done.
    File.open(path) { |file| YAML.load(file) } # either a hash or an array
  end
end
|
60
|
+
|
61
|
+
def data_from_command_line
|
62
|
+
if self.class::IGNORE_YAML_FILES_ON_COMMAND_LINE
|
63
|
+
params_from_command_line
|
64
|
+
else
|
65
|
+
yaml_files_from_command_line + params_from_command_line
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def data_from_file
|
70
|
+
[data_file ? YAML.load_file(data_file) : nil]
|
71
|
+
end
|
72
|
+
|
73
|
+
def data_from_stdin
|
74
|
+
return [nil] unless $stdin.stat.size > 0
|
75
|
+
returning([]) do |d|
|
76
|
+
YAML.load_stream($stdin).each do |document|
|
77
|
+
d << document
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def define_data_options
|
83
|
+
on_tail("-d", "--data-file PATH", "Path to a file containing key=value data") do |p|
|
84
|
+
@data_file = File.expand_path(p)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def ensure_data_is_present!
|
89
|
+
raise CLIError.new("Must provide some data to send, either on the command line, from an input file, or by piping to STDIN. Try `chimps help #{name}'") unless data.present?
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
data/lib/chimps/utils.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'chimps/config'
|
2
|
+
require 'chimps/utils/extensions'
|
3
|
+
require 'chimps/utils/error'
|
4
|
+
|
5
|
+
module Chimps
|
6
|
+
module Utils
|
7
|
+
autoload :UsesCurl, 'chimps/utils/uses_curl'
|
8
|
+
autoload :UsesModel, 'chimps/utils/uses_model'
|
9
|
+
autoload :UsesYamlData, 'chimps/utils/uses_yaml_data'
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
|
4
|
+
# A class for performing batch updates/uploads to Infochimps.
|
5
|
+
#
|
6
|
+
# It works by taking YAML data describing many updates and
|
7
|
+
# performing a single batch API request with this data.
|
8
|
+
#
|
9
|
+
# The batch response is then parsed and analyzed and (given
|
10
|
+
# success or fearlessness) any necessary uploads are performed.
|
11
|
+
#
|
12
|
+
# Examples of the input data format can be found in the
|
13
|
+
# <tt>/examples</tt> directory of the Chimps distribution.
|
14
|
+
class BatchUpdater
|
15
|
+
|
16
|
+
# The data sent as a bulk update.
|
17
|
+
attr_reader :data
|
18
|
+
|
19
|
+
# The batch update response
|
20
|
+
attr_reader :batch_response
|
21
|
+
|
22
|
+
# The output file to store the bulk update response.
|
23
|
+
attr_reader :output_path
|
24
|
+
|
25
|
+
# Whether to upload even if there were errors on update.
|
26
|
+
attr_reader :upload_even_if_errors
|
27
|
+
|
28
|
+
# The data format to annotate the upload with.
|
29
|
+
#
|
30
|
+
# Chimps will try to guess if this isn't given.
|
31
|
+
attr_reader :fmt
|
32
|
+
|
33
|
+
# Create a new BatchUpdater with the given +data+ and +options+.
|
34
|
+
#
|
35
|
+
# The intermediate batch response can be saved at a file named
|
36
|
+
# by <tt>:output_path</tt>, though this isn't necessary.
|
37
|
+
#
|
38
|
+
# @param [Array] data an array of resource updates
|
39
|
+
# @param [Hash] options
|
40
|
+
# @option options [String] output_path path to store the batch response
|
41
|
+
# @option options [true, false] upload_even_if_errors whether to continue uploading in the presence of errors on update
|
42
|
+
# @option options [String] fmt the data format to annotate each upload with (see `chimps upload')
|
43
|
+
# @return [Chimps::Workflows::BatchUpdater]
|
44
|
+
def initialize data, options={}
|
45
|
+
@data = data
|
46
|
+
@output_path = options[:output_path]
|
47
|
+
@upload_even_if_errors = options[:upload_even_if_errors]
|
48
|
+
@fmt = options[:fmt]
|
49
|
+
end
|
50
|
+
|
51
|
+
# The path to submit batch update requests.
|
52
|
+
#
|
53
|
+
# @return [String]
|
54
|
+
def batch_path
|
55
|
+
"batch.json"
|
56
|
+
end
|
57
|
+
|
58
|
+
# Perform this batch update followed by the batch upload.
|
59
|
+
def execute!
|
60
|
+
batch_update!
|
61
|
+
batch_upload!
|
62
|
+
end
|
63
|
+
|
64
|
+
# Perform the batch update.
|
65
|
+
def batch_update!
|
66
|
+
@batch_response = Request.new(batch_path, :data => { :batch => data }, :authenticate => true).post
|
67
|
+
File.open(output_path, 'w') { |f| f.puts batch_response.body } if output_path
|
68
|
+
batch_response.print
|
69
|
+
end
|
70
|
+
|
71
|
+
# Were any of the updates performed during the batch update
|
72
|
+
# errors?
|
73
|
+
#
|
74
|
+
# @return [true, false]
|
75
|
+
def error?
|
76
|
+
batch_response['batch'].each do |response|
|
77
|
+
status = response['status']
|
78
|
+
return true unless ['created', 'updated'].include?(status)
|
79
|
+
end
|
80
|
+
false
|
81
|
+
end
|
82
|
+
|
83
|
+
# Did all of the updates performed in the batch update succeed?
|
84
|
+
#
|
85
|
+
# @return [true, false]
|
86
|
+
def success?
|
87
|
+
! error?
|
88
|
+
end
|
89
|
+
|
90
|
+
# Perform the batch upload.
|
91
|
+
#
|
92
|
+
# Will bail if the batch update had an error unless
|
93
|
+
# Chimps::Workflows::BatchUpdater#upload_even_if_errors returns
|
94
|
+
# true.
|
95
|
+
def batch_upload!
|
96
|
+
return unless success? || upload_even_if_errors
|
97
|
+
$stderr.puts("WARNING: continuing with uploads even though there were errors") unless success?
|
98
|
+
dataset_ids_and_local_paths.each do |id, local_paths|
|
99
|
+
Chimps::Workflows::Uploader.new(:dataset => id, :local_paths => local_paths, :fmt => fmt).execute!
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
protected
|
104
|
+
# Iterate through the batch response and return tuples
|
105
|
+
# consisting of an ID and an array of local paths to upload.
|
106
|
+
#
|
107
|
+
# Only datasets which were successfully created/updated,
|
108
|
+
# returned an ID, and had local_paths defined in the original
|
109
|
+
# batch update will be output.
|
110
|
+
#
|
111
|
+
# @return [Array<Array>]
|
112
|
+
def dataset_ids_and_local_paths
  batch_response['batch'].map do |response|
    next unless ['created', 'updated'].include?(response['status']) # skip errors
    # An entry may carry no resource at all; treat that like a
    # non-dataset entry instead of failing on nil['dataset'].
    resource = response['resource']
    dataset  = resource && resource['dataset']
    next unless dataset                 # skip unless it's a dataset
    id = dataset['id']
    next if id.blank?                   # skip unless it has an ID
    local_paths = response['local_paths']
    next if local_paths.blank?          # skip unless local_paths were defined
    [id, local_paths]
  end.compact
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
|
4
|
+
# Downloads data from Infochimps by first making a request for a
|
5
|
+
# download token and, if granted one, proceeding to download the
|
6
|
+
# data.
|
7
|
+
#
|
8
|
+
# Will download the latest package for a given dataset, optionally
|
9
|
+
# constrained to have given data and package formats.
|
10
|
+
class Downloader
|
11
|
+
|
12
|
+
include Chimps::Utils::UsesCurl
|
13
|
+
|
14
|
+
# The token received from Infochimps which contains a signed URL
|
15
|
+
# for the download.
|
16
|
+
attr_reader :token
|
17
|
+
|
18
|
+
# The ID or handle of the dataset to download.
|
19
|
+
attr_reader :dataset
|
20
|
+
|
21
|
+
# The data format of the data to download.
|
22
|
+
attr_reader :fmt
|
23
|
+
|
24
|
+
# The package format of the data to download.
|
25
|
+
attr_reader :pkg_fmt
|
26
|
+
|
27
|
+
# Create a new Downloader with the given parameters.
|
28
|
+
#
|
29
|
+
# @param [Hash] options
|
30
|
+
# @option options [String, Integer] dataset the ID or handle of the dataset to download
|
31
|
+
# @option options [String] fmt the data format to download
|
32
|
+
# @option options [String] pkg_fmt the package format to download
|
33
|
+
# @option options [String] local_path the local path to which the data will be downloaded
|
34
|
+
# @return [Chimps::Workflows::Downloader]
|
35
|
+
def initialize options={}
|
36
|
+
@dataset = options[:dataset]
|
37
|
+
@fmt = options[:fmt]
|
38
|
+
@pkg_fmt = options[:pkg_fmt]
|
39
|
+
@local_path = options[:local_path]
|
40
|
+
end
|
41
|
+
|
42
|
+
# Params to send for the token.
|
43
|
+
#
|
44
|
+
# @return [Hash]
|
45
|
+
def token_params
|
46
|
+
{ :download_token => { :dataset_id => dataset, :fmt => fmt, :pkg_fmt => pkg_fmt} }
|
47
|
+
end
|
48
|
+
|
49
|
+
# Ask for a download token for this dataset/package. If no or
|
50
|
+
# an invalid token is obtained, raise an error.
|
51
|
+
def ask_for_token!
|
52
|
+
new_token = Request.new(download_tokens_path, :data => token_params, :sign_if_possible => true).post
|
53
|
+
if new_token.error?
|
54
|
+
new_token.print
|
55
|
+
raise AuthenticationError.new("Unauthorized to download dataset #{dataset}")
|
56
|
+
else
|
57
|
+
@token = new_token
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Path to submit download token requests to.
|
62
|
+
#
|
63
|
+
# @return [String]
|
64
|
+
def download_tokens_path
|
65
|
+
"/download_tokens"
|
66
|
+
end
|
67
|
+
|
68
|
+
# The signed, remote URL from where the data can be downloaded.
|
69
|
+
#
|
70
|
+
# @return [String]
|
71
|
+
def download_url
|
72
|
+
token['download_token']['package']['url']
|
73
|
+
end
|
74
|
+
|
75
|
+
# The local path where the downloaded data will be put.
|
76
|
+
#
|
77
|
+
# Defaults to the current directory and the default basename of
|
78
|
+
# the downloaded package.
|
79
|
+
#
|
80
|
+
# @return [String, nil]
|
81
|
+
def local_path
|
82
|
+
@local_path || token["download_token"]["package"]["basename"]
|
83
|
+
end
|
84
|
+
|
85
|
+
# Issue the download request.
|
86
|
+
#
|
87
|
+
# Uses +curl+ for the data transfer.
|
88
|
+
def download!
  require 'shellwords' # stdlib, provides Shellwords.escape
  # Escape the path and URL so quotes, spaces, or '&' in either
  # cannot break (or inject into) the shell command line.
  command = "#{curl} -o #{Shellwords.escape(local_path.to_s)} #{Shellwords.escape(download_url.to_s)}"
  puts command if Chimps.verbose?
  system(command)
end
|
93
|
+
|
94
|
+
# Ask for a token and perform the download.
|
95
|
+
def execute!
|
96
|
+
ask_for_token!
|
97
|
+
download!
|
98
|
+
end
|
99
|
+
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
@@ -0,0 +1,238 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
|
4
|
+
# Uploads data to Infochimps by first asking for authorization,
|
5
|
+
# creating an archive, obtaining a token, uploading data, and
|
6
|
+
# notifying Infochimps.
|
7
|
+
class Uploader
|
8
|
+
|
9
|
+
include Chimps::Utils::UsesCurl
|
10
|
+
|
11
|
+
# The ID or handle of the dataset to upload data to.
|
12
|
+
attr_reader :dataset
|
13
|
+
|
14
|
+
# An array of paths to local files and directories to package
|
15
|
+
# into an archive.
|
16
|
+
attr_reader :local_paths
|
17
|
+
|
18
|
+
# The format to annotate the upload with.
|
19
|
+
attr_reader :fmt
|
20
|
+
|
21
|
+
# The archive to upload.
|
22
|
+
attr_reader :archive
|
23
|
+
|
24
|
+
# The token authorizing an upload.
|
25
|
+
attr_reader :token
|
26
|
+
|
27
|
+
# Upload data to Infochimps by first asking for authorization,
|
28
|
+
# creating an archive, obtaining a token, uploading data, and
|
29
|
+
# notifying Infochimps.
|
30
|
+
def execute!
|
31
|
+
authorize_for_upload!
|
32
|
+
create_archive!
|
33
|
+
ask_for_token!
|
34
|
+
upload!
|
35
|
+
notify_infochimps!
|
36
|
+
end
|
37
|
+
|
38
|
+
# Create a new Uploader from the given parameters.
|
39
|
+
#
|
40
|
+
# If <tt>:fmt</tt> is provided it will be used as the data
|
41
|
+
# format to annotate the upload with. If not, Chimps will try
|
42
|
+
# to guess.
|
43
|
+
#
|
44
|
+
# @param [Hash] options
|
45
|
+
# @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
|
46
|
+
# @option options [Array<String>] local_paths the local paths to bundle into an archive
|
47
|
+
# @option options [String, IMW::Resource] archive the path to the archive to create (defaults to Chimps::Workflows::Uploader#default_archive_path)
|
48
|
+
# @option options [String] fmt the data format to annotate the upload with
|
49
|
+
def initialize options={}
|
50
|
+
require 'imw'
|
51
|
+
IMW.verbose = Chimps.verbose?
|
52
|
+
@dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
|
53
|
+
self.local_paths = options[:local_paths] # must come before self.archive=
|
54
|
+
self.archive = options[:archive]
|
55
|
+
self.fmt = options[:fmt]
|
56
|
+
end
|
57
|
+
|
58
|
+
# Set the local paths to upload for this dataset.
|
59
|
+
#
|
60
|
+
# If only one local path is given and it is already an archive
|
61
|
+
# or a compressed file then no further packaging will be done by
|
62
|
+
# this uploader.
|
63
|
+
#
|
64
|
+
# @param [Array<String, IMW::Resource>] paths
|
65
|
+
def local_paths= paths
|
66
|
+
raise PackagingError.new("Must provide at least one local path to upload.") if paths.blank?
|
67
|
+
paths.each { |path| raise PackagingError.new("Invalid path, #{path}") unless File.exist?(File.expand_path(path)) }
|
68
|
+
@local_paths = paths
|
69
|
+
if @local_paths.size == 1
|
70
|
+
potential_package = IMW.open(paths.first)
|
71
|
+
if potential_package.exist? && (potential_package.is_compressed? || potential_package.is_archive?)
|
72
|
+
self.archive = potential_package
|
73
|
+
@skip_packaging = true
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# Should the packaging step be skipped?
|
79
|
+
#
|
80
|
+
# This will happen if only one local input path was provided and
|
81
|
+
# it exists and is a compressed file or archive.
|
82
|
+
#
|
83
|
+
# @return [true, false]
|
84
|
+
def skip_packaging?
|
85
|
+
!! @skip_packaging
|
86
|
+
end
|
87
|
+
|
88
|
+
# Set the path to the archive that will be built.
|
89
|
+
#
|
90
|
+
# The given +path+ must represent a compressed file or archive
|
91
|
+
# (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
|
92
|
+
# <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
|
93
|
+
# extension).
|
94
|
+
#
|
95
|
+
# Additionally, if multiple local paths are being packaged, the
|
96
|
+
# given +path+ must be an archive (not simply <tt>.bz2</tt> or
|
97
|
+
# <tt>.gz</tt> extensions).
|
98
|
+
#
|
99
|
+
# @param [String, IMW::Resource] path the archive or path to use
|
100
|
+
def archive= path=nil
|
101
|
+
return @archive if @archive
|
102
|
+
potential_package = IMW.open(path || default_archive_path)
|
103
|
+
raise PackagingError.new("Invalid path #{potential_package}, not an archive or compressed file") unless potential_package.is_compressed? || potential_package.is_archive?
|
104
|
+
raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.") if local_paths.size > 1 && !potential_package.is_archive?
|
105
|
+
@archive = potential_package
|
106
|
+
end
|
107
|
+
|
108
|
+
# Set the data format to annotate the upload with.
|
109
|
+
#
|
110
|
+
# If not provided, Chimps will use the Infinite Monkeywrench
|
111
|
+
# (IMW) to try and guess the data format. See
|
112
|
+
# IMW::Tools::Summarizer for more information.
|
113
|
+
def fmt= new_fmt=nil
|
114
|
+
@fmt ||= new_fmt || IMW::Tools::Summarizer.new(local_paths).most_common_data_format
|
115
|
+
end
|
116
|
+
|
117
|
+
# The default path to the archive that will be built.
|
118
|
+
#
|
119
|
+
# Defaults to a ZIP file in the current directory named after
|
120
|
+
# the +dataset+'s ID or handle and the current time.
|
121
|
+
#
|
122
|
+
# @return [String]
|
123
|
+
def default_archive_path
|
124
|
+
# in current working directory...
|
125
|
+
"chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.zip"
|
126
|
+
end
|
127
|
+
|
128
|
+
# The URL to the <tt>README-infochimps</tt> file on Infochimps'
|
129
|
+
# servers.
|
130
|
+
#
|
131
|
+
# @return [String]
|
132
|
+
def readme_url
|
133
|
+
File.join(Chimps::CONFIG[:host], "/README-infochimps")
|
134
|
+
end
|
135
|
+
|
136
|
+
# The URL to the ICSS file for this dataset on Infochimps
|
137
|
+
# servers
|
138
|
+
def icss_url
|
139
|
+
File.join(Chimps::CONFIG[:host], "datasets", "#{dataset}.yaml")
|
140
|
+
end
|
141
|
+
|
142
|
+
# Both the local paths and remote paths to package.
|
143
|
+
#
|
144
|
+
# @return [Array<String>]
|
145
|
+
def input_paths
  # PackagingError (the original spelling here was a typo that raised NameError).
  raise PackagingError.new("Must specify some local paths to package") if local_paths.blank?
  local_paths + [readme_url, icss_url]
end
|
149
|
+
|
150
|
+
# The path on Infochimps to submit upload token requests to.
|
151
|
+
#
|
152
|
+
# @return [String]
|
153
|
+
def token_path
|
154
|
+
"/datasets/#{dataset}/packages/new.json"
|
155
|
+
end
|
156
|
+
|
157
|
+
# The path on Infochimps to submit package creation requests to.
|
158
|
+
#
|
159
|
+
# @return [String]
|
160
|
+
def package_creation_path
|
161
|
+
"/datasets/#{dataset}/packages.json"
|
162
|
+
end
|
163
|
+
|
164
|
+
# Return a hash of params for obtaining a new upload token.
|
165
|
+
#
|
166
|
+
# @return [Hash]
|
167
|
+
def package_params
|
168
|
+
{ :package => { :fmt => fmt, :pkg_fmt => archive.extension } }
|
169
|
+
end
|
170
|
+
|
171
|
+
# Authorize the Chimps user for this upload.
|
172
|
+
def authorize_for_upload!
|
173
|
+
# FIXME we're actually just making a token request here...
|
174
|
+
ask_for_token!
|
175
|
+
end
|
176
|
+
|
177
|
+
# Obtain an upload token from Infochimps.
|
178
|
+
def ask_for_token!
|
179
|
+
new_token = Request.new(token_path, :params => package_params, :signed => true).get
|
180
|
+
if new_token.error?
|
181
|
+
new_token.print
|
182
|
+
raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
|
183
|
+
else
|
184
|
+
@token = new_token
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
# Build the local archive if necessary.
|
189
|
+
#
|
190
|
+
# Will not build the local archive if there was only one local
|
191
|
+
# input path and it was already compressed or an archive.
|
192
|
+
def create_archive!
|
193
|
+
return if skip_packaging?
|
194
|
+
archiver = IMW::Tools::Archiver.new(archive.name, input_paths)
|
195
|
+
result = archiver.package(archive.path)
|
196
|
+
raise PackagingError.new("Unable to package files for upload. Temporary files left in #{archiver.tmp_dir}") if result.is_a?(RuntimeError) || (!archiver.success?)
|
197
|
+
archiver.clean!
|
198
|
+
end
|
199
|
+
|
200
|
+
# Return a string built from the granted upload token that can
|
201
|
+
# be fed to +curl+ in order to authenticate with and upload to
|
202
|
+
# Amazon.
|
203
|
+
#
|
204
|
+
# @return [String]
|
205
|
+
def upload_data
  require 'shellwords' # stdlib, provides Shellwords.escape
  # Each token value and the archive path is shell-escaped so that
  # quotes or spaces in them survive the trip through the shell.
  fields = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature'].map do |param|
    "-F #{param}=#{Shellwords.escape(token[param].to_s)}"
  end
  fields << "-F file=@#{Shellwords.escape(archive.path.to_s)}"
  fields.join(' ')
end
|
210
|
+
|
211
|
+
# Upload the data.
|
212
|
+
#
|
213
|
+
# Uses +curl+ for the transfer.
|
214
|
+
def upload!
  require 'shellwords' # stdlib, provides Shellwords.escape
  progress_meter = Chimps.verbose? ? '' : '-s -S'
  # The upload URL comes from the token; escape it so characters
  # such as '&' or '?' are not interpreted by the shell.
  command = "#{curl} #{progress_meter} -o /dev/null -X POST #{upload_data} #{Shellwords.escape(token['url'].to_s)}"
  raise UploadError.new("Failed to upload #{archive.path} to Infochimps") unless IMW.system(command)
end
|
219
|
+
|
220
|
+
# Return a hash of parameters used to create a new Package at
|
221
|
+
# Infochimps corresponding to the upload.
|
222
|
+
#
|
223
|
+
# @return [Hash]
|
224
|
+
def package_data
|
225
|
+
{ :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension} }
|
226
|
+
end
|
227
|
+
|
228
|
+
# Make a final POST request to Infochimps, creating the final
|
229
|
+
# resource.
|
230
|
+
def notify_infochimps!
|
231
|
+
package_creation_response = Request.new(package_creation_path, :signed => true, :data => package_data).post
|
232
|
+
package_creation_response.print
|
233
|
+
raise UploadError.new("Unable to notify Infochimps of newly uploaded data.") if package_creation_response.error?
|
234
|
+
end
|
235
|
+
|
236
|
+
end
|
237
|
+
end
|
238
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Chimps
|
2
|
+
|
3
|
+
# A module defining classes to handle complex workflows between the
|
4
|
+
# local machine and Infochimps' servers.
|
5
|
+
module Workflows
|
6
|
+
autoload :Uploader, 'chimps/workflows/uploader'
|
7
|
+
autoload :Downloader, 'chimps/workflows/downloader'
|
8
|
+
autoload :BatchUpdater, 'chimps/workflows/batch'
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
data/lib/chimps.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'chimps/utils'
|
3
|
+
|
4
|
+
# The Chimps module implements a Ruby-based command-line interface to
|
5
|
+
# the Infochimps data repository.
|
6
|
+
#
|
7
|
+
# Using this tool you can search, download, edit, and upload data and
|
8
|
+
# metadata to and from Infochimps.
|
9
|
+
module Chimps
|
10
|
+
|
11
|
+
autoload :Config, 'chimps/config'
|
12
|
+
autoload :CONFIG, 'chimps/config'
|
13
|
+
autoload :CLI, 'chimps/cli'
|
14
|
+
autoload :Command, 'chimps/commands/base'
|
15
|
+
autoload :Commands, 'chimps/commands'
|
16
|
+
autoload :Request, 'chimps/request'
|
17
|
+
autoload :QueryRequest, 'chimps/request'
|
18
|
+
autoload :Response, 'chimps/response'
|
19
|
+
autoload :Typewriter, 'chimps/typewriter'
|
20
|
+
autoload :Workflows, 'chimps/workflows'
|
21
|
+
|
22
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../spec_helper')
|
2
|
+
|
3
|
+
describe Chimps::CLI do
|
4
|
+
end
|
5
|
+
|
6
|
+
describe Chimps::CLI::Runner do
|
7
|
+
|
8
|
+
it "should raise a CLIError when no command is given" do
|
9
|
+
lambda { Chimps::CLI::Runner.new([]).execute! }.should raise_error(Chimps::CLIError)
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should raise a CLIError when an unrecognized command is given" do
|
13
|
+
lambda { Chimps::CLI::Runner.new(['foobar', 'arg1', 'arg2']).execute! }.should raise_error(Chimps::CLIError)
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should recognize a command when given" do
|
17
|
+
Chimps::Commands.should_receive(:construct).with('list', ['arg1', 'arg2'])
|
18
|
+
Chimps::CLI::Runner.new(['list', 'arg1', 'arg2']).command # execute requires the command to be initialized and returned...
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../../spec_helper')
|
2
|
+
|
3
|
+
describe Chimps::Command do
|
4
|
+
|
5
|
+
it "should return its full name" do
|
6
|
+
Chimps::Command.name.should == "chimps::command"
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should return just its command name" do
|
10
|
+
Chimps::Command.new([]).name.should == "command"
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should run any methods beginning with `define' and ending with `options?'" do
|
14
|
+
klass = Class.new(Chimps::Command)
|
15
|
+
klass.class_eval <<RUBY
|
16
|
+
attr_accessor :test_property
|
17
|
+
def define_test_options
|
18
|
+
self.test_property=true
|
19
|
+
end
|
20
|
+
RUBY
|
21
|
+
klass.new([]).test_property.should == true
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../../spec_helper')
|
2
|
+
|
3
|
+
describe Chimps::Commands::List do
|
4
|
+
|
5
|
+
it "should return its full name" do
|
6
|
+
Chimps::Command.name.should == "chimps::command"
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should return just its command name" do
|
10
|
+
Chimps::Command.new([]).name.should == "command"
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should run any methods beginning with `define' and ending with `options?'" do
|
14
|
+
klass = Class.new(Chimps::Command)
|
15
|
+
klass.class_eval <<RUBY
|
16
|
+
attr_accessor :test_property
|
17
|
+
def define_test_options
|
18
|
+
self.test_property=true
|
19
|
+
end
|
20
|
+
RUBY
|
21
|
+
klass.new([]).test_property.should == true
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
end
|