chimps 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/LICENSE +674 -0
- data/README.rdoc +48 -0
- data/VERSION +1 -0
- data/bin/chimps +4 -0
- data/examples/batch.yaml +69 -0
- data/lib/chimps/cli.rb +102 -0
- data/lib/chimps/commands/base.rb +107 -0
- data/lib/chimps/commands/batch.rb +68 -0
- data/lib/chimps/commands/create.rb +33 -0
- data/lib/chimps/commands/destroy.rb +28 -0
- data/lib/chimps/commands/download.rb +76 -0
- data/lib/chimps/commands/help.rb +89 -0
- data/lib/chimps/commands/list.rb +54 -0
- data/lib/chimps/commands/query.rb +59 -0
- data/lib/chimps/commands/search.rb +59 -0
- data/lib/chimps/commands/show.rb +32 -0
- data/lib/chimps/commands/test.rb +40 -0
- data/lib/chimps/commands/update.rb +33 -0
- data/lib/chimps/commands/upload.rb +63 -0
- data/lib/chimps/commands.rb +46 -0
- data/lib/chimps/config.rb +57 -0
- data/lib/chimps/request.rb +302 -0
- data/lib/chimps/response.rb +146 -0
- data/lib/chimps/typewriter.rb +326 -0
- data/lib/chimps/utils/error.rb +40 -0
- data/lib/chimps/utils/extensions.rb +109 -0
- data/lib/chimps/utils/uses_curl.rb +26 -0
- data/lib/chimps/utils/uses_model.rb +51 -0
- data/lib/chimps/utils/uses_yaml_data.rb +94 -0
- data/lib/chimps/utils.rb +11 -0
- data/lib/chimps/workflows/batch.rb +127 -0
- data/lib/chimps/workflows/downloader.rb +102 -0
- data/lib/chimps/workflows/uploader.rb +238 -0
- data/lib/chimps/workflows.rb +11 -0
- data/lib/chimps.rb +22 -0
- data/spec/chimps/cli_spec.rb +22 -0
- data/spec/chimps/commands/base_spec.rb +25 -0
- data/spec/chimps/commands/list_spec.rb +25 -0
- data/spec/chimps/response_spec.rb +8 -0
- data/spec/chimps/typewriter_spec.rb +114 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/support/custom_matchers.rb +6 -0
- metadata +133 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Utils
|
3
|
+
module UsesYamlData
|
4
|
+
|
5
|
+
IGNORE_YAML_FILES_ON_COMMAND_LINE = false
|
6
|
+
|
7
|
+
attr_reader :data_file
|
8
|
+
|
9
|
+
def data
  # Memoized. Sources are concatenated in the order STDIN, data file,
  # command line and then handed to merge_all.
  return @data if @data
  sources = data_from_stdin + data_from_file + data_from_command_line
  @data = merge_all(*sources)
end
|
12
|
+
|
13
|
+
protected
|
14
|
+
|
15
|
+
# Combine several YAML-derived objects into one.
#
# +nil+ entries are dropped.  The remaining objects must all be of the
# same class: Arrays are concatenated in order, Hashes are merged in
# order (so later objects take precedence over earlier ones).
#
# @param [Array<Array, Hash, nil>] objs the objects to combine
# @return [Array, Hash, nil] the combined data, or nil if nothing was given
# @raise [CLIError] if the objects are of mixed classes or are neither
#   Arrays nor Hashes
def merge_all(*objs)
  objs.compact!
  # objs is always an Array here; raising on "no data" is left to the caller.
  return if objs.empty?
  klasses = objs.map(&:class).uniq
  raise CLIError.new("Mismatched YAML data types -- Hashes can only be combined with Hashes, Arrays with Arrays") if klasses.size > 1
  # Dispatch on an actual object rather than on `klasses.first.new`:
  # instantiating the class blew up with NoMethodError for scalar YAML
  # (Integer, Symbol, true, ...) before the CLIError below could be raised.
  case objs.first
  when Array
    # greater precedence at the end so iterate in order
    objs.inject([]) { |merged, obj| merged.concat(obj) }
  when Hash
    # greater precedence at the end so iterate in order
    objs.inject({}) { |merged, obj| merged.merge!(obj) }
  else
    raise CLIError.new("Unsuitable YAML data type #{klasses.first} -- can only combine Hashes and Arrays")
  end
end
|
39
|
+
|
40
|
+
def params_from_command_line
  # Collect every name=value argument as its own single-pair Hash; the
  # name is downcased and symbolized, the value is stripped of whitespace.
  argv.inject([]) do |params, arg|
    if arg =~ /^(\w+) *=(.*)$/
      key, val = $1.downcase.to_sym, $2.strip
      params << { key => val } # always a hash
    end
    params
  end
end
|
49
|
+
|
50
|
+
# Load every command-line argument that is not a name=value pair as a
# YAML file.
#
# @return [Array<Hash, Array>] the parsed content of each file, in
#   argument order
# @raise [CLIError] if one of the paths does not exist
def yaml_files_from_command_line
  argv.reject { |arg| arg =~ /^(\w+) *=(.*)$/ }.map do |arg|
    path = File.expand_path(arg)
    raise CLIError.new("No such path #{path}") unless File.exist?(path)
    # YAML.load_file closes the file itself; the previous
    # YAML.load(open(path)) left the handle open.
    YAML.load_file(path) # either a hash or an array
  end
end
|
60
|
+
|
61
|
+
def data_from_command_line
  # Including classes may set IGNORE_YAML_FILES_ON_COMMAND_LINE to keep
  # only the name=value parameters.
  return params_from_command_line if self.class::IGNORE_YAML_FILES_ON_COMMAND_LINE
  yaml_files_from_command_line + params_from_command_line
end
|
68
|
+
|
69
|
+
def data_from_file
  # Always a one-element array: the parsed data file, or nil when no
  # data file was given.
  return [nil] unless data_file
  [YAML.load_file(data_file)]
end
|
72
|
+
|
73
|
+
def data_from_stdin
  # When STDIN reports a size of zero, contribute a single nil placeholder.
  return [nil] if $stdin.stat.size <= 0
  documents = []
  YAML.load_stream($stdin).each { |document| documents << document }
  documents
end
|
81
|
+
|
82
|
+
def define_data_options
  # Registers -d/--data-file; the supplied path is stored expanded.
  on_tail("-d", "--data-file PATH", "Path to a file containing key=value data") { |path| @data_file = File.expand_path(path) }
end
|
87
|
+
|
88
|
+
def ensure_data_is_present!
  # Guard used by commands that cannot run without any input data.
  return if data.present?
  raise CLIError.new("Must provide some data to send, either on the command line, from an input file, or by piping to STDIN.  Try `chimps help #{name}'")
end
|
91
|
+
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
data/lib/chimps/utils.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'chimps/config'
|
2
|
+
require 'chimps/utils/extensions'
|
3
|
+
require 'chimps/utils/error'
|
4
|
+
|
5
|
+
module Chimps
  # Utility mixins, registered for lazy loading via autoload.
  module Utils
    [
      [:UsesCurl,     'chimps/utils/uses_curl'],
      [:UsesModel,    'chimps/utils/uses_model'],
      [:UsesYamlData, 'chimps/utils/uses_yaml_data']
    ].each { |const_name, path| autoload const_name, path }
  end
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
|
4
|
+
# A class for performing batch updates/uploads to Infochimps.
|
5
|
+
#
|
6
|
+
# It works by taking YAML data describing many updates and
|
7
|
+
# performing a single batch API request with this data.
|
8
|
+
#
|
9
|
+
# The batch response is then parsed and analyzed and (given
|
10
|
+
# success or fearlessness) any necessary uploads are performed.
|
11
|
+
#
|
12
|
+
# Examples of the input data format can be found in the
|
13
|
+
# <tt>/examples</tt> directory of the Chimps distribution.
|
14
|
+
class BatchUpdater
|
15
|
+
|
16
|
+
# The data sent as a bulk update.
|
17
|
+
attr_reader :data
|
18
|
+
|
19
|
+
# The batch update response
|
20
|
+
attr_reader :batch_response
|
21
|
+
|
22
|
+
# The output file to store the bulk update response.
|
23
|
+
attr_reader :output_path
|
24
|
+
|
25
|
+
# Whether to upload even if there were errors on update.
|
26
|
+
attr_reader :upload_even_if_errors
|
27
|
+
|
28
|
+
# The data format to annotate the upload with.
|
29
|
+
#
|
30
|
+
# Chimps will try to guess if this isn't given.
|
31
|
+
attr_reader :fmt
|
32
|
+
|
33
|
+
# Create a new BatchUpdater with the given +data+ and +options+.
|
34
|
+
#
|
35
|
+
# The intermediate batch response can be saved at a file named
|
36
|
+
# by <tt>:output_path</tt>, though this isn't necessary.
|
37
|
+
#
|
38
|
+
# @param [Array] data an array of resource updates
|
39
|
+
# @param [Hash] options
|
40
|
+
# @option options [String] output_path path to store the batch response
|
41
|
+
# @option options [true, false] upload_even_if_errors whether to continue uploading in the presence of errors on update
|
42
|
+
# @option options [String] fmt the data format to annotate each upload with (see `chimps upload')
|
43
|
+
# @return [Chimps::Workflows::BatchUpdater]
|
44
|
+
def initialize(data, options = {})
  # Store the batch payload; every option is optional and defaults to nil.
  @data = data
  @output_path, @upload_even_if_errors, @fmt =
    options.values_at(:output_path, :upload_even_if_errors, :fmt)
end
|
50
|
+
|
51
|
+
# The path to submit batch update requests.
|
52
|
+
#
|
53
|
+
# @return [String]
|
54
|
+
def batch_path
  # Passed to Request as the path for the batch POST.
  'batch.json'
end
|
57
|
+
|
58
|
+
# Perform this batch update followed by the batch upload.
|
59
|
+
def execute!
  # First send the batch update ...
  batch_update!
  # ... then run the uploads; this call's result is the return value.
  batch_upload!
end
|
63
|
+
|
64
|
+
# Perform the batch update.
|
65
|
+
def batch_update!
  # POST all updates in a single authenticated request.
  request = Request.new(batch_path, :data => { :batch => data }, :authenticate => true)
  @batch_response = request.post
  # Optionally keep a copy of the raw response body on disk.
  if output_path
    File.open(output_path, 'w') { |file| file.puts batch_response.body }
  end
  batch_response.print
end
|
70
|
+
|
71
|
+
# Were any of the updates performed during the batch update
|
72
|
+
# errors?
|
73
|
+
#
|
74
|
+
# @return [true, false]
|
75
|
+
def error?
  # Any status other than 'created' or 'updated' counts as an error.
  batch_response['batch'].any? do |response|
    !['created', 'updated'].include?(response['status'])
  end
end
|
82
|
+
|
83
|
+
# Did all of the updates performed in the batch update succeed?
|
84
|
+
#
|
85
|
+
# @return [true, false]
|
86
|
+
def success?
  # Plain negation of error?.
  error? ? false : true
end
|
89
|
+
|
90
|
+
# Perform the batch upload.
|
91
|
+
#
|
92
|
+
# Will bail if the batch update had an error unless
|
93
|
+
# Chimps::Workflows::BatchUpdater#upload_even_if_errors returns
|
94
|
+
# true.
|
95
|
+
def batch_upload!
  unless success?
    # Errors occurred: stop unless the caller asked to upload regardless.
    return unless upload_even_if_errors
    $stderr.puts("WARNING: continuing with uploads even though there were errors")
  end
  # One Uploader run per dataset that came back with local paths.
  dataset_ids_and_local_paths.each do |dataset_id, paths|
    uploader = Chimps::Workflows::Uploader.new(:dataset => dataset_id, :local_paths => paths, :fmt => fmt)
    uploader.execute!
  end
end
|
102
|
+
|
103
|
+
protected
|
104
|
+
# Iterate through the batch response and return tuples
|
105
|
+
# consisting of an ID and an array of local paths to upload.
|
106
|
+
#
|
107
|
+
# Only datasets which were successfully created/updated,
|
108
|
+
# returned an ID, and had local_paths defined in the original
|
109
|
+
# batch update will be output.
|
110
|
+
#
|
111
|
+
# @return [Array<Array>]
|
112
|
+
def dataset_ids_and_local_paths
  pairs = []
  batch_response['batch'].each do |response|
    # skip errors
    next unless ['created', 'updated'].include?(response['status'])
    # skip unless it's a dataset
    dataset = response['resource']['dataset']
    next unless dataset
    dataset_id = dataset['id']
    paths      = response['local_paths']
    # skip unless it has both an ID and some local paths
    next if dataset_id.blank? || paths.blank?
    pairs << [dataset_id, paths]
  end
  pairs
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
|
4
|
+
# Downloads data from Infochimps by first making a request for a
|
5
|
+
# download token and, if granted one, proceeding to download the
|
6
|
+
# data.
|
7
|
+
#
|
8
|
+
# Will download the latest package for a given dataset, optionally
|
9
|
+
# constrained to have given data and package formats.
|
10
|
+
class Downloader
|
11
|
+
|
12
|
+
include Chimps::Utils::UsesCurl
|
13
|
+
|
14
|
+
# The token received from Infochimps which contains a signed URL
|
15
|
+
# for the download.
|
16
|
+
attr_reader :token
|
17
|
+
|
18
|
+
# The ID or handle of the dataset to download.
|
19
|
+
attr_reader :dataset
|
20
|
+
|
21
|
+
# The data format of the data to download.
|
22
|
+
attr_reader :fmt
|
23
|
+
|
24
|
+
# The package format of the data to download.
|
25
|
+
attr_reader :pkg_fmt
|
26
|
+
|
27
|
+
# Create a new Downloader with the given parameters.
|
28
|
+
#
|
29
|
+
# @param [Hash] options
|
30
|
+
# @option options [String, Integer] dataset the ID or handle of the dataset to download
|
31
|
+
# @option options [String] fmt the data format to download
|
32
|
+
# @option options [String] pkg_fmt the package format to download
|
33
|
+
# @option options [String] local_path the local path to which the data will be downloaded
|
34
|
+
# @return [Chimps::Workflows::Downloader]
|
35
|
+
def initialize(options = {})
  # Every option is optional; missing ones are stored as nil.
  @dataset, @fmt, @pkg_fmt, @local_path =
    options.values_at(:dataset, :fmt, :pkg_fmt, :local_path)
end
|
41
|
+
|
42
|
+
# Params to send for the token.
|
43
|
+
#
|
44
|
+
# @return [Hash]
|
45
|
+
def token_params
  # The dataset plus the (possibly nil) format constraints.
  constraints = { :dataset_id => dataset, :fmt => fmt, :pkg_fmt => pkg_fmt }
  { :download_token => constraints }
end
|
48
|
+
|
49
|
+
# Ask for a download token for this dataset/package. If no or
|
50
|
+
# an invalid token is obtained, raise an error.
|
51
|
+
def ask_for_token!
  response = Request.new(download_tokens_path, :data => token_params, :sign_if_possible => true).post
  # Happy path: keep the response as the token.
  return @token = response unless response.error?
  # Otherwise show what the server said before bailing out.
  response.print
  raise AuthenticationError.new("Unauthorized to download dataset #{dataset}")
end
|
60
|
+
|
61
|
+
# Path to submit download token requests to.
|
62
|
+
#
|
63
|
+
# @return [String]
|
64
|
+
def download_tokens_path
  # Passed to Request as the path for the token POST.
  '/download_tokens'
end
|
67
|
+
|
68
|
+
# The signed, remote URL from where the data can be downloaded.
|
69
|
+
#
|
70
|
+
# @return [String]
|
71
|
+
def download_url
  # The URL is nested under download_token -> package in the token.
  package = token['download_token']['package']
  package['url']
end
|
74
|
+
|
75
|
+
# The local path where the downloaded data will be put.
|
76
|
+
#
|
77
|
+
# Defaults to the current directory and the default basename of
|
78
|
+
# the downloaded package.
|
79
|
+
#
|
80
|
+
# @return [String, nil]
|
81
|
+
def local_path
  # An explicitly supplied path wins; otherwise use the package's
  # basename as reported in the token.
  return @local_path if @local_path
  token["download_token"]["package"]["basename"]
end
|
84
|
+
|
85
|
+
# Issue the download request.
|
86
|
+
#
|
87
|
+
# Uses +curl+ for the data transfer.
|
88
|
+
# Issue the download request.
#
# Uses +curl+ for the data transfer.  The command is echoed first when
# Chimps is verbose.
#
# @return [true, false, nil] the result of Kernel#system
def download!
  # Loaded here so this file needs no additional top-level require.
  require 'shellwords'
  # Escape both operands instead of wrapping them in single quotes: a
  # local path or URL containing a quote character would otherwise
  # terminate the quoting and be interpreted by the shell.
  command = "#{curl} -o #{Shellwords.escape(local_path)} #{Shellwords.escape(download_url)}"
  puts command if Chimps.verbose?
  system(command)
end
|
93
|
+
|
94
|
+
# Ask for a token and perform the download.
|
95
|
+
def execute!
  # A token must be obtained first; download! reads the URL from it.
  ask_for_token!
  # The result of download! is the return value.
  download!
end
|
99
|
+
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
@@ -0,0 +1,238 @@
|
|
1
|
+
module Chimps
|
2
|
+
module Workflows
|
3
|
+
|
4
|
+
# Uploads data to Infochimps by first asking for authorization,
|
5
|
+
# creating an archive, obtaining a token, uploading data, and
|
6
|
+
# notifying Infochimps.
|
7
|
+
class Uploader
|
8
|
+
|
9
|
+
include Chimps::Utils::UsesCurl
|
10
|
+
|
11
|
+
# The ID or handle of the dataset to upload data to.
|
12
|
+
attr_reader :dataset
|
13
|
+
|
14
|
+
# An array of paths to local files and directories to package
|
15
|
+
# into an archive.
|
16
|
+
attr_reader :local_paths
|
17
|
+
|
18
|
+
# The format to annotate the upload with.
|
19
|
+
attr_reader :fmt
|
20
|
+
|
21
|
+
# The archive to upload.
|
22
|
+
attr_reader :archive
|
23
|
+
|
24
|
+
# The token authorizing an upload.
|
25
|
+
attr_reader :token
|
26
|
+
|
27
|
+
# Upload data to Infochimps by first asking for authorization,
|
28
|
+
# creating an archive, obtaining a token, uploading data, and
|
29
|
+
# notifying Infochimps.
|
30
|
+
def execute!
  # Run the preparatory steps in order ...
  [:authorize_for_upload!, :create_archive!, :ask_for_token!, :upload!].each { |step| send(step) }
  # ... and finish by registering the package; its result is returned.
  notify_infochimps!
end
|
37
|
+
|
38
|
+
# Create a new Uploader from the given parameters.
|
39
|
+
#
|
40
|
+
# If <tt>:fmt</tt> is provided it will be used as the data
|
41
|
+
# format to annotate the upload with. If not, Chimps will try
|
42
|
+
# to guess.
|
43
|
+
#
|
44
|
+
# @param [Hash] options
|
45
|
+
# @option options [String, Integer] dataset the ID or handle of the dataset to which data should be uploaded
|
46
|
+
# @option options [Array<String>] local_paths the local paths to bundle into an archive
|
47
|
+
# @option options [String, IMW::Resource] archive the path to the archive to create (defaults to Chimps::Workflows::Uploader#default_archive_path)
|
48
|
+
# @option options [String] fmt the data format to annotate the upload with
|
49
|
+
def initialize(options = {})
  # IMW is only needed for uploads, so it is loaded on demand.
  require 'imw'
  IMW.verbose = Chimps.verbose?
  @dataset = options[:dataset]
  raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.") unless @dataset
  # Order matters: archive= reads local_paths.
  self.local_paths = options[:local_paths] # must come before self.archive=
  self.archive     = options[:archive]
  self.fmt         = options[:fmt]
end
|
57
|
+
|
58
|
+
# Set the local paths to upload for this dataset.
|
59
|
+
#
|
60
|
+
# If only one local path is given and it is already an archive
|
61
|
+
# or a compressed file then no further packaging will be done by
|
62
|
+
# this uploader.
|
63
|
+
#
|
64
|
+
# @param [Array<String, IMW::Resource>] paths
|
65
|
+
def local_paths=(paths)
  raise PackagingError.new("Must provide at least one local path to upload.") if paths.blank?
  paths.each do |path|
    next if File.exist?(File.expand_path(path))
    raise PackagingError.new("Invalid path, #{path}")
  end
  @local_paths = paths
  # A lone input that is already an archive or compressed file is
  # uploaded as-is; packaging is then skipped.
  return unless @local_paths.size == 1
  candidate = IMW.open(paths.first)
  return unless candidate.exist? && (candidate.is_compressed? || candidate.is_archive?)
  self.archive     = candidate
  @skip_packaging  = true
end
|
77
|
+
|
78
|
+
# Should the packaging step be skipped?
|
79
|
+
#
|
80
|
+
# This will happen if only one local input path was provided and
|
81
|
+
# it exists and is a compressed file or archive.
|
82
|
+
#
|
83
|
+
# @return [true, false]
|
84
|
+
def skip_packaging?
  # Coerce the flag (unset until local_paths= decides) to a strict boolean.
  @skip_packaging ? true : false
end
|
87
|
+
|
88
|
+
# Set the path to the archive that will be built.
|
89
|
+
#
|
90
|
+
# The given +path+ must represent a compressed file or archive
|
91
|
+
# (<tt>.tar</tt>, <tt>.tar.gz</tt>, <tt>.tar.bz2</tt>,
|
92
|
+
# <tt>.zip</tt>, <tt>.rar</tt>, <tt>.bz2</tt>, or <tt>.gz</tt>
|
93
|
+
# extension).
|
94
|
+
#
|
95
|
+
# Additionally, if multiple local paths are being packaged, the
|
96
|
+
# given +path+ must be an archive (not simply <tt>.bz2</tt> or
|
97
|
+
# <tt>.gz</tt> extensions).
|
98
|
+
#
|
99
|
+
# @param [String, IMW::Resource] path the archive or path to use
|
100
|
+
def archive=(path = nil)
  # The first archive set wins; later assignments are ignored.
  return @archive if @archive
  candidate = IMW.open(path || default_archive_path)
  unless candidate.is_compressed? || candidate.is_archive?
    raise PackagingError.new("Invalid path #{candidate}, not an archive or compressed file")
  end
  if local_paths.size > 1 && !candidate.is_archive?
    raise PackagingError.new("Multiple local paths must be packaged in an archive, not a compressed file.")
  end
  @archive = candidate
end
|
107
|
+
|
108
|
+
# Set the data format to annotate the upload with.
|
109
|
+
#
|
110
|
+
# If not provided, Chimps will use the Infinite Monkeywrench
|
111
|
+
# (IMW) to try and guess the data format. See
|
112
|
+
# IMW::Tools::Summarizer for more information.
|
113
|
+
def fmt=(new_fmt = nil)
  # An already-set format is never overwritten.
  return @fmt if @fmt
  # With no format given, ask IMW's Summarizer for the most common one.
  @fmt = new_fmt || IMW::Tools::Summarizer.new(local_paths).most_common_data_format
end
|
116
|
+
|
117
|
+
# The default path to the archive that will be built.
|
118
|
+
#
|
119
|
+
# Defaults to a ZIP file in the current directory named after
|
120
|
+
# the +dataset+'s ID or handle and the current time.
|
121
|
+
#
|
122
|
+
# @return [String]
|
123
|
+
def default_archive_path
  # A relative path, i.e. resolved against the current working directory.
  timestamp = Time.now.strftime(Chimps::CONFIG[:timestamp_format])
  "chimps_#{dataset}-#{timestamp}.zip"
end
|
127
|
+
|
128
|
+
# The URL to the <tt>README-infochimps</tt> file on Infochimps'
|
129
|
+
# servers.
|
130
|
+
#
|
131
|
+
# @return [String]
|
132
|
+
def readme_url
  # Built from the configured host.
  host = Chimps::CONFIG[:host]
  File.join(host, "/README-infochimps")
end
|
135
|
+
|
136
|
+
# The URL to the ICSS file for this dataset on Infochimps
|
137
|
+
# servers
|
138
|
+
def icss_url
  # <host>/datasets/<dataset>.yaml
  host     = Chimps::CONFIG[:host]
  filename = "#{dataset}.yaml"
  File.join(host, "datasets", filename)
end
|
141
|
+
|
142
|
+
# Both the local paths and remote paths to package.
|
143
|
+
#
|
144
|
+
# @return [Array<String>]
|
145
|
+
# Both the local paths and remote paths to package: the local paths
# followed by the README and ICSS URLs.
#
# @return [Array<String>]
# @raise [PackagingError] if no local paths have been set
def input_paths
  # Fixed: the error class was misspelled (PackaginError), so this guard
  # raised NameError instead of the intended PackagingError.
  raise PackagingError.new("Must specify some local paths to package") if local_paths.blank?
  local_paths + [readme_url, icss_url]
end
|
149
|
+
|
150
|
+
# The path on Infochimps to submit upload token requests to.
|
151
|
+
#
|
152
|
+
# @return [String]
|
153
|
+
def token_path
  # Scoped to this uploader's dataset.
  identifier = dataset
  "/datasets/#{identifier}/packages/new.json"
end
|
156
|
+
|
157
|
+
# The path on Infochimps to submit package creation requests to.
|
158
|
+
#
|
159
|
+
# @return [String]
|
160
|
+
def package_creation_path
  # Scoped to this uploader's dataset.
  identifier = dataset
  "/datasets/#{identifier}/packages.json"
end
|
163
|
+
|
164
|
+
# Return a hash of params for obtaining a new upload token.
|
165
|
+
#
|
166
|
+
# @return [Hash]
|
167
|
+
def package_params
  # Data format plus the archive's extension as the package format.
  attributes = { :fmt => fmt, :pkg_fmt => archive.extension }
  { :package => attributes }
end
|
170
|
+
|
171
|
+
# Authorize the Chimps user for this upload.
|
172
|
+
def authorize_for_upload!
  # FIXME there is no dedicated authorization call yet; this simply
  # performs a token request.
  ask_for_token!
end
|
176
|
+
|
177
|
+
# Obtain an upload token from Infochimps.
|
178
|
+
def ask_for_token!
  response = Request.new(token_path, :params => package_params, :signed => true).get
  # Happy path: keep the response as the upload token.
  return @token = response unless response.error?
  # Otherwise show what the server said before bailing out.
  response.print
  raise AuthenticationError.new("Unauthorized for an upload token for dataset #{dataset}")
end
|
187
|
+
|
188
|
+
# Build the local archive if necessary.
|
189
|
+
#
|
190
|
+
# Will not build the local archive if there was only one local
|
191
|
+
# input path and it was already compressed or an archive.
|
192
|
+
def create_archive!
  unless skip_packaging?
    archiver = IMW::Tools::Archiver.new(archive.name, input_paths)
    outcome  = archiver.package(archive.path)
    # Failure is signalled either by a RuntimeError being returned or by
    # the archiver reporting no success.
    failed = outcome.is_a?(RuntimeError) || !archiver.success?
    raise PackagingError.new("Unable to package files for upload.  Temporary files left in #{archiver.tmp_dir}") if failed
    archiver.clean!
  end
end
|
199
|
+
|
200
|
+
# Return a string built from the granted upload token that can
|
201
|
+
# be fed to +curl+ in order to authenticate with and upload to
|
202
|
+
# Amazon.
|
203
|
+
#
|
204
|
+
# @return [String]
|
205
|
+
def upload_data
  # One -F form field per token entry, followed by the file itself.
  fields    = ['AWSAccessKeyId', 'acl', 'key', 'policy', 'success_action_status', 'signature']
  form_args = fields.map { |field| "-F #{field}='#{token[field]}'" }
  # NOTE(review): archive.path is interpolated unquoted; if the command
  # is run through a shell, a path containing spaces would presumably be
  # split -- confirm against how upload! executes it.
  (form_args + ["-F file=@#{archive.path}"]).join(' ')
end
|
210
|
+
|
211
|
+
# Upload the data.
|
212
|
+
#
|
213
|
+
# Uses +curl+ for the transfer.
|
214
|
+
def upload!
  # When not verbose, silence curl's progress meter but still show errors.
  quiet_flags = Chimps.verbose? ? '' : '-s -S'
  command = "#{curl} #{quiet_flags} -o /dev/null -X POST #{upload_data} #{token['url']}"
  return if IMW.system(command)
  raise UploadError.new("Failed to upload #{archive.path} to Infochimps")
end
|
219
|
+
|
220
|
+
# Return a hash of parameters used to create a new Package at
|
221
|
+
# Infochimps corresponding to the upload.
|
222
|
+
#
|
223
|
+
# @return [Hash]
|
224
|
+
def package_data
  # Path and format come from the token; size and package format from
  # the local archive.
  attributes = {
    :path     => token['key'],
    :fmt      => token['fmt'],
    :pkg_size => archive.size,
    :pkg_fmt  => archive.extension
  }
  { :package => attributes }
end
|
227
|
+
|
228
|
+
# Make a final POST request to Infochimps, creating the final
|
229
|
+
# resource.
|
230
|
+
def notify_infochimps!
  request  = Request.new(package_creation_path, :signed => true, :data => package_data)
  response = request.post
  # The response is printed whether or not it reports an error.
  response.print
  return unless response.error?
  raise UploadError.new("Unable to notify Infochimps of newly uploaded data.")
end
|
235
|
+
|
236
|
+
end
|
237
|
+
end
|
238
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Chimps

  # Namespace for the multi-step workflows between the local machine and
  # Infochimps' servers.  Each class is registered for autoload.
  module Workflows
    [
      [:Uploader,     'chimps/workflows/uploader'],
      [:Downloader,   'chimps/workflows/downloader'],
      [:BatchUpdater, 'chimps/workflows/batch']
    ].each { |const_name, path| autoload const_name, path }
  end
end
|
11
|
+
|
data/lib/chimps.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'chimps/utils'
|
3
|
+
|
4
|
+
# The Chimps module implements a Ruby-based command-line interface to
|
5
|
+
# the Infochimps data repository.
|
6
|
+
#
|
7
|
+
# Using this tool you can search, download, edit, and upload data and
|
8
|
+
# metadata to and from Infochimps.
|
9
|
+
module Chimps

  # Top-level constants, registered for autoload.  Config and CONFIG
  # both come from the same file.
  [
    [:Config,       'chimps/config'],
    [:CONFIG,       'chimps/config'],
    [:CLI,          'chimps/cli'],
    [:Command,      'chimps/commands/base'],
    [:Commands,     'chimps/commands'],
    [:Request,      'chimps/request'],
    [:QueryRequest, 'chimps/request'],
    [:Response,     'chimps/response'],
    [:Typewriter,   'chimps/typewriter'],
    [:Workflows,    'chimps/workflows']
  ].each { |const_name, path| autoload const_name, path }

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../spec_helper')
|
2
|
+
|
3
|
+
describe Chimps::CLI do
end

describe Chimps::CLI::Runner do

  it "should raise a CLIError when no command is given" do
    running = lambda do
      Chimps::CLI::Runner.new([]).execute!
    end
    running.should raise_error(Chimps::CLIError)
  end

  it "should raise a CLIError when an unrecognized command is given" do
    running = lambda do
      Chimps::CLI::Runner.new(['foobar', 'arg1', 'arg2']).execute!
    end
    running.should raise_error(Chimps::CLIError)
  end

  it "should recognize a command when given" do
    Chimps::Commands.should_receive(:construct).with('list', ['arg1', 'arg2'])
    # Asking for the command is enough to trigger construction; execute!
    # would additionally need a real command object to be returned.
    runner = Chimps::CLI::Runner.new(['list', 'arg1', 'arg2'])
    runner.command
  end

end
|
22
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../../spec_helper')
|
2
|
+
|
3
|
+
describe Chimps::Command do

  it "should return its full name" do
    Chimps::Command.name.should == "chimps::command"
  end

  it "should return just its command name" do
    command = Chimps::Command.new([])
    command.name.should == "command"
  end

  it "should run any methods beginning with `define' and ending with `options?'" do
    # An anonymous subclass whose define_*_options hook flips a flag.
    klass = Class.new(Chimps::Command)
    klass.class_eval do
      attr_accessor :test_property
      def define_test_options
        self.test_property=true
      end
    end
    klass.new([]).test_property.should == true
  end

end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../../spec_helper')
|
2
|
+
|
3
|
+
# NOTE(review): every example below exercises Chimps::Command, not
# Chimps::Commands::List -- this looks copied from the base command spec.
# Confirm the names List is expected to report before retargeting them.
describe Chimps::Commands::List do

  it "should return its full name" do
    Chimps::Command.name.should == "chimps::command"
  end

  it "should return just its command name" do
    command = Chimps::Command.new([])
    command.name.should == "command"
  end

  it "should run any methods beginning with `define' and ending with `options?'" do
    # An anonymous subclass whose define_*_options hook flips a flag.
    klass = Class.new(Chimps::Command)
    klass.class_eval do
      attr_accessor :test_property
      def define_test_options
        self.test_property=true
      end
    end
    klass.new([]).test_property.should == true
  end

end
|