chimps 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/chimps/commands/base.rb +18 -5
- data/lib/chimps/commands/help.rb +3 -3
- data/lib/chimps/config.rb +10 -5
- data/lib/chimps/request.rb +17 -16
- data/lib/chimps/utils/log.rb +50 -0
- data/lib/chimps/utils.rb +2 -0
- data/lib/chimps/workflows/uploader.rb +36 -7
- metadata +3 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.3
|
data/lib/chimps/commands/base.rb
CHANGED
@@ -39,7 +39,7 @@ module Chimps
|
|
39
39
|
@argv = argv
|
40
40
|
run_options_definers
|
41
41
|
parse_command_line!
|
42
|
-
|
42
|
+
resolve_options!
|
43
43
|
end
|
44
44
|
|
45
45
|
# The name of this command, including the
|
@@ -68,6 +68,14 @@ module Chimps
|
|
68
68
|
raise CLIError.new("#{e.message}. Try `chimps help #{name}'")
|
69
69
|
end
|
70
70
|
end
|
71
|
+
|
72
|
+
# Ensure that certain options (verbosity, log file) that can be
|
73
|
+
# passed on the command-line override those stored in a
|
74
|
+
# configuration file (if present).
|
75
|
+
def resolve_options!
|
76
|
+
Chimps::Config.load # load defaults from config file
|
77
|
+
Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
|
78
|
+
end
|
71
79
|
|
72
80
|
# Run all methods beginning with +define+ and ending with +option+
|
73
81
|
# or +options+.
|
@@ -83,16 +91,21 @@ module Chimps
|
|
83
91
|
# such options at the moment are <tt>-v</tt> (or
|
84
92
|
# <tt>--[no-]verbose</tt>) for verbosity, and <tt>-i</tt> (or
|
85
93
|
# <tt>--identity-file</tt>) for setting the identity file to use.
|
94
|
+
# Also <tt>-l</tt> (or <tt>--log-file</tt>) for setting the log file to use.
|
86
95
|
def define_common_options
|
87
96
|
separator self.class::HELP
|
88
97
|
separator "\nOptions include:"
|
89
98
|
|
90
|
-
on("-v", "--[no-]verbose", "Be verbose, or not.") do |
|
91
|
-
Chimps::
|
99
|
+
on("-v", "--[no-]verbose", "Be verbose, or not.") do |verbose|
|
100
|
+
Chimps::COMMAND_LINE_OPTIONS[:verbose] = verbose
|
92
101
|
end
|
93
102
|
|
94
|
-
on("-i", "--identity-file PATH", "Use the given YAML identify file to authenticate with Infochimps instead of the default (~/.chimps) ") do |
|
95
|
-
Chimps::
|
103
|
+
on("-i", "--identity-file PATH", "Use the given YAML identify file to authenticate with Infochimps instead of the default (~/.chimps) ") do |path|
|
104
|
+
Chimps::COMMAND_LINE_OPTIONS[:identity_file] = File.expand_path(path)
|
105
|
+
end
|
106
|
+
|
107
|
+
on("-l", "--log-file PATH", "Use the given path to log Chimps output (`-' is interpreted as $stdout).") do |path|
|
108
|
+
Chimps::COMMAND_LINE_OPTIONS[:log_file] = path # don't expand_path as it might be a `-'
|
96
109
|
end
|
97
110
|
end
|
98
111
|
|
data/lib/chimps/commands/help.rb
CHANGED
@@ -57,9 +57,9 @@ for any of the commands above.
|
|
57
57
|
|
58
58
|
= Setup
|
59
59
|
|
60
|
-
Once you have obtained an API key and secret from Infochimps, place
|
61
|
-
in a file Chimps::CONFIG[:identity_file] in your home directory
|
62
|
-
following format
|
60
|
+
Once you have obtained an API key and secret from Infochimps, place
|
61
|
+
them in a file Chimps::CONFIG[:identity_file] in your home directory
|
62
|
+
with the following format
|
63
63
|
|
64
64
|
---
|
65
65
|
# API credentials for use on the main Infochimps site
|
data/lib/chimps/config.rb
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
module Chimps
|
2
2
|
|
3
|
+
# Options that can be overridden by the command-line.
|
4
|
+
COMMAND_LINE_OPTIONS = {
|
5
|
+
:identity_file => File.expand_path(ENV["CHIMPS_RC"] || "~/.chimps"),
|
6
|
+
# log_file -- will be specified on command line
|
7
|
+
# verbose -- will be specified on command line
|
8
|
+
}
|
9
|
+
|
3
10
|
# Default configuration for Chimps. User-specific configuration
|
4
11
|
# lives in a YAML file <tt>~/.chimps</tt>.
|
5
12
|
CONFIG = {
|
@@ -9,14 +16,12 @@ module Chimps
|
|
9
16
|
:site => {
|
10
17
|
:host => ENV["CHIMPS_HOST"] || 'http://infochimps.org'
|
11
18
|
},
|
12
|
-
:identity_file => File.expand_path(ENV["CHIMPS_RC"] || "~/.chimps"),
|
13
|
-
:verbose => nil,
|
14
19
|
:timestamp_format => "%Y-%m-%d_%H-%M-%S"
|
15
20
|
}
|
16
21
|
|
17
22
|
# Is Chimps in verbose mode?
|
18
23
|
#
|
19
|
-
# @return [true, false]
|
24
|
+
# @return [true, false, nil]
|
20
25
|
def self.verbose?
|
21
26
|
CONFIG[:verbose]
|
22
27
|
end
|
@@ -42,9 +47,9 @@ module Chimps
|
|
42
47
|
# file.
|
43
48
|
def self.load
|
44
49
|
# FIXME this is a terrible hack...and it only goes to 2 deep!
|
45
|
-
if File.exist?(
|
50
|
+
if File.exist?(COMMAND_LINE_OPTIONS[:identity_file])
|
46
51
|
require 'yaml'
|
47
|
-
YAML.load_file(
|
52
|
+
YAML.load_file(COMMAND_LINE_OPTIONS[:identity_file]).each_pair do |key, value|
|
48
53
|
if value.is_a?(Hash) && CONFIG.include?(key)
|
49
54
|
CONFIG[key].merge!(value)
|
50
55
|
else
|
data/lib/chimps/request.rb
CHANGED
@@ -103,7 +103,7 @@ module Chimps
|
|
103
103
|
# @return [Chimps::Response]
|
104
104
|
def get options={}
|
105
105
|
handle_exceptions do
|
106
|
-
|
106
|
+
Chimps.log.info("GET #{url}")
|
107
107
|
Response.new(super(DEFAULT_HEADERS.merge(options)))
|
108
108
|
end
|
109
109
|
end
|
@@ -117,7 +117,7 @@ module Chimps
|
|
117
117
|
# @return [Chimps::Response]
|
118
118
|
def post options={}
|
119
119
|
handle_exceptions do
|
120
|
-
|
120
|
+
Chimps.log.info("POST #{url}")
|
121
121
|
Response.new(super(data_text, DEFAULT_HEADERS.merge(options)))
|
122
122
|
end
|
123
123
|
end
|
@@ -131,7 +131,7 @@ module Chimps
|
|
131
131
|
# @return [Chimps::Response]
|
132
132
|
def put options={}
|
133
133
|
handle_exceptions do
|
134
|
-
|
134
|
+
Chimps.log.info("PUT #{url}")
|
135
135
|
Response.new(super(data_text, DEFAULT_HEADERS.merge(options)))
|
136
136
|
end
|
137
137
|
end
|
@@ -146,7 +146,7 @@ module Chimps
|
|
146
146
|
# @return [Chimps::Response]
|
147
147
|
def delete options={}
|
148
148
|
handle_exceptions do
|
149
|
-
|
149
|
+
Chimps.log.info("DELETE #{url}")
|
150
150
|
Response.new(super(DEFAULT_HEADERS.merge(options)))
|
151
151
|
end
|
152
152
|
end
|
@@ -175,20 +175,10 @@ module Chimps
|
|
175
175
|
query_params[:api_key] = Chimps::CONFIG[:site][:key]
|
176
176
|
end
|
177
177
|
|
178
|
-
# Return the sorted keys of the query params.
|
179
|
-
#
|
180
|
-
# @return [Array]
|
181
|
-
def alphabetical_params
|
182
|
-
query_params.keys.map(&:to_s).sort
|
183
|
-
end
|
184
|
-
|
185
178
|
# Return an unsigned query string for this request.
|
186
179
|
#
|
187
|
-
# Query parameters will be used in alphabetical order.
|
188
|
-
#
|
189
180
|
# @return [String]
|
190
181
|
def unsigned_query_string
|
191
|
-
# alphabetical_params.map { |key| "#{CGI::escape(key.to_s)}=#{CGI::escape(query_params[key.to_sym].to_s)}" }.join("&") # doesn't flatten nested hashes properly
|
192
182
|
RestClient::Payload.generate(query_params)
|
193
183
|
end
|
194
184
|
|
@@ -200,8 +190,7 @@ module Chimps
|
|
200
190
|
#
|
201
191
|
# @return [String]
|
202
192
|
def unsigned_query_string_stripped
|
203
|
-
|
204
|
-
@query_params_text ||= alphabetical_params.map { |key| CGI::escape(key.to_s) + CGI::escape(query_params[key.to_sym].to_s) }.join('')
|
193
|
+
@query_params_text ||= obj_to_stripped_string(query_params)
|
205
194
|
end
|
206
195
|
|
207
196
|
# Return the data of this request as a string.
|
@@ -238,6 +227,18 @@ module Chimps
|
|
238
227
|
"#{unsigned_query_string}&signature=#{signature}"
|
239
228
|
end
|
240
229
|
|
230
|
+
# Turn +obj+ into a string, sorting on internal keys.
|
231
|
+
#
|
232
|
+
# @param [Hash, Array, String] obj
|
233
|
+
# @return [String]
|
234
|
+
def obj_to_stripped_string obj
|
235
|
+
case obj
|
236
|
+
when Hash then obj.keys.map(&:to_s).sort.map { |key| [key.to_s.downcase, obj_to_stripped_string(obj[key.to_sym])].join('') }.join('')
|
237
|
+
when Array then obj.map { |e| obj_to_stripped_string(e) }.join('')
|
238
|
+
else obj.to_s
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
241
242
|
end
|
242
243
|
|
243
244
|
# A class to encapsulate requests made against the Infochimps paid
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Chimps
|
2
|
+
|
3
|
+
# The Chimps logger. Set via Chimps::CONFIG[:log_file] and defaults
|
4
|
+
# to $stdout.
|
5
|
+
#
|
6
|
+
# @return [Logger]
|
7
|
+
def self.log
|
8
|
+
@log ||= Log.new_logger
|
9
|
+
end
|
10
|
+
|
11
|
+
# Set the Chimps logger.
|
12
|
+
#
|
13
|
+
# @param [Logger] new_log
|
14
|
+
def self.log= new_log
|
15
|
+
@log = new_log
|
16
|
+
end
|
17
|
+
|
18
|
+
# Module for initializing the Chimps logger from configuration
|
19
|
+
# settings.
|
20
|
+
module Log
|
21
|
+
|
22
|
+
# Initialize a new Logger instance with the log level set by
|
23
|
+
# Chimps.verbose?
|
24
|
+
#
|
25
|
+
# @return [Logger]
|
26
|
+
def self.new_logger
|
27
|
+
require 'logger'
|
28
|
+
returning(Logger.new(log_file)) do |log|
|
29
|
+
log.progname = "Chimps"
|
30
|
+
log.level = Chimps.verbose? ? Logger::INFO : Logger::WARN
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
protected
|
35
|
+
# Return either the path to the log file in
|
36
|
+
# Chimps::CONFIG[:log_file] or $stdout if the path is unset or
|
37
|
+
# equal to `-'.
|
38
|
+
#
|
39
|
+
# @return [String, $stdout] the path to the log or $stdout
|
40
|
+
def self.log_file
|
41
|
+
if Chimps::CONFIG[:log_file]
|
42
|
+
Chimps::CONFIG[:log_file].strip == '-' ? $stdout : Chimps::CONFIG[:log_file]
|
43
|
+
else
|
44
|
+
$stdout
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
|
data/lib/chimps/utils.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'chimps/config'
|
2
2
|
require 'chimps/utils/extensions'
|
3
3
|
require 'chimps/utils/error'
|
4
|
+
require 'chimps/utils/log'
|
4
5
|
|
5
6
|
module Chimps
|
6
7
|
module Utils
|
@@ -8,4 +9,5 @@ module Chimps
|
|
8
9
|
autoload :UsesModel, 'chimps/utils/uses_model'
|
9
10
|
autoload :UsesYamlData, 'chimps/utils/uses_yaml_data'
|
10
11
|
end
|
12
|
+
|
11
13
|
end
|
@@ -47,8 +47,7 @@ module Chimps
|
|
47
47
|
# @option options [String, IMW::Resource] archive the path to the archive to create (defaults to IMW::Workflows::Downloader#default_archive_path)
|
48
48
|
# @option options [String] fmt the data format to annotate the upload with
|
49
49
|
def initialize options={}
|
50
|
-
|
51
|
-
IMW.verbose = Chimps.verbose?
|
50
|
+
require_imw
|
52
51
|
@dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
|
53
52
|
self.local_paths = options[:local_paths] # must come before self.archive=
|
54
53
|
self.archive = options[:archive]
|
@@ -105,24 +104,43 @@ module Chimps
|
|
105
104
|
@archive = potential_package
|
106
105
|
end
|
107
106
|
|
107
|
+
# Return the summarizer responsible for summarizing data on this
|
108
|
+
# upload.
|
109
|
+
#
|
110
|
+
# @return [IMW::Tools::Summarizer]
|
111
|
+
def summarizer
|
112
|
+
@summarizer ||= IMW::Tools::Summarizer.new(local_paths)
|
113
|
+
end
|
114
|
+
|
108
115
|
# Set the data format to annotate the upload with.
|
109
116
|
#
|
110
117
|
# If not provided, Chimps will use the Infinite Monkeywrench
|
111
118
|
# (IMW) to try and guess the data format. See
|
112
119
|
# IMW::Tools::Summarizer for more information.
|
113
120
|
def fmt= new_fmt=nil
|
114
|
-
@fmt ||= new_fmt ||
|
121
|
+
@fmt ||= new_fmt || summarizer.most_common_data_format
|
115
122
|
end
|
116
123
|
|
117
124
|
# The default path to the archive that will be built.
|
118
125
|
#
|
119
|
-
# Defaults to a
|
120
|
-
#
|
126
|
+
# Defaults to a file in the current directory named after the
|
127
|
+
# +dataset+'s ID or handle and the current time. The package
|
128
|
+
# format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
|
129
|
+
# size, see
|
130
|
+
# Chimps::Workflows::Uploader#default_archive_extension.
|
121
131
|
#
|
122
132
|
# @return [String]
|
123
133
|
def default_archive_path
|
124
134
|
# in current working directory...
|
125
|
-
"chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}
|
135
|
+
"chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
|
136
|
+
end
|
137
|
+
|
138
|
+
# Use <tt>zip</tt> if the data is less than 500 MB in size and
|
139
|
+
# <tt>tar.bz2</tt> otherwise.
|
140
|
+
#
|
141
|
+
# @return ['tar.bz2', 'zip']
|
142
|
+
def default_archive_extension
|
143
|
+
summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
|
126
144
|
end
|
127
145
|
|
128
146
|
# The URL to the <tt>README-infochimps</tt> file on Infochimps'
|
@@ -222,7 +240,7 @@ module Chimps
|
|
222
240
|
#
|
223
241
|
# @return [Hash]
|
224
242
|
def package_data
|
225
|
-
{ :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension} }
|
243
|
+
{ :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension, :summary => summarizer.summary, :token_timestamp => token['timestamp'] } }
|
226
244
|
end
|
227
245
|
|
228
246
|
# Make a final POST request to Infochimps, creating the final
|
@@ -232,6 +250,17 @@ module Chimps
|
|
232
250
|
package_creation_response.print
|
233
251
|
raise UploadError.new("Unable to notify Infochimps of newly uploaded data.") if package_creation_response.error?
|
234
252
|
end
|
253
|
+
|
254
|
+
protected
|
255
|
+
# Require IMW and match the IMW logger to the Chimps logger.
|
256
|
+
def require_imw
|
257
|
+
begin
|
258
|
+
require 'imw'
|
259
|
+
rescue LoadError
|
260
|
+
raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
|
261
|
+
end
|
262
|
+
IMW.verbose = Chimps.verbose?
|
263
|
+
end
|
235
264
|
|
236
265
|
end
|
237
266
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chimps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dhruv Bansal
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-06-06 00:00:00 -05:00
|
13
13
|
default_executable: chimps
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -105,6 +105,7 @@ files:
|
|
105
105
|
- lib/chimps/utils.rb
|
106
106
|
- lib/chimps/utils/error.rb
|
107
107
|
- lib/chimps/utils/extensions.rb
|
108
|
+
- lib/chimps/utils/log.rb
|
108
109
|
- lib/chimps/utils/uses_curl.rb
|
109
110
|
- lib/chimps/utils/uses_model.rb
|
110
111
|
- lib/chimps/utils/uses_yaml_data.rb
|