chimps 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
@@ -39,7 +39,7 @@ module Chimps
39
39
  @argv = argv
40
40
  run_options_definers
41
41
  parse_command_line!
42
- Chimps::Config.load
42
+ resolve_options!
43
43
  end
44
44
 
45
45
  # The name of this command, including the
@@ -68,6 +68,14 @@ module Chimps
68
68
  raise CLIError.new("#{e.message}. Try `chimps help #{name}'")
69
69
  end
70
70
  end
71
+
72
+ # Ensure that certain options (verbosity, log file) that can be
73
+ # passed on the command-line override those stored in a
74
+ # configuration file (if present).
75
+ def resolve_options!
76
+ Chimps::Config.load # load defaults from config file
77
+ Chimps::CONFIG.merge!(Chimps::COMMAND_LINE_OPTIONS) # overwrites from command line if necessary
78
+ end
71
79
 
72
80
  # Run all methods beginning with +define+ and ending with +option+
73
81
  # or +options+.
@@ -83,16 +91,21 @@ module Chimps
83
91
  # such options at the moment are <tt>-v</tt> (or
84
92
  # <tt>--[no-]verbose</tt>) for verbosity, and <tt>-i</tt> (or
85
93
  # <tt>--identity-file</tt>) for setting the identity file to use.
94
+ # There is also <tt>-l</tt> (or <tt>--log-file</tt>) for setting the log file to use.
86
95
  def define_common_options
87
96
  separator self.class::HELP
88
97
  separator "\nOptions include:"
89
98
 
90
- on("-v", "--[no-]verbose", "Be verbose, or not.") do |v|
91
- Chimps::CONFIG[:verbose] = v
99
+ on("-v", "--[no-]verbose", "Be verbose, or not.") do |verbose|
100
+ Chimps::COMMAND_LINE_OPTIONS[:verbose] = verbose
92
101
  end
93
102
 
94
- on("-i", "--identity-file PATH", "Use the given YAML identify file to authenticate with Infochimps instead of the default (~/.chimps) ") do |i|
95
- Chimps::CONFIG[:identity_file] = File.expand_path(i)
103
+ on("-i", "--identity-file PATH", "Use the given YAML identify file to authenticate with Infochimps instead of the default (~/.chimps) ") do |path|
104
+ Chimps::COMMAND_LINE_OPTIONS[:identity_file] = File.expand_path(path)
105
+ end
106
+
107
+ on("-l", "--log-file PATH", "Use the given path to log Chimps output (`-' is interpreted as $stdout).") do |path|
108
+ Chimps::COMMAND_LINE_OPTIONS[:log_file] = path # don't expand_path as it might be a `-'
96
109
  end
97
110
  end
98
111
 
@@ -57,9 +57,9 @@ for any of the commands above.
57
57
 
58
58
  = Setup
59
59
 
60
- Once you have obtained an API key and secret from Infochimps, place them
61
- in a file Chimps::CONFIG[:identity_file] in your home directory with the
62
- following format
60
+ Once you have obtained an API key and secret from Infochimps, place
61
+ them in a file Chimps::CONFIG[:identity_file] in your home directory
62
+ with the following format
63
63
 
64
64
  ---
65
65
  # API credentials for use on the main Infochimps site
data/lib/chimps/config.rb CHANGED
@@ -1,5 +1,12 @@
1
1
  module Chimps
2
2
 
3
+ # Options that can be overridden by the command line.
4
+ COMMAND_LINE_OPTIONS = {
5
+ :identity_file => File.expand_path(ENV["CHIMPS_RC"] || "~/.chimps"),
6
+ # log_file -- will be specified on command line
7
+ # verbose -- will be specified on command line
8
+ }
9
+
3
10
  # Default configuration for Chimps. User-specific configuration
4
11
  # lives in a YAML file <tt>~/.chimps</tt>.
5
12
  CONFIG = {
@@ -9,14 +16,12 @@ module Chimps
9
16
  :site => {
10
17
  :host => ENV["CHIMPS_HOST"] || 'http://infochimps.org'
11
18
  },
12
- :identity_file => File.expand_path(ENV["CHIMPS_RC"] || "~/.chimps"),
13
- :verbose => nil,
14
19
  :timestamp_format => "%Y-%m-%d_%H-%M-%S"
15
20
  }
16
21
 
17
22
  # Is Chimps in verbose mode?
18
23
  #
19
- # @return [true, false]
24
+ # @return [true, false, nil]
20
25
  def self.verbose?
21
26
  CONFIG[:verbose]
22
27
  end
@@ -42,9 +47,9 @@ module Chimps
42
47
  # file.
43
48
  def self.load
44
49
  # FIXME this is a terrible hack...and it only goes to 2 deep!
45
- if File.exist?(CONFIG[:identity_file])
50
+ if File.exist?(COMMAND_LINE_OPTIONS[:identity_file])
46
51
  require 'yaml'
47
- YAML.load_file(CONFIG[:identity_file]).each_pair do |key, value|
52
+ YAML.load_file(COMMAND_LINE_OPTIONS[:identity_file]).each_pair do |key, value|
48
53
  if value.is_a?(Hash) && CONFIG.include?(key)
49
54
  CONFIG[key].merge!(value)
50
55
  else
@@ -103,7 +103,7 @@ module Chimps
103
103
  # @return [Chimps::Response]
104
104
  def get options={}
105
105
  handle_exceptions do
106
- puts "GET #{url}" if Chimps.verbose?
106
+ Chimps.log.info("GET #{url}")
107
107
  Response.new(super(DEFAULT_HEADERS.merge(options)))
108
108
  end
109
109
  end
@@ -117,7 +117,7 @@ module Chimps
117
117
  # @return [Chimps::Response]
118
118
  def post options={}
119
119
  handle_exceptions do
120
- puts "POST #{url}" if Chimps.verbose?
120
+ Chimps.log.info("POST #{url}")
121
121
  Response.new(super(data_text, DEFAULT_HEADERS.merge(options)))
122
122
  end
123
123
  end
@@ -131,7 +131,7 @@ module Chimps
131
131
  # @return [Chimps::Response]
132
132
  def put options={}
133
133
  handle_exceptions do
134
- puts "PUT #{url}" if Chimps.verbose?
134
+ Chimps.log.info("PUT #{url}")
135
135
  Response.new(super(data_text, DEFAULT_HEADERS.merge(options)))
136
136
  end
137
137
  end
@@ -146,7 +146,7 @@ module Chimps
146
146
  # @return [Chimps::Response]
147
147
  def delete options={}
148
148
  handle_exceptions do
149
- puts "DELETE #{url}" if Chimps.verbose?
149
+ Chimps.log.info("DELETE #{url}")
150
150
  Response.new(super(DEFAULT_HEADERS.merge(options)))
151
151
  end
152
152
  end
@@ -175,20 +175,10 @@ module Chimps
175
175
  query_params[:api_key] = Chimps::CONFIG[:site][:key]
176
176
  end
177
177
 
178
- # Return the sorted keys of the query params.
179
- #
180
- # @return [Array]
181
- def alphabetical_params
182
- query_params.keys.map(&:to_s).sort
183
- end
184
-
185
178
  # Return an unsigned query string for this request.
186
179
  #
187
- # Query parameters will be used in alphabetical order.
188
- #
189
180
  # @return [String]
190
181
  def unsigned_query_string
191
- # alphabetical_params.map { |key| "#{CGI::escape(key.to_s)}=#{CGI::escape(query_params[key.to_sym].to_s)}" }.join("&") # doesn't flatten nested hashes properly
192
182
  RestClient::Payload.generate(query_params)
193
183
  end
194
184
 
@@ -200,8 +190,7 @@ module Chimps
200
190
  #
201
191
  # @return [String]
202
192
  def unsigned_query_string_stripped
203
- require 'cgi'
204
- @query_params_text ||= alphabetical_params.map { |key| CGI::escape(key.to_s) + CGI::escape(query_params[key.to_sym].to_s) }.join('')
193
+ @query_params_text ||= obj_to_stripped_string(query_params)
205
194
  end
206
195
 
207
196
  # Return the data of this request as a string.
@@ -238,6 +227,18 @@ module Chimps
238
227
  "#{unsigned_query_string}&signature=#{signature}"
239
228
  end
240
229
 
230
+ # Turn +obj+ into a string, sorting on internal keys.
231
+ #
232
+ # @param [Hash, Array, String] obj
233
+ # @return [String]
234
+ def obj_to_stripped_string obj
235
+ case obj
236
+ when Hash then obj.keys.map(&:to_s).sort.map { |key| [key.to_s.downcase, obj_to_stripped_string(obj[key.to_sym])].join('') }.join('')
237
+ when Array then obj.map { |e| obj_to_stripped_string(e) }.join('')
238
+ else obj.to_s
239
+ end
240
+ end
241
+
241
242
  end
242
243
 
243
244
  # A class to encapsulate requests made against the Infochimps paid
@@ -0,0 +1,50 @@
1
+ module Chimps
2
+
3
+ # The Chimps logger. Set via Chimps::CONFIG[:log_file] and defaults
4
+ # to $stdout.
5
+ #
6
+ # @return [Logger]
7
+ def self.log
8
+ @log ||= Log.new_logger
9
+ end
10
+
11
+ # Set the Chimps logger.
12
+ #
13
+ # @param [Logger] new_log
14
+ def self.log= new_log
15
+ @log = new_log
16
+ end
17
+
18
+ # Module for initializing the Chimps logger from configuration
19
+ # settings.
20
+ module Log
21
+
22
+ # Initialize a new Logger instance with the log level set by
23
+ # Chimps.verbose?
24
+ #
25
+ # @return [Logger]
26
+ def self.new_logger
27
+ require 'logger'
28
+ returning(Logger.new(log_file)) do |log|
29
+ log.progname = "Chimps"
30
+ log.level = Chimps.verbose? ? Logger::INFO : Logger::WARN
31
+ end
32
+ end
33
+
34
+ protected
35
+ # Return either the path to the log file in
36
+ # Chimps::CONFIG[:log_file] or $stdout if the path is unset or
37
+ # equal to `-'.
38
+ #
39
+ # @return [String, $stdout] the path to the log or $stdout
40
+ def self.log_file
41
+ if Chimps::CONFIG[:log_file]
42
+ Chimps::CONFIG[:log_file].strip == '-' ? $stdout : Chimps::CONFIG[:log_file]
43
+ else
44
+ $stdout
45
+ end
46
+ end
47
+ end
48
+ end
49
+
50
+
data/lib/chimps/utils.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'chimps/config'
2
2
  require 'chimps/utils/extensions'
3
3
  require 'chimps/utils/error'
4
+ require 'chimps/utils/log'
4
5
 
5
6
  module Chimps
6
7
  module Utils
@@ -8,4 +9,5 @@ module Chimps
8
9
  autoload :UsesModel, 'chimps/utils/uses_model'
9
10
  autoload :UsesYamlData, 'chimps/utils/uses_yaml_data'
10
11
  end
12
+
11
13
  end
@@ -47,8 +47,7 @@ module Chimps
47
47
  # @option options [String, IMW::Resource] archive the path to the archive to create (defaults to IMW::Workflows::Downloader#default_archive_path)
48
48
  # @option options [String] fmt the data format to annotate the upload with
49
49
  def initialize options={}
50
- require 'imw'
51
- IMW.verbose = Chimps.verbose?
50
+ require_imw
52
51
  @dataset = options[:dataset] or raise PackagingError.new("Must provide the ID or handle of a dataset to upload data to.")
53
52
  self.local_paths = options[:local_paths] # must come before self.archive=
54
53
  self.archive = options[:archive]
@@ -105,24 +104,43 @@ module Chimps
105
104
  @archive = potential_package
106
105
  end
107
106
 
107
+ # Return the summarizer responsible for summarizing data on this
108
+ # upload.
109
+ #
110
+ # @return [IMW::Tools::Summarizer]
111
+ def summarizer
112
+ @summarizer ||= IMW::Tools::Summarizer.new(local_paths)
113
+ end
114
+
108
115
  # Set the data format to annotate the upload with.
109
116
  #
110
117
  # If not provided, Chimps will use the Infinite Monkeywrench
111
118
  # (IMW) to try and guess the data format. See
112
119
  # IMW::Tools::Summarizer for more information.
113
120
  def fmt= new_fmt=nil
114
- @fmt ||= new_fmt || IMW::Tools::Summarizer.new(local_paths).most_common_data_format
121
+ @fmt ||= new_fmt || summarizer.most_common_data_format
115
122
  end
116
123
 
117
124
  # The default path to the archive that will be built.
118
125
  #
119
- # Defaults to a ZIP file in the current directory named after
120
- # the +dataset+'s ID or handle and the current time.
126
+ # Defaults to a file in the current directory named after the
127
+ # +dataset+'s ID or handle and the current time. The package
128
+ # format (<tt>.zip</tt> or <tt>.tar.bz2</tt>) is determined by
129
+ # size, see
130
+ # Chimps::Workflows::Uploader#default_archive_extension.
121
131
  #
122
132
  # @return [String]
123
133
  def default_archive_path
124
134
  # in current working directory...
125
- "chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.zip"
135
+ "chimps_#{dataset}-#{Time.now.strftime(Chimps::CONFIG[:timestamp_format])}.#{default_archive_extension}"
136
+ end
137
+
138
+ # Use <tt>zip</tt> if the data is less than 500 MB in size and
139
+ # <tt>tar.bz2</tt> otherwise.
140
+ #
141
+ # @return ['tar.bz2', 'zip']
142
+ def default_archive_extension
143
+ summarizer.total_size >= 524288000 ? 'tar.bz2' : 'zip'
126
144
  end
127
145
 
128
146
  # The URL to the <tt>README-infochimps</tt> file on Infochimps'
@@ -222,7 +240,7 @@ module Chimps
222
240
  #
223
241
  # @return [Hash]
224
242
  def package_data
225
- { :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension} }
243
+ { :package => {:path => token['key'], :fmt => token['fmt'], :pkg_size => archive.size, :pkg_fmt => archive.extension, :summary => summarizer.summary, :token_timestamp => token['timestamp'] } }
226
244
  end
227
245
 
228
246
  # Make a final POST request to Infochimps, creating the final
@@ -232,6 +250,17 @@ module Chimps
232
250
  package_creation_response.print
233
251
  raise UploadError.new("Unable to notify Infochimps of newly uploaded data.") if package_creation_response.error?
234
252
  end
253
+
254
+ protected
255
+ # Require IMW and match the IMW logger to the Chimps logger.
256
+ def require_imw
257
+ begin
258
+ require 'imw'
259
+ rescue LoadError
260
+ raise Chimps::Error.new("The Infinite Monkeywrench (IMW) gem is required to upload.")
261
+ end
262
+ IMW.verbose = Chimps.verbose?
263
+ end
235
264
 
236
265
  end
237
266
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chimps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dhruv Bansal
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-05-25 00:00:00 -05:00
12
+ date: 2010-06-06 00:00:00 -05:00
13
13
  default_executable: chimps
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -105,6 +105,7 @@ files:
105
105
  - lib/chimps/utils.rb
106
106
  - lib/chimps/utils/error.rb
107
107
  - lib/chimps/utils/extensions.rb
108
+ - lib/chimps/utils/log.rb
108
109
  - lib/chimps/utils/uses_curl.rb
109
110
  - lib/chimps/utils/uses_model.rb
110
111
  - lib/chimps/utils/uses_yaml_data.rb