chimps 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/.gitignore +17 -0
  2. data/LICENSE +674 -0
  3. data/README.rdoc +48 -0
  4. data/VERSION +1 -0
  5. data/bin/chimps +4 -0
  6. data/examples/batch.yaml +69 -0
  7. data/lib/chimps/cli.rb +102 -0
  8. data/lib/chimps/commands/base.rb +107 -0
  9. data/lib/chimps/commands/batch.rb +68 -0
  10. data/lib/chimps/commands/create.rb +33 -0
  11. data/lib/chimps/commands/destroy.rb +28 -0
  12. data/lib/chimps/commands/download.rb +76 -0
  13. data/lib/chimps/commands/help.rb +89 -0
  14. data/lib/chimps/commands/list.rb +54 -0
  15. data/lib/chimps/commands/query.rb +59 -0
  16. data/lib/chimps/commands/search.rb +59 -0
  17. data/lib/chimps/commands/show.rb +32 -0
  18. data/lib/chimps/commands/test.rb +40 -0
  19. data/lib/chimps/commands/update.rb +33 -0
  20. data/lib/chimps/commands/upload.rb +63 -0
  21. data/lib/chimps/commands.rb +46 -0
  22. data/lib/chimps/config.rb +57 -0
  23. data/lib/chimps/request.rb +302 -0
  24. data/lib/chimps/response.rb +146 -0
  25. data/lib/chimps/typewriter.rb +326 -0
  26. data/lib/chimps/utils/error.rb +40 -0
  27. data/lib/chimps/utils/extensions.rb +109 -0
  28. data/lib/chimps/utils/uses_curl.rb +26 -0
  29. data/lib/chimps/utils/uses_model.rb +51 -0
  30. data/lib/chimps/utils/uses_yaml_data.rb +94 -0
  31. data/lib/chimps/utils.rb +11 -0
  32. data/lib/chimps/workflows/batch.rb +127 -0
  33. data/lib/chimps/workflows/downloader.rb +102 -0
  34. data/lib/chimps/workflows/uploader.rb +238 -0
  35. data/lib/chimps/workflows.rb +11 -0
  36. data/lib/chimps.rb +22 -0
  37. data/spec/chimps/cli_spec.rb +22 -0
  38. data/spec/chimps/commands/base_spec.rb +25 -0
  39. data/spec/chimps/commands/list_spec.rb +25 -0
  40. data/spec/chimps/response_spec.rb +8 -0
  41. data/spec/chimps/typewriter_spec.rb +114 -0
  42. data/spec/spec_helper.rb +17 -0
  43. data/spec/support/custom_matchers.rb +6 -0
  44. metadata +133 -0
data/README.rdoc ADDED
@@ -0,0 +1,48 @@
1
+ = Searching
2
+
3
+ Search datasets
4
+
5
+ chimps search statistical abstract
6
+
7
+ Search sources
8
+
9
+ chimps search -m source department of justice
10
+
11
+ Search datasets with particular tags
12
+
13
+ chimps search -t government,finance statistical abstract
14
+
15
+ or categories
16
+
17
+ chimps search -c education statistical abstract
18
+
19
+ = Browsing
20
+
21
+ chimps describe dataset 3923
22
+ chimps describe source us-doj
23
+ chimps describe field length
24
+
25
+ = Downloading
26
+
27
+ chimps download 39283
28
+
29
+ = Creating
30
+
31
+ chimps create data.yaml
32
+
33
+ also
34
+
35
+ chimps schema source
36
+ chimps schema dataset
37
+
38
+ and of course
39
+
40
+ chimps upload 39283 path/to/my/data
41
+
42
+ = General Options
43
+
44
+ Work as someone other than the usual user
45
+
46
+ chimps -i path/to/my/identity_file.yml create data.yaml
47
+
48
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bin/chimps ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require "chimps"
3
+ require 'chimps/cli'
4
+ Chimps::CLI.execute!(ARGV.dup) if $0 == __FILE__
@@ -0,0 +1,69 @@
1
+ ---
2
+
3
+ # This is an example of the expected structure of a file used in a
4
+ # bulk update.
5
+ #
6
+ # It consists of one large array itself consisting of mappings from a
7
+ # resource name (dataset, license, source) to properties about that
8
+ # resource.
9
+ #
10
+ # Many such documents can be combined together in a YAML stream and
11
+ # sent as a single request.
12
+
13
+ - source:
14
+ id: ~
15
+ request_id: foobar
16
+ title: Foobar the Fake Source
17
+ description: |
18
+
19
+ Foobar generates great data! Unfortunately, it's all of it
20
+ fake!
21
+
22
+ - dataset:
23
+ # this dataset lacks an ID property so it will be created
24
+ title: expatriatoro
25
+ description: |
26
+ Just some description of my dataset
27
+
28
+ tag_list:
29
+ - foo
30
+ - bar
31
+ - baz
32
+
33
+ sources:
34
+ # must all exist
35
+ - Census Bureau
36
+ - foobar
37
+
38
+ fields:
39
+ - title: length
40
+ units: m
41
+
42
+ - title: mass
43
+ units: kg
44
+ local_paths:
45
+ - /tmp/waste/waste.c
46
+
47
+ - dataset:
48
+ id: 1
49
+ title: f
50
+ description: |
51
+ Just some description of my dataset
52
+
53
+ tag_list:
54
+ - foo
55
+ - bar
56
+ - baz
57
+
58
+ sources:
59
+ # must all exist
60
+ - Census Bureau
61
+ - foobar
62
+
63
+ snippet:
64
+ columns: foo, bar,baz
65
+ data:
66
+ - [1,2,3]
67
+ - [4,5,6]
68
+ - [7,8,9]
69
+
data/lib/chimps/cli.rb ADDED
@@ -0,0 +1,102 @@
1
+ module Chimps
2
+
3
+ # Defines methods for choosing which Chimps::Command class should be
4
+ # instantiated from the ARGV passed in on the command line.
5
+ module CLI
6
+
7
+ # Execute the Chimps command specified by +argv+.
8
+ #
9
+ # @param [Array<String>] argv
10
+ def self.execute! argv
11
+ begin
12
+ Runner.new(argv).execute!
13
+ rescue Chimps::Error => e
14
+ puts e.message
15
+ exit 1
16
+ rescue => e
17
+ $stderr.puts("#{e.message} (#{e.class})")
18
+ $stderr.puts(e.backtrace.join("\n"))
19
+ exit 1
20
+ end
21
+ end
22
+
23
+ # Defines methods to parse the original ARGV and from it choose
24
+ # and instantiate the appropriate Chimps::Command subclass with
25
+ # the appropriate arguments.
26
+ class Runner
27
+ include Chimps::Commands
28
+
29
+ # The original ARGV passed in by the user.
30
+ attr_reader :argv
31
+
32
+ # Create a new Chimps::CLI::Runner from +argv+.
33
+ #
34
+ # @param [Array<String>] argv
35
+ # @return [Chimps::CLI::Runner]
36
+ def initialize argv
37
+ @argv = argv
38
+ end
39
+
40
+ # Execute this Runner's chosen and initialized command.
41
+ def execute!
42
+ command.execute!
43
+ end
44
+
45
+ # The chosen and initialized command for this Runner.
46
+ #
47
+ # @return [Chimps::Command]
48
+ def command
49
+ @command ||= construct(command_name, argv_for_command)
50
+ end
51
+
52
+ protected
53
+
54
+ # Return the index in ARGV of the command name to run.
55
+ #
56
+ # It may not always be the first element of ARGV because
57
+ #
58
+ # chimps show my-dataset
59
+ # chimps -v show my-dataset
60
+ # chimps show -v my-dataset
61
+ #
62
+ # should all have the same behavior.
63
+ #
64
+ # @return [Integer] the index in ARGV of the command name.
65
+ def command_index
66
+ return @command_index if @command_index
67
+ argv.each_with_index do |arg, index|
68
+ if command_name?(arg)
69
+ @command_index = index
70
+ break
71
+ end
72
+ end
73
+ @command_index or raise CLIError.new("Must specify a command. Try running `chimps help'")
74
+ end
75
+
76
+ # The name of the command to run.
77
+ #
78
+ # @return [String]
79
+ def command_name
80
+ @command_name ||= argv[command_index]
81
+ end
82
+
83
+ # The ARGV to pass to the command chosen to run.
84
+ #
85
+ # It differs from the original ARGV only in that the command's
86
+ # name has been stripped:
87
+ #
88
+ # Chimps::CLI::Runner.new(['show', '-v', 'my-dataset']).argv_for_command
89
+ # => ['-v','my-dataset']
90
+ #
91
+ # This does not always return "all but the first element" of
92
+ # ARGV; see Chimps::CLI::Runner#command_index for details.
93
+ def argv_for_command
94
+ returning(argv.dup) do |new_argv|
95
+ new_argv.delete_at(command_index)
96
+ end
97
+ end
98
+ end
99
+
100
+ end
101
+ end
102
+
@@ -0,0 +1,107 @@
1
+ require 'optparse'
2
+
3
+ module Chimps
4
+
5
+ # A base class from which to subclass specific commands. A subclass
6
+ # should
7
+ #
8
+ # - define class constants <tt>BANNER</tt> and <tt>HELP</tt> which
9
+ # will display the appropriate help to the user.
10
+ #
11
+ # - add specific options by defining a method that begins with
12
+ # +define+ and ends with +options+ (i.e. - +define_output_options+
13
+ # to add options related to output).
14
+ #
15
+ # - define a method <tt>execute!</tt> which will actually run the
16
+ # command.
17
+ class Command < OptionParser
18
+
19
+ # Appears when printing help for this command, as the very first
20
+ # line. Should be one-line summary of how to use this command.
21
+ BANNER = "Define #{self}::BANNER when you subclass Chimps::Command"
22
+
23
+ # Appears when printing help for this command. Should consist of
24
+ # general help or examples of the command itself. Help on
25
+ # specific options is automatically generated.
26
+ HELP = "Define #{self}::HELP when you subclass Chimps::Command"
27
+
28
+ # The (processed) ARGV for this command.
29
+ attr_reader :argv
30
+
31
+ # Create a new command. Will define options specific to
32
+ # subclasses, parse the given +argv+, and load the global Chimps
33
+ # configuration. Will _not_ execute the command.
34
+ #
35
+ # @param [Array<String>] argv
36
+ # @return [Chimps::Command]
37
+ def initialize argv
38
+ super self.class::BANNER
39
+ @argv = argv
40
+ run_options_definers
41
+ parse_command_line!
42
+ Chimps::Config.load
43
+ end
44
+
45
+ # The name of this command, including the
46
+ # <tt>Chimps::Commands</tt> prefix.
47
+ #
48
+ # @return [String]
49
+ def self.name
50
+ self.to_s.downcase
51
+ end
52
+
53
+ # The name of this command, excluding the
54
+ # <tt>Chimps::Commands</tt> prefix.
55
+ #
56
+ # @return [String]
57
+ def name
58
+ self.class.name.split('::').last
59
+ end
60
+
61
+ protected
62
+
63
+ # Parse the command line.
64
+ def parse_command_line!
65
+ begin
66
+ parse!(argv)
67
+ rescue OptionParser::InvalidOption => e
68
+ raise CLIError.new("#{e.message}. Try `chimps help #{name}'")
69
+ end
70
+ end
71
+
72
+ # Run all methods beginning with +define+ and ending with +option+
73
+ # or +options+.
74
+ #
75
+ # This is a (hackish) mechanism for subclasses of Chimps::Command to
76
+ # define their own specific options.
77
+ def run_options_definers
78
+ # FIXME there's a better way to do this...
79
+ methods.grep(/^define.+options?$/).each { |method| send method }
80
+ end
81
+
82
+ # Define options common to all Chimps' commands. The only two
83
+ # such options at the moment are <tt>-v</tt> (or
84
+ # <tt>--[no-]verbose</tt>) for verbosity, and <tt>-i</tt> (or
85
+ # <tt>--identity-file</tt>) for setting the identity file to use.
86
+ def define_common_options
87
+ separator self.class::HELP
88
+ separator "\nOptions include:"
89
+
90
+ on("-v", "--[no-]verbose", "Be verbose, or not.") do |v|
91
+ Chimps::CONFIG[:verbose] = v
92
+ end
93
+
94
+ on("-i", "--identity-file PATH", "Use the given YAML identify file to authenticate with Infochimps instead of the default (~/.chimps) ") do |i|
95
+ Chimps::CONFIG[:identity_file] = File.expand_path(i)
96
+ end
97
+ end
98
+
99
+ # Run this command.
100
+ #
101
+ # Will raise a NotImplementedError for Chimps::Command itself --
102
+ # subclasses are expected to redefine this method.
103
+ def execute!
104
+ raise NotImplementedError.new("Redefine the `execute!' method in a subclass of #{self.class}.")
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,68 @@
1
+ module Chimps
2
+ module Commands
3
+
4
+ # A command for performing batch updates.
5
+ class Batch < Chimps::Command
6
+
7
+ BANNER = "usage: chimps batch [OPTIONS] [INPUT_PATH] ..."
8
+ HELP = <<EOF
9
+
10
+ Perform a batch operation on Infochimps by reading YAML input files.
11
+
12
+ The input files should collectively define an array of resources to
13
+ make create or update requests on. Each request in the array is
14
+ treated separately (even though the entire array is processed as one
15
+ POST request) and so it is possible that some will succeed and others
16
+ fail.
17
+
18
+ It is also possible to upload data in this batch process. Each
19
+ (successful) request which defined a 'local_paths' property in the
20
+ original input files will have the data at these paths uploaded to
21
+ Infochimps. These uploads will proceed one at a time following the
22
+ initial batch POST request.
23
+
24
+ The format of the YAML input files is given at
25
+
26
+ http://infochimps.org/api
27
+ EOF
28
+
29
+ # A path to store the intermediate batch response. Useful for
30
+ # debugging.
31
+ attr_accessor :output_path
32
+
33
+ # The data format to annotate the upload with.
34
+ #
35
+ # Chimps will try to guess if this isn't given.
36
+ attr_reader :fmt
37
+
38
+ # Whether to continue to upload even if some of the resources
39
+ # had errors on update/create.
40
+ attr_accessor :upload_even_if_errors
41
+
42
+ include Chimps::Utils::UsesYamlData
43
+
44
+ def define_options
45
+ on_tail("-o", "--output PATH", "Store the response from the server at PATH") do |o|
46
+ @output_path = File.expand_path(o)
47
+ end
48
+
49
+ on_tail("-e", "--force", "Attempt to upload data even when there were errors in the batch update request") do |e|
50
+ @upload_even_if_errors = e
51
+ end
52
+
53
+ on_tail("-f", "--format FORMAT", "Data format to annotate EACH upload with. Tries to guess if not given.") do |f|
54
+ @fmt = f
55
+ end
56
+
57
+ end
58
+
59
+ # Perform the batch update and upload.
60
+ def execute!
61
+ ensure_data_is_present!
62
+ Chimps::Workflows::BatchUpdater.new(data, :output_path => output_path, :upload_even_if_errors => upload_even_if_errors, :fmt => fmt).execute!
63
+ end
64
+
65
+ end
66
+ end
67
+ end
68
+
@@ -0,0 +1,33 @@
1
+ module Chimps
2
+ module Commands
3
+
4
+ # A command to issue a POST request to create a resource at
5
+ # Infochimps.
6
+ class Create < Chimps::Command
7
+
8
+ BANNER = "usage: chimps create [OPTIONS] [PROP=VALUE] ..."
9
+ HELP = <<EOF
10
+
11
+ Create a single resource (defaults to a dataset) using the properties
12
+ and values supplied.
13
+
14
+ Properties and values can be supplied directly on the command line,
15
+ from an input YAML file, or multiple YAML documents streamed in via
16
+ STDIN, in order of decreasing precedence.
17
+ EOF
18
+
19
+ # Models this command applies to (default first)
20
+ MODELS = %w[dataset source license]
21
+ include Chimps::Utils::UsesModel
22
+ include Chimps::Utils::UsesYamlData
23
+
24
+ # Issue the POST request.
25
+ def execute!
26
+ ensure_data_is_present!
27
+ Request.new(models_path, :data => {model.to_sym => data } , :authenticate => true).post.print
28
+ end
29
+
30
+ end
31
+ end
32
+ end
33
+
@@ -0,0 +1,28 @@
1
+ module Chimps
2
+ module Commands
3
+
4
+ # A command to issue a DELETE request against a resource at
5
+ # Infochimps.
6
+ class Destroy < Chimps::Command
7
+
8
+ BANNER = "usage: chimps destroy [OPTIONS] ID_OR_HANDLE"
9
+ HELP = <<EOF
10
+
11
+ Destroys a resource of a given type (defaults to dataset) identified
12
+ by ID_OR_HANDLE.
13
+
14
+ EOF
15
+
16
+ # Models this command applies to (default first)
17
+ MODELS = %w[dataset package source license]
18
+ include Chimps::Utils::UsesModel
19
+
20
+ # Issue the DELETE request.
21
+ def execute!
22
+ Request.new(model_path, :authenticate => true).delete.print
23
+ end
24
+
25
+ end
26
+ end
27
+ end
28
+
@@ -0,0 +1,76 @@
1
+ module Chimps
2
+ module Commands
3
+
4
+ # A command to download data from Infochimps.
5
+ class Download < Chimps::Command
6
+
7
+ BANNER = "usage: chimps download [OPTIONS] ID_OR_HANDLE"
8
+ HELP = <<EOF
9
+
10
+ Download a dataset identified by the given ID_OR_HANDLE to the current
11
+ directory (you can also specify a particular path).
12
+
13
+ If the dataset isn't freely downloadable, you'll have to have
14
+ purchased it first via the Web.
15
+ EOF
16
+
17
+ # The chosen data format.
18
+ attr_reader :fmt
19
+
20
+ # The chosen package format.
21
+ attr_reader :pkg_fmt
22
+
23
+ # The local path to download the data to.
24
+ attr_reader :local_path
25
+
26
+ # Set the format for the download token.
27
+ #
28
+ # Will try to normalize the input somewhat (downcasing,
29
+ # stripping leading periods)
30
+ #
31
+ # @param [String] new_fmt
32
+ def fmt= new_fmt
33
+ @fmt = new_fmt.downcase.strip.gsub(/^\./, '')
34
+ end
35
+
36
+ # Set the package format for the download token.
37
+ #
38
+ # Will try to normalize the input somewhat (downcasing,
39
+ # stripping leading periods)
40
+ #
41
+ # @param [String] new_pkg_fmt
42
+ def pkg_fmt= new_pkg_fmt
43
+ @pkg_fmt = new_pkg_fmt.downcase.strip.gsub(/^\./, '')
44
+ end
45
+
46
+ # The ID or handle of the dataset to download.
47
+ def dataset
48
+ raise CLIError.new("Must provide an ID or handle of a dataset to download.") if argv.first.blank?
49
+ argv.first
50
+ end
51
+
52
+ # Issue the request for the token and the request for the
53
+ # download.
54
+ def execute!
55
+ Chimps::Workflows::Downloader.new(:dataset => dataset, :fmt => fmt, :pkg_fmt => pkg_fmt, :local_path => local_path).execute!
56
+ end
57
+
58
+ def define_options
59
+ on_tail("-o", "--output PATH", "Path to download file to") do |o|
60
+ @local_path = File.expand_path(o)
61
+ end
62
+
63
+ on_tail("-f", "--format FORMAT", "Choose a particular data format (csv, tsv, excel, &c.)") do |f|
64
+ self.fmt = f
65
+ end
66
+
67
+ on_tail("-p", "--package PACKAGE", "Choose a particular package type (zip or tar.bz2)") do |p|
68
+ self.pkg_fmt = p
69
+ end
70
+
71
+ end
72
+
73
+ end
74
+ end
75
+ end
76
+
@@ -0,0 +1,89 @@
1
+ module Chimps
2
+ module Commands
3
+ class Help < Chimps::Command
4
+
5
+ BANNER = "usage: chimps help [COMMAND]"
6
+ HELP = <<EOF
7
+
8
+ This is the Infochimps command-line client. You can use it to search,
9
+ browse, create, edit, or delete data and metadata in the Infochimps
10
+ repository at http://infochimps.org.
11
+
12
+ Before you can create, edit, or delete anything you'll need to get an
13
+ Infochimps account and sign up for an API key:
14
+
15
+ http://infochimps.org/signup
16
+
17
+ But you can still browse, search, and download (free) data
18
+ immediately.
19
+
20
+ Learn more about the Infochimps API which powers this tool at
21
+
22
+ http://infochimps.org/api
23
+
24
+ = Commands
25
+
26
+ chimps is a wrapper over the RESTful Infochimps API. It exposes the
27
+ following actions
28
+
29
+ chimps list
30
+ chimps show
31
+ chimps create
32
+ chimps update
33
+ chimps destroy
34
+
35
+ for datasets (as well as other selected resources). It also helps
36
+ automate the workflow of uploading and downloading data and making
37
+ batch changes with
38
+
39
+ chimps upload
40
+ chimps download
41
+ chimps batch
42
+
43
+ You can also make queries against the Infochimps paid query API with
44
+
45
+ chimps query
46
+
47
+ Finally, you can test that your system is configured properly and that
48
+ you can authenticate with Infochimps with
49
+
50
+ chimps test
51
+
52
+ If you're confused try running
53
+
54
+ chimps help COMMAND
55
+
56
+ for any of the commands above.
57
+
58
+ = Setup
59
+
60
+ Once you have obtained an API key and secret from Infochimps, place them
61
+ in a file Chimps::CONFIG[:identity_file] in your home directory with the
62
+ following format
63
+
64
+ ---
65
+ # API credentials for use on the main Infochimps site
66
+ :site:
67
+ :username: your_site_name
68
+ :key: oreeph6giedaeL3
69
+ :secret: Queechei6cu8chiuyiig8cheg5Ahx0boolaizi1ohtarooFu1doo5ohj5ohp9eehae5hakoongahghohgoi7yeihohx1eidaeng0eaveefohchoh6WeeV1EM
70
+
71
+ # API credentials for use on the Infochimps paid query API
72
+ :query:
73
+ :username: your_query_name
74
+ :key: zei7eeloShoah3Ce
75
+ :secret: eixairaichaxaaRe8eeya5moh8Uthahf0pi4eig7SoirohPhei6sai8aereu0yuepiefeipoozoegahchaeheedee8uphohoo9moongae8Fa0aih4BooSeiM
76
+ EOF
77
+
78
+ def execute!
79
+ if argv.first.blank?
80
+ puts self
81
+ else
82
+ puts Chimps::Commands.construct(argv.first, [])
83
+ end
84
+ end
85
+
86
+ end
87
+ end
88
+ end
89
+
@@ -0,0 +1,54 @@
1
+ module Chimps
2
+ module Commands
3
+
4
+ # A command to issue a GET request against an index of resources
5
+ # at Infochimps.
6
+ class List < Chimps::Command
7
+
8
+ BANNER = "chimps list [OPTIONS]"
9
+ HELP = <<EOF
10
+
11
+ List resources of a given type (defaults to dataset).
12
+
13
+ Lists your resources by default but see options below.
14
+
15
+ EOF
16
+
17
+ # Models that can be indexed (default first)
18
+ MODELS = %w[dataset license source]
19
+ include Chimps::Utils::UsesModel
20
+
21
+ def define_options
22
+ on_tail("-a", "--all", "List all resources, not just those owned by you.") do |a|
23
+ @all = a
24
+ end
25
+
26
+ on_tail("-s", "--[no-]skip-column-names", "Don't print column names in output.") do |s|
27
+ @skip_column_names = s
28
+ end
29
+
30
+ end
31
+
32
+ # List all resources or just those owned by the Chimps user?
33
+ def all?
34
+ @all
35
+ end
36
+
37
+ # Parameters to include in the query.
38
+ #
39
+ # If listing all resources, then return +nil+.
40
+ #
41
+ # @return [Hash, nil]
42
+ def params
43
+ return { :id => Chimps.username } unless all?
44
+ end
45
+
46
+ # Issue the GET request.
47
+ def execute!
48
+ Request.new(models_path, :params => params).get.print(:skip_column_names => @skip_column_names)
49
+ end
50
+
51
+ end
52
+ end
53
+ end
54
+