chimps 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/.gitignore +17 -0
  2. data/LICENSE +674 -0
  3. data/README.rdoc +48 -0
  4. data/VERSION +1 -0
  5. data/bin/chimps +4 -0
  6. data/examples/batch.yaml +69 -0
  7. data/lib/chimps/cli.rb +102 -0
  8. data/lib/chimps/commands/base.rb +107 -0
  9. data/lib/chimps/commands/batch.rb +68 -0
  10. data/lib/chimps/commands/create.rb +33 -0
  11. data/lib/chimps/commands/destroy.rb +28 -0
  12. data/lib/chimps/commands/download.rb +76 -0
  13. data/lib/chimps/commands/help.rb +89 -0
  14. data/lib/chimps/commands/list.rb +54 -0
  15. data/lib/chimps/commands/query.rb +59 -0
  16. data/lib/chimps/commands/search.rb +59 -0
  17. data/lib/chimps/commands/show.rb +32 -0
  18. data/lib/chimps/commands/test.rb +40 -0
  19. data/lib/chimps/commands/update.rb +33 -0
  20. data/lib/chimps/commands/upload.rb +63 -0
  21. data/lib/chimps/commands.rb +46 -0
  22. data/lib/chimps/config.rb +57 -0
  23. data/lib/chimps/request.rb +302 -0
  24. data/lib/chimps/response.rb +146 -0
  25. data/lib/chimps/typewriter.rb +326 -0
  26. data/lib/chimps/utils/error.rb +40 -0
  27. data/lib/chimps/utils/extensions.rb +109 -0
  28. data/lib/chimps/utils/uses_curl.rb +26 -0
  29. data/lib/chimps/utils/uses_model.rb +51 -0
  30. data/lib/chimps/utils/uses_yaml_data.rb +94 -0
  31. data/lib/chimps/utils.rb +11 -0
  32. data/lib/chimps/workflows/batch.rb +127 -0
  33. data/lib/chimps/workflows/downloader.rb +102 -0
  34. data/lib/chimps/workflows/uploader.rb +238 -0
  35. data/lib/chimps/workflows.rb +11 -0
  36. data/lib/chimps.rb +22 -0
  37. data/spec/chimps/cli_spec.rb +22 -0
  38. data/spec/chimps/commands/base_spec.rb +25 -0
  39. data/spec/chimps/commands/list_spec.rb +25 -0
  40. data/spec/chimps/response_spec.rb +8 -0
  41. data/spec/chimps/typewriter_spec.rb +114 -0
  42. data/spec/spec_helper.rb +17 -0
  43. data/spec/support/custom_matchers.rb +6 -0
  44. metadata +133 -0
data/README.rdoc ADDED
@@ -0,0 +1,48 @@
1
+ = Searching
2
+
3
+ Search datasets
4
+
5
+ chimps search statistical abstract
6
+
7
+ Search sources
8
+
9
+ chimps search -m source department of justice
10
+
11
+ Search datasets with particular tags
12
+
13
+ chimps search -t government,finance statistical abstract
14
+
15
+ or categories
16
+
17
+ chimps search -c education statistical abstract
18
+
19
+ = Browsing
20
+
21
+ chimps describe dataset 3923
22
+ chimps describe source us-doj
23
+ chimps describe field length
24
+
25
+ = Downloading
26
+
27
+ chimps download 39283
28
+
29
+ = Creating
30
+
31
+ chimps create data.yaml
32
+
33
+ also
34
+
35
+ chimps schema source
36
+ chimps schema dataset
37
+
38
+ and of course
39
+
40
+ chimps upload 39283 path/to/my/data
41
+
42
+ = General Options
43
+
44
+ Work as someone other than the usual user
45
+
46
+ chimps -i path/to/my/identity_file.yml create data.yaml
47
+
48
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bin/chimps ADDED
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# Command-line entry point for Chimps: loads the library and its CLI
# layer, then hands a copy of ARGV to Chimps::CLI.execute!.
require "chimps"
require 'chimps/cli'

# Deliberately not guarded by `$0 == __FILE__`: when this gem is
# installed, RubyGems generates a wrapper executable that `load`s this
# file, so $0 is the wrapper's path and never equals __FILE__.  With the
# guard in place the installed `chimps` command would silently do
# nothing.
Chimps::CLI.execute!(ARGV.dup)
@@ -0,0 +1,69 @@
1
+ ---
2
+
3
+ # This is an example of the expected structure of a file used in a
4
+ # bulk update.
5
+ #
6
+ # It consists of one large array itself consisting of mappings from a
7
+ # resource name (dataset, license, source) to properties about that
8
+ # resource.
9
+ #
10
+ # Many such documents can be combined together in a YAML stream and
11
+ # sent as a single request.
12
+
13
+ - source:
14
+ id: ~
15
+ request_id: foobar
16
+ title: Foobar the Fake Source
17
+ description: |
18
+
19
+ Foobar generates great data! Unfortunately, it's all of it
20
+ fake!
21
+
22
+ - dataset:
23
+ # this dataset lacks an ID property so it will be created
24
+ title: expatriatoro
25
+ description: |
26
+ Just some description of my dataset
27
+
28
+ tag_list:
29
+ - foo
30
+ - bar
31
+ - baz
32
+
33
+ sources:
34
+ # must all exist
35
+ - Census Bureau
36
+ - foobar
37
+
38
+ fields:
39
+ - title: length
40
+ units: m
41
+
42
+ - title: mass
43
+ units: kg
44
+ local_paths:
45
+ - /tmp/waste/waste.c
46
+
47
+ - dataset:
48
+ id: 1
49
+ title: f
50
+ description: |
51
+ Just some description of my dataset
52
+
53
+ tag_list:
54
+ - foo
55
+ - bar
56
+ - baz
57
+
58
+ sources:
59
+ # must all exist
60
+ - Census Bureau
61
+ - foobar
62
+
63
+ snippet:
64
+ columns: foo, bar,baz
65
+ data:
66
+ - [1,2,3]
67
+ - [4,5,6]
68
+ - [7,8,9]
69
+
data/lib/chimps/cli.rb ADDED
@@ -0,0 +1,102 @@
1
module Chimps

  # Defines methods for choosing which Chimps::Command class should be
  # instantiated from the ARGV passed in on the command line.
  module CLI

    # Execute the Chimps command specified by +argv+.
    #
    # Errors are reported on standard error and the process exits with
    # status 1.  For a Chimps::Error only the message is printed; for any
    # other StandardError the message, class, and backtrace are printed.
    #
    # @param [Array<String>] argv
    def self.execute! argv
      Runner.new(argv).execute!
    rescue Chimps::Error => e
      # Diagnostics belong on stderr (as in the generic branch below) so
      # that they never mix with a command's regular output on stdout.
      $stderr.puts(e.message)
      exit 1
    rescue => e
      $stderr.puts("#{e.message} (#{e.class})")
      $stderr.puts(e.backtrace.join("\n"))
      exit 1
    end

    # Defines methods to parse the original ARGV and from it choose
    # and instantiate the appropriate Chimps::Command subclass with
    # the appropriate arguments.
    class Runner
      include Chimps::Commands

      # The original ARGV passed in by the user.
      attr_reader :argv

      # Create a new Chimps::CLI::Runner from +argv+.
      #
      # @param [Array<String>] argv
      # @return [Chimps::CLI::Runner]
      def initialize argv
        @argv = argv
      end

      # Execute this Runner's chosen and initialized command.
      def execute!
        command.execute!
      end

      # The chosen and initialized command for this Runner.  Built on
      # first use and memoized.
      #
      # @return [Chimps::Command]
      def command
        @command ||= construct(command_name, argv_for_command)
      end

      protected

      # Return the index in ARGV of the command name to run.
      #
      # It may not always be the first element of ARGV because
      #
      #   chimps show my-dataset
      #   chimps -v show my-dataset
      #   chimps show -v my-dataset
      #
      # should all have the same behavior.  The first element of ARGV
      # recognized by +command_name?+ wins.
      #
      # @return [Integer] the index in ARGV of the command name.
      # @raise [CLIError] if no element of ARGV is a command name.
      def command_index
        return @command_index if @command_index
        argv.each_with_index do |arg, index|
          if command_name?(arg)
            @command_index = index
            break
          end
        end
        @command_index or raise CLIError.new("Must specify a command. Try running `chimps help'")
      end

      # The name of the command to run.
      #
      # @return [String]
      def command_name
        @command_name ||= argv[command_index]
      end

      # The ARGV to pass to the command chosen to run.
      #
      # It differs from the original ARGV only in that the command's
      # name has been stripped:
      #
      #   runner = Chimps::CLI::Runner.new(['show', '-v', 'my-dataset'])
      #   runner.send(:argv_for_command)  # => ['-v', 'my-dataset']
      #
      # This does not always return "all but the first element" of
      # ARGV; see Chimps::CLI::Runner#command_index for details.
      #
      # @return [Array<String>] a new array; the original ARGV is not modified.
      def argv_for_command
        # Plain Ruby instead of the non-core `returning` helper: copy,
        # drop the command name, hand back the copy.
        remaining = argv.dup
        remaining.delete_at(command_index)
        remaining
      end
    end

  end
end
102
+
@@ -0,0 +1,107 @@
1
+ require 'optparse'
2
+
3
module Chimps

  # A base class from which to subclass specific commands.  A subclass
  # should
  #
  # - define class constants <tt>BANNER</tt> and <tt>HELP</tt> which
  #   will display the appropriate help to the user.
  #
  # - add specific options by defining a method that begins with
  #   +define+ and ends with +options+ (i.e. - +define_output_options+
  #   to add options related to output).
  #
  # - define a method <tt>execute!</tt> which will actually run the
  #   command.
  class Command < OptionParser

    # Appears when printing help for this command, as the very first
    # line.  Should be one-line summary of how to use this command.
    BANNER = "Define #{self}::BANNER when you subclass Chimps::Command"

    # Appears when printing help for this command.  Should consist of
    # general help or examples of the command itself.  Help on
    # specific options is automatically generated.
    HELP = "Define #{self}::HELP when you subclass Chimps::Command"

    # The (processed) ARGV for this command.
    attr_reader :argv

    # Create a new command.  Will define options specific to
    # subclasses, parse the given +argv+, and load the global Chimps
    # configuration.  Will _not_ execute the command.
    #
    # @param [Array<String>] argv
    # @return [Chimps::Command]
    # @raise [CLIError] if +argv+ cannot be parsed
    def initialize argv
      super self.class::BANNER
      @argv = argv
      run_options_definers
      parse_command_line!
      Chimps::Config.load
    end

    # The name of this command, including the
    # <tt>Chimps::Commands</tt> prefix, downcased.
    #
    # @return [String]
    def self.name
      self.to_s.downcase
    end

    # The name of this command, excluding the
    # <tt>Chimps::Commands</tt> prefix.
    #
    # @return [String]
    def name
      self.class.name.split('::').last
    end

    # Run this command.
    #
    # Will raise a NotImplementedError for Chimps::Command itself --
    # subclasses are expected to redefine this method.
    #
    # Kept public so that calling it on a command that has not
    # redefined it produces the NotImplementedError below rather than
    # a "protected method called" NoMethodError.
    #
    # @raise [NotImplementedError]
    def execute!
      raise NotImplementedError.new("Redefine the `execute!' method in a subclass of #{self.class}.")
    end

    protected

    # Parse the command line, removing recognized options from +argv+.
    #
    # Every OptionParser::ParseError (invalid option, missing or
    # invalid argument, ambiguous option, ...) is re-raised as a
    # CLIError carrying a hint to consult `chimps help`.
    #
    # @raise [CLIError]
    def parse_command_line!
      parse!(argv)
    rescue OptionParser::ParseError => e
      raise CLIError.new("#{e.message}. Try `chimps help #{name}'")
    end

    # Run all methods beginning with +define+ and ending with +option+
    # or +options+.
    #
    # This is a (hackish) mechanism for subclasses of Chimps::Command to
    # define their own specific options.  Only methods listed by
    # +methods+ are found, so an options definer must not be private.
    def run_options_definers
      # FIXME there's a better way to do this...
      methods.grep(/^define.+options?$/).each { |method| send method }
    end

    # Define options common to all Chimps' commands.  The only two
    # such options at the moment are <tt>-v</tt> (or
    # <tt>--[no-]verbose</tt>) for verbosity, and <tt>-i</tt> (or
    # <tt>--identity-file</tt>) for setting the identity file to use.
    # Both store their value in Chimps::CONFIG.
    def define_common_options
      separator self.class::HELP
      separator "\nOptions include:"

      on("-v", "--[no-]verbose", "Be verbose, or not.") do |v|
        Chimps::CONFIG[:verbose] = v
      end

      on("-i", "--identity-file PATH", "Use the given YAML identify file to authenticate with Infochimps instead of the default (~/.chimps) ") do |i|
        Chimps::CONFIG[:identity_file] = File.expand_path(i)
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
module Chimps
  module Commands

    # The +batch+ command: reads YAML input and hands it to
    # Chimps::Workflows::BatchUpdater for a combined update (and
    # optional upload).
    class Batch < Chimps::Command

      BANNER = "usage: chimps batch [OPTIONS] [INPUT_PATH] ..."
      HELP = <<EOF

Perform a batch operation on Infochimps by reading YAML input files.

The input files should collectively define an array of resources to
make create or update requests on. Each request in the array is
treated separately (even though the entire array is processed as one
POST request) and so it is possible that some will succeed and others
fail.

It is also possible to upload data in this batch process. Each
(successful) request which defined a 'local_paths' property in the
original input files will have the data at these paths uploaded to
Infochimps. These uploads will proceed one at a time following the
initial batch POST request.

The format of the YAML input files is given at

http://infochimps.org/api
EOF

      # Where to store the intermediate batch response (handy when
      # debugging).  Set by <tt>-o</tt>.
      attr_accessor :output_path

      # Data format to annotate the upload with; Chimps will try to
      # guess when none is given.  Set by <tt>-f</tt>.
      attr_reader :fmt

      # Whether uploads should go ahead even if some resources had
      # errors on update/create.  Set by <tt>-e</tt>.
      attr_accessor :upload_even_if_errors

      include Chimps::Utils::UsesYamlData

      # Register the options specific to this command.
      def define_options
        on_tail("-o", "--output PATH", "Store the response from the server at PATH") do |path|
          @output_path = File.expand_path(path)
        end

        on_tail("-e", "--force", "Attempt to upload data even when there were errors in the batch update request") do |force|
          @upload_even_if_errors = force
        end

        on_tail("-f", "--format FORMAT", "Data format to annotate EACH upload with. Tries to guess if not given.") do |format|
          @fmt = format
        end
      end

      # Perform the batch update and upload.
      def execute!
        ensure_data_is_present!
        Chimps::Workflows::BatchUpdater.new(
          data,
          :output_path           => output_path,
          :upload_even_if_errors => upload_even_if_errors,
          :fmt                   => fmt
        ).execute!
      end

    end
  end
end
68
+
@@ -0,0 +1,33 @@
1
module Chimps
  module Commands

    # The +create+ command: issues a POST request to create a resource
    # at Infochimps.
    class Create < Chimps::Command

      BANNER = "usage: chimps create [OPTIONS] [PROP=VALUE] ..."
      HELP = <<EOF

Create a single resource (defaults to a dataset) using the properties
and values supplied.

Properties and values can be supplied directly on the command line,
from an input YAML file, or multiple YAML documents streamed in via
STDIN, in order of decreasing precedence.
EOF

      # Models this command applies to (default first)
      MODELS = %w[dataset source license]
      include Chimps::Utils::UsesModel
      include Chimps::Utils::UsesYamlData

      # Issue the POST request and print the response.  The data is
      # sent nested under the model's name and the request is
      # authenticated.
      def execute!
        ensure_data_is_present!
        request = Request.new(models_path, :data => { model.to_sym => data }, :authenticate => true)
        request.post.print
      end

    end
  end
end
33
+
@@ -0,0 +1,28 @@
1
module Chimps
  module Commands

    # The +destroy+ command: issues a DELETE request against a
    # resource at Infochimps.
    class Destroy < Chimps::Command

      BANNER = "usage: chimps destroy [OPTIONS] ID_OR_HANDLE"
      HELP = <<EOF

Destroys a resource of a given type (defaults to dataset) identified
by ID_OR_HANDLE.

EOF

      # Models this command applies to (default first)
      MODELS = %w[dataset package source license]
      include Chimps::Utils::UsesModel

      # Issue the authenticated DELETE request and print the response.
      def execute!
        request = Request.new(model_path, :authenticate => true)
        request.delete.print
      end

    end
  end
end
28
+
@@ -0,0 +1,76 @@
1
module Chimps
  module Commands

    # A command to download data from Infochimps.
    class Download < Chimps::Command

      BANNER = "usage: chimps download [OPTIONS] ID_OR_HANDLE"
      HELP = <<EOF

Download a dataset identified by the given ID_OR_HANDLE to the current
directory (you can also specify a particular path).

If the dataset isn't freely downloadable, you'll have to have
purchased it first via the Web.
EOF

      # The chosen data format.
      attr_reader :fmt

      # The chosen package format.
      attr_reader :pkg_fmt

      # The local path to download the data to.
      attr_reader :local_path

      # Set the format for the download token.
      #
      # Will try to normalize the input somewhat (downcasing,
      # stripping surrounding whitespace and leading periods).
      #
      # @param [String] new_fmt
      def fmt= new_fmt
        @fmt = normalize_format(new_fmt)
      end

      # Set the package format for the download token.
      #
      # Will try to normalize the input somewhat (downcasing,
      # stripping surrounding whitespace and leading periods).
      #
      # @param [String] new_pkg_fmt
      def pkg_fmt= new_pkg_fmt
        @pkg_fmt = normalize_format(new_pkg_fmt)
      end

      # The ID or handle of the dataset to download: the first
      # remaining element of +argv+.
      #
      # @return [String]
      # @raise [CLIError] if no ID or handle was given
      def dataset
        raise CLIError.new("Must provide an ID or handle of a dataset to download.") if argv.first.blank?
        argv.first
      end

      # Issue the request for the token and the request for the
      # download.
      def execute!
        Chimps::Workflows::Downloader.new(:dataset => dataset, :fmt => fmt, :pkg_fmt => pkg_fmt, :local_path => local_path).execute!
      end

      # Register the options specific to this command.
      def define_options
        on_tail("-o", "--output PATH", "Path to download file to") do |o|
          @local_path = File.expand_path(o)
        end

        on_tail("-f", "--format FORMAT", "Choose a particular data format (csv, tsv, excel, &c.)") do |f|
          self.fmt = f
        end

        on_tail("-p", "--package PACKAGE", "Choose a particular package type (zip or tar.bz2)") do |p|
          self.pkg_fmt = p
        end
      end

      private

      # Shared normalization for format names: downcase, strip
      # surrounding whitespace, then drop every leading period.
      #
      # Anchored with \A so that only the start of the string is
      # affected; ^ would match at the start of every line.
      #
      # @param [String] raw
      # @return [String]
      def normalize_format raw
        raw.downcase.strip.sub(/\A\.+/, '')
      end

    end
  end
end
76
+
@@ -0,0 +1,89 @@
1
module Chimps
  module Commands

    # The +help+ command: prints general help, or the help of another
    # command when one is named.
    class Help < Chimps::Command

      BANNER = "usage: chimps help [COMMAND]"

      # NOTE(review): the Setup section below prints the text
      # "Chimps::CONFIG[:identity_file]" literally -- nothing in this
      # heredoc is interpolated.  Confirm whether the configured path
      # was meant to appear there.
      HELP = <<EOF

This is the Infochimps command-line client. You can use it to search,
browse, create, edit, or delete data and metadata in the Infochimps
repository at http://infochimps.org.

Before you can create, edit, or delete anything you'll need to get an
Infochimps account and sign up for an API key:

http://infochimps.org/signup

But you can still browse, search, and download (free) data
immediately.

Learn more about the Infochimps API which powers this tool at

http://infochimps.org/api

= Commands

chimps is a wrapper over the RESTful Infochimps API. It exposes the
following actions

chimps list
chimps show
chimps create
chimps update
chimps destroy

for datasets (as well as other selected resources). It also helps
automate the workflow of uploading and downloading data and making
batch changes with

chimps upload
chimps download
chimps batch

You can also make queries against the Infochimps paid query API with

chimps query

Finally, you can test that your system is configured properly and that
you can authenticate with Infochimps with

chimps test

If you're confused try running

chimps help COMMAND

for any of the commands above.

= Setup

Once you have obtained an API key and secret from Infochimps, place them
in a file Chimps::CONFIG[:identity_file] in your home directory with the
following format

---
# API credentials for use on the main Infochimps site
:site:
:username: your_site_name
:key: oreeph6giedaeL3
:secret: Queechei6cu8chiuyiig8cheg5Ahx0boolaizi1ohtarooFu1doo5ohj5ohp9eehae5hakoongahghohgoi7yeihohx1eidaeng0eaveefohchoh6WeeV1EM

# API credentials for use on the Infochimps paid query API
:query:
:username: your_query_name
:key: zei7eeloShoah3Ce
:secret: eixairaichaxaaRe8eeya5moh8Uthahf0pi4eig7SoirohPhei6sai8aereu0yuepiefeipoozoegahchaeheedee8uphohoo9moongae8Fa0aih4BooSeiM
EOF

      # Print this command's own help when no command was named;
      # otherwise construct the named command (with an empty argument
      # list) and print it instead.
      def execute!
        topic = argv.first
        puts(topic.blank? ? self : Chimps::Commands.construct(topic, []))
      end

    end
  end
end
89
+
@@ -0,0 +1,54 @@
1
module Chimps
  module Commands

    # A command to issue a GET request against an index of resources
    # at Infochimps.
    class List < Chimps::Command

      # Carries the same "usage: " prefix as every other command's
      # banner so that help output is uniform across commands.
      BANNER = "usage: chimps list [OPTIONS]"
      HELP = <<EOF

List resources of a given type (defaults to dataset).

Lists your resources by default but see options below.

EOF

      # Models that can be indexed (default first)
      MODELS = %w[dataset license source]
      include Chimps::Utils::UsesModel

      # Register the options specific to this command.
      def define_options
        on_tail("-a", "--all", "List all resources, not just those owned by you.") do |a|
          @all = a
        end

        on_tail("-s", "--[no-]skip-column-names", "Don't print column names in output.") do |s|
          @skip_column_names = s
        end
      end

      # List all resources or just those owned by the Chimps user?
      #
      # @return [true, false, nil] whatever <tt>--all</tt> stored; +nil+ if the
      #   option was not given
      def all?
        @all
      end

      # Parameters to include in the query.
      #
      # If listing all resources, then return +nil+; otherwise restrict
      # the listing by passing the Chimps username as +:id+.
      #
      # @return [Hash, nil]
      def params
        return { :id => Chimps.username } unless all?
      end

      # Issue the GET request and print the response, forwarding the
      # <tt>--[no-]skip-column-names</tt> setting to the printer.
      def execute!
        Request.new(models_path, :params => params).get.print(:skip_column_names => @skip_column_names)
      end

    end
  end
end
54
+