rgovdata 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/.document +5 -0
  2. data/.rvmrc +2 -0
  3. data/CHANGELOG +7 -0
  4. data/Gemfile +18 -0
  5. data/Gemfile.lock +48 -0
  6. data/LICENSE +20 -0
  7. data/README.rdoc +114 -0
  8. data/Rakefile +61 -0
  9. data/bin/rgd +12 -0
  10. data/examples/all_quakes.rb +8 -0
  11. data/examples/arbitrary_data.rb +26 -0
  12. data/examples/catalog_traversal.rb +34 -0
  13. data/examples/earthquakes.rb +5 -0
  14. data/lib/rgovdata.rb +4 -0
  15. data/lib/rgovdata/catalog.rb +4 -0
  16. data/lib/rgovdata/catalog/catalog.rb +79 -0
  17. data/lib/rgovdata/catalog/dn.rb +63 -0
  18. data/lib/rgovdata/catalog/registry_strategy/internal_registry.rb +12 -0
  19. data/lib/rgovdata/catalog/registry_strategy/registry_strategy.rb +26 -0
  20. data/lib/rgovdata/config.rb +5 -0
  21. data/lib/rgovdata/config/common_config.rb +13 -0
  22. data/lib/rgovdata/config/config.rb +133 -0
  23. data/lib/rgovdata/data/config_template.yml +19 -0
  24. data/lib/rgovdata/data/sg/registry.yml +147 -0
  25. data/lib/rgovdata/data/template.rb +27 -0
  26. data/lib/rgovdata/data/us/registry.yml +12 -0
  27. data/lib/rgovdata/service.rb +10 -0
  28. data/lib/rgovdata/service/csv_service.rb +3 -0
  29. data/lib/rgovdata/service/dataset/csv_dataset.rb +43 -0
  30. data/lib/rgovdata/service/dataset/dataset.rb +91 -0
  31. data/lib/rgovdata/service/dataset/file_dataset.rb +46 -0
  32. data/lib/rgovdata/service/dataset/odata_dataset.rb +31 -0
  33. data/lib/rgovdata/service/file_service.rb +10 -0
  34. data/lib/rgovdata/service/listing.rb +47 -0
  35. data/lib/rgovdata/service/odata_service.rb +50 -0
  36. data/lib/rgovdata/service/service.rb +93 -0
  37. data/lib/rgovdata/shell/shell.rb +157 -0
  38. data/lib/rgovdata/version.rb +9 -0
  39. data/rgovdata.gemspec +128 -0
  40. data/spec/fixtures/sample.csv +821 -0
  41. data/spec/integration/service/sg/nlb_spec.rb +57 -0
  42. data/spec/integration/service/sg/places_spec.rb +73 -0
  43. data/spec/integration/service/us/eqs7day-M1_spec.rb +57 -0
  44. data/spec/spec_helper.rb +25 -0
  45. data/spec/support/config_examples.rb +8 -0
  46. data/spec/support/mocks.rb +22 -0
  47. data/spec/support/utility.rb +18 -0
  48. data/spec/unit/catalog/base_spec.rb +93 -0
  49. data/spec/unit/catalog/registry_strategy_spec.rb +28 -0
  50. data/spec/unit/config/config_spec.rb +130 -0
  51. data/spec/unit/data/template_spec.rb +32 -0
  52. data/spec/unit/service/dataset/csv_dataset_spec.rb +42 -0
  53. data/spec/unit/service/dataset/dataset_spec.rb +37 -0
  54. data/spec/unit/service/dataset/file_dataset_spec.rb +40 -0
  55. data/spec/unit/service/dataset/odata_dataset_spec.rb +36 -0
  56. data/spec/unit/service/file_service_spec.rb +25 -0
  57. data/spec/unit/service/listing_spec.rb +100 -0
  58. data/spec/unit/service/odata_service_spec.rb +42 -0
  59. data/spec/unit/service/service_spec.rb +82 -0
  60. data/spec/unit/shell/shell_spec.rb +10 -0
  61. metadata +228 -0
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rvmrc ADDED
@@ -0,0 +1,2 @@
1
+ rvm use 1.9.2-p136@rgovdata --create
2
+
@@ -0,0 +1,7 @@
1
+ 0.1.0 Initial Release
2
+ =====================
3
+ * internal registry - limited set of SG and US resources
4
+ * support for OData, csv, and generic text file sources
5
+ * basic command line client
6
+ * rails-compatible (see rgovdata.com site)
7
+ * ruby script examples
data/Gemfile ADDED
@@ -0,0 +1,18 @@
1
# Gem dependencies for rgovdata, resolved by Bundler.
source 'http://rubygems.org'

# Runtime dependencies
gem 'activesupport', '>= 3.0.3'
gem 'i18n', '>= 0.5.0'
gem 'ruby_odata', '~> 0.0.10'
gem 'getoptions', '~> 0.3'

# Development-only tooling
group :development do
  gem 'bundler', '~> 1.0.0'
  gem 'jeweler', '~> 1.6.4'
  gem 'rcov', '>= 0'
  gem 'rdoc', '~> 3.11'
end

# Needed both for development and for running the test suite
group :development, :test do
  gem 'rake', '~> 0.9.2.2'
  # RSpec 2.x is loaded with `require 'rspec'` (as the Rakefile does);
  # 'spec' was the RSpec 1.x entry point and would raise LoadError when
  # Bundler.require tries to load this group.
  gem 'rspec', '~> 2.7.0', :require => 'rspec'
end
@@ -0,0 +1,48 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ activesupport (3.0.7)
5
+ diff-lcs (1.1.3)
6
+ getoptions (0.3)
7
+ git (1.2.5)
8
+ i18n (0.6.0)
9
+ jeweler (1.6.4)
10
+ bundler (~> 1.0)
11
+ git (>= 1.2.5)
12
+ rake
13
+ json (1.6.1)
14
+ mime-types (1.17.2)
15
+ nokogiri (1.5.0)
16
+ rake (0.9.2.2)
17
+ rcov (0.9.11)
18
+ rdoc (3.11)
19
+ json (~> 1.4)
20
+ rest-client (1.6.7)
21
+ mime-types (>= 1.16)
22
+ rspec (2.7.0)
23
+ rspec-core (~> 2.7.0)
24
+ rspec-expectations (~> 2.7.0)
25
+ rspec-mocks (~> 2.7.0)
26
+ rspec-core (2.7.1)
27
+ rspec-expectations (2.7.0)
28
+ diff-lcs (~> 1.1.2)
29
+ rspec-mocks (2.7.0)
30
+ ruby_odata (0.0.10)
31
+ activesupport (>= 2.3.5)
32
+ nokogiri (>= 1.4.2)
33
+ rest-client (>= 1.5.1)
34
+
35
+ PLATFORMS
36
+ ruby
37
+
38
+ DEPENDENCIES
39
+ activesupport (>= 3.0.3)
40
+ bundler (~> 1.0.0)
41
+ getoptions (~> 0.3)
42
+ i18n (>= 0.5.0)
43
+ jeweler (~> 1.6.4)
44
+ rake (~> 0.9.2.2)
45
+ rcov
46
+ rdoc (~> 3.11)
47
+ rspec (~> 2.7.0)
48
+ ruby_odata (~> 0.0.10)
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Paul Gallagher
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,114 @@
1
+ = RGovData
2
+
3
+ RGovData is a ruby library for really simple access to government data.
4
+ It aims to make consuming government data sets as easy as "gem install rgovdata",
5
+ letting you focus on what you are trying to achieve with the data, and happily
6
+ ignore all the messy underlying details of transport protocols, authentication and so on.
7
+
8
+ It can be used as a command line tool, a library for ruby projects,
9
+ and/or a library for rails projects.
10
+ The problem it is attempting to solve is
11
+
12
+ More information is available at
13
+ link:http://rgovdata.com
14
+ along with examples of its use.
15
+
16
+ Taking on the challenge of providing access to "all government data" is a somewhat quixotic quest
17
+ for a single individual, but it could work with a community to support it!
18
+ The library is open sourced under an MIT license, and the project hosted on
19
+ {GitHub}[https://github.com/tardate/rgovdata]. If you'd like to get involved, see the
20
+ "Contributing to RGovData" section below.
21
+
22
+ == Requirements
23
+
24
+ * ruby 1.9
25
+ * optional: rails 3.0.x
26
+
27
+ == Objectives and Implementation Status
28
+
29
+ The following are the broad goals of the RGovData library, along with a simple statement of the
30
+ current implementation status:
31
+
32
+ * Support for discovery of data sets [status: limited to an internal registry at present.
33
+ But it has a framework for adding discovery services as and when they are available ]
34
+ * Support all countries that are actively publishing government data sets [status: very limited;
35
+ currently just supporting some examples from SG and US]
36
+ * Support the range of data formats used in government data sets [status: currently limited to CSV and OData,
37
+ but with a framework to add more when available ]
38
+ * Command line tool for discovery and access to data [status: basic features available]
39
+ * Provide a common abstraction and simple API for data discovery and access from ruby projects [status: implemented]
40
+ * Provide specific support for Rails ORM technologies (ActiveModel/ActiveRecord/ActiveResource) to
41
+ make it easy and natural to use in Rails projects [status: no specific support yet, however the base ruby API works fine in Rails]
42
+ * Provide a transparent caching mechanism for non-realtime data sets (e.g. csv files) [status: not yet implemented. If you
43
+ require caching or batch downloading, it is something you currently must implement yourself]
44
+
45
+ Note that the current version is a very early implementation. It is likely that interfaces
46
+ and capabilities may be refactored or changed in subsequent versions, and not necessarily preserving
47
+ backward compatibility.
48
+
49
+
50
+ == Getting Started
51
+
52
+ For more details and examples, see
53
+ link:http://rgovdata.com
54
+
55
+
56
+ === Installation - Basic Gem and Command Line Usage
57
+
58
+ Make sure you have a working ruby installation, then simply:
59
+ $ gem install rgovdata
60
+
61
+ When the installation is complete, try the command line:
62
+ $ rgd
63
+ rgovdata client v0.1.0. Type 'help' for info...
64
+ rgd://sg>
65
+
66
+
67
+ === Installation - Rails
68
+
69
+ Add rgovdata to your Gemfile and run bundler:
70
+
71
+ $ cat Gemfile
72
+ ...
73
+ gem 'rgovdata', '~> 0.1.0'
74
+ ...
75
+ $ bundle install
76
+
77
+
78
+ == Data Encumbrance
79
+
80
+ Although it's all theoretically "our government data", be aware that many of the data sets you
81
+ can get to with RGovData are encumbered by copyright, commercial or other terms of use (yes, I know: wtf!).
82
+
83
+ It is up to *you* to ensure that your use of data complies with all the applicable restrictions.
84
+ RGovData simply provides a mechanism for getting the data, and explicitly does not provide any rights
85
+ enforcement or protection.
86
+
87
+
88
+ == Contributing to RGovData
89
+
90
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
91
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
92
+ * Fork the project
93
+ * Start a feature/bugfix branch
94
+ * Commit and push until you are happy with your contribution
95
+ * Make sure to add tests for it. This is important so your changes don't get unintentionally broken in a future version.
96
+ * Please try not to mess with the Rakefile, version, gemspec or changelog. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so the master repository maintainer can cherry-pick around it.
97
+
98
+ === Running Tests
99
+
100
+ RSpec is used for testing, and it is hooked into rake. Note that integration tests are _not_ run by default.
101
+
102
+ * rake - just runs unit tests
103
+ * rake spec - same as rake
104
+ * rake spec:integration - only runs integration tests
105
+ * rake spec:all - run all unit and integration tests
106
+
107
+ When you do run integration tests, they make live calls on some real services, some of which require authentication.
108
+ Integration tests will use an rgovdata.conf file in the root of the project for configuration.
109
+
110
+
111
+ == Copyright
112
+
113
+ Copyright (c) 2011 Paul Gallagher and open-sourced under an MIT license.
114
+ See LICENSE for further details.
@@ -0,0 +1,61 @@
1
# encoding: utf-8
# Build tasks for rgovdata: gem packaging via Jeweler, RSpec suites and RDoc.

require 'rubygems'
require 'bundler'

# Activate the bundled gems before anything else is required. If that fails,
# report why and exit with the status code carried by the Bundler error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message, "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'rake'
require 'rspec'
require 'rspec/core/rake_task'

# Make lib/ loadable so the gem version constant can be read below
$LOAD_PATH.unshift('lib')
require 'rgovdata/version'

require 'jeweler'
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = 'rgovdata'
  spec.version     = RGovData::Version::STRING
  spec.authors     = ['Paul Gallagher']
  spec.email       = 'gallagher.paul@gmail.com'
  spec.homepage    = 'http://github.com/tardate/rgovdata'
  spec.license     = 'MIT'
  spec.summary     = 'Really simple access to government data for ruby'
  spec.description = "Consuming government-published data in a ruby or rails application shouldn't require a PhD"
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# One RSpec rake task per suite: task name => [description, spec file pattern].
# :spec is the name RSpec::Core::RakeTask uses when none is given.
{
  :spec               => ['Run only RSpec unit test examples',        'spec/unit/**/*_spec.rb'],
  :'spec:integration' => ['Run only RSpec integration test examples', 'spec/integration/**/*_spec.rb'],
  :'spec:all'         => ['Run all RSpec test examples',              'spec/**/*_spec.rb']
}.each do |task_name, (description, spec_pattern)|
  desc description
  RSpec::Core::RakeTask.new(task_name) do |t|
    t.rspec_opts = ['-c', '-f progress']
    t.pattern = spec_pattern
  end
end

# Plain `rake` runs the unit specs
task :default => :spec

require 'rdoc/task'
RDoc::Task.new do |rdoc|
  rdoc.title    = "rgovdata #{RGovData::Version::STRING}"
  rdoc.main     = 'README.rdoc'
  rdoc.rdoc_dir = 'rdoc'
  rdoc.rdoc_files.include('README.rdoc', 'lib/**/*.rb')
end
61
+
data/bin/rgd ADDED
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby
# Command line entry point: builds an RGovData::Shell from the parsed
# command line options and runs it.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')

require 'rubygems'
require 'rgovdata'
require 'getoptions'

begin
  RGovData::Shell.new(GetOptions.new(RGovData::Shell::OPTIONS)).run
rescue Interrupt
  # Ctrl-C: leave quietly, using the conventional 128 + SIGINT exit status
  exit 130
rescue StandardError => e
  # Only rescue ordinary errors. Rescuing Exception would also swallow
  # SystemExit (a deliberate `exit`) and fatal errors such as NoMemoryError.
  STDERR.puts e
  # Report the failure to the calling process instead of exiting with 0
  exit 1
end
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby
# Prints the //us/eqs7day-M1/csv dataset: first the attribute names as a
# comma-separated header line, then every record.
require 'rgovdata'

quakes = RGovData::Catalog.get('//us/eqs7day-M1/csv')
header = quakes.attributes.join(',')
puts header
quakes.records.each { |record| puts record }
@@ -0,0 +1,26 @@
1
#!/usr/bin/env ruby
require 'rgovdata'

# Shows how RGovData can be pointed at an arbitrary service that
# RGovData::Catalog does not currently know about.

# Create the service endpoint directly. A +uri+ and +type+ are the minimum
# required; add a +credentialset+ as well if the service needs authentication.
endpoint = RGovData::Service.get_instance(
  :uri => "http://earthquake.usgs.gov/earthquakes/catalogs/eqs7day-M1.txt",
  :type => :csv
)

puts "Manufactured a service: #{endpoint}"
puts "With uri: #{endpoint.uri}"
puts "And type: #{endpoint.type}"

# The service gives access to its datasets (just one here, as this is a CSV file service)
quake_data = endpoint.datasets.first

# Work with the data
attribute_list = quake_data.attributes.join(',')
puts "This dataset has the following attributes: #{attribute_list}"
row_count = quake_data.records.count
puts "And it has #{row_count} rows of data"
@@ -0,0 +1,34 @@
1
#!/usr/bin/env ruby
require 'rgovdata'

# Walks the RGovData catalog from the top down:
# root catalog -> realm catalog -> service listing -> dataset

# Start from the root catalog (no realm selected)
top = RGovData::Catalog.new(nil)

# => top.realms is an alias for top.records at this level
puts "The root catalog has a collection of realms: #{top.records}"

# Descend into the first realm catalog
realm_catalog = top.records.first

puts "The first catalog is: #{realm_catalog}"
# => realm_catalog.services is an alias for realm_catalog.records at this level
puts "And it contains the following service listings: #{realm_catalog.records}"

# Descend into the first service listing
listing = realm_catalog.records.first

puts "The first service_listing is: #{listing}"
puts "=> it represents the underlying service: #{listing.service}"

# => listing.datasets is an alias for listing.records at this level
puts "=> it has the following datasets: #{listing.records}"

# Descend into the first dataset
first_dataset = listing.records.first

puts "The first dataset is: #{first_dataset}"
puts "=> it has the following attributes: #{first_dataset.attributes}"
puts "=> it has the following number records: #{first_dataset.records.count}"
34
+
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Reports how many records the //us/eqs7day-M1/csv dataset currently holds.
require 'rgovdata'

weekly_quakes = RGovData::Catalog.get('//us/eqs7day-M1/csv')
quake_count = weekly_quakes.records.count
puts "Holy Harp Array Batman, there have been #{quake_count} M1+ quakes this week!"
@@ -0,0 +1,4 @@
1
+ require 'rgovdata/version'
2
+ require 'rgovdata/config'
3
+ require 'rgovdata/service'
4
+ require 'rgovdata/catalog'
@@ -0,0 +1,4 @@
1
+ require 'rgovdata/catalog/dn'
2
+ require 'rgovdata/catalog/catalog'
3
+ require 'rgovdata/catalog/registry_strategy/registry_strategy'
4
+ require 'rgovdata/catalog/registry_strategy/internal_registry'
@@ -0,0 +1,79 @@
1
# A catalog of government data services. With no realm set it acts as the
# root catalog, whose records are the per-realm catalogs; with a realm set,
# its records are the service listings loaded for that realm.
class RGovData::Catalog
  # Reader only: the writer is defined explicitly below, because changing the
  # realm must also discard cached state.
  attr_reader :realm
  include RGovData::Dn

  # Splits a key into its optional realm, service-key and data-set components
  KEY_PATTERN = %r{(?://([^/]+))?(?:/([^/]+))?(?:/([^/]+))?}

  class << self
    # Returns the object specified by the +key+
    # Key specification:
    #   //<realm>/<service-key>/<data-set-name>
    # (':' is accepted as an alternate separator for '/')
    # All key components are optional - you will get the best matching object for the key spec
    #   //sg - will return RGovData::Catalog for realm=:sg
    #   //sg/nlb - will return RGovData::ServiceListing for the nlb service in SG
    #   /nlb - will return RGovData::ServiceListing for the nlb service in SG (assuming SG is the default realm)
    #   //sg/nlb/Library - will return the result of get_dataset('Library') on the nlb service in SG
    # A nil +key+ is treated as '//' and yields the root catalog.
    # If a data set is requested but the service component did not resolve to
    # exactly one service, the service lookup result (nil or an Array of
    # candidates) is returned rather than raising NoMethodError.
    def get(key)
      # tr builds a new string, so the caller's key is never mutated and
      # frozen strings are accepted
      normalized = (key || '//').to_s.tr(':', '/')
      realm_key, service_key, dataset_key = KEY_PATTERN.match(normalized).captures

      catalog = new(realm_key)
      return catalog unless service_key

      service = catalog.get_service(service_key)
      # get_service returns nil or an Array when there is no unique match;
      # neither can be asked for a dataset
      return service if dataset_key.nil? || service.nil? || service.is_a?(Array)

      service.get_dataset(dataset_key)
    end
  end

  # +default_realm+ may be nil (root catalog) or anything responding to to_sym
  def initialize(default_realm=nil)
    @realm = default_realm && default_realm.to_sym
  end

  # Returns available realms, each as a Catalog scoped to that realm
  def realms
    # TODO: currently hard-coded
    [:sg, :us].map { |realm_key| self.class.new(realm_key) }
  end

  # Returns an array of ServiceListings for the current realm
  # (loaded once through the registry strategy, then cached)
  def services
    @services ||= registry_strategy.load_services
  end

  # Returns the service(s) matching +key+:
  # * nil when no services are available
  # * the single service when exactly one matches
  # * otherwise the Array of matches (which may be empty)
  # NOTE(review): +key+ is interpolated into a regular expression unescaped,
  # so it matches as a pattern anywhere in the service key - confirm that
  # pattern matching is intended before escaping it.
  def get_service(key)
    return nil unless services && !services.empty?
    pattern = /#{key}/ # built once instead of once per service
    matches = services.select { |s| s.key =~ pattern }
    matches.count == 1 ? matches.first : matches
  end

  # Realm setter: clears cached state when the realm is changed and stores
  # the value as a symbol, consistent with +initialize+
  def realm=(value)
    clear
    @realm = value && value.to_sym
  end

  # Returns the registry strategy instance for the current realm
  def registry_strategy
    RGovData::RegistryStrategy.instance_for_realm(realm)
  end
  protected :registry_strategy

  # Generic interface to return the currently applicable record set:
  # the services when a realm is set, otherwise the realms
  # => overrides RGovData::Dn.records
  def records
    realm.present? ? services : realms
  end

  # Clears current state (realm and cached services)
  # TODO: move to Dn
  def clear
    @realm = @services = nil
  end
  protected :clear
end