rgovdata 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/.document +5 -0
  2. data/.rvmrc +2 -0
  3. data/CHANGELOG +7 -0
  4. data/Gemfile +18 -0
  5. data/Gemfile.lock +48 -0
  6. data/LICENSE +20 -0
  7. data/README.rdoc +114 -0
  8. data/Rakefile +61 -0
  9. data/bin/rgd +12 -0
  10. data/examples/all_quakes.rb +8 -0
  11. data/examples/arbitrary_data.rb +26 -0
  12. data/examples/catalog_traversal.rb +34 -0
  13. data/examples/earthquakes.rb +5 -0
  14. data/lib/rgovdata.rb +4 -0
  15. data/lib/rgovdata/catalog.rb +4 -0
  16. data/lib/rgovdata/catalog/catalog.rb +79 -0
  17. data/lib/rgovdata/catalog/dn.rb +63 -0
  18. data/lib/rgovdata/catalog/registry_strategy/internal_registry.rb +12 -0
  19. data/lib/rgovdata/catalog/registry_strategy/registry_strategy.rb +26 -0
  20. data/lib/rgovdata/config.rb +5 -0
  21. data/lib/rgovdata/config/common_config.rb +13 -0
  22. data/lib/rgovdata/config/config.rb +133 -0
  23. data/lib/rgovdata/data/config_template.yml +19 -0
  24. data/lib/rgovdata/data/sg/registry.yml +147 -0
  25. data/lib/rgovdata/data/template.rb +27 -0
  26. data/lib/rgovdata/data/us/registry.yml +12 -0
  27. data/lib/rgovdata/service.rb +10 -0
  28. data/lib/rgovdata/service/csv_service.rb +3 -0
  29. data/lib/rgovdata/service/dataset/csv_dataset.rb +43 -0
  30. data/lib/rgovdata/service/dataset/dataset.rb +91 -0
  31. data/lib/rgovdata/service/dataset/file_dataset.rb +46 -0
  32. data/lib/rgovdata/service/dataset/odata_dataset.rb +31 -0
  33. data/lib/rgovdata/service/file_service.rb +10 -0
  34. data/lib/rgovdata/service/listing.rb +47 -0
  35. data/lib/rgovdata/service/odata_service.rb +50 -0
  36. data/lib/rgovdata/service/service.rb +93 -0
  37. data/lib/rgovdata/shell/shell.rb +157 -0
  38. data/lib/rgovdata/version.rb +9 -0
  39. data/rgovdata.gemspec +128 -0
  40. data/spec/fixtures/sample.csv +821 -0
  41. data/spec/integration/service/sg/nlb_spec.rb +57 -0
  42. data/spec/integration/service/sg/places_spec.rb +73 -0
  43. data/spec/integration/service/us/eqs7day-M1_spec.rb +57 -0
  44. data/spec/spec_helper.rb +25 -0
  45. data/spec/support/config_examples.rb +8 -0
  46. data/spec/support/mocks.rb +22 -0
  47. data/spec/support/utility.rb +18 -0
  48. data/spec/unit/catalog/base_spec.rb +93 -0
  49. data/spec/unit/catalog/registry_strategy_spec.rb +28 -0
  50. data/spec/unit/config/config_spec.rb +130 -0
  51. data/spec/unit/data/template_spec.rb +32 -0
  52. data/spec/unit/service/dataset/csv_dataset_spec.rb +42 -0
  53. data/spec/unit/service/dataset/dataset_spec.rb +37 -0
  54. data/spec/unit/service/dataset/file_dataset_spec.rb +40 -0
  55. data/spec/unit/service/dataset/odata_dataset_spec.rb +36 -0
  56. data/spec/unit/service/file_service_spec.rb +25 -0
  57. data/spec/unit/service/listing_spec.rb +100 -0
  58. data/spec/unit/service/odata_service_spec.rb +42 -0
  59. data/spec/unit/service/service_spec.rb +82 -0
  60. data/spec/unit/shell/shell_spec.rb +10 -0
  61. metadata +228 -0
@@ -0,0 +1,12 @@
1
+ --- !ruby/object:RGovData::ServiceListing
2
+ realm: :us
3
+ key: eqs7day-M1
4
+ name: "Worldwide M1+ Earthquakes, Past 7 Days"
5
+ description: "Real-time, worldwide earthquake list for the past 7 days"
6
+ publisher: US Geological Survey
7
+ keywords: ANSS, geologist, plate, real time, environment, catalog, federal data downloadaftershock, intensity scale, magnitude scale, seismogram, mercalli, hypocenter, federal datasets, hazard, geophysics, epicenter, subduction, GSN, quake, magnitude, seismograph, foreshock, seismic, focal mechanism, seismology, tectonics, intensity, Waveforms, tsunami, seismicity, fault, richter, Volcano, temblor, earthquake, aftershock, seismologist, landslide
8
+ license: public
9
+ info_uri: http://explore.data.gov/Geography-and-Environment/Worldwide-M1-Earthquakes-Past-7-Days/7tag-iwnu
10
+ uri: http://earthquake.usgs.gov/earthquakes/catalogs/eqs7day-M1.txt
11
+ type: :csv
12
+
@@ -0,0 +1,10 @@
1
+ require 'rgovdata/catalog/dn'
2
+ require 'rgovdata/service/listing'
3
+ require 'rgovdata/service/service'
4
+ require 'rgovdata/service/odata_service'
5
+ require 'rgovdata/service/file_service'
6
+ require 'rgovdata/service/csv_service'
7
+ require 'rgovdata/service/dataset/dataset'
8
+ require 'rgovdata/service/dataset/odata_dataset'
9
+ require 'rgovdata/service/dataset/file_dataset'
10
+ require 'rgovdata/service/dataset/csv_dataset'
@@ -0,0 +1,3 @@
1
# Catalog class for a service backed by a CSV file.
# It declares nothing of its own; all behaviour comes from RGovData::FileService.
class RGovData::CsvService < RGovData::FileService
end
@@ -0,0 +1,43 @@
1
+ require 'csv'
2
+
3
# This is the catalog class that describes a CSV file service DataSet.
# Records are CSV rows, parsed with the first line of the source used as the header row.
#
# load_instance is not overridden here: the implementation inherited from
# RGovData::FileDataSet (URI for "scheme://" locations, File otherwise) is used as-is.
class RGovData::CsvDataSet < RGovData::FileDataSet

  # Returns array of attributes (the CSV headers) that describe the records
  # => overrides RGovData::Dn.attributes
  def attributes
    records unless @attributes # headers are only known once the CSV has been parsed
    @attributes
  end

  # Returns the value of the named +attribute+ from a recordset +row+
  # +attribute+ is converted with to_s before being used as the row key
  # => overrides RGovData::DataSet.attribute_value
  def attribute_value(row,attribute)
    row[attribute.to_s]
  end

  # Loads the native record set, remembering the CSV headers in @attributes.
  # Returns all rows, or only the first +limit+ rows when a limit is set.
  # => overrides RGovData::DataSet.load_records
  def load_records
    # Options are passed as a trailing (brace-less) hash so the call works both
    # where CSV.new takes an options hash and where it takes keyword arguments.
    csv = CSV.new(read_csv_source, :headers => :first_row).read
    @attributes = csv.headers
    if limit.present?
      csv.entries[0,limit]
    else
      csv.entries
    end
  end
  protected :load_records

  # Reads the complete content behind native_instance and returns it as a String.
  # The handle opened for reading is closed when the block returns.
  def read_csv_source
    source = native_instance
    if source.respond_to?(:open)
      # URI objects extended by open-uri expose #open; this does not rely on
      # Kernel#open being able to open URIs.
      source.open { |io| io.read }
    else
      # File instance: re-open by path so repeated loads always start at the
      # beginning of the file.
      File.open(source.path, "r") { |io| io.read }
    end
  end
  protected :read_csv_source

end
@@ -0,0 +1,91 @@
1
# Generic catalog class describing a DataSet offered by a Service.
# Specific dataset types subclass this and override load_instance / load_records.
class RGovData::DataSet
  include RGovData::Dn
  attr_reader :options, :service, :native_service

  class << self
    # Returns an array with one DataSet for each entry of +service+.dataset_keys.
    # The class instantiated is looked up from the service type
    # ("RGovData::<Capitalized type>DataSet").
    def load_datasets(service)
      klass = "RGovData::#{service.type.to_s.capitalize}DataSet".constantize
      service.dataset_keys.map do |key|
        klass.new({:dataset_key=>key},service)
      end
    end
  end

  # +options+ is wrapped in an OpenStruct when it is a Hash; anything else
  # results in an empty OpenStruct.
  # +service+ is duplicated (original note: avoid circular dependencies), and the
  # duplicate's native_instance, if any, is kept as native_service.
  def initialize(options,service)
    @options = options.is_a?(Hash) ? OpenStruct.new(options) : OpenStruct.new
    @service = service.dup
    @native_service = @service.try(:native_instance)
  end

  # Realm, taken from the service
  def realm
    service.realm
  end

  # Service key, taken from the service
  def service_key
    service.service_key
  end

  # Dataset key, taken from the options
  def dataset_key
    options.dataset_key
  end

  # URI, taken from the service
  def uri
    service.uri
  end

  # Returns the record limit currently imposed
  def limit
    options.limit
  end

  # Set the record limit to +value+
  def limit=(value)
    options.limit = value
  end

  # Returns array of attributes that describe the specific entity
  # => overrides RGovData::Dn.meta_attributes
  def meta_attributes
    [:id,:realm,:service_key,:dataset_key]
  end

  # Returns the native dataset key (same as dataset_key)
  alias_method :native_dataset_key, :dataset_key

  # Returns the native dataset instance, loading it on first use.
  # If +reload+ is true, it is loaded again regardless of any cached value.
  def native_instance(reload = false)
    return @native_instance = load_instance if reload
    @native_instance ||= load_instance
  end

  # Returns the records, loading them on first use.
  # If +reload+ is true, the records are loaded again regardless of any cached value.
  def records(reload = false)
    return @records = load_records if reload
    @records ||= load_records
  end

  # Returns the value of the named +attribute+ from a recordset +row+
  # Exists so subclasses can hide differences in how rows are addressed;
  # here the attribute name is sent to the row as a message.
  def attribute_value(row,attribute)
    row.send(attribute)
  end

  # Loads the native dataset; this generic version returns nil.
  # => override this in specific dataset classes as required
  def load_instance
    nil
  end
  protected :load_instance

  # Loads the native record set; this generic version returns nil.
  # => override this in specific dataset classes as required
  def load_records
    nil
  end
  protected :load_records

end
@@ -0,0 +1,46 @@
1
+ require 'uri'
2
+ require 'open-uri'
3
+
4
# This is the catalog class that describes a generic file service DataSet
# Currently only handles text files
class RGovData::FileDataSet < RGovData::DataSet

  # Returns array of attributes that describe the records of the specific entity
  # Generic FileDataSets don't have attributes, returns a single selector for the row
  # => overrides RGovData::Dn.attributes
  def attributes
    ['row']
  end

  # Returns the value of the named +attribute+ from a recordset +row+
  # Generic FileDataSets don't have attributes, so always return full row
  # (+attribute+ is ignored)
  # => overrides RGovData::DataSet.attribute_value
  def attribute_value(row,attribute)
    row
  end

  # Loads the native dataset (URI or File)
  # A +uri+ containing "://" is parsed into a URI object; anything else is
  # treated as a local path and opened read-only.
  # NOTE(review): the File opened here is only used for its path by load_records
  # and is never closed by this class.
  # => overrides RGovData::DataSet.load_instance
  def load_instance
    if uri =~ /^.+:\/\//
      URI.parse( uri )
    else
      File.new(uri, "r")
    end
  end
  protected :load_instance

  # Loads the native record set
  # With a limit set, returns an Array holding the first +limit+ lines.
  # Without a limit, returns a StringIO over the whole content.
  # NOTE(review): the unlimited result is a StringIO rather than an Array, so it
  # can only be enumerated once without rewinding; the return type is kept
  # unchanged for existing callers.
  # => overrides RGovData::DataSet.load_records
  def load_records
    strio = StringIO.new(read_native_instance)
    if limit.present?
      strio.to_a[0,limit]
    else
      strio
    end
  end
  protected :load_records

  # Reads the complete content behind native_instance and returns it as a String.
  # The handle opened for reading is closed when the block returns.
  def read_native_instance
    source = native_instance
    if source.respond_to?(:open)
      # URI objects extended by open-uri expose #open; this does not rely on
      # Kernel#open being able to open URIs.
      source.open { |io| io.read }
    else
      # File instance: re-open by path so repeated loads always start at the
      # beginning of the file.
      File.open(source.path, "r") { |io| io.read }
    end
  end
  protected :read_native_instance

end
@@ -0,0 +1,31 @@
1
# Catalog class describing a DataSet served by an OData service
class RGovData::OdataDataSet < RGovData::DataSet

  # Returns the attribute names, taken from the keys of the native service's
  # class meta-data for this DataSet's entity. The result is memoized.
  # => overrides RGovData::Dn.attributes
  def attributes
    return @attributes if @attributes
    entity_metadata = native_service.class_metadata[entity_name]
    @attributes = entity_metadata.keys
  end

  # Returns the related OData entity name for this DataSet
  # (the dataset key with a trailing "Set" removed)
  # TODO: currently, this is a hack, as ruby_odata doesn't yet return the collection EntityType
  def entity_name
    dataset_key.gsub(/Set$/,'')
  end

  # Loads the native query object by sending the dataset key to the native service
  # (an OData::QueryBuilder according to the original author's note)
  # => overrides RGovData::DataSet.load_instance
  def load_instance
    native_service.send(native_dataset_key)
  end
  protected :load_instance

  # Loads the native record set
  # Re-initializes the native instance, applies +limit+ via top when one is set,
  # then executes on the service's native instance; the result is always an Array.
  # => overrides RGovData::DataSet.load_records
  def load_records
    native_instance(true)
    if limit.present?
      native_instance.top(limit)
    end
    Array(service.native_instance.execute)
  end
  protected :load_records
end
@@ -0,0 +1,10 @@
1
# Catalog class describing a generic service backed by a file
class RGovData::FileService < RGovData::Service

  # Returns the DataSet keys for the service.
  # A file-based service reports exactly one key: its own +type+.
  # => overrides RGovData::Service.dataset_keys
  def dataset_keys
    [type]
  end

end
@@ -0,0 +1,47 @@
1
# Metadata describing one specific service.
# Access to the underlying service goes through this listing.
class RGovData::ServiceListing
  # realm for the service
  attr_accessor :realm
  # unique service name or id (within realm)
  attr_accessor :key
  # human name of the service
  attr_accessor :name
  # human description of the service
  attr_accessor :description
  # keywords for the service
  attr_accessor :keywords
  # service publisher name
  attr_accessor :publisher
  # license covering the service if any
  attr_accessor :license
  # url to a web page about the service if any
  attr_accessor :info_uri
  # url to the service interface
  attr_accessor :uri
  # service type [:odata,:csv,:file]
  attr_accessor :type
  # name of the credential set required
  attr_accessor :credentialset

  include RGovData::Dn

  # Returns the service for this listing.
  # The instance obtained from RGovData::Service.get_instance is memoized.
  def service
    return @service if @service
    @service = RGovData::Service.get_instance(self)
  end

  # Returns the service key (same as key)
  alias_method :service_key, :key

  # Returns an array of DataSets for the service
  # => delegate to service
  def datasets
    backing_service = service
    backing_service.try(:datasets)
  end
  # records is another name for datasets
  alias_method :records, :datasets

  # Returns the dataset(s) matching +key+
  # => delegate to service
  def get_dataset(key)
    backing_service = service
    backing_service.try(:get_dataset,key)
  end

  # Returns the first dataset matching +id+
  # => delegate to service
  def find(id)
    backing_service = service
    backing_service.try(:find,id)
  end
  # Alias for find
  alias_method :find_by_id, :find

end
@@ -0,0 +1,50 @@
1
+ require 'ruby_odata'
2
+
3
# Catalog class describing an OData service
class RGovData::OdataService < RGovData::Service

  # Returns the DataSet keys for the service.
  # Read from the native service's @collections instance variable and memoized
  # (an earlier variant used native_instance.classes.keys instead).
  def dataset_keys
    return @dataset_keys if @dataset_keys
    @dataset_keys = native_instance.instance_variable_get(:@collections)
  end

  # Returns the native service object, building it via load_service on first use
  def native_instance
    return @native_instance if @native_instance
    @native_instance = load_service
  end

  # Identifies, loads, and returns the native service instance.
  # Cached state is cleared first. Credential sets other than 'projectnimbus'
  # are passed as username/password options; 'projectnimbus' credentials are
  # injected as request headers after the service has been constructed.
  def load_service
    clear
    nimbus_auth = (credentialset == 'projectnimbus')

    # currently forcing SSL verification off (seems to be required for projectnimbus)
    # TODO: this should probably be a setting in the ServiceListing
    connection_options = {:verify_ssl=>false}
    if credentialset && !nimbus_auth
      # basic auth
      basic_auth = config.credentialsets[credentialset]
      connection_options[:username] = basic_auth['username']
      connection_options[:password] = basic_auth['password']
    end

    odata = OData::Service.new(uri, connection_options)

    if nimbus_auth
      nimbus = config.credentialsets[credentialset]
      # some special funk to insert headers for projectnimbus authentication:
      # the service's own @rest_options are read back, extended and written again
      current_options = odata.instance_variable_get(:@rest_options)
      nimbus_headers = {
        'AccountKey' => nimbus['AccountKey'], 'UniqueUserID' => nimbus['UniqueUserID']
      }
      odata.instance_variable_set(:@rest_options,current_options.merge({:headers => nimbus_headers}))
    end

    odata
  end
  protected :load_service

  # Clears current state (cached datasets, dataset keys and native instance)
  # TODO: move to Dn?
  def clear
    @datasets = nil
    @dataset_keys = nil
    @native_instance = nil
  end
  protected :clear
end
50
+
@@ -0,0 +1,93 @@
1
+ require 'ostruct'
2
+
3
# A Service describes a specific service
# It encapsulates access to the underlying service implementation
class RGovData::Service
  include RGovData::CommonConfig
  include RGovData::Dn

  attr_accessor :options

  class << self
    # Returns an instance of the appropriate Service class for the given type
    # ("RGovData::<Capitalized type>Service").
    # +options+ may be a RGovData::ServiceListing or a Hash
    # If +options+ is a hash, it requires the following members:
    #   +uri+
    #   +type+
    #   +credentialset+
    # Returns nil when the type is invalid or not supported; any StandardError
    # raised while resolving or constructing the service is swallowed.
    def get_instance(options={})
      type = options.is_a?(RGovData::ServiceListing) ? options.type : options[:type]
      service_class = "RGovData::#{type.to_s.capitalize}Service".constantize
      service_class.new(options)
    rescue # invalid or not a supported type
      nil
    end
  end

  # +new+ requires
  # +options+ may be a RGovData::ServiceListing or a Hash
  # If +options+ is a hash, it requires the following members:
  #   +uri+
  #   +type+
  #   +transport+
  #   +credentialset+
  # A Hash is wrapped in an OpenStruct, a ServiceListing is duplicated, and
  # anything else results in an empty OpenStruct.
  def initialize(options)
    @options = if options.is_a?(Hash)
      OpenStruct.new(options)
    elsif options.is_a?(RGovData::ServiceListing)
      options.dup # avoid circular refs
    else
      OpenStruct.new
    end
  end

  # attribute accessors (all read from options)
  def realm ; options.realm ; end
  def service_key ; options.service_key ; end
  def uri ; options.uri ; end
  def type ; options.type ; end
  def credentialset ; options.credentialset ; end

  # Returns the transport, or nil when the options object does not provide one.
  # RGovData::ServiceListing declares no +transport+ accessor, so the call is
  # guarded instead of raising NoMethodError for listing-backed services.
  # NOTE(review): assumes RGovData::Dn does not supply +transport+ dynamically - confirm.
  def transport
    options.respond_to?(:transport) ? options.transport : nil
  end

  # Returns array of attributes that describe the specific entity
  # => overrides RGovData::Dn.meta_attributes
  def meta_attributes
    [:id,:realm,:service_key,:uri,:type,:transport,:credentialset]
  end

  # Returns the native service object if applicable
  # By default, returns self
  def native_instance
    @native_instance || self
  end

  # Returns an array of DataSets for the service (memoized once loaded)
  # The DataSet class is resolved from the service type only when nothing is
  # cached yet. Any StandardError while resolving or loading yields an empty array.
  # => may need to be overridden for a specific service type
  def datasets
    @datasets ||= "RGovData::#{type.to_s.capitalize}DataSet".constantize.load_datasets(self)
  rescue
    []
  end

  # Returns an array of DataSets (keys) for the service
  # => needs to be overridden for each service type
  def dataset_keys
    []
  end

  # Returns the dataset(s) matching +key+
  # +key+ is interpolated into a regular expression and matched against each
  # dataset_key, so partial matches count and regexp metacharacters in +key+
  # are significant.
  # Returns nil when there are no datasets, the single dataset when exactly one
  # matches, otherwise the array of matches (possibly empty).
  def get_dataset(key)
    available = datasets
    return nil if available.nil? || available.empty?
    pattern = /#{key}/ # built once rather than per dataset
    matches = available.select {|s| s.dataset_key =~ pattern}
    matches.count == 1 ? matches.first : matches
  end

  # Returns the first dataset matching +id+
  def find(id)
    Array(get_dataset(id)).first
  end
  # Alias for find
  alias_method :find_by_id, :find

end