rgovdata 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rvmrc +2 -0
- data/CHANGELOG +7 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +48 -0
- data/LICENSE +20 -0
- data/README.rdoc +114 -0
- data/Rakefile +61 -0
- data/bin/rgd +12 -0
- data/examples/all_quakes.rb +8 -0
- data/examples/arbitrary_data.rb +26 -0
- data/examples/catalog_traversal.rb +34 -0
- data/examples/earthquakes.rb +5 -0
- data/lib/rgovdata.rb +4 -0
- data/lib/rgovdata/catalog.rb +4 -0
- data/lib/rgovdata/catalog/catalog.rb +79 -0
- data/lib/rgovdata/catalog/dn.rb +63 -0
- data/lib/rgovdata/catalog/registry_strategy/internal_registry.rb +12 -0
- data/lib/rgovdata/catalog/registry_strategy/registry_strategy.rb +26 -0
- data/lib/rgovdata/config.rb +5 -0
- data/lib/rgovdata/config/common_config.rb +13 -0
- data/lib/rgovdata/config/config.rb +133 -0
- data/lib/rgovdata/data/config_template.yml +19 -0
- data/lib/rgovdata/data/sg/registry.yml +147 -0
- data/lib/rgovdata/data/template.rb +27 -0
- data/lib/rgovdata/data/us/registry.yml +12 -0
- data/lib/rgovdata/service.rb +10 -0
- data/lib/rgovdata/service/csv_service.rb +3 -0
- data/lib/rgovdata/service/dataset/csv_dataset.rb +43 -0
- data/lib/rgovdata/service/dataset/dataset.rb +91 -0
- data/lib/rgovdata/service/dataset/file_dataset.rb +46 -0
- data/lib/rgovdata/service/dataset/odata_dataset.rb +31 -0
- data/lib/rgovdata/service/file_service.rb +10 -0
- data/lib/rgovdata/service/listing.rb +47 -0
- data/lib/rgovdata/service/odata_service.rb +50 -0
- data/lib/rgovdata/service/service.rb +93 -0
- data/lib/rgovdata/shell/shell.rb +157 -0
- data/lib/rgovdata/version.rb +9 -0
- data/rgovdata.gemspec +128 -0
- data/spec/fixtures/sample.csv +821 -0
- data/spec/integration/service/sg/nlb_spec.rb +57 -0
- data/spec/integration/service/sg/places_spec.rb +73 -0
- data/spec/integration/service/us/eqs7day-M1_spec.rb +57 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/support/config_examples.rb +8 -0
- data/spec/support/mocks.rb +22 -0
- data/spec/support/utility.rb +18 -0
- data/spec/unit/catalog/base_spec.rb +93 -0
- data/spec/unit/catalog/registry_strategy_spec.rb +28 -0
- data/spec/unit/config/config_spec.rb +130 -0
- data/spec/unit/data/template_spec.rb +32 -0
- data/spec/unit/service/dataset/csv_dataset_spec.rb +42 -0
- data/spec/unit/service/dataset/dataset_spec.rb +37 -0
- data/spec/unit/service/dataset/file_dataset_spec.rb +40 -0
- data/spec/unit/service/dataset/odata_dataset_spec.rb +36 -0
- data/spec/unit/service/file_service_spec.rb +25 -0
- data/spec/unit/service/listing_spec.rb +100 -0
- data/spec/unit/service/odata_service_spec.rb +42 -0
- data/spec/unit/service/service_spec.rb +82 -0
- data/spec/unit/shell/shell_spec.rb +10 -0
- metadata +228 -0
@@ -0,0 +1,12 @@
|
|
1
|
+
--- !ruby/object:RGovData::ServiceListing
|
2
|
+
realm: :us
|
3
|
+
key: eqs7day-M1
|
4
|
+
name: "Worldwide M1+ Earthquakes, Past 7 Days"
|
5
|
+
description: "Real-time, worldwide earthquake list for the past 7 days"
|
6
|
+
publisher: US Geological Survey
|
7
|
+
keywords: ANSS, geologist, plate, real time, environment, catalog, federal data downloadaftershock, intensity scale, magnitude scale, seismogram, mercalli, hypocenter, federal datasets, hazard, geophysics, epicenter, subduction, GSN, quake, magnitude, seismograph, foreshock, seismic, focal mechanism, seismology, tectonics, intensity, Waveforms, tsunami, seismicity, fault, richter, Volcano, temblor, earthquake, aftershock, seismologist, landslide
|
8
|
+
license: public
|
9
|
+
info_uri: http://explore.data.gov/Geography-and-Environment/Worldwide-M1-Earthquakes-Past-7-Days/7tag-iwnu
|
10
|
+
uri: http://earthquake.usgs.gov/earthquakes/catalogs/eqs7day-M1.txt
|
11
|
+
type: :csv
|
12
|
+
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'rgovdata/catalog/dn'
|
2
|
+
require 'rgovdata/service/listing'
|
3
|
+
require 'rgovdata/service/service'
|
4
|
+
require 'rgovdata/service/odata_service'
|
5
|
+
require 'rgovdata/service/file_service'
|
6
|
+
require 'rgovdata/service/csv_service'
|
7
|
+
require 'rgovdata/service/dataset/dataset'
|
8
|
+
require 'rgovdata/service/dataset/odata_dataset'
|
9
|
+
require 'rgovdata/service/dataset/file_dataset'
|
10
|
+
require 'rgovdata/service/dataset/csv_dataset'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# This is the catalog class that describes a CSV file service DataSet
# The first row of the file is treated as the header row, and supplies
# the attribute names for every record.
class RGovData::CsvDataSet < RGovData::FileDataSet

  # Returns array of attributes that describe the records of the specific entity
  # The attributes are the CSV headers, which are only known once the data
  # has been parsed, so the first call triggers a load of the records.
  # => overrides RGovData::Dn.attributes
  def attributes
    records unless @attributes # forces a load
    @attributes
  end

  # Returns the value of the named +attribute+ from a recordset +row+
  # +attribute+ is converted with to_s before the lookup, so a Symbol
  # or a String may be given.
  # => overrides RGovData::DataSet.attribute_value
  def attribute_value(row,attribute)
    row[attribute.to_s]
  end

  # NOTE: load_instance is intentionally not redefined here; the implementation
  # inherited from RGovData::FileDataSet (URI or File, depending on +uri+) is
  # exactly what this class previously duplicated.

  # Loads the native record set
  # Parses the whole source as CSV, remembers the headers as the attributes,
  # and returns the rows (at most +limit+ of them when a limit is set).
  # => overrides RGovData::DataSet.load_records
  def load_records
    io = open(native_instance)
    table = begin
      # options are passed without braces so the call works both where CSV.new
      # takes an options hash and where it takes keyword arguments
      CSV.new(io, :headers => :first_row).read
    ensure
      # everything has been read into the table; do not leak the handle
      io.close if io.respond_to?(:close)
    end
    @attributes = table.headers
    if limit.present?
      table.entries[0,limit]
    else
      table.entries
    end
  end
  protected :load_records

end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# This is the catalog class that describes a generic Service DataSet
# Concrete dataset types subclass it and override the load_instance /
# load_records hooks (and attribute_value where rows are addressed differently).
class RGovData::DataSet
  include RGovData::Dn
  attr_reader :options, :service, :native_service

  class << self
    # Returns an array with one dataset object for each of the +service+ dataset_keys
    # The dataset class is derived from the service type,
    # i.e. RGovData::<Type>DataSet
    def load_datasets(service)
      dataset_class = "RGovData::#{service.type.to_s.capitalize}DataSet".constantize
      service.dataset_keys.map do |dataset|
        dataset_class.new({:dataset_key=>dataset},service)
      end
    end
  end

  # +options+ is expected to be a Hash (e.g. :dataset_key, :limit);
  # anything else results in empty options.
  # +service+ is the owning service; a copy of it is held by the dataset.
  def initialize(options,service)
    settings = options.is_a?(Hash) ? options : {}
    @options = OpenStruct.new(settings)
    @service = service.dup # avoid circular dependencies
    @native_service = @service.try(:native_instance)
  end

  # attribute accessors

  # Returns the realm of the owning service
  def realm
    service.realm
  end

  # Returns the key of the owning service
  def service_key
    service.service_key
  end

  # Returns the key of this dataset (from the options)
  def dataset_key
    options.dataset_key
  end

  # Returns the uri of the owning service
  def uri
    service.uri
  end

  # Returns the record limit currently imposed
  def limit
    options.limit
  end

  # Set the record limit to +value+
  def limit=(value)
    options.limit = value
  end

  # Returns array of attributes that describe the specific entity
  # => overrides RGovData::Dn.meta_attributes
  def meta_attributes
    [:id,:realm,:service_key,:dataset_key]
  end

  # Returns the native dataset key
  alias_method :native_dataset_key, :dataset_key

  # Returns the native dataset instance, loading it on first use
  # If +reload+ is true, it re-initializes
  def native_instance(reload = false)
    return @native_instance = load_instance if reload
    @native_instance ||= load_instance
  end

  # Returns the records, loading them on first use
  # If +reload+ is true, it re-initializes and re-runs the query
  def records(reload = false)
    return @records = load_records if reload
    @records ||= load_records
  end

  # Returns the value of the named +attribute+ from a recordset +row+
  # Purpose is to encapsulate differences in addressing attribute values;
  # the generic form calls the method named +attribute+ on the row.
  def attribute_value(row,attribute)
    row.send(attribute)
  end

  protected

  # Loads the native dataset
  # The generic dataset has nothing to load, so this returns nil
  # => override this in specific dataset classes as required
  def load_instance
    nil
  end

  # Loads the native record set
  # The generic dataset has no records, so this returns nil
  # => override this in specific dataset classes as required
  def load_records
    nil
  end

end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
# This is the catalog class that describes a generic file service DataSet
# Currently only handles text files
class RGovData::FileDataSet < RGovData::DataSet

  # Returns array of attributes that describe the records of the specific entity
  # Generic FileDataSets don't have attributes, returns a single selector for the row
  # => overrides RGovData::Dn.attributes
  def attributes
    ['row']
  end

  # Returns the value of the named +attribute+ from a recordset +row+
  # Generic FileDataSets don't have attributes, so always return full row
  # (+attribute+ is ignored)
  # => overrides RGovData::DataSet.attribute_value
  def attribute_value(row,attribute)
    row
  end

  # Loads the native dataset (URI or File)
  # A +uri+ that starts with a scheme ("something://") is parsed as a URI;
  # anything else is treated as a local path and opened for reading.
  # => overrides RGovData::DataSet.load_instance
  def load_instance
    # \A anchors at the start of the string (^ would also match after any newline)
    if uri =~ /\A.+:\/\//
      URI.parse( uri )
    else
      # NOTE(review): this File stays open for as long as the instance is memoized
      File.new(uri, "r")
    end
  end
  protected :load_instance

  # Loads the native record set
  # Reads the whole source into memory and closes the source handle.
  # Returns an Array of lines (at most +limit+ of them) when a limit is set,
  # otherwise a StringIO over the full content.
  # NOTE(review): the two return types differ, and a StringIO can only be
  # enumerated once without a rewind - kept as-is for compatibility with
  # existing callers; confirm before unifying.
  # => overrides RGovData::DataSet.load_records
  def load_records
    # open(native_instance,"UserAgent" => "Mozilla/5.0")
    io = open(native_instance)
    content = begin
      io.read
    ensure
      # the content is fully buffered; do not leak the handle
      io.close if io.respond_to?(:close)
    end
    strio = StringIO.new(content)
    if limit.present?
      strio.to_a[0,limit]
    else
      strio
    end
  end
  protected :load_records

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# This is the catalog class that describes an OData Service DataSet
class RGovData::OdataDataSet < RGovData::DataSet

  # Returns the attribute names based on class meta-data
  # The names are the keys of the native service class_metadata entry for
  # this dataset's entity, and are cached after the first lookup.
  # => overrides RGovData::Dn.attributes
  def attributes
    return @attributes if @attributes
    entity_metadata = native_service.class_metadata[entity_name]
    @attributes = entity_metadata.keys
  end

  # Returns the related OData entity name for this DataSet
  # (the dataset key with a trailing "Set" removed)
  # TODO: currently, this is a hack, as ruby_odata doesn't yet return the collection EntityType
  def entity_name
    dataset_key.gsub(/Set$/,'')
  end

  protected

  # Loads the native OData::QueryBuilder
  # This is done by calling the method named after the dataset key
  # on the native service.
  # => overrides RGovData::DataSet.load_instance
  def load_instance
    native_service.send(native_dataset_key)
  end

  # Loads the native record set
  # Re-initializes the native instance, applies the record limit (if any),
  # then executes against the service's native instance.
  # Always returns an Array.
  # => overrides RGovData::DataSet.load_records
  def load_records
    native_instance(true)
    if limit.present?
      native_instance.top(limit)
    end
    results = service.native_instance.execute
    Array(results)
  end

end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# This is the catalog class that describes a generic file-based service
class RGovData::FileService < RGovData::Service

  # Returns an array of DataSets (keys) for the service
  # A file-based service reports exactly one key: the service +type+
  # => overrides RGovData::Service.dataset_keys
  def dataset_keys
    sole_key = type
    [sole_key]
  end

end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# A ServiceListing is the metadata describing a specific service
# It encapsulates access to the underlying service
class RGovData::ServiceListing
  attr_accessor :realm,         # realm for the service
                :key,           # unique service name or id (within realm)
                :name,          # human name of the service
                :description,   # human description of the service
                :keywords,      # keywords for the service
                :publisher,     # service publisher name
                :license,       # license covering the service if any
                :info_uri,      # url to a web page about the service if any
                :uri,           # url to the service interface
                :type,          # service type [:odata,:csv,:file]
                :credentialset  # name of the credential set required

  include RGovData::Dn

  # Returns the service for this listing
  # The service is obtained from RGovData::Service.get_instance on first use
  # and cached once a (non-nil) service has been returned.
  def service
    @service ||= RGovData::Service.get_instance(self)
  end

  # Returns the service key
  alias_method :service_key, :key

  # Returns an array of DataSets for the service
  # (nil when there is no service)
  # => delegate to service
  def datasets
    service.try(:datasets)
  end
  alias_method :records, :datasets

  # Returns the dataset(s) matching +key+
  # (nil when there is no service)
  # => delegate to service
  def get_dataset(key)
    service.try(:get_dataset,key)
  end

  # Returns the first dataset matching +id+
  # (nil when there is no service)
  # => delegate to service
  def find(id)
    service.try(:find,id)
  end
  # Alias for find
  alias_method :find_by_id, :find

end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'ruby_odata'
|
2
|
+
|
3
|
+
# This is the catalog class that describes an OData Service
class RGovData::OdataService < RGovData::Service

  # Returns an array of DataSets (keys) for the service
  # The keys are read from the @collections instance variable of the
  # native service (an alternative would be native_instance.classes.keys).
  def dataset_keys
    return @dataset_keys if @dataset_keys
    @dataset_keys = native_instance.instance_variable_get(:@collections)
  end

  # Returns the native service object
  # It is loaded on first use and cached thereafter
  def native_instance
    return @native_instance if @native_instance
    @native_instance = load_service
  end

  protected

  # Identifies, loads, and returns the native service instance
  # Any cached state is cleared first. Credentials are looked up in the
  # config by credentialset name: 'projectnimbus' credentials are sent as
  # request headers, any other credentialset is used for basic auth.
  def load_service
    clear
    # currently forcing SSL verification off (seems to be required for projectnimbus)
    # TODO: this should probably be a setting in the ServiceListing
    rest_options = {:verify_ssl=>false}
    uses_nimbus = (credentialset == 'projectnimbus')

    if credentialset && !uses_nimbus
      # merge basic auth
      basic_auth = config.credentialsets[credentialset]
      rest_options.merge!({ :username => basic_auth['username'], :password => basic_auth['password'] })
    end

    svc = OData::Service.new(uri, rest_options)

    if uses_nimbus
      # some special funk to insert headers for projectnimbus authentication:
      # the headers are merged into the rest options the service already holds
      nimbus_keys = config.credentialsets[credentialset]
      nimbus_headers = {
        'AccountKey' => nimbus_keys['AccountKey'], 'UniqueUserID' => nimbus_keys['UniqueUserID']
      }
      current_options = svc.instance_variable_get(:@rest_options)
      svc.instance_variable_set(:@rest_options,current_options.merge({:headers => nimbus_headers}))
    end

    svc
  end

  # Clears current state (cached datasets, dataset keys and native instance)
  # TODO: move to Dn?
  def clear
    @datasets = nil
    @dataset_keys = nil
    @native_instance = nil
  end

end
|
50
|
+
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
# A Service describes a specific service
# It encapsulates access to the underlying service implementation
class RGovData::Service
  include RGovData::CommonConfig
  include RGovData::Dn

  attr_accessor :options

  class << self
    # Returns the appropriate Service class for the given uri and type
    # +options may be a RGovData::ServiceListing or a Hash
    # If +options+ is a hash, it requires the following members:
    # +uri+
    # +type+
    # +credentialset+
    # The class is derived from the type, i.e. RGovData::<Type>Service.
    # Returns nil if the type is invalid or not supported (any error raised
    # while resolving or building the service is deliberately swallowed).
    def get_instance(options={})
      type = options.is_a?(RGovData::ServiceListing) ? options.type : options[:type]
      service_class = "RGovData::#{type.to_s.capitalize}Service".constantize
      service_class.new(options)
    rescue # invalid or not a supported type
      nil
    end
  end

  # +new+ requires
  # +options may be a RGovData::ServiceListing or a Hash
  # If +options+ is a hash, it requires the following members:
  # +uri+
  # +type+
  # +transport+
  # +credentialset+
  # Anything else results in empty options.
  def initialize(options)
    @options = if options.is_a?(Hash)
      OpenStruct.new(options)
    elsif options.is_a?(RGovData::ServiceListing)
      options.dup # avoid circular refs
    else
      OpenStruct.new
    end
  end

  # attribute accessors
  def realm ; options.realm ; end
  def service_key ; options.service_key ; end
  def uri ; options.uri ; end
  def type ; options.type ; end
  def credentialset ; options.credentialset ; end

  # Returns the transport from the options, or nil if the options do not
  # provide one. RGovData::ServiceListing declares no transport accessor of
  # its own, so a listing-backed service would otherwise raise NoMethodError
  # here (NOTE(review): unless a mixin supplies it - confirm against Dn).
  def transport
    options.respond_to?(:transport) ? options.transport : nil
  end

  # Returns array of attributes that describe the specific entity
  # => overrides RGovData::Dn.meta_attributes
  def meta_attributes
    [:id,:realm,:service_key,:uri,:type,:transport,:credentialset]
  end

  # Returns the native service object if applicable
  # By default, returns self
  def native_instance
    @native_instance || self
  end

  # Returns an array of DataSets for the service
  # The dataset class is derived from the service type, i.e.
  # RGovData::<Type>DataSet, and the result is cached once loaded.
  # Returns an empty array if the datasets cannot be loaded.
  # => may need to be overridden for a specific service type
  def datasets
    @datasets ||= begin
      dataset_class = "RGovData::#{type.to_s.capitalize}DataSet".constantize
      dataset_class.load_datasets(self)
    end
  rescue
    []
  end

  # Returns an array of DataSets (keys) for the service
  # => needs to be overridden for each service type
  def dataset_keys
    []
  end

  # Returns the dataset(s) matching +key+
  # +key+ is interpolated into a regular expression, so it matches anywhere
  # in the dataset key and any regex metacharacters in it are significant.
  # Returns nil when the service has no datasets, the single dataset when
  # exactly one matches, otherwise the array of matches (possibly empty).
  def get_dataset(key)
    available = datasets
    return nil unless available && !available.empty?
    matches = available.select {|s| s.dataset_key =~ /#{key}/}
    matches.count == 1 ? matches.first : matches
  end
  # Returns the first dataset matching +id+
  def find(id)
    Array(get_dataset(id)).first
  end
  # Alias for find
  alias_method :find_by_id, :find

end
|