datapackage 0.2.5 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/datapackage.rb +18 -0
- data/lib/datapackage/defaults.rb +27 -0
- data/lib/datapackage/exceptions.rb +8 -0
- data/lib/datapackage/helpers.rb +98 -0
- data/lib/datapackage/package.rb +212 -0
- data/lib/datapackage/profile.rb +62 -0
- data/lib/datapackage/registry.rb +36 -0
- data/lib/datapackage/resource.rb +86 -0
- data/lib/datapackage/version.rb +3 -0
- data/lib/profiles/data-package.json +541 -0
- data/lib/profiles/data-resource.json +278 -0
- data/lib/profiles/fiscal-data-package.json +4361 -0
- data/lib/profiles/registry.json +44 -0
- data/lib/profiles/table-schema.json +1560 -0
- data/lib/profiles/tabular-data-package.json +2199 -0
- data/lib/profiles/tabular-data-resource.json +1936 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 18b6d480843853daa822de909d242c78b40ed762
|
4
|
+
data.tar.gz: 4c6004ea6559656434a5759038cc0163c0762520
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e3c5286265b393c7c7ecd3563515bccfb67f2c20a54b6c15e2d4ad1ac00f6c55b69f18c591e38397958a6bf23cd9d0c88deda81ae39e06cf6d8532607f31eeda
|
7
|
+
data.tar.gz: d09058baf930fda7eb99d688f9c9fb405bc1910563d0b1e48763400dc877c27a1540c69eb24e292827e511500612156b37022cbf067622bb2d649fa3e9e9f84b
|
data/lib/datapackage.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'uri'
|
3
|
+
require 'net/http'
|
4
|
+
require 'csv'
|
5
|
+
require 'json'
|
6
|
+
require 'json-schema'
|
7
|
+
require 'zip'
|
8
|
+
require 'ruby_dig'
|
9
|
+
require 'tableschema'
|
10
|
+
|
11
|
+
require 'datapackage/defaults'
|
12
|
+
require 'datapackage/helpers'
|
13
|
+
require 'datapackage/version'
|
14
|
+
require 'datapackage/exceptions'
|
15
|
+
require 'datapackage/profile'
|
16
|
+
require 'datapackage/resource'
|
17
|
+
require 'datapackage/package'
|
18
|
+
require 'datapackage/registry'
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module DataPackage
  # Fallback values used when a descriptor does not specify them, grouped by
  # the kind of descriptor (resource, package, schema, CSV dialect).
  #
  # Only the outer Hash is frozen; the nested hashes are left as they were so
  # existing callers that read or copy them keep working.
  DEFAULTS = {
    resource: {
      profile: 'data-resource',
      tabular_profile: 'tabular-data-resource',
      encoding: 'utf-8',
    },
    package: {
      profile: 'data-package',
    },
    schema: {
      format: 'default',
      type: 'string',
      missing_values: [''],
    },
    dialect: {
      delimiter: ',',
      doubleQuote: true,
      # Must be double-quoted: in a single-quoted Ruby string the escapes are
      # not interpreted and the value would be the four characters \, r, \, n
      # instead of a carriage return followed by a line feed.
      lineTerminator: "\r\n",
      quoteChar: '"',
      # A single backslash character.
      escapeChar: '\\',
      skipInitialSpace: true,
      header: true,
      caseSensitiveHeader: false,
    },
  }.freeze
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
module DataPackage
  # Root of the library's error hierarchy; rescue this to catch any error the
  # library raises on purpose.
  #
  # It derives from StandardError (not ::Exception) so that a bare `rescue`
  # or `rescue StandardError` in calling code also catches library errors.
  # Inside this module the bare name `Exception` refers to this class.
  class Exception < StandardError; end

  # Raised when the profile registry cannot be loaded.
  class RegistryException < Exception; end

  # Raised for problems with a single resource of a package.
  class ResourceException < Exception; end

  # Raised when a profile cannot be found or loaded.
  class ProfileException < Exception; end

  # Raised when a package descriptor cannot be read or parsed.
  class PackageException < Exception; end

  # Raised when data does not validate against a profile.
  class ValidationError < Exception; end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
module DataPackage
  # Path, URL and JSON-loading utilities mixed into the package, profile and
  # registry classes.
  module Helpers

    # Dereference a resource that can be a URL or path to a JSON file or a hash.
    # Hashes and Enumerables are rebuilt with their values dereferenced; a
    # String is replaced by the parsed JSON it points to, or returned unchanged
    # when no such file exists (Errno::ENOENT). Any other value is returned as is.
    #
    # resource::         Hash, Enumerable, String or any other value
    # base_path::        directory or base URL that references are joined to
    # reference_fields:: when given, only Hash keys it includes are followed;
    #                    nil (the default) follows every key
    #
    # Errors other than Errno::ENOENT raised while loading a reference (for
    # example JSON::ParserError) are not rescued here.
    def dereference_descriptor(resource, base_path: nil, reference_fields: nil)
      options = { base_path: base_path, reference_fields: reference_fields }
      case resource
      when Hash
        resource.each_with_object({}) do |(key, val), dereferenced|
          follow = reference_fields.nil? || reference_fields.include?(key)
          dereferenced[key] = follow ? dereference_descriptor(val, **options) : val
        end
      when Enumerable
        resource.map { |el| dereference_descriptor(el, **options) }
      when String
        begin
          resolve_json_reference(resource, deep_dereference: true, base_path: base_path)
        rescue Errno::ENOENT
          # Not a readable file: treat the string as a plain value.
          resource
        end
      else
        resource
      end
    end

    # Resolve a reference to a JSON file; Returns the parsed JSON.
    # With deep_dereference: true the parsed content is itself passed through
    # dereference_descriptor (with the same base_path and no reference_fields).
    # Raises whatever load_json raises for the joined reference.
    def resolve_json_reference(reference, deep_dereference: false, base_path: nil)
      reference = join_paths(base_path, reference)
      resolved_reference = load_json(reference)
      return resolved_reference unless deep_dereference == true

      dereference_descriptor(resolved_reference, base_path: base_path)
    end

    # Load JSON from path or URL;
    # Raises: Errno::ENOENT, OpenURI::HTTPError, SocketError, JSON::ParserError
    def load_json(reference)
      JSON.parse(read_reference(reference))
    end

    # Returns the base of a path or URL:
    # * nil for an empty value,
    # * scheme, host, (non-default) port and parent path for a URL,
    # * the path itself when it is an existing directory,
    # * otherwise the expanded parent directory of the path.
    def base_path(path_or_url)
      path_or_url = path_or_url.to_s
      return nil if path_or_url.empty?

      if uri_reference?(path_or_url)
        uri = URI.parse(path_or_url)
        authority = uri.host.to_s
        # Keep an explicit port, otherwise the base would point at a different server.
        authority += ":#{uri.port}" if uri.port && uri.port != uri.default_port
        "#{uri.scheme}://#{authority}#{File.dirname(uri.path)}".chomp('/')
      elsif File.directory?(path_or_url)
        path_or_url
      else
        File.expand_path(File.dirname(path_or_url))
      end
    end

    # Combines a base with a resource reference:
    # * no base: the resource is returned unchanged,
    # * URL base: the resource is resolved against it with URI.join,
    # * existing directory: File.join of both,
    # * existing file: the base itself is returned (the resource is ignored),
    # * anything else: the resource is returned unchanged.
    #
    # NOTE(review): URI.join replaces the last path segment of a base that has
    # no trailing slash, and base_path strips that slash; confirm callers pass
    # URL bases in the shape they intend.
    def join_paths(base_path, resource)
      return resource if base_path.nil? || base_path.empty?

      if uri_reference?(base_path)
        URI.join(base_path, resource).to_s
      elsif File.directory?(base_path)
        File.join(base_path, resource).to_s
      elsif File.file?(base_path)
        base_path
      else
        resource
      end
    end

    # True when the string parses as an HTTP(S) URI that has a host.
    # Strings that are not valid URIs yield false.
    def is_fully_qualified_url?(string)
      uri = URI.parse(string)
      uri.is_a?(URI::HTTP) && !uri.host.nil?
    rescue URI::InvalidURIError
      false
    end

    # True when the string is a relative path that does not climb out of its
    # base directory. Rejected are: absolute paths, paths whose first segment
    # consists only of dots (".", "..", ...) and paths with a ".." segment
    # anywhere, since a later ".." can escape the base just as well.
    def is_safe_path?(string)
      path = Pathname.new(string)
      return false if path.absolute?

      segments = path.to_s.split('/')
      first = segments.first
      return false if first && first =~ /^\.+$/
      return false if segments.include?('..')

      true
    end

    private

    # Returns the raw text behind a reference.
    # Strings of the form "scheme://..." whose URI supports #open (provided by
    # open-uri for http, https and ftp) are fetched remotely; everything else
    # is read as a local file. Kernel#open is deliberately not used: it would
    # run a shell command for a string starting with "|", never closes the
    # handle here, and stopped opening URLs in Ruby 3.0.
    def read_reference(reference)
      if reference.respond_to?(:to_str) && reference.to_str =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
        uri = URI.parse(reference.to_str)
        return uri.open(&:read) if uri.respond_to?(:open)
      end
      File.read(reference)
    end

    # True when the whole string matches Ruby's generic URI pattern.
    def uri_reference?(string)
      !(string =~ /\A#{URI::regexp}\z/).nil?
    end

  end
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
module DataPackage
  # A data package descriptor exposed as a Hash, together with the profile it
  # is validated against and its resources wrapped as Resource objects.
  class Package < Hash
    include DataPackage::Helpers

    # opts::           the options Hash given to the constructor
    # errors::         never assigned in this class (reads as nil here)
    # profile::        the DataPackage::Profile built from the 'profile' key
    # dead_resources:: resource descriptors that raised ResourceException on load
    attr_reader :opts, :errors, :profile, :dead_resources

    # Parse or create a data package
    # Supports reading data from JSON file, directory, and a URL
    # descriptor:: Hash or String (nil creates an empty package)
    # opts:: Options used to customize reading and parsing; this class reads
    #        :base and :default_filename. Unzipping a package stores the
    #        extraction directory in opts[:base], i.e. it mutates this Hash.
    #
    # Raises PackageException when the descriptor cannot be fetched, is not
    # valid JSON, or a zip archive does not contain a descriptor file.
    def initialize(descriptor = nil, opts: {})
      @opts = opts
      @dead_resources = []
      merge!(parse_package(descriptor))
      @profile = DataPackage::Profile.new(fetch('profile', DataPackage::DEFAULTS[:package][:profile]))
      self['profile'] = @profile.name
      define_properties!
      load_resources!
    rescue OpenURI::HTTPError, SocketError => e
      raise PackageException, "Package URL returned #{e.message}"
    rescue JSON::ParserError
      raise PackageException, 'Package descriptor is not valid JSON'
    end

    # The descriptor as returned by Hash#to_h.
    def descriptor
      to_h
    end

    # Returns the directory for a local file package or base url for a remote
    # Returns an empty String for an in-memory object (it has no location yet)
    def base
      # user can override base
      return @opts[:base] if @opts[:base]
      return '' unless @location

      # work out base directory or uri
      local? ? File.dirname(@location) : @location.split('/')[0..-2].join('/')
    end

    # Is this a local package? Returns true if created from an in-memory object or a file/directory reference
    def local?
      return @local if @local

      !(@location =~ /\A#{URI::regexp}\z/)
    end

    # The resources of the package, after turning raw descriptors into
    # Resource objects.
    def resources
      update_resources!
      self['resources']
    end

    # The +name+ of every resource.
    def resource_names
      update_resources!
      self['resources'].map { |res| res.name }
    end

    # True when the descriptor passes the profile and no resource reports
    # false from its own valid? check. Resources are only checked when the
    # descriptor itself is valid.
    def valid?
      return false unless @profile.valid?(self)

      !self['resources'].map { |resource| resource.valid? }.include?(false)
    end

    alias :valid :valid?

    # Validates the descriptor against the profile and then every resource.
    # Returns true; errors raised by the profile (DataPackage::ValidationError)
    # or by a resource propagate to the caller.
    def validate
      @profile.validate(self)
      self['resources'].each { |resource| resource.validate }
      true
    end

    # Yields every validation error of the descriptor followed by those of
    # each resource. The errors are collected first and yielded afterwards.
    def iter_errors
      errors = @profile.iter_errors(self) { |err| err }
      self['resources'].each do |resource|
        resource.iter_errors { |err| errors << err }
      end
      errors.each { |error| yield error }
    end

    # Appends a resource (a Resource or a raw descriptor) and re-validates the
    # package. Returns the Resource, or nil after removing it again when
    # validation raised DataPackage::ValidationError.
    def add_resource(resource)
      resource = load_resource(resource)
      self['resources'].push(resource)
      begin
        validate
        resource
      rescue DataPackage::ValidationError
        self['resources'].pop
        nil
      end
    end

    # Removes every resource with the given name.
    # Returns the first resource that had that name, or nil when none did.
    def remove_resource(resource_name)
      update_resources!
      removed = get_resource(resource_name)
      self['resources'].reject! { |candidate| candidate.name == resource_name }
      removed
    end

    # Returns the first resource with the given name, or nil.
    def get_resource(resource_name)
      update_resources!
      self['resources'].find { |resource| resource.name == resource_name }
    end

    # Writes the package as pretty-printed JSON to +target+ (by default the
    # location it was read from). Returns true.
    def save(target=@location)
      update_resources!
      File.open(target, "w") { |file| file << JSON.pretty_generate(self) }
      true
    end

    # Value stored under +property+, or +default+ when that value is nil or false.
    def property(property, default = nil)
      self[property] || default
    end

    private

    # Defines a reader and a writer on this instance for every property named
    # by the profile, except 'resources' and 'profile'. The reader falls back
    # to an empty Array/Hash for 'array'/'object' typed properties.
    def define_properties!
      (@profile['properties'] || {}).each do |k, v|
        next if k == 'resources' || k == 'profile'

        define_singleton_method("#{k.to_sym}=", proc { |p| set_property(k, p) })
        define_singleton_method(k.to_sym.to_s, proc { property k, default_value(v) })
      end
    end

    # Makes sure the 'resources' key exists, then wraps its entries.
    def load_resources!
      self['resources'] ||= []
      update_resources!
    end

    # Replaces raw resource descriptors by Resource objects in place.
    # Descriptors that raise ResourceException are remembered in
    # dead_resources and dropped from the list.
    def update_resources!
      self['resources'].map! do |resource|
        begin
          load_resource(resource)
        rescue ResourceException
          @dead_resources << resource
          nil
        end
      end.compact!
    end

    # Returns the argument when it already is a Resource, otherwise a new
    # Resource built from it and the package base.
    def load_resource(resource)
      resource.is_a?(Resource) ? resource : Resource.new(resource, base)
    end

    # Fallback value for a profile property, derived from its declared type.
    def default_value(field_data)
      case field_data['type']
      when 'array'
        []
      when 'object'
        {}
      end
    end

    def set_property(key, value)
      self[key] = value
    end

    # Turns the constructor argument into a Hash: nil gives an empty package,
    # a Hash (or Hash subclass) is used as is, anything else is read as a
    # path or URL.
    def parse_package(descriptor)
      # TODO: base directory/url
      if descriptor.nil?
        {}
      elsif descriptor.is_a?(Hash)
        descriptor
      else
        read_package(descriptor)
      end
    end

    # Reads a descriptor from a zip archive (by extension) or a JSON document
    # and remembers where a JSON descriptor came from.
    def read_package(descriptor)
      if File.extname(descriptor) == '.zip'
        unzip_package(descriptor)
      else
        @location = descriptor.to_s
        load_json(descriptor)
      end
    end

    # Extracts a zipped package into a fresh temporary directory and returns
    # the parsed descriptor found one directory level inside the archive.
    # Every entry is extracted under its basename, so the directory layout of
    # the archive is flattened. Remote archives are downloaded first.
    #
    # Raises PackageException when the archive has no descriptor file.
    def unzip_package(descriptor)
      descriptor = write_to_tempfile(descriptor) if descriptor =~ /\A#{URI::regexp}\z/
      dir = Dir.mktmpdir
      package = {}
      filename = @opts[:default_filename] || 'datapackage.json'
      Zip::File.open(descriptor) do |zip_file|
        # Extract all the files
        zip_file.each { |entry| entry.extract("#{dir}/#{File.basename entry.name}") }
        # Get and parse the datapackage metadata
        entry = zip_file.glob("*/#{filename}").first
        if entry.nil?
          raise PackageException, "Zip archive does not contain a `#{filename}` descriptor"
        end
        package = JSON.parse(entry.get_input_stream.read)
      end
      # Set the base dir to the directory we unzipped to
      @opts[:base] = dir
      # This is now a local file, not a URL
      @local = true
      package
    end

    # Downloads +url+ into a Tempfile and returns it rewound.
    # The URI's own #open (from open-uri) is used because Kernel#open no
    # longer opens URLs since Ruby 3.0; strings whose URI has no #open are
    # read as local files, as before. The Tempfile is switched to binary mode
    # because the payload is a zip archive.
    def write_to_tempfile(url)
      uri = URI.parse(url)
      payload = uri.respond_to?(:open) ? uri.open(&:read) : File.binread(url)
      tempfile = Tempfile.new('datapackage')
      tempfile.binmode
      tempfile.write(payload)
      tempfile.rewind
      tempfile
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module DataPackage
  # A JSON schema, held as a Hash, that descriptors are validated against.
  # It is loaded either from a fully qualified URL or, by identifier, from
  # the profile registry.
  class Profile < Hash
    include DataPackage::Helpers

    # name::     the URL or registry identifier the profile was created from
    # registry:: the Registry consulted for identifiers (nil for URL profiles)
    attr_reader :name, :registry

    # descriptor:: String holding a profile URL or a registry identifier
    #
    # Raises ProfileException when the descriptor is not a String, is unknown
    # to the registry, cannot be fetched or is not valid JSON.
    def initialize(descriptor)
      raise ProfileException.new 'Profile must be a URL or registry identifier' unless descriptor.is_a?(String)

      @name = descriptor
      schema = if is_fully_qualified_url?(descriptor)
                 load_json(descriptor)
               else
                 get_profile_from_registry(descriptor)
               end
      merge!(schema)
    rescue OpenURI::HTTPError, SocketError => failure
      raise ProfileException.new "Profile URL returned #{failure.message}"
    rescue JSON::ParserError
      raise ProfileException.new 'Profile is not valid JSON'
    end

    # The schema as returned by Hash#to_h.
    def jsonschema
      to_h
    end

    # Returns true if there are no errors in data, false if there are
    def valid?(data)
      JSON::Validator.validate(self, data)
    end

    alias :valid :valid?

    # Validate data against this profile. Returns true or raises DataPackage::ValidationError
    def validate(data)
      JSON::Validator.validate!(self, data)
    rescue JSON::Schema::ValidationError => failure
      raise DataPackage::ValidationError.new(failure.message)
    end

    # Yields each validation error found for data. All errors are computed
    # up front by the validator and then yielded one by one.
    def iter_errors(data)
      problems = JSON::Validator.fully_validate(self, data)
      problems.each { |problem| yield problem }
    end

    private

    # Looks the identifier up in a fresh Registry and loads the schema it
    # points to: the bundled file named by 'schema_path' when present,
    # otherwise the location given by 'schema'.
    # Raises ProfileException when the identifier is not registered.
    def get_profile_from_registry(descriptor)
      @registry = DataPackage::Registry.new
      entry = registry.profiles.fetch(descriptor)
      location =
        if entry.fetch('schema_path', nil)
          join_paths(base_path(registry.path), entry['schema_path'])
        else
          entry['schema']
        end
      load_json(location)
    rescue KeyError
      raise ProfileException.new "Couldn't find profile with id `#{descriptor}` in registry"
    end

  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module DataPackage
  # Allow loading Data Package profiles from the official registry.
  # This class reads the copy of the registry that is bundled with the gem.

  class Registry
    include DataPackage::Helpers

    # path::     location of the registry file that was loaded
    # profiles:: Hash mapping each profile id to its registry entry
    attr_reader :path, :profiles

    # Not referenced in this class; the bundled file below is what gets loaded.
    DEFAULT_REGISTRY_URL = 'https://specs.frictionlessdata.io/schemas/registry.json'.freeze
    DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), '..', 'profiles', 'registry.json').freeze

    # Loads the bundled registry.
    # Raises RegistryException when the file is missing, cannot be fetched,
    # is not valid JSON, or contains an entry without an `id`.
    def initialize
      @path = DEFAULT_REGISTRY_PATH
      @profiles = get_registry(DEFAULT_REGISTRY_PATH)
    rescue Errno::ENOENT
      raise RegistryException, 'Registry path is not valid'
    rescue OpenURI::HTTPError, SocketError => e
      raise RegistryException, "Registry URL returned #{e.message}"
    rescue JSON::ParserError
      raise RegistryException, 'Registry descriptor is not valid JSON'
    rescue KeyError
      raise RegistryException, 'Property `id` is mandatory for profiles'
    end

    private

    # Loads the registry document and indexes its entries by 'id'.
    # `fetch` is used so that an entry without an 'id' raises KeyError, which
    # the constructor reports as a RegistryException; a plain `[]` lookup
    # would silently file such entries under nil and never trigger that path.
    def get_registry(descriptor)
      load_json(descriptor).each_with_object({}) do |resource, registry|
        registry[resource.fetch('id')] = resource
      end
    end

  end
end
|