datapackage 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/datapackage.rb +18 -0
- data/lib/datapackage/defaults.rb +27 -0
- data/lib/datapackage/exceptions.rb +8 -0
- data/lib/datapackage/helpers.rb +98 -0
- data/lib/datapackage/package.rb +212 -0
- data/lib/datapackage/profile.rb +62 -0
- data/lib/datapackage/registry.rb +36 -0
- data/lib/datapackage/resource.rb +86 -0
- data/lib/datapackage/version.rb +3 -0
- data/lib/profiles/data-package.json +541 -0
- data/lib/profiles/data-resource.json +278 -0
- data/lib/profiles/fiscal-data-package.json +4361 -0
- data/lib/profiles/registry.json +44 -0
- data/lib/profiles/table-schema.json +1560 -0
- data/lib/profiles/tabular-data-package.json +2199 -0
- data/lib/profiles/tabular-data-resource.json +1936 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 18b6d480843853daa822de909d242c78b40ed762
|
4
|
+
data.tar.gz: 4c6004ea6559656434a5759038cc0163c0762520
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e3c5286265b393c7c7ecd3563515bccfb67f2c20a54b6c15e2d4ad1ac00f6c55b69f18c591e38397958a6bf23cd9d0c88deda81ae39e06cf6d8532607f31eeda
|
7
|
+
data.tar.gz: d09058baf930fda7eb99d688f9c9fb405bc1910563d0b1e48763400dc877c27a1540c69eb24e292827e511500612156b37022cbf067622bb2d649fa3e9e9f84b
|
data/lib/datapackage.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'uri'
|
3
|
+
require 'net/http'
|
4
|
+
require 'csv'
|
5
|
+
require 'json'
|
6
|
+
require 'json-schema'
|
7
|
+
require 'zip'
|
8
|
+
require 'ruby_dig'
|
9
|
+
require 'tableschema'
|
10
|
+
|
11
|
+
require 'datapackage/defaults'
|
12
|
+
require 'datapackage/helpers'
|
13
|
+
require 'datapackage/version'
|
14
|
+
require 'datapackage/exceptions'
|
15
|
+
require 'datapackage/profile'
|
16
|
+
require 'datapackage/resource'
|
17
|
+
require 'datapackage/package'
|
18
|
+
require 'datapackage/registry'
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module DataPackage
  # Fallback values used when a descriptor leaves the corresponding property out.
  # The outer hash is frozen; keys are grouped by the kind of descriptor they apply to.
  DEFAULTS = {
    resource: {
      profile: 'data-resource',
      tabular_profile: 'tabular-data-resource',
      encoding: 'utf-8',
    },
    package: {
      profile: 'data-package',
    },
    schema: {
      format: 'default',
      type: 'string',
      missing_values: [''],
    },
    dialect: {
      delimiter: ',',
      doubleQuote: true,
      # Double quotes are required here: in a single-quoted Ruby string '\r\n'
      # is the four literal characters backslash, r, backslash, n rather than CR+LF.
      lineTerminator: "\r\n",
      quoteChar: '"',
      # A single backslash character.
      escapeChar: '\\',
      skipInitialSpace: true,
      header: true,
      caseSensitiveHeader: false,
    },
  }.freeze
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
module DataPackage
  # Root of every error raised by this library.
  #
  # It derives from StandardError (not ::Exception) so that a bare `rescue`
  # or `rescue StandardError` in calling code catches library failures;
  # `rescue DataPackage::Exception` and `rescue ::Exception` keep working.
  class Exception < StandardError; end

  # NOTE: inside this module the bare name `Exception` resolves to
  # DataPackage::Exception, so every class below shares that root.
  class RegistryException < Exception; end
  class ResourceException < Exception; end
  class ProfileException < Exception; end
  class PackageException < Exception; end
  class ValidationError < Exception; end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'pathname'
require 'open-uri'

module DataPackage
  # Helpers for locating, loading and dereferencing JSON descriptors.
  # Mixed into the classes that need to read descriptors from disk or the web.
  module Helpers

    # Errors that only mean "this string does not name a readable file";
    # such strings are kept as plain values while dereferencing.
    NOT_A_REFERENCE_ERRORS = [
      Errno::ENOENT, Errno::ENOTDIR, Errno::EISDIR, Errno::ENAMETOOLONG
    ].freeze

    # Strings that start with `scheme://` are treated as remote references.
    REMOTE_REFERENCE = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

    # Dereference a resource that can be a URL or path to a JSON file or a hash.
    # Returns a copy in which every value that is a URL or path to a JSON
    # document has been replaced by the parsed document.
    #
    # resource::         Hash, Enumerable, String or any other value
    # base_path::        directory or base URL that relative references are resolved against
    # reference_fields:: when given, only Hash keys in this list are dereferenced;
    #                    when nil, every value is tried
    def dereference_descriptor(resource, base_path: nil, reference_fields: nil)
      options = { base_path: base_path, reference_fields: reference_fields }
      case resource
      when Hash
        resource.each_with_object({}) do |(key, val), dereferenced|
          dereferenced[key] =
            if reference_fields.nil? || reference_fields.include?(key)
              dereference_descriptor(val, **options)
            else
              val
            end
        end
      when Enumerable
        resource.map { |el| dereference_descriptor(el, **options) }
      when String
        begin
          resolve_json_reference(resource, deep_dereference: true, base_path: base_path)
        rescue *NOT_A_REFERENCE_ERRORS
          # Not a file reference after all (missing, a directory, or too long
          # to be a file name): keep the string as an ordinary value.
          resource
        end
      else
        resource
      end
    end

    # Resolve a reference to a JSON file; returns the parsed JSON.
    # With `deep_dereference: true` the loaded document is itself dereferenced.
    # Raises JSON::ParserError, OpenURI::HTTPError, SocketError or TypeError for invalid references
    def resolve_json_reference(reference, deep_dereference: false, base_path: nil)
      reference = join_paths(base_path, reference)
      resolved_reference = load_json(reference)
      return resolved_reference unless deep_dereference == true

      dereference_descriptor(resolved_reference, base_path: base_path)
    end

    # Load JSON from path or URL.
    #
    # Remote references are fetched through open-uri's URI#open and local ones
    # through File.open. Kernel#open is deliberately avoided: it runs strings
    # beginning with "|" as shell commands and no longer opens URLs on Ruby 3+.
    # Both branches use the block form so the handle is always closed.
    #
    # Raises: Errno::ENOENT, OpenURI::HTTPError, SocketError, JSON::ParserError
    def load_json(reference)
      uri = remote_uri(reference)
      content = uri ? uri.open(&:read) : File.open(reference, &:read)
      JSON.parse(content)
    end

    # Returns the directory (local path) or base URL (remote) that contains
    # `path_or_url`, or nil when it is nil or empty. A base URL keeps a
    # non-default port and never ends with a slash.
    def base_path(path_or_url)
      path_or_url = path_or_url.to_s
      return nil if path_or_url.empty?

      if path_or_url =~ /\A#{URI::regexp}\z/
        uri = URI.parse(path_or_url)
        port = uri.port && uri.port != uri.default_port ? ":#{uri.port}" : ''
        # File.dirname('') is '.', which would corrupt a bare `http://host`.
        directory = uri.path.to_s.empty? ? '' : File.dirname(uri.path)
        "#{uri.scheme}://#{uri.host}#{port}#{directory}".chomp('/')
      elsif File.directory?(path_or_url)
        path_or_url
      else
        File.expand_path(File.dirname(path_or_url))
      end
    end

    # Resolve `resource` against `base_path`.
    # - no base: `resource` is returned unchanged
    # - URL base: `resource` is resolved inside that base URL
    # - directory base: the two are joined with File.join
    # - base that is an existing file: the base itself is returned
    # - anything else: `resource` is returned unchanged
    def join_paths(base_path, resource)
      return resource if base_path.nil? || base_path.empty?

      if base_path =~ /\A#{URI::regexp}\z/
        # URI.join replaces the last path segment of a base that lacks a
        # trailing slash, which would drop the final directory of the base.
        base_url = base_path.end_with?('/') ? base_path : "#{base_path}/"
        URI.join(base_url, resource).to_s
      elsif File.directory?(base_path)
        File.join(base_path, resource).to_s
      elsif File.file?(base_path)
        base_path
      else
        resource
      end
    end

    # True when `string` parses as an HTTP(S) URL that has a host.
    def is_fully_qualified_url?(string)
      uri = URI.parse(string)
      uri.is_a?(URI::HTTP) && !uri.host.nil?
    rescue URI::InvalidURIError
      false
    end

    # True when `string` is a relative path that cannot climb out of its base
    # directory: it is not absolute, its first segment is not made only of
    # dots, and no segment anywhere in the path is `..`.
    def is_safe_path?(string)
      path = Pathname.new(string)
      return false if path.absolute?

      segments = path.to_s.split('/')
      return false unless /^\.+$/.match(segments.first).nil?
      return false if segments.include?('..')

      true
    end

    private

    # Returns a URI that open-uri can open for `reference`, or nil when the
    # reference should be read from the local file system instead.
    def remote_uri(reference)
      return reference if reference.is_a?(URI::Generic) && reference.respond_to?(:open)
      return nil unless reference.respond_to?(:to_str) && reference.to_str =~ REMOTE_REFERENCE

      uri = URI.parse(reference.to_str)
      uri if uri.respond_to?(:open)
    end

  end
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
module DataPackage
  # A Data Package descriptor held as a Hash, together with the profile it is
  # validated against and its resources loaded as DataPackage::Resource objects.
  class Package < Hash
    include DataPackage::Helpers

    attr_reader :opts, :errors, :profile, :dead_resources

    # Parse or create a data package
    # Supports reading data from JSON file, directory, and a URL
    # descriptor:: Hash or String
    # opts:: Options used to customize reading and parsing
    #
    # Raises PackageException when the descriptor cannot be fetched or is not valid JSON.
    def initialize(descriptor = nil, opts: {})
      @opts = opts
      @dead_resources = []
      merge!(parse_package(descriptor))
      @profile = DataPackage::Profile.new(fetch('profile', DataPackage::DEFAULTS[:package][:profile]))
      self['profile'] = @profile.name
      define_properties!
      load_resources!
    rescue OpenURI::HTTPError, SocketError => e
      raise PackageException, "Package URL returned #{e.message}"
    rescue JSON::ParserError
      raise PackageException, 'Package descriptor is not valid JSON'
    end

    # The descriptor as a plain Hash.
    def descriptor
      to_h
    end

    # Returns the directory for a local file package or base url for a remote
    # Returns an empty string for an in-memory object (because it has no base as yet)
    def base
      # user can override base
      return @opts[:base] if @opts[:base]
      return '' unless @location

      # work out base directory or uri
      if local?
        File.dirname(@location)
      else
        @location.split('/')[0..-2].join('/')
      end
    end

    # Is this a local package? Returns true if created from an in-memory object or a file/directory reference
    def local?
      return @local if @local
      return false if @location =~ /\A#{URI::regexp}\z/

      true
    end

    # The package's resources, each loaded as a Resource.
    def resources
      update_resources!
      self['resources']
    end

    # Names of the package's resources.
    def resource_names
      update_resources!
      self['resources'].map { |res| res.name }
    end

    # True when the descriptor satisfies the profile and every resource is valid.
    def valid?
      # Load first, like the other accessors do, so that raw hashes appended
      # to self['resources'] are turned into Resource objects before use.
      update_resources!
      return false unless @profile.valid?(self)

      self['resources'].all? { |resource| resource.valid? != false }
    end

    alias :valid :valid?

    # Validate the descriptor and every resource.
    # Returns true; a failing validation raises from the profile/resource.
    def validate
      update_resources!
      @profile.validate(self)
      self['resources'].each { |resource| resource.validate }
      true
    end

    # Yields each validation error of the package, then of its resources.
    # Without a block an Enumerator over the same errors is returned.
    def iter_errors
      return enum_for(:iter_errors) unless block_given?

      update_resources!
      errors = @profile.iter_errors(self) { |err| err }
      self['resources'].each do |resource|
        resource.iter_errors { |err| errors << err }
      end
      errors.each { |error| yield error }
    end

    # Add a resource (Hash or Resource) to the package.
    # Returns the loaded Resource, or nil (leaving the package unchanged)
    # when the package no longer validates with the resource added.
    def add_resource(resource)
      resource = load_resource(resource)
      self['resources'].push(resource)
      begin
        validate
        resource
      rescue DataPackage::ValidationError
        self['resources'].pop
        nil
      end
    end

    # Remove every resource called `resource_name`.
    # Returns the first resource that had that name, or nil if there was none.
    def remove_resource(resource_name)
      removed = get_resource(resource_name)
      self['resources'].reject! { |candidate| candidate.name == resource_name }
      removed
    end

    # The first resource called `resource_name`, or nil.
    def get_resource(resource_name)
      update_resources!
      self['resources'].find { |resource| resource.name == resource_name }
    end

    # Write the descriptor as pretty-printed JSON to `target`
    # (by default the location the package was read from). Returns true.
    def save(target=@location)
      update_resources!
      File.open(target, "w") { |file| file << JSON.pretty_generate(self) }
      true
    end

    # The value stored under `property`, or `default` when it is nil or false.
    def property(property, default = nil)
      self[property] || default
    end

    private

    # Define a reader and a writer on this instance for every property the
    # profile declares, except `resources` and `profile`.
    def define_properties!
      (@profile['properties'] || {}).each do |k, v|
        next if k == 'resources' || k == 'profile'

        define_singleton_method("#{k}=") { |value| set_property(k, value) }
        define_singleton_method(k.to_s) { property(k, default_value(v)) }
      end
    end

    def load_resources!
      self['resources'] ||= []
      update_resources!
    end

    # Replace every raw resource descriptor with a Resource. Descriptors that
    # raise ResourceException are moved to @dead_resources and dropped.
    def update_resources!
      self['resources'].map! do |resource|
        begin
          load_resource(resource)
        rescue ResourceException
          @dead_resources << resource
          nil
        end
      end
      self['resources'].compact!
    end

    def load_resource(resource)
      resource.is_a?(Resource) ? resource : Resource.new(resource, base)
    end

    # Value a generated reader returns when the property is unset,
    # chosen from the `type` of the property's schema.
    def default_value(field_data)
      case field_data['type']
      when 'array' then []
      when 'object' then {}
      end
    end

    def set_property(key, value)
      self[key] = value
    end

    # Turn the constructor argument into a descriptor Hash.
    def parse_package(descriptor)
      # TODO: base directory/url
      if descriptor.nil?
        {}
      elsif descriptor.is_a?(Hash)
        # is_a? also accepts Hash subclasses, which would otherwise be
        # handed to read_package as if they were a path.
        descriptor
      else
        read_package(descriptor)
      end
    end

    def read_package(descriptor)
      if File.extname(descriptor) == '.zip'
        unzip_package(descriptor)
      else
        @location = descriptor.to_s
        load_json(descriptor)
      end
    end

    # Extract a zipped package into a fresh temporary directory and return its
    # parsed descriptor. All entries are extracted flat (by basename) into that
    # directory, which becomes the package base. The directory is not removed
    # here because resources are read from it later.
    #
    # Raises PackageException when the archive holds no descriptor file.
    def unzip_package(descriptor)
      descriptor = write_to_tempfile(descriptor) if descriptor.to_s =~ /\A#{URI::regexp}\z/
      dir = Dir.mktmpdir
      filename = @opts[:default_filename] || 'datapackage.json'
      package = {}
      Zip::File.open(descriptor) do |zip_file|
        # Extract all the files
        zip_file.each { |entry| entry.extract("#{dir}/#{File.basename entry.name}") }
        # Get and parse the datapackage metadata; look one directory down
        # first, then at the archive root.
        entry = zip_file.glob("*/#{filename}").first || zip_file.glob(filename).first
        raise PackageException, "Zip archive does not contain `#{filename}`" if entry.nil?

        package = JSON.parse(entry.get_input_stream.read)
      end
      # Set the base dir to the directory we unzipped to
      @opts[:base] = dir
      # This is now a local file, not a URL
      @local = true
      package
    end

    # Download `url` into a Tempfile and return it, rewound.
    def write_to_tempfile(url)
      tempfile = Tempfile.new('datapackage')
      # Archives are binary data; avoid newline translation on write.
      tempfile.binmode
      uri = URI.parse(url)
      # URI#open (open-uri) closes its handle via the block. Anything open-uri
      # cannot fetch is read as a local file, as Kernel#open used to do.
      content = uri.respond_to?(:open) ? uri.open(&:read) : File.open(url, 'rb', &:read)
      tempfile.write(content)
      tempfile.rewind
      tempfile
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module DataPackage
  # A JSON Schema, held as a Hash, that descriptors are validated against.
  # It is loaded either from a fully qualified URL or, by identifier,
  # from the profile registry.
  class Profile < Hash
    include DataPackage::Helpers

    attr_reader :name, :registry

    # descriptor:: String, either a URL or a registry identifier
    #
    # Raises ProfileException when the descriptor is not a String, cannot be
    # fetched, is not valid JSON, or is unknown to the registry.
    def initialize(descriptor)
      raise ProfileException.new 'Profile must be a URL or registry identifier' unless descriptor.is_a?(String)

      @name = descriptor
      schema =
        if is_fully_qualified_url?(descriptor)
          load_json(descriptor)
        else
          get_profile_from_registry(descriptor)
        end
      merge!(schema)
    rescue OpenURI::HTTPError, SocketError => e
      raise ProfileException.new "Profile URL returned #{e.message}"
    rescue JSON::ParserError
      raise ProfileException.new 'Profile is not valid JSON'
    end

    # The schema as a plain Hash.
    def jsonschema
      to_h
    end

    # Returns true if there are no errors in data, false if there are
    def valid?(data)
      JSON::Validator.validate(self, data)
    end

    alias_method :valid, :valid?

    # Validate data against this profile. Returns true or raises DataPackage::ValidationError
    def validate(data)
      JSON::Validator.validate!(self, data)
    rescue JSON::Schema::ValidationError => failure
      raise DataPackage::ValidationError.new(failure.message)
    end

    # Yields every error JSON::Validator.fully_validate reports for data.
    def iter_errors(data)
      found = JSON::Validator.fully_validate(self, data)
      found.each { |error| yield error }
    end

    private

    # Look the identifier up in the registry and load the schema it points to:
    # the local `schema_path` (relative to the registry file) when present,
    # otherwise the `schema` location.
    def get_profile_from_registry(descriptor)
      @registry = DataPackage::Registry.new
      metadata = registry.profiles.fetch(descriptor)
      location =
        if metadata.fetch('schema_path', nil)
          join_paths(base_path(registry.path), metadata['schema_path'])
        else
          metadata['schema']
        end
      load_json(location)
    rescue KeyError
      raise ProfileException.new "Couldn't find profile with id `#{descriptor}` in registry"
    end

  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module DataPackage
  # Allow loading Data Package profiles from the official registry.
  #
  # The registry is read from the copy bundled with the library
  # (DEFAULT_REGISTRY_PATH) and exposed as a Hash of profile id => metadata.
  class Registry
    include DataPackage::Helpers

    attr_reader :path, :profiles

    DEFAULT_REGISTRY_URL = 'https://specs.frictionlessdata.io/schemas/registry.json'.freeze
    DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), '..', 'profiles', 'registry.json').freeze

    # Raises RegistryException when the registry file is missing, cannot be
    # fetched, is not valid JSON, or lists a profile without an `id`.
    def initialize
      @path = DEFAULT_REGISTRY_PATH
      @profiles = get_registry(DEFAULT_REGISTRY_PATH)
    rescue Errno::ENOENT
      raise RegistryException, 'Registry path is not valid'
    rescue OpenURI::HTTPError, SocketError => e
      raise RegistryException, "Registry URL returned #{e.message}"
    rescue JSON::ParserError
      raise RegistryException, 'Registry descriptor is not valid JSON'
    rescue KeyError
      raise RegistryException, 'Property `id` is mandatory for profiles'
    end

    private

    # Load the registry document and index its profiles by `id`.
    def get_registry(descriptor)
      load_json(descriptor).each_with_object({}) do |profile, registry|
        # fetch (not []) so that a profile without an `id` raises KeyError,
        # which #initialize reports as a RegistryException, instead of being
        # silently stored under a nil key.
        registry[profile.fetch('id')] = profile
      end
    end

  end
end
|