datapackage 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0e72a49acec8bb887ed72db7645838841b31873f
4
- data.tar.gz: ff5d34e0d9a62bc6209d57a06c76fed750395f4a
3
+ metadata.gz: 18b6d480843853daa822de909d242c78b40ed762
4
+ data.tar.gz: 4c6004ea6559656434a5759038cc0163c0762520
5
5
  SHA512:
6
- metadata.gz: 84486c5bf44e5c2545014efddd2e44f2032e1108b8e2e9c24f708ec8011a212f86dd52247584e9caf6a8545b8c1aac98047877d6f6eaa0a3703b7242c71eab72
7
- data.tar.gz: f8b6b0492986cc2b1fc85675a498963b4710fcb9e0337923342c57a4cf2dc001b162cfa7efb72c3928d5194e417aa71f1c545e32ffcc5e0038449531b78604d5
6
+ metadata.gz: e3c5286265b393c7c7ecd3563515bccfb67f2c20a54b6c15e2d4ad1ac00f6c55b69f18c591e38397958a6bf23cd9d0c88deda81ae39e06cf6d8532607f31eeda
7
+ data.tar.gz: d09058baf930fda7eb99d688f9c9fb405bc1910563d0b1e48763400dc877c27a1540c69eb24e292827e511500612156b37022cbf067622bb2d649fa3e9e9f84b
@@ -0,0 +1,18 @@
1
+ require 'date'
2
+ require 'uri'
3
+ require 'net/http'
4
+ require 'csv'
5
+ require 'json'
6
+ require 'json-schema'
7
+ require 'zip'
8
+ require 'ruby_dig'
9
+ require 'tableschema'
10
+
11
+ require 'datapackage/defaults'
12
+ require 'datapackage/helpers'
13
+ require 'datapackage/version'
14
+ require 'datapackage/exceptions'
15
+ require 'datapackage/profile'
16
+ require 'datapackage/resource'
17
+ require 'datapackage/package'
18
+ require 'datapackage/registry'
@@ -0,0 +1,27 @@
1
module DataPackage
  # Fallback values, grouped by the part of a descriptor they belong to
  # (resource, package, schema, CSV dialect).
  #
  # Only the outer hash is frozen; the nested hashes and the
  # `missing_values` array are not.
  DEFAULTS = {
    resource: {
      profile: 'data-resource',
      tabular_profile: 'tabular-data-resource',
      encoding: 'utf-8',
    },
    package: {
      profile: 'data-package',
    },
    schema: {
      format: 'default',
      type: 'string',
      missing_values: [''],
    },
    dialect: {
      delimiter: ',',
      doubleQuote: true,
      # Must be double-quoted: inside single quotes Ruby does not process
      # \r or \n, which made this the four characters `\`, `r`, `\`, `n`
      # instead of a carriage return followed by a line feed.
      lineTerminator: "\r\n",
      quoteChar: '"',
      # A single backslash (`\\` is the one escape single quotes do process).
      escapeChar: '\\',
      skipInitialSpace: true,
      header: true,
      caseSensitiveHeader: false,
    },
  }.freeze
end
@@ -0,0 +1,8 @@
1
module DataPackage
  # Root of the library's error hierarchy.
  #
  # Inherits from StandardError (not ::Exception) so that a bare `rescue`
  # or `rescue StandardError` in calling code catches library errors.
  # Code that rescues DataPackage::Exception or ::Exception is unaffected.
  class Exception < ::StandardError; end

  # Raised when the profile registry cannot be read or parsed.
  class RegistryException < Exception; end

  # Error class for resource-level failures.
  class ResourceException < Exception; end

  # Raised when a profile cannot be located, fetched or parsed.
  class ProfileException < Exception; end

  # Raised when a package descriptor cannot be fetched or parsed.
  class PackageException < Exception; end

  # Raised when data does not conform to a profile's schema.
  class ValidationError < Exception; end
end
@@ -0,0 +1,98 @@
1
# Pathname is used by is_safe_path?; require it here rather than relying on
# another library having loaded it.
require 'pathname'

module DataPackage
  # Mixin with helpers for loading JSON descriptors and resolving the
  # paths/URLs that descriptors reference.
  module Helpers

    # Dereference a resource that can be a URL or path to a JSON file or a hash.
    # Returns a structure of the same shape in which every String value that
    # can be loaded as JSON has been replaced by the loaded content.
    #
    # resource::         Hash, Enumerable, String or any other value
    # base_path::        directory or URL that relative references are joined to
    # reference_fields:: when given, only Hash keys included in it are
    #                    dereferenced; other keys are copied unchanged
    #
    # A String whose target does not exist (Errno::ENOENT) is returned as is.
    # Other loading errors (for example JSON::ParserError) propagate.
    def dereference_descriptor(resource, base_path: nil, reference_fields: nil)
      options = {
        base_path: base_path,
        reference_fields: reference_fields,
      }
      case resource
      when Hash
        resource.inject({}) do |new_resource, (key, val)|
          if reference_fields.nil? || reference_fields.include?(key)
            new_resource[key] = dereference_descriptor(val, **options)
          else
            new_resource[key] = val
          end
          new_resource
        end
      when Enumerable
        resource.map { |el| dereference_descriptor(el, **options) }
      when String
        begin
          resolve_json_reference(resource, deep_dereference: true, base_path: base_path)
        rescue Errno::ENOENT
          # Not a readable file: treat the string as a plain value.
          resource
        end
      else
        resource
      end
    end

    # Resolve a reference to a JSON file; returns the parsed JSON.
    # With deep_dereference: true the loaded content is itself passed through
    # dereference_descriptor (without reference_fields).
    # Raises JSON::ParserError, OpenURI::HTTPError, SocketError or TypeError for invalid references
    def resolve_json_reference(reference, deep_dereference: false, base_path: nil)
      # Try to extract JSON from file or webpage
      reference = join_paths(base_path, reference)
      resolved_reference = load_json(reference)
      if deep_dereference == true
        dereference_descriptor(resolved_reference, base_path: base_path)
      else
        resolved_reference
      end
    end

    # Load JSON from path or URL and return the parsed result.
    # Raises: Errno::ENOENT, OpenURI::HTTPError, SocketError, JSON::ParserError
    #
    # NOTE(review): Kernel#open treats a reference starting with "|" as a
    # command to run; confirm that untrusted strings never reach this method.
    def load_json(reference)
      # Block form so the file/stream is closed even when parsing raises.
      open(reference) { |io| JSON.parse(io.read) }
    end

    # Returns the base location of a path or URL:
    # * nil for nil or an empty string
    # * for a URL, scheme + host (+ explicit non-default port) + directory
    #   part of the path, without a trailing slash
    # * a directory path unchanged
    # * otherwise the absolute directory containing the path
    def base_path(path_or_url)
      path_or_url = path_or_url.to_s
      if path_or_url.empty?
        nil
      elsif path_or_url =~ /\A#{URI::regexp}\z/
        uri = URI.parse path_or_url
        # Keep an explicitly given port; dropping it would point the base at
        # a different server than the one the descriptor came from.
        port = uri.port && uri.port != uri.default_port ? ":#{uri.port}" : ''
        "#{uri.scheme}://#{uri.host}#{port}#{File.dirname uri.path}".chomp('/')
      elsif File.directory?(path_or_url)
        path_or_url
      else
        File.expand_path File.dirname path_or_url
      end
    end

    # Combine a base location with a resource reference.
    # * no base (nil/empty): the resource is returned unchanged
    # * URL base: URI.join of base and resource, as a String
    # * existing directory base: File.join of base and resource
    # * existing file base: the base itself is returned
    # * anything else: the resource is returned unchanged
    def join_paths(base_path, resource)
      if base_path.nil? || base_path.empty?
        resource
      elsif base_path =~ /\A#{URI::regexp}\z/
        URI.join(base_path, resource).to_s
      elsif File.directory?(base_path)
        File.join(base_path, resource).to_s
      elsif File.file?(base_path)
        base_path
      else
        resource
      end
    end

    # True when the string parses as an HTTP(S) URI that has a host.
    # Strings that are not valid URIs yield false.
    def is_fully_qualified_url?(string)
      uri = URI.parse(string)
      uri.is_a?(URI::HTTP) && !uri.host.nil?
    rescue URI::InvalidURIError
      false
    end

    # False for absolute paths and for paths whose first "/"-separated
    # segment consists only of dots (".", "..", ...); true otherwise.
    # Only the first segment is inspected.
    def is_safe_path?(string)
      path = Pathname.new(string)
      return false if path.absolute?
      return false unless /^\.+$/.match(path.to_s.split('/').first).nil?
      true
    end

  end
end
@@ -0,0 +1,212 @@
1
require 'open-uri'
# Tempfile and Dir.mktmpdir are used when reading zipped packages; require
# them explicitly instead of relying on another library to load them.
require 'tempfile'
require 'tmpdir'

module DataPackage
  # A data package descriptor. The descriptor's properties are stored as the
  # entries of the Hash itself; 'resources' holds Resource objects.
  class Package < Hash
    include DataPackage::Helpers

    # NOTE(review): nothing in this class assigns @errors, so `errors`
    # appears to always return nil; confirm before relying on it.
    attr_reader :opts, :errors, :profile, :dead_resources

    # Parse or create a data package
    # Supports reading data from JSON file, directory, and a URL
    # descriptor:: Hash or String
    # opts:: Options used to customize reading and parsing. Keys read by this
    #        class: :base (overrides the computed base) and :default_filename
    #        (descriptor file name looked up inside a zip).
    #
    # Raises PackageException when the descriptor URL cannot be fetched or
    # the descriptor is not valid JSON.
    def initialize(descriptor = nil, opts: {})
      @opts = opts
      @dead_resources = []
      self.merge! parse_package(descriptor)
      @profile = DataPackage::Profile.new(self.fetch('profile', DataPackage::DEFAULTS[:package][:profile]))
      self['profile'] = @profile.name
      define_properties!
      load_resources!
    rescue OpenURI::HTTPError, SocketError => e
      raise PackageException.new "Package URL returned #{e.message}"
    rescue JSON::ParserError
      raise PackageException.new 'Package descriptor is not valid JSON'
    end

    # The package content as a plain Hash.
    def descriptor
      self.to_h
    end

    # Returns the directory for a local file package or base url for a remote
    # Returns an empty string for an in-memory object (because it has no
    # location as yet), unless opts[:base] was given.
    def base
      # user can override base
      return @opts[:base] if @opts[:base]
      return '' unless @location
      # work out base directory or uri
      if local?
        File.dirname(@location)
      else
        # Drop the last path segment of the URL.
        @location.split('/')[0..-2].join('/')
      end
    end

    # Is this a local package? Returns true if created from an in-memory object or a file/directory reference
    def local?
      return @local if @local
      return false if @location =~ /\A#{URI::regexp}\z/
      true
    end

    # The package's Resource objects. Entries that are not yet Resource
    # instances are converted first.
    def resources
      update_resources!
      self['resources']
    end

    # Names of all resources, in order.
    def resource_names
      update_resources!
      self['resources'].map { |res| res.name }
    end

    # True when the package satisfies its profile and no resource reports
    # itself invalid.
    def valid?
      return false unless @profile.valid?(self)
      return false if self['resources'].map { |resource| resource.valid? }.include?(false)
      true
    end

    alias :valid :valid?

    # Validates the package against its profile, then each resource.
    # Returns true; validation failures surface as exceptions raised by the
    # profile or the resources.
    def validate
      @profile.validate(self)
      self['resources'].each { |resource| resource.validate }
      true
    end

    # Yields every validation error of the package, followed by the errors
    # of each resource. All errors are collected before the first is yielded.
    def iter_errors
      errors = @profile.iter_errors(self) { |err| err }
      self['resources'].each do |resource|
        resource.iter_errors { |err| errors << err }
      end
      errors.each { |error| yield error }
    end

    # Appends a resource (a Resource or a descriptor for one) and validates
    # the package. Returns the Resource, or nil when validation raised
    # DataPackage::ValidationError, in which case the resource is removed again.
    def add_resource(resource)
      resource = load_resource(resource)
      self['resources'].push(resource)
      begin
        self.validate
        resource
      rescue DataPackage::ValidationError
        self['resources'].pop
        nil
      end
    end

    # Removes every resource with the given name. Returns the first resource
    # that had that name, or nil when there was none.
    def remove_resource(resource_name)
      update_resources!
      resource = get_resource(resource_name)
      self['resources'].reject! { |candidate| candidate.name == resource_name }
      resource
    end

    # Returns the first resource with the given name, or nil.
    def get_resource(resource_name)
      update_resources!
      self['resources'].find { |resource| resource.name == resource_name }
    end

    # Writes the package as pretty-printed JSON to target (by default the
    # location it was read from). Returns true.
    def save(target=@location)
      update_resources!
      File.open(target, "w") { |file| file << JSON.pretty_generate(self) }
      true
    end

    # Value stored under the given key, or default when it is nil or false.
    def property(property, default = nil)
      self[property] || default
    end

    private

    # Defines a reader and a writer on this instance for every property
    # listed in the profile, except 'resources' and 'profile'.
    def define_properties!
      (@profile['properties'] || {}).each do |k, v|
        next if k == 'resources' || k == 'profile'
        define_singleton_method("#{k.to_sym}=", proc { |p| set_property(k, p) })
        define_singleton_method(k.to_sym.to_s, proc { property k, default_value(v) })
      end
    end

    # Ensures 'resources' exists and holds Resource objects.
    def load_resources!
      self['resources'] ||= []
      update_resources!
    end

    # Converts every entry of 'resources' into a Resource in place.
    # Entries whose conversion raises ResourceException are moved to
    # dead_resources and dropped from the list.
    def update_resources!
      self['resources'].map! do |resource|
        begin
          load_resource(resource)
        rescue ResourceException
          @dead_resources << resource
          nil
        end
      end.compact!
    end

    # Returns the argument when it already is a Resource; otherwise builds
    # one from it, relative to this package's base.
    def load_resource(resource)
      if resource.is_a?(Resource)
        resource
      else
        Resource.new(resource, base)
      end
    end

    # Empty value matching a profile property's declared 'type':
    # [] for 'array', {} for 'object', nil for anything else.
    def default_value(field_data)
      case field_data['type']
      when 'array'
        []
      when 'object'
        {}
      else
        nil
      end
    end

    def set_property(key, value)
      self[key] = value
    end

    # Turns the constructor argument into a Hash of descriptor properties.
    def parse_package(descriptor)
      # TODO: base directory/url
      if descriptor.nil?
        {}
      elsif descriptor.is_a?(Hash)
        # is_a? rather than an exact class match, so Hash subclasses are
        # accepted instead of being treated as a path.
        descriptor
      else
        read_package(descriptor)
      end
    end

    # Reads a descriptor from a zip archive (by extension) or from a JSON
    # file/URL, remembering the location in the latter case.
    def read_package(descriptor)
      if File.extname(descriptor) == '.zip'
        unzip_package(descriptor)
      else
        @location = descriptor.to_s
        load_json(descriptor)
      end
    end

    # Extracts a zipped package into a fresh temporary directory and returns
    # the parsed descriptor. Every entry is extracted directly into that
    # directory under its base name. The descriptor is looked up one
    # directory deep first and then at the archive root.
    #
    # Raises PackageException when the archive holds no descriptor file.
    def unzip_package(descriptor)
      descriptor = write_to_tempfile(descriptor) if descriptor.to_s =~ /\A#{URI::regexp}\z/
      dir = Dir.mktmpdir
      filename = @opts[:default_filename] || 'datapackage.json'
      package = {}
      Zip::File.open(descriptor) do |zip_file|
        # Extract all the files
        zip_file.each { |entry| entry.extract("#{dir}/#{File.basename entry.name}") }
        # Get and parse the datapackage metadata
        entry = zip_file.glob("*/#{filename}").first || zip_file.glob(filename).first
        raise PackageException.new "Zip file does not contain #{filename}" if entry.nil?
        package = JSON.parse(entry.get_input_stream.read)
      end
      # Set the base dir to the directory we unzipped to
      @opts[:base] = dir
      # This is now a local file, not a URL
      @local = true
      package
    end

    # Downloads url into a Tempfile and returns it, rewound to the start.
    def write_to_tempfile(url)
      tempfile = Tempfile.new('datapackage')
      # Zip archives are binary; avoid any newline/encoding translation.
      tempfile.binmode
      # Block form closes the remote stream once it has been copied.
      open(url) { |remote| tempfile.write(remote.read) }
      tempfile.rewind
      tempfile
    end
  end
end
@@ -0,0 +1,62 @@
1
module DataPackage
  # A JSON schema used to validate descriptors, stored as the entries of the
  # Hash itself. Built either from a fully qualified URL or from an
  # identifier looked up in the profile registry.
  class Profile < Hash
    include DataPackage::Helpers

    attr_reader :name, :registry

    # descriptor:: String holding a URL or a registry identifier
    #
    # Raises ProfileException when the descriptor is not a String, when the
    # URL cannot be fetched, when the content is not valid JSON, or when the
    # identifier is not in the registry.
    def initialize(descriptor)
      raise ProfileException.new 'Profile must be a URL or registry identifier' unless descriptor.is_a?(String)

      @name = descriptor
      schema = is_fully_qualified_url?(descriptor) ? load_json(descriptor) : get_profile_from_registry(descriptor)
      merge!(schema)
    rescue OpenURI::HTTPError, SocketError => e
      raise ProfileException.new "Profile URL returned #{e.message}"
    rescue JSON::ParserError
      raise ProfileException.new 'Profile is not valid JSON'
    end

    # The schema as a plain Hash.
    def jsonschema
      to_h
    end

    # Result of JSON::Validator.validate for data against this schema.
    def valid?(data)
      JSON::Validator.validate(self, data)
    end

    alias_method :valid, :valid?

    # Validates data against this profile via JSON::Validator.validate!.
    # A JSON::Schema::ValidationError is re-raised as
    # DataPackage::ValidationError carrying the same message.
    def validate(data)
      JSON::Validator.validate!(self, data)
    rescue JSON::Schema::ValidationError => schema_error
      raise DataPackage::ValidationError.new(schema_error.message)
    end

    # Yields each entry returned by JSON::Validator.fully_validate for data.
    # The full list is computed before the first entry is yielded, and the
    # list itself is the return value.
    def iter_errors(data)
      found = JSON::Validator.fully_validate(self, data)
      found.each { |problem| yield problem }
    end

    private

    # Looks the identifier up in a new Registry and loads the schema it
    # points to: the registry entry's 'schema_path' joined to the registry's
    # own directory when present, its 'schema' value otherwise.
    # A KeyError (such as an unknown identifier) becomes a ProfileException.
    def get_profile_from_registry(descriptor)
      @registry = DataPackage::Registry.new
      metadata = registry.profiles.fetch(descriptor)
      relative = metadata.fetch('schema_path', nil)
      location =
        if relative
          join_paths(base_path(registry.path), relative)
        else
          metadata['schema']
        end
      load_json(location)
    rescue KeyError
      raise ProfileException.new "Couldn't find profile with id `#{descriptor}` in registry"
    end

  end
end
@@ -0,0 +1,36 @@
1
module DataPackage
  # Allow loading Data Package profiles from the official registry.
  #
  # The registry is read from the JSON file at DEFAULT_REGISTRY_PATH and
  # exposed through `profiles` as a Hash keyed by each entry's 'id'.
  class Registry
    include DataPackage::Helpers

    attr_reader :path, :profiles

    DEFAULT_REGISTRY_URL = 'https://specs.frictionlessdata.io/schemas/registry.json'.freeze
    DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), '..', 'profiles', 'registry.json').freeze

    # Loads the registry from DEFAULT_REGISTRY_PATH.
    #
    # Raises RegistryException when the file is missing, cannot be fetched,
    # is not valid JSON, or contains an entry without an 'id'.
    def initialize
      @path = DEFAULT_REGISTRY_PATH
      @profiles = get_registry(DEFAULT_REGISTRY_PATH)
    rescue Errno::ENOENT
      raise RegistryException.new 'Registry path is not valid'
    rescue OpenURI::HTTPError, SocketError => e
      raise RegistryException.new "Registry URL returned #{e.message}"
    rescue JSON::ParserError
      raise RegistryException.new 'Registry descriptor is not valid JSON'
    rescue KeyError
      raise RegistryException.new 'Property `id` is mandatory for profiles'
    end

    private

    # Loads the registry JSON and indexes its entries by 'id'.
    # Raises KeyError for an entry that has no 'id' key.
    def get_registry(descriptor)
      load_json(descriptor).each_with_object({}) do |resource, registry|
        # fetch (not []) so a missing id raises KeyError, which initialize
        # reports as a RegistryException, instead of silently storing the
        # entry under a nil key.
        registry[resource.fetch('id')] = resource
      end
    end

  end
end