datapackage 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0e72a49acec8bb887ed72db7645838841b31873f
4
- data.tar.gz: ff5d34e0d9a62bc6209d57a06c76fed750395f4a
3
+ metadata.gz: 18b6d480843853daa822de909d242c78b40ed762
4
+ data.tar.gz: 4c6004ea6559656434a5759038cc0163c0762520
5
5
  SHA512:
6
- metadata.gz: 84486c5bf44e5c2545014efddd2e44f2032e1108b8e2e9c24f708ec8011a212f86dd52247584e9caf6a8545b8c1aac98047877d6f6eaa0a3703b7242c71eab72
7
- data.tar.gz: f8b6b0492986cc2b1fc85675a498963b4710fcb9e0337923342c57a4cf2dc001b162cfa7efb72c3928d5194e417aa71f1c545e32ffcc5e0038449531b78604d5
6
+ metadata.gz: e3c5286265b393c7c7ecd3563515bccfb67f2c20a54b6c15e2d4ad1ac00f6c55b69f18c591e38397958a6bf23cd9d0c88deda81ae39e06cf6d8532607f31eeda
7
+ data.tar.gz: d09058baf930fda7eb99d688f9c9fb405bc1910563d0b1e48763400dc877c27a1540c69eb24e292827e511500612156b37022cbf067622bb2d649fa3e9e9f84b
@@ -0,0 +1,18 @@
1
+ require 'date'
2
+ require 'uri'
3
+ require 'net/http'
4
+ require 'csv'
5
+ require 'json'
6
+ require 'json-schema'
7
+ require 'zip'
8
+ require 'ruby_dig'
9
+ require 'tableschema'
10
+
11
+ require 'datapackage/defaults'
12
+ require 'datapackage/helpers'
13
+ require 'datapackage/version'
14
+ require 'datapackage/exceptions'
15
+ require 'datapackage/profile'
16
+ require 'datapackage/resource'
17
+ require 'datapackage/package'
18
+ require 'datapackage/registry'
@@ -0,0 +1,27 @@
1
module DataPackage
  # Values used when a descriptor omits the corresponding property.
  #
  # Grouped by the kind of descriptor they apply to (:resource, :package,
  # :schema, :dialect). Only the outer hash is frozen; the nested hashes are
  # ordinary mutable objects, so callers should copy before modifying them.
  DEFAULTS = {
    resource: {
      profile: 'data-resource',
      tabular_profile: 'tabular-data-resource',
      encoding: 'utf-8',
    },
    package: {
      profile: 'data-package',
    },
    schema: {
      format: 'default',
      type: 'string',
      missing_values: [''],
    },
    dialect: {
      delimiter: ',',
      doubleQuote: true,
      # Double-quoted on purpose: in a single-quoted Ruby literal '\r\n' is the
      # four characters backslash-r-backslash-n, not a CR LF line terminator.
      lineTerminator: "\r\n",
      quoteChar: '"',
      # A single backslash character.
      escapeChar: '\\',
      skipInitialSpace: true,
      header: true,
      caseSensitiveHeader: false,
    },
  }.freeze
end
@@ -0,0 +1,8 @@
1
module DataPackage
  # Base class for every error raised by this library.
  #
  # Inherits from StandardError (not ::Exception) so that a bare `rescue` or
  # `rescue StandardError` in calling code catches library errors, and so that
  # they are not grouped with interpreter-level exceptions such as SignalException
  # or SystemExit. `rescue DataPackage::Exception` keeps working unchanged.
  class Exception < StandardError; end

  # Problems loading or reading the profile registry.
  class RegistryException < Exception; end

  # Problems loading a resource.
  class ResourceException < Exception; end

  # Problems loading a profile.
  class ProfileException < Exception; end

  # Problems loading a package descriptor.
  class PackageException < Exception; end

  # A descriptor failed validation against its profile.
  class ValidationError < Exception; end
end
@@ -0,0 +1,98 @@
1
require 'open-uri'
require 'pathname'

module DataPackage
  # Path, URL and JSON-reference helpers mixed into the descriptor classes.
  module Helpers

    # Recursively dereference a descriptor.
    #
    # resource::         a Hash, an Enumerable, a String (treated as a path or
    #                    URL of a JSON document) or any other value
    # base_path::        directory or base URL that String references are
    #                    resolved against (see #join_paths)
    # reference_fields:: when given, only Hash keys contained in it are
    #                    dereferenced; other values are copied as they are
    #
    # Returns a new structure in which every String that could be loaded as a
    # JSON document has been replaced by that (deeply dereferenced) document.
    # Strings that do not name a readable file are returned unchanged.
    # Errors from remote references and JSON::ParserError propagate.
    def dereference_descriptor(resource, base_path: nil, reference_fields: nil)
      options = {
        base_path: base_path,
        reference_fields: reference_fields,
      }
      case resource
      when Hash
        resource.each_with_object({}) do |(key, val), dereferenced|
          dereferenced[key] =
            if reference_fields.nil? || reference_fields.include?(key)
              dereference_descriptor(val, **options)
            else
              val
            end
        end
      when Enumerable
        resource.map { |el| dereference_descriptor(el, **options) }
      when String
        begin
          resolve_json_reference(resource, deep_dereference: true, base_path: base_path)
        rescue Errno::ENOENT, Errno::ENOTDIR, Errno::EISDIR, Errno::ENAMETOOLONG
          # Not a readable file: an ordinary string value (a title, a long
          # description, the name of a directory, ...). Keep it verbatim.
          resource
        end
      else
        resource
      end
    end

    # Resolve a reference to a JSON file; returns the parsed JSON.
    #
    # reference::        path or URL, resolved against base_path
    # deep_dereference:: when truthy, the loaded document is itself passed
    #                    through #dereference_descriptor
    #
    # Raises whatever #load_json raises for the resolved reference.
    def resolve_json_reference(reference, deep_dereference: false, base_path: nil)
      reference = join_paths(base_path, reference)
      resolved_reference = load_json(reference)
      if deep_dereference == true
        dereference_descriptor(resolved_reference, base_path: base_path)
      else
        resolved_reference
      end
    end

    # Load and parse JSON from a local path or a URL.
    #
    # A String of the form "scheme://..." whose URI class supports open-uri
    # (http, https, ftp) is fetched remotely; anything else is read as a local
    # file. Kernel#open is deliberately not used: it would run a String that
    # starts with "|" as a shell command, and it no longer opens URLs on
    # Ruby 3.0+. Both the remote stream and the file are closed after reading.
    #
    # Raises: Errno::ENOENT (and other Errno errors) for unreadable files,
    # TypeError for a nil reference, OpenURI::HTTPError / SocketError for
    # remote failures, URI::InvalidURIError, JSON::ParserError.
    def load_json(reference)
      remote = nil
      if reference.respond_to?(:to_str) && reference.to_str =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
        remote = URI.parse(reference.to_str)
      end
      # Only URI classes extended by open-uri expose a public #open.
      content = remote.respond_to?(:open) ? remote.open(&:read) : File.read(reference)
      JSON.parse(content)
    end

    # Work out the base directory or base URL of a path or URL.
    #
    # Returns nil for nil/empty input. For a URL, returns scheme, host,
    # non-default port and the directory part of the path, without a trailing
    # slash. For an existing local directory, returns it unchanged; for any
    # other local path, returns the expanded directory containing it.
    def base_path(path_or_url)
      path_or_url = path_or_url.to_s
      if path_or_url.empty?
        nil
      elsif path_or_url =~ /\A#{URI::regexp}\z/
        uri = URI.parse(path_or_url)
        authority = uri.host.to_s
        # Keep an explicit port, otherwise "http://localhost:8080/x.json"
        # would resolve to a different server.
        authority += ":#{uri.port}" if uri.port && uri.port != uri.default_port
        # File.dirname('') is '.', which must not be appended to the host.
        directory = uri.path.to_s.empty? ? '' : File.dirname(uri.path)
        "#{uri.scheme}://#{authority}#{directory}".chomp('/')
      elsif File.directory?(path_or_url)
        path_or_url
      else
        File.expand_path(File.dirname(path_or_url))
      end
    end

    # Resolve resource against base_path.
    #
    # * nil/empty base_path: resource is returned unchanged
    # * URL base_path: resource is resolved relative to it, treating the base
    #   as a directory (an absolute URL resource replaces the base)
    # * existing local directory: File.join of both
    # * existing local file: base_path itself is returned
    # * anything else: resource is returned unchanged
    def join_paths(base_path, resource)
      if base_path.nil? || base_path.empty?
        resource
      elsif base_path =~ /\A#{URI::regexp}\z/
        # URI.join replaces the last path segment of a base that has no
        # trailing slash, which would drop the final directory of bases such
        # as "http://host/data" (the form #base_path produces).
        directory_url = base_path.end_with?('/') ? base_path : "#{base_path}/"
        URI.join(directory_url, resource).to_s
      elsif File.directory?(base_path)
        File.join(base_path, resource).to_s
      elsif File.file?(base_path)
        base_path
      else
        resource
      end
    end

    # True when string parses as an HTTP(S) URL with a host.
    def is_fully_qualified_url?(string)
      uri = URI.parse(string)
      uri.is_a?(URI::HTTP) && !uri.host.nil?
    rescue URI::InvalidURIError
      false
    end

    # True when string is a relative path that cannot escape its base
    # directory: it must not be absolute, must not start with a segment made
    # only of dots ('.', '..', ...) and must not contain a '..' segment
    # anywhere (e.g. 'data/../../secret').
    def is_safe_path?(string)
      return false if Pathname.new(string).absolute?
      segments = string.to_s.split('/')
      return false if segments.first =~ /^\.+$/
      !segments.include?('..')
    end

  end
end
@@ -0,0 +1,212 @@
1
+ require 'open-uri'
2
+
3
module DataPackage
  # A Data Package descriptor. The descriptor's properties are stored in the
  # Hash itself; its 'resources' entry holds Resource objects once loaded.
  class Package < Hash
    include DataPackage::Helpers

    attr_reader :opts, :errors, :profile, :dead_resources

    # Parse or create a data package
    # Supports reading data from a Hash, a JSON file, a zip archive, and a URL
    # descriptor:: Hash or String (nil creates an empty package)
    # opts:: Options used to customize reading and parsing; this class reads
    #        :base and :default_filename, and sets :base itself after
    #        unpacking a zip archive
    #
    # Raises PackageException when a remote descriptor cannot be fetched or the
    # descriptor is not valid JSON.
    def initialize(descriptor = nil, opts: {})
      @opts = opts
      @dead_resources = []
      self.merge! parse_package(descriptor)
      @profile = DataPackage::Profile.new(self.fetch('profile', DataPackage::DEFAULTS[:package][:profile]))
      self['profile'] = @profile.name
      define_properties!
      load_resources!
    rescue OpenURI::HTTPError, SocketError => e
      raise PackageException.new "Package URL returned #{e.message}"
    rescue JSON::ParserError
      raise PackageException.new 'Package descriptor is not valid JSON'
    end

    # The descriptor as a plain Hash.
    def descriptor
      self.to_h
    end

    # Returns the directory for a local file package or base url for a remote
    # Returns an empty String for an in-memory object (it has no location yet)
    def base
      # user can override base
      return @opts[:base] if @opts[:base]
      return '' unless @location
      # work out base directory or uri
      if local?
        File.dirname(@location)
      else
        @location.split('/')[0..-2].join('/')
      end
    end

    # Is this a local package? Returns true if created from an in-memory object or a file/directory reference
    def local?
      return @local if @local
      return false if @location =~ /\A#{URI::regexp}\z/
      true
    end

    # The package's resources, after converting any raw entries to Resource
    # objects.
    def resources
      update_resources!
      self['resources']
    end

    # Names of all resources in the package.
    def resource_names
      update_resources!
      self['resources'].map { |res| res.name }
    end

    # True when the descriptor is valid against the profile and no resource
    # reports itself invalid.
    def valid?
      return false unless @profile.valid?(self)
      return false if self['resources'].map{ |resource| resource.valid? }.include?(false)
      true
    end

    alias :valid :valid?

    # Validate the descriptor and every resource. Returns true; the first
    # failure raised by the profile or a resource propagates to the caller.
    def validate
      @profile.validate(self)
      self['resources'].each { |resource| resource.validate }
      true
    end

    # Yields every validation error of the package followed by those of its
    # resources. Without a block, returns an Enumerator over the same errors
    # instead of failing with LocalJumpError.
    def iter_errors
      return enum_for(:iter_errors) unless block_given?
      errors = @profile.iter_errors(self){ |err| err }
      self['resources'].each do |resource|
        resource.iter_errors{ |err| errors << err }
      end
      errors.each{ |error| yield error }
    end

    # Add a resource and re-validate the package.
    # Returns the loaded resource, or nil (leaving the package unchanged) when
    # validation raises DataPackage::ValidationError.
    def add_resource(resource)
      resource = load_resource(resource)
      self['resources'].push(resource)
      begin
        self.validate
        resource
      rescue DataPackage::ValidationError
        # Roll back so an invalid resource never stays in the package.
        self['resources'].pop
        nil
      end
    end

    # Remove every resource called resource_name.
    # Returns the first matching resource, or nil when there is none.
    def remove_resource(resource_name)
      update_resources!
      removed = get_resource(resource_name)
      self['resources'].reject!{ |candidate| candidate.name == resource_name }
      removed
    end

    # First resource called resource_name, or nil.
    def get_resource(resource_name)
      update_resources!
      self['resources'].find{ |resource| resource.name == resource_name }
    end

    # Write the package as pretty-printed JSON to target (defaults to the
    # location the package was read from). Returns true.
    def save(target=@location)
      update_resources!
      File.open(target, "w") { |file| file << JSON.pretty_generate(self) }
      true
    end

    # Value stored under property, or default when it is nil/false/absent.
    def property(property, default = nil)
      self[property] || default
    end

    private

    # Define a reader and a writer on this instance for every property the
    # profile lists, except 'resources' and 'profile'.
    def define_properties!
      (@profile['properties'] || {}).each do |k, v|
        next if k == 'resources' || k == 'profile'
        define_singleton_method("#{k.to_sym}=", proc { |p| set_property(k, p) })
        define_singleton_method(k.to_sym.to_s, proc { property k, default_value(v) })
      end
    end

    def load_resources!
      self['resources'] ||= []
      update_resources!
    end

    # Convert raw resource descriptors to Resource objects in place.
    # Entries that raise ResourceException are moved to #dead_resources.
    def update_resources!
      self['resources'].map! do |resource|
        begin
          load_resource(resource)
        rescue ResourceException
          @dead_resources << resource
          nil
        end
      end.compact!
    end

    def load_resource(resource)
      if resource.is_a?(Resource)
        resource
      else
        Resource.new(resource, base)
      end
    end

    # Default for a property reader, chosen from the property's schema type.
    def default_value(field_data)
      case field_data['type']
      when 'array'
        []
      when 'object'
        {}
      else
        nil
      end
    end

    def set_property(key, value)
      self[key] = value
    end

    # Turn the constructor argument into a descriptor Hash.
    def parse_package(descriptor)
      # TODO: base directory/url
      if descriptor.nil?
        {}
      elsif descriptor.is_a?(Hash)
        # is_a? rather than an exact class check, so Hash subclasses (for
        # example another Package) are accepted as in-memory descriptors.
        descriptor
      else
        read_package(descriptor)
      end
    end

    def read_package(descriptor)
      if File.extname(descriptor) == '.zip'
        unzip_package(descriptor)
      else
        @location = descriptor.to_s
        load_json(descriptor)
      end
    end

    # Unpack a local or remote zip archive into a fresh temporary directory and
    # return the parsed manifest found one directory level down in the archive.
    # Every entry is extracted under its base name, directly into that directory.
    #
    # Raises PackageException when the archive contains no manifest.
    def unzip_package(descriptor)
      tempfile = write_to_tempfile(descriptor.to_s) if descriptor.to_s =~ /\A#{URI::regexp}\z/
      archive = tempfile || descriptor
      manifest_name = @opts[:default_filename] || 'datapackage.json'
      dir = Dir.mktmpdir
      package = {}
      Zip::File.open(archive) do |zip_file|
        # Extract all the files
        zip_file.each { |entry| entry.extract("#{dir}/#{File.basename entry.name}") }
        # Get and parse the datapackage metadata
        entry = zip_file.glob("*/#{manifest_name}").first
        if entry.nil?
          raise PackageException.new "Package archive does not contain #{manifest_name}"
        end
        package = JSON.parse(entry.get_input_stream.read)
      end
      # Set the base dir to the directory we unzipped to
      @opts[:base] = dir
      # This is now a local file, not a URL
      @local = true
      package
    ensure
      # The download is only needed while unpacking; close and delete it.
      tempfile.close! if tempfile
    end

    # Download url into a Tempfile and return it, rewound to the start.
    # The caller is responsible for closing the Tempfile.
    def write_to_tempfile(url)
      remote = URI.parse(url)
      # Kernel#open no longer opens URLs on Ruby 3.0+, so go through the URI
      # object; the block form also closes the remote stream. URIs without
      # open-uri support fall back to reading a local file of that name.
      body = remote.respond_to?(:open) ? remote.open(&:read) : File.binread(url)
      tempfile = Tempfile.new('datapackage')
      # Zip archives are binary data.
      tempfile.binmode
      tempfile.write(body)
      tempfile.rewind
      tempfile
    end
  end
end
@@ -0,0 +1,62 @@
1
module DataPackage
  # A JSON Schema profile that descriptors are validated against. The schema
  # document is stored in the Hash itself.
  class Profile < Hash
    include DataPackage::Helpers

    attr_reader :name, :registry

    # descriptor:: String; either a fully qualified HTTP(S) URL of a schema or
    #              the id of a profile in the registry
    #
    # Raises ProfileException when descriptor is not a String, the URL cannot
    # be fetched, the document is not valid JSON, or the id is not in the
    # registry.
    def initialize(descriptor)
      unless descriptor.is_a?(String)
        raise ProfileException.new 'Profile must be a URL or registry identifier'
      end
      @name = descriptor
      if is_fully_qualified_url?(descriptor)
        self.merge!(load_json(descriptor))
      else
        self.merge!(get_profile_from_registry(descriptor))
      end
    rescue OpenURI::HTTPError, SocketError => e
      raise ProfileException.new "Profile URL returned #{e.message}"
    rescue JSON::ParserError
      raise ProfileException.new 'Profile is not valid JSON'
    end

    # The schema as a plain Hash.
    def jsonschema
      self.to_h
    end

    # Returns true if there are no errors in data, false if there are
    def valid?(data)
      JSON::Validator.validate(self, data)
    end

    alias :valid :valid?

    # Validate data against this profile. Returns true or raises DataPackage::ValidationError
    def validate(data)
      JSON::Validator.validate!(self, data)
    rescue JSON::Schema::ValidationError => e
      raise DataPackage::ValidationError.new(e.message)
    end

    # Yields each error JSON::Validator.fully_validate reports for data and
    # returns the collection it iterated over. Validation has finished before
    # the first error is yielded. Without a block, returns an Enumerator over
    # the errors instead of failing with LocalJumpError.
    def iter_errors(data)
      return enum_for(:iter_errors, data) unless block_given?
      JSON::Validator.fully_validate(self, data).each{ |error| yield error }
    end

    private

    # Load the schema of the registry profile with id descriptor. The schema
    # is read from the registry entry's 'schema_path' (resolved against the
    # registry's own location) when present, otherwise from its 'schema'.
    def get_profile_from_registry(descriptor)
      @registry = DataPackage::Registry.new
      profile_metadata = registry.profiles.fetch(descriptor)
      if profile_metadata.fetch('schema_path', nil)
        profile_path = join_paths(base_path(registry.path), profile_metadata['schema_path'])
      else
        profile_path = profile_metadata['schema']
      end
      load_json(profile_path)
    rescue KeyError
      raise ProfileException.new "Couldn't find profile with id `#{descriptor}` in registry"
    end

  end
end
@@ -0,0 +1,36 @@
1
module DataPackage
  # Allow loading Data Package profiles from the official registry.
  # This class always reads the registry file at DEFAULT_REGISTRY_PATH.
  class Registry
    include DataPackage::Helpers

    # path::     location the registry was read from
    # profiles:: Hash of profile id => profile metadata
    attr_reader :path, :profiles

    DEFAULT_REGISTRY_URL = 'https://specs.frictionlessdata.io/schemas/registry.json'.freeze
    DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), '..', 'profiles', 'registry.json').freeze

    # Raises RegistryException when the registry file is missing, cannot be
    # fetched, is not valid JSON, or contains a profile without an `id`.
    def initialize
      @path = DEFAULT_REGISTRY_PATH
      @profiles = get_registry(DEFAULT_REGISTRY_PATH)
    rescue Errno::ENOENT
      raise RegistryException.new 'Registry path is not valid'
    rescue OpenURI::HTTPError, SocketError => e
      raise RegistryException.new "Registry URL returned #{e.message}"
    rescue JSON::ParserError
      raise RegistryException.new 'Registry descriptor is not valid JSON'
    rescue KeyError
      raise RegistryException.new 'Property `id` is mandatory for profiles'
    end

    private

    # Load the registry document and index its entries by their 'id'.
    def get_registry(descriptor)
      load_json(descriptor).each_with_object({}) do |profile, registry|
        # fetch (not []) so an entry without 'id' raises KeyError, which
        # #initialize reports as a RegistryException, instead of being stored
        # silently under a nil key.
        registry[profile.fetch('id')] = profile
      end
    end

  end
end