datapackage 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ module DataPackage
2
+ ##
3
+ # Allow loading Data Package profiles from a registry.
4
+
5
+ class Registry
6
+
7
+ DEFAULT_REGISTRY_URL = 'http://schemas.datapackages.org/registry.csv'
8
+ DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), '..', '..', 'datapackage', 'schemas', 'registry.csv')
9
+
10
+ attr_reader :base_path
11
+
12
+ def initialize(registry_path_or_url = DEFAULT_REGISTRY_PATH)
13
+ registry_path_or_url ||= DEFAULT_REGISTRY_PATH
14
+ if File.file?(registry_path_or_url)
15
+ @base_path = File.dirname(
16
+ File.absolute_path(registry_path_or_url)
17
+ )
18
+ end
19
+ @profiles = {}
20
+ @registry = get_registry(registry_path_or_url)
21
+ end
22
+
23
+ def get(profile_id)
24
+ @profiles[profile_id] ||= get_profile(profile_id)
25
+ end
26
+
27
+ def available_profiles
28
+ @registry
29
+ end
30
+
31
+ private
32
+
33
+ def get_profile(profile_id)
34
+ profile_metadata = @registry[profile_id]
35
+ return if profile_metadata.nil?
36
+
37
+ path = get_absolute_path(profile_metadata[:schema_path])
38
+
39
+ if path && File.file?(path)
40
+ load_json(path)
41
+ else
42
+ url = profile_metadata[:schema]
43
+ load_json(url)
44
+ end
45
+ end
46
+
47
+ def get_registry(registry_path_or_url)
48
+ begin
49
+ csv = parse_csv(registry_path_or_url)
50
+ registry = {}
51
+ csv.each { |row| registry[row.fetch(:id)] = Hash[row.headers.zip(row.fields)] }
52
+ rescue KeyError, OpenURI::HTTPError, Errno::ENOENT
53
+ raise(RegistryError)
54
+ end
55
+ registry
56
+ end
57
+
58
+ def parse_csv(path_or_url)
59
+ csv = open(path_or_url).read
60
+ if csv.match(/,/)
61
+ CSV.new(csv, headers: :first_row, header_converters: :symbol)
62
+ else
63
+ raise RegistryError
64
+ end
65
+ end
66
+
67
+ def get_absolute_path(relative_path)
68
+ File.join(@base_path, relative_path)
69
+ rescue TypeError
70
+ nil
71
+ end
72
+
73
+ def load_json(path)
74
+ json = open(path).read
75
+ JSON.parse(json)
76
+ rescue JSON::ParserError, OpenURI::HTTPError
77
+ raise RegistryError
78
+ end
79
+
80
+ end
81
+ end
@@ -0,0 +1,79 @@
1
+ module DataPackage
2
+ class Resource < Hash
3
+
4
+ def initialize(resource, base_path = '')
5
+ self.merge! resource
6
+ end
7
+
8
+ def self.load(resource, base_path = '')
9
+ # This returns if there are no alternative ways to access the data OR there
10
+ # is a base_path which is a URL
11
+ if is_url?(resource, base_path)
12
+ RemoteResource.new(resource, base_path)
13
+ else
14
+ # If there's a data attribute, we definitely want an inline resource
15
+ if resource['data']
16
+ InlineResource.new(resource)
17
+ else
18
+ # If the file exists - we want a local resource
19
+ if file_exists?(resource, base_path)
20
+ LocalResource.new(resource, base_path)
21
+ # If it doesn't exist and there's a URL to grab the data from, we want
22
+ # a remote resource
23
+ elsif resource['url']
24
+ RemoteResource.new(resource, base_path)
25
+ end
26
+ end
27
+ end
28
+ end
29
+
30
+ def self.file_exists?(resource, base_path)
31
+ path = resource['path']
32
+ path = File.join(base_path, path) if base_path != ''
33
+ File.exists?(path)
34
+ end
35
+
36
+ def self.is_url?(resource, base_path)
37
+ return true if resource['url'] != nil && resource['path'] == nil && resource['data'] == nil
38
+ return true if base_path.start_with?('http')
39
+ end
40
+
41
+ end
42
+
43
+ class LocalResource < Resource
44
+
45
+ def initialize(resource, base_path = '')
46
+ @base_path = base_path
47
+ @path = resource['path']
48
+ super
49
+ end
50
+
51
+ def data
52
+ @path = File.join(@base_path, @path) if @base_path != ''
53
+ open(@path).read
54
+ end
55
+
56
+ end
57
+
58
+ class InlineResource < Resource
59
+ def data
60
+ self['data']
61
+ end
62
+ end
63
+
64
+ class RemoteResource < Resource
65
+
66
+ def initialize(resource, base_url = '')
67
+ @base_url = base_url
68
+ @url = resource['url']
69
+ @path = resource['path']
70
+ super
71
+ end
72
+
73
+ def data
74
+ url = @url ? @url : URI.join(@base_url, @path)
75
+ open(url).read
76
+ end
77
+
78
+ end
79
+ end
@@ -0,0 +1,111 @@
1
+ module DataPackage
2
+ class Schema < Hash
3
+
4
+ attr_reader :schema
5
+
6
+ def initialize(schema, options = {})
7
+ @registry_url = options[:registry_url]
8
+ if schema.class == Hash
9
+ self.merge! schema
10
+ elsif schema.class == Symbol
11
+ self.merge! get_schema_from_registry schema
12
+ elsif schema.class == String
13
+ self.merge! load_schema(schema)
14
+ else
15
+ raise SchemaException.new "Schema must be a URL, path, Hash or registry-identifier"
16
+ end
17
+ end
18
+
19
+ # https://gist.github.com/vdw/f3c832df8ce271a036f2
20
+ def hash_to_slashed_path(hash, path = "")
21
+ return {} unless hash
22
+ hash.each_with_object({}) do |(k, v), ret|
23
+ key = path + k.to_s
24
+
25
+ if v.is_a? Hash
26
+ ret.merge! hash_to_slashed_path(v, key.to_s + "/")
27
+ else
28
+ ret[key] = v
29
+ end
30
+ end
31
+ end
32
+
33
+ def dereference_schema path_or_url, schema
34
+ paths = hash_to_slashed_path schema
35
+ ref_keys = paths.keys.select { |p| p =~ /\$ref/ }
36
+ if ref_keys
37
+ ref_keys = [ref_keys] unless ref_keys.is_a? Array
38
+
39
+ ref_keys.each do |key|
40
+ path = key.split('/')[0..-2]
41
+
42
+ replacement = resolve(schema.dig(*path, '$ref'), path_or_url, schema)
43
+
44
+ s = "schema#{path.map { |k| "['#{k}']" }.join}.merge! replacement"
45
+ eval s
46
+ s = "schema#{path.map { |k| "['#{k}']" }.join}.delete '$ref'"
47
+ eval s
48
+ end
49
+ end
50
+
51
+ schema
52
+ end
53
+
54
+ def resolve reference, path_or_url, schema
55
+ base_path = base_path path_or_url
56
+ filename, reference = reference.split '#'
57
+ if filename == ''
58
+ schema['define'][reference.split('/').last]
59
+ else
60
+ dereference_schema("#{base_path}/#{filename}", get_definitions(filename, base_path)).dig(*reference.split('/').reject(&:empty?))
61
+ end
62
+ end
63
+
64
+ def get_definitions filename, base_path
65
+ JSON.parse open("#{base_path}/#{filename}").read
66
+ end
67
+
68
+ def base_path path_or_url
69
+ if path_or_url =~ /\A#{URI::regexp}\z/
70
+ uri = URI.parse path_or_url
71
+ return "#{uri.scheme}://#{uri.host}#{File.dirname uri.path}".chomp('/')
72
+ else
73
+
74
+ if File.directory?(path_or_url)
75
+ return path_or_url
76
+ else
77
+ return File.expand_path File.dirname path_or_url
78
+ end
79
+ end
80
+ end
81
+
82
+ def load_schema(path_or_url)
83
+ json = open(path_or_url).read
84
+ schema = JSON.parse json
85
+ s = dereference_schema path_or_url, schema
86
+
87
+ rescue OpenURI::HTTPError => e
88
+ raise SchemaException.new "Schema URL returned #{e.message}"
89
+
90
+ rescue JSON::ParserError
91
+ raise SchemaException.new 'Schema is not valid JSON'
92
+
93
+ rescue Errno::ENOENT
94
+ raise SchemaException.new "Path '#{path_or_url}' does not exist"
95
+ end
96
+
97
+ def get_schema_from_registry schema
98
+ d = DataPackage::Registry.new(@registry_url)
99
+ dereference_schema (@registry_url || d.base_path), d.get(schema.to_s)
100
+ end
101
+
102
+ def valid?(package)
103
+ JSON::Validator.validate(self, package)
104
+ end
105
+
106
+ def validation_errors(package)
107
+ JSON::Validator.fully_validate(self, package)
108
+ end
109
+
110
+ end
111
+ end
@@ -1,3 +1,3 @@
1
1
  module DataPackage
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/datapackage.rb CHANGED
@@ -5,7 +5,12 @@ require 'csv'
5
5
  require 'json'
6
6
  require 'json-schema'
7
7
  require 'rest-client'
8
+ require 'zip'
9
+ require 'ruby_dig'
8
10
 
9
11
  require 'datapackage/version'
10
- require 'datapackage/validator'
11
- require 'datapackage/package'
12
+ require 'datapackage/exceptions'
13
+ require 'datapackage/schema'
14
+ require 'datapackage/resource'
15
+ require 'datapackage/package'
16
+ require 'datapackage/registry'
metadata CHANGED
@@ -1,116 +1,216 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datapackage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leigh Dodds
8
+ - pezholio
9
+ - pikesley
8
10
  autorequire:
9
11
  bindir: bin
10
12
  cert_chain: []
11
- date: 2014-01-06 00:00:00.000000000 Z
13
+ date: 2016-09-13 00:00:00.000000000 Z
12
14
  dependencies:
13
15
  - !ruby/object:Gem::Dependency
14
16
  name: json
15
17
  requirement: !ruby/object:Gem::Requirement
16
18
  requirements:
17
- - - ! '>='
19
+ - - "~>"
18
20
  - !ruby/object:Gem::Version
19
- version: '0'
21
+ version: 1.8.3
20
22
  type: :runtime
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
23
25
  requirements:
24
- - - ! '>='
26
+ - - "~>"
25
27
  - !ruby/object:Gem::Version
26
- version: '0'
28
+ version: 1.8.3
27
29
  - !ruby/object:Gem::Dependency
28
30
  name: json-schema
29
31
  requirement: !ruby/object:Gem::Requirement
30
32
  requirements:
31
- - - ! '>='
33
+ - - ">="
32
34
  - !ruby/object:Gem::Version
33
35
  version: '0'
34
36
  type: :runtime
35
37
  prerelease: false
36
38
  version_requirements: !ruby/object:Gem::Requirement
37
39
  requirements:
38
- - - ! '>='
40
+ - - ">="
39
41
  - !ruby/object:Gem::Version
40
42
  version: '0'
41
43
  - !ruby/object:Gem::Dependency
42
44
  name: rest-client
43
45
  requirement: !ruby/object:Gem::Requirement
44
46
  requirements:
45
- - - ! '>='
47
+ - - ">="
46
48
  - !ruby/object:Gem::Version
47
49
  version: '0'
48
50
  type: :runtime
49
51
  prerelease: false
50
52
  version_requirements: !ruby/object:Gem::Requirement
51
53
  requirements:
52
- - - ! '>='
54
+ - - ">="
53
55
  - !ruby/object:Gem::Version
54
56
  version: '0'
55
57
  - !ruby/object:Gem::Dependency
56
58
  name: colorize
57
59
  requirement: !ruby/object:Gem::Requirement
58
60
  requirements:
59
- - - ! '>='
61
+ - - ">="
60
62
  - !ruby/object:Gem::Version
61
63
  version: '0'
62
64
  type: :runtime
63
65
  prerelease: false
64
66
  version_requirements: !ruby/object:Gem::Requirement
65
67
  requirements:
66
- - - ! '>='
68
+ - - ">="
67
69
  - !ruby/object:Gem::Version
68
70
  version: '0'
69
71
  - !ruby/object:Gem::Dependency
70
- name: rspec
72
+ name: rack
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - "~>"
76
+ - !ruby/object:Gem::Version
77
+ version: 1.6.4
78
+ type: :runtime
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - "~>"
83
+ - !ruby/object:Gem::Version
84
+ version: 1.6.4
85
+ - !ruby/object:Gem::Dependency
86
+ name: rubyzip
87
+ requirement: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ type: :runtime
93
+ prerelease: false
94
+ version_requirements: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ - !ruby/object:Gem::Dependency
100
+ name: ruby_dig
101
+ requirement: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ type: :runtime
107
+ prerelease: false
108
+ version_requirements: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ - !ruby/object:Gem::Dependency
114
+ name: bundler
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: '1.3'
120
+ type: :development
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - "~>"
125
+ - !ruby/object:Gem::Version
126
+ version: '1.3'
127
+ - !ruby/object:Gem::Dependency
128
+ name: rake
71
129
  requirement: !ruby/object:Gem::Requirement
72
130
  requirements:
73
- - - ! '>='
131
+ - - ">="
74
132
  - !ruby/object:Gem::Version
75
133
  version: '0'
76
134
  type: :development
77
135
  prerelease: false
78
136
  version_requirements: !ruby/object:Gem::Requirement
79
137
  requirements:
80
- - - ! '>='
138
+ - - ">="
81
139
  - !ruby/object:Gem::Version
82
140
  version: '0'
83
141
  - !ruby/object:Gem::Dependency
84
- name: simplecov-rcov
142
+ name: rspec
85
143
  requirement: !ruby/object:Gem::Requirement
86
144
  requirements:
87
- - - ! '>='
145
+ - - ">="
88
146
  - !ruby/object:Gem::Version
89
147
  version: '0'
90
148
  type: :development
91
149
  prerelease: false
92
150
  version_requirements: !ruby/object:Gem::Requirement
93
151
  requirements:
94
- - - ! '>='
152
+ - - ">="
95
153
  - !ruby/object:Gem::Version
96
154
  version: '0'
97
155
  - !ruby/object:Gem::Dependency
98
156
  name: fakeweb
99
157
  requirement: !ruby/object:Gem::Requirement
100
158
  requirements:
101
- - - ~>
159
+ - - "~>"
102
160
  - !ruby/object:Gem::Version
103
161
  version: '1.3'
104
162
  type: :development
105
163
  prerelease: false
106
164
  version_requirements: !ruby/object:Gem::Requirement
107
165
  requirements:
108
- - - ~>
166
+ - - "~>"
109
167
  - !ruby/object:Gem::Version
110
168
  version: '1.3'
169
+ - !ruby/object:Gem::Dependency
170
+ name: coveralls
171
+ requirement: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ type: :development
177
+ prerelease: false
178
+ version_requirements: !ruby/object:Gem::Requirement
179
+ requirements:
180
+ - - ">="
181
+ - !ruby/object:Gem::Version
182
+ version: '0'
183
+ - !ruby/object:Gem::Dependency
184
+ name: pry
185
+ requirement: !ruby/object:Gem::Requirement
186
+ requirements:
187
+ - - ">="
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ type: :development
191
+ prerelease: false
192
+ version_requirements: !ruby/object:Gem::Requirement
193
+ requirements:
194
+ - - ">="
195
+ - !ruby/object:Gem::Version
196
+ version: '0'
197
+ - !ruby/object:Gem::Dependency
198
+ name: henry
199
+ requirement: !ruby/object:Gem::Requirement
200
+ requirements:
201
+ - - ">="
202
+ - !ruby/object:Gem::Version
203
+ version: '0'
204
+ type: :development
205
+ prerelease: false
206
+ version_requirements: !ruby/object:Gem::Requirement
207
+ requirements:
208
+ - - ">="
209
+ - !ruby/object:Gem::Version
210
+ version: '0'
111
211
  description:
112
212
  email:
113
- - leigh@ldodds.com
213
+ - ops@theodi.org
114
214
  executables:
115
215
  - datapackage
116
216
  extensions: []
@@ -119,16 +219,16 @@ files:
119
219
  - LICENSE.md
120
220
  - README.md
121
221
  - bin/datapackage
122
- - etc/README.md
123
- - etc/csvddf-dialect-schema.json
124
- - etc/datapackage-schema.json
125
- - etc/jsontable-schema.json
126
222
  - lib/datapackage.rb
223
+ - lib/datapackage/exceptions.rb
127
224
  - lib/datapackage/package.rb
128
- - lib/datapackage/validator.rb
225
+ - lib/datapackage/registry.rb
226
+ - lib/datapackage/resource.rb
227
+ - lib/datapackage/schema.rb
129
228
  - lib/datapackage/version.rb
130
229
  homepage: http://github.com/theodi/datapackage.rb
131
- licenses: []
230
+ licenses:
231
+ - MIT
132
232
  metadata: {}
133
233
  post_install_message:
134
234
  rdoc_options: []
@@ -136,17 +236,17 @@ require_paths:
136
236
  - lib
137
237
  required_ruby_version: !ruby/object:Gem::Requirement
138
238
  requirements:
139
- - - ! '>='
239
+ - - ">="
140
240
  - !ruby/object:Gem::Version
141
- version: '0'
241
+ version: '2.0'
142
242
  required_rubygems_version: !ruby/object:Gem::Requirement
143
243
  requirements:
144
- - - ! '>='
244
+ - - ">="
145
245
  - !ruby/object:Gem::Version
146
246
  version: '0'
147
247
  requirements: []
148
248
  rubyforge_project:
149
- rubygems_version: 2.2.0
249
+ rubygems_version: 2.5.1
150
250
  signing_key:
151
251
  specification_version: 4
152
252
  summary: Library for working with data packages
data/etc/README.md DELETED
@@ -1,18 +0,0 @@
1
- This directory contains some JSON Schema documents for validating:
2
-
3
- * `datapackage-schema.json` -- [datapackage.json](http://dataprotocols.org/data-packages/) package files
4
- * `jsontable-schema.json` -- [JSON Table Schemas](http://dataprotocols.org/json-table-schema/) objects
5
- * `csvddf-dialect-schema.json` -- [CSV Dialect Description Format](http://dataprotocols.org/csv-dialect/) dialect objects
6
-
7
- The JSON Table Schemas and CSV Dialect Description Format both define JSON object structures that can appear in `datapackage.json` files (via the `schema` and `dialect` keywords). In the main `datapackage-schema.json` object, these keywords are only validated as simple objects.
8
-
9
- In the application the subsidiary schemas are automatically applied to relevant keys. This could be improved by using JSON Schema cross-referencing.
10
-
11
- Other potential improvements include:
12
-
13
- * Add `data` keyword validation to `datapackage-schema.json`
14
- * Add `format` keywords for validating email addresses and date/date-times
15
- * Or, add `pattern` for validating dates
16
- * Improve regexs used in various places
17
-
18
-
@@ -1,24 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-04/schema#",
3
- "title": "CSVDDF",
4
- "description": "JSON Schema for validating CSVDDF dialect structures",
5
- "type": "object",
6
- "properties": {
7
- "delimiter": {
8
- "type": "string"
9
- },
10
- "doublequote": {
11
- "type": "boolean"
12
- },
13
- "lineterminator": {
14
- "type": "string"
15
- },
16
- "quotechar": {
17
- "type": "string"
18
- },
19
- "skipinitialspace": {
20
- "type": "boolean"
21
- }
22
- },
23
- "required": [ "delimiter", "doublequote", "lineterminator", "quotechar", "skipinitialspace" ]
24
- }