datapackage 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,81 @@
1
module DataPackage
  ##
  # Allow loading Data Package profiles from a registry.
  #
  # A registry is a CSV document (local file or URL) with a header row that
  # includes an +id+ column. Each row describes one profile; the +schema_path+
  # column (relative to the registry file) and the +schema+ column (a URL) say
  # where the profile's JSON schema can be loaded from.

  class Registry

    DEFAULT_REGISTRY_URL = 'http://schemas.datapackages.org/registry.csv'
    DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), '..', '..', 'datapackage', 'schemas', 'registry.csv')

    # Directory containing the registry file, or nil when the registry was not
    # loaded from a local file.
    attr_reader :base_path

    # Load the registry index.
    #
    # @param registry_path_or_url [String, nil] local path or URL of the
    #   registry CSV; nil falls back to DEFAULT_REGISTRY_PATH
    # @raise [RegistryError] when the registry cannot be read or is malformed
    def initialize(registry_path_or_url = DEFAULT_REGISTRY_PATH)
      registry_path_or_url ||= DEFAULT_REGISTRY_PATH
      if File.file?(registry_path_or_url)
        @base_path = File.dirname(File.absolute_path(registry_path_or_url))
      end
      @profiles = {}
      @registry = get_registry(registry_path_or_url)
    end

    # Return the parsed JSON schema for +profile_id+, loading it on first use
    # and caching it afterwards. Returns nil for an unknown profile id.
    def get(profile_id)
      @profiles[profile_id] ||= get_profile(profile_id)
    end

    # Hash of profile id => row metadata (keys are symbolized CSV headers).
    def available_profiles
      @registry
    end

    private

    # Load a profile's schema, preferring the local copy next to the registry
    # file and falling back to the URL in the +schema+ column.
    def get_profile(profile_id)
      profile_metadata = @registry[profile_id]
      return if profile_metadata.nil?

      path = get_absolute_path(profile_metadata[:schema_path])

      if path && File.file?(path)
        load_json(path)
      else
        load_json(profile_metadata[:schema])
      end
    end

    # Build the id => metadata index from the registry CSV.
    # A missing +id+ column, an HTTP failure or a missing file all surface as
    # RegistryError.
    def get_registry(registry_path_or_url)
      parse_csv(registry_path_or_url).each_with_object({}) do |row, registry|
        registry[row.fetch(:id)] = Hash[row.headers.zip(row.fields)]
      end
    rescue KeyError, OpenURI::HTTPError, Errno::ENOENT
      raise(RegistryError)
    end

    # Read the registry and wrap it in a CSV reader with symbolized headers.
    # Content without a single comma cannot be a registry and is rejected.
    def parse_csv(path_or_url)
      csv = read_source(path_or_url)
      raise RegistryError unless csv.include?(',')

      CSV.new(csv, headers: :first_row, header_converters: :symbol)
    end

    # Resolve +relative_path+ against the registry directory. Returns nil when
    # there is no local base path or the row has no schema_path.
    def get_absolute_path(relative_path)
      return if @base_path.nil? || relative_path.nil?

      File.join(@base_path, relative_path)
    end

    # Read and parse a JSON document from a local path or URL.
    def load_json(path)
      JSON.parse(read_source(path))
    rescue JSON::ParserError, OpenURI::HTTPError
      raise RegistryError
    end

    # Return the full content of a local file or remote document.
    #
    # Kernel#open is deliberately avoided: it runs a subprocess for strings
    # starting with "|" and stopped opening URLs in Ruby 3.0. The block forms
    # used here also guarantee the handle is closed.
    def read_source(path_or_url)
      uri = remote_uri(path_or_url)
      uri ? uri.open(&:read) : File.open(path_or_url, &:read)
    end

    # Parse +source+ as a URI that open-uri knows how to fetch; nil otherwise.
    def remote_uri(source)
      return unless source.to_s =~ %r{\A[A-Za-z][A-Za-z0-9+.\-]*://}

      uri = URI.parse(source.to_s)
      uri if uri.respond_to?(:open)
    rescue URI::InvalidURIError
      nil
    end

  end
end
@@ -0,0 +1,79 @@
1
module DataPackage
  # A single resource entry of a data package, stored as a Hash of its
  # descriptor. Use Resource.load to get the subclass whose #data knows how to
  # fetch the content.
  class Resource < Hash

    # @param resource [Hash] the resource descriptor
    # @param base_path [String] unused here; accepted so subclasses can call
    #   +super+ with their own arguments
    def initialize(resource, base_path = '')
      self.merge! resource
    end

    # Pick the resource class matching how the data can be reached.
    #
    # @param resource [Hash] descriptor with 'url', 'path' and/or 'data'
    # @param base_path [String] directory or URL that 'path' is relative to
    # @return [Resource, nil] nil when no data source is usable
    def self.load(resource, base_path = '')
      # Only a URL is given, or the package itself lives at a URL.
      return RemoteResource.new(resource, base_path) if is_url?(resource, base_path)
      # If there's a data attribute, we definitely want an inline resource.
      return InlineResource.new(resource) if resource['data']
      # If the file exists - we want a local resource.
      return LocalResource.new(resource, base_path) if file_exists?(resource, base_path)

      # The file doesn't exist, but there may be a URL to grab the data from.
      RemoteResource.new(resource, base_path) if resource['url']
    end

    # True when the resource's 'path' (joined onto +base_path+ if one is
    # given) exists locally. A resource without a 'path' has no file.
    def self.file_exists?(resource, base_path)
      path = resource['path']
      return false if path.nil?

      path = File.join(base_path, path) unless base_path.to_s.empty?
      File.exist?(path)
    end

    # True when the resource can only be reached remotely: either it carries
    # nothing but a 'url', or +base_path+ is an http(s) URL. The scheme check
    # requires "://" so local directories such as "httpdocs" are not mistaken
    # for URLs.
    def self.is_url?(resource, base_path)
      return true if !resource['url'].nil? && resource['path'].nil? && resource['data'].nil?

      !!(base_path.to_s =~ %r{\Ahttps?://}i)
    end

  end

  # Resource whose content is a file on the local filesystem.
  class LocalResource < Resource

    def initialize(resource, base_path = '')
      @base_path = base_path
      @path = resource['path']
      super
    end

    # Read the whole file. The path is computed on each call without touching
    # @path, so repeated calls keep reading the same file.
    def data
      File.open(local_path, &:read)
    end

    private

    # 'path' resolved against the base path, when one was given.
    def local_path
      @base_path.to_s.empty? ? @path : File.join(@base_path, @path)
    end

  end

  # Resource whose content is embedded in the descriptor itself.
  class InlineResource < Resource
    def data
      self['data']
    end
  end

  # Resource whose content is fetched from a URL.
  class RemoteResource < Resource

    def initialize(resource, base_url = '')
      @base_url = base_url
      @url = resource['url']
      @path = resource['path']
      super
    end

    # Fetch the content from 'url', or from 'path' resolved against the base
    # URL when no 'url' is given.
    #
    # Kernel#open is avoided because it executes strings starting with "|"
    # and no longer opens URLs on Ruby 3+. A source that is not a fetchable
    # URI is read as a plain file, which is what Kernel#open used to do.
    def data
      source = @url || URI.join(@base_url, @path)
      uri = remote_uri(source)
      uri ? uri.open(&:read) : File.open(source.to_s, &:read)
    end

    private

    # +source+ as a URI that open-uri can fetch, or nil.
    def remote_uri(source)
      uri = source.is_a?(URI::Generic) ? source : URI.parse(source.to_s)
      uri if uri.respond_to?(:open)
    rescue URI::InvalidURIError
      nil
    end

  end
end
@@ -0,0 +1,111 @@
1
module DataPackage
  # A JSON Schema held as a Hash, with "$ref" entries replaced by the content
  # they point to. A schema can be given directly as a Hash, loaded from a
  # path or URL (String), or looked up in the profile registry (Symbol).
  class Schema < Hash

    # Matches strings that begin with a URI scheme followed by "://".
    REMOTE_PATTERN = %r{\A[A-Za-z][A-Za-z0-9+.\-]*://}

    attr_reader :schema

    # @param schema [Hash, Symbol, String] schema content, registry profile id,
    #   or path/URL of a JSON schema document
    # @param options [Hash] :registry_url overrides the registry location
    # @raise [SchemaException] for any other kind of +schema+, or when loading
    #   fails
    def initialize(schema, options = {})
      @registry_url = options[:registry_url]
      case schema
      when Hash
        self.merge! schema
      when Symbol
        self.merge! get_schema_from_registry(schema)
      when String
        self.merge! load_schema(schema)
      else
        raise SchemaException.new "Schema must be a URL, path, Hash or registry-identifier"
      end
    end

    # Flatten a nested Hash into { "a/b/c" => leaf } form. Only Hash values
    # are descended into; arrays are treated as leaves.
    # https://gist.github.com/vdw/f3c832df8ce271a036f2
    def hash_to_slashed_path(hash, path = "")
      return {} unless hash
      hash.each_with_object({}) do |(k, v), ret|
        key = path + k.to_s

        if v.is_a? Hash
          ret.merge! hash_to_slashed_path(v, key.to_s + "/")
        else
          ret[key] = v
        end
      end
    end

    # Replace every { "$ref" => "..." } entry in +schema+ (in place) with the
    # content it refers to, and return +schema+.
    #
    # The hash is walked directly rather than by building and eval-ing Ruby
    # source from the keys, so keys containing quotes are handled and schema
    # content can never be executed as code.
    #
    # @param path_or_url [String] location of +schema+, used to find files
    #   named by relative references
    # @raise [SchemaException] when a reference does not resolve to an object
    def dereference_schema path_or_url, schema
      ref_paths = hash_to_slashed_path(schema).keys
                    .map { |key| key.split('/') }
                    .select { |segments| segments.last == '$ref' }

      ref_paths.each do |segments|
        parent = walk(schema, segments[0..-2])
        # An earlier replacement may have overwritten this node.
        next unless parent.is_a?(Hash) && parent['$ref'].is_a?(String)

        reference = parent['$ref']
        replacement = resolve(reference, path_or_url, schema)
        unless replacement.is_a?(Hash)
          raise SchemaException.new "Unable to resolve reference '#{reference}'"
        end

        parent.merge! replacement
        parent.delete '$ref'
      end

      schema
    end

    # Look up what +reference+ ("file.json#/pointer" or "#/pointer") refers
    # to. A reference without a file part is resolved inside +schema+; one
    # with a file part is loaded relative to +path_or_url+ and dereferenced
    # itself first.
    def resolve reference, path_or_url, schema
      filename, pointer = reference.split('#', 2)
      keys = pointer.to_s.split('/').reject(&:empty?)

      if filename.nil? || filename.empty?
        found = walk(schema, keys)
        # Older schemas keep shared definitions under "define" whatever the
        # pointer says; keep honouring that when the pointer itself misses.
        found.nil? && !keys.empty? ? walk(schema, ['define', keys.last]) : found
      else
        root = base_path path_or_url
        walk(dereference_schema("#{root}/#{filename}", get_definitions(filename, root)), keys)
      end
    end

    # Read and parse the JSON document +filename+ found under +base_path+.
    def get_definitions filename, base_path
      JSON.parse read_source("#{base_path}/#{filename}")
    end

    # Directory (or URL "directory") that relative references are resolved
    # against. For URLs the userinfo and any non-default port are kept, and
    # there is no trailing slash.
    def base_path path_or_url
      if path_or_url.to_s =~ REMOTE_PATTERN
        uri = URI.parse path_or_url.to_s
        directory = uri.path.to_s.empty? ? '' : File.dirname(uri.path)
        authority = uri.host.to_s
        authority = "#{uri.userinfo}@#{authority}" if uri.userinfo
        authority = "#{authority}:#{uri.port}" if uri.port && uri.port != uri.default_port
        "#{uri.scheme}://#{authority}#{directory}".chomp('/')
      elsif File.directory?(path_or_url)
        path_or_url
      else
        File.expand_path File.dirname path_or_url
      end
    end

    # Load, parse and dereference the schema at +path_or_url+.
    #
    # @raise [SchemaException] on HTTP errors, invalid JSON or a missing file
    def load_schema(path_or_url)
      dereference_schema path_or_url, JSON.parse(read_source(path_or_url))

    rescue OpenURI::HTTPError => e
      raise SchemaException.new "Schema URL returned #{e.message}"

    rescue JSON::ParserError
      raise SchemaException.new 'Schema is not valid JSON'

    rescue Errno::ENOENT
      raise SchemaException.new "Path '#{path_or_url}' does not exist"
    end

    # Fetch the profile named +schema+ from the registry and dereference it.
    def get_schema_from_registry schema
      d = DataPackage::Registry.new(@registry_url)
      dereference_schema (@registry_url || d.base_path), d.get(schema.to_s)
    end

    # True when +package+ validates against this schema.
    def valid?(package)
      JSON::Validator.validate(self, package)
    end

    # List of validation error messages for +package+.
    def validation_errors(package)
      JSON::Validator.fully_validate(self, package)
    end

    private

    # Follow +keys+ down through nested hashes; nil as soon as a step is
    # missing or not a Hash. An empty key list returns +document+ itself.
    def walk(document, keys)
      keys.reduce(document) { |node, key| node.is_a?(Hash) ? node[key] : nil }
    end

    # Return the full content of a local file or remote document.
    #
    # Kernel#open is deliberately avoided: it runs a subprocess for strings
    # starting with "|" and stopped opening URLs in Ruby 3.0. The block forms
    # used here also guarantee the handle is closed.
    def read_source(path_or_url)
      uri = remote_uri(path_or_url)
      uri ? uri.open(&:read) : File.open(path_or_url, &:read)
    end

    # Parse +source+ as a URI that open-uri knows how to fetch; nil otherwise.
    def remote_uri(source)
      return unless source.to_s =~ REMOTE_PATTERN

      uri = URI.parse(source.to_s)
      uri if uri.respond_to?(:open)
    rescue URI::InvalidURIError
      nil
    end

  end
end
@@ -1,3 +1,3 @@
1
1
  module DataPackage
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/datapackage.rb CHANGED
@@ -5,7 +5,12 @@ require 'csv'
5
5
  require 'json'
6
6
  require 'json-schema'
7
7
  require 'rest-client'
8
+ require 'zip'
9
+ require 'ruby_dig'
8
10
 
9
11
  require 'datapackage/version'
10
- require 'datapackage/validator'
11
- require 'datapackage/package'
12
+ require 'datapackage/exceptions'
13
+ require 'datapackage/schema'
14
+ require 'datapackage/resource'
15
+ require 'datapackage/package'
16
+ require 'datapackage/registry'
metadata CHANGED
@@ -1,116 +1,216 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datapackage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leigh Dodds
8
+ - pezholio
9
+ - pikesley
8
10
  autorequire:
9
11
  bindir: bin
10
12
  cert_chain: []
11
- date: 2014-01-06 00:00:00.000000000 Z
13
+ date: 2016-09-13 00:00:00.000000000 Z
12
14
  dependencies:
13
15
  - !ruby/object:Gem::Dependency
14
16
  name: json
15
17
  requirement: !ruby/object:Gem::Requirement
16
18
  requirements:
17
- - - ! '>='
19
+ - - "~>"
18
20
  - !ruby/object:Gem::Version
19
- version: '0'
21
+ version: 1.8.3
20
22
  type: :runtime
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
23
25
  requirements:
24
- - - ! '>='
26
+ - - "~>"
25
27
  - !ruby/object:Gem::Version
26
- version: '0'
28
+ version: 1.8.3
27
29
  - !ruby/object:Gem::Dependency
28
30
  name: json-schema
29
31
  requirement: !ruby/object:Gem::Requirement
30
32
  requirements:
31
- - - ! '>='
33
+ - - ">="
32
34
  - !ruby/object:Gem::Version
33
35
  version: '0'
34
36
  type: :runtime
35
37
  prerelease: false
36
38
  version_requirements: !ruby/object:Gem::Requirement
37
39
  requirements:
38
- - - ! '>='
40
+ - - ">="
39
41
  - !ruby/object:Gem::Version
40
42
  version: '0'
41
43
  - !ruby/object:Gem::Dependency
42
44
  name: rest-client
43
45
  requirement: !ruby/object:Gem::Requirement
44
46
  requirements:
45
- - - ! '>='
47
+ - - ">="
46
48
  - !ruby/object:Gem::Version
47
49
  version: '0'
48
50
  type: :runtime
49
51
  prerelease: false
50
52
  version_requirements: !ruby/object:Gem::Requirement
51
53
  requirements:
52
- - - ! '>='
54
+ - - ">="
53
55
  - !ruby/object:Gem::Version
54
56
  version: '0'
55
57
  - !ruby/object:Gem::Dependency
56
58
  name: colorize
57
59
  requirement: !ruby/object:Gem::Requirement
58
60
  requirements:
59
- - - ! '>='
61
+ - - ">="
60
62
  - !ruby/object:Gem::Version
61
63
  version: '0'
62
64
  type: :runtime
63
65
  prerelease: false
64
66
  version_requirements: !ruby/object:Gem::Requirement
65
67
  requirements:
66
- - - ! '>='
68
+ - - ">="
67
69
  - !ruby/object:Gem::Version
68
70
  version: '0'
69
71
  - !ruby/object:Gem::Dependency
70
- name: rspec
72
+ name: rack
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - "~>"
76
+ - !ruby/object:Gem::Version
77
+ version: 1.6.4
78
+ type: :runtime
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - "~>"
83
+ - !ruby/object:Gem::Version
84
+ version: 1.6.4
85
+ - !ruby/object:Gem::Dependency
86
+ name: rubyzip
87
+ requirement: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ type: :runtime
93
+ prerelease: false
94
+ version_requirements: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ - !ruby/object:Gem::Dependency
100
+ name: ruby_dig
101
+ requirement: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ type: :runtime
107
+ prerelease: false
108
+ version_requirements: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ - !ruby/object:Gem::Dependency
114
+ name: bundler
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: '1.3'
120
+ type: :development
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - "~>"
125
+ - !ruby/object:Gem::Version
126
+ version: '1.3'
127
+ - !ruby/object:Gem::Dependency
128
+ name: rake
71
129
  requirement: !ruby/object:Gem::Requirement
72
130
  requirements:
73
- - - ! '>='
131
+ - - ">="
74
132
  - !ruby/object:Gem::Version
75
133
  version: '0'
76
134
  type: :development
77
135
  prerelease: false
78
136
  version_requirements: !ruby/object:Gem::Requirement
79
137
  requirements:
80
- - - ! '>='
138
+ - - ">="
81
139
  - !ruby/object:Gem::Version
82
140
  version: '0'
83
141
  - !ruby/object:Gem::Dependency
84
- name: simplecov-rcov
142
+ name: rspec
85
143
  requirement: !ruby/object:Gem::Requirement
86
144
  requirements:
87
- - - ! '>='
145
+ - - ">="
88
146
  - !ruby/object:Gem::Version
89
147
  version: '0'
90
148
  type: :development
91
149
  prerelease: false
92
150
  version_requirements: !ruby/object:Gem::Requirement
93
151
  requirements:
94
- - - ! '>='
152
+ - - ">="
95
153
  - !ruby/object:Gem::Version
96
154
  version: '0'
97
155
  - !ruby/object:Gem::Dependency
98
156
  name: fakeweb
99
157
  requirement: !ruby/object:Gem::Requirement
100
158
  requirements:
101
- - - ~>
159
+ - - "~>"
102
160
  - !ruby/object:Gem::Version
103
161
  version: '1.3'
104
162
  type: :development
105
163
  prerelease: false
106
164
  version_requirements: !ruby/object:Gem::Requirement
107
165
  requirements:
108
- - - ~>
166
+ - - "~>"
109
167
  - !ruby/object:Gem::Version
110
168
  version: '1.3'
169
+ - !ruby/object:Gem::Dependency
170
+ name: coveralls
171
+ requirement: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ type: :development
177
+ prerelease: false
178
+ version_requirements: !ruby/object:Gem::Requirement
179
+ requirements:
180
+ - - ">="
181
+ - !ruby/object:Gem::Version
182
+ version: '0'
183
+ - !ruby/object:Gem::Dependency
184
+ name: pry
185
+ requirement: !ruby/object:Gem::Requirement
186
+ requirements:
187
+ - - ">="
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ type: :development
191
+ prerelease: false
192
+ version_requirements: !ruby/object:Gem::Requirement
193
+ requirements:
194
+ - - ">="
195
+ - !ruby/object:Gem::Version
196
+ version: '0'
197
+ - !ruby/object:Gem::Dependency
198
+ name: henry
199
+ requirement: !ruby/object:Gem::Requirement
200
+ requirements:
201
+ - - ">="
202
+ - !ruby/object:Gem::Version
203
+ version: '0'
204
+ type: :development
205
+ prerelease: false
206
+ version_requirements: !ruby/object:Gem::Requirement
207
+ requirements:
208
+ - - ">="
209
+ - !ruby/object:Gem::Version
210
+ version: '0'
111
211
  description:
112
212
  email:
113
- - leigh@ldodds.com
213
+ - ops@theodi.org
114
214
  executables:
115
215
  - datapackage
116
216
  extensions: []
@@ -119,16 +219,16 @@ files:
119
219
  - LICENSE.md
120
220
  - README.md
121
221
  - bin/datapackage
122
- - etc/README.md
123
- - etc/csvddf-dialect-schema.json
124
- - etc/datapackage-schema.json
125
- - etc/jsontable-schema.json
126
222
  - lib/datapackage.rb
223
+ - lib/datapackage/exceptions.rb
127
224
  - lib/datapackage/package.rb
128
- - lib/datapackage/validator.rb
225
+ - lib/datapackage/registry.rb
226
+ - lib/datapackage/resource.rb
227
+ - lib/datapackage/schema.rb
129
228
  - lib/datapackage/version.rb
130
229
  homepage: http://github.com/theodi/datapackage.rb
131
- licenses: []
230
+ licenses:
231
+ - MIT
132
232
  metadata: {}
133
233
  post_install_message:
134
234
  rdoc_options: []
@@ -136,17 +236,17 @@ require_paths:
136
236
  - lib
137
237
  required_ruby_version: !ruby/object:Gem::Requirement
138
238
  requirements:
139
- - - ! '>='
239
+ - - ">="
140
240
  - !ruby/object:Gem::Version
141
- version: '0'
241
+ version: '2.0'
142
242
  required_rubygems_version: !ruby/object:Gem::Requirement
143
243
  requirements:
144
- - - ! '>='
244
+ - - ">="
145
245
  - !ruby/object:Gem::Version
146
246
  version: '0'
147
247
  requirements: []
148
248
  rubyforge_project:
149
- rubygems_version: 2.2.0
249
+ rubygems_version: 2.5.1
150
250
  signing_key:
151
251
  specification_version: 4
152
252
  summary: Library for working with data packages
data/etc/README.md DELETED
@@ -1,18 +0,0 @@
1
- This directory contains some JSON Schema documents for validating:
2
-
3
- * `datapackage-schema.json` -- [datapackage.json](http://dataprotocols.org/data-packages/) package files
4
- * `jsontable-schema.json` -- [JSON Table Schemas](http://dataprotocols.org/json-table-schema/) objects
5
- * `csvddf-dialect-schema.json` -- [CSV Dialect Description Format](http://dataprotocols.org/csv-dialect/) dialect objects
6
-
7
- The JSON Table Schemas and CSV Dialect Description Format both define JSON object structures that can appear in `datapackage.json` files (via the `schema` and `dialect` keywords). In the main `datapackage-schema.json` object, these keywords are only validated as simple objects.
8
-
9
- In the application the subsidiary schemas are automatically applied to relevant keys. This could be improved by using JSON Schema cross-referencing.
10
-
11
- Other potential improvements include:
12
-
13
- * Add `data` keyword validation to `datapackage-schema.json`
14
- * Add `format` keywords for validating email addresses and date/date-times
15
- * Or, add `pattern` for validating dates
16
- * Improve regexs used in various places
17
-
18
-
@@ -1,24 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-04/schema#",
3
- "title": "CSVDDF",
4
- "description": "JSON Schema for validating CSVDDF dialect structures",
5
- "type": "object",
6
- "properties": {
7
- "delimiter": {
8
- "type": "string"
9
- },
10
- "doublequote": {
11
- "type": "boolean"
12
- },
13
- "lineterminator": {
14
- "type": "string"
15
- },
16
- "quotechar": {
17
- "type": "string"
18
- },
19
- "skipinitialspace": {
20
- "type": "boolean"
21
- }
22
- },
23
- "required": [ "delimiter", "doublequote", "lineterminator", "quotechar", "skipinitialspace" ]
24
- }