datapackage 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,86 @@
1
module DataPackage
  # A single Data Resource descriptor.
  #
  # The resource is itself a Hash holding the descriptor's properties under
  # string keys ('name', 'path', 'data', 'schema', 'dialect', ...). On
  # construction the descriptor is merged into the Hash, defaults are applied,
  # and the data source ('data' or 'path') is resolved.
  class Resource < Hash
    include DataPackage::Helpers

    # name        - value of the descriptor's 'name' property.
    # profile     - DataPackage::Profile built from the 'profile' property.
    # source      - inline data, or the path joined with the base path.
    # source_type - 'inline', 'remote' or 'local'.
    # errors      - NOTE(review): @errors is never assigned in this class, so
    #               this reader returns nil unless something else sets it.
    attr_reader :name, :profile, :source, :source_type, :errors

    # Builds a resource from a descriptor.
    #
    # resource  - the descriptor; passed to `dereference_descriptor` and the
    #             result is merged into this Hash.
    # base_path - prefix joined onto a 'path' source (default: '').
    #
    # Raises KeyError when the descriptor has no 'name' key.
    # Raises ResourceException when neither 'data' nor 'path' is present, or
    # when 'path' is rejected by `is_safe_path?`.
    def initialize(resource, base_path = '')
      self.merge! dereference_descriptor(resource, base_path: base_path,
        reference_fields: ['schema', 'dialect'])
      apply_defaults!
      @profile = DataPackage::Profile.new(self['profile'])
      @name = self.fetch('name')
      get_source!(base_path)
      apply_table_defaults! if self.tabular?
    end

    # Returns the descriptor as a plain Hash (via `to_h`).
    def descriptor
      self.to_h
    end

    # Returns a memoized TableSchema::Table built from the source and the
    # 'schema' property, or nil when the resource is not tabular.
    def table
      return unless tabular?

      @table ||= TableSchema::Table.new(self.source, self['schema'])
    end

    # True when the declared profile is the tabular profile, or when this
    # descriptor validates against the tabular profile anyway.
    def tabular?
      tabular_profile = DataPackage::DEFAULTS[:resource][:tabular_profile]
      return true if @profile.name == tabular_profile
      return true if DataPackage::Profile.new(tabular_profile).valid?(self)
      false
    end

    alias :tabular :tabular?

    # True when this descriptor is valid against its own profile.
    def valid?
      @profile.valid?(self)
    end

    alias :valid :valid?

    # Delegates to the profile's `validate` with this descriptor.
    def validate
      @profile.validate(self)
    end

    # Yields each validation error reported by the profile.
    #
    # Without a block an Enumerator over the errors is returned, instead of
    # failing with LocalJumpError as soon as the first error is found.
    def iter_errors
      return enum_for(:iter_errors) unless block_given?

      @profile.iter_errors(self){ |err| yield err }
    end

    private

    # Resolves @source and @source_type. Inline 'data' takes priority over
    # 'path'; a 'path' is joined onto base_path after the safety check.
    #
    # NOTE(review): 'path' is treated as a single value here; confirm the
    # helpers cope if a descriptor supplies an array of paths.
    def get_source!(base_path)
      if self.fetch('data', nil)
        @source = self['data']
        @source_type = 'inline'
      elsif self.fetch('path', nil)
        unless is_safe_path?(self['path'])
          raise ResourceException, "Path `#{self['path']}` is not safe"
        end
        @source = join_paths(base_path, self['path'])
        @source_type = is_fully_qualified_url?(@source) ? 'remote' : 'local'
      else
        raise ResourceException, 'A resource descriptor must have a `path` or `data` property.'
      end
    end

    # Fills in 'profile' and 'encoding' when the descriptor omits them.
    def apply_defaults!
      self['profile'] ||= DataPackage::DEFAULTS[:resource][:profile]
      self['encoding'] ||= DataPackage::DEFAULTS[:resource][:encoding]
    end

    # Fills in schema and dialect defaults for a tabular resource. Every
    # default only fills a gap; values already present are kept.
    def apply_table_defaults!
      schema = self.fetch('schema', nil)
      if schema
        # `||=` so a descriptor's own 'missingValues' is not overwritten by
        # the default.
        schema['missingValues'] ||= DataPackage::DEFAULTS[:schema][:missing_values]
        schema.fetch('fields', []).each do |field_descriptor|
          field_descriptor['type'] ||= DataPackage::DEFAULTS[:schema][:type]
          field_descriptor['format'] ||= DataPackage::DEFAULTS[:schema][:format]
        end
      end

      dialect = self.fetch('dialect', nil)
      if dialect
        # DEFAULTS keys are converted with to_s because the descriptor uses
        # string keys.
        DataPackage::DEFAULTS[:dialect].each do |key, val|
          dialect[key.to_s] ||= val
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module DataPackage
  # Version string of the datapackage gem. Frozen so the constant cannot be
  # mutated in place.
  VERSION = '0.2.6'.freeze
end
@@ -0,0 +1,541 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "title": "Data Package",
4
+ "description": "Data Package is a simple specification for data access and delivery.",
5
+ "type": "object",
6
+ "required": [
7
+ "resources"
8
+ ],
9
+ "properties": {
10
+ "profile": {
11
+ "propertyOrder": 10,
12
+ "default": "data-package",
13
+ "title": "Profile",
14
+ "description": "The profile of this descriptor.",
15
+ "context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.",
16
+ "type": "string",
17
+ "examples": [
18
+ "{\n \"profile\": \"tabular-data-package\"\n}\n",
19
+ "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n"
20
+ ]
21
+ },
22
+ "name": {
23
+ "propertyOrder": 20,
24
+ "title": "Name",
25
+ "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
26
+ "type": "string",
27
+ "pattern": "^([-a-z0-9._/])+$",
28
+ "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
29
+ "examples": [
30
+ "{\n \"name\": \"my-nice-name\"\n}\n"
31
+ ]
32
+ },
33
+ "id": {
34
+ "propertyOrder": 30,
35
+ "title": "ID",
36
+ "description": "A property reserved for globally unique identifiers. Examples of identifiers that are unique include UUIDs and DOIs.",
37
+ "context": "A common usage pattern for Data Packages is as a packaging format within the bounds of a system or platform. In these cases, a unique identifier for a package is desired for common data handling workflows, such as updating an existing package. While at the level of the specification, global uniqueness cannot be validated, consumers using the `id` property `MUST` ensure identifiers are globally unique.",
38
+ "type": "string",
39
+ "examples": [
40
+ "{\n \"id\": \"b03ec84-77fd-4270-813b-0c698943f7ce\"\n}\n",
41
+ "{\n \"id\": \"http://dx.doi.org/10.1594/PANGAEA.726855\"\n}\n"
42
+ ]
43
+ },
44
+ "title": {
45
+ "propertyOrder": 40,
46
+ "title": "Title",
47
+ "description": "A human-readable title.",
48
+ "type": "string",
49
+ "examples": [
50
+ "{\n \"title\": \"My Package Title\"\n}\n"
51
+ ]
52
+ },
53
+ "description": {
54
+ "propertyOrder": 50,
55
+ "format": "textarea",
56
+ "title": "Description",
57
+ "description": "A text description. Markdown is encouraged.",
58
+ "type": "string",
59
+ "examples": [
60
+ "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n"
61
+ ]
62
+ },
63
+ "homepage": {
64
+ "propertyOrder": 60,
65
+ "title": "Home Page",
66
+ "description": "The home on the web that is related to this data package.",
67
+ "type": "string",
68
+ "format": "uri",
69
+ "examples": [
70
+ "{\n \"homepage\": \"http://example.com/\"\n}\n"
71
+ ]
72
+ },
73
+ "created": {
74
+ "propertyOrder": 70,
75
+ "title": "Created",
76
+ "description": "The datetime on which this descriptor was created.",
77
+ "context": "The datetime must conform to the string formats for datetime as described in [RFC3339](https://tools.ietf.org/html/rfc3339#section-5.6)",
78
+ "type": "string",
79
+ "format": "date-time",
80
+ "examples": [
81
+ "{\n \"created\": \"1985-04-12T23:20:50.52Z\"\n}\n"
82
+ ]
83
+ },
84
+ "contributors": {
85
+ "propertyOrder": 80,
86
+ "title": "Contributors",
87
+ "description": "The contributors to this descriptor.",
88
+ "type": "array",
89
+ "minItems": 1,
90
+ "items": {
91
+ "title": "Contributor",
92
+ "description": "A contributor to this descriptor.",
93
+ "properties": {
94
+ "title": {
95
+ "title": "Title",
96
+ "description": "A human-readable title.",
97
+ "type": "string",
98
+ "examples": [
99
+ "{\n \"title\": \"My Package Title\"\n}\n"
100
+ ]
101
+ },
102
+ "path": {
103
+ "title": "Path",
104
+ "description": "A fully qualified URL, or a POSIX file path.",
105
+ "type": "string",
106
+ "examples": [
107
+ "{\n \"path\": \"file.csv\"\n}\n",
108
+ "{\n \"path\": \"http://example.com/file.csv\"\n}\n"
109
+ ],
110
+ "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
111
+ },
112
+ "email": {
113
+ "title": "Email",
114
+ "description": "An email address.",
115
+ "type": "string",
116
+ "format": "email",
117
+ "examples": [
118
+ "{\n \"email\": \"example@example.com\"\n}\n"
119
+ ]
120
+ },
121
+ "organisation": {
122
+ "title": "Organisation",
123
+ "description": "An organizational affiliation for this contributor.",
124
+ "type": "string"
125
+ },
126
+ "role": {
127
+ "type": "string",
128
+ "enum": [
129
+ "publisher",
130
+ "author",
131
+ "maintainer",
132
+ "wrangler",
133
+ "contributor"
134
+ ]
135
+ }
136
+ },
137
+ "required": [
138
+ "title"
139
+ ],
140
+ "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself."
141
+ },
142
+ "examples": [
143
+ "{\n \"contributors\": [\n {\n \"title\": \"Joe Bloggs\"\n }\n ]\n}\n",
144
+ "{\n \"contributors\": [\n {\n \"title\": \"Joe Bloggs\",\n \"email\": \"joe@example.com\",\n \"role\": \"author\"\n }\n ]\n}\n"
145
+ ]
146
+ },
147
+ "keywords": {
148
+ "propertyOrder": 90,
149
+ "title": "Keywords",
150
+ "description": "A list of keywords that describe this package.",
151
+ "type": "array",
152
+ "minItems": 1,
153
+ "items": {
154
+ "type": "string"
155
+ },
156
+ "examples": [
157
+ "{\n \"keywords\": [\n \"data\",\n \"fiscal\",\n \"transparency\"\n ]\n}\n"
158
+ ]
159
+ },
160
+ "licenses": {
161
+ "propertyOrder": 100,
162
+ "title": "Licenses",
163
+ "description": "The license(s) under which this package is published.",
164
+ "type": "array",
165
+ "minItems": 1,
166
+ "items": {
167
+ "title": "License",
168
+ "description": "A license for this descriptor.",
169
+ "type": "object",
170
+ "properties": {
171
+ "name": {
172
+ "title": "Open Definition license identifier",
173
+ "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/",
174
+ "type": "string",
175
+ "pattern": "^([-a-zA-Z0-9._])+$"
176
+ },
177
+ "path": {
178
+ "title": "Path",
179
+ "description": "A fully qualified URL, or a POSIX file path.",
180
+ "type": "string",
181
+ "examples": [
182
+ "{\n \"path\": \"file.csv\"\n}\n",
183
+ "{\n \"path\": \"http://example.com/file.csv\"\n}\n"
184
+ ],
185
+ "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
186
+ },
187
+ "title": {
188
+ "title": "Title",
189
+ "description": "A human-readable title.",
190
+ "type": "string",
191
+ "examples": [
192
+ "{\n \"title\": \"My Package Title\"\n}\n"
193
+ ]
194
+ }
195
+ },
196
+ "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself."
197
+ },
198
+ "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.",
199
+ "examples": [
200
+ "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n"
201
+ ]
202
+ },
203
+ "resources": {
204
+ "propertyOrder": 110,
205
+ "title": "Data Resources",
206
+ "description": "An `array` of Data Resource objects, each compliant with the [Data Resource](/data-resource/) specification.",
207
+ "type": "array",
208
+ "minItems": 1,
209
+ "items": {
210
+ "title": "Data Resource",
211
+ "description": "Data Resource.",
212
+ "type": "object",
213
+ "oneOf": [
214
+ {
215
+ "required": [
216
+ "name",
217
+ "data"
218
+ ]
219
+ },
220
+ {
221
+ "required": [
222
+ "name",
223
+ "path"
224
+ ]
225
+ }
226
+ ],
227
+ "properties": {
228
+ "profile": {
229
+ "propertyOrder": 10,
230
+ "default": "data-resource",
231
+ "title": "Profile",
232
+ "description": "The profile of this descriptor.",
233
+ "context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.",
234
+ "type": "string",
235
+ "examples": [
236
+ "{\n \"profile\": \"tabular-data-package\"\n}\n",
237
+ "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n"
238
+ ]
239
+ },
240
+ "name": {
241
+ "propertyOrder": 20,
242
+ "title": "Name",
243
+ "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
244
+ "type": "string",
245
+ "pattern": "^([-a-z0-9._/])+$",
246
+ "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
247
+ "examples": [
248
+ "{\n \"name\": \"my-nice-name\"\n}\n"
249
+ ]
250
+ },
251
+ "path": {
252
+ "propertyOrder": 30,
253
+ "title": "Path",
254
+ "description": "A reference to the data for this resource, as either a path as a string, or an array of paths as strings, each a fully qualified URL or a POSIX file path.",
255
+ "oneOf": [
256
+ {
257
+ "title": "Path",
258
+ "description": "A fully qualified URL, or a POSIX file path.",
259
+ "type": "string",
260
+ "examples": [
261
+ "{\n \"path\": \"file.csv\"\n}\n",
262
+ "{\n \"path\": \"http://example.com/file.csv\"\n}\n"
263
+ ],
264
+ "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
265
+ },
266
+ {
267
+ "type": "array",
268
+ "minItems": 1,
269
+ "items": {
270
+ "title": "Path",
271
+ "description": "A fully qualified URL, or a POSIX file path.",
272
+ "type": "string",
273
+ "examples": [
274
+ "{\n \"path\": \"file.csv\"\n}\n",
275
+ "{\n \"path\": \"http://example.com/file.csv\"\n}\n"
276
+ ],
277
+ "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
278
+ },
279
+ "examples": [
280
+ "[ \"file.csv\" ]\n",
281
+ "[ \"http://example.com/file.csv\" ]\n"
282
+ ]
283
+ }
284
+ ],
285
+ "context": "The dereferenced value of each referenced data source in `path` `MUST` be commensurate with a native, dereferenced representation of the data the resource describes. For example, in a *Tabular* Data Resource, this means that the dereferenced value of `path` `MUST` be an array.",
286
+ "examples": [
287
+ "{\n \"path\": [\n \"file.csv\",\n \"file2.csv\"\n ]\n}\n",
288
+ "{\n \"path\": [\n \"http://example.com/file.csv\",\n \"http://example.com/file2.csv\"\n ]\n}\n",
289
+ "{\n \"path\": \"http://example.com/file.csv\"\n}\n"
290
+ ]
291
+ },
292
+ "data": {
293
+ "propertyOrder": 230,
294
+ "title": "Data",
295
+ "description": "Inline data for this resource."
296
+ },
297
+ "schema": {
298
+ "propertyOrder": 40,
299
+ "title": "Schema",
300
+ "description": "A schema for this resource.",
301
+ "type": "object"
302
+ },
303
+ "title": {
304
+ "propertyOrder": 50,
305
+ "title": "Title",
306
+ "description": "A human-readable title.",
307
+ "type": "string",
308
+ "examples": [
309
+ "{\n \"title\": \"My Package Title\"\n}\n"
310
+ ]
311
+ },
312
+ "description": {
313
+ "propertyOrder": 60,
314
+ "format": "textarea",
315
+ "title": "Description",
316
+ "description": "A text description. Markdown is encouraged.",
317
+ "type": "string",
318
+ "examples": [
319
+ "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n"
320
+ ]
321
+ },
322
+ "homepage": {
323
+ "propertyOrder": 70,
324
+ "title": "Home Page",
325
+ "description": "The home on the web that is related to this data package.",
326
+ "type": "string",
327
+ "format": "uri",
328
+ "examples": [
329
+ "{\n \"homepage\": \"http://example.com/\"\n}\n"
330
+ ]
331
+ },
332
+ "sources": {
333
+ "propertyOrder": 140,
334
+ "options": {
335
+ "hidden": true
336
+ },
337
+ "title": "Sources",
338
+ "description": "The raw sources for this resource.",
339
+ "type": "array",
340
+ "minItems": 1,
341
+ "items": {
342
+ "title": "Source",
343
+ "description": "A source file.",
344
+ "type": "object",
345
+ "required": [
346
+ "title"
347
+ ],
348
+ "properties": {
349
+ "title": {
350
+ "title": "Title",
351
+ "description": "A human-readable title.",
352
+ "type": "string",
353
+ "examples": [
354
+ "{\n \"title\": \"My Package Title\"\n}\n"
355
+ ]
356
+ },
357
+ "path": {
358
+ "title": "Path",
359
+ "description": "A fully qualified URL, or a POSIX file path.",
360
+ "type": "string",
361
+ "examples": [
362
+ "{\n \"path\": \"file.csv\"\n}\n",
363
+ "{\n \"path\": \"http://example.com/file.csv\"\n}\n"
364
+ ],
365
+ "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
366
+ },
367
+ "email": {
368
+ "title": "Email",
369
+ "description": "An email address.",
370
+ "type": "string",
371
+ "format": "email",
372
+ "examples": [
373
+ "{\n \"email\": \"example@example.com\"\n}\n"
374
+ ]
375
+ }
376
+ }
377
+ },
378
+ "examples": [
379
+ "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n"
380
+ ]
381
+ },
382
+ "licenses": {
383
+ "description": "The license(s) under which the resource is published.",
384
+ "propertyOrder": 150,
385
+ "options": {
386
+ "hidden": true
387
+ },
388
+ "title": "Licenses",
389
+ "type": "array",
390
+ "minItems": 1,
391
+ "items": {
392
+ "title": "License",
393
+ "description": "A license for this descriptor.",
394
+ "type": "object",
395
+ "properties": {
396
+ "name": {
397
+ "title": "Open Definition license identifier",
398
+ "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/",
399
+ "type": "string",
400
+ "pattern": "^([-a-zA-Z0-9._])+$"
401
+ },
402
+ "path": {
403
+ "title": "Path",
404
+ "description": "A fully qualified URL, or a POSIX file path.",
405
+ "type": "string",
406
+ "examples": [
407
+ "{\n \"path\": \"file.csv\"\n}\n",
408
+ "{\n \"path\": \"http://example.com/file.csv\"\n}\n"
409
+ ],
410
+ "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
411
+ },
412
+ "title": {
413
+ "title": "Title",
414
+ "description": "A human-readable title.",
415
+ "type": "string",
416
+ "examples": [
417
+ "{\n \"title\": \"My Package Title\"\n}\n"
418
+ ]
419
+ }
420
+ },
421
+ "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself."
422
+ },
423
+ "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.",
424
+ "examples": [
425
+ "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n"
426
+ ]
427
+ },
428
+ "format": {
429
+ "propertyOrder": 80,
430
+ "title": "Format",
431
+ "description": "The file format of this resource.",
432
+ "context": "`csv`, `xls`, `json` are examples of common formats.",
433
+ "type": "string",
434
+ "examples": [
435
+ "{\n \"format\": \"xls\"\n}\n"
436
+ ]
437
+ },
438
+ "mediatype": {
439
+ "propertyOrder": 90,
440
+ "title": "Media Type",
441
+ "description": "The media type of this resource. Can be any valid media type listed with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml).",
442
+ "type": "string",
443
+ "pattern": "^(.+)/(.+)$",
444
+ "examples": [
445
+ "{\n \"mediatype\": \"text/csv\"\n}\n"
446
+ ]
447
+ },
448
+ "encoding": {
449
+ "propertyOrder": 100,
450
+ "title": "Encoding",
451
+ "description": "The file encoding of this resource.",
452
+ "type": "string",
453
+ "default": "utf-8",
454
+ "examples": [
455
+ "{\n \"encoding\": \"utf-8\"\n}\n"
456
+ ]
457
+ },
458
+ "bytes": {
459
+ "propertyOrder": 110,
460
+ "options": {
461
+ "hidden": true
462
+ },
463
+ "title": "Bytes",
464
+ "description": "The size of this resource in bytes.",
465
+ "type": "integer",
466
+ "examples": [
467
+ "{\n \"bytes\": 2082\n}\n"
468
+ ]
469
+ },
470
+ "hash": {
471
+ "propertyOrder": 120,
472
+ "options": {
473
+ "hidden": true
474
+ },
475
+ "title": "Hash",
476
+ "type": "string",
477
+ "description": "The MD5 hash of this resource. Indicate other hashing algorithms with the {algorithm}:{hash} format.",
478
+ "pattern": "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$",
479
+ "examples": [
480
+ "{\n \"hash\": \"d25c9c77f588f5dc32059d2da1136c02\"\n}\n",
481
+ "{\n \"hash\": \"SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0\"\n}\n"
482
+ ]
483
+ }
484
+ }
485
+ },
486
+ "examples": [
487
+ "{\n \"resources\": [\n {\n \"name\": \"my-data\",\n \"data\": [\n \"data.csv\"\n ],\n \"mediatype\": \"text/csv\"\n }\n ]\n}\n"
488
+ ]
489
+ },
490
+ "sources": {
491
+ "propertyOrder": 200,
492
+ "options": {
493
+ "hidden": true
494
+ },
495
+ "title": "Sources",
496
+ "description": "The raw sources for this resource.",
497
+ "type": "array",
498
+ "minItems": 1,
499
+ "items": {
500
+ "title": "Source",
501
+ "description": "A source file.",
502
+ "type": "object",
503
+ "required": [
504
+ "title"
505
+ ],
506
+ "properties": {
507
+ "title": {
508
+ "title": "Title",
509
+ "description": "A human-readable title.",
510
+ "type": "string",
511
+ "examples": [
512
+ "{\n \"title\": \"My Package Title\"\n}\n"
513
+ ]
514
+ },
515
+ "path": {
516
+ "title": "Path",
517
+ "description": "A fully qualified URL, or a POSIX file path.",
518
+ "type": "string",
519
+ "examples": [
520
+ "{\n \"path\": \"file.csv\"\n}\n",
521
+ "{\n \"path\": \"http://example.com/file.csv\"\n}\n"
522
+ ],
523
+ "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
524
+ },
525
+ "email": {
526
+ "title": "Email",
527
+ "description": "An email address.",
528
+ "type": "string",
529
+ "format": "email",
530
+ "examples": [
531
+ "{\n \"email\": \"example@example.com\"\n}\n"
532
+ ]
533
+ }
534
+ }
535
+ },
536
+ "examples": [
537
+ "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n"
538
+ ]
539
+ }
540
+ }
541
+ }