tableschema 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/CHANGELOG.md +31 -0
  6. data/CODE_OF_CONDUCT.md +49 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +274 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/etc/schemas/geojson.json +209 -0
  14. data/etc/schemas/json-table-schema.json +102 -0
  15. data/lib/tableschema.rb +42 -0
  16. data/lib/tableschema/constraints/constraints.rb +76 -0
  17. data/lib/tableschema/constraints/enum.rb +14 -0
  18. data/lib/tableschema/constraints/max_length.rb +15 -0
  19. data/lib/tableschema/constraints/maximum.rb +14 -0
  20. data/lib/tableschema/constraints/min_length.rb +15 -0
  21. data/lib/tableschema/constraints/minimum.rb +14 -0
  22. data/lib/tableschema/constraints/pattern.rb +14 -0
  23. data/lib/tableschema/constraints/required.rb +32 -0
  24. data/lib/tableschema/data.rb +60 -0
  25. data/lib/tableschema/exceptions.rb +28 -0
  26. data/lib/tableschema/field.rb +41 -0
  27. data/lib/tableschema/helpers.rb +48 -0
  28. data/lib/tableschema/infer.rb +143 -0
  29. data/lib/tableschema/model.rb +73 -0
  30. data/lib/tableschema/schema.rb +36 -0
  31. data/lib/tableschema/table.rb +51 -0
  32. data/lib/tableschema/types/any.rb +23 -0
  33. data/lib/tableschema/types/array.rb +37 -0
  34. data/lib/tableschema/types/base.rb +54 -0
  35. data/lib/tableschema/types/boolean.rb +35 -0
  36. data/lib/tableschema/types/date.rb +56 -0
  37. data/lib/tableschema/types/datetime.rb +63 -0
  38. data/lib/tableschema/types/geojson.rb +38 -0
  39. data/lib/tableschema/types/geopoint.rb +56 -0
  40. data/lib/tableschema/types/integer.rb +35 -0
  41. data/lib/tableschema/types/null.rb +37 -0
  42. data/lib/tableschema/types/number.rb +60 -0
  43. data/lib/tableschema/types/object.rb +37 -0
  44. data/lib/tableschema/types/string.rb +64 -0
  45. data/lib/tableschema/types/time.rb +55 -0
  46. data/lib/tableschema/validate.rb +54 -0
  47. data/lib/tableschema/version.rb +3 -0
  48. data/tableschema.gemspec +32 -0
  49. metadata +231 -0
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development bootstrap script: installs the gem's bundled dependencies.

# Abort on any failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'

# Echo each line as it is read and each command as it is executed.
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,209 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "id": "https://raw.githubusercontent.com/fge/sample-json-schemas/master/geojson/geojson.json#",
4
+ "title": "Geo JSON object",
5
+ "description": "Schema for a Geo JSON object",
6
+ "type": "object",
7
+ "required": [ "type" ],
8
+ "properties": {
9
+ "crs": { "$ref": "#/definitions/crs" },
10
+ "bbox": { "$ref": "#/definitions/bbox" }
11
+ },
12
+ "oneOf": [
13
+ { "$ref": "#/definitions/geometry" },
14
+ { "$ref": "#/definitions/geometryCollection" },
15
+ { "$ref": "#/definitions/feature" },
16
+ { "$ref": "#/definitions/featureCollection" }
17
+ ],
18
+ "definitions": {
19
+ "geometryCollection": {
20
+ "title": "GeometryCollection",
21
+ "description": "A collection of geometry objects",
22
+ "required": [ "geometries" ],
23
+ "properties": {
24
+ "type": { "enum": [ "GeometryCollection" ] },
25
+ "geometries": {
26
+ "type": "array",
27
+ "items": { "$ref": "#/definitions/geometry" }
28
+ }
29
+ }
30
+ },
31
+ "feature": {
32
+ "title": "Feature",
33
+ "description": "A Geo JSON feature object",
34
+ "required": [ "geometry", "properties" ],
35
+ "properties": {
36
+ "type": { "enum": [ "Feature" ] },
37
+ "geometry": {
38
+ "oneOf": [
39
+ { "type": "null" },
40
+ { "$ref": "#/definitions/geometry" }
41
+ ]
42
+ },
43
+ "properties": { "type": [ "object", "null" ] },
44
+ "id": { "FIXME": "may be there, type not known (string? number?)" }
45
+ }
46
+ },
47
+ "featureCollection": {
48
+ "title": "FeatureCollection",
49
+ "description": "A Geo JSON feature collection",
50
+ "required": [ "features" ],
51
+ "properties": {
52
+ "type": { "enum": [ "FeatureCollection" ] },
53
+ "features": {
54
+ "type": "array",
55
+ "items": { "$ref": "#/definitions/feature" }
56
+ }
57
+ }
58
+ },
59
+ "geometry": {
60
+ "title": "geometry",
61
+ "description": "One geometry as defined by GeoJSON",
62
+ "type": "object",
63
+ "required": [ "type", "coordinates" ],
64
+ "oneOf": [
65
+ {
66
+ "title": "Point",
67
+ "properties": {
68
+ "type": { "enum": [ "Point" ] },
69
+ "coordinates": { "$ref": "#/definitions/geometry/definitions/position" }
70
+ }
71
+ },
72
+ {
73
+ "title": "MultiPoint",
74
+ "properties": {
75
+ "type": { "enum": [ "MultiPoint" ] },
76
+ "coordinates": { "$ref": "#/definitions/geometry/definitions/positionArray" }
77
+ }
78
+ },
79
+ {
80
+ "title": "LineString",
81
+ "properties": {
82
+ "type": { "enum": [ "LineString" ] },
83
+ "coordinates": { "$ref": "#/definitions/geometry/definitions/lineString" }
84
+ }
85
+ },
86
+ {
87
+ "title": "MultiLineString",
88
+ "properties": {
89
+ "type": { "enum": [ "MultiLineString" ] },
90
+ "coordinates": {
91
+ "type": "array",
92
+ "items": { "$ref": "#/definitions/geometry/definitions/lineString" }
93
+ }
94
+ }
95
+ },
96
+ {
97
+ "title": "Polygon",
98
+ "properties": {
99
+ "type": { "enum": [ "Polygon" ] },
100
+ "coordinates": { "$ref": "#/definitions/geometry/definitions/polygon" }
101
+ }
102
+ },
103
+ {
104
+ "title": "MultiPolygon",
105
+ "properties": {
106
+ "type": { "enum": [ "MultiPolygon" ] },
107
+ "coordinates": {
108
+ "type": "array",
109
+ "items": { "$ref": "#/definitions/geometry/definitions/polygon" }
110
+ }
111
+ }
112
+ }
113
+ ],
114
+ "definitions": {
115
+ "position": {
116
+ "description": "A single position",
117
+ "type": "array",
118
+ "minItems": 2,
119
+ "items": [ { "type": "number" }, { "type": "number" } ],
120
+ "additionalItems": false
121
+ },
122
+ "positionArray": {
123
+ "description": "An array of positions",
124
+ "type": "array",
125
+ "items": { "$ref": "#/definitions/geometry/definitions/position" }
126
+ },
127
+ "lineString": {
128
+ "description": "An array of two or more positions",
129
+ "allOf": [
130
+ { "$ref": "#/definitions/geometry/definitions/positionArray" },
131
+ { "minItems": 2 }
132
+ ]
133
+ },
134
+ "linearRing": {
135
+ "description": "An array of four or more positions where the first equals the last",
136
+ "allOf": [
137
+ { "$ref": "#/definitions/geometry/definitions/positionArray" },
138
+ { "minItems": 4 }
139
+ ]
140
+ },
141
+ "polygon": {
142
+ "description": "An array of linear rings",
143
+ "type": "array",
144
+ "items": { "$ref": "#/definitions/geometry/definitions/linearRing" }
145
+ }
146
+ }
147
+ },
148
+ "crs": {
149
+ "title": "crs",
150
+ "description": "a Coordinate Reference System object",
151
+ "type": [ "object", "null" ],
152
+ "required": [ "type", "properties" ],
153
+ "properties": {
154
+ "type": { "type": "string" },
155
+ "properties": { "type": "object" }
156
+ },
157
+ "additionalProperties": false,
158
+ "oneOf": [
159
+ { "$ref": "#/definitions/crs/definitions/namedCrs" },
160
+ { "$ref": "#/definitions/crs/definitions/linkedCrs" }
161
+ ],
162
+ "definitions": {
163
+ "namedCrs": {
164
+ "properties": {
165
+ "type": { "enum": [ "name" ] },
166
+ "properties": {
167
+ "required": [ "name" ],
168
+ "additionalProperties": false,
169
+ "properties": {
170
+ "name": {
171
+ "type": "string",
172
+ "FIXME": "semantic validation necessary"
173
+ }
174
+ }
175
+ }
176
+ }
177
+ },
178
+ "linkedObject": {
179
+ "type": "object",
180
+ "required": [ "href" ],
181
+ "properties": {
182
+ "href": {
183
+ "type": "string",
184
+ "format": "uri",
185
+ "FIXME": "spec says \"dereferenceable\", cannot enforce that"
186
+ },
187
+ "type": {
188
+ "type": "string",
189
+ "description": "Suggested values: proj4, ogcwkt, esriwkt"
190
+ }
191
+ }
192
+ },
193
+ "linkedCrs": {
194
+ "properties": {
195
+ "type": { "enum": [ "link" ] },
196
+ "properties": { "$ref": "#/definitions/crs/definitions/linkedObject" }
197
+ }
198
+ }
199
+ }
200
+ },
201
+ "bbox": {
202
+ "description": "A bounding box as defined by GeoJSON",
203
+ "FIXME": "unenforceable constraint: even number of elements in array",
204
+ "type": "array",
205
+ "items": { "type": "number" }
206
+ }
207
+ }
208
+ }
209
+
@@ -0,0 +1,102 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "title": "JSON Table Schema",
4
+ "description": "JSON Schema for validating JSON Table structures",
5
+ "type": "object",
6
+ "properties": {
7
+ "fields": {
8
+ "type": "array",
9
+ "minItems": 1,
10
+ "items": {
11
+ "type": "object",
12
+ "properties": {
13
+ "name": {
14
+ "type": "string"
15
+ },
16
+ "title": {
17
+ "type": "string"
18
+ },
19
+ "description": {
20
+ "type": "string"
21
+ },
22
+ "type": {
23
+ "enum": [ "string", "number", "integer", "date", "time", "datetime", "boolean", "binary", "object", "geopoint", "geojson", "array", "any" ]
24
+ },
25
+ "format": {
26
+ "type": "string"
27
+ },
28
+ "constraints": {
29
+ "type": "object",
30
+ "properties": {
31
+ "required": {
32
+ "type": "boolean"
33
+ },
34
+ "minLength": {
35
+ "type": "integer"
36
+ },
37
+ "maxLength": {
38
+ "type": "integer"
39
+ },
40
+ "unique": {
41
+ "type": "boolean"
42
+ },
43
+ "pattern": {
44
+ "type": "string"
45
+ },
46
+ "minimum": {
47
+ "oneOf": [
48
+ {"type": "string"},
49
+ {"type": "number"}
50
+ ]
51
+ },
52
+ "maximum": {
53
+ "oneOf": [
54
+ {"type": "string"},
55
+ {"type": "number"}
56
+ ]
57
+ }
58
+ }
59
+ }
60
+ },
61
+ "required": ["name"]
62
+ }
63
+ },
64
+ "primaryKey": {
65
+ "oneOf": [
66
+ {"type": "string"},
67
+ {"type": "array"}
68
+ ]
69
+ },
70
+ "foreignKeys": {
71
+ "type": "array",
72
+ "items": {
73
+ "type": "object",
74
+ "required": ["fields", "reference"],
75
+ "properties": {
76
+ "fields": {
77
+ "oneOf": [
78
+ {"type": "string"},
79
+ {"type": "array"}
80
+ ]
81
+ },
82
+ "reference": {
83
+ "type": "object",
84
+ "required": ["resource", "fields"],
85
+ "properties": {
86
+ "resource": {
87
+ "type": "string"
88
+ },
89
+ "fields": {
90
+ "oneOf": [
91
+ {"type": "string"},
92
+ {"type": "array"}
93
+ ]
94
+ }
95
+ }
96
+ }
97
+ }
98
+ }
99
+ }
100
+ },
101
+ "required": ["fields"]
102
+ }
@@ -0,0 +1,42 @@
1
+ require "json"
2
+ require "json-schema"
3
+ require "uuid"
4
+ require "currencies"
5
+ require "date"
6
+ require "tod"
7
+ require "tod/core_extensions"
8
+ require "csv"
9
+
10
+ require "tableschema/version"
11
+ require "tableschema/exceptions"
12
+ require "tableschema/helpers"
13
+
14
+ require "tableschema/constraints/constraints"
15
+
16
+ require "tableschema/types/base"
17
+ require "tableschema/types/any"
18
+ require "tableschema/types/array"
19
+ require "tableschema/types/boolean"
20
+ require "tableschema/types/date"
21
+ require "tableschema/types/datetime"
22
+ require "tableschema/types/geojson"
23
+ require "tableschema/types/geopoint"
24
+ require "tableschema/types/integer"
25
+ require "tableschema/types/null"
26
+ require "tableschema/types/number"
27
+ require "tableschema/types/object"
28
+ require "tableschema/types/string"
29
+ require "tableschema/types/time"
30
+
31
+ require "tableschema/field"
32
+ require "tableschema/validate"
33
+ require "tableschema/model"
34
+ require "tableschema/data"
35
+ require "tableschema/schema"
36
+ require "tableschema/table"
37
+ require "tableschema/infer"
38
+
39
# Root namespace of the tableschema library.
module TableSchema
  # Empty namespace declaration; presumably populated by the
  # tableschema/types/* files (confirm against those files).
  module Types; end
end
@@ -0,0 +1,76 @@
1
+ require "tableschema/constraints/required"
2
+ require "tableschema/constraints/min_length"
3
+ require "tableschema/constraints/max_length"
4
+ require "tableschema/constraints/minimum"
5
+ require "tableschema/constraints/maximum"
6
+ require "tableschema/constraints/enum"
7
+ require "tableschema/constraints/pattern"
8
+
9
module TableSchema
  # Checks one value against the `constraints` hash of its field descriptor.
  #
  # Each supported constraint name is dispatched to a `check_<name>` method
  # supplied by the constraint mixins included below.
  class Constraints
    include TableSchema::Helpers

    include TableSchema::Constraints::Required
    include TableSchema::Constraints::MinLength
    include TableSchema::Constraints::MaxLength
    include TableSchema::Constraints::Minimum
    include TableSchema::Constraints::Maximum
    include TableSchema::Constraints::Enum
    include TableSchema::Constraints::Pattern

    # @param field [Hash] field descriptor; its 'constraints' entry is used
    #   when present, otherwise the field is treated as unconstrained
    # @param value [Object] the value to check
    def initialize(field, value)
      @field = field
      @value = value
      @constraints = @field['constraints'] || {}
    end

    # Runs every constraint declared on the field against the value.
    #
    # @return [Object] the result of the last check that ran, or `true` when
    #   the field declares no constraints
    # @raise [TableSchema::ConstraintNotSupported] when a declared constraint
    #   is not listed in the field type's supported constraints
    def validate!
      result = true
      @constraints.each_key do |constraint|
        unless is_supported_type?(constraint)
          raise TableSchema::ConstraintNotSupported,
                "The field type `#{@field['type']}` does not support the `#{constraint}` constraint"
        end

        # Dynamic dispatch is only reached for names whitelisted by the
        # field type's supported_constraints list.
        result = send("check_#{underscore(constraint)}")
      end
      result
    end

    private

    # Converts a camelCase constraint name (e.g. `minLength`) into the
    # snake_case form used in the `check_*` method names.
    def underscore(value)
      value.gsub(/::/, '/').
        gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
        gsub(/([a-z\d])([A-Z])/, '\1_\2').
        tr("-", "_").
        downcase
    end

    # True when the class resolved for the field's type lists `constraint`
    # among its supported constraints.
    def is_supported_type?(constraint)
      klass = get_class_for_type(@field['type'])
      Kernel.const_get(klass).supported_constraints.include?(constraint)
    end

    # Coerces a constraint taken from the schema so it can be compared with
    # @value.
    #
    # Arrays (such as `enum` lists) are converted member by member. The
    # previous implementation tried the scalar time/datetime branches first,
    # so an array constraint on a `Tod::TimeOfDay` or `DateTime` value was
    # handed whole to `parse` and raised. Integer values with string members
    # were never converted at all.
    #
    # @param constraint [Object, Array] raw constraint value from the schema
    # @return [Object, Array] the coerced constraint
    def parse_constraint(constraint)
      return constraint.map { |member| parse_enum_member(member) } if constraint.is_a?(::Array)

      parse_scalar_constraint(constraint)
    end

    # Coerces a single, non-array constraint according to the class of @value.
    def parse_scalar_constraint(constraint)
      if @value.is_a?(::Integer) && constraint.is_a?(::String)
        constraint.to_i
      elsif @value.is_a?(::Tod::TimeOfDay)
        ::Tod::TimeOfDay.parse(constraint)
      elsif @value.is_a?(::DateTime)
        # Must precede the Date branch: DateTime is a subclass of Date.
        ::DateTime.parse(constraint)
      elsif @value.is_a?(::Date) && constraint.is_a?(::String)
        ::Date.parse(constraint)
      else
        constraint
      end
    end

    # Coerces one member of an array constraint. Float and boolean members
    # are only converted inside arrays, as before; everything else follows
    # the scalar rules.
    def parse_enum_member(member)
      if @value.is_a?(::Float)
        Float(member)
      elsif @value.is_a?(Boolean) # NOTE(review): `Boolean` is not defined in this file - confirm where it comes from
        convert_to_boolean(member) # presumably provided by TableSchema::Helpers - confirm
      else
        parse_scalar_constraint(member)
      end
    end

  end
end
@@ -0,0 +1,14 @@
1
module TableSchema
  class Constraints
    # Mixin providing the check for the `enum` constraint.
    module Enum

      # Verifies that @value is one of the values listed under the `enum`
      # constraint, after the list has been passed through parse_constraint.
      #
      # @return [true] when the value is a member of the list
      # @raise [TableSchema::ConstraintError] when it is not
      def check_enum
        allowed = parse_constraint(@constraints['enum'])
        return true if allowed.include?(@value)

        raise TableSchema::ConstraintError, "The value for the field `#{@field['name']}` must be in the enum array"
      end

    end
  end
end