tableschema 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +21 -0
- data/.travis.yml +15 -1
- data/README.md +164 -129
- data/Rakefile +10 -1
- data/bin/console +2 -6
- data/{etc/schemas → lib/profiles}/geojson.json +0 -1
- data/lib/profiles/table-schema.json +1625 -0
- data/lib/profiles/topojson.json +311 -0
- data/lib/tableschema.rb +5 -3
- data/lib/tableschema/constraints/constraints.rb +12 -24
- data/lib/tableschema/constraints/enum.rb +6 -2
- data/lib/tableschema/constraints/max_length.rb +6 -2
- data/lib/tableschema/constraints/maximum.rb +12 -2
- data/lib/tableschema/constraints/min_length.rb +6 -2
- data/lib/tableschema/constraints/minimum.rb +12 -2
- data/lib/tableschema/constraints/pattern.rb +9 -2
- data/lib/tableschema/constraints/required.rb +6 -15
- data/lib/tableschema/constraints/unique.rb +12 -0
- data/lib/tableschema/defaults.rb +9 -0
- data/lib/tableschema/exceptions.rb +15 -2
- data/lib/tableschema/field.rb +39 -20
- data/lib/tableschema/helpers.rb +32 -15
- data/lib/tableschema/infer.rb +31 -28
- data/lib/tableschema/model.rb +57 -34
- data/lib/tableschema/schema.rb +40 -6
- data/lib/tableschema/table.rb +75 -26
- data/lib/tableschema/types/any.rb +1 -0
- data/lib/tableschema/types/array.rb +2 -1
- data/lib/tableschema/types/base.rb +9 -21
- data/lib/tableschema/types/date.rb +1 -0
- data/lib/tableschema/types/datetime.rb +1 -0
- data/lib/tableschema/types/duration.rb +31 -0
- data/lib/tableschema/types/geojson.rb +27 -5
- data/lib/tableschema/types/geopoint.rb +4 -3
- data/lib/tableschema/types/integer.rb +1 -0
- data/lib/tableschema/types/number.rb +40 -25
- data/lib/tableschema/types/object.rb +2 -1
- data/lib/tableschema/types/string.rb +8 -0
- data/lib/tableschema/types/time.rb +1 -0
- data/lib/tableschema/types/year.rb +34 -0
- data/lib/tableschema/types/yearmonth.rb +52 -0
- data/lib/tableschema/validate.rb +45 -29
- data/lib/tableschema/version.rb +1 -1
- data/tableschema.gemspec +2 -1
- metadata +31 -12
- data/etc/schemas/json-table-schema.json +0 -102
- data/lib/tableschema/data.rb +0 -60
- data/lib/tableschema/types/null.rb +0 -37
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be0ea32c71fc75dd1acca11a181b3fe8a7b69e33
|
4
|
+
data.tar.gz: e73959a568fd604b31fbe72376b9f2987095602c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2795a5b5d62696987588e9dfd7970c5539b68385d8e20eba1f92249645a7a72d338c5a44e96514531e562f7fdec0a40b49c3fd095665a027a4763e21e43a0fb9
|
7
|
+
data.tar.gz: e145c763e64f6384cadf6d8b3fdfeae01b562c50a1243def427036f81d2f6f39c95dd22837cdddc0844e2e4287b2aae71d24b57101af63b62908090093abcf11
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
AllCops:
|
2
|
+
DisabledByDefault: true
|
3
|
+
Exclude:
|
4
|
+
- 'lib/tableschema/exceptions.rb'
|
5
|
+
|
6
|
+
Security:
|
7
|
+
Enabled: true
|
8
|
+
|
9
|
+
Lint:
|
10
|
+
Enabled: true
|
11
|
+
|
12
|
+
Style/HashSyntax:
|
13
|
+
Enabled: true
|
14
|
+
EnforcedStyle: ruby19_no_mixed_keys
|
15
|
+
|
16
|
+
Style/MutableConstant:
|
17
|
+
Enabled: true
|
18
|
+
|
19
|
+
Metrics/CyclomaticComplexity:
|
20
|
+
Max: 10
|
21
|
+
Severity: error
|
data/.travis.yml
CHANGED
@@ -1,9 +1,23 @@
|
|
1
1
|
---
|
2
2
|
language: ruby
|
3
|
+
|
3
4
|
rvm:
|
4
5
|
- 2.3.1
|
5
6
|
- 2.4.1
|
6
|
-
|
7
|
+
|
8
|
+
before_install:
|
9
|
+
gem install bundler -v 1.11.2
|
10
|
+
|
11
|
+
install:
|
12
|
+
- bundle
|
13
|
+
- gem install rubocop
|
14
|
+
|
15
|
+
script:
|
16
|
+
- rake spec
|
17
|
+
|
18
|
+
after_success:
|
19
|
+
- rubocop
|
20
|
+
|
7
21
|
deploy:
|
8
22
|
provider: rubygems
|
9
23
|
api_key:
|
data/README.md
CHANGED
@@ -33,24 +33,25 @@ Since version 0.3 the library was renamed `tableschema` and has a gem with the s
|
|
33
33
|
The gem `jsontableschema` is no longer maintained. Here are the steps to transition your code to `tableschema`:
|
34
34
|
|
35
35
|
1. Replace
|
36
|
-
```ruby
|
37
|
-
gem 'jsontableschema'
|
38
|
-
```
|
39
|
-
with
|
40
36
|
|
41
|
-
|
42
|
-
|
43
|
-
|
37
|
+
```ruby
|
38
|
+
gem 'jsontableschema'
|
39
|
+
```
|
40
|
+
with
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
gem 'tableschema', '0.3.0'
|
44
|
+
```
|
44
45
|
|
45
46
|
2. Replace module name `JsonTableSchema` with module name `TableSchema`. For example:
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
48
|
+
```ruby
|
49
|
+
JsonTableSchema::Table.infer_schema(csv)
|
50
|
+
```
|
51
|
+
with
|
52
|
+
```ruby
|
53
|
+
TableSchema::Table.infer_schema(csv)
|
54
|
+
```
|
54
55
|
|
55
56
|
## Usage
|
56
57
|
|
@@ -60,27 +61,40 @@ Validate and cast data from a CSV as described by a schema.
|
|
60
61
|
|
61
62
|
```ruby
|
62
63
|
schema = {
|
63
|
-
|
64
|
+
fields: [
|
64
65
|
{
|
65
|
-
|
66
|
-
|
67
|
-
|
66
|
+
name: 'id',
|
67
|
+
title: 'Identifier',
|
68
|
+
type: 'integer'
|
68
69
|
},
|
69
70
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
71
|
+
name: 'title',
|
72
|
+
title: 'Title',
|
73
|
+
type: 'string'
|
73
74
|
}
|
74
75
|
]
|
75
|
-
}
|
76
|
+
}
|
76
77
|
|
77
|
-
csv = 'https://github.com/frictionlessdata/tableschema-rb/raw/master/spec/fixtures/simple_data.csv'
|
78
|
+
csv = 'https://github.com/frictionlessdata/tableschema-rb/raw/master/spec/fixtures/simple_data.csv'
|
78
79
|
|
79
80
|
table = TableSchema::Table.new(csv, schema)
|
80
|
-
|
81
|
+
|
82
|
+
# Iterate through rows
|
83
|
+
table.iter{ |row| print row }
|
84
|
+
# [1, "foo"]
|
85
|
+
# [2, "bar"]
|
86
|
+
# [3, "baz"]
|
87
|
+
|
88
|
+
# Read the entire CSV in memory
|
89
|
+
table.read
|
81
90
|
#=> [[1,'foo'],[2,'bar'],[3,'baz']]
|
82
91
|
```
|
83
92
|
|
93
|
+
Both `iter` and `read` take the optional parameters:
|
94
|
+
- `row_limit`: integer, default `nil` - stop at this many rows
|
95
|
+
- `cast`: boolean, default `true` - cast values for each row
|
96
|
+
- `keyed`: boolean, default `false` - return the rows as Hashes with headers as keys
|
97
|
+
|
84
98
|
### Infer a schema
|
85
99
|
|
86
100
|
If you don't have a schema for a CSV, and want to generate one, you can infer a schema like so:
|
@@ -90,95 +104,69 @@ csv = 'https://github.com/frictionlessdata/tableschema-rb/raw/master/spec/fixtur
|
|
90
104
|
|
91
105
|
table = TableSchema::Table.infer_schema(csv)
|
92
106
|
table.schema
|
93
|
-
#=> {
|
107
|
+
#=> {:fields=>[{:name=>"id", :title=>"", :description=>"", :type=>"integer", :format=>"default", :constraints=>{}}, {:name=>"title", :title=>"", :description=>"", :type=>"string", :format=>"default", :constraints=>{}}]}
|
94
108
|
```
|
95
109
|
|
96
|
-
###
|
110
|
+
### Build a Schema
|
97
111
|
|
98
|
-
|
112
|
+
You can also build a schema from scratch or modify an existing one:
|
99
113
|
|
100
114
|
```ruby
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
+
schema = TableSchema::Schema.new({
|
116
|
+
fields: [],
|
117
|
+
})
|
118
|
+
|
119
|
+
# Add a field
|
120
|
+
schema.add_field({
|
121
|
+
name: 'id',
|
122
|
+
type: 'string',
|
123
|
+
constraints: {
|
124
|
+
required: true,
|
125
|
+
}
|
126
|
+
})
|
127
|
+
|
128
|
+
# Remove a field
|
129
|
+
schema.remove_field('id')
|
115
130
|
```
|
116
131
|
|
117
|
-
|
132
|
+
`add_field` will ignore the updates if the updated version of the schema fails [validation](#validate-a-schema).
|
133
|
+
If you wish to prevent an invalid schema from being created or updated by raising validation errors, you can pass the `strict: true` argument to the Schema initializer:
|
118
134
|
|
119
135
|
```ruby
|
120
|
-
schema = TableSchema::Schema.new(
|
121
|
-
schema.valid?
|
122
|
-
#=> true
|
136
|
+
schema = TableSchema::Schema.new(schema_hash, strict: true)
|
123
137
|
```
|
124
138
|
|
125
|
-
|
139
|
+
There are multiple methods to inspect a schema:
|
126
140
|
|
127
141
|
```ruby
|
128
142
|
schema_hash = {
|
129
|
-
|
143
|
+
fields: [
|
130
144
|
{
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
{
|
136
|
-
"name"=>"title",
|
137
|
-
"title"=>"Title",
|
138
|
-
"type"=>"string"
|
139
|
-
}
|
140
|
-
],
|
141
|
-
"primaryKey"=>"identifier"
|
142
|
-
}
|
143
|
-
|
144
|
-
schema.valid?
|
145
|
-
#=> false
|
146
|
-
schema.messages
|
147
|
-
#=> ["The JSON Table Schema primaryKey value `identifier` is not found in any of the schema's field names"]
|
148
|
-
```
|
149
|
-
|
150
|
-
## Schema Model
|
151
|
-
|
152
|
-
You can also access the schema via a Ruby model, with some useful methods for interaction:
|
153
|
-
|
154
|
-
```ruby
|
155
|
-
schema_hash = {
|
156
|
-
"fields" => [
|
157
|
-
{
|
158
|
-
"name" => "id",
|
159
|
-
"type" => "string",
|
160
|
-
"constraints" => {
|
161
|
-
"required" => true,
|
162
|
-
}
|
145
|
+
name: 'id',
|
146
|
+
type: 'string',
|
147
|
+
constraints: {
|
148
|
+
required: true,
|
163
149
|
},
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
150
|
+
},
|
151
|
+
{
|
152
|
+
name: 'height',
|
153
|
+
type: 'number',
|
154
|
+
},
|
155
|
+
{
|
156
|
+
name: 'state',
|
157
|
+
},
|
168
158
|
],
|
169
|
-
|
170
|
-
|
159
|
+
primaryKey: 'id',
|
160
|
+
foreignKeys: [
|
171
161
|
{
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
}
|
162
|
+
fields: 'state',
|
163
|
+
reference: {
|
164
|
+
resource: 'the-resource',
|
165
|
+
fields: 'state_id',
|
166
|
+
},
|
167
|
+
},
|
179
168
|
]
|
180
169
|
}
|
181
|
-
|
182
170
|
schema = TableSchema::Schema.new(schema_hash)
|
183
171
|
|
184
172
|
schema.headers
|
@@ -186,79 +174,126 @@ schema.headers
|
|
186
174
|
schema.required_headers
|
187
175
|
#=> ["id"]
|
188
176
|
schema.fields
|
189
|
-
#=> [{
|
177
|
+
#=> [{:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}, {:name=>"height", :type=>"number", :format=>"default", :constraints=>{}}]
|
190
178
|
schema.primary_keys
|
191
179
|
#=> ["id"]
|
192
180
|
schema.foreign_keys
|
193
|
-
|
181
|
+
# => [{:fields=>"state", :reference=>{:resource=>"the-resource", :fields=>"state_id"}}]
|
194
182
|
schema.get_field('id')
|
195
|
-
|
183
|
+
# => {:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}
|
196
184
|
schema.has_field?('foo')
|
197
185
|
#=> false
|
198
186
|
schema.get_type('id')
|
199
187
|
#=> 'string'
|
200
188
|
schema.get_fields_by_type('string')
|
201
|
-
|
189
|
+
# => [{:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}, {:name=>"state", :type=>"string", :format=>"default", :constraints=>{}}]
|
202
190
|
schema.get_constraints('id')
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
191
|
+
# => {:required=>true}
|
192
|
+
```
|
193
|
+
|
194
|
+
#### Cast row
|
195
|
+
|
196
|
+
To check if a given set of values complies with the schema, you can use `cast_row`:
|
197
|
+
|
198
|
+
```ruby
|
199
|
+
schema.cast_row(['string', '10.0', 'State'])
|
200
|
+
#=> ['string', 10.0, 'State']
|
201
|
+
```
|
202
|
+
|
203
|
+
By default the converter will fail on the first error it finds. However, by passing `fail_fast: false` as the second argument the errors will be collected into an `exception.errors` attribute for you to review later. For example:
|
204
|
+
|
205
|
+
```ruby
|
206
|
+
row = [3, 'nan', 'State']
|
207
|
+
|
208
|
+
schema.cast_row(row)
|
209
|
+
#=> TableSchema::InvalidCast: 3 is not a string
|
210
|
+
begin
|
211
|
+
schema.cast_row(row, fail_fast: false)
|
212
|
+
rescue TableSchema::MultipleInvalid => exception
|
213
|
+
exception.errors
|
214
|
+
end
|
215
|
+
#=> #<Set: {#<TableSchema::InvalidCast: 3 is not a string>,
|
216
|
+
#<TableSchema::InvalidCast: nan is not a number>}>
|
208
217
|
```
|
209
218
|
|
210
|
-
|
219
|
+
### Validate a schema
|
220
|
+
|
221
|
+
To make sure a schema complies with [Table Schema spec](https://specs.frictionlessdata.io/table-schema), we validate each custom schema against the
|
222
|
+
official [Table Schema schema](https://specs.frictionlessdata.io/schemas/table-schema.json):
|
211
223
|
|
212
224
|
```ruby
|
213
225
|
schema_hash = {
|
214
|
-
|
215
|
-
{
|
216
|
-
"name" => "id",
|
217
|
-
"type" => "string",
|
218
|
-
"constraints" => {
|
219
|
-
"required" => true,
|
220
|
-
}
|
221
|
-
},
|
222
|
-
{
|
223
|
-
"name" => "height",
|
224
|
-
"type" => "number"
|
225
|
-
}
|
226
|
+
fields: [
|
227
|
+
{ name: 'id' },
|
226
228
|
]
|
227
229
|
}
|
228
|
-
|
229
230
|
schema = TableSchema::Schema.new(schema_hash)
|
231
|
+
schema.validate
|
232
|
+
#=> true
|
233
|
+
```
|
230
234
|
|
231
|
-
|
232
|
-
['foo', 'notanumber'],
|
233
|
-
['bar', 'notanumber'],
|
234
|
-
['wrong column count']
|
235
|
-
]
|
235
|
+
If the schema is invalid, you can access the errors via the `errors` attribute:
|
236
236
|
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
237
|
+
```ruby
|
238
|
+
schema_hash = {
|
239
|
+
fields: [
|
240
|
+
{
|
241
|
+
name: 'id',
|
242
|
+
title: 'Identifier',
|
243
|
+
type: 'integer'
|
244
|
+
},
|
245
|
+
{
|
246
|
+
name: 'title',
|
247
|
+
title: 'Title',
|
248
|
+
type: 'string'
|
249
|
+
}
|
250
|
+
],
|
251
|
+
primaryKey: 'identifier'
|
252
|
+
}
|
253
|
+
|
254
|
+
schema = TableSchema::Schema.new(schema_hash)
|
255
|
+
schema.validate
|
256
|
+
#=> false
|
241
257
|
schema.errors
|
242
|
-
#=>
|
258
|
+
#=> #<Set: {"The TableSchema primaryKey value `identifier` is not found in any of the schema's field names"}>
|
259
|
+
|
260
|
+
# Raise error if validation fails
|
261
|
+
schema.validate!
|
262
|
+
#=> TableSchema::SchemaException: The TableSchema primaryKey value `identifier` is not found in any of the schema's field names
|
243
263
|
```
|
244
264
|
|
245
265
|
## Field
|
246
266
|
|
267
|
+
Data values can be cast to native Ruby objects with a Field instance. This allows formats and constraints to be defined for the field in the [field descriptor](https://specs.frictionlessdata.io/table-schema/#field-descriptors):
|
268
|
+
|
247
269
|
```ruby
|
248
270
|
# Init field
|
249
|
-
field = TableSchema::Field.new({
|
271
|
+
field = TableSchema::Field.new({
|
272
|
+
name: 'over_1700',
|
273
|
+
type: 'number',
|
274
|
+
constraints: {
|
275
|
+
minimum: '1700',
|
276
|
+
},
|
277
|
+
})
|
250
278
|
|
251
279
|
# Cast a value
|
252
280
|
field.cast_value('12345')
|
253
281
|
#=> 12345.0
|
254
282
|
```
|
255
283
|
|
256
|
-
|
284
|
+
Casting a value will check the value is of the expected `type`, is in the correct `format`, and complies with any `constraints` imposed in the descriptor.
|
257
285
|
|
258
|
-
|
286
|
+
Values that can't be cast will raise an `InvalidCast` exception.
|
259
287
|
|
260
288
|
Casting a value that doesn't meet the constraints will raise a `ConstraintError` exception.
|
261
289
|
|
290
|
+
```ruby
|
291
|
+
field.cast_value('nan')
|
292
|
+
#=> TableSchema::InvalidCast: nan is not a number
|
293
|
+
field.cast_value('1200')
|
294
|
+
#=> TableSchema::ConstraintError: The field `over_1700` must not be less than 1700
|
295
|
+
```
|
296
|
+
|
262
297
|
## Development
|
263
298
|
|
264
299
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|