tableschema 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +21 -0
- data/.travis.yml +15 -1
- data/README.md +164 -129
- data/Rakefile +10 -1
- data/bin/console +2 -6
- data/{etc/schemas → lib/profiles}/geojson.json +0 -1
- data/lib/profiles/table-schema.json +1625 -0
- data/lib/profiles/topojson.json +311 -0
- data/lib/tableschema.rb +5 -3
- data/lib/tableschema/constraints/constraints.rb +12 -24
- data/lib/tableschema/constraints/enum.rb +6 -2
- data/lib/tableschema/constraints/max_length.rb +6 -2
- data/lib/tableschema/constraints/maximum.rb +12 -2
- data/lib/tableschema/constraints/min_length.rb +6 -2
- data/lib/tableschema/constraints/minimum.rb +12 -2
- data/lib/tableschema/constraints/pattern.rb +9 -2
- data/lib/tableschema/constraints/required.rb +6 -15
- data/lib/tableschema/constraints/unique.rb +12 -0
- data/lib/tableschema/defaults.rb +9 -0
- data/lib/tableschema/exceptions.rb +15 -2
- data/lib/tableschema/field.rb +39 -20
- data/lib/tableschema/helpers.rb +32 -15
- data/lib/tableschema/infer.rb +31 -28
- data/lib/tableschema/model.rb +57 -34
- data/lib/tableschema/schema.rb +40 -6
- data/lib/tableschema/table.rb +75 -26
- data/lib/tableschema/types/any.rb +1 -0
- data/lib/tableschema/types/array.rb +2 -1
- data/lib/tableschema/types/base.rb +9 -21
- data/lib/tableschema/types/date.rb +1 -0
- data/lib/tableschema/types/datetime.rb +1 -0
- data/lib/tableschema/types/duration.rb +31 -0
- data/lib/tableschema/types/geojson.rb +27 -5
- data/lib/tableschema/types/geopoint.rb +4 -3
- data/lib/tableschema/types/integer.rb +1 -0
- data/lib/tableschema/types/number.rb +40 -25
- data/lib/tableschema/types/object.rb +2 -1
- data/lib/tableschema/types/string.rb +8 -0
- data/lib/tableschema/types/time.rb +1 -0
- data/lib/tableschema/types/year.rb +34 -0
- data/lib/tableschema/types/yearmonth.rb +52 -0
- data/lib/tableschema/validate.rb +45 -29
- data/lib/tableschema/version.rb +1 -1
- data/tableschema.gemspec +2 -1
- metadata +31 -12
- data/etc/schemas/json-table-schema.json +0 -102
- data/lib/tableschema/data.rb +0 -60
- data/lib/tableschema/types/null.rb +0 -37
@@ -3,12 +3,22 @@ module TableSchema
|
|
3
3
|
module Maximum
|
4
4
|
|
5
5
|
def check_maximum
|
6
|
-
if @
|
7
|
-
|
6
|
+
if @field.type == 'yearmonth'
|
7
|
+
valid = Date.new(@value[:year], @value[:month]) <= Date.new(parsed_maximum[:year], parsed_maximum[:month])
|
8
|
+
else
|
9
|
+
valid = @value <= parsed_maximum
|
10
|
+
end
|
11
|
+
|
12
|
+
unless valid
|
13
|
+
raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must not be more than #{@constraints[:maximum]}")
|
8
14
|
end
|
9
15
|
true
|
10
16
|
end
|
11
17
|
|
18
|
+
def parsed_maximum
|
19
|
+
@field.cast_type(@constraints[:maximum])
|
20
|
+
end
|
21
|
+
|
12
22
|
end
|
13
23
|
end
|
14
24
|
end
|
@@ -4,12 +4,16 @@ module TableSchema
|
|
4
4
|
|
5
5
|
def check_min_length
|
6
6
|
return if @value.nil?
|
7
|
-
if @value.length <
|
8
|
-
raise TableSchema::ConstraintError.new("The field `#{@field[
|
7
|
+
if @value.length < parsed_min_length
|
8
|
+
raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must have a minimum length of #{@constraints[:minLength]}")
|
9
9
|
end
|
10
10
|
true
|
11
11
|
end
|
12
12
|
|
13
|
+
def parsed_min_length
|
14
|
+
@constraints[:minLength].to_i
|
15
|
+
end
|
16
|
+
|
13
17
|
end
|
14
18
|
end
|
15
19
|
end
|
@@ -3,12 +3,22 @@ module TableSchema
|
|
3
3
|
module Minimum
|
4
4
|
|
5
5
|
def check_minimum
|
6
|
-
if @
|
7
|
-
|
6
|
+
if @field.type == 'yearmonth'
|
7
|
+
valid = Date.new(@value[:year], @value[:month]) >= Date.new(parsed_minimum[:year], parsed_minimum[:month])
|
8
|
+
else
|
9
|
+
valid = @value >= parsed_minimum
|
10
|
+
end
|
11
|
+
|
12
|
+
unless valid
|
13
|
+
raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must not be less than #{@constraints[:minimum]}")
|
8
14
|
end
|
9
15
|
true
|
10
16
|
end
|
11
17
|
|
18
|
+
def parsed_minimum
|
19
|
+
@field.cast_type(@constraints[:minimum])
|
20
|
+
end
|
21
|
+
|
12
22
|
end
|
13
23
|
end
|
14
24
|
end
|
@@ -3,8 +3,15 @@ module TableSchema
|
|
3
3
|
module Pattern
|
4
4
|
|
5
5
|
def check_pattern
|
6
|
-
|
7
|
-
|
6
|
+
constraint = lambda { |value| value.match(/#{@constraints[:pattern]}/) }
|
7
|
+
if @field.type == 'yearmonth'
|
8
|
+
valid = constraint.call(Date.new(@value[:year], @value[:month]).strftime('%Y-%m'))
|
9
|
+
else
|
10
|
+
valid = constraint.call(@value.to_json)
|
11
|
+
end
|
12
|
+
|
13
|
+
unless valid
|
14
|
+
raise TableSchema::ConstraintError.new("The value for the field `#{@field[:name]}` must match the pattern")
|
8
15
|
end
|
9
16
|
true
|
10
17
|
end
|
@@ -3,30 +3,21 @@ module TableSchema
|
|
3
3
|
module Required
|
4
4
|
|
5
5
|
def check_required
|
6
|
-
if
|
7
|
-
raise TableSchema::ConstraintError.new("The field `#{@field[
|
6
|
+
if parsed_required == true && value_is_empty?
|
7
|
+
raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` requires a value")
|
8
8
|
end
|
9
9
|
true
|
10
10
|
end
|
11
11
|
|
12
12
|
private
|
13
13
|
|
14
|
-
def
|
15
|
-
|
14
|
+
def value_is_empty?
|
15
|
+
@value.nil? || @value == ''
|
16
16
|
end
|
17
17
|
|
18
|
-
def
|
19
|
-
|
18
|
+
def parsed_required
|
19
|
+
@constraints[:required].to_s == 'true'
|
20
20
|
end
|
21
|
-
|
22
|
-
def required
|
23
|
-
@constraints['required'].to_s == 'true'
|
24
|
-
end
|
25
|
-
|
26
|
-
def null_values
|
27
|
-
['null', 'none', 'nil', 'nan', '-', '']
|
28
|
-
end
|
29
|
-
|
30
21
|
end
|
31
22
|
end
|
32
23
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module TableSchema
|
2
2
|
class Exception < ::Exception ; end
|
3
|
-
|
3
|
+
|
4
4
|
class SchemaException < Exception
|
5
5
|
attr_reader :message
|
6
6
|
|
@@ -9,20 +9,33 @@ module TableSchema
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
+
class MultipleInvalid < Exception
|
13
|
+
attr_reader :message, :errors
|
14
|
+
|
15
|
+
def initialize(message, errors=[])
|
16
|
+
@message = message
|
17
|
+
@errors = errors
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
12
21
|
class InvalidFormat < Exception ; end
|
13
22
|
class InvalidCast < Exception ; end
|
14
23
|
class InvalidEmail < Exception ; end
|
15
24
|
class InvalidURI < Exception ; end
|
16
25
|
class InvalidUUID < Exception ; end
|
26
|
+
class InvalidBinary < Exception ; end
|
17
27
|
class InvalidObjectType < Exception ; end
|
18
28
|
class InvalidArrayType < Exception ; end
|
19
29
|
class InvalidDateType < Exception ; end
|
20
30
|
class InvalidTimeType < Exception ; end
|
21
31
|
class InvalidDateTimeType < Exception ; end
|
32
|
+
class InvalidYearType < Exception; end
|
33
|
+
class InvalidYearMonthType < Exception; end
|
34
|
+
class InvalidDurationType < Exception; end
|
22
35
|
class InvalidGeoJSONType < Exception ; end
|
36
|
+
class InvalidTopoJSONType < Exception ; end
|
23
37
|
class InvalidGeoPointType < Exception ; end
|
24
38
|
class ConstraintError < Exception ; end
|
25
39
|
class ConstraintNotSupported < Exception ; end
|
26
40
|
class ConversionError < Exception ; end
|
27
|
-
class MultipleInvalid < Exception ; end
|
28
41
|
end
|
data/lib/tableschema/field.rb
CHANGED
@@ -1,40 +1,59 @@
|
|
1
|
+
require 'tableschema/defaults'
|
2
|
+
|
1
3
|
module TableSchema
|
2
4
|
class Field < Hash
|
3
5
|
include TableSchema::Helpers
|
4
6
|
|
5
|
-
attr_reader :
|
6
|
-
|
7
|
-
def initialize(descriptor)
|
8
|
-
self.merge! descriptor
|
9
|
-
@type_class = get_type
|
10
|
-
end
|
7
|
+
attr_reader :name, :type, :format, :missing_values, :constraints
|
11
8
|
|
12
|
-
def
|
13
|
-
self
|
9
|
+
def initialize(descriptor, missing_values=nil)
|
10
|
+
self.merge! deep_symbolize_keys(descriptor)
|
11
|
+
@name = self[:name]
|
12
|
+
@type = self[:type] = self.fetch(:type, TableSchema::DEFAULTS[:type])
|
13
|
+
@format = self[:format] = self.fetch(:format, TableSchema::DEFAULTS[:format])
|
14
|
+
@constraints = self[:constraints] = self.fetch(:constraints, {})
|
15
|
+
@missing_values = missing_values || default_missing_values
|
14
16
|
end
|
15
17
|
|
16
|
-
def
|
17
|
-
self
|
18
|
+
def descriptor
|
19
|
+
self.to_h
|
18
20
|
end
|
19
21
|
|
20
|
-
def
|
21
|
-
|
22
|
+
def cast_value(value, check_constraints: true)
|
23
|
+
cast_value = cast_type(value)
|
24
|
+
return cast_value if check_constraints == false
|
25
|
+
TableSchema::Constraints.new(self, cast_value).validate!
|
26
|
+
cast_value
|
22
27
|
end
|
23
28
|
|
24
|
-
def
|
25
|
-
|
29
|
+
def test_value(value, check_constraints: true)
|
30
|
+
cast_value(value, check_constraints: check_constraints)
|
31
|
+
true
|
32
|
+
rescue TableSchema::Exception
|
33
|
+
false
|
26
34
|
end
|
27
35
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
|
36
|
+
def cast_type(value)
|
37
|
+
if is_null?(value)
|
38
|
+
nil
|
39
|
+
else
|
40
|
+
type_class.new(self).cast(value)
|
41
|
+
end
|
32
42
|
end
|
33
43
|
|
34
44
|
private
|
35
45
|
|
36
|
-
def
|
37
|
-
|
46
|
+
def default_missing_values
|
47
|
+
defaults = TableSchema::DEFAULTS[:missing_values]
|
48
|
+
@type == 'string' ? defaults - [''] : defaults
|
49
|
+
end
|
50
|
+
|
51
|
+
def type_class
|
52
|
+
Object.const_get get_class_for_type(@type)
|
53
|
+
end
|
54
|
+
|
55
|
+
def is_null?(value)
|
56
|
+
@missing_values.include?(value)
|
38
57
|
end
|
39
58
|
|
40
59
|
end
|
data/lib/tableschema/helpers.rb
CHANGED
@@ -1,6 +1,21 @@
|
|
1
1
|
module TableSchema
|
2
2
|
module Helpers
|
3
3
|
|
4
|
+
def deep_symbolize_keys(descriptor)
|
5
|
+
case descriptor
|
6
|
+
when Hash
|
7
|
+
descriptor.inject({}) do |new_descriptor, (key, val)|
|
8
|
+
key_sym = key.respond_to?(:to_sym) ? key.to_sym : key
|
9
|
+
new_descriptor[key_sym] = deep_symbolize_keys(val)
|
10
|
+
new_descriptor
|
11
|
+
end
|
12
|
+
when Enumerable
|
13
|
+
descriptor.map{ |el| deep_symbolize_keys(el)}
|
14
|
+
else
|
15
|
+
descriptor
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
4
19
|
def convert_to_boolean(value)
|
5
20
|
if value.is_a?(Boolean)
|
6
21
|
return value
|
@@ -22,25 +37,27 @@ module TableSchema
|
|
22
37
|
end
|
23
38
|
|
24
39
|
def get_class_for_type(type)
|
25
|
-
"TableSchema::Types::#{type_class_lookup[type] || 'String'}"
|
40
|
+
"TableSchema::Types::#{type_class_lookup[type.to_sym] || 'String'}"
|
26
41
|
end
|
27
42
|
|
28
43
|
def type_class_lookup
|
29
44
|
{
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
45
|
+
any: 'Any',
|
46
|
+
array: 'Array',
|
47
|
+
base: 'Base',
|
48
|
+
boolean: 'Boolean',
|
49
|
+
date: 'Date',
|
50
|
+
datetime: 'DateTime',
|
51
|
+
geojson: 'GeoJSON',
|
52
|
+
geopoint: 'GeoPoint',
|
53
|
+
integer: 'Integer',
|
54
|
+
number: 'Number',
|
55
|
+
object: 'Object',
|
56
|
+
string: 'String',
|
57
|
+
time: 'Time',
|
58
|
+
year: 'Year',
|
59
|
+
yearmonth: 'YearMonth',
|
60
|
+
duration: 'Duration',
|
44
61
|
}
|
45
62
|
end
|
46
63
|
|
data/lib/tableschema/infer.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'tableschema/defaults'
|
2
|
+
require 'tableschema/field'
|
3
|
+
|
1
4
|
module TableSchema
|
2
5
|
class Infer
|
3
6
|
|
@@ -5,41 +8,41 @@ module TableSchema
|
|
5
8
|
|
6
9
|
attr_reader :schema
|
7
10
|
|
8
|
-
def initialize(headers, rows,
|
11
|
+
def initialize(headers, rows, explicit: false, primary_key: nil, row_limit: nil)
|
9
12
|
@headers = headers
|
10
13
|
@rows = rows
|
11
|
-
@explicit =
|
12
|
-
@primary_key =
|
13
|
-
@row_limit =
|
14
|
+
@explicit = explicit
|
15
|
+
@primary_key = primary_key
|
16
|
+
@row_limit = row_limit
|
14
17
|
|
15
18
|
@schema = {
|
16
|
-
|
19
|
+
fields: fields
|
17
20
|
}
|
18
|
-
@schema[
|
21
|
+
@schema[:primaryKey] = @primary_key if @primary_key
|
19
22
|
infer!
|
20
23
|
end
|
21
24
|
|
22
25
|
def fields
|
23
26
|
@headers.map do |header|
|
24
27
|
descriptor = {
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
+
name: header,
|
29
|
+
title: '',
|
30
|
+
description: '',
|
28
31
|
}
|
29
32
|
|
30
33
|
constraints = {}
|
31
|
-
constraints[
|
32
|
-
constraints[
|
33
|
-
constraints.delete_if { |
|
34
|
-
descriptor[
|
35
|
-
descriptor
|
34
|
+
constraints[:required] = @explicit === true
|
35
|
+
constraints[:unique] = (header == @primary_key)
|
36
|
+
constraints.delete_if { |_,v| v == false } unless @explicit === true
|
37
|
+
descriptor[:constraints] = constraints if constraints.count > 0
|
38
|
+
TableSchema::Field.new(descriptor)
|
36
39
|
end
|
37
40
|
end
|
38
41
|
|
39
42
|
def infer!
|
40
43
|
type_matches = []
|
41
|
-
@rows.each_with_index do |row,
|
42
|
-
break if @row_limit &&
|
44
|
+
@rows.each_with_index do |row, index|
|
45
|
+
break if @row_limit && index > @row_limit
|
43
46
|
row = row.fields if row.class == CSV::Row
|
44
47
|
|
45
48
|
row_length = row.count
|
@@ -53,9 +56,9 @@ module TableSchema
|
|
53
56
|
row = row.push(fill).flatten
|
54
57
|
end
|
55
58
|
|
56
|
-
row.each_with_index do |col,
|
57
|
-
type_matches[
|
58
|
-
type_matches[
|
59
|
+
row.each_with_index do |col, idx|
|
60
|
+
type_matches[idx] ||= []
|
61
|
+
type_matches[idx] << guess_type(col, idx)
|
59
62
|
end
|
60
63
|
|
61
64
|
end
|
@@ -64,12 +67,12 @@ module TableSchema
|
|
64
67
|
end
|
65
68
|
|
66
69
|
def guess_type(col, index)
|
67
|
-
guessed_type =
|
68
|
-
guessed_format =
|
70
|
+
guessed_type = TableSchema::DEFAULTS[:type]
|
71
|
+
guessed_format = TableSchema::DEFAULTS[:format]
|
69
72
|
|
70
73
|
available_types.reverse_each do |type|
|
71
74
|
klass = get_class_for_type(type)
|
72
|
-
converter = Kernel.const_get(klass).new(@schema[
|
75
|
+
converter = Kernel.const_get(klass).new(@schema[:fields][index])
|
73
76
|
if converter.test(col) === true
|
74
77
|
guessed_type = type
|
75
78
|
guessed_format = guess_format(converter, col)
|
@@ -78,22 +81,23 @@ module TableSchema
|
|
78
81
|
end
|
79
82
|
|
80
83
|
{
|
81
|
-
|
82
|
-
|
84
|
+
type: guessed_type,
|
85
|
+
format: guessed_format
|
83
86
|
}
|
84
87
|
end
|
85
88
|
|
86
89
|
def guess_format(converter, col)
|
87
|
-
guessed_format =
|
90
|
+
guessed_format = TableSchema::DEFAULTS[:format]
|
88
91
|
converter.class.instance_methods.grep(/cast_/).each do |method|
|
89
92
|
begin
|
90
93
|
format = method.to_s
|
91
94
|
format.slice!('cast_')
|
92
|
-
next if format ==
|
95
|
+
next if format == TableSchema::DEFAULTS[:format]
|
93
96
|
converter.send(method, col)
|
94
97
|
guessed_format = format
|
95
98
|
break
|
96
99
|
rescue TableSchema::Exception
|
100
|
+
next
|
97
101
|
end
|
98
102
|
end
|
99
103
|
guessed_format
|
@@ -116,7 +120,7 @@ module TableSchema
|
|
116
120
|
rv = sorted_counts[0][0]
|
117
121
|
end
|
118
122
|
|
119
|
-
@schema[
|
123
|
+
@schema[:fields][v].merge!(rv)
|
120
124
|
end
|
121
125
|
|
122
126
|
end
|
@@ -128,7 +132,6 @@ module TableSchema
|
|
128
132
|
'boolean',
|
129
133
|
'number',
|
130
134
|
'integer',
|
131
|
-
'null',
|
132
135
|
'date',
|
133
136
|
'time',
|
134
137
|
'datetime',
|