tableschema 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +14 -0
- data/CHANGELOG.md +31 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +274 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/etc/schemas/geojson.json +209 -0
- data/etc/schemas/json-table-schema.json +102 -0
- data/lib/tableschema.rb +42 -0
- data/lib/tableschema/constraints/constraints.rb +76 -0
- data/lib/tableschema/constraints/enum.rb +14 -0
- data/lib/tableschema/constraints/max_length.rb +15 -0
- data/lib/tableschema/constraints/maximum.rb +14 -0
- data/lib/tableschema/constraints/min_length.rb +15 -0
- data/lib/tableschema/constraints/minimum.rb +14 -0
- data/lib/tableschema/constraints/pattern.rb +14 -0
- data/lib/tableschema/constraints/required.rb +32 -0
- data/lib/tableschema/data.rb +60 -0
- data/lib/tableschema/exceptions.rb +28 -0
- data/lib/tableschema/field.rb +41 -0
- data/lib/tableschema/helpers.rb +48 -0
- data/lib/tableschema/infer.rb +143 -0
- data/lib/tableschema/model.rb +73 -0
- data/lib/tableschema/schema.rb +36 -0
- data/lib/tableschema/table.rb +51 -0
- data/lib/tableschema/types/any.rb +23 -0
- data/lib/tableschema/types/array.rb +37 -0
- data/lib/tableschema/types/base.rb +54 -0
- data/lib/tableschema/types/boolean.rb +35 -0
- data/lib/tableschema/types/date.rb +56 -0
- data/lib/tableschema/types/datetime.rb +63 -0
- data/lib/tableschema/types/geojson.rb +38 -0
- data/lib/tableschema/types/geopoint.rb +56 -0
- data/lib/tableschema/types/integer.rb +35 -0
- data/lib/tableschema/types/null.rb +37 -0
- data/lib/tableschema/types/number.rb +60 -0
- data/lib/tableschema/types/object.rb +37 -0
- data/lib/tableschema/types/string.rb +64 -0
- data/lib/tableschema/types/time.rb +55 -0
- data/lib/tableschema/validate.rb +54 -0
- data/lib/tableschema/version.rb +3 -0
- data/tableschema.gemspec +32 -0
- metadata +231 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
module TableSchema
|
2
|
+
class Schema < Hash
|
3
|
+
include TableSchema::Validate
|
4
|
+
include TableSchema::Model
|
5
|
+
include TableSchema::Data
|
6
|
+
include TableSchema::Helpers
|
7
|
+
|
8
|
+
# Builds the schema from +descriptor+.
#
# The parsed descriptor is merged into this object, the message list is
# reset, +opts+ is stored, and then the setup hooks +load_fields!+,
# +load_validator!+ and +expand!+ are run in that order (they are defined
# outside this block, presumably by the included modules).
#
# @param descriptor whatever +parse_schema+ accepts
# @param opts [Hash] options kept in +@opts+
def initialize(descriptor, opts = {})
  merge!(parse_schema(descriptor))
  @messages = []
  @opts = opts
  load_fields!
  load_validator!
  expand!
end
|
16
|
+
|
17
|
+
# Turns a schema descriptor into a Hash.
#
# * A Hash (or any Hash subclass) is returned untouched.
# * A String is treated as an http/https/ftp URL or as a local file path;
#   its content is read and parsed as JSON.
# * Anything else is rejected.
#
# Remote descriptors are fetched through the URI object's own +open+
# (provided by open-uri) and local ones through +File.read+, so the string is
# never handed to +Kernel#open+ (which would execute "| command" strings and,
# on Ruby 3, no longer understands URLs). Both forms close the handle.
#
# @param descriptor [Hash, String] schema hash, file path or URL
# @return [Hash] the descriptor (the parsed JSON value for String input)
# @raise [SchemaException] for a missing file, an HTTP error, invalid JSON,
#   or an unsupported descriptor type
def parse_schema(descriptor)
  return descriptor if descriptor.is_a?(Hash)

  unless descriptor.is_a?(String)
    raise SchemaException.new("A schema must be a hash, path or URL")
  end

  begin
    raw = if descriptor =~ %r{\A(?:https?|ftp)://}i
            URI.parse(descriptor).open(&:read)
          else
            File.read(descriptor)
          end
    JSON.parse(raw)
  rescue Errno::ENOENT
    raise SchemaException.new("File not found at `#{descriptor}`")
  rescue OpenURI::HTTPError => e
    raise SchemaException.new("URL `#{descriptor}` returned #{e.message}")
  rescue JSON::ParserError
    raise SchemaException.new("File at `#{descriptor}` is not valid JSON")
  end
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module TableSchema
|
2
|
+
class Table
|
3
|
+
|
4
|
+
attr_reader :schema
|
5
|
+
|
6
|
+
# Convenience constructor: builds a TableSchema::Table for +csv+ with no
# descriptor (nil), which makes the table infer its schema from the data.
#
# @param csv the CSV source handed to Table.new
# @param opts [Hash] options handed to Table.new
# @return [TableSchema::Table]
def self.infer_schema(csv, opts = {})
  no_descriptor = nil
  TableSchema::Table.new(csv, no_descriptor, opts)
end
|
9
|
+
|
10
|
+
# Stores the options, wraps +csv+ via +parse_csv+ and sets up the schema.
#
# When +descriptor+ is nil the schema is inferred from the parsed CSV;
# otherwise a TableSchema::Schema is built from the descriptor.
#
# @param csv the CSV source accepted by +parse_csv+
# @param descriptor [nil, Object] schema descriptor, or nil to infer one
# @param opts [Hash] options kept in +@opts+
def initialize(csv, descriptor, opts = {})
  @opts = opts
  @csv = parse_csv(csv)
  @schema = if descriptor.nil?
              infer_schema(@csv)
            else
              TableSchema::Schema.new(descriptor)
            end
end
|
15
|
+
|
16
|
+
# Wraps a CSV source in a CSV reader configured with +csv_options+.
#
# * An Array of rows is serialised with +array_to_csv+ and read from memory.
# * An object with a public +open+ (e.g. a Pathname, or a URI once open-uri is
#   loaded) is opened through that method.
# * A String starting with http://, https:// or ftp:// is fetched via the URI
#   object's +open+ (open-uri).
# * Any other value is opened as a local file path.
#
# Strings are never handed to +Kernel#open+, which would execute
# "| command" values and, on Ruby 3, no longer understands URLs.
# The options are splatted as keywords because Ruby 3 no longer converts a
# trailing positional Hash into keyword arguments for +CSV.new+.
#
# The returned reader keeps the underlying IO open, since rows are read
# from it later.
#
# @param csv [Array<Array>, String, #open] rows, a path/URL, or an openable
# @return [CSV]
def parse_csv(csv)
  io = if csv.is_a?(Array)
         StringIO.new(array_to_csv(csv))
       elsif csv.respond_to?(:open)
         csv.open
       elsif %r{\A(?:https?|ftp)://}i =~ csv
         URI.parse(csv).open
       else
         File.open(csv)
       end
  CSV.new(io, **csv_options)
end
|
20
|
+
|
21
|
+
# Options passed to the CSV reader: the caller's +:csv_options+ (if any) with
# +headers: true+ always forced on. Returns a new Hash.
def csv_options
  user_options = @opts[:csv_options] || {}
  user_options.merge(headers: true)
end
|
24
|
+
|
25
|
+
# Casts the CSV rows through the schema.
#
# @param opts [Hash]
#   :fail_fast - forwarded to +cast_rows+; treated as true when absent/nil
#   :limit     - forwarded to +cast_rows+ unchanged
#   :keyed     - when truthy, each row is returned as a Hash keyed by the
#                CSV headers instead of the raw cast result
# @return whatever +@schema.cast_rows+ returns, or its keyed conversion
def rows(opts = {})
  requested = opts[:fail_fast]
  fail_fast = requested.nil? ? true : requested
  converted = @schema.cast_rows(@csv, fail_fast, opts[:limit])
  return converted unless opts[:keyed]

  coverted_to_hash(@csv.headers, converted)
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
# Serialises an Array of row Arrays to CSV text: each row is rendered with
# Array#to_csv (row separator suppressed) and the rows are joined with CRLF.
#
# @param array [Array<Array>]
# @return [String]
def array_to_csv(array)
  lines = array.map do |row|
    row.to_csv(row_sep: nil)
  end
  lines.join("\r\n")
end
|
36
|
+
|
37
|
+
# Converts positional rows into Hashes keyed by header.
# Cell i of every row is stored under headers[i].
#
# @param headers [Array] column names, indexed by position
# @param array [Array<Array>] rows of cells
# @return [Array<Hash>]
def coverted_to_hash(headers, array)
  array.map do |row|
    pairs = row.each_with_index.map { |cell, position| [headers[position], cell] }
    Hash[pairs]
  end
end
|
42
|
+
|
43
|
+
# Infers a schema from the CSV data.
#
# Reads the first row to obtain the column names, rewinds the reader, then
# delegates to TableSchema::Infer and returns its +schema+.
#
# @param csv [CSV] reader created with headers enabled
# @return the value of TableSchema::Infer#schema
def infer_schema(csv)
  column_names = csv.first.to_h.keys
  csv.rewind
  TableSchema::Infer.new(column_names, csv).schema
end
|
49
|
+
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module TableSchema
|
2
|
+
module Types
|
3
|
+
class Any < Base
|
4
|
+
|
5
|
+
# Name of this field type as written in a schema descriptor.
#
# @return [String]
def name
  'any'
end
|
8
|
+
|
9
|
+
# Constraint names that may be applied to a field of this type.
#
# @return [Array<String>]
def self.supported_constraints
  %w[required pattern enum]
end
|
16
|
+
|
17
|
+
# Default cast for the "any" type: the value is passed through unchanged.
#
# @param value [Object]
# @return [Object] the same object
def cast_default(value)
  value
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module TableSchema
|
2
|
+
module Types
|
3
|
+
class Array < Base
|
4
|
+
|
5
|
+
# Name of this field type as written in a schema descriptor.
#
# @return [String]
def name
  'array'
end
|
8
|
+
|
9
|
+
# Constraint names that may be applied to a field of this type.
#
# @return [Array<String>]
def self.supported_constraints
  %w[required pattern enum minLength maxLength]
end
|
18
|
+
|
19
|
+
# Ruby class that a successfully cast value belongs to.
#
# @return [Class] the core ::Array class
def type
  ::Array
end
|
22
|
+
|
23
|
+
# Casts +value+ to an Array.
#
# An Array is returned as-is; anything else is parsed as JSON and must
# decode to an Array. Every StandardError raised along the way (bad JSON,
# non-string input, wrong decoded type) is reported as InvalidArrayType.
#
# @param value [Array, String]
# @return [Array]
# @raise [TableSchema::InvalidArrayType]
def cast_default(value)
  return value if value.is_a?(type)

  decoded = JSON.parse(value)
  unless decoded.is_a?(type)
    raise TableSchema::InvalidArrayType.new("#{value} is not a valid array")
  end
  decoded
rescue
  raise TableSchema::InvalidArrayType.new("#{value} is not a valid array")
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module TableSchema
|
2
|
+
module Types
|
3
|
+
class Base
|
4
|
+
include TableSchema::Helpers
|
5
|
+
|
6
|
+
|
7
|
+
# Captures the field descriptor and derives the settings used when casting.
#
# @param field [Hash] field descriptor; reads the 'constraints', 'type' and
#   (through +set_format+) 'format' keys
def initialize(field)
  @field = field
  @constraints = field['constraints'] || {}
  # "required" counts as set for the boolean true and for the string 'true'.
  required_flag = @constraints['required']
  @required = required_flag == 'true' || required_flag == true
  @type = @field['type']
  set_format
end
|
14
|
+
|
15
|
+
# Validates and casts +value+ according to the field's type and format.
#
# Constraints are checked first (unless skipped), null-like values of
# non-required fields become nil, and everything else is dispatched to the
# +cast_<format>+ method of the concrete type.
#
# Support for the format is decided with +respond_to?+ rather than by
# inspecting a NoMethodError message: the message wording differs between
# Ruby versions, and a NoMethodError raised *inside* a caster is a genuine
# bug that must not be reported as an unsupported format.
#
# @param value [Object] raw value to cast
# @param skip_constraints [Boolean] when true, constraint validation is skipped
# @return [Object, nil] the cast value, or nil for a null-like value
# @raise [TableSchema::InvalidFormat] when the type has no caster for the format
def cast(value, skip_constraints = false)
  TableSchema::Constraints.new(@field, value).validate! unless skip_constraints
  return nil if is_null?(value)

  caster = "cast_#{@format}"
  # include private methods: casters may be declared below `private`
  unless respond_to?(caster, true)
    raise(TableSchema::InvalidFormat.new("The format `#{@format}` is not supported by the type `#{@type}`"))
  end

  send(caster, value)
end
|
26
|
+
|
27
|
+
# Reports whether +value+ can be cast by this type.
# Constraints are skipped; only a TableSchema::Exception counts as failure,
# any other error propagates.
#
# @param value [Object]
# @return [Boolean]
def test(value)
  begin
    cast(value, true)
  rescue TableSchema::Exception
    return false
  end
  true
end
|
33
|
+
|
34
|
+
# Derives +@format+ (and, for custom patterns, +@format_string+) from the
# field's 'format' entry.
#
# * "fmt:<pattern>" -> @format = 'fmt', @format_string = '<pattern>'
# * any other value -> @format = that value
# * missing         -> @format = 'default'
def set_format
  declared = @field['format']
  if (declared || '').start_with?('fmt:')
    # split on the first colon only, so the pattern may itself contain colons
    @format, @format_string = declared.split(':', 2)
  else
    @format = declared || 'default'
  end
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
# True when the field is not required and +value+ is one of the recognised
# null markers (see +null_values+).
#
# @param value [Object]
# @return [Boolean]
def is_null?(value)
  @required == false && null_values.include?(value)
end
|
47
|
+
|
48
|
+
# Raw strings that are treated as "no value"; the last entry is the empty
# string.
#
# @return [Array<String>]
def null_values
  %w[null none nil nan -] << ''
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Ruby has no single boolean class, so a marker module is mixed into both
# TrueClass and FalseClass; `x.is_a?(Boolean)` then holds for true and false.
# Approach taken from http://stackoverflow.com/a/3028378/452684
module Boolean
end

class TrueClass
  include Boolean
end

class FalseClass
  include Boolean
end
|
6
|
+
|
7
|
+
module TableSchema
|
8
|
+
module Types
|
9
|
+
class Boolean < Base
|
10
|
+
|
11
|
+
# Name of this field type as written in a schema descriptor.
#
# @return [String]
def name
  'boolean'
end
|
14
|
+
|
15
|
+
# Constraint names that may be applied to a field of this type.
#
# @return [Array<String>]
def self.supported_constraints
  %w[required pattern enum]
end
|
22
|
+
|
23
|
+
# Marker module shared by true and false (the top-level ::Boolean).
#
# @return [Module]
def type
  ::Boolean
end
|
26
|
+
|
27
|
+
# Casts +value+ to true/false using +convert_to_boolean+ (defined outside
# this block; a nil result is taken to mean "not convertible").
#
# The converted result is kept in its own variable so that the error message
# still shows the original input rather than nil.
#
# @param value [Object] raw value
# @return the non-nil result of +convert_to_boolean+
# @raise [TableSchema::InvalidCast] when the value cannot be converted
def cast_default(value)
  converted = convert_to_boolean(value)
  raise TableSchema::InvalidCast.new("#{value} is not a #{name}") if converted.nil?

  converted
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module TableSchema
|
2
|
+
module Types
|
3
|
+
class Date < Base
|
4
|
+
|
5
|
+
# Name of this field type as written in a schema descriptor.
#
# @return [String]
def name
  'date'
end
|
8
|
+
|
9
|
+
# Constraint names that may be applied to a field of this type.
#
# @return [Array<String>]
def self.supported_constraints
  %w[required pattern enum minimum maximum]
end
|
18
|
+
|
19
|
+
# Ruby class that a successfully cast value belongs to.
#
# @return [Class] the top-level ::Date class
def type
  ::Date
end
|
22
|
+
|
23
|
+
# strptime pattern used for the default date format (YYYY-MM-DD).
#
# @return [String]
def iso8601
  '%Y-%m-%d'
end
|
26
|
+
|
27
|
+
# Default cast: parses +value+ with the +iso8601+ pattern.
# Note that this overwrites +@format_string+ before delegating to +cast_fmt+.
#
# @param value [Object]
# @return the result of +cast_fmt+
def cast_default(value)
  @format_string = iso8601
  cast_fmt(value)
end
|
31
|
+
|
32
|
+
# Casts +value+ to a Date using free-form parsing.
#
# A value that already is a Date is returned as-is. Otherwise the string is
# accepted only when Date._parse extracts exactly three components from it;
# it is then parsed with Date.parse.
#
# Date._parse does not range-check its components, so a string such as
# "2015-02-30" passes the component check and only fails inside Date.parse.
# That ArgumentError, and the TypeError raised for non-string input, are
# reported as InvalidDateType, matching how DateTime#cast_any behaves.
#
# @param value [Date, String]
# @return [Date]
# @raise [TableSchema::InvalidDateType] when the value is not a valid date
def cast_any(value)
  return value if value.is_a?(type)

  components = ::Date._parse(value)
  unless components.size == 3
    raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
  end

  ::Date.parse(value)
rescue ArgumentError, TypeError
  raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
end
|
42
|
+
|
43
|
+
# Casts +value+ to a Date using the strptime pattern in +@format_string+.
# A value that already is a Date is returned as-is.
#
# @param value [Date, String]
# @return [Date]
# @raise [TableSchema::InvalidDateType] when strptime rejects the value
def cast_fmt(value)
  if value.is_a?(type)
    value
  else
    begin
      ::Date.strptime(value, @format_string)
    rescue ArgumentError
      raise TableSchema::InvalidDateType.new("#{value} is not a valid date")
    end
  end
end
|
52
|
+
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module TableSchema
|
2
|
+
module Types
|
3
|
+
class DateTime < Base
|
4
|
+
|
5
|
+
# Name of this field type as written in a schema descriptor.
#
# @return [String]
def name
  'datetime'
end
|
8
|
+
|
9
|
+
# Constraint names that may be applied to a field of this type.
#
# @return [Array<String>]
def self.supported_constraints
  %w[required pattern enum minimum maximum]
end
|
18
|
+
|
19
|
+
# Ruby class that a successfully cast value belongs to.
#
# @return [Class] the top-level ::DateTime class
def type
  ::DateTime
end
|
22
|
+
|
23
|
+
# strptime pattern used for the default datetime format
# (e.g. 2015-01-02T03:04:05Z; the trailing Z is matched literally).
#
# @return [String]
def iso8601
  '%Y-%m-%dT%H:%M:%SZ'
end
|
26
|
+
|
27
|
+
# raw_formats = ['DD/MM/YYYYThh/mm/ss']
|
28
|
+
# py_formats = ['%Y/%m/%dT%H:%M:%S']
|
29
|
+
# format_map = dict(zip(raw_formats, py_formats))
|
30
|
+
|
31
|
+
# Default cast: parses +value+ with the +iso8601+ pattern.
# Note that this overwrites +@format_string+ before delegating to +cast_fmt+.
#
# @param value [Object]
# @return the result of +cast_fmt+
def cast_default(value)
  @format_string = iso8601
  cast_fmt(value)
end
|
35
|
+
|
36
|
+
# Casts +value+ to a DateTime using free-form parsing.
#
# A value that already is a DateTime is returned as-is. Otherwise the string
# is accepted only when DateTime._parse extracts at least four components
# from it; it is then parsed with DateTime.parse.
#
# @param value [DateTime, String]
# @return [DateTime]
# @raise [TableSchema::InvalidDateTimeType] when too few components are
#   found or parsing raises ArgumentError
def cast_any(value)
  return value if value.is_a?(type)

  begin
    components = ::DateTime._parse(value)
    if components.values.count < 4
      raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid datetime")
    end
    ::DateTime.parse(value)
  rescue ArgumentError
    raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid datetime")
  end
end
|
50
|
+
|
51
|
+
# Casts +value+ to a DateTime using the strptime pattern in +@format_string+.
# A value that already is a DateTime is returned as-is.
#
# The error message names the value as an invalid *datetime*, in line with
# the message raised by +cast_any+ for the same exception class.
#
# @param value [DateTime, String]
# @return [DateTime]
# @raise [TableSchema::InvalidDateTimeType] when strptime rejects the value
def cast_fmt(value)
  return value if value.is_a?(type)

  ::DateTime.strptime(value, @format_string)
rescue ArgumentError
  raise TableSchema::InvalidDateTimeType.new("#{value} is not a valid datetime")
end
|
60
|
+
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module TableSchema
|
2
|
+
module Types
|
3
|
+
class GeoJSON < Base
|
4
|
+
|
5
|
+
# Name of this field type as written in a schema descriptor.
#
# @return [String]
def name
  'geojson'
end
|
8
|
+
|
9
|
+
# Constraint names that may be applied to a field of this type.
#
# @return [Array<String>]
def self.supported_constraints
  %w[required pattern enum]
end
|
16
|
+
|
17
|
+
# Ruby class that a successfully cast value belongs to.
#
# @return [Class] the core ::Hash class
def type
  ::Hash
end
|
20
|
+
|
21
|
+
# Casts +value+ to a GeoJSON Hash.
#
# A non-Hash value is first parsed as JSON; the result is then validated
# against the bundled GeoJSON schema with JSON::Validator.
#
# @param value [Hash, String]
# @return [Hash] the (possibly parsed) value
# @raise [TableSchema::InvalidGeoJSONType] on a JSON parse error or a schema
#   validation error
def cast_default(value)
  value = JSON.parse(value) unless value.is_a?(type)
  JSON::Validator.validate!(geojson_schema, value)
  value
rescue JSON::Schema::ValidationError, JSON::ParserError
  raise TableSchema::InvalidGeoJSONType.new("#{value} is not valid GeoJSON")
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
# Parsed GeoJSON JSON-schema, loaded from etc/schemas/geojson.json (resolved
# relative to this source file) and memoised in +@geojson_schema+.
def geojson_schema
  schema_path = File.join(File.dirname(__FILE__), "..", "..", "..", "etc", "schemas", "geojson.json")
  @geojson_schema ||= JSON.parse(File.read(schema_path))
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module TableSchema
|
2
|
+
module Types
|
3
|
+
class GeoPoint < Base
|
4
|
+
|
5
|
+
# Name of this field type as written in a schema descriptor.
#
# @return [String]
def name
  'geopoint'
end
|
8
|
+
|
9
|
+
# Constraint names that may be applied to a field of this type.
#
# @return [Array<String>]
def self.supported_constraints
  %w[required pattern enum]
end
|
16
|
+
|
17
|
+
# Ruby classes listed for this type: String, Array and Hash
# (presumably the accepted input representations of a geopoint).
#
# @return [Array<Class>]
def types
  [::String, ::Array, ::Hash]
end
|
20
|
+
|
21
|
+
# Default cast: splits +value+ at the first comma and hands the two parts to
# +cast_array+, first part first.
#
# @param value [String] two comma-separated numbers
# @return the result of +cast_array+
def cast_default(value)
  first_part, second_part = value.split(',', 2)
  cast_array([first_part, second_part])
end
|
25
|
+
|
26
|
+
# Casts the "object" format: a Hash (or a JSON string encoding one) holding
# 'longitude' and 'latitude' entries, which are passed to +cast_array+ in
# that order.
#
# The decoded value must be a Hash. Without that check a JSON string such
# as "[1, 2]" decodes to an Array and the string-key lookup raises a raw
# TypeError instead of the type's own error.
#
# @param value [Hash, String]
# @return the result of +cast_array+
# @raise [TableSchema::InvalidGeoPointType] for unparsable JSON or a value
#   that is not an object
def cast_object(value)
  decoded = value.is_a?(::String) ? JSON.parse(value) : value
  unless decoded.is_a?(::Hash)
    raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
  end

  cast_array([decoded['longitude'], decoded['latitude']])
rescue JSON::ParserError
  raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
end
|
32
|
+
|
33
|
+
# Casts the "array" format: a two-element Array (or a JSON string encoding
# one) to a pair of Floats, then range-checks the pair.
#
# +value+ is deliberately reassigned step by step: the error message shows
# whatever form the value had reached when the failure occurred.
#
# @param value [Array, String]
# @return [Array<Float>] the two elements converted with Float()
# @raise [TableSchema::InvalidGeoPointType] on bad JSON or non-numeric parts,
#   or whatever +check_latlng_range+ raises
def cast_array(value)
  value = JSON.parse(value) if value.is_a?(::String)
  first = Float(value[0])
  second = Float(value[1])
  value = [first, second]
  check_latlng_range(value)
  value
rescue JSON::ParserError, ArgumentError, TypeError
  raise TableSchema::InvalidGeoPointType.new("#{value} is not a valid geopoint")
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
# Verifies that a [longitude, latitude] pair lies within the valid ranges.
#
# The bounds are inclusive: longitudes of exactly +/-180 (the antimeridian)
# and latitudes of exactly +/-90 (the poles) are real coordinates and are
# accepted. Each error message names the coordinate that is out of range.
#
# @param geopoint [Array<Numeric>] element 0 is the longitude, element 1 the latitude
# @return [nil] when both coordinates are in range
# @raise [TableSchema::InvalidGeoPointType] when either coordinate is out of range
def check_latlng_range(geopoint)
  longitude, latitude = geopoint

  unless longitude.between?(-180, 180)
    raise TableSchema::InvalidGeoPointType.new("longitude should be between -180 and 180, found `#{longitude}`")
  end

  unless latitude.between?(-90, 90)
    raise TableSchema::InvalidGeoPointType.new("latitude should be between -90 and 90, found `#{latitude}`")
  end
end
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|