tableschema 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/CHANGELOG.md +31 -0
  6. data/CODE_OF_CONDUCT.md +49 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +274 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/etc/schemas/geojson.json +209 -0
  14. data/etc/schemas/json-table-schema.json +102 -0
  15. data/lib/tableschema.rb +42 -0
  16. data/lib/tableschema/constraints/constraints.rb +76 -0
  17. data/lib/tableschema/constraints/enum.rb +14 -0
  18. data/lib/tableschema/constraints/max_length.rb +15 -0
  19. data/lib/tableschema/constraints/maximum.rb +14 -0
  20. data/lib/tableschema/constraints/min_length.rb +15 -0
  21. data/lib/tableschema/constraints/minimum.rb +14 -0
  22. data/lib/tableschema/constraints/pattern.rb +14 -0
  23. data/lib/tableschema/constraints/required.rb +32 -0
  24. data/lib/tableschema/data.rb +60 -0
  25. data/lib/tableschema/exceptions.rb +28 -0
  26. data/lib/tableschema/field.rb +41 -0
  27. data/lib/tableschema/helpers.rb +48 -0
  28. data/lib/tableschema/infer.rb +143 -0
  29. data/lib/tableschema/model.rb +73 -0
  30. data/lib/tableschema/schema.rb +36 -0
  31. data/lib/tableschema/table.rb +51 -0
  32. data/lib/tableschema/types/any.rb +23 -0
  33. data/lib/tableschema/types/array.rb +37 -0
  34. data/lib/tableschema/types/base.rb +54 -0
  35. data/lib/tableschema/types/boolean.rb +35 -0
  36. data/lib/tableschema/types/date.rb +56 -0
  37. data/lib/tableschema/types/datetime.rb +63 -0
  38. data/lib/tableschema/types/geojson.rb +38 -0
  39. data/lib/tableschema/types/geopoint.rb +56 -0
  40. data/lib/tableschema/types/integer.rb +35 -0
  41. data/lib/tableschema/types/null.rb +37 -0
  42. data/lib/tableschema/types/number.rb +60 -0
  43. data/lib/tableschema/types/object.rb +37 -0
  44. data/lib/tableschema/types/string.rb +64 -0
  45. data/lib/tableschema/types/time.rb +55 -0
  46. data/lib/tableschema/validate.rb +54 -0
  47. data/lib/tableschema/version.rb +3 -0
  48. data/tableschema.gemspec +32 -0
  49. metadata +231 -0
@@ -0,0 +1,15 @@
1
module TableSchema
  class Constraints
    # Mixin providing the `maxLength` constraint check.
    # Reads @value, @constraints and @field from the including object.
    module MaxLength

      # Checks that @value is not longer than the `maxLength` constraint.
      #
      # Returns nil (no check performed) when @value is nil, true when the
      # value is within the limit.
      # Raises TableSchema::ConstraintError when the value is too long.
      def check_max_length
        return if @value.nil?

        actual_length = @value.length
        allowed_length = @constraints['maxLength'].to_i
        unless actual_length <= allowed_length
          raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must have a maximum length of #{@constraints['maxLength']}")
        end

        true
      end

    end
  end
end
@@ -0,0 +1,14 @@
1
module TableSchema
  class Constraints
    # Mixin providing the `maximum` constraint check.
    # Reads @value, @constraints and @field from the including object and
    # relies on a `parse_constraint` method supplied by that object.
    module Maximum

      # Checks that @value does not exceed the `maximum` constraint.
      #
      # The raw constraint is passed through parse_constraint before the
      # comparison. A nil value is skipped (returns nil), matching the nil
      # guard used by the length checks, instead of failing with a
      # NoMethodError on `nil > ...`.
      #
      # Returns true when the value is within the limit.
      # Raises TableSchema::ConstraintError when the value is too large.
      def check_maximum
        return if @value.nil?

        if @value > parse_constraint(@constraints['maximum'])
          raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must not be more than #{@constraints['maximum']}")
        end
        true
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
module TableSchema
  class Constraints
    # Mixin providing the `minLength` constraint check.
    # Reads @value, @constraints and @field from the including object.
    module MinLength

      # Checks that @value is at least as long as the `minLength` constraint.
      #
      # Returns nil (no check performed) when @value is nil, true when the
      # value is long enough.
      # Raises TableSchema::ConstraintError when the value is too short.
      def check_min_length
        return if @value.nil?

        actual_length = @value.length
        required_length = @constraints['minLength'].to_i
        unless actual_length >= required_length
          raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must have a minimum length of #{@constraints['minLength']}")
        end

        true
      end

    end
  end
end
@@ -0,0 +1,14 @@
1
module TableSchema
  class Constraints
    # Mixin providing the `minimum` constraint check.
    # Reads @value, @constraints and @field from the including object and
    # relies on a `parse_constraint` method supplied by that object.
    module Minimum

      # Checks that @value is not below the `minimum` constraint.
      #
      # The raw constraint is passed through parse_constraint before the
      # comparison. A nil value is skipped (returns nil), matching the nil
      # guard used by the length checks, instead of failing with a
      # NoMethodError on `nil < ...`.
      #
      # Returns true when the value is within the limit.
      # Raises TableSchema::ConstraintError when the value is too small.
      def check_minimum
        return if @value.nil?

        if @value < parse_constraint(@constraints['minimum'])
          raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must not be less than #{@constraints['minimum']}")
        end
        true
      end

    end
  end
end
@@ -0,0 +1,14 @@
1
module TableSchema
  class Constraints
    # Mixin providing the `pattern` constraint check.
    # Reads @value, @constraints and @field from the including object.
    module Pattern

      # Checks @value against the regular expression in the `pattern`
      # constraint.
      #
      # The value is serialised with to_json first, so the expression is
      # applied to the JSON text of the value (for a String that text
      # includes the surrounding double quotes).
      #
      # Returns true when the pattern matches.
      # Raises TableSchema::ConstraintError when it does not.
      def check_pattern
        serialized = @value.to_json
        expression = /#{@constraints['pattern']}/
        unless serialized.match(expression)
          raise TableSchema::ConstraintError.new("The value for the field `#{@field['name']}` must match the pattern")
        end

        true
      end

    end
  end
end
@@ -0,0 +1,32 @@
1
module TableSchema
  class Constraints
    # Mixin providing the `required` constraint check.
    # Reads @value, @constraints and @field from the including object.
    module Required

      # Checks that a required field actually carries a value.
      #
      # Returns true when the field is not required, is of type 'null',
      # or has a non-empty value.
      # Raises TableSchema::ConstraintError when a required field is empty.
      def check_required
        if required? && is_empty?
          raise TableSchema::ConstraintError.new("The field `#{@field['name']}` requires a value")
        end
        true
      end

      private

      # True when the constraint is switched on and the field is not of
      # type 'null' (a null-typed field is never treated as required).
      def required?
        required == true && @field['type'] != 'null'
      end

      # True when the value is missing. A nil value counts as empty in
      # addition to the textual null markers; without the nil check a
      # required field holding nil would pass the constraint.
      def is_empty?
        @value.nil? || null_values.include?(@value)
      end

      # The `required` constraint as a boolean; accepts true or 'true'.
      def required
        @constraints['required'].to_s == 'true'
      end

      # String markers that stand for "no value".
      def null_values
        ['null', 'none', 'nil', 'nan', '-', '']
      end

    end
  end
end
@@ -0,0 +1,60 @@
1
module TableSchema
  # Row-casting mixin. The including object must respond to `fields`,
  # returning one object per column that responds to `cast_value`.
  module Data

    # Errors collected by the most recent cast_rows / cast_row call.
    attr_reader :errors

    # Casts every row and returns the array of cast rows.
    #
    # rows      - enumerable of rows; CSV::Row instances are unwrapped via
    #             #fields when the CSV library is loaded.
    # fail_fast - when exactly true, the first error is re-raised
    #             immediately; otherwise errors are collected in #errors.
    # limit     - optional maximum number of rows to process.
    #
    # The error list is reset on entry, so a failed run no longer causes
    # every later call on the same object to raise.
    #
    # Raises TableSchema::MultipleInvalid when any error was collected.
    def cast_rows(rows, fail_fast = true, limit = nil)
      @errors = []
      parsed_rows = []
      rows.each_with_index do |r, i|
        break if limit && (limit <= i)
        begin
          r = r.fields if csv_row?(r)
          parsed_rows << cast_row_values(r, fail_fast)
        rescue MultipleInvalid, ConversionError => e
          raise e if fail_fast == true
          # MultipleInvalid only signals that column errors were already
          # recorded; ConversionError (column count mismatch) is new.
          @errors << e if e.is_a?(ConversionError)
        end
      end
      check_for_errors
      parsed_rows
    end

    alias_method :convert, :cast_rows

    # Casts a single row in place and returns it.
    #
    # Raises TableSchema::ConversionError when the row length differs from
    # the number of fields, and TableSchema::MultipleInvalid when errors
    # were collected with fail_fast disabled.
    def cast_row(row, fail_fast = true)
      @errors = []
      cast_row_values(row, fail_fast)
    end

    alias_method :convert_row, :cast_row

    private

    # Shared implementation of cast_row that does not reset @errors, so
    # cast_rows can accumulate errors across rows.
    def cast_row_values(row, fail_fast)
      raise_header_error(row) if row.count != fields.count
      fields.each_with_index do |field, i|
        row[i] = cast_column(field, row[i], fail_fast)
      end
      check_for_errors
      row
    end

    # True when the CSV library is loaded and row is a CSV::Row.
    def csv_row?(row)
      defined?(CSV::Row) && row.is_a?(CSV::Row)
    end

    def raise_header_error(row)
      raise(TableSchema::ConversionError.new("The number of items to convert (#{row.count}) does not match the number of headers in the schema (#{fields.count})"))
    end

    def check_for_errors
      raise(TableSchema::MultipleInvalid.new("There were errors parsing the data")) unless @errors.empty?
    end

    # Casts one cell. With fail_fast disabled the error is recorded and nil
    # is returned, so the errors array itself is never written into the row.
    #
    # A bare `Exception` in this namespace resolves to TableSchema::Exception;
    # it is spelled out here to make that explicit.
    def cast_column(field, col, fail_fast)
      field.cast_value(col)
    rescue TableSchema::Exception => e
      raise e if fail_fast == true
      (@errors ||= []) << e
      nil
    end

    alias_method :convert_column, :cast_column

  end
end
@@ -0,0 +1,28 @@
1
module TableSchema
  # Root of every error defined by this library.
  #
  # Inherits from StandardError (not ::Exception) so that a bare `rescue`
  # or `rescue StandardError` in calling code catches library errors.
  # StandardError is itself an ::Exception, so existing
  # `rescue TableSchema::Exception` / `rescue ::Exception` handlers still match.
  class Exception < ::StandardError ; end

  # Error carrying a schema-related message.
  class SchemaException < Exception
    attr_reader :message

    # message - the human-readable description of the problem. It is passed
    #           to super so that #to_s, #inspect and backtraces show it
    #           rather than the bare class name.
    def initialize(message)
      super(message)
      @message = message
    end
  end

  class InvalidFormat < Exception ; end
  class InvalidCast < Exception ; end
  class InvalidEmail < Exception ; end
  class InvalidURI < Exception ; end
  class InvalidUUID < Exception ; end
  class InvalidObjectType < Exception ; end
  class InvalidArrayType < Exception ; end
  class InvalidDateType < Exception ; end
  class InvalidTimeType < Exception ; end
  class InvalidDateTimeType < Exception ; end
  class InvalidGeoJSONType < Exception ; end
  class InvalidGeoPointType < Exception ; end
  class ConstraintError < Exception ; end
  class ConstraintNotSupported < Exception ; end
  class ConversionError < Exception ; end
  class MultipleInvalid < Exception ; end
end
@@ -0,0 +1,41 @@
1
module TableSchema
  # A field descriptor stored as a Hash, with accessors for the common
  # keys and a helper that casts a value through the matching type class.
  class Field < Hash
    include TableSchema::Helpers

    # The type class resolved from the descriptor at construction time.
    attr_reader :type_class

    # descriptor - Hash whose entries are copied into this field.
    def initialize(descriptor)
      merge!(descriptor)
      @type_class = get_type
    end

    # The value stored under 'name'.
    def name
      self['name']
    end

    # The value stored under 'type', or 'string' when absent.
    def type
      declared_type = self['type']
      declared_type || 'string'
    end

    # The value stored under 'format', or 'default' when absent.
    def format
      declared_format = self['format']
      declared_format || 'default'
    end

    # The value stored under 'constraints', or an empty Hash when absent.
    def constraints
      declared_constraints = self['constraints']
      declared_constraints || {}
    end

    # Looks up the type class for the current type, builds a converter
    # for this field and returns the result of its #cast for col.
    def cast_value(col)
      converter_class = Kernel.const_get(get_class_for_type(type))
      converter_class.new(self).cast(col)
    end

    private

    # Resolves the class named by get_class_for_type for the current type.
    def get_type
      Object.const_get(get_class_for_type(type))
    end

  end
end
@@ -0,0 +1,48 @@
1
module TableSchema
  # Shared helpers for boolean conversion and type-name lookup.
  module Helpers

    # Lower-case strings interpreted as true / false.
    TRUE_VALUES = ['yes', 'y', 'true', 't', '1'].freeze
    FALSE_VALUES = ['no', 'n', 'false', 'f', '0'].freeze

    # Maps a schema type name to the class name under TableSchema::Types.
    TYPE_CLASS_LOOKUP = {
      'any' => 'Any',
      'array' => 'Array',
      'base' => 'Base',
      'boolean' => 'Boolean',
      'date' => 'Date',
      'datetime' => 'DateTime',
      'geojson' => 'GeoJSON',
      'geopoint' => 'GeoPoint',
      'integer' => 'Integer',
      'null' => 'Null',
      'number' => 'Number',
      'object' => 'Object',
      'string' => 'String',
      'time' => 'Time',
    }.freeze

    # Converts value to a boolean.
    #
    # true and false are returned unchanged. Any other value is compared,
    # as a lower-cased string, against true_values and false_values.
    # Returns nil when the value is not recognised.
    #
    # The identity check against true/false avoids depending on a `Boolean`
    # constant, which core Ruby does not define.
    def convert_to_boolean(value)
      return value if true.equal?(value) || false.equal?(value)

      normalized = value.to_s.downcase
      if true_values.include?(normalized)
        true
      elsif false_values.include?(normalized)
        false
      end
    end

    # Strings accepted as true (frozen; shared between calls).
    def true_values
      TRUE_VALUES
    end

    # Strings accepted as false (frozen; shared between calls).
    def false_values
      FALSE_VALUES
    end

    # Returns the fully qualified class name for a schema type, falling
    # back to the String type for unknown names.
    def get_class_for_type(type)
      "TableSchema::Types::#{type_class_lookup[type] || 'String'}"
    end

    # The type-name to class-name table (frozen; shared between calls).
    def type_class_lookup
      TYPE_CLASS_LOOKUP
    end

  end
end
@@ -0,0 +1,143 @@
1
module TableSchema
  # Infers a schema (field types and formats) from headers and sample rows.
  class Infer

    include TableSchema::Helpers

    # The inferred schema; a TableSchema::Schema once infer! has run.
    attr_reader :schema

    # headers - array of column names.
    # rows    - enumerable of rows (arrays, or CSV::Row when CSV is loaded).
    # opts    - :explicit    when exactly true, every field gets explicit
    #                        'required' / 'unique' constraints;
    #           :primary_key column name stored as 'primaryKey';
    #           :row_limit   maximum number of rows to inspect.
    def initialize(headers, rows, opts = {})
      @headers = headers
      @rows = rows
      @explicit = opts[:explicit]
      @primary_key = opts[:primary_key]
      @row_limit = opts[:row_limit]

      @schema = {
        'fields' => fields
      }
      @schema['primaryKey'] = @primary_key if @primary_key
      infer!
    end

    # Builds one bare descriptor per header. Constraints whose value is
    # false are dropped unless :explicit was requested.
    def fields
      @headers.map do |header|
        descriptor = {
          'name' => header,
          'title' => '',
          'description' => '',
        }

        constraints = {}
        constraints['required'] = @explicit == true
        constraints['unique'] = (header == @primary_key)
        constraints.delete_if { |_name, enabled| enabled == false } unless @explicit == true
        descriptor['constraints'] = constraints if constraints.count > 0
        descriptor
      end
    end

    # Guesses a type for every cell of the inspected rows, resolves one
    # type per column and wraps the result in a TableSchema::Schema.
    #
    # At most :row_limit rows are inspected (the limit is exclusive of the
    # row at index row_limit). Rows are padded or truncated to the header
    # width without modifying the caller's row.
    def infer!
      type_matches = []
      @rows.each_with_index do |row, row_index|
        break if @row_limit && row_index >= @row_limit
        row = row.fields if defined?(CSV::Row) && row.is_a?(CSV::Row)

        fit_to_headers(row).each_with_index do |col, col_index|
          (type_matches[col_index] ||= []) << guess_type(col, col_index)
        end
      end
      resolve_types(type_matches)
      @schema = TableSchema::Schema.new(@schema)
    end

    # Returns {'type' => ..., 'format' => ...} for one cell. Types are
    # tried from the end of available_types backwards; the first converter
    # whose #test returns true wins, otherwise string/default is used.
    def guess_type(col, index)
      guessed_type = 'string'
      guessed_format = 'default'

      available_types.reverse_each do |type|
        klass = get_class_for_type(type)
        converter = Kernel.const_get(klass).new(@schema['fields'][index])
        if converter.test(col) == true
          guessed_type = type
          guessed_format = guess_format(converter, col)
          break
        end
      end

      {
        'type' => guessed_type,
        'format' => guessed_format
      }
    end

    # Tries each non-default `cast_*` method of the converter and returns
    # the name of the first format that casts col without raising a
    # TableSchema::Exception; 'default' when none does.
    def guess_format(converter, col)
      guessed_format = 'default'
      converter.class.instance_methods.grep(/cast_/).each do |method|
        begin
          format = method.to_s.sub('cast_', '')
          next if format == 'default'
          converter.send(method, col)
          guessed_format = format
          break
        rescue TableSchema::Exception
          # This format does not apply to the value; try the next one.
        end
      end
      guessed_format
    end

    # Merges the most frequent guess of each column into its descriptor.
    # Ties go to the guess that was seen first. The guess lists are counted
    # as given (duplicates included) so frequency is meaningful.
    def resolve_types(results)
      results.each_with_index do |guesses, column|
        next if guesses.nil? || guesses.empty?

        counts = Hash.new(0)
        guesses.each { |guess| counts[guess] += 1 }
        most_common = counts.max_by { |_guess, count| count }.first

        @schema['fields'][column].merge!(most_common)
      end
    end

    # Type names in ascending priority; guess_type walks this list in reverse.
    def available_types
      [
        'any',
        'string',
        'boolean',
        'number',
        'integer',
        'null',
        'date',
        'time',
        'datetime',
        'array',
        'object',
        'geopoint',
        'geojson'
      ]
    end

    private

    # Returns a copy of row with exactly as many cells as there are
    # headers: extra cells are dropped, missing ones filled with ''.
    def fit_to_headers(row)
      width = @headers.count
      return row.first(width) if row.count > width

      row + [''] * (width - row.count)
    end

  end
end
@@ -0,0 +1,73 @@
1
module TableSchema
  # Read accessors over a schema descriptor. The including object must
  # behave like a Hash holding the descriptor and set @opts before the
  # header methods are used.
  module Model

    # Values applied by expand! to fields that omit them.
    DEFAULTS = {
      'format' => 'default',
      'type' => 'string'
    }.freeze

    # Names of all fields, lower-cased when the :case_insensitive_headers
    # option is set. Best effort: returns [] when a NoMethodError occurs
    # (for example when there are no fields).
    def headers
      fields.map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # The raw 'fields' entry of the descriptor.
    def fields
      self['fields']
    end

    # The 'primaryKey' entry as a flat array without nils.
    def primary_keys
      [self['primaryKey']].flatten.compact
    end

    # The 'foreignKeys' entry, or [] when absent.
    def foreign_keys
      self['foreignKeys'] || []
    end

    # The 'type' of the field named key.
    def get_type(key)
      get_field(key)['type']
    end

    # The 'constraints' of the field named key, or {} when absent.
    def get_constraints(key)
      get_field(key)['constraints'] || {}
    end

    # Names of the fields whose 'required' constraint is exactly true,
    # transformed like #headers. Returns [] when a NoMethodError occurs.
    def required_headers
      fields.select { |f| !f['constraints'].nil? && f['constraints']['required'] == true }
            .map { |f| transform(f['name']) }
    rescue NoMethodError
      []
    end

    # True when a field named key exists.
    def has_field?(key)
      !get_field(key).nil?
    end

    # The first field whose 'name' equals key, or nil.
    def get_field(key)
      fields.find { |f| f['name'] == key }
    end

    # All fields whose 'type' equals type.
    def get_fields_by_type(type)
      fields.select { |f| f['type'] == type }
    end

    private

    # Returns the header name, lower-cased when :case_insensitive_headers
    # is set. A new string is returned so the name stored in the field
    # descriptor is left untouched and frozen names are supported.
    def transform(name)
      @opts[:case_insensitive_headers] ? name.downcase : name
    end

    # Fills in the default 'type' and 'format' on fields that lack them.
    def expand!
      (self['fields'] || []).each do |f|
        f['type'] = DEFAULTS['type'] if f['type'].nil?
        f['format'] = DEFAULTS['format'] if f['format'].nil?
      end
    end

    # Replaces each field descriptor with a TableSchema::Field.
    def load_fields!
      self['fields'] = (self['fields'] || []).map { |f| TableSchema::Field.new(f) }
    end

  end
end