tableschema 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +21 -0
  3. data/.travis.yml +15 -1
  4. data/README.md +164 -129
  5. data/Rakefile +10 -1
  6. data/bin/console +2 -6
  7. data/{etc/schemas → lib/profiles}/geojson.json +0 -1
  8. data/lib/profiles/table-schema.json +1625 -0
  9. data/lib/profiles/topojson.json +311 -0
  10. data/lib/tableschema.rb +5 -3
  11. data/lib/tableschema/constraints/constraints.rb +12 -24
  12. data/lib/tableschema/constraints/enum.rb +6 -2
  13. data/lib/tableschema/constraints/max_length.rb +6 -2
  14. data/lib/tableschema/constraints/maximum.rb +12 -2
  15. data/lib/tableschema/constraints/min_length.rb +6 -2
  16. data/lib/tableschema/constraints/minimum.rb +12 -2
  17. data/lib/tableschema/constraints/pattern.rb +9 -2
  18. data/lib/tableschema/constraints/required.rb +6 -15
  19. data/lib/tableschema/constraints/unique.rb +12 -0
  20. data/lib/tableschema/defaults.rb +9 -0
  21. data/lib/tableschema/exceptions.rb +15 -2
  22. data/lib/tableschema/field.rb +39 -20
  23. data/lib/tableschema/helpers.rb +32 -15
  24. data/lib/tableschema/infer.rb +31 -28
  25. data/lib/tableschema/model.rb +57 -34
  26. data/lib/tableschema/schema.rb +40 -6
  27. data/lib/tableschema/table.rb +75 -26
  28. data/lib/tableschema/types/any.rb +1 -0
  29. data/lib/tableschema/types/array.rb +2 -1
  30. data/lib/tableschema/types/base.rb +9 -21
  31. data/lib/tableschema/types/date.rb +1 -0
  32. data/lib/tableschema/types/datetime.rb +1 -0
  33. data/lib/tableschema/types/duration.rb +31 -0
  34. data/lib/tableschema/types/geojson.rb +27 -5
  35. data/lib/tableschema/types/geopoint.rb +4 -3
  36. data/lib/tableschema/types/integer.rb +1 -0
  37. data/lib/tableschema/types/number.rb +40 -25
  38. data/lib/tableschema/types/object.rb +2 -1
  39. data/lib/tableschema/types/string.rb +8 -0
  40. data/lib/tableschema/types/time.rb +1 -0
  41. data/lib/tableschema/types/year.rb +34 -0
  42. data/lib/tableschema/types/yearmonth.rb +52 -0
  43. data/lib/tableschema/validate.rb +45 -29
  44. data/lib/tableschema/version.rb +1 -1
  45. data/tableschema.gemspec +2 -1
  46. metadata +31 -12
  47. data/etc/schemas/json-table-schema.json +0 -102
  48. data/lib/tableschema/data.rb +0 -60
  49. data/lib/tableschema/types/null.rb +0 -37
@@ -3,12 +3,22 @@ module TableSchema
3
3
  module Maximum
4
4
 
5
5
  def check_maximum
6
- if @value > parse_constraint(@constraints['maximum'])
7
- raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must not be more than #{@constraints['maximum']}")
6
+ if @field.type == 'yearmonth'
7
+ valid = Date.new(@value[:year], @value[:month]) <= Date.new(parsed_maximum[:year], parsed_maximum[:month])
8
+ else
9
+ valid = @value <= parsed_maximum
10
+ end
11
+
12
+ unless valid
13
+ raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must not be more than #{@constraints[:maximum]}")
8
14
  end
9
15
  true
10
16
  end
11
17
 
18
+ def parsed_maximum
19
+ @field.cast_type(@constraints[:maximum])
20
+ end
21
+
12
22
  end
13
23
  end
14
24
  end
@@ -4,12 +4,16 @@ module TableSchema
4
4
 
5
5
  def check_min_length
6
6
  return if @value.nil?
7
- if @value.length < @constraints['minLength'].to_i
8
- raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must have a minimum length of #{@constraints['minLength']}")
7
+ if @value.length < parsed_min_length
8
+ raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must have a minimum length of #{@constraints[:minLength]}")
9
9
  end
10
10
  true
11
11
  end
12
12
 
13
+ def parsed_min_length
14
+ @constraints[:minLength].to_i
15
+ end
16
+
13
17
  end
14
18
  end
15
19
  end
@@ -3,12 +3,22 @@ module TableSchema
3
3
  module Minimum
4
4
 
5
5
  def check_minimum
6
- if @value < parse_constraint(@constraints['minimum'])
7
- raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must not be less than #{@constraints['minimum']}")
6
+ if @field.type == 'yearmonth'
7
+ valid = Date.new(@value[:year], @value[:month]) >= Date.new(parsed_minimum[:year], parsed_minimum[:month])
8
+ else
9
+ valid = @value >= parsed_minimum
10
+ end
11
+
12
+ unless valid
13
+ raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must not be less than #{@constraints[:minimum]}")
8
14
  end
9
15
  true
10
16
  end
11
17
 
18
+ def parsed_minimum
19
+ @field.cast_type(@constraints[:minimum])
20
+ end
21
+
12
22
  end
13
23
  end
14
24
  end
@@ -3,8 +3,15 @@ module TableSchema
3
3
  module Pattern
4
4
 
5
5
  def check_pattern
6
- if !@value.to_json.match /#{@constraints['pattern']}/
7
- raise TableSchema::ConstraintError.new("The value for the field `#{@field['name']}` must match the pattern")
6
+ constraint = lambda { |value| value.match(/#{@constraints[:pattern]}/) }
7
+ if @field.type == 'yearmonth'
8
+ valid = constraint.call(Date.new(@value[:year], @value[:month]).strftime('%Y-%m'))
9
+ else
10
+ valid = constraint.call(@value.to_json)
11
+ end
12
+
13
+ unless valid
14
+ raise TableSchema::ConstraintError.new("The value for the field `#{@field[:name]}` must match the pattern")
8
15
  end
9
16
  true
10
17
  end
@@ -3,30 +3,21 @@ module TableSchema
3
3
  module Required
4
4
 
5
5
  def check_required
6
- if required? && is_empty?
7
- raise TableSchema::ConstraintError.new("The field `#{@field['name']}` requires a value")
6
+ if parsed_required == true && value_is_empty?
7
+ raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` requires a value")
8
8
  end
9
9
  true
10
10
  end
11
11
 
12
12
  private
13
13
 
14
- def required?
15
- required == true && @field['type'] != 'null'
14
+ def value_is_empty?
15
+ @value.nil? || @value == ''
16
16
  end
17
17
 
18
- def is_empty?
19
- null_values.include?(@value)
18
+ def parsed_required
19
+ @constraints[:required].to_s == 'true'
20
20
  end
21
-
22
- def required
23
- @constraints['required'].to_s == 'true'
24
- end
25
-
26
- def null_values
27
- ['null', 'none', 'nil', 'nan', '-', '']
28
- end
29
-
30
21
  end
31
22
  end
32
23
  end
@@ -0,0 +1,12 @@
1
+ module TableSchema
2
+ class Constraints
3
+ module Unique
4
+
5
+ def check_unique
6
+ # This check is done in Table because it needs the previous values in the column
7
+ true
8
+ end
9
+
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,9 @@
1
+ module TableSchema
2
+ DEFAULTS = {
3
+ format: 'default',
4
+ type: 'string',
5
+ missing_values: [''],
6
+ group_char: ',',
7
+ decimal_char: '.'
8
+ }.freeze
9
+ end
@@ -1,6 +1,6 @@
1
1
  module TableSchema
2
2
  class Exception < ::Exception ; end
3
-
3
+
4
4
  class SchemaException < Exception
5
5
  attr_reader :message
6
6
 
@@ -9,20 +9,33 @@ module TableSchema
9
9
  end
10
10
  end
11
11
 
12
+ class MultipleInvalid < Exception
13
+ attr_reader :message, :errors
14
+
15
+ def initialize(message, errors=[])
16
+ @message = message
17
+ @errors = errors
18
+ end
19
+ end
20
+
12
21
  class InvalidFormat < Exception ; end
13
22
  class InvalidCast < Exception ; end
14
23
  class InvalidEmail < Exception ; end
15
24
  class InvalidURI < Exception ; end
16
25
  class InvalidUUID < Exception ; end
26
+ class InvalidBinary < Exception ; end
17
27
  class InvalidObjectType < Exception ; end
18
28
  class InvalidArrayType < Exception ; end
19
29
  class InvalidDateType < Exception ; end
20
30
  class InvalidTimeType < Exception ; end
21
31
  class InvalidDateTimeType < Exception ; end
32
+ class InvalidYearType < Exception; end
33
+ class InvalidYearMonthType < Exception; end
34
+ class InvalidDurationType < Exception; end
22
35
  class InvalidGeoJSONType < Exception ; end
36
+ class InvalidTopoJSONType < Exception ; end
23
37
  class InvalidGeoPointType < Exception ; end
24
38
  class ConstraintError < Exception ; end
25
39
  class ConstraintNotSupported < Exception ; end
26
40
  class ConversionError < Exception ; end
27
- class MultipleInvalid < Exception ; end
28
41
  end
@@ -1,40 +1,59 @@
1
+ require 'tableschema/defaults'
2
+
1
3
  module TableSchema
2
4
  class Field < Hash
3
5
  include TableSchema::Helpers
4
6
 
5
- attr_reader :type_class
6
-
7
- def initialize(descriptor)
8
- self.merge! descriptor
9
- @type_class = get_type
10
- end
7
+ attr_reader :name, :type, :format, :missing_values, :constraints
11
8
 
12
- def name
13
- self['name']
9
+ def initialize(descriptor, missing_values=nil)
10
+ self.merge! deep_symbolize_keys(descriptor)
11
+ @name = self[:name]
12
+ @type = self[:type] = self.fetch(:type, TableSchema::DEFAULTS[:type])
13
+ @format = self[:format] = self.fetch(:format, TableSchema::DEFAULTS[:format])
14
+ @constraints = self[:constraints] = self.fetch(:constraints, {})
15
+ @missing_values = missing_values || default_missing_values
14
16
  end
15
17
 
16
- def type
17
- self['type'] || 'string'
18
+ def descriptor
19
+ self.to_h
18
20
  end
19
21
 
20
- def format
21
- self['format'] || 'default'
22
+ def cast_value(value, check_constraints: true)
23
+ cast_value = cast_type(value)
24
+ return cast_value if check_constraints == false
25
+ TableSchema::Constraints.new(self, cast_value).validate!
26
+ cast_value
22
27
  end
23
28
 
24
- def constraints
25
- self['constraints'] || {}
29
+ def test_value(value, check_constraints: true)
30
+ cast_value(value, check_constraints: check_constraints)
31
+ true
32
+ rescue TableSchema::Exception
33
+ false
26
34
  end
27
35
 
28
- def cast_value(col)
29
- klass = get_class_for_type(type)
30
- converter = Kernel.const_get(klass).new(self)
31
- converter.cast(col)
36
+ def cast_type(value)
37
+ if is_null?(value)
38
+ nil
39
+ else
40
+ type_class.new(self).cast(value)
41
+ end
32
42
  end
33
43
 
34
44
  private
35
45
 
36
- def get_type
37
- Object.const_get get_class_for_type(type)
46
+ def default_missing_values
47
+ defaults = TableSchema::DEFAULTS[:missing_values]
48
+ @type == 'string' ? defaults - [''] : defaults
49
+ end
50
+
51
+ def type_class
52
+ Object.const_get get_class_for_type(@type)
53
+ end
54
+
55
+ def is_null?(value)
56
+ @missing_values.include?(value)
38
57
  end
39
58
 
40
59
  end
@@ -1,6 +1,21 @@
1
1
  module TableSchema
2
2
  module Helpers
3
3
 
4
+ def deep_symbolize_keys(descriptor)
5
+ case descriptor
6
+ when Hash
7
+ descriptor.inject({}) do |new_descriptor, (key, val)|
8
+ key_sym = key.respond_to?(:to_sym) ? key.to_sym : key
9
+ new_descriptor[key_sym] = deep_symbolize_keys(val)
10
+ new_descriptor
11
+ end
12
+ when Enumerable
13
+ descriptor.map{ |el| deep_symbolize_keys(el)}
14
+ else
15
+ descriptor
16
+ end
17
+ end
18
+
4
19
  def convert_to_boolean(value)
5
20
  if value.is_a?(Boolean)
6
21
  return value
@@ -22,25 +37,27 @@ module TableSchema
22
37
  end
23
38
 
24
39
  def get_class_for_type(type)
25
- "TableSchema::Types::#{type_class_lookup[type] || 'String'}"
40
+ "TableSchema::Types::#{type_class_lookup[type.to_sym] || 'String'}"
26
41
  end
27
42
 
28
43
  def type_class_lookup
29
44
  {
30
- 'any' => 'Any',
31
- 'array' => 'Array',
32
- 'base' => 'Base',
33
- 'boolean' => 'Boolean',
34
- 'date' => 'Date',
35
- 'datetime' => 'DateTime',
36
- 'geojson' => 'GeoJSON',
37
- 'geopoint' => 'GeoPoint',
38
- 'integer' => 'Integer',
39
- 'null' => 'Null',
40
- 'number' => 'Number',
41
- 'object' => 'Object',
42
- 'string' => 'String',
43
- 'time' => 'Time',
45
+ any: 'Any',
46
+ array: 'Array',
47
+ base: 'Base',
48
+ boolean: 'Boolean',
49
+ date: 'Date',
50
+ datetime: 'DateTime',
51
+ geojson: 'GeoJSON',
52
+ geopoint: 'GeoPoint',
53
+ integer: 'Integer',
54
+ number: 'Number',
55
+ object: 'Object',
56
+ string: 'String',
57
+ time: 'Time',
58
+ year: 'Year',
59
+ yearmonth: 'YearMonth',
60
+ duration: 'Duration',
44
61
  }
45
62
  end
46
63
 
@@ -1,3 +1,6 @@
1
+ require 'tableschema/defaults'
2
+ require 'tableschema/field'
3
+
1
4
  module TableSchema
2
5
  class Infer
3
6
 
@@ -5,41 +8,41 @@ module TableSchema
5
8
 
6
9
  attr_reader :schema
7
10
 
8
- def initialize(headers, rows, opts = {})
11
+ def initialize(headers, rows, explicit: false, primary_key: nil, row_limit: nil)
9
12
  @headers = headers
10
13
  @rows = rows
11
- @explicit = opts[:explicit]
12
- @primary_key = opts[:primary_key]
13
- @row_limit = opts[:row_limit]
14
+ @explicit = explicit
15
+ @primary_key = primary_key
16
+ @row_limit = row_limit
14
17
 
15
18
  @schema = {
16
- 'fields' => fields
19
+ fields: fields
17
20
  }
18
- @schema['primaryKey'] = @primary_key if @primary_key
21
+ @schema[:primaryKey] = @primary_key if @primary_key
19
22
  infer!
20
23
  end
21
24
 
22
25
  def fields
23
26
  @headers.map do |header|
24
27
  descriptor = {
25
- 'name' => header,
26
- 'title' => '',
27
- 'description' => '',
28
+ name: header,
29
+ title: '',
30
+ description: '',
28
31
  }
29
32
 
30
33
  constraints = {}
31
- constraints['required'] = @explicit === true
32
- constraints['unique'] = (header == @primary_key)
33
- constraints.delete_if { |k,v| v == false } unless @explicit === true
34
- descriptor['constraints'] = constraints if constraints.count > 0
35
- descriptor
34
+ constraints[:required] = @explicit === true
35
+ constraints[:unique] = (header == @primary_key)
36
+ constraints.delete_if { |_,v| v == false } unless @explicit === true
37
+ descriptor[:constraints] = constraints if constraints.count > 0
38
+ TableSchema::Field.new(descriptor)
36
39
  end
37
40
  end
38
41
 
39
42
  def infer!
40
43
  type_matches = []
41
- @rows.each_with_index do |row, i|
42
- break if @row_limit && i > @row_limit
44
+ @rows.each_with_index do |row, index|
45
+ break if @row_limit && index > @row_limit
43
46
  row = row.fields if row.class == CSV::Row
44
47
 
45
48
  row_length = row.count
@@ -53,9 +56,9 @@ module TableSchema
53
56
  row = row.push(fill).flatten
54
57
  end
55
58
 
56
- row.each_with_index do |col, i|
57
- type_matches[i] ||= []
58
- type_matches[i] << guess_type(col, i)
59
+ row.each_with_index do |col, idx|
60
+ type_matches[idx] ||= []
61
+ type_matches[idx] << guess_type(col, idx)
59
62
  end
60
63
 
61
64
  end
@@ -64,12 +67,12 @@ module TableSchema
64
67
  end
65
68
 
66
69
  def guess_type(col, index)
67
- guessed_type = 'string'
68
- guessed_format = 'default'
70
+ guessed_type = TableSchema::DEFAULTS[:type]
71
+ guessed_format = TableSchema::DEFAULTS[:format]
69
72
 
70
73
  available_types.reverse_each do |type|
71
74
  klass = get_class_for_type(type)
72
- converter = Kernel.const_get(klass).new(@schema['fields'][index])
75
+ converter = Kernel.const_get(klass).new(@schema[:fields][index])
73
76
  if converter.test(col) === true
74
77
  guessed_type = type
75
78
  guessed_format = guess_format(converter, col)
@@ -78,22 +81,23 @@ module TableSchema
78
81
  end
79
82
 
80
83
  {
81
- 'type' => guessed_type,
82
- 'format' => guessed_format
84
+ type: guessed_type,
85
+ format: guessed_format
83
86
  }
84
87
  end
85
88
 
86
89
  def guess_format(converter, col)
87
- guessed_format = 'default'
90
+ guessed_format = TableSchema::DEFAULTS[:format]
88
91
  converter.class.instance_methods.grep(/cast_/).each do |method|
89
92
  begin
90
93
  format = method.to_s
91
94
  format.slice!('cast_')
92
- next if format == 'default'
95
+ next if format == TableSchema::DEFAULTS[:format]
93
96
  converter.send(method, col)
94
97
  guessed_format = format
95
98
  break
96
99
  rescue TableSchema::Exception
100
+ next
97
101
  end
98
102
  end
99
103
  guessed_format
@@ -116,7 +120,7 @@ module TableSchema
116
120
  rv = sorted_counts[0][0]
117
121
  end
118
122
 
119
- @schema['fields'][v].merge!(rv)
123
+ @schema[:fields][v].merge!(rv)
120
124
  end
121
125
 
122
126
  end
@@ -128,7 +132,6 @@ module TableSchema
128
132
  'boolean',
129
133
  'number',
130
134
  'integer',
131
- 'null',
132
135
  'date',
133
136
  'time',
134
137
  'datetime',