tableschema 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +21 -0
  3. data/.travis.yml +15 -1
  4. data/README.md +164 -129
  5. data/Rakefile +10 -1
  6. data/bin/console +2 -6
  7. data/{etc/schemas → lib/profiles}/geojson.json +0 -1
  8. data/lib/profiles/table-schema.json +1625 -0
  9. data/lib/profiles/topojson.json +311 -0
  10. data/lib/tableschema.rb +5 -3
  11. data/lib/tableschema/constraints/constraints.rb +12 -24
  12. data/lib/tableschema/constraints/enum.rb +6 -2
  13. data/lib/tableschema/constraints/max_length.rb +6 -2
  14. data/lib/tableschema/constraints/maximum.rb +12 -2
  15. data/lib/tableschema/constraints/min_length.rb +6 -2
  16. data/lib/tableschema/constraints/minimum.rb +12 -2
  17. data/lib/tableschema/constraints/pattern.rb +9 -2
  18. data/lib/tableschema/constraints/required.rb +6 -15
  19. data/lib/tableschema/constraints/unique.rb +12 -0
  20. data/lib/tableschema/defaults.rb +9 -0
  21. data/lib/tableschema/exceptions.rb +15 -2
  22. data/lib/tableschema/field.rb +39 -20
  23. data/lib/tableschema/helpers.rb +32 -15
  24. data/lib/tableschema/infer.rb +31 -28
  25. data/lib/tableschema/model.rb +57 -34
  26. data/lib/tableschema/schema.rb +40 -6
  27. data/lib/tableschema/table.rb +75 -26
  28. data/lib/tableschema/types/any.rb +1 -0
  29. data/lib/tableschema/types/array.rb +2 -1
  30. data/lib/tableschema/types/base.rb +9 -21
  31. data/lib/tableschema/types/date.rb +1 -0
  32. data/lib/tableschema/types/datetime.rb +1 -0
  33. data/lib/tableschema/types/duration.rb +31 -0
  34. data/lib/tableschema/types/geojson.rb +27 -5
  35. data/lib/tableschema/types/geopoint.rb +4 -3
  36. data/lib/tableschema/types/integer.rb +1 -0
  37. data/lib/tableschema/types/number.rb +40 -25
  38. data/lib/tableschema/types/object.rb +2 -1
  39. data/lib/tableschema/types/string.rb +8 -0
  40. data/lib/tableschema/types/time.rb +1 -0
  41. data/lib/tableschema/types/year.rb +34 -0
  42. data/lib/tableschema/types/yearmonth.rb +52 -0
  43. data/lib/tableschema/validate.rb +45 -29
  44. data/lib/tableschema/version.rb +1 -1
  45. data/tableschema.gemspec +2 -1
  46. metadata +31 -12
  47. data/etc/schemas/json-table-schema.json +0 -102
  48. data/lib/tableschema/data.rb +0 -60
  49. data/lib/tableschema/types/null.rb +0 -37
@@ -3,12 +3,22 @@ module TableSchema
3
3
  module Maximum
4
4
 
5
5
  def check_maximum
6
- if @value > parse_constraint(@constraints['maximum'])
7
- raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must not be more than #{@constraints['maximum']}")
6
+ if @field.type == 'yearmonth'
7
+ valid = Date.new(@value[:year], @value[:month]) <= Date.new(parsed_maximum[:year], parsed_maximum[:month])
8
+ else
9
+ valid = @value <= parsed_maximum
10
+ end
11
+
12
+ unless valid
13
+ raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must not be more than #{@constraints[:maximum]}")
8
14
  end
9
15
  true
10
16
  end
11
17
 
18
+ def parsed_maximum
19
+ @field.cast_type(@constraints[:maximum])
20
+ end
21
+
12
22
  end
13
23
  end
14
24
  end
@@ -4,12 +4,16 @@ module TableSchema
4
4
 
5
5
  def check_min_length
6
6
  return if @value.nil?
7
- if @value.length < @constraints['minLength'].to_i
8
- raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must have a minimum length of #{@constraints['minLength']}")
7
+ if @value.length < parsed_min_length
8
+ raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must have a minimum length of #{@constraints[:minLength]}")
9
9
  end
10
10
  true
11
11
  end
12
12
 
13
+ def parsed_min_length
14
+ @constraints[:minLength].to_i
15
+ end
16
+
13
17
  end
14
18
  end
15
19
  end
@@ -3,12 +3,22 @@ module TableSchema
3
3
  module Minimum
4
4
 
5
5
  def check_minimum
6
- if @value < parse_constraint(@constraints['minimum'])
7
- raise TableSchema::ConstraintError.new("The field `#{@field['name']}` must not be less than #{@constraints['minimum']}")
6
+ if @field.type == 'yearmonth'
7
+ valid = Date.new(@value[:year], @value[:month]) >= Date.new(parsed_minimum[:year], parsed_minimum[:month])
8
+ else
9
+ valid = @value >= parsed_minimum
10
+ end
11
+
12
+ unless valid
13
+ raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` must not be less than #{@constraints[:minimum]}")
8
14
  end
9
15
  true
10
16
  end
11
17
 
18
+ def parsed_minimum
19
+ @field.cast_type(@constraints[:minimum])
20
+ end
21
+
12
22
  end
13
23
  end
14
24
  end
@@ -3,8 +3,15 @@ module TableSchema
3
3
  module Pattern
4
4
 
5
5
  def check_pattern
6
- if !@value.to_json.match /#{@constraints['pattern']}/
7
- raise TableSchema::ConstraintError.new("The value for the field `#{@field['name']}` must match the pattern")
6
+ constraint = lambda { |value| value.match(/#{@constraints[:pattern]}/) }
7
+ if @field.type == 'yearmonth'
8
+ valid = constraint.call(Date.new(@value[:year], @value[:month]).strftime('%Y-%m'))
9
+ else
10
+ valid = constraint.call(@value.to_json)
11
+ end
12
+
13
+ unless valid
14
+ raise TableSchema::ConstraintError.new("The value for the field `#{@field[:name]}` must match the pattern")
8
15
  end
9
16
  true
10
17
  end
@@ -3,30 +3,21 @@ module TableSchema
3
3
  module Required
4
4
 
5
5
  def check_required
6
- if required? && is_empty?
7
- raise TableSchema::ConstraintError.new("The field `#{@field['name']}` requires a value")
6
+ if parsed_required == true && value_is_empty?
7
+ raise TableSchema::ConstraintError.new("The field `#{@field[:name]}` requires a value")
8
8
  end
9
9
  true
10
10
  end
11
11
 
12
12
  private
13
13
 
14
- def required?
15
- required == true && @field['type'] != 'null'
14
+ def value_is_empty?
15
+ @value.nil? || @value == ''
16
16
  end
17
17
 
18
- def is_empty?
19
- null_values.include?(@value)
18
+ def parsed_required
19
+ @constraints[:required].to_s == 'true'
20
20
  end
21
-
22
- def required
23
- @constraints['required'].to_s == 'true'
24
- end
25
-
26
- def null_values
27
- ['null', 'none', 'nil', 'nan', '-', '']
28
- end
29
-
30
21
  end
31
22
  end
32
23
  end
@@ -0,0 +1,12 @@
1
+ module TableSchema
2
+ class Constraints
3
+ module Unique
4
+
5
+ def check_unique
6
+ # This check is done in Table because it needs the previous values in the column
7
+ true
8
+ end
9
+
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,9 @@
1
+ module TableSchema
2
+ DEFAULTS = {
3
+ format: 'default',
4
+ type: 'string',
5
+ missing_values: [''],
6
+ group_char: ',',
7
+ decimal_char: '.'
8
+ }.freeze
9
+ end
@@ -1,6 +1,6 @@
1
1
  module TableSchema
2
2
  class Exception < ::Exception ; end
3
-
3
+
4
4
  class SchemaException < Exception
5
5
  attr_reader :message
6
6
 
@@ -9,20 +9,33 @@ module TableSchema
9
9
  end
10
10
  end
11
11
 
12
+ class MultipleInvalid < Exception
13
+ attr_reader :message, :errors
14
+
15
+ def initialize(message, errors=[])
16
+ @message = message
17
+ @errors = errors
18
+ end
19
+ end
20
+
12
21
  class InvalidFormat < Exception ; end
13
22
  class InvalidCast < Exception ; end
14
23
  class InvalidEmail < Exception ; end
15
24
  class InvalidURI < Exception ; end
16
25
  class InvalidUUID < Exception ; end
26
+ class InvalidBinary < Exception ; end
17
27
  class InvalidObjectType < Exception ; end
18
28
  class InvalidArrayType < Exception ; end
19
29
  class InvalidDateType < Exception ; end
20
30
  class InvalidTimeType < Exception ; end
21
31
  class InvalidDateTimeType < Exception ; end
32
+ class InvalidYearType < Exception; end
33
+ class InvalidYearMonthType < Exception; end
34
+ class InvalidDurationType < Exception; end
22
35
  class InvalidGeoJSONType < Exception ; end
36
+ class InvalidTopoJSONType < Exception ; end
23
37
  class InvalidGeoPointType < Exception ; end
24
38
  class ConstraintError < Exception ; end
25
39
  class ConstraintNotSupported < Exception ; end
26
40
  class ConversionError < Exception ; end
27
- class MultipleInvalid < Exception ; end
28
41
  end
@@ -1,40 +1,59 @@
1
+ require 'tableschema/defaults'
2
+
1
3
  module TableSchema
2
4
  class Field < Hash
3
5
  include TableSchema::Helpers
4
6
 
5
- attr_reader :type_class
6
-
7
- def initialize(descriptor)
8
- self.merge! descriptor
9
- @type_class = get_type
10
- end
7
+ attr_reader :name, :type, :format, :missing_values, :constraints
11
8
 
12
- def name
13
- self['name']
9
+ def initialize(descriptor, missing_values=nil)
10
+ self.merge! deep_symbolize_keys(descriptor)
11
+ @name = self[:name]
12
+ @type = self[:type] = self.fetch(:type, TableSchema::DEFAULTS[:type])
13
+ @format = self[:format] = self.fetch(:format, TableSchema::DEFAULTS[:format])
14
+ @constraints = self[:constraints] = self.fetch(:constraints, {})
15
+ @missing_values = missing_values || default_missing_values
14
16
  end
15
17
 
16
- def type
17
- self['type'] || 'string'
18
+ def descriptor
19
+ self.to_h
18
20
  end
19
21
 
20
- def format
21
- self['format'] || 'default'
22
+ def cast_value(value, check_constraints: true)
23
+ cast_value = cast_type(value)
24
+ return cast_value if check_constraints == false
25
+ TableSchema::Constraints.new(self, cast_value).validate!
26
+ cast_value
22
27
  end
23
28
 
24
- def constraints
25
- self['constraints'] || {}
29
+ def test_value(value, check_constraints: true)
30
+ cast_value(value, check_constraints: check_constraints)
31
+ true
32
+ rescue TableSchema::Exception
33
+ false
26
34
  end
27
35
 
28
- def cast_value(col)
29
- klass = get_class_for_type(type)
30
- converter = Kernel.const_get(klass).new(self)
31
- converter.cast(col)
36
+ def cast_type(value)
37
+ if is_null?(value)
38
+ nil
39
+ else
40
+ type_class.new(self).cast(value)
41
+ end
32
42
  end
33
43
 
34
44
  private
35
45
 
36
- def get_type
37
- Object.const_get get_class_for_type(type)
46
+ def default_missing_values
47
+ defaults = TableSchema::DEFAULTS[:missing_values]
48
+ @type == 'string' ? defaults - [''] : defaults
49
+ end
50
+
51
+ def type_class
52
+ Object.const_get get_class_for_type(@type)
53
+ end
54
+
55
+ def is_null?(value)
56
+ @missing_values.include?(value)
38
57
  end
39
58
 
40
59
  end
@@ -1,6 +1,21 @@
1
1
  module TableSchema
2
2
  module Helpers
3
3
 
4
+ def deep_symbolize_keys(descriptor)
5
+ case descriptor
6
+ when Hash
7
+ descriptor.inject({}) do |new_descriptor, (key, val)|
8
+ key_sym = key.respond_to?(:to_sym) ? key.to_sym : key
9
+ new_descriptor[key_sym] = deep_symbolize_keys(val)
10
+ new_descriptor
11
+ end
12
+ when Enumerable
13
+ descriptor.map{ |el| deep_symbolize_keys(el)}
14
+ else
15
+ descriptor
16
+ end
17
+ end
18
+
4
19
  def convert_to_boolean(value)
5
20
  if value.is_a?(Boolean)
6
21
  return value
@@ -22,25 +37,27 @@ module TableSchema
22
37
  end
23
38
 
24
39
  def get_class_for_type(type)
25
- "TableSchema::Types::#{type_class_lookup[type] || 'String'}"
40
+ "TableSchema::Types::#{type_class_lookup[type.to_sym] || 'String'}"
26
41
  end
27
42
 
28
43
  def type_class_lookup
29
44
  {
30
- 'any' => 'Any',
31
- 'array' => 'Array',
32
- 'base' => 'Base',
33
- 'boolean' => 'Boolean',
34
- 'date' => 'Date',
35
- 'datetime' => 'DateTime',
36
- 'geojson' => 'GeoJSON',
37
- 'geopoint' => 'GeoPoint',
38
- 'integer' => 'Integer',
39
- 'null' => 'Null',
40
- 'number' => 'Number',
41
- 'object' => 'Object',
42
- 'string' => 'String',
43
- 'time' => 'Time',
45
+ any: 'Any',
46
+ array: 'Array',
47
+ base: 'Base',
48
+ boolean: 'Boolean',
49
+ date: 'Date',
50
+ datetime: 'DateTime',
51
+ geojson: 'GeoJSON',
52
+ geopoint: 'GeoPoint',
53
+ integer: 'Integer',
54
+ number: 'Number',
55
+ object: 'Object',
56
+ string: 'String',
57
+ time: 'Time',
58
+ year: 'Year',
59
+ yearmonth: 'YearMonth',
60
+ duration: 'Duration',
44
61
  }
45
62
  end
46
63
 
@@ -1,3 +1,6 @@
1
+ require 'tableschema/defaults'
2
+ require 'tableschema/field'
3
+
1
4
  module TableSchema
2
5
  class Infer
3
6
 
@@ -5,41 +8,41 @@ module TableSchema
5
8
 
6
9
  attr_reader :schema
7
10
 
8
- def initialize(headers, rows, opts = {})
11
+ def initialize(headers, rows, explicit: false, primary_key: nil, row_limit: nil)
9
12
  @headers = headers
10
13
  @rows = rows
11
- @explicit = opts[:explicit]
12
- @primary_key = opts[:primary_key]
13
- @row_limit = opts[:row_limit]
14
+ @explicit = explicit
15
+ @primary_key = primary_key
16
+ @row_limit = row_limit
14
17
 
15
18
  @schema = {
16
- 'fields' => fields
19
+ fields: fields
17
20
  }
18
- @schema['primaryKey'] = @primary_key if @primary_key
21
+ @schema[:primaryKey] = @primary_key if @primary_key
19
22
  infer!
20
23
  end
21
24
 
22
25
  def fields
23
26
  @headers.map do |header|
24
27
  descriptor = {
25
- 'name' => header,
26
- 'title' => '',
27
- 'description' => '',
28
+ name: header,
29
+ title: '',
30
+ description: '',
28
31
  }
29
32
 
30
33
  constraints = {}
31
- constraints['required'] = @explicit === true
32
- constraints['unique'] = (header == @primary_key)
33
- constraints.delete_if { |k,v| v == false } unless @explicit === true
34
- descriptor['constraints'] = constraints if constraints.count > 0
35
- descriptor
34
+ constraints[:required] = @explicit === true
35
+ constraints[:unique] = (header == @primary_key)
36
+ constraints.delete_if { |_,v| v == false } unless @explicit === true
37
+ descriptor[:constraints] = constraints if constraints.count > 0
38
+ TableSchema::Field.new(descriptor)
36
39
  end
37
40
  end
38
41
 
39
42
  def infer!
40
43
  type_matches = []
41
- @rows.each_with_index do |row, i|
42
- break if @row_limit && i > @row_limit
44
+ @rows.each_with_index do |row, index|
45
+ break if @row_limit && index > @row_limit
43
46
  row = row.fields if row.class == CSV::Row
44
47
 
45
48
  row_length = row.count
@@ -53,9 +56,9 @@ module TableSchema
53
56
  row = row.push(fill).flatten
54
57
  end
55
58
 
56
- row.each_with_index do |col, i|
57
- type_matches[i] ||= []
58
- type_matches[i] << guess_type(col, i)
59
+ row.each_with_index do |col, idx|
60
+ type_matches[idx] ||= []
61
+ type_matches[idx] << guess_type(col, idx)
59
62
  end
60
63
 
61
64
  end
@@ -64,12 +67,12 @@ module TableSchema
64
67
  end
65
68
 
66
69
  def guess_type(col, index)
67
- guessed_type = 'string'
68
- guessed_format = 'default'
70
+ guessed_type = TableSchema::DEFAULTS[:type]
71
+ guessed_format = TableSchema::DEFAULTS[:format]
69
72
 
70
73
  available_types.reverse_each do |type|
71
74
  klass = get_class_for_type(type)
72
- converter = Kernel.const_get(klass).new(@schema['fields'][index])
75
+ converter = Kernel.const_get(klass).new(@schema[:fields][index])
73
76
  if converter.test(col) === true
74
77
  guessed_type = type
75
78
  guessed_format = guess_format(converter, col)
@@ -78,22 +81,23 @@ module TableSchema
78
81
  end
79
82
 
80
83
  {
81
- 'type' => guessed_type,
82
- 'format' => guessed_format
84
+ type: guessed_type,
85
+ format: guessed_format
83
86
  }
84
87
  end
85
88
 
86
89
  def guess_format(converter, col)
87
- guessed_format = 'default'
90
+ guessed_format = TableSchema::DEFAULTS[:format]
88
91
  converter.class.instance_methods.grep(/cast_/).each do |method|
89
92
  begin
90
93
  format = method.to_s
91
94
  format.slice!('cast_')
92
- next if format == 'default'
95
+ next if format == TableSchema::DEFAULTS[:format]
93
96
  converter.send(method, col)
94
97
  guessed_format = format
95
98
  break
96
99
  rescue TableSchema::Exception
100
+ next
97
101
  end
98
102
  end
99
103
  guessed_format
@@ -116,7 +120,7 @@ module TableSchema
116
120
  rv = sorted_counts[0][0]
117
121
  end
118
122
 
119
- @schema['fields'][v].merge!(rv)
123
+ @schema[:fields][v].merge!(rv)
120
124
  end
121
125
 
122
126
  end
@@ -128,7 +132,6 @@ module TableSchema
128
132
  'boolean',
129
133
  'number',
130
134
  'integer',
131
- 'null',
132
135
  'date',
133
136
  'time',
134
137
  'datetime',