tableschema 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e38a1a391b93666fb188277be7cad9a1d553a315
4
- data.tar.gz: 86cba39876c3d6111942964cef2b812c456d4ba5
3
+ metadata.gz: af9d2ac90e521ace72b172703da49b220a957978
4
+ data.tar.gz: 95a2e84830de62dbe00e0b4a107222b60d48bdd4
5
5
  SHA512:
6
- metadata.gz: 4b593795d0ca14c17d876bbc28941454ad956ba72d4fc3d645eea82eb0631716b61382efd7a466cd4320ab133cea27534b66aa373a9e408b17c5bbf8323c73c4
7
- data.tar.gz: 28b6e768e4190537b316ceb5ae8f45731b90114a664aa7ae99a7acd3e30a70df099e155af7c0078f6a72ff9b6c8c67eee4803a4e2cac3a1930b91f8449a44f2d
6
+ metadata.gz: e9de112e6d3f5bc137fde4fd9e972ca2ff97a6b818e3a66e184bc3d0868c9e77e71221ad76bbad56a2014753a24bef71705607063b08a96afa730b6dbd4114d0
7
+ data.tar.gz: 5caf4a5bde09a437ccf4d7a6cb91ae44fc07395366179ac682b6ba9b0a90dc966a5d5c51f2f80b70ae0911250aa5244e59b0b77e9df8821901929663807f631b
@@ -2,8 +2,8 @@
2
2
  language: ruby
3
3
 
4
4
  rvm:
5
- - 2.3.1
6
- - 2.4.1
5
+ - 2.3.3
6
+ - 2.4.0
7
7
 
8
8
  before_install:
9
9
  gem install bundler -v 1.11.2
data/README.md CHANGED
@@ -46,11 +46,11 @@ The gem `jsontableschema` is no longer maintained. Here are the steps to transit
46
46
  2. Replace module name `JsonTableSchema` with module name `TableSchema`. For example:
47
47
 
48
48
  ```ruby
49
- JsonTableSchema::Table.infer_schema(csv)
49
+ JsonTableSchema::Table.new(source, schema)
50
50
  ```
51
51
  with
52
52
  ```ruby
53
- TableSchema::Table.infer_schema(csv)
53
+ TableSchema::Table.new(source, schema)
54
54
  ```
55
55
 
56
56
  ## Usage
@@ -91,9 +91,9 @@ table.read
91
91
  ```
92
92
 
93
93
  Both `iter` and `read` take the optional parameters:
94
- - `row_limit`: integer, default `nil` - stop at this many rows
95
- - `cast`: boolean, default `true` - cast values for each row
96
94
  - `keyed`: boolean, default: `false` - return the rows as Hashes with headers as keys
95
+ - `cast`: boolean, default `true` - cast values for each row
96
+ - `limit`: integer, default `nil` - stop at this many rows
97
97
 
98
98
  ### Infer a schema
99
99
 
@@ -102,7 +102,8 @@ If you don't have a schema for a CSV, and want to generate one, you can infer a
102
102
  ```ruby
103
103
  csv = 'https://github.com/frictionlessdata/tableschema-rb/raw/master/spec/fixtures/simple_data.csv' # Can also be a url or array of arrays
104
104
 
105
- table = TableSchema::Table.infer_schema(csv)
105
+ table = TableSchema::Table.new(csv, nil)
106
+ table.infer()
106
107
  table.schema
107
108
  #=> {:fields=>[{:name=>"id", :title=>"", :description=>"", :type=>"integer", :format=>"default", :constraints=>{}}, {:name=>"title", :title=>"", :description=>"", :type=>"string", :format=>"default", :constraints=>{}}]}
108
109
  ```
@@ -169,26 +170,16 @@ schema_hash = {
169
170
  }
170
171
  schema = TableSchema::Schema.new(schema_hash)
171
172
 
172
- schema.headers
173
+ schema.field_names
173
174
  #=> ["id", "height"]
174
- schema.required_headers
175
- #=> ["id"]
176
175
  schema.fields
177
176
  #=> [{:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}, {:name=>"height", :type=>"number", :format=>"default", :constraints=>{}}]
178
- schema.primary_keys
177
+ schema.primary_key
179
178
  #=> ["id"]
180
179
  schema.foreign_keys
181
180
  # => [{:fields=>"state", :reference=>{:resource=>"the-resource", :fields=>"state_id"}}]
182
181
  schema.get_field('id')
183
182
  # => {:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}
184
- schema.has_field?('foo')
185
- #=> false
186
- schema.get_type('id')
187
- #=> 'string'
188
- schema.get_fields_by_type('string')
189
- # => [{:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}, {:name=>"state", :type=>"string", :format=>"default", :constraints=>{}}]
190
- schema.get_constraints('id')
191
- # => {:required=>true}
192
183
  ```
193
184
 
194
185
  #### Cast row
@@ -24,13 +24,8 @@
24
24
  "properties": {
25
25
  "name": {
26
26
  "title": "Name",
27
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
28
- "type": "string",
29
- "pattern": "^([-a-z0-9._/])+$",
30
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
31
- "examples": [
32
- "{\n \"name\": \"my-nice-name\"\n}\n"
33
- ]
27
+ "description": "A name for this field.",
28
+ "type": "string"
34
29
  },
35
30
  "title": {
36
31
  "title": "Title",
@@ -125,13 +120,8 @@
125
120
  "properties": {
126
121
  "name": {
127
122
  "title": "Name",
128
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
129
- "type": "string",
130
- "pattern": "^([-a-z0-9._/])+$",
131
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
132
- "examples": [
133
- "{\n \"name\": \"my-nice-name\"\n}\n"
134
- ]
123
+ "description": "A name for this field.",
124
+ "type": "string"
135
125
  },
136
126
  "title": {
137
127
  "title": "Title",
@@ -162,6 +152,12 @@
162
152
  ],
163
153
  "default": "default"
164
154
  },
155
+ "bareNumber": {
156
+ "type": "boolean",
157
+ "title": "bareNumber",
158
+ "description": "a boolean field with a default of `true`. If `true` the physical contents of this field must follow the formatting constraints already set out. If `false` the contents of this field may contain leading and/or trailing non-numeric characters (which implementors MUST therefore strip). The purpose of `bareNumber` is to allow publishers to publish numeric data that contains trailing characters such as percentages e.g. `95%` or leading characters such as currencies e.g. `€95` or `EUR 95`. Note that it is entirely up to implementors what, if anything, they do with stripped text.",
159
+ "default": true
160
+ },
165
161
  "decimalChar": {
166
162
  "type": "string",
167
163
  "description": "A string whose value is used to represent a decimal point within the number. The default value is `.`."
@@ -170,10 +166,6 @@
170
166
  "type": "string",
171
167
  "description": "A string whose value is used to group digits within the number. The default value is `null`. A common value is `,` e.g. '100,000'."
172
168
  },
173
- "currency": {
174
- "type": "string",
175
- "description": "A number that may include additional currency symbols."
176
- },
177
169
  "constraints": {
178
170
  "title": "Constraints",
179
171
  "description": "The following constraints are supported for `number` fields.",
@@ -257,13 +249,8 @@
257
249
  "properties": {
258
250
  "name": {
259
251
  "title": "Name",
260
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
261
- "type": "string",
262
- "pattern": "^([-a-z0-9._/])+$",
263
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
264
- "examples": [
265
- "{\n \"name\": \"my-nice-name\"\n}\n"
266
- ]
252
+ "description": "A name for this field.",
253
+ "type": "string"
267
254
  },
268
255
  "title": {
269
256
  "title": "Title",
@@ -294,6 +281,12 @@
294
281
  ],
295
282
  "default": "default"
296
283
  },
284
+ "bareNumber": {
285
+ "type": "boolean",
286
+ "title": "bareNumber",
287
+ "description": "a boolean field with a default of `true`. If `true` the physical contents of this field must follow the formatting constraints already set out. If `false` the contents of this field may contain leading and/or trailing non-numeric characters (which implementors MUST therefore strip). The purpose of `bareNumber` is to allow publishers to publish numeric data that contains trailing characters such as percentages e.g. `95%` or leading characters such as currencies e.g. `€95` or `EUR 95`. Note that it is entirely up to implementors what, if anything, they do with stripped text.",
288
+ "default": true
289
+ },
297
290
  "constraints": {
298
291
  "title": "Constraints",
299
292
  "description": "The following constraints are supported for `integer` fields.",
@@ -375,13 +368,8 @@
375
368
  "properties": {
376
369
  "name": {
377
370
  "title": "Name",
378
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
379
- "type": "string",
380
- "pattern": "^([-a-z0-9._/])+$",
381
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
382
- "examples": [
383
- "{\n \"name\": \"my-nice-name\"\n}\n"
384
- ]
371
+ "description": "A name for this field.",
372
+ "type": "string"
385
373
  },
386
374
  "title": {
387
375
  "title": "Title",
@@ -462,13 +450,8 @@
462
450
  "properties": {
463
451
  "name": {
464
452
  "title": "Name",
465
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
466
- "type": "string",
467
- "pattern": "^([-a-z0-9._/])+$",
468
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
469
- "examples": [
470
- "{\n \"name\": \"my-nice-name\"\n}\n"
471
- ]
453
+ "description": "A name for this field.",
454
+ "type": "string"
472
455
  },
473
456
  "title": {
474
457
  "title": "Title",
@@ -548,13 +531,8 @@
548
531
  "properties": {
549
532
  "name": {
550
533
  "title": "Name",
551
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
552
- "type": "string",
553
- "pattern": "^([-a-z0-9._/])+$",
554
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
555
- "examples": [
556
- "{\n \"name\": \"my-nice-name\"\n}\n"
557
- ]
534
+ "description": "A name for this field.",
535
+ "type": "string"
558
536
  },
559
537
  "title": {
560
538
  "title": "Title",
@@ -634,13 +612,8 @@
634
612
  "properties": {
635
613
  "name": {
636
614
  "title": "Name",
637
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
638
- "type": "string",
639
- "pattern": "^([-a-z0-9._/])+$",
640
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
641
- "examples": [
642
- "{\n \"name\": \"my-nice-name\"\n}\n"
643
- ]
615
+ "description": "A name for this field.",
616
+ "type": "string"
644
617
  },
645
618
  "title": {
646
619
  "title": "Title",
@@ -748,13 +721,8 @@
748
721
  "properties": {
749
722
  "name": {
750
723
  "title": "Name",
751
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
752
- "type": "string",
753
- "pattern": "^([-a-z0-9._/])+$",
754
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
755
- "examples": [
756
- "{\n \"name\": \"my-nice-name\"\n}\n"
757
- ]
724
+ "description": "A name for this field.",
725
+ "type": "string"
758
726
  },
759
727
  "title": {
760
728
  "title": "Title",
@@ -841,13 +809,8 @@
841
809
  "properties": {
842
810
  "name": {
843
811
  "title": "Name",
844
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
845
- "type": "string",
846
- "pattern": "^([-a-z0-9._/])+$",
847
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
848
- "examples": [
849
- "{\n \"name\": \"my-nice-name\"\n}\n"
850
- ]
812
+ "description": "A name for this field.",
813
+ "type": "string"
851
814
  },
852
815
  "title": {
853
816
  "title": "Title",
@@ -937,13 +900,8 @@
937
900
  "properties": {
938
901
  "name": {
939
902
  "title": "Name",
940
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
941
- "type": "string",
942
- "pattern": "^([-a-z0-9._/])+$",
943
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
944
- "examples": [
945
- "{\n \"name\": \"my-nice-name\"\n}\n"
946
- ]
903
+ "description": "A name for this field.",
904
+ "type": "string"
947
905
  },
948
906
  "title": {
949
907
  "title": "Title",
@@ -1038,13 +996,8 @@
1038
996
  "properties": {
1039
997
  "name": {
1040
998
  "title": "Name",
1041
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1042
- "type": "string",
1043
- "pattern": "^([-a-z0-9._/])+$",
1044
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1045
- "examples": [
1046
- "{\n \"name\": \"my-nice-name\"\n}\n"
1047
- ]
999
+ "description": "A name for this field.",
1000
+ "type": "string"
1048
1001
  },
1049
1002
  "title": {
1050
1003
  "title": "Title",
@@ -1146,13 +1099,8 @@
1146
1099
  "properties": {
1147
1100
  "name": {
1148
1101
  "title": "Name",
1149
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1150
- "type": "string",
1151
- "pattern": "^([-a-z0-9._/])+$",
1152
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1153
- "examples": [
1154
- "{\n \"name\": \"my-nice-name\"\n}\n"
1155
- ]
1102
+ "description": "A name for this field.",
1103
+ "type": "string"
1156
1104
  },
1157
1105
  "title": {
1158
1106
  "title": "Title",
@@ -1250,13 +1198,8 @@
1250
1198
  "properties": {
1251
1199
  "name": {
1252
1200
  "title": "Name",
1253
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1254
- "type": "string",
1255
- "pattern": "^([-a-z0-9._/])+$",
1256
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1257
- "examples": [
1258
- "{\n \"name\": \"my-nice-name\"\n}\n"
1259
- ]
1201
+ "description": "A name for this field.",
1202
+ "type": "string"
1260
1203
  },
1261
1204
  "title": {
1262
1205
  "title": "Title",
@@ -1352,13 +1295,8 @@
1352
1295
  "properties": {
1353
1296
  "name": {
1354
1297
  "title": "Name",
1355
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1356
- "type": "string",
1357
- "pattern": "^([-a-z0-9._/])+$",
1358
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1359
- "examples": [
1360
- "{\n \"name\": \"my-nice-name\"\n}\n"
1361
- ]
1298
+ "description": "A name for this field.",
1299
+ "type": "string"
1362
1300
  },
1363
1301
  "title": {
1364
1302
  "title": "Title",
@@ -1439,13 +1377,8 @@
1439
1377
  "properties": {
1440
1378
  "name": {
1441
1379
  "title": "Name",
1442
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1443
- "type": "string",
1444
- "pattern": "^([-a-z0-9._/])+$",
1445
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1446
- "examples": [
1447
- "{\n \"name\": \"my-nice-name\"\n}\n"
1448
- ]
1380
+ "description": "A name for this field.",
1381
+ "type": "string"
1449
1382
  },
1450
1383
  "title": {
1451
1384
  "title": "Title",
@@ -1539,9 +1472,9 @@
1539
1472
  "fields",
1540
1473
  "reference"
1541
1474
  ],
1542
- "properties": {
1543
- "oneOf": [
1544
- {
1475
+ "oneOf": [
1476
+ {
1477
+ "properties": {
1545
1478
  "fields": {
1546
1479
  "type": "array",
1547
1480
  "items": {
@@ -1572,8 +1505,10 @@
1572
1505
  }
1573
1506
  }
1574
1507
  }
1575
- },
1576
- {
1508
+ }
1509
+ },
1510
+ {
1511
+ "properties": {
1577
1512
  "fields": {
1578
1513
  "type": "string",
1579
1514
  "description": "Fields that make up the primary key."
@@ -1595,8 +1530,8 @@
1595
1530
  }
1596
1531
  }
1597
1532
  }
1598
- ]
1599
- }
1533
+ }
1534
+ ]
1600
1535
  },
1601
1536
  "examples": [
1602
1537
  "{\n \"foreignKeys\": [\n {\n \"fields\": \"state\",\n \"reference\": {\n \"resource\": \"the-resource\",\n \"fields\": \"state_id\"\n }\n }\n ]\n}\n",
@@ -32,8 +32,6 @@ require "tableschema/types/duration"
32
32
  require "tableschema/defaults"
33
33
 
34
34
  require "tableschema/field"
35
- require "tableschema/validate"
36
- require "tableschema/model"
37
35
  require "tableschema/schema"
38
36
  require "tableschema/table"
39
37
  require "tableschema/infer"
@@ -4,6 +4,9 @@ module TableSchema
4
4
  type: 'string',
5
5
  missing_values: [''],
6
6
  group_char: ',',
7
- decimal_char: '.'
7
+ decimal_char: '.',
8
+ true_values: ['true', 'True', 'TRUE', '1'],
9
+ false_values: ['false', 'False', 'FALSE', '0'],
10
+ bare_number: true,
8
11
  }.freeze
9
12
  end
@@ -4,7 +4,9 @@ module TableSchema
4
4
  class Field < Hash
5
5
  include TableSchema::Helpers
6
6
 
7
- attr_reader :name, :type, :format, :missing_values, :constraints
7
+ # Public
8
+
9
+ attr_reader :name, :type, :format, :required, :constraints
8
10
 
9
11
  def initialize(descriptor, missing_values=nil)
10
12
  self.merge! deep_symbolize_keys(descriptor)
@@ -12,6 +14,7 @@ module TableSchema
12
14
  @type = self[:type] = self.fetch(:type, TableSchema::DEFAULTS[:type])
13
15
  @format = self[:format] = self.fetch(:format, TableSchema::DEFAULTS[:format])
14
16
  @constraints = self[:constraints] = self.fetch(:constraints, {})
17
+ @required = @constraints.fetch(:required, false)
15
18
  @missing_values = missing_values || default_missing_values
16
19
  end
17
20
 
@@ -19,15 +22,15 @@ module TableSchema
19
22
  self.to_h
20
23
  end
21
24
 
22
- def cast_value(value, check_constraints: true)
25
+ def cast_value(value, constraints: true)
23
26
  cast_value = cast_type(value)
24
- return cast_value if check_constraints == false
27
+ return cast_value if constraints == false
25
28
  TableSchema::Constraints.new(self, cast_value).validate!
26
29
  cast_value
27
30
  end
28
31
 
29
- def test_value(value, check_constraints: true)
30
- cast_value(value, check_constraints: check_constraints)
32
+ def test_value(value, constraints: true)
33
+ cast_value(value, constraints: constraints)
31
34
  true
32
35
  rescue TableSchema::Exception
33
36
  false
@@ -41,6 +44,8 @@ module TableSchema
41
44
  end
42
45
  end
43
46
 
47
+ # Private
48
+
44
49
  private
45
50
 
46
51
  def default_missing_values
@@ -16,26 +16,6 @@ module TableSchema
16
16
  end
17
17
  end
18
18
 
19
- def convert_to_boolean(value)
20
- if value.is_a?(Boolean)
21
- return value
22
- elsif true_values.include?(value.to_s.downcase)
23
- true
24
- elsif false_values.include?(value.to_s.downcase)
25
- false
26
- else
27
- nil
28
- end
29
- end
30
-
31
- def true_values
32
- ['yes', 'y', 'true', 't', '1']
33
- end
34
-
35
- def false_values
36
- ['no', 'n', 'false', 'f', '0']
37
- end
38
-
39
19
  def get_class_for_type(type)
40
20
  "TableSchema::Types::#{type_class_lookup[type.to_sym] || 'String'}"
41
21
  end
@@ -1,12 +1,14 @@
1
+ require 'tableschema/defaults'
2
+
1
3
  module TableSchema
2
4
  class Schema < Hash
3
- include TableSchema::Validate
4
- include TableSchema::Model
5
5
  include TableSchema::Helpers
6
6
 
7
+ # Public
8
+
7
9
  attr_reader :errors
8
10
 
9
- def initialize(descriptor, case_insensitive_headers: false, strict: false)
11
+ def initialize(descriptor, strict: false, case_insensitive_headers: false)
10
12
  self.merge! deep_symbolize_keys(parse_schema(descriptor))
11
13
  @case_insensitive_headers = case_insensitive_headers
12
14
  @strict = strict
@@ -17,26 +19,60 @@ module TableSchema
17
19
  self
18
20
  end
19
21
 
22
+ def validate
23
+ @errors = Set.new(JSON::Validator.fully_validate(@profile, self))
24
+ check_primary_key
25
+ check_foreign_keys
26
+ @errors.empty?
27
+ end
28
+
29
+ def validate!
30
+ validate
31
+ raise SchemaException.new(@errors.first) unless @errors.empty?
32
+ true
33
+ end
34
+
20
35
  def descriptor
21
36
  self.to_h
22
37
  end
23
38
 
24
- def parse_schema(descriptor)
25
- if descriptor.class == Hash
26
- descriptor
27
- elsif descriptor.class == String
28
- begin
29
- JSON.parse(open(descriptor).read, symbolize_names: true)
30
- rescue Errno::ENOENT
31
- raise SchemaException.new("File not found at `#{descriptor}`")
32
- rescue OpenURI::HTTPError => e
33
- raise SchemaException.new("URL `#{descriptor}` returned #{e.message}")
34
- rescue JSON::ParserError
35
- raise SchemaException.new("File at `#{descriptor}` is not valid JSON")
36
- end
37
- else
38
- raise SchemaException.new("A schema must be a hash, path or URL")
39
- end
39
+ def primary_key
40
+ [self[:primaryKey]].flatten.reject { |k| k.nil? }
41
+ end
42
+
43
+ def foreign_keys
44
+ self[:foreignKeys] || []
45
+ end
46
+
47
+ def fields
48
+ self[:fields]
49
+ end
50
+
51
+ def field_names
52
+ fields.map { |f| transform(f[:name]) }
53
+ rescue NoMethodError
54
+ []
55
+ end
56
+
57
+ def get_field(field_name)
58
+ fields.find { |f| f[:name] == field_name }
59
+ end
60
+
61
+ def add_field(descriptor)
62
+ self[:fields].push(descriptor)
63
+ validate!
64
+ descriptor
65
+ rescue TableSchema::SchemaException => e
66
+ self[:fields].pop
67
+ raise e if @strict
68
+ nil
69
+ end
70
+
71
+ def remove_field(field_name)
72
+ field = get_field(field_name)
73
+ self[:fields].reject!{ |f| f.name == field_name }
74
+ validate
75
+ field
40
76
  end
41
77
 
42
78
  def cast_row(row, fail_fast: true)
@@ -66,5 +102,125 @@ module TableSchema
66
102
  true
67
103
  end
68
104
 
105
+ # Deprecated
106
+
107
+ alias :headers :field_names
108
+
109
+ def missing_values
110
+ self.fetch(:missingValues, TableSchema::DEFAULTS[:missing_values])
111
+ end
112
+
113
+ def get_type(field_name)
114
+ get_field(field_name)[:type]
115
+ end
116
+
117
+ def get_constraints(field_name)
118
+ get_field(field_name)[:constraints] || {}
119
+ end
120
+
121
+ def required_headers
122
+ fields.select { |f| f.fetch(:constraints, {}).fetch(:required, nil).to_s == 'true' }
123
+ .map { |f| transform(f[:name]) }
124
+ end
125
+
126
+ def unique_headers
127
+ fields.select { |f| f.fetch(:constraints, {}).fetch(:unique, nil).to_s == 'true' }
128
+ .map { |f| transform(f[:name]) }
129
+ end
130
+
131
+ def has_field?(field_name)
132
+ get_field(field_name) != nil
133
+ end
134
+
135
+ def get_fields_by_type(type)
136
+ fields.select { |f| f[:type] == type }
137
+ end
138
+
139
+ # Private
140
+
141
+ private
142
+
143
+ def parse_schema(descriptor)
144
+ if descriptor.class == Hash
145
+ descriptor
146
+ elsif descriptor.class == String
147
+ begin
148
+ JSON.parse(open(descriptor).read, symbolize_names: true)
149
+ rescue Errno::ENOENT
150
+ raise SchemaException.new("File not found at `#{descriptor}`")
151
+ rescue OpenURI::HTTPError => e
152
+ raise SchemaException.new("URL `#{descriptor}` returned #{e.message}")
153
+ rescue JSON::ParserError
154
+ raise SchemaException.new("File at `#{descriptor}` is not valid JSON")
155
+ end
156
+ else
157
+ raise SchemaException.new("A schema must be a hash, path or URL")
158
+ end
159
+ end
160
+
161
+ def transform(name)
162
+ name.downcase! if @case_insensitive_headers == true
163
+ name
164
+ end
165
+
166
+ def expand!
167
+ (self[:fields] || []).each do |f|
168
+ f[:type] = TableSchema::DEFAULTS[:type] if f[:type] == nil
169
+ f[:format] = TableSchema::DEFAULTS[:format] if f[:format] == nil
170
+ end
171
+ end
172
+
173
+ def load_fields!
174
+ self[:fields] = (self[:fields] || []).map { |f| TableSchema::Field.new(f, missing_values) }
175
+ end
176
+
177
+ def load_validator!
178
+ filepath = File.join(File.dirname(__FILE__), '..', 'profiles', 'table-schema.json')
179
+ @profile ||= JSON.parse(File.read(filepath), symbolize_names: true)
180
+ end
181
+
182
+ def check_primary_key
183
+ return if self[:primaryKey].nil?
184
+ primary_key.each { |pk| check_field_value(pk, 'primaryKey') }
185
+ end
186
+
187
+ def check_foreign_keys
188
+ return if self[:foreignKeys].nil?
189
+ self[:foreignKeys].each do |key|
190
+ if field_type_mismatch?(key)
191
+ add_error("A TableSchema `foreignKey.fields` value must be the same type as `foreignKey.reference.fields`")
192
+ end
193
+ if field_count_mismatch?(key)
194
+ add_error("A TableSchema `foreignKey.fields` must contain the same number of entries as `foreignKey.reference.fields`")
195
+ end
196
+ foreign_key_fields(key).each { |fk| check_field_value(fk, 'foreignKey.fields') }
197
+ if key.fetch(:reference).fetch(:resource).empty?
198
+ foreign_key_fields(key.fetch(:reference)).each { |fk| check_field_value(fk, 'foreignKey.reference.fields')}
199
+ end
200
+ end
201
+ end
202
+
203
+ def check_field_value(key, type)
204
+ if headers.select { |f| key == f }.count == 0
205
+ add_error("The TableSchema #{type} value `#{key}` is not found in any of the schema's field names")
206
+ end
207
+ end
208
+
209
+ def foreign_key_fields(key)
210
+ [key.fetch(:fields)].flatten
211
+ end
212
+
213
+ def field_count_mismatch?(key)
214
+ foreign_key_fields(key).count != foreign_key_fields(key.fetch(:reference)).count
215
+ end
216
+
217
+ def field_type_mismatch?(key)
218
+ key.fetch(:fields).class.name != key.fetch(:reference).fetch(:fields).class.name
219
+ end
220
+
221
+ def add_error(error)
222
+ @errors << error
223
+ end
224
+
69
225
  end
70
226
  end
@@ -1,27 +1,28 @@
1
1
  module TableSchema
2
2
  class Table
3
3
 
4
- attr_reader :schema, :headers
4
+ # Public
5
5
 
6
- def self.infer_schema(csv, csv_options: {})
7
- TableSchema::Table.new(csv, nil, csv_options)
8
- end
6
+ attr_reader :headers, :schema
9
7
 
10
8
  def initialize(csv, descriptor, csv_options: {})
11
9
  @csv_options = csv_options.merge(headers: true)
10
+ @descriptor = descriptor
12
11
  @csv = parse_csv(csv)
13
12
  @headers = initialize_headers
14
- @schema = descriptor.nil? ? infer_schema : TableSchema::Schema.new(descriptor)
15
- initialize_unique_colums
13
+ if !descriptor.nil?
14
+ @schema = TableSchema::Schema.new(@descriptor)
15
+ initialize_unique_colums
16
+ end
16
17
  end
17
18
 
18
- def iter(row_limit: nil, cast: true, keyed: false)
19
+ def iter(keyed: false, cast: true, limit: nil)
19
20
  unless block_given?
20
- return enum_for(:iter, row_limit: row_limit, cast: cast, keyed: keyed)
21
+ return enum_for(:iter, limit: limit, cast: cast, keyed: keyed)
21
22
  end
22
23
 
23
24
  @csv.each_with_index do |row, i|
24
- break if row_limit && (row_limit <= i)
25
+ break if limit && (limit <= i)
25
26
  if cast == true
26
27
  cast_values = @schema.cast_row(row)
27
28
  row = CSV::Row.new(@headers, cast_values)
@@ -38,11 +39,21 @@ module TableSchema
38
39
  @csv.rewind
39
40
  end
40
41
 
41
- def read(row_limit: nil, cast: true, keyed: false)
42
- iterator = self.iter(row_limit: row_limit, cast: cast, keyed: keyed)
42
+ def read(keyed: false, cast: true, limit: nil)
43
+ iterator = self.iter(keyed: keyed, cast: cast, limit: limit)
43
44
  iterator.to_a
44
45
  end
45
46
 
47
+ def infer()
48
+ if !@schema
49
+ inferer = TableSchema::Infer.new(@headers, @csv)
50
+ @schema = inferer.schema
51
+ initialize_unique_colums
52
+ @csv.rewind
53
+ end
54
+ @schema.descriptor
55
+ end
56
+
46
57
  def save(target)
47
58
  CSV.open(target, "wb", @csv_options) do |csv|
48
59
  csv << @headers
@@ -51,6 +62,8 @@ module TableSchema
51
62
  true
52
63
  end
53
64
 
65
+ # Private
66
+
54
67
  private
55
68
 
56
69
  def parse_csv(csv)
@@ -62,12 +75,6 @@ module TableSchema
62
75
  array.map { |row| row.to_csv(row_sep: nil) }.join("\r\n")
63
76
  end
64
77
 
65
- def infer_schema
66
- inferer = TableSchema::Infer.new(@headers, @csv)
67
- @csv.rewind
68
- inferer.schema
69
- end
70
-
71
78
  def initialize_headers
72
79
  headers = @csv.first.to_h.keys
73
80
  @csv.rewind
@@ -25,9 +25,17 @@ module TableSchema
25
25
  end
26
26
 
27
27
  def cast_default(value)
28
- value = convert_to_boolean(value)
29
- raise TableSchema::InvalidCast.new("#{value} is not a #{name}") if value.nil?
30
- value
28
+ true_values = @field.fetch(:trueValues, TableSchema::DEFAULTS[:true_values])
29
+ false_values = @field.fetch(:falseValues, TableSchema::DEFAULTS[:false_values])
30
+ if [true, false].include?(value)
31
+ return value
32
+ elsif true_values.include?(value)
33
+ return true
34
+ elsif false_values.include?(value)
35
+ return false
36
+ else
37
+ raise TableSchema::InvalidCast.new("#{value} is not a #{name}")
38
+ end
31
39
  end
32
40
 
33
41
  end
@@ -25,6 +25,10 @@ module TableSchema
25
25
  if value.is_a?(type)
26
26
  value
27
27
  else
28
+ bare_number = @field.fetch(:bareNumber, TableSchema::DEFAULTS[:bare_number])
29
+ if !bare_number
30
+ value = value.gsub(/((^\D*)|(\D*$))/, '')
31
+ end
28
32
  Integer(value)
29
33
  end
30
34
  rescue ArgumentError
@@ -48,28 +48,16 @@ module TableSchema
48
48
  else
49
49
  group_char = @field.fetch(:groupChar, TableSchema::DEFAULTS[:group_char])
50
50
  decimal_char = @field.fetch(:decimalChar, TableSchema::DEFAULTS[:decimal_char])
51
- formatted_value = value.gsub(group_char, '').gsub(decimal_char, '.')
52
- if formatted_value.match(percent_chars)
53
- process_percent(formatted_value)
54
- elsif @field.fetch(:currency, nil)
55
- process_currency(formatted_value)
56
- else
57
- Float(formatted_value)
51
+ bare_number = @field.fetch(:bareNumber, TableSchema::DEFAULTS[:bare_number])
52
+ formatted_value = value
53
+ formatted_value = formatted_value.gsub(group_char, '')
54
+ formatted_value = formatted_value.gsub(decimal_char, '.')
55
+ if !bare_number
56
+ formatted_value = formatted_value.gsub(/((^\D*)|(\D*$))/, '')
58
57
  end
58
+ Float(formatted_value)
59
59
  end
60
60
  end
61
-
62
- def process_percent(value)
63
- Float(value.gsub(percent_chars, '')) / 100
64
- end
65
-
66
- def process_currency(value)
67
- Float(value.gsub(@field[:currency], ''))
68
- end
69
-
70
- def percent_chars
71
- /%|‰|‱|%|﹪|٪/
72
- end
73
61
  end
74
62
  end
75
63
  end
@@ -1,3 +1,3 @@
1
1
  module TableSchema
2
- VERSION = "0.4.1".freeze
2
+ VERSION = "0.5.0".freeze
3
3
  end
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.add_development_dependency "coveralls", "~> 0.8.13"
27
27
  spec.add_development_dependency "rubocop", "~> 0.49.1"
28
28
 
29
- spec.add_dependency "json-schema", "~> 2.6.0"
29
+ spec.add_dependency "json-schema", "~> 2.8.0"
30
30
  spec.add_dependency "uuid", "~> 2.3.8"
31
31
  spec.add_dependency "tod", "~> 2.1.0"
32
32
  spec.add_dependency "activesupport", "~> 5.1.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tableschema
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Open Knowledge Foundation
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-27 00:00:00.000000000 Z
11
+ date: 2017-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -114,14 +114,14 @@ dependencies:
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: 2.6.0
117
+ version: 2.8.0
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: 2.6.0
124
+ version: 2.8.0
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: uuid
127
127
  requirement: !ruby/object:Gem::Requirement
@@ -201,7 +201,6 @@ files:
201
201
  - lib/tableschema/field.rb
202
202
  - lib/tableschema/helpers.rb
203
203
  - lib/tableschema/infer.rb
204
- - lib/tableschema/model.rb
205
204
  - lib/tableschema/schema.rb
206
205
  - lib/tableschema/table.rb
207
206
  - lib/tableschema/types/any.rb
@@ -220,7 +219,6 @@ files:
220
219
  - lib/tableschema/types/time.rb
221
220
  - lib/tableschema/types/year.rb
222
221
  - lib/tableschema/types/yearmonth.rb
223
- - lib/tableschema/validate.rb
224
222
  - lib/tableschema/version.rb
225
223
  - tableschema.gemspec
226
224
  homepage: https://github.com/frictionlessdata/tableschema-rb
@@ -1,96 +0,0 @@
1
- require 'tableschema/defaults'
2
-
3
- module TableSchema
4
- module Model
5
-
6
- def headers
7
- fields.map { |f| transform(f[:name]) }
8
- rescue NoMethodError
9
- []
10
- end
11
-
12
- alias :field_names :headers
13
-
14
- def fields
15
- self[:fields]
16
- end
17
-
18
- def primary_keys
19
- [self[:primaryKey]].flatten.reject { |k| k.nil? }
20
- end
21
-
22
- def foreign_keys
23
- self[:foreignKeys] || []
24
- end
25
-
26
- def missing_values
27
- self.fetch(:missingValues, TableSchema::DEFAULTS[:missing_values])
28
- end
29
-
30
- def get_type(field_name)
31
- get_field(field_name)[:type]
32
- end
33
-
34
- def get_constraints(field_name)
35
- get_field(field_name)[:constraints] || {}
36
- end
37
-
38
- def required_headers
39
- fields.select { |f| f.fetch(:constraints, {}).fetch(:required, nil).to_s == 'true' }
40
- .map { |f| transform(f[:name]) }
41
- end
42
-
43
- def unique_headers
44
- fields.select { |f| f.fetch(:constraints, {}).fetch(:unique, nil).to_s == 'true' }
45
- .map { |f| transform(f[:name]) }
46
- end
47
-
48
- def has_field?(field_name)
49
- get_field(field_name) != nil
50
- end
51
-
52
- def get_field(field_name)
53
- fields.find { |f| f[:name] == field_name }
54
- end
55
-
56
- def get_fields_by_type(type)
57
- fields.select { |f| f[:type] == type }
58
- end
59
-
60
- def add_field(descriptor)
61
- self[:fields].push(descriptor)
62
- validate!
63
- descriptor
64
- rescue TableSchema::SchemaException => e
65
- self[:fields].pop
66
- raise e if @strict
67
- nil
68
- end
69
-
70
- def remove_field(field_name)
71
- field = get_field(field_name)
72
- self[:fields].reject!{ |f| f.name == field_name }
73
- validate
74
- field
75
- end
76
-
77
- private
78
-
79
- def transform(name)
80
- name.downcase! if @case_insensitive_headers == true
81
- name
82
- end
83
-
84
- def expand!
85
- (self[:fields] || []).each do |f|
86
- f[:type] = TableSchema::DEFAULTS[:type] if f[:type] == nil
87
- f[:format] = TableSchema::DEFAULTS[:format] if f[:format] == nil
88
- end
89
- end
90
-
91
- def load_fields!
92
- self[:fields] = (self[:fields] || []).map { |f| TableSchema::Field.new(f, missing_values) }
93
- end
94
-
95
- end
96
- end
@@ -1,70 +0,0 @@
1
- module TableSchema
2
- module Validate
3
-
4
- attr_reader :errors
5
-
6
- def load_validator!
7
- filepath = File.join(File.dirname(__FILE__), '..', 'profiles', 'table-schema.json')
8
- @profile ||= JSON.parse(File.read(filepath), symbolize_names: true)
9
- end
10
-
11
- def validate
12
- @errors = Set.new(JSON::Validator.fully_validate(@profile, self))
13
- check_primary_keys
14
- check_foreign_keys
15
- @errors.empty?
16
- end
17
-
18
- def validate!
19
- validate
20
- raise SchemaException.new(@errors.first) unless @errors.empty?
21
- true
22
- end
23
-
24
- private
25
-
26
- def check_primary_keys
27
- return if self[:primaryKey].nil?
28
- primary_keys.each { |pk| check_field_value(pk, 'primaryKey') }
29
- end
30
-
31
- def check_foreign_keys
32
- return if self[:foreignKeys].nil?
33
- self[:foreignKeys].each do |key|
34
- if field_type_mismatch?(key)
35
- add_error("A TableSchema `foreignKey.fields` value must be the same type as `foreignKey.reference.fields`")
36
- end
37
- if field_count_mismatch?(key)
38
- add_error("A TableSchema `foreignKey.fields` must contain the same number of entries as `foreignKey.reference.fields`")
39
- end
40
- foreign_key_fields(key).each { |fk| check_field_value(fk, 'foreignKey.fields') }
41
- if key.fetch(:reference).fetch(:resource).empty?
42
- foreign_key_fields(key.fetch(:reference)).each { |fk| check_field_value(fk, 'foreignKey.reference.fields')}
43
- end
44
- end
45
- end
46
-
47
- def check_field_value(key, type)
48
- if headers.select { |f| key == f }.count == 0
49
- add_error("The TableSchema #{type} value `#{key}` is not found in any of the schema's field names")
50
- end
51
- end
52
-
53
- def foreign_key_fields(key)
54
- [key.fetch(:fields)].flatten
55
- end
56
-
57
- def field_count_mismatch?(key)
58
- foreign_key_fields(key).count != foreign_key_fields(key.fetch(:reference)).count
59
- end
60
-
61
- def field_type_mismatch?(key)
62
- key.fetch(:fields).class.name != key.fetch(:reference).fetch(:fields).class.name
63
- end
64
-
65
- def add_error(error)
66
- @errors << error
67
- end
68
-
69
- end
70
- end