tableschema 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e38a1a391b93666fb188277be7cad9a1d553a315
4
- data.tar.gz: 86cba39876c3d6111942964cef2b812c456d4ba5
3
+ metadata.gz: af9d2ac90e521ace72b172703da49b220a957978
4
+ data.tar.gz: 95a2e84830de62dbe00e0b4a107222b60d48bdd4
5
5
  SHA512:
6
- metadata.gz: 4b593795d0ca14c17d876bbc28941454ad956ba72d4fc3d645eea82eb0631716b61382efd7a466cd4320ab133cea27534b66aa373a9e408b17c5bbf8323c73c4
7
- data.tar.gz: 28b6e768e4190537b316ceb5ae8f45731b90114a664aa7ae99a7acd3e30a70df099e155af7c0078f6a72ff9b6c8c67eee4803a4e2cac3a1930b91f8449a44f2d
6
+ metadata.gz: e9de112e6d3f5bc137fde4fd9e972ca2ff97a6b818e3a66e184bc3d0868c9e77e71221ad76bbad56a2014753a24bef71705607063b08a96afa730b6dbd4114d0
7
+ data.tar.gz: 5caf4a5bde09a437ccf4d7a6cb91ae44fc07395366179ac682b6ba9b0a90dc966a5d5c51f2f80b70ae0911250aa5244e59b0b77e9df8821901929663807f631b
@@ -2,8 +2,8 @@
2
2
  language: ruby
3
3
 
4
4
  rvm:
5
- - 2.3.1
6
- - 2.4.1
5
+ - 2.3.3
6
+ - 2.4.0
7
7
 
8
8
  before_install:
9
9
  gem install bundler -v 1.11.2
data/README.md CHANGED
@@ -46,11 +46,11 @@ The gem `jsontableschema` is no longer maintained. Here are the steps to transit
46
46
  2. Replace module name `JsonTableSchema` with module name `TableSchema`. For example:
47
47
 
48
48
  ```ruby
49
- JsonTableSchema::Table.infer_schema(csv)
49
+ JsonTableSchema::Table.new(source, schema)
50
50
  ```
51
51
  with
52
52
  ```ruby
53
- TableSchema::Table.infer_schema(csv)
53
+ TableSchema::Table.new(source, schema)
54
54
  ```
55
55
 
56
56
  ## Usage
@@ -91,9 +91,9 @@ table.read
91
91
  ```
92
92
 
93
93
  Both `iter` and `read` take the optional parameters:
94
- - `row_limit`: integer, default `nil` - stop at this many rows
95
- - `cast`: boolean, default `true` - cast values for each row
96
94
  - `keyed`: boolean, default: `false` - return the rows as Hashes with headers as keys
95
+ - `cast`: boolean, default `true` - cast values for each row
96
+ - `limit`: integer, default `nil` - stop at this many rows
97
97
 
98
98
  ### Infer a schema
99
99
 
@@ -102,7 +102,8 @@ If you don't have a schema for a CSV, and want to generate one, you can infer a
102
102
  ```ruby
103
103
  csv = 'https://github.com/frictionlessdata/tableschema-rb/raw/master/spec/fixtures/simple_data.csv' # Can also be a url or array of arrays
104
104
 
105
- table = TableSchema::Table.infer_schema(csv)
105
+ table = TableSchema::Table.new(csv, nil)
106
+ table.infer()
106
107
  table.schema
107
108
  #=> {:fields=>[{:name=>"id", :title=>"", :description=>"", :type=>"integer", :format=>"default", :constraints=>{}}, {:name=>"title", :title=>"", :description=>"", :type=>"string", :format=>"default", :constraints=>{}}]}
108
109
  ```
@@ -169,26 +170,16 @@ schema_hash = {
169
170
  }
170
171
  schema = TableSchema::Schema.new(schema_hash)
171
172
 
172
- schema.headers
173
+ schema.field_names
173
174
  #=> ["id", "height"]
174
- schema.required_headers
175
- #=> ["id"]
176
175
  schema.fields
177
176
  #=> [{:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}, {:name=>"height", :type=>"number", :format=>"default", :constraints=>{}}]
178
- schema.primary_keys
177
+ schema.primary_key
179
178
  #=> ["id"]
180
179
  schema.foreign_keys
181
180
  # => [{:fields=>"state", :reference=>{:resource=>"the-resource", :fields=>"state_id"}}]
182
181
  schema.get_field('id')
183
182
  # => {:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}
184
- schema.has_field?('foo')
185
- #=> false
186
- schema.get_type('id')
187
- #=> 'string'
188
- schema.get_fields_by_type('string')
189
- # => [{:name=>"id", :type=>"string", :constraints=>{:required=>true}, :format=>"default"}, {:name=>"state", :type=>"string", :format=>"default", :constraints=>{}}]
190
- schema.get_constraints('id')
191
- # => {:required=>true}
192
183
  ```
193
184
 
194
185
  #### Cast row
@@ -24,13 +24,8 @@
24
24
  "properties": {
25
25
  "name": {
26
26
  "title": "Name",
27
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
28
- "type": "string",
29
- "pattern": "^([-a-z0-9._/])+$",
30
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
31
- "examples": [
32
- "{\n \"name\": \"my-nice-name\"\n}\n"
33
- ]
27
+ "description": "A name for this field.",
28
+ "type": "string"
34
29
  },
35
30
  "title": {
36
31
  "title": "Title",
@@ -125,13 +120,8 @@
125
120
  "properties": {
126
121
  "name": {
127
122
  "title": "Name",
128
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
129
- "type": "string",
130
- "pattern": "^([-a-z0-9._/])+$",
131
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
132
- "examples": [
133
- "{\n \"name\": \"my-nice-name\"\n}\n"
134
- ]
123
+ "description": "A name for this field.",
124
+ "type": "string"
135
125
  },
136
126
  "title": {
137
127
  "title": "Title",
@@ -162,6 +152,12 @@
162
152
  ],
163
153
  "default": "default"
164
154
  },
155
+ "bareNumber": {
156
+ "type": "boolean",
157
+ "title": "bareNumber",
158
+ "description": "a boolean field with a default of `true`. If `true` the physical contents of this field must follow the formatting constraints already set out. If `false` the contents of this field may contain leading and/or trailing non-numeric characters (which implementors MUST therefore strip). The purpose of `bareNumber` is to allow publishers to publish numeric data that contains trailing characters such as percentages e.g. `95%` or leading characters such as currencies e.g. `€95` or `EUR 95`. Note that it is entirely up to implementors what, if anything, they do with stripped text.",
159
+ "default": true
160
+ },
165
161
  "decimalChar": {
166
162
  "type": "string",
167
163
  "description": "A string whose value is used to represent a decimal point within the number. The default value is `.`."
@@ -170,10 +166,6 @@
170
166
  "type": "string",
171
167
  "description": "A string whose value is used to group digits within the number. The default value is `null`. A common value is `,` e.g. '100,000'."
172
168
  },
173
- "currency": {
174
- "type": "string",
175
- "description": "A number that may include additional currency symbols."
176
- },
177
169
  "constraints": {
178
170
  "title": "Constraints",
179
171
  "description": "The following constraints are supported for `number` fields.",
@@ -257,13 +249,8 @@
257
249
  "properties": {
258
250
  "name": {
259
251
  "title": "Name",
260
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
261
- "type": "string",
262
- "pattern": "^([-a-z0-9._/])+$",
263
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
264
- "examples": [
265
- "{\n \"name\": \"my-nice-name\"\n}\n"
266
- ]
252
+ "description": "A name for this field.",
253
+ "type": "string"
267
254
  },
268
255
  "title": {
269
256
  "title": "Title",
@@ -294,6 +281,12 @@
294
281
  ],
295
282
  "default": "default"
296
283
  },
284
+ "bareNumber": {
285
+ "type": "boolean",
286
+ "title": "bareNumber",
287
+ "description": "a boolean field with a default of `true`. If `true` the physical contents of this field must follow the formatting constraints already set out. If `false` the contents of this field may contain leading and/or trailing non-numeric characters (which implementors MUST therefore strip). The purpose of `bareNumber` is to allow publishers to publish numeric data that contains trailing characters such as percentages e.g. `95%` or leading characters such as currencies e.g. `€95` or `EUR 95`. Note that it is entirely up to implementors what, if anything, they do with stripped text.",
288
+ "default": true
289
+ },
297
290
  "constraints": {
298
291
  "title": "Constraints",
299
292
  "description": "The following constraints are supported for `integer` fields.",
@@ -375,13 +368,8 @@
375
368
  "properties": {
376
369
  "name": {
377
370
  "title": "Name",
378
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
379
- "type": "string",
380
- "pattern": "^([-a-z0-9._/])+$",
381
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
382
- "examples": [
383
- "{\n \"name\": \"my-nice-name\"\n}\n"
384
- ]
371
+ "description": "A name for this field.",
372
+ "type": "string"
385
373
  },
386
374
  "title": {
387
375
  "title": "Title",
@@ -462,13 +450,8 @@
462
450
  "properties": {
463
451
  "name": {
464
452
  "title": "Name",
465
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
466
- "type": "string",
467
- "pattern": "^([-a-z0-9._/])+$",
468
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
469
- "examples": [
470
- "{\n \"name\": \"my-nice-name\"\n}\n"
471
- ]
453
+ "description": "A name for this field.",
454
+ "type": "string"
472
455
  },
473
456
  "title": {
474
457
  "title": "Title",
@@ -548,13 +531,8 @@
548
531
  "properties": {
549
532
  "name": {
550
533
  "title": "Name",
551
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
552
- "type": "string",
553
- "pattern": "^([-a-z0-9._/])+$",
554
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
555
- "examples": [
556
- "{\n \"name\": \"my-nice-name\"\n}\n"
557
- ]
534
+ "description": "A name for this field.",
535
+ "type": "string"
558
536
  },
559
537
  "title": {
560
538
  "title": "Title",
@@ -634,13 +612,8 @@
634
612
  "properties": {
635
613
  "name": {
636
614
  "title": "Name",
637
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
638
- "type": "string",
639
- "pattern": "^([-a-z0-9._/])+$",
640
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
641
- "examples": [
642
- "{\n \"name\": \"my-nice-name\"\n}\n"
643
- ]
615
+ "description": "A name for this field.",
616
+ "type": "string"
644
617
  },
645
618
  "title": {
646
619
  "title": "Title",
@@ -748,13 +721,8 @@
748
721
  "properties": {
749
722
  "name": {
750
723
  "title": "Name",
751
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
752
- "type": "string",
753
- "pattern": "^([-a-z0-9._/])+$",
754
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
755
- "examples": [
756
- "{\n \"name\": \"my-nice-name\"\n}\n"
757
- ]
724
+ "description": "A name for this field.",
725
+ "type": "string"
758
726
  },
759
727
  "title": {
760
728
  "title": "Title",
@@ -841,13 +809,8 @@
841
809
  "properties": {
842
810
  "name": {
843
811
  "title": "Name",
844
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
845
- "type": "string",
846
- "pattern": "^([-a-z0-9._/])+$",
847
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
848
- "examples": [
849
- "{\n \"name\": \"my-nice-name\"\n}\n"
850
- ]
812
+ "description": "A name for this field.",
813
+ "type": "string"
851
814
  },
852
815
  "title": {
853
816
  "title": "Title",
@@ -937,13 +900,8 @@
937
900
  "properties": {
938
901
  "name": {
939
902
  "title": "Name",
940
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
941
- "type": "string",
942
- "pattern": "^([-a-z0-9._/])+$",
943
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
944
- "examples": [
945
- "{\n \"name\": \"my-nice-name\"\n}\n"
946
- ]
903
+ "description": "A name for this field.",
904
+ "type": "string"
947
905
  },
948
906
  "title": {
949
907
  "title": "Title",
@@ -1038,13 +996,8 @@
1038
996
  "properties": {
1039
997
  "name": {
1040
998
  "title": "Name",
1041
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1042
- "type": "string",
1043
- "pattern": "^([-a-z0-9._/])+$",
1044
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1045
- "examples": [
1046
- "{\n \"name\": \"my-nice-name\"\n}\n"
1047
- ]
999
+ "description": "A name for this field.",
1000
+ "type": "string"
1048
1001
  },
1049
1002
  "title": {
1050
1003
  "title": "Title",
@@ -1146,13 +1099,8 @@
1146
1099
  "properties": {
1147
1100
  "name": {
1148
1101
  "title": "Name",
1149
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1150
- "type": "string",
1151
- "pattern": "^([-a-z0-9._/])+$",
1152
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1153
- "examples": [
1154
- "{\n \"name\": \"my-nice-name\"\n}\n"
1155
- ]
1102
+ "description": "A name for this field.",
1103
+ "type": "string"
1156
1104
  },
1157
1105
  "title": {
1158
1106
  "title": "Title",
@@ -1250,13 +1198,8 @@
1250
1198
  "properties": {
1251
1199
  "name": {
1252
1200
  "title": "Name",
1253
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1254
- "type": "string",
1255
- "pattern": "^([-a-z0-9._/])+$",
1256
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1257
- "examples": [
1258
- "{\n \"name\": \"my-nice-name\"\n}\n"
1259
- ]
1201
+ "description": "A name for this field.",
1202
+ "type": "string"
1260
1203
  },
1261
1204
  "title": {
1262
1205
  "title": "Title",
@@ -1352,13 +1295,8 @@
1352
1295
  "properties": {
1353
1296
  "name": {
1354
1297
  "title": "Name",
1355
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1356
- "type": "string",
1357
- "pattern": "^([-a-z0-9._/])+$",
1358
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1359
- "examples": [
1360
- "{\n \"name\": \"my-nice-name\"\n}\n"
1361
- ]
1298
+ "description": "A name for this field.",
1299
+ "type": "string"
1362
1300
  },
1363
1301
  "title": {
1364
1302
  "title": "Title",
@@ -1439,13 +1377,8 @@
1439
1377
  "properties": {
1440
1378
  "name": {
1441
1379
  "title": "Name",
1442
- "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
1443
- "type": "string",
1444
- "pattern": "^([-a-z0-9._/])+$",
1445
- "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
1446
- "examples": [
1447
- "{\n \"name\": \"my-nice-name\"\n}\n"
1448
- ]
1380
+ "description": "A name for this field.",
1381
+ "type": "string"
1449
1382
  },
1450
1383
  "title": {
1451
1384
  "title": "Title",
@@ -1539,9 +1472,9 @@
1539
1472
  "fields",
1540
1473
  "reference"
1541
1474
  ],
1542
- "properties": {
1543
- "oneOf": [
1544
- {
1475
+ "oneOf": [
1476
+ {
1477
+ "properties": {
1545
1478
  "fields": {
1546
1479
  "type": "array",
1547
1480
  "items": {
@@ -1572,8 +1505,10 @@
1572
1505
  }
1573
1506
  }
1574
1507
  }
1575
- },
1576
- {
1508
+ }
1509
+ },
1510
+ {
1511
+ "properties": {
1577
1512
  "fields": {
1578
1513
  "type": "string",
1579
1514
  "description": "Fields that make up the primary key."
@@ -1595,8 +1530,8 @@
1595
1530
  }
1596
1531
  }
1597
1532
  }
1598
- ]
1599
- }
1533
+ }
1534
+ ]
1600
1535
  },
1601
1536
  "examples": [
1602
1537
  "{\n \"foreignKeys\": [\n {\n \"fields\": \"state\",\n \"reference\": {\n \"resource\": \"the-resource\",\n \"fields\": \"state_id\"\n }\n }\n ]\n}\n",
@@ -32,8 +32,6 @@ require "tableschema/types/duration"
32
32
  require "tableschema/defaults"
33
33
 
34
34
  require "tableschema/field"
35
- require "tableschema/validate"
36
- require "tableschema/model"
37
35
  require "tableschema/schema"
38
36
  require "tableschema/table"
39
37
  require "tableschema/infer"
@@ -4,6 +4,9 @@ module TableSchema
4
4
  type: 'string',
5
5
  missing_values: [''],
6
6
  group_char: ',',
7
- decimal_char: '.'
7
+ decimal_char: '.',
8
+ true_values: ['true', 'True', 'TRUE', '1'],
9
+ false_values: ['false', 'False', 'FALSE', '0'],
10
+ bare_number: true,
8
11
  }.freeze
9
12
  end
@@ -4,7 +4,9 @@ module TableSchema
4
4
  class Field < Hash
5
5
  include TableSchema::Helpers
6
6
 
7
- attr_reader :name, :type, :format, :missing_values, :constraints
7
+ # Public
8
+
9
+ attr_reader :name, :type, :format, :required, :constraints
8
10
 
9
11
  def initialize(descriptor, missing_values=nil)
10
12
  self.merge! deep_symbolize_keys(descriptor)
@@ -12,6 +14,7 @@ module TableSchema
12
14
  @type = self[:type] = self.fetch(:type, TableSchema::DEFAULTS[:type])
13
15
  @format = self[:format] = self.fetch(:format, TableSchema::DEFAULTS[:format])
14
16
  @constraints = self[:constraints] = self.fetch(:constraints, {})
17
+ @required = @constraints.fetch(:required, false)
15
18
  @missing_values = missing_values || default_missing_values
16
19
  end
17
20
 
@@ -19,15 +22,15 @@ module TableSchema
19
22
  self.to_h
20
23
  end
21
24
 
22
- def cast_value(value, check_constraints: true)
25
+ def cast_value(value, constraints: true)
23
26
  cast_value = cast_type(value)
24
- return cast_value if check_constraints == false
27
+ return cast_value if constraints == false
25
28
  TableSchema::Constraints.new(self, cast_value).validate!
26
29
  cast_value
27
30
  end
28
31
 
29
- def test_value(value, check_constraints: true)
30
- cast_value(value, check_constraints: check_constraints)
32
+ def test_value(value, constraints: true)
33
+ cast_value(value, constraints: constraints)
31
34
  true
32
35
  rescue TableSchema::Exception
33
36
  false
@@ -41,6 +44,8 @@ module TableSchema
41
44
  end
42
45
  end
43
46
 
47
+ # Private
48
+
44
49
  private
45
50
 
46
51
  def default_missing_values
@@ -16,26 +16,6 @@ module TableSchema
16
16
  end
17
17
  end
18
18
 
19
- def convert_to_boolean(value)
20
- if value.is_a?(Boolean)
21
- return value
22
- elsif true_values.include?(value.to_s.downcase)
23
- true
24
- elsif false_values.include?(value.to_s.downcase)
25
- false
26
- else
27
- nil
28
- end
29
- end
30
-
31
- def true_values
32
- ['yes', 'y', 'true', 't', '1']
33
- end
34
-
35
- def false_values
36
- ['no', 'n', 'false', 'f', '0']
37
- end
38
-
39
19
  def get_class_for_type(type)
40
20
  "TableSchema::Types::#{type_class_lookup[type.to_sym] || 'String'}"
41
21
  end
@@ -1,12 +1,14 @@
1
+ require 'tableschema/defaults'
2
+
1
3
  module TableSchema
2
4
  class Schema < Hash
3
- include TableSchema::Validate
4
- include TableSchema::Model
5
5
  include TableSchema::Helpers
6
6
 
7
+ # Public
8
+
7
9
  attr_reader :errors
8
10
 
9
- def initialize(descriptor, case_insensitive_headers: false, strict: false)
11
+ def initialize(descriptor, strict: false, case_insensitive_headers: false)
10
12
  self.merge! deep_symbolize_keys(parse_schema(descriptor))
11
13
  @case_insensitive_headers = case_insensitive_headers
12
14
  @strict = strict
@@ -17,26 +19,60 @@ module TableSchema
17
19
  self
18
20
  end
19
21
 
22
+ def validate
23
+ @errors = Set.new(JSON::Validator.fully_validate(@profile, self))
24
+ check_primary_key
25
+ check_foreign_keys
26
+ @errors.empty?
27
+ end
28
+
29
+ def validate!
30
+ validate
31
+ raise SchemaException.new(@errors.first) unless @errors.empty?
32
+ true
33
+ end
34
+
20
35
  def descriptor
21
36
  self.to_h
22
37
  end
23
38
 
24
- def parse_schema(descriptor)
25
- if descriptor.class == Hash
26
- descriptor
27
- elsif descriptor.class == String
28
- begin
29
- JSON.parse(open(descriptor).read, symbolize_names: true)
30
- rescue Errno::ENOENT
31
- raise SchemaException.new("File not found at `#{descriptor}`")
32
- rescue OpenURI::HTTPError => e
33
- raise SchemaException.new("URL `#{descriptor}` returned #{e.message}")
34
- rescue JSON::ParserError
35
- raise SchemaException.new("File at `#{descriptor}` is not valid JSON")
36
- end
37
- else
38
- raise SchemaException.new("A schema must be a hash, path or URL")
39
- end
39
+ def primary_key
40
+ [self[:primaryKey]].flatten.reject { |k| k.nil? }
41
+ end
42
+
43
+ def foreign_keys
44
+ self[:foreignKeys] || []
45
+ end
46
+
47
+ def fields
48
+ self[:fields]
49
+ end
50
+
51
+ def field_names
52
+ fields.map { |f| transform(f[:name]) }
53
+ rescue NoMethodError
54
+ []
55
+ end
56
+
57
+ def get_field(field_name)
58
+ fields.find { |f| f[:name] == field_name }
59
+ end
60
+
61
+ def add_field(descriptor)
62
+ self[:fields].push(descriptor)
63
+ validate!
64
+ descriptor
65
+ rescue TableSchema::SchemaException => e
66
+ self[:fields].pop
67
+ raise e if @strict
68
+ nil
69
+ end
70
+
71
+ def remove_field(field_name)
72
+ field = get_field(field_name)
73
+ self[:fields].reject!{ |f| f.name == field_name }
74
+ validate
75
+ field
40
76
  end
41
77
 
42
78
  def cast_row(row, fail_fast: true)
@@ -66,5 +102,125 @@ module TableSchema
66
102
  true
67
103
  end
68
104
 
105
+ # Deprecated
106
+
107
+ alias :headers :field_names
108
+
109
+ def missing_values
110
+ self.fetch(:missingValues, TableSchema::DEFAULTS[:missing_values])
111
+ end
112
+
113
+ def get_type(field_name)
114
+ get_field(field_name)[:type]
115
+ end
116
+
117
+ def get_constraints(field_name)
118
+ get_field(field_name)[:constraints] || {}
119
+ end
120
+
121
+ def required_headers
122
+ fields.select { |f| f.fetch(:constraints, {}).fetch(:required, nil).to_s == 'true' }
123
+ .map { |f| transform(f[:name]) }
124
+ end
125
+
126
+ def unique_headers
127
+ fields.select { |f| f.fetch(:constraints, {}).fetch(:unique, nil).to_s == 'true' }
128
+ .map { |f| transform(f[:name]) }
129
+ end
130
+
131
+ def has_field?(field_name)
132
+ get_field(field_name) != nil
133
+ end
134
+
135
+ def get_fields_by_type(type)
136
+ fields.select { |f| f[:type] == type }
137
+ end
138
+
139
+ # Private
140
+
141
+ private
142
+
143
+ def parse_schema(descriptor)
144
+ if descriptor.class == Hash
145
+ descriptor
146
+ elsif descriptor.class == String
147
+ begin
148
+ JSON.parse(open(descriptor).read, symbolize_names: true)
149
+ rescue Errno::ENOENT
150
+ raise SchemaException.new("File not found at `#{descriptor}`")
151
+ rescue OpenURI::HTTPError => e
152
+ raise SchemaException.new("URL `#{descriptor}` returned #{e.message}")
153
+ rescue JSON::ParserError
154
+ raise SchemaException.new("File at `#{descriptor}` is not valid JSON")
155
+ end
156
+ else
157
+ raise SchemaException.new("A schema must be a hash, path or URL")
158
+ end
159
+ end
160
+
161
+ def transform(name)
162
+ name.downcase! if @case_insensitive_headers == true
163
+ name
164
+ end
165
+
166
+ def expand!
167
+ (self[:fields] || []).each do |f|
168
+ f[:type] = TableSchema::DEFAULTS[:type] if f[:type] == nil
169
+ f[:format] = TableSchema::DEFAULTS[:format] if f[:format] == nil
170
+ end
171
+ end
172
+
173
+ def load_fields!
174
+ self[:fields] = (self[:fields] || []).map { |f| TableSchema::Field.new(f, missing_values) }
175
+ end
176
+
177
+ def load_validator!
178
+ filepath = File.join(File.dirname(__FILE__), '..', 'profiles', 'table-schema.json')
179
+ @profile ||= JSON.parse(File.read(filepath), symbolize_names: true)
180
+ end
181
+
182
+ def check_primary_key
183
+ return if self[:primaryKey].nil?
184
+ primary_key.each { |pk| check_field_value(pk, 'primaryKey') }
185
+ end
186
+
187
+ def check_foreign_keys
188
+ return if self[:foreignKeys].nil?
189
+ self[:foreignKeys].each do |key|
190
+ if field_type_mismatch?(key)
191
+ add_error("A TableSchema `foreignKey.fields` value must be the same type as `foreignKey.reference.fields`")
192
+ end
193
+ if field_count_mismatch?(key)
194
+ add_error("A TableSchema `foreignKey.fields` must contain the same number of entries as `foreignKey.reference.fields`")
195
+ end
196
+ foreign_key_fields(key).each { |fk| check_field_value(fk, 'foreignKey.fields') }
197
+ if key.fetch(:reference).fetch(:resource).empty?
198
+ foreign_key_fields(key.fetch(:reference)).each { |fk| check_field_value(fk, 'foreignKey.reference.fields')}
199
+ end
200
+ end
201
+ end
202
+
203
+ def check_field_value(key, type)
204
+ if headers.select { |f| key == f }.count == 0
205
+ add_error("The TableSchema #{type} value `#{key}` is not found in any of the schema's field names")
206
+ end
207
+ end
208
+
209
+ def foreign_key_fields(key)
210
+ [key.fetch(:fields)].flatten
211
+ end
212
+
213
+ def field_count_mismatch?(key)
214
+ foreign_key_fields(key).count != foreign_key_fields(key.fetch(:reference)).count
215
+ end
216
+
217
+ def field_type_mismatch?(key)
218
+ key.fetch(:fields).class.name != key.fetch(:reference).fetch(:fields).class.name
219
+ end
220
+
221
+ def add_error(error)
222
+ @errors << error
223
+ end
224
+
69
225
  end
70
226
  end
@@ -1,27 +1,28 @@
1
1
  module TableSchema
2
2
  class Table
3
3
 
4
- attr_reader :schema, :headers
4
+ # Public
5
5
 
6
- def self.infer_schema(csv, csv_options: {})
7
- TableSchema::Table.new(csv, nil, csv_options)
8
- end
6
+ attr_reader :headers, :schema
9
7
 
10
8
  def initialize(csv, descriptor, csv_options: {})
11
9
  @csv_options = csv_options.merge(headers: true)
10
+ @descriptor = descriptor
12
11
  @csv = parse_csv(csv)
13
12
  @headers = initialize_headers
14
- @schema = descriptor.nil? ? infer_schema : TableSchema::Schema.new(descriptor)
15
- initialize_unique_colums
13
+ if !descriptor.nil?
14
+ @schema = TableSchema::Schema.new(@descriptor)
15
+ initialize_unique_colums
16
+ end
16
17
  end
17
18
 
18
- def iter(row_limit: nil, cast: true, keyed: false)
19
+ def iter(keyed: false, cast: true, limit: nil)
19
20
  unless block_given?
20
- return enum_for(:iter, row_limit: row_limit, cast: cast, keyed: keyed)
21
+ return enum_for(:iter, limit: limit, cast: cast, keyed: keyed)
21
22
  end
22
23
 
23
24
  @csv.each_with_index do |row, i|
24
- break if row_limit && (row_limit <= i)
25
+ break if limit && (limit <= i)
25
26
  if cast == true
26
27
  cast_values = @schema.cast_row(row)
27
28
  row = CSV::Row.new(@headers, cast_values)
@@ -38,11 +39,21 @@ module TableSchema
38
39
  @csv.rewind
39
40
  end
40
41
 
41
- def read(row_limit: nil, cast: true, keyed: false)
42
- iterator = self.iter(row_limit: row_limit, cast: cast, keyed: keyed)
42
+ def read(keyed: false, cast: true, limit: nil)
43
+ iterator = self.iter(keyed: keyed, cast: cast, limit: limit)
43
44
  iterator.to_a
44
45
  end
45
46
 
47
+ def infer()
48
+ if !@schema
49
+ inferer = TableSchema::Infer.new(@headers, @csv)
50
+ @schema = inferer.schema
51
+ initialize_unique_colums
52
+ @csv.rewind
53
+ end
54
+ @schema.descriptor
55
+ end
56
+
46
57
  def save(target)
47
58
  CSV.open(target, "wb", @csv_options) do |csv|
48
59
  csv << @headers
@@ -51,6 +62,8 @@ module TableSchema
51
62
  true
52
63
  end
53
64
 
65
+ # Private
66
+
54
67
  private
55
68
 
56
69
  def parse_csv(csv)
@@ -62,12 +75,6 @@ module TableSchema
62
75
  array.map { |row| row.to_csv(row_sep: nil) }.join("\r\n")
63
76
  end
64
77
 
65
- def infer_schema
66
- inferer = TableSchema::Infer.new(@headers, @csv)
67
- @csv.rewind
68
- inferer.schema
69
- end
70
-
71
78
  def initialize_headers
72
79
  headers = @csv.first.to_h.keys
73
80
  @csv.rewind
@@ -25,9 +25,17 @@ module TableSchema
25
25
  end
26
26
 
27
27
  def cast_default(value)
28
- value = convert_to_boolean(value)
29
- raise TableSchema::InvalidCast.new("#{value} is not a #{name}") if value.nil?
30
- value
28
+ true_values = @field.fetch(:trueValues, TableSchema::DEFAULTS[:true_values])
29
+ false_values = @field.fetch(:falseValues, TableSchema::DEFAULTS[:false_values])
30
+ if [true, false].include?(value)
31
+ return value
32
+ elsif true_values.include?(value)
33
+ return true
34
+ elsif false_values.include?(value)
35
+ return false
36
+ else
37
+ raise TableSchema::InvalidCast.new("#{value} is not a #{name}")
38
+ end
31
39
  end
32
40
 
33
41
  end
@@ -25,6 +25,10 @@ module TableSchema
25
25
  if value.is_a?(type)
26
26
  value
27
27
  else
28
+ bare_number = @field.fetch(:bareNumber, TableSchema::DEFAULTS[:bare_number])
29
+ if !bare_number
30
+ value = value.gsub(/((^\D*)|(\D*$))/, '')
31
+ end
28
32
  Integer(value)
29
33
  end
30
34
  rescue ArgumentError
@@ -48,28 +48,16 @@ module TableSchema
48
48
  else
49
49
  group_char = @field.fetch(:groupChar, TableSchema::DEFAULTS[:group_char])
50
50
  decimal_char = @field.fetch(:decimalChar, TableSchema::DEFAULTS[:decimal_char])
51
- formatted_value = value.gsub(group_char, '').gsub(decimal_char, '.')
52
- if formatted_value.match(percent_chars)
53
- process_percent(formatted_value)
54
- elsif @field.fetch(:currency, nil)
55
- process_currency(formatted_value)
56
- else
57
- Float(formatted_value)
51
+ bare_number = @field.fetch(:bareNumber, TableSchema::DEFAULTS[:bare_number])
52
+ formatted_value = value
53
+ formatted_value = formatted_value.gsub(group_char, '')
54
+ formatted_value = formatted_value.gsub(decimal_char, '.')
55
+ if !bare_number
56
+ formatted_value = formatted_value.gsub(/((^\D*)|(\D*$))/, '')
58
57
  end
58
+ Float(formatted_value)
59
59
  end
60
60
  end
61
-
62
- def process_percent(value)
63
- Float(value.gsub(percent_chars, '')) / 100
64
- end
65
-
66
- def process_currency(value)
67
- Float(value.gsub(@field[:currency], ''))
68
- end
69
-
70
- def percent_chars
71
- /%|‰|‱|%|﹪|٪/
72
- end
73
61
  end
74
62
  end
75
63
  end
@@ -1,3 +1,3 @@
1
1
  module TableSchema
2
- VERSION = "0.4.1".freeze
2
+ VERSION = "0.5.0".freeze
3
3
  end
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.add_development_dependency "coveralls", "~> 0.8.13"
27
27
  spec.add_development_dependency "rubocop", "~> 0.49.1"
28
28
 
29
- spec.add_dependency "json-schema", "~> 2.6.0"
29
+ spec.add_dependency "json-schema", "~> 2.8.0"
30
30
  spec.add_dependency "uuid", "~> 2.3.8"
31
31
  spec.add_dependency "tod", "~> 2.1.0"
32
32
  spec.add_dependency "activesupport", "~> 5.1.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tableschema
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Open Knowledge Foundation
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-27 00:00:00.000000000 Z
11
+ date: 2017-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -114,14 +114,14 @@ dependencies:
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: 2.6.0
117
+ version: 2.8.0
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: 2.6.0
124
+ version: 2.8.0
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: uuid
127
127
  requirement: !ruby/object:Gem::Requirement
@@ -201,7 +201,6 @@ files:
201
201
  - lib/tableschema/field.rb
202
202
  - lib/tableschema/helpers.rb
203
203
  - lib/tableschema/infer.rb
204
- - lib/tableschema/model.rb
205
204
  - lib/tableschema/schema.rb
206
205
  - lib/tableschema/table.rb
207
206
  - lib/tableschema/types/any.rb
@@ -220,7 +219,6 @@ files:
220
219
  - lib/tableschema/types/time.rb
221
220
  - lib/tableschema/types/year.rb
222
221
  - lib/tableschema/types/yearmonth.rb
223
- - lib/tableschema/validate.rb
224
222
  - lib/tableschema/version.rb
225
223
  - tableschema.gemspec
226
224
  homepage: https://github.com/frictionlessdata/tableschema-rb
@@ -1,96 +0,0 @@
1
- require 'tableschema/defaults'
2
-
3
- module TableSchema
4
- module Model
5
-
6
- def headers
7
- fields.map { |f| transform(f[:name]) }
8
- rescue NoMethodError
9
- []
10
- end
11
-
12
- alias :field_names :headers
13
-
14
- def fields
15
- self[:fields]
16
- end
17
-
18
- def primary_keys
19
- [self[:primaryKey]].flatten.reject { |k| k.nil? }
20
- end
21
-
22
- def foreign_keys
23
- self[:foreignKeys] || []
24
- end
25
-
26
- def missing_values
27
- self.fetch(:missingValues, TableSchema::DEFAULTS[:missing_values])
28
- end
29
-
30
- def get_type(field_name)
31
- get_field(field_name)[:type]
32
- end
33
-
34
- def get_constraints(field_name)
35
- get_field(field_name)[:constraints] || {}
36
- end
37
-
38
- def required_headers
39
- fields.select { |f| f.fetch(:constraints, {}).fetch(:required, nil).to_s == 'true' }
40
- .map { |f| transform(f[:name]) }
41
- end
42
-
43
- def unique_headers
44
- fields.select { |f| f.fetch(:constraints, {}).fetch(:unique, nil).to_s == 'true' }
45
- .map { |f| transform(f[:name]) }
46
- end
47
-
48
- def has_field?(field_name)
49
- get_field(field_name) != nil
50
- end
51
-
52
- def get_field(field_name)
53
- fields.find { |f| f[:name] == field_name }
54
- end
55
-
56
- def get_fields_by_type(type)
57
- fields.select { |f| f[:type] == type }
58
- end
59
-
60
- def add_field(descriptor)
61
- self[:fields].push(descriptor)
62
- validate!
63
- descriptor
64
- rescue TableSchema::SchemaException => e
65
- self[:fields].pop
66
- raise e if @strict
67
- nil
68
- end
69
-
70
- def remove_field(field_name)
71
- field = get_field(field_name)
72
- self[:fields].reject!{ |f| f.name == field_name }
73
- validate
74
- field
75
- end
76
-
77
- private
78
-
79
- def transform(name)
80
- name.downcase! if @case_insensitive_headers == true
81
- name
82
- end
83
-
84
- def expand!
85
- (self[:fields] || []).each do |f|
86
- f[:type] = TableSchema::DEFAULTS[:type] if f[:type] == nil
87
- f[:format] = TableSchema::DEFAULTS[:format] if f[:format] == nil
88
- end
89
- end
90
-
91
- def load_fields!
92
- self[:fields] = (self[:fields] || []).map { |f| TableSchema::Field.new(f, missing_values) }
93
- end
94
-
95
- end
96
- end
@@ -1,70 +0,0 @@
1
- module TableSchema
2
- module Validate
3
-
4
- attr_reader :errors
5
-
6
- def load_validator!
7
- filepath = File.join(File.dirname(__FILE__), '..', 'profiles', 'table-schema.json')
8
- @profile ||= JSON.parse(File.read(filepath), symbolize_names: true)
9
- end
10
-
11
- def validate
12
- @errors = Set.new(JSON::Validator.fully_validate(@profile, self))
13
- check_primary_keys
14
- check_foreign_keys
15
- @errors.empty?
16
- end
17
-
18
- def validate!
19
- validate
20
- raise SchemaException.new(@errors.first) unless @errors.empty?
21
- true
22
- end
23
-
24
- private
25
-
26
- def check_primary_keys
27
- return if self[:primaryKey].nil?
28
- primary_keys.each { |pk| check_field_value(pk, 'primaryKey') }
29
- end
30
-
31
- def check_foreign_keys
32
- return if self[:foreignKeys].nil?
33
- self[:foreignKeys].each do |key|
34
- if field_type_mismatch?(key)
35
- add_error("A TableSchema `foreignKey.fields` value must be the same type as `foreignKey.reference.fields`")
36
- end
37
- if field_count_mismatch?(key)
38
- add_error("A TableSchema `foreignKey.fields` must contain the same number of entries as `foreignKey.reference.fields`")
39
- end
40
- foreign_key_fields(key).each { |fk| check_field_value(fk, 'foreignKey.fields') }
41
- if key.fetch(:reference).fetch(:resource).empty?
42
- foreign_key_fields(key.fetch(:reference)).each { |fk| check_field_value(fk, 'foreignKey.reference.fields')}
43
- end
44
- end
45
- end
46
-
47
- def check_field_value(key, type)
48
- if headers.select { |f| key == f }.count == 0
49
- add_error("The TableSchema #{type} value `#{key}` is not found in any of the schema's field names")
50
- end
51
- end
52
-
53
- def foreign_key_fields(key)
54
- [key.fetch(:fields)].flatten
55
- end
56
-
57
- def field_count_mismatch?(key)
58
- foreign_key_fields(key).count != foreign_key_fields(key.fetch(:reference)).count
59
- end
60
-
61
- def field_type_mismatch?(key)
62
- key.fetch(:fields).class.name != key.fetch(:reference).fetch(:fields).class.name
63
- end
64
-
65
- def add_error(error)
66
- @errors << error
67
- end
68
-
69
- end
70
- end