csvlint 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +4 -0
- data/.github/workflows/push.yml +14 -2
- data/.ruby-version +1 -1
- data/.standard_todo.yml +43 -0
- data/Dockerfile +16 -0
- data/Gemfile +2 -2
- data/README.md +9 -9
- data/Rakefile +7 -7
- data/csvlint.gemspec +14 -16
- data/docker_notes_for_windows.txt +20 -0
- data/features/step_definitions/cli_steps.rb +11 -11
- data/features/step_definitions/information_steps.rb +4 -4
- data/features/step_definitions/parse_csv_steps.rb +11 -11
- data/features/step_definitions/schema_validation_steps.rb +10 -10
- data/features/step_definitions/sources_steps.rb +1 -1
- data/features/step_definitions/validation_errors_steps.rb +19 -19
- data/features/step_definitions/validation_info_steps.rb +9 -9
- data/features/step_definitions/validation_warnings_steps.rb +11 -11
- data/features/support/aruba.rb +6 -6
- data/features/support/earl_formatter.rb +39 -39
- data/features/support/env.rb +10 -11
- data/features/support/load_tests.rb +107 -103
- data/features/support/webmock.rb +2 -2
- data/lib/csvlint/cli.rb +133 -130
- data/lib/csvlint/csvw/column.rb +279 -280
- data/lib/csvlint/csvw/date_format.rb +90 -92
- data/lib/csvlint/csvw/metadata_error.rb +1 -3
- data/lib/csvlint/csvw/number_format.rb +40 -32
- data/lib/csvlint/csvw/property_checker.rb +714 -717
- data/lib/csvlint/csvw/table.rb +49 -52
- data/lib/csvlint/csvw/table_group.rb +24 -23
- data/lib/csvlint/error_collector.rb +2 -0
- data/lib/csvlint/error_message.rb +0 -1
- data/lib/csvlint/field.rb +153 -141
- data/lib/csvlint/schema.rb +34 -42
- data/lib/csvlint/validate.rb +161 -143
- data/lib/csvlint/version.rb +1 -1
- data/lib/csvlint.rb +22 -23
- data/spec/csvw/column_spec.rb +15 -16
- data/spec/csvw/date_format_spec.rb +5 -7
- data/spec/csvw/number_format_spec.rb +2 -4
- data/spec/csvw/table_group_spec.rb +103 -105
- data/spec/csvw/table_spec.rb +71 -73
- data/spec/field_spec.rb +116 -121
- data/spec/schema_spec.rb +129 -139
- data/spec/spec_helper.rb +6 -6
- data/spec/validator_spec.rb +167 -190
- metadata +22 -55
data/lib/csvlint/validate.rb
CHANGED
@@ -1,21 +1,20 @@
|
|
1
1
|
module Csvlint
|
2
|
-
|
3
2
|
class Validator
|
4
3
|
class LineCSV < CSV
|
5
|
-
ENCODE_RE = Hash.new do |h,str|
|
4
|
+
ENCODE_RE = Hash.new do |h, str|
|
6
5
|
h[str] = Regexp.new(str)
|
7
6
|
end
|
8
7
|
|
9
|
-
ENCODE_STR = Hash.new do |h,encoding_name|
|
10
|
-
h[encoding_name] = Hash.new do |h,chunks|
|
11
|
-
h[chunks] = chunks.map { |chunk| chunk.encode(encoding_name) }.join(
|
8
|
+
ENCODE_STR = Hash.new do |h, encoding_name|
|
9
|
+
h[encoding_name] = Hash.new do |h, chunks|
|
10
|
+
h[chunks] = chunks.map { |chunk| chunk.encode(encoding_name) }.join("")
|
12
11
|
end
|
13
12
|
end
|
14
13
|
|
15
|
-
ESCAPE_RE = Hash.new do |h,re_chars|
|
16
|
-
h[re_chars] = Hash.new do |h,re_esc|
|
17
|
-
h[re_esc] = Hash.new do |h,str|
|
18
|
-
h[str] = str.gsub(re_chars) {|c| re_esc + c}
|
14
|
+
ESCAPE_RE = Hash.new do |h, re_chars|
|
15
|
+
h[re_chars] = Hash.new do |h, re_esc|
|
16
|
+
h[re_esc] = Hash.new do |h, str|
|
17
|
+
h[str] = str.gsub(re_chars) { |c| re_esc + c }
|
19
18
|
end
|
20
19
|
end
|
21
20
|
end
|
@@ -38,7 +37,7 @@ module Csvlint
|
|
38
37
|
ESCAPE_RE[@re_chars][@re_esc][str]
|
39
38
|
end
|
40
39
|
|
41
|
-
if RUBY_VERSION <
|
40
|
+
if RUBY_VERSION < "2.5"
|
42
41
|
# Optimization: Disable the CSV library's converters feature.
|
43
42
|
# @see https://github.com/ruby/ruby/blob/v2_2_3/lib/csv.rb#L2100
|
44
43
|
def init_converters(options, field_name = :converters)
|
@@ -55,11 +54,11 @@ module Csvlint
|
|
55
54
|
attr_reader :encoding, :content_type, :extension, :headers, :link_headers, :dialect, :csv_header, :schema, :data, :current_line
|
56
55
|
|
57
56
|
ERROR_MATCHERS = {
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
57
|
+
"Missing or stray quote" => :stray_quote,
|
58
|
+
"Illegal quoting" => :whitespace,
|
59
|
+
"Unclosed quoted field" => :unclosed_quote,
|
60
|
+
"Any value after quoted field isn't allowed" => :unclosed_quote,
|
61
|
+
"Unquoted fields do not allow \\r or \\n" => :line_breaks
|
63
62
|
}
|
64
63
|
|
65
64
|
def initialize(source, dialect = {}, schema = nil, options = {})
|
@@ -90,14 +89,14 @@ module Csvlint
|
|
90
89
|
end
|
91
90
|
|
92
91
|
def validate
|
93
|
-
if
|
92
|
+
if /.xls(x)?/.match?(@extension)
|
94
93
|
build_warnings(:excel, :context)
|
95
94
|
return
|
96
95
|
end
|
97
96
|
locate_schema unless @schema.instance_of?(Csvlint::Schema)
|
98
97
|
set_dialect
|
99
98
|
|
100
|
-
if @source.
|
99
|
+
if @source.instance_of?(String)
|
101
100
|
validate_url
|
102
101
|
else
|
103
102
|
validate_metadata
|
@@ -120,7 +119,11 @@ module Csvlint
|
|
120
119
|
request = Typhoeus::Request.new(@source, followlocation: true)
|
121
120
|
request.on_headers do |response|
|
122
121
|
@headers = response.headers || {}
|
123
|
-
@content_type =
|
122
|
+
@content_type = begin
|
123
|
+
response.headers["content-type"]
|
124
|
+
rescue
|
125
|
+
nil
|
126
|
+
end
|
124
127
|
@response_code = response.code
|
125
128
|
return build_errors(:not_found) if response.code == 404
|
126
129
|
validate_metadata
|
@@ -148,7 +151,7 @@ module Csvlint
|
|
148
151
|
else
|
149
152
|
validate_line(line, @current_line)
|
150
153
|
@leading = ""
|
151
|
-
@current_line
|
154
|
+
@current_line += 1
|
152
155
|
end
|
153
156
|
else
|
154
157
|
# If it's not a full line, then prepare to add it to the beginning of the next chunk
|
@@ -156,7 +159,7 @@ module Csvlint
|
|
156
159
|
end
|
157
160
|
rescue ArgumentError => ae
|
158
161
|
build_errors(:invalid_encoding, :structure, @current_line, nil, @current_line) unless @reported_invalid_encoding
|
159
|
-
@current_line
|
162
|
+
@current_line += 1
|
160
163
|
@reported_invalid_encoding = true
|
161
164
|
end
|
162
165
|
|
@@ -167,7 +170,7 @@ module Csvlint
|
|
167
170
|
@encoding = input.encoding.to_s
|
168
171
|
report_line_breaks(line)
|
169
172
|
parse_contents(input, line)
|
170
|
-
@lambda
|
173
|
+
@lambda&.call(self)
|
171
174
|
rescue ArgumentError => ae
|
172
175
|
build_errors(:invalid_encoding, :structure, @current_line, nil, index) unless @reported_invalid_encoding
|
173
176
|
@reported_invalid_encoding = true
|
@@ -204,8 +207,8 @@ module Csvlint
|
|
204
207
|
@errors += @schema.errors
|
205
208
|
all_errors += @schema.errors
|
206
209
|
@warnings += @schema.warnings
|
207
|
-
|
208
|
-
build_errors(:ragged_rows, :structure, current_line, nil, stream.to_s)
|
210
|
+
elsif !row.empty? && row.size != @expected_columns
|
211
|
+
build_errors(:ragged_rows, :structure, current_line, nil, stream.to_s)
|
209
212
|
end
|
210
213
|
end
|
211
214
|
end
|
@@ -228,8 +231,8 @@ module Csvlint
|
|
228
231
|
def validate_metadata
|
229
232
|
assumed_header = !@supplied_dialect
|
230
233
|
unless @headers.empty?
|
231
|
-
if @headers["content-type"]
|
232
|
-
@csv_header
|
234
|
+
if /text\/csv/.match?(@headers["content-type"])
|
235
|
+
@csv_header &&= true
|
233
236
|
assumed_header = @assumed_header.present?
|
234
237
|
end
|
235
238
|
if @headers["content-type"] =~ /header=(present|absent)/
|
@@ -237,19 +240,35 @@ module Csvlint
|
|
237
240
|
@csv_header = false if $1 == "absent"
|
238
241
|
assumed_header = false
|
239
242
|
end
|
240
|
-
build_warnings(:no_content_type, :context) if @content_type
|
241
|
-
build_errors(:wrong_content_type, :context) unless
|
243
|
+
build_warnings(:no_content_type, :context) if @content_type.nil?
|
244
|
+
build_errors(:wrong_content_type, :context) unless @content_type && @content_type =~ /text\/csv/
|
242
245
|
end
|
243
246
|
@header_processed = true
|
244
247
|
build_info_messages(:assumed_header, :structure) if assumed_header
|
245
248
|
|
246
|
-
@link_headers =
|
247
|
-
|
249
|
+
@link_headers = begin
|
250
|
+
@headers["link"].split(",")
|
251
|
+
rescue
|
252
|
+
nil
|
253
|
+
end
|
254
|
+
@link_headers&.each do |link_header|
|
248
255
|
match = LINK_HEADER_REGEXP.match(link_header)
|
249
|
-
uri =
|
250
|
-
|
256
|
+
uri = begin
|
257
|
+
match["uri"].gsub(/(^<|>$)/, "")
|
258
|
+
rescue
|
259
|
+
nil
|
260
|
+
end
|
261
|
+
rel = begin
|
262
|
+
match["rel-relationship"].gsub(/(^"|"$)/, "")
|
263
|
+
rescue
|
264
|
+
nil
|
265
|
+
end
|
251
266
|
param = match["param"]
|
252
|
-
param_value =
|
267
|
+
param_value = begin
|
268
|
+
match["param-value"].gsub(/(^"|"$)/, "")
|
269
|
+
rescue
|
270
|
+
nil
|
271
|
+
end
|
253
272
|
if rel == "describedby" && param == "type" && ["application/csvm+json", "application/ld+json", "application/json"].include?(param_value)
|
254
273
|
begin
|
255
274
|
url = URI.join(@source_url, uri)
|
@@ -265,14 +284,14 @@ module Csvlint
|
|
265
284
|
rescue OpenURI::HTTPError
|
266
285
|
end
|
267
286
|
end
|
268
|
-
end
|
287
|
+
end
|
269
288
|
end
|
270
289
|
|
271
290
|
def header?
|
272
291
|
@csv_header && @dialect["header"]
|
273
292
|
end
|
274
293
|
|
275
|
-
def report_line_breaks(line_no=nil)
|
294
|
+
def report_line_breaks(line_no = nil)
|
276
295
|
return unless @input[-1, 1].include?("\n") # Return straight away if there's no newline character - i.e. we're on the last line
|
277
296
|
line_break = get_line_break(@input)
|
278
297
|
@line_breaks << line_break
|
@@ -298,24 +317,24 @@ module Csvlint
|
|
298
317
|
schema_dialect = {}
|
299
318
|
end
|
300
319
|
@dialect = {
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
320
|
+
"header" => true,
|
321
|
+
"headerRowCount" => 1,
|
322
|
+
"delimiter" => ",",
|
323
|
+
"skipInitialSpace" => true,
|
324
|
+
"lineTerminator" => :auto,
|
325
|
+
"quoteChar" => '"',
|
326
|
+
"trim" => :true
|
308
327
|
}.merge(schema_dialect).merge(@dialect || {})
|
309
328
|
|
310
|
-
@csv_header
|
329
|
+
@csv_header &&= @dialect["header"]
|
311
330
|
@csv_options = dialect_to_csv_options(@dialect)
|
312
331
|
end
|
313
332
|
|
314
333
|
def validate_encoding
|
315
334
|
if @headers["content-type"]
|
316
|
-
if @headers["content-type"]
|
335
|
+
if !/charset=/.match?(@headers["content-type"])
|
317
336
|
build_warnings(:no_encoding, :context)
|
318
|
-
elsif @headers["content-type"]
|
337
|
+
elsif !/charset=utf-8/i.match?(@headers["content-type"])
|
319
338
|
build_warnings(:encoding, :context)
|
320
339
|
end
|
321
340
|
end
|
@@ -339,10 +358,10 @@ module Csvlint
|
|
339
358
|
end
|
340
359
|
|
341
360
|
def build_exception_messages(csvException, errChars, lineNo)
|
342
|
-
#TODO 1 - this is a change in logic, rather than straight refactor of previous error building, however original logic is bonkers
|
343
|
-
#TODO 2 - using .kind_of? is a very ugly fix here and it meant to work around instances where :auto symbol is preserved in @csv_options
|
361
|
+
# TODO 1 - this is a change in logic, rather than straight refactor of previous error building, however original logic is bonkers
|
362
|
+
# TODO 2 - using .kind_of? is a very ugly fix here and it meant to work around instances where :auto symbol is preserved in @csv_options
|
344
363
|
type = fetch_error(csvException)
|
345
|
-
if !@csv_options[:row_sep].
|
364
|
+
if !@csv_options[:row_sep].is_a?(Symbol) && [:unclosed_quote, :stray_quote].include?(type) && !@input.match(@csv_options[:row_sep])
|
346
365
|
build_linebreak_error
|
347
366
|
else
|
348
367
|
build_errors(type, :structure, lineNo, nil, errChars)
|
@@ -355,11 +374,11 @@ module Csvlint
|
|
355
374
|
|
356
375
|
def validate_header(header)
|
357
376
|
names = Set.new
|
358
|
-
header.map{|h| h.strip! } if @dialect["trim"] == :true
|
359
|
-
header.each_with_index do |name,i|
|
360
|
-
build_warnings(:empty_column_name, :schema, nil, i+1) if name == ""
|
377
|
+
header.map { |h| h.strip! } if @dialect["trim"] == :true
|
378
|
+
header.each_with_index do |name, i|
|
379
|
+
build_warnings(:empty_column_name, :schema, nil, i + 1) if name == ""
|
361
380
|
if names.include?(name)
|
362
|
-
build_warnings(:duplicate_column_name, :schema, nil, i+1)
|
381
|
+
build_warnings(:duplicate_column_name, :schema, nil, i + 1)
|
363
382
|
else
|
364
383
|
names << name
|
365
384
|
end
|
@@ -369,24 +388,28 @@ module Csvlint
|
|
369
388
|
@errors += @schema.errors
|
370
389
|
@warnings += @schema.warnings
|
371
390
|
end
|
372
|
-
|
391
|
+
valid?
|
373
392
|
end
|
374
393
|
|
375
394
|
def fetch_error(error)
|
376
395
|
e = error.message.match(/^(.+?)(?: [io]n)? \(?line \d+\)?\.?$/i)
|
377
|
-
message =
|
396
|
+
message = begin
|
397
|
+
e[1]
|
398
|
+
rescue
|
399
|
+
nil
|
400
|
+
end
|
378
401
|
ERROR_MATCHERS.fetch(message, :unknown_error)
|
379
402
|
end
|
380
403
|
|
381
404
|
def dialect_to_csv_options(dialect)
|
382
405
|
skipinitialspace = dialect["skipInitialSpace"] || true
|
383
406
|
delimiter = dialect["delimiter"]
|
384
|
-
delimiter
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
407
|
+
delimiter += " " if !skipinitialspace
|
408
|
+
{
|
409
|
+
col_sep: delimiter,
|
410
|
+
row_sep: dialect["lineTerminator"],
|
411
|
+
quote_char: dialect["quoteChar"],
|
412
|
+
skip_blanks: false
|
390
413
|
}
|
391
414
|
end
|
392
415
|
|
@@ -396,25 +419,25 @@ module Csvlint
|
|
396
419
|
@formats[i] ||= Hash.new(0)
|
397
420
|
|
398
421
|
format =
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
422
|
+
if col.strip[FORMATS[:numeric]]
|
423
|
+
:numeric
|
424
|
+
elsif uri?(col)
|
425
|
+
:uri
|
426
|
+
elsif possible_date?(col)
|
427
|
+
date_formats(col)
|
428
|
+
else
|
429
|
+
:string
|
430
|
+
end
|
408
431
|
|
409
432
|
@formats[i][format] += 1
|
410
433
|
end
|
411
434
|
end
|
412
435
|
|
413
436
|
def check_consistency
|
414
|
-
@formats.each_with_index do |format,i|
|
437
|
+
@formats.each_with_index do |format, i|
|
415
438
|
if format
|
416
439
|
total = format.values.reduce(:+).to_f
|
417
|
-
if format.none?{|_,count| count / total >= 0.9}
|
440
|
+
if format.none? { |_, count| count / total >= 0.9 }
|
418
441
|
build_warnings(:inconsistent_values, :schema, nil, i + 1)
|
419
442
|
end
|
420
443
|
end
|
@@ -430,17 +453,16 @@ module Csvlint
|
|
430
453
|
end
|
431
454
|
|
432
455
|
def locate_schema
|
433
|
-
|
434
456
|
@source_url = nil
|
435
457
|
warn_if_unsuccessful = false
|
436
458
|
case @source
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
459
|
+
when StringIO
|
460
|
+
return
|
461
|
+
when File
|
462
|
+
uri_parser = URI::DEFAULT_PARSER
|
463
|
+
@source_url = "file:#{uri_parser.escape(File.expand_path(@source))}"
|
464
|
+
else
|
465
|
+
@source_url = @source
|
444
466
|
end
|
445
467
|
unless @schema.nil?
|
446
468
|
if @schema.tables[@source_url]
|
@@ -450,7 +472,7 @@ module Csvlint
|
|
450
472
|
end
|
451
473
|
end
|
452
474
|
paths = []
|
453
|
-
if
|
475
|
+
if /^http(s)?/.match?(@source_url)
|
454
476
|
begin
|
455
477
|
well_known_uri = URI.join(@source_url, "/.well-known/csvm")
|
456
478
|
paths = URI.open(well_known_uri.to_s).read.split("\n")
|
@@ -459,26 +481,24 @@ module Csvlint
|
|
459
481
|
end
|
460
482
|
paths = ["{+url}-metadata.json", "csv-metadata.json"] if paths.empty?
|
461
483
|
paths.each do |template|
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
if schema.
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema)
|
475
|
-
end
|
484
|
+
template = URITemplate.new(template)
|
485
|
+
path = template.expand("url" => @source_url)
|
486
|
+
url = URI.join(@source_url, path)
|
487
|
+
url = File.new(url.to_s.sub(/^file:/, "")) if /^file:/.match?(url.to_s)
|
488
|
+
schema = Schema.load_from_uri(url)
|
489
|
+
if schema.instance_of? Csvlint::Csvw::TableGroup
|
490
|
+
if schema.tables[@source_url]
|
491
|
+
@schema = schema
|
492
|
+
return
|
493
|
+
else
|
494
|
+
warn_if_unsuccessful = true
|
495
|
+
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema)
|
476
496
|
end
|
477
|
-
rescue Errno::ENOENT
|
478
|
-
rescue OpenURI::HTTPError, URI::BadURIError, ArgumentError
|
479
|
-
rescue => e
|
480
|
-
raise e
|
481
497
|
end
|
498
|
+
rescue Errno::ENOENT
|
499
|
+
rescue OpenURI::HTTPError, URI::BadURIError, ArgumentError
|
500
|
+
rescue => e
|
501
|
+
raise e
|
482
502
|
end
|
483
503
|
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema) if warn_if_unsuccessful
|
484
504
|
@schema = nil
|
@@ -487,31 +507,30 @@ module Csvlint
|
|
487
507
|
private
|
488
508
|
|
489
509
|
def parse_extension(source)
|
490
|
-
|
491
510
|
case source
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
511
|
+
when File
|
512
|
+
File.extname(source.path)
|
513
|
+
when IO
|
514
|
+
""
|
515
|
+
when StringIO
|
516
|
+
""
|
517
|
+
when Tempfile
|
518
|
+
# this is triggered when the revalidate dialect use case happens
|
519
|
+
""
|
520
|
+
else
|
521
|
+
begin
|
522
|
+
parsed = URI.parse(source)
|
523
|
+
File.extname(parsed.path)
|
524
|
+
rescue URI::InvalidURIError
|
525
|
+
""
|
526
|
+
end
|
508
527
|
end
|
509
528
|
end
|
510
529
|
|
511
530
|
def uri?(value)
|
512
531
|
if value.strip[FORMATS[:uri]]
|
513
532
|
uri = URI.parse(value)
|
514
|
-
uri.
|
533
|
+
uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
515
534
|
end
|
516
535
|
rescue URI::InvalidURIError
|
517
536
|
false
|
@@ -522,25 +541,25 @@ module Csvlint
|
|
522
541
|
end
|
523
542
|
|
524
543
|
def date_formats(col)
|
525
|
-
if col[FORMATS[:date_db]] && date_format?(Date, col,
|
544
|
+
if col[FORMATS[:date_db]] && date_format?(Date, col, "%Y-%m-%d")
|
526
545
|
:date_db
|
527
|
-
elsif col[FORMATS[:date_short]] && date_format?(Date, col,
|
546
|
+
elsif col[FORMATS[:date_short]] && date_format?(Date, col, "%e %b")
|
528
547
|
:date_short
|
529
|
-
elsif col[FORMATS[:date_rfc822]] && date_format?(Date, col,
|
548
|
+
elsif col[FORMATS[:date_rfc822]] && date_format?(Date, col, "%e %b %Y")
|
530
549
|
:date_rfc822
|
531
|
-
elsif col[FORMATS[:date_long]] && date_format?(Date, col,
|
550
|
+
elsif col[FORMATS[:date_long]] && date_format?(Date, col, "%B %e, %Y")
|
532
551
|
:date_long
|
533
|
-
elsif col[FORMATS[:dateTime_time]] && date_format?(Time, col,
|
552
|
+
elsif col[FORMATS[:dateTime_time]] && date_format?(Time, col, "%H:%M")
|
534
553
|
:dateTime_time
|
535
|
-
elsif col[FORMATS[:dateTime_hms]] && date_format?(Time, col,
|
554
|
+
elsif col[FORMATS[:dateTime_hms]] && date_format?(Time, col, "%H:%M:%S")
|
536
555
|
:dateTime_hms
|
537
|
-
elsif col[FORMATS[:dateTime_db]] && date_format?(Time, col,
|
556
|
+
elsif col[FORMATS[:dateTime_db]] && date_format?(Time, col, "%Y-%m-%d %H:%M:%S")
|
538
557
|
:dateTime_db
|
539
|
-
elsif col[FORMATS[:dateTime_iso8601]] && date_format?(Time, col,
|
558
|
+
elsif col[FORMATS[:dateTime_iso8601]] && date_format?(Time, col, "%Y-%m-%dT%H:%M:%SZ")
|
540
559
|
:dateTime_iso8601
|
541
|
-
elsif col[FORMATS[:dateTime_short]] && date_format?(Time, col,
|
560
|
+
elsif col[FORMATS[:dateTime_short]] && date_format?(Time, col, "%d %b %H:%M")
|
542
561
|
:dateTime_short
|
543
|
-
elsif col[FORMATS[:dateTime_long]] && date_format?(Time, col,
|
562
|
+
elsif col[FORMATS[:dateTime_long]] && date_format?(Time, col, "%B %d, %Y %H:%M")
|
544
563
|
:dateTime_long
|
545
564
|
else
|
546
565
|
:string
|
@@ -567,25 +586,25 @@ module Csvlint
|
|
567
586
|
end
|
568
587
|
|
569
588
|
FORMATS = {
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
589
|
+
string: nil,
|
590
|
+
numeric: /\A[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?\z/,
|
591
|
+
uri: /\Ahttps?:/,
|
592
|
+
date_db: /\A\d{4,}-\d\d-\d\d\z/, # "12345-01-01"
|
593
|
+
date_long: /\A(?:#{Date::MONTHNAMES.join('|')}) [ \d]\d, \d{4,}\z/, # "January 1, 12345"
|
594
|
+
date_rfc822: /\A[ \d]\d (?:#{Date::ABBR_MONTHNAMES.join('|')}) \d{4,}\z/, # " 1 Jan 12345"
|
595
|
+
date_short: /\A[ \d]\d (?:#{Date::ABBR_MONTHNAMES.join('|')})\z/, # "1 Jan"
|
596
|
+
dateTime_db: /\A\d{4,}-\d\d-\d\d \d\d:\d\d:\d\d\z/, # "12345-01-01 00:00:00"
|
597
|
+
dateTime_hms: /\A\d\d:\d\d:\d\d\z/, # "00:00:00"
|
598
|
+
dateTime_iso8601: /\A\d{4,}-\d\d-\d\dT\d\d:\d\d:\d\dZ\z/, # "12345-01-01T00:00:00Z"
|
599
|
+
dateTime_long: /\A(?:#{Date::MONTHNAMES.join('|')}) \d\d, \d{4,} \d\d:\d\d\z/, # "January 01, 12345 00:00"
|
600
|
+
dateTime_short: /\A\d\d (?:#{Date::ABBR_MONTHNAMES.join('|')}) \d\d:\d\d\z/, # "01 Jan 00:00"
|
601
|
+
dateTime_time: /\A\d\d:\d\d\z/ # "00:00"
|
583
602
|
}.freeze
|
584
603
|
|
585
604
|
URI_REGEXP = /(?<uri>.*?)/
|
586
|
-
TOKEN_REGEXP = /([
|
605
|
+
TOKEN_REGEXP = /([^()<>@,;:\\"\/\[\]?={} \t]+)/
|
587
606
|
QUOTED_STRING_REGEXP = /("[^"]*")/
|
588
|
-
SGML_NAME_REGEXP = /([A-Za-z][-A-Za-z0-9
|
607
|
+
SGML_NAME_REGEXP = /([A-Za-z][-A-Za-z0-9.]*)/
|
589
608
|
RELATIONSHIP_REGEXP = Regexp.new("(?<relationship>#{SGML_NAME_REGEXP}|(\"#{SGML_NAME_REGEXP}(\\s+#{SGML_NAME_REGEXP})*\"))")
|
590
609
|
REL_REGEXP = Regexp.new("(?<rel>\\s*rel\\s*=\\s*(?<rel-relationship>#{RELATIONSHIP_REGEXP}))")
|
591
610
|
REV_REGEXP = Regexp.new("(?<rev>\\s*rev\\s*=\\s*#{RELATIONSHIP_REGEXP})")
|
@@ -593,8 +612,7 @@ module Csvlint
|
|
593
612
|
ANCHOR_REGEXP = Regexp.new("(?<anchor>\\s*anchor\\s*=\\s*\\<#{URI_REGEXP}\\>)")
|
594
613
|
LINK_EXTENSION_REGEXP = Regexp.new("(?<link-extension>(?<param>#{TOKEN_REGEXP})(\\s*=\\s*(?<param-value>#{TOKEN_REGEXP}|#{QUOTED_STRING_REGEXP}))?)")
|
595
614
|
LINK_PARAM_REGEXP = Regexp.new("(#{REL_REGEXP}|#{REV_REGEXP}|#{TITLE_REGEXP}|#{ANCHOR_REGEXP}|#{LINK_EXTENSION_REGEXP})")
|
596
|
-
LINK_HEADER_REGEXP = Regexp.new("
|
597
|
-
POSSIBLE_DATE_REGEXP = Regexp.new("\\A(\\d|\\s\\d#{Date::ABBR_MONTHNAMES.join(
|
598
|
-
|
615
|
+
LINK_HEADER_REGEXP = Regexp.new("<#{URI_REGEXP}>(\\s*;\\s*#{LINK_PARAM_REGEXP})*")
|
616
|
+
POSSIBLE_DATE_REGEXP = Regexp.new("\\A(\\d|\\s\\d#{Date::ABBR_MONTHNAMES.join("|")}#{Date::MONTHNAMES.join("|")})")
|
599
617
|
end
|
600
618
|
end
|
data/lib/csvlint/version.rb
CHANGED
data/lib/csvlint.rb
CHANGED
@@ -1,27 +1,26 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require 'typhoeus'
|
1
|
+
require "csv"
|
2
|
+
require "date"
|
3
|
+
require "open-uri"
|
4
|
+
require "tempfile"
|
5
|
+
require "typhoeus"
|
7
6
|
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
7
|
+
require "active_support/core_ext/date/conversions"
|
8
|
+
require "active_support/core_ext/time/conversions"
|
9
|
+
require "active_support/core_ext/object"
|
10
|
+
require "open_uri_redirections"
|
11
|
+
require "uri_template"
|
13
12
|
|
14
|
-
require
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
13
|
+
require "csvlint/error_message"
|
14
|
+
require "csvlint/error_collector"
|
15
|
+
require "csvlint/validate"
|
16
|
+
require "csvlint/field"
|
18
17
|
|
19
|
-
require
|
20
|
-
require
|
21
|
-
require
|
22
|
-
require
|
23
|
-
require
|
24
|
-
require
|
25
|
-
require
|
18
|
+
require "csvlint/csvw/metadata_error"
|
19
|
+
require "csvlint/csvw/number_format"
|
20
|
+
require "csvlint/csvw/date_format"
|
21
|
+
require "csvlint/csvw/property_checker"
|
22
|
+
require "csvlint/csvw/column"
|
23
|
+
require "csvlint/csvw/table"
|
24
|
+
require "csvlint/csvw/table_group"
|
26
25
|
|
27
|
-
require
|
26
|
+
require "csvlint/schema"
|