csvlint 0.4.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/push.yml +35 -0
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/.standard_todo.yml +43 -0
- data/CHANGELOG.md +38 -0
- data/Dockerfile +16 -0
- data/Gemfile +2 -2
- data/README.md +13 -10
- data/Rakefile +7 -7
- data/bin/create_schema +2 -2
- data/csvlint.gemspec +19 -22
- data/docker_notes_for_windows.txt +20 -0
- data/features/step_definitions/cli_steps.rb +11 -11
- data/features/step_definitions/information_steps.rb +4 -4
- data/features/step_definitions/parse_csv_steps.rb +11 -11
- data/features/step_definitions/schema_validation_steps.rb +10 -10
- data/features/step_definitions/sources_steps.rb +1 -1
- data/features/step_definitions/validation_errors_steps.rb +19 -19
- data/features/step_definitions/validation_info_steps.rb +9 -9
- data/features/step_definitions/validation_warnings_steps.rb +11 -11
- data/features/support/aruba.rb +10 -9
- data/features/support/earl_formatter.rb +39 -39
- data/features/support/env.rb +10 -11
- data/features/support/load_tests.rb +109 -105
- data/features/support/webmock.rb +3 -1
- data/lib/csvlint/cli.rb +136 -142
- data/lib/csvlint/csvw/column.rb +279 -280
- data/lib/csvlint/csvw/date_format.rb +90 -92
- data/lib/csvlint/csvw/metadata_error.rb +1 -3
- data/lib/csvlint/csvw/number_format.rb +40 -32
- data/lib/csvlint/csvw/property_checker.rb +714 -717
- data/lib/csvlint/csvw/table.rb +49 -52
- data/lib/csvlint/csvw/table_group.rb +24 -23
- data/lib/csvlint/error_collector.rb +2 -0
- data/lib/csvlint/error_message.rb +0 -1
- data/lib/csvlint/field.rb +153 -141
- data/lib/csvlint/schema.rb +35 -43
- data/lib/csvlint/validate.rb +173 -151
- data/lib/csvlint/version.rb +1 -1
- data/lib/csvlint.rb +22 -23
- data/spec/csvw/column_spec.rb +15 -16
- data/spec/csvw/date_format_spec.rb +5 -7
- data/spec/csvw/number_format_spec.rb +2 -4
- data/spec/csvw/table_group_spec.rb +103 -105
- data/spec/csvw/table_spec.rb +71 -73
- data/spec/field_spec.rb +116 -121
- data/spec/schema_spec.rb +131 -141
- data/spec/spec_helper.rb +6 -6
- data/spec/validator_spec.rb +167 -203
- metadata +41 -85
- data/.travis.yml +0 -37
data/lib/csvlint/validate.rb
CHANGED
@@ -1,21 +1,20 @@
|
|
1
1
|
module Csvlint
|
2
|
-
|
3
2
|
class Validator
|
4
3
|
class LineCSV < CSV
|
5
|
-
ENCODE_RE = Hash.new do |h,str|
|
4
|
+
ENCODE_RE = Hash.new do |h, str|
|
6
5
|
h[str] = Regexp.new(str)
|
7
6
|
end
|
8
7
|
|
9
|
-
ENCODE_STR = Hash.new do |h,encoding_name|
|
10
|
-
h[encoding_name] = Hash.new do |h,chunks|
|
11
|
-
h[chunks] = chunks.map { |chunk| chunk.encode(encoding_name) }.join(
|
8
|
+
ENCODE_STR = Hash.new do |h, encoding_name|
|
9
|
+
h[encoding_name] = Hash.new do |h, chunks|
|
10
|
+
h[chunks] = chunks.map { |chunk| chunk.encode(encoding_name) }.join("")
|
12
11
|
end
|
13
12
|
end
|
14
13
|
|
15
|
-
ESCAPE_RE = Hash.new do |h,re_chars|
|
16
|
-
h[re_chars] = Hash.new do |h,re_esc|
|
17
|
-
h[re_esc] = Hash.new do |h,str|
|
18
|
-
h[str] = str.gsub(re_chars) {|c| re_esc + c}
|
14
|
+
ESCAPE_RE = Hash.new do |h, re_chars|
|
15
|
+
h[re_chars] = Hash.new do |h, re_esc|
|
16
|
+
h[re_esc] = Hash.new do |h, str|
|
17
|
+
h[str] = str.gsub(re_chars) { |c| re_esc + c }
|
19
18
|
end
|
20
19
|
end
|
21
20
|
end
|
@@ -38,13 +37,15 @@ module Csvlint
|
|
38
37
|
ESCAPE_RE[@re_chars][@re_esc][str]
|
39
38
|
end
|
40
39
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
40
|
+
if RUBY_VERSION < "2.5"
|
41
|
+
# Optimization: Disable the CSV library's converters feature.
|
42
|
+
# @see https://github.com/ruby/ruby/blob/v2_2_3/lib/csv.rb#L2100
|
43
|
+
def init_converters(options, field_name = :converters)
|
44
|
+
@converters = []
|
45
|
+
@header_converters = []
|
46
|
+
options.delete(:unconverted_fields)
|
47
|
+
options.delete(field_name)
|
48
|
+
end
|
48
49
|
end
|
49
50
|
end
|
50
51
|
|
@@ -53,10 +54,11 @@ module Csvlint
|
|
53
54
|
attr_reader :encoding, :content_type, :extension, :headers, :link_headers, :dialect, :csv_header, :schema, :data, :current_line
|
54
55
|
|
55
56
|
ERROR_MATCHERS = {
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
"Missing or stray quote" => :stray_quote,
|
58
|
+
"Illegal quoting" => :whitespace,
|
59
|
+
"Unclosed quoted field" => :unclosed_quote,
|
60
|
+
"Any value after quoted field isn't allowed" => :unclosed_quote,
|
61
|
+
"Unquoted fields do not allow \\r or \\n" => :line_breaks
|
60
62
|
}
|
61
63
|
|
62
64
|
def initialize(source, dialect = {}, schema = nil, options = {})
|
@@ -87,14 +89,14 @@ module Csvlint
|
|
87
89
|
end
|
88
90
|
|
89
91
|
def validate
|
90
|
-
if
|
92
|
+
if /.xls(x)?/.match?(@extension)
|
91
93
|
build_warnings(:excel, :context)
|
92
94
|
return
|
93
95
|
end
|
94
96
|
locate_schema unless @schema.instance_of?(Csvlint::Schema)
|
95
97
|
set_dialect
|
96
98
|
|
97
|
-
if @source.
|
99
|
+
if @source.instance_of?(String)
|
98
100
|
validate_url
|
99
101
|
else
|
100
102
|
validate_metadata
|
@@ -117,7 +119,11 @@ module Csvlint
|
|
117
119
|
request = Typhoeus::Request.new(@source, followlocation: true)
|
118
120
|
request.on_headers do |response|
|
119
121
|
@headers = response.headers || {}
|
120
|
-
@content_type =
|
122
|
+
@content_type = begin
|
123
|
+
response.headers["content-type"]
|
124
|
+
rescue
|
125
|
+
nil
|
126
|
+
end
|
121
127
|
@response_code = response.code
|
122
128
|
return build_errors(:not_found) if response.code == 404
|
123
129
|
validate_metadata
|
@@ -145,7 +151,7 @@ module Csvlint
|
|
145
151
|
else
|
146
152
|
validate_line(line, @current_line)
|
147
153
|
@leading = ""
|
148
|
-
@current_line
|
154
|
+
@current_line += 1
|
149
155
|
end
|
150
156
|
else
|
151
157
|
# If it's not a full line, then prepare to add it to the beginning of the next chunk
|
@@ -153,7 +159,7 @@ module Csvlint
|
|
153
159
|
end
|
154
160
|
rescue ArgumentError => ae
|
155
161
|
build_errors(:invalid_encoding, :structure, @current_line, nil, @current_line) unless @reported_invalid_encoding
|
156
|
-
@current_line
|
162
|
+
@current_line += 1
|
157
163
|
@reported_invalid_encoding = true
|
158
164
|
end
|
159
165
|
|
@@ -164,7 +170,7 @@ module Csvlint
|
|
164
170
|
@encoding = input.encoding.to_s
|
165
171
|
report_line_breaks(line)
|
166
172
|
parse_contents(input, line)
|
167
|
-
@lambda
|
173
|
+
@lambda&.call(self)
|
168
174
|
rescue ArgumentError => ae
|
169
175
|
build_errors(:invalid_encoding, :structure, @current_line, nil, index) unless @reported_invalid_encoding
|
170
176
|
@reported_invalid_encoding = true
|
@@ -179,9 +185,9 @@ module Csvlint
|
|
179
185
|
@csv_options[:encoding] = @encoding
|
180
186
|
|
181
187
|
begin
|
182
|
-
row = LineCSV.parse_line(stream,
|
188
|
+
row = LineCSV.parse_line(stream, **@csv_options)
|
183
189
|
rescue LineCSV::MalformedCSVError => e
|
184
|
-
build_exception_messages(e, stream, current_line)
|
190
|
+
build_exception_messages(e, stream, current_line) unless e.message.include?("UTF") && @reported_invalid_encoding
|
185
191
|
end
|
186
192
|
|
187
193
|
if row
|
@@ -201,8 +207,8 @@ module Csvlint
|
|
201
207
|
@errors += @schema.errors
|
202
208
|
all_errors += @schema.errors
|
203
209
|
@warnings += @schema.warnings
|
204
|
-
|
205
|
-
build_errors(:ragged_rows, :structure, current_line, nil, stream.to_s)
|
210
|
+
elsif !row.empty? && row.size != @expected_columns
|
211
|
+
build_errors(:ragged_rows, :structure, current_line, nil, stream.to_s)
|
206
212
|
end
|
207
213
|
end
|
208
214
|
end
|
@@ -225,8 +231,8 @@ module Csvlint
|
|
225
231
|
def validate_metadata
|
226
232
|
assumed_header = !@supplied_dialect
|
227
233
|
unless @headers.empty?
|
228
|
-
if @headers["content-type"]
|
229
|
-
@csv_header
|
234
|
+
if /text\/csv/.match?(@headers["content-type"])
|
235
|
+
@csv_header &&= true
|
230
236
|
assumed_header = @assumed_header.present?
|
231
237
|
end
|
232
238
|
if @headers["content-type"] =~ /header=(present|absent)/
|
@@ -234,23 +240,39 @@ module Csvlint
|
|
234
240
|
@csv_header = false if $1 == "absent"
|
235
241
|
assumed_header = false
|
236
242
|
end
|
237
|
-
build_warnings(:no_content_type, :context) if @content_type
|
238
|
-
build_errors(:wrong_content_type, :context) unless
|
243
|
+
build_warnings(:no_content_type, :context) if @content_type.nil?
|
244
|
+
build_errors(:wrong_content_type, :context) unless @content_type && @content_type =~ /text\/csv/
|
239
245
|
end
|
240
246
|
@header_processed = true
|
241
247
|
build_info_messages(:assumed_header, :structure) if assumed_header
|
242
248
|
|
243
|
-
@link_headers =
|
244
|
-
|
249
|
+
@link_headers = begin
|
250
|
+
@headers["link"].split(",")
|
251
|
+
rescue
|
252
|
+
nil
|
253
|
+
end
|
254
|
+
@link_headers&.each do |link_header|
|
245
255
|
match = LINK_HEADER_REGEXP.match(link_header)
|
246
|
-
uri =
|
247
|
-
|
256
|
+
uri = begin
|
257
|
+
match["uri"].gsub(/(^<|>$)/, "")
|
258
|
+
rescue
|
259
|
+
nil
|
260
|
+
end
|
261
|
+
rel = begin
|
262
|
+
match["rel-relationship"].gsub(/(^"|"$)/, "")
|
263
|
+
rescue
|
264
|
+
nil
|
265
|
+
end
|
248
266
|
param = match["param"]
|
249
|
-
param_value =
|
267
|
+
param_value = begin
|
268
|
+
match["param-value"].gsub(/(^"|"$)/, "")
|
269
|
+
rescue
|
270
|
+
nil
|
271
|
+
end
|
250
272
|
if rel == "describedby" && param == "type" && ["application/csvm+json", "application/ld+json", "application/json"].include?(param_value)
|
251
273
|
begin
|
252
274
|
url = URI.join(@source_url, uri)
|
253
|
-
schema = Schema.
|
275
|
+
schema = Schema.load_from_uri(url)
|
254
276
|
if schema.instance_of? Csvlint::Csvw::TableGroup
|
255
277
|
if schema.tables[@source_url]
|
256
278
|
@schema = schema
|
@@ -262,14 +284,14 @@ module Csvlint
|
|
262
284
|
rescue OpenURI::HTTPError
|
263
285
|
end
|
264
286
|
end
|
265
|
-
end
|
287
|
+
end
|
266
288
|
end
|
267
289
|
|
268
290
|
def header?
|
269
291
|
@csv_header && @dialect["header"]
|
270
292
|
end
|
271
293
|
|
272
|
-
def report_line_breaks(line_no=nil)
|
294
|
+
def report_line_breaks(line_no = nil)
|
273
295
|
return unless @input[-1, 1].include?("\n") # Return straight away if there's no newline character - i.e. we're on the last line
|
274
296
|
line_break = get_line_break(@input)
|
275
297
|
@line_breaks << line_break
|
@@ -295,24 +317,24 @@ module Csvlint
|
|
295
317
|
schema_dialect = {}
|
296
318
|
end
|
297
319
|
@dialect = {
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
320
|
+
"header" => true,
|
321
|
+
"headerRowCount" => 1,
|
322
|
+
"delimiter" => ",",
|
323
|
+
"skipInitialSpace" => true,
|
324
|
+
"lineTerminator" => :auto,
|
325
|
+
"quoteChar" => '"',
|
326
|
+
"trim" => :true
|
305
327
|
}.merge(schema_dialect).merge(@dialect || {})
|
306
328
|
|
307
|
-
@csv_header
|
329
|
+
@csv_header &&= @dialect["header"]
|
308
330
|
@csv_options = dialect_to_csv_options(@dialect)
|
309
331
|
end
|
310
332
|
|
311
333
|
def validate_encoding
|
312
334
|
if @headers["content-type"]
|
313
|
-
if @headers["content-type"]
|
335
|
+
if !/charset=/.match?(@headers["content-type"])
|
314
336
|
build_warnings(:no_encoding, :context)
|
315
|
-
elsif @headers["content-type"]
|
337
|
+
elsif !/charset=utf-8/i.match?(@headers["content-type"])
|
316
338
|
build_warnings(:encoding, :context)
|
317
339
|
end
|
318
340
|
end
|
@@ -336,10 +358,10 @@ module Csvlint
|
|
336
358
|
end
|
337
359
|
|
338
360
|
def build_exception_messages(csvException, errChars, lineNo)
|
339
|
-
#TODO 1 - this is a change in logic, rather than straight refactor of previous error building, however original logic is bonkers
|
340
|
-
#TODO 2 - using .kind_of? is a very ugly fix here and it meant to work around instances where :auto symbol is preserved in @csv_options
|
361
|
+
# TODO 1 - this is a change in logic, rather than straight refactor of previous error building, however original logic is bonkers
|
362
|
+
# TODO 2 - using .kind_of? is a very ugly fix here and it meant to work around instances where :auto symbol is preserved in @csv_options
|
341
363
|
type = fetch_error(csvException)
|
342
|
-
if !@csv_options[:row_sep].
|
364
|
+
if !@csv_options[:row_sep].is_a?(Symbol) && [:unclosed_quote, :stray_quote].include?(type) && !@input.match(@csv_options[:row_sep])
|
343
365
|
build_linebreak_error
|
344
366
|
else
|
345
367
|
build_errors(type, :structure, lineNo, nil, errChars)
|
@@ -352,11 +374,11 @@ module Csvlint
|
|
352
374
|
|
353
375
|
def validate_header(header)
|
354
376
|
names = Set.new
|
355
|
-
header.map{|h| h.strip! } if @dialect["trim"] == :true
|
356
|
-
header.each_with_index do |name,i|
|
357
|
-
build_warnings(:empty_column_name, :schema, nil, i+1) if name == ""
|
377
|
+
header.map { |h| h.strip! } if @dialect["trim"] == :true
|
378
|
+
header.each_with_index do |name, i|
|
379
|
+
build_warnings(:empty_column_name, :schema, nil, i + 1) if name == ""
|
358
380
|
if names.include?(name)
|
359
|
-
build_warnings(:duplicate_column_name, :schema, nil, i+1)
|
381
|
+
build_warnings(:duplicate_column_name, :schema, nil, i + 1)
|
360
382
|
else
|
361
383
|
names << name
|
362
384
|
end
|
@@ -366,24 +388,28 @@ module Csvlint
|
|
366
388
|
@errors += @schema.errors
|
367
389
|
@warnings += @schema.warnings
|
368
390
|
end
|
369
|
-
|
391
|
+
valid?
|
370
392
|
end
|
371
393
|
|
372
394
|
def fetch_error(error)
|
373
395
|
e = error.message.match(/^(.+?)(?: [io]n)? \(?line \d+\)?\.?$/i)
|
374
|
-
message =
|
396
|
+
message = begin
|
397
|
+
e[1]
|
398
|
+
rescue
|
399
|
+
nil
|
400
|
+
end
|
375
401
|
ERROR_MATCHERS.fetch(message, :unknown_error)
|
376
402
|
end
|
377
403
|
|
378
404
|
def dialect_to_csv_options(dialect)
|
379
405
|
skipinitialspace = dialect["skipInitialSpace"] || true
|
380
406
|
delimiter = dialect["delimiter"]
|
381
|
-
delimiter
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
407
|
+
delimiter += " " if !skipinitialspace
|
408
|
+
{
|
409
|
+
col_sep: delimiter,
|
410
|
+
row_sep: dialect["lineTerminator"],
|
411
|
+
quote_char: dialect["quoteChar"],
|
412
|
+
skip_blanks: false
|
387
413
|
}
|
388
414
|
end
|
389
415
|
|
@@ -393,25 +419,25 @@ module Csvlint
|
|
393
419
|
@formats[i] ||= Hash.new(0)
|
394
420
|
|
395
421
|
format =
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
422
|
+
if col.strip[FORMATS[:numeric]]
|
423
|
+
:numeric
|
424
|
+
elsif uri?(col)
|
425
|
+
:uri
|
426
|
+
elsif possible_date?(col)
|
427
|
+
date_formats(col)
|
428
|
+
else
|
429
|
+
:string
|
430
|
+
end
|
405
431
|
|
406
432
|
@formats[i][format] += 1
|
407
433
|
end
|
408
434
|
end
|
409
435
|
|
410
436
|
def check_consistency
|
411
|
-
@formats.each_with_index do |format,i|
|
437
|
+
@formats.each_with_index do |format, i|
|
412
438
|
if format
|
413
439
|
total = format.values.reduce(:+).to_f
|
414
|
-
if format.none?{|_,count| count / total >= 0.9}
|
440
|
+
if format.none? { |_, count| count / total >= 0.9 }
|
415
441
|
build_warnings(:inconsistent_values, :schema, nil, i + 1)
|
416
442
|
end
|
417
443
|
end
|
@@ -427,16 +453,16 @@ module Csvlint
|
|
427
453
|
end
|
428
454
|
|
429
455
|
def locate_schema
|
430
|
-
|
431
456
|
@source_url = nil
|
432
457
|
warn_if_unsuccessful = false
|
433
458
|
case @source
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
459
|
+
when StringIO
|
460
|
+
return
|
461
|
+
when File
|
462
|
+
uri_parser = URI::DEFAULT_PARSER
|
463
|
+
@source_url = "file:#{uri_parser.escape(File.expand_path(@source))}"
|
464
|
+
else
|
465
|
+
@source_url = @source
|
440
466
|
end
|
441
467
|
unless @schema.nil?
|
442
468
|
if @schema.tables[@source_url]
|
@@ -446,35 +472,33 @@ module Csvlint
|
|
446
472
|
end
|
447
473
|
end
|
448
474
|
paths = []
|
449
|
-
if
|
475
|
+
if /^http(s)?/.match?(@source_url)
|
450
476
|
begin
|
451
477
|
well_known_uri = URI.join(@source_url, "/.well-known/csvm")
|
452
|
-
paths = open(well_known_uri).read.split("\n")
|
478
|
+
paths = URI.open(well_known_uri.to_s).read.split("\n")
|
453
479
|
rescue OpenURI::HTTPError, URI::BadURIError
|
454
480
|
end
|
455
481
|
end
|
456
482
|
paths = ["{+url}-metadata.json", "csv-metadata.json"] if paths.empty?
|
457
483
|
paths.each do |template|
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
if schema.
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema)
|
471
|
-
end
|
484
|
+
template = URITemplate.new(template)
|
485
|
+
path = template.expand("url" => @source_url)
|
486
|
+
url = URI.join(@source_url, path)
|
487
|
+
url = File.new(url.to_s.sub(/^file:/, "")) if /^file:/.match?(url.to_s)
|
488
|
+
schema = Schema.load_from_uri(url)
|
489
|
+
if schema.instance_of? Csvlint::Csvw::TableGroup
|
490
|
+
if schema.tables[@source_url]
|
491
|
+
@schema = schema
|
492
|
+
return
|
493
|
+
else
|
494
|
+
warn_if_unsuccessful = true
|
495
|
+
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema)
|
472
496
|
end
|
473
|
-
rescue Errno::ENOENT
|
474
|
-
rescue OpenURI::HTTPError, URI::BadURIError, ArgumentError
|
475
|
-
rescue => e
|
476
|
-
raise e
|
477
497
|
end
|
498
|
+
rescue Errno::ENOENT
|
499
|
+
rescue OpenURI::HTTPError, URI::BadURIError, ArgumentError
|
500
|
+
rescue => e
|
501
|
+
raise e
|
478
502
|
end
|
479
503
|
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema) if warn_if_unsuccessful
|
480
504
|
@schema = nil
|
@@ -483,31 +507,30 @@ module Csvlint
|
|
483
507
|
private
|
484
508
|
|
485
509
|
def parse_extension(source)
|
486
|
-
|
487
510
|
case source
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
511
|
+
when File
|
512
|
+
File.extname(source.path)
|
513
|
+
when IO
|
514
|
+
""
|
515
|
+
when StringIO
|
516
|
+
""
|
517
|
+
when Tempfile
|
518
|
+
# this is triggered when the revalidate dialect use case happens
|
519
|
+
""
|
520
|
+
else
|
521
|
+
begin
|
522
|
+
parsed = URI.parse(source)
|
523
|
+
File.extname(parsed.path)
|
524
|
+
rescue URI::InvalidURIError
|
525
|
+
""
|
526
|
+
end
|
504
527
|
end
|
505
528
|
end
|
506
529
|
|
507
530
|
def uri?(value)
|
508
531
|
if value.strip[FORMATS[:uri]]
|
509
532
|
uri = URI.parse(value)
|
510
|
-
uri.
|
533
|
+
uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
511
534
|
end
|
512
535
|
rescue URI::InvalidURIError
|
513
536
|
false
|
@@ -518,25 +541,25 @@ module Csvlint
|
|
518
541
|
end
|
519
542
|
|
520
543
|
def date_formats(col)
|
521
|
-
if col[FORMATS[:date_db]] && date_format?(Date, col,
|
544
|
+
if col[FORMATS[:date_db]] && date_format?(Date, col, "%Y-%m-%d")
|
522
545
|
:date_db
|
523
|
-
elsif col[FORMATS[:date_short]] && date_format?(Date, col,
|
546
|
+
elsif col[FORMATS[:date_short]] && date_format?(Date, col, "%e %b")
|
524
547
|
:date_short
|
525
|
-
elsif col[FORMATS[:date_rfc822]] && date_format?(Date, col,
|
548
|
+
elsif col[FORMATS[:date_rfc822]] && date_format?(Date, col, "%e %b %Y")
|
526
549
|
:date_rfc822
|
527
|
-
elsif col[FORMATS[:date_long]] && date_format?(Date, col,
|
550
|
+
elsif col[FORMATS[:date_long]] && date_format?(Date, col, "%B %e, %Y")
|
528
551
|
:date_long
|
529
|
-
elsif col[FORMATS[:dateTime_time]] && date_format?(Time, col,
|
552
|
+
elsif col[FORMATS[:dateTime_time]] && date_format?(Time, col, "%H:%M")
|
530
553
|
:dateTime_time
|
531
|
-
elsif col[FORMATS[:dateTime_hms]] && date_format?(Time, col,
|
554
|
+
elsif col[FORMATS[:dateTime_hms]] && date_format?(Time, col, "%H:%M:%S")
|
532
555
|
:dateTime_hms
|
533
|
-
elsif col[FORMATS[:dateTime_db]] && date_format?(Time, col,
|
556
|
+
elsif col[FORMATS[:dateTime_db]] && date_format?(Time, col, "%Y-%m-%d %H:%M:%S")
|
534
557
|
:dateTime_db
|
535
|
-
elsif col[FORMATS[:dateTime_iso8601]] && date_format?(Time, col,
|
558
|
+
elsif col[FORMATS[:dateTime_iso8601]] && date_format?(Time, col, "%Y-%m-%dT%H:%M:%SZ")
|
536
559
|
:dateTime_iso8601
|
537
|
-
elsif col[FORMATS[:dateTime_short]] && date_format?(Time, col,
|
560
|
+
elsif col[FORMATS[:dateTime_short]] && date_format?(Time, col, "%d %b %H:%M")
|
538
561
|
:dateTime_short
|
539
|
-
elsif col[FORMATS[:dateTime_long]] && date_format?(Time, col,
|
562
|
+
elsif col[FORMATS[:dateTime_long]] && date_format?(Time, col, "%B %d, %Y %H:%M")
|
540
563
|
:dateTime_long
|
541
564
|
else
|
542
565
|
:string
|
@@ -563,25 +586,25 @@ module Csvlint
|
|
563
586
|
end
|
564
587
|
|
565
588
|
FORMATS = {
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
589
|
+
string: nil,
|
590
|
+
numeric: /\A[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?\z/,
|
591
|
+
uri: /\Ahttps?:/,
|
592
|
+
date_db: /\A\d{4,}-\d\d-\d\d\z/, # "12345-01-01"
|
593
|
+
date_long: /\A(?:#{Date::MONTHNAMES.join('|')}) [ \d]\d, \d{4,}\z/, # "January 1, 12345"
|
594
|
+
date_rfc822: /\A[ \d]\d (?:#{Date::ABBR_MONTHNAMES.join('|')}) \d{4,}\z/, # " 1 Jan 12345"
|
595
|
+
date_short: /\A[ \d]\d (?:#{Date::ABBR_MONTHNAMES.join('|')})\z/, # "1 Jan"
|
596
|
+
dateTime_db: /\A\d{4,}-\d\d-\d\d \d\d:\d\d:\d\d\z/, # "12345-01-01 00:00:00"
|
597
|
+
dateTime_hms: /\A\d\d:\d\d:\d\d\z/, # "00:00:00"
|
598
|
+
dateTime_iso8601: /\A\d{4,}-\d\d-\d\dT\d\d:\d\d:\d\dZ\z/, # "12345-01-01T00:00:00Z"
|
599
|
+
dateTime_long: /\A(?:#{Date::MONTHNAMES.join('|')}) \d\d, \d{4,} \d\d:\d\d\z/, # "January 01, 12345 00:00"
|
600
|
+
dateTime_short: /\A\d\d (?:#{Date::ABBR_MONTHNAMES.join('|')}) \d\d:\d\d\z/, # "01 Jan 00:00"
|
601
|
+
dateTime_time: /\A\d\d:\d\d\z/ # "00:00"
|
579
602
|
}.freeze
|
580
603
|
|
581
604
|
URI_REGEXP = /(?<uri>.*?)/
|
582
|
-
TOKEN_REGEXP = /([
|
605
|
+
TOKEN_REGEXP = /([^()<>@,;:\\"\/\[\]?={} \t]+)/
|
583
606
|
QUOTED_STRING_REGEXP = /("[^"]*")/
|
584
|
-
SGML_NAME_REGEXP = /([A-Za-z][-A-Za-z0-9
|
607
|
+
SGML_NAME_REGEXP = /([A-Za-z][-A-Za-z0-9.]*)/
|
585
608
|
RELATIONSHIP_REGEXP = Regexp.new("(?<relationship>#{SGML_NAME_REGEXP}|(\"#{SGML_NAME_REGEXP}(\\s+#{SGML_NAME_REGEXP})*\"))")
|
586
609
|
REL_REGEXP = Regexp.new("(?<rel>\\s*rel\\s*=\\s*(?<rel-relationship>#{RELATIONSHIP_REGEXP}))")
|
587
610
|
REV_REGEXP = Regexp.new("(?<rev>\\s*rev\\s*=\\s*#{RELATIONSHIP_REGEXP})")
|
@@ -589,8 +612,7 @@ module Csvlint
|
|
589
612
|
ANCHOR_REGEXP = Regexp.new("(?<anchor>\\s*anchor\\s*=\\s*\\<#{URI_REGEXP}\\>)")
|
590
613
|
LINK_EXTENSION_REGEXP = Regexp.new("(?<link-extension>(?<param>#{TOKEN_REGEXP})(\\s*=\\s*(?<param-value>#{TOKEN_REGEXP}|#{QUOTED_STRING_REGEXP}))?)")
|
591
614
|
LINK_PARAM_REGEXP = Regexp.new("(#{REL_REGEXP}|#{REV_REGEXP}|#{TITLE_REGEXP}|#{ANCHOR_REGEXP}|#{LINK_EXTENSION_REGEXP})")
|
592
|
-
LINK_HEADER_REGEXP = Regexp.new("
|
593
|
-
POSSIBLE_DATE_REGEXP = Regexp.new("\\A(\\d|\\s\\d#{Date::ABBR_MONTHNAMES.join(
|
594
|
-
|
615
|
+
LINK_HEADER_REGEXP = Regexp.new("<#{URI_REGEXP}>(\\s*;\\s*#{LINK_PARAM_REGEXP})*")
|
616
|
+
POSSIBLE_DATE_REGEXP = Regexp.new("\\A(\\d|\\s\\d#{Date::ABBR_MONTHNAMES.join("|")}#{Date::MONTHNAMES.join("|")})")
|
595
617
|
end
|
596
618
|
end
|
data/lib/csvlint/version.rb
CHANGED
data/lib/csvlint.rb
CHANGED
@@ -1,27 +1,26 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require 'typhoeus'
|
1
|
+
require "csv"
|
2
|
+
require "date"
|
3
|
+
require "open-uri"
|
4
|
+
require "tempfile"
|
5
|
+
require "typhoeus"
|
7
6
|
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
7
|
+
require "active_support/core_ext/date/conversions"
|
8
|
+
require "active_support/core_ext/time/conversions"
|
9
|
+
require "active_support/core_ext/object"
|
10
|
+
require "open_uri_redirections"
|
11
|
+
require "uri_template"
|
13
12
|
|
14
|
-
require
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
13
|
+
require "csvlint/error_message"
|
14
|
+
require "csvlint/error_collector"
|
15
|
+
require "csvlint/validate"
|
16
|
+
require "csvlint/field"
|
18
17
|
|
19
|
-
require
|
20
|
-
require
|
21
|
-
require
|
22
|
-
require
|
23
|
-
require
|
24
|
-
require
|
25
|
-
require
|
18
|
+
require "csvlint/csvw/metadata_error"
|
19
|
+
require "csvlint/csvw/number_format"
|
20
|
+
require "csvlint/csvw/date_format"
|
21
|
+
require "csvlint/csvw/property_checker"
|
22
|
+
require "csvlint/csvw/column"
|
23
|
+
require "csvlint/csvw/table"
|
24
|
+
require "csvlint/csvw/table_group"
|
26
25
|
|
27
|
-
require
|
26
|
+
require "csvlint/schema"
|