csvlint 0.4.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/push.yml +35 -0
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/.standard_todo.yml +43 -0
- data/CHANGELOG.md +38 -0
- data/Dockerfile +16 -0
- data/Gemfile +2 -2
- data/README.md +13 -10
- data/Rakefile +7 -7
- data/bin/create_schema +2 -2
- data/csvlint.gemspec +19 -22
- data/docker_notes_for_windows.txt +20 -0
- data/features/step_definitions/cli_steps.rb +11 -11
- data/features/step_definitions/information_steps.rb +4 -4
- data/features/step_definitions/parse_csv_steps.rb +11 -11
- data/features/step_definitions/schema_validation_steps.rb +10 -10
- data/features/step_definitions/sources_steps.rb +1 -1
- data/features/step_definitions/validation_errors_steps.rb +19 -19
- data/features/step_definitions/validation_info_steps.rb +9 -9
- data/features/step_definitions/validation_warnings_steps.rb +11 -11
- data/features/support/aruba.rb +10 -9
- data/features/support/earl_formatter.rb +39 -39
- data/features/support/env.rb +10 -11
- data/features/support/load_tests.rb +109 -105
- data/features/support/webmock.rb +3 -1
- data/lib/csvlint/cli.rb +136 -142
- data/lib/csvlint/csvw/column.rb +279 -280
- data/lib/csvlint/csvw/date_format.rb +90 -92
- data/lib/csvlint/csvw/metadata_error.rb +1 -3
- data/lib/csvlint/csvw/number_format.rb +40 -32
- data/lib/csvlint/csvw/property_checker.rb +714 -717
- data/lib/csvlint/csvw/table.rb +49 -52
- data/lib/csvlint/csvw/table_group.rb +24 -23
- data/lib/csvlint/error_collector.rb +2 -0
- data/lib/csvlint/error_message.rb +0 -1
- data/lib/csvlint/field.rb +153 -141
- data/lib/csvlint/schema.rb +35 -43
- data/lib/csvlint/validate.rb +173 -151
- data/lib/csvlint/version.rb +1 -1
- data/lib/csvlint.rb +22 -23
- data/spec/csvw/column_spec.rb +15 -16
- data/spec/csvw/date_format_spec.rb +5 -7
- data/spec/csvw/number_format_spec.rb +2 -4
- data/spec/csvw/table_group_spec.rb +103 -105
- data/spec/csvw/table_spec.rb +71 -73
- data/spec/field_spec.rb +116 -121
- data/spec/schema_spec.rb +131 -141
- data/spec/spec_helper.rb +6 -6
- data/spec/validator_spec.rb +167 -203
- metadata +41 -85
- data/.travis.yml +0 -37
data/lib/csvlint/validate.rb
CHANGED
@@ -1,21 +1,20 @@
|
|
1
1
|
module Csvlint
|
2
|
-
|
3
2
|
class Validator
|
4
3
|
class LineCSV < CSV
|
5
|
-
ENCODE_RE = Hash.new do |h,str|
|
4
|
+
ENCODE_RE = Hash.new do |h, str|
|
6
5
|
h[str] = Regexp.new(str)
|
7
6
|
end
|
8
7
|
|
9
|
-
ENCODE_STR = Hash.new do |h,encoding_name|
|
10
|
-
h[encoding_name] = Hash.new do |h,chunks|
|
11
|
-
h[chunks] = chunks.map { |chunk| chunk.encode(encoding_name) }.join(
|
8
|
+
ENCODE_STR = Hash.new do |h, encoding_name|
|
9
|
+
h[encoding_name] = Hash.new do |h, chunks|
|
10
|
+
h[chunks] = chunks.map { |chunk| chunk.encode(encoding_name) }.join("")
|
12
11
|
end
|
13
12
|
end
|
14
13
|
|
15
|
-
ESCAPE_RE = Hash.new do |h,re_chars|
|
16
|
-
h[re_chars] = Hash.new do |h,re_esc|
|
17
|
-
h[re_esc] = Hash.new do |h,str|
|
18
|
-
h[str] = str.gsub(re_chars) {|c| re_esc + c}
|
14
|
+
ESCAPE_RE = Hash.new do |h, re_chars|
|
15
|
+
h[re_chars] = Hash.new do |h, re_esc|
|
16
|
+
h[re_esc] = Hash.new do |h, str|
|
17
|
+
h[str] = str.gsub(re_chars) { |c| re_esc + c }
|
19
18
|
end
|
20
19
|
end
|
21
20
|
end
|
@@ -38,13 +37,15 @@ module Csvlint
|
|
38
37
|
ESCAPE_RE[@re_chars][@re_esc][str]
|
39
38
|
end
|
40
39
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
40
|
+
if RUBY_VERSION < "2.5"
|
41
|
+
# Optimization: Disable the CSV library's converters feature.
|
42
|
+
# @see https://github.com/ruby/ruby/blob/v2_2_3/lib/csv.rb#L2100
|
43
|
+
def init_converters(options, field_name = :converters)
|
44
|
+
@converters = []
|
45
|
+
@header_converters = []
|
46
|
+
options.delete(:unconverted_fields)
|
47
|
+
options.delete(field_name)
|
48
|
+
end
|
48
49
|
end
|
49
50
|
end
|
50
51
|
|
@@ -53,10 +54,11 @@ module Csvlint
|
|
53
54
|
attr_reader :encoding, :content_type, :extension, :headers, :link_headers, :dialect, :csv_header, :schema, :data, :current_line
|
54
55
|
|
55
56
|
ERROR_MATCHERS = {
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
"Missing or stray quote" => :stray_quote,
|
58
|
+
"Illegal quoting" => :whitespace,
|
59
|
+
"Unclosed quoted field" => :unclosed_quote,
|
60
|
+
"Any value after quoted field isn't allowed" => :unclosed_quote,
|
61
|
+
"Unquoted fields do not allow \\r or \\n" => :line_breaks
|
60
62
|
}
|
61
63
|
|
62
64
|
def initialize(source, dialect = {}, schema = nil, options = {})
|
@@ -87,14 +89,14 @@ module Csvlint
|
|
87
89
|
end
|
88
90
|
|
89
91
|
def validate
|
90
|
-
if
|
92
|
+
if /.xls(x)?/.match?(@extension)
|
91
93
|
build_warnings(:excel, :context)
|
92
94
|
return
|
93
95
|
end
|
94
96
|
locate_schema unless @schema.instance_of?(Csvlint::Schema)
|
95
97
|
set_dialect
|
96
98
|
|
97
|
-
if @source.
|
99
|
+
if @source.instance_of?(String)
|
98
100
|
validate_url
|
99
101
|
else
|
100
102
|
validate_metadata
|
@@ -117,7 +119,11 @@ module Csvlint
|
|
117
119
|
request = Typhoeus::Request.new(@source, followlocation: true)
|
118
120
|
request.on_headers do |response|
|
119
121
|
@headers = response.headers || {}
|
120
|
-
@content_type =
|
122
|
+
@content_type = begin
|
123
|
+
response.headers["content-type"]
|
124
|
+
rescue
|
125
|
+
nil
|
126
|
+
end
|
121
127
|
@response_code = response.code
|
122
128
|
return build_errors(:not_found) if response.code == 404
|
123
129
|
validate_metadata
|
@@ -145,7 +151,7 @@ module Csvlint
|
|
145
151
|
else
|
146
152
|
validate_line(line, @current_line)
|
147
153
|
@leading = ""
|
148
|
-
@current_line
|
154
|
+
@current_line += 1
|
149
155
|
end
|
150
156
|
else
|
151
157
|
# If it's not a full line, then prepare to add it to the beginning of the next chunk
|
@@ -153,7 +159,7 @@ module Csvlint
|
|
153
159
|
end
|
154
160
|
rescue ArgumentError => ae
|
155
161
|
build_errors(:invalid_encoding, :structure, @current_line, nil, @current_line) unless @reported_invalid_encoding
|
156
|
-
@current_line
|
162
|
+
@current_line += 1
|
157
163
|
@reported_invalid_encoding = true
|
158
164
|
end
|
159
165
|
|
@@ -164,7 +170,7 @@ module Csvlint
|
|
164
170
|
@encoding = input.encoding.to_s
|
165
171
|
report_line_breaks(line)
|
166
172
|
parse_contents(input, line)
|
167
|
-
@lambda
|
173
|
+
@lambda&.call(self)
|
168
174
|
rescue ArgumentError => ae
|
169
175
|
build_errors(:invalid_encoding, :structure, @current_line, nil, index) unless @reported_invalid_encoding
|
170
176
|
@reported_invalid_encoding = true
|
@@ -179,9 +185,9 @@ module Csvlint
|
|
179
185
|
@csv_options[:encoding] = @encoding
|
180
186
|
|
181
187
|
begin
|
182
|
-
row = LineCSV.parse_line(stream,
|
188
|
+
row = LineCSV.parse_line(stream, **@csv_options)
|
183
189
|
rescue LineCSV::MalformedCSVError => e
|
184
|
-
build_exception_messages(e, stream, current_line)
|
190
|
+
build_exception_messages(e, stream, current_line) unless e.message.include?("UTF") && @reported_invalid_encoding
|
185
191
|
end
|
186
192
|
|
187
193
|
if row
|
@@ -201,8 +207,8 @@ module Csvlint
|
|
201
207
|
@errors += @schema.errors
|
202
208
|
all_errors += @schema.errors
|
203
209
|
@warnings += @schema.warnings
|
204
|
-
|
205
|
-
build_errors(:ragged_rows, :structure, current_line, nil, stream.to_s)
|
210
|
+
elsif !row.empty? && row.size != @expected_columns
|
211
|
+
build_errors(:ragged_rows, :structure, current_line, nil, stream.to_s)
|
206
212
|
end
|
207
213
|
end
|
208
214
|
end
|
@@ -225,8 +231,8 @@ module Csvlint
|
|
225
231
|
def validate_metadata
|
226
232
|
assumed_header = !@supplied_dialect
|
227
233
|
unless @headers.empty?
|
228
|
-
if @headers["content-type"]
|
229
|
-
@csv_header
|
234
|
+
if /text\/csv/.match?(@headers["content-type"])
|
235
|
+
@csv_header &&= true
|
230
236
|
assumed_header = @assumed_header.present?
|
231
237
|
end
|
232
238
|
if @headers["content-type"] =~ /header=(present|absent)/
|
@@ -234,23 +240,39 @@ module Csvlint
|
|
234
240
|
@csv_header = false if $1 == "absent"
|
235
241
|
assumed_header = false
|
236
242
|
end
|
237
|
-
build_warnings(:no_content_type, :context) if @content_type
|
238
|
-
build_errors(:wrong_content_type, :context) unless
|
243
|
+
build_warnings(:no_content_type, :context) if @content_type.nil?
|
244
|
+
build_errors(:wrong_content_type, :context) unless @content_type && @content_type =~ /text\/csv/
|
239
245
|
end
|
240
246
|
@header_processed = true
|
241
247
|
build_info_messages(:assumed_header, :structure) if assumed_header
|
242
248
|
|
243
|
-
@link_headers =
|
244
|
-
|
249
|
+
@link_headers = begin
|
250
|
+
@headers["link"].split(",")
|
251
|
+
rescue
|
252
|
+
nil
|
253
|
+
end
|
254
|
+
@link_headers&.each do |link_header|
|
245
255
|
match = LINK_HEADER_REGEXP.match(link_header)
|
246
|
-
uri =
|
247
|
-
|
256
|
+
uri = begin
|
257
|
+
match["uri"].gsub(/(^<|>$)/, "")
|
258
|
+
rescue
|
259
|
+
nil
|
260
|
+
end
|
261
|
+
rel = begin
|
262
|
+
match["rel-relationship"].gsub(/(^"|"$)/, "")
|
263
|
+
rescue
|
264
|
+
nil
|
265
|
+
end
|
248
266
|
param = match["param"]
|
249
|
-
param_value =
|
267
|
+
param_value = begin
|
268
|
+
match["param-value"].gsub(/(^"|"$)/, "")
|
269
|
+
rescue
|
270
|
+
nil
|
271
|
+
end
|
250
272
|
if rel == "describedby" && param == "type" && ["application/csvm+json", "application/ld+json", "application/json"].include?(param_value)
|
251
273
|
begin
|
252
274
|
url = URI.join(@source_url, uri)
|
253
|
-
schema = Schema.
|
275
|
+
schema = Schema.load_from_uri(url)
|
254
276
|
if schema.instance_of? Csvlint::Csvw::TableGroup
|
255
277
|
if schema.tables[@source_url]
|
256
278
|
@schema = schema
|
@@ -262,14 +284,14 @@ module Csvlint
|
|
262
284
|
rescue OpenURI::HTTPError
|
263
285
|
end
|
264
286
|
end
|
265
|
-
end
|
287
|
+
end
|
266
288
|
end
|
267
289
|
|
268
290
|
def header?
|
269
291
|
@csv_header && @dialect["header"]
|
270
292
|
end
|
271
293
|
|
272
|
-
def report_line_breaks(line_no=nil)
|
294
|
+
def report_line_breaks(line_no = nil)
|
273
295
|
return unless @input[-1, 1].include?("\n") # Return straight away if there's no newline character - i.e. we're on the last line
|
274
296
|
line_break = get_line_break(@input)
|
275
297
|
@line_breaks << line_break
|
@@ -295,24 +317,24 @@ module Csvlint
|
|
295
317
|
schema_dialect = {}
|
296
318
|
end
|
297
319
|
@dialect = {
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
320
|
+
"header" => true,
|
321
|
+
"headerRowCount" => 1,
|
322
|
+
"delimiter" => ",",
|
323
|
+
"skipInitialSpace" => true,
|
324
|
+
"lineTerminator" => :auto,
|
325
|
+
"quoteChar" => '"',
|
326
|
+
"trim" => :true
|
305
327
|
}.merge(schema_dialect).merge(@dialect || {})
|
306
328
|
|
307
|
-
@csv_header
|
329
|
+
@csv_header &&= @dialect["header"]
|
308
330
|
@csv_options = dialect_to_csv_options(@dialect)
|
309
331
|
end
|
310
332
|
|
311
333
|
def validate_encoding
|
312
334
|
if @headers["content-type"]
|
313
|
-
if @headers["content-type"]
|
335
|
+
if !/charset=/.match?(@headers["content-type"])
|
314
336
|
build_warnings(:no_encoding, :context)
|
315
|
-
elsif @headers["content-type"]
|
337
|
+
elsif !/charset=utf-8/i.match?(@headers["content-type"])
|
316
338
|
build_warnings(:encoding, :context)
|
317
339
|
end
|
318
340
|
end
|
@@ -336,10 +358,10 @@ module Csvlint
|
|
336
358
|
end
|
337
359
|
|
338
360
|
def build_exception_messages(csvException, errChars, lineNo)
|
339
|
-
#TODO 1 - this is a change in logic, rather than straight refactor of previous error building, however original logic is bonkers
|
340
|
-
#TODO 2 - using .kind_of? is a very ugly fix here and it meant to work around instances where :auto symbol is preserved in @csv_options
|
361
|
+
# TODO 1 - this is a change in logic, rather than straight refactor of previous error building, however original logic is bonkers
|
362
|
+
# TODO 2 - using .kind_of? is a very ugly fix here and it meant to work around instances where :auto symbol is preserved in @csv_options
|
341
363
|
type = fetch_error(csvException)
|
342
|
-
if !@csv_options[:row_sep].
|
364
|
+
if !@csv_options[:row_sep].is_a?(Symbol) && [:unclosed_quote, :stray_quote].include?(type) && !@input.match(@csv_options[:row_sep])
|
343
365
|
build_linebreak_error
|
344
366
|
else
|
345
367
|
build_errors(type, :structure, lineNo, nil, errChars)
|
@@ -352,11 +374,11 @@ module Csvlint
|
|
352
374
|
|
353
375
|
def validate_header(header)
|
354
376
|
names = Set.new
|
355
|
-
header.map{|h| h.strip! } if @dialect["trim"] == :true
|
356
|
-
header.each_with_index do |name,i|
|
357
|
-
build_warnings(:empty_column_name, :schema, nil, i+1) if name == ""
|
377
|
+
header.map { |h| h.strip! } if @dialect["trim"] == :true
|
378
|
+
header.each_with_index do |name, i|
|
379
|
+
build_warnings(:empty_column_name, :schema, nil, i + 1) if name == ""
|
358
380
|
if names.include?(name)
|
359
|
-
build_warnings(:duplicate_column_name, :schema, nil, i+1)
|
381
|
+
build_warnings(:duplicate_column_name, :schema, nil, i + 1)
|
360
382
|
else
|
361
383
|
names << name
|
362
384
|
end
|
@@ -366,24 +388,28 @@ module Csvlint
|
|
366
388
|
@errors += @schema.errors
|
367
389
|
@warnings += @schema.warnings
|
368
390
|
end
|
369
|
-
|
391
|
+
valid?
|
370
392
|
end
|
371
393
|
|
372
394
|
def fetch_error(error)
|
373
395
|
e = error.message.match(/^(.+?)(?: [io]n)? \(?line \d+\)?\.?$/i)
|
374
|
-
message =
|
396
|
+
message = begin
|
397
|
+
e[1]
|
398
|
+
rescue
|
399
|
+
nil
|
400
|
+
end
|
375
401
|
ERROR_MATCHERS.fetch(message, :unknown_error)
|
376
402
|
end
|
377
403
|
|
378
404
|
def dialect_to_csv_options(dialect)
|
379
405
|
skipinitialspace = dialect["skipInitialSpace"] || true
|
380
406
|
delimiter = dialect["delimiter"]
|
381
|
-
delimiter
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
407
|
+
delimiter += " " if !skipinitialspace
|
408
|
+
{
|
409
|
+
col_sep: delimiter,
|
410
|
+
row_sep: dialect["lineTerminator"],
|
411
|
+
quote_char: dialect["quoteChar"],
|
412
|
+
skip_blanks: false
|
387
413
|
}
|
388
414
|
end
|
389
415
|
|
@@ -393,25 +419,25 @@ module Csvlint
|
|
393
419
|
@formats[i] ||= Hash.new(0)
|
394
420
|
|
395
421
|
format =
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
422
|
+
if col.strip[FORMATS[:numeric]]
|
423
|
+
:numeric
|
424
|
+
elsif uri?(col)
|
425
|
+
:uri
|
426
|
+
elsif possible_date?(col)
|
427
|
+
date_formats(col)
|
428
|
+
else
|
429
|
+
:string
|
430
|
+
end
|
405
431
|
|
406
432
|
@formats[i][format] += 1
|
407
433
|
end
|
408
434
|
end
|
409
435
|
|
410
436
|
def check_consistency
|
411
|
-
@formats.each_with_index do |format,i|
|
437
|
+
@formats.each_with_index do |format, i|
|
412
438
|
if format
|
413
439
|
total = format.values.reduce(:+).to_f
|
414
|
-
if format.none?{|_,count| count / total >= 0.9}
|
440
|
+
if format.none? { |_, count| count / total >= 0.9 }
|
415
441
|
build_warnings(:inconsistent_values, :schema, nil, i + 1)
|
416
442
|
end
|
417
443
|
end
|
@@ -427,16 +453,16 @@ module Csvlint
|
|
427
453
|
end
|
428
454
|
|
429
455
|
def locate_schema
|
430
|
-
|
431
456
|
@source_url = nil
|
432
457
|
warn_if_unsuccessful = false
|
433
458
|
case @source
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
459
|
+
when StringIO
|
460
|
+
return
|
461
|
+
when File
|
462
|
+
uri_parser = URI::DEFAULT_PARSER
|
463
|
+
@source_url = "file:#{uri_parser.escape(File.expand_path(@source))}"
|
464
|
+
else
|
465
|
+
@source_url = @source
|
440
466
|
end
|
441
467
|
unless @schema.nil?
|
442
468
|
if @schema.tables[@source_url]
|
@@ -446,35 +472,33 @@ module Csvlint
|
|
446
472
|
end
|
447
473
|
end
|
448
474
|
paths = []
|
449
|
-
if
|
475
|
+
if /^http(s)?/.match?(@source_url)
|
450
476
|
begin
|
451
477
|
well_known_uri = URI.join(@source_url, "/.well-known/csvm")
|
452
|
-
paths = open(well_known_uri).read.split("\n")
|
478
|
+
paths = URI.open(well_known_uri.to_s).read.split("\n")
|
453
479
|
rescue OpenURI::HTTPError, URI::BadURIError
|
454
480
|
end
|
455
481
|
end
|
456
482
|
paths = ["{+url}-metadata.json", "csv-metadata.json"] if paths.empty?
|
457
483
|
paths.each do |template|
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
if schema.
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema)
|
471
|
-
end
|
484
|
+
template = URITemplate.new(template)
|
485
|
+
path = template.expand("url" => @source_url)
|
486
|
+
url = URI.join(@source_url, path)
|
487
|
+
url = File.new(url.to_s.sub(/^file:/, "")) if /^file:/.match?(url.to_s)
|
488
|
+
schema = Schema.load_from_uri(url)
|
489
|
+
if schema.instance_of? Csvlint::Csvw::TableGroup
|
490
|
+
if schema.tables[@source_url]
|
491
|
+
@schema = schema
|
492
|
+
return
|
493
|
+
else
|
494
|
+
warn_if_unsuccessful = true
|
495
|
+
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema)
|
472
496
|
end
|
473
|
-
rescue Errno::ENOENT
|
474
|
-
rescue OpenURI::HTTPError, URI::BadURIError, ArgumentError
|
475
|
-
rescue => e
|
476
|
-
raise e
|
477
497
|
end
|
498
|
+
rescue Errno::ENOENT
|
499
|
+
rescue OpenURI::HTTPError, URI::BadURIError, ArgumentError
|
500
|
+
rescue => e
|
501
|
+
raise e
|
478
502
|
end
|
479
503
|
build_warnings(:schema_mismatch, :context, nil, nil, @source_url, schema) if warn_if_unsuccessful
|
480
504
|
@schema = nil
|
@@ -483,31 +507,30 @@ module Csvlint
|
|
483
507
|
private
|
484
508
|
|
485
509
|
def parse_extension(source)
|
486
|
-
|
487
510
|
case source
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
511
|
+
when File
|
512
|
+
File.extname(source.path)
|
513
|
+
when IO
|
514
|
+
""
|
515
|
+
when StringIO
|
516
|
+
""
|
517
|
+
when Tempfile
|
518
|
+
# this is triggered when the revalidate dialect use case happens
|
519
|
+
""
|
520
|
+
else
|
521
|
+
begin
|
522
|
+
parsed = URI.parse(source)
|
523
|
+
File.extname(parsed.path)
|
524
|
+
rescue URI::InvalidURIError
|
525
|
+
""
|
526
|
+
end
|
504
527
|
end
|
505
528
|
end
|
506
529
|
|
507
530
|
def uri?(value)
|
508
531
|
if value.strip[FORMATS[:uri]]
|
509
532
|
uri = URI.parse(value)
|
510
|
-
uri.
|
533
|
+
uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
511
534
|
end
|
512
535
|
rescue URI::InvalidURIError
|
513
536
|
false
|
@@ -518,25 +541,25 @@ module Csvlint
|
|
518
541
|
end
|
519
542
|
|
520
543
|
def date_formats(col)
|
521
|
-
if col[FORMATS[:date_db]] && date_format?(Date, col,
|
544
|
+
if col[FORMATS[:date_db]] && date_format?(Date, col, "%Y-%m-%d")
|
522
545
|
:date_db
|
523
|
-
elsif col[FORMATS[:date_short]] && date_format?(Date, col,
|
546
|
+
elsif col[FORMATS[:date_short]] && date_format?(Date, col, "%e %b")
|
524
547
|
:date_short
|
525
|
-
elsif col[FORMATS[:date_rfc822]] && date_format?(Date, col,
|
548
|
+
elsif col[FORMATS[:date_rfc822]] && date_format?(Date, col, "%e %b %Y")
|
526
549
|
:date_rfc822
|
527
|
-
elsif col[FORMATS[:date_long]] && date_format?(Date, col,
|
550
|
+
elsif col[FORMATS[:date_long]] && date_format?(Date, col, "%B %e, %Y")
|
528
551
|
:date_long
|
529
|
-
elsif col[FORMATS[:dateTime_time]] && date_format?(Time, col,
|
552
|
+
elsif col[FORMATS[:dateTime_time]] && date_format?(Time, col, "%H:%M")
|
530
553
|
:dateTime_time
|
531
|
-
elsif col[FORMATS[:dateTime_hms]] && date_format?(Time, col,
|
554
|
+
elsif col[FORMATS[:dateTime_hms]] && date_format?(Time, col, "%H:%M:%S")
|
532
555
|
:dateTime_hms
|
533
|
-
elsif col[FORMATS[:dateTime_db]] && date_format?(Time, col,
|
556
|
+
elsif col[FORMATS[:dateTime_db]] && date_format?(Time, col, "%Y-%m-%d %H:%M:%S")
|
534
557
|
:dateTime_db
|
535
|
-
elsif col[FORMATS[:dateTime_iso8601]] && date_format?(Time, col,
|
558
|
+
elsif col[FORMATS[:dateTime_iso8601]] && date_format?(Time, col, "%Y-%m-%dT%H:%M:%SZ")
|
536
559
|
:dateTime_iso8601
|
537
|
-
elsif col[FORMATS[:dateTime_short]] && date_format?(Time, col,
|
560
|
+
elsif col[FORMATS[:dateTime_short]] && date_format?(Time, col, "%d %b %H:%M")
|
538
561
|
:dateTime_short
|
539
|
-
elsif col[FORMATS[:dateTime_long]] && date_format?(Time, col,
|
562
|
+
elsif col[FORMATS[:dateTime_long]] && date_format?(Time, col, "%B %d, %Y %H:%M")
|
540
563
|
:dateTime_long
|
541
564
|
else
|
542
565
|
:string
|
@@ -563,25 +586,25 @@ module Csvlint
|
|
563
586
|
end
|
564
587
|
|
565
588
|
FORMATS = {
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
589
|
+
string: nil,
|
590
|
+
numeric: /\A[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?\z/,
|
591
|
+
uri: /\Ahttps?:/,
|
592
|
+
date_db: /\A\d{4,}-\d\d-\d\d\z/, # "12345-01-01"
|
593
|
+
date_long: /\A(?:#{Date::MONTHNAMES.join('|')}) [ \d]\d, \d{4,}\z/, # "January 1, 12345"
|
594
|
+
date_rfc822: /\A[ \d]\d (?:#{Date::ABBR_MONTHNAMES.join('|')}) \d{4,}\z/, # " 1 Jan 12345"
|
595
|
+
date_short: /\A[ \d]\d (?:#{Date::ABBR_MONTHNAMES.join('|')})\z/, # "1 Jan"
|
596
|
+
dateTime_db: /\A\d{4,}-\d\d-\d\d \d\d:\d\d:\d\d\z/, # "12345-01-01 00:00:00"
|
597
|
+
dateTime_hms: /\A\d\d:\d\d:\d\d\z/, # "00:00:00"
|
598
|
+
dateTime_iso8601: /\A\d{4,}-\d\d-\d\dT\d\d:\d\d:\d\dZ\z/, # "12345-01-01T00:00:00Z"
|
599
|
+
dateTime_long: /\A(?:#{Date::MONTHNAMES.join('|')}) \d\d, \d{4,} \d\d:\d\d\z/, # "January 01, 12345 00:00"
|
600
|
+
dateTime_short: /\A\d\d (?:#{Date::ABBR_MONTHNAMES.join('|')}) \d\d:\d\d\z/, # "01 Jan 00:00"
|
601
|
+
dateTime_time: /\A\d\d:\d\d\z/ # "00:00"
|
579
602
|
}.freeze
|
580
603
|
|
581
604
|
URI_REGEXP = /(?<uri>.*?)/
|
582
|
-
TOKEN_REGEXP = /([
|
605
|
+
TOKEN_REGEXP = /([^()<>@,;:\\"\/\[\]?={} \t]+)/
|
583
606
|
QUOTED_STRING_REGEXP = /("[^"]*")/
|
584
|
-
SGML_NAME_REGEXP = /([A-Za-z][-A-Za-z0-9
|
607
|
+
SGML_NAME_REGEXP = /([A-Za-z][-A-Za-z0-9.]*)/
|
585
608
|
RELATIONSHIP_REGEXP = Regexp.new("(?<relationship>#{SGML_NAME_REGEXP}|(\"#{SGML_NAME_REGEXP}(\\s+#{SGML_NAME_REGEXP})*\"))")
|
586
609
|
REL_REGEXP = Regexp.new("(?<rel>\\s*rel\\s*=\\s*(?<rel-relationship>#{RELATIONSHIP_REGEXP}))")
|
587
610
|
REV_REGEXP = Regexp.new("(?<rev>\\s*rev\\s*=\\s*#{RELATIONSHIP_REGEXP})")
|
@@ -589,8 +612,7 @@ module Csvlint
|
|
589
612
|
ANCHOR_REGEXP = Regexp.new("(?<anchor>\\s*anchor\\s*=\\s*\\<#{URI_REGEXP}\\>)")
|
590
613
|
LINK_EXTENSION_REGEXP = Regexp.new("(?<link-extension>(?<param>#{TOKEN_REGEXP})(\\s*=\\s*(?<param-value>#{TOKEN_REGEXP}|#{QUOTED_STRING_REGEXP}))?)")
|
591
614
|
LINK_PARAM_REGEXP = Regexp.new("(#{REL_REGEXP}|#{REV_REGEXP}|#{TITLE_REGEXP}|#{ANCHOR_REGEXP}|#{LINK_EXTENSION_REGEXP})")
|
592
|
-
LINK_HEADER_REGEXP = Regexp.new("
|
593
|
-
POSSIBLE_DATE_REGEXP = Regexp.new("\\A(\\d|\\s\\d#{Date::ABBR_MONTHNAMES.join(
|
594
|
-
|
615
|
+
LINK_HEADER_REGEXP = Regexp.new("<#{URI_REGEXP}>(\\s*;\\s*#{LINK_PARAM_REGEXP})*")
|
616
|
+
POSSIBLE_DATE_REGEXP = Regexp.new("\\A(\\d|\\s\\d#{Date::ABBR_MONTHNAMES.join("|")}#{Date::MONTHNAMES.join("|")})")
|
595
617
|
end
|
596
618
|
end
|
data/lib/csvlint/version.rb
CHANGED
data/lib/csvlint.rb
CHANGED
@@ -1,27 +1,26 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require 'typhoeus'
|
1
|
+
require "csv"
|
2
|
+
require "date"
|
3
|
+
require "open-uri"
|
4
|
+
require "tempfile"
|
5
|
+
require "typhoeus"
|
7
6
|
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
7
|
+
require "active_support/core_ext/date/conversions"
|
8
|
+
require "active_support/core_ext/time/conversions"
|
9
|
+
require "active_support/core_ext/object"
|
10
|
+
require "open_uri_redirections"
|
11
|
+
require "uri_template"
|
13
12
|
|
14
|
-
require
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
13
|
+
require "csvlint/error_message"
|
14
|
+
require "csvlint/error_collector"
|
15
|
+
require "csvlint/validate"
|
16
|
+
require "csvlint/field"
|
18
17
|
|
19
|
-
require
|
20
|
-
require
|
21
|
-
require
|
22
|
-
require
|
23
|
-
require
|
24
|
-
require
|
25
|
-
require
|
18
|
+
require "csvlint/csvw/metadata_error"
|
19
|
+
require "csvlint/csvw/number_format"
|
20
|
+
require "csvlint/csvw/date_format"
|
21
|
+
require "csvlint/csvw/property_checker"
|
22
|
+
require "csvlint/csvw/column"
|
23
|
+
require "csvlint/csvw/table"
|
24
|
+
require "csvlint/csvw/table_group"
|
26
25
|
|
27
|
-
require
|
26
|
+
require "csvlint/schema"
|