wjordan213-csvlint 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitattributes +2 -0
- data/.gitignore +28 -0
- data/.ruby-version +1 -0
- data/.travis.yml +32 -0
- data/CHANGELOG.md +361 -0
- data/Gemfile +7 -0
- data/LICENSE.md +22 -0
- data/README.md +328 -0
- data/Rakefile +17 -0
- data/bin/create_schema +32 -0
- data/bin/csvlint +10 -0
- data/features/check_format.feature +46 -0
- data/features/cli.feature +210 -0
- data/features/csv_options.feature +35 -0
- data/features/csvupload.feature +145 -0
- data/features/csvw_schema_validation.feature +127 -0
- data/features/fixtures/cr-line-endings.csv +0 -0
- data/features/fixtures/crlf-line-endings.csv +0 -0
- data/features/fixtures/inconsistent-line-endings-unquoted.csv +0 -0
- data/features/fixtures/inconsistent-line-endings.csv +0 -0
- data/features/fixtures/invalid-byte-sequence.csv +0 -0
- data/features/fixtures/invalid_many_rows.csv +0 -0
- data/features/fixtures/lf-line-endings.csv +0 -0
- data/features/fixtures/spreadsheet.xls +0 -0
- data/features/fixtures/spreadsheet.xlsx +0 -0
- data/features/fixtures/title-row.csv +0 -0
- data/features/fixtures/valid.csv +0 -0
- data/features/fixtures/valid_many_rows.csv +0 -0
- data/features/fixtures/windows-line-endings.csv +0 -0
- data/features/information.feature +22 -0
- data/features/parse_csv.feature +90 -0
- data/features/schema_validation.feature +105 -0
- data/features/sources.feature +17 -0
- data/features/step_definitions/cli_steps.rb +11 -0
- data/features/step_definitions/csv_options_steps.rb +24 -0
- data/features/step_definitions/information_steps.rb +13 -0
- data/features/step_definitions/parse_csv_steps.rb +42 -0
- data/features/step_definitions/schema_validation_steps.rb +33 -0
- data/features/step_definitions/sources_steps.rb +7 -0
- data/features/step_definitions/validation_errors_steps.rb +90 -0
- data/features/step_definitions/validation_info_steps.rb +22 -0
- data/features/step_definitions/validation_warnings_steps.rb +60 -0
- data/features/support/aruba.rb +56 -0
- data/features/support/env.rb +26 -0
- data/features/support/load_tests.rb +114 -0
- data/features/support/webmock.rb +1 -0
- data/features/validation_errors.feature +147 -0
- data/features/validation_info.feature +16 -0
- data/features/validation_warnings.feature +86 -0
- data/lib/csvlint.rb +27 -0
- data/lib/csvlint/cli.rb +165 -0
- data/lib/csvlint/csvw/column.rb +359 -0
- data/lib/csvlint/csvw/date_format.rb +182 -0
- data/lib/csvlint/csvw/metadata_error.rb +13 -0
- data/lib/csvlint/csvw/number_format.rb +211 -0
- data/lib/csvlint/csvw/property_checker.rb +761 -0
- data/lib/csvlint/csvw/table.rb +204 -0
- data/lib/csvlint/csvw/table_group.rb +165 -0
- data/lib/csvlint/error_collector.rb +27 -0
- data/lib/csvlint/error_message.rb +15 -0
- data/lib/csvlint/field.rb +196 -0
- data/lib/csvlint/schema.rb +92 -0
- data/lib/csvlint/validate.rb +599 -0
- data/lib/csvlint/version.rb +3 -0
- data/spec/csvw/column_spec.rb +112 -0
- data/spec/csvw/date_format_spec.rb +49 -0
- data/spec/csvw/number_format_spec.rb +417 -0
- data/spec/csvw/table_group_spec.rb +143 -0
- data/spec/csvw/table_spec.rb +90 -0
- data/spec/field_spec.rb +252 -0
- data/spec/schema_spec.rb +211 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/validator_spec.rb +619 -0
- data/wjordan213_csvlint.gemspec +46 -0
- metadata +490 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
Feature: Validation warnings
|
2
|
+
|
3
|
+
Scenario: UTF-8 Encoding
|
4
|
+
Given I have a CSV with the following content:
|
5
|
+
"""
|
6
|
+
"col1","col2","col3"
|
7
|
+
"abc","2","3"
|
8
|
+
"""
|
9
|
+
And it is encoded as "utf-8"
|
10
|
+
And it is stored at the url "http://example.com/example1.csv"
|
11
|
+
When I ask if there are warnings
|
12
|
+
Then there should be 0 warnings
|
13
|
+
|
14
|
+
Scenario: ISO-8859-1 Encoding
|
15
|
+
Given I have a CSV with the following content:
|
16
|
+
"""
|
17
|
+
"col1","col2","col3"
|
18
|
+
"1","2","3"
|
19
|
+
"""
|
20
|
+
And it is encoded as "iso-8859-1"
|
21
|
+
And it is stored at the url "http://example.com/example1.csv"
|
22
|
+
When I ask if there are warnings
|
23
|
+
Then there should be 1 warnings
|
24
|
+
|
25
|
+
Scenario: Correct content type
|
26
|
+
Given I have a CSV with the following content:
|
27
|
+
"""
|
28
|
+
"col1","col2","col3"
|
29
|
+
"abc","2","3"
|
30
|
+
"""
|
31
|
+
And the content type is set to "text/csv"
|
32
|
+
And it is stored at the url "http://example.com/example1.csv"
|
33
|
+
And I ask if there are warnings
|
34
|
+
Then there should be 0 warnings
|
35
|
+
|
36
|
+
Scenario: No extension
|
37
|
+
Given I have a CSV with the following content:
|
38
|
+
"""
|
39
|
+
"col1","col2","col3"
|
40
|
+
"abc","2","3"
|
41
|
+
"""
|
42
|
+
And the content type is set to "text/csv"
|
43
|
+
And it is stored at the url "http://example.com/example1"
|
44
|
+
And I ask if there are warnings
|
45
|
+
Then there should be 0 warnings
|
46
|
+
|
47
|
+
Scenario: Allow query params after extension
|
48
|
+
Given I have a CSV with the following content:
|
49
|
+
"""
|
50
|
+
"col1","col2","col3"
|
51
|
+
"abc","2","3"
|
52
|
+
"""
|
53
|
+
And the content type is set to "text/csv"
|
54
|
+
And it is stored at the url "http://example.com/example1.csv?query=param"
|
55
|
+
And I ask if there are warnings
|
56
|
+
Then there should be 0 warnings
|
57
|
+
|
58
|
+
Scenario: User doesn't supply encoding
|
59
|
+
Given I have a CSV with the following content:
|
60
|
+
"""
|
61
|
+
"col1","col2","col3"
|
62
|
+
"abc","2","3"
|
63
|
+
"""
|
64
|
+
And it is stored at the url "http://example.com/example1.csv" with no character set
|
65
|
+
When I ask if there are warnings
|
66
|
+
Then there should be 1 warnings
|
67
|
+
And that warning should have the type "no_encoding"
|
68
|
+
|
69
|
+
Scenario: Title rows
|
70
|
+
Given I have a CSV file called "title-row.csv"
|
71
|
+
And it is stored at the url "http://example.com/example1.csv"
|
72
|
+
And I ask if there are warnings
|
73
|
+
Then there should be 1 warnings
|
74
|
+
And that warning should have the type "title_row"
|
75
|
+
|
76
|
+
Scenario: catch excel warnings (xls)
|
77
|
+
Given I parse a file called "spreadsheet.xls"
|
78
|
+
And I ask if there are warnings
|
79
|
+
Then there should be 1 warnings
|
80
|
+
And that warning should have the type "excel"
|
81
|
+
|
82
|
+
Scenario: catch excel warnings (xlsx)
|
83
|
+
Given I parse a file called "spreadsheet.xlsx"
|
84
|
+
And I ask if there are warnings
|
85
|
+
Then there should be 1 warnings
|
86
|
+
And that warning should have the type "excel"
|
data/lib/csvlint.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'date'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'set'
|
5
|
+
require 'tempfile'
|
6
|
+
require 'typhoeus'
|
7
|
+
|
8
|
+
require 'active_support/core_ext/date/conversions'
|
9
|
+
require 'active_support/core_ext/time/conversions'
|
10
|
+
require 'mime/types'
|
11
|
+
require 'open_uri_redirections'
|
12
|
+
require 'uri_template'
|
13
|
+
|
14
|
+
require 'csvlint/error_message'
|
15
|
+
require 'csvlint/error_collector'
|
16
|
+
require 'csvlint/validate'
|
17
|
+
require 'csvlint/field'
|
18
|
+
|
19
|
+
require 'csvlint/csvw/metadata_error'
|
20
|
+
require 'csvlint/csvw/number_format'
|
21
|
+
require 'csvlint/csvw/date_format'
|
22
|
+
require 'csvlint/csvw/property_checker'
|
23
|
+
require 'csvlint/csvw/column'
|
24
|
+
require 'csvlint/csvw/table'
|
25
|
+
require 'csvlint/csvw/table_group'
|
26
|
+
|
27
|
+
require 'csvlint/schema'
|
data/lib/csvlint/cli.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'csvlint'
|
2
|
+
require 'colorize'
|
3
|
+
require 'json'
|
4
|
+
require 'pp'
|
5
|
+
require 'thor'
|
6
|
+
|
7
|
+
module Csvlint
|
8
|
+
class Cli < Thor
|
9
|
+
|
10
|
+
desc "myfile.csv OR csvlint http://example.com/myfile.csv", "Supports validating CSV files to check their syntax and contents"
|
11
|
+
option :dump_errors, desc: "Pretty print error and warning objects.", type: :boolean, aliases: :d
|
12
|
+
option :schema, banner: "FILENAME OR URL", desc: "Schema file", aliases: :s
|
13
|
+
# Default task: validates a single CSV source, or hands over to
# fetch_schema_tables when no source could be obtained.
#
# source - optional file path or http(s) URL given on the command line;
#          read_source resolves it (or falls back to piped stdin).
#
# Exits the process with status 1 when the validation result is not valid.
def validate(source = nil)
  source = read_source(source)
  @schema = get_schema(options[:schema]) if options[:schema]
  fetch_schema_tables(@schema, options) if source.nil?

  # The flag is declared as :dump_errors, so that is the key it is stored
  # under; the previous options[:dump] lookup was always nil and silently
  # disabled the pretty-printed dump.
  valid = validate_csv(source, @schema, options[:dump_errors])
  exit 1 unless valid
end
|
21
|
+
|
22
|
+
# Shows the usage text of the validate task.
def help
  cli_class = self.class
  cli_class.command_help(shell, :validate)
end
|
25
|
+
|
26
|
+
default_task :validate
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
# Resolves the command line argument into something that can be validated.
#
# source - nil, a string matching /^http(s)?/, or a file path.
#
# Returns the string unchanged when it matches /^http(s)?/, an opened File
# for any other string, a StringIO with the piped stdin content when no
# source was given, or nil when no source was given and stdin is a terminal.
# Reports an error (which exits) when the file is missing, or when stdin
# could not be read and no schema option was supplied.
def read_source(source)
  unless source.nil?
    return source if source =~ /^http(s)?/

    begin
      return File.new(source)
    rescue Errno::ENOENT
      return_error "#{source} not found"
    end
    return source
  end

  # Nothing on the command line: only piped input can provide the data.
  return source if $stdin.tty?

  piped =
    begin
      StringIO.new(STDIN.read)
    rescue StandardError
      nil
    end
  return_error "No CSV data to validate" if piped.nil? && !options[:schema]
  piped
end
|
50
|
+
|
51
|
+
# Loads the schema named by the --schema option.
#
# schema - file name or URL handed to Csvlint::Schema.load_from_json.
#
# Returns the loaded schema object. Reports an error (which exits) when the
# metadata is invalid, when the schema cannot be found, or when the loader
# returned a Csvlint::Schema whose description is "malformed".
def get_schema(schema)
  loaded =
    begin
      Csvlint::Schema.load_from_json(schema, false)
    rescue Csvlint::Csvw::MetadataError => e
      return_error "invalid metadata: #{e.message}#{" at " + e.path if e.path}"
    rescue OpenURI::HTTPError, Errno::ENOENT
      return_error "#{options[:schema]} not found"
    end

  malformed = loaded.instance_of?(Csvlint::Schema) && loaded.description == "malformed"
  return_error "invalid metadata: malformed JSON" if malformed

  loaded
end
|
66
|
+
|
67
|
+
# Validates every table referenced by a table group schema; used when no CSV
# source was given on the command line.
#
# schema  - expected to be a Csvlint::Csvw::TableGroup; anything else is
#           reported as "No CSV data to validate." (which exits).
# options - the command options hash; :dump_errors controls pretty printing
#           of the error objects.
#
# Exits the process with status 1 when at least one table is invalid.
def fetch_schema_tables(schema, options)
  unless schema.instance_of? Csvlint::Csvw::TableGroup
    return_error "No CSV data to validate."
  end

  valid = true
  schema.tables.keys.each do |source|
    # Anything not matching /^http(s)?/ is opened as a local file, after
    # dropping a "file:" marker from the table key.
    unless source =~ /^http(s)?/
      begin
        source = source.sub("file:", "")
        source = File.new(source)
      rescue Errno::ENOENT
        return_error "#{source} not found"
      end
    end
    # The flag is declared as :dump_errors; options[:dump] was always nil.
    valid &= validate_csv(source, schema, options[:dump_errors])
  end

  exit 1 unless valid
end
|
85
|
+
|
86
|
+
# Prints one numbered error or warning line.
#
# index - zero-based position of the message; printed one-based.
# error - object responding to row, column, type and content.
# dump  - when truthy the error object is pretty printed after the line.
# color - colour used for the line when stdout is a terminal.
#
# When a Csvlint::Schema is loaded, the name of the schema field at the
# error's column is prefixed to the message.
def print_error(index, error, dump, color)
  location = ""
  location += error.row.to_s if error.row
  location += "#{error.row ? "," : ""}#{error.column.to_s}" if error.column
  if error.row || error.column
    location = "#{error.row ? "Row" : "Column"}: #{location}"
  end

  output_string = "#{index+1}. "
  if error.column && @schema && @schema.class == Csvlint::Schema
    # An error can point at a column the schema does not describe (for
    # example an extra column); skip the name prefix instead of raising
    # NoMethodError on nil.
    field = @schema.fields[error.column - 1]
    output_string += "#{field.name}: " unless field.nil?
  end
  output_string += "#{error.type}"
  output_string += ". #{location}" unless location.empty?
  output_string += ". #{error.content}" if error.content

  if $stdout.tty?
    puts output_string.colorize(color)
  else
    puts output_string
  end

  pp error if dump
end
|
111
|
+
|
112
|
+
# Prints every message of the collection in red, numbered from one.
#
# errors - collection of error or warning objects.
# dump   - forwarded to print_error.
def print_errors(errors, dump)
  return unless errors.size > 0

  errors.each_with_index do |error, position|
    print_error(position, error, dump, :red)
  end
end
|
117
|
+
|
118
|
+
# Prints a fatal message (red when stdout is a terminal) and exits with
# status 1.
def return_error(message)
  puts($stdout.tty? ? message.colorize(:red) : message)
  exit 1
end
|
126
|
+
|
127
|
+
# Runs the validator over one source and prints the verdict followed by the
# collected errors and warnings.
#
# source - String, File or another IO-like object.
# schema - schema object passed through to the validator (may be nil).
# dump   - forwarded to print_errors.
#
# Returns the validator's valid? result.
def validate_csv(source, schema, dump)
  # Reset before constructing the validator: report_lines compares against it.
  @error_count = 0

  validator = Csvlint::Validator.new(source, {}, schema, { lambda: report_lines })

  # Label used in the verdict line.
  csv =
    if source.instance_of?(String)
      source
    elsif source.instance_of?(File)
      source.path
    else
      "CSV"
    end

  verdict =
    if $stdout.tty?
      validator.valid? ? "VALID".green : "INVALID".red
    else
      validator.valid? ? "VALID" : "INVALID"
    end
  puts "\r\n#{csv} is #{verdict}"

  print_errors(validator.errors, dump)
  print_errors(validator.warnings, dump)

  validator.valid?
end
|
151
|
+
|
152
|
+
# Builds the progress callback handed to the validator: prints a red "!"
# when the error count grew since the previous call and a green "."
# otherwise, then remembers the new count.
def report_lines
  ->(row) do
    current = row.errors.count
    marker = current > @error_count ? "!".red : ".".green
    print marker
    @error_count = current
  end
end
|
163
|
+
|
164
|
+
end
|
165
|
+
end
|
@@ -0,0 +1,359 @@
|
|
1
|
+
module Csvlint
|
2
|
+
module Csvw
|
3
|
+
class Column
|
4
|
+
include Csvlint::ErrorCollector
|
5
|
+
|
6
|
+
attr_reader :id, :about_url, :datatype, :default, :lang, :name, :null, :number, :ordered, :property_url, :required, :separator, :source_number, :suppress_output, :text_direction, :titles, :value_url, :virtual, :annotations
|
7
|
+
|
8
|
+
# Stores the column description and starts with a clean error state.
#
# number - column number; also used as source_number when none is given.
# name   - column name.
# The keyword arguments map one-to-one onto the readers of this class;
# warnings are appended to the collector's warnings after reset.
def initialize(number, name, id: nil, about_url: nil, datatype: { "@id" => "http://www.w3.org/2001/XMLSchema#string" }, default: "", lang: "und", null: [""], ordered: false, property_url: nil, required: false, separator: nil, source_number: nil, suppress_output: false, text_direction: :inherit, titles: {}, value_url: nil, virtual: false, annotations: [], warnings: [])
  # Identity and position.
  @number = number
  @source_number = source_number || number
  @name = name
  @id = id
  @titles = titles
  @virtual = virtual

  # Cell interpretation.
  @datatype = datatype
  @default = default
  @lang = lang
  @null = null
  @required = required
  @separator = separator
  @ordered = ordered
  @text_direction = text_direction

  # URL templates, output control and extra annotations.
  @about_url = about_url
  @property_url = property_url
  @value_url = value_url
  @suppress_output = suppress_output
  @annotations = annotations

  # reset must run first; presumably it is what initialises @warnings
  # (provided by ErrorCollector - confirm there).
  reset
  @warnings += warnings
end
|
31
|
+
|
32
|
+
# Builds a Column from its JSON description.
#
# number               - column number.
# column_desc          - hash of property name => raw value.
# base_url, lang       - passed to the property checker.
# inherited_properties - properties inherited from the enclosing
#                        description; cloned so the caller's hash is not
#                        modified.
#
# Every property except "@type" is run through PropertyChecker and sorted by
# the type it reports; unknown types become :invalid_property warnings.
# Raises Csvlint::Csvw::MetadataError when "@type" is not 'Column'.
def self.from_json(number, column_desc, base_url=nil, lang="und", inherited_properties={})
  annotations = {}
  warnings = []
  column_properties = {}
  inherited_properties = inherited_properties.clone

  column_desc.each do |property, value|
    if property == "@type"
      raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 'Column'" if value != 'Column'
      next
    end

    checked, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang)
    unless warning.nil? || warning.empty?
      warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) }
    end

    case type
    when :annotation
      annotations[property] = checked
    when :common, :column
      column_properties[property] = checked
    when :inherited
      inherited_properties[property] = checked
    else
      warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "column: #{property}", nil)
    end
  end

  new(
    number,
    column_properties["name"],
    id: column_properties["@id"],
    datatype: inherited_properties["datatype"] || { "@id" => "http://www.w3.org/2001/XMLSchema#string" },
    lang: inherited_properties["lang"] || "und",
    null: inherited_properties["null"] || [""],
    # Taken from the raw description rather than from the checked values.
    property_url: column_desc["propertyUrl"],
    required: inherited_properties["required"] || false,
    separator: inherited_properties["separator"],
    titles: column_properties["titles"],
    virtual: column_properties["virtual"] || false,
    annotations: annotations,
    warnings: warnings
  )
end
|
70
|
+
|
71
|
+
# Checks a header cell against the titles declared for this column.
#
# header - header text found in the CSV.
#
# Only titles whose language matches the column language are accepted; a
# mismatch records an :invalid_header error for row 1. Returns valid?.
def validate_header(header)
  reset

  valid_headers = []
  if @titles
    valid_headers = @titles.map { |title_lang, names| names if Column.languages_match(title_lang, lang) }.flatten
  end

  unless valid_headers.include?(header)
    build_errors(:invalid_header, :schema, 1, @number, header, @titles)
  end

  valid?
end
|
77
|
+
|
78
|
+
# Validates one cell of this column.
#
# string_value - raw cell text; nil is treated as "".
# row          - row number used in reported errors.
#
# A cell that parses to nil (a null marker) is only checked against
# `required`; otherwise every parsed value goes through the required,
# format, length and value checks. Returns valid?.
def validate(string_value, row=nil)
  reset
  values = parse(string_value || "", row)

  if values.nil?
    validate_required(values, row)
  else
    values.each do |value|
      validate_required(value, row)
      validate_format(value, row)
      validate_length(value, row)
      validate_value(value, row)
    end
  end

  valid?
end
|
91
|
+
|
92
|
+
# Converts the raw cell text into typed values.
#
# string_value - raw cell text.
# row          - row number used in reported errors.
#
# Returns nil when the text is one of the column's null markers, otherwise
# an array with one entry per separator-delimited piece (a single entry
# when no separator is set). A piece that fails to parse records the
# parser's warning as an error and is kept as the original string.
def parse(string_value, row=nil)
  return nil if null.include? string_value

  pieces = @separator.nil? ? [string_value] : string_value.split(@separator)
  pieces.map do |piece|
    parser = DATATYPE_PARSER[@datatype["base"] || @datatype["@id"]]
    value, warning = parser.call(piece, @datatype["format"])
    if warning.nil?
      value
    else
      build_errors(warning, :schema, row, @number, piece, @datatype)
      piece
    end
  end
end
|
107
|
+
|
108
|
+
private
|
109
|
+
class << self
|
110
|
+
|
111
|
+
# Builds a parser lambda for a date/time datatype.
#
# type    - datatype URI used to create a default DateFormat when the cell
#           has no explicit format.
# warning - symbol returned when the value cannot be parsed.
#
# The lambda takes (value, format) and returns [parsed, nil] on success or
# [nil, warning] on failure.
def create_date_parser(type, warning)
  lambda do |value, format|
    format = Csvlint::Csvw::DateFormat.new(nil, type) if format.nil?
    parsed = format.parse(value)
    parsed.nil? ? [nil, warning] : [parsed, nil]
  end
end
|
119
|
+
|
120
|
+
# Builds a parser lambda that accepts a value only when the whole value
# matches the given pattern.
#
# regexp  - Regexp describing the lexical form of the datatype.
# warning - symbol returned when the value does not match.
#
# The lambda takes (value, format) and returns [value, nil] on a match or
# [nil, warning] otherwise.
def create_regexp_based_parser(regexp, warning)
  # The lexical patterns describe the complete value, so a bare =~ (which
  # finds the pattern anywhere inside the string) accepted input such as
  # "xxP1Yzz". Anchor the pattern; surrounding whitespace stays tolerated
  # so padded cells that matched before still match.
  whole_value = Regexp.new("\\A\\s*(?:#{regexp.source})\\s*\\z", regexp.options)
  lambda { |value, format|
    value =~ whole_value ? [value, nil] : [nil, warning]
  }
end
|
126
|
+
|
127
|
+
# Tells whether two language tags are compatible.
#
# l1, l2 - language tags such as "en", "en-GB" or "und".
#
# Returns true when the tags are equal, when either is "und", or when one
# tag is a "-"-delimited prefix of the other ("en" vs "en-GB"); false
# otherwise.
def languages_match(l1, l2)
  return true if l1 == l2 || l1 == "und" || l2 == "und"

  # Plain prefix comparison: interpolating the tags into a Regexp treated
  # characters such as "." or "*" as pattern syntax, and "^" matched at any
  # line start rather than at the start of the tag.
  first = l1.to_s
  second = l2.to_s
  first.start_with?("#{second}-") || second.start_with?("#{first}-")
end
|
132
|
+
end
|
133
|
+
|
134
|
+
# Records a :required error when the column is required and the value is
# nil.
def validate_required(value, row)
  return unless @required && value.nil?

  build_errors(:required, :schema, row, number, value, { "required" => @required })
end
|
137
|
+
|
138
|
+
# Checks the value against the datatype's length, minLength and maxLength
# constraints; does nothing when none of them is set.
#
# For base64Binary the length is derived from the text with trailing "="
# padding removed (characters * 3 / 4); for hexBinary it is half the number
# of characters; otherwise it is value.length.
def validate_length(value, row)
  return unless datatype["length"] || datatype["minLength"] || datatype["maxLength"]

  type_ids = [datatype["@id"], datatype["base"]]
  length = value.length
  if type_ids.include?("http://www.w3.org/2001/XMLSchema#base64Binary")
    length = value.gsub(/==?$/,"").length * 3 / 4
  end
  if type_ids.include?("http://www.w3.org/2001/XMLSchema#hexBinary")
    length = value.length / 2
  end

  minimum = datatype["minLength"]
  if minimum && length < minimum
    build_errors(:min_length, :schema, row, number, value, { "minLength" => minimum })
  end

  maximum = datatype["maxLength"]
  if maximum && length > maximum
    build_errors(:max_length, :schema, row, number, value, { "maxLength" => maximum })
  end

  exact = datatype["length"]
  if exact && length != exact
    build_errors(:length, :schema, row, number, value, { "length" => exact })
  end
end
|
149
|
+
|
150
|
+
# Records a :format error when the datatype declares a format and the
# validator registered for the datatype rejects the value.
def validate_format(value, row)
  return unless datatype["format"]

  # Resolve the type the same way parse does: a datatype that only carries
  # "@id" (no "base") used to look up nil here and fail on nil.call.
  type = datatype["base"] || datatype["@id"]
  return if DATATYPE_FORMAT_VALIDATION[type].call(value, datatype["format"])

  build_errors(:format, :schema, row, number, value, { "format" => datatype["format"] })
end
|
155
|
+
|
156
|
+
# Checks the value against the datatype's inclusive and exclusive bounds,
# recording one error per violated bound. Bounds that are not set are
# skipped.
def validate_value(value, row)
  # [datatype key, error type, comparison that signals a violation]
  [
    ["minInclusive", :min_inclusive, :<],
    ["maxInclusive", :max_inclusive, :>],
    ["minExclusive", :min_exclusive, :<=],
    ["maxExclusive", :max_exclusive, :>=]
  ].each do |key, error_type, violation|
    bound = datatype[key]
    next unless bound && value.public_send(violation, bound)

    build_errors(error_type, :schema, row, number, value, { key => bound })
  end
end
|
162
|
+
|
163
|
+
# Format check that matches the value against the format with =~.
REGEXP_VALIDATION = ->(value, format) { value =~ format }

# Format check that accepts every value.
NO_ADDITIONAL_VALIDATION = ->(value, format) { true }

# Datatype URI => format check, listed as runs of types sharing a check so
# the resulting hash keeps the original key order.
DATATYPE_FORMAT_VALIDATION = [
  [REGEXP_VALIDATION, [
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML",
    "http://www.w3.org/ns/csvw#JSON",
    "http://www.w3.org/2001/XMLSchema#anyAtomicType",
    "http://www.w3.org/2001/XMLSchema#anyURI",
    "http://www.w3.org/2001/XMLSchema#base64Binary"
  ]],
  [NO_ADDITIONAL_VALIDATION, [
    "http://www.w3.org/2001/XMLSchema#boolean",
    "http://www.w3.org/2001/XMLSchema#date",
    "http://www.w3.org/2001/XMLSchema#dateTime",
    "http://www.w3.org/2001/XMLSchema#dateTimeStamp",
    "http://www.w3.org/2001/XMLSchema#decimal",
    "http://www.w3.org/2001/XMLSchema#integer",
    "http://www.w3.org/2001/XMLSchema#long",
    "http://www.w3.org/2001/XMLSchema#int",
    "http://www.w3.org/2001/XMLSchema#short",
    "http://www.w3.org/2001/XMLSchema#byte",
    "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
    "http://www.w3.org/2001/XMLSchema#positiveInteger",
    "http://www.w3.org/2001/XMLSchema#unsignedLong",
    "http://www.w3.org/2001/XMLSchema#unsignedInt",
    "http://www.w3.org/2001/XMLSchema#unsignedShort",
    "http://www.w3.org/2001/XMLSchema#unsignedByte",
    "http://www.w3.org/2001/XMLSchema#nonPositiveInteger",
    "http://www.w3.org/2001/XMLSchema#negativeInteger",
    "http://www.w3.org/2001/XMLSchema#double"
  ]],
  [REGEXP_VALIDATION, [
    "http://www.w3.org/2001/XMLSchema#duration",
    "http://www.w3.org/2001/XMLSchema#dayTimeDuration",
    "http://www.w3.org/2001/XMLSchema#yearMonthDuration"
  ]],
  [NO_ADDITIONAL_VALIDATION, [
    "http://www.w3.org/2001/XMLSchema#float",
    "http://www.w3.org/2001/XMLSchema#gDay",
    "http://www.w3.org/2001/XMLSchema#gMonth",
    "http://www.w3.org/2001/XMLSchema#gMonthDay",
    "http://www.w3.org/2001/XMLSchema#gYear",
    "http://www.w3.org/2001/XMLSchema#gYearMonth"
  ]],
  [REGEXP_VALIDATION, [
    "http://www.w3.org/2001/XMLSchema#hexBinary",
    "http://www.w3.org/2001/XMLSchema#QName",
    "http://www.w3.org/2001/XMLSchema#string",
    "http://www.w3.org/2001/XMLSchema#normalizedString",
    "http://www.w3.org/2001/XMLSchema#token",
    "http://www.w3.org/2001/XMLSchema#language",
    "http://www.w3.org/2001/XMLSchema#Name",
    "http://www.w3.org/2001/XMLSchema#NMTOKEN"
  ]],
  [NO_ADDITIONAL_VALIDATION, [
    "http://www.w3.org/2001/XMLSchema#time"
  ]]
].flat_map { |check, types| types.map { |type| [type, check] } }.to_h
|
212
|
+
|
213
|
+
# Parser that accepts every value unchanged: returns [value, nil].
ALL_VALUES_VALID = ->(value, format) { [value, nil] }

# Parser for numeric values. Uses the given format object, or a default
# NumberFormat when none is supplied, and returns [number, nil] on success
# or [nil, :invalid_number] when the format cannot parse the value.
NUMERIC_PARSER = lambda do |value, format|
  format = Csvlint::Csvw::NumberFormat.new() if format.nil?
  number = format.parse(value)
  number.nil? ? [nil, :invalid_number] : [number, nil]
end
|
221
|
+
|
222
|
+
# Builds the parser of an integer type derived from another entry of
# DATATYPE_PARSER. The parent parser is looked up when the parser runs, so
# entries may refer to each other. Any parent warning is replaced by
# `warning`; a parsed value rejected by `in_range` yields [nil, warning].
derived_integer_parser = lambda do |parent, warning, in_range|
  lambda do |value, format|
    parsed, parent_warning = DATATYPE_PARSER[parent].call(value, format)
    if !parent_warning.nil?
      [parsed, warning]
    elsif !in_range.call(parsed)
      [nil, warning]
    else
      [parsed, parent_warning]
    end
  end
end

# Datatype URI => parser lambda taking (value, format) and returning
# [parsed_value, warning]; warning is nil when the value is acceptable.
DATATYPE_PARSER = {
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => ALL_VALUES_VALID,
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => ALL_VALUES_VALID,
  "http://www.w3.org/ns/csvw#JSON" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyURI" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#base64Binary" => ALL_VALUES_VALID,
  # Without a format "true"/"1" and "false"/"0" are recognised; with a
  # format its first element means true and its second means false.
  "http://www.w3.org/2001/XMLSchema#boolean" => lambda do |value, format|
    if format.nil?
      return [true, nil] if ["true", "1"].include? value
      return [false, nil] if ["false", "0"].include? value
    else
      return [true, nil] if value == format[0]
      return [false, nil] if value == format[1]
    end
    [value, :invalid_boolean]
  end,
  "http://www.w3.org/2001/XMLSchema#date" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
  "http://www.w3.org/2001/XMLSchema#dateTime" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
  # Exponent notation and the special values NaN/INF/-INF are rejected
  # before the numeric parser runs.
  "http://www.w3.org/2001/XMLSchema#decimal" => lambda do |value, format|
    if value =~ /(E|^(NaN|INF|-INF)$)/
      [nil, :invalid_decimal]
    else
      NUMERIC_PARSER.call(value, format)
    end
  end,
  "http://www.w3.org/2001/XMLSchema#integer" => lambda do |value, format|
    number, numeric_warning = NUMERIC_PARSER.call(value, format)
    if !numeric_warning.nil?
      [number, :invalid_integer]
    elsif !number.kind_of?(Integer)
      [nil, :invalid_integer]
    else
      [number, numeric_warning]
    end
  end,
  "http://www.w3.org/2001/XMLSchema#long" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_long,
                                ->(v) { v <= 9223372036854775807 && v >= -9223372036854775808 }),
  "http://www.w3.org/2001/XMLSchema#int" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_int,
                                ->(v) { v <= 2147483647 && v >= -2147483648 }),
  "http://www.w3.org/2001/XMLSchema#short" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_short,
                                ->(v) { v <= 32767 && v >= -32768 }),
  "http://www.w3.org/2001/XMLSchema#byte" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_byte,
                                ->(v) { v <= 127 && v >= -128 }),
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_nonNegativeInteger,
                                ->(v) { v >= 0 }),
  "http://www.w3.org/2001/XMLSchema#positiveInteger" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_positiveInteger,
                                ->(v) { v > 0 }),
  "http://www.w3.org/2001/XMLSchema#unsignedLong" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedLong,
                                ->(v) { v <= 18446744073709551615 }),
  "http://www.w3.org/2001/XMLSchema#unsignedInt" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedInt,
                                ->(v) { v <= 4294967295 }),
  "http://www.w3.org/2001/XMLSchema#unsignedShort" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedShort,
                                ->(v) { v <= 65535 }),
  "http://www.w3.org/2001/XMLSchema#unsignedByte" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#nonNegativeInteger", :invalid_unsignedByte,
                                ->(v) { v <= 255 }),
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_nonPositiveInteger,
                                ->(v) { v <= 0 }),
  "http://www.w3.org/2001/XMLSchema#negativeInteger" =>
    derived_integer_parser.call("http://www.w3.org/2001/XMLSchema#integer", :invalid_negativeInteger,
                                ->(v) { v < 0 }),
  "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
  # regular expressions here taken from XML Schema datatypes spec
  "http://www.w3.org/2001/XMLSchema#duration" =>
    create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration" =>
    create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration" =>
    create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
  "http://www.w3.org/2001/XMLSchema#gDay" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
  "http://www.w3.org/2001/XMLSchema#gMonth" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
  "http://www.w3.org/2001/XMLSchema#gMonthDay" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
  "http://www.w3.org/2001/XMLSchema#gYear" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
  "http://www.w3.org/2001/XMLSchema#gYearMonth" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
  "http://www.w3.org/2001/XMLSchema#hexBinary" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#QName" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#normalizedString" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#token" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#language" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#Name" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#NMTOKEN" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#time" =>
    create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
}
|
357
|
+
end
|
358
|
+
end
|
359
|
+
end
|