csvlint 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +4 -0
- data/.github/workflows/push.yml +14 -2
- data/.ruby-version +1 -1
- data/.standard_todo.yml +43 -0
- data/Dockerfile +16 -0
- data/Gemfile +2 -2
- data/README.md +9 -9
- data/Rakefile +7 -7
- data/csvlint.gemspec +14 -16
- data/docker_notes_for_windows.txt +20 -0
- data/features/step_definitions/cli_steps.rb +11 -11
- data/features/step_definitions/information_steps.rb +4 -4
- data/features/step_definitions/parse_csv_steps.rb +11 -11
- data/features/step_definitions/schema_validation_steps.rb +10 -10
- data/features/step_definitions/sources_steps.rb +1 -1
- data/features/step_definitions/validation_errors_steps.rb +19 -19
- data/features/step_definitions/validation_info_steps.rb +9 -9
- data/features/step_definitions/validation_warnings_steps.rb +11 -11
- data/features/support/aruba.rb +6 -6
- data/features/support/earl_formatter.rb +39 -39
- data/features/support/env.rb +10 -11
- data/features/support/load_tests.rb +107 -103
- data/features/support/webmock.rb +2 -2
- data/lib/csvlint/cli.rb +133 -130
- data/lib/csvlint/csvw/column.rb +279 -280
- data/lib/csvlint/csvw/date_format.rb +90 -92
- data/lib/csvlint/csvw/metadata_error.rb +1 -3
- data/lib/csvlint/csvw/number_format.rb +40 -32
- data/lib/csvlint/csvw/property_checker.rb +714 -717
- data/lib/csvlint/csvw/table.rb +49 -52
- data/lib/csvlint/csvw/table_group.rb +24 -23
- data/lib/csvlint/error_collector.rb +2 -0
- data/lib/csvlint/error_message.rb +0 -1
- data/lib/csvlint/field.rb +153 -141
- data/lib/csvlint/schema.rb +34 -42
- data/lib/csvlint/validate.rb +161 -143
- data/lib/csvlint/version.rb +1 -1
- data/lib/csvlint.rb +22 -23
- data/spec/csvw/column_spec.rb +15 -16
- data/spec/csvw/date_format_spec.rb +5 -7
- data/spec/csvw/number_format_spec.rb +2 -4
- data/spec/csvw/table_group_spec.rb +103 -105
- data/spec/csvw/table_spec.rb +71 -73
- data/spec/field_spec.rb +116 -121
- data/spec/schema_spec.rb +129 -139
- data/spec/spec_helper.rb +6 -6
- data/spec/validator_spec.rb +167 -190
- metadata +22 -55
@@ -1,10 +1,9 @@
|
|
1
1
|
module Csvlint
|
2
2
|
module Csvw
|
3
3
|
class DateFormat
|
4
|
-
|
5
4
|
attr_reader :pattern
|
6
5
|
|
7
|
-
def initialize(pattern, datatype=nil)
|
6
|
+
def initialize(pattern, datatype = nil)
|
8
7
|
@pattern = pattern
|
9
8
|
|
10
9
|
if @pattern.nil?
|
@@ -13,16 +12,16 @@ module Csvlint
|
|
13
12
|
else
|
14
13
|
test_pattern = pattern.clone
|
15
14
|
test_pattern.gsub!(/S+/, "")
|
16
|
-
FIELDS.keys.sort_by{|f| -f.length}.each do |field|
|
15
|
+
FIELDS.keys.sort_by { |f| -f.length }.each do |field|
|
17
16
|
test_pattern.gsub!(field, "")
|
18
17
|
end
|
19
|
-
raise Csvw::DateFormatError, "unrecognised date field symbols in date format" if
|
18
|
+
raise Csvw::DateFormatError, "unrecognised date field symbols in date format" if /[GyYuUrQqMLlwWdDFgEecahHKkjJmsSAzZOvVXx]/.match?(test_pattern)
|
20
19
|
|
21
20
|
@regexp = DATE_PATTERN_REGEXP[@pattern]
|
22
21
|
@type = @regexp.nil? ? "http://www.w3.org/2001/XMLSchema#time" : "http://www.w3.org/2001/XMLSchema#date"
|
23
|
-
@regexp
|
22
|
+
@regexp ||= TIME_PATTERN_REGEXP[@pattern]
|
24
23
|
@type = @regexp.nil? ? "http://www.w3.org/2001/XMLSchema#dateTime" : @type
|
25
|
-
@regexp
|
24
|
+
@regexp ||= DATE_TIME_PATTERN_REGEXP[@pattern]
|
26
25
|
|
27
26
|
if @regexp.nil?
|
28
27
|
regexp = @pattern
|
@@ -33,19 +32,19 @@ module Csvlint
|
|
33
32
|
|
34
33
|
regexp = regexp.sub("HH", FIELDS["HH"].to_s)
|
35
34
|
regexp = regexp.sub("mm", FIELDS["mm"].to_s)
|
36
|
-
if
|
35
|
+
if /ss\.S+/.match?(@pattern)
|
37
36
|
max_fractional_seconds = @pattern.split(".")[-1].length
|
38
|
-
regexp = regexp.sub(/ss\.S+$/, "(?<second>#{FIELDS["ss"]}(
|
37
|
+
regexp = regexp.sub(/ss\.S+$/, "(?<second>#{FIELDS["ss"]}(.[0-9]{1,#{max_fractional_seconds}})?)")
|
39
38
|
else
|
40
39
|
regexp = regexp.sub("ss", "(?<second>#{FIELDS["ss"]})")
|
41
40
|
end
|
42
41
|
|
43
|
-
if
|
42
|
+
if /yyyy/.match?(regexp)
|
44
43
|
regexp = regexp.sub("yyyy", FIELDS["yyyy"].to_s)
|
45
44
|
regexp = regexp.sub("MM", FIELDS["MM"].to_s)
|
46
45
|
regexp = regexp.sub("M", FIELDS["M"].to_s)
|
47
46
|
regexp = regexp.sub("dd", FIELDS["dd"].to_s)
|
48
|
-
regexp = regexp.sub(/d(?=[-T
|
47
|
+
regexp = regexp.sub(/d(?=[-T \/.])/, FIELDS["d"].to_s)
|
49
48
|
end
|
50
49
|
|
51
50
|
regexp = regexp.sub("XXX", FIELDS["XXX"].to_s)
|
@@ -61,7 +60,7 @@ module Csvlint
|
|
61
60
|
end
|
62
61
|
|
63
62
|
def match(value)
|
64
|
-
value
|
63
|
+
value&.match?(@regexp) ? true : false
|
65
64
|
end
|
66
65
|
|
67
66
|
def parse(value)
|
@@ -76,9 +75,9 @@ module Csvlint
|
|
76
75
|
case field
|
77
76
|
when "timezone"
|
78
77
|
tz = match["timezone"]
|
79
|
-
tz = "+00:00" if tz ==
|
80
|
-
tz +=
|
81
|
-
tz = "#{tz[0..2]}:#{tz[3..4]}" unless tz
|
78
|
+
tz = "+00:00" if tz == "Z"
|
79
|
+
tz += ":00" if tz.length == 3
|
80
|
+
tz = "#{tz[0..2]}:#{tz[3..4]}" unless /:/.match?(tz)
|
82
81
|
value[:timezone] = tz
|
83
82
|
when "second"
|
84
83
|
value[:second] = match["second"].to_f
|
@@ -96,120 +95,119 @@ module Csvlint
|
|
96
95
|
end
|
97
96
|
when "http://www.w3.org/2001/XMLSchema#dateTime"
|
98
97
|
begin
|
99
|
-
value[:dateTime] = DateTime.new(match["year"].to_i, match["month"].to_i, match["day"].to_i, match["hour"].to_i, match["minute"].to_i, (match.names.include?("second") ? match["second"].to_f : 0), match.names.include?("timezone") && match["timezone"] ? match["timezone"] :
|
98
|
+
value[:dateTime] = DateTime.new(match["year"].to_i, match["month"].to_i, match["day"].to_i, match["hour"].to_i, match["minute"].to_i, (match.names.include?("second") ? match["second"].to_f : 0), (match.names.include?("timezone") && match["timezone"]) ? match["timezone"] : "")
|
100
99
|
rescue ArgumentError
|
101
100
|
return nil
|
102
101
|
end
|
103
102
|
else
|
104
103
|
value[:dateTime] = DateTime.new(value[:year] || 0, value[:month] || 1, value[:day] || 1, value[:hour] || 0, value[:minute] || 0, value[:second] || 0, value[:timezone] || "+00:00")
|
105
104
|
end
|
106
|
-
if value[:year]
|
105
|
+
value[:string] = if value[:year]
|
107
106
|
if value[:month]
|
108
107
|
if value[:day]
|
109
108
|
if value[:hour]
|
110
109
|
# dateTime
|
111
|
-
|
110
|
+
"#{format("%04d", value[:year])}-#{format("%02d", value[:month])}-#{format("%02d", value[:day])}T#{format("%02d", value[:hour])}:#{format("%02d", value[:minute] || 0)}:#{format("%02g", value[:second] || 0)}#{value[:timezone] ? value[:timezone].sub("+00:00", "Z") : ""}"
|
112
111
|
else
|
113
112
|
# date
|
114
|
-
|
113
|
+
"#{format("%04d", value[:year])}-#{format("%02d", value[:month])}-#{format("%02d", value[:day])}#{value[:timezone] ? value[:timezone].sub("+00:00", "Z") : ""}"
|
115
114
|
end
|
116
115
|
else
|
117
116
|
# gYearMonth
|
118
|
-
|
117
|
+
"#{format("%04d", value[:year])}-#{format("%02d", value[:month])}#{value[:timezone] ? value[:timezone].sub("+00:00", "Z") : ""}"
|
119
118
|
end
|
120
119
|
else
|
121
120
|
# gYear
|
122
|
-
|
121
|
+
"#{format("%04d", value[:year])}#{value[:timezone] ? value[:timezone].sub("+00:00", "Z") : ""}"
|
123
122
|
end
|
124
123
|
elsif value[:month]
|
125
124
|
if value[:day]
|
126
125
|
# gMonthDay
|
127
|
-
|
126
|
+
"--#{format("%02d", value[:month])}-#{format("%02d", value[:day])}#{value[:timezone] ? value[:timezone].sub("+00:00", "Z") : ""}"
|
128
127
|
else
|
129
128
|
# gMonth
|
130
|
-
|
129
|
+
"--#{format("%02d", value[:month])}#{value[:timezone] ? value[:timezone].sub("+00:00", "Z") : ""}"
|
131
130
|
end
|
132
131
|
elsif value[:day]
|
133
132
|
# gDay
|
134
|
-
|
133
|
+
"---#{format("%02d", value[:day])}#{value[:timezone] ? value[:timezone].sub("+00:00", "Z") : ""}"
|
135
134
|
else
|
136
|
-
|
135
|
+
"#{format("%02d", value[:hour])}:#{format("%02d", value[:minute])}:#{format("%02g", value[:second] || 0)}#{value[:timezone] ? value[:timezone].sub("+00:00", "Z") : ""}"
|
137
136
|
end
|
138
|
-
|
137
|
+
value
|
139
138
|
end
|
140
139
|
|
141
140
|
private
|
142
|
-
FIELDS = {
|
143
|
-
"yyyy" => /(?<year>-?([1-9][0-9]{3,}|0[0-9]{3}))/,
|
144
|
-
"MM" => /(?<month>0[1-9]|1[0-2])/,
|
145
|
-
"M" => /(?<month>[1-9]|1[0-2])/,
|
146
|
-
"dd" => /(?<day>0[1-9]|[12][0-9]|3[01])/,
|
147
|
-
"d" => /(?<day>[1-9]|[12][0-9]|3[01])/,
|
148
|
-
"HH" => /(?<hour>[01][0-9]|2[0-3])/,
|
149
|
-
"mm" => /(?<minute>[0-5][0-9])/,
|
150
|
-
"ss" => /([0-6][0-9])/,
|
151
|
-
"X" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
|
152
|
-
"XX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
|
153
|
-
"XXX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
|
154
|
-
"x" => /(?<timezone>[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
|
155
|
-
"xx" => /(?<timezone>[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
|
156
|
-
"xxx" => /(?<timezone>[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
|
157
|
-
}
|
158
|
-
|
159
|
-
DATE_PATTERN_REGEXP = {
|
160
|
-
"yyyy-MM-dd" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}$"),
|
161
|
-
"yyyyMMdd" => Regexp.new("^#{FIELDS["yyyy"]}#{FIELDS["MM"]}#{FIELDS["dd"]}$"),
|
162
|
-
"dd-MM-yyyy" => Regexp.new("^#{FIELDS["dd"]}-#{FIELDS["MM"]}-#{FIELDS["yyyy"]}$"),
|
163
|
-
"d-M-yyyy" => Regexp.new("^#{FIELDS["d"]}-#{FIELDS["M"]}-#{FIELDS["yyyy"]}$"),
|
164
|
-
"MM-dd-yyyy" => Regexp.new("^#{FIELDS["MM"]}-#{FIELDS["dd"]}-#{FIELDS["yyyy"]}$"),
|
165
|
-
"M-d-yyyy" => Regexp.new("^#{FIELDS["M"]}-#{FIELDS["d"]}-#{FIELDS["yyyy"]}$"),
|
166
|
-
"dd/MM/yyyy" => Regexp.new("^#{FIELDS["dd"]}/#{FIELDS["MM"]}/#{FIELDS["yyyy"]}$"),
|
167
|
-
"d/M/yyyy" => Regexp.new("^#{FIELDS["d"]}/#{FIELDS["M"]}/#{FIELDS["yyyy"]}$"),
|
168
|
-
"MM/dd/yyyy" => Regexp.new("^#{FIELDS["MM"]}/#{FIELDS["dd"]}/#{FIELDS["yyyy"]}$"),
|
169
|
-
"M/d/yyyy" => Regexp.new("^#{FIELDS["M"]}/#{FIELDS["d"]}/#{FIELDS["yyyy"]}$"),
|
170
|
-
"dd.MM.yyyy" => Regexp.new("^#{FIELDS["dd"]}.#{FIELDS["MM"]}.#{FIELDS["yyyy"]}$"),
|
171
|
-
"d.M.yyyy" => Regexp.new("^#{FIELDS["d"]}.#{FIELDS["M"]}.#{FIELDS["yyyy"]}$"),
|
172
|
-
"MM.dd.yyyy" => Regexp.new("^#{FIELDS["MM"]}.#{FIELDS["dd"]}.#{FIELDS["yyyy"]}$"),
|
173
|
-
"M.d.yyyy" => Regexp.new("^#{FIELDS["M"]}.#{FIELDS["d"]}.#{FIELDS["yyyy"]}$")
|
174
|
-
}
|
175
|
-
|
176
|
-
TIME_PATTERN_REGEXP = {
|
177
|
-
"HH:mm:ss" => Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]})$"),
|
178
|
-
"HHmmss" => Regexp.new("^#{FIELDS["HH"]}#{FIELDS["mm"]}(?<second>#{FIELDS["ss"]})$"),
|
179
|
-
"HH:mm" => Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}$"),
|
180
|
-
"HHmm" => Regexp.new("^#{FIELDS["HH"]}#{FIELDS["mm"]}$")
|
181
|
-
}
|
182
|
-
|
183
|
-
DATE_TIME_PATTERN_REGEXP = {
|
184
|
-
"yyyy-MM-ddTHH:mm:ss" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]})$"),
|
185
|
-
"yyyy-MM-ddTHH:mm" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}$")
|
186
|
-
}
|
187
|
-
|
188
|
-
DEFAULT_REGEXP = {
|
189
|
-
"http://www.w3.org/2001/XMLSchema#date" =>
|
190
|
-
Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
|
191
|
-
"http://www.w3.org/2001/XMLSchema#dateTime" =>
|
192
|
-
Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\.[0-9]+)?)#{FIELDS["XXX"]}?$"),
|
193
|
-
"http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
|
194
|
-
Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\.[0-9]+)?)#{FIELDS["XXX"]}$"),
|
195
|
-
"http://www.w3.org/2001/XMLSchema#gDay" =>
|
196
|
-
Regexp.new("^---#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
|
197
|
-
"http://www.w3.org/2001/XMLSchema#gMonth" =>
|
198
|
-
Regexp.new("^--#{FIELDS["MM"]}#{FIELDS["XXX"]}?$"),
|
199
|
-
"http://www.w3.org/2001/XMLSchema#gMonthDay" =>
|
200
|
-
Regexp.new("^--#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
|
201
|
-
"http://www.w3.org/2001/XMLSchema#gYear" =>
|
202
|
-
Regexp.new("^#{FIELDS["yyyy"]}#{FIELDS["XXX"]}?$"),
|
203
|
-
"http://www.w3.org/2001/XMLSchema#gYearMonth" =>
|
204
|
-
Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}#{FIELDS["XXX"]}?$"),
|
205
|
-
"http://www.w3.org/2001/XMLSchema#time" =>
|
206
|
-
Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\.[0-9]+)?)#{FIELDS["XXX"]}?$")
|
207
|
-
}
|
208
141
|
|
142
|
+
FIELDS = {
|
143
|
+
"yyyy" => /(?<year>-?([1-9][0-9]{3,}|0[0-9]{3}))/,
|
144
|
+
"MM" => /(?<month>0[1-9]|1[0-2])/,
|
145
|
+
"M" => /(?<month>[1-9]|1[0-2])/,
|
146
|
+
"dd" => /(?<day>0[1-9]|[12][0-9]|3[01])/,
|
147
|
+
"d" => /(?<day>[1-9]|[12][0-9]|3[01])/,
|
148
|
+
"HH" => /(?<hour>[01][0-9]|2[0-3])/,
|
149
|
+
"mm" => /(?<minute>[0-5][0-9])/,
|
150
|
+
"ss" => /([0-6][0-9])/,
|
151
|
+
"X" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
|
152
|
+
"XX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
|
153
|
+
"XXX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
|
154
|
+
"x" => /(?<timezone>[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
|
155
|
+
"xx" => /(?<timezone>[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
|
156
|
+
"xxx" => /(?<timezone>[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/
|
157
|
+
}
|
158
|
+
|
159
|
+
DATE_PATTERN_REGEXP = {
|
160
|
+
"yyyy-MM-dd" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}$"),
|
161
|
+
"yyyyMMdd" => Regexp.new("^#{FIELDS["yyyy"]}#{FIELDS["MM"]}#{FIELDS["dd"]}$"),
|
162
|
+
"dd-MM-yyyy" => Regexp.new("^#{FIELDS["dd"]}-#{FIELDS["MM"]}-#{FIELDS["yyyy"]}$"),
|
163
|
+
"d-M-yyyy" => Regexp.new("^#{FIELDS["d"]}-#{FIELDS["M"]}-#{FIELDS["yyyy"]}$"),
|
164
|
+
"MM-dd-yyyy" => Regexp.new("^#{FIELDS["MM"]}-#{FIELDS["dd"]}-#{FIELDS["yyyy"]}$"),
|
165
|
+
"M-d-yyyy" => Regexp.new("^#{FIELDS["M"]}-#{FIELDS["d"]}-#{FIELDS["yyyy"]}$"),
|
166
|
+
"dd/MM/yyyy" => Regexp.new("^#{FIELDS["dd"]}/#{FIELDS["MM"]}/#{FIELDS["yyyy"]}$"),
|
167
|
+
"d/M/yyyy" => Regexp.new("^#{FIELDS["d"]}/#{FIELDS["M"]}/#{FIELDS["yyyy"]}$"),
|
168
|
+
"MM/dd/yyyy" => Regexp.new("^#{FIELDS["MM"]}/#{FIELDS["dd"]}/#{FIELDS["yyyy"]}$"),
|
169
|
+
"M/d/yyyy" => Regexp.new("^#{FIELDS["M"]}/#{FIELDS["d"]}/#{FIELDS["yyyy"]}$"),
|
170
|
+
"dd.MM.yyyy" => Regexp.new("^#{FIELDS["dd"]}.#{FIELDS["MM"]}.#{FIELDS["yyyy"]}$"),
|
171
|
+
"d.M.yyyy" => Regexp.new("^#{FIELDS["d"]}.#{FIELDS["M"]}.#{FIELDS["yyyy"]}$"),
|
172
|
+
"MM.dd.yyyy" => Regexp.new("^#{FIELDS["MM"]}.#{FIELDS["dd"]}.#{FIELDS["yyyy"]}$"),
|
173
|
+
"M.d.yyyy" => Regexp.new("^#{FIELDS["M"]}.#{FIELDS["d"]}.#{FIELDS["yyyy"]}$")
|
174
|
+
}
|
175
|
+
|
176
|
+
TIME_PATTERN_REGEXP = {
|
177
|
+
"HH:mm:ss" => Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]})$"),
|
178
|
+
"HHmmss" => Regexp.new("^#{FIELDS["HH"]}#{FIELDS["mm"]}(?<second>#{FIELDS["ss"]})$"),
|
179
|
+
"HH:mm" => Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}$"),
|
180
|
+
"HHmm" => Regexp.new("^#{FIELDS["HH"]}#{FIELDS["mm"]}$")
|
181
|
+
}
|
182
|
+
|
183
|
+
DATE_TIME_PATTERN_REGEXP = {
|
184
|
+
"yyyy-MM-ddTHH:mm:ss" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]})$"),
|
185
|
+
"yyyy-MM-ddTHH:mm" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}$")
|
186
|
+
}
|
187
|
+
|
188
|
+
DEFAULT_REGEXP = {
|
189
|
+
"http://www.w3.org/2001/XMLSchema#date" =>
|
190
|
+
Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
|
191
|
+
"http://www.w3.org/2001/XMLSchema#dateTime" =>
|
192
|
+
Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(.[0-9]+)?)#{FIELDS["XXX"]}?$"),
|
193
|
+
"http://www.w3.org/2001/XMLSchema#dateTimeStamp" =>
|
194
|
+
Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(.[0-9]+)?)#{FIELDS["XXX"]}$"),
|
195
|
+
"http://www.w3.org/2001/XMLSchema#gDay" =>
|
196
|
+
Regexp.new("^---#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
|
197
|
+
"http://www.w3.org/2001/XMLSchema#gMonth" =>
|
198
|
+
Regexp.new("^--#{FIELDS["MM"]}#{FIELDS["XXX"]}?$"),
|
199
|
+
"http://www.w3.org/2001/XMLSchema#gMonthDay" =>
|
200
|
+
Regexp.new("^--#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"),
|
201
|
+
"http://www.w3.org/2001/XMLSchema#gYear" =>
|
202
|
+
Regexp.new("^#{FIELDS["yyyy"]}#{FIELDS["XXX"]}?$"),
|
203
|
+
"http://www.w3.org/2001/XMLSchema#gYearMonth" =>
|
204
|
+
Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}#{FIELDS["XXX"]}?$"),
|
205
|
+
"http://www.w3.org/2001/XMLSchema#time" =>
|
206
|
+
Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(.[0-9]+)?)#{FIELDS["XXX"]}?$")
|
207
|
+
}
|
209
208
|
end
|
210
209
|
|
211
210
|
class DateFormatError < StandardError
|
212
|
-
|
213
211
|
end
|
214
212
|
end
|
215
213
|
end
|
@@ -1,26 +1,25 @@
|
|
1
1
|
module Csvlint
|
2
2
|
module Csvw
|
3
3
|
class NumberFormat
|
4
|
-
|
5
4
|
attr_reader :integer, :pattern, :prefix, :numeric_part, :suffix, :grouping_separator, :decimal_separator, :primary_grouping_size, :secondary_grouping_size, :fractional_grouping_size
|
6
5
|
|
7
|
-
def initialize(pattern=nil, grouping_separator=nil, decimal_separator=".", integer=nil)
|
6
|
+
def initialize(pattern = nil, grouping_separator = nil, decimal_separator = ".", integer = nil)
|
8
7
|
@pattern = pattern
|
9
8
|
@integer = integer
|
10
9
|
if @integer.nil?
|
11
|
-
if @pattern.nil?
|
12
|
-
|
10
|
+
@integer = if @pattern.nil?
|
11
|
+
nil
|
13
12
|
else
|
14
|
-
|
13
|
+
!@pattern.include?(decimal_separator)
|
15
14
|
end
|
16
15
|
end
|
17
16
|
@grouping_separator = grouping_separator || (@pattern.nil? ? nil : ",")
|
18
17
|
@decimal_separator = decimal_separator || "."
|
19
18
|
if pattern.nil?
|
20
|
-
if integer
|
21
|
-
|
19
|
+
@regexp = if integer
|
20
|
+
INTEGER_REGEXP
|
22
21
|
else
|
23
|
-
|
22
|
+
Regexp.new("^(([-+]?[0-9]+(\\.[0-9]+)?([Ee][-+]?[0-9]+)?[%‰]?)|NaN|INF|-INF)$")
|
24
23
|
end
|
25
24
|
else
|
26
25
|
numeric_part_regexp = Regexp.new("(?<numeric_part>[-+]?([0#Ee]|#{Regexp.escape(@grouping_separator)}|#{Regexp.escape(@decimal_separator)})+)")
|
@@ -47,18 +46,30 @@ module Csvlint
|
|
47
46
|
numeric_part_regexp = "[-+]?"
|
48
47
|
end
|
49
48
|
|
50
|
-
min_integer_digits = integer_part.gsub(@grouping_separator, "").
|
51
|
-
min_fraction_digits = fractional_part.gsub(@grouping_separator, "").
|
49
|
+
min_integer_digits = integer_part.gsub(@grouping_separator, "").delete("#").length
|
50
|
+
min_fraction_digits = fractional_part.gsub(@grouping_separator, "").delete("#").length
|
52
51
|
max_fraction_digits = fractional_part.gsub(@grouping_separator, "").length
|
53
|
-
min_exponent_digits = exponent_part.
|
52
|
+
min_exponent_digits = exponent_part.delete("#").length
|
54
53
|
max_exponent_digits = exponent_part.length
|
55
54
|
|
56
55
|
integer_parts = integer_part.split(@grouping_separator)[1..-1]
|
57
|
-
@primary_grouping_size =
|
58
|
-
|
56
|
+
@primary_grouping_size = begin
|
57
|
+
integer_parts[-1].length
|
58
|
+
rescue
|
59
|
+
0
|
60
|
+
end
|
61
|
+
@secondary_grouping_size = begin
|
62
|
+
integer_parts[-2].length
|
63
|
+
rescue
|
64
|
+
@primary_grouping_size
|
65
|
+
end
|
59
66
|
|
60
67
|
fractional_parts = fractional_part.split(@grouping_separator)[0..-2]
|
61
|
-
@fractional_grouping_size =
|
68
|
+
@fractional_grouping_size = begin
|
69
|
+
fractional_parts[0].length
|
70
|
+
rescue
|
71
|
+
0
|
72
|
+
end
|
62
73
|
|
63
74
|
if @primary_grouping_size == 0
|
64
75
|
integer_regexp = "[0-9]*[0-9]{#{min_integer_digits}}"
|
@@ -67,7 +78,7 @@ module Csvlint
|
|
67
78
|
secondary_groups = "([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)})*"
|
68
79
|
if min_integer_digits > @primary_grouping_size
|
69
80
|
remaining_req_digits = min_integer_digits - @primary_grouping_size
|
70
|
-
req_secondary_groups = remaining_req_digits / @secondary_grouping_size > 0 ? "([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)}){#{remaining_req_digits / @secondary_grouping_size}}" : ""
|
81
|
+
req_secondary_groups = (remaining_req_digits / @secondary_grouping_size > 0) ? "([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)}){#{remaining_req_digits / @secondary_grouping_size}}" : ""
|
71
82
|
if remaining_req_digits % @secondary_grouping_size > 0
|
72
83
|
final_req_digits = "[0-9]{#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
|
73
84
|
final_opt_digits = "[0-9]{0,#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}"
|
@@ -76,8 +87,8 @@ module Csvlint
|
|
76
87
|
integer_regexp = "(#{leading_regexp}#{secondary_groups})?#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}"
|
77
88
|
end
|
78
89
|
else
|
79
|
-
final_req_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{#{@primary_grouping_size - min_integer_digits}}" : ""
|
80
|
-
final_opt_digits = @primary_grouping_size > min_integer_digits ? "[0-9]{0,#{@primary_grouping_size - min_integer_digits}}" : ""
|
90
|
+
final_req_digits = (@primary_grouping_size > min_integer_digits) ? "[0-9]{#{@primary_grouping_size - min_integer_digits}}" : ""
|
91
|
+
final_opt_digits = (@primary_grouping_size > min_integer_digits) ? "[0-9]{0,#{@primary_grouping_size - min_integer_digits}}" : ""
|
81
92
|
integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{min_integer_digits}}"
|
82
93
|
end
|
83
94
|
end
|
@@ -101,7 +112,7 @@ module Csvlint
|
|
101
112
|
fractional_regexp += "[0-9]{#{@fractional_grouping_size}}"
|
102
113
|
# additional groups of required digits - something like "(,[0-9]{3}){1}"
|
103
114
|
fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){#{min_fraction_digits / @fractional_grouping_size - 1}}" if min_fraction_digits / @fractional_grouping_size > 1
|
104
|
-
fractional_regexp +=
|
115
|
+
fractional_regexp += Regexp.escape(@grouping_separator).to_s if min_fraction_digits % @fractional_grouping_size > 0
|
105
116
|
end
|
106
117
|
# additional required digits - something like ",[0-9]{1}"
|
107
118
|
fractional_regexp += "[0-9]{#{min_fraction_digits % @fractional_grouping_size}}" if min_fraction_digits % @fractional_grouping_size > 0
|
@@ -168,7 +179,7 @@ module Csvlint
|
|
168
179
|
end
|
169
180
|
|
170
181
|
def match(value)
|
171
|
-
value
|
182
|
+
value&.match?(@regexp) ? true : false
|
172
183
|
end
|
173
184
|
|
174
185
|
def parse(value)
|
@@ -176,30 +187,28 @@ module Csvlint
|
|
176
187
|
return nil if !@grouping_separator.nil? && value =~ Regexp.new("((^#{Regexp.escape(@grouping_separator)})|#{Regexp.escape(@grouping_separator)}{2})")
|
177
188
|
value.gsub!(@grouping_separator, "") unless @grouping_separator.nil?
|
178
189
|
value.gsub!(@decimal_separator, ".") unless @decimal_separator.nil?
|
179
|
-
if value
|
190
|
+
if value&.match?(@regexp)
|
180
191
|
case value
|
181
192
|
when "NaN"
|
182
|
-
|
193
|
+
Float::NAN
|
183
194
|
when "INF"
|
184
|
-
|
195
|
+
Float::INFINITY
|
185
196
|
when "-INF"
|
186
|
-
|
197
|
+
-Float::INFINITY
|
187
198
|
else
|
188
199
|
case value[-1]
|
189
200
|
when "%"
|
190
|
-
|
201
|
+
value.to_f / 100
|
191
202
|
when "‰"
|
192
|
-
|
203
|
+
value.to_f / 1000
|
193
204
|
else
|
194
205
|
if @integer.nil?
|
195
|
-
|
206
|
+
value.include?(".") ? value.to_f : value.to_i
|
196
207
|
else
|
197
|
-
|
208
|
+
@integer ? value.to_i : value.to_f
|
198
209
|
end
|
199
210
|
end
|
200
211
|
end
|
201
|
-
else
|
202
|
-
return nil
|
203
212
|
end
|
204
213
|
else
|
205
214
|
match = @regexp.match(value)
|
@@ -210,17 +219,16 @@ module Csvlint
|
|
210
219
|
number = @integer ? number.to_i : number.to_f
|
211
220
|
number = number.to_f / 100 if match["prefix"].include?("%") || match["suffix"].include?("%")
|
212
221
|
number = number.to_f / 1000 if match["prefix"].include?("‰") || match["suffix"].include?("‰")
|
213
|
-
|
222
|
+
number
|
214
223
|
end
|
215
224
|
end
|
216
225
|
|
217
226
|
private
|
218
|
-
INTEGER_REGEXP = /^[-+]?[0-9]+[%‰]?$/
|
219
227
|
|
228
|
+
INTEGER_REGEXP = /^[-+]?[0-9]+[%‰]?$/
|
220
229
|
end
|
221
230
|
|
222
231
|
class NumberFormatError < StandardError
|
223
|
-
|
224
232
|
end
|
225
233
|
end
|
226
234
|
end
|