dsv7-parser 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +22 -0
- data/.ruby-version +1 -0
- data/AGENTS.md +255 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +61 -0
- data/LICENSE +21 -0
- data/README.md +370 -0
- data/Rakefile +24 -0
- data/TODO.md +20 -0
- data/dsv7-parser.gemspec +33 -0
- data/lib/dsv7/lex.rb +27 -0
- data/lib/dsv7/parser/engine.rb +116 -0
- data/lib/dsv7/parser/io_util.rb +42 -0
- data/lib/dsv7/parser/version.rb +7 -0
- data/lib/dsv7/parser.rb +66 -0
- data/lib/dsv7/stream.rb +61 -0
- data/lib/dsv7/validator/cardinality.rb +168 -0
- data/lib/dsv7/validator/core.rb +57 -0
- data/lib/dsv7/validator/line_analyzer.rb +141 -0
- data/lib/dsv7/validator/line_analyzer_common.rb +140 -0
- data/lib/dsv7/validator/result.rb +34 -0
- data/lib/dsv7/validator/schemas/base.rb +50 -0
- data/lib/dsv7/validator/schemas/erg_schema.rb +72 -0
- data/lib/dsv7/validator/schemas/vml_schema.rb +50 -0
- data/lib/dsv7/validator/schemas/vrl_schema.rb +78 -0
- data/lib/dsv7/validator/schemas/wk_schema.rb +65 -0
- data/lib/dsv7/validator/types/common.rb +42 -0
- data/lib/dsv7/validator/types/datetime.rb +67 -0
- data/lib/dsv7/validator/types/enums1.rb +74 -0
- data/lib/dsv7/validator/types/enums2.rb +117 -0
- data/lib/dsv7/validator/types.rb +17 -0
- data/lib/dsv7/validator.rb +51 -0
- data/specification/dsv7/dsv7_specification.md +1305 -0
- metadata +78 -0
data/README.md
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
# dsv7-parser
|
|
2
|
+
|
|
3
|
+
Ruby gem providing a streaming (SAX-style) parser and a validator for the DSV7 swim file format.
|
|
4
|
+
|
|
5
|
+
## Requirements
|
|
6
|
+
|
|
7
|
+
- Ruby >= 2.7.0
|
|
8
|
+
|
|
9
|
+
## Specification
|
|
10
|
+
|
|
11
|
+
- DSV-Standard zur Datenübermittlung "Format 7" (external, DE): https://www.dsv.de/de/service/formulare/schwimmen/
|
|
12
|
+
|
|
13
|
+
## Validator
|
|
14
|
+
|
|
15
|
+
Basic envelope checks plus element validation for all four list types (WKDL, VML, ERG, VRL) are available via one entrypoint:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
require 'dsv7/parser'
|
|
19
|
+
|
|
20
|
+
# Pass a path, IO, or a String with file content
|
|
21
|
+
result = Dsv7::Validator.validate('path/to/file.DSV7')
|
|
22
|
+
|
|
23
|
+
puts "valid? #{result.valid?}"
|
|
24
|
+
puts "list_type: #{result.list_type}"
|
|
25
|
+
puts "version: #{result.version}"
|
|
26
|
+
puts "errors: #{result.errors.inspect}"
|
|
27
|
+
puts "warnings: #{result.warnings.inspect}"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Accepted inputs:
|
|
31
|
+
|
|
32
|
+
- File path String: streamed from disk
|
|
33
|
+
- IO object (e.g., `File.open` or `StringIO`): streamed
|
|
34
|
+
- Content String: streamed via `StringIO`
|
|
35
|
+
|
|
36
|
+
Structural checks (all list types):
|
|
37
|
+
|
|
38
|
+
- First effective line is `FORMAT:<Listentyp>;7;` (whitespace tolerated)
|
|
39
|
+
- List type is one of: `Wettkampfdefinitionsliste`, `Vereinsmeldeliste`,
|
|
40
|
+
`Wettkampfergebnisliste`, `Vereinsergebnisliste`
|
|
41
|
+
- UTF‑8 encoding, BOM detection (BOM is an error)
|
|
42
|
+
- Inline comments `(* ... *)` stripped; unbalanced `(*`/`*)` on a line is an error
|
|
43
|
+
- Non‑empty data lines after FORMAT must contain at least one `;`
|
|
44
|
+
- Terminator `DATEIENDE` present; no effective content after it
|
|
45
|
+
|
|
46
|
+
Filename guidance (when validating by path):
|
|
47
|
+
|
|
48
|
+
- Warns if the filename does not match `JJJJ-MM-TT-Ort-Zusatz.DSV7`
|
|
49
|
+
|
|
50
|
+
Minimal example (generic list type):
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
content = <<~DSV
|
|
54
|
+
FORMAT:Vereinsmeldeliste;7;
|
|
55
|
+
DATA;ok
|
|
56
|
+
DATEIENDE
|
|
57
|
+
DSV
|
|
58
|
+
|
|
59
|
+
result = Dsv7::Validator.validate(content)
|
|
60
|
+
puts result.valid? # => true
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Wettkampfdefinitionsliste validation (cardinality + attribute types):
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
wkdl = <<~DSV
|
|
67
|
+
FORMAT:Wettkampfdefinitionsliste;7;
|
|
68
|
+
ERZEUGER:Soft;1.0;mail@example.com;
|
|
69
|
+
VERANSTALTUNG:Name;Ort;25;HANDZEIT;
|
|
70
|
+
VERANSTALTUNGSORT:Schwimmstadion;Strasse;12345;Ort;GER;tel;fax;mail@ex.amp.le;
|
|
71
|
+
AUSSCHREIBUNGIMNETZ:;
|
|
72
|
+
VERANSTALTER:Club;
|
|
73
|
+
AUSRICHTER:Verein;Kontakt;;;Ort;GER;;;kontakt@example.com;
|
|
74
|
+
MELDEADRESSE:Kontakt;;;;;;;kontakt@example.com;
|
|
75
|
+
MELDESCHLUSS:01.01.2024;12:00;
|
|
76
|
+
ABSCHNITT:1;01.01.2024;;;10:00;;
|
|
77
|
+
WETTKAMPF:1;V;1;;100;F;GL;M;SW;;;
|
|
78
|
+
MELDEGELD:EINZELMELDEGELD;2,00;;
|
|
79
|
+
DATEIENDE
|
|
80
|
+
DSV
|
|
81
|
+
|
|
82
|
+
wk_result = Dsv7::Validator.validate(wkdl)
|
|
83
|
+
puts wk_result.valid? # => true
|
|
84
|
+
puts wk_result.errors # => []
|
|
85
|
+
puts wk_result.warnings # => []
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Vereinsmeldeliste validation (cardinality + attribute types):
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
vml = <<~DSV
|
|
92
|
+
FORMAT:Vereinsmeldeliste;7;
|
|
93
|
+
ERZEUGER:Soft;1.0;mail@example.com;
|
|
94
|
+
VERANSTALTUNG:Name;Ort;25;HANDZEIT;
|
|
95
|
+
ABSCHNITT:1;01.01.2024;10:00;N;
|
|
96
|
+
WETTKAMPF:1;V;1;;100;F;GL;M;;;
|
|
97
|
+
VEREIN:Mein Verein;1234;17;GER;
|
|
98
|
+
ANSPRECHPARTNER:Beispiel, Alice;;;;;;;alice@example.com;
|
|
99
|
+
DATEIENDE
|
|
100
|
+
DSV
|
|
101
|
+
|
|
102
|
+
vml_result = Dsv7::Validator.validate(vml)
|
|
103
|
+
puts vml_result.valid? # => true
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Validated VML elements: ERZEUGER, VERANSTALTUNG, ABSCHNITT, WETTKAMPF, VEREIN,
|
|
107
|
+
ANSPRECHPARTNER, KARIMELDUNG, KARIABSCHNITT, TRAINER, PNMELDUNG, HANDICAP,
|
|
108
|
+
STARTPN, STMELDUNG, STARTST, STAFFELPERSON.
|
|
109
|
+
|
|
110
|
+
Validated WKDL elements: ERZEUGER, VERANSTALTUNG, VERANSTALTUNGSORT, AUSSCHREIBUNGIMNETZ,
|
|
111
|
+
VERANSTALTER, AUSRICHTER, MELDEADRESSE, MELDESCHLUSS, BANKVERBINDUNG, BESONDERES,
|
|
112
|
+
NACHWEIS, ABSCHNITT, WETTKAMPF, WERTUNG, MELDEGELD.
|
|
113
|
+
|
|
114
|
+
Validated ERG elements: ERZEUGER, VERANSTALTUNG, VERANSTALTER, AUSRICHTER, ABSCHNITT,
|
|
115
|
+
KAMPFGERICHT, WETTKAMPF, WERTUNG, VEREIN, PNERGEBNIS, PNZWISCHENZEIT, PNREAKTION,
|
|
116
|
+
STAFFELERGEBNIS/STERGEBNIS, STAFFELPERSON, STZWISCHENZEIT, STABLOESE.
|
|
117
|
+
|
|
118
|
+
Validated VRL elements: ERZEUGER, VERANSTALTUNG, VERANSTALTER, AUSRICHTER, ABSCHNITT,
|
|
119
|
+
KAMPFGERICHT, WETTKAMPF, WERTUNG, VEREIN, PERSON, PERSONENERGEBNIS, PNZWISCHENZEIT,
|
|
120
|
+
PNREAKTION, STAFFEL, STAFFELPERSON, STAFFELERGEBNIS/STERGEBNIS, STZWISCHENZEIT, STABLOESE.
|
|
121
|
+
|
|
122
|
+
Common error and warning examples:
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
# 1) Unknown list type and missing DATEIENDE
|
|
126
|
+
bad = "FORMAT:Unbekannt;7;\n"
|
|
127
|
+
r = Dsv7::Validator.validate(bad)
|
|
128
|
+
r.errors.any? { |e| e.include?("Unknown list type in FORMAT: 'Unbekannt'") }
|
|
129
|
+
r.errors.include?("Missing 'DATEIENDE' terminator line")
|
|
130
|
+
|
|
131
|
+
# 2) Unsupported version
|
|
132
|
+
r = Dsv7::Validator.validate("FORMAT:Vereinsergebnisliste;6;\nDATEIENDE\n")
|
|
133
|
+
r.errors.any? { |e| e.include?("Unsupported format version '6', expected '7'") }
|
|
134
|
+
|
|
135
|
+
# 3) Unbalanced comment delimiters
|
|
136
|
+
r = Dsv7::Validator.validate("FORMAT:Vereinsmeldeliste;7; (* open\nDATEIENDE\n")
|
|
137
|
+
r.errors.any? { |e| e.include?('Unmatched comment delimiters') }
|
|
138
|
+
|
|
139
|
+
# 4) CRLF detection (warning only)
|
|
140
|
+
crlf = "FORMAT:Vereinsmeldeliste;7;\r\nDATEIENDE\r\n"
|
|
141
|
+
r = Dsv7::Validator.validate(crlf)
|
|
142
|
+
r.valid? # => true
|
|
143
|
+
r.warnings # => ['CRLF line endings detected']
|
|
144
|
+
|
|
145
|
+
# 5) Missing delimiter ';' in a data line
|
|
146
|
+
r = Dsv7::Validator.validate("FORMAT:Vereinsmeldeliste;7;\nDATA no semicolon\nDATEIENDE\n")
|
|
147
|
+
r.errors.any? { |e| e.include?("Missing attribute delimiter ';'") }
|
|
148
|
+
|
|
149
|
+
# 6) Filename pattern warning
|
|
150
|
+
File.write('tmp/badname.txt', "FORMAT:Vereinsmeldeliste;7;\nDATEIENDE\n")
|
|
151
|
+
begin
|
|
152
|
+
r = Dsv7::Validator.validate('tmp/badname.txt')
|
|
153
|
+
r.warnings.first.include?("does not follow 'JJJJ-MM-TT-Ort-Zusatz.DSV7'")
|
|
154
|
+
ensure
|
|
155
|
+
File.delete('tmp/badname.txt')
|
|
156
|
+
end
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Parser (Streaming: WKDL, VML, ERG, VRL)
|
|
160
|
+
|
|
161
|
+
The parser provides a streaming API via one generic entrypoint and four type-specific helpers.
|
|
162
|
+
|
|
163
|
+
- Generic (auto-detects list type): `Dsv7::Parser.parse(...)`
|
|
164
|
+
|
|
165
|
+
Type-specific helpers (enforce list type):
|
|
166
|
+
|
|
167
|
+
- Wettkampfdefinitionsliste (WKDL): `Dsv7::Parser.parse_wettkampfdefinitionsliste(...)`
|
|
168
|
+
- Vereinsmeldeliste (VML): `Dsv7::Parser.parse_vereinsmeldeliste(...)`
|
|
169
|
+
- Wettkampfergebnisliste (ERG): `Dsv7::Parser.parse_wettkampfergebnisliste(...)`
|
|
170
|
+
- Vereinsergebnisliste (VRL): `Dsv7::Parser.parse_vereinsergebnisliste(...)`
|
|
171
|
+
|
|
172
|
+
It is tolerant and focuses on extracting elements efficiently; use the validator for strict checks.
|
|
173
|
+
|
|
174
|
+
Generic example (auto-detect list type):
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
enum = Dsv7::Parser.parse('path/to/file.DSV7')
|
|
178
|
+
enum.each do |type, payload, line_number|
|
|
179
|
+
case type
|
|
180
|
+
when :format
|
|
181
|
+
# payload = { list_type: 'Vereinsmeldeliste', version: '7' }
|
|
182
|
+
when :element
|
|
183
|
+
# payload = { name: 'ERZEUGER', attrs: [...] }
|
|
184
|
+
when :end
|
|
185
|
+
# reached DATEIENDE
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Key points:
|
|
191
|
+
|
|
192
|
+
- Input: pass a file path, an IO, or a String with file content.
|
|
193
|
+
- Yields events: `:format`, `:element`, `:end` along with payload and line number.
|
|
194
|
+
- Strips inline comments `(* ... *)` and scrubs invalid UTF‑8 in lines.
|
|
195
|
+
- Accepts UTF‑8 with or without BOM (validator will still report BOM as an error).
|
|
196
|
+
|
|
197
|
+
Basic example (block style):
|
|
198
|
+
|
|
199
|
+
```
|
|
200
|
+
require 'dsv7/parser'
|
|
201
|
+
|
|
202
|
+
content = <<~DSV
|
|
203
|
+
(* header comment *)
|
|
204
|
+
FORMAT:Wettkampfdefinitionsliste;7;
|
|
205
|
+
ERZEUGER:Soft;1.0;mail@example.com;
|
|
206
|
+
VERANSTALTUNG:Name;Ort;25;HANDZEIT; (* inline *)
|
|
207
|
+
MELDESCHLUSS:01.01.2024;12:00;
|
|
208
|
+
DATEIENDE
|
|
209
|
+
DSV
|
|
210
|
+
|
|
211
|
+
Dsv7::Parser.parse(content) do |type, payload, line_number|
|
|
212
|
+
case type
|
|
213
|
+
when :format
|
|
214
|
+
# { list_type: 'Wettkampfdefinitionsliste', version: '7' }
|
|
215
|
+
p [:format, payload, line_number]
|
|
216
|
+
when :element
|
|
217
|
+
# { name: 'ERZEUGER', attrs: ['Soft','1.0','mail@example.com'] }
|
|
218
|
+
p [:element, payload, line_number]
|
|
219
|
+
when :end
|
|
220
|
+
p [:end, line_number]
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Enumerator style:
|
|
226
|
+
|
|
227
|
+
```
|
|
228
|
+
enum = Dsv7::Parser.parse('path/to/2002-03-10-Duisburg-Wk.DSV7')
|
|
229
|
+
enum.each do |type, payload, line_number|
|
|
230
|
+
# same triplets as the block example
|
|
231
|
+
end
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
Building a simple structure (header + elements) from the stream:
|
|
235
|
+
|
|
236
|
+
```
|
|
237
|
+
data = { format: nil, elements: [] }
|
|
238
|
+
|
|
239
|
+
Dsv7::Parser.parse(content) do |type, payload, line_number|
|
|
240
|
+
case type
|
|
241
|
+
when :format
|
|
242
|
+
data[:format] = payload # { list_type: 'Wettkampfdefinitionsliste', version: '7' }
|
|
243
|
+
when :element
|
|
244
|
+
data[:elements] << { name: payload[:name], attrs: payload[:attrs], line_number: line_number }
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
# Example: pick only WETTKAMPF rows
|
|
249
|
+
wettkaempfe = data[:elements]
|
|
250
|
+
.select { |e| e[:name] == 'WETTKAMPF' }
|
|
251
|
+
.map { |e| e[:attrs] } # arrays of attributes per row
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Combining validation with parsing:
|
|
255
|
+
|
|
256
|
+
```
|
|
257
|
+
result = Dsv7::Validator.validate('path/to/file.DSV7')
|
|
258
|
+
if result.valid?
|
|
259
|
+
Dsv7::Parser.parse('path/to/file.DSV7') do |type, payload, line_number|
|
|
260
|
+
# consume events
|
|
261
|
+
end
|
|
262
|
+
else
|
|
263
|
+
warn "Invalid DSV7: #{result.errors.join('; ')}"
|
|
264
|
+
end
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
VML usage mirrors WKDL:
|
|
268
|
+
|
|
269
|
+
```
|
|
270
|
+
content = <<~DSV
|
|
271
|
+
FORMAT:Vereinsmeldeliste;7;
|
|
272
|
+
ERZEUGER:Soft;1.0;mail@example.com;
|
|
273
|
+
VERANSTALTUNG:Name;Ort;25;HANDZEIT;
|
|
274
|
+
ABSCHNITT:1;01.01.2024;10:00;N;
|
|
275
|
+
WETTKAMPF:1;V;1;;100;F;GL;M;;;
|
|
276
|
+
VEREIN:Mein Verein;1234;17;GER;
|
|
277
|
+
ANSPRECHPARTNER:Beispiel, Alice;;;;;;;alice@example.com;
|
|
278
|
+
DATEIENDE
|
|
279
|
+
DSV
|
|
280
|
+
|
|
281
|
+
Dsv7::Parser.parse(content) do |type, payload, line_number|
|
|
282
|
+
# same :format, :element, :end semantics
|
|
283
|
+
end
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
ERG usage mirrors WKDL as well:
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
content = <<~DSV
|
|
290
|
+
FORMAT:Wettkampfergebnisliste;7;
|
|
291
|
+
ERZEUGER:Soft;1.0;mail@example.com;
|
|
292
|
+
VERANSTALTUNG:Name;Ort;25;HANDZEIT;
|
|
293
|
+
ABSCHNITT:1;01.01.2024;10:00;N;
|
|
294
|
+
WETTKAMPF:1;A;1;;100;F;GL;M;SW;;;
|
|
295
|
+
VEREIN:SV Hansa Adorf;1234;17;GER;
|
|
296
|
+
DATEIENDE
|
|
297
|
+
DSV
|
|
298
|
+
|
|
299
|
+
Dsv7::Parser.parse(content) do |type, payload, line_number|
|
|
300
|
+
# same :format, :element, :end semantics
|
|
301
|
+
end
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
VRL usage mirrors WKDL as well:
|
|
305
|
+
|
|
306
|
+
```
|
|
307
|
+
content = <<~DSV
|
|
308
|
+
FORMAT:Vereinsergebnisliste;7;
|
|
309
|
+
ERZEUGER:Soft;1.0;mail@example.com;
|
|
310
|
+
VERANSTALTUNG:Name;Ort;25;HANDZEIT;
|
|
311
|
+
ABSCHNITT:1;01.01.2024;10:00;N;
|
|
312
|
+
WETTKAMPF:1;A;1;;100;F;GL;M;SW;;;
|
|
313
|
+
VEREIN:SV Hansa Adorf;1234;17;GER;
|
|
314
|
+
DATEIENDE
|
|
315
|
+
DSV
|
|
316
|
+
|
|
317
|
+
Dsv7::Parser.parse(content) do |type, payload, line_number|
|
|
318
|
+
# same :format, :element, :end semantics
|
|
319
|
+
end
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
Errors and edge cases:
|
|
323
|
+
|
|
324
|
+
- Raises `Dsv7::Parser::Error` if the first effective line is not a `FORMAT` line.
|
|
325
|
+
- Raises `Dsv7::Parser::Error` if the list type does not match the parser method
|
|
326
|
+
(WKDL expects `Wettkampfdefinitionsliste`, VML expects `Vereinsmeldeliste`,
|
|
327
|
+
ERG expects `Wettkampfergebnisliste`, VRL expects `Vereinsergebnisliste`).
|
|
328
|
+
- Stops at `DATEIENDE`: the parser does not read anything after it (the validator, in contrast, permits only comments/whitespace after it).
|
|
329
|
+
|
|
330
|
+
## Development
|
|
331
|
+
|
|
332
|
+
- Tests use Minitest and live under `test/dsv7/`.
|
|
333
|
+
- Version is defined in `lib/dsv7/parser/version.rb`.
|
|
334
|
+
|
|
335
|
+
## Compact ERG Example
|
|
336
|
+
|
|
337
|
+
Minimal Wettkampfergebnisliste validation and parsing in one go:
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
require 'dsv7/parser'
|
|
341
|
+
|
|
342
|
+
content = <<~DSV
|
|
343
|
+
FORMAT:Wettkampfergebnisliste;7;
|
|
344
|
+
ERZEUGER:Soft;1.0;mail@example.com;
|
|
345
|
+
VERANSTALTUNG:Name;Ort;25;HANDZEIT;
|
|
346
|
+
VERANSTALTER:Club;
|
|
347
|
+
AUSRICHTER:Verein;Kontakt;;;;;;kontakt@example.com;
|
|
348
|
+
ABSCHNITT:1;01.01.2024;10:00;N;
|
|
349
|
+
WETTKAMPF:1;A;1;;100;F;GL;M;SW;;;
|
|
350
|
+
WERTUNG:1;V;1;JG;0;;;OFFEN;
|
|
351
|
+
VEREIN:SV Hansa Adorf;1234;17;GER;
|
|
352
|
+
DATEIENDE
|
|
353
|
+
DSV
|
|
354
|
+
|
|
355
|
+
result = Dsv7::Validator.validate(content)
|
|
356
|
+
if result.valid?
|
|
357
|
+
Dsv7::Parser.parse(content) do |type, payload, line_number|
|
|
358
|
+
case type
|
|
359
|
+
when :format
|
|
360
|
+
# { list_type: 'Wettkampfergebnisliste', version: '7' }
|
|
361
|
+
when :element
|
|
362
|
+
# payload: { name: 'ERZEUGER', attrs: ['Soft','1.0','mail@example.com'] }
|
|
363
|
+
when :end
|
|
364
|
+
# reached DATEIENDE
|
|
365
|
+
end
|
|
366
|
+
end
|
|
367
|
+
else
|
|
368
|
+
warn "Invalid ERG: #{result.errors.join('; ')}"
|
|
369
|
+
end
|
|
370
|
+
```
|
data/Rakefile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'bundler/gem_tasks'
|
|
4
|
+
require 'rake/testtask'
|
|
5
|
+
require 'rubocop/rake_task'
|
|
6
|
+
|
|
7
|
+
# Unit tests: every *_test.rb file below test/, with lib/ on the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('lib')
  test_task.test_files = FileList['test/**/*_test.rb']
  # Keep the output quiet: do not print the ruby command line with all test files.
  test_task.verbose = false
end

# A bare `rake` runs the tests first, then the linters.
task default: %i[test lint]

desc 'Run RuboCop'
RuboCop::RakeTask.new(:rubocop)

# Umbrella task so further linters can be added without touching callers.
desc 'Run all linters'
task lint: %i[rubocop]

# Entry point for CI; same prerequisites as the default task.
desc 'CI: run tests and lint'
task ci: %i[test lint]
|
data/TODO.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# TODO
|
|
2
|
+
|
|
3
|
+
- Cardinalities are all in one file (lib/dsv7/validator/cardinality.rb). Split by list type (e.g., wk_cardinality.rb, vml_cardinality.rb, …) to keep files small and grep‑friendly.
|
|
4
|
+
|
|
5
|
+
- Extract allowed enum sets into named constants for reuse across modules.
|
|
6
|
+
|
|
7
|
+
- Add a tiny unit test for Lex.element edge cases (e.g., multiple trailing ;, empty attributes in the middle) to lock in splitting semantics.
|
|
8
|
+
|
|
9
|
+
- Parser resource handling test: add a test that parses a file path and verifies the underlying File is closed after enumeration.
|
|
10
|
+
|
|
11
|
+
- In README.md, add a short “Supported validations” matrix per list type (what’s checked today vs. planned).
|
|
12
|
+
|
|
13
|
+
- Add fuzz/robustness slices around:
|
|
14
|
+
- Extremely long lines and many inline comments on a single line.
|
|
15
|
+
|
|
16
|
+
- Garbage bytes mixed into attributes (parser should still emit sanitized attributes; validator should flag encoding).
|
|
17
|
+
|
|
18
|
+
- Add a tiny CLI (bin/dsv7-validate) that reads from a path/stdin and prints errors/warnings. It improves adoption and makes quick checks easy.
|
|
19
|
+
|
|
20
|
+
- Remove Gemfile.lock from VCS for a library gem.
|
data/dsv7-parser.gemspec
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/dsv7/parser/version'
|
|
4
|
+
|
|
5
|
+
# Gem metadata for dsv7-parser. The version comes from Dsv7::Parser::VERSION.
Gem::Specification.new do |spec|
  spec.name = 'dsv7-parser'
  spec.version = Dsv7::Parser::VERSION
  spec.authors = ['bigcurl']
  spec.email = ['maintheme@gmail.com']

  spec.summary = 'SAX parser for the DSV7 swim file format'
  spec.description = 'Ruby gem for a DSV7 SAX parser.'
  spec.homepage = 'https://github.com/bigcurl/dsv7-parser'
  spec.license = 'MIT'

  spec.required_ruby_version = Gem::Requirement.new('>= 2.7.0')

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = spec.homepage
  spec.metadata['changelog_uri'] = spec.homepage
  spec.metadata['rubygems_mfa_required'] = 'true'

  # Package every git-tracked file except the test suites. `match?` avoids
  # allocating MatchData per file, and \A anchors at the start of the path
  # itself (^ would also match after a newline embedded in a file name).
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(?:test|features)/}) }
  end

  spec.require_paths = ['lib']

  # Runtime dependencies (none yet)
  # spec.add_dependency "nokogiri", ">= 1.14"

  # Development dependencies are declared in Gemfile
end
|
data/lib/dsv7/lex.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Dsv7
  # Line-level lexing helpers: recognise the FORMAT header and split element
  # lines into a name and its attributes.
  module Lex
    module_function

    # A complete FORMAT line: "FORMAT:<list type>;<version>;".
    # Anchored to the whole string (\A ... \Z) instead of per line (^ ... $) so
    # text before or after the FORMAT line in the same string is rejected.
    # \Z still tolerates one trailing newline, exactly as $ did.
    FORMAT_LINE = /\AFORMAT:([^;]+);([^;]+);\Z/.freeze

    # Parses a FORMAT line.
    #
    # @param line [String] a single line
    # @return [Array(String, String), nil] [list_type, version], or nil if the
    #   line is not a FORMAT line
    def parse_format(line)
      match = FORMAT_LINE.match(line)
      return nil unless match

      [match[1], match[2]]
    end

    # Splits an element content line ("NAME:attr1;attr2;...;") into its name
    # and attributes.
    #
    # @param content [String] a single line
    # @return [Array(String, Array<String>), nil] [name, attrs], or nil if the
    #   line contains no ':' and therefore is not an element line
    def element(content)
      return nil unless content.include?(':')

      # Split on the first ':' only; attribute values may contain colons (e.g. times).
      name, rest = content.split(':', 2)
      # limit -1 keeps empty attributes, including those in the middle.
      attrs = rest.split(';', -1)
      # The final ';' terminates the last attribute; drop the empty tail it produces.
      attrs.pop if attrs.last == ''
      [name.strip, attrs]
    end
  end
end
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'stringio'
|
|
4
|
+
require_relative '../stream'
|
|
5
|
+
require_relative '../lex'
|
|
6
|
+
require_relative 'io_util'
|
|
7
|
+
|
|
8
|
+
module Dsv7
  module Parser
    # Internal engine that implements the streaming mechanics.
    #
    # Events are appended to the emitter with `<<` as three-element arrays:
    #   [:format,  { list_type:, version: }, line_number]
    #   [:element, { name:, attrs: },        line_number]
    #   [:end,     nil,                      line_number]
    class Engine
      # Short parser names used in error messages, keyed by FORMAT list type.
      SHORT_NAMES = {
        'Wettkampfdefinitionsliste' => 'WKDL',
        'Vereinsmeldeliste' => 'VML',
        'Wettkampfergebnisliste' => 'ERG',
        'Vereinsergebnisliste' => 'VRL'
      }.freeze

      # Content line that terminates a DSV7 file.
      TERMINATOR = 'DATEIENDE'

      # Streams any list type; the type is taken from the FORMAT line.
      def self.stream_any(input, emitter)
        new(input, emitter).stream_any
      end

      # Streams a list and raises unless its FORMAT list type equals
      # +expected_list_type+.
      def self.stream_list(input, emitter, expected_list_type)
        new(input, emitter).stream_list(expected_list_type)
      end

      # @param input   whatever IoUtil.with_io accepts
      # @param emitter any object responding to `<<`
      def initialize(input, emitter)
        @input = input
        @emitter = emitter
      end

      # @raise [Dsv7::Parser::Error] if the first content line is not a FORMAT line
      def stream_any
        stream { |content, line_number| parse_format_any(content, line_number) }
      end

      # @raise [Dsv7::Parser::Error] if the first content line is not a FORMAT
      #   line or names a list type other than +expected_list_type+
      def stream_list(expected_list_type)
        stream { |content, line_number| parse_format_expect(content, line_number, expected_list_type) }
      end

      private

      # Shared streaming loop. The block turns the first content line into
      # [list_type, version] (raising if it cannot). Every later line is emitted
      # as an element until the terminator is reached. An :end event is always
      # emitted afterwards, carrying the number of the last content line seen
      # (0 when there was none).
      def stream
        format_seen = false
        last_line = 0
        IoUtil.with_io(@input) do |io|
          IoUtil.each_content_line(io) do |content, line_number|
            last_line = line_number
            if format_seen
              break if content == TERMINATOR

              emit_element(content, line_number)
            else
              list_type, version = yield(content, line_number)
              @emitter << [:format, { list_type: list_type, version: version }, line_number]
              format_seen = true
            end
          end
        end
        @emitter << [:end, nil, last_line]
      end

      # Parses the FORMAT line and enforces the expected list type.
      def parse_format_expect(content, line_number, expected_list_type)
        pair = Dsv7::Lex.parse_format(content)
        format_required!(line_number) unless pair

        list_type, version = pair
        unless list_type == expected_list_type
          short = parser_short_name(expected_list_type)
          raise Dsv7::Parser::Error, "Unsupported list type '#{list_type}' for #{short} parser"
        end

        [list_type, version]
      end

      # Parses the FORMAT line without restricting the list type.
      def parse_format_any(content, line_number)
        pair = Dsv7::Lex.parse_format(content)
        format_required!(line_number) unless pair
        pair
      end

      def format_required!(line_number)
        raise Dsv7::Parser::Error, "First non-empty line must be FORMAT (line #{line_number})"
      end

      # Emits an :element event; lines Lex does not recognise as elements are
      # skipped silently.
      def emit_element(content, line_number)
        pair = Dsv7::Lex.element(content)
        return unless pair

        name, attrs = pair
        @emitter << [:element, { name: name, attrs: attrs }, line_number]
      end

      # Short name for error messages; unknown list types are returned unchanged.
      def parser_short_name(expected_list_type)
        SHORT_NAMES.fetch(expected_list_type, expected_list_type)
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'stringio'
|
|
4
|
+
require_relative '../stream'
|
|
5
|
+
|
|
6
|
+
module Dsv7
  module Parser
    # Input helpers for the parser: turn the accepted input kinds into an IO
    # and iterate over its content lines.
    module IoUtil
      module_function

      # Converts parser input into an IO-like object.
      #
      # @param input [#read, String] an IO-like object (returned unchanged), the
      #   path of an existing regular file (opened in binary mode), or a String
      #   holding the file content (wrapped in a binary StringIO)
      # @return [#read]
      # @raise [ArgumentError] for any other kind of input
      def to_io(input)
        return input if input.respond_to?(:read)
        unless input.is_a?(String)
          raise ArgumentError, 'Unsupported input; pass IO, file path String, or content String'
        end

        file_path?(input) ? File.open(input, 'rb') : StringIO.new(input.b)
      end

      # Yields an IO for +input+, prepared via Dsv7::Stream, and closes it
      # afterwards if (and only if) it is a File opened here from a path.
      # Caller-supplied IOs are never closed.
      def with_io(input)
        io = to_io(input)
        opened_here = io.is_a?(File) && !io.equal?(input)
        Dsv7::Stream.binmode_if_possible(io)
        Dsv7::Stream.read_bom?(io) # parser tolerates BOM
        yield io
      ensure
        io.close if opened_here
      end

      # Yields [content, line_number] for every line that still has content
      # after Dsv7::Stream sanitizing, inline-comment stripping and whitespace
      # stripping. Line numbers are 1-based and count skipped lines too.
      def each_content_line(io)
        ln = 0
        io.each_line("\n") do |raw|
          ln += 1
          line = Dsv7::Stream.sanitize_line(raw)
          content = Dsv7::Stream.strip_inline_comment(line).strip
          next if content.empty?

          yield content, ln
        end
      end

      # True when +input+ names an existing regular file. A String containing a
      # NUL byte can never be a path, and File.file? raises ArgumentError for
      # it, so such Strings are classified as content instead of crashing.
      def file_path?(input)
        !input.include?("\0") && File.file?(input)
      end

      private_class_method :file_path?
    end
  end
end
|