dsv7-parser 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'parser/version'
4
+ require_relative 'parser/engine'
5
+ require_relative 'validator'
6
+ require_relative 'stream'
7
+ require_relative 'lex'
8
+
9
+ module Dsv7
10
module Parser
  # Error class scoped to the parser namespace.
  class Error < StandardError
  end

  # Generic streaming entry point: delegates to Engine.stream_any, which
  # auto-detects the list type from the first effective FORMAT line.
  # Events have the shape [:format|:element|:end, payload, line_number];
  # the first event is always :format with payload
  # { list_type: <String>, version: <String> }.
  # With a block every event is yielded to it; without a block an
  # Enumerator over the events is returned.
  def self.parse(input, &block)
    events = Enumerator.new do |yielder|
      Engine.stream_any(input, yielder)
    end
    block ? events.each(&block) : events
  end

  # Streams a Wettkampfdefinitionsliste (WKDL) via Engine.stream_list.
  # Events have the shape [:format|:element|:end, payload, line_number].
  # Inline comment stripping, BOM tolerance and UTF-8 scrubbing are
  # performed while streaming.
  def self.parse_wettkampfdefinitionsliste(input, &block)
    events = Enumerator.new do |yielder|
      Engine.stream_list(input, yielder, 'Wettkampfdefinitionsliste')
    end
    block ? events.each(&block) : events
  end

  # Streams a Vereinsmeldeliste (VML). Same contract as
  # parse_wettkampfdefinitionsliste, but the first effective line must be
  # FORMAT:Vereinsmeldeliste;7;.
  def self.parse_vereinsmeldeliste(input, &block)
    events = Enumerator.new do |yielder|
      Engine.stream_list(input, yielder, 'Vereinsmeldeliste')
    end
    block ? events.each(&block) : events
  end

  # Streams a Wettkampfergebnisliste (ERG). Same contract as the other
  # parse_* methods, but the first effective line must be
  # FORMAT:Wettkampfergebnisliste;7;.
  def self.parse_wettkampfergebnisliste(input, &block)
    events = Enumerator.new do |yielder|
      Engine.stream_list(input, yielder, 'Wettkampfergebnisliste')
    end
    block ? events.each(&block) : events
  end

  # Streams a Vereinsergebnisliste (VRL). Same contract as the other
  # parse_* methods, but the first effective line must be
  # FORMAT:Vereinsergebnisliste;7;.
  def self.parse_vereinsergebnisliste(input, &block)
    events = Enumerator.new do |yielder|
      Engine.stream_list(input, yielder, 'Vereinsergebnisliste')
    end
    block ? events.each(&block) : events
  end
  # no additional private class methods
end
66
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dsv7
4
module Stream
  module_function

  # Switches +io+ to binary mode. Any StandardError raised by +binmode+
  # is swallowed, so objects that cannot be switched are left untouched.
  def binmode_if_possible(io)
    begin
      io.binmode
    rescue StandardError
      # ignore
    end
  end

  # Peeks at up to three bytes at the current position of +io+.
  # Returns true when they are the UTF-8 BOM (which stays consumed).
  # Otherwise the bytes that were read are pushed back onto +io+ and
  # false is returned; false is also returned when nothing could be read.
  def read_bom?(io)
    prefix = io.read(3)
    return false if prefix.nil? || prefix.empty?
    return true if prefix.bytes == [0xEF, 0xBB, 0xBF]

    # Push back in reverse so the bytes come out in their original order.
    prefix.bytes.reverse_each { |byte| io.ungetbyte(byte) }
    false
  end

  # Returns a copy of +raw+ without one trailing LF and one trailing CR,
  # tagged as UTF-8. When the bytes are not valid UTF-8, +on_invalid+
  # (if given) is called and invalid sequences are replaced.
  def sanitize_line(raw, on_invalid: nil)
    text = raw.delete_suffix("\n").delete_suffix("\r")
    text.force_encoding(Encoding::UTF_8)
    unless text.valid_encoding?
      on_invalid&.call
      text = text.scrub('�')
    end
    text
  end

  # Removes every inline single-line comment of the form (* ... *).
  # Lines lacking either delimiter are returned unchanged.
  def strip_inline_comment(line)
    has_both_delimiters = line.include?('(*') && line.include?('*)')
    has_both_delimiters ? line.gsub(/\(\*.*?\*\)/, '') : line
  end

  # Reads +io+ line by line (split on LF) and yields
  # [sanitized_line, line_number] with 1-based numbering.
  # Returns true when at least one raw line ended in CRLF.
  def each_sanitized_line(io, on_invalid: nil)
    saw_crlf = false
    current = 0
    io.each_line("\n") do |raw|
      current += 1
      saw_crlf = true if raw.end_with?("\r\n")
      yield sanitize_line(raw, on_invalid: on_invalid), current
    end
    saw_crlf
  end
end
61
+ end
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dsv7
4
+ class Validator
5
# Checks how often each element occurs in a Wettkampfdefinitionsliste.
# +wk_elements+ maps element names to occurrence counts; every violation
# is reported through +result.add_error+.
class WkCardinality
  def initialize(result, wk_elements)
    @result = result
    @wk_elements = wk_elements
  end

  # Runs all cardinality rules: exactly-once, at-most-once, at-least-once.
  def validate!
    require_exactly_one(
      %w[ERZEUGER VERANSTALTUNG VERANSTALTUNGSORT AUSSCHREIBUNGIMNETZ
         VERANSTALTER AUSRICHTER MELDEADRESSE MELDESCHLUSS]
    )
    forbid_more_than_one(%w[BANKVERBINDUNG BESONDERES NACHWEIS])
    require_at_least_one(%w[ABSCHNITT WETTKAMPF WERTUNG MELDEGELD])
  end

  private

  # Reports elements that are missing or that occur more than once.
  def require_exactly_one(elements)
    elements.each do |name|
      occurrences = @wk_elements[name]
      next if occurrences == 1

      message =
        if occurrences.zero?
          "Wettkampfdefinitionsliste: missing required element '#{name}'"
        else
          "Wettkampfdefinitionsliste: element '#{name}' occurs #{occurrences} times (expected 1)"
        end
      @result.add_error(message)
    end
  end

  # Reports optional elements that occur more than once.
  def forbid_more_than_one(elements)
    elements.each do |name|
      occurrences = @wk_elements[name]
      if occurrences > 1
        @result.add_error(
          "Wettkampfdefinitionsliste: element '#{name}' occurs #{occurrences} times (max 1)"
        )
      end
    end
  end

  # Reports elements that never occur.
  def require_at_least_one(elements)
    elements.each do |name|
      if @wk_elements[name] < 1
        @result.add_error("Wettkampfdefinitionsliste: missing required element '#{name}'")
      end
    end
  end
end
56
+
57
# Checks how often each element occurs in a Vereinsmeldeliste.
# +elements+ maps element names to occurrence counts; every violation is
# reported through +result.add_error+.
class VmlCardinality
  def initialize(result, elements)
    @result = result
    @elements = elements
  end

  # Runs the exactly-once and at-least-once rules.
  def validate!
    require_exactly_one(%w[ERZEUGER VERANSTALTUNG VEREIN ANSPRECHPARTNER])
    require_at_least_one(%w[ABSCHNITT WETTKAMPF])
  end

  private

  # Reports elements that are missing or that occur more than once.
  def require_exactly_one(elements)
    elements.each do |name|
      occurrences = @elements[name]
      next if occurrences == 1

      message =
        if occurrences.zero?
          "Vereinsmeldeliste: missing required element '#{name}'"
        else
          "Vereinsmeldeliste: element '#{name}' occurs #{occurrences} times (expected 1)"
        end
      @result.add_error(message)
    end
  end

  # Reports elements that never occur.
  def require_at_least_one(elements)
    elements.each do |name|
      if @elements[name] < 1
        @result.add_error("Vereinsmeldeliste: missing required element '#{name}'")
      end
    end
  end
end
93
+
94
# Checks how often each element occurs in a Wettkampfergebnisliste.
# +elements+ maps element names to occurrence counts; every violation is
# reported through +result.add_error+.
class ErgCardinality
  def initialize(result, elements)
    @result = result
    @elements = elements
  end

  # Runs the exactly-once and at-least-once rules.
  def validate!
    require_exactly_one(%w[ERZEUGER VERANSTALTUNG VERANSTALTER AUSRICHTER])
    require_at_least_one(%w[ABSCHNITT WETTKAMPF WERTUNG VEREIN])
  end

  private

  # Reports elements that are missing or that occur more than once.
  def require_exactly_one(elements)
    elements.each do |name|
      occurrences = @elements[name]
      next if occurrences == 1

      message =
        if occurrences.zero?
          "Wettkampfergebnisliste: missing required element '#{name}'"
        else
          "Wettkampfergebnisliste: element '#{name}' occurs #{occurrences} times (expected 1)"
        end
      @result.add_error(message)
    end
  end

  # Reports elements that never occur.
  def require_at_least_one(elements)
    elements.each do |name|
      if @elements[name] < 1
        @result.add_error("Wettkampfergebnisliste: missing required element '#{name}'")
      end
    end
  end
end
130
+
131
# Checks how often each element occurs in a Vereinsergebnisliste.
# +elements+ maps element names to occurrence counts; every violation is
# reported through +result.add_error+.
class VrlCardinality
  def initialize(result, elements)
    @result = result
    @elements = elements
  end

  # Runs the exactly-once and at-least-once rules.
  def validate!
    require_exactly_one(%w[ERZEUGER VERANSTALTUNG VERANSTALTER AUSRICHTER])
    require_at_least_one(%w[ABSCHNITT WETTKAMPF WERTUNG VEREIN])
  end

  private

  # Reports elements that are missing or that occur more than once.
  def require_exactly_one(elements)
    elements.each do |name|
      occurrences = @elements[name]
      next if occurrences == 1

      message =
        if occurrences.zero?
          "Vereinsergebnisliste: missing required element '#{name}'"
        else
          "Vereinsergebnisliste: element '#{name}' occurs #{occurrences} times (expected 1)"
        end
      @result.add_error(message)
    end
  end

  # Reports elements that never occur.
  def require_at_least_one(elements)
    elements.each do |name|
      if @elements[name] < 1
        @result.add_error("Vereinsergebnisliste: missing required element '#{name}'")
      end
    end
  end
end
167
+ end
168
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../stream'
4
+ require_relative '../lex'
5
+ require_relative 'line_analyzer'
6
+
7
+ module Dsv7
8
+ class Validator
9
# Core pipeline for the validator: BOM/encoding checks followed by
# line-by-line analysis. All findings are recorded on the +result+
# object passed to the constructor.
class Core
  # Expected file name shape 'JJJJ-MM-TT-Ort-Zusatz.DSV7'. Anchored with
  # \A/\z so the whole string must match; ^/$ would only anchor a single
  # line and let multi-line strings or a trailing newline slip through.
  FILENAME_PATTERN = /\A\d{4}-\d{2}-\d{2}-[^.]+\.DSV7\z/.freeze

  # @param result   collector responding to add_error / add_warning
  # @param filename [String, nil] name to check against the naming
  #   convention; nil skips the filename check
  def initialize(result, filename)
    @result = result
    @filename = filename
  end

  # Validates the content readable from +io+ and returns the result
  # object given to the constructor.
  def call_io(io)
    Dsv7::Stream.binmode_if_possible(io)
    check_bom_and_rewind(io)
    process_lines(io)
  end

  private

  # Records an error when the stream starts with a UTF-8 BOM.
  # No explicit rewind happens here; restoring the peeked bytes when no
  # BOM is present is left to Dsv7::Stream.read_bom?.
  def check_bom_and_rewind(io)
    bom = Dsv7::Stream.read_bom?(io)
    @result.add_error('UTF-8 BOM detected (spec requires UTF-8 without BOM)') if bom
  end

  # Feeds every sanitized line to a LineAnalyzer, then runs the
  # end-of-file checks and the filename check. Returns the result object.
  def process_lines(io)
    analyzer = LineAnalyzer.new(@result)
    had_crlf = iterate(io) { |line, line_number| analyzer.process_line(line, line_number) }
    @result.add_warning('CRLF line endings detected') if had_crlf
    analyzer.finish
    check_filename(@filename)
    @result
  end

  # Yields [line, line_number] for each sanitized line of +io+ and
  # records an error whenever a line is not valid UTF-8.
  # Returns whatever Dsv7::Stream.each_sanitized_line returns (the CRLF flag).
  def iterate(io, &block)
    invalid_utf8 = -> { @result.add_error('File is not valid UTF-8 encoding') }
    Dsv7::Stream.each_sanitized_line(io, on_invalid: invalid_utf8, &block)
  end

  # Adds a warning when +filename+ does not follow the naming convention.
  # A nil filename is skipped silently.
  def check_filename(filename)
    return if filename.nil?
    return if filename.match?(FILENAME_PATTERN)

    @result.add_warning(
      "Filename '#{filename}' does not follow 'JJJJ-MM-TT-Ort-Zusatz.DSV7'"
    )
  end
end
56
+ end
57
+ end
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../stream'
4
+ require_relative 'line_analyzer_common'
5
+ require_relative 'schemas/wk_schema'
6
+ require_relative 'schemas/vml_schema'
7
+ require_relative 'schemas/erg_schema'
8
+ require_relative 'schemas/vrl_schema'
9
+
10
+ module Dsv7
11
+ class Validator
12
# Consumes sanitized lines one at a time, tracks the structural state of
# the file (FORMAT line, DATEIENDE terminator, content after the
# terminator) and delegates list-specific checks to the included mixins.
# All findings are recorded on the result object.
class LineAnalyzer
  include LineAnalyzerWk
  include LineAnalyzerVml
  include LineAnalyzerErg
  include LineAnalyzerVrl

  def initialize(result)
    @result = result
    @effective_index = 0
    @format_line_index = nil
    @dateiende_index = nil
    @after_dateiende_effective_line_number = nil
    init_schemas_and_counters
  end

  # Creates, per list type, an occurrence counter (defaulting to 0) and
  # the schema object used for per-line validation.
  def init_schemas_and_counters
    @wk_elements = Hash.new(0)
    @wk_schema = WkSchema.new(@result)
    @vml_elements = Hash.new(0)
    @vml_schema = VmlSchema.new(@result)
    @erg_elements = Hash.new(0)
    @erg_schema = ErgSchema.new(@result)
    @vrl_elements = Hash.new(0)
    @vrl_schema = VrlSchema.new(@result)
  end

  # Analyzes one sanitized line. Lines that are empty once inline
  # comments and surrounding whitespace are removed do not count as
  # effective lines and are skipped.
  def process_line(line, line_number)
    check_comment_balance(line, line_number)
    content = strip_inline_comment(line)
    return if content.empty?

    @effective_index += 1
    if @effective_index == 1
      # The first effective line has to be the FORMAT line.
      handle_first_effective(content, line_number)
    elsif content == 'DATEIENDE'
      @dateiende_index = line_number
    else
      handle_content_line(content, line_number)
    end
  end

  # Runs the end-of-input checks: structural positions first, then the
  # cardinality rules of the list type announced by the FORMAT line.
  def finish
    post_validate_positions
    list_type = @result.list_type
    validate_wk_list_elements if list_type == 'Wettkampfdefinitionsliste'
    validate_vml_list_elements if list_type == 'Vereinsmeldeliste'
    validate_erg_list_elements if list_type == 'Wettkampfergebnisliste'
    validate_vrl_list_elements if list_type == 'Vereinsergebnisliste'
  end

  private

  # Reports a line whose number of '(*' openers differs from its number
  # of '*)' closers.
  def check_comment_balance(line, line_number)
    return if !line.include?('(*') && !line.include?('*)')
    return if line.scan('(*').size == line.scan('*)').size

    @result.add_error("Unmatched comment delimiters (line #{line_number})")
  end

  # Remembers where the first effective line was seen and validates it
  # as a FORMAT line.
  def handle_first_effective(trimmed, line_number)
    @format_line_index = line_number
    check_format_line(trimmed, line_number)
  end

  # Drops inline comments, then strips surrounding whitespace.
  def strip_inline_comment(line)
    without_comments = Dsv7::Stream.strip_inline_comment(line)
    without_comments.strip
  end

  # Parses the FORMAT line; on success stores list type and version on
  # the result and checks both values.
  def check_format_line(trimmed, line_number)
    parsed = Dsv7::Lex.parse_format(trimmed)
    return format_error(line_number) unless parsed

    list_type, version = parsed
    # Enforce numeric version token for exact syntax compatibility
    if version.match?(/^\d+$/)
      @result.set_format(list_type, version)
      check_list_type(list_type, line_number)
      check_format_version(version, line_number)
    else
      format_error(line_number)
    end
  end

  def format_error(line_number)
    @result.add_error(
      "First non-empty line must be 'FORMAT:<Listentyp>;7;' (line #{line_number})"
    )
  end

  def check_list_type(list_type, line_number)
    unless Validator::ALLOWED_LIST_TYPES.include?(list_type)
      @result.add_error("Unknown list type in FORMAT: '#{list_type}' (line #{line_number})")
    end
  end

  def check_format_version(ver, line_number)
    unless ver == '7'
      @result.add_error("Unsupported format version '#{ver}', expected '7' (line #{line_number})")
    end
  end

  def require_semicolon(trimmed, line_number)
    unless trimmed.include?(';')
      @result.add_error("Missing attribute delimiter ';' (line #{line_number})")
    end
  end

  # Reports a missing FORMAT line, a missing DATEIENDE line, or content
  # that appeared after DATEIENDE.
  def post_validate_positions
    @result.add_error('Missing FORMAT line at top of file') if @format_line_index.nil?

    if @dateiende_index.nil?
      @result.add_error("Missing 'DATEIENDE' terminator line")
    elsif @after_dateiende_effective_line_number
      offending_line = @after_dateiende_effective_line_number
      @result.add_error("Content found after 'DATEIENDE' (line #{offending_line})")
    end
  end

  # Handles an effective line that is neither the first line nor
  # DATEIENDE. Each track_*/validate_* call is invoked unconditionally
  # and decides for itself whether it applies to the current list type.
  def handle_content_line(trimmed, line_number)
    # Only the first line seen after DATEIENDE is remembered.
    @after_dateiende_effective_line_number ||= line_number if @dateiende_index
    require_semicolon(trimmed, line_number)
    track_wk_element(trimmed)
    track_vml_element(trimmed)
    track_erg_element(trimmed)
    track_vrl_element(trimmed)
    validate_wk_line(trimmed, line_number)
    validate_vml_line(trimmed, line_number)
    validate_erg_line(trimmed, line_number)
    validate_vrl_line(trimmed, line_number)
  end
end
140
+ end
141
+ end
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../lex'
4
+ require_relative 'cardinality'
5
+
6
+ module Dsv7
7
+ class Validator
8
# Mixin with the Wettkampfdefinitionsliste (WKDL) specific steps of the
# line analysis. Expects the including object to provide @result,
# @wk_elements and @wk_schema. Every method is a no-op unless the
# result's list type is 'Wettkampfdefinitionsliste' (the cardinality
# check is gated by the caller instead).
module LineAnalyzerWk
  # Increments the occurrence counter of the element on +trimmed+.
  # Unparseable lines and the FORMAT / DATEIENDE markers are not counted.
  def track_wk_element(trimmed)
    return if @result.list_type != 'Wettkampfdefinitionsliste'

    parsed = Dsv7::Lex.element(trimmed)
    return unless parsed

    element_name, = parsed
    return if %w[FORMAT DATEIENDE].include?(element_name)

    @wk_elements[element_name] += 1
  end

  # Applies the WKDL cardinality rules to the collected counters.
  def validate_wk_list_elements
    checker = WkCardinality.new(@result, @wk_elements)
    checker.validate!
  end

  # Validates the attributes of the element on +trimmed+ against the
  # WKDL schema. FORMAT / DATEIENDE and unparseable lines are skipped.
  def validate_wk_line(trimmed, line_number)
    return if @result.list_type != 'Wettkampfdefinitionsliste'

    parsed = Dsv7::Lex.element(trimmed)
    return unless parsed

    element_name, attributes = parsed
    return if element_name == 'FORMAT' || element_name == 'DATEIENDE'

    @wk_schema.validate_element(element_name, attributes, line_number)
  end
end
39
+
40
# Mixin with the Vereinsmeldeliste (VML) specific steps of the line
# analysis. Expects the including object to provide @result,
# @vml_elements and @vml_schema. Every method is a no-op unless the
# result's list type is 'Vereinsmeldeliste' (the cardinality check is
# gated by the caller instead).
module LineAnalyzerVml
  # Increments the occurrence counter of the element on +trimmed+.
  # Unparseable lines and the FORMAT / DATEIENDE markers are not counted.
  def track_vml_element(trimmed)
    return if @result.list_type != 'Vereinsmeldeliste'

    parsed = Dsv7::Lex.element(trimmed)
    return unless parsed

    element_name, = parsed
    return if %w[FORMAT DATEIENDE].include?(element_name)

    @vml_elements[element_name] += 1
  end

  # Applies the VML cardinality rules to the collected counters.
  def validate_vml_list_elements
    checker = VmlCardinality.new(@result, @vml_elements)
    checker.validate!
  end

  # Validates the attributes of the element on +trimmed+ against the
  # VML schema. FORMAT / DATEIENDE and unparseable lines are skipped.
  def validate_vml_line(trimmed, line_number)
    return if @result.list_type != 'Vereinsmeldeliste'

    parsed = Dsv7::Lex.element(trimmed)
    return unless parsed

    element_name, attributes = parsed
    return if element_name == 'FORMAT' || element_name == 'DATEIENDE'

    @vml_schema.validate_element(element_name, attributes, line_number)
  end
end
71
+
72
# Mixin with the Wettkampfergebnisliste (ERG) specific steps of the line
# analysis. Expects the including object to provide @result,
# @erg_elements and @erg_schema. The track/validate-line methods are
# no-ops unless the result's list type is 'Wettkampfergebnisliste'.
module LineAnalyzerErg
  # Increments the occurrence counter of the element on +trimmed+.
  # Unparseable lines and the FORMAT / DATEIENDE markers are not counted.
  def track_erg_element(trimmed)
    return if @result.list_type != 'Wettkampfergebnisliste'

    parsed = Dsv7::Lex.element(trimmed)
    return unless parsed

    element_name, = parsed
    return if %w[FORMAT DATEIENDE].include?(element_name)

    @erg_elements[element_name] += 1
  end

  # Applies the ERG cardinality rules, but only when at least one
  # element was counted; with an empty counter nothing is reported.
  def validate_erg_list_elements
    unless @erg_elements.empty?
      ErgCardinality.new(@result, @erg_elements).validate!
    end
  end

  # Validates the attributes of the element on +trimmed+ against the
  # ERG schema. FORMAT / DATEIENDE and unparseable lines are skipped.
  def validate_erg_line(trimmed, line_number)
    return if @result.list_type != 'Wettkampfergebnisliste'

    parsed = Dsv7::Lex.element(trimmed)
    return unless parsed

    element_name, attributes = parsed
    return if element_name == 'FORMAT' || element_name == 'DATEIENDE'

    @erg_schema.validate_element(element_name, attributes, line_number)
  end
end
105
+
106
# Mixin with the Vereinsergebnisliste (VRL) specific steps of the line
# analysis. Expects the including object to provide @result,
# @vrl_elements and @vrl_schema. The track/validate-line methods are
# no-ops unless the result's list type is 'Vereinsergebnisliste'.
module LineAnalyzerVrl
  # Increments the occurrence counter of the element on +trimmed+.
  # Unparseable lines and the FORMAT / DATEIENDE markers are not counted.
  def track_vrl_element(trimmed)
    return if @result.list_type != 'Vereinsergebnisliste'

    parsed = Dsv7::Lex.element(trimmed)
    return unless parsed

    element_name, = parsed
    return if %w[FORMAT DATEIENDE].include?(element_name)

    @vrl_elements[element_name] += 1
  end

  # Applies the VRL cardinality rules, but only when at least one
  # element was counted; with an empty counter nothing is reported.
  def validate_vrl_list_elements
    unless @vrl_elements.empty?
      VrlCardinality.new(@result, @vrl_elements).validate!
    end
  end

  # Validates the attributes of the element on +trimmed+ against the
  # VRL schema. FORMAT / DATEIENDE and unparseable lines are skipped.
  def validate_vrl_line(trimmed, line_number)
    return if @result.list_type != 'Vereinsergebnisliste'

    parsed = Dsv7::Lex.element(trimmed)
    return unless parsed

    element_name, attributes = parsed
    return if element_name == 'FORMAT' || element_name == 'DATEIENDE'

    @vrl_schema.validate_element(element_name, attributes, line_number)
  end
end
139
+ end
140
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dsv7
4
+ class Validator
5
# Collects the outcome of a validation run: error messages, warning
# messages, and the list type / version recorded via set_format.
class Result
  attr_reader :errors, :warnings, :list_type, :version

  # Starts with no messages and no format information.
  def initialize
    @errors = []
    @warnings = []
    @list_type = @version = nil
  end

  # Appends +message+ to the error list.
  def add_error(message)
    @errors.push(message)
  end

  # Appends +message+ to the warning list.
  def add_warning(message)
    @warnings.push(message)
  end

  # Records the list type and version.
  def set_format(list_type, version)
    @list_type = list_type
    @version = version
  end

  # True while no error has been recorded; warnings do not count.
  def valid?
    @errors.size.zero?
  end
end
33
+ end
34
+ end