sportdb-parser 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c94dcd42fc13a7043f6b926ca1d947df3199877693b22e53e4f50b5aa522bf5d
4
- data.tar.gz: 33eb689dcfb2bab0728c19b7d706da1556ddefafbfbcc6e424ac5bcbe3bccef6
3
+ metadata.gz: 0c9225b21f400b9f9cced2052c3062f41a091ed81d3d4239164c9652f53ebc6e
4
+ data.tar.gz: f7250eaa21324962df27e7cdd397857afa570c610f00c80c31e5105e40964002
5
5
  SHA512:
6
- metadata.gz: 97ef8d76ffa26312d66359f364588af3d7c76a3b0cebd3644b1f1ae775463defa9cb9552b267f26677c2c6f4e9b7b9fe62479dd34a7211fd1a4a3c1b5e9af830
7
- data.tar.gz: ca9b56c6c02c132f3924fb40c293e90379812b830a2899e2be02c1d6469a278456c6d68db7f73d5f5fd69b372c958953e3fefd829ac1120cf56b0944176a2b87
6
+ metadata.gz: 471c938c233d8f81d7a0fd5e4470a27a52486906764816b6c35ea3d88e19650c81302fd5ff9ee30b85d3a8e9f81ada8eef20b49bd3de924c7238acb106ba6082
7
+ data.tar.gz: 24d1cf3846404859ad7e751895325b256321d43e2881413fda6325c744ca0c31b52ef2032a9dfc8e56e67d7a06df54a6d2780a297982440b8e40b7055fe06c26
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.0
1
+ ### 0.2.2
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -2,12 +2,8 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
- bin/fbt
6
5
  lib/sportdb/parser.rb
7
6
  lib/sportdb/parser/lang.rb
8
- lib/sportdb/parser/linter.rb
9
- lib/sportdb/parser/opts.rb
10
- lib/sportdb/parser/outline_reader.rb
11
7
  lib/sportdb/parser/parser.rb
12
8
  lib/sportdb/parser/token-date.rb
13
9
  lib/sportdb/parser/token-score.rb
data/Rakefile CHANGED
@@ -26,6 +26,6 @@ Hoe.spec 'sportdb-parser' do
26
26
  ]
27
27
 
28
28
  self.spec_extras = {
29
- required_ruby_version: '>= 2.2.2'
29
+ required_ruby_version: '>= 3.1.0'
30
30
  }
31
31
  end
@@ -27,6 +27,12 @@ end
27
27
 
28
28
  ROUND_RE = %r{^(
29
29
 
30
+ ## add special case for group play-off rounds!
31
+ ## group 2 play-off (e.g. worldcup 1954, 1958)
32
+ (?: Group [ ] [a-z0-9]+ [ ]
33
+ Play-?offs?
34
+ )
35
+ |
30
36
  # round - note - requires number e.g. round 1,2, etc.
31
37
  # note - use 1-9 regex (cannot start with 0) - why? why not?
32
38
  # make week 01 or round 01 or matchday 01 possible?
@@ -46,17 +52,23 @@ ROUND_RE = %r{^(
46
52
  |
47
53
  ## 1. Round / 2. Round / 3. Round / etc.
48
54
  ## Play-off Round
55
+ ## First Round
56
+ ## Final Round (e.g. Worldcup 1950)
49
57
  (?:
50
- (?: [1-9][0-9]* \.
51
- |
52
- Play-?off
58
+ (?: [1-9][0-9]* \. |
59
+ Play-?off |
60
+ 1st | First |
61
+ 2nd | Second |
62
+ Final
53
63
  )
54
64
  [ ] Round
55
65
  )
56
66
  |
57
67
  ## starting with preliminary
68
+ # e.g. Preliminary round
58
69
  (?: Preliminary [ ]
59
- (?: Semi-?finals |
70
+ (?: Round |
71
+ Semi-?finals |
60
72
  Final
61
73
  )
62
74
  )
@@ -110,10 +122,15 @@ ROUND_RE = %r{^(
110
122
  Finals?
111
123
  |
112
124
  ## add replays
113
- ## Final Replay
125
+ ## e.g. Final Replay
126
+ ## Quarter-finals replays
127
+ ## First round replays
114
128
  (?:
115
- Final
116
- [ ] Replay
129
+ (?: First [ ] Round |
130
+ Quarter-?finals? |
131
+ Finals?
132
+ )
133
+ [ ] Replays?
117
134
  )
118
135
  )$}ix
119
136
 
@@ -1,6 +1,6 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
3
+
4
4
 
5
5
 
6
6
  def self.parse_names( txt )
@@ -47,8 +47,8 @@ def self.build_map( lines, downcase: false )
47
47
  ## "may" => 5,
48
48
  ## "june" => 6, "jun" => 6, ...
49
49
  lines.each_with_index.reduce( {} ) do |h,(line,i)|
50
- line.each do |name|
51
- h[ downcase ? name.downcase : name ] = i+1
50
+ line.each do |name|
51
+ h[ downcase ? name.downcase : name ] = i+1
52
52
  end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
53
53
  h
54
54
  end
@@ -109,28 +109,85 @@ DAY_MAP = build_map( DAY_LINES, downcase: true )
109
109
  ## todo - add more date variants !!!!
110
110
 
111
111
  # e.g. Fri Aug/9 or Fri Aug 9
112
- DATE_RE = %r{
112
+ DATE_I_RE = %r{
113
113
  (?<date>
114
114
  \b
115
115
  ## optional day name
116
116
  ((?<day_name>#{DAY_NAMES})
117
117
  [ ]
118
- )?
118
+ )?
119
119
  (?<month_name>#{MONTH_NAMES})
120
120
  (?: \/|[ ] )
121
121
  (?<day>\d{1,2})
122
122
  ## optional year
123
123
  ( [ ]
124
124
  (?<year>\d{4})
125
- )?
126
- \b
125
+ )?
126
+ \b
127
127
  )}ix
128
128
 
129
129
 
130
+ # e.g. 3 June or 10 June
131
+ DATE_II_RE = %r{
132
+ (?<date>
133
+ \b
134
+ ## optional day name
135
+ ((?<day_name>#{DAY_NAMES})
136
+ [ ]
137
+ )?
138
+ (?<day>\d{1,2})
139
+ [ ]
140
+ (?<month_name>#{MONTH_NAMES})
141
+ ## optional year
142
+ ( [ ]
143
+ (?<year>\d{4})
144
+ )?
145
+ \b
146
+ )}ix
147
+
148
+
149
+ #############################################
150
+ # date regex variants (combined into one regex)
151
+ # note: order matters; first come-first matched/served
152
+ DATE_RE = Regexp.union(
153
+ DATE_I_RE,
154
+ DATE_II_RE
155
+ )
156
+
157
+
158
+ ##
159
+ ## add a date parser helper
160
+ def self.parse_date( str, start: )
161
+ if m=DATE_RE.match( str )
162
+
163
+ year = m[:year].to_i(10) if m[:year]
164
+ month = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
165
+ day = m[:day].to_i(10) if m[:day]
166
+ wday = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
167
+
168
+ if year.nil? ## try to calculate year
169
+ year = if month > start.month ||
170
+ (month == start.month && day >= start.day)
171
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
172
+ start.year
173
+ else
174
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
175
+ start.year+1
176
+ end
177
+ end
178
+ Date.new( year,month,day )
179
+ else
180
+ puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
181
+ exit 1
182
+ end
183
+ end
184
+
185
+
186
+
130
187
  ###
131
- # date duration
188
+ # date duration
132
189
  # use - or + as separator
133
- # in theory plus( +) only if dates
190
+ # in theory plus( +) only if dates
134
191
  # are two days next to each other
135
192
  #
136
193
  # otherwise define new dates type in the future? why? why not?
@@ -147,7 +204,7 @@ DATE_RE = %r{
147
204
  # Jun/25 .. 26 - why? why not???
148
205
  # Jun/25 to 26 - why? why not???
149
206
  # Jun/25 + 26 - add - why? why not???
150
- # Sun-Wed Jun/23-26 - add - why? why not???
207
+ # Sun-Wed Jun/23-26 - add - why? why not???
151
208
  # Wed+Thu Jun/26+27 2024 - add - why? why not???
152
209
  #
153
210
  # maybe use comma and plus for list of dates
@@ -157,39 +214,89 @@ DATE_RE = %r{
157
214
  # add back optional comma (before) year - why? why not?
158
215
 
159
216
 
160
- DURATION_RE = %r{
217
+ ##
218
+ # todo add plus later on - why? why not?
219
+
220
+ DURATION_I_RE = %r{
161
221
  (?<duration>
162
222
  \b
163
223
  ## optional day name
164
224
  ((?<day_name1>#{DAY_NAMES})
165
225
  [ ]
166
- )?
226
+ )?
167
227
  (?<month_name1>#{MONTH_NAMES})
168
228
  (?: \/|[ ] )
169
229
  (?<day1>\d{1,2})
170
230
  ## optional year
171
231
  ( [ ]
172
232
  (?<year1>\d{4})
173
- )?
233
+ )?
174
234
 
175
235
  ## support - only for now (add + or .. or such later - why??)
176
- [ ]*[+-][ ]*
177
-
236
+ [ ]*[-][ ]*
237
+
178
238
  ## optional day name
179
239
  ((?<day_name2>#{DAY_NAMES})
180
240
  [ ]
181
- )?
241
+ )?
182
242
  (?<month_name2>#{MONTH_NAMES})
183
243
  (?: \/|[ ] )
184
244
  (?<day2>\d{1,2})
185
245
  ## optional year
186
246
  ( [ ]
187
247
  (?<year2>\d{4})
188
- )?
189
- \b
248
+ )?
249
+ \b
250
+ )}ix
251
+
252
+
253
+ ###
254
+ # variant ii
255
+ # e.g. 26 July - 27 July
256
+
257
+ DURATION_II_RE = %r{
258
+ (?<duration>
259
+ \b
260
+ ## optional day name
261
+ ((?<day_name1>#{DAY_NAMES})
262
+ [ ]
263
+ )?
264
+ (?<day1>\d{1,2})
265
+ [ ]
266
+ (?<month_name1>#{MONTH_NAMES})
267
+ ## optional year
268
+ ( [ ]
269
+ (?<year1>\d{4})
270
+ )?
271
+
272
+ ## support - only for now (add + or .. or such later - why??)
273
+ [ ]*[-][ ]*
274
+
275
+ ## optional day name
276
+ ((?<day_name2>#{DAY_NAMES})
277
+ [ ]
278
+ )?
279
+ (?<day2>\d{1,2})
280
+ [ ]
281
+ (?<month_name2>#{MONTH_NAMES})
282
+ ## optional year
283
+ ( [ ]
284
+ (?<year2>\d{4})
285
+ )?
286
+ \b
190
287
  )}ix
191
288
 
192
289
 
290
+ #############################################
291
+ # duration regex variants (combined into one regex)
292
+ # note: order matters; first come-first matched/served
293
+ DURATION_RE = Regexp.union(
294
+ DURATION_I_RE,
295
+ DURATION_II_RE
296
+ )
297
+
298
+
299
+
193
300
  end # class Parser
194
- end # module SportDb
195
-
301
+ end # module SportDb
302
+
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 2
7
- PATCH = 0
7
+ PATCH = 2
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -24,11 +24,6 @@ require_relative 'parser/lang'
24
24
  require_relative 'parser/parser'
25
25
 
26
26
 
27
- ## more
28
- require_relative 'parser/outline_reader'
29
- require_relative 'parser/linter'
30
- require_relative 'parser/opts'
31
-
32
27
 
33
28
  ###
34
29
  # make parser api (easily) available - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-22 00:00:00.000000000 Z
11
+ date: 2024-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -74,8 +74,7 @@ dependencies:
74
74
  version: '4.1'
75
75
  description: sportdb-parser - football.txt match parser (& tokenizer)
76
76
  email: gerald.bauer@gmail.com
77
- executables:
78
- - fbt
77
+ executables: []
79
78
  extensions: []
80
79
  extra_rdoc_files:
81
80
  - CHANGELOG.md
@@ -86,12 +85,8 @@ files:
86
85
  - Manifest.txt
87
86
  - README.md
88
87
  - Rakefile
89
- - bin/fbt
90
88
  - lib/sportdb/parser.rb
91
89
  - lib/sportdb/parser/lang.rb
92
- - lib/sportdb/parser/linter.rb
93
- - lib/sportdb/parser/opts.rb
94
- - lib/sportdb/parser/outline_reader.rb
95
90
  - lib/sportdb/parser/parser.rb
96
91
  - lib/sportdb/parser/token-date.rb
97
92
  - lib/sportdb/parser/token-score.rb
@@ -112,7 +107,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
107
  requirements:
113
108
  - - ">="
114
109
  - !ruby/object:Gem::Version
115
- version: 2.2.2
110
+ version: 3.1.0
116
111
  required_rubygems_version: !ruby/object:Gem::Requirement
117
112
  requirements:
118
113
  - - ">="
data/bin/fbt DELETED
@@ -1,94 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ## tip: to test run:
4
- ## ruby -I ./lib bin/fbt
5
-
6
- ## our own code
7
- require 'sportdb/parser'
8
-
9
-
10
-
11
- require 'optparse'
12
-
13
- ##
14
- ## read textfile
15
- ## and dump tokens
16
- ##
17
- ## fbt ../openfootball/.../euro.txt
18
-
19
-
20
-
21
-
22
- args = ARGV
23
- opts = { debug: false,
24
- metal: false }
25
-
26
- parser = OptionParser.new do |parser|
27
- parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
28
-
29
- ##
30
- ## check if git has a offline option?? (use same)
31
- ## check for other tools - why? why not?
32
-
33
-
34
- parser.on( "--verbose", "--debug",
35
- "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
36
- opts[:debug] = debug
37
- end
38
-
39
- parser.on( "--metal",
40
- "turn off typed parse tree; show to the metal tokens"+
41
- " (default: #{opts[:metal]})" ) do |metal|
42
- opts[:metal] = metal
43
- end
44
- end
45
- parser.parse!( args )
46
-
47
- puts "OPTS:"
48
- p opts
49
- puts "ARGV:"
50
- p args
51
-
52
-
53
-
54
-
55
-
56
- paths = if args.empty?
57
- [
58
- '../../../openfootball/euro/2020--europe/euro.txt',
59
- '../../../openfootball/euro/2024--germany/euro.txt',
60
- ]
61
- else
62
- ## check for directories
63
- ## and auto-expand
64
-
65
- SportDb::Parser::Opts.expand_args( args )
66
- end
67
-
68
-
69
-
70
- SportDb::Parser::Linter.debug = true if opts[:debug]
71
-
72
- linter = SportDb::Parser::Linter.new
73
-
74
- errors = []
75
-
76
- paths.each_with_index do |path,i|
77
- puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
78
- linter.read( path, parse: !opts[:metal] )
79
-
80
- errors += linter.errors if linter.errors?
81
- end
82
-
83
- if errors.size > 0
84
- puts
85
- pp errors
86
- puts
87
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
88
- else
89
- puts
90
- puts "OK no parse errors found in #{paths.size} datafile(s)"
91
- end
92
-
93
- puts "bye"
94
-
@@ -1,149 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Linter for now nested inside Parser - keep? why? why not?
7
- class Linter
8
-
9
- def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
- def debug?() self.class.debug?; end
12
-
13
-
14
-
15
- attr_reader :errors
16
-
17
- def initialize
18
- @errors = []
19
- @parser = Parser.new ## use own parser instance (not shared) - why? why not?
20
- end
21
-
22
-
23
- def errors?() @errors.size > 0; end
24
-
25
-
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
- #########
52
- ## parse - false (default) - tokenize (only)
53
- ## - true - tokenize & parse
54
- def read( path, parse: false )
55
- ## note: every (new) read call - resets errors list to empty
56
- @errors = []
57
-
58
- nodes = OutlineReader.read( path )
59
-
60
- ## process nodes
61
- h1 = nil
62
- orphans = 0 ## track paragraphs's with no heading
63
-
64
- attrib_found = false
65
-
66
-
67
- nodes.each do |node|
68
- type = node[0]
69
-
70
- if type == :h1
71
- h1 = node[1] ## get heading text
72
- puts
73
- puts " = Heading 1 >#{node[1]}<"
74
- elsif type == :p
75
-
76
- if h1.nil?
77
- orphans += 1 ## only warn once
78
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
79
- next
80
- end
81
-
82
- lines = node[1]
83
-
84
- tree = []
85
- lines.each_with_index do |line,i|
86
-
87
- if debug?
88
- puts
89
- puts "line >#{line}<"
90
- end
91
-
92
-
93
- ## skip new (experimental attrib syntax)
94
- if attrib_found == false &&
95
- ATTRIB_RE.match?( line )
96
- ## note: check attrib regex AFTER group def e.g.:
97
- ## Group A:
98
- ## Group B: etc.
99
- ## todo/fix - change Group A: to Group A etc.
100
- ## Group B: to Group B
101
- attrib_found = true
102
- ## logger.debug "skipping key/value line - >#{line}<"
103
- next
104
- end
105
-
106
- if attrib_found
107
- ## check if line ends with dot
108
- ## if not slurp up lines to the next do!!!
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- attrib_found = false if line.end_with?( '.' )
111
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
112
- next
113
- end
114
-
115
- t, error_messages = if parse
116
- @parser.parse_with_errors( line )
117
- else
118
- @parser.tokenize_with_errors( line )
119
- end
120
-
121
-
122
- if error_messages.size > 0
123
- ## add to "global" error list
124
- ## make a triplet tuple (file / msg / line text)
125
- error_messages.each do |msg|
126
- @errors << [ path,
127
- msg,
128
- line
129
- ]
130
- end
131
- end
132
-
133
- pp t if debug?
134
-
135
- tree << t
136
- end
137
-
138
- ## pp tree
139
- else
140
- pp node
141
- raise ArgumentError, "unsupported (node) type >#{type}<"
142
- end
143
- end # each node
144
- end # read
145
- end # class Linter
146
-
147
-
148
- end # class Parser
149
- end # module SportDb
@@ -1,70 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
7
- class Opts
8
-
9
- SEASON_RE = %r{ (?:
10
- \d{4}-\d{2}
11
- | \d{4}(--[a-z0-9_-]+)?
12
- )
13
- }x
14
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
15
-
16
-
17
- ## note: if pattern includes directory add here
18
- ## (otherwise move to more "generic" datafile) - why? why not?
19
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
20
- #{SEASON}
21
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
22
- }x
23
-
24
-
25
- def self.find( path )
26
- datafiles = []
27
-
28
- ## note: normalize path - use File.expand_path ??
29
- ## change all backslash to slash for now
30
- ## path = path.gsub( "\\", '/' )
31
- path = File.expand_path( path )
32
-
33
- ## check all txt files
34
- ## note: incl. files starting with dot (.)) as candidates
35
- ## (normally excluded with just *)
36
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
37
- ## pp candidates
38
- candidates.each do |candidate|
39
- datafiles << candidate if MATCH_RE.match( candidate )
40
- end
41
-
42
- ## pp datafiles
43
- datafiles
44
- end
45
-
46
-
47
- def self.expand_args( args )
48
- paths = []
49
-
50
- args.each do |arg|
51
- ## check if directory
52
- if Dir.exist?( arg )
53
- datafiles = find( arg )
54
- puts
55
- puts " found #{datafiles.size} match txt datafiles in #{arg}"
56
- pp datafiles
57
- paths += datafiles
58
- else
59
- ## assume it's a file
60
- paths << arg
61
- end
62
- end
63
-
64
- paths
65
- end
66
- end # class Opts
67
-
68
-
69
- end # class Parser
70
- end # module SportDb
@@ -1,97 +0,0 @@
1
-
2
-
3
- module SportDb
4
-
5
- class OutlineReader
6
-
7
- def self.debug=(value) @@debug = value; end
8
- def self.debug?() @@debug ||= false; end
9
- def debug?() self.class.debug?; end
10
-
11
-
12
-
13
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
14
- txt = File.open( path, 'r:utf-8' ) {|f| f.read }
15
- parse( txt )
16
- end
17
-
18
- def self.parse( txt )
19
- new( txt ).parse
20
- end
21
-
22
- def initialize( txt )
23
- @txt = txt
24
- end
25
-
26
- ## note: skip "decorative" only heading e.g. ========
27
- ## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
28
- HEADING_BLANK_RE = %r{\A
29
- ={1,}
30
- \z}x
31
-
32
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
33
- HEADING_RE = %r{\A
34
- (?<marker>={1,}) ## 1. leading ======
35
- [ ]*
36
- (?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
37
- [ ]*
38
- =* ## 3. (optional) trailing ====
39
- \z}x
40
-
41
- def parse
42
- outline=[] ## outline structure
43
- start_para = true ## start new para(graph) on new text line?
44
-
45
- @txt.each_line do |line|
46
- line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
47
-
48
- if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
49
- start_para = true
50
- next
51
- end
52
-
53
- break if line == '__END__'
54
-
55
- next if line.start_with?( '#' ) ## skip comments too
56
- ## strip inline (until end-of-line) comments too
57
- ## e.g Eupen | KAS Eupen ## [de]
58
- ## => Eupen | KAS Eupen
59
- ## e.g bq Bonaire, BOE # CONCACAF
60
- ## => bq Bonaire, BOE
61
- line = line.sub( /#.*/, '' ).strip
62
- pp line if debug?
63
-
64
- ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
65
- next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
66
-
67
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
68
- if m=HEADING_RE.match( line )
69
- start_para = true
70
-
71
- heading_marker = m[:marker]
72
- heading_level = heading_marker.length ## count number of = for heading level
73
- heading = m[:text].strip
74
-
75
- puts "heading #{heading_level} >#{heading}<" if debug?
76
- outline << [:"h#{heading_level}", heading]
77
- else ## assume it's a (plain/regular) text line
78
- if start_para
79
- outline << [:p, [line]]
80
- start_para = false
81
- else
82
- node = outline[-1] ## get last entry
83
- if node[0] == :p ## assert it's a p(aragraph) node!!!
84
- node[1] << line ## add line to p(aragraph)
85
- else
86
- puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
87
- pp node
88
- exit 1
89
- end
90
- end
91
- end
92
- end
93
- outline
94
- end # method read
95
- end # class OutlineReader
96
-
97
- end # module SportDb