sportdb-parser 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c94dcd42fc13a7043f6b926ca1d947df3199877693b22e53e4f50b5aa522bf5d
4
- data.tar.gz: 33eb689dcfb2bab0728c19b7d706da1556ddefafbfbcc6e424ac5bcbe3bccef6
3
+ metadata.gz: 0c9225b21f400b9f9cced2052c3062f41a091ed81d3d4239164c9652f53ebc6e
4
+ data.tar.gz: f7250eaa21324962df27e7cdd397857afa570c610f00c80c31e5105e40964002
5
5
  SHA512:
6
- metadata.gz: 97ef8d76ffa26312d66359f364588af3d7c76a3b0cebd3644b1f1ae775463defa9cb9552b267f26677c2c6f4e9b7b9fe62479dd34a7211fd1a4a3c1b5e9af830
7
- data.tar.gz: ca9b56c6c02c132f3924fb40c293e90379812b830a2899e2be02c1d6469a278456c6d68db7f73d5f5fd69b372c958953e3fefd829ac1120cf56b0944176a2b87
6
+ metadata.gz: 471c938c233d8f81d7a0fd5e4470a27a52486906764816b6c35ea3d88e19650c81302fd5ff9ee30b85d3a8e9f81ada8eef20b49bd3de924c7238acb106ba6082
7
+ data.tar.gz: 24d1cf3846404859ad7e751895325b256321d43e2881413fda6325c744ca0c31b52ef2032a9dfc8e56e67d7a06df54a6d2780a297982440b8e40b7055fe06c26
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.0
1
+ ### 0.2.2
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -2,12 +2,8 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
- bin/fbt
6
5
  lib/sportdb/parser.rb
7
6
  lib/sportdb/parser/lang.rb
8
- lib/sportdb/parser/linter.rb
9
- lib/sportdb/parser/opts.rb
10
- lib/sportdb/parser/outline_reader.rb
11
7
  lib/sportdb/parser/parser.rb
12
8
  lib/sportdb/parser/token-date.rb
13
9
  lib/sportdb/parser/token-score.rb
data/Rakefile CHANGED
@@ -26,6 +26,6 @@ Hoe.spec 'sportdb-parser' do
26
26
  ]
27
27
 
28
28
  self.spec_extras = {
29
- required_ruby_version: '>= 2.2.2'
29
+ required_ruby_version: '>= 3.1.0'
30
30
  }
31
31
  end
@@ -27,6 +27,12 @@ end
27
27
 
28
28
  ROUND_RE = %r{^(
29
29
 
30
+ ## add special case for group play-off rounds!
31
+ ## group 2 play-off (e.g. worldcup 1954, 1958)
32
+ (?: Group [ ] [a-z0-9]+ [ ]
33
+ Play-?offs?
34
+ )
35
+ |
30
36
  # round - note - requires number e.g. round 1,2, etc.
31
37
  # note - use 1-9 regex (cannot start with 0) - why? why not?
32
38
  # make week 01 or round 01 or matchday 01 possible?
@@ -46,17 +52,23 @@ ROUND_RE = %r{^(
46
52
  |
47
53
  ## 1. Round / 2. Round / 3. Round / etc.
48
54
  ## Play-off Round
55
+ ## First Round
56
+ ## Final Round (e.g. Worldcup 1950)
49
57
  (?:
50
- (?: [1-9][0-9]* \.
51
- |
52
- Play-?off
58
+ (?: [1-9][0-9]* \. |
59
+ Play-?off |
60
+ 1st | First |
61
+ 2nd | Second |
62
+ Final
53
63
  )
54
64
  [ ] Round
55
65
  )
56
66
  |
57
67
  ## starting with preliminary
68
+ # e.g. Preliminary round
58
69
  (?: Preliminary [ ]
59
- (?: Semi-?finals |
70
+ (?: Round |
71
+ Semi-?finals |
60
72
  Final
61
73
  )
62
74
  )
@@ -110,10 +122,15 @@ ROUND_RE = %r{^(
110
122
  Finals?
111
123
  |
112
124
  ## add replays
113
- ## Final Replay
125
+ ## e.g. Final Replay
126
+ ## Quarter-finals replays
127
+ ## First round replays
114
128
  (?:
115
- Final
116
- [ ] Replay
129
+ (?: First [ ] Round |
130
+ Quarter-?finals? |
131
+ Finals?
132
+ )
133
+ [ ] Replays?
117
134
  )
118
135
  )$}ix
119
136
 
@@ -1,6 +1,6 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
3
+
4
4
 
5
5
 
6
6
  def self.parse_names( txt )
@@ -47,8 +47,8 @@ def self.build_map( lines, downcase: false )
47
47
  ## "may" => 5,
48
48
  ## "june" => 6, "jun" => 6, ...
49
49
  lines.each_with_index.reduce( {} ) do |h,(line,i)|
50
- line.each do |name|
51
- h[ downcase ? name.downcase : name ] = i+1
50
+ line.each do |name|
51
+ h[ downcase ? name.downcase : name ] = i+1
52
52
  end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
53
53
  h
54
54
  end
@@ -109,28 +109,85 @@ DAY_MAP = build_map( DAY_LINES, downcase: true )
109
109
  ## todo - add more date variants !!!!
110
110
 
111
111
  # e.g. Fri Aug/9 or Fri Aug 9
112
- DATE_RE = %r{
112
+ DATE_I_RE = %r{
113
113
  (?<date>
114
114
  \b
115
115
  ## optional day name
116
116
  ((?<day_name>#{DAY_NAMES})
117
117
  [ ]
118
- )?
118
+ )?
119
119
  (?<month_name>#{MONTH_NAMES})
120
120
  (?: \/|[ ] )
121
121
  (?<day>\d{1,2})
122
122
  ## optional year
123
123
  ( [ ]
124
124
  (?<year>\d{4})
125
- )?
126
- \b
125
+ )?
126
+ \b
127
127
  )}ix
128
128
 
129
129
 
130
+ # e.g. 3 June or 10 June
131
+ DATE_II_RE = %r{
132
+ (?<date>
133
+ \b
134
+ ## optional day name
135
+ ((?<day_name>#{DAY_NAMES})
136
+ [ ]
137
+ )?
138
+ (?<day>\d{1,2})
139
+ [ ]
140
+ (?<month_name>#{MONTH_NAMES})
141
+ ## optional year
142
+ ( [ ]
143
+ (?<year>\d{4})
144
+ )?
145
+ \b
146
+ )}ix
147
+
148
+
149
+ #############################################
150
+ # combined date regex - union of all date format variants
151
+ # note: order matters; first come-first matched/served
152
+ DATE_RE = Regexp.union(
153
+ DATE_I_RE,
154
+ DATE_II_RE
155
+ )
156
+
157
+
158
+ ##
159
+ ## add a date parser helper
160
+ def self.parse_date( str, start: )
161
+ if m=DATE_RE.match( str )
162
+
163
+ year = m[:year].to_i(10) if m[:year]
164
+ month = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
165
+ day = m[:day].to_i(10) if m[:day]
166
+ wday = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
167
+
168
+ if year.nil? ## try to calculate year
169
+ year = if month > start.month ||
170
+ (month == start.month && day >= start.day)
171
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
172
+ start.year
173
+ else
174
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
175
+ start.year+1
176
+ end
177
+ end
178
+ Date.new( year,month,day )
179
+ else
180
+ puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
181
+ exit 1
182
+ end
183
+ end
184
+
185
+
186
+
130
187
  ###
131
- # date duration
188
+ # date duration
132
189
  # use - or + as separator
133
- # in theory plus( +) only if dates
190
+ # in theory plus( +) only if dates
134
191
  # are two days next to each other
135
192
  #
136
193
  # otherwise define new dates type in the future? why? why not?
@@ -147,7 +204,7 @@ DATE_RE = %r{
147
204
  # Jun/25 .. 26 - why? why not???
148
205
  # Jun/25 to 26 - why? why not???
149
206
  # Jun/25 + 26 - add - why? why not???
150
- # Sun-Wed Jun/23-26 - add - why? why not???
207
+ # Sun-Wed Jun/23-26 - add - why? why not???
151
208
  # Wed+Thu Jun/26+27 2024 - add - why? why not???
152
209
  #
153
210
  # maybe use comma and plus for list of dates
@@ -157,39 +214,89 @@ DATE_RE = %r{
157
214
  # add back optional comma (before) year - why? why not?
158
215
 
159
216
 
160
- DURATION_RE = %r{
217
+ ##
218
+ # todo add plus later on - why? why not?
219
+
220
+ DURATION_I_RE = %r{
161
221
  (?<duration>
162
222
  \b
163
223
  ## optional day name
164
224
  ((?<day_name1>#{DAY_NAMES})
165
225
  [ ]
166
- )?
226
+ )?
167
227
  (?<month_name1>#{MONTH_NAMES})
168
228
  (?: \/|[ ] )
169
229
  (?<day1>\d{1,2})
170
230
  ## optional year
171
231
  ( [ ]
172
232
  (?<year1>\d{4})
173
- )?
233
+ )?
174
234
 
175
235
  ## support - only for now (add + or .. or such later - why??)
176
- [ ]*[+-][ ]*
177
-
236
+ [ ]*[-][ ]*
237
+
178
238
  ## optional day name
179
239
  ((?<day_name2>#{DAY_NAMES})
180
240
  [ ]
181
- )?
241
+ )?
182
242
  (?<month_name2>#{MONTH_NAMES})
183
243
  (?: \/|[ ] )
184
244
  (?<day2>\d{1,2})
185
245
  ## optional year
186
246
  ( [ ]
187
247
  (?<year2>\d{4})
188
- )?
189
- \b
248
+ )?
249
+ \b
250
+ )}ix
251
+
252
+
253
+ ###
254
+ # variant ii
255
+ # e.g. 26 July - 27 July
256
+
257
+ DURATION_II_RE = %r{
258
+ (?<duration>
259
+ \b
260
+ ## optional day name
261
+ ((?<day_name1>#{DAY_NAMES})
262
+ [ ]
263
+ )?
264
+ (?<day1>\d{1,2})
265
+ [ ]
266
+ (?<month_name1>#{MONTH_NAMES})
267
+ ## optional year
268
+ ( [ ]
269
+ (?<year1>\d{4})
270
+ )?
271
+
272
+ ## support - only for now (add + or .. or such later - why??)
273
+ [ ]*[-][ ]*
274
+
275
+ ## optional day name
276
+ ((?<day_name2>#{DAY_NAMES})
277
+ [ ]
278
+ )?
279
+ (?<day2>\d{1,2})
280
+ [ ]
281
+ (?<month_name2>#{MONTH_NAMES})
282
+ ## optional year
283
+ ( [ ]
284
+ (?<year2>\d{4})
285
+ )?
286
+ \b
190
287
  )}ix
191
288
 
192
289
 
290
+ #############################################
291
+ # combined duration regex - union of all duration format variants
292
+ # note: order matters; first come-first matched/served
293
+ DURATION_RE = Regexp.union(
294
+ DURATION_I_RE,
295
+ DURATION_II_RE
296
+ )
297
+
298
+
299
+
193
300
  end # class Parser
194
- end # module SportDb
195
-
301
+ end # module SportDb
302
+
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 2
7
- PATCH = 0
7
+ PATCH = 2
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -24,11 +24,6 @@ require_relative 'parser/lang'
24
24
  require_relative 'parser/parser'
25
25
 
26
26
 
27
- ## more
28
- require_relative 'parser/outline_reader'
29
- require_relative 'parser/linter'
30
- require_relative 'parser/opts'
31
-
32
27
 
33
28
  ###
34
29
  # make parser api (easily) available - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-22 00:00:00.000000000 Z
11
+ date: 2024-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -74,8 +74,7 @@ dependencies:
74
74
  version: '4.1'
75
75
  description: sportdb-parser - football.txt match parser (& tokenizer)
76
76
  email: gerald.bauer@gmail.com
77
- executables:
78
- - fbt
77
+ executables: []
79
78
  extensions: []
80
79
  extra_rdoc_files:
81
80
  - CHANGELOG.md
@@ -86,12 +85,8 @@ files:
86
85
  - Manifest.txt
87
86
  - README.md
88
87
  - Rakefile
89
- - bin/fbt
90
88
  - lib/sportdb/parser.rb
91
89
  - lib/sportdb/parser/lang.rb
92
- - lib/sportdb/parser/linter.rb
93
- - lib/sportdb/parser/opts.rb
94
- - lib/sportdb/parser/outline_reader.rb
95
90
  - lib/sportdb/parser/parser.rb
96
91
  - lib/sportdb/parser/token-date.rb
97
92
  - lib/sportdb/parser/token-score.rb
@@ -112,7 +107,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
107
  requirements:
113
108
  - - ">="
114
109
  - !ruby/object:Gem::Version
115
- version: 2.2.2
110
+ version: 3.1.0
116
111
  required_rubygems_version: !ruby/object:Gem::Requirement
117
112
  requirements:
118
113
  - - ">="
data/bin/fbt DELETED
@@ -1,94 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ## tip: to test run:
4
- ## ruby -I ./lib bin/fbt
5
-
6
- ## our own code
7
- require 'sportdb/parser'
8
-
9
-
10
-
11
- require 'optparse'
12
-
13
- ##
14
- ## read textfile
15
- ## and dump tokens
16
- ##
17
- ## fbt ../openfootball/.../euro.txt
18
-
19
-
20
-
21
-
22
- args = ARGV
23
- opts = { debug: false,
24
- metal: false }
25
-
26
- parser = OptionParser.new do |parser|
27
- parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
28
-
29
- ##
30
- ## check if git has a offline option?? (use same)
31
- ## check for other tools - why? why not?
32
-
33
-
34
- parser.on( "--verbose", "--debug",
35
- "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
36
- opts[:debug] = debug
37
- end
38
-
39
- parser.on( "--metal",
40
- "turn off typed parse tree; show to the metal tokens"+
41
- " (default: #{opts[:metal]})" ) do |metal|
42
- opts[:metal] = metal
43
- end
44
- end
45
- parser.parse!( args )
46
-
47
- puts "OPTS:"
48
- p opts
49
- puts "ARGV:"
50
- p args
51
-
52
-
53
-
54
-
55
-
56
- paths = if args.empty?
57
- [
58
- '../../../openfootball/euro/2020--europe/euro.txt',
59
- '../../../openfootball/euro/2024--germany/euro.txt',
60
- ]
61
- else
62
- ## check for directories
63
- ## and auto-expand
64
-
65
- SportDb::Parser::Opts.expand_args( args )
66
- end
67
-
68
-
69
-
70
- SportDb::Parser::Linter.debug = true if opts[:debug]
71
-
72
- linter = SportDb::Parser::Linter.new
73
-
74
- errors = []
75
-
76
- paths.each_with_index do |path,i|
77
- puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
78
- linter.read( path, parse: !opts[:metal] )
79
-
80
- errors += linter.errors if linter.errors?
81
- end
82
-
83
- if errors.size > 0
84
- puts
85
- pp errors
86
- puts
87
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
88
- else
89
- puts
90
- puts "OK no parse errors found in #{paths.size} datafile(s)"
91
- end
92
-
93
- puts "bye"
94
-
@@ -1,149 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Linter for now nested inside Parser - keep? why? why not?
7
- class Linter
8
-
9
- def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
- def debug?() self.class.debug?; end
12
-
13
-
14
-
15
- attr_reader :errors
16
-
17
- def initialize
18
- @errors = []
19
- @parser = Parser.new ## use own parser instance (not shared) - why? why not?
20
- end
21
-
22
-
23
- def errors?() @errors.size > 0; end
24
-
25
-
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl but (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
- #########
52
- ## parse - false (default) - tokenize (only)
53
- ## - true - tokenize & parse
54
- def read( path, parse: false )
55
- ## note: every (new) read call - resets errors list to empty
56
- @errors = []
57
-
58
- nodes = OutlineReader.read( path )
59
-
60
- ## process nodes
61
- h1 = nil
62
- orphans = 0 ## track paragraphs's with no heading
63
-
64
- attrib_found = false
65
-
66
-
67
- nodes.each do |node|
68
- type = node[0]
69
-
70
- if type == :h1
71
- h1 = node[1] ## get heading text
72
- puts
73
- puts " = Heading 1 >#{node[1]}<"
74
- elsif type == :p
75
-
76
- if h1.nil?
77
- orphans += 1 ## only warn once
78
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
79
- next
80
- end
81
-
82
- lines = node[1]
83
-
84
- tree = []
85
- lines.each_with_index do |line,i|
86
-
87
- if debug?
88
- puts
89
- puts "line >#{line}<"
90
- end
91
-
92
-
93
- ## skip new (experimental attrib syntax)
94
- if attrib_found == false &&
95
- ATTRIB_RE.match?( line )
96
- ## note: check attrib regex AFTER group def e.g.:
97
- ## Group A:
98
- ## Group B: etc.
99
- ## todo/fix - change Group A: to Group A etc.
100
- ## Group B: to Group B
101
- attrib_found = true
102
- ## logger.debug "skipping key/value line - >#{line}<"
103
- next
104
- end
105
-
106
- if attrib_found
107
- ## check if line ends with dot
108
- ## if not slurp up lines to the next dot!!!
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- attrib_found = false if line.end_with?( '.' )
111
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
112
- next
113
- end
114
-
115
- t, error_messages = if parse
116
- @parser.parse_with_errors( line )
117
- else
118
- @parser.tokenize_with_errors( line )
119
- end
120
-
121
-
122
- if error_messages.size > 0
123
- ## add to "global" error list
124
- ## make a triplet tuple (file / msg / line text)
125
- error_messages.each do |msg|
126
- @errors << [ path,
127
- msg,
128
- line
129
- ]
130
- end
131
- end
132
-
133
- pp t if debug?
134
-
135
- tree << t
136
- end
137
-
138
- ## pp tree
139
- else
140
- pp node
141
- raise ArgumentError, "unsupported (node) type >#{type}<"
142
- end
143
- end # each node
144
- end # read
145
- end # class Linter
146
-
147
-
148
- end # class Parser
149
- end # module SportDb
@@ -1,70 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
7
- class Opts
8
-
9
- SEASON_RE = %r{ (?:
10
- \d{4}-\d{2}
11
- | \d{4}(--[a-z0-9_-]+)?
12
- )
13
- }x
14
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
15
-
16
-
17
- ## note: if pattern includes directory add here
18
- ## (otherwise move to more "generic" datafile) - why? why not?
19
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
20
- #{SEASON}
21
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
22
- }x
23
-
24
-
25
- def self.find( path )
26
- datafiles = []
27
-
28
- ## note: normalize path - use File.expand_path ??
29
- ## change all backslash to slash for now
30
- ## path = path.gsub( "\\", '/' )
31
- path = File.expand_path( path )
32
-
33
- ## check all txt files
34
- ## note: incl. files starting with dot (.)) as candidates
35
- ## (normally excluded with just *)
36
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
37
- ## pp candidates
38
- candidates.each do |candidate|
39
- datafiles << candidate if MATCH_RE.match( candidate )
40
- end
41
-
42
- ## pp datafiles
43
- datafiles
44
- end
45
-
46
-
47
- def self.expand_args( args )
48
- paths = []
49
-
50
- args.each do |arg|
51
- ## check if directory
52
- if Dir.exist?( arg )
53
- datafiles = find( arg )
54
- puts
55
- puts " found #{datafiles.size} match txt datafiles in #{arg}"
56
- pp datafiles
57
- paths += datafiles
58
- else
59
- ## assume it's a file
60
- paths << arg
61
- end
62
- end
63
-
64
- paths
65
- end
66
- end # class Opts
67
-
68
-
69
- end # class Parser
70
- end # module SportDb
@@ -1,97 +0,0 @@
1
-
2
-
3
- module SportDb
4
-
5
- class OutlineReader
6
-
7
- def self.debug=(value) @@debug = value; end
8
- def self.debug?() @@debug ||= false; end
9
- def debug?() self.class.debug?; end
10
-
11
-
12
-
13
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
14
- txt = File.open( path, 'r:utf-8' ) {|f| f.read }
15
- parse( txt )
16
- end
17
-
18
- def self.parse( txt )
19
- new( txt ).parse
20
- end
21
-
22
- def initialize( txt )
23
- @txt = txt
24
- end
25
-
26
- ## note: skip "decorative" only heading e.g. ========
27
- ## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
28
- HEADING_BLANK_RE = %r{\A
29
- ={1,}
30
- \z}x
31
-
32
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
33
- HEADING_RE = %r{\A
34
- (?<marker>={1,}) ## 1. leading ======
35
- [ ]*
36
- (?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
37
- [ ]*
38
- =* ## 3. (optional) trailing ====
39
- \z}x
40
-
41
- def parse
42
- outline=[] ## outline structure
43
- start_para = true ## start new para(graph) on new text line?
44
-
45
- @txt.each_line do |line|
46
- line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
47
-
48
- if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
49
- start_para = true
50
- next
51
- end
52
-
53
- break if line == '__END__'
54
-
55
- next if line.start_with?( '#' ) ## skip comments too
56
- ## strip inline (until end-of-line) comments too
57
- ## e.g Eupen | KAS Eupen ## [de]
58
- ## => Eupen | KAS Eupen
59
- ## e.g bq Bonaire, BOE # CONCACAF
60
- ## => bq Bonaire, BOE
61
- line = line.sub( /#.*/, '' ).strip
62
- pp line if debug?
63
-
64
- ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
65
- next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
66
-
67
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
68
- if m=HEADING_RE.match( line )
69
- start_para = true
70
-
71
- heading_marker = m[:marker]
72
- heading_level = heading_marker.length ## count number of = for heading level
73
- heading = m[:text].strip
74
-
75
- puts "heading #{heading_level} >#{heading}<" if debug?
76
- outline << [:"h#{heading_level}", heading]
77
- else ## assume it's a (plain/regular) text line
78
- if start_para
79
- outline << [:p, [line]]
80
- start_para = false
81
- else
82
- node = outline[-1] ## get last entry
83
- if node[0] == :p ## assert it's a p(aragraph) node!!!
84
- node[1] << line ## add line to p(aragraph)
85
- else
86
- puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
87
- pp node
88
- exit 1
89
- end
90
- end
91
- end
92
- end
93
- outline
94
- end # method read
95
- end # class OutlineReader
96
-
97
- end # module SportDb