sportdb-parser 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e5af84e3a141fc577287c8c788eb27a79bf1fc78ed0c08e80df6004383788b66
4
- data.tar.gz: 0e23fca8e4566021eb220d20925f97694fcc5b8b7c165c6ce469b5f08feb9cc1
3
+ metadata.gz: 3657cedc5125ee2515efa8be4a1838d05b7290523dd893f7eba5b87024e71238
4
+ data.tar.gz: caf6d7e909e17fa0dcabf659ab8f5046ca1940d8f7c1c6f5312e485dc0089384
5
5
  SHA512:
6
- metadata.gz: b6434b5d4df17e72a83f9b63ceef117ddfe50157073cbe4657e6e47e8aa820e8aaf986030642fc86160fd9c551cc55c9e9a35187cf09de3e0c346a00d1f58f17
7
- data.tar.gz: dc9b9fd5c782409c019aa2de0d4aea5bdeb90a7a4e01c83ed58b08d4315f1a87ae84268f269d20a10463567633471e1b05052c24bfa47bc49e988bb927e2f927
6
+ metadata.gz: 4063565aada304a1eb96009b6fe542392f41a55d4ad4d21b5de156004bd69a055c5f86b076bed1defbe50423c8c891dd538931ea6ca9b8ec41e237c23e699219
7
+ data.tar.gz: 91f6476810cb6617dfcd703ada57592cd38b87f3b4b9fc6fd4468a9457ff0e6ae6337a4e4f5c782e1b80f5f6b6015d5ce26ed6330915cd67a5fb6606f665017f
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.1.0
1
+ ### 0.2.1
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Rakefile CHANGED
@@ -21,11 +21,11 @@ Hoe.spec 'sportdb-parser' do
21
21
  self.licenses = ['Public Domain']
22
22
 
23
23
  self.extra_deps = [
24
- ['cocos'],
24
+ ['cocos', '>= 0.4.0'],
25
25
  ['season-formats'],
26
26
  ]
27
27
 
28
28
  self.spec_extras = {
29
- required_ruby_version: '>= 2.2.2'
29
+ required_ruby_version: '>= 3.1.0'
30
30
  }
31
31
  end
data/bin/fbt CHANGED
@@ -11,7 +11,7 @@ require 'sportdb/parser'
11
11
  require 'optparse'
12
12
 
13
13
  ##
14
- ## read textfile
14
+ ## read textfile
15
15
  ## and dump tokens
16
16
  ##
17
17
  ## fbt ../openfootball/.../euro.txt
@@ -32,7 +32,7 @@ require 'optparse'
32
32
 
33
33
 
34
34
  parser.on( "--verbose", "--debug",
35
- "turn on verbose / debug output (default: #{opts[:debug]} )" ) do |debug|
35
+ "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
36
36
  opts[:debug] = debug
37
37
  end
38
38
 
@@ -53,18 +53,15 @@ p args
53
53
 
54
54
 
55
55
 
56
-
57
-
58
-
59
56
  paths = if args.empty?
60
57
  [
61
- '../../../openfootball/euro/2020--europe/euro.txt',
58
+ '../../../openfootball/euro/2021--europe/euro.txt',
62
59
  '../../../openfootball/euro/2024--germany/euro.txt',
63
60
  ]
64
61
  else
65
62
  ## check for directories
66
63
  ## and auto-expand
67
-
64
+
68
65
  SportDb::Parser::Opts.expand_args( args )
69
66
  end
70
67
 
@@ -86,8 +83,10 @@ end
86
83
  if errors.size > 0
87
84
  puts
88
85
  pp errors
86
+ puts
89
87
  puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
90
88
  else
89
+ puts
91
90
  puts "OK no parse errors found in #{paths.size} datafile(s)"
92
91
  end
93
92
 
@@ -15,7 +15,7 @@ class Parser
15
15
 
16
16
  GROUP_RE = %r{^
17
17
  Group [ ]
18
- (?<key>[a-z0-9]+)
18
+ (?<key>[a-z0-9]+)
19
19
  $}ix
20
20
  def is_group?( text )
21
21
  ## use regex for match
@@ -27,43 +27,81 @@ end
27
27
 
28
28
  ROUND_RE = %r{^(
29
29
 
30
+ ## add special case for group play-off rounds!
31
+ ## group 2 play-off (e.g. worldcup 1954, 1958)
32
+ (?: Group [ ] [a-z0-9]+ [ ]
33
+ Play-?offs?
34
+ )
35
+ |
30
36
  # round - note - requires number e.g. round 1,2, etc.
37
+ # note - use 1-9 regex (cannot start with 0) - why? why not?
38
+ # make week 01 or round 01 or matchday 01 possible?
31
39
  (?: (?: Round |
32
40
  Matchday |
33
41
  Week
34
42
  )
35
- [ ] [0-9]+
43
+ [ ] [1-9][0-9]*
44
+ )
45
+ |
46
+ ## starting with qual(ification)
47
+ ## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
48
+ (?: Qual \. [ ]
49
+ Round
50
+ [ ] [1-9][0-9]*
36
51
  )
37
52
  |
53
+ ## 1. Round / 2. Round / 3. Round / etc.
54
+ ## Play-off Round
55
+ ## First Round
56
+ ## Final Round (e.g. Worldcup 1950)
57
+ (?:
58
+ (?: [1-9][0-9]* \. |
59
+ Play-?off |
60
+ 1st | First |
61
+ 2nd | Second |
62
+ Final
63
+ )
64
+ [ ] Round
65
+ )
66
+ |
67
+ ## starting with preliminary
68
+ # e.g. Preliminary round
69
+ (?: Preliminary [ ]
70
+ (?: Round |
71
+ Semi-?finals |
72
+ Final
73
+ )
74
+ )
75
+ |
38
76
  # more (knockout) rounds
39
77
  # playoffs - playoff, play-off, play-offs
40
- (?: Play-?offs?
78
+ (?: Play-?offs?
41
79
  (?: [ ]for[ ]quarter-?finals )?
42
80
  )
43
- |
81
+ |
44
82
  # round32
45
- (?: Round[ ]of[ ]32 |
83
+ (?: Round[ ]of[ ]32 |
46
84
  Last[ ]32 )
47
85
  |
48
- # round16
86
+ # round16
49
87
  (?: Round[ ]of[ ]16 |
50
- Last[ ]16 |
88
+ Last[ ]16 |
51
89
  8th[ ]finals )
52
90
  |
53
91
  # fifthplace
54
92
  (?:
55
- (?: (Fifth|5th)[ -]place
93
+ (?: (Fifth|5th)[ -]place
56
94
  (?: [ ] (?: match|play-?off|final ))?
57
95
  ) |
58
96
  (?: Match[ ]for[ ](?: fifth|5th )[ -]place )
59
97
  )
60
98
  |
61
99
  # thirdplace
62
- (?:
63
- (?: (Third|3rd)[ -]place
64
- (?: [ ] (?: match|play-?off|final ))?
100
+ (?:
101
+ (?: (Third|3rd)[ -]place
102
+ (?: [ ] (?: match|play-?off|final ))?
65
103
  ) |
66
- (?: Match[ ]for[ ](?: third|3rd )[ -]place )
104
+ (?: Match[ ]for[ ](?: third|3rd )[ -]place )
67
105
  )
68
106
  |
69
107
  # quarterfinals
@@ -72,18 +110,29 @@ ROUND_RE = %r{^(
72
110
  Quarters |
73
111
  Last[ ]8
74
112
  )
75
- |
113
+ |
76
114
  # semifinals
77
- (?:
115
+ (?:
78
116
  Semi-?finals? |
79
117
  Semis |
80
118
  Last[ ]4
81
119
  )
82
120
  |
83
121
  # final
84
- Finals?
85
-
86
- )$}ix
122
+ Finals?
123
+ |
124
+ ## add replays
125
+ ## e.g. Final Replay
126
+ ## Quarter-finals replays
127
+ ## First round replays
128
+ (?:
129
+ (?: First [ ] Round |
130
+ Quarter-?finals? |
131
+ Finals?
132
+ )
133
+ [ ] Replays?
134
+ )
135
+ )$}ix
87
136
 
88
137
 
89
138
  def is_round?( text )
@@ -95,9 +144,9 @@ end
95
144
  ##
96
145
  LEG_RE = %r{^
97
146
  # leg1
98
- (?: 1st|First)[ ]leg
147
+ (?: 1st|First)[ ]leg
99
148
  |
100
- # leg2
149
+ # leg2
101
150
  (?: 2nd|Second)[ ]leg
102
151
  $}ix
103
152
 
@@ -1,8 +1,4 @@
1
1
 
2
- ###
3
- ## todo/fix - move to sportdb-parser - why? why not? !!!!!!
4
- ##
5
-
6
2
 
7
3
  module SportDb
8
4
 
@@ -10,7 +6,7 @@ class OutlineReader
10
6
 
11
7
  def self.debug=(value) @@debug = value; end
12
8
  def self.debug?() @@debug ||= false; end
13
- def debug?() self.class.debug?; end
9
+ def debug?() self.class.debug?; end
14
10
 
15
11
 
16
12
 
@@ -1,24 +1,24 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
3
+
4
4
 
5
5
  ## transforms
6
6
  ##
7
7
  ## Netherlands 1-2 (1-1) England
8
- ## => text => team
9
- ## score|vs
8
+ ## => text => team
9
+ ## score|vs
10
10
  ## text => team
11
11
 
12
12
 
13
13
  ## token iter/find better name
14
14
  ## e.g. TokenBuffer/Scanner or such ??
15
- class Tokens
15
+ class Tokens
16
16
  def initialize( tokens )
17
17
  @tokens = tokens
18
18
  @pos = 0
19
19
  end
20
20
 
21
- def pos() @pos; end
21
+ def pos() @pos; end
22
22
  def eos?() @pos >= @tokens.size; end
23
23
 
24
24
 
@@ -47,17 +47,17 @@ class Tokens
47
47
  ## return token type (e.g. :text, :num, etc.)
48
48
  def cur() peek(0); end
49
49
  ## return content (assumed to be text)
50
- def text(offset=0)
50
+ def text(offset=0)
51
51
  ## raise error - why? why not?
52
52
  ## return nil?
53
53
  if peek( offset ) != :text
54
54
  raise ArgumentError, "text(#{offset}) - token not a text type"
55
55
  end
56
- @tokens[@pos+offset][1]
56
+ @tokens[@pos+offset][1]
57
57
  end
58
58
 
59
59
 
60
- def peek(offset=1)
60
+ def peek(offset=1)
61
61
  ## return nil if eos
62
62
  if @pos+offset >= @tokens.size
63
63
  nil
@@ -66,7 +66,7 @@ class Tokens
66
66
  end
67
67
  end
68
68
 
69
- ## note - returns complete token
69
+ ## note - returns complete token
70
70
  def next
71
71
  # if @pos >= @tokens.size
72
72
  # raise ArgumentError, "end of array - #{@pos} >= #{@tokens.size}"
@@ -81,7 +81,7 @@ class Tokens
81
81
  def collect( &blk )
82
82
  tokens = []
83
83
  loop do
84
- break if eos?
84
+ break if eos?
85
85
  tokens << if block_given?
86
86
  blk.call( self.next )
87
87
  else
@@ -106,7 +106,7 @@ def parse_with_errors( line, debug: false )
106
106
  errors += token_errors
107
107
 
108
108
  #############
109
- ## pass 1
109
+ ## pass 1
110
110
  ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
111
111
  tokens = tokens.map do |t|
112
112
  if t[0] == :text
@@ -129,24 +129,40 @@ def parse_with_errors( line, debug: false )
129
129
  ## puts "tokens:"
130
130
  ## pp tokens
131
131
 
132
- ## transform tokens into (parse tree/ast) nodes
132
+ ## transform tokens into (parse tree/ast) nodes
133
133
  nodes = []
134
-
134
+
135
135
  buf = Tokens.new( tokens )
136
136
  ## pp buf
137
137
 
138
138
 
139
- loop do
140
- if buf.pos == 0
141
- ## check for
142
- ## group def or round def
143
- if buf.match?( :round, :'|' ) ## assume round def (change round to round_def)
139
+ loop do
140
+ break if buf.eos?
141
+
142
+ ## simplify - remove separator for round + leg pair
143
+ ## e.g. Round of 16, 1st Leg
144
+ ## allow Round of 16 - 1st Leg too - why? why not?
145
+ if buf.match?( :round, [:',', :'|',
146
+ :'-',
147
+ :vs, ### fix - change parser to issue :'-' only for (-) not :vs!!!
148
+ ], :leg )
149
+ nodes << [:round, buf.next[1]]
150
+ buf.next ## swallow separator
151
+ nodes << [:leg, buf.next[1]]
152
+ next
153
+ end
154
+
155
+
156
+ if buf.pos == 0 ## MUST start line
157
+ ## check for
158
+ ## group def or round def
159
+ if buf.match?( :round, :'|', [:date, :duration] ) ## assume round def (change round to round_def)
144
160
  nodes << [:round_def, buf.next[1]]
145
161
  buf.next ## swallow pipe
146
162
  nodes += buf.collect
147
163
  break
148
164
  end
149
- if buf.match?( :group, :'|' ) ## assume group def (change group to group_def)
165
+ if buf.match?( :group, :'|', :text ) ## assume group def (change group to group_def)
150
166
  nodes << [:group_def, buf.next[1]]
151
167
  buf.next ## swallow pipe
152
168
  ## change all text to team
@@ -154,11 +170,15 @@ def parse_with_errors( line, debug: false )
154
170
  t[0] == :text ? [:team, t[1]] : t
155
171
  }
156
172
  break
157
- end
173
+ end
158
174
  end
159
175
 
160
176
 
161
- if buf.match?( :text, [:score, :vs], :text )
177
+ if buf.match?( :text, :'-', :text ) ## hacky? convert "generic" :- to :vs
178
+ nodes << [:team, buf.next[1]] ## keep this rule/option - why? why not?
179
+ nodes << [:vs]
180
+ nodes << [:team, buf.next[1]]
181
+ elsif buf.match?( :text, [:score, :vs], :text )
162
182
  nodes << [:team, buf.next[1]]
163
183
  nodes << buf.next
164
184
  nodes << [:team, buf.next[1]]
@@ -170,14 +190,12 @@ def parse_with_errors( line, debug: false )
170
190
  ## only change text to geo
171
191
  nodes += buf.collect { |t|
172
192
  t[0] == :text ? [:geo, t[1]] : t
173
- }
193
+ }
174
194
  break
175
195
  else
176
196
  ## pass through
177
197
  nodes << buf.next
178
198
  end
179
-
180
- break if buf.eos?
181
199
  end
182
200
 
183
201
  [nodes,errors]
@@ -192,5 +210,5 @@ end
192
210
 
193
211
 
194
212
  end # class Parser
195
- end # module SportDb
196
-
213
+ end # module SportDb
214
+
@@ -1,6 +1,6 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
3
+
4
4
 
5
5
 
6
6
  def self.parse_names( txt )
@@ -47,8 +47,8 @@ def self.build_map( lines, downcase: false )
47
47
  ## "may" => 5,
48
48
  ## "june" => 6, "jun" => 6, ...
49
49
  lines.each_with_index.reduce( {} ) do |h,(line,i)|
50
- line.each do |name|
51
- h[ downcase ? name.downcase : name ] = i+1
50
+ line.each do |name|
51
+ h[ downcase ? name.downcase : name ] = i+1
52
52
  end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
53
53
  h
54
54
  end
@@ -109,28 +109,56 @@ DAY_MAP = build_map( DAY_LINES, downcase: true )
109
109
  ## todo - add more date variants !!!!
110
110
 
111
111
  # e.g. Fri Aug/9 or Fri Aug 9
112
- DATE_RE = %r{
112
+ DATE_I_RE = %r{
113
113
  (?<date>
114
114
  \b
115
115
  ## optional day name
116
116
  ((?<day_name>#{DAY_NAMES})
117
117
  [ ]
118
- )?
118
+ )?
119
119
  (?<month_name>#{MONTH_NAMES})
120
120
  (?: \/|[ ] )
121
121
  (?<day>\d{1,2})
122
122
  ## optional year
123
123
  ( [ ]
124
124
  (?<year>\d{4})
125
- )?
126
- \b
125
+ )?
126
+ \b
127
+ )}ix
128
+
129
+
130
+ # e.g. 3 June or 10 June
131
+ DATE_II_RE = %r{
132
+ (?<date>
133
+ \b
134
+ ## optional day name
135
+ ((?<day_name>#{DAY_NAMES})
136
+ [ ]
137
+ )?
138
+ (?<day>\d{1,2})
139
+ [ ]
140
+ (?<month_name>#{MONTH_NAMES})
141
+ ## optional year
142
+ ( [ ]
143
+ (?<year>\d{4})
144
+ )?
145
+ \b
127
146
  )}ix
128
147
 
129
148
 
149
+ #############################################
150
+ # map tables
151
+ # note: order matters; first come-first matched/served
152
+ DATE_RE = Regexp.union(
153
+ DATE_I_RE,
154
+ DATE_II_RE
155
+ )
156
+
157
+
130
158
  ###
131
- # date duration
159
+ # date duration
132
160
  # use - or + as separator
133
- # in theory plus( +) only if dates
161
+ # in theory plus( +) only if dates
134
162
  # are two days next to each other
135
163
  #
136
164
  # otherwise define new dates type in the future? why? why not?
@@ -147,7 +175,7 @@ DATE_RE = %r{
147
175
  # Jun/25 .. 26 - why? why not???
148
176
  # Jun/25 to 26 - why? why not???
149
177
  # Jun/25 + 26 - add - why? why not???
150
- # Sun-Wed Jun/23-26 - add - why? why not???
178
+ # Sun-Wed Jun/23-26 - add - why? why not???
151
179
  # Wed+Thu Jun/26+27 2024 - add - why? why not???
152
180
  #
153
181
  # maybe use comma and plus for list of dates
@@ -157,39 +185,89 @@ DATE_RE = %r{
157
185
  # add back optional comma (before) year - why? why not?
158
186
 
159
187
 
160
- DURATION_RE = %r{
188
+ ##
189
+ # todo add plus later on - why? why not?
190
+
191
+ DURATION_I_RE = %r{
161
192
  (?<duration>
162
193
  \b
163
194
  ## optional day name
164
195
  ((?<day_name1>#{DAY_NAMES})
165
196
  [ ]
166
- )?
197
+ )?
167
198
  (?<month_name1>#{MONTH_NAMES})
168
199
  (?: \/|[ ] )
169
200
  (?<day1>\d{1,2})
170
201
  ## optional year
171
202
  ( [ ]
172
203
  (?<year1>\d{4})
173
- )?
204
+ )?
174
205
 
175
206
  ## support - only for now; + was dropped (add .. or such - why??)
176
- [ ]*[+-][ ]*
177
-
207
+ [ ]*[-][ ]*
208
+
178
209
  ## optional day name
179
210
  ((?<day_name2>#{DAY_NAMES})
180
211
  [ ]
181
- )?
212
+ )?
182
213
  (?<month_name2>#{MONTH_NAMES})
183
214
  (?: \/|[ ] )
184
215
  (?<day2>\d{1,2})
185
216
  ## optional year
186
217
  ( [ ]
187
218
  (?<year2>\d{4})
188
- )?
189
- \b
219
+ )?
220
+ \b
221
+ )}ix
222
+
223
+
224
+ ###
225
+ # variant ii
226
+ # e.g. 26 July - 27 July
227
+
228
+ DURATION_II_RE = %r{
229
+ (?<duration>
230
+ \b
231
+ ## optional day name
232
+ ((?<day_name1>#{DAY_NAMES})
233
+ [ ]
234
+ )?
235
+ (?<day1>\d{1,2})
236
+ [ ]
237
+ (?<month_name1>#{MONTH_NAMES})
238
+ ## optional year
239
+ ( [ ]
240
+ (?<year1>\d{4})
241
+ )?
242
+
243
+ ## support + and - (add .. or such - why??)
244
+ [ ]*[-][ ]*
245
+
246
+ ## optional day name
247
+ ((?<day_name2>#{DAY_NAMES})
248
+ [ ]
249
+ )?
250
+ (?<day2>\d{1,2})
251
+ [ ]
252
+ (?<month_name2>#{MONTH_NAMES})
253
+ ## optional year
254
+ ( [ ]
255
+ (?<year2>\d{4})
256
+ )?
257
+ \b
190
258
  )}ix
191
259
 
192
260
 
261
+ #############################################
262
+ # map tables
263
+ # note: order matters; first come-first matched/served
264
+ DURATION_RE = Regexp.union(
265
+ DURATION_I_RE,
266
+ DURATION_II_RE
267
+ )
268
+
269
+
270
+
193
271
  end # class Parser
194
- end # module SportDb
195
-
272
+ end # module SportDb
273
+
@@ -1,6 +1,6 @@
1
1
 
2
2
 
3
- module SportDb
3
+ module SportDb
4
4
  class Parser
5
5
 
6
6
 
@@ -15,7 +15,7 @@ TIME_RE = %r{
15
15
  (?: :|\.|h )
16
16
  (?<minute>\d{2})
17
17
  \b
18
- )
18
+ )
19
19
  }ix
20
20
 
21
21
 
@@ -28,7 +28,7 @@ TIME_RE = %r{
28
28
  # (CEST/UTC+2) - central european summer time - daylight saving time (DST).
29
29
  # (EET/UTC+1) - eastern european time
30
30
  # (EEST/UTC+2) - eastern european summer time - daylight saving time (DST).
31
- #
31
+ #
32
32
  # UTC+3
33
33
  # UTC+4
34
34
  # UTC+0
@@ -45,7 +45,7 @@ TIME_RE = %r{
45
45
 
46
46
  TIMEZONE_RE = %r{
47
47
  ## e.g. (UTC-2) or (CEST/UTC-2) etc.
48
- (?<timezone>
48
+ (?<timezone>
49
49
  \(
50
50
  ## optional "local" timezone name eg. BRT or CEST etc.
51
51
  (?: [a-z]+
@@ -63,28 +63,28 @@ TIMEZONE_RE = %r{
63
63
 
64
64
  BASICS_RE = %r{
65
65
  ## e.g. (51) or (1) etc. - limit digits of number???
66
- (?<num> \( (?<value>\d+) \) )
66
+ (?<num> \( (?<value>\d+) \) )
67
67
  |
68
- (?<vs>
69
- (?<=[ ]) # Positive lookbehind for space
70
- (?:
68
+ (?<vs>
69
+ (?<=[ ]) # Positive lookbehind for space
70
+ (?:
71
71
  vs\.?| ## allow optional dot (eg. vs. v.)
72
72
  v\.?|
73
73
  -
74
74
  ) # not bigger match first e.g. vs than v etc.
75
75
  (?=[ ]) # positive lookahead for space
76
- )
77
- |
76
+ )
77
+ |
78
78
  (?<none>
79
- (?<=[ \[]|^) # Positive lookbehind for space or [
79
+ (?<=[ \[]|^) # Positive lookbehind for space or [
80
80
  -
81
81
  (?=[ ]*;) # positive lookahead for space
82
82
  )
83
83
  |
84
84
  (?<spaces> [ ]{2,}) |
85
- (?<space> [ ])
85
+ (?<space> [ ])
86
86
  |
87
- (?<sym>[;,@|\[\]])
87
+ (?<sym>[;,@|\[\]])
88
88
  }ix
89
89
 
90
90
 
@@ -94,13 +94,13 @@ MINUTE_RE = %r{
94
94
  (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
95
95
  (?: \+
96
96
  (?<value2>\d{1,3})
97
- )?
97
+ )?
98
98
  ' ## must have minute marker!!!!
99
99
  )
100
100
  }ix
101
101
 
102
102
 
103
- ## (match) status
103
+ ## (match) status
104
104
  ## note: english usage - cancelled (in UK), canceled (in US)
105
105
  ##
106
106
  ## add more variants - why? why not?
@@ -115,30 +115,30 @@ STATUS_RE = %r{
115
115
  |
116
116
  postponed
117
117
  |
118
- awarded|awd\.
118
+ awarded|awd\.
119
119
  |
120
- replay
120
+ replay
121
121
  )
122
122
  (?=[ \]]|$)
123
123
  )}ix
124
124
 
125
125
  ## todo/check: remove lookahead assertion here - why require space?
126
- ## note: \b works only after non-alphanum
127
- ## to make it work with awd. (dot) "custom" lookahead neeeded
126
+ ## note: \b works only after non-alphanum
127
+ ## to make it work with awd. (dot) "custom" lookahead neeeded
128
128
 
129
129
 
130
130
  ## goal types
131
- # (pen.) or (pen) or (p.) or (p)
131
+ # (pen.) or (pen) or (p.) or (p)
132
132
  ## (o.g.) or (og)
133
133
  GOAL_PEN_RE = %r{
134
- (?<pen> \(
135
- (?:pen|p)\.?
134
+ (?<pen> \(
135
+ (?:pen|p)\.?
136
136
  \)
137
137
  )
138
138
  }ix
139
139
  GOAL_OG_RE = %r{
140
- (?<og> \(
141
- (?:og|o\.g\.)
140
+ (?<og> \(
141
+ (?:og|o\.g\.)
142
142
  \)
143
143
  )
144
144
  }ix
@@ -158,11 +158,11 @@ RE = Regexp.union( STATUS_RE,
158
158
 
159
159
 
160
160
  def log( msg )
161
- ## append msg to ./logs.txt
161
+ ## append msg to ./logs.txt
162
162
  ## use ./errors.txt - why? why not?
163
163
  File.open( './logs.txt', 'a:utf-8' ) do |f|
164
164
  f.write( msg )
165
- f.write( "\n" )
165
+ f.write( "\n" )
166
166
  end
167
167
  end
168
168
 
@@ -176,7 +176,7 @@ def tokenize_with_errors( line, typed: false,
176
176
  puts ">#{line}<" if debug
177
177
 
178
178
  pos = 0
179
- ## track last offsets - to report error on no match
179
+ ## track last offsets - to report error on no match
180
180
  ## or no match in end of string
181
181
  offsets = [0,0]
182
182
  m = nil
@@ -184,7 +184,7 @@ def tokenize_with_errors( line, typed: false,
184
184
  while m = RE.match( line, pos )
185
185
  if debug
186
186
  pp m
187
- puts "pos: #{pos}"
187
+ puts "pos: #{pos}"
188
188
  end
189
189
  offsets = [m.begin(0), m.end(0)]
190
190
 
@@ -213,10 +213,10 @@ def tokenize_with_errors( line, typed: false,
213
213
  elsif m[:spaces]
214
214
  ## skip spaces
215
215
  nil
216
- elsif m[:text]
216
+ elsif m[:text]
217
217
  [:text, m[:text]] ## keep pos - why? why not?
218
218
  elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
219
- [:status, m[:status]]
219
+ [:status, m[:status]]
220
220
  elsif m[:time]
221
221
  if typed
222
222
  ## unify to iso-format
@@ -230,7 +230,7 @@ def tokenize_with_errors( line, typed: false,
230
230
  if (hour >= 0 && hour <= 24) &&
231
231
  (minute >=0 && minute <= 59)
232
232
  ## note - for debugging keep (pass along) "literal" time
233
- ## might use/add support for am/pm later
233
+ ## might use/add support for am/pm later
234
234
  [:time, m[:time], {h:hour,m:minute}]
235
235
  else
236
236
  raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
@@ -241,54 +241,68 @@ def tokenize_with_errors( line, typed: false,
241
241
  elsif m[:date]
242
242
  if typed
243
243
  date = {}
244
- =begin
244
+ =begin
245
245
  ((?<day_name>#{DAY_NAMES})
246
246
  [ ]
247
- )?
247
+ )?
248
248
  (?<month_name>#{MONTH_NAMES})
249
249
  (?: \/|[ ] )
250
250
  (?<day>\d{1,2})
251
251
  ## optional year
252
252
  ( [ ]
253
253
  (?<year>\d{4})
254
- )?
254
+ )?
255
255
  =end
256
256
  ## map month names
257
257
  ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
258
- date[:y] = m[:year].to_i(10) if m[:year]
258
+ date[:y] = m[:year].to_i(10) if m[:year]
259
259
  date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
260
260
  date[:d] = m[:day].to_i(10) if m[:day]
261
261
  date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
262
- ## note - for debugging keep (pass along) "literal" date
263
- [:date, m[:date], date]
262
+ ## note - for debugging keep (pass along) "literal" date
263
+ [:date, m[:date], date]
264
264
  else
265
265
  [:date, m[:date]]
266
266
  end
267
267
  elsif m[:timezone]
268
268
  [:timezone, m[:timezone]]
269
269
  elsif m[:duration]
270
- [:duration, m[:duration]]
270
+ if typed
271
+ duration = { start: {}, end: {}}
272
+ duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
273
+ duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
274
+ duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
275
+ duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
276
+ duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
277
+ duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
278
+ duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
279
+ duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
280
+ ## note - for debugging keep (pass along) "literal" duration
281
+ [:duration, m[:duration], duration]
282
+ else
283
+ [:duration, m[:duration]]
284
+ end
271
285
  elsif m[:num]
272
286
  if typed
273
287
  ## note - strip enclosing () and convert to integer
274
288
  [:num, m[:value].to_i(10)]
275
- else
289
+ else
276
290
  [:num, m[:num]]
277
291
  end
278
292
  elsif m[:score]
279
293
  if typed
280
294
  score = {}
281
295
  ## check for pen
282
- score[:p] = [m[:p1].to_i(10),
296
+ score[:p] = [m[:p1].to_i(10),
283
297
  m[:p2].to_i(10)] if m[:p1] && m[:p2]
284
- score[:et] = [m[:et1].to_i(10),
298
+ score[:et] = [m[:et1].to_i(10),
285
299
  m[:et2].to_i(10)] if m[:et1] && m[:et2]
286
- score[:ft] = [m[:ft1].to_i(10),
300
+ score[:ft] = [m[:ft1].to_i(10),
287
301
  m[:ft2].to_i(10)] if m[:ft1] && m[:ft2]
288
- score[:ht] = [m[:ht1].to_i(10),
302
+ score[:ht] = [m[:ht1].to_i(10),
289
303
  m[:ht2].to_i(10)] if m[:ht1] && m[:ht2]
290
304
 
291
- ## note - for debugging keep (pass along) "literal" score
305
+ ## note - for debugging keep (pass along) "literal" score
292
306
  [:score, m[:score], score]
293
307
  else
294
308
  [:score, m[:score]]
@@ -298,7 +312,7 @@ def tokenize_with_errors( line, typed: false,
298
312
  minute = {}
299
313
  minute[:m] = m[:value].to_i(10)
300
314
  minute[:offset] = m[:value2].to_i(10) if m[:value2]
301
- ## note - for debugging keep (pass along) "literal" minute
315
+ ## note - for debugging keep (pass along) "literal" minute
302
316
  [:minute, m[:minute], minute]
303
317
  else
304
318
  [:minute, m[:minute]]
@@ -318,16 +332,16 @@ def tokenize_with_errors( line, typed: false,
318
332
  when ',' then [:',']
319
333
  when ';' then [:';']
320
334
  when '@' then [:'@']
321
- when '|' then [:'|']
335
+ when '|' then [:'|']
322
336
  else
323
337
  nil ## ignore others (e.g. brackets [])
324
338
  end
325
339
  else
326
- ## report error
340
+ ## report error
327
341
  nil
328
342
  end
329
343
 
330
- tokens << t if t
344
+ tokens << t if t
331
345
 
332
346
  if debug
333
347
  print ">"
@@ -346,7 +360,7 @@ def tokenize_with_errors( line, typed: false,
346
360
  end
347
361
 
348
362
 
349
- [tokens,errors]
363
+ [tokens,errors]
350
364
  end
351
365
 
352
366
 
@@ -360,5 +374,4 @@ end
360
374
 
361
375
 
362
376
  end # class Parser
363
- end # module SportDb
364
-
377
+ end # module SportDb
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 1
7
- PATCH = 0
6
+ MINOR = 2
7
+ PATCH = 1
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -1,5 +1,5 @@
1
- ## pulls in
2
- require 'cocos'
1
+ ## pulls in
2
+ require 'cocos'
3
3
  require 'season/formats' # e.g. Season() support machinery
4
4
 
5
5
 
@@ -36,7 +36,7 @@ require_relative 'parser/opts'
36
36
  =begin
37
37
  module SportDb
38
38
  def self.parser() @@parser ||= Parser.new; end
39
- def self.parse( ... )
39
+ def self.parse( ... )
40
40
  end
41
41
  def self.tokenize( ... )
42
42
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-22 00:00:00.000000000 Z
11
+ date: 2024-08-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: 0.4.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: 0.4.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: season-formats
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -112,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
112
  requirements:
113
113
  - - ">="
114
114
  - !ruby/object:Gem::Version
115
- version: 2.2.2
115
+ version: 3.1.0
116
116
  required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  requirements:
118
118
  - - ">="