sportdb-parser 0.5.9 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 43f5fed1d5709a2bd2642046f1e3d367212c5ebcc71084f1a17b00738e5036de
4
- data.tar.gz: 3082d8cf6879adb3735e2dc28a9213dbc5f0781438d0e4248244ac10d02085b5
3
+ metadata.gz: b299ddece5e64b86bb7ee6b55578099b0624b11d8e5f10721363f45d6ef5d8d8
4
+ data.tar.gz: 5712c99b200e6116c9f07fba1215a4bf2560e5bd848c3c8cc48959aa17997b85
5
5
  SHA512:
6
- metadata.gz: 50176108fb5d9c81ce7234a7abbdbc4e29aaf25af37005d442d0b351ea699c93dee02293d975a340d2e9f03a13c76836aa30d0278e6a6a82ba28fbbba313f50c
7
- data.tar.gz: 90b3f15722d7540f70ebb10718a643473e19de09a15e014f70e65b815ff4ed014c4c953266918dbca2baf34da94bd5681e403d27cc2b97f643781bf6f714e23f
6
+ metadata.gz: 5d2fce54482e12542c35abd46a292d7f5e6b4db894bba3a7f911269f116d9fa530653d36ace4295e2f819bb974093b5567a5494a2d50b54ee3f250b314d40a73
7
+ data.tar.gz: 7b6ef8aaafa2d20c0356fcdc048211f24a04cc4f95819ad8d225b2c9a4a29e44d8f415190acfbe3e31b2f9cc457a12f8e75c460394e984d5b9b1f476f0f8e30f
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.5.9
1
+ ### 0.6.1
2
2
  ### 0.0.1 / 2024-07-12
3
3
 
4
4
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -14,6 +14,8 @@ lib/sportdb/parser/parser.rb
14
14
  lib/sportdb/parser/racc_parser.rb
15
15
  lib/sportdb/parser/racc_tree.rb
16
16
  lib/sportdb/parser/token-date.rb
17
+ lib/sportdb/parser/token-minute.rb
18
+ lib/sportdb/parser/token-prop.rb
17
19
  lib/sportdb/parser/token-score.rb
18
20
  lib/sportdb/parser/token-status.rb
19
21
  lib/sportdb/parser/token-text.rb
@@ -147,12 +147,15 @@ def initialize( lines, debug: false )
147
147
  ## strip lines with comments and empty lines striped / removed
148
148
  ## keep empty lines? why? why not?
149
149
  ## keep leading spaces (indent) - why?
150
+ ##
151
+ ## note - KEEP empty lines (get turned into BLANK token!!!!)
152
+
150
153
  @txt = String.new
151
154
  txt_pre.each_line do |line| ## preprocess
152
155
  line = line.strip
153
- next if line.empty? || line.start_with?('#') ### skip empty lines and comments
156
+ next if line.start_with?('#') ### skip comments
154
157
 
155
- line = line.sub( /#.*/, '' ).strip ### cut-off end-of line comments too
158
+ line = line.sub( /#.*/, '' ).strip ### cut-off end-of line comments too
156
159
 
157
160
  @txt << line
158
161
  @txt << "\n"
@@ -193,6 +196,18 @@ def tokenize_with_errors
193
196
  t
194
197
  end
195
198
 
199
+ ### check for "section" starters e.g. Teams or such
200
+ t = tokens[0]
201
+ if t[0] == :TEXT
202
+ text = t[1]
203
+ if text =~ /^teams$/i
204
+ t[0] = :TEAMS
205
+ elsif text =~ /^blank$/i ### todo/fix -- remove!!! add real blanks!!
206
+ t[0] = :BLANK
207
+ else
208
+ end
209
+ end
210
+
196
211
  #################
197
212
  ## pass 2
198
213
  ## transform tokens (using simple patterns)
@@ -227,13 +242,22 @@ def tokenize_with_errors
227
242
  end
228
243
 
229
244
 
230
- if buf.match?( :TEXT, [:SCORE, :VS, :'-'], :TEXT )
245
+ if buf.match?( :TEXT, [:SCORE, :SCORE_MORE, :VS, :'-'], :TEXT )
231
246
  nodes << [:TEAM, buf.next[1]]
232
247
  nodes << buf.next
233
248
  nodes << [:TEAM, buf.next[1]]
234
249
  elsif buf.match?( :TEXT, :MINUTE )
235
250
  nodes << [:PLAYER, buf.next[1]]
236
251
  nodes << buf.next
252
+ elsif buf.match?( :DATE, :TIME ) ## merge DATE TIME into DATETIME
253
+ date = buf.next[1]
254
+ time = buf.next[1]
255
+ ## puts "DATETIME:"
256
+ ## pp date, time
257
+ val = [date[0] + ' ' + time[0], ## concat string of two tokens
258
+ { date: date[1], time: time[1] }
259
+ ]
260
+ nodes << [:DATETIME, val]
237
261
  else
238
262
  ## pass through
239
263
  nodes << buf.next
@@ -247,8 +271,14 @@ def tokenize_with_errors
247
271
  ## flatten tokens
248
272
  tokens = []
249
273
  tokens_by_line.each do |tok|
274
+
275
+ if debug?
276
+ pp tok
277
+ end
278
+
250
279
  tokens += tok
251
- tokens << [:NEWLINE, "\n"] ## auto-add newlines
280
+ ## auto-add newlines (unless BLANK!!)
281
+ tokens << [:NEWLINE, "\n"] unless tok[0][0] == :BLANK
252
282
  end
253
283
 
254
284
  [tokens,errors]
@@ -260,7 +290,18 @@ def _tokenize_line( line )
260
290
  tokens = []
261
291
  errors = [] ## keep a list of errors - why? why not?
262
292
 
263
- puts ">#{line}<" if debug?
293
+ puts "line: >#{line}<" if debug?
294
+
295
+
296
+ ### special case for empty line (aka BLANK)
297
+ if line.empty?
298
+ ## note - blank always resets parser mode to std/top-level!!!
299
+ @re = RE
300
+
301
+ tokens << [:BLANK, '<|BLANK|>']
302
+ return [tokens, errors]
303
+ end
304
+
264
305
 
265
306
  pos = 0
266
307
  ## track last offsets - to report error on no match
@@ -275,10 +316,10 @@ def _tokenize_line( line )
275
316
 
276
317
 
277
318
  while m = @re.match( line, pos )
278
- if debug?
279
- pp m
280
- puts "pos: #{pos}"
281
- end
319
+ # if debug?
320
+ # pp m
321
+ # puts "pos: #{pos}"
322
+ # end
282
323
  offsets = [m.begin(0), m.end(0)]
283
324
 
284
325
  if offsets[0] != pos
@@ -298,7 +339,7 @@ def _tokenize_line( line )
298
339
 
299
340
  pos = offsets[1]
300
341
 
301
- pp offsets if debug?
342
+ # pp offsets if debug?
302
343
 
303
344
  ##
304
345
  ## note: racc requires pairs e.g. [:TOKEN, VAL]
@@ -306,12 +347,8 @@ def _tokenize_line( line )
306
347
 
307
348
 
308
349
  t = if @re == PROP_RE
309
- if m[:space]
310
- ## skip space
311
- nil
312
- elsif m[:spaces]
313
- ## skip spaces
314
- nil
350
+ if m[:space] || m[:spaces]
351
+ nil ## skip space(s)
315
352
  elsif m[:prop_name]
316
353
  if m[:name] == 'Y'
317
354
  [:YELLOW_CARD, m[:name]]
@@ -339,11 +376,11 @@ def _tokenize_line( line )
339
376
  when '(' then [:'(']
340
377
  when ')' then [:')']
341
378
  when '-' then [:'-']
342
- when '.' then
343
- ## switch back to top-level mode!!
344
- puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
345
- @re = RE
346
- [:'.']
379
+ # when '.' then
380
+ # ## switch back to top-level mode!!
381
+ # puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
382
+ # @re = RE
383
+ # [:'.']
347
384
  else
348
385
  nil ## ignore others (e.g. brackets [])
349
386
  end
@@ -353,12 +390,8 @@ def _tokenize_line( line )
353
390
  nil
354
391
  end
355
392
  else ## assume TOP_LEVEL (a.k.a. RE) machinery
356
- if m[:space]
357
- ## skip space
358
- nil
359
- elsif m[:spaces]
360
- ## skip spaces
361
- nil
393
+ if m[:space] || m[:spaces]
394
+ nil ## skip space(s)
362
395
  elsif m[:prop_key]
363
396
  ## switch context to PROP_RE
364
397
  @re = PROP_RE
@@ -397,6 +430,7 @@ def _tokenize_line( line )
397
430
  ## map month names
398
431
  ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
399
432
  date[:y] = m[:year].to_i(10) if m[:year]
433
+ date[:m] = m[:month].to_i(10) if m[:month]
400
434
  date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
401
435
  date[:d] = m[:day].to_i(10) if m[:day]
402
436
  date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
@@ -417,10 +451,12 @@ def _tokenize_line( line )
417
451
  duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
418
452
  ## note - for debugging keep (pass along) "literal" duration
419
453
  [:DURATION, [m[:duration], duration]]
454
+ elsif m[:wday] ## standalone weekday e.g. Mo/Tu/We/etc.
455
+ [:WDAY, [m[:wday], { wday: DAY_MAP[ m[:day_name].downcase ] } ]]
420
456
  elsif m[:num] ## fix - change to ord (for ordinal number!!!)
421
457
  ## note - strip enclosing () and convert to integer
422
458
  [:ORD, [m[:num], { value: m[:value].to_i(10) } ]]
423
- elsif m[:score]
459
+ elsif m[:score_more]
424
460
  score = {}
425
461
  ## check for pen
426
462
  score[:p] = [m[:p1].to_i(10),
@@ -433,8 +469,15 @@ def _tokenize_line( line )
433
469
  m[:ht2].to_i(10)] if m[:ht1] && m[:ht2]
434
470
 
435
471
  ## note - for debugging keep (pass along) "literal" score
436
- [:SCORE, [m[:score], score]]
437
- elsif m[:minute]
472
+ [:SCORE_MORE, [m[:score_more], score]]
473
+ elsif m[:score]
474
+ score = {}
475
+ ## must always have ft for now e.g. 1-1 or such
476
+ score[:ft] = [m[:ft1].to_i(10),
477
+ m[:ft2].to_i(10)]
478
+ ## note - for debugging keep (pass along) "literal" score
479
+ [:SCORE, [m[:score], score]]
480
+ elsif m[:minute]
438
481
  minute = {}
439
482
  minute[:m] = m[:value].to_i(10)
440
483
  minute[:offset] = m[:value2].to_i(10) if m[:value2]
@@ -454,11 +497,15 @@ def _tokenize_line( line )
454
497
  case sym
455
498
  when ',' then [:',']
456
499
  when ';' then [:';']
500
+ when '/' then [:'/']
457
501
  when '@' then [:'@']
458
502
  when '|' then [:'|']
459
503
  when '[' then [:'[']
460
504
  when ']' then [:']']
461
- when '-' then [:'-']
505
+ when '-' then [:'-'] # level 1 OR (classic) dash
506
+ when '--' then [:'--'] # level 2
507
+ when '---' then [:'---'] # level 3
508
+ when '----' then [:'----'] # level 4
462
509
  else
463
510
  nil ## ignore others (e.g. brackets [])
464
511
  end
@@ -472,11 +519,11 @@ def _tokenize_line( line )
472
519
 
473
520
  tokens << t if t
474
521
 
475
- if debug?
476
- print ">"
477
- print "*" * pos
478
- puts "#{line[pos..-1]}<"
479
- end
522
+ # if debug?
523
+ # print ">"
524
+ # print "*" * pos
525
+ # puts "#{line[pos..-1]}<"
526
+ # end
480
527
  end
481
528
 
482
529
  ## check if no match in end of string
@@ -489,6 +536,24 @@ def _tokenize_line( line )
489
536
  end
490
537
 
491
538
 
539
+ ##
540
+ ## if in prop mode continue if last token is [,-]
541
+ ## otherwise change back to "standard" mode
542
+ if @re == PROP_RE
543
+ if [:',', :'-'].include?( tokens[-1][0] )
544
+ ## continue/stay in PROP_RE mode
545
+ ## todo/check - auto-add PROP_CONT token or such
546
+ ## to help parser with possible NEWLINE
547
+ ## conflicts - why? why not?
548
+ else
549
+ ## switch back to top-level mode!!
550
+ puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug?
551
+ @re = RE
552
+ ## note - auto-add PROP_END (<PROP_END>)
553
+ tokens << [:PROP_END, "<|PROP_END|>"]
554
+ end
555
+ end
556
+
492
557
  [tokens,errors]
493
558
  end
494
559