sportdb-parser 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1466b82654b4a4f0f823a96709488dedb595d08731a55abc128691e0ffe2a80b
4
- data.tar.gz: 14995e94dc079ab61e77d056d15c9a5830dc573129661ca453b2892d087c2061
3
+ metadata.gz: c94dcd42fc13a7043f6b926ca1d947df3199877693b22e53e4f50b5aa522bf5d
4
+ data.tar.gz: 33eb689dcfb2bab0728c19b7d706da1556ddefafbfbcc6e424ac5bcbe3bccef6
5
5
  SHA512:
6
- metadata.gz: 75c2b4f455e8bb1b5e471c39f8fa3b5069bd0bb2a808ad8b246c0f2b060c5416f9f56a3619ad7db7ac5f21a6177c762aa28ae8e9c939b03a2569cf27d34f9b81
7
- data.tar.gz: 9c4f9095a61410499ae7628b1eb3295d8f456e62feae45a4c254d9157904326abf6571f3c4a04c078551b6364cd09252509f709bfeef46a569dbe202f4058460
6
+ metadata.gz: 97ef8d76ffa26312d66359f364588af3d7c76a3b0cebd3644b1f1ae775463defa9cb9552b267f26677c2c6f4e9b7b9fe62479dd34a7211fd1a4a3c1b5e9af830
7
+ data.tar.gz: ca9b56c6c02c132f3924fb40c293e90379812b830a2899e2be02c1d6469a278456c6d68db7f73d5f5fd69b372c958953e3fefd829ac1120cf56b0944176a2b87
data/CHANGELOG.md CHANGED
@@ -1,3 +1,5 @@
1
+ ### 0.2.0
2
+
1
3
  ### 0.0.1 / 2024-07-12
2
4
 
3
5
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -6,9 +6,11 @@ bin/fbt
6
6
  lib/sportdb/parser.rb
7
7
  lib/sportdb/parser/lang.rb
8
8
  lib/sportdb/parser/linter.rb
9
+ lib/sportdb/parser/opts.rb
9
10
  lib/sportdb/parser/outline_reader.rb
10
11
  lib/sportdb/parser/parser.rb
11
12
  lib/sportdb/parser/token-date.rb
12
13
  lib/sportdb/parser/token-score.rb
13
14
  lib/sportdb/parser/token-text.rb
14
15
  lib/sportdb/parser/token.rb
16
+ lib/sportdb/parser/version.rb
data/Rakefile CHANGED
@@ -1,9 +1,10 @@
1
1
  require 'hoe'
2
+ require './lib/sportdb/parser/version.rb'
2
3
 
3
4
 
4
5
  Hoe.spec 'sportdb-parser' do
5
6
 
6
- self.version = '0.0.1'
7
+ self.version = SportDb::Module::Parser::VERSION
7
8
 
8
9
  self.summary = "sportdb-parser - football.txt match parser (& tokenizer)"
9
10
  self.description = summary
@@ -19,7 +20,10 @@ Hoe.spec 'sportdb-parser' do
19
20
 
20
21
  self.licenses = ['Public Domain']
21
22
 
22
- self.extra_deps = []
23
+ self.extra_deps = [
24
+ ['cocos', '>= 0.4.0'],
25
+ ['season-formats'],
26
+ ]
23
27
 
24
28
  self.spec_extras = {
25
29
  required_ruby_version: '>= 2.2.2'
data/bin/fbt CHANGED
@@ -3,52 +3,20 @@
3
3
  ## tip: to test run:
4
4
  ## ruby -I ./lib bin/fbt
5
5
 
6
+ ## our own code
6
7
  require 'sportdb/parser'
7
8
 
8
9
 
10
+
9
11
  require 'optparse'
10
12
 
11
13
  ##
12
- ## read textfile
14
+ ## read textfile
13
15
  ## and dump tokens
14
16
  ##
15
17
  ## fbt ../openfootball/.../euro.txt
16
18
 
17
19
 
18
- SEASON_RE = %r{ (?:
19
- \d{4}-\d{2}
20
- | \d{4}(--[a-z0-9_-]+)?
21
- )
22
- }x
23
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
24
-
25
-
26
- ## note: if pattern includes directory add here
27
- ## (otherwise move to more "generic" datafile) - why? why not?
28
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
29
- #{SEASON}
30
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
31
- }x
32
-
33
-
34
- def find( path, pattern=MATCH_RE )
35
- datafiles = []
36
-
37
- ## check all txt files
38
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
39
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
40
- ## pp candidates
41
- candidates.each do |candidate|
42
- datafiles << candidate if pattern.match( candidate )
43
- end
44
-
45
- ## pp datafiles
46
- datafiles
47
- end
48
-
49
-
50
-
51
-
52
20
 
53
21
 
54
22
  args = ARGV
@@ -64,7 +32,7 @@ def find( path, pattern=MATCH_RE )
64
32
 
65
33
 
66
34
  parser.on( "--verbose", "--debug",
67
- "turn on verbose / debug output (default: #{opts[:debug]} )" ) do |debug|
35
+ "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
68
36
  opts[:debug] = debug
69
37
  end
70
38
 
@@ -85,28 +53,6 @@ p args
85
53
 
86
54
 
87
55
 
88
-
89
- def expand_args( args )
90
- paths = []
91
-
92
- args.each do |arg|
93
- ## check if directory
94
- if Dir.exist?( arg )
95
- datafiles = find( arg )
96
- puts
97
- puts " found #{datafiles.size} match txt datafiles in #{arg}"
98
- pp datafiles
99
- paths += datafiles
100
- else
101
- ## assume it's a file
102
- paths << arg
103
- end
104
- end
105
-
106
- paths
107
- end
108
-
109
-
110
56
  paths = if args.empty?
111
57
  [
112
58
  '../../../openfootball/euro/2020--europe/euro.txt',
@@ -115,8 +61,8 @@ paths = if args.empty?
115
61
  else
116
62
  ## check for directories
117
63
  ## and auto-expand
118
-
119
- expand_args( args )
64
+
65
+ SportDb::Parser::Opts.expand_args( args )
120
66
  end
121
67
 
122
68
 
@@ -125,18 +71,22 @@ SportDb::Parser::Linter.debug = true if opts[:debug]
125
71
 
126
72
  linter = SportDb::Parser::Linter.new
127
73
 
128
-
74
+ errors = []
129
75
 
130
76
  paths.each_with_index do |path,i|
131
77
  puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
132
78
  linter.read( path, parse: !opts[:metal] )
79
+
80
+ errors += linter.errors if linter.errors?
133
81
  end
134
82
 
135
- if linter.errors?
83
+ if errors.size > 0
136
84
  puts
137
- pp linter.errors
138
- puts "!! #{linter.errors.size} parse error(s) in #{paths.size} datafiles(s)"
85
+ pp errors
86
+ puts
87
+ puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
139
88
  else
89
+ puts
140
90
  puts "OK no parse errors found in #{paths.size} datafile(s)"
141
91
  end
142
92
 
@@ -15,7 +15,7 @@ class Parser
15
15
 
16
16
  GROUP_RE = %r{^
17
17
  Group [ ]
18
- (?<key>[a-z0-9]+)
18
+ (?<key>[a-z0-9]+)
19
19
  $}ix
20
20
  def is_group?( text )
21
21
  ## use regex for match
@@ -28,42 +28,68 @@ end
28
28
  ROUND_RE = %r{^(
29
29
 
30
30
  # round - note - requires number e.g. round 1,2, etc.
31
+ # note - use 1-9 regex (cannot start with 0) - why? why not?
32
+ # make week 01 or round 01 or matchday 01 possible?
31
33
  (?: (?: Round |
32
34
  Matchday |
33
35
  Week
34
36
  )
35
- [ ] [0-9]+
37
+ [ ] [1-9][0-9]*
36
38
  )
37
39
  |
40
+ ## starting with qual(ification)
41
+ ## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
42
+ (?: Qual \. [ ]
43
+ Round
44
+ [ ] [1-9][0-9]*
45
+ )
46
+ |
47
+ ## 1. Round / 2. Round / 3. Round / etc.
48
+ ## Play-off Round
49
+ (?:
50
+ (?: [1-9][0-9]* \.
51
+ |
52
+ Play-?off
53
+ )
54
+ [ ] Round
55
+ )
56
+ |
57
+ ## starting with preliminary
58
+ (?: Preliminary [ ]
59
+ (?: Semi-?finals |
60
+ Final
61
+ )
62
+ )
63
+ |
38
64
  # more (knockout) rounds
39
65
  # playoffs - playoff, play-off, play-offs
40
- (?: Play-?offs?
66
+ (?: Play-?offs?
41
67
  (?: [ ]for[ ]quarter-?finals )?
42
68
  )
43
- |
69
+ |
44
70
  # round32
45
- (?: Round[ ]of[ ]32 |
71
+ (?: Round[ ]of[ ]32 |
46
72
  Last[ ]32 )
47
73
  |
48
- # round16
74
+ # round16
49
75
  (?: Round[ ]of[ ]16 |
50
- Last[ ]16 |
76
+ Last[ ]16 |
51
77
  8th[ ]finals )
52
78
  |
53
79
  # fifthplace
54
80
  (?:
55
- (?: (Fifth|5th)[ -]place
81
+ (?: (Fifth|5th)[ -]place
56
82
  (?: [ ] (?: match|play-?off|final ))?
57
83
  ) |
58
84
  (?: Match[ ]for[ ](?: fifth|5th )[ -]place )
59
85
  )
60
86
  |
61
87
  # thirdplace
62
- (?:
63
- (?: (Third|3rd)[ -]place
64
- (?: [ ] (?: match|play-?off|final ))?
88
+ (?:
89
+ (?: (Third|3rd)[ -]place
90
+ (?: [ ] (?: match|play-?off|final ))?
65
91
  ) |
66
- (?: Match[ ]for[ ](?: third|3rd )[ -]place )
92
+ (?: Match[ ]for[ ](?: third|3rd )[ -]place )
67
93
  )
68
94
  |
69
95
  # quarterfinals
@@ -72,18 +98,24 @@ ROUND_RE = %r{^(
72
98
  Quarters |
73
99
  Last[ ]8
74
100
  )
75
- |
101
+ |
76
102
  # semifinals
77
- (?:
103
+ (?:
78
104
  Semi-?finals? |
79
105
  Semis |
80
106
  Last[ ]4
81
107
  )
82
108
  |
83
109
  # final
84
- Finals?
85
-
86
- )$}ix
110
+ Finals?
111
+ |
112
+ ## add replays
113
+ ## Final Replay
114
+ (?:
115
+ Final
116
+ [ ] Replay
117
+ )
118
+ )$}ix
87
119
 
88
120
 
89
121
  def is_round?( text )
@@ -95,9 +127,9 @@ end
95
127
  ##
96
128
  LEG_RE = %r{^
97
129
  # leg1
98
- (?: 1st|First)[ ]leg
130
+ (?: 1st|First)[ ]leg
99
131
  |
100
- # leg2
132
+ # leg2
101
133
  (?: 2nd|Second)[ ]leg
102
134
  $}ix
103
135
 
@@ -10,12 +10,6 @@ def self.debug=(value) @@debug = value; end
10
10
  def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
11
  def debug?() self.class.debug?; end
12
12
 
13
- ## keep typed - why? why not?
14
- ## - used anywhere?
15
- def self.typed=(value) @@typed = value; end
16
- def self.typed?() @@typed ||= true; end ## note: default is TRUE
17
- def typed?() self.class.typed?; end
18
-
19
13
 
20
14
 
21
15
  attr_reader :errors
@@ -58,6 +52,9 @@ def errors?() @errors.size > 0; end
58
52
  ## parse - false (default) - tokenize (only)
59
53
  ## - true - tokenize & parse
60
54
  def read( path, parse: false )
55
+ ## note: every (new) read call - resets errors list to empty
56
+ @errors = []
57
+
61
58
  nodes = OutlineReader.read( path )
62
59
 
63
60
  ## process nodes
@@ -94,9 +91,8 @@ def read( path, parse: false )
94
91
 
95
92
 
96
93
  ## skip new (experimental attrib syntax)
97
- m = nil
98
94
  if attrib_found == false &&
99
- m=ATTRIB_RE.match( line )
95
+ ATTRIB_RE.match?( line )
100
96
  ## note: check attrib regex AFTER group def e.g.:
101
97
  ## Group A:
102
98
  ## Group B: etc.
@@ -0,0 +1,70 @@
1
+
2
+ module SportDb
3
+ class Parser
4
+
5
+ ###
6
+ ## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
7
+ class Opts
8
+
9
+ SEASON_RE = %r{ (?:
10
+ \d{4}-\d{2}
11
+ | \d{4}(--[a-z0-9_-]+)?
12
+ )
13
+ }x
14
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
15
+
16
+
17
+ ## note: if pattern includes directory add here
18
+ ## (otherwise move to more "generic" datafile) - why? why not?
19
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
20
+ #{SEASON}
21
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
22
+ }x
23
+
24
+
25
+ def self.find( path )
26
+ datafiles = []
27
+
28
+ ## note: normalize path - use File.expand_path ??
29
+ ## change all backslash to slash for now
30
+ ## path = path.gsub( "\\", '/' )
31
+ path = File.expand_path( path )
32
+
33
+ ## check all txt files
34
+ ## note: incl. files starting with dot (.) as candidates
35
+ ## (normally excluded with just *)
36
+ candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
37
+ ## pp candidates
38
+ candidates.each do |candidate|
39
+ datafiles << candidate if MATCH_RE.match( candidate )
40
+ end
41
+
42
+ ## pp datafiles
43
+ datafiles
44
+ end
45
+
46
+
47
+ def self.expand_args( args )
48
+ paths = []
49
+
50
+ args.each do |arg|
51
+ ## check if directory
52
+ if Dir.exist?( arg )
53
+ datafiles = find( arg )
54
+ puts
55
+ puts " found #{datafiles.size} match txt datafiles in #{arg}"
56
+ pp datafiles
57
+ paths += datafiles
58
+ else
59
+ ## assume it's a file
60
+ paths << arg
61
+ end
62
+ end
63
+
64
+ paths
65
+ end
66
+ end # class Opts
67
+
68
+
69
+ end # class Parser
70
+ end # module SportDb
@@ -1,8 +1,4 @@
1
1
 
2
- ###
3
- ## todo/fix - move to sportdb-parser - why? why not? !!!!!!
4
- ##
5
-
6
2
 
7
3
  module SportDb
8
4
 
@@ -10,7 +6,7 @@ class OutlineReader
10
6
 
11
7
  def self.debug=(value) @@debug = value; end
12
8
  def self.debug?() @@debug ||= false; end
13
- def debug?() self.class.debug?; end
9
+ def debug?() self.class.debug?; end
14
10
 
15
11
 
16
12
 
@@ -73,7 +69,7 @@ class OutlineReader
73
69
  start_para = true
74
70
 
75
71
  heading_marker = m[:marker]
76
- heading_level = m[:marker].length ## count number of = for heading level
72
+ heading_level = heading_marker.length ## count number of = for heading level
77
73
  heading = m[:text].strip
78
74
 
79
75
  puts "heading #{heading_level} >#{heading}<" if debug?
@@ -1,24 +1,24 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
3
+
4
4
 
5
5
  ## transforms
6
6
  ##
7
7
  ## Netherlands 1-2 (1-1) England
8
- ## => text => team
9
- ## score|vs
8
+ ## => text => team
9
+ ## score|vs
10
10
  ## text => team
11
11
 
12
12
 
13
13
  ## token iter/find better name
14
14
  ## e.g. TokenBuffer/Scanner or such ??
15
- class Tokens
15
+ class Tokens
16
16
  def initialize( tokens )
17
17
  @tokens = tokens
18
18
  @pos = 0
19
19
  end
20
20
 
21
- def pos() @pos; end
21
+ def pos() @pos; end
22
22
  def eos?() @pos >= @tokens.size; end
23
23
 
24
24
 
@@ -47,17 +47,17 @@ class Tokens
47
47
  ## return token type (e.g. :text, :num, etc.)
48
48
  def cur() peek(0); end
49
49
  ## return content (assumed to be text)
50
- def text(offset=0)
50
+ def text(offset=0)
51
51
  ## raise error - why? why not?
52
52
  ## return nil?
53
53
  if peek( offset ) != :text
54
54
  raise ArgumentError, "text(#{offset}) - token not a text type"
55
55
  end
56
- @tokens[@pos+offset][1]
56
+ @tokens[@pos+offset][1]
57
57
  end
58
58
 
59
59
 
60
- def peek(offset=1)
60
+ def peek(offset=1)
61
61
  ## return nil if eos
62
62
  if @pos+offset >= @tokens.size
63
63
  nil
@@ -66,7 +66,7 @@ class Tokens
66
66
  end
67
67
  end
68
68
 
69
- ## note - returns complete token
69
+ ## note - returns complete token
70
70
  def next
71
71
  # if @pos >= @tokens.size
72
72
  # raise ArgumentError, "end of array - #{@pos} >= #{@tokens.size}"
@@ -81,7 +81,7 @@ class Tokens
81
81
  def collect( &blk )
82
82
  tokens = []
83
83
  loop do
84
- break if eos?
84
+ break if eos?
85
85
  tokens << if block_given?
86
86
  blk.call( self.next )
87
87
  else
@@ -106,7 +106,7 @@ def parse_with_errors( line, debug: false )
106
106
  errors += token_errors
107
107
 
108
108
  #############
109
- ## pass 1
109
+ ## pass 1
110
110
  ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
111
111
  tokens = tokens.map do |t|
112
112
  if t[0] == :text
@@ -129,24 +129,40 @@ def parse_with_errors( line, debug: false )
129
129
  ## puts "tokens:"
130
130
  ## pp tokens
131
131
 
132
- ## transform tokens into (parse tree/ast) nodes
132
+ ## transform tokens into (parse tree/ast) nodes
133
133
  nodes = []
134
-
134
+
135
135
  buf = Tokens.new( tokens )
136
136
  ## pp buf
137
137
 
138
138
 
139
- loop do
140
- if buf.pos == 0
141
- ## check for
142
- ## group def or round def
143
- if buf.match?( :round, :'|' ) ## assume round def (change round to round_def)
139
+ loop do
140
+ break if buf.eos?
141
+
142
+ ## simplify - remove separator for round + leg pair
143
+ ## e.g. Round of 16, 1st Leg
144
+ ## allow Round of 16 - 1st Leg too - why? why not?
145
+ if buf.match?( :round, [:',', :'|',
146
+ :'-',
147
+ :vs, ### fix - change parser to issue :'-' only for (-) not :vs!!!
148
+ ], :leg )
149
+ nodes << [:round, buf.next[1]]
150
+ buf.next ## swallow separator
151
+ nodes << [:leg, buf.next[1]]
152
+ next
153
+ end
154
+
155
+
156
+ if buf.pos == 0 ## MUST start line
157
+ ## check for
158
+ ## group def or round def
159
+ if buf.match?( :round, :'|', [:date, :duration] ) ## assume round def (change round to round_def)
144
160
  nodes << [:round_def, buf.next[1]]
145
161
  buf.next ## swallow pipe
146
162
  nodes += buf.collect
147
163
  break
148
164
  end
149
- if buf.match?( :group, :'|' ) ## assume group def (change group to group_def)
165
+ if buf.match?( :group, :'|', :text ) ## assume group def (change group to group_def)
150
166
  nodes << [:group_def, buf.next[1]]
151
167
  buf.next ## swallow pipe
152
168
  ## change all text to team
@@ -154,11 +170,15 @@ def parse_with_errors( line, debug: false )
154
170
  t[0] == :text ? [:team, t[1]] : t
155
171
  }
156
172
  break
157
- end
173
+ end
158
174
  end
159
175
 
160
176
 
161
- if buf.match?( :text, [:score, :vs], :text )
177
+ if buf.match?( :text, :'-', :text ) ## hacky? convert "generic" :- to :vs
178
+ nodes << [:team, buf.next[1]] ## keep this rule/option - why? why not?
179
+ nodes << [:vs]
180
+ nodes << [:team, buf.next[1]]
181
+ elsif buf.match?( :text, [:score, :vs], :text )
162
182
  nodes << [:team, buf.next[1]]
163
183
  nodes << buf.next
164
184
  nodes << [:team, buf.next[1]]
@@ -170,14 +190,12 @@ def parse_with_errors( line, debug: false )
170
190
  ## only change text to geo
171
191
  nodes += buf.collect { |t|
172
192
  t[0] == :text ? [:geo, t[1]] : t
173
- }
193
+ }
174
194
  break
175
195
  else
176
196
  ## pass through
177
197
  nodes << buf.next
178
198
  end
179
-
180
- break if buf.eos?
181
199
  end
182
200
 
183
201
  [nodes,errors]
@@ -192,5 +210,5 @@ end
192
210
 
193
211
 
194
212
  end # class Parser
195
- end # module SportDb
196
-
213
+ end # module SportDb
214
+
@@ -37,22 +37,24 @@ def self.build_names( lines )
37
37
  end
38
38
 
39
39
 
40
+ def self.build_map( lines, downcase: false )
41
+ ## note: downcase name!!!
42
+ ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
43
+ ## {"january" => 1, "jan" => 1,
44
+ ## "february" => 2, "feb" => 2,
45
+ ## "march" => 3, "mar" => 3,
46
+ ## "april" => 4, "apr" => 4,
47
+ ## "may" => 5,
48
+ ## "june" => 6, "jun" => 6, ...
49
+ lines.each_with_index.reduce( {} ) do |h,(line,i)|
50
+ line.each do |name|
51
+ h[ downcase ? name.downcase : name ] = i+1
52
+ end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
53
+ h
54
+ end
55
+ end
56
+
40
57
 
41
- ## add normalize option (for downcase) - why? why not?
42
- def self.build_map( lines )
43
- ## note: downcase name!!!
44
- ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
45
- ## {"january" => 1, "jan" => 1,
46
- ## "february" => 2, "feb" => 2,
47
- ## "march" => 3, "mar" => 3,
48
- ## "april" => 4, "apr" => 4,
49
- ## "may" => 5,
50
- ## "june" => 6, "jun" => 6, ...
51
- lines.each_with_index.reduce( {} ) do |h,(line,i)|
52
- line.each { |name| h[ name.downcase ] = i+1 } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
53
- h
54
- end
55
- end
56
58
 
57
59
 
58
60
  MONTH_LINES = parse_names( <<TXT )
@@ -72,7 +74,7 @@ TXT
72
74
 
73
75
  MONTH_NAMES = build_names( MONTH_LINES )
74
76
  # pp MONTH_NAMES
75
- MONTH_MAP = build_map( MONTH_LINES )
77
+ MONTH_MAP = build_map( MONTH_LINES, downcase: true )
76
78
  # pp MONTH_MAP
77
79
 
78
80
 
@@ -89,7 +91,7 @@ TXT
89
91
 
90
92
  DAY_NAMES = build_names( DAY_LINES )
91
93
  # pp DAY_NAMES
92
- DAY_MAP = build_map( DAY_LINES )
94
+ DAY_MAP = build_map( DAY_LINES, downcase: true )
93
95
  # pp DAY_MAP
94
96
 
95
97
 
@@ -1,6 +1,6 @@
1
1
 
2
2
 
3
- module SportDb
3
+ module SportDb
4
4
  class Parser
5
5
 
6
6
 
@@ -15,7 +15,7 @@ TIME_RE = %r{
15
15
  (?: :|\.|h )
16
16
  (?<minute>\d{2})
17
17
  \b
18
- )
18
+ )
19
19
  }ix
20
20
 
21
21
 
@@ -28,7 +28,7 @@ TIME_RE = %r{
28
28
  # (CEST/UTC+2) - central european summer time - daylight saving time (DST).
29
29
  # (EET/UTC+1) - eastern european time
30
30
  # (EEST/UTC+2) - eastern european summer time - daylight saving time (DST).
31
- #
31
+ #
32
32
  # UTC+3
33
33
  # UTC+4
34
34
  # UTC+0
@@ -45,7 +45,7 @@ TIME_RE = %r{
45
45
 
46
46
  TIMEZONE_RE = %r{
47
47
  ## e.g. (UTC-2) or (CEST/UTC-2) etc.
48
- (?<timezone>
48
+ (?<timezone>
49
49
  \(
50
50
  ## optional "local" timezone name eg. BRT or CEST etc.
51
51
  (?: [a-z]+
@@ -63,28 +63,28 @@ TIMEZONE_RE = %r{
63
63
 
64
64
  BASICS_RE = %r{
65
65
  ## e.g. (51) or (1) etc. - limit digits of number???
66
- (?<num> \( (?<value>\d+) \) )
66
+ (?<num> \( (?<value>\d+) \) )
67
67
  |
68
- (?<vs>
69
- (?<=[ ]) # Positive lookbehind for space
70
- (?:
68
+ (?<vs>
69
+ (?<=[ ]) # Positive lookbehind for space
70
+ (?:
71
71
  vs\.?| ## allow optional dot (eg. vs. v.)
72
72
  v\.?|
73
73
  -
74
74
  ) # not bigger match first e.g. vs than v etc.
75
75
  (?=[ ]) # positive lookahead for space
76
- )
77
- |
76
+ )
77
+ |
78
78
  (?<none>
79
- (?<=[ \[]|^) # Positive lookbehind for space or [
79
+ (?<=[ \[]|^) # Positive lookbehind for space or [
80
80
  -
81
81
  (?=[ ]*;) # positive lookahead for space
82
82
  )
83
83
  |
84
84
  (?<spaces> [ ]{2,}) |
85
- (?<space> [ ])
85
+ (?<space> [ ])
86
86
  |
87
- (?<sym>[;,@|\[\]])
87
+ (?<sym>[;,@|\[\]])
88
88
  }ix
89
89
 
90
90
 
@@ -94,13 +94,13 @@ MINUTE_RE = %r{
94
94
  (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
95
95
  (?: \+
96
96
  (?<value2>\d{1,3})
97
- )?
97
+ )?
98
98
  ' ## must have minute marker!!!!
99
99
  )
100
100
  }ix
101
101
 
102
102
 
103
- ## (match) status
103
+ ## (match) status
104
104
  ## note: english usage - cancelled (in UK), canceled (in US)
105
105
  ##
106
106
  ## add more variants - why? why not?
@@ -115,30 +115,30 @@ STATUS_RE = %r{
115
115
  |
116
116
  postponed
117
117
  |
118
- awarded|awd\.
118
+ awarded|awd\.
119
119
  |
120
- replay
120
+ replay
121
121
  )
122
122
  (?=[ \]]|$)
123
123
  )}ix
124
124
 
125
125
  ## todo/check: remove lookahead assertion here - why require space?
126
- ## note: \b works only after non-alphanum
127
- ## to make it work with awd. (dot) "custom" lookahead needed
126
+ ## note: \b works only after non-alphanum
127
+ ## to make it work with awd. (dot) "custom" lookahead needed
128
128
 
129
129
 
130
130
  ## goal types
131
- # (pen.) or (pen) or (p.) or (p)
131
+ # (pen.) or (pen) or (p.) or (p)
132
132
  ## (o.g.) or (og)
133
133
  GOAL_PEN_RE = %r{
134
- (?<pen> \(
135
- (?:pen|p)\.?
134
+ (?<pen> \(
135
+ (?:pen|p)\.?
136
136
  \)
137
137
  )
138
138
  }ix
139
139
  GOAL_OG_RE = %r{
140
- (?<og> \(
141
- (?:og|o\.g\.)
140
+ (?<og> \(
141
+ (?:og|o\.g\.)
142
142
  \)
143
143
  )
144
144
  }ix
@@ -158,11 +158,11 @@ RE = Regexp.union( STATUS_RE,
158
158
 
159
159
 
160
160
  def log( msg )
161
- ## append msg to ./logs.txt
161
+ ## append msg to ./logs.txt
162
162
  ## use ./errors.txt - why? why not?
163
163
  File.open( './logs.txt', 'a:utf-8' ) do |f|
164
164
  f.write( msg )
165
- f.write( "\n" )
165
+ f.write( "\n" )
166
166
  end
167
167
  end
168
168
 
@@ -176,7 +176,7 @@ def tokenize_with_errors( line, typed: false,
176
176
  puts ">#{line}<" if debug
177
177
 
178
178
  pos = 0
179
- ## track last offsets - to report error on no match
179
+ ## track last offsets - to report error on no match
180
180
  ## or no match in end of string
181
181
  offsets = [0,0]
182
182
  m = nil
@@ -184,7 +184,7 @@ def tokenize_with_errors( line, typed: false,
184
184
  while m = RE.match( line, pos )
185
185
  if debug
186
186
  pp m
187
- puts "pos: #{pos}"
187
+ puts "pos: #{pos}"
188
188
  end
189
189
  offsets = [m.begin(0), m.end(0)]
190
190
 
@@ -213,10 +213,10 @@ def tokenize_with_errors( line, typed: false,
213
213
  elsif m[:spaces]
214
214
  ## skip spaces
215
215
  nil
216
- elsif m[:text]
216
+ elsif m[:text]
217
217
  [:text, m[:text]] ## keep pos - why? why not?
218
218
  elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
219
- [:status, m[:status]]
219
+ [:status, m[:status]]
220
220
  elsif m[:time]
221
221
  if typed
222
222
  ## unify to iso-format
@@ -230,7 +230,7 @@ def tokenize_with_errors( line, typed: false,
230
230
  if (hour >= 0 && hour <= 24) &&
231
231
  (minute >=0 && minute <= 59)
232
232
  ## note - for debugging keep (pass along) "literal" time
233
- ## might use/add support for am/pm later
233
+ ## might use/add support for am/pm later
234
234
  [:time, m[:time], {h:hour,m:minute}]
235
235
  else
236
236
  raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
@@ -241,54 +241,68 @@ def tokenize_with_errors( line, typed: false,
241
241
  elsif m[:date]
242
242
  if typed
243
243
  date = {}
244
- =begin
244
+ =begin
245
245
  ((?<day_name>#{DAY_NAMES})
246
246
  [ ]
247
- )?
247
+ )?
248
248
  (?<month_name>#{MONTH_NAMES})
249
249
  (?: \/|[ ] )
250
250
  (?<day>\d{1,2})
251
251
  ## optional year
252
252
  ( [ ]
253
253
  (?<year>\d{4})
254
- )?
254
+ )?
255
255
  =end
256
256
  ## map month names
257
257
  ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
258
- date[:y] = m[:year].to_i(10) if m[:year]
258
+ date[:y] = m[:year].to_i(10) if m[:year]
259
259
  date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
260
260
  date[:d] = m[:day].to_i(10) if m[:day]
261
261
  date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
262
- ## note - for debugging keep (pass along) "literal" date
263
- [:date, m[:date], date]
262
+ ## note - for debugging keep (pass along) "literal" date
263
+ [:date, m[:date], date]
264
264
  else
265
265
  [:date, m[:date]]
266
266
  end
267
267
  elsif m[:timezone]
268
268
  [:timezone, m[:timezone]]
269
269
  elsif m[:duration]
270
- [:duration, m[:duration]]
270
+ if typed
271
+ duration = { start: {}, end: {}}
272
+ duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
273
+ duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
274
+ duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
275
+ duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
276
+ duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
277
+ duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
278
+ duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
279
+ duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
280
+ ## note - for debugging keep (pass along) "literal" duration
281
+ [:duration, m[:duration], duration]
282
+ else
283
+ [:duration, m[:duration]]
284
+ end
271
285
  elsif m[:num]
272
286
  if typed
273
287
  ## note - strip enclosing () and convert to integer
274
288
  [:num, m[:value].to_i(10)]
275
- else
289
+ else
276
290
  [:num, m[:num]]
277
291
  end
278
292
  elsif m[:score]
279
293
  if typed
280
294
  score = {}
281
295
  ## check for pen
282
- score[:p] = [m[:p1].to_i(10),
296
+ score[:p] = [m[:p1].to_i(10),
283
297
  m[:p2].to_i(10)] if m[:p1] && m[:p2]
284
- score[:et] = [m[:et1].to_i(10),
298
+ score[:et] = [m[:et1].to_i(10),
285
299
  m[:et2].to_i(10)] if m[:et1] && m[:et2]
286
- score[:ft] = [m[:ft1].to_i(10),
300
+ score[:ft] = [m[:ft1].to_i(10),
287
301
  m[:ft2].to_i(10)] if m[:ft1] && m[:ft2]
288
- score[:ht] = [m[:ht1].to_i(10),
302
+ score[:ht] = [m[:ht1].to_i(10),
289
303
  m[:ht2].to_i(10)] if m[:ht1] && m[:ht2]
290
304
 
291
- ## note - for debugging keep (pass along) "literal" score
305
+ ## note - for debugging keep (pass along) "literal" score
292
306
  [:score, m[:score], score]
293
307
  else
294
308
  [:score, m[:score]]
@@ -298,7 +312,7 @@ def tokenize_with_errors( line, typed: false,
298
312
  minute = {}
299
313
  minute[:m] = m[:value].to_i(10)
300
314
  minute[:offset] = m[:value2].to_i(10) if m[:value2]
301
- ## note - for debugging keep (pass along) "literal" minute
315
+ ## note - for debugging keep (pass along) "literal" minute
302
316
  [:minute, m[:minute], minute]
303
317
  else
304
318
  [:minute, m[:minute]]
@@ -318,16 +332,16 @@ def tokenize_with_errors( line, typed: false,
318
332
  when ',' then [:',']
319
333
  when ';' then [:';']
320
334
  when '@' then [:'@']
321
- when '|' then [:'|']
335
+ when '|' then [:'|']
322
336
  else
323
337
  nil ## ignore others (e.g. brackets [])
324
338
  end
325
339
  else
326
- ## report error
340
+ ## report error
327
341
  nil
328
342
  end
329
343
 
330
- tokens << t if t
344
+ tokens << t if t
331
345
 
332
346
  if debug
333
347
  print ">"
@@ -346,7 +360,7 @@ def tokenize_with_errors( line, typed: false,
346
360
  end
347
361
 
348
362
 
349
- [tokens,errors]
363
+ [tokens,errors]
350
364
  end
351
365
 
352
366
 
@@ -360,5 +374,4 @@ end
360
374
 
361
375
 
362
376
  end # class Parser
363
- end # module SportDb
364
-
377
+ end # module SportDb
@@ -0,0 +1,24 @@
1
+
2
+ module SportDb
3
+ module Module
4
+ module Parser
5
+ MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 2
7
+ PATCH = 0
8
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
9
+
10
+ def self.version
11
+ VERSION
12
+ end
13
+
14
+ def self.banner
15
+ "sportdb-parser/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
16
+ end
17
+
18
+ def self.root
19
+ File.expand_path( File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) )
20
+ end
21
+
22
+ end # module Parser
23
+ end
24
+ end
@@ -1,3 +1,7 @@
1
+ ## pulls in
2
+ require 'cocos'
3
+ require 'season/formats' # e.g. Season() support machinery
4
+
1
5
 
2
6
 
3
7
  ####
@@ -11,7 +15,7 @@
11
15
  ## text - change text to name - why? why not?
12
16
 
13
17
 
14
-
18
+ require_relative 'parser/version'
15
19
  require_relative 'parser/token-score'
16
20
  require_relative 'parser/token-date'
17
21
  require_relative 'parser/token-text'
@@ -23,6 +27,7 @@ require_relative 'parser/parser'
23
27
  ## more
24
28
  require_relative 'parser/outline_reader'
25
29
  require_relative 'parser/linter'
30
+ require_relative 'parser/opts'
26
31
 
27
32
 
28
33
  ###
@@ -31,7 +36,7 @@ require_relative 'parser/linter'
31
36
  =begin
32
37
  module SportDb
33
38
  def self.parser() @@parser ||= Parser.new; end
34
- def self.parse( ... )
39
+ def self.parse( ... )
35
40
  end
36
41
  def self.tokenize( ... )
37
42
  end
@@ -39,6 +44,5 @@ end # module SportDb
39
44
  =end
40
45
 
41
46
 
42
-
43
-
47
+ puts SportDb::Module::Parser.banner # say hello
44
48
 
metadata CHANGED
@@ -1,15 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-14 00:00:00.000000000 Z
11
+ date: 2024-08-22 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: cocos
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.4.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.4.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: season-formats
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
13
41
  - !ruby/object:Gem::Dependency
14
42
  name: rdoc
15
43
  requirement: !ruby/object:Gem::Requirement
@@ -62,12 +90,14 @@ files:
62
90
  - lib/sportdb/parser.rb
63
91
  - lib/sportdb/parser/lang.rb
64
92
  - lib/sportdb/parser/linter.rb
93
+ - lib/sportdb/parser/opts.rb
65
94
  - lib/sportdb/parser/outline_reader.rb
66
95
  - lib/sportdb/parser/parser.rb
67
96
  - lib/sportdb/parser/token-date.rb
68
97
  - lib/sportdb/parser/token-score.rb
69
98
  - lib/sportdb/parser/token-text.rb
70
99
  - lib/sportdb/parser/token.rb
100
+ - lib/sportdb/parser/version.rb
71
101
  homepage: https://github.com/sportdb/sport.db
72
102
  licenses:
73
103
  - Public Domain