sportdb-parser 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1466b82654b4a4f0f823a96709488dedb595d08731a55abc128691e0ffe2a80b
4
- data.tar.gz: 14995e94dc079ab61e77d056d15c9a5830dc573129661ca453b2892d087c2061
3
+ metadata.gz: c94dcd42fc13a7043f6b926ca1d947df3199877693b22e53e4f50b5aa522bf5d
4
+ data.tar.gz: 33eb689dcfb2bab0728c19b7d706da1556ddefafbfbcc6e424ac5bcbe3bccef6
5
5
  SHA512:
6
- metadata.gz: 75c2b4f455e8bb1b5e471c39f8fa3b5069bd0bb2a808ad8b246c0f2b060c5416f9f56a3619ad7db7ac5f21a6177c762aa28ae8e9c939b03a2569cf27d34f9b81
7
- data.tar.gz: 9c4f9095a61410499ae7628b1eb3295d8f456e62feae45a4c254d9157904326abf6571f3c4a04c078551b6364cd09252509f709bfeef46a569dbe202f4058460
6
+ metadata.gz: 97ef8d76ffa26312d66359f364588af3d7c76a3b0cebd3644b1f1ae775463defa9cb9552b267f26677c2c6f4e9b7b9fe62479dd34a7211fd1a4a3c1b5e9af830
7
+ data.tar.gz: ca9b56c6c02c132f3924fb40c293e90379812b830a2899e2be02c1d6469a278456c6d68db7f73d5f5fd69b372c958953e3fefd829ac1120cf56b0944176a2b87
data/CHANGELOG.md CHANGED
@@ -1,3 +1,5 @@
1
+ ### 0.2.0
2
+
1
3
  ### 0.0.1 / 2024-07-12
2
4
 
3
5
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -6,9 +6,11 @@ bin/fbt
6
6
  lib/sportdb/parser.rb
7
7
  lib/sportdb/parser/lang.rb
8
8
  lib/sportdb/parser/linter.rb
9
+ lib/sportdb/parser/opts.rb
9
10
  lib/sportdb/parser/outline_reader.rb
10
11
  lib/sportdb/parser/parser.rb
11
12
  lib/sportdb/parser/token-date.rb
12
13
  lib/sportdb/parser/token-score.rb
13
14
  lib/sportdb/parser/token-text.rb
14
15
  lib/sportdb/parser/token.rb
16
+ lib/sportdb/parser/version.rb
data/Rakefile CHANGED
@@ -1,9 +1,10 @@
1
1
  require 'hoe'
2
+ require './lib/sportdb/parser/version.rb'
2
3
 
3
4
 
4
5
  Hoe.spec 'sportdb-parser' do
5
6
 
6
- self.version = '0.0.1'
7
+ self.version = SportDb::Module::Parser::VERSION
7
8
 
8
9
  self.summary = "sportdb-parser - football.txt match parser (& tokenizer)"
9
10
  self.description = summary
@@ -19,7 +20,10 @@ Hoe.spec 'sportdb-parser' do
19
20
 
20
21
  self.licenses = ['Public Domain']
21
22
 
22
- self.extra_deps = []
23
+ self.extra_deps = [
24
+ ['cocos', '>= 0.4.0'],
25
+ ['season-formats'],
26
+ ]
23
27
 
24
28
  self.spec_extras = {
25
29
  required_ruby_version: '>= 2.2.2'
data/bin/fbt CHANGED
@@ -3,52 +3,20 @@
3
3
  ## tip: to test run:
4
4
  ## ruby -I ./lib bin/fbt
5
5
 
6
+ ## our own code
6
7
  require 'sportdb/parser'
7
8
 
8
9
 
10
+
9
11
  require 'optparse'
10
12
 
11
13
  ##
12
- ## read textfile
14
+ ## read textfile
13
15
  ## and dump tokens
14
16
  ##
15
17
  ## fbt ../openfootball/.../euro.txt
16
18
 
17
19
 
18
- SEASON_RE = %r{ (?:
19
- \d{4}-\d{2}
20
- | \d{4}(--[a-z0-9_-]+)?
21
- )
22
- }x
23
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
24
-
25
-
26
- ## note: if pattern includes directory add here
27
- ## (otherwise move to more "generic" datafile) - why? why not?
28
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
29
- #{SEASON}
30
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
31
- }x
32
-
33
-
34
- def find( path, pattern=MATCH_RE )
35
- datafiles = []
36
-
37
- ## check all txt files
38
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
39
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
40
- ## pp candidates
41
- candidates.each do |candidate|
42
- datafiles << candidate if pattern.match( candidate )
43
- end
44
-
45
- ## pp datafiles
46
- datafiles
47
- end
48
-
49
-
50
-
51
-
52
20
 
53
21
 
54
22
  args = ARGV
@@ -64,7 +32,7 @@ def find( path, pattern=MATCH_RE )
64
32
 
65
33
 
66
34
  parser.on( "--verbose", "--debug",
67
- "turn on verbose / debug output (default: #{opts[:debug]} )" ) do |debug|
35
+ "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
68
36
  opts[:debug] = debug
69
37
  end
70
38
 
@@ -85,28 +53,6 @@ p args
85
53
 
86
54
 
87
55
 
88
-
89
- def expand_args( args )
90
- paths = []
91
-
92
- args.each do |arg|
93
- ## check if directory
94
- if Dir.exist?( arg )
95
- datafiles = find( arg )
96
- puts
97
- puts " found #{datafiles.size} match txt datafiles in #{arg}"
98
- pp datafiles
99
- paths += datafiles
100
- else
101
- ## assume it's a file
102
- paths << arg
103
- end
104
- end
105
-
106
- paths
107
- end
108
-
109
-
110
56
  paths = if args.empty?
111
57
  [
112
58
  '../../../openfootball/euro/2020--europe/euro.txt',
@@ -115,8 +61,8 @@ paths = if args.empty?
115
61
  else
116
62
  ## check for directories
117
63
  ## and auto-expand
118
-
119
- expand_args( args )
64
+
65
+ SportDb::Parser::Opts.expand_args( args )
120
66
  end
121
67
 
122
68
 
@@ -125,18 +71,22 @@ SportDb::Parser::Linter.debug = true if opts[:debug]
125
71
 
126
72
  linter = SportDb::Parser::Linter.new
127
73
 
128
-
74
+ errors = []
129
75
 
130
76
  paths.each_with_index do |path,i|
131
77
  puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
132
78
  linter.read( path, parse: !opts[:metal] )
79
+
80
+ errors += linter.errors if linter.errors?
133
81
  end
134
82
 
135
- if linter.errors?
83
+ if errors.size > 0
136
84
  puts
137
- pp linter.errors
138
- puts "!! #{linter.errors.size} parse error(s) in #{paths.size} datafiles(s)"
85
+ pp errors
86
+ puts
87
+ puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
139
88
  else
89
+ puts
140
90
  puts "OK no parse errors found in #{paths.size} datafile(s)"
141
91
  end
142
92
 
@@ -15,7 +15,7 @@ class Parser
15
15
 
16
16
  GROUP_RE = %r{^
17
17
  Group [ ]
18
- (?<key>[a-z0-9]+)
18
+ (?<key>[a-z0-9]+)
19
19
  $}ix
20
20
  def is_group?( text )
21
21
  ## use regex for match
@@ -28,42 +28,68 @@ end
28
28
  ROUND_RE = %r{^(
29
29
 
30
30
  # round - note - requires number e.g. round 1,2, etc.
31
+ # note - use 1-9 regex (cannot start with 0) - why? why not?
32
+ # make week 01 or round 01 or matchday 01 possible?
31
33
  (?: (?: Round |
32
34
  Matchday |
33
35
  Week
34
36
  )
35
- [ ] [0-9]+
37
+ [ ] [1-9][0-9]*
36
38
  )
37
39
  |
40
+ ## starting with qual(ification)
41
+ ## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
42
+ (?: Qual \. [ ]
43
+ Round
44
+ [ ] [1-9][0-9]*
45
+ )
46
+ |
47
+ ## 1. Round / 2. Round / 3. Round / etc.
48
+ ## Play-off Round
49
+ (?:
50
+ (?: [1-9][0-9]* \.
51
+ |
52
+ Play-?off
53
+ )
54
+ [ ] Round
55
+ )
56
+ |
57
+ ## starting with preliminary
58
+ (?: Preliminary [ ]
59
+ (?: Semi-?finals |
60
+ Final
61
+ )
62
+ )
63
+ |
38
64
  # more (knockout) rounds
39
65
  # playoffs - playoff, play-off, play-offs
40
- (?: Play-?offs?
66
+ (?: Play-?offs?
41
67
  (?: [ ]for[ ]quarter-?finals )?
42
68
  )
43
- |
69
+ |
44
70
  # round32
45
- (?: Round[ ]of[ ]32 |
71
+ (?: Round[ ]of[ ]32 |
46
72
  Last[ ]32 )
47
73
  |
48
- # round16
74
+ # round16
49
75
  (?: Round[ ]of[ ]16 |
50
- Last[ ]16 |
76
+ Last[ ]16 |
51
77
  8th[ ]finals )
52
78
  |
53
79
  # fifthplace
54
80
  (?:
55
- (?: (Fifth|5th)[ -]place
81
+ (?: (Fifth|5th)[ -]place
56
82
  (?: [ ] (?: match|play-?off|final ))?
57
83
  ) |
58
84
  (?: Match[ ]for[ ](?: fifth|5th )[ -]place )
59
85
  )
60
86
  |
61
87
  # thirdplace
62
- (?:
63
- (?: (Third|3rd)[ -]place
64
- (?: [ ] (?: match|play-?off|final ))?
88
+ (?:
89
+ (?: (Third|3rd)[ -]place
90
+ (?: [ ] (?: match|play-?off|final ))?
65
91
  ) |
66
- (?: Match[ ]for[ ](?: third|3rd )[ -]place )
92
+ (?: Match[ ]for[ ](?: third|3rd )[ -]place )
67
93
  )
68
94
  |
69
95
  # quarterfinals
@@ -72,18 +98,24 @@ ROUND_RE = %r{^(
72
98
  Quarters |
73
99
  Last[ ]8
74
100
  )
75
- |
101
+ |
76
102
  # semifinals
77
- (?:
103
+ (?:
78
104
  Semi-?finals? |
79
105
  Semis |
80
106
  Last[ ]4
81
107
  )
82
108
  |
83
109
  # final
84
- Finals?
85
-
86
- )$}ix
110
+ Finals?
111
+ |
112
+ ## add replays
113
+ ## Final Replay
114
+ (?:
115
+ Final
116
+ [ ] Replay
117
+ )
118
+ )$}ix
87
119
 
88
120
 
89
121
  def is_round?( text )
@@ -95,9 +127,9 @@ end
95
127
  ##
96
128
  LEG_RE = %r{^
97
129
  # leg1
98
- (?: 1st|First)[ ]leg
130
+ (?: 1st|First)[ ]leg
99
131
  |
100
- # leg2
132
+ # leg2
101
133
  (?: 2nd|Second)[ ]leg
102
134
  $}ix
103
135
 
@@ -10,12 +10,6 @@ def self.debug=(value) @@debug = value; end
10
10
  def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
11
  def debug?() self.class.debug?; end
12
12
 
13
- ## keep typed - why? why not?
14
- ## - used anywhere?
15
- def self.typed=(value) @@typed = value; end
16
- def self.typed?() @@typed ||= true; end ## note: default is TRUE
17
- def typed?() self.class.typed?; end
18
-
19
13
 
20
14
 
21
15
  attr_reader :errors
@@ -58,6 +52,9 @@ def errors?() @errors.size > 0; end
58
52
  ## parse - false (default) - tokenize (only)
59
53
  ## - true - tokenize & parse
60
54
  def read( path, parse: false )
55
+ ## note: every (new) read call - resets errors list to empty
56
+ @errors = []
57
+
61
58
  nodes = OutlineReader.read( path )
62
59
 
63
60
  ## process nodes
@@ -94,9 +91,8 @@ def read( path, parse: false )
94
91
 
95
92
 
96
93
  ## skip new (experimental attrib syntax)
97
- m = nil
98
94
  if attrib_found == false &&
99
- m=ATTRIB_RE.match( line )
95
+ ATTRIB_RE.match?( line )
100
96
  ## note: check attrib regex AFTER group def e.g.:
101
97
  ## Group A:
102
98
  ## Group B: etc.
@@ -0,0 +1,70 @@
1
+
2
+ module SportDb
3
+ class Parser
4
+
5
+ ###
6
+ ## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
7
+ class Opts
8
+
9
+ SEASON_RE = %r{ (?:
10
+ \d{4}-\d{2}
11
+ | \d{4}(--[a-z0-9_-]+)?
12
+ )
13
+ }x
14
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
15
+
16
+
17
+ ## note: if pattern includes directory add here
18
+ ## (otherwise move to more "generic" datafile) - why? why not?
19
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
20
+ #{SEASON}
21
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
22
+ }x
23
+
24
+
25
+ def self.find( path )
26
+ datafiles = []
27
+
28
+ ## note: normalize path - use File.expand_path ??
29
+ ## change all backslash to slash for now
30
+ ## path = path.gsub( "\\", '/' )
31
+ path = File.expand_path( path )
32
+
33
+ ## check all txt files
34
+ ## note: incl. files starting with dot (.)) as candidates
35
+ ## (normally excluded with just *)
36
+ candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
37
+ ## pp candidates
38
+ candidates.each do |candidate|
39
+ datafiles << candidate if MATCH_RE.match( candidate )
40
+ end
41
+
42
+ ## pp datafiles
43
+ datafiles
44
+ end
45
+
46
+
47
+ def self.expand_args( args )
48
+ paths = []
49
+
50
+ args.each do |arg|
51
+ ## check if directory
52
+ if Dir.exist?( arg )
53
+ datafiles = find( arg )
54
+ puts
55
+ puts " found #{datafiles.size} match txt datafiles in #{arg}"
56
+ pp datafiles
57
+ paths += datafiles
58
+ else
59
+ ## assume it's a file
60
+ paths << arg
61
+ end
62
+ end
63
+
64
+ paths
65
+ end
66
+ end # class Opts
67
+
68
+
69
+ end # class Parser
70
+ end # module SportDb
@@ -1,8 +1,4 @@
1
1
 
2
- ###
3
- ## todo/fix - move to sportdb-parser - why? why not? !!!!!!
4
- ##
5
-
6
2
 
7
3
  module SportDb
8
4
 
@@ -10,7 +6,7 @@ class OutlineReader
10
6
 
11
7
  def self.debug=(value) @@debug = value; end
12
8
  def self.debug?() @@debug ||= false; end
13
- def debug?() self.class.debug?; end
9
+ def debug?() self.class.debug?; end
14
10
 
15
11
 
16
12
 
@@ -73,7 +69,7 @@ class OutlineReader
73
69
  start_para = true
74
70
 
75
71
  heading_marker = m[:marker]
76
- heading_level = m[:marker].length ## count number of = for heading level
72
+ heading_level = heading_marker.length ## count number of = for heading level
77
73
  heading = m[:text].strip
78
74
 
79
75
  puts "heading #{heading_level} >#{heading}<" if debug?
@@ -1,24 +1,24 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
3
+
4
4
 
5
5
  ## transforms
6
6
  ##
7
7
  ## Netherlands 1-2 (1-1) England
8
- ## => text => team
9
- ## score|vs
8
+ ## => text => team
9
+ ## score|vs
10
10
  ## text => team
11
11
 
12
12
 
13
13
  ## token iter/find better name
14
14
  ## e.g. TokenBuffer/Scanner or such ??
15
- class Tokens
15
+ class Tokens
16
16
  def initialize( tokens )
17
17
  @tokens = tokens
18
18
  @pos = 0
19
19
  end
20
20
 
21
- def pos() @pos; end
21
+ def pos() @pos; end
22
22
  def eos?() @pos >= @tokens.size; end
23
23
 
24
24
 
@@ -47,17 +47,17 @@ class Tokens
47
47
  ## return token type (e.g. :text, :num, etc.)
48
48
  def cur() peek(0); end
49
49
  ## return content (assumed to be text)
50
- def text(offset=0)
50
+ def text(offset=0)
51
51
  ## raise error - why? why not?
52
52
  ## return nil?
53
53
  if peek( offset ) != :text
54
54
  raise ArgumentError, "text(#{offset}) - token not a text type"
55
55
  end
56
- @tokens[@pos+offset][1]
56
+ @tokens[@pos+offset][1]
57
57
  end
58
58
 
59
59
 
60
- def peek(offset=1)
60
+ def peek(offset=1)
61
61
  ## return nil if eos
62
62
  if @pos+offset >= @tokens.size
63
63
  nil
@@ -66,7 +66,7 @@ class Tokens
66
66
  end
67
67
  end
68
68
 
69
- ## note - returns complete token
69
+ ## note - returns complete token
70
70
  def next
71
71
  # if @pos >= @tokens.size
72
72
  # raise ArgumentError, "end of array - #{@pos} >= #{@tokens.size}"
@@ -81,7 +81,7 @@ class Tokens
81
81
  def collect( &blk )
82
82
  tokens = []
83
83
  loop do
84
- break if eos?
84
+ break if eos?
85
85
  tokens << if block_given?
86
86
  blk.call( self.next )
87
87
  else
@@ -106,7 +106,7 @@ def parse_with_errors( line, debug: false )
106
106
  errors += token_errors
107
107
 
108
108
  #############
109
- ## pass 1
109
+ ## pass 1
110
110
  ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
111
111
  tokens = tokens.map do |t|
112
112
  if t[0] == :text
@@ -129,24 +129,40 @@ def parse_with_errors( line, debug: false )
129
129
  ## puts "tokens:"
130
130
  ## pp tokens
131
131
 
132
- ## transform tokens into (parse tree/ast) nodes
132
+ ## transform tokens into (parse tree/ast) nodes
133
133
  nodes = []
134
-
134
+
135
135
  buf = Tokens.new( tokens )
136
136
  ## pp buf
137
137
 
138
138
 
139
- loop do
140
- if buf.pos == 0
141
- ## check for
142
- ## group def or round def
143
- if buf.match?( :round, :'|' ) ## assume round def (change round to round_def)
139
+ loop do
140
+ break if buf.eos?
141
+
142
+ ## simplify - remove separator for round + leg pair
143
+ ## e.g. Round of 16, 1st Leg
144
+ ## allow Round of 16 - 1st Leg too - why? why not?
145
+ if buf.match?( :round, [:',', :'|',
146
+ :'-',
147
+ :vs, ### fix - change parser to issue :'-' only for (-) not :vs!!!
148
+ ], :leg )
149
+ nodes << [:round, buf.next[1]]
150
+ buf.next ## swallow separator
151
+ nodes << [:leg, buf.next[1]]
152
+ next
153
+ end
154
+
155
+
156
+ if buf.pos == 0 ## MUST start line
157
+ ## check for
158
+ ## group def or round def
159
+ if buf.match?( :round, :'|', [:date, :duration] ) ## assume round def (change round to round_def)
144
160
  nodes << [:round_def, buf.next[1]]
145
161
  buf.next ## swallow pipe
146
162
  nodes += buf.collect
147
163
  break
148
164
  end
149
- if buf.match?( :group, :'|' ) ## assume group def (change group to group_def)
165
+ if buf.match?( :group, :'|', :text ) ## assume group def (change group to group_def)
150
166
  nodes << [:group_def, buf.next[1]]
151
167
  buf.next ## swallow pipe
152
168
  ## change all text to team
@@ -154,11 +170,15 @@ def parse_with_errors( line, debug: false )
154
170
  t[0] == :text ? [:team, t[1]] : t
155
171
  }
156
172
  break
157
- end
173
+ end
158
174
  end
159
175
 
160
176
 
161
- if buf.match?( :text, [:score, :vs], :text )
177
+ if buf.match?( :text, :'-', :text ) ## hacky? convert "generic" :- to :vs
178
+ nodes << [:team, buf.next[1]] ## keep this rule/option - why? why not?
179
+ nodes << [:vs]
180
+ nodes << [:team, buf.next[1]]
181
+ elsif buf.match?( :text, [:score, :vs], :text )
162
182
  nodes << [:team, buf.next[1]]
163
183
  nodes << buf.next
164
184
  nodes << [:team, buf.next[1]]
@@ -170,14 +190,12 @@ def parse_with_errors( line, debug: false )
170
190
  ## only change text to geo
171
191
  nodes += buf.collect { |t|
172
192
  t[0] == :text ? [:geo, t[1]] : t
173
- }
193
+ }
174
194
  break
175
195
  else
176
196
  ## pass through
177
197
  nodes << buf.next
178
198
  end
179
-
180
- break if buf.eos?
181
199
  end
182
200
 
183
201
  [nodes,errors]
@@ -192,5 +210,5 @@ end
192
210
 
193
211
 
194
212
  end # class Parser
195
- end # module SportDb
196
-
213
+ end # module SportDb
214
+
@@ -37,22 +37,24 @@ def self.build_names( lines )
37
37
  end
38
38
 
39
39
 
40
+ def self.build_map( lines, downcase: false )
41
+ ## note: downcase name!!!
42
+ ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
43
+ ## {"january" => 1, "jan" => 1,
44
+ ## "february" => 2, "feb" => 2,
45
+ ## "march" => 3, "mar" => 3,
46
+ ## "april" => 4, "apr" => 4,
47
+ ## "may" => 5,
48
+ ## "june" => 6, "jun" => 6, ...
49
+ lines.each_with_index.reduce( {} ) do |h,(line,i)|
50
+ line.each do |name|
51
+ h[ downcase ? name.downcase : name ] = i+1
52
+ end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
53
+ h
54
+ end
55
+ end
56
+
40
57
 
41
- ## add normalize option (for downcase) - why? why not?
42
- def self.build_map( lines )
43
- ## note: downcase name!!!
44
- ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
45
- ## {"january" => 1, "jan" => 1,
46
- ## "february" => 2, "feb" => 2,
47
- ## "march" => 3, "mar" => 3,
48
- ## "april" => 4, "apr" => 4,
49
- ## "may" => 5,
50
- ## "june" => 6, "jun" => 6, ...
51
- lines.each_with_index.reduce( {} ) do |h,(line,i)|
52
- line.each { |name| h[ name.downcase ] = i+1 } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
53
- h
54
- end
55
- end
56
58
 
57
59
 
58
60
  MONTH_LINES = parse_names( <<TXT )
@@ -72,7 +74,7 @@ TXT
72
74
 
73
75
  MONTH_NAMES = build_names( MONTH_LINES )
74
76
  # pp MONTH_NAMES
75
- MONTH_MAP = build_map( MONTH_LINES )
77
+ MONTH_MAP = build_map( MONTH_LINES, downcase: true )
76
78
  # pp MONTH_MAP
77
79
 
78
80
 
@@ -89,7 +91,7 @@ TXT
89
91
 
90
92
  DAY_NAMES = build_names( DAY_LINES )
91
93
  # pp DAY_NAMES
92
- DAY_MAP = build_map( DAY_LINES )
94
+ DAY_MAP = build_map( DAY_LINES, downcase: true )
93
95
  # pp DAY_MAP
94
96
 
95
97
 
@@ -1,6 +1,6 @@
1
1
 
2
2
 
3
- module SportDb
3
+ module SportDb
4
4
  class Parser
5
5
 
6
6
 
@@ -15,7 +15,7 @@ TIME_RE = %r{
15
15
  (?: :|\.|h )
16
16
  (?<minute>\d{2})
17
17
  \b
18
- )
18
+ )
19
19
  }ix
20
20
 
21
21
 
@@ -28,7 +28,7 @@ TIME_RE = %r{
28
28
  # (CEST/UTC+2) - central european summer time - daylight saving time (DST).
29
29
  # (EET/UTC+1) - eastern european time
30
30
  # (EEST/UTC+2) - eastern european summer time - daylight saving time (DST).
31
- #
31
+ #
32
32
  # UTC+3
33
33
  # UTC+4
34
34
  # UTC+0
@@ -45,7 +45,7 @@ TIME_RE = %r{
45
45
 
46
46
  TIMEZONE_RE = %r{
47
47
  ## e.g. (UTC-2) or (CEST/UTC-2) etc.
48
- (?<timezone>
48
+ (?<timezone>
49
49
  \(
50
50
  ## optional "local" timezone name eg. BRT or CEST etc.
51
51
  (?: [a-z]+
@@ -63,28 +63,28 @@ TIMEZONE_RE = %r{
63
63
 
64
64
  BASICS_RE = %r{
65
65
  ## e.g. (51) or (1) etc. - limit digits of number???
66
- (?<num> \( (?<value>\d+) \) )
66
+ (?<num> \( (?<value>\d+) \) )
67
67
  |
68
- (?<vs>
69
- (?<=[ ]) # Positive lookbehind for space
70
- (?:
68
+ (?<vs>
69
+ (?<=[ ]) # Positive lookbehind for space
70
+ (?:
71
71
  vs\.?| ## allow optional dot (eg. vs. v.)
72
72
  v\.?|
73
73
  -
74
74
  ) # not bigger match first e.g. vs than v etc.
75
75
  (?=[ ]) # positive lookahead for space
76
- )
77
- |
76
+ )
77
+ |
78
78
  (?<none>
79
- (?<=[ \[]|^) # Positive lookbehind for space or [
79
+ (?<=[ \[]|^) # Positive lookbehind for space or [
80
80
  -
81
81
  (?=[ ]*;) # positive lookahead for space
82
82
  )
83
83
  |
84
84
  (?<spaces> [ ]{2,}) |
85
- (?<space> [ ])
85
+ (?<space> [ ])
86
86
  |
87
- (?<sym>[;,@|\[\]])
87
+ (?<sym>[;,@|\[\]])
88
88
  }ix
89
89
 
90
90
 
@@ -94,13 +94,13 @@ MINUTE_RE = %r{
94
94
  (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
95
95
  (?: \+
96
96
  (?<value2>\d{1,3})
97
- )?
97
+ )?
98
98
  ' ## must have minute marker!!!!
99
99
  )
100
100
  }ix
101
101
 
102
102
 
103
- ## (match) status
103
+ ## (match) status
104
104
  ## note: english usage - cancelled (in UK), canceled (in US)
105
105
  ##
106
106
  ## add more variants - why? why not?
@@ -115,30 +115,30 @@ STATUS_RE = %r{
115
115
  |
116
116
  postponed
117
117
  |
118
- awarded|awd\.
118
+ awarded|awd\.
119
119
  |
120
- replay
120
+ replay
121
121
  )
122
122
  (?=[ \]]|$)
123
123
  )}ix
124
124
 
125
125
  ## todo/check: remove lookahead assertion here - why require space?
126
- ## note: \b works only after non-alphanum
127
- ## to make it work with awd. (dot) "custom" lookahead neeeded
126
+ ## note: \b works only after non-alphanum
127
+ ## to make it work with awd. (dot) "custom" lookahead neeeded
128
128
 
129
129
 
130
130
  ## goal types
131
- # (pen.) or (pen) or (p.) or (p)
131
+ # (pen.) or (pen) or (p.) or (p)
132
132
  ## (o.g.) or (og)
133
133
  GOAL_PEN_RE = %r{
134
- (?<pen> \(
135
- (?:pen|p)\.?
134
+ (?<pen> \(
135
+ (?:pen|p)\.?
136
136
  \)
137
137
  )
138
138
  }ix
139
139
  GOAL_OG_RE = %r{
140
- (?<og> \(
141
- (?:og|o\.g\.)
140
+ (?<og> \(
141
+ (?:og|o\.g\.)
142
142
  \)
143
143
  )
144
144
  }ix
@@ -158,11 +158,11 @@ RE = Regexp.union( STATUS_RE,
158
158
 
159
159
 
160
160
  def log( msg )
161
- ## append msg to ./logs.txt
161
+ ## append msg to ./logs.txt
162
162
  ## use ./errors.txt - why? why not?
163
163
  File.open( './logs.txt', 'a:utf-8' ) do |f|
164
164
  f.write( msg )
165
- f.write( "\n" )
165
+ f.write( "\n" )
166
166
  end
167
167
  end
168
168
 
@@ -176,7 +176,7 @@ def tokenize_with_errors( line, typed: false,
176
176
  puts ">#{line}<" if debug
177
177
 
178
178
  pos = 0
179
- ## track last offsets - to report error on no match
179
+ ## track last offsets - to report error on no match
180
180
  ## or no match in end of string
181
181
  offsets = [0,0]
182
182
  m = nil
@@ -184,7 +184,7 @@ def tokenize_with_errors( line, typed: false,
184
184
  while m = RE.match( line, pos )
185
185
  if debug
186
186
  pp m
187
- puts "pos: #{pos}"
187
+ puts "pos: #{pos}"
188
188
  end
189
189
  offsets = [m.begin(0), m.end(0)]
190
190
 
@@ -213,10 +213,10 @@ def tokenize_with_errors( line, typed: false,
213
213
  elsif m[:spaces]
214
214
  ## skip spaces
215
215
  nil
216
- elsif m[:text]
216
+ elsif m[:text]
217
217
  [:text, m[:text]] ## keep pos - why? why not?
218
218
  elsif m[:status] ## (match) status e.g. cancelled, awarded, etc.
219
- [:status, m[:status]]
219
+ [:status, m[:status]]
220
220
  elsif m[:time]
221
221
  if typed
222
222
  ## unify to iso-format
@@ -230,7 +230,7 @@ def tokenize_with_errors( line, typed: false,
230
230
  if (hour >= 0 && hour <= 24) &&
231
231
  (minute >=0 && minute <= 59)
232
232
  ## note - for debugging keep (pass along) "literal" time
233
- ## might use/add support for am/pm later
233
+ ## might use/add support for am/pm later
234
234
  [:time, m[:time], {h:hour,m:minute}]
235
235
  else
236
236
  raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
@@ -241,54 +241,68 @@ def tokenize_with_errors( line, typed: false,
241
241
  elsif m[:date]
242
242
  if typed
243
243
  date = {}
244
- =begin
244
+ =begin
245
245
  ((?<day_name>#{DAY_NAMES})
246
246
  [ ]
247
- )?
247
+ )?
248
248
  (?<month_name>#{MONTH_NAMES})
249
249
  (?: \/|[ ] )
250
250
  (?<day>\d{1,2})
251
251
  ## optional year
252
252
  ( [ ]
253
253
  (?<year>\d{4})
254
- )?
254
+ )?
255
255
  =end
256
256
  ## map month names
257
257
  ## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
258
- date[:y] = m[:year].to_i(10) if m[:year]
258
+ date[:y] = m[:year].to_i(10) if m[:year]
259
259
  date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
260
260
  date[:d] = m[:day].to_i(10) if m[:day]
261
261
  date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
262
- ## note - for debugging keep (pass along) "literal" date
263
- [:date, m[:date], date]
262
+ ## note - for debugging keep (pass along) "literal" date
263
+ [:date, m[:date], date]
264
264
  else
265
265
  [:date, m[:date]]
266
266
  end
267
267
  elsif m[:timezone]
268
268
  [:timezone, m[:timezone]]
269
269
  elsif m[:duration]
270
- [:duration, m[:duration]]
270
+ if typed
271
+ duration = { start: {}, end: {}}
272
+ duration[:start][:y] = m[:year1].to_i(10) if m[:year1]
273
+ duration[:start][:m] = MONTH_MAP[ m[:month_name1].downcase ] if m[:month_name1]
274
+ duration[:start][:d] = m[:day1].to_i(10) if m[:day1]
275
+ duration[:start][:wday] = DAY_MAP[ m[:day_name1].downcase ] if m[:day_name1]
276
+ duration[:end][:y] = m[:year2].to_i(10) if m[:year2]
277
+ duration[:end][:m] = MONTH_MAP[ m[:month_name2].downcase ] if m[:month_name2]
278
+ duration[:end][:d] = m[:day2].to_i(10) if m[:day2]
279
+ duration[:end][:wday] = DAY_MAP[ m[:day_name2].downcase ] if m[:day_name2]
280
+ ## note - for debugging keep (pass along) "literal" duration
281
+ [:duration, m[:duration], duration]
282
+ else
283
+ [:duration, m[:duration]]
284
+ end
271
285
  elsif m[:num]
272
286
  if typed
273
287
  ## note - strip enclosing () and convert to integer
274
288
  [:num, m[:value].to_i(10)]
275
- else
289
+ else
276
290
  [:num, m[:num]]
277
291
  end
278
292
  elsif m[:score]
279
293
  if typed
280
294
  score = {}
281
295
  ## check for pen
282
- score[:p] = [m[:p1].to_i(10),
296
+ score[:p] = [m[:p1].to_i(10),
283
297
  m[:p2].to_i(10)] if m[:p1] && m[:p2]
284
- score[:et] = [m[:et1].to_i(10),
298
+ score[:et] = [m[:et1].to_i(10),
285
299
  m[:et2].to_i(10)] if m[:et1] && m[:et2]
286
- score[:ft] = [m[:ft1].to_i(10),
300
+ score[:ft] = [m[:ft1].to_i(10),
287
301
  m[:ft2].to_i(10)] if m[:ft1] && m[:ft2]
288
- score[:ht] = [m[:ht1].to_i(10),
302
+ score[:ht] = [m[:ht1].to_i(10),
289
303
  m[:ht2].to_i(10)] if m[:ht1] && m[:ht2]
290
304
 
291
- ## note - for debugging keep (pass along) "literal" score
305
+ ## note - for debugging keep (pass along) "literal" score
292
306
  [:score, m[:score], score]
293
307
  else
294
308
  [:score, m[:score]]
@@ -298,7 +312,7 @@ def tokenize_with_errors( line, typed: false,
298
312
  minute = {}
299
313
  minute[:m] = m[:value].to_i(10)
300
314
  minute[:offset] = m[:value2].to_i(10) if m[:value2]
301
- ## note - for debugging keep (pass along) "literal" minute
315
+ ## note - for debugging keep (pass along) "literal" minute
302
316
  [:minute, m[:minute], minute]
303
317
  else
304
318
  [:minute, m[:minute]]
@@ -318,16 +332,16 @@ def tokenize_with_errors( line, typed: false,
318
332
  when ',' then [:',']
319
333
  when ';' then [:';']
320
334
  when '@' then [:'@']
321
- when '|' then [:'|']
335
+ when '|' then [:'|']
322
336
  else
323
337
  nil ## ignore others (e.g. brackets [])
324
338
  end
325
339
  else
326
- ## report error
340
+ ## report error
327
341
  nil
328
342
  end
329
343
 
330
- tokens << t if t
344
+ tokens << t if t
331
345
 
332
346
  if debug
333
347
  print ">"
@@ -346,7 +360,7 @@ def tokenize_with_errors( line, typed: false,
346
360
  end
347
361
 
348
362
 
349
- [tokens,errors]
363
+ [tokens,errors]
350
364
  end
351
365
 
352
366
 
@@ -360,5 +374,4 @@ end
360
374
 
361
375
 
362
376
  end # class Parser
363
- end # module SportDb
364
-
377
+ end # module SportDb
@@ -0,0 +1,24 @@
1
+
2
+ module SportDb
3
+ module Module
4
+ module Parser
5
+ MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 2
7
+ PATCH = 0
8
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
9
+
10
+ def self.version
11
+ VERSION
12
+ end
13
+
14
+ def self.banner
15
+ "sportdb-parser/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
16
+ end
17
+
18
+ def self.root
19
+ File.expand_path( File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) )
20
+ end
21
+
22
+ end # module Parser
23
+ end
24
+ end
@@ -1,3 +1,7 @@
1
+ ## pulls in
2
+ require 'cocos'
3
+ require 'season/formats' # e.g. Season() support machinery
4
+
1
5
 
2
6
 
3
7
  ####
@@ -11,7 +15,7 @@
11
15
  ## text - change text to name - why? why not?
12
16
 
13
17
 
14
-
18
+ require_relative 'parser/version'
15
19
  require_relative 'parser/token-score'
16
20
  require_relative 'parser/token-date'
17
21
  require_relative 'parser/token-text'
@@ -23,6 +27,7 @@ require_relative 'parser/parser'
23
27
  ## more
24
28
  require_relative 'parser/outline_reader'
25
29
  require_relative 'parser/linter'
30
+ require_relative 'parser/opts'
26
31
 
27
32
 
28
33
  ###
@@ -31,7 +36,7 @@ require_relative 'parser/linter'
31
36
  =begin
32
37
  module SportDb
33
38
  def self.parser() @@parser ||= Parser.new; end
34
- def self.parse( ... )
39
+ def self.parse( ... )
35
40
  end
36
41
  def self.tokenize( ... )
37
42
  end
@@ -39,6 +44,5 @@ end # module SportDb
39
44
  =end
40
45
 
41
46
 
42
-
43
-
47
+ puts SportDb::Module::Parser.banner # say hello
44
48
 
metadata CHANGED
@@ -1,15 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-14 00:00:00.000000000 Z
11
+ date: 2024-08-22 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: cocos
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.4.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.4.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: season-formats
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
13
41
  - !ruby/object:Gem::Dependency
14
42
  name: rdoc
15
43
  requirement: !ruby/object:Gem::Requirement
@@ -62,12 +90,14 @@ files:
62
90
  - lib/sportdb/parser.rb
63
91
  - lib/sportdb/parser/lang.rb
64
92
  - lib/sportdb/parser/linter.rb
93
+ - lib/sportdb/parser/opts.rb
65
94
  - lib/sportdb/parser/outline_reader.rb
66
95
  - lib/sportdb/parser/parser.rb
67
96
  - lib/sportdb/parser/token-date.rb
68
97
  - lib/sportdb/parser/token-score.rb
69
98
  - lib/sportdb/parser/token-text.rb
70
99
  - lib/sportdb/parser/token.rb
100
+ - lib/sportdb/parser/version.rb
71
101
  homepage: https://github.com/sportdb/sport.db
72
102
  licenses:
73
103
  - Public Domain