sportdb-parser 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +17 -4
- data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
- data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
- data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
- data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
- data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
- data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
- data/lib/sportdb/parser/lexer-on_top.rb +125 -0
- data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
- data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
- data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
- data/lib/sportdb/parser/lexer.rb +133 -1363
- data/lib/sportdb/parser/lexer_buffer.rb +8 -37
- data/lib/sportdb/parser/lexer_token.rb +126 -0
- data/lib/sportdb/parser/parser.rb +1104 -1403
- data/lib/sportdb/parser/racc_parser.rb +36 -32
- data/lib/sportdb/parser/racc_tree.rb +65 -98
- data/lib/sportdb/parser/token-date--helpers.rb +130 -0
- data/lib/sportdb/parser/token-date--names.rb +108 -0
- data/lib/sportdb/parser/token-date.rb +20 -192
- data/lib/sportdb/parser/token-date_duration.rb +8 -27
- data/lib/sportdb/parser/token-geo.rb +16 -16
- data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
- data/lib/sportdb/parser/token-goals.rb +103 -249
- data/lib/sportdb/parser/token-group.rb +8 -22
- data/lib/sportdb/parser/token-prop.rb +138 -124
- data/lib/sportdb/parser/token-prop_name.rb +48 -39
- data/lib/sportdb/parser/token-round.rb +21 -35
- data/lib/sportdb/parser/token-score--helpers.rb +189 -0
- data/lib/sportdb/parser/token-score.rb +9 -393
- data/lib/sportdb/parser/token-score_full.rb +331 -0
- data/lib/sportdb/parser/token-status.rb +44 -46
- data/lib/sportdb/parser/token-status_inline.rb +112 -0
- data/lib/sportdb/parser/token-text.rb +41 -31
- data/lib/sportdb/parser/token-time.rb +29 -26
- data/lib/sportdb/parser/token.rb +58 -159
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +45 -17
- metadata +19 -6
- data/lib/sportdb/parser/blocktxt.rb +0 -99
- data/lib/sportdb/parser/lexer_tty.rb +0 -111
- data/lib/sportdb/parser/token-table.rb +0 -149
- data/lib/sportdb/parser/token_helpers.rb +0 -92
data/lib/sportdb/parser.rb
CHANGED
|
@@ -1,52 +1,80 @@
|
|
|
1
|
-
## pulls in
|
|
2
1
|
require 'cocos'
|
|
3
2
|
|
|
4
3
|
|
|
5
|
-
####
|
|
6
|
-
# try a (simple) tokenizer/parser with regex
|
|
7
4
|
|
|
8
|
-
## note - match line-by-line
|
|
9
|
-
# avoid massive backtracking by definition
|
|
10
|
-
# that is, making it impossible
|
|
11
5
|
|
|
12
|
-
|
|
13
|
-
## text - change text to name - why? why not?
|
|
6
|
+
require_relative 'parser/version'
|
|
14
7
|
|
|
15
8
|
|
|
16
|
-
|
|
9
|
+
|
|
10
|
+
## core machinery
|
|
17
11
|
|
|
18
12
|
##
|
|
19
|
-
##
|
|
20
|
-
|
|
13
|
+
## add shared/most basic regexes here
|
|
14
|
+
## todo - use ANY_RE to token_commons or such - for shared by many?
|
|
15
|
+
module SportDb
|
|
16
|
+
class Lexer
|
|
17
|
+
|
|
18
|
+
## general catch-all (RECOMMENDED (ALWAYS) use as last entry in union)
|
|
19
|
+
## to avoid advance of pos match!!!
|
|
20
|
+
ANY_RE = %r{
|
|
21
|
+
(?<any> .)
|
|
22
|
+
}ix
|
|
23
|
+
|
|
24
|
+
SPACES_RE = %r{
|
|
25
|
+
(?<spaces> [ ]{2,})
|
|
26
|
+
| (?<space> [ ])
|
|
27
|
+
}ix
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
end # class Lexer
|
|
31
|
+
end # module SportDb
|
|
21
32
|
|
|
22
|
-
## core machinery
|
|
23
33
|
|
|
24
34
|
require_relative 'parser/token-score'
|
|
35
|
+
require_relative 'parser/token-score_full'
|
|
25
36
|
require_relative 'parser/token-score_fuller'
|
|
26
37
|
require_relative 'parser/token-score_legs'
|
|
38
|
+
require_relative 'parser/token-score--helpers'
|
|
39
|
+
|
|
27
40
|
require_relative 'parser/token-time'
|
|
41
|
+
require_relative 'parser/token-date--names'
|
|
28
42
|
require_relative 'parser/token-date'
|
|
29
43
|
require_relative 'parser/token-date_duration'
|
|
44
|
+
require_relative 'parser/token-date--helpers'
|
|
45
|
+
|
|
30
46
|
require_relative 'parser/token-text'
|
|
47
|
+
require_relative 'parser/token-prop' ## team prop(erty) mode (note - must be before token)
|
|
31
48
|
require_relative 'parser/token-prop_name' ## a.k.a token-text_ii
|
|
32
49
|
require_relative 'parser/token-status'
|
|
50
|
+
require_relative 'parser/token-status_inline'
|
|
33
51
|
require_relative 'parser/token-note'
|
|
34
52
|
require_relative 'parser/token-goals'
|
|
35
|
-
require_relative 'parser/token-
|
|
53
|
+
require_relative 'parser/token-goals--helpers'
|
|
36
54
|
require_relative 'parser/token-geo'
|
|
37
55
|
require_relative 'parser/token-group'
|
|
38
56
|
require_relative 'parser/token-round'
|
|
39
|
-
require_relative 'parser/token-table'
|
|
40
57
|
require_relative 'parser/token'
|
|
41
58
|
|
|
42
59
|
|
|
43
|
-
|
|
44
|
-
require_relative 'parser/token_helpers'
|
|
60
|
+
|
|
45
61
|
|
|
46
62
|
|
|
47
63
|
require_relative 'parser/lexer_buffer' ## incl. Tokens (aka TokenBuffer)
|
|
64
|
+
|
|
65
|
+
require_relative 'parser/lexer-prep_doc'
|
|
66
|
+
require_relative 'parser/lexer-prep_line'
|
|
67
|
+
|
|
68
|
+
require_relative 'parser/lexer_token'
|
|
69
|
+
require_relative 'parser/lexer-tokenize'
|
|
70
|
+
require_relative 'parser/lexer-on_round_def'
|
|
71
|
+
require_relative 'parser/lexer-on_group_def'
|
|
72
|
+
require_relative 'parser/lexer-on_prop_misc'
|
|
73
|
+
require_relative 'parser/lexer-on_prop_lineup'
|
|
74
|
+
require_relative 'parser/lexer-on_prop_penalties'
|
|
75
|
+
require_relative 'parser/lexer-on_goal'
|
|
76
|
+
require_relative 'parser/lexer-on_top'
|
|
48
77
|
require_relative 'parser/lexer'
|
|
49
|
-
require_relative 'parser/lexer_tty' ## teletype (tty) mode
|
|
50
78
|
|
|
51
79
|
|
|
52
80
|
## note - use "embeded" racc parser runtime
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sportdb-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.1
|
|
4
|
+
version: 0.7.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Gerald Bauer
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-06-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: cocos
|
|
@@ -72,32 +72,45 @@ files:
|
|
|
72
72
|
- README.md
|
|
73
73
|
- Rakefile
|
|
74
74
|
- lib/sportdb/parser.rb
|
|
75
|
-
- lib/sportdb/parser/blocktxt.rb
|
|
75
|
+
- lib/sportdb/parser/lexer-on_goal.rb
|
|
76
|
+
- lib/sportdb/parser/lexer-on_group_def.rb
|
|
77
|
+
- lib/sportdb/parser/lexer-on_prop_lineup.rb
|
|
78
|
+
- lib/sportdb/parser/lexer-on_prop_misc.rb
|
|
79
|
+
- lib/sportdb/parser/lexer-on_prop_penalties.rb
|
|
80
|
+
- lib/sportdb/parser/lexer-on_round_def.rb
|
|
81
|
+
- lib/sportdb/parser/lexer-on_top.rb
|
|
82
|
+
- lib/sportdb/parser/lexer-prep_doc.rb
|
|
83
|
+
- lib/sportdb/parser/lexer-prep_line.rb
|
|
84
|
+
- lib/sportdb/parser/lexer-tokenize.rb
|
|
76
85
|
- lib/sportdb/parser/lexer.rb
|
|
77
86
|
- lib/sportdb/parser/lexer_buffer.rb
|
|
78
|
-
- lib/sportdb/parser/lexer_tty.rb
|
|
87
|
+
- lib/sportdb/parser/lexer_token.rb
|
|
79
88
|
- lib/sportdb/parser/parser.rb
|
|
80
89
|
- lib/sportdb/parser/parser_runtime.rb
|
|
81
90
|
- lib/sportdb/parser/racc_parser.rb
|
|
82
91
|
- lib/sportdb/parser/racc_tree.rb
|
|
92
|
+
- lib/sportdb/parser/token-date--helpers.rb
|
|
93
|
+
- lib/sportdb/parser/token-date--names.rb
|
|
83
94
|
- lib/sportdb/parser/token-date.rb
|
|
84
95
|
- lib/sportdb/parser/token-date_duration.rb
|
|
85
96
|
- lib/sportdb/parser/token-geo.rb
|
|
97
|
+
- lib/sportdb/parser/token-goals--helpers.rb
|
|
86
98
|
- lib/sportdb/parser/token-goals.rb
|
|
87
99
|
- lib/sportdb/parser/token-group.rb
|
|
88
100
|
- lib/sportdb/parser/token-note.rb
|
|
89
101
|
- lib/sportdb/parser/token-prop.rb
|
|
90
102
|
- lib/sportdb/parser/token-prop_name.rb
|
|
91
103
|
- lib/sportdb/parser/token-round.rb
|
|
104
|
+
- lib/sportdb/parser/token-score--helpers.rb
|
|
92
105
|
- lib/sportdb/parser/token-score.rb
|
|
106
|
+
- lib/sportdb/parser/token-score_full.rb
|
|
93
107
|
- lib/sportdb/parser/token-score_fuller.rb
|
|
94
108
|
- lib/sportdb/parser/token-score_legs.rb
|
|
95
109
|
- lib/sportdb/parser/token-status.rb
|
|
96
|
-
- lib/sportdb/parser/token-table.rb
|
|
110
|
+
- lib/sportdb/parser/token-status_inline.rb
|
|
97
111
|
- lib/sportdb/parser/token-text.rb
|
|
98
112
|
- lib/sportdb/parser/token-time.rb
|
|
99
113
|
- lib/sportdb/parser/token.rb
|
|
100
|
-
- lib/sportdb/parser/token_helpers.rb
|
|
101
114
|
- lib/sportdb/parser/version.rb
|
|
102
115
|
homepage: https://github.com/sportdb/sport.db
|
|
103
116
|
licenses:
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
###
|
|
2
|
-
# generic block text/txt helper
|
|
3
|
-
|
|
4
|
-
## todo/chekc - find a better name SectTxt or ??
|
|
5
|
-
|
|
6
|
-
class BlockTxt
|
|
7
|
-
|
|
8
|
-
def self.parse( txt ) new( txt); end
|
|
9
|
-
def self.read( path ) parse( read_text( path )); end
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def initialize( txt )
|
|
13
|
-
@sects = _parse( txt )
|
|
14
|
-
self
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def size() @sects.size; end
|
|
18
|
-
|
|
19
|
-
def each( &blk ) @sects.each( &blk ); end
|
|
20
|
-
def each_with_index( &blk ) @sects.each_with_index( &blk ); end
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def text
|
|
24
|
-
## only get all txt1 parts joined as single all-in-one string txt
|
|
25
|
-
@sects.map {|sect| sect[0] }.join( "\n" )
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def dump ## for debugging
|
|
30
|
-
puts "==> sects (#{@sects.size}):"
|
|
31
|
-
pp @sects
|
|
32
|
-
puts " #{@sects.size} sect(s)"
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
##
|
|
37
|
-
# quick support for __END__
|
|
38
|
-
END_RE = %r{ ^
|
|
39
|
-
[ ]* __END__ [ ]*
|
|
40
|
-
.*?
|
|
41
|
-
\z ## note - until end-of-string/file !!!
|
|
42
|
-
}mx
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
## SECT_RE (old) = %r{^
|
|
46
|
-
## [ ]* --- [ ]*
|
|
47
|
-
## $}x
|
|
48
|
-
##
|
|
49
|
-
## do NOT use --- (used in fbtxt and markdown and yaml etc.)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
## e.g. § or §§ or § § § or such
|
|
53
|
-
## maybe allow :: § :: or such too or --- § --- or such
|
|
54
|
-
SECT_RE = %r{^
|
|
55
|
-
[ ]* §
|
|
56
|
-
([ ]*§)*
|
|
57
|
-
[ ]*
|
|
58
|
-
$}x
|
|
59
|
-
|
|
60
|
-
## split by " => or =====> "
|
|
61
|
-
## todo/check - subsect?? find a better name? in/out or txt1/txt2
|
|
62
|
-
SUBSECT_RE = %r{^
|
|
63
|
-
[ ]*
|
|
64
|
-
=+ >
|
|
65
|
-
[ ]*
|
|
66
|
-
$}x
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def _parse( txt )
|
|
71
|
-
blocks = [] ## note - holds [txt,exp] pairs
|
|
72
|
-
|
|
73
|
-
txt = txt.sub( END_RE, '' )
|
|
74
|
-
|
|
75
|
-
## split by §
|
|
76
|
-
sections = txt.split( SECT_RE )
|
|
77
|
-
|
|
78
|
-
sections.each_with_index do |sect,i|
|
|
79
|
-
## puts ">>> start #{i+1}"
|
|
80
|
-
## pp sect
|
|
81
|
-
## puts "<<< end #{i+1}"
|
|
82
|
-
|
|
83
|
-
txt1, txt2 = sect.split( SUBSECT_RE )
|
|
84
|
-
blocks << [txt1,txt2]
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
blocks
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
end # class BlockTxt
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
###
|
|
96
|
-
# function-style helpers
|
|
97
|
-
|
|
98
|
-
def read_blocktxt( path ); BlockTxt.read( path ); end
|
|
99
|
-
def parse_blocktxt( txt ); BlockTxt.new( txt ); end
|
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
module SportDb
|
|
2
|
-
class Lexer
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
########
|
|
6
|
-
## experimental teletype mode
|
|
7
|
-
## only space, A-Z and 0-9 allowed
|
|
8
|
-
IS_TTY_LINE_RE = %r{ \A
|
|
9
|
-
## note - use NEGATIVE lookahead to exclude blank lines
|
|
10
|
-
(?! [ ]*\z)
|
|
11
|
-
|
|
12
|
-
[A-Z0-9 ]+
|
|
13
|
-
\z
|
|
14
|
-
}x
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
TTY_SPACES_RE = %r{ (?<spaces> [ ]{2,}) |
|
|
18
|
-
(?<space> [ ])
|
|
19
|
-
}x
|
|
20
|
-
TTY_NUM_RE = %r{ \b (?<num> \d+ ) \b
|
|
21
|
-
}x
|
|
22
|
-
|
|
23
|
-
##
|
|
24
|
-
## note - TEXT for now allows A, 1A, A1, A1A, A1 B1 C1,
|
|
25
|
-
## A1AA1 2B22 3C33
|
|
26
|
-
## - single space only for concat
|
|
27
|
-
## text segments MUST NOT be all numbers e.g. 1, 11, etc.
|
|
28
|
-
TTY_TEXT_RE = %r{ \b (?<text>
|
|
29
|
-
(?:
|
|
30
|
-
[A-Z] ## MUST start with letter
|
|
31
|
-
|
|
|
32
|
-
[0-9]+[A-Z] ## or numbers followed by letter
|
|
33
|
-
)
|
|
34
|
-
[0-9A-Z]*
|
|
35
|
-
(?:
|
|
36
|
-
### allow move segements separated
|
|
37
|
-
## by single space
|
|
38
|
-
[ ]
|
|
39
|
-
(?:
|
|
40
|
-
[A-Z] ## MUST start with letter
|
|
41
|
-
|
|
|
42
|
-
[0-9]+[A-Z] ## or numbers followed by letter
|
|
43
|
-
)
|
|
44
|
-
[0-9A-Z]*
|
|
45
|
-
)*
|
|
46
|
-
)
|
|
47
|
-
\b
|
|
48
|
-
}x
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
TTY_RE = Regexp.union(
|
|
52
|
-
TTY_SPACES_RE,
|
|
53
|
-
TTY_TEXT_RE,
|
|
54
|
-
TTY_NUM_RE,
|
|
55
|
-
## fix add ANY_RE,
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def _tokenize_tty_line( line )
|
|
60
|
-
line = line.strip
|
|
61
|
-
|
|
62
|
-
tokens = []
|
|
63
|
-
|
|
64
|
-
## track last offsets - to report error on no match
|
|
65
|
-
## or no match in end of string
|
|
66
|
-
offsets = [0,0]
|
|
67
|
-
pos = 0
|
|
68
|
-
m = nil
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
while m = TTY_RE.match( line, pos )
|
|
72
|
-
offsets = [m.begin(0), m.end(0)]
|
|
73
|
-
|
|
74
|
-
if offsets[0] != pos
|
|
75
|
-
## match NOT starting at start/begin position!!!
|
|
76
|
-
## report parse error!!!
|
|
77
|
-
msg = "!! WARN - tokenize (tty) error - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
|
|
78
|
-
puts msg
|
|
79
|
-
log( msg )
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
pos = offsets[1]
|
|
83
|
-
|
|
84
|
-
t = if m[:spaces] || m[:space]
|
|
85
|
-
nil ## skip spaces
|
|
86
|
-
elsif m[:text]
|
|
87
|
-
[:TTY_TEXT, m[:text]]
|
|
88
|
-
elsif m[:num]
|
|
89
|
-
[:TTY_NUM, m[:num].to_i(10)]
|
|
90
|
-
else
|
|
91
|
-
## report error/raise expection
|
|
92
|
-
puts "!!! TTY TOKENIZE ERROR - no match found"
|
|
93
|
-
nil
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
tokens << t if t
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
## check if no match in end of string
|
|
100
|
-
if offsets[1] != line.size
|
|
101
|
-
msg = "!! WARN - tokenize (tty) error - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
|
|
102
|
-
puts msg
|
|
103
|
-
log( msg )
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
tokens
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
end # class Lexer
|
|
110
|
-
end # module SportDb
|
|
111
|
-
|
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
module SportDb
|
|
2
|
-
class Lexer
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
###
|
|
6
|
-
## check for
|
|
7
|
-
## table (standing) lines
|
|
8
|
-
##
|
|
9
|
-
## e.g.
|
|
10
|
-
##
|
|
11
|
-
## Pld W D L GF-GA Pts | d d d d-d d
|
|
12
|
-
## Pld GF-GA Pts | d d-d d
|
|
13
|
-
## Pld Pts W D L GF-GA | d d d d d d-d
|
|
14
|
-
##
|
|
15
|
-
## Pld = matches played
|
|
16
|
-
## GF-GA = goal for, goal against
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
## Pld W D L GF-GA Pts | d d d d-d d
|
|
20
|
-
##
|
|
21
|
-
## 1.BRAZIL 3 2 1 0 7- 2 7
|
|
22
|
-
## 2.MEXICO 3 2 1 0 4- 1 7
|
|
23
|
-
## 3.Croatia 3 1 0 2 6- 6 3
|
|
24
|
-
## 4.Cameroon 3 0 0 3 1- 9 0
|
|
25
|
-
|
|
26
|
-
## add more headings?? e.g.
|
|
27
|
-
## Final Table:
|
|
28
|
-
##
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
TABLE_HEADING_I_RE = %r{
|
|
32
|
-
\A
|
|
33
|
-
[ ]* ## ignore leading spaces (if any)
|
|
34
|
-
(?<table_heading>
|
|
35
|
-
\b
|
|
36
|
-
P(?:ld)? [ ]+
|
|
37
|
-
W [ ]+
|
|
38
|
-
D [ ]+
|
|
39
|
-
L [ ]+
|
|
40
|
-
Gls [ ]+
|
|
41
|
-
Pts
|
|
42
|
-
\b
|
|
43
|
-
)
|
|
44
|
-
[ ]* ## ignore trailing spaces (if any)
|
|
45
|
-
\z
|
|
46
|
-
}xi
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
##
|
|
50
|
-
## "solid"-style
|
|
51
|
-
## -----------------------------------------------------
|
|
52
|
-
## "dashed"-style ??
|
|
53
|
-
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
TABLE_DIVIDER_RE = %r{
|
|
57
|
-
\A
|
|
58
|
-
[ ]* ## ignore leading spaces (if any)
|
|
59
|
-
(?<table_divider>
|
|
60
|
-
(?: --- ## note - require three dashes minimum (---)
|
|
61
|
-
[-]*
|
|
62
|
-
)
|
|
63
|
-
|
|
|
64
|
-
(?: - [ ]+ - [ ]+ - ## note - require three dashes minimum (- - -)
|
|
65
|
-
(?: [ ]+ -)* ## todo/check - restrict spaces to 2 or 3 or such - why? why not?
|
|
66
|
-
)
|
|
67
|
-
)
|
|
68
|
-
[ ]* ## ignore trailing spaces (if any)
|
|
69
|
-
\z
|
|
70
|
-
}xi
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
####
|
|
74
|
-
## 1.SOLOMON I. 1 1 0 0 3- 1 3
|
|
75
|
-
## 2.TAHITI 1 0 0 1 1- 3 0
|
|
76
|
-
## -.Cook Islands withdrew after first match (annulled) due to Covid-19 outbreak in squad
|
|
77
|
-
## -.Vanuatu withdrew before playing any matches due to Covid-19 outbreak in squad -->
|
|
78
|
-
##
|
|
79
|
-
## note - starting with -. is a table note!!!
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
TABLE_NOTE_RE = %r{
|
|
83
|
-
\A
|
|
84
|
-
[ ]* ## ignore leading spaces (if any)
|
|
85
|
-
-\.
|
|
86
|
-
[ ]*
|
|
87
|
-
(?<table_note>
|
|
88
|
-
.+? ## note - use non-greedy
|
|
89
|
-
)
|
|
90
|
-
[ ]* ## ignore trailing spaces (if any)
|
|
91
|
-
\z
|
|
92
|
-
}xi
|
|
93
|
-
|
|
94
|
-
TABLE_I_RE = %r{
|
|
95
|
-
(?<table>\b
|
|
96
|
-
\d{1,2} [ ]+ # Pld
|
|
97
|
-
\d{1,2} [ ]+ # W
|
|
98
|
-
\d{1,2} [ ]+ # D
|
|
99
|
-
\d{1,2} [ ]+ # L
|
|
100
|
-
(?: \d{1,3} - [ ]* \d{1,3} [ ]+ ) # GF-GA
|
|
101
|
-
\d{1,3} # Pts
|
|
102
|
-
\b
|
|
103
|
-
)}xi
|
|
104
|
-
|
|
105
|
-
## Pld Pts W D L GF-GA | d d d d d d-d
|
|
106
|
-
##
|
|
107
|
-
## 1. ARG^ 3 6 3 0 0 10-4
|
|
108
|
-
## 2. CHI 3 4 2 0 1 5-3
|
|
109
|
-
## 3. FRA 3 2 1 0 2 4-3
|
|
110
|
-
## 4. MEX 3 0 0 0 3 4-13
|
|
111
|
-
|
|
112
|
-
TABLE_II_RE = %r{
|
|
113
|
-
(?<table>\b
|
|
114
|
-
\d{1,2} [ ]+ # Pld
|
|
115
|
-
\d{1,3} [ ]+ # Pts
|
|
116
|
-
\d{1,2} [ ]+ # W
|
|
117
|
-
\d{1,2} [ ]+ # D
|
|
118
|
-
\d{1,2} [ ]+ # L
|
|
119
|
-
(?: \d{1,3} - [ ]* \d{1,3}) # GF-GA
|
|
120
|
-
\b
|
|
121
|
-
)}xi
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
#############################################
|
|
126
|
-
# map tables
|
|
127
|
-
# note: order matters; first come-first matched/served
|
|
128
|
-
|
|
129
|
-
## possible start lines for a table
|
|
130
|
-
## excludes NOTE
|
|
131
|
-
## and RULER (e.g. --- or) or such in the future
|
|
132
|
-
TABLE_RE = Regexp.union(
|
|
133
|
-
TABLE_HEADING_I_RE,
|
|
134
|
-
TABLE_I_RE,
|
|
135
|
-
TABLE_II_RE,
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
## all possible continuation for a table
|
|
139
|
-
## excludes HEADING
|
|
140
|
-
TABLE_MORE_RE = Regexp.union(
|
|
141
|
-
TABLE_NOTE_RE,
|
|
142
|
-
TABLE_DIVIDER_RE,
|
|
143
|
-
TABLE_I_RE,
|
|
144
|
-
TABLE_II_RE,
|
|
145
|
-
)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
end # class Lexer
|
|
149
|
-
end # module SportDb
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
module SportDb
|
|
2
|
-
class Lexer
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
=begin
|
|
6
|
-
def self._mk_is( re )
|
|
7
|
-
## add \A ... \z to regex
|
|
8
|
-
## for strict matching of beginning and end of string
|
|
9
|
-
## regex note - \z will NOT allow trailing newline(s)!!!!
|
|
10
|
-
## note - must double espace \\A,\\z in quoted string!!
|
|
11
|
-
Regexp.new( %Q< \\A
|
|
12
|
-
(?:#{re.source})
|
|
13
|
-
\\z
|
|
14
|
-
>, re.options )
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
IS_TEAM_RE = _mk_is( TEXT_RE ) ## todo/fix - rename TEXT_RE to TEAM_RE!!!
|
|
19
|
-
IS_DATE_RE = _mk_is( DATE_IIII_RE ) ## DATE_RE )
|
|
20
|
-
=end
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def self._parse_team( str )
|
|
24
|
-
## note - strip - leading/trailing spaces
|
|
25
|
-
m = TEXT_RE.match( str.strip )
|
|
26
|
-
if m && m.pre_match == '' && m.post_match == ''
|
|
27
|
-
m
|
|
28
|
-
elsif m
|
|
29
|
-
## note - match BUT not anchored to start and end-of-string!!!
|
|
30
|
-
## report, error somehow??
|
|
31
|
-
nil
|
|
32
|
-
else
|
|
33
|
-
nil ## no match - return nil
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def self._parse_date( str )
|
|
39
|
-
## note - strip - leading/trailing spaces
|
|
40
|
-
m = DATE_RE.match( str.strip )
|
|
41
|
-
|
|
42
|
-
#### todo/fix/check:
|
|
43
|
-
### wrapped with \A \z NOT working with union - check later - why?
|
|
44
|
-
### use hand-coded with pre_match = "" and post_match = ""
|
|
45
|
-
|
|
46
|
-
if m && m.pre_match == '' && m.post_match == ''
|
|
47
|
-
## return hash table with captured components
|
|
48
|
-
date = {}
|
|
49
|
-
## map month names
|
|
50
|
-
## note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
|
|
51
|
-
date[:y] = m[:year].to_i(10) if m[:year]
|
|
52
|
-
## check - use y too for two-digit year or keep separate - why? why not?
|
|
53
|
-
date[:yy] = m[:yy].to_i(10) if m[:yy] ## two digit year (e.g. 25 or 78 etc.)
|
|
54
|
-
date[:m] = m[:month].to_i(10) if m[:month]
|
|
55
|
-
date[:m] = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
|
|
56
|
-
date[:d] = m[:day].to_i(10) if m[:day]
|
|
57
|
-
date[:wday] = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
|
|
58
|
-
date
|
|
59
|
-
elsif m
|
|
60
|
-
## note - match BUT not anchored to start and end-of-string!!!
|
|
61
|
-
## report, error somehow??
|
|
62
|
-
nil
|
|
63
|
-
else
|
|
64
|
-
nil ## no match - return nil
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def self._parse_score_full( str )
|
|
70
|
-
## note - strip - leading/trailing spaces
|
|
71
|
-
m=SCORE_FULL_RE.match( str )
|
|
72
|
-
|
|
73
|
-
if m && m.pre_match == '' && m.post_match == ''
|
|
74
|
-
score = {}
|
|
75
|
-
score[:p] = [m[:p1].to_i,m[:p2].to_i] if m[:p1] && m[:p2]
|
|
76
|
-
score[:et] = [m[:et1].to_i,m[:et2].to_i] if m[:et1] && m[:et2]
|
|
77
|
-
score[:ft] = [m[:ft1].to_i,m[:ft2].to_i] if m[:ft1] && m[:ft2]
|
|
78
|
-
score[:ht] = [m[:ht1].to_i,m[:ht2].to_i] if m[:ht1] && m[:ht2]
|
|
79
|
-
## score[:agg] = [m[:agg1].to_i,m[:agg2].to_i] if m[:agg1] && m[:agg2]
|
|
80
|
-
score
|
|
81
|
-
elsif m
|
|
82
|
-
## note - match BUT not anchored to start and end-of-string!!!
|
|
83
|
-
## report, error somehow??
|
|
84
|
-
nil
|
|
85
|
-
else
|
|
86
|
-
nil ## no match - return nil
|
|
87
|
-
end
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
end # class Lexer
|
|
92
|
-
end # module SportDb
|