sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -0,0 +1,97 @@
1
+
2
module SportDb

## note - Tokens was placed inside Lexer - keep "top-level" for now inside SportDb
##          for easier reuse with (new) lexer variants!!

## transforms
##
##  Netherlands  1-2 (1-1)  England
##   =>  text => team
##       score|vs
##       text => team



## token iter/find better name
##   e.g. TokenBuffer/Scanner or such ??
##
## Read-only cursor over an array of tokens.
##   Every token is accessed as an array - index 0 is the token type
##   (e.g. :TEXT, :NUM, etc.) and index 1 is the token value/content.
class Tokens
  ## token types accepted by text()
  ##   note - uppercase :TEXT is the type used in patterns (see match?);
  ##          lowercase :text is kept for backward compatibility
  TEXT_TYPES = [:TEXT, :text].freeze

  ## tokens - array of [type, value] tokens; the cursor starts at index 0
  def initialize( tokens )
    @tokens = tokens
    @pos    = 0
  end

  ## current cursor position (index of the next token to be read)
  def pos()   @pos; end
  ## true if all tokens are consumed
  def eos?()  @pos >= @tokens.size; end


  ## check if any of the passed-in token types shows up
  ##   anywhere from the current position up to the end
  ##   note - does NOT move the cursor
  def include?( *types )
    @tokens.drop( @pos ).any? { |tok| types.include?( tok[0] ) }
  end

  ## check if the upcoming token types match the pattern
  ##   pattern e.g. [:TEXT, [:VS,:SCORE], :TEXT]
  ##     - every entry is a type or an array of alternative types
  ##     - entry at index 0 is checked against the current token,
  ##         index 1 against the next and so on
  ##   note - does NOT move the cursor
  def match?( *pattern )
    pattern.each_with_index.all? do |types, offset|
      ## if single symbol wrap in array
      types = [types]   unless types.is_a?( Array )
      types.include?( peek( offset ) )
    end
  end


  ## return token type (e.g. :TEXT, :NUM, etc.) of current token
  ##   or nil if eos
  def cur()  peek(0); end

  ## return content (assumed to be text) of token at offset
  ##   raises ArgumentError if the token is not a text type
  ##     (or if there is no token at offset)
  def text( offset=0 )
    unless TEXT_TYPES.include?( peek( offset ) )
      raise ArgumentError, "text(#{offset}) - token not a text type"
    end
    @tokens[@pos+offset][1]
  end


  ## return token type at offset (relative to the current position)
  ##   note - default offset is 1, that is, the token AFTER the current one
  ##   returns nil if the position is out-of-range (before start or eos)
  def peek( offset=1 )
    idx = @pos + offset
    ## note - guard against negative index;
    ##          otherwise the lookup wraps around to the end of the array
    return nil   if idx < 0 || idx >= @tokens.size

    @tokens[idx][0]
  end

  ## note - returns complete token (and moves the cursor by one)
  ##   no eos check here - returns nil (and still moves the cursor)
  ##     if called after the last token
  def next
    # if @pos >= @tokens.size
    #     raise ArgumentError, "end of array - #{@pos} >= #{@tokens.size}"
    # end
    #   throw (standard) end of iteration here why? why not?

    t = @tokens[@pos]
    @pos += 1
    t
  end

  ## consume all remaining tokens and return them in an array;
  ##   if a block is passed-in every token gets mapped via the block
  def collect
    tokens = []
    until eos?
      tok = self.next
      tokens << (block_given? ? yield( tok ) : tok)
    end
    tokens
  end
end  # class Tokens
end  # module SportDb
@@ -0,0 +1,111 @@
1
module SportDb
class Lexer


########
##  experimental teletype mode
##    only space, A-Z and 0-9 allowed
IS_TTY_LINE_RE = %r{  \A
                        ## note - use NEGATIVE lookahead to exclude blank lines
                         (?! [ ]*\z)

                         [A-Z0-9 ]+
                       \z
                   }x


## note - a run of two or more spaces and a single space
##          get reported via different named captures
TTY_SPACES_RE  = %r{ (?<spaces> [ ]{2,}) |
                     (?<space>  [ ])
                   }x
TTY_NUM_RE     = %r{ \b (?<num> \d+ ) \b
                   }x

##
## note - TEXT for now allows  A, 1A, A1, A1A, A1 B1 C1,
##                             A1AA1 2B22 3C33
##                           - single space only for concat
##    text segments MUST NOT be all numbers e.g.  1, 11, etc.
TTY_TEXT_RE   = %r{  \b (?<text>
                         (?:
                            [A-Z]            ## MUST start with letter
                              |
                            [0-9]+[A-Z]      ## or numbers followed by letter
                         )
                          [0-9A-Z]*
                           (?:
                              ### allow more segments separated
                              ##       by single space
                              [ ]
                              (?:
                                 [A-Z]            ## MUST start with letter
                                  |
                                 [0-9]+[A-Z]      ## or numbers followed by letter
                              )
                              [0-9A-Z]*
                           )*
                        )
                       \b
                  }x


## note - order matters; text gets tried before num
TTY_RE = Regexp.union(
                  TTY_SPACES_RE,
                  TTY_TEXT_RE,
                  TTY_NUM_RE,
                  ## fix add ANY_RE,
                    )


## split a (teletype) line into tokens
##   returns an array of tokens
##      - [:TTY_TEXT, text]    e.g. [:TTY_TEXT, "MAN UTD"]
##      - [:TTY_NUM,  integer] e.g. [:TTY_NUM, 2]
##   note - spaces are skipped (no tokens);
##          leading and trailing spaces get stripped first;
##          anything not matching gets skipped and reported as a warning
def _tokenize_tty_line( line )
  line = line.strip

  tokens = []

  ## track end offset of last match - to report error
  ##   on no match in end of string
  last_end = 0
  pos      = 0

  while m = TTY_RE.match( line, pos )
    first    = m.begin(0)
    last_end = m.end(0)

    ## match NOT starting at start/begin position!!!
    ##   report parse error!!!
    _warn_tty_skip( line, pos, first )   if first != pos

    pos = last_end

    t = if m[:spaces] || m[:space]
           nil   ## skip spaces
        elsif m[:text]
           [:TTY_TEXT, m[:text]]
        elsif m[:num]
           [:TTY_NUM, m[:num].to_i(10)]
        else
           ## report error/raise exception
           ##   note - should never happen; every alternative
           ##            in TTY_RE has a named capture
           puts "!!! TTY TOKENIZE ERROR - no match found"
           nil
        end

    tokens << t    if t
  end

  ## check if no match in end of string
  _warn_tty_skip( line, last_end, line.size )   if last_end != line.size

  tokens
end

## report skipped text, that is, line[from...to], on stdout and via log
##   note - the offsets in the message are those of the skipped text
##            (start offset, end offset - exclusive)
##   note - log is not defined in this file;
##            assumed to be provided by Lexer elsewhere - TODO confirm
def _warn_tty_skip( line, from, to )
  msg =  "!! WARN - tokenize (tty) error - skipping >#{line[from...to]}< @#{from},#{to} in line >#{line}<"
  puts msg
  log( msg )
end

end  # class Lexer
end  # module SportDb
111
+