tokn 0.0.5 → 0.0.6
- checksums.yaml +4 -4
- data/README.txt +4 -5
- data/bin/tokncompile +1 -1
- data/bin/toknprocess +10 -4
- data/lib/tokn/code_set.rb +332 -337
- data/lib/tokn/dfa.rb +187 -162
- data/lib/tokn/dfa_builder.rb +218 -220
- data/lib/tokn/range_partition.rb +205 -203
- data/lib/tokn/reg_parse.rb +336 -331
- data/lib/tokn/state.rb +267 -270
- data/lib/tokn/token_defn_parser.rb +144 -139
- data/lib/tokn/tokenizer.rb +243 -175
- data/lib/tokn/tokn_const.rb +11 -6
- data/lib/tokn/tools.rb +42 -20
- data/test/Example1.rb +50 -0
- data/test/data/compileddfa.txt +1 -0
- data/test/data/sampletext.txt +6 -1
- data/test/test.rb +17 -12
- metadata +7 -6
- data/test/simple.rb +0 -33
data/lib/tokn/token_defn_parser.rb CHANGED
@@ -1,156 +1,161 @@
 require_relative 'tools'
 req('tokn_const code_set dfa_builder state reg_parse')
 
-
-
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-
-
-
-  attr_reader :dfa
-
-  # Compile a token definition script into a DFA
-  #
-  def initialize(script, createPDF = false)
-    @script = script
-    parseScript
-    if createPDF
-      dfa.startState.generatePDF("tokenizer_dfa")
-    end
-  end
-
-  private
-
-  def parseScript
-    db = false
-
-    nextTokenId = 0
-
-    # List of tokens entries, including anonymous ones
-    @tokenListBig = []
+module ToknInternal
+
+  # Parses a token definition script, and generates an NFA that
+  # is capable of recognizing and distinguishing between the various
+  # tokens.
+  #
+  # Each line in the script is one of
+  #
+  #   # ...comment... (the # must appear as the first character in the line)
+  #
+  #   <tokenname> ':' <regex>
+  #
+  #
+  # A <tokenname> must be an 'identifier' (alphanumeric, with first character a letter (or '_')).
+  # If the first character is '_', the token is treated as an 'anonymous' token; these can
+  # appear in the curly brace portions of previous reg. expr. entries, but do not appear as tokens in the
+  # generated NFA.
+  #
+  class TokenDefParser
 
-
-    tokenListSmall = []
+    attr_reader :dfa
 
-    #
-
+    # Compile a token definition script into a DFA
+    #
+    def initialize(script, createPDF = false)
+      @script = script
+      parseScript
+      if createPDF
+        dfa.startState.generatePDF("tokenizer_dfa")
+      end
+    end
 
-
+    private
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def parseScript
+      db = false
+
+      nextTokenId = 0
+
+      # List of tokens entries, including anonymous ones
+      @tokenListBig = []
+
+      # List of tokens names, excluding anonymous ones
+      tokenListSmall = []
+
+      # Maps token name to token entry
+      @tokenNameMap = {}
+
+      @lines = @script.split("\n")
+
+      @lines.each_with_index do |line, lineNumber|
+
+        line.strip!
+
+        # If line is empty, or starts with '#', it's a comment
+        if line.length == 0 || line[0] == '#'
+          next
+        end
+
+        if !(line =~ TOKENNAME_EXPR)
+          raise ParseException, "Syntax error, line #"+lineNumber.to_s+": "+line
+        end
+
+        pos = line.index(":")
+
+        tokenName = line[0,pos].strip()
+
+        expr = line[pos+1..-1].strip()
+
+        rex = RegParse.new(expr, @tokenNameMap)
+
+        # Give it the next available token id, if it's not an anonymous token
+        tkId = nil
+        if tokenName[0] != '_'
+          tkId = nextTokenId
+          nextTokenId += 1
+        end
+
+        tkEntry = [tokenName, rex, @tokenListBig.size, tkId]
+
+        !db || pr("token entry: %s\n",d(tkEntry))
+
+        if @tokenNameMap.has_key?(tokenName)
+          raise ParseException, "Duplicate token name: "+line
+        end
+
+
+        @tokenListBig.push(tkEntry)
+        @tokenNameMap[tkEntry[0]] = tkEntry
+
+        if tkId
+          tokenListSmall.push(tokenName)
+        end
+
+        !db || pr(" added token name [%s] to map\n",d(tkEntry[0]))
+
       end
 
+      combined = combineTokenNFAs()
+      !db || combined.generatePDF("combined")
 
-
-
+      dfa = DFABuilder.nfa_to_dfa(combined)
+      !db || dfa.generatePDF("combined_minimized")
 
-
-
-
-
-
+      @dfa = Tokn::DFA.new(tokenListSmall, dfa)
+    end
+
+    # Combine the individual NFAs constructed for the token definitions into
+    # one large NFA, each augmented with an edge labelled with the appropriate
+    # token identifier to let the tokenizer see which token led to the final state.
+    #
+    def combineTokenNFAs
+
 
+      baseId = 0
+      startState = nil
+
+      @tokenListBig.each do |tokenName, regParse, index, tokenId|
+
+        # Skip anonymous token definitions
+        if !tokenId
+          next
+        end
+
+        oldToNewMap, baseId = regParse.startState.duplicateNFA( baseId)
+
+        dupStart = oldToNewMap[regParse.startState]
+
+        # Transition from the expression's end state (not a final state)
+        # to a new final state, with the transitioning edge
+        # labelled with the token id (actually, a transformed token id to distinguish
+        # it from character codes)
+        dupEnd = oldToNewMap[regParse.endState]
+
+        dupfinalState = State.new(baseId)
+        baseId += 1
+        dupfinalState.finalState = true
+
+        # Why do I need to add 'ToknInternal.' here? Very confusing.
+        dupEnd.addEdge(CodeSet.new(ToknInternal.tokenIdToEdgeLabel(tokenId)), dupfinalState)
+
+        if !startState
+          startState = dupStart
+        else
+          # Add an e-transition from the start state to this expression's start
+          startState.addEdge(CodeSet.new(EPSILON),dupStart)
+        end
+      end
+      startState
     end
-
-    combined = combineTokenNFAs()
-    !db || combined.generatePDF("combined")
-
-    dfa = DFABuilder.nfa_to_dfa(combined)
-    !db || dfa.generatePDF("combined_minimized")
-
-    @dfa = DFA.new(tokenListSmall, dfa)
-  end
-
-  # Combine the individual NFAs constructed for the token definitions into
-  # one large NFA, each augmented with an edge labelled with the appropriate
-  # token identifier to let the tokenizer see which token led to the final state.
-  #
-  def combineTokenNFAs
 
-
-
+    # Regex for token names preceding regular expressions
+    #
+    TOKENNAME_EXPR = Regexp.new("[_A-Za-z][_A-Za-z0-9]*\s*:\s*")
 
-    @tokenListBig.each do |tokenName, regParse, index, tokenId|
-
-      # Skip anonymous token definitions
-      if !tokenId
-        next
-      end
-
-      oldToNewMap, baseId = regParse.startState.duplicateNFA( baseId)
-
-      dupStart = oldToNewMap[regParse.startState]
-
-      # Transition from the expression's end state (not a final state)
-      # to a new final state, with the transitioning edge
-      # labelled with the token id (actually, a transformed token id to distinguish
-      # it from character codes)
-      dupEnd = oldToNewMap[regParse.endState]
-
-      dupfinalState = State.new(baseId)
-      baseId += 1
-      dupfinalState.finalState = true
-
-      dupEnd.addEdge(CodeSet.new(tokenIdToEdgeLabel(tokenId)), dupfinalState)
-
-      if !startState
-        startState = dupStart
-      else
-        # Add an e-transition from the start state to this expression's start
-        startState.addEdge(CodeSet.new(EPSILON),dupStart)
-      end
-    end
-    startState
   end
 
-  #
-  #
-  TOKENNAME_EXPR = Regexp.new("[_A-Za-z][_A-Za-z0-9]*\s*:\s*")
-
-end
+end # module ToknInternal
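The new header comment above documents the script format this parser accepts. For illustration, here is a hypothetical token definition script in that format; the token names are made up, and the exact regex dialect (character classes, repetition) is an assumption, since RegParse defines what is actually supported:

    # anonymous helpers start with '_' and produce no token ids
    _WS: [ \t]+
    _D: [0-9]
    # named tokens are assigned ids 0, 1, 2... in order of definition
    WHITESPACE: {_WS}
    INT: {_D}+
    ID: [_a-zA-Z][_a-zA-Z0-9]*

Anonymous entries such as _WS may be referenced in curly braces by later entries, but do not themselves appear as tokens in the generated NFA.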
data/lib/tokn/tokenizer.rb CHANGED
@@ -1,211 +1,279 @@
 require_relative 'tools'
-req('tokn_const ')
+req('tokn_const dfa')
 
-
-#
-class Tokenizer
-  include Tokn
-
-  # Construct a tokenizer, given a DFA and some text to process
-  #
-  def initialize(dfa, text)
-    @dfa = dfa
-    @text = text
-    @lineNumber = 0
-    @column = 0
-    @cursor = 0
-    @tokenHistory = []
-    @historyPointer = 0
-  end
+module Tokn
 
-  #
+  # Extracts tokens from a script, given a previously constructed DFA.
   #
-
-  #
-  def peek
-    if !@text
-      raise IllegalStateException, "No input text specified"
-    end
-
-    db = false
-    !db || warn("debug printing is on")
-    !db || pr("peek, cursor=%d\n",@cursor)
+  class Tokenizer
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Construct a tokenizer
+    #
+    # @param dfa the DFA to use
+    # @param text the text to extract tokens from
+    # @param skipName if not nil, tokens with this name will be skipped
+    #
+    def initialize(dfa, text, skipName = nil)
+      @dfa = dfa
+      @text = text
+      if !text
+        raise ArgumentError, "No text defined"
+      end
+      @skipTokenId = nil
+      if skipName
+        @skipTokenId = dfa.tokenId(skipName)
+        if !@skipTokenId
+          raise ArgumentError, "No token with name "+skipName+" found"
+        end
+      end
+      @lineNumber = 0
+      @column = 0
+      @cursor = 0
+      @tokenHistory = []
+      @historyPointer = 0
+    end
+
+    # Determine next token (without reading it)
+    #
+    # Returns Token, or nil if end of input
+    #
+    def peek
+      # if !@text
+      #   raise IllegalStateException, "No input text specified"
+      # end
+
+      db = false
+      !db || warn("debug printing is on")
+      !db || pr("peek, cursor=%d\n",@cursor)
+
+      if @historyPointer == @tokenHistory.size
+        while true # repeat until we find a non-skipped token, or run out of text
+          break if @cursor >= @text.length
 
-
+          bestLength = 0
+          bestId = ToknInternal::UNKNOWN_TOKEN
 
-
-
-
-
-if
-
-!db || pr("
+          charOffset = 0
+          state = @dfa.startState
+          while @cursor + charOffset <= @text.length
+            ch = nil
+            if @cursor + charOffset < @text.length
+              ch = @text[@cursor + charOffset].ord()
+              !db || pr(" offset=%d, ch=%d (%s)\n",charOffset,ch,ch.chr)
+            end
+
+            nextState = nil
+
+            # Examine edges leaving this state.
+            # If one is labelled with a token id, we don't need to match the character with it;
+            # store as best token found if length is longer than previous, or equal to previous
+            # with higher id.
 
-
-
-
+            # If an edge is labelled with the current character, advance to that state.
+
+            edges = state.edges
+            edges.each do |lbl,dest|
+              a = lbl.array
+              !db || pr(" edge lbl=%s, dest=%s\n",d(lbl),d(dest))
+              if a[0] < ToknInternal::EPSILON
+                newTokenId = ToknInternal::edgeLabelToTokenId(a[0])
+                !db || pr(" new token id=%d\n",newTokenId)
+
+                if (bestLength < charOffset || newTokenId > bestId)
+                  bestLength, bestId = charOffset, newTokenId
+                  !db || pr("  making longest found so far\n")
+                end
               end
-
+
+              if ch && lbl.contains?(ch)
+                !db || pr(" setting next state to %s\n",d(dest))
+                nextState = dest
+                break
+              end
+            end
 
-if
-  !db || pr("   setting next state to %s\n",d(dest))
-  nextState = dest
+            if !nextState
               break
             end
-
-
-
-break
+            state = nextState
+            charOffset += 1
+            !db || pr(" advanced to next state\n")
           end
-
-
-
-
+
+          if bestId == @skipTokenId
+            @cursor += bestLength
+            next
+          end
+
+          peekToken = Token.new(bestId, @text[@cursor, bestLength], 1 + @lineNumber, 1 + @column)
+
+          @tokenHistory.push(peekToken)
+          break # We found a token, so stop
+        end
+      end
+
+      ret = nil
+      if @historyPointer < @tokenHistory.size
+        ret = @tokenHistory[@historyPointer]
+      end
 
-
-
-
+      ret
+    end
+
+
+    # Read next token
+    #
+    # @param tokenName if not nil, the (string) name of the token expected
+    #
+    # @raise TokenizerException if no more tokens,if unrecognized token, or
+    #   if token has different than expected name
+    #
+    def read(tokenName = nil)
+      token = peek()
+      if !token
+        raise TokenizerException,"No more tokens"
+      end
+
+      if token.id == ToknInternal::UNKNOWN_TOKEN
+        raise TokenizerException, "Unknown token "+token.inspect
+      end
+
+      if tokenName && tokenName != nameOf(token)
+        raise TokenizerException, "Unexpected token "+token.inspect
+      end
+
+      @historyPointer += 1
+
+      # Advance cursor, line number, column
+
+      tl = token.text.length
+      @cursor += tl
+      tl.times do |i|
+        c = token.text[i]
+        @column += 1
+        if c == "\n"
+          @lineNumber += 1
+          @column = 0
+        end
+      end
+      token
     end
 
-
-
-
+    # Read next token if it has a particular name
+    #
+    # > tokenName : name to look for
+    # < token read, or nil
+    #
+    def readIf(tokenName)
+      ret = nil
+      token = peek()
+      if token && nameOf(token) == tokenName
+        ret = read()
+      end
+      ret
     end
 
-
-
+    # Read a sequence of tokens
+    # @param seq string of space-delimited token names; if name is '_',
+    #   allows any token name in that position
+    # @return array of tokens read
+    #
+    def readSequence(seq)
+      seqNames = seq.split(' ')
+      ret = []
+      seqNames.each do |name|
+        tk = name != '_' ? read(name) : read
+        ret.push(tk)
+      end
+      ret
+    end
 
-
-
-
-
-
-
-
-
-
-
-
-
+    # Read a sequence of tokens, if they have particular names
+    # @param seq string of space-delimited token names; if name is '_',
+    #   allows any token name in that position
+    # @return array of tokens read, or nil if the tokens had different
+    #   names (or an end of input was encountered)
+    #
+    def readSequenceIf(seq)
+      ret = []
+      seqNames = seq.split(' ')
+      seqNames.each do |name|
+        tk = peek
+        break if !tk
+        if name != '_' && nameOf(tk) != name
+          break
+        end
+        ret.push(read)
+      end
+
+      if ret.size != seqNames.size
+        unread(ret.size)
+        ret = nil
+      end
+      ret
     end
 
-    if token.id == UNKNOWN_TOKEN
-      raise TokenizerException, "Unknown token "+token.inspect
-    end
 
-
-
+    # Determine if another token exists
+    #
+    def hasNext
+      !peek().nil?
     end
 
-
-
-  #
+    # Get the name of a token
+    # (i.e., the name of the token definition, not its text)
+    #
+    # > token read from this tokenizer
+    #
+    def nameOf(token)
+      @dfa.tokenName(token.id)
+    end
 
-
-
-
-
-
-if
-
-@column = 0
+    # Unread one (or more) previously read tokens
+    #
+    # @raise TokenizerException if attempt to unread token that was never read
+    #
+    def unread(count = 1)
+      if @historyPointer < count
+        raise TokenizerException, "Cannot unread before start"
       end
+      @historyPointer -= count
     end
-
-end
-
-# Read next token if it has a particular name
-#
-# > tokenName : name to look for
-# < token read, or nil
-#
-def readIf(tokenName)
-  ret = nil
-  token = peek()
-  if token && nameOf(token) == tokenName
-    ret = read()
-  end
-  ret
+
   end
 
-# Determine if another token exists
-#
-def hasNext
-  !peek().nil?
-end
 
-  #
-  # (i.e., the name of the token definition, not its text)
+  # Tokens read by Tokenizer
   #
-
-
-
-
-
-
-
-
-
-
-
+  class Token
+
+    attr_reader :text, :lineNumber, :column, :id
+
+    def initialize(id, text, lineNumber, column)
+      @id = id
+      @text = text
+      @lineNumber = lineNumber
+      @column = column
+    end
+
+    def unknown?
+      id == ToknInternal::UNKNOWN_TOKEN
+    end
+
+    # Construct description of token location within text
+    #
+    def inspect
+      s = "(line "+lineNumber.to_s+", col "+column.to_s+")"
+      if !unknown?
+        s = s.ljust(17) + " : " + text
+      end
+      s
     end
-    @historyPointer -= count
-  end
-
-end
-
-
-
-
-# Tokens read by Tokenizer
-#
-class Token
-  include Tokn
-
-  attr_reader :text, :lineNumber, :column, :id
-
-  def initialize(id, text, lineNumber, column)
-    @id = id
-    @text = text
-    @lineNumber = lineNumber
-    @column = column
-  end
-
-  def unknown?
-    id == UNKNOWN_TOKEN
   end
 
-
-
-
-    s = s.ljust(17) + " : " + text
-  end
-  s
+  # Exception class for Tokenizer methods
+  #
+  class TokenizerException < Exception
   end
-end
-
 
-
-end
+end # module Tokn
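For orientation, here is a sketch of how the 0.0.6 classes in this diff fit together. It is illustrative only: the definition script, the token names ("WHITESPACE"), and how the gem's files get loaded are assumptions, not taken from the release.

    # script: a token definition string (see the format comment in token_defn_parser.rb)
    dfa = ToknInternal::TokenDefParser.new(script).dfa    # compile script to a Tokn::DFA
    t = Tokn::Tokenizer.new(dfa, "x = 42", "WHITESPACE")  # skipName: skip WHITESPACE tokens
    while t.hasNext
      tk = t.read                                         # raises Tokn::TokenizerException on unknown input
      puts t.nameOf(tk) + " " + tk.inspect
    end

Because read raises on an UNKNOWN_TOKEN, callers that want to recover from unrecognized input can call peek first and test the returned token with unknown?.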