tokn 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.txt +4 -5
- data/bin/tokncompile +1 -1
- data/bin/toknprocess +10 -4
- data/lib/tokn/code_set.rb +332 -337
- data/lib/tokn/dfa.rb +187 -162
- data/lib/tokn/dfa_builder.rb +218 -220
- data/lib/tokn/range_partition.rb +205 -203
- data/lib/tokn/reg_parse.rb +336 -331
- data/lib/tokn/state.rb +267 -270
- data/lib/tokn/token_defn_parser.rb +144 -139
- data/lib/tokn/tokenizer.rb +243 -175
- data/lib/tokn/tokn_const.rb +11 -6
- data/lib/tokn/tools.rb +42 -20
- data/test/Example1.rb +50 -0
- data/test/data/compileddfa.txt +1 -0
- data/test/data/sampletext.txt +6 -1
- data/test/test.rb +17 -12
- metadata +7 -6
- data/test/simple.rb +0 -33
data/lib/tokn/token_defn_parser.rb
CHANGED
@@ -1,156 +1,161 @@
 require_relative 'tools'
 req('tokn_const code_set dfa_builder state reg_parse')

-
-
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-
-
-
-attr_reader :dfa
-
-# Compile a token definition script into a DFA
-#
-def initialize(script, createPDF = false)
-@script = script
-parseScript
-if createPDF
-dfa.startState.generatePDF("tokenizer_dfa")
-end
-end
-
-private
-
-def parseScript
-db = false
-
-nextTokenId = 0
-
-# List of tokens entries, including anonymous ones
-@tokenListBig = []
+module ToknInternal
+
+# Parses a token definition script, and generates an NFA that
+# is capable of recognizing and distinguishing between the various
+# tokens.
+#
+# Each line in the script is one of
+#
+# # ...comment... (the # must appear as the first character in the line)
+#
+# <tokenname> ':' <regex>
+#
+#
+# A <tokenname> must be an 'identifier' (alphanumeric, with first character a letter (or '_')).
+# If the first character is '_', the token is treated as an 'anonymous' token; these can
+# appear in the curly brace portions of previous reg. expr. entries, but do not appear as tokens in the
+# generated NFA.
+#
+class TokenDefParser

-
-tokenListSmall = []
+attr_reader :dfa

-#
-
+# Compile a token definition script into a DFA
+#
+def initialize(script, createPDF = false)
+@script = script
+parseScript
+if createPDF
+dfa.startState.generatePDF("tokenizer_dfa")
+end
+end

-
+private

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def parseScript
+db = false
+
+nextTokenId = 0
+
+# List of tokens entries, including anonymous ones
+@tokenListBig = []
+
+# List of tokens names, excluding anonymous ones
+tokenListSmall = []
+
+# Maps token name to token entry
+@tokenNameMap = {}
+
+@lines = @script.split("\n")
+
+@lines.each_with_index do |line, lineNumber|
+
+line.strip!
+
+# If line is empty, or starts with '#', it's a comment
+if line.length == 0 || line[0] == '#'
+next
+end
+
+if !(line =~ TOKENNAME_EXPR)
+raise ParseException, "Syntax error, line #"+lineNumber.to_s+": "+line
+end
+
+pos = line.index(":")
+
+tokenName = line[0,pos].strip()
+
+expr = line[pos+1..-1].strip()
+
+rex = RegParse.new(expr, @tokenNameMap)
+
+# Give it the next available token id, if it's not an anonymous token
+tkId = nil
+if tokenName[0] != '_'
+tkId = nextTokenId
+nextTokenId += 1
+end
+
+tkEntry = [tokenName, rex, @tokenListBig.size, tkId]
+
+!db || pr("token entry: %s\n",d(tkEntry))
+
+if @tokenNameMap.has_key?(tokenName)
+raise ParseException, "Duplicate token name: "+line
+end
+
+
+@tokenListBig.push(tkEntry)
+@tokenNameMap[tkEntry[0]] = tkEntry
+
+if tkId
+tokenListSmall.push(tokenName)
+end
+
+!db || pr("  added token name [%s] to map\n",d(tkEntry[0]))
+
 end

+combined = combineTokenNFAs()
+!db || combined.generatePDF("combined")

-
-
+dfa = DFABuilder.nfa_to_dfa(combined)
+!db || dfa.generatePDF("combined_minimized")

-
-
-
-
-
+@dfa = Tokn::DFA.new(tokenListSmall, dfa)
+end
+
+# Combine the individual NFAs constructed for the token definitions into
+# one large NFA, each augmented with an edge labelled with the appropriate
+# token identifier to let the tokenizer see which token led to the final state.
+#
+def combineTokenNFAs
+

+baseId = 0
+startState = nil
+
+@tokenListBig.each do |tokenName, regParse, index, tokenId|
+
+# Skip anonymous token definitions
+if !tokenId
+next
+end
+
+oldToNewMap, baseId = regParse.startState.duplicateNFA( baseId)
+
+dupStart = oldToNewMap[regParse.startState]
+
+# Transition from the expression's end state (not a final state)
+# to a new final state, with the transitioning edge
+# labelled with the token id (actually, a transformed token id to distinguish
+# it from character codes)
+dupEnd = oldToNewMap[regParse.endState]
+
+dupfinalState = State.new(baseId)
+baseId += 1
+dupfinalState.finalState = true
+
+# Why do I need to add 'ToknInternal.' here? Very confusing.
+dupEnd.addEdge(CodeSet.new(ToknInternal.tokenIdToEdgeLabel(tokenId)), dupfinalState)
+
+if !startState
+startState = dupStart
+else
+# Add an e-transition from the start state to this expression's start
+startState.addEdge(CodeSet.new(EPSILON),dupStart)
+end
+end
+startState
 end
-
-combined = combineTokenNFAs()
-!db || combined.generatePDF("combined")
-
-dfa = DFABuilder.nfa_to_dfa(combined)
-!db || dfa.generatePDF("combined_minimized")
-
-@dfa = DFA.new(tokenListSmall, dfa)
-end
-
-# Combine the individual NFAs constructed for the token definitions into
-# one large NFA, each augmented with an edge labelled with the appropriate
-# token identifier to let the tokenizer see which token led to the final state.
-#
-def combineTokenNFAs

-
-
+# Regex for token names preceding regular expressions
+#
+TOKENNAME_EXPR = Regexp.new("[_A-Za-z][_A-Za-z0-9]*\s*:\s*")

-@tokenListBig.each do |tokenName, regParse, index, tokenId|
-
-# Skip anonymous token definitions
-if !tokenId
-next
-end
-
-oldToNewMap, baseId = regParse.startState.duplicateNFA( baseId)
-
-dupStart = oldToNewMap[regParse.startState]
-
-# Transition from the expression's end state (not a final state)
-# to a new final state, with the transitioning edge
-# labelled with the token id (actually, a transformed token id to distinguish
-# it from character codes)
-dupEnd = oldToNewMap[regParse.endState]
-
-dupfinalState = State.new(baseId)
-baseId += 1
-dupfinalState.finalState = true
-
-dupEnd.addEdge(CodeSet.new(tokenIdToEdgeLabel(tokenId)), dupfinalState)
-
-if !startState
-startState = dupStart
-else
-# Add an e-transition from the start state to this expression's start
-startState.addEdge(CodeSet.new(EPSILON),dupStart)
-end
-end
-startState
 end

-#
-#
-TOKENNAME_EXPR = Regexp.new("[_A-Za-z][_A-Za-z0-9]*\s*:\s*")
-
-end
+end # module ToknInternal
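The rewritten token_defn_parser.rb above now documents the script format in its header comment: each non-comment line is `<tokenname> ':' <regex>`, `#` starts a comment, and a leading '_' marks an anonymous token. As a rough illustration only (not taken from the package), a script in that format could be compiled through the class shown in this hunk; the token names, patterns, and require path below are assumptions, and the exact regular-expression dialect accepted by RegParse is not shown in this hunk.

# Hypothetical usage sketch; assumes the installed gem exposes this file
# as 'tokn/token_defn_parser' and that RegParse accepts these simple patterns.
require 'tokn/token_defn_parser'

token_script = <<'EOS'
# illustrative definitions only
WS: [ \t\n]+
ID: [_a-zA-Z][_a-zA-Z0-9]*
INT: [0-9]+
EOS

parser = ToknInternal::TokenDefParser.new(token_script)
dfa = parser.dfa   # a Tokn::DFA, built by parseScript as shown above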
data/lib/tokn/tokenizer.rb
CHANGED
@@ -1,211 +1,279 @@
 require_relative 'tools'
-req('tokn_const ')
+req('tokn_const dfa')

-
-#
-class Tokenizer
-include Tokn
-
-# Construct a tokenizer, given a DFA and some text to process
-#
-def initialize(dfa, text)
-@dfa = dfa
-@text = text
-@lineNumber = 0
-@column = 0
-@cursor = 0
-@tokenHistory = []
-@historyPointer = 0
-end
+module Tokn

-#
+# Extracts tokens from a script, given a previously constructed DFA.
 #
-
-#
-def peek
-if !@text
-raise IllegalStateException, "No input text specified"
-end
-
-db = false
-!db || warn("debug printing is on")
-!db || pr("peek, cursor=%d\n",@cursor)
+class Tokenizer

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# Construct a tokenizer
+#
+# @param dfa the DFA to use
+# @param text the text to extract tokens from
+# @param skipName if not nil, tokens with this name will be skipped
+#
+def initialize(dfa, text, skipName = nil)
+@dfa = dfa
+@text = text
+if !text
+raise ArgumentError, "No text defined"
+end
+@skipTokenId = nil
+if skipName
+@skipTokenId = dfa.tokenId(skipName)
+if !@skipTokenId
+raise ArgumentError, "No token with name "+skipName+" found"
+end
+end
+@lineNumber = 0
+@column = 0
+@cursor = 0
+@tokenHistory = []
+@historyPointer = 0
+end
+
+# Determine next token (without reading it)
+#
+# Returns Token, or nil if end of input
+#
+def peek
+# if !@text
+# raise IllegalStateException, "No input text specified"
+# end
+
+db = false
+!db || warn("debug printing is on")
+!db || pr("peek, cursor=%d\n",@cursor)
+
+if @historyPointer == @tokenHistory.size
+while true # repeat until we find a non-skipped token, or run out of text
+break if @cursor >= @text.length

-
+bestLength = 0
+bestId = ToknInternal::UNKNOWN_TOKEN

-
-
-
-
-if
-
-!db || pr("
+charOffset = 0
+state = @dfa.startState
+while @cursor + charOffset <= @text.length
+ch = nil
+if @cursor + charOffset < @text.length
+ch = @text[@cursor + charOffset].ord()
+!db || pr(" offset=%d, ch=%d (%s)\n",charOffset,ch,ch.chr)
+end
+
+nextState = nil
+
+# Examine edges leaving this state.
+# If one is labelled with a token id, we don't need to match the character with it;
+# store as best token found if length is longer than previous, or equal to previous
+# with higher id.

-
-
-
+# If an edge is labelled with the current character, advance to that state.
+
+edges = state.edges
+edges.each do |lbl,dest|
+a = lbl.array
+!db || pr("  edge lbl=%s, dest=%s\n",d(lbl),d(dest))
+if a[0] < ToknInternal::EPSILON
+newTokenId = ToknInternal::edgeLabelToTokenId(a[0])
+!db || pr("   new token id=%d\n",newTokenId)
+
+if (bestLength < charOffset || newTokenId > bestId)
+bestLength, bestId = charOffset, newTokenId
+!db || pr("   making longest found so far\n")
+end
 end
-
+
+if ch && lbl.contains?(ch)
+!db || pr("  setting next state to %s\n",d(dest))
+nextState = dest
+break
+end
+end

-if
-!db || pr("  setting next state to %s\n",d(dest))
-nextState = dest
+if !nextState
 break
 end
-
-
-
-break
+state = nextState
+charOffset += 1
+!db || pr("  advanced to next state\n")
 end
-
-
-
-
+
+if bestId == @skipTokenId
+@cursor += bestLength
+next
+end
+
+peekToken = Token.new(bestId, @text[@cursor, bestLength], 1 + @lineNumber, 1 + @column)
+
+@tokenHistory.push(peekToken)
+break # We found a token, so stop
+end
+end
+
+ret = nil
+if @historyPointer < @tokenHistory.size
+ret = @tokenHistory[@historyPointer]
+end

-
-
-
+ret
+end
+
+
+# Read next token
+#
+# @param tokenName if not nil, the (string) name of the token expected
+#
+# @raise TokenizerException if no more tokens,if unrecognized token, or
+# if token has different than expected name
+#
+def read(tokenName = nil)
+token = peek()
+if !token
+raise TokenizerException,"No more tokens"
+end
+
+if token.id == ToknInternal::UNKNOWN_TOKEN
+raise TokenizerException, "Unknown token "+token.inspect
+end
+
+if tokenName && tokenName != nameOf(token)
+raise TokenizerException, "Unexpected token "+token.inspect
+end
+
+@historyPointer += 1
+
+# Advance cursor, line number, column
+
+tl = token.text.length
+@cursor += tl
+tl.times do |i|
+c = token.text[i]
+@column += 1
+if c == "\n"
+@lineNumber += 1
+@column = 0
+end
+end
+token
 end

-
-
-
+# Read next token if it has a particular name
+#
+# > tokenName : name to look for
+# < token read, or nil
+#
+def readIf(tokenName)
+ret = nil
+token = peek()
+if token && nameOf(token) == tokenName
+ret = read()
+end
+ret
 end

-
-
+# Read a sequence of tokens
+# @param seq string of space-delimited token names; if name is '_',
+# allows any token name in that position
+# @return array of tokens read
+#
+def readSequence(seq)
+seqNames = seq.split(' ')
+ret = []
+seqNames.each do |name|
+tk = name != '_' ? read(name) : read
+ret.push(tk)
+end
+ret
+end

-
-
-
-
-
-
-
-
-
-
-
-
+# Read a sequence of tokens, if they have particular names
+# @param seq string of space-delimited token names; if name is '_',
+# allows any token name in that position
+# @return array of tokens read, or nil if the tokens had different
+# names (or an end of input was encountered)
+#
+def readSequenceIf(seq)
+ret = []
+seqNames = seq.split(' ')
+seqNames.each do |name|
+tk = peek
+break if !tk
+if name != '_' && nameOf(tk) != name
+break
+end
+ret.push(read)
+end
+
+if ret.size != seqNames.size
+unread(ret.size)
+ret = nil
+end
+ret
 end

-if token.id == UNKNOWN_TOKEN
-raise TokenizerException, "Unknown token "+token.inspect
-end

-
-
+# Determine if another token exists
+#
+def hasNext
+!peek().nil?
 end

-
-
-#
+# Get the name of a token
+# (i.e., the name of the token definition, not its text)
+#
+# > token read from this tokenizer
+#
+def nameOf(token)
+@dfa.tokenName(token.id)
+end

-
-
-
-
-
-if
-
-@column = 0
+# Unread one (or more) previously read tokens
+#
+# @raise TokenizerException if attempt to unread token that was never read
+#
+def unread(count = 1)
+if @historyPointer < count
+raise TokenizerException, "Cannot unread before start"
 end
+@historyPointer -= count
 end
-
-end
-
-# Read next token if it has a particular name
-#
-# > tokenName : name to look for
-# < token read, or nil
-#
-def readIf(tokenName)
-ret = nil
-token = peek()
-if token && nameOf(token) == tokenName
-ret = read()
-end
-ret
+
 end

-# Determine if another token exists
-#
-def hasNext
-!peek().nil?
-end

-#
-# (i.e., the name of the token definition, not its text)
+# Tokens read by Tokenizer
 #
-
-
-
-
-
-
-
-
-
-
-
+class Token
+
+attr_reader :text, :lineNumber, :column, :id
+
+def initialize(id, text, lineNumber, column)
+@id = id
+@text = text
+@lineNumber = lineNumber
+@column = column
+end
+
+def unknown?
+id == ToknInternal::UNKNOWN_TOKEN
+end
+
+# Construct description of token location within text
+#
+def inspect
+s = "(line "+lineNumber.to_s+", col "+column.to_s+")"
+if !unknown?
+s = s.ljust(17) + " : " + text
+end
+s
 end
-@historyPointer -= count
-end
-
-end
-
-
-
-
-# Tokens read by Tokenizer
-#
-class Token
-include Tokn
-
-attr_reader :text, :lineNumber, :column, :id
-
-def initialize(id, text, lineNumber, column)
-@id = id
-@text = text
-@lineNumber = lineNumber
-@column = column
-end
-
-def unknown?
-id == UNKNOWN_TOKEN
 end

-
-
-
-s = s.ljust(17) + " : " + text
-end
-s
+# Exception class for Tokenizer methods
+#
+class TokenizerException < Exception
 end
-end
-

-
-end
+end # module Tokn
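The rewritten Tokenizer above gains an optional skipName constructor argument (tokens with that name are silently skipped) plus the readSequence and readSequenceIf helpers. A hypothetical usage sketch follows, using only calls that appear in this hunk; the dfa value, the token names "WS", "ID", "INT", and the input text are illustrative assumptions, not taken from the package's test data.

# Hypothetical usage sketch (not part of the package).
# 'dfa' is assumed to be a Tokn::DFA built beforehand, for example by
# ToknInternal::TokenDefParser as in the previous file.
tok = Tokn::Tokenizer.new(dfa, "alpha 42 beta", "WS")   # skip whitespace tokens

while tok.hasNext
  t = tok.read                     # raises TokenizerException on an unknown token
  puts tok.nameOf(t) + " " + t.inspect
end

# When the expected pattern is known in advance ('_' matches any token name):
# id_tok, int_tok, id2_tok = tok.readSequence("ID INT ID")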