clucumber 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.md +4 -9
- data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
- data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
- data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
- data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
- data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
- data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
- data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
- data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
- data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
- data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
- data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
- data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
- data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
- data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
- data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
- data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
- data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
- data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
- data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
- data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
- data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
- data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
- data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
- data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
- data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
- data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
- data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
- data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
- data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
- data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
- data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
- data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
- data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
- data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
- data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
- data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
- data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
- data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
- data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
- data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
- data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
- data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
- data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
- data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
- data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
- data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
- data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
- data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
- data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
- data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
- data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
- data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
- data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
- data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
- data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
- data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
- data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
- data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
- data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
- data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
- data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
- data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
- data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
- data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
- data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
- data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
- data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
- data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
- data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
- data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
- data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
- data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
- data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
- data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
- data/lib/clucumber/vendor/lift/lift.asd +77 -0
- data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
- data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
- data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
- data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
- data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
- data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
- data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
- data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
- data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
- data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
- data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
- data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
- data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
- data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
- data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
- data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
- data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
- data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
- data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
- data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
- data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
- data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
- data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
- data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
- data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
- data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
- data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
- data/lib/clucumber/vendor/usocket/package.lisp +82 -0
- data/lib/clucumber/vendor/usocket/server.lisp +45 -0
- data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
- data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
- data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
- data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
- data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
- data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
- data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
- data/lib/clucumber.rb +29 -7
- metadata +151 -5
@@ -0,0 +1,737 @@
|
|
1
|
+
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
2
|
+
;;; $Header: /usr/local/cvsrep/cl-ppcre/lexer.lisp,v 1.35 2009/09/17 19:17:31 edi Exp $
|
3
|
+
|
4
|
+
;;; The lexer's responsibility is to convert the regex string into a
|
5
|
+
;;; sequence of tokens which are in turn consumed by the parser.
|
6
|
+
;;;
|
7
|
+
;;; The lexer is aware of Perl's 'extended mode' and it also 'knows'
|
8
|
+
;;; (with a little help from the parser) how many register groups it
|
9
|
+
;;; has opened so far. (The latter is necessary for interpreting
|
10
|
+
;;; strings like "\\10" correctly.)
|
11
|
+
|
12
|
+
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
|
13
|
+
|
14
|
+
;;; Redistribution and use in source and binary forms, with or without
|
15
|
+
;;; modification, are permitted provided that the following conditions
|
16
|
+
;;; are met:
|
17
|
+
|
18
|
+
;;; * Redistributions of source code must retain the above copyright
|
19
|
+
;;; notice, this list of conditions and the following disclaimer.
|
20
|
+
|
21
|
+
;;; * Redistributions in binary form must reproduce the above
|
22
|
+
;;; copyright notice, this list of conditions and the following
|
23
|
+
;;; disclaimer in the documentation and/or other materials
|
24
|
+
;;; provided with the distribution.
|
25
|
+
|
26
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
27
|
+
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
28
|
+
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
29
|
+
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
30
|
+
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
31
|
+
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
32
|
+
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
33
|
+
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
34
|
+
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
35
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
36
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
37
|
+
|
38
|
+
(in-package :cl-ppcre)
|
39
|
+
|
40
|
+
(declaim (inline map-char-to-special-class))
|
41
|
+
(defun map-char-to-special-char-class (chr)
|
42
|
+
(declare #.*standard-optimize-settings*)
|
43
|
+
"Maps escaped characters like \"\\d\" to the tokens which represent
|
44
|
+
their associated character classes."
|
45
|
+
(case chr
|
46
|
+
((#\d)
|
47
|
+
:digit-class)
|
48
|
+
((#\D)
|
49
|
+
:non-digit-class)
|
50
|
+
((#\w)
|
51
|
+
:word-char-class)
|
52
|
+
((#\W)
|
53
|
+
:non-word-char-class)
|
54
|
+
((#\s)
|
55
|
+
:whitespace-char-class)
|
56
|
+
((#\S)
|
57
|
+
:non-whitespace-char-class)))
|
58
|
+
|
59
|
+
(defstruct (lexer (:constructor make-lexer-internal))
|
60
|
+
"LEXER structures are used to hold the regex string which is
|
61
|
+
currently lexed and to keep track of the lexer's state."
|
62
|
+
(str "" :type string :read-only t)
|
63
|
+
(len 0 :type fixnum :read-only t)
|
64
|
+
(reg 0 :type fixnum)
|
65
|
+
(pos 0 :type fixnum)
|
66
|
+
(last-pos nil :type list))
|
67
|
+
|
68
|
+
(defun make-lexer (string)
|
69
|
+
(declare (inline make-lexer-internal)
|
70
|
+
#-:genera (string string))
|
71
|
+
(make-lexer-internal :str (maybe-coerce-to-simple-string string)
|
72
|
+
:len (length string)))
|
73
|
+
|
74
|
+
(declaim (inline end-of-string-p))
|
75
|
+
(defun end-of-string-p (lexer)
|
76
|
+
(declare #.*standard-optimize-settings*)
|
77
|
+
"Tests whether we're at the end of the regex string."
|
78
|
+
(<= (lexer-len lexer)
|
79
|
+
(lexer-pos lexer)))
|
80
|
+
|
81
|
+
(declaim (inline looking-at-p))
|
82
|
+
(defun looking-at-p (lexer chr)
|
83
|
+
(declare #.*standard-optimize-settings*)
|
84
|
+
"Tests whether the next character the lexer would see is CHR.
|
85
|
+
Does not respect extended mode."
|
86
|
+
(and (not (end-of-string-p lexer))
|
87
|
+
(char= (schar (lexer-str lexer) (lexer-pos lexer))
|
88
|
+
chr)))
|
89
|
+
|
90
|
+
(declaim (inline next-char-non-extended))
|
91
|
+
(defun next-char-non-extended (lexer)
|
92
|
+
(declare #.*standard-optimize-settings*)
|
93
|
+
"Returns the next character which is to be examined and updates the
|
94
|
+
POS slot. Does not respect extended mode."
|
95
|
+
(cond ((end-of-string-p lexer) nil)
|
96
|
+
(t (prog1
|
97
|
+
(schar (lexer-str lexer) (lexer-pos lexer))
|
98
|
+
(incf (lexer-pos lexer))))))
|
99
|
+
|
100
|
+
(defun next-char (lexer)
|
101
|
+
(declare #.*standard-optimize-settings*)
|
102
|
+
"Returns the next character which is to be examined and updates the
|
103
|
+
POS slot. Respects extended mode, i.e. whitespace, comments, and also
|
104
|
+
nested comments are skipped if applicable."
|
105
|
+
(let ((next-char (next-char-non-extended lexer))
|
106
|
+
last-loop-pos)
|
107
|
+
(loop
|
108
|
+
;; remember where we started
|
109
|
+
(setq last-loop-pos (lexer-pos lexer))
|
110
|
+
;; first we look for nested comments like (?#foo)
|
111
|
+
(when (and next-char
|
112
|
+
(char= next-char #\()
|
113
|
+
(looking-at-p lexer #\?))
|
114
|
+
(incf (lexer-pos lexer))
|
115
|
+
(cond ((looking-at-p lexer #\#)
|
116
|
+
;; must be a nested comment - so we have to search for
|
117
|
+
;; the closing parenthesis
|
118
|
+
(let ((error-pos (- (lexer-pos lexer) 2)))
|
119
|
+
(unless
|
120
|
+
;; loop 'til ')' or end of regex string and
|
121
|
+
;; return NIL if ')' wasn't encountered
|
122
|
+
(loop for skip-char = next-char
|
123
|
+
then (next-char-non-extended lexer)
|
124
|
+
while (and skip-char
|
125
|
+
(char/= skip-char #\)))
|
126
|
+
finally (return skip-char))
|
127
|
+
(signal-syntax-error* error-pos "Comment group not closed.")))
|
128
|
+
(setq next-char (next-char-non-extended lexer)))
|
129
|
+
(t
|
130
|
+
;; undo effect of previous INCF if we didn't see a #
|
131
|
+
(decf (lexer-pos lexer)))))
|
132
|
+
(when *extended-mode-p*
|
133
|
+
;; now - if we're in extended mode - we skip whitespace and
|
134
|
+
;; comments; repeat the following loop while we look at
|
135
|
+
;; whitespace or #\#
|
136
|
+
(loop while (and next-char
|
137
|
+
(or (char= next-char #\#)
|
138
|
+
(whitespacep next-char)))
|
139
|
+
do (setq next-char
|
140
|
+
(if (char= next-char #\#)
|
141
|
+
;; if we saw a comment marker skip until
|
142
|
+
;; we're behind #\Newline...
|
143
|
+
(loop for skip-char = next-char
|
144
|
+
then (next-char-non-extended lexer)
|
145
|
+
while (and skip-char
|
146
|
+
(char/= skip-char #\Newline))
|
147
|
+
finally (return (next-char-non-extended lexer)))
|
148
|
+
;; ...otherwise (whitespace) skip until we
|
149
|
+
;; see the next non-whitespace character
|
150
|
+
(loop for skip-char = next-char
|
151
|
+
then (next-char-non-extended lexer)
|
152
|
+
while (and skip-char
|
153
|
+
(whitespacep skip-char))
|
154
|
+
finally (return skip-char))))))
|
155
|
+
;; if the position has moved we have to repeat our tests
|
156
|
+
;; because of cases like /^a (?#xxx) (?#yyy) {3}c/x which
|
157
|
+
;; would be equivalent to /^a{3}c/ in Perl
|
158
|
+
(unless (> (lexer-pos lexer) last-loop-pos)
|
159
|
+
(return next-char)))))
|
160
|
+
|
161
|
+
(declaim (inline fail))
|
162
|
+
(defun fail (lexer)
|
163
|
+
(declare #.*standard-optimize-settings*)
|
164
|
+
"Moves (LEXER-POS LEXER) back to the last position stored in
|
165
|
+
\(LEXER-LAST-POS LEXER) and pops the LAST-POS stack."
|
166
|
+
(unless (lexer-last-pos lexer)
|
167
|
+
(signal-syntax-error "LAST-POS stack of LEXER ~A is empty." lexer))
|
168
|
+
(setf (lexer-pos lexer) (pop (lexer-last-pos lexer)))
|
169
|
+
nil)
|
170
|
+
|
171
|
+
(defun get-number (lexer &key (radix 10) max-length no-whitespace-p)
|
172
|
+
(declare #.*standard-optimize-settings*)
|
173
|
+
"Read and consume the number the lexer is currently looking at and
|
174
|
+
return it. Returns NIL if no number could be identified.
|
175
|
+
RADIX is used as in PARSE-INTEGER. If MAX-LENGTH is not NIL we'll read
|
176
|
+
at most the next MAX-LENGTH characters. If NO-WHITESPACE-P is not NIL
|
177
|
+
we don't tolerate whitespace in front of the number."
|
178
|
+
(when (or (end-of-string-p lexer)
|
179
|
+
(and no-whitespace-p
|
180
|
+
(whitespacep (schar (lexer-str lexer) (lexer-pos lexer)))))
|
181
|
+
(return-from get-number nil))
|
182
|
+
(multiple-value-bind (integer new-pos)
|
183
|
+
(parse-integer (lexer-str lexer)
|
184
|
+
:start (lexer-pos lexer)
|
185
|
+
:end (if max-length
|
186
|
+
(let ((end-pos (+ (lexer-pos lexer)
|
187
|
+
(the fixnum max-length)))
|
188
|
+
(lexer-len (lexer-len lexer)))
|
189
|
+
(if (< end-pos lexer-len)
|
190
|
+
end-pos
|
191
|
+
lexer-len))
|
192
|
+
(lexer-len lexer))
|
193
|
+
:radix radix
|
194
|
+
:junk-allowed t)
|
195
|
+
(cond ((and integer (>= (the fixnum integer) 0))
|
196
|
+
(setf (lexer-pos lexer) new-pos)
|
197
|
+
integer)
|
198
|
+
(t nil))))
|
199
|
+
|
200
|
+
(declaim (inline try-number))
|
201
|
+
(defun try-number (lexer &key (radix 10) max-length no-whitespace-p)
|
202
|
+
(declare #.*standard-optimize-settings*)
|
203
|
+
"Like GET-NUMBER but won't consume anything if no number is seen."
|
204
|
+
;; remember current position
|
205
|
+
(push (lexer-pos lexer) (lexer-last-pos lexer))
|
206
|
+
(let ((number (get-number lexer
|
207
|
+
:radix radix
|
208
|
+
:max-length max-length
|
209
|
+
:no-whitespace-p no-whitespace-p)))
|
210
|
+
(or number (fail lexer))))
|
211
|
+
|
212
|
+
(declaim (inline make-char-from-code))
|
213
|
+
(defun make-char-from-code (number error-pos)
|
214
|
+
(declare #.*standard-optimize-settings*)
|
215
|
+
"Create character from char-code NUMBER. NUMBER can be NIL
|
216
|
+
which is interpreted as 0. ERROR-POS is the position where
|
217
|
+
the corresponding number started within the regex string."
|
218
|
+
;; only look at rightmost eight bits in compliance with Perl
|
219
|
+
(let ((code (logand #o377 (the fixnum (or number 0)))))
|
220
|
+
(or (and (< code char-code-limit)
|
221
|
+
(code-char code))
|
222
|
+
(signal-syntax-error* error-pos "No character for hex-code ~X." number))))
|
223
|
+
|
224
|
+
(defun unescape-char (lexer)
|
225
|
+
(declare #.*standard-optimize-settings*)
|
226
|
+
"Convert the characters\(s) following a backslash into a token
|
227
|
+
which is returned. This function is to be called when the backslash
|
228
|
+
has already been consumed. Special character classes like \\W are
|
229
|
+
handled elsewhere."
|
230
|
+
(when (end-of-string-p lexer)
|
231
|
+
(signal-syntax-error "String ends with backslash."))
|
232
|
+
(let ((chr (next-char-non-extended lexer)))
|
233
|
+
(case chr
|
234
|
+
((#\E)
|
235
|
+
;; if \Q quoting is on this is ignored, otherwise it's just an
|
236
|
+
;; #\E
|
237
|
+
(if *allow-quoting*
|
238
|
+
:void
|
239
|
+
#\E))
|
240
|
+
((#\c)
|
241
|
+
;; \cx means control-x in Perl
|
242
|
+
(let ((next-char (next-char-non-extended lexer)))
|
243
|
+
(unless next-char
|
244
|
+
(signal-syntax-error* (lexer-pos lexer) "Character missing after '\\c' at position ~A."))
|
245
|
+
(code-char (logxor #x40 (char-code (char-upcase next-char))))))
|
246
|
+
((#\x)
|
247
|
+
;; \x should be followed by a hexadecimal char code,
|
248
|
+
;; two digits or less
|
249
|
+
(let* ((error-pos (lexer-pos lexer))
|
250
|
+
(number (get-number lexer :radix 16 :max-length 2 :no-whitespace-p t)))
|
251
|
+
;; note that it is OK if \x is followed by zero digits
|
252
|
+
(make-char-from-code number error-pos)))
|
253
|
+
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
|
254
|
+
;; \x should be followed by an octal char code,
|
255
|
+
;; three digits or less
|
256
|
+
(let* ((error-pos (decf (lexer-pos lexer)))
|
257
|
+
(number (get-number lexer :radix 8 :max-length 3)))
|
258
|
+
(make-char-from-code number error-pos)))
|
259
|
+
;; the following five character names are 'semi-standard'
|
260
|
+
;; according to the CLHS but I'm not aware of any implementation
|
261
|
+
;; that doesn't implement them
|
262
|
+
((#\t)
|
263
|
+
#\Tab)
|
264
|
+
((#\n)
|
265
|
+
#\Newline)
|
266
|
+
((#\r)
|
267
|
+
#\Return)
|
268
|
+
((#\f)
|
269
|
+
#\Page)
|
270
|
+
((#\b)
|
271
|
+
#\Backspace)
|
272
|
+
((#\a)
|
273
|
+
(code-char 7)) ; ASCII bell
|
274
|
+
((#\e)
|
275
|
+
(code-char 27)) ; ASCII escape
|
276
|
+
(otherwise
|
277
|
+
;; all other characters aren't affected by a backslash
|
278
|
+
chr))))
|
279
|
+
|
280
|
+
(defun read-char-property (lexer first-char)
|
281
|
+
(declare #.*standard-optimize-settings*)
|
282
|
+
(unless (eql (next-char-non-extended lexer) #\{)
|
283
|
+
(signal-syntax-error* (lexer-pos lexer) "Expected left brace after \\~A." first-char))
|
284
|
+
(let ((name (with-output-to-string (out nil :element-type
|
285
|
+
#+:lispworks 'lw:simple-char #-:lispworks 'character)
|
286
|
+
(loop
|
287
|
+
(let ((char (or (next-char-non-extended lexer)
|
288
|
+
(signal-syntax-error "Unexpected EOF after \\~A{." first-char))))
|
289
|
+
(when (char= char #\})
|
290
|
+
(return))
|
291
|
+
(write-char char out))))))
|
292
|
+
(list (if (char= first-char #\p) :property :inverted-property)
|
293
|
+
;; we must reverse here because of what PARSE-STRING does
|
294
|
+
(nreverse name))))
|
295
|
+
|
296
|
+
(defun collect-char-class (lexer)
|
297
|
+
"Reads and consumes characters from regex string until a right
|
298
|
+
bracket is seen. Assembles them into a list \(which is returned) of
|
299
|
+
characters, character ranges, like \(:RANGE #\\A #\\E) for a-e, and
|
300
|
+
tokens representing special character classes."
|
301
|
+
(declare #.*standard-optimize-settings*)
|
302
|
+
(let ((start-pos (lexer-pos lexer)) ; remember start for error message
|
303
|
+
hyphen-seen
|
304
|
+
last-char
|
305
|
+
list)
|
306
|
+
(flet ((handle-char (c)
|
307
|
+
"Do the right thing with character C depending on whether
|
308
|
+
we're inside a range or not."
|
309
|
+
(cond ((and hyphen-seen last-char)
|
310
|
+
(setf (car list) (list :range last-char c)
|
311
|
+
last-char nil))
|
312
|
+
(t
|
313
|
+
(push c list)
|
314
|
+
(setq last-char c)))
|
315
|
+
(setq hyphen-seen nil)))
|
316
|
+
(loop for first = t then nil
|
317
|
+
for c = (next-char-non-extended lexer)
|
318
|
+
;; leave loop if at end of string
|
319
|
+
while c
|
320
|
+
do (cond
|
321
|
+
((char= c #\\)
|
322
|
+
;; we've seen a backslash
|
323
|
+
(let ((next-char (next-char-non-extended lexer)))
|
324
|
+
(case next-char
|
325
|
+
((#\d #\D #\w #\W #\s #\S)
|
326
|
+
;; a special character class
|
327
|
+
(push (map-char-to-special-char-class next-char) list)
|
328
|
+
;; if the last character was a hyphen
|
329
|
+
;; just collect it literally
|
330
|
+
(when hyphen-seen
|
331
|
+
(push #\- list))
|
332
|
+
;; if the next character is a hyphen do the same
|
333
|
+
(when (looking-at-p lexer #\-)
|
334
|
+
(push #\- list)
|
335
|
+
(incf (lexer-pos lexer)))
|
336
|
+
(setq hyphen-seen nil))
|
337
|
+
((#\P #\p)
|
338
|
+
;; maybe a character property
|
339
|
+
(cond ((null *property-resolver*)
|
340
|
+
(handle-char next-char))
|
341
|
+
(t
|
342
|
+
(push (read-char-property lexer next-char) list)
|
343
|
+
;; if the last character was a hyphen
|
344
|
+
;; just collect it literally
|
345
|
+
(when hyphen-seen
|
346
|
+
(push #\- list))
|
347
|
+
;; if the next character is a hyphen do the same
|
348
|
+
(when (looking-at-p lexer #\-)
|
349
|
+
(push #\- list)
|
350
|
+
(incf (lexer-pos lexer)))
|
351
|
+
(setq hyphen-seen nil))))
|
352
|
+
((#\E)
|
353
|
+
;; if \Q quoting is on we ignore \E,
|
354
|
+
;; otherwise it's just a plain #\E
|
355
|
+
(unless *allow-quoting*
|
356
|
+
(handle-char #\E)))
|
357
|
+
(otherwise
|
358
|
+
;; otherwise unescape the following character(s)
|
359
|
+
(decf (lexer-pos lexer))
|
360
|
+
(handle-char (unescape-char lexer))))))
|
361
|
+
(first
|
362
|
+
;; the first character must not be a right bracket
|
363
|
+
;; and isn't treated specially if it's a hyphen
|
364
|
+
(handle-char c))
|
365
|
+
((char= c #\])
|
366
|
+
;; end of character class
|
367
|
+
;; make sure we collect a pending hyphen
|
368
|
+
(when hyphen-seen
|
369
|
+
(setq hyphen-seen nil)
|
370
|
+
(handle-char #\-))
|
371
|
+
;; reverse the list to preserve the order intended
|
372
|
+
;; by the author of the regex string
|
373
|
+
(return-from collect-char-class (nreverse list)))
|
374
|
+
((and (char= c #\-)
|
375
|
+
last-char
|
376
|
+
(not hyphen-seen))
|
377
|
+
;; if the last character was 'just a character'
|
378
|
+
;; we expect to be in the middle of a range
|
379
|
+
(setq hyphen-seen t))
|
380
|
+
((char= c #\-)
|
381
|
+
;; otherwise this is just an ordinary hyphen
|
382
|
+
(handle-char #\-))
|
383
|
+
(t
|
384
|
+
;; default case - just collect the character
|
385
|
+
(handle-char c))))
|
386
|
+
;; we can only exit the loop normally if we've reached the end
|
387
|
+
;; of the regex string without seeing a right bracket
|
388
|
+
(signal-syntax-error* start-pos "Missing right bracket to close character class."))))
|
389
|
+
|
390
|
+
(defun maybe-parse-flags (lexer)
|
391
|
+
(declare #.*standard-optimize-settings*)
|
392
|
+
"Reads a sequence of modifiers \(including #\\- to reverse their
|
393
|
+
meaning) and returns a corresponding list of \"flag\" tokens. The
|
394
|
+
\"x\" modifier is treated specially in that it dynamically modifies
|
395
|
+
the behaviour of the lexer itself via the special variable
|
396
|
+
*EXTENDED-MODE-P*."
|
397
|
+
(prog1
|
398
|
+
(loop with set = t
|
399
|
+
for chr = (next-char-non-extended lexer)
|
400
|
+
unless chr
|
401
|
+
do (signal-syntax-error "Unexpected end of string.")
|
402
|
+
while (find chr "-imsx" :test #'char=)
|
403
|
+
;; the first #\- will invert the meaning of all modifiers
|
404
|
+
;; following it
|
405
|
+
if (char= chr #\-)
|
406
|
+
do (setq set nil)
|
407
|
+
else if (char= chr #\x)
|
408
|
+
do (setq *extended-mode-p* set)
|
409
|
+
else collect (if set
|
410
|
+
(case chr
|
411
|
+
((#\i)
|
412
|
+
:case-insensitive-p)
|
413
|
+
((#\m)
|
414
|
+
:multi-line-mode-p)
|
415
|
+
((#\s)
|
416
|
+
:single-line-mode-p))
|
417
|
+
(case chr
|
418
|
+
((#\i)
|
419
|
+
:case-sensitive-p)
|
420
|
+
((#\m)
|
421
|
+
:not-multi-line-mode-p)
|
422
|
+
((#\s)
|
423
|
+
:not-single-line-mode-p))))
|
424
|
+
(decf (lexer-pos lexer))))
|
425
|
+
|
426
|
+
(defun get-quantifier (lexer)
|
427
|
+
(declare #.*standard-optimize-settings*)
|
428
|
+
"Returns a list of two values (min max) if what the lexer is looking
|
429
|
+
at can be interpreted as a quantifier. Otherwise returns NIL and
|
430
|
+
resets the lexer to its old position."
|
431
|
+
;; remember starting position for FAIL and UNGET-TOKEN functions
|
432
|
+
(push (lexer-pos lexer) (lexer-last-pos lexer))
|
433
|
+
(let ((next-char (next-char lexer)))
|
434
|
+
(case next-char
|
435
|
+
((#\*)
|
436
|
+
;; * (Kleene star): match 0 or more times
|
437
|
+
'(0 nil))
|
438
|
+
((#\+)
|
439
|
+
;; +: match 1 or more times
|
440
|
+
'(1 nil))
|
441
|
+
((#\?)
|
442
|
+
;; ?: match 0 or 1 times
|
443
|
+
'(0 1))
|
444
|
+
((#\{)
|
445
|
+
;; one of
|
446
|
+
;; {n}: match exactly n times
|
447
|
+
;; {n,}: match at least n times
|
448
|
+
;; {n,m}: match at least n but not more than m times
|
449
|
+
;; note that anything not matching one of these patterns will
|
450
|
+
;; be interpreted literally - even whitespace isn't allowed
|
451
|
+
(let ((num1 (get-number lexer :no-whitespace-p t)))
|
452
|
+
(if num1
|
453
|
+
(let ((next-char (next-char-non-extended lexer)))
|
454
|
+
(case next-char
|
455
|
+
((#\,)
|
456
|
+
(let* ((num2 (get-number lexer :no-whitespace-p t))
|
457
|
+
(next-char (next-char-non-extended lexer)))
|
458
|
+
(case next-char
|
459
|
+
((#\})
|
460
|
+
;; this is the case {n,} (NUM2 is NIL) or {n,m}
|
461
|
+
(list num1 num2))
|
462
|
+
(otherwise
|
463
|
+
(fail lexer)))))
|
464
|
+
((#\})
|
465
|
+
;; this is the case {n}
|
466
|
+
(list num1 num1))
|
467
|
+
(otherwise
|
468
|
+
(fail lexer))))
|
469
|
+
;; no number following left curly brace, so we treat it
|
470
|
+
;; like a normal character
|
471
|
+
(fail lexer))))
|
472
|
+
;; cannot be a quantifier
|
473
|
+
(otherwise
|
474
|
+
(fail lexer)))))
|
475
|
+
|
476
|
+
(defun parse-register-name-aux (lexer)
|
477
|
+
"Reads and returns the name in a named register group. It is
|
478
|
+
assumed that the starting #\< character has already been read. The
|
479
|
+
closing #\> will also be consumed."
|
480
|
+
;; we have to look for an ending > character now
|
481
|
+
(let ((end-name (position #\>
|
482
|
+
(lexer-str lexer)
|
483
|
+
:start (lexer-pos lexer)
|
484
|
+
:test #'char=)))
|
485
|
+
(unless end-name
|
486
|
+
;; there has to be > somewhere, syntax error otherwise
|
487
|
+
(signal-syntax-error* (1- (lexer-pos lexer)) "Opening #\< in named group has no closing #\>."))
|
488
|
+
(let ((name (subseq (lexer-str lexer)
|
489
|
+
(lexer-pos lexer)
|
490
|
+
end-name)))
|
491
|
+
(unless (every #'(lambda (char)
|
492
|
+
(or (alphanumericp char)
|
493
|
+
(char= #\- char)))
|
494
|
+
name)
|
495
|
+
;; register name can contain only alphanumeric characters or #\-
|
496
|
+
(signal-syntax-error* (lexer-pos lexer) "Invalid character in named register group."))
|
497
|
+
;; advance lexer beyond "<name>" part
|
498
|
+
(setf (lexer-pos lexer) (1+ end-name))
|
499
|
+
name)))
|
500
|
+
|
501
|
+
(defun get-token (lexer)
|
502
|
+
(declare #.*standard-optimize-settings*)
|
503
|
+
"Returns and consumes the next token from the regex string \(or NIL)."
|
504
|
+
;; remember starting position for UNGET-TOKEN function
|
505
|
+
(push (lexer-pos lexer)
|
506
|
+
(lexer-last-pos lexer))
|
507
|
+
(let ((next-char (next-char lexer)))
|
508
|
+
(cond (next-char
|
509
|
+
(case next-char
|
510
|
+
;; the easy cases first - the following six characters
|
511
|
+
;; always have a special meaning and get translated
|
512
|
+
;; into tokens immediately
|
513
|
+
((#\))
|
514
|
+
:close-paren)
|
515
|
+
((#\|)
|
516
|
+
:vertical-bar)
|
517
|
+
((#\?)
|
518
|
+
:question-mark)
|
519
|
+
((#\.)
|
520
|
+
:everything)
|
521
|
+
((#\^)
|
522
|
+
:start-anchor)
|
523
|
+
((#\$)
|
524
|
+
:end-anchor)
|
525
|
+
((#\+ #\*)
|
526
|
+
;; quantifiers will always be consumend by
|
527
|
+
;; GET-QUANTIFIER, they must not appear here
|
528
|
+
(signal-syntax-error* (1- (lexer-pos lexer)) "Quantifier '~A' not allowed." next-char))
|
529
|
+
((#\{)
|
530
|
+
;; left brace isn't a special character in it's own
|
531
|
+
;; right but we must check if what follows might
|
532
|
+
;; look like a quantifier
|
533
|
+
(let ((this-pos (lexer-pos lexer))
|
534
|
+
(this-last-pos (lexer-last-pos lexer)))
|
535
|
+
(unget-token lexer)
|
536
|
+
(when (get-quantifier lexer)
|
537
|
+
(signal-syntax-error* (car this-last-pos)
|
538
|
+
"Quantifier '~A' not allowed."
|
539
|
+
(subseq (lexer-str lexer)
|
540
|
+
(car this-last-pos)
|
541
|
+
(lexer-pos lexer))))
|
542
|
+
(setf (lexer-pos lexer) this-pos
|
543
|
+
(lexer-last-pos lexer) this-last-pos)
|
544
|
+
next-char))
|
545
|
+
((#\[)
|
546
|
+
;; left bracket always starts a character class
|
547
|
+
(cons (cond ((looking-at-p lexer #\^)
|
548
|
+
(incf (lexer-pos lexer))
|
549
|
+
:inverted-char-class)
|
550
|
+
(t
|
551
|
+
:char-class))
|
552
|
+
(collect-char-class lexer)))
|
553
|
+
((#\\)
|
554
|
+
;; backslash might mean different things so we have
|
555
|
+
;; to peek one char ahead:
|
556
|
+
(let ((next-char (next-char-non-extended lexer)))
|
557
|
+
(case next-char
|
558
|
+
((#\A)
|
559
|
+
:modeless-start-anchor)
|
560
|
+
((#\Z)
|
561
|
+
:modeless-end-anchor)
|
562
|
+
((#\z)
|
563
|
+
:modeless-end-anchor-no-newline)
|
564
|
+
((#\b)
|
565
|
+
:word-boundary)
|
566
|
+
((#\B)
|
567
|
+
:non-word-boundary)
|
568
|
+
((#\k)
|
569
|
+
(cond ((and *allow-named-registers*
|
570
|
+
(looking-at-p lexer #\<))
|
571
|
+
;; back-referencing a named register
|
572
|
+
(incf (lexer-pos lexer))
|
573
|
+
(list :back-reference
|
574
|
+
(nreverse (parse-register-name-aux lexer))))
|
575
|
+
(t
|
576
|
+
;; false alarm, just unescape \k
|
577
|
+
#\k)))
|
578
|
+
((#\d #\D #\w #\W #\s #\S)
|
579
|
+
;; these will be treated like character classes
|
580
|
+
(map-char-to-special-char-class next-char))
|
581
|
+
((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
|
582
|
+
;; uh, a digit...
|
583
|
+
(let* ((old-pos (decf (lexer-pos lexer)))
|
584
|
+
;; ...so let's get the whole number first
|
585
|
+
(backref-number (get-number lexer)))
|
586
|
+
(declare (fixnum backref-number))
|
587
|
+
(cond ((and (> backref-number (lexer-reg lexer))
|
588
|
+
(<= 10 backref-number))
|
589
|
+
;; \10 and higher are treated as octal
|
590
|
+
;; character codes if we haven't
|
591
|
+
;; opened that much register groups
|
592
|
+
;; yet
|
593
|
+
(setf (lexer-pos lexer) old-pos)
|
594
|
+
;; re-read the number from the old
|
595
|
+
;; position and convert it to its
|
596
|
+
;; corresponding character
|
597
|
+
(make-char-from-code (get-number lexer :radix 8 :max-length 3)
|
598
|
+
old-pos))
|
599
|
+
(t
|
600
|
+
;; otherwise this must refer to a
|
601
|
+
;; backreference
|
602
|
+
(list :back-reference backref-number)))))
|
603
|
+
((#\0)
|
604
|
+
;; this always means an octal character code
|
605
|
+
;; (at most three digits)
|
606
|
+
(let ((old-pos (decf (lexer-pos lexer))))
|
607
|
+
(make-char-from-code (get-number lexer :radix 8 :max-length 3)
|
608
|
+
old-pos)))
|
609
|
+
((#\P #\p)
|
610
|
+
;; might be a named property
|
611
|
+
(cond (*property-resolver* (read-char-property lexer next-char))
|
612
|
+
(t next-char)))
|
613
|
+
(otherwise
|
614
|
+
;; in all other cases just unescape the
|
615
|
+
;; character
|
616
|
+
(decf (lexer-pos lexer))
|
617
|
+
(unescape-char lexer)))))
|
618
|
+
((#\()
|
619
|
+
;; an open parenthesis might mean different things
|
620
|
+
;; depending on what follows...
|
621
|
+
(cond ((looking-at-p lexer #\?)
|
622
|
+
;; this is the case '(?' (and probably more behind)
|
623
|
+
(incf (lexer-pos lexer))
|
624
|
+
;; we have to check for modifiers first
|
625
|
+
;; because a colon might follow
|
626
|
+
(let* ((flags (maybe-parse-flags lexer))
|
627
|
+
(next-char (next-char-non-extended lexer)))
|
628
|
+
;; modifiers are only allowed if a colon
|
629
|
+
;; or a closing parenthesis are following
|
630
|
+
(when (and flags
|
631
|
+
(not (find next-char ":)" :test #'char=)))
|
632
|
+
(signal-syntax-error* (car (lexer-last-pos lexer))
|
633
|
+
"Sequence '~A' not recognized."
|
634
|
+
(subseq (lexer-str lexer)
|
635
|
+
(car (lexer-last-pos lexer))
|
636
|
+
(lexer-pos lexer))))
|
637
|
+
(case next-char
|
638
|
+
((nil)
|
639
|
+
;; syntax error
|
640
|
+
(signal-syntax-error "End of string following '(?'."))
|
641
|
+
((#\))
|
642
|
+
;; an empty group except for the flags
|
643
|
+
;; (if there are any)
|
644
|
+
(or (and flags
|
645
|
+
(cons :flags flags))
|
646
|
+
:void))
|
647
|
+
((#\()
|
648
|
+
;; branch
|
649
|
+
:open-paren-paren)
|
650
|
+
((#\>)
|
651
|
+
;; standalone
|
652
|
+
:open-paren-greater)
|
653
|
+
((#\=)
|
654
|
+
;; positive look-ahead
|
655
|
+
:open-paren-equal)
|
656
|
+
((#\!)
|
657
|
+
;; negative look-ahead
|
658
|
+
:open-paren-exclamation)
|
659
|
+
((#\:)
|
660
|
+
;; non-capturing group - return flags as
|
661
|
+
;; second value
|
662
|
+
(values :open-paren-colon flags))
|
663
|
+
((#\<)
|
664
|
+
;; might be a look-behind assertion or a named group, so
|
665
|
+
;; check next character
|
666
|
+
(let ((next-char (next-char-non-extended lexer)))
|
667
|
+
(if (alpha-char-p next-char)
|
668
|
+
(progn
|
669
|
+
;; we have encountered a named group
|
670
|
+
;; are we supporting register naming?
|
671
|
+
(unless *allow-named-registers*
|
672
|
+
(signal-syntax-error* (1- (lexer-pos lexer))
|
673
|
+
"Character '~A' may not follow '(?<'."
|
674
|
+
next-char))
|
675
|
+
;; put the letter back
|
676
|
+
(decf (lexer-pos lexer))
|
677
|
+
;; named group
|
678
|
+
:open-paren-less-letter)
|
679
|
+
(case next-char
|
680
|
+
((#\=)
|
681
|
+
;; positive look-behind
|
682
|
+
:open-paren-less-equal)
|
683
|
+
((#\!)
|
684
|
+
;; negative look-behind
|
685
|
+
:open-paren-less-exclamation)
|
686
|
+
((#\))
|
687
|
+
;; Perl allows "(?<)" and treats
|
688
|
+
;; it like a null string
|
689
|
+
:void)
|
690
|
+
((nil)
|
691
|
+
;; syntax error
|
692
|
+
(signal-syntax-error "End of string following '(?<'."))
|
693
|
+
(t
|
694
|
+
;; also syntax error
|
695
|
+
(signal-syntax-error* (1- (lexer-pos lexer))
|
696
|
+
"Character '~A' may not follow '(?<'."
|
697
|
+
next-char ))))))
|
698
|
+
(otherwise
|
699
|
+
(signal-syntax-error* (1- (lexer-pos lexer))
|
700
|
+
"Character '~A' may not follow '(?'."
|
701
|
+
next-char)))))
|
702
|
+
(t
|
703
|
+
;; if next-char was not #\? (this is within
|
704
|
+
;; the first COND), we've just seen an opening
|
705
|
+
;; parenthesis and leave it like that
|
706
|
+
:open-paren)))
|
707
|
+
(otherwise
|
708
|
+
;; all other characters are their own tokens
|
709
|
+
next-char)))
|
710
|
+
;; we didn't get a character (this if the "else" branch from
|
711
|
+
;; the first IF), so we don't return a token but NIL
|
712
|
+
(t
|
713
|
+
(pop (lexer-last-pos lexer))
|
714
|
+
nil))))
|
715
|
+
|
716
|
+
(declaim (inline unget-token))
|
717
|
+
(defun unget-token (lexer)
|
718
|
+
(declare #.*standard-optimize-settings*)
|
719
|
+
"Moves the lexer back to the last position stored in the LAST-POS stack."
|
720
|
+
(if (lexer-last-pos lexer)
|
721
|
+
(setf (lexer-pos lexer)
|
722
|
+
(pop (lexer-last-pos lexer)))
|
723
|
+
(error "No token to unget \(this should not happen)")))
|
724
|
+
|
725
|
+
(declaim (inline start-of-subexpr-p))
|
726
|
+
(defun start-of-subexpr-p (lexer)
|
727
|
+
(declare #.*standard-optimize-settings*)
|
728
|
+
"Tests whether the next token can start a valid sub-expression, i.e.
|
729
|
+
a stand-alone regex."
|
730
|
+
(let* ((pos (lexer-pos lexer))
|
731
|
+
(next-char (next-char lexer)))
|
732
|
+
(not (or (null next-char)
|
733
|
+
(prog1
|
734
|
+
(member (the character next-char)
|
735
|
+
'(#\) #\|)
|
736
|
+
:test #'char=)
|
737
|
+
(setf (lexer-pos lexer) pos))))))
|