clucumber 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.md +4 -9
- data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
- data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
- data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
- data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
- data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
- data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
- data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
- data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
- data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
- data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
- data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
- data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
- data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
- data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
- data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
- data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
- data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
- data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
- data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
- data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
- data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
- data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
- data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
- data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
- data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
- data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
- data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
- data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
- data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
- data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
- data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
- data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
- data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
- data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
- data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
- data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
- data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
- data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
- data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
- data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
- data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
- data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
- data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
- data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
- data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
- data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
- data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
- data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
- data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
- data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
- data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
- data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
- data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
- data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
- data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
- data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
- data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
- data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
- data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
- data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
- data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
- data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
- data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
- data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
- data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
- data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
- data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
- data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
- data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
- data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
- data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
- data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
- data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
- data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
- data/lib/clucumber/vendor/lift/lift.asd +77 -0
- data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
- data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
- data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
- data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
- data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
- data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
- data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
- data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
- data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
- data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
- data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
- data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
- data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
- data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
- data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
- data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
- data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
- data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
- data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
- data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
- data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
- data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
- data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
- data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
- data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
- data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
- data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
- data/lib/clucumber/vendor/usocket/package.lisp +82 -0
- data/lib/clucumber/vendor/usocket/server.lisp +45 -0
- data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
- data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
- data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
- data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
- data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
- data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
- data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
- data/lib/clucumber.rb +29 -7
- metadata +151 -5
@@ -0,0 +1,875 @@
|
|
1
|
+
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
2
|
+
;;; $Header: /usr/local/cvsrep/cl-ppcre/convert.lisp,v 1.57 2009/09/17 19:17:31 edi Exp $
|
3
|
+
|
4
|
+
;;; Here the parse tree is converted into its internal representation
|
5
|
+
;;; using REGEX objects. At the same time some optimizations are
|
6
|
+
;;; already applied.
|
7
|
+
|
8
|
+
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
|
9
|
+
|
10
|
+
;;; Redistribution and use in source and binary forms, with or without
|
11
|
+
;;; modification, are permitted provided that the following conditions
|
12
|
+
;;; are met:
|
13
|
+
|
14
|
+
;;; * Redistributions of source code must retain the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer.
|
16
|
+
|
17
|
+
;;; * Redistributions in binary form must reproduce the above
|
18
|
+
;;; copyright notice, this list of conditions and the following
|
19
|
+
;;; disclaimer in the documentation and/or other materials
|
20
|
+
;;; provided with the distribution.
|
21
|
+
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
23
|
+
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
24
|
+
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
25
|
+
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
26
|
+
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
27
|
+
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
28
|
+
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
29
|
+
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
30
|
+
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
|
34
|
+
(in-package :cl-ppcre)
|
35
|
+
|
36
|
+
;;; The flags that represent the "ism" modifiers are always kept
|
37
|
+
;;; together in a three-element list. We use the following macros to
|
38
|
+
;;; access individual elements.
|
39
|
+
|
40
|
+
(defmacro case-insensitive-mode-p (flags)
|
41
|
+
"Accessor macro to extract the first flag out of a three-element flag list."
|
42
|
+
`(first ,flags))
|
43
|
+
|
44
|
+
(defmacro multi-line-mode-p (flags)
|
45
|
+
"Accessor macro to extract the second flag out of a three-element flag list."
|
46
|
+
`(second ,flags))
|
47
|
+
|
48
|
+
(defmacro single-line-mode-p (flags)
|
49
|
+
"Accessor macro to extract the third flag out of a three-element flag list."
|
50
|
+
`(third ,flags))
|
51
|
+
|
52
|
+
(defun set-flag (token)
|
53
|
+
"Reads a flag token and sets or unsets the corresponding entry in
|
54
|
+
the special FLAGS list."
|
55
|
+
(declare #.*standard-optimize-settings*)
|
56
|
+
(declare (special flags))
|
57
|
+
(case token
|
58
|
+
((:case-insensitive-p)
|
59
|
+
(setf (case-insensitive-mode-p flags) t))
|
60
|
+
((:case-sensitive-p)
|
61
|
+
(setf (case-insensitive-mode-p flags) nil))
|
62
|
+
((:multi-line-mode-p)
|
63
|
+
(setf (multi-line-mode-p flags) t))
|
64
|
+
((:not-multi-line-mode-p)
|
65
|
+
(setf (multi-line-mode-p flags) nil))
|
66
|
+
((:single-line-mode-p)
|
67
|
+
(setf (single-line-mode-p flags) t))
|
68
|
+
((:not-single-line-mode-p)
|
69
|
+
(setf (single-line-mode-p flags) nil))
|
70
|
+
(otherwise
|
71
|
+
(signal-syntax-error "Unknown flag token ~A." token))))
|
72
|
+
|
73
|
+
(defgeneric resolve-property (property)
|
74
|
+
(:documentation "Resolves PROPERTY to a unary character test
|
75
|
+
function. PROPERTY can either be a function designator or it can be a
|
76
|
+
string which is resolved using *PROPERTY-RESOLVER*.")
|
77
|
+
(:method ((property-name string))
|
78
|
+
(funcall *property-resolver* property-name))
|
79
|
+
(:method ((function-name symbol))
|
80
|
+
function-name)
|
81
|
+
(:method ((test-function function))
|
82
|
+
test-function))
|
83
|
+
|
84
|
+
(defun convert-char-class-to-test-function (list invertedp case-insensitive-p)
|
85
|
+
"Combines all items in LIST into test function and returns a
|
86
|
+
logical-OR combination of these functions. Items can be single
|
87
|
+
characters, character ranges like \(:RANGE #\\A #\\E), or special
|
88
|
+
character classes like :DIGIT-CLASS. Does the right thing with
|
89
|
+
respect to case-\(in)sensitivity as specified by the special variable
|
90
|
+
FLAGS."
|
91
|
+
(declare #.*standard-optimize-settings*)
|
92
|
+
(declare (special flags))
|
93
|
+
(let ((test-functions
|
94
|
+
(loop for item in list
|
95
|
+
collect (cond ((characterp item)
|
96
|
+
;; rebind so closure captures the right one
|
97
|
+
(let ((this-char item))
|
98
|
+
(lambda (char)
|
99
|
+
(declare (character char this-char))
|
100
|
+
(char= char this-char))))
|
101
|
+
((symbolp item)
|
102
|
+
(case item
|
103
|
+
((:digit-class) #'digit-char-p)
|
104
|
+
((:non-digit-class) (complement* #'digit-char-p))
|
105
|
+
((:whitespace-char-class) #'whitespacep)
|
106
|
+
((:non-whitespace-char-class) (complement* #'whitespacep))
|
107
|
+
((:word-char-class) #'word-char-p)
|
108
|
+
((:non-word-char-class) (complement* #'word-char-p))
|
109
|
+
(otherwise
|
110
|
+
(signal-syntax-error "Unknown symbol ~A in character class." item))))
|
111
|
+
((and (consp item)
|
112
|
+
(eq (first item) :property))
|
113
|
+
(resolve-property (second item)))
|
114
|
+
((and (consp item)
|
115
|
+
(eq (first item) :inverted-property))
|
116
|
+
(complement* (resolve-property (second item))))
|
117
|
+
((and (consp item)
|
118
|
+
(eq (first item) :range))
|
119
|
+
(let ((from (second item))
|
120
|
+
(to (third item)))
|
121
|
+
(when (char> from to)
|
122
|
+
(signal-syntax-error "Invalid range from ~S to ~S in char-class." from to))
|
123
|
+
(lambda (char)
|
124
|
+
(declare (character char from to))
|
125
|
+
(char<= from char to))))
|
126
|
+
(t (signal-syntax-error "Unknown item ~A in char-class list." item))))))
|
127
|
+
(unless test-functions
|
128
|
+
(signal-syntax-error "Empty character class."))
|
129
|
+
(cond ((cdr test-functions)
|
130
|
+
(cond ((and invertedp case-insensitive-p)
|
131
|
+
(lambda (char)
|
132
|
+
(declare (character char))
|
133
|
+
(loop with both-case-p = (both-case-p char)
|
134
|
+
with char-down = (if both-case-p (char-downcase char) char)
|
135
|
+
with char-up = (if both-case-p (char-upcase char) nil)
|
136
|
+
for test-function in test-functions
|
137
|
+
never (or (funcall test-function char-down)
|
138
|
+
(and char-up (funcall test-function char-up))))))
|
139
|
+
(case-insensitive-p
|
140
|
+
(lambda (char)
|
141
|
+
(declare (character char))
|
142
|
+
(loop with both-case-p = (both-case-p char)
|
143
|
+
with char-down = (if both-case-p (char-downcase char) char)
|
144
|
+
with char-up = (if both-case-p (char-upcase char) nil)
|
145
|
+
for test-function in test-functions
|
146
|
+
thereis (or (funcall test-function char-down)
|
147
|
+
(and char-up (funcall test-function char-up))))))
|
148
|
+
(invertedp
|
149
|
+
(lambda (char)
|
150
|
+
(loop for test-function in test-functions
|
151
|
+
never (funcall test-function char))))
|
152
|
+
(t
|
153
|
+
(lambda (char)
|
154
|
+
(loop for test-function in test-functions
|
155
|
+
thereis (funcall test-function char))))))
|
156
|
+
;; there's only one test-function
|
157
|
+
(t (let ((test-function (first test-functions)))
|
158
|
+
(cond ((and invertedp case-insensitive-p)
|
159
|
+
(lambda (char)
|
160
|
+
(declare (character char))
|
161
|
+
(not (or (funcall test-function (char-downcase char))
|
162
|
+
(and (both-case-p char)
|
163
|
+
(funcall test-function (char-upcase char)))))))
|
164
|
+
(case-insensitive-p
|
165
|
+
(lambda (char)
|
166
|
+
(declare (character char))
|
167
|
+
(or (funcall test-function (char-downcase char))
|
168
|
+
(and (both-case-p char)
|
169
|
+
(funcall test-function (char-upcase char))))))
|
170
|
+
(invertedp (complement* test-function))
|
171
|
+
(t test-function)))))))
|
172
|
+
|
173
|
+
(defun maybe-split-repetition (regex
|
174
|
+
greedyp
|
175
|
+
minimum
|
176
|
+
maximum
|
177
|
+
min-len
|
178
|
+
length
|
179
|
+
reg-seen)
|
180
|
+
"Splits a REPETITION object into a constant and a varying part if
|
181
|
+
applicable, i.e. something like
|
182
|
+
a{3,} -> a{3}a*
|
183
|
+
The arguments to this function correspond to the REPETITION slots of
|
184
|
+
the same name."
|
185
|
+
(declare #.*standard-optimize-settings*)
|
186
|
+
(declare (fixnum minimum)
|
187
|
+
(type (or fixnum null) maximum))
|
188
|
+
;; note the usage of COPY-REGEX here; we can't use the same REGEX
|
189
|
+
;; object in both REPETITIONS because they will have different
|
190
|
+
;; offsets
|
191
|
+
(when maximum
|
192
|
+
(when (zerop maximum)
|
193
|
+
;; trivial case: don't repeat at all
|
194
|
+
(return-from maybe-split-repetition
|
195
|
+
(make-instance 'void)))
|
196
|
+
(when (= 1 minimum maximum)
|
197
|
+
;; another trivial case: "repeat" exactly once
|
198
|
+
(return-from maybe-split-repetition
|
199
|
+
regex)))
|
200
|
+
;; first set up the constant part of the repetition
|
201
|
+
;; maybe that's all we need
|
202
|
+
(let ((constant-repetition (if (plusp minimum)
|
203
|
+
(make-instance 'repetition
|
204
|
+
:regex (copy-regex regex)
|
205
|
+
:greedyp greedyp
|
206
|
+
:minimum minimum
|
207
|
+
:maximum minimum
|
208
|
+
:min-len min-len
|
209
|
+
:len length
|
210
|
+
:contains-register-p reg-seen)
|
211
|
+
;; don't create garbage if minimum is 0
|
212
|
+
nil)))
|
213
|
+
(when (and maximum
|
214
|
+
(= maximum minimum))
|
215
|
+
(return-from maybe-split-repetition
|
216
|
+
;; no varying part needed because min = max
|
217
|
+
constant-repetition))
|
218
|
+
;; now construct the varying part
|
219
|
+
(let ((varying-repetition
|
220
|
+
(make-instance 'repetition
|
221
|
+
:regex regex
|
222
|
+
:greedyp greedyp
|
223
|
+
:minimum 0
|
224
|
+
:maximum (if maximum (- maximum minimum) nil)
|
225
|
+
:min-len min-len
|
226
|
+
:len length
|
227
|
+
:contains-register-p reg-seen)))
|
228
|
+
(cond ((zerop minimum)
|
229
|
+
;; min = 0, no constant part needed
|
230
|
+
varying-repetition)
|
231
|
+
((= 1 minimum)
|
232
|
+
;; min = 1, constant part needs no REPETITION wrapped around
|
233
|
+
(make-instance 'seq
|
234
|
+
:elements (list (copy-regex regex)
|
235
|
+
varying-repetition)))
|
236
|
+
(t
|
237
|
+
;; general case
|
238
|
+
(make-instance 'seq
|
239
|
+
:elements (list constant-repetition
|
240
|
+
varying-repetition)))))))
|
241
|
+
|
242
|
+
;; During the conversion of the parse tree we keep track of the start
|
243
|
+
;; of the parse tree in the special variable STARTS-WITH which'll
|
244
|
+
;; either hold a STR object or an EVERYTHING object. The latter is the
|
245
|
+
;; case if the regex starts with ".*" which implicitly anchors the
|
246
|
+
;; regex at the start (perhaps modulo #\Newline).
|
247
|
+
|
248
|
+
(defun maybe-accumulate (str)
|
249
|
+
"Accumulate STR into the special variable STARTS-WITH if
|
250
|
+
ACCUMULATE-START-P (also special) is true and STARTS-WITH is either
|
251
|
+
NIL or a STR object of the same case mode. Always returns NIL."
|
252
|
+
(declare #.*standard-optimize-settings*)
|
253
|
+
(declare (special accumulate-start-p starts-with))
|
254
|
+
(declare (ftype (function (t) fixnum) len))
|
255
|
+
(when accumulate-start-p
|
256
|
+
(etypecase starts-with
|
257
|
+
(str
|
258
|
+
;; STARTS-WITH already holds a STR, so we check if we can
|
259
|
+
;; concatenate
|
260
|
+
(cond ((eq (case-insensitive-p starts-with)
|
261
|
+
(case-insensitive-p str))
|
262
|
+
;; we modify STARTS-WITH in place
|
263
|
+
(setf (len starts-with)
|
264
|
+
(+ (len starts-with) (len str)))
|
265
|
+
;; note that we use SLOT-VALUE because the accessor
|
266
|
+
;; STR has a declared FTYPE which doesn't fit here
|
267
|
+
(adjust-array (slot-value starts-with 'str)
|
268
|
+
(len starts-with)
|
269
|
+
:fill-pointer t)
|
270
|
+
(setf (subseq (slot-value starts-with 'str)
|
271
|
+
(- (len starts-with) (len str)))
|
272
|
+
(str str)
|
273
|
+
;; STR objects that are parts of STARTS-WITH
|
274
|
+
;; always have their SKIP slot set to true
|
275
|
+
;; because the SCAN function will take care of
|
276
|
+
;; them, i.e. the matcher can ignore them
|
277
|
+
(skip str) t))
|
278
|
+
(t (setq accumulate-start-p nil))))
|
279
|
+
(null
|
280
|
+
;; STARTS-WITH is still empty, so we create a new STR object
|
281
|
+
(setf starts-with
|
282
|
+
(make-instance 'str
|
283
|
+
:str ""
|
284
|
+
:case-insensitive-p (case-insensitive-p str))
|
285
|
+
;; INITIALIZE-INSTANCE will coerce the STR to a simple
|
286
|
+
;; string, so we have to fill it afterwards
|
287
|
+
(slot-value starts-with 'str)
|
288
|
+
(make-array (len str)
|
289
|
+
:initial-contents (str str)
|
290
|
+
:element-type 'character
|
291
|
+
:fill-pointer t
|
292
|
+
:adjustable t)
|
293
|
+
(len starts-with)
|
294
|
+
(len str)
|
295
|
+
;; see remark about SKIP above
|
296
|
+
(skip str) t))
|
297
|
+
(everything
|
298
|
+
;; STARTS-WITH already holds an EVERYTHING object - we can't
|
299
|
+
;; concatenate
|
300
|
+
(setq accumulate-start-p nil))))
|
301
|
+
nil)
|
302
|
+
|
303
|
+
(declaim (inline convert-aux))
|
304
|
+
(defun convert-aux (parse-tree)
|
305
|
+
"Converts the parse tree PARSE-TREE into a REGEX object and returns
|
306
|
+
it. Will also
|
307
|
+
|
308
|
+
- split and optimize repetitions,
|
309
|
+
- accumulate strings or EVERYTHING objects into the special variable
|
310
|
+
STARTS-WITH,
|
311
|
+
- keep track of all registers seen in the special variable REG-NUM,
|
312
|
+
- keep track of all named registers seen in the special variable REG-NAMES
|
313
|
+
- keep track of the highest backreference seen in the special
|
314
|
+
variable MAX-BACK-REF,
|
315
|
+
- maintain and adher to the currently applicable modifiers in the special
|
316
|
+
variable FLAGS, and
|
317
|
+
- maybe even wash your car..."
|
318
|
+
(declare #.*standard-optimize-settings*)
|
319
|
+
(if (consp parse-tree)
|
320
|
+
(convert-compound-parse-tree (first parse-tree) parse-tree)
|
321
|
+
(convert-simple-parse-tree parse-tree)))
|
322
|
+
|
323
|
+
(defgeneric convert-compound-parse-tree (token parse-tree &key)
|
324
|
+
(declare #.*standard-optimize-settings*)
|
325
|
+
(:documentation "Helper function for CONVERT-AUX which converts
|
326
|
+
parse trees which are conses and dispatches on TOKEN which is the
|
327
|
+
first element of the parse tree.")
|
328
|
+
(:method ((token t) parse-tree &key)
|
329
|
+
(signal-syntax-error "Unknown token ~A in parse-tree." token)))
|
330
|
+
|
331
|
+
(defmethod convert-compound-parse-tree ((token (eql :sequence)) parse-tree &key)
|
332
|
+
"The case for parse trees like \(:SEQUENCE {<regex>}*)."
|
333
|
+
(declare #.*standard-optimize-settings*)
|
334
|
+
(cond ((cddr parse-tree)
|
335
|
+
;; this is essentially like
|
336
|
+
;; (MAPCAR 'CONVERT-AUX (REST PARSE-TREE))
|
337
|
+
;; but we don't cons a new list
|
338
|
+
(loop for parse-tree-rest on (rest parse-tree)
|
339
|
+
while parse-tree-rest
|
340
|
+
do (setf (car parse-tree-rest)
|
341
|
+
(convert-aux (car parse-tree-rest))))
|
342
|
+
(make-instance 'seq :elements (rest parse-tree)))
|
343
|
+
(t (convert-aux (second parse-tree)))))
|
344
|
+
|
345
|
+
(defmethod convert-compound-parse-tree ((token (eql :group)) parse-tree &key)
|
346
|
+
"The case for parse trees like \(:GROUP {<regex>}*).
|
347
|
+
|
348
|
+
This is a syntactical construct equivalent to :SEQUENCE intended to
|
349
|
+
keep the effect of modifiers local."
|
350
|
+
(declare #.*standard-optimize-settings*)
|
351
|
+
(declare (special flags))
|
352
|
+
;; make a local copy of FLAGS and shadow the global value while we
|
353
|
+
;; descend into the enclosed regexes
|
354
|
+
(let ((flags (copy-list flags)))
|
355
|
+
(declare (special flags))
|
356
|
+
(cond ((cddr parse-tree)
|
357
|
+
(loop for parse-tree-rest on (rest parse-tree)
|
358
|
+
while parse-tree-rest
|
359
|
+
do (setf (car parse-tree-rest)
|
360
|
+
(convert-aux (car parse-tree-rest))))
|
361
|
+
(make-instance 'seq :elements (rest parse-tree)))
|
362
|
+
(t (convert-aux (second parse-tree))))))
|
363
|
+
|
364
|
+
(defmethod convert-compound-parse-tree ((token (eql :alternation)) parse-tree &key)
|
365
|
+
"The case for \(:ALTERNATION {<regex>}*)."
|
366
|
+
(declare #.*standard-optimize-settings*)
|
367
|
+
(declare (special accumulate-start-p))
|
368
|
+
;; we must stop accumulating objects into STARTS-WITH once we reach
|
369
|
+
;; an alternation
|
370
|
+
(setq accumulate-start-p nil)
|
371
|
+
(loop for parse-tree-rest on (rest parse-tree)
|
372
|
+
while parse-tree-rest
|
373
|
+
do (setf (car parse-tree-rest)
|
374
|
+
(convert-aux (car parse-tree-rest))))
|
375
|
+
(make-instance 'alternation :choices (rest parse-tree)))
|
376
|
+
|
377
|
+
(defmethod convert-compound-parse-tree ((token (eql :branch)) parse-tree &key)
|
378
|
+
"The case for \(:BRANCH <test> <regex>).
|
379
|
+
|
380
|
+
Here, <test> must be look-ahead, look-behind or number; if <regex> is
|
381
|
+
an alternation it must have one or two choices."
|
382
|
+
(declare #.*standard-optimize-settings*)
|
383
|
+
(declare (special accumulate-start-p))
|
384
|
+
(setq accumulate-start-p nil)
|
385
|
+
(let* ((test-candidate (second parse-tree))
|
386
|
+
(test (cond ((numberp test-candidate)
|
387
|
+
(when (zerop (the fixnum test-candidate))
|
388
|
+
(signal-syntax-error "Register 0 doesn't exist: ~S." parse-tree))
|
389
|
+
(1- (the fixnum test-candidate)))
|
390
|
+
(t (convert-aux test-candidate))))
|
391
|
+
(alternations (convert-aux (third parse-tree))))
|
392
|
+
(when (and (not (numberp test))
|
393
|
+
(not (typep test 'lookahead))
|
394
|
+
(not (typep test 'lookbehind)))
|
395
|
+
(signal-syntax-error "Branch test must be look-ahead, look-behind or number: ~S." parse-tree))
|
396
|
+
(typecase alternations
|
397
|
+
(alternation
|
398
|
+
(case (length (choices alternations))
|
399
|
+
((0)
|
400
|
+
(signal-syntax-error "No choices in branch: ~S." parse-tree))
|
401
|
+
((1)
|
402
|
+
(make-instance 'branch
|
403
|
+
:test test
|
404
|
+
:then-regex (first
|
405
|
+
(choices alternations))))
|
406
|
+
((2)
|
407
|
+
(make-instance 'branch
|
408
|
+
:test test
|
409
|
+
:then-regex (first
|
410
|
+
(choices alternations))
|
411
|
+
:else-regex (second
|
412
|
+
(choices alternations))))
|
413
|
+
(otherwise
|
414
|
+
(signal-syntax-error "Too much choices in branch: ~S." parse-tree))))
|
415
|
+
(t
|
416
|
+
(make-instance 'branch
|
417
|
+
:test test
|
418
|
+
:then-regex alternations)))))
|
419
|
+
|
420
|
+
(defmethod convert-compound-parse-tree ((token (eql :positive-lookahead)) parse-tree &key)
|
421
|
+
"The case for \(:POSITIVE-LOOKAHEAD <regex>)."
|
422
|
+
(declare #.*standard-optimize-settings*)
|
423
|
+
(declare (special flags accumulate-start-p))
|
424
|
+
;; keep the effect of modifiers local to the enclosed regex and stop
|
425
|
+
;; accumulating into STARTS-WITH
|
426
|
+
(setq accumulate-start-p nil)
|
427
|
+
(let ((flags (copy-list flags)))
|
428
|
+
(declare (special flags))
|
429
|
+
(make-instance 'lookahead
|
430
|
+
:regex (convert-aux (second parse-tree))
|
431
|
+
:positivep t)))
|
432
|
+
|
433
|
+
(defmethod convert-compound-parse-tree ((token (eql :negative-lookahead)) parse-tree &key)
|
434
|
+
"The case for \(:NEGATIVE-LOOKAHEAD <regex>)."
|
435
|
+
(declare #.*standard-optimize-settings*)
|
436
|
+
;; do the same as for positive look-aheads and just switch afterwards
|
437
|
+
(let ((regex (convert-compound-parse-tree :positive-lookahead parse-tree)))
|
438
|
+
(setf (slot-value regex 'positivep) nil)
|
439
|
+
regex))
|
440
|
+
|
441
|
+
(defmethod convert-compound-parse-tree ((token (eql :positive-lookbehind)) parse-tree &key)
|
442
|
+
"The case for \(:POSITIVE-LOOKBEHIND <regex>)."
|
443
|
+
(declare #.*standard-optimize-settings*)
|
444
|
+
(declare (special flags accumulate-start-p))
|
445
|
+
;; keep the effect of modifiers local to the enclosed regex and stop
|
446
|
+
;; accumulating into STARTS-WITH
|
447
|
+
(setq accumulate-start-p nil)
|
448
|
+
(let* ((flags (copy-list flags))
|
449
|
+
(regex (convert-aux (second parse-tree)))
|
450
|
+
(len (regex-length regex)))
|
451
|
+
(declare (special flags))
|
452
|
+
;; lookbehind assertions must be of fixed length
|
453
|
+
(unless len
|
454
|
+
(signal-syntax-error "Variable length look-behind not implemented \(yet): ~S." parse-tree))
|
455
|
+
(make-instance 'lookbehind
|
456
|
+
:regex regex
|
457
|
+
:positivep t
|
458
|
+
:len len)))
|
459
|
+
|
460
|
+
(defmethod convert-compound-parse-tree ((token (eql :negative-lookbehind)) parse-tree &key)
|
461
|
+
"The case for \(:NEGATIVE-LOOKBEHIND <regex>)."
|
462
|
+
(declare #.*standard-optimize-settings*)
|
463
|
+
;; do the same as for positive look-behinds and just switch afterwards
|
464
|
+
(let ((regex (convert-compound-parse-tree :positive-lookbehind parse-tree)))
|
465
|
+
(setf (slot-value regex 'positivep) nil)
|
466
|
+
regex))
|
467
|
+
|
468
|
+
(defmethod convert-compound-parse-tree ((token (eql :greedy-repetition)) parse-tree &key (greedyp t))
|
469
|
+
"The case for \(:GREEDY-REPETITION|:NON-GREEDY-REPETITION <min> <max> <regex>).
|
470
|
+
|
471
|
+
This function is also used for the non-greedy case in which case it is
|
472
|
+
called with GREEDYP set to NIL as you would expect."
|
473
|
+
(declare #.*standard-optimize-settings*)
|
474
|
+
(declare (special accumulate-start-p starts-with))
|
475
|
+
;; remember the value of ACCUMULATE-START-P upon entering
|
476
|
+
(let ((local-accumulate-start-p accumulate-start-p))
|
477
|
+
(let ((minimum (second parse-tree))
|
478
|
+
(maximum (third parse-tree)))
|
479
|
+
(declare (fixnum minimum))
|
480
|
+
(declare (type (or null fixnum) maximum))
|
481
|
+
(unless (and maximum
|
482
|
+
(= 1 minimum maximum))
|
483
|
+
;; set ACCUMULATE-START-P to NIL for the rest of
|
484
|
+
;; the conversion because we can't continue to
|
485
|
+
;; accumulate inside as well as after a proper
|
486
|
+
;; repetition
|
487
|
+
(setq accumulate-start-p nil))
|
488
|
+
(let* (reg-seen
|
489
|
+
(regex (convert-aux (fourth parse-tree)))
|
490
|
+
(min-len (regex-min-length regex))
|
491
|
+
(length (regex-length regex)))
|
492
|
+
;; note that this declaration already applies to
|
493
|
+
;; the call to CONVERT-AUX above
|
494
|
+
(declare (special reg-seen))
|
495
|
+
(when (and local-accumulate-start-p
|
496
|
+
(not starts-with)
|
497
|
+
(zerop minimum)
|
498
|
+
(not maximum))
|
499
|
+
;; if this repetition is (equivalent to) ".*"
|
500
|
+
;; and if we're at the start of the regex we
|
501
|
+
;; remember it for ADVANCE-FN (see the SCAN
|
502
|
+
;; function)
|
503
|
+
(setq starts-with (everythingp regex)))
|
504
|
+
(if (or (not reg-seen)
|
505
|
+
(not greedyp)
|
506
|
+
(not length)
|
507
|
+
(zerop length)
|
508
|
+
(and maximum (= minimum maximum)))
|
509
|
+
;; the repetition doesn't enclose a register, or
|
510
|
+
;; it's not greedy, or we can't determine it's
|
511
|
+
;; (inner) length, or the length is zero, or the
|
512
|
+
;; number of repetitions is fixed; in all of
|
513
|
+
;; these cases we don't bother to optimize
|
514
|
+
(maybe-split-repetition regex
|
515
|
+
greedyp
|
516
|
+
minimum
|
517
|
+
maximum
|
518
|
+
min-len
|
519
|
+
length
|
520
|
+
reg-seen)
|
521
|
+
;; otherwise we make a transformation that looks
|
522
|
+
;; roughly like one of
|
523
|
+
;; <regex>* -> (?:<regex'>*<regex>)?
|
524
|
+
;; <regex>+ -> <regex'>*<regex>
|
525
|
+
;; where the trick is that as much as possible
|
526
|
+
;; registers from <regex> are removed in
|
527
|
+
;; <regex'>
|
528
|
+
(let* (reg-seen ; new instance for REMOVE-REGISTERS
|
529
|
+
(remove-registers-p t)
|
530
|
+
(inner-regex (remove-registers regex))
|
531
|
+
(inner-repetition
|
532
|
+
;; this is the "<regex'>" part
|
533
|
+
(maybe-split-repetition inner-regex
|
534
|
+
;; always greedy
|
535
|
+
t
|
536
|
+
;; reduce minimum by 1
|
537
|
+
;; unless it's already 0
|
538
|
+
(if (zerop minimum)
|
539
|
+
0
|
540
|
+
(1- minimum))
|
541
|
+
;; reduce maximum by 1
|
542
|
+
;; unless it's NIL
|
543
|
+
(and maximum
|
544
|
+
(1- maximum))
|
545
|
+
min-len
|
546
|
+
length
|
547
|
+
reg-seen))
|
548
|
+
(inner-seq
|
549
|
+
;; this is the "<regex'>*<regex>" part
|
550
|
+
(make-instance 'seq
|
551
|
+
:elements (list inner-repetition
|
552
|
+
regex))))
|
553
|
+
;; note that this declaration already applies
|
554
|
+
;; to the call to REMOVE-REGISTERS above
|
555
|
+
(declare (special remove-registers-p reg-seen))
|
556
|
+
;; wrap INNER-SEQ with a greedy
|
557
|
+
;; {0,1}-repetition (i.e. "?") if necessary
|
558
|
+
(if (plusp minimum)
|
559
|
+
inner-seq
|
560
|
+
(maybe-split-repetition inner-seq
|
561
|
+
t
|
562
|
+
0
|
563
|
+
1
|
564
|
+
min-len
|
565
|
+
nil
|
566
|
+
t))))))))
|
567
|
+
|
568
|
+
(defmethod convert-compound-parse-tree ((token (eql :non-greedy-repetition)) parse-tree &key)
|
569
|
+
"The case for \(:NON-GREEDY-REPETITION <min> <max> <regex>)."
|
570
|
+
(declare #.*standard-optimize-settings*)
|
571
|
+
;; just dispatch to the method above with GREEDYP explicitly set to NIL
|
572
|
+
(convert-compound-parse-tree :greedy-repetition parse-tree :greedyp nil))
|
573
|
+
|
574
|
+
(defmethod convert-compound-parse-tree ((token (eql :register)) parse-tree &key name)
|
575
|
+
"The case for \(:REGISTER <regex>). Also used for named registers
|
576
|
+
when NAME is not NIL."
|
577
|
+
(declare #.*standard-optimize-settings*)
|
578
|
+
(declare (special flags reg-num reg-names))
|
579
|
+
;; keep the effect of modifiers local to the enclosed regex; also,
|
580
|
+
;; assign the current value of REG-NUM to the corresponding slot of
|
581
|
+
;; the REGISTER object and increase this counter afterwards; for
|
582
|
+
;; named register update REG-NAMES and set the corresponding name
|
583
|
+
;; slot of the REGISTER object too
|
584
|
+
(let ((flags (copy-list flags))
|
585
|
+
(stored-reg-num reg-num))
|
586
|
+
(declare (special flags reg-seen named-reg-seen))
|
587
|
+
(setq reg-seen t)
|
588
|
+
(when name (setq named-reg-seen t))
|
589
|
+
(incf (the fixnum reg-num))
|
590
|
+
(push name reg-names)
|
591
|
+
(make-instance 'register
|
592
|
+
:regex (convert-aux (if name (third parse-tree) (second parse-tree)))
|
593
|
+
:num stored-reg-num
|
594
|
+
:name name)))
|
595
|
+
|
596
|
+
(defmethod convert-compound-parse-tree ((token (eql :named-register)) parse-tree &key)
|
597
|
+
"The case for \(:NAMED-REGISTER <regex>)."
|
598
|
+
(declare #.*standard-optimize-settings*)
|
599
|
+
;; call the method above and use the :NAME keyword argument
|
600
|
+
(convert-compound-parse-tree :register parse-tree :name (copy-seq (second parse-tree))))
|
601
|
+
|
602
|
+
(defmethod convert-compound-parse-tree ((token (eql :filter)) parse-tree &key)
|
603
|
+
"The case for \(:FILTER <function> &optional <length>)."
|
604
|
+
(declare #.*standard-optimize-settings*)
|
605
|
+
(declare (special accumulate-start-p))
|
606
|
+
;; stop accumulating into STARTS-WITH
|
607
|
+
(setq accumulate-start-p nil)
|
608
|
+
(make-instance 'filter
|
609
|
+
:fn (second parse-tree)
|
610
|
+
:len (third parse-tree)))
|
611
|
+
|
612
|
+
(defmethod convert-compound-parse-tree ((token (eql :standalone)) parse-tree &key)
|
613
|
+
"The case for \(:STANDALONE <regex>)."
|
614
|
+
(declare #.*standard-optimize-settings*)
|
615
|
+
(declare (special flags accumulate-start-p))
|
616
|
+
;; stop accumulating into STARTS-WITH
|
617
|
+
(setq accumulate-start-p nil)
|
618
|
+
;; keep the effect of modifiers local to the enclosed regex
|
619
|
+
(let ((flags (copy-list flags)))
|
620
|
+
(declare (special flags))
|
621
|
+
(make-instance 'standalone :regex (convert-aux (second parse-tree)))))
|
622
|
+
|
623
|
+
(defmethod convert-compound-parse-tree ((token (eql :back-reference)) parse-tree &key)
|
624
|
+
"The case for \(:BACK-REFERENCE <number>|<name>)."
|
625
|
+
(declare #.*standard-optimize-settings*)
|
626
|
+
(declare (special flags accumulate-start-p reg-num reg-names max-back-ref))
|
627
|
+
(let* ((backref-name (and (stringp (second parse-tree))
|
628
|
+
(second parse-tree)))
|
629
|
+
(referred-regs
|
630
|
+
(when backref-name
|
631
|
+
;; find which register corresponds to the given name
|
632
|
+
;; we have to deal with case where several registers share
|
633
|
+
;; the same name and collect their respective numbers
|
634
|
+
(loop for name in reg-names
|
635
|
+
for reg-index from 0
|
636
|
+
when (string= name backref-name)
|
637
|
+
;; NOTE: REG-NAMES stores register names in reversed
|
638
|
+
;; order REG-NUM contains number of (any) registers
|
639
|
+
;; seen so far; 1- will be done later
|
640
|
+
collect (- reg-num reg-index))))
|
641
|
+
;; store the register number for the simple case
|
642
|
+
(backref-number (or (first referred-regs) (second parse-tree))))
|
643
|
+
(declare (type (or fixnum null) backref-number))
|
644
|
+
(when (or (not (typep backref-number 'fixnum))
|
645
|
+
(<= backref-number 0))
|
646
|
+
(signal-syntax-error "Illegal back-reference: ~S." parse-tree))
|
647
|
+
;; stop accumulating into STARTS-WITH and increase MAX-BACK-REF if
|
648
|
+
;; necessary
|
649
|
+
(setq accumulate-start-p nil
|
650
|
+
max-back-ref (max (the fixnum max-back-ref)
|
651
|
+
backref-number))
|
652
|
+
(flet ((make-back-ref (backref-number)
|
653
|
+
(make-instance 'back-reference
|
654
|
+
;; we start counting from 0 internally
|
655
|
+
:num (1- backref-number)
|
656
|
+
:case-insensitive-p (case-insensitive-mode-p flags)
|
657
|
+
;; backref-name is NIL or string, safe to copy
|
658
|
+
:name (copy-seq backref-name))))
|
659
|
+
(cond
|
660
|
+
((cdr referred-regs)
|
661
|
+
;; several registers share the same name we will try to match
|
662
|
+
;; any of them, starting with the most recent first
|
663
|
+
;; alternation is used to accomplish matching
|
664
|
+
(make-instance 'alternation
|
665
|
+
:choices (loop
|
666
|
+
for reg-index in referred-regs
|
667
|
+
collect (make-back-ref reg-index))))
|
668
|
+
;; simple case - backref corresponds to only one register
|
669
|
+
(t
|
670
|
+
(make-back-ref backref-number))))))
|
671
|
+
|
672
|
+
(defmethod convert-compound-parse-tree ((token (eql :regex)) parse-tree &key)
|
673
|
+
"The case for \(:REGEX <string>)."
|
674
|
+
(declare #.*standard-optimize-settings*)
|
675
|
+
(convert-aux (parse-string (second parse-tree))))
|
676
|
+
|
677
|
+
(defmethod convert-compound-parse-tree ((token (eql :char-class)) parse-tree &key invertedp)
|
678
|
+
"The case for \(:CHAR-CLASS {<item>}*) where item is one of
|
679
|
+
|
680
|
+
- a character,
|
681
|
+
- a character range: \(:RANGE <char1> <char2>), or
|
682
|
+
- a special char class symbol like :DIGIT-CHAR-CLASS.
|
683
|
+
|
684
|
+
Also used for inverted char classes when INVERTEDP is true."
|
685
|
+
(declare #.*standard-optimize-settings*)
|
686
|
+
(declare (special flags accumulate-start-p))
|
687
|
+
(let ((test-function
|
688
|
+
(create-optimized-test-function
|
689
|
+
(convert-char-class-to-test-function (rest parse-tree)
|
690
|
+
invertedp
|
691
|
+
(case-insensitive-mode-p flags)))))
|
692
|
+
(setq accumulate-start-p nil)
|
693
|
+
(make-instance 'char-class :test-function test-function)))
|
694
|
+
|
695
|
+
(defmethod convert-compound-parse-tree ((token (eql :inverted-char-class)) parse-tree &key)
|
696
|
+
"The case for \(:INVERTED-CHAR-CLASS {<item>}*)."
|
697
|
+
(declare #.*standard-optimize-settings*)
|
698
|
+
;; just dispatch to the "real" method
|
699
|
+
(convert-compound-parse-tree :char-class parse-tree :invertedp t))
|
700
|
+
|
701
|
+
(defmethod convert-compound-parse-tree ((token (eql :property)) parse-tree &key)
|
702
|
+
"The case for \(:PROPERTY <name>) where <name> is a string."
|
703
|
+
(declare #.*standard-optimize-settings*)
|
704
|
+
(make-instance 'char-class :test-function (resolve-property (second parse-tree))))
|
705
|
+
|
706
|
+
(defmethod convert-compound-parse-tree ((token (eql :inverted-property)) parse-tree &key)
|
707
|
+
"The case for \(:INVERTED-PROPERTY <name>) where <name> is a string."
|
708
|
+
(declare #.*standard-optimize-settings*)
|
709
|
+
(make-instance 'char-class :test-function (complement* (resolve-property (second parse-tree)))))
|
710
|
+
|
711
|
+
(defmethod convert-compound-parse-tree ((token (eql :flags)) parse-tree &key)
|
712
|
+
"The case for \(:FLAGS {<flag>}*) where flag is a modifier symbol
|
713
|
+
like :CASE-INSENSITIVE-P."
|
714
|
+
(declare #.*standard-optimize-settings*)
|
715
|
+
;; set/unset the flags corresponding to the symbols
|
716
|
+
;; following :FLAGS
|
717
|
+
(mapc #'set-flag (rest parse-tree))
|
718
|
+
;; we're only interested in the side effect of
|
719
|
+
;; setting/unsetting the flags and turn this syntactical
|
720
|
+
;; construct into a VOID object which'll be optimized
|
721
|
+
;; away when creating the matcher
|
722
|
+
(make-instance 'void))
|
723
|
+
|
724
|
+
(defgeneric convert-simple-parse-tree (parse-tree)
|
725
|
+
(declare #.*standard-optimize-settings*)
|
726
|
+
(:documentation "Helper function for CONVERT-AUX which converts
|
727
|
+
parse trees which are atoms.")
|
728
|
+
(:method ((parse-tree (eql :void)))
|
729
|
+
(declare #.*standard-optimize-settings*)
|
730
|
+
(make-instance 'void))
|
731
|
+
(:method ((parse-tree (eql :word-boundary)))
|
732
|
+
(declare #.*standard-optimize-settings*)
|
733
|
+
(make-instance 'word-boundary :negatedp nil))
|
734
|
+
(:method ((parse-tree (eql :non-word-boundary)))
|
735
|
+
(declare #.*standard-optimize-settings*)
|
736
|
+
(make-instance 'word-boundary :negatedp t))
|
737
|
+
(:method ((parse-tree (eql :everything)))
|
738
|
+
(declare #.*standard-optimize-settings*)
|
739
|
+
(declare (special flags accumulate-start-p))
|
740
|
+
(setq accumulate-start-p nil)
|
741
|
+
(make-instance 'everything :single-line-p (single-line-mode-p flags)))
|
742
|
+
(:method ((parse-tree (eql :digit-class)))
|
743
|
+
(declare #.*standard-optimize-settings*)
|
744
|
+
(declare (special accumulate-start-p))
|
745
|
+
(setq accumulate-start-p nil)
|
746
|
+
(make-instance 'char-class :test-function #'digit-char-p))
|
747
|
+
(:method ((parse-tree (eql :word-char-class)))
|
748
|
+
(declare #.*standard-optimize-settings*)
|
749
|
+
(declare (special accumulate-start-p))
|
750
|
+
(setq accumulate-start-p nil)
|
751
|
+
(make-instance 'char-class :test-function #'word-char-p))
|
752
|
+
(:method ((parse-tree (eql :whitespace-char-class)))
|
753
|
+
(declare #.*standard-optimize-settings*)
|
754
|
+
(declare (special accumulate-start-p))
|
755
|
+
(setq accumulate-start-p nil)
|
756
|
+
(make-instance 'char-class :test-function #'whitespacep))
|
757
|
+
(:method ((parse-tree (eql :non-digit-class)))
|
758
|
+
(declare #.*standard-optimize-settings*)
|
759
|
+
(declare (special accumulate-start-p))
|
760
|
+
(setq accumulate-start-p nil)
|
761
|
+
(make-instance 'char-class :test-function (complement* #'digit-char-p)))
|
762
|
+
(:method ((parse-tree (eql :non-word-char-class)))
|
763
|
+
(declare #.*standard-optimize-settings*)
|
764
|
+
(declare (special accumulate-start-p))
|
765
|
+
(setq accumulate-start-p nil)
|
766
|
+
(make-instance 'char-class :test-function (complement* #'word-char-p)))
|
767
|
+
(:method ((parse-tree (eql :non-whitespace-char-class)))
|
768
|
+
(declare #.*standard-optimize-settings*)
|
769
|
+
(declare (special accumulate-start-p))
|
770
|
+
(setq accumulate-start-p nil)
|
771
|
+
(make-instance 'char-class :test-function (complement* #'whitespacep)))
|
772
|
+
(:method ((parse-tree (eql :start-anchor)))
|
773
|
+
;; Perl's "^"
|
774
|
+
(declare #.*standard-optimize-settings*)
|
775
|
+
(declare (special flags))
|
776
|
+
(make-instance 'anchor :startp t :multi-line-p (multi-line-mode-p flags)))
|
777
|
+
(:method ((parse-tree (eql :end-anchor)))
|
778
|
+
;; Perl's "$"
|
779
|
+
(declare #.*standard-optimize-settings*)
|
780
|
+
(declare (special flags))
|
781
|
+
(make-instance 'anchor :startp nil :multi-line-p (multi-line-mode-p flags)))
|
782
|
+
(:method ((parse-tree (eql :modeless-start-anchor)))
|
783
|
+
;; Perl's "\A"
|
784
|
+
(declare #.*standard-optimize-settings*)
|
785
|
+
(make-instance 'anchor :startp t))
|
786
|
+
(:method ((parse-tree (eql :modeless-end-anchor)))
|
787
|
+
;; Perl's "$\Z"
|
788
|
+
(declare #.*standard-optimize-settings*)
|
789
|
+
(make-instance 'anchor :startp nil))
|
790
|
+
(:method ((parse-tree (eql :modeless-end-anchor-no-newline)))
|
791
|
+
;; Perl's "$\z"
|
792
|
+
(declare #.*standard-optimize-settings*)
|
793
|
+
(make-instance 'anchor :startp nil :no-newline-p t))
|
794
|
+
(:method ((parse-tree (eql :case-insensitive-p)))
|
795
|
+
(declare #.*standard-optimize-settings*)
|
796
|
+
(set-flag parse-tree)
|
797
|
+
(make-instance 'void))
|
798
|
+
(:method ((parse-tree (eql :case-sensitive-p)))
|
799
|
+
(declare #.*standard-optimize-settings*)
|
800
|
+
(set-flag parse-tree)
|
801
|
+
(make-instance 'void))
|
802
|
+
(:method ((parse-tree (eql :multi-line-mode-p)))
|
803
|
+
(declare #.*standard-optimize-settings*)
|
804
|
+
(set-flag parse-tree)
|
805
|
+
(make-instance 'void))
|
806
|
+
(:method ((parse-tree (eql :not-multi-line-mode-p)))
|
807
|
+
(declare #.*standard-optimize-settings*)
|
808
|
+
(set-flag parse-tree)
|
809
|
+
(make-instance 'void))
|
810
|
+
(:method ((parse-tree (eql :single-line-mode-p)))
|
811
|
+
(declare #.*standard-optimize-settings*)
|
812
|
+
(set-flag parse-tree)
|
813
|
+
(make-instance 'void))
|
814
|
+
(:method ((parse-tree (eql :not-single-line-mode-p)))
|
815
|
+
(declare #.*standard-optimize-settings*)
|
816
|
+
(set-flag parse-tree)
|
817
|
+
(make-instance 'void)))
|
818
|
+
|
819
|
+
(defmethod convert-simple-parse-tree ((parse-tree string))
|
820
|
+
(declare #.*standard-optimize-settings*)
|
821
|
+
(declare (special flags))
|
822
|
+
;; turn strings into STR objects and try to accumulate into
|
823
|
+
;; STARTS-WITH
|
824
|
+
(let ((str (make-instance 'str
|
825
|
+
:str parse-tree
|
826
|
+
:case-insensitive-p (case-insensitive-mode-p flags))))
|
827
|
+
(maybe-accumulate str)
|
828
|
+
str))
|
829
|
+
|
830
|
+
(defmethod convert-simple-parse-tree ((parse-tree character))
|
831
|
+
(declare #.*standard-optimize-settings*)
|
832
|
+
;; dispatch to the method for strings
|
833
|
+
(convert-simple-parse-tree (string parse-tree)))
|
834
|
+
|
835
|
+
(defmethod convert-simple-parse-tree (parse-tree)
|
836
|
+
"The default method - check if there's a translation."
|
837
|
+
(declare #.*standard-optimize-settings*)
|
838
|
+
(let ((translation (and (symbolp parse-tree) (parse-tree-synonym parse-tree))))
|
839
|
+
(if translation
|
840
|
+
(convert-aux (copy-tree translation))
|
841
|
+
(signal-syntax-error "Unknown token ~A in parse tree." parse-tree))))
|
842
|
+
|
843
|
+
(defun convert (parse-tree)
|
844
|
+
"Converts the parse tree PARSE-TREE into an equivalent REGEX object
|
845
|
+
and returns three values: the REGEX object, the number of registers
|
846
|
+
seen and an object the regex starts with which is either a STR object
|
847
|
+
or an EVERYTHING object \(if the regex starts with something like
|
848
|
+
\".*\") or NIL."
|
849
|
+
(declare #.*standard-optimize-settings*)
|
850
|
+
;; this function basically just initializes the special variables
|
851
|
+
;; and then calls CONVERT-AUX to do all the work
|
852
|
+
(let* ((flags (list nil nil nil))
|
853
|
+
(reg-num 0)
|
854
|
+
reg-names
|
855
|
+
named-reg-seen
|
856
|
+
(accumulate-start-p t)
|
857
|
+
starts-with
|
858
|
+
(max-back-ref 0)
|
859
|
+
(converted-parse-tree (convert-aux parse-tree)))
|
860
|
+
(declare (special flags reg-num reg-names named-reg-seen
|
861
|
+
accumulate-start-p starts-with max-back-ref))
|
862
|
+
;; make sure we don't reference registers which aren't there
|
863
|
+
(when (> (the fixnum max-back-ref)
|
864
|
+
(the fixnum reg-num))
|
865
|
+
(signal-syntax-error "Backreference to register ~A which has not been defined." max-back-ref))
|
866
|
+
(when (typep starts-with 'str)
|
867
|
+
(setf (slot-value starts-with 'str)
|
868
|
+
(coerce (slot-value starts-with 'str)
|
869
|
+
#+:lispworks 'lw:simple-text-string
|
870
|
+
#-:lispworks 'simple-string)))
|
871
|
+
(values converted-parse-tree reg-num starts-with
|
872
|
+
;; we can't simply use *ALLOW-NAMED-REGISTERS*
|
873
|
+
;; since parse-tree syntax ignores it
|
874
|
+
(when named-reg-seen
|
875
|
+
(nreverse reg-names)))))
|