clucumber 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.md +4 -9
- data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
- data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
- data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
- data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
- data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
- data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
- data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
- data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
- data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
- data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
- data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
- data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
- data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
- data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
- data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
- data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
- data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
- data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
- data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
- data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
- data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
- data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
- data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
- data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
- data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
- data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
- data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
- data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
- data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
- data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
- data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
- data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
- data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
- data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
- data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
- data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
- data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
- data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
- data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
- data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
- data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
- data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
- data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
- data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
- data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
- data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
- data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
- data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
- data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
- data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
- data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
- data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
- data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
- data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
- data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
- data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
- data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
- data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
- data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
- data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
- data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
- data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
- data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
- data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
- data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
- data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
- data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
- data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
- data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
- data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
- data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
- data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
- data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
- data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
- data/lib/clucumber/vendor/lift/lift.asd +77 -0
- data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
- data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
- data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
- data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
- data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
- data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
- data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
- data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
- data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
- data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
- data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
- data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
- data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
- data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
- data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
- data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
- data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
- data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
- data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
- data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
- data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
- data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
- data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
- data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
- data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
- data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
- data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
- data/lib/clucumber/vendor/usocket/package.lisp +82 -0
- data/lib/clucumber/vendor/usocket/server.lisp +45 -0
- data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
- data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
- data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
- data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
- data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
- data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
- data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
- data/lib/clucumber.rb +29 -7
- metadata +151 -5
@@ -0,0 +1,833 @@
|
|
1
|
+
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
2
|
+
;;; $Header: /usr/local/cvsrep/cl-ppcre/repetition-closures.lisp,v 1.34 2009/09/17 19:17:31 edi Exp $
|
3
|
+
|
4
|
+
;;; This is actually a part of closures.lisp which we put into a
|
5
|
+
;;; separate file because it is rather complex. We only deal with
|
6
|
+
;;; REPETITIONs here. Note that this part of the code contains some
|
7
|
+
;;; rather crazy micro-optimizations which were introduced to be as
|
8
|
+
;;; competitive with Perl as possible in tight loops.
|
9
|
+
|
10
|
+
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
|
11
|
+
|
12
|
+
;;; Redistribution and use in source and binary forms, with or without
|
13
|
+
;;; modification, are permitted provided that the following conditions
|
14
|
+
;;; are met:
|
15
|
+
|
16
|
+
;;; * Redistributions of source code must retain the above copyright
|
17
|
+
;;; notice, this list of conditions and the following disclaimer.
|
18
|
+
|
19
|
+
;;; * Redistributions in binary form must reproduce the above
|
20
|
+
;;; copyright notice, this list of conditions and the following
|
21
|
+
;;; disclaimer in the documentation and/or other materials
|
22
|
+
;;; provided with the distribution.
|
23
|
+
|
24
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
25
|
+
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
26
|
+
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
27
|
+
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
28
|
+
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
29
|
+
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
30
|
+
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
31
|
+
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
32
|
+
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
33
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
34
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
35
|
+
|
36
|
+
(in-package :cl-ppcre)
|
37
|
+
|
38
|
+
(defmacro incf-after (place &optional (delta 1) &environment env)
|
39
|
+
"Utility macro inspired by C's \"place++\", i.e. first return the
|
40
|
+
value of PLACE and afterwards increment it by DELTA."
|
41
|
+
(with-unique-names (%temp)
|
42
|
+
(multiple-value-bind (vars vals store-vars writer-form reader-form)
|
43
|
+
(get-setf-expansion place env)
|
44
|
+
`(let* (,@(mapcar #'list vars vals)
|
45
|
+
(,%temp ,reader-form)
|
46
|
+
(,(car store-vars) (+ ,%temp ,delta)))
|
47
|
+
,writer-form
|
48
|
+
,%temp))))
|
49
|
+
|
50
|
+
;; code for greedy repetitions with minimum zero
|
51
|
+
|
52
|
+
(defmacro greedy-constant-length-closure (check-curr-pos)
|
53
|
+
"This is the template for simple greedy repetitions (where simple
|
54
|
+
means that the minimum number of repetitions is zero, that the inner
|
55
|
+
regex to be checked is of fixed length LEN, and that it doesn't
|
56
|
+
contain registers, i.e. there's no need for backtracking).
|
57
|
+
CHECK-CURR-POS is a form which checks whether the inner regex of the
|
58
|
+
repetition matches at CURR-POS."
|
59
|
+
`(if maximum
|
60
|
+
(lambda (start-pos)
|
61
|
+
(declare (fixnum start-pos maximum))
|
62
|
+
;; because we know LEN we know in advance where to stop at the
|
63
|
+
;; latest; we also take into consideration MIN-REST, i.e. the
|
64
|
+
;; minimal length of the part behind the repetition
|
65
|
+
(let ((target-end-pos (min (1+ (- *end-pos* len min-rest))
|
66
|
+
;; don't go further than MAXIMUM
|
67
|
+
;; repetitions, of course
|
68
|
+
(+ start-pos
|
69
|
+
(the fixnum (* len maximum)))))
|
70
|
+
(curr-pos start-pos))
|
71
|
+
(declare (fixnum target-end-pos curr-pos))
|
72
|
+
(block greedy-constant-length-matcher
|
73
|
+
;; we use an ugly TAGBODY construct because this might be a
|
74
|
+
;; tight loop and this version is a bit faster than our LOOP
|
75
|
+
;; version (at least in CMUCL)
|
76
|
+
(tagbody
|
77
|
+
forward-loop
|
78
|
+
;; first go forward as far as possible, i.e. while
|
79
|
+
;; the inner regex matches
|
80
|
+
(when (>= curr-pos target-end-pos)
|
81
|
+
(go backward-loop))
|
82
|
+
(when ,check-curr-pos
|
83
|
+
(incf curr-pos len)
|
84
|
+
(go forward-loop))
|
85
|
+
backward-loop
|
86
|
+
;; now go back LEN steps each until we're able to match
|
87
|
+
;; the rest of the regex
|
88
|
+
(when (< curr-pos start-pos)
|
89
|
+
(return-from greedy-constant-length-matcher nil))
|
90
|
+
(let ((result (funcall next-fn curr-pos)))
|
91
|
+
(when result
|
92
|
+
(return-from greedy-constant-length-matcher result)))
|
93
|
+
(decf curr-pos len)
|
94
|
+
(go backward-loop)))))
|
95
|
+
;; basically the same code; it's just a bit easier because we're
|
96
|
+
;; not bounded by MAXIMUM
|
97
|
+
(lambda (start-pos)
|
98
|
+
(declare (fixnum start-pos))
|
99
|
+
(let ((target-end-pos (1+ (- *end-pos* len min-rest)))
|
100
|
+
(curr-pos start-pos))
|
101
|
+
(declare (fixnum target-end-pos curr-pos))
|
102
|
+
(block greedy-constant-length-matcher
|
103
|
+
(tagbody
|
104
|
+
forward-loop
|
105
|
+
(when (>= curr-pos target-end-pos)
|
106
|
+
(go backward-loop))
|
107
|
+
(when ,check-curr-pos
|
108
|
+
(incf curr-pos len)
|
109
|
+
(go forward-loop))
|
110
|
+
backward-loop
|
111
|
+
(when (< curr-pos start-pos)
|
112
|
+
(return-from greedy-constant-length-matcher nil))
|
113
|
+
(let ((result (funcall next-fn curr-pos)))
|
114
|
+
(when result
|
115
|
+
(return-from greedy-constant-length-matcher result)))
|
116
|
+
(decf curr-pos len)
|
117
|
+
(go backward-loop)))))))
|
118
|
+
|
119
|
+
(defun create-greedy-everything-matcher (maximum min-rest next-fn)
|
120
|
+
"Creates a closure which just matches as far ahead as possible,
|
121
|
+
i.e. a closure for a dot in single-line mode."
|
122
|
+
(declare #.*standard-optimize-settings*)
|
123
|
+
(declare (fixnum min-rest) (function next-fn))
|
124
|
+
(if maximum
|
125
|
+
(lambda (start-pos)
|
126
|
+
(declare (fixnum start-pos maximum))
|
127
|
+
;; because we know LEN we know in advance where to stop at the
|
128
|
+
;; latest; we also take into consideration MIN-REST, i.e. the
|
129
|
+
;; minimal length of the part behind the repetition
|
130
|
+
(let ((target-end-pos (min (+ start-pos maximum)
|
131
|
+
(- *end-pos* min-rest))))
|
132
|
+
(declare (fixnum target-end-pos))
|
133
|
+
;; start from the highest possible position and go backward
|
134
|
+
;; until we're able to match the rest of the regex
|
135
|
+
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
|
136
|
+
thereis (funcall next-fn curr-pos))))
|
137
|
+
;; basically the same code; it's just a bit easier because we're
|
138
|
+
;; not bounded by MAXIMUM
|
139
|
+
(lambda (start-pos)
|
140
|
+
(declare (fixnum start-pos))
|
141
|
+
(let ((target-end-pos (- *end-pos* min-rest)))
|
142
|
+
(declare (fixnum target-end-pos))
|
143
|
+
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
|
144
|
+
thereis (funcall next-fn curr-pos))))))
|
145
|
+
|
146
|
+
(defgeneric create-greedy-constant-length-matcher (repetition next-fn)
|
147
|
+
(declare #.*standard-optimize-settings*)
|
148
|
+
(:documentation "Creates a closure which tries to match REPETITION.
|
149
|
+
It is assumed that REPETITION is greedy and the minimal number of
|
150
|
+
repetitions is zero. It is furthermore assumed that the inner regex
|
151
|
+
of REPETITION is of fixed length and doesn't contain registers."))
|
152
|
+
|
153
|
+
(defmethod create-greedy-constant-length-matcher ((repetition repetition)
|
154
|
+
next-fn)
|
155
|
+
(declare #.*standard-optimize-settings*)
|
156
|
+
(let ((len (len repetition))
|
157
|
+
(maximum (maximum repetition))
|
158
|
+
(regex (regex repetition))
|
159
|
+
(min-rest (min-rest repetition)))
|
160
|
+
(declare (fixnum len min-rest)
|
161
|
+
(function next-fn))
|
162
|
+
(cond ((zerop len)
|
163
|
+
;; inner regex has zero-length, so we can discard it
|
164
|
+
;; completely
|
165
|
+
next-fn)
|
166
|
+
(t
|
167
|
+
;; now first try to optimize for a couple of common cases
|
168
|
+
(typecase regex
|
169
|
+
(str
|
170
|
+
(let ((str (str regex)))
|
171
|
+
(if (= 1 len)
|
172
|
+
;; a single character
|
173
|
+
(let ((chr (schar str 0)))
|
174
|
+
(if (case-insensitive-p regex)
|
175
|
+
(greedy-constant-length-closure
|
176
|
+
(char-equal chr (schar *string* curr-pos)))
|
177
|
+
(greedy-constant-length-closure
|
178
|
+
(char= chr (schar *string* curr-pos)))))
|
179
|
+
;; a string
|
180
|
+
(if (case-insensitive-p regex)
|
181
|
+
(greedy-constant-length-closure
|
182
|
+
(*string*-equal str curr-pos (+ curr-pos len) 0 len))
|
183
|
+
(greedy-constant-length-closure
|
184
|
+
(*string*= str curr-pos (+ curr-pos len) 0 len))))))
|
185
|
+
(char-class
|
186
|
+
;; a character class
|
187
|
+
(insert-char-class-tester (regex (schar *string* curr-pos))
|
188
|
+
(greedy-constant-length-closure
|
189
|
+
(char-class-test))))
|
190
|
+
(everything
|
191
|
+
;; an EVERYTHING object, i.e. a dot
|
192
|
+
(if (single-line-p regex)
|
193
|
+
(create-greedy-everything-matcher maximum min-rest next-fn)
|
194
|
+
(greedy-constant-length-closure
|
195
|
+
(char/= #\Newline (schar *string* curr-pos)))))
|
196
|
+
(t
|
197
|
+
;; the general case - we build an inner matcher which
|
198
|
+
;; just checks for immediate success, i.e. NEXT-FN is
|
199
|
+
;; #'IDENTITY
|
200
|
+
(let ((inner-matcher (create-matcher-aux regex #'identity)))
|
201
|
+
(declare (function inner-matcher))
|
202
|
+
(greedy-constant-length-closure
|
203
|
+
(funcall inner-matcher curr-pos)))))))))
|
204
|
+
|
205
|
+
(defgeneric create-greedy-no-zero-matcher (repetition next-fn)
|
206
|
+
(declare #.*standard-optimize-settings*)
|
207
|
+
(:documentation "Creates a closure which tries to match REPETITION.
|
208
|
+
It is assumed that REPETITION is greedy and the minimal number of
|
209
|
+
repetitions is zero. It is furthermore assumed that the inner regex
|
210
|
+
of REPETITION can never match a zero-length string \(or instead the
|
211
|
+
maximal number of repetitions is 1)."))
|
212
|
+
|
213
|
+
(defmethod create-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
214
|
+
(declare #.*standard-optimize-settings*)
|
215
|
+
(let ((maximum (maximum repetition))
|
216
|
+
;; REPEAT-MATCHER is part of the closure's environment but it
|
217
|
+
;; can only be defined after GREEDY-AUX is defined
|
218
|
+
repeat-matcher)
|
219
|
+
(declare (function next-fn))
|
220
|
+
(cond
|
221
|
+
((eql maximum 1)
|
222
|
+
;; this is essentially like the next case but with a known
|
223
|
+
;; MAXIMUM of 1 we can get away without a counter; note that
|
224
|
+
;; we always arrive here if CONVERT optimizes <regex>* to
|
225
|
+
;; (?:<regex'>*<regex>)?
|
226
|
+
(setq repeat-matcher
|
227
|
+
(create-matcher-aux (regex repetition) next-fn))
|
228
|
+
(lambda (start-pos)
|
229
|
+
(declare (function repeat-matcher))
|
230
|
+
(or (funcall repeat-matcher start-pos)
|
231
|
+
(funcall next-fn start-pos))))
|
232
|
+
(maximum
|
233
|
+
;; we make a reservation for our slot in *REPEAT-COUNTERS*
|
234
|
+
;; because we need to keep track whether we've reached MAXIMUM
|
235
|
+
;; repetitions
|
236
|
+
(let ((rep-num (incf-after *rep-num*)))
|
237
|
+
(flet ((greedy-aux (start-pos)
|
238
|
+
(declare (fixnum start-pos maximum rep-num)
|
239
|
+
(function repeat-matcher))
|
240
|
+
;; the actual matcher which first tries to match the
|
241
|
+
;; inner regex of REPETITION (if we haven't done so
|
242
|
+
;; too often) and on failure calls NEXT-FN
|
243
|
+
(or (and (< (aref *repeat-counters* rep-num) maximum)
|
244
|
+
(incf (aref *repeat-counters* rep-num))
|
245
|
+
;; note that REPEAT-MATCHER will call
|
246
|
+
;; GREEDY-AUX again recursively
|
247
|
+
(prog1
|
248
|
+
(funcall repeat-matcher start-pos)
|
249
|
+
(decf (aref *repeat-counters* rep-num))))
|
250
|
+
(funcall next-fn start-pos))))
|
251
|
+
;; create a closure to match the inner regex and to
|
252
|
+
;; implement backtracking via GREEDY-AUX
|
253
|
+
(setq repeat-matcher
|
254
|
+
(create-matcher-aux (regex repetition) #'greedy-aux))
|
255
|
+
;; the closure we return is just a thin wrapper around
|
256
|
+
;; GREEDY-AUX to initialize the repetition counter
|
257
|
+
(lambda (start-pos)
|
258
|
+
(declare (fixnum start-pos))
|
259
|
+
(setf (aref *repeat-counters* rep-num) 0)
|
260
|
+
(greedy-aux start-pos)))))
|
261
|
+
(t
|
262
|
+
;; easier code because we're not bounded by MAXIMUM, but
|
263
|
+
;; basically the same
|
264
|
+
(flet ((greedy-aux (start-pos)
|
265
|
+
(declare (fixnum start-pos)
|
266
|
+
(function repeat-matcher))
|
267
|
+
(or (funcall repeat-matcher start-pos)
|
268
|
+
(funcall next-fn start-pos))))
|
269
|
+
(setq repeat-matcher
|
270
|
+
(create-matcher-aux (regex repetition) #'greedy-aux))
|
271
|
+
#'greedy-aux)))))
|
272
|
+
|
273
|
+
(defgeneric create-greedy-matcher (repetition next-fn)
|
274
|
+
(declare #.*standard-optimize-settings*)
|
275
|
+
(:documentation "Creates a closure which tries to match REPETITION.
|
276
|
+
It is assumed that REPETITION is greedy and the minimal number of
|
277
|
+
repetitions is zero."))
|
278
|
+
|
279
|
+
(defmethod create-greedy-matcher ((repetition repetition) next-fn)
|
280
|
+
(declare #.*standard-optimize-settings*)
|
281
|
+
(let ((maximum (maximum repetition))
|
282
|
+
;; we make a reservation for our slot in *LAST-POS-STORES* because
|
283
|
+
;; we have to watch out for endless loops as the inner regex might
|
284
|
+
;; match zero-length strings
|
285
|
+
(zero-length-num (incf-after *zero-length-num*))
|
286
|
+
;; REPEAT-MATCHER is part of the closure's environment but it
|
287
|
+
;; can only be defined after GREEDY-AUX is defined
|
288
|
+
repeat-matcher)
|
289
|
+
(declare (fixnum zero-length-num)
|
290
|
+
(function next-fn))
|
291
|
+
(cond
|
292
|
+
(maximum
|
293
|
+
;; we make a reservation for our slot in *REPEAT-COUNTERS*
|
294
|
+
;; because we need to keep track whether we've reached MAXIMUM
|
295
|
+
;; repetitions
|
296
|
+
(let ((rep-num (incf-after *rep-num*)))
|
297
|
+
(flet ((greedy-aux (start-pos)
|
298
|
+
;; the actual matcher which first tries to match the
|
299
|
+
;; inner regex of REPETITION (if we haven't done so
|
300
|
+
;; too often) and on failure calls NEXT-FN
|
301
|
+
(declare (fixnum start-pos maximum rep-num)
|
302
|
+
(function repeat-matcher))
|
303
|
+
(let ((old-last-pos
|
304
|
+
(svref *last-pos-stores* zero-length-num)))
|
305
|
+
(when (and old-last-pos
|
306
|
+
(= (the fixnum old-last-pos) start-pos))
|
307
|
+
;; stop immediately if we've been here before,
|
308
|
+
;; i.e. if the last attempt matched a zero-length
|
309
|
+
;; string
|
310
|
+
(return-from greedy-aux (funcall next-fn start-pos)))
|
311
|
+
;; otherwise remember this position for the next
|
312
|
+
;; repetition
|
313
|
+
(setf (svref *last-pos-stores* zero-length-num) start-pos)
|
314
|
+
(or (and (< (aref *repeat-counters* rep-num) maximum)
|
315
|
+
(incf (aref *repeat-counters* rep-num))
|
316
|
+
;; note that REPEAT-MATCHER will call
|
317
|
+
;; GREEDY-AUX again recursively
|
318
|
+
(prog1
|
319
|
+
(funcall repeat-matcher start-pos)
|
320
|
+
(decf (aref *repeat-counters* rep-num))
|
321
|
+
(setf (svref *last-pos-stores* zero-length-num)
|
322
|
+
old-last-pos)))
|
323
|
+
(funcall next-fn start-pos)))))
|
324
|
+
;; create a closure to match the inner regex and to
|
325
|
+
;; implement backtracking via GREEDY-AUX
|
326
|
+
(setq repeat-matcher
|
327
|
+
(create-matcher-aux (regex repetition) #'greedy-aux))
|
328
|
+
;; the closure we return is just a thin wrapper around
|
329
|
+
;; GREEDY-AUX to initialize the repetition counter and our
|
330
|
+
;; slot in *LAST-POS-STORES*
|
331
|
+
(lambda (start-pos)
|
332
|
+
(declare (fixnum start-pos))
|
333
|
+
(setf (aref *repeat-counters* rep-num) 0
|
334
|
+
(svref *last-pos-stores* zero-length-num) nil)
|
335
|
+
(greedy-aux start-pos)))))
|
336
|
+
(t
|
337
|
+
;; easier code because we're not bounded by MAXIMUM, but
|
338
|
+
;; basically the same
|
339
|
+
(flet ((greedy-aux (start-pos)
|
340
|
+
(declare (fixnum start-pos)
|
341
|
+
(function repeat-matcher))
|
342
|
+
(let ((old-last-pos
|
343
|
+
(svref *last-pos-stores* zero-length-num)))
|
344
|
+
(when (and old-last-pos
|
345
|
+
(= (the fixnum old-last-pos) start-pos))
|
346
|
+
(return-from greedy-aux (funcall next-fn start-pos)))
|
347
|
+
(setf (svref *last-pos-stores* zero-length-num) start-pos)
|
348
|
+
(or (prog1
|
349
|
+
(funcall repeat-matcher start-pos)
|
350
|
+
(setf (svref *last-pos-stores* zero-length-num) old-last-pos))
|
351
|
+
(funcall next-fn start-pos)))))
|
352
|
+
(setq repeat-matcher
|
353
|
+
(create-matcher-aux (regex repetition) #'greedy-aux))
|
354
|
+
(lambda (start-pos)
|
355
|
+
(declare (fixnum start-pos))
|
356
|
+
(setf (svref *last-pos-stores* zero-length-num) nil)
|
357
|
+
(greedy-aux start-pos)))))))
|
358
|
+
|
359
|
+
;; code for non-greedy repetitions with minimum zero
|
360
|
+
|
361
|
+
(defmacro non-greedy-constant-length-closure (check-curr-pos)
|
362
|
+
"This is the template for simple non-greedy repetitions \(where
|
363
|
+
simple means that the minimum number of repetitions is zero, that the
|
364
|
+
inner regex to be checked is of fixed length LEN, and that it doesn't
|
365
|
+
contain registers, i.e. there's no need for backtracking).
|
366
|
+
CHECK-CURR-POS is a form which checks whether the inner regex of the
|
367
|
+
repetition matches at CURR-POS."
|
368
|
+
`(if maximum
|
369
|
+
(lambda (start-pos)
|
370
|
+
(declare (fixnum start-pos maximum))
|
371
|
+
;; because we know LEN we know in advance where to stop at the
|
372
|
+
;; latest; we also take into consideration MIN-REST, i.e. the
|
373
|
+
;; minimal length of the part behind the repetition
|
374
|
+
(let ((target-end-pos (min (1+ (- *end-pos* len min-rest))
|
375
|
+
(+ start-pos
|
376
|
+
(the fixnum (* len maximum))))))
|
377
|
+
;; move forward by LEN and always try NEXT-FN first, then
|
378
|
+
;; CHECK-CUR-POS
|
379
|
+
(loop for curr-pos of-type fixnum from start-pos
|
380
|
+
below target-end-pos
|
381
|
+
by len
|
382
|
+
thereis (funcall next-fn curr-pos)
|
383
|
+
while ,check-curr-pos
|
384
|
+
finally (return (funcall next-fn curr-pos)))))
|
385
|
+
;; basically the same code; it's just a bit easier because we're
|
386
|
+
;; not bounded by MAXIMUM
|
387
|
+
(lambda (start-pos)
|
388
|
+
(declare (fixnum start-pos))
|
389
|
+
(let ((target-end-pos (1+ (- *end-pos* len min-rest))))
|
390
|
+
(loop for curr-pos of-type fixnum from start-pos
|
391
|
+
below target-end-pos
|
392
|
+
by len
|
393
|
+
thereis (funcall next-fn curr-pos)
|
394
|
+
while ,check-curr-pos
|
395
|
+
finally (return (funcall next-fn curr-pos)))))))
|
396
|
+
|
397
|
+
(defgeneric create-non-greedy-constant-length-matcher (repetition next-fn)
|
398
|
+
(declare #.*standard-optimize-settings*)
|
399
|
+
(:documentation "Creates a closure which tries to match REPETITION.
|
400
|
+
It is assumed that REPETITION is non-greedy and the minimal number of
|
401
|
+
repetitions is zero. It is furthermore assumed that the inner regex
|
402
|
+
of REPETITION is of fixed length and doesn't contain registers."))
|
403
|
+
|
404
|
+
(defmethod create-non-greedy-constant-length-matcher ((repetition repetition) next-fn)
|
405
|
+
(declare #.*standard-optimize-settings*)
|
406
|
+
(let ((len (len repetition))
|
407
|
+
(maximum (maximum repetition))
|
408
|
+
(regex (regex repetition))
|
409
|
+
(min-rest (min-rest repetition)))
|
410
|
+
(declare (fixnum len min-rest)
|
411
|
+
(function next-fn))
|
412
|
+
(cond ((zerop len)
|
413
|
+
;; inner regex has zero-length, so we can discard it
|
414
|
+
;; completely
|
415
|
+
next-fn)
|
416
|
+
(t
|
417
|
+
;; now first try to optimize for a couple of common cases
|
418
|
+
(typecase regex
|
419
|
+
(str
|
420
|
+
(let ((str (str regex)))
|
421
|
+
(if (= 1 len)
|
422
|
+
;; a single character
|
423
|
+
(let ((chr (schar str 0)))
|
424
|
+
(if (case-insensitive-p regex)
|
425
|
+
(non-greedy-constant-length-closure
|
426
|
+
(char-equal chr (schar *string* curr-pos)))
|
427
|
+
(non-greedy-constant-length-closure
|
428
|
+
(char= chr (schar *string* curr-pos)))))
|
429
|
+
;; a string
|
430
|
+
(if (case-insensitive-p regex)
|
431
|
+
(non-greedy-constant-length-closure
|
432
|
+
(*string*-equal str curr-pos (+ curr-pos len) 0 len))
|
433
|
+
(non-greedy-constant-length-closure
|
434
|
+
(*string*= str curr-pos (+ curr-pos len) 0 len))))))
|
435
|
+
(char-class
|
436
|
+
;; a character class
|
437
|
+
(insert-char-class-tester (regex (schar *string* curr-pos))
|
438
|
+
(non-greedy-constant-length-closure
|
439
|
+
(char-class-test))))
|
440
|
+
(everything
|
441
|
+
(if (single-line-p regex)
|
442
|
+
;; a dot which really can match everything; we rely
|
443
|
+
;; on the compiler to optimize this away
|
444
|
+
(non-greedy-constant-length-closure
|
445
|
+
t)
|
446
|
+
;; a dot which has to watch out for #\Newline
|
447
|
+
(non-greedy-constant-length-closure
|
448
|
+
(char/= #\Newline (schar *string* curr-pos)))))
|
449
|
+
(t
|
450
|
+
;; the general case - we build an inner matcher which
|
451
|
+
;; just checks for immediate success, i.e. NEXT-FN is
|
452
|
+
;; #'IDENTITY
|
453
|
+
(let ((inner-matcher (create-matcher-aux regex #'identity)))
|
454
|
+
(declare (function inner-matcher))
|
455
|
+
(non-greedy-constant-length-closure
|
456
|
+
(funcall inner-matcher curr-pos)))))))))
|
457
|
+
|
458
|
+
(defgeneric create-non-greedy-no-zero-matcher (repetition next-fn)
|
459
|
+
(declare #.*standard-optimize-settings*)
|
460
|
+
(:documentation "Creates a closure which tries to match REPETITION.
|
461
|
+
It is assumed that REPETITION is non-greedy and the minimal number of
|
462
|
+
repetitions is zero. It is furthermore assumed that the inner regex
|
463
|
+
of REPETITION can never match a zero-length string \(or instead the
|
464
|
+
maximal number of repetitions is 1)."))
|
465
|
+
|
466
|
+
(defmethod create-non-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
467
|
+
(declare #.*standard-optimize-settings*)
|
468
|
+
(let ((maximum (maximum repetition))
|
469
|
+
;; REPEAT-MATCHER is part of the closure's environment but it
|
470
|
+
;; can only be defined after NON-GREEDY-AUX is defined
|
471
|
+
repeat-matcher)
|
472
|
+
(declare (function next-fn))
|
473
|
+
(cond
|
474
|
+
((eql maximum 1)
|
475
|
+
;; this is essentially like the next case but with a known
|
476
|
+
;; MAXIMUM of 1 we can get away without a counter
|
477
|
+
(setq repeat-matcher
|
478
|
+
(create-matcher-aux (regex repetition) next-fn))
|
479
|
+
(lambda (start-pos)
|
480
|
+
(declare (function repeat-matcher))
|
481
|
+
(or (funcall next-fn start-pos)
|
482
|
+
(funcall repeat-matcher start-pos))))
|
483
|
+
(maximum
|
484
|
+
;; we make a reservation for our slot in *REPEAT-COUNTERS*
|
485
|
+
;; because we need to keep track whether we've reached MAXIMUM
|
486
|
+
;; repetitions
|
487
|
+
(let ((rep-num (incf-after *rep-num*)))
|
488
|
+
(flet ((non-greedy-aux (start-pos)
|
489
|
+
;; the actual matcher which first calls NEXT-FN and
|
490
|
+
;; on failure tries to match the inner regex of
|
491
|
+
;; REPETITION (if we haven't done so too often)
|
492
|
+
(declare (fixnum start-pos maximum rep-num)
|
493
|
+
(function repeat-matcher))
|
494
|
+
(or (funcall next-fn start-pos)
|
495
|
+
(and (< (aref *repeat-counters* rep-num) maximum)
|
496
|
+
(incf (aref *repeat-counters* rep-num))
|
497
|
+
;; note that REPEAT-MATCHER will call
|
498
|
+
;; NON-GREEDY-AUX again recursively
|
499
|
+
(prog1
|
500
|
+
(funcall repeat-matcher start-pos)
|
501
|
+
(decf (aref *repeat-counters* rep-num)))))))
|
502
|
+
;; create a closure to match the inner regex and to
|
503
|
+
;; implement backtracking via NON-GREEDY-AUX
|
504
|
+
(setq repeat-matcher
|
505
|
+
(create-matcher-aux (regex repetition) #'non-greedy-aux))
|
506
|
+
;; the closure we return is just a thin wrapper around
|
507
|
+
;; NON-GREEDY-AUX to initialize the repetition counter
|
508
|
+
(lambda (start-pos)
|
509
|
+
(declare (fixnum start-pos))
|
510
|
+
(setf (aref *repeat-counters* rep-num) 0)
|
511
|
+
(non-greedy-aux start-pos)))))
|
512
|
+
(t
|
513
|
+
;; easier code because we're not bounded by MAXIMUM, but
|
514
|
+
;; basically the same
|
515
|
+
(flet ((non-greedy-aux (start-pos)
|
516
|
+
(declare (fixnum start-pos)
|
517
|
+
(function repeat-matcher))
|
518
|
+
(or (funcall next-fn start-pos)
|
519
|
+
(funcall repeat-matcher start-pos))))
|
520
|
+
(setq repeat-matcher
|
521
|
+
(create-matcher-aux (regex repetition) #'non-greedy-aux))
|
522
|
+
#'non-greedy-aux)))))
|
523
|
+
|
524
|
+
(defgeneric create-non-greedy-matcher (repetition next-fn)
|
525
|
+
(declare #.*standard-optimize-settings*)
|
526
|
+
(:documentation "Creates a closure which tries to match REPETITION.
|
527
|
+
It is assumed that REPETITION is non-greedy and the minimal number of
|
528
|
+
repetitions is zero."))
|
529
|
+
|
530
|
+
(defmethod create-non-greedy-matcher ((repetition repetition) next-fn)
|
531
|
+
(declare #.*standard-optimize-settings*)
|
532
|
+
;; we make a reservation for our slot in *LAST-POS-STORES* because
|
533
|
+
;; we have to watch out for endless loops as the inner regex might
|
534
|
+
;; match zero-length strings
|
535
|
+
(let ((zero-length-num (incf-after *zero-length-num*))
|
536
|
+
(maximum (maximum repetition))
|
537
|
+
;; REPEAT-MATCHER is part of the closure's environment but it
|
538
|
+
;; can only be defined after NON-GREEDY-AUX is defined
|
539
|
+
repeat-matcher)
|
540
|
+
(declare (fixnum zero-length-num)
|
541
|
+
(function next-fn))
|
542
|
+
(cond
|
543
|
+
(maximum
|
544
|
+
;; we make a reservation for our slot in *REPEAT-COUNTERS*
|
545
|
+
;; because we need to keep track whether we've reached MAXIMUM
|
546
|
+
;; repetitions
|
547
|
+
(let ((rep-num (incf-after *rep-num*)))
|
548
|
+
(flet ((non-greedy-aux (start-pos)
|
549
|
+
;; the actual matcher which first calls NEXT-FN and
|
550
|
+
;; on failure tries to match the inner regex of
|
551
|
+
;; REPETITION (if we haven't done so too often)
|
552
|
+
(declare (fixnum start-pos maximum rep-num)
|
553
|
+
(function repeat-matcher))
|
554
|
+
(let ((old-last-pos
|
555
|
+
(svref *last-pos-stores* zero-length-num)))
|
556
|
+
(when (and old-last-pos
|
557
|
+
(= (the fixnum old-last-pos) start-pos))
|
558
|
+
;; stop immediately if we've been here before,
|
559
|
+
;; i.e. if the last attempt matched a zero-length
|
560
|
+
;; string
|
561
|
+
(return-from non-greedy-aux (funcall next-fn start-pos)))
|
562
|
+
;; otherwise remember this position for the next
|
563
|
+
;; repetition
|
564
|
+
(setf (svref *last-pos-stores* zero-length-num) start-pos)
|
565
|
+
(or (funcall next-fn start-pos)
|
566
|
+
(and (< (aref *repeat-counters* rep-num) maximum)
|
567
|
+
(incf (aref *repeat-counters* rep-num))
|
568
|
+
;; note that REPEAT-MATCHER will call
|
569
|
+
;; NON-GREEDY-AUX again recursively
|
570
|
+
(prog1
|
571
|
+
(funcall repeat-matcher start-pos)
|
572
|
+
(decf (aref *repeat-counters* rep-num))
|
573
|
+
(setf (svref *last-pos-stores* zero-length-num)
|
574
|
+
old-last-pos)))))))
|
575
|
+
;; create a closure to match the inner regex and to
|
576
|
+
;; implement backtracking via NON-GREEDY-AUX
|
577
|
+
(setq repeat-matcher
|
578
|
+
(create-matcher-aux (regex repetition) #'non-greedy-aux))
|
579
|
+
;; the closure we return is just a thin wrapper around
|
580
|
+
;; NON-GREEDY-AUX to initialize the repetition counter and our
|
581
|
+
;; slot in *LAST-POS-STORES*
|
582
|
+
(lambda (start-pos)
|
583
|
+
(declare (fixnum start-pos))
|
584
|
+
(setf (aref *repeat-counters* rep-num) 0
|
585
|
+
(svref *last-pos-stores* zero-length-num) nil)
|
586
|
+
(non-greedy-aux start-pos)))))
|
587
|
+
(t
|
588
|
+
;; easier code because we're not bounded by MAXIMUM, but
|
589
|
+
;; basically the same
|
590
|
+
(flet ((non-greedy-aux (start-pos)
|
591
|
+
(declare (fixnum start-pos)
|
592
|
+
(function repeat-matcher))
|
593
|
+
(let ((old-last-pos
|
594
|
+
(svref *last-pos-stores* zero-length-num)))
|
595
|
+
(when (and old-last-pos
|
596
|
+
(= (the fixnum old-last-pos) start-pos))
|
597
|
+
(return-from non-greedy-aux (funcall next-fn start-pos)))
|
598
|
+
(setf (svref *last-pos-stores* zero-length-num) start-pos)
|
599
|
+
(or (funcall next-fn start-pos)
|
600
|
+
(prog1
|
601
|
+
(funcall repeat-matcher start-pos)
|
602
|
+
(setf (svref *last-pos-stores* zero-length-num)
|
603
|
+
old-last-pos))))))
|
604
|
+
(setq repeat-matcher
|
605
|
+
(create-matcher-aux (regex repetition) #'non-greedy-aux))
|
606
|
+
(lambda (start-pos)
|
607
|
+
(declare (fixnum start-pos))
|
608
|
+
(setf (svref *last-pos-stores* zero-length-num) nil)
|
609
|
+
(non-greedy-aux start-pos)))))))
|
610
|
+
|
611
|
+
;; code for constant repetitions, i.e. those with a fixed number of repetitions
|
612
|
+
|
613
|
+
(defmacro constant-repetition-constant-length-closure (check-curr-pos)
|
614
|
+
"This is the template for simple constant repetitions (where simple
|
615
|
+
means that the inner regex to be checked is of fixed length LEN, and
|
616
|
+
that it doesn't contain registers, i.e. there's no need for
|
617
|
+
backtracking) and where constant means that MINIMUM is equal to
|
618
|
+
MAXIMUM. CHECK-CURR-POS is a form which checks whether the inner
|
619
|
+
regex of the repetition matches at CURR-POS."
|
620
|
+
`(lambda (start-pos)
|
621
|
+
(declare (fixnum start-pos))
|
622
|
+
(let ((target-end-pos (+ start-pos
|
623
|
+
(the fixnum (* len repetitions)))))
|
624
|
+
(declare (fixnum target-end-pos))
|
625
|
+
;; first check if we won't go beyond the end of the string
|
626
|
+
(and (>= *end-pos* target-end-pos)
|
627
|
+
;; then loop through all repetitions step by step
|
628
|
+
(loop for curr-pos of-type fixnum from start-pos
|
629
|
+
below target-end-pos
|
630
|
+
by len
|
631
|
+
always ,check-curr-pos)
|
632
|
+
;; finally call NEXT-FN if we made it that far
|
633
|
+
(funcall next-fn target-end-pos)))))
|
634
|
+
|
635
|
+
(defgeneric create-constant-repetition-constant-length-matcher
|
636
|
+
(repetition next-fn)
|
637
|
+
(declare #.*standard-optimize-settings*)
|
638
|
+
(:documentation "Creates a closure which tries to match REPETITION.
|
639
|
+
It is assumed that REPETITION has a constant number of repetitions.
|
640
|
+
It is furthermore assumed that the inner regex of REPETITION is of
|
641
|
+
fixed length and doesn't contain registers."))
|
642
|
+
|
643
|
+
(defmethod create-constant-repetition-constant-length-matcher
|
644
|
+
((repetition repetition) next-fn)
|
645
|
+
(declare #.*standard-optimize-settings*)
|
646
|
+
(let ((len (len repetition))
|
647
|
+
(repetitions (minimum repetition))
|
648
|
+
(regex (regex repetition)))
|
649
|
+
(declare (fixnum len repetitions)
|
650
|
+
(function next-fn))
|
651
|
+
(if (zerop len)
|
652
|
+
;; if the length is zero it suffices to try once
|
653
|
+
(create-matcher-aux regex next-fn)
|
654
|
+
;; otherwise try to optimize for a couple of common cases
|
655
|
+
(typecase regex
|
656
|
+
(str
|
657
|
+
(let ((str (str regex)))
|
658
|
+
(if (= 1 len)
|
659
|
+
;; a single character
|
660
|
+
(let ((chr (schar str 0)))
|
661
|
+
(if (case-insensitive-p regex)
|
662
|
+
(constant-repetition-constant-length-closure
|
663
|
+
(and (char-equal chr (schar *string* curr-pos))
|
664
|
+
(1+ curr-pos)))
|
665
|
+
(constant-repetition-constant-length-closure
|
666
|
+
(and (char= chr (schar *string* curr-pos))
|
667
|
+
(1+ curr-pos)))))
|
668
|
+
;; a string
|
669
|
+
(if (case-insensitive-p regex)
|
670
|
+
(constant-repetition-constant-length-closure
|
671
|
+
(let ((next-pos (+ curr-pos len)))
|
672
|
+
(declare (fixnum next-pos))
|
673
|
+
(and (*string*-equal str curr-pos next-pos 0 len)
|
674
|
+
next-pos)))
|
675
|
+
(constant-repetition-constant-length-closure
|
676
|
+
(let ((next-pos (+ curr-pos len)))
|
677
|
+
(declare (fixnum next-pos))
|
678
|
+
(and (*string*= str curr-pos next-pos 0 len)
|
679
|
+
next-pos)))))))
|
680
|
+
(char-class
|
681
|
+
;; a character class
|
682
|
+
(insert-char-class-tester (regex (schar *string* curr-pos))
|
683
|
+
(constant-repetition-constant-length-closure
|
684
|
+
(and (char-class-test)
|
685
|
+
(1+ curr-pos)))))
|
686
|
+
(everything
|
687
|
+
(if (single-line-p regex)
|
688
|
+
;; a dot which really matches everything - we just have to
|
689
|
+
;; advance the index into *STRING* accordingly and check
|
690
|
+
;; if we didn't go past the end
|
691
|
+
(lambda (start-pos)
|
692
|
+
(declare (fixnum start-pos))
|
693
|
+
(let ((next-pos (+ start-pos repetitions)))
|
694
|
+
(declare (fixnum next-pos))
|
695
|
+
(and (<= next-pos *end-pos*)
|
696
|
+
(funcall next-fn next-pos))))
|
697
|
+
;; a dot which is not in single-line-mode - make sure we
|
698
|
+
;; don't match #\Newline
|
699
|
+
(constant-repetition-constant-length-closure
|
700
|
+
(and (char/= #\Newline (schar *string* curr-pos))
|
701
|
+
(1+ curr-pos)))))
|
702
|
+
(t
|
703
|
+
;; the general case - we build an inner matcher which just
|
704
|
+
;; checks for immediate success, i.e. NEXT-FN is #'IDENTITY
|
705
|
+
(let ((inner-matcher (create-matcher-aux regex #'identity)))
|
706
|
+
(declare (function inner-matcher))
|
707
|
+
(constant-repetition-constant-length-closure
|
708
|
+
(funcall inner-matcher curr-pos))))))))
|
709
|
+
|
710
|
+
(defgeneric create-constant-repetition-matcher (repetition next-fn)
|
711
|
+
(declare #.*standard-optimize-settings*)
|
712
|
+
(:documentation "Creates a closure which tries to match REPETITION.
|
713
|
+
It is assumed that REPETITION has a constant number of repetitions."))
|
714
|
+
|
715
|
+
(defmethod create-constant-repetition-matcher ((repetition repetition) next-fn)
|
716
|
+
(declare #.*standard-optimize-settings*)
|
717
|
+
(let ((repetitions (minimum repetition))
|
718
|
+
;; we make a reservation for our slot in *REPEAT-COUNTERS*
|
719
|
+
;; because we need to keep track of the number of repetitions
|
720
|
+
(rep-num (incf-after *rep-num*))
|
721
|
+
;; REPEAT-MATCHER is part of the closure's environment but it
|
722
|
+
;; can only be defined after NON-GREEDY-AUX is defined
|
723
|
+
repeat-matcher)
|
724
|
+
(declare (fixnum repetitions rep-num)
|
725
|
+
(function next-fn))
|
726
|
+
(if (zerop (min-len repetition))
|
727
|
+
;; we make a reservation for our slot in *LAST-POS-STORES*
|
728
|
+
;; because we have to watch out for needless loops as the inner
|
729
|
+
;; regex might match zero-length strings
|
730
|
+
(let ((zero-length-num (incf-after *zero-length-num*)))
|
731
|
+
(declare (fixnum zero-length-num))
|
732
|
+
(flet ((constant-aux (start-pos)
|
733
|
+
;; the actual matcher which first calls NEXT-FN and
|
734
|
+
;; on failure tries to match the inner regex of
|
735
|
+
;; REPETITION (if we haven't done so too often)
|
736
|
+
(declare (fixnum start-pos)
|
737
|
+
(function repeat-matcher))
|
738
|
+
(let ((old-last-pos
|
739
|
+
(svref *last-pos-stores* zero-length-num)))
|
740
|
+
(when (and old-last-pos
|
741
|
+
(= (the fixnum old-last-pos) start-pos))
|
742
|
+
;; if we've been here before we matched a
|
743
|
+
;; zero-length string the last time, so we can
|
744
|
+
;; just carry on because we will definitely be
|
745
|
+
;; able to do this again often enough
|
746
|
+
(return-from constant-aux (funcall next-fn start-pos)))
|
747
|
+
;; otherwise remember this position for the next
|
748
|
+
;; repetition
|
749
|
+
(setf (svref *last-pos-stores* zero-length-num) start-pos)
|
750
|
+
(cond ((< (aref *repeat-counters* rep-num) repetitions)
|
751
|
+
;; not enough repetitions yet, try it again
|
752
|
+
(incf (aref *repeat-counters* rep-num))
|
753
|
+
;; note that REPEAT-MATCHER will call
|
754
|
+
;; CONSTANT-AUX again recursively
|
755
|
+
(prog1
|
756
|
+
(funcall repeat-matcher start-pos)
|
757
|
+
(decf (aref *repeat-counters* rep-num))
|
758
|
+
(setf (svref *last-pos-stores* zero-length-num)
|
759
|
+
old-last-pos)))
|
760
|
+
(t
|
761
|
+
;; we're done - call NEXT-FN
|
762
|
+
(funcall next-fn start-pos))))))
|
763
|
+
;; create a closure to match the inner regex and to
|
764
|
+
;; implement backtracking via CONSTANT-AUX
|
765
|
+
(setq repeat-matcher
|
766
|
+
(create-matcher-aux (regex repetition) #'constant-aux))
|
767
|
+
;; the closure we return is just a thin wrapper around
|
768
|
+
;; CONSTANT-AUX to initialize the repetition counter
|
769
|
+
(lambda (start-pos)
|
770
|
+
(declare (fixnum start-pos))
|
771
|
+
(setf (aref *repeat-counters* rep-num) 0
|
772
|
+
(aref *last-pos-stores* zero-length-num) nil)
|
773
|
+
(constant-aux start-pos))))
|
774
|
+
;; easier code because we don't have to care about zero-length
|
775
|
+
;; matches but basically the same
|
776
|
+
(flet ((constant-aux (start-pos)
|
777
|
+
(declare (fixnum start-pos)
|
778
|
+
(function repeat-matcher))
|
779
|
+
(cond ((< (aref *repeat-counters* rep-num) repetitions)
|
780
|
+
(incf (aref *repeat-counters* rep-num))
|
781
|
+
(prog1
|
782
|
+
(funcall repeat-matcher start-pos)
|
783
|
+
(decf (aref *repeat-counters* rep-num))))
|
784
|
+
(t (funcall next-fn start-pos)))))
|
785
|
+
(setq repeat-matcher
|
786
|
+
(create-matcher-aux (regex repetition) #'constant-aux))
|
787
|
+
(lambda (start-pos)
|
788
|
+
(declare (fixnum start-pos))
|
789
|
+
(setf (aref *repeat-counters* rep-num) 0)
|
790
|
+
(constant-aux start-pos))))))
|
791
|
+
|
792
|
+
;; the actual CREATE-MATCHER-AUX method for REPETITION objects which
|
793
|
+
;; utilizes all the functions and macros defined above
|
794
|
+
|
795
|
+
(defmethod create-matcher-aux ((repetition repetition) next-fn)
|
796
|
+
(declare #.*standard-optimize-settings*)
|
797
|
+
(with-slots (minimum maximum len min-len greedyp contains-register-p)
|
798
|
+
repetition
|
799
|
+
(cond ((and maximum
|
800
|
+
(zerop maximum))
|
801
|
+
;; this should have been optimized away by CONVERT but just
|
802
|
+
;; in case...
|
803
|
+
(error "Got REPETITION with MAXIMUM 0 \(should not happen)"))
|
804
|
+
((and maximum
|
805
|
+
(= minimum maximum 1))
|
806
|
+
;; this should have been optimized away by CONVERT but just
|
807
|
+
;; in case...
|
808
|
+
(error "Got REPETITION with MAXIMUM 1 and MINIMUM 1 \(should not happen)"))
|
809
|
+
((and (eql minimum maximum)
|
810
|
+
len
|
811
|
+
(not contains-register-p))
|
812
|
+
(create-constant-repetition-constant-length-matcher repetition next-fn))
|
813
|
+
((eql minimum maximum)
|
814
|
+
(create-constant-repetition-matcher repetition next-fn))
|
815
|
+
((and greedyp
|
816
|
+
len
|
817
|
+
(not contains-register-p))
|
818
|
+
(create-greedy-constant-length-matcher repetition next-fn))
|
819
|
+
((and greedyp
|
820
|
+
(or (plusp min-len)
|
821
|
+
(eql maximum 1)))
|
822
|
+
(create-greedy-no-zero-matcher repetition next-fn))
|
823
|
+
(greedyp
|
824
|
+
(create-greedy-matcher repetition next-fn))
|
825
|
+
((and len
|
826
|
+
(plusp len)
|
827
|
+
(not contains-register-p))
|
828
|
+
(create-non-greedy-constant-length-matcher repetition next-fn))
|
829
|
+
((or (plusp min-len)
|
830
|
+
(eql maximum 1))
|
831
|
+
(create-non-greedy-no-zero-matcher repetition next-fn))
|
832
|
+
(t
|
833
|
+
(create-non-greedy-matcher repetition next-fn)))))
|