clucumber 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.md +4 -9
- data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
- data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
- data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
- data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
- data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
- data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
- data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
- data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
- data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
- data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
- data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
- data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
- data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
- data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
- data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
- data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
- data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
- data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
- data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
- data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
- data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
- data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
- data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
- data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
- data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
- data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
- data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
- data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
- data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
- data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
- data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
- data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
- data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
- data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
- data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
- data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
- data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
- data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
- data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
- data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
- data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
- data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
- data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
- data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
- data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
- data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
- data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
- data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
- data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
- data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
- data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
- data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
- data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
- data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
- data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
- data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
- data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
- data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
- data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
- data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
- data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
- data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
- data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
- data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
- data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
- data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
- data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
- data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
- data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
- data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
- data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
- data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
- data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
- data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
- data/lib/clucumber/vendor/lift/lift.asd +77 -0
- data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
- data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
- data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
- data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
- data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
- data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
- data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
- data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
- data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
- data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
- data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
- data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
- data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
- data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
- data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
- data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
- data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
- data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
- data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
- data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
- data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
- data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
- data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
- data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
- data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
- data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
- data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
- data/lib/clucumber/vendor/usocket/package.lisp +82 -0
- data/lib/clucumber/vendor/usocket/server.lisp +45 -0
- data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
- data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
- data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
- data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
- data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
- data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
- data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
- data/lib/clucumber.rb +29 -7
- metadata +151 -5
@@ -0,0 +1,555 @@
|
|
1
|
+
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
2
|
+
;;; $Header: /usr/local/cvsrep/cl-ppcre/regex-class-util.lisp,v 1.9 2009/09/17 19:17:31 edi Exp $
|
3
|
+
|
4
|
+
;;; This file contains some utility methods for REGEX objects.
|
5
|
+
|
6
|
+
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
|
7
|
+
|
8
|
+
;;; Redistribution and use in source and binary forms, with or without
|
9
|
+
;;; modification, are permitted provided that the following conditions
|
10
|
+
;;; are met:
|
11
|
+
|
12
|
+
;;; * Redistributions of source code must retain the above copyright
|
13
|
+
;;; notice, this list of conditions and the following disclaimer.
|
14
|
+
|
15
|
+
;;; * Redistributions in binary form must reproduce the above
|
16
|
+
;;; copyright notice, this list of conditions and the following
|
17
|
+
;;; disclaimer in the documentation and/or other materials
|
18
|
+
;;; provided with the distribution.
|
19
|
+
|
20
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
21
|
+
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
22
|
+
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
23
|
+
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
24
|
+
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
25
|
+
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
26
|
+
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
27
|
+
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
28
|
+
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
29
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
30
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
|
32
|
+
(in-package :cl-ppcre)
|
33
|
+
|
34
|
+
;;; The following four methods allow a VOID object to behave like a
|
35
|
+
;;; zero-length STR object (only readers needed)
|
36
|
+
|
37
|
+
(defmethod len ((void void))
|
38
|
+
(declare #.*standard-optimize-settings*)
|
39
|
+
0)
|
40
|
+
|
41
|
+
(defmethod str ((void void))
|
42
|
+
(declare #.*standard-optimize-settings*)
|
43
|
+
"")
|
44
|
+
|
45
|
+
(defmethod skip ((void void))
|
46
|
+
(declare #.*standard-optimize-settings*)
|
47
|
+
nil)
|
48
|
+
|
49
|
+
(defmethod start-of-end-string-p ((void void))
|
50
|
+
(declare #.*standard-optimize-settings*)
|
51
|
+
nil)
|
52
|
+
|
53
|
+
(defgeneric case-mode (regex old-case-mode)
|
54
|
+
(declare #.*standard-optimize-settings*)
|
55
|
+
(:documentation "Utility function used by the optimizer (see GATHER-STRINGS).
|
56
|
+
Returns a keyword denoting the case-(in)sensitivity of a STR or its
|
57
|
+
second argument if the STR has length 0. Returns NIL for REGEX objects
|
58
|
+
which are not of type STR."))
|
59
|
+
|
60
|
+
(defmethod case-mode ((str str) old-case-mode)
|
61
|
+
(declare #.*standard-optimize-settings*)
|
62
|
+
(cond ((zerop (len str))
|
63
|
+
old-case-mode)
|
64
|
+
((case-insensitive-p str)
|
65
|
+
:case-insensitive)
|
66
|
+
(t
|
67
|
+
:case-sensitive)))
|
68
|
+
|
69
|
+
(defmethod case-mode ((regex regex) old-case-mode)
|
70
|
+
(declare #.*standard-optimize-settings*)
|
71
|
+
(declare (ignore old-case-mode))
|
72
|
+
nil)
|
73
|
+
|
74
|
+
(defgeneric copy-regex (regex)
|
75
|
+
(declare #.*standard-optimize-settings*)
|
76
|
+
(:documentation "Implements a deep copy of a REGEX object."))
|
77
|
+
|
78
|
+
(defmethod copy-regex ((anchor anchor))
|
79
|
+
(declare #.*standard-optimize-settings*)
|
80
|
+
(make-instance 'anchor
|
81
|
+
:startp (startp anchor)
|
82
|
+
:multi-line-p (multi-line-p anchor)
|
83
|
+
:no-newline-p (no-newline-p anchor)))
|
84
|
+
|
85
|
+
(defmethod copy-regex ((everything everything))
|
86
|
+
(declare #.*standard-optimize-settings*)
|
87
|
+
(make-instance 'everything
|
88
|
+
:single-line-p (single-line-p everything)))
|
89
|
+
|
90
|
+
(defmethod copy-regex ((word-boundary word-boundary))
|
91
|
+
(declare #.*standard-optimize-settings*)
|
92
|
+
(make-instance 'word-boundary
|
93
|
+
:negatedp (negatedp word-boundary)))
|
94
|
+
|
95
|
+
(defmethod copy-regex ((void void))
|
96
|
+
(declare #.*standard-optimize-settings*)
|
97
|
+
(make-instance 'void))
|
98
|
+
|
99
|
+
(defmethod copy-regex ((lookahead lookahead))
|
100
|
+
(declare #.*standard-optimize-settings*)
|
101
|
+
(make-instance 'lookahead
|
102
|
+
:regex (copy-regex (regex lookahead))
|
103
|
+
:positivep (positivep lookahead)))
|
104
|
+
|
105
|
+
(defmethod copy-regex ((seq seq))
|
106
|
+
(declare #.*standard-optimize-settings*)
|
107
|
+
(make-instance 'seq
|
108
|
+
:elements (mapcar #'copy-regex (elements seq))))
|
109
|
+
|
110
|
+
(defmethod copy-regex ((alternation alternation))
|
111
|
+
(declare #.*standard-optimize-settings*)
|
112
|
+
(make-instance 'alternation
|
113
|
+
:choices (mapcar #'copy-regex (choices alternation))))
|
114
|
+
|
115
|
+
(defmethod copy-regex ((branch branch))
|
116
|
+
(declare #.*standard-optimize-settings*)
|
117
|
+
(with-slots (test)
|
118
|
+
branch
|
119
|
+
(make-instance 'branch
|
120
|
+
:test (if (typep test 'regex)
|
121
|
+
(copy-regex test)
|
122
|
+
test)
|
123
|
+
:then-regex (copy-regex (then-regex branch))
|
124
|
+
:else-regex (copy-regex (else-regex branch)))))
|
125
|
+
|
126
|
+
(defmethod copy-regex ((lookbehind lookbehind))
|
127
|
+
(declare #.*standard-optimize-settings*)
|
128
|
+
(make-instance 'lookbehind
|
129
|
+
:regex (copy-regex (regex lookbehind))
|
130
|
+
:positivep (positivep lookbehind)
|
131
|
+
:len (len lookbehind)))
|
132
|
+
|
133
|
+
(defmethod copy-regex ((repetition repetition))
|
134
|
+
(declare #.*standard-optimize-settings*)
|
135
|
+
(make-instance 'repetition
|
136
|
+
:regex (copy-regex (regex repetition))
|
137
|
+
:greedyp (greedyp repetition)
|
138
|
+
:minimum (minimum repetition)
|
139
|
+
:maximum (maximum repetition)
|
140
|
+
:min-len (min-len repetition)
|
141
|
+
:len (len repetition)
|
142
|
+
:contains-register-p (contains-register-p repetition)))
|
143
|
+
|
144
|
+
(defmethod copy-regex ((register register))
|
145
|
+
(declare #.*standard-optimize-settings*)
|
146
|
+
(make-instance 'register
|
147
|
+
:regex (copy-regex (regex register))
|
148
|
+
:num (num register)
|
149
|
+
:name (name register)))
|
150
|
+
|
151
|
+
(defmethod copy-regex ((standalone standalone))
|
152
|
+
(declare #.*standard-optimize-settings*)
|
153
|
+
(make-instance 'standalone
|
154
|
+
:regex (copy-regex (regex standalone))))
|
155
|
+
|
156
|
+
(defmethod copy-regex ((back-reference back-reference))
|
157
|
+
(declare #.*standard-optimize-settings*)
|
158
|
+
(make-instance 'back-reference
|
159
|
+
:num (num back-reference)
|
160
|
+
:case-insensitive-p (case-insensitive-p back-reference)))
|
161
|
+
|
162
|
+
(defmethod copy-regex ((char-class char-class))
|
163
|
+
(declare #.*standard-optimize-settings*)
|
164
|
+
(make-instance 'char-class
|
165
|
+
:test-function (test-function char-class)))
|
166
|
+
|
167
|
+
(defmethod copy-regex ((str str))
|
168
|
+
(declare #.*standard-optimize-settings*)
|
169
|
+
(make-instance 'str
|
170
|
+
:str (str str)
|
171
|
+
:case-insensitive-p (case-insensitive-p str)))
|
172
|
+
|
173
|
+
(defmethod copy-regex ((filter filter))
|
174
|
+
(declare #.*standard-optimize-settings*)
|
175
|
+
(make-instance 'filter
|
176
|
+
:fn (fn filter)
|
177
|
+
:len (len filter)))
|
178
|
+
|
179
|
+
;;; Note that COPY-REGEX and REMOVE-REGISTERS could have easily been
|
180
|
+
;;; wrapped into one function. Maybe in the next release...
|
181
|
+
|
182
|
+
;;; Further note that this function is used by CONVERT to factor out
|
183
|
+
;;; complicated repetitions, i.e. cases like
|
184
|
+
;;; (a)* -> (?:a*(a))?
|
185
|
+
;;; This won't work for, say,
|
186
|
+
;;; ((a)|(b))* -> (?:(?:a|b)*((a)|(b)))?
|
187
|
+
;;; and therefore we stop REGISTER removal once we see an ALTERNATION.
|
188
|
+
|
189
|
+
(defgeneric remove-registers (regex)
|
190
|
+
(declare #.*standard-optimize-settings*)
|
191
|
+
(:documentation "Returns a deep copy of a REGEX (see COPY-REGEX) and
|
192
|
+
optionally removes embedded REGISTER objects if possible and if the
|
193
|
+
special variable REMOVE-REGISTERS-P is true."))
|
194
|
+
|
195
|
+
(defmethod remove-registers ((register register))
|
196
|
+
(declare #.*standard-optimize-settings*)
|
197
|
+
(declare (special remove-registers-p reg-seen))
|
198
|
+
(cond (remove-registers-p
|
199
|
+
(remove-registers (regex register)))
|
200
|
+
(t
|
201
|
+
;; mark REG-SEEN as true so enclosing REPETITION objects
|
202
|
+
;; (see method below) know if they contain a register or not
|
203
|
+
(setq reg-seen t)
|
204
|
+
(copy-regex register))))
|
205
|
+
|
206
|
+
(defmethod remove-registers ((repetition repetition))
|
207
|
+
(declare #.*standard-optimize-settings*)
|
208
|
+
(let* (reg-seen
|
209
|
+
(inner-regex (remove-registers (regex repetition))))
|
210
|
+
;; REMOVE-REGISTERS will set REG-SEEN (see method above) if
|
211
|
+
;; (REGEX REPETITION) contains a REGISTER
|
212
|
+
(declare (special reg-seen))
|
213
|
+
(make-instance 'repetition
|
214
|
+
:regex inner-regex
|
215
|
+
:greedyp (greedyp repetition)
|
216
|
+
:minimum (minimum repetition)
|
217
|
+
:maximum (maximum repetition)
|
218
|
+
:min-len (min-len repetition)
|
219
|
+
:len (len repetition)
|
220
|
+
:contains-register-p reg-seen)))
|
221
|
+
|
222
|
+
(defmethod remove-registers ((standalone standalone))
|
223
|
+
(declare #.*standard-optimize-settings*)
|
224
|
+
(make-instance 'standalone
|
225
|
+
:regex (remove-registers (regex standalone))))
|
226
|
+
|
227
|
+
(defmethod remove-registers ((lookahead lookahead))
|
228
|
+
(declare #.*standard-optimize-settings*)
|
229
|
+
(make-instance 'lookahead
|
230
|
+
:regex (remove-registers (regex lookahead))
|
231
|
+
:positivep (positivep lookahead)))
|
232
|
+
|
233
|
+
(defmethod remove-registers ((lookbehind lookbehind))
|
234
|
+
(declare #.*standard-optimize-settings*)
|
235
|
+
(make-instance 'lookbehind
|
236
|
+
:regex (remove-registers (regex lookbehind))
|
237
|
+
:positivep (positivep lookbehind)
|
238
|
+
:len (len lookbehind)))
|
239
|
+
|
240
|
+
(defmethod remove-registers ((branch branch))
|
241
|
+
(declare #.*standard-optimize-settings*)
|
242
|
+
(with-slots (test)
|
243
|
+
branch
|
244
|
+
(make-instance 'branch
|
245
|
+
:test (if (typep test 'regex)
|
246
|
+
(remove-registers test)
|
247
|
+
test)
|
248
|
+
:then-regex (remove-registers (then-regex branch))
|
249
|
+
:else-regex (remove-registers (else-regex branch)))))
|
250
|
+
|
251
|
+
(defmethod remove-registers ((alternation alternation))
|
252
|
+
(declare #.*standard-optimize-settings*)
|
253
|
+
(declare (special remove-registers-p))
|
254
|
+
;; an ALTERNATION, so we can't remove REGISTER objects further down
|
255
|
+
(setq remove-registers-p nil)
|
256
|
+
(copy-regex alternation))
|
257
|
+
|
258
|
+
(defmethod remove-registers ((regex regex))
|
259
|
+
(declare #.*standard-optimize-settings*)
|
260
|
+
(copy-regex regex))
|
261
|
+
|
262
|
+
(defmethod remove-registers ((seq seq))
|
263
|
+
(declare #.*standard-optimize-settings*)
|
264
|
+
(make-instance 'seq
|
265
|
+
:elements (mapcar #'remove-registers (elements seq))))
|
266
|
+
|
267
|
+
(defgeneric everythingp (regex)
|
268
|
+
(declare #.*standard-optimize-settings*)
|
269
|
+
(:documentation "Returns an EVERYTHING object if REGEX is equivalent
|
270
|
+
to this object, otherwise NIL. So, \"(.){1}\" would return true
|
271
|
+
\(i.e. the object corresponding to \".\", for example."))
|
272
|
+
|
273
|
+
(defmethod everythingp ((seq seq))
|
274
|
+
(declare #.*standard-optimize-settings*)
|
275
|
+
;; we might have degenerate cases like (:SEQUENCE :VOID ...)
|
276
|
+
;; due to the parsing process
|
277
|
+
(let ((cleaned-elements (remove-if #'(lambda (element)
|
278
|
+
(typep element 'void))
|
279
|
+
(elements seq))))
|
280
|
+
(and (= 1 (length cleaned-elements))
|
281
|
+
(everythingp (first cleaned-elements)))))
|
282
|
+
|
283
|
+
(defmethod everythingp ((alternation alternation))
|
284
|
+
(declare #.*standard-optimize-settings*)
|
285
|
+
(with-slots (choices)
|
286
|
+
alternation
|
287
|
+
(and (= 1 (length choices))
|
288
|
+
;; this is unlikely to happen for human-generated regexes,
|
289
|
+
;; but machine-generated ones might look like this
|
290
|
+
(everythingp (first choices)))))
|
291
|
+
|
292
|
+
(defmethod everythingp ((repetition repetition))
|
293
|
+
(declare #.*standard-optimize-settings*)
|
294
|
+
(with-slots (maximum minimum regex)
|
295
|
+
repetition
|
296
|
+
(and maximum
|
297
|
+
(= 1 minimum maximum)
|
298
|
+
;; treat "<regex>{1,1}" like "<regex>"
|
299
|
+
(everythingp regex))))
|
300
|
+
|
301
|
+
(defmethod everythingp ((register register))
|
302
|
+
(declare #.*standard-optimize-settings*)
|
303
|
+
(everythingp (regex register)))
|
304
|
+
|
305
|
+
(defmethod everythingp ((standalone standalone))
|
306
|
+
(declare #.*standard-optimize-settings*)
|
307
|
+
(everythingp (regex standalone)))
|
308
|
+
|
309
|
+
(defmethod everythingp ((everything everything))
|
310
|
+
(declare #.*standard-optimize-settings*)
|
311
|
+
everything)
|
312
|
+
|
313
|
+
(defmethod everythingp ((regex regex))
|
314
|
+
(declare #.*standard-optimize-settings*)
|
315
|
+
;; the general case for ANCHOR, BACK-REFERENCE, BRANCH, CHAR-CLASS,
|
316
|
+
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY
|
317
|
+
nil)
|
318
|
+
|
319
|
+
(defgeneric regex-length (regex)
|
320
|
+
(declare #.*standard-optimize-settings*)
|
321
|
+
(:documentation "Return the length of REGEX if it is fixed, NIL otherwise."))
|
322
|
+
|
323
|
+
(defmethod regex-length ((seq seq))
|
324
|
+
(declare #.*standard-optimize-settings*)
|
325
|
+
;; simply add all inner lengths unless one of them is NIL
|
326
|
+
(loop for sub-regex in (elements seq)
|
327
|
+
for len = (regex-length sub-regex)
|
328
|
+
if (not len) do (return nil)
|
329
|
+
sum len))
|
330
|
+
|
331
|
+
(defmethod regex-length ((alternation alternation))
|
332
|
+
(declare #.*standard-optimize-settings*)
|
333
|
+
;; only return a true value if all inner lengths are non-NIL and
|
334
|
+
;; mutually equal
|
335
|
+
(loop for sub-regex in (choices alternation)
|
336
|
+
for old-len = nil then len
|
337
|
+
for len = (regex-length sub-regex)
|
338
|
+
if (or (not len)
|
339
|
+
(and old-len (/= len old-len))) do (return nil)
|
340
|
+
finally (return len)))
|
341
|
+
|
342
|
+
(defmethod regex-length ((branch branch))
|
343
|
+
(declare #.*standard-optimize-settings*)
|
344
|
+
;; only return a true value if both alternations have a length and
|
345
|
+
;; if they're equal
|
346
|
+
(let ((then-length (regex-length (then-regex branch))))
|
347
|
+
(and then-length
|
348
|
+
(eql then-length (regex-length (else-regex branch)))
|
349
|
+
then-length)))
|
350
|
+
|
351
|
+
(defmethod regex-length ((repetition repetition))
|
352
|
+
(declare #.*standard-optimize-settings*)
|
353
|
+
;; we can only compute the length of a REPETITION object if the
|
354
|
+
;; number of repetitions is fixed; note that we don't call
|
355
|
+
;; REGEX-LENGTH for the inner regex, we assume that the LEN slot is
|
356
|
+
;; always set correctly
|
357
|
+
(with-slots (len minimum maximum)
|
358
|
+
repetition
|
359
|
+
(if (and len
|
360
|
+
(eql minimum maximum))
|
361
|
+
(* minimum len)
|
362
|
+
nil)))
|
363
|
+
|
364
|
+
(defmethod regex-length ((register register))
|
365
|
+
(declare #.*standard-optimize-settings*)
|
366
|
+
(regex-length (regex register)))
|
367
|
+
|
368
|
+
(defmethod regex-length ((standalone standalone))
|
369
|
+
(declare #.*standard-optimize-settings*)
|
370
|
+
(regex-length (regex standalone)))
|
371
|
+
|
372
|
+
(defmethod regex-length ((back-reference back-reference))
|
373
|
+
(declare #.*standard-optimize-settings*)
|
374
|
+
;; with enough effort we could possibly do better here, but
|
375
|
+
;; currently we just give up and return NIL
|
376
|
+
nil)
|
377
|
+
|
378
|
+
(defmethod regex-length ((char-class char-class))
|
379
|
+
(declare #.*standard-optimize-settings*)
|
380
|
+
1)
|
381
|
+
|
382
|
+
(defmethod regex-length ((everything everything))
|
383
|
+
(declare #.*standard-optimize-settings*)
|
384
|
+
1)
|
385
|
+
|
386
|
+
(defmethod regex-length ((str str))
|
387
|
+
(declare #.*standard-optimize-settings*)
|
388
|
+
(len str))
|
389
|
+
|
390
|
+
(defmethod regex-length ((filter filter))
|
391
|
+
(declare #.*standard-optimize-settings*)
|
392
|
+
(len filter))
|
393
|
+
|
394
|
+
(defmethod regex-length ((regex regex))
|
395
|
+
(declare #.*standard-optimize-settings*)
|
396
|
+
;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
|
397
|
+
;; WORD-BOUNDARY (which all have zero-length)
|
398
|
+
0)
|
399
|
+
|
400
|
+
(defgeneric regex-min-length (regex)
|
401
|
+
(declare #.*standard-optimize-settings*)
|
402
|
+
(:documentation "Returns the minimal length of REGEX."))
|
403
|
+
|
404
|
+
(defmethod regex-min-length ((seq seq))
|
405
|
+
(declare #.*standard-optimize-settings*)
|
406
|
+
;; simply add all inner minimal lengths
|
407
|
+
(loop for sub-regex in (elements seq)
|
408
|
+
for len = (regex-min-length sub-regex)
|
409
|
+
sum len))
|
410
|
+
|
411
|
+
(defmethod regex-min-length ((alternation alternation))
|
412
|
+
(declare #.*standard-optimize-settings*)
|
413
|
+
;; minimal length of an alternation is the minimal length of the
|
414
|
+
;; "shortest" element
|
415
|
+
(loop for sub-regex in (choices alternation)
|
416
|
+
for len = (regex-min-length sub-regex)
|
417
|
+
minimize len))
|
418
|
+
|
419
|
+
(defmethod regex-min-length ((branch branch))
|
420
|
+
(declare #.*standard-optimize-settings*)
|
421
|
+
;; minimal length of both alternations
|
422
|
+
(min (regex-min-length (then-regex branch))
|
423
|
+
(regex-min-length (else-regex branch))))
|
424
|
+
|
425
|
+
(defmethod regex-min-length ((repetition repetition))
|
426
|
+
(declare #.*standard-optimize-settings*)
|
427
|
+
;; obviously the product of the inner minimal length and the minimal
|
428
|
+
;; number of repetitions
|
429
|
+
(* (minimum repetition) (min-len repetition)))
|
430
|
+
|
431
|
+
(defmethod regex-min-length ((register register))
|
432
|
+
(declare #.*standard-optimize-settings*)
|
433
|
+
(regex-min-length (regex register)))
|
434
|
+
|
435
|
+
(defmethod regex-min-length ((standalone standalone))
|
436
|
+
(declare #.*standard-optimize-settings*)
|
437
|
+
(regex-min-length (regex standalone)))
|
438
|
+
|
439
|
+
(defmethod regex-min-length ((char-class char-class))
|
440
|
+
(declare #.*standard-optimize-settings*)
|
441
|
+
1)
|
442
|
+
|
443
|
+
(defmethod regex-min-length ((everything everything))
|
444
|
+
(declare #.*standard-optimize-settings*)
|
445
|
+
1)
|
446
|
+
|
447
|
+
(defmethod regex-min-length ((str str))
|
448
|
+
(declare #.*standard-optimize-settings*)
|
449
|
+
(len str))
|
450
|
+
|
451
|
+
(defmethod regex-min-length ((filter filter))
|
452
|
+
(declare #.*standard-optimize-settings*)
|
453
|
+
(or (len filter)
|
454
|
+
0))
|
455
|
+
|
456
|
+
(defmethod regex-min-length ((regex regex))
|
457
|
+
(declare #.*standard-optimize-settings*)
|
458
|
+
;; the general case for ANCHOR, BACK-REFERENCE, LOOKAHEAD,
|
459
|
+
;; LOOKBEHIND, VOID, and WORD-BOUNDARY
|
460
|
+
0)
|
461
|
+
|
462
|
+
(defgeneric compute-offsets (regex start-pos)
|
463
|
+
(declare #.*standard-optimize-settings*)
|
464
|
+
(:documentation "Returns the offset the following regex would have
|
465
|
+
relative to START-POS or NIL if we can't compute it. Sets the OFFSET
|
466
|
+
slot of REGEX to START-POS if REGEX is a STR. May also affect OFFSET
|
467
|
+
slots of STR objects further down the tree."))
|
468
|
+
|
469
|
+
;; note that we're actually only interested in the offset of
|
470
|
+
;; "top-level" STR objects (see ADVANCE-FN in the SCAN function) so we
|
471
|
+
;; can stop at variable-length alternations and don't need to descend
|
472
|
+
;; into repetitions
|
473
|
+
|
474
|
+
(defmethod compute-offsets ((seq seq) start-pos)
|
475
|
+
(declare #.*standard-optimize-settings*)
|
476
|
+
(loop for element in (elements seq)
|
477
|
+
;; advance offset argument for next call while looping through
|
478
|
+
;; the elements
|
479
|
+
for pos = start-pos then curr-offset
|
480
|
+
for curr-offset = (compute-offsets element pos)
|
481
|
+
while curr-offset
|
482
|
+
finally (return curr-offset)))
|
483
|
+
|
484
|
+
(defmethod compute-offsets ((alternation alternation) start-pos)
|
485
|
+
(declare #.*standard-optimize-settings*)
|
486
|
+
(loop for choice in (choices alternation)
|
487
|
+
for old-offset = nil then curr-offset
|
488
|
+
for curr-offset = (compute-offsets choice start-pos)
|
489
|
+
;; we stop immediately if two alternations don't result in the
|
490
|
+
;; same offset
|
491
|
+
if (or (not curr-offset)
|
492
|
+
(and old-offset (/= curr-offset old-offset)))
|
493
|
+
do (return nil)
|
494
|
+
finally (return curr-offset)))
|
495
|
+
|
496
|
+
(defmethod compute-offsets ((branch branch) start-pos)
|
497
|
+
(declare #.*standard-optimize-settings*)
|
498
|
+
;; only return offset if both alternations have equal value
|
499
|
+
(let ((then-offset (compute-offsets (then-regex branch) start-pos)))
|
500
|
+
(and then-offset
|
501
|
+
(eql then-offset (compute-offsets (else-regex branch) start-pos))
|
502
|
+
then-offset)))
|
503
|
+
|
504
|
+
(defmethod compute-offsets ((repetition repetition) start-pos)
|
505
|
+
(declare #.*standard-optimize-settings*)
|
506
|
+
;; no need to descend into the inner regex
|
507
|
+
(with-slots (len minimum maximum)
|
508
|
+
repetition
|
509
|
+
(if (and len
|
510
|
+
(eq minimum maximum))
|
511
|
+
;; fixed number of repetitions, so we know how to proceed
|
512
|
+
(+ start-pos (* minimum len))
|
513
|
+
;; otherwise return NIL
|
514
|
+
nil)))
|
515
|
+
|
516
|
+
(defmethod compute-offsets ((register register) start-pos)
|
517
|
+
(declare #.*standard-optimize-settings*)
|
518
|
+
(compute-offsets (regex register) start-pos))
|
519
|
+
|
520
|
+
(defmethod compute-offsets ((standalone standalone) start-pos)
|
521
|
+
(declare #.*standard-optimize-settings*)
|
522
|
+
(compute-offsets (regex standalone) start-pos))
|
523
|
+
|
524
|
+
(defmethod compute-offsets ((char-class char-class) start-pos)
|
525
|
+
(declare #.*standard-optimize-settings*)
|
526
|
+
(1+ start-pos))
|
527
|
+
|
528
|
+
(defmethod compute-offsets ((everything everything) start-pos)
|
529
|
+
(declare #.*standard-optimize-settings*)
|
530
|
+
(1+ start-pos))
|
531
|
+
|
532
|
+
(defmethod compute-offsets ((str str) start-pos)
|
533
|
+
(declare #.*standard-optimize-settings*)
|
534
|
+
(setf (offset str) start-pos)
|
535
|
+
(+ start-pos (len str)))
|
536
|
+
|
537
|
+
(defmethod compute-offsets ((back-reference back-reference) start-pos)
|
538
|
+
(declare #.*standard-optimize-settings*)
|
539
|
+
;; with enough effort we could possibly do better here, but
|
540
|
+
;; currently we just give up and return NIL
|
541
|
+
(declare (ignore start-pos))
|
542
|
+
nil)
|
543
|
+
|
544
|
+
(defmethod compute-offsets ((filter filter) start-pos)
|
545
|
+
(declare #.*standard-optimize-settings*)
|
546
|
+
(let ((len (len filter)))
|
547
|
+
(if len
|
548
|
+
(+ start-pos len)
|
549
|
+
nil)))
|
550
|
+
|
551
|
+
(defmethod compute-offsets ((regex regex) start-pos)
|
552
|
+
(declare #.*standard-optimize-settings*)
|
553
|
+
;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
|
554
|
+
;; WORD-BOUNDARY (which all have zero-length)
|
555
|
+
start-pos)
|