clucumber 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.md +4 -9
- data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
- data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
- data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
- data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
- data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
- data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
- data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
- data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
- data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
- data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
- data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
- data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
- data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
- data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
- data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
- data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
- data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
- data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
- data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
- data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
- data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
- data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
- data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
- data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
- data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
- data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
- data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
- data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
- data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
- data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
- data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
- data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
- data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
- data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
- data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
- data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
- data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
- data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
- data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
- data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
- data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
- data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
- data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
- data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
- data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
- data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
- data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
- data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
- data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
- data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
- data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
- data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
- data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
- data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
- data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
- data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
- data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
- data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
- data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
- data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
- data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
- data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
- data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
- data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
- data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
- data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
- data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
- data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
- data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
- data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
- data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
- data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
- data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
- data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
- data/lib/clucumber/vendor/lift/lift.asd +77 -0
- data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
- data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
- data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
- data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
- data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
- data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
- data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
- data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
- data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
- data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
- data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
- data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
- data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
- data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
- data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
- data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
- data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
- data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
- data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
- data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
- data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
- data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
- data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
- data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
- data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
- data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
- data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
- data/lib/clucumber/vendor/usocket/package.lisp +82 -0
- data/lib/clucumber/vendor/usocket/server.lisp +45 -0
- data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
- data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
- data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
- data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
- data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
- data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
- data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
- data/lib/clucumber.rb +29 -7
- metadata +151 -5
@@ -0,0 +1,319 @@
|
|
1
|
+
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
2
|
+
;;; $Header: /usr/local/cvsrep/cl-ppcre/parser.lisp,v 1.31 2009/09/17 19:17:31 edi Exp $
|
3
|
+
|
4
|
+
;;; The parser will - with the help of the lexer - parse a regex
|
5
|
+
;;; string and convert it into a "parse tree" (see docs for details
|
6
|
+
;;; about the syntax of these trees). Note that the lexer might
|
7
|
+
;;; return illegal parse trees. It is assumed that the conversion
|
8
|
+
;;; process later on will track them down.
|
9
|
+
|
10
|
+
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
|
11
|
+
|
12
|
+
;;; Redistribution and use in source and binary forms, with or without
|
13
|
+
;;; modification, are permitted provided that the following conditions
|
14
|
+
;;; are met:
|
15
|
+
|
16
|
+
;;; * Redistributions of source code must retain the above copyright
|
17
|
+
;;; notice, this list of conditions and the following disclaimer.
|
18
|
+
|
19
|
+
;;; * Redistributions in binary form must reproduce the above
|
20
|
+
;;; copyright notice, this list of conditions and the following
|
21
|
+
;;; disclaimer in the documentation and/or other materials
|
22
|
+
;;; provided with the distribution.
|
23
|
+
|
24
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
25
|
+
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
26
|
+
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
27
|
+
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
28
|
+
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
29
|
+
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
30
|
+
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
31
|
+
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
32
|
+
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
33
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
34
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
35
|
+
|
36
|
+
(in-package :cl-ppcre)
|
37
|
+
|
38
|
+
(defun group (lexer)
|
39
|
+
"Parses and consumes a <group>.
|
40
|
+
The productions are: <group> -> \"\(\"<regex>\")\"
|
41
|
+
\"\(?:\"<regex>\")\"
|
42
|
+
\"\(?>\"<regex>\")\"
|
43
|
+
\"\(?<flags>:\"<regex>\")\"
|
44
|
+
\"\(?=\"<regex>\")\"
|
45
|
+
\"\(?!\"<regex>\")\"
|
46
|
+
\"\(?<=\"<regex>\")\"
|
47
|
+
\"\(?<!\"<regex>\")\"
|
48
|
+
\"\(?\(\"<num>\")\"<regex>\")\"
|
49
|
+
\"\(?\(\"<regex>\")\"<regex>\")\"
|
50
|
+
\"\(?<name>\"<regex>\")\" \(when *ALLOW-NAMED-REGISTERS* is T)
|
51
|
+
<legal-token>
|
52
|
+
where <flags> is parsed by the lexer function MAYBE-PARSE-FLAGS.
|
53
|
+
Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
|
54
|
+
<grouping-type> is one of six keywords - see source for details."
|
55
|
+
(declare #.*standard-optimize-settings*)
|
56
|
+
(multiple-value-bind (open-token flags)
|
57
|
+
(get-token lexer)
|
58
|
+
(cond ((eq open-token :open-paren-paren)
|
59
|
+
;; special case for conditional regular expressions; note
|
60
|
+
;; that at this point we accept a couple of illegal
|
61
|
+
;; combinations which'll be sorted out later by the
|
62
|
+
;; converter
|
63
|
+
(let* ((open-paren-pos (car (lexer-last-pos lexer)))
|
64
|
+
;; check if what follows "(?(" is a number
|
65
|
+
(number (try-number lexer :no-whitespace-p t))
|
66
|
+
;; make changes to extended-mode-p local
|
67
|
+
(*extended-mode-p* *extended-mode-p*))
|
68
|
+
(declare (fixnum open-paren-pos))
|
69
|
+
(cond (number
|
70
|
+
;; condition is a number (i.e. refers to a
|
71
|
+
;; back-reference)
|
72
|
+
(let* ((inner-close-token (get-token lexer))
|
73
|
+
(reg-expr (reg-expr lexer))
|
74
|
+
(close-token (get-token lexer)))
|
75
|
+
(unless (eq inner-close-token :close-paren)
|
76
|
+
(signal-syntax-error* (+ open-paren-pos 2)
|
77
|
+
"Opening paren has no matching closing paren."))
|
78
|
+
(unless (eq close-token :close-paren)
|
79
|
+
(signal-syntax-error* open-paren-pos
|
80
|
+
"Opening paren has no matching closing paren."))
|
81
|
+
(list :branch number reg-expr)))
|
82
|
+
(t
|
83
|
+
;; condition must be a full regex (actually a
|
84
|
+
;; look-behind or look-ahead); and here comes a
|
85
|
+
;; terrible kludge: instead of being cleanly
|
86
|
+
;; separated from the lexer, the parser pushes
|
87
|
+
;; back the lexer by one position, thereby
|
88
|
+
;; landing in the middle of the 'token' "(?(" -
|
89
|
+
;; yuck!!
|
90
|
+
(decf (lexer-pos lexer))
|
91
|
+
(let* ((inner-reg-expr (group lexer))
|
92
|
+
(reg-expr (reg-expr lexer))
|
93
|
+
(close-token (get-token lexer)))
|
94
|
+
(unless (eq close-token :close-paren)
|
95
|
+
(signal-syntax-error* open-paren-pos
|
96
|
+
"Opening paren has no matching closing paren."))
|
97
|
+
(list :branch inner-reg-expr reg-expr))))))
|
98
|
+
((member open-token '(:open-paren
|
99
|
+
:open-paren-colon
|
100
|
+
:open-paren-greater
|
101
|
+
:open-paren-equal
|
102
|
+
:open-paren-exclamation
|
103
|
+
:open-paren-less-equal
|
104
|
+
:open-paren-less-exclamation
|
105
|
+
:open-paren-less-letter)
|
106
|
+
:test #'eq)
|
107
|
+
;; make changes to extended-mode-p local
|
108
|
+
(let ((*extended-mode-p* *extended-mode-p*))
|
109
|
+
;; we saw one of the six token representing opening
|
110
|
+
;; parentheses
|
111
|
+
(let* ((open-paren-pos (car (lexer-last-pos lexer)))
|
112
|
+
(register-name (when (eq open-token :open-paren-less-letter)
|
113
|
+
(parse-register-name-aux lexer)))
|
114
|
+
(reg-expr (reg-expr lexer))
|
115
|
+
(close-token (get-token lexer)))
|
116
|
+
(when (or (eq open-token :open-paren)
|
117
|
+
(eq open-token :open-paren-less-letter))
|
118
|
+
;; if this is the "("<regex>")" or "(?"<name>""<regex>")" production we have to
|
119
|
+
;; increment the register counter of the lexer
|
120
|
+
(incf (lexer-reg lexer)))
|
121
|
+
(unless (eq close-token :close-paren)
|
122
|
+
;; the token following <regex> must be the closing
|
123
|
+
;; parenthesis or this is a syntax error
|
124
|
+
(signal-syntax-error* open-paren-pos
|
125
|
+
"Opening paren has no matching closing paren."))
|
126
|
+
(if flags
|
127
|
+
;; if the lexer has returned a list of flags this must
|
128
|
+
;; have been the "(?:"<regex>")" production
|
129
|
+
(cons :group (nconc flags (list reg-expr)))
|
130
|
+
(if (eq open-token :open-paren-less-letter)
|
131
|
+
(list :named-register
|
132
|
+
;; every string was reversed, so we have to
|
133
|
+
;; reverse it back to get the name
|
134
|
+
(nreverse register-name)
|
135
|
+
reg-expr)
|
136
|
+
(list (case open-token
|
137
|
+
((:open-paren)
|
138
|
+
:register)
|
139
|
+
((:open-paren-colon)
|
140
|
+
:group)
|
141
|
+
((:open-paren-greater)
|
142
|
+
:standalone)
|
143
|
+
((:open-paren-equal)
|
144
|
+
:positive-lookahead)
|
145
|
+
((:open-paren-exclamation)
|
146
|
+
:negative-lookahead)
|
147
|
+
((:open-paren-less-equal)
|
148
|
+
:positive-lookbehind)
|
149
|
+
((:open-paren-less-exclamation)
|
150
|
+
:negative-lookbehind))
|
151
|
+
reg-expr))))))
|
152
|
+
(t
|
153
|
+
;; this is the <legal-token> production; <legal-token> is
|
154
|
+
;; any token which passes START-OF-SUBEXPR-P (otherwise
|
155
|
+
;; parsing had already stopped in the SEQ method)
|
156
|
+
open-token))))
|
157
|
+
|
158
|
+
(defun greedy-quant (lexer)
|
159
|
+
"Parses and consumes a <greedy-quant>.
|
160
|
+
The productions are: <greedy-quant> -> <group> | <group><quantifier>
|
161
|
+
where <quantifier> is parsed by the lexer function GET-QUANTIFIER.
|
162
|
+
Will return <parse-tree> or (:GREEDY-REPETITION <min> <max> <parse-tree>)."
|
163
|
+
(declare #.*standard-optimize-settings*)
|
164
|
+
(let* ((group (group lexer))
|
165
|
+
(token (get-quantifier lexer)))
|
166
|
+
(if token
|
167
|
+
;; if GET-QUANTIFIER returned a non-NIL value it's the
|
168
|
+
;; two-element list (<min> <max>)
|
169
|
+
(list :greedy-repetition (first token) (second token) group)
|
170
|
+
group)))
|
171
|
+
|
172
|
+
(defun quant (lexer)
|
173
|
+
"Parses and consumes a <quant>.
|
174
|
+
The productions are: <quant> -> <greedy-quant> | <greedy-quant>\"?\".
|
175
|
+
Will return the <parse-tree> returned by GREEDY-QUANT and optionally
|
176
|
+
change :GREEDY-REPETITION to :NON-GREEDY-REPETITION."
|
177
|
+
(declare #.*standard-optimize-settings*)
|
178
|
+
(let* ((greedy-quant (greedy-quant lexer))
|
179
|
+
(pos (lexer-pos lexer))
|
180
|
+
(next-char (next-char lexer)))
|
181
|
+
(when next-char
|
182
|
+
(if (char= next-char #\?)
|
183
|
+
(setf (car greedy-quant) :non-greedy-repetition)
|
184
|
+
(setf (lexer-pos lexer) pos)))
|
185
|
+
greedy-quant))
|
186
|
+
|
187
|
+
(defun seq (lexer)
|
188
|
+
"Parses and consumes a <seq>.
|
189
|
+
The productions are: <seq> -> <quant> | <quant><seq>.
|
190
|
+
Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
|
191
|
+
(declare #.*standard-optimize-settings*)
|
192
|
+
(flet ((make-array-from-two-chars (char1 char2)
|
193
|
+
(let ((string (make-array 2
|
194
|
+
:element-type 'character
|
195
|
+
:fill-pointer t
|
196
|
+
:adjustable t)))
|
197
|
+
(setf (aref string 0) char1)
|
198
|
+
(setf (aref string 1) char2)
|
199
|
+
string)))
|
200
|
+
;; Note that we're calling START-OF-SUBEXPR-P before we actually try
|
201
|
+
;; to parse a <seq> or <quant> in order to catch empty regular
|
202
|
+
;; expressions
|
203
|
+
(if (start-of-subexpr-p lexer)
|
204
|
+
(let ((quant (quant lexer)))
|
205
|
+
(if (start-of-subexpr-p lexer)
|
206
|
+
(let* ((seq (seq lexer))
|
207
|
+
(quant-is-char-p (characterp quant))
|
208
|
+
(seq-is-sequence-p (and (consp seq)
|
209
|
+
(eq (first seq) :sequence))))
|
210
|
+
(cond ((and quant-is-char-p
|
211
|
+
(characterp seq))
|
212
|
+
(make-array-from-two-chars seq quant))
|
213
|
+
((and quant-is-char-p
|
214
|
+
(stringp seq))
|
215
|
+
(vector-push-extend quant seq)
|
216
|
+
seq)
|
217
|
+
((and quant-is-char-p
|
218
|
+
seq-is-sequence-p
|
219
|
+
(characterp (second seq)))
|
220
|
+
(cond ((cddr seq)
|
221
|
+
(setf (cdr seq)
|
222
|
+
(cons
|
223
|
+
(make-array-from-two-chars (second seq)
|
224
|
+
quant)
|
225
|
+
(cddr seq)))
|
226
|
+
seq)
|
227
|
+
(t (make-array-from-two-chars (second seq) quant))))
|
228
|
+
((and quant-is-char-p
|
229
|
+
seq-is-sequence-p
|
230
|
+
(stringp (second seq)))
|
231
|
+
(cond ((cddr seq)
|
232
|
+
(setf (cdr seq)
|
233
|
+
(cons
|
234
|
+
(progn
|
235
|
+
(vector-push-extend quant (second seq))
|
236
|
+
(second seq))
|
237
|
+
(cddr seq)))
|
238
|
+
seq)
|
239
|
+
(t
|
240
|
+
(vector-push-extend quant (second seq))
|
241
|
+
(second seq))))
|
242
|
+
(seq-is-sequence-p
|
243
|
+
;; if <seq> is also a :SEQUENCE parse tree we merge
|
244
|
+
;; both lists into one to avoid unnecessary consing
|
245
|
+
(setf (cdr seq)
|
246
|
+
(cons quant (cdr seq)))
|
247
|
+
seq)
|
248
|
+
(t (list :sequence quant seq))))
|
249
|
+
quant))
|
250
|
+
:void)))
|
251
|
+
|
252
|
+
(defun reg-expr (lexer)
|
253
|
+
"Parses and consumes a <regex>, a complete regular expression.
|
254
|
+
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
|
255
|
+
Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
256
|
+
(declare #.*standard-optimize-settings*)
|
257
|
+
(let ((pos (lexer-pos lexer)))
|
258
|
+
(case (next-char lexer)
|
259
|
+
((nil)
|
260
|
+
;; if we didn't get any token we return :VOID which stands for
|
261
|
+
;; "empty regular expression"
|
262
|
+
:void)
|
263
|
+
((#\|)
|
264
|
+
;; now check whether the expression started with a vertical
|
265
|
+
;; bar, i.e. <seq> - the left alternation - is empty
|
266
|
+
(list :alternation :void (reg-expr lexer)))
|
267
|
+
(otherwise
|
268
|
+
;; otherwise un-read the character we just saw and parse a
|
269
|
+
;; <seq> plus the character following it
|
270
|
+
(setf (lexer-pos lexer) pos)
|
271
|
+
(let* ((seq (seq lexer))
|
272
|
+
(pos (lexer-pos lexer)))
|
273
|
+
(case (next-char lexer)
|
274
|
+
((nil)
|
275
|
+
;; no further character, just a <seq>
|
276
|
+
seq)
|
277
|
+
((#\|)
|
278
|
+
;; if the character was a vertical bar, this is an
|
279
|
+
;; alternation and we have the second production
|
280
|
+
(let ((reg-expr (reg-expr lexer)))
|
281
|
+
(cond ((and (consp reg-expr)
|
282
|
+
(eq (first reg-expr) :alternation))
|
283
|
+
;; again we try to merge as above in SEQ
|
284
|
+
(setf (cdr reg-expr)
|
285
|
+
(cons seq (cdr reg-expr)))
|
286
|
+
reg-expr)
|
287
|
+
(t (list :alternation seq reg-expr)))))
|
288
|
+
(otherwise
|
289
|
+
;; a character which is not a vertical bar - this is
|
290
|
+
;; either a syntax error or we're inside of a group and
|
291
|
+
;; the next character is a closing parenthesis; so we
|
292
|
+
;; just un-read the character and let another function
|
293
|
+
;; take care of it
|
294
|
+
(setf (lexer-pos lexer) pos)
|
295
|
+
seq)))))))
|
296
|
+
|
297
|
+
(defun reverse-strings (parse-tree)
|
298
|
+
"Recursively walks through PARSE-TREE and destructively reverses all
|
299
|
+
strings in it."
|
300
|
+
(declare #.*standard-optimize-settings*)
|
301
|
+
(cond ((stringp parse-tree)
|
302
|
+
(nreverse parse-tree))
|
303
|
+
((consp parse-tree)
|
304
|
+
(loop for parse-tree-rest on parse-tree
|
305
|
+
while parse-tree-rest
|
306
|
+
do (setf (car parse-tree-rest)
|
307
|
+
(reverse-strings (car parse-tree-rest))))
|
308
|
+
parse-tree)
|
309
|
+
(t parse-tree)))
|
310
|
+
|
311
|
+
(defun parse-string (string)
|
312
|
+
"Translate the regex string STRING into a parse tree."
|
313
|
+
(declare #.*standard-optimize-settings*)
|
314
|
+
(let* ((lexer (make-lexer string))
|
315
|
+
(parse-tree (reverse-strings (reg-expr lexer))))
|
316
|
+
;; check whether we've consumed the whole regex string
|
317
|
+
(if (end-of-string-p lexer)
|
318
|
+
parse-tree
|
319
|
+
(signal-syntax-error* (lexer-pos lexer) "Expected end of string."))))
|
@@ -0,0 +1,269 @@
|
|
1
|
+
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
2
|
+
;;; $Header: /usr/local/cvsrep/cl-ppcre/ppcre-tests.lisp,v 1.36 2008/06/25 14:04:28 edi Exp $
|
3
|
+
|
4
|
+
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
|
5
|
+
|
6
|
+
;;; Redistribution and use in source and binary forms, with or without
|
7
|
+
;;; modification, are permitted provided that the following conditions
|
8
|
+
;;; are met:
|
9
|
+
|
10
|
+
;;; * Redistributions of source code must retain the above copyright
|
11
|
+
;;; notice, this list of conditions and the following disclaimer.
|
12
|
+
|
13
|
+
;;; * Redistributions in binary form must reproduce the above
|
14
|
+
;;; copyright notice, this list of conditions and the following
|
15
|
+
;;; disclaimer in the documentation and/or other materials
|
16
|
+
;;; provided with the distribution.
|
17
|
+
|
18
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
19
|
+
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
20
|
+
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
21
|
+
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
22
|
+
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
24
|
+
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
25
|
+
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
26
|
+
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
27
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
28
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
(in-package #:cl-ppcre-test)
|
31
|
+
|
32
|
+
(defparameter *cl-ppcre-test-base-directory*
|
33
|
+
(make-pathname :name nil :type nil :version nil
|
34
|
+
:defaults (parse-namestring *load-truename*)))
|
35
|
+
|
36
|
+
(defun full-gc ()
|
37
|
+
"Start a full garbage collection."
|
38
|
+
;; what are the corresponding values for MCL and OpenMCL?
|
39
|
+
#+:allegro (excl:gc t)
|
40
|
+
#+(or :cmu :scl) (ext:gc :full t)
|
41
|
+
#+:ecl (si:gc t)
|
42
|
+
#+:clisp (ext:gc)
|
43
|
+
#+:cormanlisp (loop for i from 0 to 3 do (cormanlisp:gc i))
|
44
|
+
#+:lispworks4 (hcl:mark-and-sweep 3)
|
45
|
+
#+:lispworks5 (hcl:gc-generation #+:lispworks-32bit 3 #+:lispworks-64bit :blocking-gen-num)
|
46
|
+
#+:sbcl (sb-ext:gc :full t))
|
47
|
+
|
48
|
+
;; warning: ugly code ahead!!
|
49
|
+
;; this is just a quick hack for testing purposes
|
50
|
+
|
51
|
+
(defun time-regex (factor regex string
|
52
|
+
&key case-insensitive-mode
|
53
|
+
multi-line-mode
|
54
|
+
single-line-mode
|
55
|
+
extended-mode)
|
56
|
+
(declare #.ppcre::*standard-optimize-settings*)
|
57
|
+
"Auxiliary function used by TEST to benchmark a regex scanner
|
58
|
+
against Perl timings."
|
59
|
+
(declare (type string string))
|
60
|
+
(let* ((scanner (create-scanner regex
|
61
|
+
:case-insensitive-mode case-insensitive-mode
|
62
|
+
:multi-line-mode multi-line-mode
|
63
|
+
:single-line-mode single-line-mode
|
64
|
+
:extended-mode extended-mode))
|
65
|
+
;; make sure GC doesn't invalidate our benchmarking
|
66
|
+
(dummy (full-gc))
|
67
|
+
(start (get-internal-real-time)))
|
68
|
+
(declare (ignore dummy))
|
69
|
+
(dotimes (i factor)
|
70
|
+
(funcall scanner string 0 (length string)))
|
71
|
+
(float (/ (- (get-internal-real-time) start) internal-time-units-per-second))))
|
72
|
+
|
73
|
+
#+(or scl
|
74
|
+
lispworks
|
75
|
+
(and sbcl sb-thread))
|
76
|
+
(defun threaded-scan (scanner target-string &key (threads 10) (repetitions 5000))
|
77
|
+
(declare #.ppcre::*standard-optimize-settings*)
|
78
|
+
"Auxiliary function used by TEST to check whether SCANNER is thread-safe."
|
79
|
+
(full-gc)
|
80
|
+
(let ((collector (make-array threads))
|
81
|
+
(counter 0))
|
82
|
+
(loop for i below threads
|
83
|
+
do (let* ((j i)
|
84
|
+
(fn
|
85
|
+
(lambda ()
|
86
|
+
(let ((r (random repetitions)))
|
87
|
+
(loop for k below repetitions
|
88
|
+
if (= k r)
|
89
|
+
do (setf (aref collector j)
|
90
|
+
(let ((result
|
91
|
+
(multiple-value-list
|
92
|
+
(cl-ppcre:scan scanner target-string))))
|
93
|
+
(unless (cdr result)
|
94
|
+
(setq result '(nil nil #() #())))
|
95
|
+
result))
|
96
|
+
else
|
97
|
+
do (cl-ppcre:scan scanner target-string))
|
98
|
+
(incf counter)))))
|
99
|
+
#+scl (thread:thread-create fn)
|
100
|
+
#+lispworks (mp:process-run-function "" nil fn)
|
101
|
+
#+(and sbcl sb-thread) (sb-thread:make-thread fn)))
|
102
|
+
(loop while (< counter threads)
|
103
|
+
do (sleep .1))
|
104
|
+
(destructuring-bind (first-start first-end first-reg-starts first-reg-ends)
|
105
|
+
(aref collector 0)
|
106
|
+
(loop for (start end reg-starts reg-ends) across collector
|
107
|
+
if (or (not (eql first-start start))
|
108
|
+
(not (eql first-end end))
|
109
|
+
(/= (length first-reg-starts) (length reg-starts))
|
110
|
+
(/= (length first-reg-ends) (length reg-ends))
|
111
|
+
(loop for first-reg-start across first-reg-starts
|
112
|
+
for reg-start across reg-starts
|
113
|
+
thereis (not (eql first-reg-start reg-start)))
|
114
|
+
(loop for first-reg-end across first-reg-ends
|
115
|
+
for reg-end across reg-ends
|
116
|
+
thereis (not (eql first-reg-end reg-end))))
|
117
|
+
do (return (format nil "~&Inconsistent results during multi-threading"))))))
|
118
|
+
|
119
|
+
(defun create-string-from-input (input)
|
120
|
+
(cond ((or (null input)
|
121
|
+
(stringp input))
|
122
|
+
input)
|
123
|
+
(t
|
124
|
+
(cl-ppcre::string-list-to-simple-string
|
125
|
+
(loop for element in input
|
126
|
+
if (stringp element)
|
127
|
+
collect element
|
128
|
+
else
|
129
|
+
collect (string (code-char element)))))))
|
130
|
+
|
131
|
+
(defun test (&key (file-name
|
132
|
+
(make-pathname :name "testdata"
|
133
|
+
:type nil :version nil
|
134
|
+
:defaults *cl-ppcre-test-base-directory*)
|
135
|
+
file-name-provided-p)
|
136
|
+
threaded)
|
137
|
+
(declare #.ppcre::*standard-optimize-settings*)
|
138
|
+
(declare (ignorable threaded))
|
139
|
+
"Loop through all test cases in FILE-NAME and print report. Only in
|
140
|
+
LispWorks and SCL: If THREADED is true, also test whether the scanners
|
141
|
+
work multi-threaded."
|
142
|
+
(with-open-file (stream file-name
|
143
|
+
#+(or :allegro :clisp :scl :sbcl)
|
144
|
+
:external-format
|
145
|
+
#+(or :allegro :clisp :scl :sbcl)
|
146
|
+
(if file-name-provided-p
|
147
|
+
:default
|
148
|
+
#+(or :allegro :scl :sbcl) :iso-8859-1
|
149
|
+
#+:clisp charset:iso-8859-1))
|
150
|
+
(loop with testcount of-type fixnum = 0
|
151
|
+
with *regex-char-code-limit* = (if file-name-provided-p
|
152
|
+
*regex-char-code-limit*
|
153
|
+
;; the standard test suite
|
154
|
+
;; doesn't need Unicode
|
155
|
+
;; support
|
156
|
+
256)
|
157
|
+
with *allow-quoting* = (if file-name-provided-p
|
158
|
+
*allow-quoting*
|
159
|
+
t)
|
160
|
+
for input-line = (read stream nil nil)
|
161
|
+
for (counter info-string regex
|
162
|
+
case-insensitive-mode multi-line-mode
|
163
|
+
single-line-mode extended-mode
|
164
|
+
string perl-error factor
|
165
|
+
perl-time ex-result ex-subs) = input-line
|
166
|
+
while input-line
|
167
|
+
do (let ((info-string (create-string-from-input info-string))
|
168
|
+
(regex (create-string-from-input regex))
|
169
|
+
(string (create-string-from-input string))
|
170
|
+
(ex-result (create-string-from-input ex-result))
|
171
|
+
(ex-subs (mapcar #'create-string-from-input ex-subs))
|
172
|
+
(errors '()))
|
173
|
+
;; provide some visual feedback for slow CL
|
174
|
+
;; implementations; suggested by JP Massar
|
175
|
+
(incf testcount)
|
176
|
+
#+(or scl
|
177
|
+
lispworks
|
178
|
+
(and sbcl sb-thread))
|
179
|
+
(when threaded
|
180
|
+
(format t "Test #~A (ID ~A)~%" testcount counter)
|
181
|
+
(force-output))
|
182
|
+
(unless #-(or scl
|
183
|
+
lispworks
|
184
|
+
(and sbcl sb-thread))
|
185
|
+
nil
|
186
|
+
#+(or scl
|
187
|
+
lispworks
|
188
|
+
(and sbcl sb-thread))
|
189
|
+
threaded
|
190
|
+
(when (zerop (mod testcount 10))
|
191
|
+
(format t ".")
|
192
|
+
(force-output))
|
193
|
+
(when (zerop (mod testcount 100))
|
194
|
+
(terpri)))
|
195
|
+
(handler-case
|
196
|
+
(let* ((*use-bmh-matchers* (if (and (> factor 1) (plusp perl-time))
|
197
|
+
*use-bmh-matchers*
|
198
|
+
;; if we only check for
|
199
|
+
;; correctness we don't
|
200
|
+
;; care about speed that
|
201
|
+
;; match (but rather
|
202
|
+
;; about space
|
203
|
+
;; constraints of the
|
204
|
+
;; trial versions)
|
205
|
+
nil))
|
206
|
+
(scanner (create-scanner regex
|
207
|
+
:case-insensitive-mode case-insensitive-mode
|
208
|
+
:multi-line-mode multi-line-mode
|
209
|
+
:single-line-mode single-line-mode
|
210
|
+
:extended-mode extended-mode)))
|
211
|
+
(multiple-value-bind (result1 result2 sub-starts sub-ends)
|
212
|
+
(scan scanner string)
|
213
|
+
(cond (perl-error
|
214
|
+
(push (format nil
|
215
|
+
"~&expected an error but got a result")
|
216
|
+
errors))
|
217
|
+
(t
|
218
|
+
(when (not (eq result1 ex-result))
|
219
|
+
(if result1
|
220
|
+
(let ((result (subseq string result1 result2)))
|
221
|
+
(unless (string= result ex-result)
|
222
|
+
(push (format nil
|
223
|
+
"~&expected ~S but got ~S"
|
224
|
+
ex-result result)
|
225
|
+
errors))
|
226
|
+
(setq sub-starts (coerce sub-starts 'list)
|
227
|
+
sub-ends (coerce sub-ends 'list))
|
228
|
+
(loop for i from 0
|
229
|
+
for ex-sub in ex-subs
|
230
|
+
for sub-start = (nth i sub-starts)
|
231
|
+
for sub-end = (nth i sub-ends)
|
232
|
+
for sub = (if (and sub-start sub-end)
|
233
|
+
(subseq string sub-start sub-end)
|
234
|
+
nil)
|
235
|
+
unless (string= ex-sub sub)
|
236
|
+
do (push (format nil
|
237
|
+
"~&\\~A: expected ~S but got ~S"
|
238
|
+
(1+ i) ex-sub sub) errors)))
|
239
|
+
(push (format nil
|
240
|
+
"~&expected ~S but got ~S"
|
241
|
+
ex-result result1)
|
242
|
+
errors)))))
|
243
|
+
#+(or scl
|
244
|
+
lispworks
|
245
|
+
(and sbcl sb-thread))
|
246
|
+
(when threaded
|
247
|
+
(let ((thread-result (threaded-scan scanner string)))
|
248
|
+
(when thread-result
|
249
|
+
(push thread-result errors))))))
|
250
|
+
(condition (msg)
|
251
|
+
(unless perl-error
|
252
|
+
(push (format nil "~&got an unexpected error: '~A'" msg)
|
253
|
+
errors))))
|
254
|
+
(setq errors (nreverse errors))
|
255
|
+
(cond (errors
|
256
|
+
(when (or (<= factor 1) (zerop perl-time))
|
257
|
+
(format t "~&~4@A (~A):~{~& ~A~}~%"
|
258
|
+
counter info-string errors)))
|
259
|
+
((and (> factor 1) (plusp perl-time))
|
260
|
+
(let ((result (time-regex factor regex string
|
261
|
+
:case-insensitive-mode case-insensitive-mode
|
262
|
+
:multi-line-mode multi-line-mode
|
263
|
+
:single-line-mode single-line-mode
|
264
|
+
:extended-mode extended-mode)))
|
265
|
+
(format t "~&~4@A: ~,4F (~A repetitions, Perl: ~,4F seconds, CL-PPCRE: ~,4F seconds)" counter
|
266
|
+
(float (/ result perl-time)) factor perl-time result)
|
267
|
+
#+:cormanlisp (force-output *standard-output*)))
|
268
|
+
(t nil))))
|
269
|
+
(values)))
|