clucumber 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. data/LICENSE +1 -1
  2. data/README.md +4 -9
  3. data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
  4. data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
  5. data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
  6. data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
  7. data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
  8. data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
  9. data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
  10. data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
  11. data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
  12. data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
  13. data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
  14. data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
  15. data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
  16. data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
  17. data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
  18. data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
  19. data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
  20. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
  21. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
  22. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
  23. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
  24. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
  25. data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
  26. data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
  27. data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
  28. data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
  29. data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
  30. data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
  31. data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
  32. data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
  33. data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
  34. data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
  35. data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
  36. data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
  37. data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
  38. data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
  39. data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
  40. data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
  41. data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
  42. data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
  43. data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
  44. data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
  45. data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
  46. data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
  47. data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
  48. data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
  49. data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
  50. data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
  51. data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
  52. data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
  53. data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
  54. data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
  55. data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
  56. data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
  57. data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
  58. data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
  59. data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
  60. data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
  61. data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
  62. data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
  63. data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
  64. data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
  65. data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
  66. data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
  67. data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
  68. data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
  69. data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
  70. data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
  71. data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
  72. data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
  73. data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
  74. data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
  75. data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
  76. data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
  77. data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
  78. data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
  79. data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
  80. data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
  81. data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
  82. data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
  83. data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
  84. data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
  85. data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
  86. data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
  87. data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
  88. data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
  89. data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
  90. data/lib/clucumber/vendor/lift/lift.asd +77 -0
  91. data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
  92. data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
  93. data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
  94. data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
  95. data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
  96. data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
  97. data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
  98. data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
  99. data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
  100. data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
  101. data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
  102. data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
  103. data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
  104. data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
  105. data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
  106. data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
  107. data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
  108. data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
  109. data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
  110. data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
  111. data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
  112. data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
  113. data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
  114. data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
  115. data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
  116. data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
  117. data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
  118. data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
  119. data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
  120. data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
  121. data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
  122. data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
  123. data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
  124. data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
  125. data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
  126. data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
  127. data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
  128. data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
  129. data/lib/clucumber/vendor/usocket/package.lisp +82 -0
  130. data/lib/clucumber/vendor/usocket/server.lisp +45 -0
  131. data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
  132. data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
  133. data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
  134. data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
  135. data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
  136. data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
  137. data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
  138. data/lib/clucumber.rb +29 -7
  139. metadata +151 -5
@@ -0,0 +1,737 @@
1
+ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
2
+ ;;; $Header: /usr/local/cvsrep/cl-ppcre/lexer.lisp,v 1.35 2009/09/17 19:17:31 edi Exp $
3
+
4
+ ;;; The lexer's responsibility is to convert the regex string into a
5
+ ;;; sequence of tokens which are in turn consumed by the parser.
6
+ ;;;
7
+ ;;; The lexer is aware of Perl's 'extended mode' and it also 'knows'
8
+ ;;; (with a little help from the parser) how many register groups it
9
+ ;;; has opened so far. (The latter is necessary for interpreting
10
+ ;;; strings like "\\10" correctly.)
11
+
12
+ ;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
13
+
14
+ ;;; Redistribution and use in source and binary forms, with or without
15
+ ;;; modification, are permitted provided that the following conditions
16
+ ;;; are met:
17
+
18
+ ;;; * Redistributions of source code must retain the above copyright
19
+ ;;; notice, this list of conditions and the following disclaimer.
20
+
21
+ ;;; * Redistributions in binary form must reproduce the above
22
+ ;;; copyright notice, this list of conditions and the following
23
+ ;;; disclaimer in the documentation and/or other materials
24
+ ;;; provided with the distribution.
25
+
26
+ ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
27
+ ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28
+ ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29
+ ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
30
+ ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31
+ ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
32
+ ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
+ ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34
+ ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
+
38
+ (in-package :cl-ppcre)
39
+
40
+ (declaim (inline map-char-to-special-class))
41
+ (defun map-char-to-special-char-class (chr)
42
+ (declare #.*standard-optimize-settings*)
43
+ "Maps escaped characters like \"\\d\" to the tokens which represent
44
+ their associated character classes."
45
+ (case chr
46
+ ((#\d)
47
+ :digit-class)
48
+ ((#\D)
49
+ :non-digit-class)
50
+ ((#\w)
51
+ :word-char-class)
52
+ ((#\W)
53
+ :non-word-char-class)
54
+ ((#\s)
55
+ :whitespace-char-class)
56
+ ((#\S)
57
+ :non-whitespace-char-class)))
58
+
59
+ (defstruct (lexer (:constructor make-lexer-internal))
60
+ "LEXER structures are used to hold the regex string which is
61
+ currently lexed and to keep track of the lexer's state."
62
+ (str "" :type string :read-only t)
63
+ (len 0 :type fixnum :read-only t)
64
+ (reg 0 :type fixnum)
65
+ (pos 0 :type fixnum)
66
+ (last-pos nil :type list))
67
+
68
+ (defun make-lexer (string)
69
+ (declare (inline make-lexer-internal)
70
+ #-:genera (string string))
71
+ (make-lexer-internal :str (maybe-coerce-to-simple-string string)
72
+ :len (length string)))
73
+
74
+ (declaim (inline end-of-string-p))
75
+ (defun end-of-string-p (lexer)
76
+ (declare #.*standard-optimize-settings*)
77
+ "Tests whether we're at the end of the regex string."
78
+ (<= (lexer-len lexer)
79
+ (lexer-pos lexer)))
80
+
81
+ (declaim (inline looking-at-p))
82
+ (defun looking-at-p (lexer chr)
83
+ (declare #.*standard-optimize-settings*)
84
+ "Tests whether the next character the lexer would see is CHR.
85
+ Does not respect extended mode."
86
+ (and (not (end-of-string-p lexer))
87
+ (char= (schar (lexer-str lexer) (lexer-pos lexer))
88
+ chr)))
89
+
90
+ (declaim (inline next-char-non-extended))
91
+ (defun next-char-non-extended (lexer)
92
+ (declare #.*standard-optimize-settings*)
93
+ "Returns the next character which is to be examined and updates the
94
+ POS slot. Does not respect extended mode."
95
+ (cond ((end-of-string-p lexer) nil)
96
+ (t (prog1
97
+ (schar (lexer-str lexer) (lexer-pos lexer))
98
+ (incf (lexer-pos lexer))))))
99
+
100
+ (defun next-char (lexer)
101
+ (declare #.*standard-optimize-settings*)
102
+ "Returns the next character which is to be examined and updates the
103
+ POS slot. Respects extended mode, i.e. whitespace, comments, and also
104
+ nested comments are skipped if applicable."
105
+ (let ((next-char (next-char-non-extended lexer))
106
+ last-loop-pos)
107
+ (loop
108
+ ;; remember where we started
109
+ (setq last-loop-pos (lexer-pos lexer))
110
+ ;; first we look for nested comments like (?#foo)
111
+ (when (and next-char
112
+ (char= next-char #\()
113
+ (looking-at-p lexer #\?))
114
+ (incf (lexer-pos lexer))
115
+ (cond ((looking-at-p lexer #\#)
116
+ ;; must be a nested comment - so we have to search for
117
+ ;; the closing parenthesis
118
+ (let ((error-pos (- (lexer-pos lexer) 2)))
119
+ (unless
120
+ ;; loop 'til ')' or end of regex string and
121
+ ;; return NIL if ')' wasn't encountered
122
+ (loop for skip-char = next-char
123
+ then (next-char-non-extended lexer)
124
+ while (and skip-char
125
+ (char/= skip-char #\)))
126
+ finally (return skip-char))
127
+ (signal-syntax-error* error-pos "Comment group not closed.")))
128
+ (setq next-char (next-char-non-extended lexer)))
129
+ (t
130
+ ;; undo effect of previous INCF if we didn't see a #
131
+ (decf (lexer-pos lexer)))))
132
+ (when *extended-mode-p*
133
+ ;; now - if we're in extended mode - we skip whitespace and
134
+ ;; comments; repeat the following loop while we look at
135
+ ;; whitespace or #\#
136
+ (loop while (and next-char
137
+ (or (char= next-char #\#)
138
+ (whitespacep next-char)))
139
+ do (setq next-char
140
+ (if (char= next-char #\#)
141
+ ;; if we saw a comment marker skip until
142
+ ;; we're behind #\Newline...
143
+ (loop for skip-char = next-char
144
+ then (next-char-non-extended lexer)
145
+ while (and skip-char
146
+ (char/= skip-char #\Newline))
147
+ finally (return (next-char-non-extended lexer)))
148
+ ;; ...otherwise (whitespace) skip until we
149
+ ;; see the next non-whitespace character
150
+ (loop for skip-char = next-char
151
+ then (next-char-non-extended lexer)
152
+ while (and skip-char
153
+ (whitespacep skip-char))
154
+ finally (return skip-char))))))
155
+ ;; if the position has moved we have to repeat our tests
156
+ ;; because of cases like /^a (?#xxx) (?#yyy) {3}c/x which
157
+ ;; would be equivalent to /^a{3}c/ in Perl
158
+ (unless (> (lexer-pos lexer) last-loop-pos)
159
+ (return next-char)))))
160
+
161
+ (declaim (inline fail))
162
+ (defun fail (lexer)
163
+ (declare #.*standard-optimize-settings*)
164
+ "Moves (LEXER-POS LEXER) back to the last position stored in
165
+ \(LEXER-LAST-POS LEXER) and pops the LAST-POS stack."
166
+ (unless (lexer-last-pos lexer)
167
+ (signal-syntax-error "LAST-POS stack of LEXER ~A is empty." lexer))
168
+ (setf (lexer-pos lexer) (pop (lexer-last-pos lexer)))
169
+ nil)
170
+
171
+ (defun get-number (lexer &key (radix 10) max-length no-whitespace-p)
172
+ (declare #.*standard-optimize-settings*)
173
+ "Read and consume the number the lexer is currently looking at and
174
+ return it. Returns NIL if no number could be identified.
175
+ RADIX is used as in PARSE-INTEGER. If MAX-LENGTH is not NIL we'll read
176
+ at most the next MAX-LENGTH characters. If NO-WHITESPACE-P is not NIL
177
+ we don't tolerate whitespace in front of the number."
178
+ (when (or (end-of-string-p lexer)
179
+ (and no-whitespace-p
180
+ (whitespacep (schar (lexer-str lexer) (lexer-pos lexer)))))
181
+ (return-from get-number nil))
182
+ (multiple-value-bind (integer new-pos)
183
+ (parse-integer (lexer-str lexer)
184
+ :start (lexer-pos lexer)
185
+ :end (if max-length
186
+ (let ((end-pos (+ (lexer-pos lexer)
187
+ (the fixnum max-length)))
188
+ (lexer-len (lexer-len lexer)))
189
+ (if (< end-pos lexer-len)
190
+ end-pos
191
+ lexer-len))
192
+ (lexer-len lexer))
193
+ :radix radix
194
+ :junk-allowed t)
195
+ (cond ((and integer (>= (the fixnum integer) 0))
196
+ (setf (lexer-pos lexer) new-pos)
197
+ integer)
198
+ (t nil))))
199
+
200
+ (declaim (inline try-number))
201
+ (defun try-number (lexer &key (radix 10) max-length no-whitespace-p)
202
+ (declare #.*standard-optimize-settings*)
203
+ "Like GET-NUMBER but won't consume anything if no number is seen."
204
+ ;; remember current position
205
+ (push (lexer-pos lexer) (lexer-last-pos lexer))
206
+ (let ((number (get-number lexer
207
+ :radix radix
208
+ :max-length max-length
209
+ :no-whitespace-p no-whitespace-p)))
210
+ (or number (fail lexer))))
211
+
212
+ (declaim (inline make-char-from-code))
213
+ (defun make-char-from-code (number error-pos)
214
+ (declare #.*standard-optimize-settings*)
215
+ "Create character from char-code NUMBER. NUMBER can be NIL
216
+ which is interpreted as 0. ERROR-POS is the position where
217
+ the corresponding number started within the regex string."
218
+ ;; only look at rightmost eight bits in compliance with Perl
219
+ (let ((code (logand #o377 (the fixnum (or number 0)))))
220
+ (or (and (< code char-code-limit)
221
+ (code-char code))
222
+ (signal-syntax-error* error-pos "No character for hex-code ~X." number))))
223
+
224
+ (defun unescape-char (lexer)
225
+ (declare #.*standard-optimize-settings*)
226
+ "Convert the characters\(s) following a backslash into a token
227
+ which is returned. This function is to be called when the backslash
228
+ has already been consumed. Special character classes like \\W are
229
+ handled elsewhere."
230
+ (when (end-of-string-p lexer)
231
+ (signal-syntax-error "String ends with backslash."))
232
+ (let ((chr (next-char-non-extended lexer)))
233
+ (case chr
234
+ ((#\E)
235
+ ;; if \Q quoting is on this is ignored, otherwise it's just an
236
+ ;; #\E
237
+ (if *allow-quoting*
238
+ :void
239
+ #\E))
240
+ ((#\c)
241
+ ;; \cx means control-x in Perl
242
+ (let ((next-char (next-char-non-extended lexer)))
243
+ (unless next-char
244
+ (signal-syntax-error* (lexer-pos lexer) "Character missing after '\\c' at position ~A."))
245
+ (code-char (logxor #x40 (char-code (char-upcase next-char))))))
246
+ ((#\x)
247
+ ;; \x should be followed by a hexadecimal char code,
248
+ ;; two digits or less
249
+ (let* ((error-pos (lexer-pos lexer))
250
+ (number (get-number lexer :radix 16 :max-length 2 :no-whitespace-p t)))
251
+ ;; note that it is OK if \x is followed by zero digits
252
+ (make-char-from-code number error-pos)))
253
+ ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
254
+ ;; \x should be followed by an octal char code,
255
+ ;; three digits or less
256
+ (let* ((error-pos (decf (lexer-pos lexer)))
257
+ (number (get-number lexer :radix 8 :max-length 3)))
258
+ (make-char-from-code number error-pos)))
259
+ ;; the following five character names are 'semi-standard'
260
+ ;; according to the CLHS but I'm not aware of any implementation
261
+ ;; that doesn't implement them
262
+ ((#\t)
263
+ #\Tab)
264
+ ((#\n)
265
+ #\Newline)
266
+ ((#\r)
267
+ #\Return)
268
+ ((#\f)
269
+ #\Page)
270
+ ((#\b)
271
+ #\Backspace)
272
+ ((#\a)
273
+ (code-char 7)) ; ASCII bell
274
+ ((#\e)
275
+ (code-char 27)) ; ASCII escape
276
+ (otherwise
277
+ ;; all other characters aren't affected by a backslash
278
+ chr))))
279
+
280
+ (defun read-char-property (lexer first-char)
281
+ (declare #.*standard-optimize-settings*)
282
+ (unless (eql (next-char-non-extended lexer) #\{)
283
+ (signal-syntax-error* (lexer-pos lexer) "Expected left brace after \\~A." first-char))
284
+ (let ((name (with-output-to-string (out nil :element-type
285
+ #+:lispworks 'lw:simple-char #-:lispworks 'character)
286
+ (loop
287
+ (let ((char (or (next-char-non-extended lexer)
288
+ (signal-syntax-error "Unexpected EOF after \\~A{." first-char))))
289
+ (when (char= char #\})
290
+ (return))
291
+ (write-char char out))))))
292
+ (list (if (char= first-char #\p) :property :inverted-property)
293
+ ;; we must reverse here because of what PARSE-STRING does
294
+ (nreverse name))))
295
+
296
+ (defun collect-char-class (lexer)
297
+ "Reads and consumes characters from regex string until a right
298
+ bracket is seen. Assembles them into a list \(which is returned) of
299
+ characters, character ranges, like \(:RANGE #\\A #\\E) for a-e, and
300
+ tokens representing special character classes."
301
+ (declare #.*standard-optimize-settings*)
302
+ (let ((start-pos (lexer-pos lexer)) ; remember start for error message
303
+ hyphen-seen
304
+ last-char
305
+ list)
306
+ (flet ((handle-char (c)
307
+ "Do the right thing with character C depending on whether
308
+ we're inside a range or not."
309
+ (cond ((and hyphen-seen last-char)
310
+ (setf (car list) (list :range last-char c)
311
+ last-char nil))
312
+ (t
313
+ (push c list)
314
+ (setq last-char c)))
315
+ (setq hyphen-seen nil)))
316
+ (loop for first = t then nil
317
+ for c = (next-char-non-extended lexer)
318
+ ;; leave loop if at end of string
319
+ while c
320
+ do (cond
321
+ ((char= c #\\)
322
+ ;; we've seen a backslash
323
+ (let ((next-char (next-char-non-extended lexer)))
324
+ (case next-char
325
+ ((#\d #\D #\w #\W #\s #\S)
326
+ ;; a special character class
327
+ (push (map-char-to-special-char-class next-char) list)
328
+ ;; if the last character was a hyphen
329
+ ;; just collect it literally
330
+ (when hyphen-seen
331
+ (push #\- list))
332
+ ;; if the next character is a hyphen do the same
333
+ (when (looking-at-p lexer #\-)
334
+ (push #\- list)
335
+ (incf (lexer-pos lexer)))
336
+ (setq hyphen-seen nil))
337
+ ((#\P #\p)
338
+ ;; maybe a character property
339
+ (cond ((null *property-resolver*)
340
+ (handle-char next-char))
341
+ (t
342
+ (push (read-char-property lexer next-char) list)
343
+ ;; if the last character was a hyphen
344
+ ;; just collect it literally
345
+ (when hyphen-seen
346
+ (push #\- list))
347
+ ;; if the next character is a hyphen do the same
348
+ (when (looking-at-p lexer #\-)
349
+ (push #\- list)
350
+ (incf (lexer-pos lexer)))
351
+ (setq hyphen-seen nil))))
352
+ ((#\E)
353
+ ;; if \Q quoting is on we ignore \E,
354
+ ;; otherwise it's just a plain #\E
355
+ (unless *allow-quoting*
356
+ (handle-char #\E)))
357
+ (otherwise
358
+ ;; otherwise unescape the following character(s)
359
+ (decf (lexer-pos lexer))
360
+ (handle-char (unescape-char lexer))))))
361
+ (first
362
+ ;; the first character must not be a right bracket
363
+ ;; and isn't treated specially if it's a hyphen
364
+ (handle-char c))
365
+ ((char= c #\])
366
+ ;; end of character class
367
+ ;; make sure we collect a pending hyphen
368
+ (when hyphen-seen
369
+ (setq hyphen-seen nil)
370
+ (handle-char #\-))
371
+ ;; reverse the list to preserve the order intended
372
+ ;; by the author of the regex string
373
+ (return-from collect-char-class (nreverse list)))
374
+ ((and (char= c #\-)
375
+ last-char
376
+ (not hyphen-seen))
377
+ ;; if the last character was 'just a character'
378
+ ;; we expect to be in the middle of a range
379
+ (setq hyphen-seen t))
380
+ ((char= c #\-)
381
+ ;; otherwise this is just an ordinary hyphen
382
+ (handle-char #\-))
383
+ (t
384
+ ;; default case - just collect the character
385
+ (handle-char c))))
386
+ ;; we can only exit the loop normally if we've reached the end
387
+ ;; of the regex string without seeing a right bracket
388
+ (signal-syntax-error* start-pos "Missing right bracket to close character class."))))
389
+
390
+ (defun maybe-parse-flags (lexer)
391
+ (declare #.*standard-optimize-settings*)
392
+ "Reads a sequence of modifiers \(including #\\- to reverse their
393
+ meaning) and returns a corresponding list of \"flag\" tokens. The
394
+ \"x\" modifier is treated specially in that it dynamically modifies
395
+ the behaviour of the lexer itself via the special variable
396
+ *EXTENDED-MODE-P*."
397
+ (prog1
398
+ (loop with set = t
399
+ for chr = (next-char-non-extended lexer)
400
+ unless chr
401
+ do (signal-syntax-error "Unexpected end of string.")
402
+ while (find chr "-imsx" :test #'char=)
403
+ ;; the first #\- will invert the meaning of all modifiers
404
+ ;; following it
405
+ if (char= chr #\-)
406
+ do (setq set nil)
407
+ else if (char= chr #\x)
408
+ do (setq *extended-mode-p* set)
409
+ else collect (if set
410
+ (case chr
411
+ ((#\i)
412
+ :case-insensitive-p)
413
+ ((#\m)
414
+ :multi-line-mode-p)
415
+ ((#\s)
416
+ :single-line-mode-p))
417
+ (case chr
418
+ ((#\i)
419
+ :case-sensitive-p)
420
+ ((#\m)
421
+ :not-multi-line-mode-p)
422
+ ((#\s)
423
+ :not-single-line-mode-p))))
424
+ (decf (lexer-pos lexer))))
425
+
426
+ (defun get-quantifier (lexer)
427
+ (declare #.*standard-optimize-settings*)
428
+ "Returns a list of two values (min max) if what the lexer is looking
429
+ at can be interpreted as a quantifier. Otherwise returns NIL and
430
+ resets the lexer to its old position."
431
+ ;; remember starting position for FAIL and UNGET-TOKEN functions
432
+ (push (lexer-pos lexer) (lexer-last-pos lexer))
433
+ (let ((next-char (next-char lexer)))
434
+ (case next-char
435
+ ((#\*)
436
+ ;; * (Kleene star): match 0 or more times
437
+ '(0 nil))
438
+ ((#\+)
439
+ ;; +: match 1 or more times
440
+ '(1 nil))
441
+ ((#\?)
442
+ ;; ?: match 0 or 1 times
443
+ '(0 1))
444
+ ((#\{)
445
+ ;; one of
446
+ ;; {n}: match exactly n times
447
+ ;; {n,}: match at least n times
448
+ ;; {n,m}: match at least n but not more than m times
449
+ ;; note that anything not matching one of these patterns will
450
+ ;; be interpreted literally - even whitespace isn't allowed
451
+ (let ((num1 (get-number lexer :no-whitespace-p t)))
452
+ (if num1
453
+ (let ((next-char (next-char-non-extended lexer)))
454
+ (case next-char
455
+ ((#\,)
456
+ (let* ((num2 (get-number lexer :no-whitespace-p t))
457
+ (next-char (next-char-non-extended lexer)))
458
+ (case next-char
459
+ ((#\})
460
+ ;; this is the case {n,} (NUM2 is NIL) or {n,m}
461
+ (list num1 num2))
462
+ (otherwise
463
+ (fail lexer)))))
464
+ ((#\})
465
+ ;; this is the case {n}
466
+ (list num1 num1))
467
+ (otherwise
468
+ (fail lexer))))
469
+ ;; no number following left curly brace, so we treat it
470
+ ;; like a normal character
471
+ (fail lexer))))
472
+ ;; cannot be a quantifier
473
+ (otherwise
474
+ (fail lexer)))))
475
+
476
+ (defun parse-register-name-aux (lexer)
477
+ "Reads and returns the name in a named register group. It is
478
+ assumed that the starting #\< character has already been read. The
479
+ closing #\> will also be consumed."
480
+ ;; we have to look for an ending > character now
481
+ (let ((end-name (position #\>
482
+ (lexer-str lexer)
483
+ :start (lexer-pos lexer)
484
+ :test #'char=)))
485
+ (unless end-name
486
+ ;; there has to be > somewhere, syntax error otherwise
487
+ (signal-syntax-error* (1- (lexer-pos lexer)) "Opening #\< in named group has no closing #\>."))
488
+ (let ((name (subseq (lexer-str lexer)
489
+ (lexer-pos lexer)
490
+ end-name)))
491
+ (unless (every #'(lambda (char)
492
+ (or (alphanumericp char)
493
+ (char= #\- char)))
494
+ name)
495
+ ;; register name can contain only alphanumeric characters or #\-
496
+ (signal-syntax-error* (lexer-pos lexer) "Invalid character in named register group."))
497
+ ;; advance lexer beyond "<name>" part
498
+ (setf (lexer-pos lexer) (1+ end-name))
499
+ name)))
500
+
501
+ (defun get-token (lexer)
502
+ (declare #.*standard-optimize-settings*)
503
+ "Returns and consumes the next token from the regex string \(or NIL)."
504
+ ;; remember starting position for UNGET-TOKEN function
505
+ (push (lexer-pos lexer)
506
+ (lexer-last-pos lexer))
507
+ (let ((next-char (next-char lexer)))
508
+ (cond (next-char
509
+ (case next-char
510
+ ;; the easy cases first - the following six characters
511
+ ;; always have a special meaning and get translated
512
+ ;; into tokens immediately
513
+ ((#\))
514
+ :close-paren)
515
+ ((#\|)
516
+ :vertical-bar)
517
+ ((#\?)
518
+ :question-mark)
519
+ ((#\.)
520
+ :everything)
521
+ ((#\^)
522
+ :start-anchor)
523
+ ((#\$)
524
+ :end-anchor)
525
+ ((#\+ #\*)
526
+ ;; quantifiers will always be consumend by
527
+ ;; GET-QUANTIFIER, they must not appear here
528
+ (signal-syntax-error* (1- (lexer-pos lexer)) "Quantifier '~A' not allowed." next-char))
529
+ ((#\{)
530
+ ;; left brace isn't a special character in it's own
531
+ ;; right but we must check if what follows might
532
+ ;; look like a quantifier
533
+ (let ((this-pos (lexer-pos lexer))
534
+ (this-last-pos (lexer-last-pos lexer)))
535
+ (unget-token lexer)
536
+ (when (get-quantifier lexer)
537
+ (signal-syntax-error* (car this-last-pos)
538
+ "Quantifier '~A' not allowed."
539
+ (subseq (lexer-str lexer)
540
+ (car this-last-pos)
541
+ (lexer-pos lexer))))
542
+ (setf (lexer-pos lexer) this-pos
543
+ (lexer-last-pos lexer) this-last-pos)
544
+ next-char))
545
+ ((#\[)
546
+ ;; left bracket always starts a character class
547
+ (cons (cond ((looking-at-p lexer #\^)
548
+ (incf (lexer-pos lexer))
549
+ :inverted-char-class)
550
+ (t
551
+ :char-class))
552
+ (collect-char-class lexer)))
553
+ ((#\\)
554
+ ;; backslash might mean different things so we have
555
+ ;; to peek one char ahead:
556
+ (let ((next-char (next-char-non-extended lexer)))
557
+ (case next-char
558
+ ((#\A)
559
+ :modeless-start-anchor)
560
+ ((#\Z)
561
+ :modeless-end-anchor)
562
+ ((#\z)
563
+ :modeless-end-anchor-no-newline)
564
+ ((#\b)
565
+ :word-boundary)
566
+ ((#\B)
567
+ :non-word-boundary)
568
+ ((#\k)
569
+ (cond ((and *allow-named-registers*
570
+ (looking-at-p lexer #\<))
571
+ ;; back-referencing a named register
572
+ (incf (lexer-pos lexer))
573
+ (list :back-reference
574
+ (nreverse (parse-register-name-aux lexer))))
575
+ (t
576
+ ;; false alarm, just unescape \k
577
+ #\k)))
578
+ ((#\d #\D #\w #\W #\s #\S)
579
+ ;; these will be treated like character classes
580
+ (map-char-to-special-char-class next-char))
581
+ ((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
582
+ ;; uh, a digit...
583
+ (let* ((old-pos (decf (lexer-pos lexer)))
584
+ ;; ...so let's get the whole number first
585
+ (backref-number (get-number lexer)))
586
+ (declare (fixnum backref-number))
587
+ (cond ((and (> backref-number (lexer-reg lexer))
588
+ (<= 10 backref-number))
589
+ ;; \10 and higher are treated as octal
590
+ ;; character codes if we haven't
591
+ ;; opened that much register groups
592
+ ;; yet
593
+ (setf (lexer-pos lexer) old-pos)
594
+ ;; re-read the number from the old
595
+ ;; position and convert it to its
596
+ ;; corresponding character
597
+ (make-char-from-code (get-number lexer :radix 8 :max-length 3)
598
+ old-pos))
599
+ (t
600
+ ;; otherwise this must refer to a
601
+ ;; backreference
602
+ (list :back-reference backref-number)))))
603
+ ((#\0)
604
+ ;; this always means an octal character code
605
+ ;; (at most three digits)
606
+ (let ((old-pos (decf (lexer-pos lexer))))
607
+ (make-char-from-code (get-number lexer :radix 8 :max-length 3)
608
+ old-pos)))
609
+ ((#\P #\p)
610
+ ;; might be a named property
611
+ (cond (*property-resolver* (read-char-property lexer next-char))
612
+ (t next-char)))
613
+ (otherwise
614
+ ;; in all other cases just unescape the
615
+ ;; character
616
+ (decf (lexer-pos lexer))
617
+ (unescape-char lexer)))))
618
+ ((#\()
619
+ ;; an open parenthesis might mean different things
620
+ ;; depending on what follows...
621
+ (cond ((looking-at-p lexer #\?)
622
+ ;; this is the case '(?' (and probably more behind)
623
+ (incf (lexer-pos lexer))
624
+ ;; we have to check for modifiers first
625
+ ;; because a colon might follow
626
+ (let* ((flags (maybe-parse-flags lexer))
627
+ (next-char (next-char-non-extended lexer)))
628
+ ;; modifiers are only allowed if a colon
629
+ ;; or a closing parenthesis are following
630
+ (when (and flags
631
+ (not (find next-char ":)" :test #'char=)))
632
+ (signal-syntax-error* (car (lexer-last-pos lexer))
633
+ "Sequence '~A' not recognized."
634
+ (subseq (lexer-str lexer)
635
+ (car (lexer-last-pos lexer))
636
+ (lexer-pos lexer))))
637
+ (case next-char
638
+ ((nil)
639
+ ;; syntax error
640
+ (signal-syntax-error "End of string following '(?'."))
641
+ ((#\))
642
+ ;; an empty group except for the flags
643
+ ;; (if there are any)
644
+ (or (and flags
645
+ (cons :flags flags))
646
+ :void))
647
+ ((#\()
648
+ ;; branch
649
+ :open-paren-paren)
650
+ ((#\>)
651
+ ;; standalone
652
+ :open-paren-greater)
653
+ ((#\=)
654
+ ;; positive look-ahead
655
+ :open-paren-equal)
656
+ ((#\!)
657
+ ;; negative look-ahead
658
+ :open-paren-exclamation)
659
+ ((#\:)
660
+ ;; non-capturing group - return flags as
661
+ ;; second value
662
+ (values :open-paren-colon flags))
663
+ ((#\<)
664
+ ;; might be a look-behind assertion or a named group, so
665
+ ;; check next character
666
+ (let ((next-char (next-char-non-extended lexer)))
667
+ (if (alpha-char-p next-char)
668
+ (progn
669
+ ;; we have encountered a named group
670
+ ;; are we supporting register naming?
671
+ (unless *allow-named-registers*
672
+ (signal-syntax-error* (1- (lexer-pos lexer))
673
+ "Character '~A' may not follow '(?<'."
674
+ next-char))
675
+ ;; put the letter back
676
+ (decf (lexer-pos lexer))
677
+ ;; named group
678
+ :open-paren-less-letter)
679
+ (case next-char
680
+ ((#\=)
681
+ ;; positive look-behind
682
+ :open-paren-less-equal)
683
+ ((#\!)
684
+ ;; negative look-behind
685
+ :open-paren-less-exclamation)
686
+ ((#\))
687
+ ;; Perl allows "(?<)" and treats
688
+ ;; it like a null string
689
+ :void)
690
+ ((nil)
691
+ ;; syntax error
692
+ (signal-syntax-error "End of string following '(?<'."))
693
+ (t
694
+ ;; also syntax error
695
+ (signal-syntax-error* (1- (lexer-pos lexer))
696
+ "Character '~A' may not follow '(?<'."
697
+ next-char ))))))
698
+ (otherwise
699
+ (signal-syntax-error* (1- (lexer-pos lexer))
700
+ "Character '~A' may not follow '(?'."
701
+ next-char)))))
702
+ (t
703
+ ;; if next-char was not #\? (this is within
704
+ ;; the first COND), we've just seen an opening
705
+ ;; parenthesis and leave it like that
706
+ :open-paren)))
707
+ (otherwise
708
+ ;; all other characters are their own tokens
709
+ next-char)))
710
+ ;; we didn't get a character (this if the "else" branch from
711
+ ;; the first IF), so we don't return a token but NIL
712
+ (t
713
+ (pop (lexer-last-pos lexer))
714
+ nil))))
715
+
716
+ (declaim (inline unget-token))
717
+ (defun unget-token (lexer)
718
+ (declare #.*standard-optimize-settings*)
719
+ "Moves the lexer back to the last position stored in the LAST-POS stack."
720
+ (if (lexer-last-pos lexer)
721
+ (setf (lexer-pos lexer)
722
+ (pop (lexer-last-pos lexer)))
723
+ (error "No token to unget \(this should not happen)")))
724
+
725
+ (declaim (inline start-of-subexpr-p))
726
+ (defun start-of-subexpr-p (lexer)
727
+ (declare #.*standard-optimize-settings*)
728
+ "Tests whether the next token can start a valid sub-expression, i.e.
729
+ a stand-alone regex."
730
+ (let* ((pos (lexer-pos lexer))
731
+ (next-char (next-char lexer)))
732
+ (not (or (null next-char)
733
+ (prog1
734
+ (member (the character next-char)
735
+ '(#\) #\|)
736
+ :test #'char=)
737
+ (setf (lexer-pos lexer) pos))))))