clucumber 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. data/LICENSE +1 -1
  2. data/README.md +4 -9
  3. data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
  4. data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
  5. data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
  6. data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
  7. data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
  8. data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
  9. data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
  10. data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
  11. data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
  12. data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
  13. data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
  14. data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
  15. data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
  16. data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
  17. data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
  18. data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
  19. data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
  20. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
  21. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
  22. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
  23. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
  24. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
  25. data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
  26. data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
  27. data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
  28. data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
  29. data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
  30. data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
  31. data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
  32. data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
  33. data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
  34. data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
  35. data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
  36. data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
  37. data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
  38. data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
  39. data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
  40. data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
  41. data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
  42. data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
  43. data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
  44. data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
  45. data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
  46. data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
  47. data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
  48. data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
  49. data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
  50. data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
  51. data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
  52. data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
  53. data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
  54. data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
  55. data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
  56. data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
  57. data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
  58. data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
  59. data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
  60. data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
  61. data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
  62. data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
  63. data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
  64. data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
  65. data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
  66. data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
  67. data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
  68. data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
  69. data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
  70. data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
  71. data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
  72. data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
  73. data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
  74. data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
  75. data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
  76. data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
  77. data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
  78. data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
  79. data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
  80. data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
  81. data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
  82. data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
  83. data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
  84. data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
  85. data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
  86. data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
  87. data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
  88. data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
  89. data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
  90. data/lib/clucumber/vendor/lift/lift.asd +77 -0
  91. data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
  92. data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
  93. data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
  94. data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
  95. data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
  96. data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
  97. data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
  98. data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
  99. data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
  100. data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
  101. data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
  102. data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
  103. data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
  104. data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
  105. data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
  106. data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
  107. data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
  108. data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
  109. data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
  110. data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
  111. data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
  112. data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
  113. data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
  114. data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
  115. data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
  116. data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
  117. data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
  118. data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
  119. data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
  120. data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
  121. data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
  122. data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
  123. data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
  124. data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
  125. data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
  126. data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
  127. data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
  128. data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
  129. data/lib/clucumber/vendor/usocket/package.lisp +82 -0
  130. data/lib/clucumber/vendor/usocket/server.lisp +45 -0
  131. data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
  132. data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
  133. data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
  134. data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
  135. data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
  136. data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
  137. data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
  138. data/lib/clucumber.rb +29 -7
  139. metadata +151 -5
@@ -0,0 +1,1262 @@
1
+ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
2
+ ;;; $Header: /usr/local/cvsrep/cl-ppcre/api.lisp,v 1.85 2009/09/17 19:17:30 edi Exp $
3
+
4
+ ;;; The external API for creating and using scanners.
5
+
6
+ ;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
7
+
8
+ ;;; Redistribution and use in source and binary forms, with or without
9
+ ;;; modification, are permitted provided that the following conditions
10
+ ;;; are met:
11
+
12
+ ;;; * Redistributions of source code must retain the above copyright
13
+ ;;; notice, this list of conditions and the following disclaimer.
14
+
15
+ ;;; * Redistributions in binary form must reproduce the above
16
+ ;;; copyright notice, this list of conditions and the following
17
+ ;;; disclaimer in the documentation and/or other materials
18
+ ;;; provided with the distribution.
19
+
20
+ ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
21
+ ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22
+ ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
+ ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
24
+ ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
26
+ ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
+ ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28
+ ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+
32
+ (in-package :cl-ppcre)
33
+
34
+ (defgeneric create-scanner (regex &key case-insensitive-mode
35
+ multi-line-mode
36
+ single-line-mode
37
+ extended-mode
38
+ destructive)
39
+ (:documentation "Accepts a regular expression - either as a
40
+ parse-tree or as a string - and returns a scan closure which will scan
41
+ strings for this regular expression and a list mapping registers to
42
+ their names \(NIL stands for unnamed ones). The \"mode\" keyboard
43
+ arguments are equivalent to the imsx modifiers in Perl. If
44
+ DESTRUCTIVE is not NIL, the function is allowed to destructively
45
+ modify its first argument \(but only if it's a parse tree)."))
46
+
47
+ #-:use-acl-regexp2-engine
48
+ (defmethod create-scanner ((regex-string string) &key case-insensitive-mode
49
+ multi-line-mode
50
+ single-line-mode
51
+ extended-mode
52
+ destructive)
53
+ (declare #.*standard-optimize-settings*)
54
+ (declare (ignore destructive))
55
+ ;; parse the string into a parse-tree and then call CREATE-SCANNER
56
+ ;; again
57
+ (let* ((*extended-mode-p* extended-mode)
58
+ (quoted-regex-string (if *allow-quoting*
59
+ (quote-sections (clean-comments regex-string extended-mode))
60
+ regex-string))
61
+ (*syntax-error-string* (copy-seq quoted-regex-string)))
62
+ ;; wrap the result with :GROUP to avoid infinite loops for
63
+ ;; constant strings
64
+ (create-scanner (cons :group (list (parse-string quoted-regex-string)))
65
+ :case-insensitive-mode case-insensitive-mode
66
+ :multi-line-mode multi-line-mode
67
+ :single-line-mode single-line-mode
68
+ :destructive t)))
69
+
70
+ #-:use-acl-regexp2-engine
71
+ (defmethod create-scanner ((scanner function) &key case-insensitive-mode
72
+ multi-line-mode
73
+ single-line-mode
74
+ extended-mode
75
+ destructive)
76
+ (declare #.*standard-optimize-settings*)
77
+ (declare (ignore destructive))
78
+ (when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
79
+ (signal-invocation-error "You can't use the keyword arguments to modify an existing scanner."))
80
+ scanner)
81
+
82
+ #-:use-acl-regexp2-engine
83
+ (defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
84
+ multi-line-mode
85
+ single-line-mode
86
+ extended-mode
87
+ destructive)
88
+ (declare #.*standard-optimize-settings*)
89
+ (when extended-mode
90
+ (signal-invocation-error "Extended mode doesn't make sense in parse trees."))
91
+ ;; convert parse-tree into internal representation REGEX and at the
92
+ ;; same time compute the number of registers and the constant string
93
+ ;; (or anchor) the regex starts with (if any)
94
+ (unless destructive
95
+ (setq parse-tree (copy-tree parse-tree)))
96
+ (let (flags)
97
+ (if single-line-mode
98
+ (push :single-line-mode-p flags))
99
+ (if multi-line-mode
100
+ (push :multi-line-mode-p flags))
101
+ (if case-insensitive-mode
102
+ (push :case-insensitive-p flags))
103
+ (when flags
104
+ (setq parse-tree (list :group (cons :flags flags) parse-tree))))
105
+ (let ((*syntax-error-string* nil))
106
+ (multiple-value-bind (regex reg-num starts-with reg-names)
107
+ (convert parse-tree)
108
+ ;; simplify REGEX by flattening nested SEQ and ALTERNATION
109
+ ;; constructs and gathering STR objects
110
+ (let ((regex (gather-strings (flatten regex))))
111
+ ;; set the MIN-REST slots of the REPETITION objects
112
+ (compute-min-rest regex 0)
113
+ ;; set the OFFSET slots of the STR objects
114
+ (compute-offsets regex 0)
115
+ (let* (end-string-offset
116
+ end-anchored-p
117
+ ;; compute the constant string the regex ends with (if
118
+ ;; any) and at the same time set the special variables
119
+ ;; END-STRING-OFFSET and END-ANCHORED-P
120
+ (end-string (end-string regex))
121
+ ;; if we found a non-zero-length end-string we create an
122
+ ;; efficient search function for it
123
+ (end-string-test (and end-string
124
+ (plusp (len end-string))
125
+ (if (= 1 (len end-string))
126
+ (create-char-searcher
127
+ (schar (str end-string) 0)
128
+ (case-insensitive-p end-string))
129
+ (create-bmh-matcher
130
+ (str end-string)
131
+ (case-insensitive-p end-string)))))
132
+ ;; initialize the counters for CREATE-MATCHER-AUX
133
+ (*rep-num* 0)
134
+ (*zero-length-num* 0)
135
+ ;; create the actual matcher function (which does all the
136
+ ;; work of matching the regular expression) corresponding
137
+ ;; to REGEX and at the same time set the special
138
+ ;; variables *REP-NUM* and *ZERO-LENGTH-NUM*
139
+ (match-fn (create-matcher-aux regex #'identity))
140
+ ;; if the regex starts with a string we create an
141
+ ;; efficient search function for it
142
+ (start-string-test (and (typep starts-with 'str)
143
+ (plusp (len starts-with))
144
+ (if (= 1 (len starts-with))
145
+ (create-char-searcher
146
+ (schar (str starts-with) 0)
147
+ (case-insensitive-p starts-with))
148
+ (create-bmh-matcher
149
+ (str starts-with)
150
+ (case-insensitive-p starts-with))))))
151
+ (declare (special end-string-offset end-anchored-p end-string))
152
+ ;; now create the scanner and return it
153
+ (values (create-scanner-aux match-fn
154
+ (regex-min-length regex)
155
+ (or (start-anchored-p regex)
156
+ ;; a dot in single-line-mode also
157
+ ;; implicitly anchors the regex at
158
+ ;; the start, i.e. if we can't match
159
+ ;; from the first position we won't
160
+ ;; match at all
161
+ (and (typep starts-with 'everything)
162
+ (single-line-p starts-with)))
163
+ starts-with
164
+ start-string-test
165
+ ;; only mark regex as end-anchored if we
166
+ ;; found a non-zero-length string before
167
+ ;; the anchor
168
+ (and end-string-test end-anchored-p)
169
+ end-string-test
170
+ (if end-string-test
171
+ (len end-string)
172
+ nil)
173
+ end-string-offset
174
+ *rep-num*
175
+ *zero-length-num*
176
+ reg-num)
177
+ reg-names))))))
178
+
179
+ #+:use-acl-regexp2-engine
180
+ (declaim (inline create-scanner))
181
+ #+:use-acl-regexp2-engine
182
+ (defmethod create-scanner ((scanner regexp::regular-expression) &key case-insensitive-mode
183
+ multi-line-mode
184
+ single-line-mode
185
+ extended-mode
186
+ destructive)
187
+ (declare #.*standard-optimize-settings*)
188
+ (declare (ignore destructive))
189
+ (when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
190
+ (signal-invocation-error "You can't use the keyword arguments to modify an existing scanner."))
191
+ scanner)
192
+
193
+ #+:use-acl-regexp2-engine
194
+ (defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
195
+ multi-line-mode
196
+ single-line-mode
197
+ extended-mode
198
+ destructive)
199
+ (declare #.*standard-optimize-settings*)
200
+ (declare (ignore destructive))
201
+ (excl:compile-re parse-tree
202
+ :case-fold case-insensitive-mode
203
+ :ignore-whitespace extended-mode
204
+ :multiple-lines multi-line-mode
205
+ :single-line single-line-mode
206
+ :return :index))
207
+
208
+ (defgeneric scan (regex target-string &key start end real-start-pos)
209
+ (:documentation "Searches TARGET-STRING from START to END and tries
210
+ to match REGEX. On success returns four values - the start of the
211
+ match, the end of the match, and two arrays denoting the beginnings
212
+ and ends of register matches. On failure returns NIL. REGEX can be a
213
+ string which will be parsed according to Perl syntax, a parse tree, or
214
+ a pre-compiled scanner created by CREATE-SCANNER. TARGET-STRING will
215
+ be coerced to a simple string if it isn't one already. The
216
+ REAL-START-POS parameter should be ignored - it exists only for
217
+ internal purposes."))
218
+
219
+ #-:use-acl-regexp2-engine
220
+ (defmethod scan ((regex-string string) target-string
221
+ &key (start 0)
222
+ (end (length target-string))
223
+ ((:real-start-pos *real-start-pos*) nil))
224
+ (declare #.*standard-optimize-settings*)
225
+ ;; note that the scanners are optimized for simple strings so we
226
+ ;; have to coerce TARGET-STRING into one if it isn't already
227
+ (funcall (create-scanner regex-string)
228
+ (maybe-coerce-to-simple-string target-string)
229
+ start end))
230
+
231
+ #-:use-acl-regexp2-engine
232
+ (defmethod scan ((scanner function) target-string
233
+ &key (start 0)
234
+ (end (length target-string))
235
+ ((:real-start-pos *real-start-pos*) nil))
236
+ (declare #.*standard-optimize-settings*)
237
+ (funcall scanner
238
+ (maybe-coerce-to-simple-string target-string)
239
+ start end))
240
+
241
+ #-:use-acl-regexp2-engine
242
+ (defmethod scan ((parse-tree t) target-string
243
+ &key (start 0)
244
+ (end (length target-string))
245
+ ((:real-start-pos *real-start-pos*) nil))
246
+ (declare #.*standard-optimize-settings*)
247
+ (funcall (create-scanner parse-tree)
248
+ (maybe-coerce-to-simple-string target-string)
249
+ start end))
250
+
251
+ #+:use-acl-regexp2-engine
252
+ (declaim (inline scan))
253
+ #+:use-acl-regexp2-engine
254
+ (defmethod scan ((parse-tree t) target-string
255
+ &key (start 0)
256
+ (end (length target-string))
257
+ ((:real-start-pos *real-start-pos*) nil))
258
+ (declare #.*standard-optimize-settings*)
259
+ (when (< end start)
260
+ (return-from scan nil))
261
+ (let ((results (multiple-value-list (excl:match-re parse-tree target-string
262
+ :start start
263
+ :end end
264
+ :return :index))))
265
+ (declare (dynamic-extent results))
266
+ (cond ((null (first results)) nil)
267
+ (t (let* ((no-of-regs (- (length results) 2))
268
+ (reg-starts (make-array no-of-regs
269
+ :element-type '(or null fixnum)))
270
+ (reg-ends (make-array no-of-regs
271
+ :element-type '(or null fixnum)))
272
+ (match (second results)))
273
+ (loop for (start . end) in (cddr results)
274
+ for i from 0
275
+ do (setf (aref reg-starts i) start
276
+ (aref reg-ends i) end))
277
+ (values (car match) (cdr match) reg-starts reg-ends))))))
278
+
279
+ #-:cormanlisp
280
+ (define-compiler-macro scan (&whole form &environment env regex target-string &rest rest)
281
+ "Make sure that constant forms are compiled into scanners at compile time."
282
+ (cond ((constantp regex env)
283
+ `(scan (load-time-value (create-scanner ,regex))
284
+ ,target-string ,@rest))
285
+ (t form)))
286
+
287
+ (defun scan-to-strings (regex target-string &key (start 0)
288
+ (end (length target-string))
289
+ sharedp)
290
+ "Like SCAN but returns substrings of TARGET-STRING instead of
291
+ positions, i.e. this function returns two values on success: the whole
292
+ match as a string plus an array of substrings (or NILs) corresponding
293
+ to the matched registers. If SHAREDP is true, the substrings may
294
+ share structure with TARGET-STRING."
295
+ (declare #.*standard-optimize-settings*)
296
+ (multiple-value-bind (match-start match-end reg-starts reg-ends)
297
+ (scan regex target-string :start start :end end)
298
+ (unless match-start
299
+ (return-from scan-to-strings nil))
300
+ (let ((substr-fn (if sharedp #'nsubseq #'subseq)))
301
+ (values (funcall substr-fn
302
+ target-string match-start match-end)
303
+ (map 'vector
304
+ (lambda (reg-start reg-end)
305
+ (if reg-start
306
+ (funcall substr-fn
307
+ target-string reg-start reg-end)
308
+ nil))
309
+ reg-starts
310
+ reg-ends)))))
311
+
312
+ #-:cormanlisp
313
+ (define-compiler-macro scan-to-strings
314
+ (&whole form &environment env regex target-string &rest rest)
315
+ "Make sure that constant forms are compiled into scanners at compile time."
316
+ (cond ((constantp regex env)
317
+ `(scan-to-strings (load-time-value (create-scanner ,regex))
318
+ ,target-string ,@rest))
319
+ (t form)))
320
+
321
+ (defmacro register-groups-bind (var-list (regex target-string
322
+ &key start end sharedp)
323
+ &body body)
324
+ "Executes BODY with the variables in VAR-LIST bound to the
325
+ corresponding register groups after TARGET-STRING has been matched
326
+ against REGEX, i.e. each variable is either bound to a string or to
327
+ NIL. If there is no match, BODY is _not_ executed. For each element
328
+ of VAR-LIST which is NIL there's no binding to the corresponding
329
+ register group. The number of variables in VAR-LIST must not be
330
+ greater than the number of register groups. If SHAREDP is true, the
331
+ substrings may share structure with TARGET-STRING."
332
+ (with-rebinding (target-string)
333
+ (with-unique-names (match-start match-end reg-starts reg-ends
334
+ start-index substr-fn)
335
+ `(multiple-value-bind (,match-start ,match-end ,reg-starts ,reg-ends)
336
+ (scan ,regex ,target-string :start (or ,start 0)
337
+ :end (or ,end (length ,target-string)))
338
+ (declare (ignore ,match-end))
339
+ (when ,match-start
340
+ (let* ,(cons
341
+ `(,substr-fn (if ,sharedp
342
+ #'nsubseq
343
+ #'subseq))
344
+ (loop for (function var) in (normalize-var-list var-list)
345
+ for counter from 0
346
+ when var
347
+ collect `(,var (let ((,start-index
348
+ (aref ,reg-starts ,counter)))
349
+ (if ,start-index
350
+ (funcall ,function
351
+ (funcall ,substr-fn
352
+ ,target-string
353
+ ,start-index
354
+ (aref ,reg-ends ,counter)))
355
+ nil)))))
356
+ ,@body))))))
357
+
358
+ (defmacro do-scans ((match-start match-end reg-starts reg-ends regex
359
+ target-string
360
+ &optional result-form
361
+ &key start end)
362
+ &body body
363
+ &environment env)
364
+ "Iterates over TARGET-STRING and tries to match REGEX as often as
365
+ possible evaluating BODY with MATCH-START, MATCH-END, REG-STARTS, and
366
+ REG-ENDS bound to the four return values of each match in turn. After
367
+ the last match, returns RESULT-FORM if provided or NIL otherwise. An
368
+ implicit block named NIL surrounds DO-SCANS; RETURN may be used to
369
+ terminate the loop immediately. If REGEX matches an empty string the
370
+ scan is continued one position behind this match. BODY may start with
371
+ declarations."
372
+ (with-rebinding (target-string)
373
+ (with-unique-names (%start %end %regex scanner)
374
+ (declare (ignorable %regex scanner))
375
+ ;; the NIL BLOCK to enable exits via (RETURN ...)
376
+ `(block nil
377
+ (let* ((,%start (or ,start 0))
378
+ (,%end (or ,end (length ,target-string)))
379
+ ,@(unless (constantp regex env)
380
+ ;; leave constant regular expressions as they are -
381
+ ;; SCAN's compiler macro will take care of them;
382
+ ;; otherwise create a scanner unless the regex is
383
+ ;; already a function (otherwise SCAN will do this
384
+ ;; on each iteration)
385
+ `((,%regex ,regex)
386
+ (,scanner (typecase ,%regex
387
+ (function ,%regex)
388
+ (t (create-scanner ,%regex)))))))
389
+ ;; coerce TARGET-STRING to a simple string unless it is one
390
+ ;; already (otherwise SCAN will do this on each iteration)
391
+ (setq ,target-string
392
+ (maybe-coerce-to-simple-string ,target-string))
393
+ (loop
394
+ ;; invoke SCAN and bind the returned values to the
395
+ ;; provided variables
396
+ (multiple-value-bind
397
+ (,match-start ,match-end ,reg-starts ,reg-ends)
398
+ (scan ,(cond ((constantp regex env) regex)
399
+ (t scanner))
400
+ ,target-string :start ,%start :end ,%end
401
+ :real-start-pos (or ,start 0))
402
+ ;; declare the variables to be IGNORABLE to prevent the
403
+ ;; compiler from issuing warnings
404
+ (declare
405
+ (ignorable ,match-start ,match-end ,reg-starts ,reg-ends))
406
+ (unless ,match-start
407
+ ;; stop iteration on first failure
408
+ (return ,result-form))
409
+ ;; execute BODY (wrapped in LOCALLY so it can start with
410
+ ;; declarations)
411
+ (locally
412
+ ,@body)
413
+ ;; advance by one position if we had a zero-length match
414
+ (setq ,%start (if (= ,match-start ,match-end)
415
+ (1+ ,match-end)
416
+ ,match-end)))))))))
417
+
418
+ (defmacro do-matches ((match-start match-end regex
419
+ target-string
420
+ &optional result-form
421
+ &key start end)
422
+ &body body)
423
+ "Iterates over TARGET-STRING and tries to match REGEX as often as
424
+ possible evaluating BODY with MATCH-START and MATCH-END bound to the
425
+ start/end positions of each match in turn. After the last match,
426
+ returns RESULT-FORM if provided or NIL otherwise. An implicit block
427
+ named NIL surrounds DO-MATCHES; RETURN may be used to terminate the
428
+ loop immediately. If REGEX matches an empty string the scan is
429
+ continued one position behind this match. BODY may start with
430
+ declarations."
431
+ ;; this is a simplified form of DO-SCANS - we just provide two dummy
432
+ ;; vars and ignore them
433
+ (with-unique-names (reg-starts reg-ends)
434
+ `(do-scans (,match-start ,match-end
435
+ ,reg-starts ,reg-ends
436
+ ,regex ,target-string
437
+ ,result-form
438
+ :start ,start :end ,end)
439
+ ,@body)))
440
+
441
+ (defmacro do-matches-as-strings ((match-var regex
442
+ target-string
443
+ &optional result-form
444
+ &key start end sharedp)
445
+ &body body)
446
+ "Iterates over TARGET-STRING and tries to match REGEX as often as
447
+ possible evaluating BODY with MATCH-VAR bound to the substring of
448
+ TARGET-STRING corresponding to each match in turn. After the last
449
+ match, returns RESULT-FORM if provided or NIL otherwise. An implicit
450
+ block named NIL surrounds DO-MATCHES-AS-STRINGS; RETURN may be used to
451
+ terminate the loop immediately. If REGEX matches an empty string the
452
+ scan is continued one position behind this match. If SHAREDP is true,
453
+ the substrings may share structure with TARGET-STRING. BODY may start
454
+ with declarations."
455
+ (with-rebinding (target-string)
456
+ (with-unique-names (match-start match-end substr-fn)
457
+ `(let ((,substr-fn (if ,sharedp #'nsubseq #'subseq)))
458
+ ;; simple use DO-MATCHES to extract the substrings
459
+ (do-matches (,match-start ,match-end ,regex ,target-string
460
+ ,result-form :start ,start :end ,end)
461
+ (let ((,match-var
462
+ (funcall ,substr-fn
463
+ ,target-string ,match-start ,match-end)))
464
+ ,@body))))))
465
+
466
+ (defmacro do-register-groups (var-list (regex target-string
467
+ &optional result-form
468
+ &key start end sharedp)
469
+ &body body)
470
+ "Iterates over TARGET-STRING and tries to match REGEX as often as
471
+ possible evaluating BODY with the variables in VAR-LIST bound to the
472
+ corresponding register groups for each match in turn, i.e. each
473
+ variable is either bound to a string or to NIL. For each element of
474
+ VAR-LIST which is NIL there's no binding to the corresponding register
475
+ group. The number of variables in VAR-LIST must not be greater than
476
+ the number of register groups. After the last match, returns
477
+ RESULT-FORM if provided or NIL otherwise. An implicit block named NIL
478
+ surrounds DO-REGISTER-GROUPS; RETURN may be used to terminate the loop
479
+ immediately. If REGEX matches an empty string the scan is continued
480
+ one position behind this match. If SHAREDP is true, the substrings
481
+ may share structure with TARGET-STRING. BODY may start with
482
+ declarations."
483
+ (with-rebinding (target-string)
484
+ (with-unique-names (substr-fn match-start match-end
485
+ reg-starts reg-ends start-index)
486
+ `(let ((,substr-fn (if ,sharedp
487
+ #'nsubseq
488
+ #'subseq)))
489
+ (do-scans (,match-start ,match-end ,reg-starts ,reg-ends
490
+ ,regex ,target-string
491
+ ,result-form :start ,start :end ,end)
492
+ (let ,(loop for (function var) in (normalize-var-list var-list)
493
+ for counter from 0
494
+ when var
495
+ collect `(,var (let ((,start-index
496
+ (aref ,reg-starts ,counter)))
497
+ (if ,start-index
498
+ (funcall ,function
499
+ (funcall ,substr-fn
500
+ ,target-string
501
+ ,start-index
502
+ (aref ,reg-ends ,counter)))
503
+ nil))))
504
+ ,@body))))))
505
+
506
+ (defun all-matches (regex target-string
507
+ &key (start 0)
508
+ (end (length target-string)))
509
+ "Returns a list containing the start and end positions of all
510
+ matches of REGEX against TARGET-STRING, i.e. if there are N matches
511
+ the list contains (* 2 N) elements. If REGEX matches an empty string
512
+ the scan is continued one position behind this match."
513
+ (declare #.*standard-optimize-settings*)
514
+ (let (result-list)
515
+ (do-matches (match-start match-end
516
+ regex target-string
517
+ (nreverse result-list)
518
+ :start start :end end)
519
+ (push match-start result-list)
520
+ (push match-end result-list))))
521
+
522
+ #-:cormanlisp
523
+ (define-compiler-macro all-matches (&whole form &environment env regex &rest rest)
524
+ "Make sure that constant forms are compiled into scanners at
525
+ compile time."
526
+ (cond ((constantp regex env)
527
+ `(all-matches (load-time-value (create-scanner ,regex))
528
+ ,@rest))
529
+ (t form)))
530
+
531
+ (defun all-matches-as-strings (regex target-string
532
+ &key (start 0)
533
+ (end (length target-string))
534
+ sharedp)
535
+ "Returns a list containing all substrings of TARGET-STRING which
536
+ match REGEX. If REGEX matches an empty string the scan is continued
537
+ one position behind this match. If SHAREDP is true, the substrings may
538
+ share structure with TARGET-STRING."
539
+ (declare #.*standard-optimize-settings*)
540
+ (let (result-list)
541
+ (do-matches-as-strings (match regex target-string (nreverse result-list)
542
+ :start start :end end :sharedp sharedp)
543
+ (push match result-list))))
544
+
545
+ #-:cormanlisp
546
+ (define-compiler-macro all-matches-as-strings (&whole form &environment env regex &rest rest)
547
+ "Make sure that constant forms are compiled into scanners at
548
+ compile time."
549
+ (cond ((constantp regex env)
550
+ `(all-matches-as-strings
551
+ (load-time-value (create-scanner ,regex))
552
+ ,@rest))
553
+ (t form)))
554
+
555
+ (defun split (regex target-string
556
+ &key (start 0)
557
+ (end (length target-string))
558
+ limit
559
+ with-registers-p
560
+ omit-unmatched-p
561
+ sharedp)
562
+ "Matches REGEX against TARGET-STRING as often as possible and
563
+ returns a list of the substrings between the matches. If
564
+ WITH-REGISTERS-P is true, substrings corresponding to matched
565
+ registers are inserted into the list as well. If OMIT-UNMATCHED-P is
566
+ true, unmatched registers will simply be left out, otherwise they will
567
+ show up as NIL. LIMIT limits the number of elements returned -
568
+ registers aren't counted. If LIMIT is NIL \(or 0 which is
569
+ equivalent), trailing empty strings are removed from the result list.
570
+ If REGEX matches an empty string the scan is continued one position
571
+ behind this match. If SHAREDP is true, the substrings may share
572
+ structure with TARGET-STRING."
573
+ (declare #.*standard-optimize-settings*)
574
+ ;; initialize list of positions POS-LIST to extract substrings with
575
+ ;; START so that the start of the next match will mark the end of
576
+ ;; the first substring
577
+ (let ((pos-list (list start))
578
+ (counter 0))
579
+ ;; how would Larry Wall do it?
580
+ (when (eql limit 0)
581
+ (setq limit nil))
582
+ (do-scans (match-start match-end
583
+ reg-starts reg-ends
584
+ regex target-string nil
585
+ :start start :end end)
586
+ (unless (and (= match-start match-end)
587
+ (= match-start (car pos-list)))
588
+ ;; push start of match on list unless this would be an empty
589
+ ;; string adjacent to the last element pushed onto the list
590
+ (when (and limit
591
+ (>= (incf counter) limit))
592
+ (return))
593
+ (push match-start pos-list)
594
+ (when with-registers-p
595
+ ;; optionally insert matched registers
596
+ (loop for reg-start across reg-starts
597
+ for reg-end across reg-ends
598
+ if reg-start
599
+ ;; but only if they've matched
600
+ do (push reg-start pos-list)
601
+ (push reg-end pos-list)
602
+ else unless omit-unmatched-p
603
+ ;; or if we're allowed to insert NIL instead
604
+ do (push nil pos-list)
605
+ (push nil pos-list)))
606
+ ;; now end of match
607
+ (push match-end pos-list)))
608
+ ;; end of whole string
609
+ (push end pos-list)
610
+ ;; now collect substrings
611
+ (nreverse
612
+ (loop with substr-fn = (if sharedp #'nsubseq #'subseq)
613
+ with string-seen = nil
614
+ for (this-end this-start) on pos-list by #'cddr
615
+ ;; skip empty strings from end of list
616
+ if (or limit
617
+ (setq string-seen
618
+ (or string-seen
619
+ (and this-start
620
+ (> this-end this-start)))))
621
+ collect (if this-start
622
+ (funcall substr-fn
623
+ target-string this-start this-end)
624
+ nil)))))
625
+
626
+ #-:cormanlisp
627
+ (define-compiler-macro split (&whole form &environment env regex target-string &rest rest)
628
+ "Make sure that constant forms are compiled into scanners at compile time."
629
+ (cond ((constantp regex env)
630
+ `(split (load-time-value (create-scanner ,regex))
631
+ ,target-string ,@rest))
632
+ (t form)))
633
+
634
+ (defun string-case-modifier (str from to start end)
635
+ (declare #.*standard-optimize-settings*)
636
+ (declare (fixnum from to start end))
637
+ "Checks whether all words in STR between FROM and TO are upcased,
638
+ downcased or capitalized and returns a function which applies a
639
+ corresponding case modification to strings. Returns #'IDENTITY
640
+ otherwise, especially if words in the target area extend beyond FROM
641
+ or TO. STR is supposed to be bounded by START and END. It is assumed
642
+ that \(<= START FROM TO END)."
643
+ (case
644
+ (if (or (<= to from)
645
+ (and (< start from)
646
+ (alphanumericp (char str (1- from)))
647
+ (alphanumericp (char str from)))
648
+ (and (< to end)
649
+ (alphanumericp (char str to))
650
+ (alphanumericp (char str (1- to)))))
651
+ ;; if it's a zero-length string or if words extend beyond FROM
652
+ ;; or TO we return NIL, i.e. #'IDENTITY
653
+ nil
654
+ ;; otherwise we loop through STR from FROM to TO
655
+ (loop with last-char-both-case
656
+ with current-result
657
+ for index of-type fixnum from from below to
658
+ for chr = (char str index)
659
+ do (cond ((not #-:cormanlisp (both-case-p chr)
660
+ #+:cormanlisp (or (upper-case-p chr)
661
+ (lower-case-p chr)))
662
+ ;; this character doesn't have a case so we
663
+ ;; consider it as a word boundary (note that
664
+ ;; this differs from how \b works in Perl)
665
+ (setq last-char-both-case nil))
666
+ ((upper-case-p chr)
667
+ ;; an uppercase character
668
+ (setq current-result
669
+ (if last-char-both-case
670
+ ;; not the first character in a
671
+ (case current-result
672
+ ((:undecided) :upcase)
673
+ ((:downcase :capitalize) (return nil))
674
+ ((:upcase) current-result))
675
+ (case current-result
676
+ ((nil) :undecided)
677
+ ((:downcase) (return nil))
678
+ ((:capitalize :upcase) current-result)))
679
+ last-char-both-case t))
680
+ (t
681
+ ;; a lowercase character
682
+ (setq current-result
683
+ (case current-result
684
+ ((nil) :downcase)
685
+ ((:undecided) :capitalize)
686
+ ((:downcase) current-result)
687
+ ((:capitalize) (if last-char-both-case
688
+ current-result
689
+ (return nil)))
690
+ ((:upcase) (return nil)))
691
+ last-char-both-case t)))
692
+ finally (return current-result)))
693
+ ((nil) #'identity)
694
+ ((:undecided :upcase) #'string-upcase)
695
+ ((:downcase) #'string-downcase)
696
+ ((:capitalize) #'string-capitalize)))
697
+
698
+ ;; first create a scanner to identify the special parts of the
699
+ ;; replacement string (eat your own dog food...)
700
+
701
+ (defgeneric build-replacement-template (replacement-string)
702
+ (declare #.*standard-optimize-settings*)
703
+ (:documentation "Converts a replacement string for REGEX-REPLACE or
704
+ REGEX-REPLACE-ALL into a replacement template which is an
705
+ S-expression."))
706
+
707
+ #-:cormanlisp
708
+ (let* ((*use-bmh-matchers* nil)
709
+ (reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
710
+ (defmethod build-replacement-template ((replacement-string string))
711
+ (declare #.*standard-optimize-settings*)
712
+ (let ((from 0)
713
+ ;; COLLECTOR will hold the (reversed) template
714
+ (collector '()))
715
+ ;; scan through all special parts of the replacement string
716
+ (do-matches (match-start match-end reg-scanner replacement-string)
717
+ (when (< from match-start)
718
+ ;; strings between matches are copied verbatim
719
+ (push (subseq replacement-string from match-start) collector))
720
+ ;; PARSE-START is true if the pattern matched a number which
721
+ ;; refers to a register
722
+ (let* ((parse-start (position-if #'digit-char-p
723
+ replacement-string
724
+ :start match-start
725
+ :end match-end))
726
+ (token (if parse-start
727
+ (1- (parse-integer replacement-string
728
+ :start parse-start
729
+ :junk-allowed t))
730
+ ;; if we didn't match a number we convert the
731
+ ;; character to a symbol
732
+ (case (char replacement-string (1+ match-start))
733
+ ((#\&) :match)
734
+ ((#\`) :before-match)
735
+ ((#\') :after-match)
736
+ ((#\\) :backslash)))))
737
+ (when (and (numberp token) (< token 0))
738
+ ;; make sure we don't accept something like "\\0"
739
+ (signal-invocation-error "Illegal substring ~S in replacement string."
740
+ (subseq replacement-string match-start match-end)))
741
+ (push token collector))
742
+ ;; remember where the match ended
743
+ (setq from match-end))
744
+ (when (< from (length replacement-string))
745
+ ;; push the rest of the replacement string onto the list
746
+ (push (subseq replacement-string from) collector))
747
+ (nreverse collector))))
748
+
749
+ #-:cormanlisp
750
+ (defmethod build-replacement-template ((replacement-function function))
751
+ (declare #.*standard-optimize-settings*)
752
+ (list replacement-function))
753
+
754
+ #-:cormanlisp
755
+ (defmethod build-replacement-template ((replacement-function-symbol symbol))
756
+ (declare #.*standard-optimize-settings*)
757
+ (list replacement-function-symbol))
758
+
759
+ #-:cormanlisp
760
+ (defmethod build-replacement-template ((replacement-list list))
761
+ (declare #.*standard-optimize-settings*)
762
+ replacement-list)
763
+
764
+ ;;; Corman Lisp's methods can't be closures... :(
765
+ #+:cormanlisp
766
+ (let* ((*use-bmh-matchers* nil)
767
+ (reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
768
+ (defun build-replacement-template (replacement)
769
+ (declare #.*standard-optimize-settings*)
770
+ (typecase replacement
771
+ (string
772
+ (let ((from 0)
773
+ ;; COLLECTOR will hold the (reversed) template
774
+ (collector '()))
775
+ ;; scan through all special parts of the replacement string
776
+ (do-matches (match-start match-end reg-scanner replacement)
777
+ (when (< from match-start)
778
+ ;; strings between matches are copied verbatim
779
+ (push (subseq replacement from match-start) collector))
780
+ ;; PARSE-START is true if the pattern matched a number which
781
+ ;; refers to a register
782
+ (let* ((parse-start (position-if #'digit-char-p
783
+ replacement
784
+ :start match-start
785
+ :end match-end))
786
+ (token (if parse-start
787
+ (1- (parse-integer replacement
788
+ :start parse-start
789
+ :junk-allowed t))
790
+ ;; if we didn't match a number we convert the
791
+ ;; character to a symbol
792
+ (case (char replacement (1+ match-start))
793
+ ((#\&) :match)
794
+ ((#\`) :before-match)
795
+ ((#\') :after-match)
796
+ ((#\\) :backslash)))))
797
+ (when (and (numberp token) (< token 0))
798
+ ;; make sure we don't accept something like "\\0"
799
+ (signal-invocation-error "Illegal substring ~S in replacement string."
800
+ (subseq replacement match-start match-end)))
801
+ (push token collector))
802
+ ;; remember where the match ended
803
+ (setq from match-end))
804
+ (when (< from (length replacement))
805
+ ;; push the rest of the replacement string onto the list
806
+ (push (nsubseq replacement from) collector))
807
+ (nreverse collector)))
808
+ (list
809
+ replacement)
810
+ (t
811
+ (list replacement)))))
812
+
813
+ (defun build-replacement (replacement-template
814
+ target-string
815
+ start end
816
+ match-start match-end
817
+ reg-starts reg-ends
818
+ simple-calls
819
+ element-type)
820
+ (declare #.*standard-optimize-settings*)
821
+ "Accepts a replacement template and the current values from the
822
+ matching process in REGEX-REPLACE or REGEX-REPLACE-ALL and returns the
823
+ corresponding string."
824
+ ;; the upper exclusive bound of the register numbers in the regular
825
+ ;; expression
826
+ (let ((reg-bound (if reg-starts
827
+ (array-dimension reg-starts 0)
828
+ 0)))
829
+ (with-output-to-string (s nil :element-type element-type)
830
+ (loop for token in replacement-template
831
+ do (typecase token
832
+ (string
833
+ ;; transfer string parts verbatim
834
+ (write-string token s))
835
+ (integer
836
+ ;; replace numbers with the corresponding registers
837
+ (when (>= token reg-bound)
838
+ ;; but only if the register was referenced in the
839
+ ;; regular expression
840
+ (signal-invocation-error "Reference to non-existent register ~A in replacement string."
841
+ (1+ token)))
842
+ (when (svref reg-starts token)
843
+ ;; and only if it matched, i.e. no match results
844
+ ;; in an empty string
845
+ (write-string target-string s
846
+ :start (svref reg-starts token)
847
+ :end (svref reg-ends token))))
848
+ (function
849
+ (write-string
850
+ (cond (simple-calls
851
+ (apply token
852
+ (nsubseq target-string match-start match-end)
853
+ (map 'list
854
+ (lambda (reg-start reg-end)
855
+ (and reg-start
856
+ (nsubseq target-string reg-start reg-end)))
857
+ reg-starts reg-ends)))
858
+ (t
859
+ (funcall token
860
+ target-string
861
+ start end
862
+ match-start match-end
863
+ reg-starts reg-ends)))
864
+ s))
865
+ (symbol
866
+ (case token
867
+ ((:backslash)
868
+ ;; just a backslash
869
+ (write-char #\\ s))
870
+ ((:match)
871
+ ;; the whole match
872
+ (write-string target-string s
873
+ :start match-start
874
+ :end match-end))
875
+ ((:before-match)
876
+ ;; the part of the target string before the match
877
+ (write-string target-string s
878
+ :start start
879
+ :end match-start))
880
+ ((:after-match)
881
+ ;; the part of the target string after the match
882
+ (write-string target-string s
883
+ :start match-end
884
+ :end end))
885
+ (otherwise
886
+ (write-string
887
+ (cond (simple-calls
888
+ (apply token
889
+ (nsubseq target-string match-start match-end)
890
+ (map 'list
891
+ (lambda (reg-start reg-end)
892
+ (and reg-start
893
+ (nsubseq target-string reg-start reg-end)))
894
+ reg-starts reg-ends)))
895
+ (t
896
+ (funcall token
897
+ target-string
898
+ start end
899
+ match-start match-end
900
+ reg-starts reg-ends)))
901
+ s)))))))))
902
+
903
+ (defun replace-aux (target-string replacement pos-list reg-list start end
904
+ preserve-case simple-calls element-type)
905
+ "Auxiliary function used by REGEX-REPLACE and REGEX-REPLACE-ALL.
906
+ POS-LIST contains a list with the start and end positions of all
907
+ matches while REG-LIST contains a list of arrays representing the
908
+ corresponding register start and end positions."
909
+ (declare #.*standard-optimize-settings*)
910
+ ;; build the template once before we start the loop
911
+ (let ((replacement-template (build-replacement-template replacement)))
912
+ (with-output-to-string (s nil :element-type element-type)
913
+ ;; loop through all matches and take the start and end of the
914
+ ;; whole string into account
915
+ (loop for (from to) on (append (list start) pos-list (list end))
916
+ ;; alternate between replacement and no replacement
917
+ for replace = nil then (and (not replace) to)
918
+ for reg-starts = (if replace (pop reg-list) nil)
919
+ for reg-ends = (if replace (pop reg-list) nil)
920
+ for curr-replacement = (if replace
921
+ ;; build the replacement string
922
+ (build-replacement replacement-template
923
+ target-string
924
+ start end
925
+ from to
926
+ reg-starts reg-ends
927
+ simple-calls
928
+ element-type)
929
+ nil)
930
+ while to
931
+ if replace
932
+ do (write-string (if preserve-case
933
+ ;; modify the case of the replacement
934
+ ;; string if necessary
935
+ (funcall (string-case-modifier target-string
936
+ from to
937
+ start end)
938
+ curr-replacement)
939
+ curr-replacement)
940
+ s)
941
+ else
942
+ ;; no replacement
943
+ do (write-string target-string s :start from :end to)))))
944
+
945
+ (defun regex-replace (regex target-string replacement &key
946
+ (start 0)
947
+ (end (length target-string))
948
+ preserve-case
949
+ simple-calls
950
+ (element-type #+:lispworks 'lw:simple-char #-:lispworks 'character))
951
+ "Try to match TARGET-STRING between START and END against REGEX and
952
+ replace the first match with REPLACEMENT. Two values are returned;
953
+ the modified string, and T if REGEX matched or NIL otherwise.
954
+
955
+ REPLACEMENT can be a string which may contain the special substrings
956
+ \"\\&\" for the whole match, \"\\`\" for the part of TARGET-STRING
957
+ before the match, \"\\'\" for the part of TARGET-STRING after the
958
+ match, \"\\N\" or \"\\{N}\" for the Nth register where N is a positive
959
+ integer.
960
+
961
+ REPLACEMENT can also be a function designator in which case the
962
+ match will be replaced with the result of calling the function
963
+ designated by REPLACEMENT with the arguments TARGET-STRING, START,
964
+ END, MATCH-START, MATCH-END, REG-STARTS, and REG-ENDS. (REG-STARTS and
965
+ REG-ENDS are arrays holding the start and end positions of matched
966
+ registers or NIL - the meaning of the other arguments should be
967
+ obvious.)
968
+
969
+ Finally, REPLACEMENT can be a list where each element is a string,
970
+ one of the symbols :MATCH, :BEFORE-MATCH, or :AFTER-MATCH -
971
+ corresponding to \"\\&\", \"\\`\", and \"\\'\" above -, an integer N -
972
+ representing register (1+ N) -, or a function designator.
973
+
974
+ If PRESERVE-CASE is true, the replacement will try to preserve the
975
+ case (all upper case, all lower case, or capitalized) of the
976
+ match. The result will always be a fresh string, even if REGEX doesn't
977
+ match.
978
+
979
+ ELEMENT-TYPE is the element type of the resulting string."
980
+ (declare #.*standard-optimize-settings*)
981
+ (multiple-value-bind (match-start match-end reg-starts reg-ends)
982
+ (scan regex target-string :start start :end end)
983
+ (if match-start
984
+ (values (replace-aux target-string replacement
985
+ (list match-start match-end)
986
+ (list reg-starts reg-ends)
987
+ start end preserve-case
988
+ simple-calls element-type)
989
+ t)
990
+ (values (subseq target-string start end)
991
+ nil))))
992
+
993
+ #-:cormanlisp
994
+ (define-compiler-macro regex-replace
995
+ (&whole form &environment env regex target-string replacement &rest rest)
996
+ "Make sure that constant forms are compiled into scanners at compile time."
997
+ (cond ((constantp regex env)
998
+ `(regex-replace (load-time-value (create-scanner ,regex))
999
+ ,target-string ,replacement ,@rest))
1000
+ (t form)))
1001
+
1002
+ (defun regex-replace-all (regex target-string replacement &key
1003
+ (start 0)
1004
+ (end (length target-string))
1005
+ preserve-case
1006
+ simple-calls
1007
+ (element-type #+:lispworks 'lw:simple-char #-:lispworks 'character))
1008
+ "Try to match TARGET-STRING between START and END against REGEX and
1009
+ replace all matches with REPLACEMENT. Two values are returned; the
1010
+ modified string, and T if REGEX matched or NIL otherwise.
1011
+
1012
+ REPLACEMENT can be a string which may contain the special substrings
1013
+ \"\\&\" for the whole match, \"\\`\" for the part of TARGET-STRING
1014
+ before the match, \"\\'\" for the part of TARGET-STRING after the
1015
+ match, \"\\N\" or \"\\{N}\" for the Nth register where N is a positive
1016
+ integer.
1017
+
1018
+ REPLACEMENT can also be a function designator in which case the
1019
+ match will be replaced with the result of calling the function
1020
+ designated by REPLACEMENT with the arguments TARGET-STRING, START,
1021
+ END, MATCH-START, MATCH-END, REG-STARTS, and REG-ENDS. (REG-STARTS and
1022
+ REG-ENDS are arrays holding the start and end positions of matched
1023
+ registers or NIL - the meaning of the other arguments should be
1024
+ obvious.)
1025
+
1026
+ Finally, REPLACEMENT can be a list where each element is a string,
1027
+ one of the symbols :MATCH, :BEFORE-MATCH, or :AFTER-MATCH -
1028
+ corresponding to \"\\&\", \"\\`\", and \"\\'\" above -, an integer N -
1029
+ representing register (1+ N) -, or a function designator.
1030
+
1031
+ If PRESERVE-CASE is true, the replacement will try to preserve the
1032
+ case (all upper case, all lower case, or capitalized) of the
1033
+ match. The result will always be a fresh string, even if REGEX doesn't
1034
+ match.
1035
+
1036
+ ELEMENT-TYPE is the element type of the resulting string."
1037
+ (declare #.*standard-optimize-settings*)
1038
+ (let ((pos-list '())
1039
+ (reg-list '()))
1040
+ (do-scans (match-start match-end reg-starts reg-ends regex target-string
1041
+ nil
1042
+ :start start :end end)
1043
+ (push match-start pos-list)
1044
+ (push match-end pos-list)
1045
+ (push reg-starts reg-list)
1046
+ (push reg-ends reg-list))
1047
+ (if pos-list
1048
+ (values (replace-aux target-string replacement
1049
+ (nreverse pos-list)
1050
+ (nreverse reg-list)
1051
+ start end preserve-case
1052
+ simple-calls element-type)
1053
+ t)
1054
+ (values (subseq target-string start end)
1055
+ nil))))
1056
+
1057
+ #-:cormanlisp
1058
+ (define-compiler-macro regex-replace-all
1059
+ (&whole form &environment env regex target-string replacement &rest rest)
1060
+ "Make sure that constant forms are compiled into scanners at compile time."
1061
+ (cond ((constantp regex env)
1062
+ `(regex-replace-all (load-time-value (create-scanner ,regex))
1063
+ ,target-string ,replacement ,@rest))
1064
+ (t form)))
1065
+
1066
+ #-:cormanlisp
1067
+ (defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
1068
+ &body body)
1069
+ "Auxiliary macro used by REGEX-APROPOS and REGEX-APROPOS-LIST. Loops
1070
+ through PACKAGES and executes BODY with SYMBOL bound to each symbol
1071
+ which matches REGEX. Optionally evaluates and returns RETURN-FORM at
1072
+ the end. If CASE-INSENSITIVE is true and REGEX isn't already a
1073
+ scanner, a case-insensitive scanner is used."
1074
+ (with-rebinding (regex)
1075
+ (with-unique-names (scanner %packages next morep hash)
1076
+ `(let* ((,scanner (create-scanner ,regex
1077
+ :case-insensitive-mode
1078
+ (and ,case-insensitive
1079
+ (not (functionp ,regex)))))
1080
+ (,%packages (or ,packages
1081
+ (list-all-packages)))
1082
+ (,hash (make-hash-table :test #'eq)))
1083
+ (with-package-iterator (,next ,%packages :external :internal :inherited)
1084
+ (loop
1085
+ (multiple-value-bind (,morep symbol)
1086
+ (,next)
1087
+ (unless ,morep
1088
+ (return ,return-form))
1089
+ (unless (gethash symbol ,hash)
1090
+ (when (scan ,scanner (symbol-name symbol))
1091
+ (setf (gethash symbol ,hash) t)
1092
+ ,@body)))))))))
1093
+
1094
+ ;;; The following two functions were provided by Karsten Poeck
1095
+
1096
+ #+:cormanlisp
1097
+ (defmacro do-with-all-symbols ((variable package-or-packagelist) &body body)
1098
+ "Executes BODY with VARIABLE bound to each symbol in
1099
+ PACKAGE-OR-PACKAGELIST \(a designator for a list of packages) in
1100
+ turn."
1101
+ (with-unique-names (pack-var)
1102
+ `(if (listp ,package-or-packagelist)
1103
+ (dolist (,pack-var ,package-or-packagelist)
1104
+ (do-symbols (,variable ,pack-var)
1105
+ ,@body))
1106
+ (do-symbols (,variable ,package-or-packagelist)
1107
+ ,@body))))
1108
+
1109
+ #+:cormanlisp
1110
+ (defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
1111
+ &body body)
1112
+ "Auxiliary macro used by REGEX-APROPOS and REGEX-APROPOS-LIST.
1113
+ Loops through PACKAGES and executes BODY with SYMBOL bound to each
1114
+ symbol which matches REGEX. Optionally evaluates and returns
1115
+ RETURN-FORM at the end. If CASE-INSENSITIVE is true and REGEX isn't
1116
+ already a scanner, a case-insensitive scanner is used."
1117
+ (with-rebinding (regex)
1118
+ (with-unique-names (scanner %packages hash)
1119
+ `(let* ((,scanner (create-scanner ,regex
1120
+ :case-insensitive-mode
1121
+ (and ,case-insensitive
1122
+ (not (functionp ,regex)))))
1123
+ (,%packages (or ,packages
1124
+ (list-all-packages)))
1125
+ (,hash (make-hash-table :test #'eq)))
1126
+ (do-with-all-symbols (symbol ,%packages)
1127
+ (unless (gethash symbol ,hash)
1128
+ (when (scan ,scanner (symbol-name symbol))
1129
+ (setf (gethash symbol ,hash) t)
1130
+ ,@body)))
1131
+ ,return-form))))
1132
+
1133
+ (defun regex-apropos-list (regex &optional packages &key (case-insensitive t))
1134
+ (declare #.*standard-optimize-settings*)
1135
+ "Similar to the standard function APROPOS-LIST but returns a list of
1136
+ all symbols which match the regular expression REGEX. If
1137
+ CASE-INSENSITIVE is true and REGEX isn't already a scanner, a
1138
+ case-insensitive scanner is used."
1139
+ (let ((collector '()))
1140
+ (regex-apropos-aux (regex packages case-insensitive collector)
1141
+ (push symbol collector))))
1142
+
1143
+ (defun print-symbol-info (symbol)
1144
+ "Auxiliary function used by REGEX-APROPOS. Tries to print some
1145
+ meaningful information about a symbol."
1146
+ (declare #.*standard-optimize-settings*)
1147
+ (handler-case
1148
+ (let ((output-list '()))
1149
+ (cond ((special-operator-p symbol)
1150
+ (push "[special operator]" output-list))
1151
+ ((macro-function symbol)
1152
+ (push "[macro]" output-list))
1153
+ ((fboundp symbol)
1154
+ (let* ((function (symbol-function symbol))
1155
+ (compiledp (compiled-function-p function)))
1156
+ (multiple-value-bind (lambda-expr closurep)
1157
+ (function-lambda-expression function)
1158
+ (push
1159
+ (format nil
1160
+ "[~:[~;compiled ~]~:[function~;closure~]]~:[~; ~A~]"
1161
+ compiledp closurep lambda-expr (cadr lambda-expr))
1162
+ output-list)))))
1163
+ (let ((class (find-class symbol nil)))
1164
+ (when class
1165
+ (push (format nil "[class] ~S" class) output-list)))
1166
+ (cond ((keywordp symbol)
1167
+ (push "[keyword]" output-list))
1168
+ ((constantp symbol)
1169
+ (push (format nil "[constant]~:[~; value: ~S~]"
1170
+ (boundp symbol) (symbol-value symbol)) output-list))
1171
+ ((boundp symbol)
1172
+ (push #+(or :lispworks :clisp) "[variable]"
1173
+ #-(or :lispworks :clisp) (format nil "[variable] value: ~S"
1174
+ (symbol-value symbol))
1175
+ output-list)))
1176
+ #-(or :cormanlisp :clisp)
1177
+ (format t "~&~S ~<~;~^~A~@{~:@_~A~}~;~:>" symbol output-list)
1178
+ #+(or :cormanlisp :clisp)
1179
+ (loop for line in output-list
1180
+ do (format t "~&~S ~A" symbol line)))
1181
+ (condition ()
1182
+ ;; this seems to be necessary due to some errors I encountered
1183
+ ;; with LispWorks
1184
+ (format t "~&~S [an error occurred while trying to print more info]" symbol))))
1185
+
1186
+ (defun regex-apropos (regex &optional packages &key (case-insensitive t))
1187
+ "Similar to the standard function APROPOS but returns a list of all
1188
+ symbols which match the regular expression REGEX. If CASE-INSENSITIVE
1189
+ is true and REGEX isn't already a scanner, a case-insensitive scanner
1190
+ is used."
1191
+ (declare #.*standard-optimize-settings*)
1192
+ (regex-apropos-aux (regex packages case-insensitive)
1193
+ (print-symbol-info symbol))
1194
+ (values))
1195
+
1196
+ (let* ((*use-bmh-matchers* nil)
1197
+ (non-word-char-scanner (create-scanner "[^a-zA-Z_0-9]")))
1198
+ (defun quote-meta-chars (string &key (start 0) (end (length string)))
1199
+ "Quote, i.e. prefix with #\\\\, all non-word characters in STRING."
1200
+ (regex-replace-all non-word-char-scanner string "\\\\\\&"
1201
+ :start start :end end)))
1202
+
1203
+ (let* ((*use-bmh-matchers* nil)
1204
+ (*allow-quoting* nil)
1205
+ (quote-char-scanner (create-scanner "\\\\Q"))
1206
+ (section-scanner (create-scanner "\\\\Q((?:[^\\\\]|\\\\(?!Q))*?)(?:\\\\E|$)")))
1207
+ (defun quote-sections (string)
1208
+ "Replace sections inside of STRING which are enclosed by \\Q and
1209
+ \\E with the quoted equivalent of these sections \(see
1210
+ QUOTE-META-CHARS). Repeat this as long as there are such
1211
+ sections. These sections may nest."
1212
+ (flet ((quote-substring (target-string start end match-start
1213
+ match-end reg-starts reg-ends)
1214
+ (declare (ignore start end match-start match-end))
1215
+ (quote-meta-chars target-string
1216
+ :start (svref reg-starts 0)
1217
+ :end (svref reg-ends 0))))
1218
+ (loop for result = string then (regex-replace-all section-scanner
1219
+ result
1220
+ #'quote-substring)
1221
+ while (scan quote-char-scanner result)
1222
+ finally (return result)))))
1223
+
1224
+ (let* ((*use-bmh-matchers* nil)
1225
+ (comment-scanner (create-scanner "(?s)\\(\\?#.*?\\)"))
1226
+ (extended-comment-scanner (create-scanner "(?m:#.*?$)|(?s:\\(\\?#.*?\\))"))
1227
+ (quote-token-scanner "\\\\[QE]")
1228
+ (quote-token-replace-scanner "\\\\([QE])"))
1229
+ (defun clean-comments (string &optional extended-mode)
1230
+ "Clean \(?#...) comments within STRING for quoting, i.e. convert
1231
+ \\Q to Q and \\E to E. If EXTENDED-MODE is true, also clean
1232
+ end-of-line comments, i.e. those starting with #\\# and ending with
1233
+ #\\Newline."
1234
+ (flet ((remove-tokens (target-string start end match-start
1235
+ match-end reg-starts reg-ends)
1236
+ (declare (ignore start end reg-starts reg-ends))
1237
+ (loop for result = (nsubseq target-string match-start match-end)
1238
+ then (regex-replace-all quote-token-replace-scanner result "\\1")
1239
+ ;; we must probably repeat this because the comment
1240
+ ;; can contain substrings like \\Q
1241
+ while (scan quote-token-scanner result)
1242
+ finally (return result))))
1243
+ (regex-replace-all (if extended-mode
1244
+ extended-comment-scanner
1245
+ comment-scanner)
1246
+ string
1247
+ #'remove-tokens))))
1248
+
1249
+ (defun parse-tree-synonym (symbol)
1250
+ "Returns the parse tree the SYMBOL symbol is a synonym for. Returns
1251
+ NIL is SYMBOL wasn't yet defined to be a synonym."
1252
+ (get symbol 'parse-tree-synonym))
1253
+
1254
+ (defun (setf parse-tree-synonym) (new-parse-tree symbol)
1255
+ "Defines SYMBOL to be a synonm for the parse tree NEW-PARSE-TREE."
1256
+ (setf (get symbol 'parse-tree-synonym) new-parse-tree))
1257
+
1258
+ (defmacro define-parse-tree-synonym (name parse-tree)
1259
+ "Defines the symbol NAME to be a synonym for the parse tree
1260
+ PARSE-TREE. Both arguments are quoted."
1261
+ `(eval-when (:compile-toplevel :load-toplevel :execute)
1262
+ (setf (parse-tree-synonym ',name) ',parse-tree)))