clucumber 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.md +4 -9
- data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
- data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
- data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
- data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
- data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
- data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
- data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
- data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
- data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
- data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
- data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
- data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
- data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
- data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
- data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
- data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
- data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
- data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
- data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
- data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
- data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
- data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
- data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
- data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
- data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
- data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
- data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
- data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
- data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
- data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
- data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
- data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
- data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
- data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
- data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
- data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
- data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
- data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
- data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
- data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
- data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
- data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
- data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
- data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
- data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
- data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
- data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
- data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
- data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
- data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
- data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
- data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
- data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
- data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
- data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
- data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
- data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
- data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
- data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
- data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
- data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
- data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
- data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
- data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
- data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
- data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
- data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
- data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
- data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
- data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
- data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
- data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
- data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
- data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
- data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
- data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
- data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
- data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
- data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
- data/lib/clucumber/vendor/lift/lift.asd +77 -0
- data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
- data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
- data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
- data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
- data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
- data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
- data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
- data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
- data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
- data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
- data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
- data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
- data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
- data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
- data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
- data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
- data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
- data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
- data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
- data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
- data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
- data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
- data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
- data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
- data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
- data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
- data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
- data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
- data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
- data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
- data/lib/clucumber/vendor/usocket/package.lisp +82 -0
- data/lib/clucumber/vendor/usocket/server.lisp +45 -0
- data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
- data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
- data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
- data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
- data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
- data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
- data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
- data/lib/clucumber.rb +29 -7
- metadata +151 -5
@@ -0,0 +1,239 @@
|
|
1
|
+
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
|
2
|
+
;;; $Header: /usr/local/cvsrep/cl-unicode/build/dump.lisp,v 1.37 2008/07/21 23:02:25 edi Exp $
|
3
|
+
|
4
|
+
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
|
5
|
+
|
6
|
+
;;; Redistribution and use in source and binary forms, with or without
|
7
|
+
;;; modification, are permitted provided that the following conditions
|
8
|
+
;;; are met:
|
9
|
+
|
10
|
+
;;; * Redistributions of source code must retain the above copyright
|
11
|
+
;;; notice, this list of conditions and the following disclaimer.
|
12
|
+
|
13
|
+
;;; * Redistributions in binary form must reproduce the above
|
14
|
+
;;; copyright notice, this list of conditions and the following
|
15
|
+
;;; disclaimer in the documentation and/or other materials
|
16
|
+
;;; provided with the distribution.
|
17
|
+
|
18
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
19
|
+
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
20
|
+
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
21
|
+
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
22
|
+
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
24
|
+
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
25
|
+
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
26
|
+
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
27
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
28
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
(in-package :cl-unicode)
|
31
|
+
|
32
|
+
(defun build-range-list (reader test)
|
33
|
+
"Loops through all non-NIL elements of *CHAR-DATABASE*, looks at
|
34
|
+
each element using the reader READER and builds an alist of the form
|
35
|
+
\((X1 . A1) (X2 . A2)) where the Xi are code points and the Ai are the
|
36
|
+
corresponding attributes as returned by READER. The interesting part
|
37
|
+
is that the Xi can also be code point ranges like \(Y1 . Y2) meaning
|
38
|
+
that all code points from Y1 to \(including) Y2 have the same
|
39
|
+
attribute Ai and the function tries to make the alist as short as
|
40
|
+
possible, i.e. to partition it into as few ranges as possible.
|
41
|
+
Whether two attributes are the same is determined by the test function
|
42
|
+
TEST. The resulting list is sorted."
|
43
|
+
(let (range-list
|
44
|
+
(last-attribute (funcall reader (aref *char-database* 0)))
|
45
|
+
(last-code-point 0)
|
46
|
+
(code-point 0))
|
47
|
+
(flet ((add ()
|
48
|
+
"Adds the range from LAST-CODE-POINT to \(excluding)
|
49
|
+
CODE-POINT with the attribute LAST-ATTRIBUTE to the result."
|
50
|
+
(push (cons (cons last-code-point (1- code-point))
|
51
|
+
last-attribute)
|
52
|
+
range-list)))
|
53
|
+
(loop
|
54
|
+
(incf code-point)
|
55
|
+
(when (= code-point #.(1- +code-point-limit+))
|
56
|
+
(add)
|
57
|
+
(return))
|
58
|
+
(let* ((char-info (aref *char-database* code-point))
|
59
|
+
(attribute (and char-info (funcall reader char-info))))
|
60
|
+
(unless (funcall test attribute last-attribute)
|
61
|
+
(add)
|
62
|
+
(setq last-attribute attribute
|
63
|
+
last-code-point code-point)))))
|
64
|
+
(nreverse range-list)))
|
65
|
+
|
66
|
+
(defun split-range-list (range-list)
|
67
|
+
"Recursively splits a range list as returned by BUILD-RANGE-LIST in
|
68
|
+
the middle and thus converts it into a binary search tree which can be
|
69
|
+
used by the TREE-LOOKUP function."
|
70
|
+
(let ((length (length range-list)))
|
71
|
+
(cond ((zerop length) nil)
|
72
|
+
(t (let ((middle (round (1- length) 2)))
|
73
|
+
(list (nth middle range-list)
|
74
|
+
(split-range-list (subseq range-list 0 middle))
|
75
|
+
(split-range-list (subseq range-list (1+ middle)))))))))
|
76
|
+
|
77
|
+
(defun build-tree (reader &optional (test #'eq))
|
78
|
+
"Uses BUILD-RANGE-LIST and SPLIT-RANGE-LIST to build a binary search
|
79
|
+
tree for READER which is one of the readers of the CHAR-INFO class.
|
80
|
+
Attributes are compared with TEST."
|
81
|
+
(split-range-list (build-range-list reader test)))
|
82
|
+
|
83
|
+
(defun dump-method (name reader stream &optional (test #'eq test-provided-p))
|
84
|
+
"Writes a method definition for a unary method called NAME
|
85
|
+
specialized for integer \(code points) to the stream STREAM which
|
86
|
+
returns a value equivalent to
|
87
|
+
|
88
|
+
\(APPLY READER \(AREF *CHAR-DATABASE* <code-point>))
|
89
|
+
|
90
|
+
but uses compact binary search trees instead of the *CHAR-DATABASE*
|
91
|
+
array. TEST is used by BUILD-TREE to decide whether two adjacent
|
92
|
+
characters have the same attribute. If TEST isn't provided, it is
|
93
|
+
assumed that the attribute is a property symbol and the method will
|
94
|
+
return two values - the symbol and the canonical name of the symbol."
|
95
|
+
(let ((definition (if test-provided-p
|
96
|
+
`(defmethod ,name ((code-point integer))
|
97
|
+
(tree-lookup code-point ',(build-tree reader test)))
|
98
|
+
`(defmethod ,name ((code-point integer))
|
99
|
+
(let ((symbol (tree-lookup code-point ',(build-tree reader))))
|
100
|
+
(values (property-name symbol) symbol))))))
|
101
|
+
(print definition stream)))
|
102
|
+
|
103
|
+
(defmacro with-output-to-source-file ((stream relative-path &key no-header-p) &body body)
|
104
|
+
"Executes BODY with STREAM bound to an output file stream which
|
105
|
+
writes to the file denoted by RELATIVE-PATH - a path relative to the
|
106
|
+
location of this source file. Writes a Lisp header to the files
|
107
|
+
unless NO-HEADER-P is true."
|
108
|
+
`(let ((pathname (merge-pathnames ,relative-path *this-file*)))
|
109
|
+
(format t "~&;;; Writing source file ~A" (file-namestring pathname))
|
110
|
+
(force-output)
|
111
|
+
(with-open-file (,stream pathname
|
112
|
+
:direction :output
|
113
|
+
:if-exists :supersede)
|
114
|
+
(with-standard-io-syntax ()
|
115
|
+
(let ((*package* (find-package :cl-unicode)))
|
116
|
+
,@(unless no-header-p
|
117
|
+
'((format out ";;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-~%")))
|
118
|
+
(format out ";;; This file was auto-generated by the BUILD-CL-UNICODE system~2%")
|
119
|
+
,@(unless no-header-p
|
120
|
+
'((print '(in-package :cl-unicode) out)))
|
121
|
+
,@body)))))
|
122
|
+
|
123
|
+
(defun dump-methods ()
|
124
|
+
"Dumps several methods to the CL-UNICODE source file methods.lisp
|
125
|
+
using DUMP-METHOD."
|
126
|
+
(with-output-to-source-file (out "../methods.lisp")
|
127
|
+
(dump-method 'script 'script* out)
|
128
|
+
(dump-method 'code-block 'code-block* out)
|
129
|
+
(dump-method 'age 'age* out #'equal)
|
130
|
+
(dump-method 'general-category 'general-category* out)
|
131
|
+
(dump-method 'bidi-class 'bidi-class* out)
|
132
|
+
(dump-method 'numeric-type 'numeric-type* out)
|
133
|
+
(dump-method 'numeric-value 'numeric-value* out #'eql)
|
134
|
+
(dump-method 'combining-class 'combining-class* out #'eql)
|
135
|
+
(dump-method 'bidi-mirroring-glyph% 'bidi-mirroring-glyph* out #'eql)
|
136
|
+
(dump-method 'binary-props 'binary-props* out #'equal)))
|
137
|
+
|
138
|
+
(defun dump-hash-table (hash-table-name stream)
|
139
|
+
"Writes code to the STREAM which reinitializes the hash table
|
140
|
+
contained in the global special variable named HASH-TABLE-NAME to its
|
141
|
+
current state. It is assumed that all keys and values of the hash
|
142
|
+
table can be printed readably."
|
143
|
+
(print `(clrhash ,hash-table-name) stream)
|
144
|
+
(let ((key-value-alist
|
145
|
+
(loop for key being the hash-keys of (symbol-value hash-table-name)
|
146
|
+
using (hash-value value)
|
147
|
+
when value
|
148
|
+
collect (cons key value))))
|
149
|
+
(print `(loop for (key . value) in ',key-value-alist
|
150
|
+
do (setf (gethash key ,hash-table-name) value))
|
151
|
+
stream)))
|
152
|
+
|
153
|
+
(defun dump-hash-tables ()
|
154
|
+
"Dumps several hash tables to the CL-UNICODE source file
|
155
|
+
hash-tables.lisp using DUMP-HASH-TABLE."
|
156
|
+
(with-output-to-source-file (out "../hash-tables.lisp")
|
157
|
+
(dump-hash-table '*canonical-names* out)
|
158
|
+
(dump-hash-table '*names-to-code-points* out)
|
159
|
+
(dump-hash-table '*code-points-to-names* out)
|
160
|
+
(dump-hash-table '*unicode1-names-to-code-points* out)
|
161
|
+
(dump-hash-table '*code-points-to-unicode1-names* out)
|
162
|
+
(dump-hash-table '*case-mappings* out)
|
163
|
+
(dump-hash-table '*jamo-short-names* out)
|
164
|
+
;; finally add code which adds the computed Hangul syllable names
|
165
|
+
;; at load time
|
166
|
+
(print '(add-hangul-names) out)))
|
167
|
+
|
168
|
+
(defun dump-list (list-name stream)
|
169
|
+
"Writes code to the STREAM which reinitializes the list contained in
|
170
|
+
the global special variable named LIST-NAME to its current state. It
|
171
|
+
is assumed that all elements of the list can be printed readably."
|
172
|
+
(print `(setq ,list-name ',(symbol-value list-name)) stream))
|
173
|
+
|
174
|
+
(defun dump-lists ()
|
175
|
+
"Dumps several list to the CL-UNICODE source file lists.lisp using
|
176
|
+
DUMP-LIST."
|
177
|
+
(with-output-to-source-file (out "../lists.lisp")
|
178
|
+
(dump-list '*general-categories* out)
|
179
|
+
(dump-list '*scripts* out)
|
180
|
+
(dump-list '*code-blocks* out)
|
181
|
+
(dump-list '*binary-properties* out)
|
182
|
+
(dump-list '*bidi-classes* out)))
|
183
|
+
|
184
|
+
(defun dump-derived-tests ()
|
185
|
+
"Parses the Unicode data file \"DerivedCoreProperties.txt\" \(which
|
186
|
+
is not used in read.lisp) and uses it to create a file
|
187
|
+
\"derived-properties\" which will be used by CL-UNICODE-TEST."
|
188
|
+
(with-output-to-source-file (out (make-pathname :name "derived-properties"
|
189
|
+
:type :unspecific
|
190
|
+
:directory '(:relative :up "test"))
|
191
|
+
:no-header-p t)
|
192
|
+
(let (last-test)
|
193
|
+
(labels ((really-add-test (test)
|
194
|
+
"Writes the test designator from ADD-TEST in a
|
195
|
+
\"delayed\" fashion to make sure that the previous test \(which hasn't
|
196
|
+
been written to disk yet) doesn't contradict the current test. This
|
197
|
+
is necessary because the file we're parsing contains several adjacent
|
198
|
+
ranges."
|
199
|
+
(when last-test
|
200
|
+
(unless (= (first last-test) (first test))
|
201
|
+
(print last-test out)))
|
202
|
+
(setq last-test test))
|
203
|
+
(add-test (code-point property &optional (successp t))
|
204
|
+
"Writes a test designator \(used by the function
|
205
|
+
CL-UNICODE-TEST::PROPERTY-TESTS) for a check whether CODE-POINT has
|
206
|
+
the property PROPERTY to the stream OUT, but only does this if
|
207
|
+
CODE-POINT is below +CODE-POINT-LIMIT+. Tests for the inverse if
|
208
|
+
SUCCESSP is NIL. The test designator isn't actually written to the
|
209
|
+
stream, though, but handed over to REALLY-ADD-TEST."
|
210
|
+
(when (< code-point +code-point-limit+)
|
211
|
+
(really-add-test `(,code-point ,property ,successp)))))
|
212
|
+
(with-unicode-file ((code-point-range property) "DerivedCoreProperties.txt")
|
213
|
+
(cond ((atom code-point-range)
|
214
|
+
(add-test code-point-range property)
|
215
|
+
(add-test (1+ code-point-range) property nil))
|
216
|
+
(t
|
217
|
+
(add-test (car code-point-range) property)
|
218
|
+
(add-test (cdr code-point-range) property)
|
219
|
+
(add-test (1+ (cdr code-point-range)) property nil))))
|
220
|
+
(print last-test out)))))
|
221
|
+
|
222
|
+
(defun dump-data-structures ()
|
223
|
+
"Dumps all the information contained in *CHAR-DATABASE* and the
|
224
|
+
related hash tables and lists to the corresponding Lisp and test
|
225
|
+
source files."
|
226
|
+
(dump-methods)
|
227
|
+
(dump-hash-tables)
|
228
|
+
(dump-lists)
|
229
|
+
(dump-derived-tests)
|
230
|
+
(setq *char-database* nil))
|
231
|
+
|
232
|
+
(defun create-source-files ()
|
233
|
+
"Combines BUILD-DATA-STRUCTURES and DUMP-DATA-STRUCTURES to create
|
234
|
+
the \"missing\" CL-UNICODE source files."
|
235
|
+
(build-data-structures)
|
236
|
+
(dump-data-structures)
|
237
|
+
(setq *char-database* nil))
|
238
|
+
|
239
|
+
(create-source-files)
|
@@ -0,0 +1,280 @@
|
|
1
|
+
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
|
2
|
+
;;; $Header: /usr/local/cvsrep/cl-unicode/build/read.lisp,v 1.32 2008/07/22 02:42:15 edi Exp $
|
3
|
+
|
4
|
+
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
|
5
|
+
|
6
|
+
;;; Redistribution and use in source and binary forms, with or without
|
7
|
+
;;; modification, are permitted provided that the following conditions
|
8
|
+
;;; are met:
|
9
|
+
|
10
|
+
;;; * Redistributions of source code must retain the above copyright
|
11
|
+
;;; notice, this list of conditions and the following disclaimer.
|
12
|
+
|
13
|
+
;;; * Redistributions in binary form must reproduce the above
|
14
|
+
;;; copyright notice, this list of conditions and the following
|
15
|
+
;;; disclaimer in the documentation and/or other materials
|
16
|
+
;;; provided with the distribution.
|
17
|
+
|
18
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
19
|
+
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
20
|
+
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
21
|
+
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
22
|
+
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
24
|
+
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
25
|
+
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
26
|
+
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
27
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
28
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
(in-package :cl-unicode)
|
31
|
+
|
32
|
+
(defmacro with-unicode-file (((&rest bindings) file-name &optional two-line-ranges) &body body)
|
33
|
+
"Utility macro to parse a file which is formatted as described in
|
34
|
+
<http://unicode.org/Public/UNIDATA/UCD.html#UCD_File_Format>. The
|
35
|
+
file named FILE-NAME is searched for in the directory \"data/\"
|
36
|
+
relative to this source file. The code then iterates through the file
|
37
|
+
and executes BODY for each non-comment line binding the variables in
|
38
|
+
BINDINGS to the parsed fields of the line. For the details of
|
39
|
+
BINDINGS see the EXTRACT-FOO functions in util.lisp or the usage of
|
40
|
+
this macro below. If TWO-LINE-RANGES is true, then the macro expects
|
41
|
+
a file like \"UnicodeData.txt\" where ranges aren't denoted as usual
|
42
|
+
but rather using <..., First> and <..., Last>."
|
43
|
+
(let ((variables (extract-variables bindings))
|
44
|
+
(types (extract-types bindings)))
|
45
|
+
`(let ((pathname (merge-pathnames ,file-name (merge-pathnames "data/" *this-file*))))
|
46
|
+
(format t "~&;;; Parsing Unicode file ~A" (file-namestring pathname))
|
47
|
+
(force-output)
|
48
|
+
(with-open-file (binary-in pathname :element-type 'flex:octet)
|
49
|
+
;; Unicode data files must be read as UTF-8
|
50
|
+
(let ((in (flex:make-flexi-stream binary-in :external-format '(:utf-8 :eol-style :lf))))
|
51
|
+
(loop
|
52
|
+
(flet ((get-line-contents ()
|
53
|
+
(let ((line (or (read-line in nil) (return))))
|
54
|
+
(and (not (ppcre:scan "^\\s*(?:#|$)" line))
|
55
|
+
(ppcre:split "\\s*#.*$|\\s*;\\s*" line :limit most-positive-fixnum)))))
|
56
|
+
(let ((contents (get-line-contents)))
|
57
|
+
(when contents
|
58
|
+
(destructuring-bind ,variables
|
59
|
+
(parse-one-line contents ',types (list ,@(extract-defaults bindings)))
|
60
|
+
,@(when two-line-ranges
|
61
|
+
`((when (ppcre:scan "^<.*, First>$" ,(second variables))
|
62
|
+
(let ((range-end (first (parse-one-line (list (first (get-line-contents)))))))
|
63
|
+
(setq ,(first variables) (cons ,(first variables) range-end))))))
|
64
|
+
,@body))))))))))
|
65
|
+
|
66
|
+
(defmacro with-code-point-range ((var range) &body body)
|
67
|
+
"Utility macro which executes BODY with VAR bound to each code point
|
68
|
+
in RANGE in turn. VAR can either be an integer \(for one code point)
|
69
|
+
or a cons of two integers \(for an inclusive range)."
|
70
|
+
(with-rebinding (range)
|
71
|
+
`(flet ((thunk (,var) ,@body))
|
72
|
+
(cond ((atom ,range) (thunk ,range))
|
73
|
+
(t (loop for point from (car ,range) to (cdr ,range)
|
74
|
+
do (thunk point)))))))
|
75
|
+
|
76
|
+
(defun read-character-data ()
|
77
|
+
"Parses the file \"UnicodeData.txt\" and generates one CHAR-INFO
|
78
|
+
entry per code point which is stored in *CHAR-DATABASE*."
|
79
|
+
;; by definition, we'll never see this property in the file, so we
|
80
|
+
;; have to add it to *GENERAL-CATEGORIES* explicitly
|
81
|
+
(setq *general-categories* (list '#.(property-symbol "Cn")))
|
82
|
+
(with-unicode-file ((code-point-range
|
83
|
+
name
|
84
|
+
(general-category symbol)
|
85
|
+
(combining-class integer)
|
86
|
+
(bidi-class symbol)
|
87
|
+
;; decomposition mapping, ignored for now
|
88
|
+
_
|
89
|
+
(decimal-digit integer nil)
|
90
|
+
(digit integer nil)
|
91
|
+
(numeric rational nil)
|
92
|
+
(bidi-mirrored boolean)
|
93
|
+
(unicode1-name string nil)
|
94
|
+
;; ISO comment, ignored
|
95
|
+
_
|
96
|
+
(uppercase-mapping hex nil)
|
97
|
+
(lowercase-mapping hex nil)
|
98
|
+
(titlecase-mapping hex nil))
|
99
|
+
"UnicodeData.txt" t)
|
100
|
+
(pushnew general-category *general-categories* :test #'eq)
|
101
|
+
(pushnew bidi-class *bidi-classes* :test #'eq)
|
102
|
+
;; if the name starts with #\<, it's not really a name but denotes
|
103
|
+
;; a range - some of these names (CJK unified ideographs and
|
104
|
+
;; Hangul syllables) will be computed later, the others are NIL
|
105
|
+
(when (char= (char name 0) #\<)
|
106
|
+
(setq name nil))
|
107
|
+
(with-code-point-range (code-point code-point-range)
|
108
|
+
(setf (aref *char-database* code-point)
|
109
|
+
(make-instance 'char-info
|
110
|
+
:code-point code-point
|
111
|
+
:name name
|
112
|
+
:general-category general-category
|
113
|
+
:combining-class combining-class
|
114
|
+
:bidi-class bidi-class
|
115
|
+
:numeric-type (cond (decimal-digit '#.(property-symbol "Decimal"))
|
116
|
+
(digit '#.(property-symbol "Digit"))
|
117
|
+
(numeric '#.(property-symbol "Numeric")))
|
118
|
+
:numeric-value numeric
|
119
|
+
:binary-props (and bidi-mirrored
|
120
|
+
(list '#.(property-symbol "BidiMirrored")))
|
121
|
+
:unicode1-name unicode1-name
|
122
|
+
:uppercase-mapping uppercase-mapping
|
123
|
+
:lowercase-mapping lowercase-mapping
|
124
|
+
:titlecase-mapping titlecase-mapping)))))
|
125
|
+
|
126
|
+
(defun read-scripts ()
|
127
|
+
"Parses the file \"Scripts.txt\" and adds the information about the
|
128
|
+
script to the corresponding entries in *CHAR-DATABASE*."
|
129
|
+
(with-unicode-file ((code-point-range (script symbol)) "Scripts.txt")
|
130
|
+
(pushnew script *scripts* :test #'eq)
|
131
|
+
(with-code-point-range (code-point code-point-range)
|
132
|
+
(let ((char-info (aref *char-database* code-point)))
|
133
|
+
(when char-info
|
134
|
+
(setf (script* char-info) script))))))
|
135
|
+
|
136
|
+
(defun read-code-blocks ()
|
137
|
+
"Parses the file \"Blocks.txt\" and adds the information about the
|
138
|
+
code block to the corresponding entries in *CHAR-DATABASE*."
|
139
|
+
(with-unicode-file ((code-point-range (code-block symbol)) "Blocks.txt")
|
140
|
+
(pushnew code-block *code-blocks* :test #'eq)
|
141
|
+
(with-code-point-range (code-point code-point-range)
|
142
|
+
(let ((char-info (aref *char-database* code-point)))
|
143
|
+
(when char-info
|
144
|
+
(setf (code-block* char-info) code-block))))))
|
145
|
+
|
146
|
+
(defun read-binary-properties ()
|
147
|
+
"Parses the file \"PropList.txt\" and adds information about binary
|
148
|
+
properties to the corresponding entries in *CHAR-DATABASE*."
|
149
|
+
;; this property was derived from UnicodeData.txt already
|
150
|
+
(setq *binary-properties* (list '#.(property-symbol "BidiMirrored")))
|
151
|
+
(with-unicode-file ((code-point-range (property symbol)) "PropList.txt")
|
152
|
+
;; we don't need this information as we derive it from a code
|
153
|
+
;; point not being mentioned in UnicodeData.txt - see also the
|
154
|
+
;; initform for GENERAL-CATEGORY in the definition of CHAR-INFO
|
155
|
+
(unless (eq property '#.(property-symbol "NoncharacterCodePoint"))
|
156
|
+
(pushnew property *binary-properties* :test #'eq)
|
157
|
+
(with-code-point-range (code-point code-point-range)
|
158
|
+
(let ((char-info (aref *char-database* code-point)))
|
159
|
+
(unless char-info
|
160
|
+
;; this file actually contains some information for
|
161
|
+
;; unassigned (but reserved) code points, like e.g. #xfff0
|
162
|
+
(setf char-info (make-instance 'char-info :code-point code-point)
|
163
|
+
(aref *char-database* code-point) char-info))
|
164
|
+
(push property (binary-props* char-info)))))))
|
165
|
+
|
166
|
+
(defun read-derived-age ()
|
167
|
+
"Parses the file \"DerivedAge.txt\" and adds information about the
|
168
|
+
\"age\" to the corresponding entries in *CHAR-DATABASE*."
|
169
|
+
(with-unicode-file ((code-point-range (age age)) "DerivedAge.txt")
|
170
|
+
(with-code-point-range (code-point code-point-range)
|
171
|
+
(let ((char-info (aref *char-database* code-point)))
|
172
|
+
(when char-info
|
173
|
+
(setf (age* char-info) age))))))
|
174
|
+
|
175
|
+
(defun read-mirroring-glyphs ()
|
176
|
+
"Parses the file \"BidiMirroring.txt\" and adds information about
|
177
|
+
mirroring glyphs to the corresponding entries in *CHAR-DATABASE*."
|
178
|
+
(with-unicode-file ((code-point-range (mirroring-glyph hex)) "BidiMirroring.txt")
|
179
|
+
(with-code-point-range (code-point code-point-range)
|
180
|
+
(let ((char-info (aref *char-database* code-point)))
|
181
|
+
(when char-info
|
182
|
+
(setf (bidi-mirroring-glyph* char-info) mirroring-glyph))))))
|
183
|
+
|
184
|
+
(defun read-jamo ()
|
185
|
+
"Parses the file \"Jamo.txt\" and stores information about Jamo
|
186
|
+
short names in the *JAMO-SHORT-NAMES* hash table. This information is
|
187
|
+
later used to compute Hangul syllable names."
|
188
|
+
(clrhash *jamo-short-names*)
|
189
|
+
(with-unicode-file ((code-point-range (short-name string "")) "Jamo.txt")
|
190
|
+
(with-code-point-range (code-point code-point-range)
|
191
|
+
(setf (gethash code-point *jamo-short-names*) short-name))))
|
192
|
+
|
193
|
+
(defun default-bidi-class (char-info)
|
194
|
+
"Returns the default Bidi class for the character described by the
|
195
|
+
CHAR-INFO object CHAR-INFO. The default is computed as explained in
|
196
|
+
<http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt>."
|
197
|
+
(let ((code-point (code-point char-info)))
|
198
|
+
(cond ((and (or (<= #x0600 code-point #x07BF)
|
199
|
+
(<= #xFB50 code-point #xFDFF)
|
200
|
+
(<= #xFE70 code-point #xFEFF))
|
201
|
+
(not (find '#.(property-symbol "NoncharacterCodePoint")
|
202
|
+
(binary-props* code-point))))
|
203
|
+
'#.(property-symbol "AL"))
|
204
|
+
((or (<= #x0590 code-point #x05FF)
|
205
|
+
(<= #x07C0 code-point #x08ff)
|
206
|
+
(<= #xFB1D code-point #xFB4F)
|
207
|
+
(<= #x10800 code-point #x10FFF))
|
208
|
+
'#.(property-symbol "R"))
|
209
|
+
(t '#.(property-symbol "L")))))
|
210
|
+
|
211
|
+
(defun set-default-bidi-classes ()
|
212
|
+
"Loops through all assigned characters in *CHAR-DATABASE* and
|
213
|
+
defaults their Bidi class if it wasn't set already."
|
214
|
+
(loop for char-info across *char-database*
|
215
|
+
when (and char-info (not (bidi-class* char-info)))
|
216
|
+
do (let ((default-bidi-class (default-bidi-class char-info)))
|
217
|
+
(pushnew default-bidi-class *bidi-classes* :test #'eq)
|
218
|
+
(setf (bidi-class* char-info) default-bidi-class))))
|
219
|
+
|
220
|
+
(defun fill-database ()
|
221
|
+
"Initializes all relevant datastructures and parses all Unicode data
|
222
|
+
files in the \"data/\" directory to build up enough information in
|
223
|
+
memory \(specifically the *CHAR-DATABASE* array) to write the missing
|
224
|
+
source code files for CL-UNICODE."
|
225
|
+
(setq *char-database* (make-empty-char-database)
|
226
|
+
*general-categories* nil
|
227
|
+
*scripts* nil
|
228
|
+
*code-blocks* nil
|
229
|
+
*binary-properties* nil
|
230
|
+
*bidi-classes* nil)
|
231
|
+
(initialize-property-symbols)
|
232
|
+
(read-character-data)
|
233
|
+
(read-scripts)
|
234
|
+
(read-code-blocks)
|
235
|
+
(read-binary-properties)
|
236
|
+
(read-derived-age)
|
237
|
+
(read-mirroring-glyphs)
|
238
|
+
(read-jamo)
|
239
|
+
(set-default-bidi-classes))
|
240
|
+
|
241
|
+
(defun build-name-mappings ()
|
242
|
+
"Initializes and fills the hash tables which map code points to
|
243
|
+
\(Unicode 1.0) names and vice versa using the information in
|
244
|
+
*CHAR-DATABASE*."
|
245
|
+
(clrhash *names-to-code-points*)
|
246
|
+
(clrhash *unicode1-names-to-code-points*)
|
247
|
+
(clrhash *code-points-to-names*)
|
248
|
+
(clrhash *code-points-to-unicode1-names*)
|
249
|
+
(loop for char-info across *char-database*
|
250
|
+
for name = (and char-info (name char-info))
|
251
|
+
for unicode1-name = (and char-info (unicode1-name char-info))
|
252
|
+
for code-point = (and char-info (code-point char-info))
|
253
|
+
when name
|
254
|
+
do (setf (gethash code-point *code-points-to-names*) name
|
255
|
+
(gethash (canonicalize-name name) *names-to-code-points*) code-point)
|
256
|
+
when unicode1-name
|
257
|
+
do (setf (gethash code-point *code-points-to-unicode1-names*) unicode1-name
|
258
|
+
(gethash (canonicalize-name unicode1-name) *unicode1-names-to-code-points*) code-point)))
|
259
|
+
|
260
|
+
(defun build-case-mapping ()
|
261
|
+
"Initializes and filles the *CASE-MAPPINGS* hash table from
|
262
|
+
*CHAR-DATABASE*."
|
263
|
+
(clrhash *case-mappings*)
|
264
|
+
(loop for char-info across *char-database*
|
265
|
+
for mappings = (and char-info
|
266
|
+
(list (uppercase-mapping* char-info)
|
267
|
+
(lowercase-mapping* char-info)
|
268
|
+
(titlecase-mapping* char-info)))
|
269
|
+
when (and mappings (some #'identity mappings))
|
270
|
+
do (setf (gethash (code-point char-info) *case-mappings*) mappings)))
|
271
|
+
|
272
|
+
(defun build-data-structures ()
|
273
|
+
"One function to combine the complete process of parsing all Unicode
|
274
|
+
data files and building the corresponding Lisp datastructures in
|
275
|
+
memory."
|
276
|
+
(fill-database)
|
277
|
+
(format t "~&;;; Building hash tables")
|
278
|
+
(force-output)
|
279
|
+
(build-name-mappings)
|
280
|
+
(build-case-mapping))
|