clucumber 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. data/LICENSE +1 -1
  2. data/README.md +4 -9
  3. data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
  4. data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
  5. data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
  6. data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
  7. data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
  8. data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
  9. data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
  10. data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
  11. data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
  12. data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
  13. data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
  14. data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
  15. data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
  16. data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
  17. data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
  18. data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
  19. data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
  20. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
  21. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
  22. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
  23. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
  24. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
  25. data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
  26. data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
  27. data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
  28. data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
  29. data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
  30. data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
  31. data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
  32. data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
  33. data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
  34. data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
  35. data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
  36. data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
  37. data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
  38. data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
  39. data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
  40. data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
  41. data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
  42. data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
  43. data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
  44. data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
  45. data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
  46. data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
  47. data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
  48. data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
  49. data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
  50. data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
  51. data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
  52. data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
  53. data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
  54. data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
  55. data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
  56. data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
  57. data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
  58. data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
  59. data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
  60. data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
  61. data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
  62. data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
  63. data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
  64. data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
  65. data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
  66. data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
  67. data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
  68. data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
  69. data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
  70. data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
  71. data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
  72. data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
  73. data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
  74. data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
  75. data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
  76. data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
  77. data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
  78. data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
  79. data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
  80. data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
  81. data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
  82. data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
  83. data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
  84. data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
  85. data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
  86. data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
  87. data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
  88. data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
  89. data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
  90. data/lib/clucumber/vendor/lift/lift.asd +77 -0
  91. data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
  92. data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
  93. data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
  94. data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
  95. data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
  96. data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
  97. data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
  98. data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
  99. data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
  100. data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
  101. data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
  102. data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
  103. data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
  104. data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
  105. data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
  106. data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
  107. data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
  108. data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
  109. data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
  110. data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
  111. data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
  112. data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
  113. data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
  114. data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
  115. data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
  116. data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
  117. data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
  118. data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
  119. data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
  120. data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
  121. data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
  122. data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
  123. data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
  124. data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
  125. data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
  126. data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
  127. data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
  128. data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
  129. data/lib/clucumber/vendor/usocket/package.lisp +82 -0
  130. data/lib/clucumber/vendor/usocket/server.lisp +45 -0
  131. data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
  132. data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
  133. data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
  134. data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
  135. data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
  136. data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
  137. data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
  138. data/lib/clucumber.rb +29 -7
  139. metadata +151 -5
@@ -0,0 +1,239 @@
1
+ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
2
+ ;;; $Header: /usr/local/cvsrep/cl-unicode/build/dump.lisp,v 1.37 2008/07/21 23:02:25 edi Exp $
3
+
4
+ ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
5
+
6
+ ;;; Redistribution and use in source and binary forms, with or without
7
+ ;;; modification, are permitted provided that the following conditions
8
+ ;;; are met:
9
+
10
+ ;;; * Redistributions of source code must retain the above copyright
11
+ ;;; notice, this list of conditions and the following disclaimer.
12
+
13
+ ;;; * Redistributions in binary form must reproduce the above
14
+ ;;; copyright notice, this list of conditions and the following
15
+ ;;; disclaimer in the documentation and/or other materials
16
+ ;;; provided with the distribution.
17
+
18
+ ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19
+ ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
+ ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22
+ ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24
+ ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
+ ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
+ ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ (in-package :cl-unicode)
31
+
32
+ (defun build-range-list (reader test)
33
+ "Loops through all non-NIL elements of *CHAR-DATABASE*, looks at
34
+ each element using the reader READER and builds an alist of the form
35
+ \((X1 . A1) (X2 . A2)) where the Xi are code points and the Ai are the
36
+ corresponding attributes as returned by READER. The interesting part
37
+ is that the Xi can also be code point ranges like \(Y1 . Y2) meaning
38
+ that all code points from Y1 to \(including) Y2 have the same
39
+ attribute Ai and the function tries to make the alist as short as
40
+ possible, i.e. to partition it into as few ranges as possible.
41
+ Whether two attributes are the same is determined by the test function
42
+ TEST. The resulting list is sorted."
43
+ (let (range-list
44
+ (last-attribute (funcall reader (aref *char-database* 0)))
45
+ (last-code-point 0)
46
+ (code-point 0))
47
+ (flet ((add ()
48
+ "Adds the range from LAST-CODE-POINT to \(excluding)
49
+ CODE-POINT with the attribute LAST-ATTRIBUTE to the result."
50
+ (push (cons (cons last-code-point (1- code-point))
51
+ last-attribute)
52
+ range-list)))
53
+ (loop
54
+ (incf code-point)
55
+ (when (= code-point #.(1- +code-point-limit+))
56
+ (add)
57
+ (return))
58
+ (let* ((char-info (aref *char-database* code-point))
59
+ (attribute (and char-info (funcall reader char-info))))
60
+ (unless (funcall test attribute last-attribute)
61
+ (add)
62
+ (setq last-attribute attribute
63
+ last-code-point code-point)))))
64
+ (nreverse range-list)))
65
+
66
+ (defun split-range-list (range-list)
67
+ "Recursively splits a range list as returned by BUILD-RANGE-LIST in
68
+ the middle and thus converts it into a binary search tree which can be
69
+ used by the TREE-LOOKUP function."
70
+ (let ((length (length range-list)))
71
+ (cond ((zerop length) nil)
72
+ (t (let ((middle (round (1- length) 2)))
73
+ (list (nth middle range-list)
74
+ (split-range-list (subseq range-list 0 middle))
75
+ (split-range-list (subseq range-list (1+ middle)))))))))
76
+
77
+ (defun build-tree (reader &optional (test #'eq))
78
+ "Uses BUILD-RANGE-LIST and SPLIT-RANGE-LIST to build a binary search
79
+ tree for READER which is one of the readers of the CHAR-INFO class.
80
+ Attributes are compared with TEST."
81
+ (split-range-list (build-range-list reader test)))
82
+
83
+ (defun dump-method (name reader stream &optional (test #'eq test-provided-p))
84
+ "Writes a method definition for a unary method called NAME
85
+ specialized for integer \(code points) to the stream STREAM which
86
+ returns a value equivalent to
87
+
88
+ \(APPLY READER \(AREF *CHAR-DATABASE* <code-point>))
89
+
90
+ but uses compact binary search trees instead of the *CHAR-DATABASE*
91
+ array. TEST is used by BUILD-TREE to decide whether two adjacent
92
+ characters have the same attribute. If TEST isn't provided, it is
93
+ assumed that the attribute is a property symbol and the method will
94
+ return two values - the symbol and the canonical name of the symbol."
95
+ (let ((definition (if test-provided-p
96
+ `(defmethod ,name ((code-point integer))
97
+ (tree-lookup code-point ',(build-tree reader test)))
98
+ `(defmethod ,name ((code-point integer))
99
+ (let ((symbol (tree-lookup code-point ',(build-tree reader))))
100
+ (values (property-name symbol) symbol))))))
101
+ (print definition stream)))
102
+
103
+ (defmacro with-output-to-source-file ((stream relative-path &key no-header-p) &body body)
104
+ "Executes BODY with STREAM bound to an output file stream which
105
+ writes to the file denoted by RELATIVE-PATH - a path relative to the
106
+ location of this source file. Writes a Lisp header to the files
107
+ unless NO-HEADER-P is true."
108
+ `(let ((pathname (merge-pathnames ,relative-path *this-file*)))
109
+ (format t "~&;;; Writing source file ~A" (file-namestring pathname))
110
+ (force-output)
111
+ (with-open-file (,stream pathname
112
+ :direction :output
113
+ :if-exists :supersede)
114
+ (with-standard-io-syntax ()
115
+ (let ((*package* (find-package :cl-unicode)))
116
+ ,@(unless no-header-p
117
+ '((format out ";;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-~%")))
118
+ (format out ";;; This file was auto-generated by the BUILD-CL-UNICODE system~2%")
119
+ ,@(unless no-header-p
120
+ '((print '(in-package :cl-unicode) out)))
121
+ ,@body)))))
122
+
123
+ (defun dump-methods ()
124
+ "Dumps several methods to the CL-UNICODE source file methods.lisp
125
+ using DUMP-METHOD."
126
+ (with-output-to-source-file (out "../methods.lisp")
127
+ (dump-method 'script 'script* out)
128
+ (dump-method 'code-block 'code-block* out)
129
+ (dump-method 'age 'age* out #'equal)
130
+ (dump-method 'general-category 'general-category* out)
131
+ (dump-method 'bidi-class 'bidi-class* out)
132
+ (dump-method 'numeric-type 'numeric-type* out)
133
+ (dump-method 'numeric-value 'numeric-value* out #'eql)
134
+ (dump-method 'combining-class 'combining-class* out #'eql)
135
+ (dump-method 'bidi-mirroring-glyph% 'bidi-mirroring-glyph* out #'eql)
136
+ (dump-method 'binary-props 'binary-props* out #'equal)))
137
+
138
+ (defun dump-hash-table (hash-table-name stream)
139
+ "Writes code to the STREAM which reinitializes the hash table
140
+ contained in the global special variable named HASH-TABLE-NAME to its
141
+ current state. It is assumed that all keys and values of the hash
142
+ table can be printed readably."
143
+ (print `(clrhash ,hash-table-name) stream)
144
+ (let ((key-value-alist
145
+ (loop for key being the hash-keys of (symbol-value hash-table-name)
146
+ using (hash-value value)
147
+ when value
148
+ collect (cons key value))))
149
+ (print `(loop for (key . value) in ',key-value-alist
150
+ do (setf (gethash key ,hash-table-name) value))
151
+ stream)))
152
+
153
+ (defun dump-hash-tables ()
154
+ "Dumps several hash tables to the CL-UNICODE source file
155
+ hash-tables.lisp using DUMP-HASH-TABLE."
156
+ (with-output-to-source-file (out "../hash-tables.lisp")
157
+ (dump-hash-table '*canonical-names* out)
158
+ (dump-hash-table '*names-to-code-points* out)
159
+ (dump-hash-table '*code-points-to-names* out)
160
+ (dump-hash-table '*unicode1-names-to-code-points* out)
161
+ (dump-hash-table '*code-points-to-unicode1-names* out)
162
+ (dump-hash-table '*case-mappings* out)
163
+ (dump-hash-table '*jamo-short-names* out)
164
+ ;; finally add code which adds the computed Hangul syllable names
165
+ ;; at load time
166
+ (print '(add-hangul-names) out)))
167
+
168
+ (defun dump-list (list-name stream)
169
+ "Writes code to the STREAM which reinitializes the list contained in
170
+ the global special variable named LIST-NAME to its current state. It
171
+ is assumed that all elements of the list can be printed readably."
172
+ (print `(setq ,list-name ',(symbol-value list-name)) stream))
173
+
174
+ (defun dump-lists ()
175
+ "Dumps several list to the CL-UNICODE source file lists.lisp using
176
+ DUMP-LIST."
177
+ (with-output-to-source-file (out "../lists.lisp")
178
+ (dump-list '*general-categories* out)
179
+ (dump-list '*scripts* out)
180
+ (dump-list '*code-blocks* out)
181
+ (dump-list '*binary-properties* out)
182
+ (dump-list '*bidi-classes* out)))
183
+
184
+ (defun dump-derived-tests ()
185
+ "Parses the Unicode data file \"DerivedCoreProperties.txt\" \(which
186
+ is not used in read.lisp) and uses it to create a file
187
+ \"derived-properties\" which will be used by CL-UNICODE-TEST."
188
+ (with-output-to-source-file (out (make-pathname :name "derived-properties"
189
+ :type :unspecific
190
+ :directory '(:relative :up "test"))
191
+ :no-header-p t)
192
+ (let (last-test)
193
+ (labels ((really-add-test (test)
194
+ "Writes the test designator from ADD-TEST in a
195
+ \"delayed\" fashion to make sure that the previous test \(which hasn't
196
+ been written to disk yet) doesn't contradict the current test. This
197
+ is necessary because the file we're parsing contains several adjacent
198
+ ranges."
199
+ (when last-test
200
+ (unless (= (first last-test) (first test))
201
+ (print last-test out)))
202
+ (setq last-test test))
203
+ (add-test (code-point property &optional (successp t))
204
+ "Writes a test designator \(used by the function
205
+ CL-UNICODE-TEST::PROPERTY-TESTS) for a check whether CODE-POINT has
206
+ the property PROPERTY to the stream OUT, but only does this if
207
+ CODE-POINT is below +CODE-POINT-LIMIT+. Tests for the inverse if
208
+ SUCCESSP is NIL. The test designator isn't actually written to the
209
+ stream, though, but handed over to REALLY-ADD-TEST."
210
+ (when (< code-point +code-point-limit+)
211
+ (really-add-test `(,code-point ,property ,successp)))))
212
+ (with-unicode-file ((code-point-range property) "DerivedCoreProperties.txt")
213
+ (cond ((atom code-point-range)
214
+ (add-test code-point-range property)
215
+ (add-test (1+ code-point-range) property nil))
216
+ (t
217
+ (add-test (car code-point-range) property)
218
+ (add-test (cdr code-point-range) property)
219
+ (add-test (1+ (cdr code-point-range)) property nil))))
220
+ (print last-test out)))))
221
+
222
+ (defun dump-data-structures ()
223
+ "Dumps all the information contained in *CHAR-DATABASE* and the
224
+ related hash tables and lists to the corresponding Lisp and test
225
+ source files."
226
+ (dump-methods)
227
+ (dump-hash-tables)
228
+ (dump-lists)
229
+ (dump-derived-tests)
230
+ (setq *char-database* nil))
231
+
232
+ (defun create-source-files ()
233
+ "Combines BUILD-DATA-STRUCTURES and DUMP-DATA-STRUCTURES to create
234
+ the \"missing\" CL-UNICODE source files."
235
+ (build-data-structures)
236
+ (dump-data-structures)
237
+ (setq *char-database* nil))
238
+
239
+ (create-source-files)
@@ -0,0 +1,280 @@
1
+ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
2
+ ;;; $Header: /usr/local/cvsrep/cl-unicode/build/read.lisp,v 1.32 2008/07/22 02:42:15 edi Exp $
3
+
4
+ ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
5
+
6
+ ;;; Redistribution and use in source and binary forms, with or without
7
+ ;;; modification, are permitted provided that the following conditions
8
+ ;;; are met:
9
+
10
+ ;;; * Redistributions of source code must retain the above copyright
11
+ ;;; notice, this list of conditions and the following disclaimer.
12
+
13
+ ;;; * Redistributions in binary form must reproduce the above
14
+ ;;; copyright notice, this list of conditions and the following
15
+ ;;; disclaimer in the documentation and/or other materials
16
+ ;;; provided with the distribution.
17
+
18
+ ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19
+ ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
+ ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22
+ ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24
+ ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
+ ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
+ ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ (in-package :cl-unicode)
31
+
32
+ (defmacro with-unicode-file (((&rest bindings) file-name &optional two-line-ranges) &body body)
33
+ "Utility macro to parse a file which is formatted as described in
34
+ <http://unicode.org/Public/UNIDATA/UCD.html#UCD_File_Format>. The
35
+ file named FILE-NAME is searched for in the directory \"data/\"
36
+ relative to this source file. The code then iterates through the file
37
+ and executes BODY for each non-comment line binding the variables in
38
+ BINDINGS to the parsed fields of the line. For the details of
39
+ BINDINGS see the EXTRACT-FOO functions in util.lisp or the usage of
40
+ this macro below. If TWO-LINE-RANGES is true, then the macro expects
41
+ a file like \"UnicodeData.txt\" where ranges aren't denoted as usual
42
+ but rather using <..., First> and <..., Last>."
43
+ (let ((variables (extract-variables bindings))
44
+ (types (extract-types bindings)))
45
+ `(let ((pathname (merge-pathnames ,file-name (merge-pathnames "data/" *this-file*))))
46
+ (format t "~&;;; Parsing Unicode file ~A" (file-namestring pathname))
47
+ (force-output)
48
+ (with-open-file (binary-in pathname :element-type 'flex:octet)
49
+ ;; Unicode data files must be read as UTF-8
50
+ (let ((in (flex:make-flexi-stream binary-in :external-format '(:utf-8 :eol-style :lf))))
51
+ (loop
52
+ (flet ((get-line-contents ()
53
+ (let ((line (or (read-line in nil) (return))))
54
+ (and (not (ppcre:scan "^\\s*(?:#|$)" line))
55
+ (ppcre:split "\\s*#.*$|\\s*;\\s*" line :limit most-positive-fixnum)))))
56
+ (let ((contents (get-line-contents)))
57
+ (when contents
58
+ (destructuring-bind ,variables
59
+ (parse-one-line contents ',types (list ,@(extract-defaults bindings)))
60
+ ,@(when two-line-ranges
61
+ `((when (ppcre:scan "^<.*, First>$" ,(second variables))
62
+ (let ((range-end (first (parse-one-line (list (first (get-line-contents)))))))
63
+ (setq ,(first variables) (cons ,(first variables) range-end))))))
64
+ ,@body))))))))))
65
+
66
+ (defmacro with-code-point-range ((var range) &body body)
67
+ "Utility macro which executes BODY with VAR bound to each code point
68
+ in RANGE in turn. VAR can either be an integer \(for one code point)
69
+ or a cons of two integers \(for an inclusive range)."
70
+ (with-rebinding (range)
71
+ `(flet ((thunk (,var) ,@body))
72
+ (cond ((atom ,range) (thunk ,range))
73
+ (t (loop for point from (car ,range) to (cdr ,range)
74
+ do (thunk point)))))))
75
+
76
+ (defun read-character-data ()
77
+ "Parses the file \"UnicodeData.txt\" and generates one CHAR-INFO
78
+ entry per code point which is stored in *CHAR-DATABASE*."
79
+ ;; by definition, we'll never see this property in the file, so we
80
+ ;; have to add it to *GENERAL-CATEGORIES* explicitly
81
+ (setq *general-categories* (list '#.(property-symbol "Cn")))
82
+ (with-unicode-file ((code-point-range
83
+ name
84
+ (general-category symbol)
85
+ (combining-class integer)
86
+ (bidi-class symbol)
87
+ ;; decomposition mapping, ignored for now
88
+ _
89
+ (decimal-digit integer nil)
90
+ (digit integer nil)
91
+ (numeric rational nil)
92
+ (bidi-mirrored boolean)
93
+ (unicode1-name string nil)
94
+ ;; ISO comment, ignored
95
+ _
96
+ (uppercase-mapping hex nil)
97
+ (lowercase-mapping hex nil)
98
+ (titlecase-mapping hex nil))
99
+ "UnicodeData.txt" t)
100
+ (pushnew general-category *general-categories* :test #'eq)
101
+ (pushnew bidi-class *bidi-classes* :test #'eq)
102
+ ;; if the name starts with #\<, it's not really a name but denotes
103
+ ;; a range - some of these names (CJK unified ideographs and
104
+ ;; Hangul syllables) will be computed later, the others are NIL
105
+ (when (char= (char name 0) #\<)
106
+ (setq name nil))
107
+ (with-code-point-range (code-point code-point-range)
108
+ (setf (aref *char-database* code-point)
109
+ (make-instance 'char-info
110
+ :code-point code-point
111
+ :name name
112
+ :general-category general-category
113
+ :combining-class combining-class
114
+ :bidi-class bidi-class
115
+ :numeric-type (cond (decimal-digit '#.(property-symbol "Decimal"))
116
+ (digit '#.(property-symbol "Digit"))
117
+ (numeric '#.(property-symbol "Numeric")))
118
+ :numeric-value numeric
119
+ :binary-props (and bidi-mirrored
120
+ (list '#.(property-symbol "BidiMirrored")))
121
+ :unicode1-name unicode1-name
122
+ :uppercase-mapping uppercase-mapping
123
+ :lowercase-mapping lowercase-mapping
124
+ :titlecase-mapping titlecase-mapping)))))
125
+
126
+ (defun read-scripts ()
127
+ "Parses the file \"Scripts.txt\" and adds the information about the
128
+ script to the corresponding entries in *CHAR-DATABASE*."
129
+ (with-unicode-file ((code-point-range (script symbol)) "Scripts.txt")
130
+ (pushnew script *scripts* :test #'eq)
131
+ (with-code-point-range (code-point code-point-range)
132
+ (let ((char-info (aref *char-database* code-point)))
133
+ (when char-info
134
+ (setf (script* char-info) script))))))
135
+
136
+ (defun read-code-blocks ()
137
+ "Parses the file \"Blocks.txt\" and adds the information about the
138
+ code block to the corresponding entries in *CHAR-DATABASE*."
139
+ (with-unicode-file ((code-point-range (code-block symbol)) "Blocks.txt")
140
+ (pushnew code-block *code-blocks* :test #'eq)
141
+ (with-code-point-range (code-point code-point-range)
142
+ (let ((char-info (aref *char-database* code-point)))
143
+ (when char-info
144
+ (setf (code-block* char-info) code-block))))))
145
+
146
+ (defun read-binary-properties ()
147
+ "Parses the file \"PropList.txt\" and adds information about binary
148
+ properties to the corresponding entries in *CHAR-DATABASE*."
149
+ ;; this property was derived from UnicodeData.txt already
150
+ (setq *binary-properties* (list '#.(property-symbol "BidiMirrored")))
151
+ (with-unicode-file ((code-point-range (property symbol)) "PropList.txt")
152
+ ;; we don't need this information as we derive it from a code
153
+ ;; point not being mentioned in UnicodeData.txt - see also the
154
+ ;; initform for GENERAL-CATEGORY in the definition of CHAR-INFO
155
+ (unless (eq property '#.(property-symbol "NoncharacterCodePoint"))
156
+ (pushnew property *binary-properties* :test #'eq)
157
+ (with-code-point-range (code-point code-point-range)
158
+ (let ((char-info (aref *char-database* code-point)))
159
+ (unless char-info
160
+ ;; this file actually contains some information for
161
+ ;; unassigned (but reserved) code points, like e.g. #xfff0
162
+ (setf char-info (make-instance 'char-info :code-point code-point)
163
+ (aref *char-database* code-point) char-info))
164
+ (push property (binary-props* char-info)))))))
165
+
166
+ (defun read-derived-age ()
167
+ "Parses the file \"DerivedAge.txt\" and adds information about the
168
+ \"age\" to the corresponding entries in *CHAR-DATABASE*."
169
+ (with-unicode-file ((code-point-range (age age)) "DerivedAge.txt")
170
+ (with-code-point-range (code-point code-point-range)
171
+ (let ((char-info (aref *char-database* code-point)))
172
+ (when char-info
173
+ (setf (age* char-info) age))))))
174
+
175
+ (defun read-mirroring-glyphs ()
176
+ "Parses the file \"BidiMirroring.txt\" and adds information about
177
+ mirroring glyphs to the corresponding entries in *CHAR-DATABASE*."
178
+ (with-unicode-file ((code-point-range (mirroring-glyph hex)) "BidiMirroring.txt")
179
+ (with-code-point-range (code-point code-point-range)
180
+ (let ((char-info (aref *char-database* code-point)))
181
+ (when char-info
182
+ (setf (bidi-mirroring-glyph* char-info) mirroring-glyph))))))
183
+
184
+ (defun read-jamo ()
185
+ "Parses the file \"Jamo.txt\" and stores information about Jamo
186
+ short names in the *JAMO-SHORT-NAMES* hash table. This information is
187
+ later used to compute Hangul syllable names."
188
+ (clrhash *jamo-short-names*)
189
+ (with-unicode-file ((code-point-range (short-name string "")) "Jamo.txt")
190
+ (with-code-point-range (code-point code-point-range)
191
+ (setf (gethash code-point *jamo-short-names*) short-name))))
192
+
193
+ (defun default-bidi-class (char-info)
194
+ "Returns the default Bidi class for the character described by the
195
+ CHAR-INFO object CHAR-INFO. The default is computed as explained in
196
+ <http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt>."
197
+ (let ((code-point (code-point char-info)))
198
+ (cond ((and (or (<= #x0600 code-point #x07BF)
199
+ (<= #xFB50 code-point #xFDFF)
200
+ (<= #xFE70 code-point #xFEFF))
201
+ (not (find '#.(property-symbol "NoncharacterCodePoint")
202
+ (binary-props* code-point))))
203
+ '#.(property-symbol "AL"))
204
+ ((or (<= #x0590 code-point #x05FF)
205
+ (<= #x07C0 code-point #x08ff)
206
+ (<= #xFB1D code-point #xFB4F)
207
+ (<= #x10800 code-point #x10FFF))
208
+ '#.(property-symbol "R"))
209
+ (t '#.(property-symbol "L")))))
210
+
211
+ (defun set-default-bidi-classes ()
212
+ "Loops through all assigned characters in *CHAR-DATABASE* and
213
+ defaults their Bidi class if it wasn't set already."
214
+ (loop for char-info across *char-database*
215
+ when (and char-info (not (bidi-class* char-info)))
216
+ do (let ((default-bidi-class (default-bidi-class char-info)))
217
+ (pushnew default-bidi-class *bidi-classes* :test #'eq)
218
+ (setf (bidi-class* char-info) default-bidi-class))))
219
+
220
+ (defun fill-database ()
221
+ "Initializes all relevant datastructures and parses all Unicode data
222
+ files in the \"data/\" directory to build up enough information in
223
+ memory \(specifically the *CHAR-DATABASE* array) to write the missing
224
+ source code files for CL-UNICODE."
225
+ (setq *char-database* (make-empty-char-database)
226
+ *general-categories* nil
227
+ *scripts* nil
228
+ *code-blocks* nil
229
+ *binary-properties* nil
230
+ *bidi-classes* nil)
231
+ (initialize-property-symbols)
232
+ (read-character-data)
233
+ (read-scripts)
234
+ (read-code-blocks)
235
+ (read-binary-properties)
236
+ (read-derived-age)
237
+ (read-mirroring-glyphs)
238
+ (read-jamo)
239
+ (set-default-bidi-classes))
240
+
241
+ (defun build-name-mappings ()
242
+ "Initializes and fills the hash tables which map code points to
243
+ \(Unicode 1.0) names and vice versa using the information in
244
+ *CHAR-DATABASE*."
245
+ (clrhash *names-to-code-points*)
246
+ (clrhash *unicode1-names-to-code-points*)
247
+ (clrhash *code-points-to-names*)
248
+ (clrhash *code-points-to-unicode1-names*)
249
+ (loop for char-info across *char-database*
250
+ for name = (and char-info (name char-info))
251
+ for unicode1-name = (and char-info (unicode1-name char-info))
252
+ for code-point = (and char-info (code-point char-info))
253
+ when name
254
+ do (setf (gethash code-point *code-points-to-names*) name
255
+ (gethash (canonicalize-name name) *names-to-code-points*) code-point)
256
+ when unicode1-name
257
+ do (setf (gethash code-point *code-points-to-unicode1-names*) unicode1-name
258
+ (gethash (canonicalize-name unicode1-name) *unicode1-names-to-code-points*) code-point)))
259
+
260
+ (defun build-case-mapping ()
261
+ "Initializes and filles the *CASE-MAPPINGS* hash table from
262
+ *CHAR-DATABASE*."
263
+ (clrhash *case-mappings*)
264
+ (loop for char-info across *char-database*
265
+ for mappings = (and char-info
266
+ (list (uppercase-mapping* char-info)
267
+ (lowercase-mapping* char-info)
268
+ (titlecase-mapping* char-info)))
269
+ when (and mappings (some #'identity mappings))
270
+ do (setf (gethash (code-point char-info) *case-mappings*) mappings)))
271
+
272
+ (defun build-data-structures ()
273
+ "One function to combine the complete process of parsing all Unicode
274
+ data files and building the corresponding Lisp datastructures in
275
+ memory."
276
+ (fill-database)
277
+ (format t "~&;;; Building hash tables")
278
+ (force-output)
279
+ (build-name-mappings)
280
+ (build-case-mapping))