clucumber 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. data/LICENSE +1 -1
  2. data/README.md +4 -9
  3. data/lib/clucumber/clucumber-bootstrap.lisp +32 -0
  4. data/lib/clucumber/vendor/cl-interpol/alias.lisp +55 -0
  5. data/lib/clucumber/vendor/cl-interpol/cl-interpol.asd +56 -0
  6. data/lib/clucumber/vendor/cl-interpol/load.lisp +53 -0
  7. data/lib/clucumber/vendor/cl-interpol/packages.lisp +40 -0
  8. data/lib/clucumber/vendor/cl-interpol/read.lisp +716 -0
  9. data/lib/clucumber/vendor/cl-interpol/specials.lisp +113 -0
  10. data/lib/clucumber/vendor/cl-interpol/test/packages.lisp +34 -0
  11. data/lib/clucumber/vendor/cl-interpol/test/tests.lisp +128 -0
  12. data/lib/clucumber/vendor/cl-interpol/test.lisp +177 -0
  13. data/lib/clucumber/vendor/cl-interpol/test2.lisp +6254 -0
  14. data/lib/clucumber/vendor/cl-interpol/unicode.lisp +13912 -0
  15. data/lib/clucumber/vendor/cl-interpol/util.lisp +122 -0
  16. data/lib/clucumber/vendor/cl-ppcre/api.lisp +1262 -0
  17. data/lib/clucumber/vendor/cl-ppcre/charmap.lisp +152 -0
  18. data/lib/clucumber/vendor/cl-ppcre/charset.lisp +242 -0
  19. data/lib/clucumber/vendor/cl-ppcre/chartest.lisp +98 -0
  20. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-test.asd +34 -0
  21. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/packages.lisp +38 -0
  22. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode/resolver.lisp +61 -0
  23. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre-unicode.asd +58 -0
  24. data/lib/clucumber/vendor/cl-ppcre/cl-ppcre.asd +79 -0
  25. data/lib/clucumber/vendor/cl-ppcre/closures.lisp +471 -0
  26. data/lib/clucumber/vendor/cl-ppcre/convert.lisp +875 -0
  27. data/lib/clucumber/vendor/cl-ppcre/errors.lisp +84 -0
  28. data/lib/clucumber/vendor/cl-ppcre/lexer.lisp +737 -0
  29. data/lib/clucumber/vendor/cl-ppcre/lispworks-defsystem.lisp +57 -0
  30. data/lib/clucumber/vendor/cl-ppcre/load.lisp +67 -0
  31. data/lib/clucumber/vendor/cl-ppcre/optimize.lisp +578 -0
  32. data/lib/clucumber/vendor/cl-ppcre/packages.lisp +68 -0
  33. data/lib/clucumber/vendor/cl-ppcre/parser.lisp +319 -0
  34. data/lib/clucumber/vendor/cl-ppcre/ppcre-tests.lisp +269 -0
  35. data/lib/clucumber/vendor/cl-ppcre/regex-class-util.lisp +555 -0
  36. data/lib/clucumber/vendor/cl-ppcre/regex-class.lisp +259 -0
  37. data/lib/clucumber/vendor/cl-ppcre/repetition-closures.lisp +833 -0
  38. data/lib/clucumber/vendor/cl-ppcre/scanner.lisp +506 -0
  39. data/lib/clucumber/vendor/cl-ppcre/specials.lisp +172 -0
  40. data/lib/clucumber/vendor/cl-ppcre/test/packages.lisp +37 -0
  41. data/lib/clucumber/vendor/cl-ppcre/test/perl-tests.lisp +150 -0
  42. data/lib/clucumber/vendor/cl-ppcre/test/tests.lisp +159 -0
  43. data/lib/clucumber/vendor/cl-ppcre/test/unicode-tests.lisp +80 -0
  44. data/lib/clucumber/vendor/cl-ppcre/util.lisp +201 -0
  45. data/lib/clucumber/vendor/cl-unicode/alias.lisp +135 -0
  46. data/lib/clucumber/vendor/cl-unicode/api.lisp +412 -0
  47. data/lib/clucumber/vendor/cl-unicode/build/char-info.lisp +133 -0
  48. data/lib/clucumber/vendor/cl-unicode/build/dump.lisp +239 -0
  49. data/lib/clucumber/vendor/cl-unicode/build/read.lisp +280 -0
  50. data/lib/clucumber/vendor/cl-unicode/build/util.lisp +182 -0
  51. data/lib/clucumber/vendor/cl-unicode/cl-unicode.asd +90 -0
  52. data/lib/clucumber/vendor/cl-unicode/conditions.lisp +54 -0
  53. data/lib/clucumber/vendor/cl-unicode/derived.lisp +120 -0
  54. data/lib/clucumber/vendor/cl-unicode/hash-tables.lisp +20 -0
  55. data/lib/clucumber/vendor/cl-unicode/lists.lisp +10 -0
  56. data/lib/clucumber/vendor/cl-unicode/methods.lisp +15 -0
  57. data/lib/clucumber/vendor/cl-unicode/packages.lisp +76 -0
  58. data/lib/clucumber/vendor/cl-unicode/specials.lisp +135 -0
  59. data/lib/clucumber/vendor/cl-unicode/test/packages.lisp +34 -0
  60. data/lib/clucumber/vendor/cl-unicode/test/tests.lisp +150 -0
  61. data/lib/clucumber/vendor/cl-unicode/test-functions.lisp +94 -0
  62. data/lib/clucumber/vendor/cl-unicode/util.lisp +274 -0
  63. data/lib/clucumber/vendor/lift/compare/fiveam-tests.lisp +14 -0
  64. data/lib/clucumber/vendor/lift/compare/lift-tests.lisp +13 -0
  65. data/lib/clucumber/vendor/lift/compare/rt-tests.lisp +16 -0
  66. data/lib/clucumber/vendor/lift/compare/xlunit-tests.lisp +15 -0
  67. data/lib/clucumber/vendor/lift/dev/changes.lisp +61 -0
  68. data/lib/clucumber/vendor/lift/dev/config.lisp +354 -0
  69. data/lib/clucumber/vendor/lift/dev/copy-file.lisp +117 -0
  70. data/lib/clucumber/vendor/lift/dev/introspection.lisp +232 -0
  71. data/lib/clucumber/vendor/lift/dev/lift-interface.lisp +56 -0
  72. data/lib/clucumber/vendor/lift/dev/lift-notes.lisp +202 -0
  73. data/lib/clucumber/vendor/lift/dev/lift-randomized.lisp +45 -0
  74. data/lib/clucumber/vendor/lift/dev/lift.lisp +2383 -0
  75. data/lib/clucumber/vendor/lift/dev/macros.lisp +229 -0
  76. data/lib/clucumber/vendor/lift/dev/measuring.lisp +156 -0
  77. data/lib/clucumber/vendor/lift/dev/packages.lisp +161 -0
  78. data/lib/clucumber/vendor/lift/dev/port.lisp +151 -0
  79. data/lib/clucumber/vendor/lift/dev/prototypes.lisp +282 -0
  80. data/lib/clucumber/vendor/lift/dev/random-testing.lisp +124 -0
  81. data/lib/clucumber/vendor/lift/dev/report-locations.lisp +13 -0
  82. data/lib/clucumber/vendor/lift/dev/reports.lisp +916 -0
  83. data/lib/clucumber/vendor/lift/dev/utilities.lisp +242 -0
  84. data/lib/clucumber/vendor/lift/docs/package.lisp +6 -0
  85. data/lib/clucumber/vendor/lift/docs/setup.lisp +17 -0
  86. data/lib/clucumber/vendor/lift/examples/basic-examples.lisp +289 -0
  87. data/lib/clucumber/vendor/lift/examples/random-testing.lisp +32 -0
  88. data/lib/clucumber/vendor/lift/lift-documentation.asd +28 -0
  89. data/lib/clucumber/vendor/lift/lift-test.asd +35 -0
  90. data/lib/clucumber/vendor/lift/lift.asd +77 -0
  91. data/lib/clucumber/vendor/lift/test/equality-tests.lisp +40 -0
  92. data/lib/clucumber/vendor/lift/test/finding-tests.lisp +32 -0
  93. data/lib/clucumber/vendor/lift/test/lift-test.lisp +783 -0
  94. data/lib/clucumber/vendor/lift/test/order-of-operations.lisp +54 -0
  95. data/lib/clucumber/vendor/lift/test/packages.lisp +17 -0
  96. data/lib/clucumber/vendor/lift/test/test-config-files.lisp +20 -0
  97. data/lib/clucumber/vendor/lift/test/test-dynamic-variables.lisp +57 -0
  98. data/lib/clucumber/vendor/lift/test/test-maximum-problems.lisp +74 -0
  99. data/lib/clucumber/vendor/lift/test/test-prototypes.lisp +278 -0
  100. data/lib/clucumber/vendor/lift/test/test-timeout.lisp +37 -0
  101. data/lib/clucumber/vendor/lift/test/tests-in-progress.lisp +62 -0
  102. data/lib/clucumber/vendor/lift/test/testsuite-expects.lisp +60 -0
  103. data/lib/clucumber/vendor/lift/timeout/package.lisp +13 -0
  104. data/lib/clucumber/vendor/lift/timeout/with-timeout.lisp +123 -0
  105. data/lib/clucumber/vendor/lift/website/stuff/Temp.lisp +34 -0
  106. data/lib/clucumber/vendor/st-json/st-json.asd +3 -0
  107. data/lib/clucumber/vendor/st-json/st-json.lisp +310 -0
  108. data/lib/clucumber/vendor/trivial-backtrace/dev/backtrace.lisp +127 -0
  109. data/lib/clucumber/vendor/trivial-backtrace/dev/fallback.lisp +10 -0
  110. data/lib/clucumber/vendor/trivial-backtrace/dev/map-backtrace.lisp +103 -0
  111. data/lib/clucumber/vendor/trivial-backtrace/dev/mucking.lisp +75 -0
  112. data/lib/clucumber/vendor/trivial-backtrace/dev/packages.lisp +13 -0
  113. data/lib/clucumber/vendor/trivial-backtrace/dev/utilities.lisp +104 -0
  114. data/lib/clucumber/vendor/trivial-backtrace/test/packages.lisp +5 -0
  115. data/lib/clucumber/vendor/trivial-backtrace/test/test-setup.lisp +4 -0
  116. data/lib/clucumber/vendor/trivial-backtrace/test/tests.lisp +16 -0
  117. data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace-test.asd +22 -0
  118. data/lib/clucumber/vendor/trivial-backtrace/trivial-backtrace.asd +35 -0
  119. data/lib/clucumber/vendor/usocket/backend/allegro.lisp +199 -0
  120. data/lib/clucumber/vendor/usocket/backend/armedbear.lisp +491 -0
  121. data/lib/clucumber/vendor/usocket/backend/clisp.lisp +260 -0
  122. data/lib/clucumber/vendor/usocket/backend/cmucl.lisp +266 -0
  123. data/lib/clucumber/vendor/usocket/backend/lispworks.lisp +741 -0
  124. data/lib/clucumber/vendor/usocket/backend/mcl.lisp +369 -0
  125. data/lib/clucumber/vendor/usocket/backend/openmcl.lisp +206 -0
  126. data/lib/clucumber/vendor/usocket/backend/sbcl.lisp +424 -0
  127. data/lib/clucumber/vendor/usocket/backend/scl.lisp +261 -0
  128. data/lib/clucumber/vendor/usocket/condition.lisp +227 -0
  129. data/lib/clucumber/vendor/usocket/package.lisp +82 -0
  130. data/lib/clucumber/vendor/usocket/server.lisp +45 -0
  131. data/lib/clucumber/vendor/usocket/test/package.lisp +13 -0
  132. data/lib/clucumber/vendor/usocket/test/test-usocket.lisp +166 -0
  133. data/lib/clucumber/vendor/usocket/usocket-test.asd +26 -0
  134. data/lib/clucumber/vendor/usocket/usocket.asd +37 -0
  135. data/lib/clucumber/vendor/usocket/usocket.lisp +542 -0
  136. data/lib/clucumber/vendor/usocket/vendor/kqueue.lisp +1 -0
  137. data/lib/clucumber/vendor/usocket/vendor/split-sequence.lisp +245 -0
  138. data/lib/clucumber.rb +29 -7
  139. metadata +151 -5
@@ -0,0 +1,135 @@
1
+ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
2
+ ;;; $Header: /usr/local/cvsrep/cl-unicode/alias.lisp,v 1.8 2008/07/21 20:04:13 edi Exp $
3
+
4
+ ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
5
+
6
+ ;;; Redistribution and use in source and binary forms, with or without
7
+ ;;; modification, are permitted provided that the following conditions
8
+ ;;; are met:
9
+
10
+ ;;; * Redistributions of source code must retain the above copyright
11
+ ;;; notice, this list of conditions and the following disclaimer.
12
+
13
+ ;;; * Redistributions in binary form must reproduce the above
14
+ ;;; copyright notice, this list of conditions and the following
15
+ ;;; disclaimer in the documentation and/or other materials
16
+ ;;; provided with the distribution.
17
+
18
+ ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19
+ ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
+ ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22
+ ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24
+ ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
+ ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
+ ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ (in-package :cl-unicode)
31
+
32
+ (defvar *alias-map*
33
+ '(("L" . "Letter")
34
+ ("LC" . "CasedLetter")
35
+ ("Lu" . "UppercaseLetter")
36
+ ("Ll" . "LowercaseLetter")
37
+ ("Lt" . "TitlecaseLetter")
38
+ ("Lm" . "ModifierLetter")
39
+ ("Lo" . "OtherLetter")
40
+ ("M" . "Mark")
41
+ ("Mn" . "NonspacingMark")
42
+ ("Mc" . "SpacingMark")
43
+ ("Me" . "EnclosingMark")
44
+ ("N" . "Number")
45
+ ("Nd" . "DecimalNumber")
46
+ ("Nl" . "LetterNumber")
47
+ ("No" . "OtherNumber")
48
+ ("P" . "Punctuation")
49
+ ("Pc" . "ConnectorPunctuation")
50
+ ("Pd" . "DashPunctuation")
51
+ ("Ps" . "OpenPunctuation")
52
+ ("Pe" . "ClosePunctuation")
53
+ ("Pi" . "InitialPunctuation")
54
+ ("Pf" . "FinalPunctuation")
55
+ ("Po" . "OtherPunctuation")
56
+ ("S" . "Symbol")
57
+ ("Sm" . "MathSymbol")
58
+ ("Sc" . "CurrencySymbol")
59
+ ("Sk" . "ModifierSymbol")
60
+ ("So" . "OtherSymbol")
61
+ ("Z" . "Separator")
62
+ ("Zs" . "SpaceSeparator")
63
+ ("Zl" . "LineSeparator")
64
+ ("Zp" . "ParagraphSeparator")
65
+ ("C" . "Other")
66
+ ("Cc" . "Control")
67
+ ("Cf" . "Format")
68
+ ("Cs" . "Surrogate")
69
+ ("Co" . "PrivateUse")
70
+ ("Cn" . "Unassigned")
71
+ ("Cn" . "NoncharacterCodePoint")))
72
+
73
+ (defvar *bidi-alias-map*
74
+ '(("L" . "LeftToRight")
75
+ ("LRE" . "LeftToRightEmbedding")
76
+ ("LRO" . "LeftToRightOverride")
77
+ ("R" . "RightToLeft")
78
+ ("AL" . "RightToLeftArabic")
79
+ ("RLE" . "RightToLeftEmbedding")
80
+ ("RLO" . "RightToLeftOverride")
81
+ ("PDF" . "PopDirectionalFormat")
82
+ ("EN" . "EuropeanNumber")
83
+ ("ES" . "EuropeanNumberSeparator")
84
+ ("ET" . "EuropeanNumberTerminator")
85
+ ("AN" . "ArabicNumber")
86
+ ("CS" . "CommonNumberSeparator")
87
+ ("NSM" . "NonSpacingMark")
88
+ ("BN" . "BoundaryNeutral")
89
+ ("B" . "ParagraphSeparator")
90
+ ("S" . "SegmentSeparator")
91
+ ("WS" . "Whitespace")
92
+ ("ON" . "OtherNeutral")))
93
+
94
+ (defun create-alias (new-name old-name &optional only-if-unambiguous)
95
+ (setq new-name (canonicalize-name new-name)
96
+ old-name (canonicalize-name old-name))
97
+ (unless only-if-unambiguous
98
+ (assert (null (gethash new-name *property-map*)) (new-name)
99
+ "There is already a property named ~S." new-name))
100
+ (when (gethash new-name *property-map*)
101
+ (return-from create-alias))
102
+ (assert (gethash old-name *property-map*) (old-name)
103
+ "There is no property named ~S." old-name)
104
+ (setf (gethash new-name *property-map*)
105
+ (gethash old-name *property-map*)))
106
+
107
+ (defun create-aliases ()
108
+ (loop for (old-name . new-name) in *alias-map*
109
+ do (create-alias new-name old-name))
110
+ (loop for (old-name . new-name) in *bidi-alias-map*
111
+ do (create-alias (format nil "BidiClass:~A" new-name)
112
+ (format nil "BidiClass:~A" old-name)))
113
+ (loop for name in (scripts)
114
+ do (create-alias (format nil "Script:~A" name) name))
115
+ (loop for name in (loop for name being the hash-keys of *property-map*
116
+ collect name)
117
+ unless (ppcre:scan ":" name)
118
+ do (create-alias (format nil "Is~A" name) name))
119
+ (loop for name in (code-blocks)
120
+ do (create-alias (format nil "In~A" name) (format nil "Block:~A" name)))
121
+ (loop for name in (code-blocks)
122
+ do (create-alias name (format nil "Block:~A" name) t))
123
+ (loop for name in (bidi-classes)
124
+ do (create-alias name (format nil "BidiClass:~A" name) t))
125
+ (loop for (old-name . new-name) in *bidi-alias-map*
126
+ do (create-alias new-name (format nil "BidiClass:~A" old-name) t)))
127
+
128
+ (defun build-all-property-tests ()
129
+ (clrhash *property-map*)
130
+ (clrhash *property-tests*)
131
+ (install-tests)
132
+ (build-derived-test-functions)
133
+ (create-aliases))
134
+
135
+ (build-all-property-tests)
@@ -0,0 +1,412 @@
1
+ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
2
+ ;;; $Header: /usr/local/cvsrep/cl-unicode/api.lisp,v 1.31 2008/07/24 14:46:20 edi Exp $
3
+
4
+ ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
5
+
6
+ ;;; Redistribution and use in source and binary forms, with or without
7
+ ;;; modification, are permitted provided that the following conditions
8
+ ;;; are met:
9
+
10
+ ;;; * Redistributions of source code must retain the above copyright
11
+ ;;; notice, this list of conditions and the following disclaimer.
12
+
13
+ ;;; * Redistributions in binary form must reproduce the above
14
+ ;;; copyright notice, this list of conditions and the following
15
+ ;;; disclaimer in the documentation and/or other materials
16
+ ;;; provided with the distribution.
17
+
18
+ ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19
+ ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
+ ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22
+ ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24
+ ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
+ ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
+ ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ (in-package :cl-unicode)
31
+
32
+ (defgeneric unicode-name (c)
33
+ (:documentation "Returns the Unicode name of a character as a string
34
+ or NIL if there is no name for that particular character. C can be
35
+ the character's code point \(a positive integer) or a \(Lisp)
36
+ character assuming its character code is also its Unicode code
37
+ point.")
38
+ (:method ((char character))
39
+ (unicode-name (char-code char)))
40
+ (:method ((code-point integer))
41
+ (or (gethash code-point *code-points-to-names*)
42
+ (maybe-compute-hangul-syllable-name code-point)
43
+ (maybe-compute-cjk-name code-point))))
44
+
45
+ (defgeneric unicode1-name (c)
46
+ (:documentation "Returns the Unicode 1.0 name of a character as a
47
+ string or NIL if there is no name for that particular character. This
48
+ name is only non-NIL if it is significantly different from the Unicode
49
+ name (see UNICODE-NAME). For control characters, sometimes the ISO
50
+ 6429 name is returned instead.
51
+
52
+ C can be the character's code point \(a positive integer) or a \(Lisp)
53
+ character assuming its character code is also its Unicode code
54
+ point.")
55
+ (:method ((char character))
56
+ (unicode1-name (char-code char)))
57
+ (:method ((code-point integer))
58
+ (values (gethash code-point *code-points-to-unicode1-names*))))
59
+
60
+ (defun character-named (name &key
61
+ want-code-point-p
62
+ (try-unicode1-names-p *try-unicode1-names-p*)
63
+ (try-abbreviations-p *try-abbreviations-p*)
64
+ (scripts-to-try *scripts-to-try*)
65
+ (try-hex-notation-p *try-hex-notation-p*)
66
+ (try-lisp-names-p *try-lisp-names-p*))
67
+ "Returns the character which has the name NAME \(a string) by
68
+ looking up the Unicode name \(see UNICODE-NAME).
69
+
70
+ If TRY-UNICODE1-NAMES is true, the Unicode 1.0 name \(see
71
+ UNICODE1-NAME) will be used as a fallback.
72
+
73
+ If TRY-ABBREVIATIONS-P is true, NAME is treated as an abbreviation as
74
+ follows: If NAME contains a colon, it is interpreted as
75
+ \"<script>:<short-name>\" and the function tries to look up, in turn,
76
+ the characters named \"<script> <size> LETTER <short-name>\",
77
+ \"<script> LETTER <short-name>\", and \"<script> <short-name>\" where
78
+ <size> is \"SMALL\" if none of the characters in <short-name> is
79
+ uppercase, \"CAPITAL\" otherwise. If NAME does not contain a colon,
80
+ the same algorithm as above is tried with NAME instead of <short-name>
81
+ and each element of the list of strings SCRIPTS-TO-TRY as <string>.
82
+ \(SCRIPTS-TO-TRY can also be a single string which is interpreted as a
83
+ one-element list.)
84
+
85
+ If TRY-HEX-NOTATION-P is true, NAME can be of the form \"U+<x>\" where
86
+ <x> is a hexadecimal number with four to six digits with the obvious
87
+ meaning.
88
+
89
+ If TRY-LISP-NAMES-P is true, the function returns the character with
90
+ the character name NAME \(if there is one) or, if NAME is exactly one
91
+ character, it returns this character.
92
+
93
+ All the keyword-governed alternatives are tried in the order they're
94
+ described here.
95
+
96
+ See also *TRY-UNICODE1-NAMES-P*, *TRY-ABBREVIATIONS-P*,
97
+ *SCRIPTS-TO-TRY*, *TRY-HEX-NOTATION-P*, and *TRY-LISP-NAMES-P*.
98
+
99
+ Returns the code point instead of the character if WANT-CODE-POINT-P
100
+ is true. This can be especially useful for Lisp implementations where
101
+ CHAR-CODE-LIMIT is smaller than +CODE-POINT-LIMIT+."
102
+ (when (stringp scripts-to-try)
103
+ (setq scripts-to-try (list scripts-to-try)))
104
+ (let* ((canonicalized-name (canonicalize-name name))
105
+ (code-point (or (gethash canonicalized-name *names-to-code-points*)
106
+ (maybe-find-hangul-syllable-code-point canonicalized-name)
107
+ (maybe-find-cjk-code-point canonicalized-name)
108
+ (and try-unicode1-names-p
109
+ (gethash canonicalized-name *unicode1-names-to-code-points*))
110
+ (and try-abbreviations-p
111
+ (let ((*try-unicode1-names-p* try-unicode1-names-p)
112
+ (*try-abbreviations-p* nil))
113
+ (try-abbreviations name scripts-to-try)))
114
+ (and try-hex-notation-p
115
+ (ppcre:register-groups-bind ((#'parse-hex code-point))
116
+ ("^U\\+([a-zA-Z0-9]{4,6})$" name)
117
+ (and (< code-point +code-point-limit+)
118
+ code-point)))
119
+ (and try-lisp-names-p
120
+ (case (length name)
121
+ (1 (char-code (char name 0)))
122
+ (otherwise (let ((char (name-char name)))
123
+ (and char (char-code char)))))))))
124
+ (if want-code-point-p
125
+ code-point
126
+ (and code-point (code-char code-point)))))
127
+
128
+ (defgeneric script (c)
129
+ (:documentation "Returns the script of a character as a string or
130
+ NIL if there is no script for that particular character. C can be the
131
+ character's code point \(a positive integer) or a \(Lisp) character
132
+ assuming its character code is also its Unicode code point. The
133
+ second return value \(if there is one) is the property symbol of the
134
+ script.
135
+
136
+ See also SCRIPTS.")
137
+ (:method ((char character))
138
+ (script (char-code char))))
139
+
140
+ (defgeneric code-block (c)
141
+ (:documentation "Returns the block of a character as a string or NIL
142
+ if there is no block for that particular character. C can be the
143
+ character's code point \(a positive integer) or a \(Lisp) character
144
+ assuming its character code is also its Unicode code point. The
145
+ second return value \(if there is one) is the property symbol of the
146
+ block.
147
+
148
+ See also CODE-BLOCKS.")
149
+ (:method ((char character))
150
+ (code-block (char-code char))))
151
+
152
+ (defgeneric age (c)
153
+ (:documentation "Returns the \"age\" of a character or NIL if there
154
+ is no age entry for that particular character. The age of a character
155
+ is a list of two integers denoting the major and minor number of the
156
+ Unicode version where the character first appeared. C can be the
157
+ character's code point \(a positive integer) or a \(Lisp) character
158
+ assuming its character code is also its Unicode code point.")
159
+ (:method ((char character))
160
+ (age (char-code char))))
161
+
162
+ (defgeneric general-category (c)
163
+ (:documentation "Returns the general category of a character as a
164
+ string. C can be the character's code point \(a positive integer) or
165
+ a \(Lisp) character assuming its character code is also its Unicode
166
+ code point. The second return value is the property symbol of the
167
+ category.
168
+
169
+ See also GENERAL-CATEGORIES.")
170
+ (:method :around (c)
171
+ (multiple-value-bind (name symbol)
172
+ (call-next-method)
173
+ (cond (name (values name symbol))
174
+ (t (values "Cn" '#.(property-symbol "Cn"))))))
175
+ (:method ((char character))
176
+ (general-category (char-code char))))
177
+
178
+ (defgeneric bidi-class (c)
179
+ (:documentation "Returns the bidirectional \(\"Bidi\") class of a
180
+ character as a string or NIL if there is no bidirectional class for
181
+ that particular character. C can be the character's code point \(a
182
+ positive integer) or a \(Lisp) character assuming its character code
183
+ is also its Unicode code point. The second return value \(if there is
184
+ one) is the property symbol of the class.
185
+
186
+ See also BIDI-CLASSES")
187
+ (:method ((char character))
188
+ (bidi-class (char-code char))))
189
+
190
+ (defun bidi-mirroring-glyph (c &key want-code-point-p)
191
+ "Returns the Bidi mirroring glyph for a character if the character
192
+ has the \"BidiMirrored\" property and an appropriate mirroring glyph
193
+ is defined. C can be the character's code point \(a positive integer)
194
+ or a \(Lisp) character assuming its character code is also its Unicode
195
+ code point.
196
+
197
+ Returns the code point instead of the character if WANT-CODE-POINT-P
198
+ is true. This can be especially useful for Lisp implementations where
199
+ CHAR-CODE-LIMIT is smaller than +CODE-POINT-LIMIT+."
200
+ (let ((code-point (bidi-mirroring-glyph% (ensure-code-point c))))
201
+ (cond ((and code-point (not want-code-point-p))
202
+ (code-char code-point))
203
+ (t code-point))))
204
+
205
+ (defgeneric numeric-type (c)
206
+ (:documentation "Returns the numeric type of a character as a string
207
+ or NIL if that particular character has no numeric type. C can be the
208
+ character's code point \(a positive integer) or a \(Lisp) character
209
+ assuming its character code is also its Unicode code point. The
210
+ second return value \(if there is one) is the property symbol of the
211
+ numeric type.")
212
+ (:method ((char character))
213
+ (numeric-type (char-code char))))
214
+
215
+ (defgeneric numeric-value (c)
216
+ (:documentation "Returns the numeric value of a character as a Lisp
217
+ rational or NIL \(for NaN). C can be the character's code point \(a
218
+ positive integer) or a \(Lisp) character assuming its character code
219
+ is also its Unicode code point.")
220
+ (:method ((char character))
221
+ (numeric-value (char-code char))))
222
+
223
+ (defgeneric combining-class (c)
224
+ (:documentation "Returns the combining class of a character as a
225
+ non-negative integer. C can be the character's code point \(a
226
+ positive integer) or a \(Lisp) character assuming its character code
227
+ is also its Unicode code point.")
228
+ (:method :around (c)
229
+ (or (call-next-method) 0))
230
+ (:method ((char character))
231
+ (combining-class (char-code char))))
232
+
233
+ (defgeneric has-binary-property (c property)
234
+ (:documentation "Checks whether a character has the binary property
235
+ PROPERTY. C can be the character's code point \(a positive integer)
236
+ or a \(Lisp) character assuming its character code is also its Unicode
237
+ code point. PROPERTY can be a string naming the property or the
238
+ corresponding property symbol. If a true value is returned, it is the
239
+ property symbol.
240
+
241
+ See also BINARY-PROPERTIES.")
242
+ (:method ((char character) property)
243
+ (has-binary-property (char-code char) property))
244
+ (:method (char (property-name string))
245
+ (has-binary-property char (property-symbol property-name)))
246
+ (:method ((code-point integer) (property-symbol symbol))
247
+ (find property-symbol (binary-props code-point) :test #'eq)))
248
+
249
+ (defun uppercase-mapping (c &key want-code-point-p)
250
+ "Returns the simple uppercase mapping of a character. C can be the
251
+ character's code point \(a positive integer) or a \(Lisp) character
252
+ assuming its character code is also its Unicode code point. Returns
253
+ the character itself if no such mapping is explicitly defined. Note
254
+ that case mapping only makes sense for characters with the \"LC\"
255
+ property.
256
+
257
+ Returns the code point instead of the character if WANT-CODE-POINT-P
258
+ is true. This can be especially useful for Lisp implementations where
259
+ CHAR-CODE-LIMIT is smaller than +CODE-POINT-LIMIT+."
260
+ (mapping c 0 want-code-point-p))
261
+
262
+ (defun lowercase-mapping (c &key want-code-point-p)
263
+ "Returns the simple lowercase mapping of a character. C can be the
264
+ character's code point \(a positive integer) or a \(Lisp) character
265
+ assuming its character code is also its Unicode code point. Returns
266
+ the character itself if no such mapping is explicitly defined. Note
267
+ that case mapping only makes sense for characters with the \"LC\"
268
+ property.
269
+
270
+ Returns the code point instead of the character if WANT-CODE-POINT-P
271
+ is true. This can be especially useful for Lisp implementations where
272
+ CHAR-CODE-LIMIT is smaller than +CODE-POINT-LIMIT+."
273
+ (mapping c 1 want-code-point-p))
274
+
275
+ (defun titlecase-mapping (c &key want-code-point-p)
276
+ "Returns the simple titlecase mapping of a character. C can be the
277
+ character's code point \(a positive integer) or a \(Lisp) character
278
+ assuming its character code is also its Unicode code point. Returns
279
+ the character itself if no such mapping is explicitly defined. Note
280
+ that case mapping only makes sense for characters with the \"LC\"
281
+ property.
282
+
283
+ Returns the code point instead of the character if WANT-CODE-POINT-P
284
+ is true. This can be especially useful for Lisp implementations where
285
+ CHAR-CODE-LIMIT is smaller than +CODE-POINT-LIMIT+."
286
+ (mapping c 2 want-code-point-p))
287
+
288
+ (defun general-categories ()
289
+ "Returns a sorted list of all general categories known to
290
+ CL-UNICODE. These are the possible return values of
291
+ GENERAL-CATEGORY."
292
+ (sort (mapcar 'property-name *general-categories*) 'string-lessp))
293
+
294
+ (defun scripts ()
295
+ "Returns a sorted list of all scripts known to CL-UNICODE. These
296
+ are the possible return values of SCRIPT."
297
+ (sort (mapcar 'property-name *scripts*) 'string-lessp))
298
+
299
+ (defun code-blocks ()
300
+ "Returns a sorted list of all blocks known to CL-UNICODE. These are
301
+ the possible return values of CODE-BLOCK."
302
+ (sort (mapcar 'property-name *code-blocks*) 'string-lessp))
303
+
304
+ (defun binary-properties ()
305
+ "Returns a sorted list of all binary properties known to CL-UNICODE.
306
+ These are the allowed second arguments \(modulo canonicalization) to
307
+ HAS-BINARY-PROPERTY."
308
+ (sort (mapcar 'property-name *binary-properties*) 'string-lessp))
309
+
310
+ (defun bidi-classes ()
311
+ "Returns a sorted list of all Bidi classes known to CL-UNICODE.
312
+ These are the possible return values of BIDI-CLASS."
313
+ (sort (mapcar 'property-name *bidi-classes*) 'string-lessp))
314
+
315
+ (defun recognized-properties (&optional all)
316
+ "Returns a list of all property names known to CL-UNICODE. These
317
+ are the allowed second arguments \(modulo canonicalization) to
318
+ HAS-PROPERTY. If ALL is true, known aliases \(like \"Letter\" for
319
+ \"L\") are also included."
320
+ (sort (cond (all (loop for key being the hash-keys of *property-map*
321
+ collect key))
322
+ (t (mapcar 'property-name
323
+ (loop for key being the hash-keys of *property-tests*
324
+ collect key))))
325
+ 'string-lessp))
326
+
327
+ (defgeneric property-test (property &key errorp)
328
+ (:documentation "Returns a unary function which can test code points
329
+ or Lisp characters for the property PROPERTY. PROPERTY is interpreted
330
+ as in HAS-PROPERTY and PROPERTY-TEST is actually used internally by
331
+ HAS-PROPERTY but might come in handy if you need a faster way to test
332
+ for PROPERTY \(as you're saving the time to look up the property).
333
+
334
+ Returns NIL if no property named PROPERTY was found or signals an
335
+ error if ERRORP is true.")
336
+ (:method ((property-name string) &key errorp)
337
+ (property-test (or (gethash (canonicalize-name property-name) *property-map*)
338
+ (and errorp (signal-unicode-error "There is no property called ~S."
339
+ property-name)))))
340
+ (:method ((property-symbol symbol) &key errorp)
341
+ (or (gethash property-symbol *property-tests*)
342
+ (and errorp (signal-unicode-error "There is no property called ~S." property-symbol)))))
343
+
344
+ (defun has-property (c property)
345
+ "Checks whether a character has the named property PROPERTY.
346
+ PROPERTY can be a string naming a property \(which will be used for
347
+ look-up after canonicalization) or it can be a property symbol \(see
348
+ PROPERTY-SYMBOL). C can be the character's code point \(a positive
349
+ integer) or a \(Lisp) character assuming its character code is also
350
+ its Unicode code point.
351
+
352
+ \"Properties\" in the sense of CL-UNICODE can be names of general
353
+ categories, scripts, blocks, binary properties, or Bidi classes,
354
+ amongst other things. If there are a block and a script with the same
355
+ name \(like, say, \"Cyrillic\"), the bare name denotes the script.
356
+ Prepend \"Block:\" to the name to refer to the block. \(You can also
357
+ prepend \"Script:\" to refer to the script unambiguously.) Names of
358
+ Bidi classes must be prepended with \"BidiClass:\" if there's a
359
+ potential for ambiguity.
360
+
361
+ This function also recognizes several aliases for properties \(like
362
+ \"Symbol\" for \"S\") and you can, as in Perl, prepend block names
363
+ with \"In\" instead of \"Block:\" and most other properties with
364
+ \"Is\". See RECOGNIZED-PROPERTIES.
365
+
366
+ See also PROPERTY-TEST."
367
+ (funcall (property-test property :errorp t) c))
368
+
369
+ (defun list-all-characters (property &key want-code-point-p)
370
+ "Lists all character \(ordered by code point) which have the
371
+ property PROPERTY where PROPERTY is interpreted as in HAS-PROPERTY.
372
+ If WANT-CODE-POINT-P is true, a list of code points instead of a list
373
+ of characters is returned. \(If CHAR-CODE-LIMIT is smaller than
374
+ +CODE-POINT-LIMIT+ in your Lisp implementation, the list of code
375
+ points can actually be longer than the list of characters.)."
376
+ (loop with test-function = (property-test property :errorp t)
377
+ for code-point below (if want-code-point-p +code-point-limit+ char-code-limit)
378
+ for thing = (if want-code-point-p code-point (code-char code-point))
379
+ when (and thing (funcall (the function test-function) code-point))
380
+ collect thing))
381
+
382
+ (defmacro enable-alternative-character-syntax ()
383
+ "Enables an alternative Lisp character syntax which /replaces/ the
384
+ usual syntax: After a sharpsign and a backslash have been read, at
385
+ least one more character is read. Reading then continues as long as
386
+ ASCII letters, digits, underlines, hyphens, colons, or plus signs are
387
+ read. The resulting string is then used as input to CHARACTER-NAMED
388
+ to produce a character.
389
+
390
+ This macro expands into an EVAL-WHEN so that if you use it as a
391
+ top-level form in a file to be loaded and/or compiled it'll do what
392
+ you expect. Technically, this'll push the current readtable on a
393
+ stack so that matching calls of this macro and
394
+ DISABLE-ALTERNATIVE-CHARACTER-SYNTAX can be nested.
395
+
396
+ Note that by default the alternative character syntax is not enabled
397
+ after loading CL-UNICODE."
398
+ `(eval-when (:compile-toplevel :load-toplevel :execute)
399
+ (%enable-alternative-character-syntax)))
400
+
401
+ (defmacro disable-alternative-character-syntax ()
402
+ "Restores the readtable which was active before the last call to
403
+ ENABLE-ALTERNATIVE-CHARACTER-SYNTAX. If there was no such call, the
404
+ standard readtable is used.
405
+
406
+ This macro expands into an EVAL-WHEN so that if you use it as a
407
+ top-level form in a file to be loaded and/or compiled it'll do what
408
+ you expect. Technically, this'll pop a readtable from the stack
409
+ described in ENABLE-ALTERNATIVE-CHARACTER-SYNTAX so that matching
410
+ calls of these macros can be nested."
411
+ `(eval-when (:compile-toplevel :load-toplevel :execute)
412
+ (%disable-alternative-character-syntax)))
@@ -0,0 +1,133 @@
1
+ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
2
+ ;;; $Header: /usr/local/cvsrep/cl-unicode/build/char-info.lisp,v 1.5 2008/07/21 14:23:09 edi Exp $
3
+
4
+ ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
5
+
6
+ ;;; Redistribution and use in source and binary forms, with or without
7
+ ;;; modification, are permitted provided that the following conditions
8
+ ;;; are met:
9
+
10
+ ;;; * Redistributions of source code must retain the above copyright
11
+ ;;; notice, this list of conditions and the following disclaimer.
12
+
13
+ ;;; * Redistributions in binary form must reproduce the above
14
+ ;;; copyright notice, this list of conditions and the following
15
+ ;;; disclaimer in the documentation and/or other materials
16
+ ;;; provided with the distribution.
17
+
18
+ ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19
+ ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
+ ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22
+ ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24
+ ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
+ ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
+ ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ (in-package :cl-unicode)
31
+
32
+ (defclass char-info ()
33
+ ((code-point :initarg :code-point
34
+ :reader code-point
35
+ :type fixnum
36
+ :documentation "The code point of the character. This
37
+ is redundant information, but convenient.")
38
+ (name :initarg :name
39
+ :initform nil
40
+ :reader name
41
+ :type (or string null)
42
+ :documentation "The name of the character - a string.")
43
+ (script :initform nil
44
+ :accessor script*
45
+ :type (or symbol null)
46
+ :documentation "The script the character belongs to - a
47
+ property symbol.")
48
+ (code-block :initform nil
49
+ :accessor code-block*
50
+ :type (or symbol null)
51
+ :documentation "The block the character belongs to - a
52
+ property symbol.")
53
+ (unicode1-name :initarg :unicode1-name
54
+ :initform nil
55
+ :reader unicode1-name
56
+ :type (or string null)
57
+ :documentation "The Unicode 1.0 name of the
58
+ character - a string.")
59
+ (age :initform nil
60
+ :accessor age*
61
+ :type list
62
+ :documentation "The Unicode version this character first
63
+ appeared in, a cons of two integers which denote the major and minor
64
+ version.")
65
+ (general-category :initarg :general-category
66
+ ;; this is the default for unassigned characters
67
+ ;; - see READ-BINARY-PROPERTIES
68
+ :initform (property-symbol "Cn")
69
+ :reader general-category*
70
+ :type symbol
71
+ :documentation "The general category of this
72
+ character - a property symbol.")
73
+ (bidi-class :initarg :bidi-class
74
+ ;; will be defaulted later, see
75
+ ;; SET-DEFAULT-BIDI-CLASSES
76
+ :initform nil
77
+ :accessor bidi-class*
78
+ :type symbol
79
+ :documentation "The Bidi class of the character - a
80
+ property symbol.")
81
+ (bidi-mirroring-glyph :initform nil
82
+ :accessor bidi-mirroring-glyph*
83
+ :type (or fixnum null)
84
+ :documentation "The code point of the mirror
85
+ image of the character, if there is one.")
86
+ (binary-props :initarg :binary-props
87
+ :initform nil
88
+ :accessor binary-props*
89
+ :type list
90
+ :documentation "A list of property symbols denoting
91
+ the binary properties of the character.")
92
+ (combining-class :initarg :combining-class
93
+ ;; the default combining class
94
+ :initform 0
95
+ :reader combining-class*
96
+ :type fixnum
97
+ :documentation "The combining class of the
98
+ character - an integer.")
99
+ (numeric-type :initarg :numeric-type
100
+ :initform nil
101
+ :reader numeric-type*
102
+ :type symbol
103
+ :documentation "The numeric type \(one of
104
+ \"Decimal\", \"Digit\", or \"Numeric\") of the character if it has one
105
+ - a property symbol.")
106
+ (numeric-value :initarg :numeric-value
107
+ :initform nil
108
+ :reader numeric-value*
109
+ :type (or rational null)
110
+ :documentation "The numeric value of the character
111
+ if it has one - a Lisp rational.")
112
+ (uppercase-mapping :initarg :uppercase-mapping
113
+ :initform nil
114
+ :reader uppercase-mapping*
115
+ :type (or fixnum null)
116
+ :documentation "The simple uppercase mapping of
117
+ the character \(as a code point) if explicitly specified.")
118
+ (lowercase-mapping :initarg :lowercase-mapping
119
+ :initform nil
120
+ :reader lowercase-mapping*
121
+ :type (or fixnum null)
122
+ :documentation "The simple lowercase mapping of
123
+ the character \(as a code point) if explicitly specified.")
124
+ (titlecase-mapping :initarg :titlecase-mapping
125
+ :initform nil
126
+ :reader titlecase-mapping*
127
+ :type (or fixnum null)
128
+ :documentation "The simple titlecase mapping of
129
+ the character \(as a code point) if explicitly specified."))
130
+ (:documentation "A CHAR-INFO object is a datastructure which is used
131
+ to \(temporarily) hold the information about one character as gathered
132
+ from parsing the Unicode data files - see the code in read.lisp."))
133
+