glaemscribe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +19 -0
- data/bin/glaemscribe +307 -0
- data/glaemresources/charsets/cirth_ds.cst +205 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds.cst +318 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +120 -0
- data/glaemresources/modes/adunaic.glaem +251 -0
- data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
- data/glaemresources/modes/blackspeech.glaem +260 -0
- data/glaemresources/modes/gothic.glaem +78 -0
- data/glaemresources/modes/khuzdul.glaem +141 -0
- data/glaemresources/modes/mercian.glaem +419 -0
- data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
- data/glaemresources/modes/quenya-sarati.glaem +320 -0
- data/glaemresources/modes/quenya.glaem +307 -0
- data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
- data/glaemresources/modes/sindarin-classical.glaem +276 -0
- data/glaemresources/modes/sindarin-daeron.glaem +182 -0
- data/glaemresources/modes/telerin.glaem +302 -0
- data/glaemresources/modes/valarin-sarati.glaem +210 -0
- data/glaemresources/modes/westron.glaem +340 -0
- data/glaemresources/modes/westsaxon.glaem +342 -0
- data/lib/api/charset.rb +84 -0
- data/lib/api/charset_parser.rb +55 -0
- data/lib/api/constants.rb +29 -0
- data/lib/api/debug.rb +36 -0
- data/lib/api/eval.rb +268 -0
- data/lib/api/fragment.rb +113 -0
- data/lib/api/glaeml.rb +200 -0
- data/lib/api/if_tree.rb +96 -0
- data/lib/api/mode.rb +112 -0
- data/lib/api/mode_parser.rb +314 -0
- data/lib/api/option.rb +64 -0
- data/lib/api/post_processor/reverse.rb +36 -0
- data/lib/api/pre_processor/downcase.rb +35 -0
- data/lib/api/pre_processor/elvish_numbers.rb +47 -0
- data/lib/api/pre_processor/rxsubstitute.rb +40 -0
- data/lib/api/pre_processor/substitute.rb +38 -0
- data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
- data/lib/api/resource_manager.rb +130 -0
- data/lib/api/rule.rb +99 -0
- data/lib/api/rule_group.rb +159 -0
- data/lib/api/sheaf.rb +70 -0
- data/lib/api/sheaf_chain.rb +86 -0
- data/lib/api/sheaf_chain_iterator.rb +108 -0
- data/lib/api/sub_rule.rb +40 -0
- data/lib/api/transcription_pre_post_processor.rb +118 -0
- data/lib/api/transcription_processor.rb +137 -0
- data/lib/api/transcription_tree_node.rb +91 -0
- data/lib/glaemscribe.rb +70 -0
- metadata +112 -0
@@ -0,0 +1,260 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\** Black Speech ring mode for glaemscribe (MAY BE INCOMPLETE) **\
|
26
|
+
|
27
|
+
\language "Black Speech"
|
28
|
+
\writing "Tengwar"
|
29
|
+
\mode "General Use"
|
30
|
+
\version "0.0.1"
|
31
|
+
\authors "Talagan (Benjamin Babut)"
|
32
|
+
|
33
|
+
\charset tengwar_ds true
|
34
|
+
|
35
|
+
\beg options
|
36
|
+
\option reverse_numbers true
|
37
|
+
\beg option numbers_base BASE_12
|
38
|
+
\value BASE_10 10
|
39
|
+
\value BASE_12 12
|
40
|
+
\end
|
41
|
+
\end
|
42
|
+
|
43
|
+
\beg preprocessor
|
44
|
+
\** Work exclusively in lowercase **\
|
45
|
+
\downcase
|
46
|
+
|
47
|
+
\** Simplify trema vowels **\
|
48
|
+
\substitute "ä" "a"
|
49
|
+
\substitute "ë" "e"
|
50
|
+
\substitute "ï" "i"
|
51
|
+
\substitute "ö" "o"
|
52
|
+
\substitute "ü" "u"
|
53
|
+
\substitute "ÿ" "y"
|
54
|
+
|
55
|
+
\** Disambiguate long vowels **\
|
56
|
+
\rxsubstitute "(ā|â|aa)" "á"
|
57
|
+
\rxsubstitute "(ē|ê|ee)" "é"
|
58
|
+
\rxsubstitute "(ī|î|ii)" "í"
|
59
|
+
\rxsubstitute "(ō|ô|oo)" "ó"
|
60
|
+
\rxsubstitute "(ū|û|uu)" "ú"
|
61
|
+
|
62
|
+
\** Preprocess numbers **\
|
63
|
+
\elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
|
64
|
+
\end
|
65
|
+
|
66
|
+
\beg processor
|
67
|
+
|
68
|
+
\beg rules litteral
|
69
|
+
{A} === a
|
70
|
+
{AA} === á
|
71
|
+
{E} === e
|
72
|
+
{EE} === é
|
73
|
+
{I} === i
|
74
|
+
{II} === í
|
75
|
+
{O} === o
|
76
|
+
{OO} === ó
|
77
|
+
{U} === u
|
78
|
+
{UU} === ú
|
79
|
+
|
80
|
+
{AI} === {A}{I} \** attested **\
|
81
|
+
{AU} === {A}{U} \** attested **\
|
82
|
+
{OI} === {O}{I} \** Not quite sure (dushgoi) **\
|
83
|
+
|
84
|
+
{K} === (c,k)
|
85
|
+
|
86
|
+
{VOWELS} === {A} * {E} * {I} * {O} * {U}
|
87
|
+
{LVOWELS} === {AA} * {EE} * {II} * {OO} * {UU}
|
88
|
+
|
89
|
+
\** Reverse o and u: U is listed before O in the tehta lists below **\
|
90
|
+
{TEHTA_XS} === A_TEHTA_XS * E_TEHTA_XS * I_TEHTA_XS * U_TEHTA_XS * O_TEHTA_XS
|
91
|
+
{TEHTA__S} === A_TEHTA_S * E_TEHTA_S * I_TEHTA_S * U_TEHTA_S * O_TEHTA_S
|
92
|
+
{TEHTA__L} === A_TEHTA_L * E_TEHTA_L * I_TEHTA_L * U_TEHTA_L * O_TEHTA_L
|
93
|
+
{TEHTA_XL} === A_TEHTA_XL * E_TEHTA_XL * I_TEHTA_XL * U_TEHTA_XL * O_TEHTA_XL
|
94
|
+
|
95
|
+
{DIPHTHONGS} === {AI} * {AU} * {OI}
|
96
|
+
{DIPHTHENGS} === YANTA A_TEHTA_L * URE A_TEHTA_L * YANTA U_TEHTA_L
|
97
|
+
|
98
|
+
{V_D_KER} === [ {VOWELS} * {DIPHTHONGS} ]
|
99
|
+
{V_D_IMG_XS} === [ {TEHTA_XS} * {DIPHTHENGS} ]
|
100
|
+
{V_D_IMG__S} === [ {TEHTA__L} * {DIPHTHENGS} ]
|
101
|
+
{V_D_IMG__L} === [ {TEHTA__S} * {DIPHTHENGS} ]
|
102
|
+
{V_D_IMG_XL} === [ {TEHTA_XL} * {DIPHTHENGS} ]
|
103
|
+
|
104
|
+
[{VOWELS}] --> TELCO [{TEHTA_XS}] \** Replace isolated short vowels **\
|
105
|
+
[{LVOWELS}] --> ARA [{TEHTA_XS}] \** Replace long vowels **\
|
106
|
+
[{DIPHTHONGS}] --> [{DIPHTHENGS}] \** Replace diphthongs **\
|
107
|
+
|
108
|
+
b --> UMBAR
|
109
|
+
[{VOWELS}] b --> UMBAR [{TEHTA_XL}]
|
110
|
+
|
111
|
+
d --> ANDO
|
112
|
+
[{VOWELS}] d --> ANDO [{TEHTA_XL}]
|
113
|
+
|
114
|
+
f --> FORMEN
|
115
|
+
[{VOWELS}] f --> FORMEN_EXT [{TEHTA__S}] \** Beware. **\
|
116
|
+
|
117
|
+
g --> UNGWE
|
118
|
+
[{VOWELS}] g --> UNGWE [{TEHTA_XL}]
|
119
|
+
|
120
|
+
gh --> UNGWE_EXT
|
121
|
+
[{VOWELS}] gh --> UNGWE_EXT [{TEHTA_XL}]
|
122
|
+
|
123
|
+
h --> HYARMEN
|
124
|
+
[{VOWELS}] h --> HYARMEN [{TEHTA_XS}]
|
125
|
+
|
126
|
+
\** ======================== **\
|
127
|
+
|
128
|
+
{K} --> QUESSE
|
129
|
+
[{VOWELS}]{K} --> QUESSE [{TEHTA__L}]
|
130
|
+
|
131
|
+
{K}h --> HWESTA
|
132
|
+
[{VOWELS}]{K}h --> HWESTA_EXT [{TEHTA__L}] \** Take care. **\
|
133
|
+
|
134
|
+
\** ======================== **\
|
135
|
+
|
136
|
+
l --> LAMBE
|
137
|
+
[{VOWELS}] l --> LAMBE [{TEHTA__L}]
|
138
|
+
|
139
|
+
\** ======================== **\
|
140
|
+
|
141
|
+
m --> MALTA
|
142
|
+
[{VOWELS}] m --> MALTA [{TEHTA_XL}]
|
143
|
+
|
144
|
+
mb --> UMBAR TILD_SUP_L
|
145
|
+
[{VOWELS}] mb --> UMBAR TILD_SUP_L [{TEHTA_XL}]
|
146
|
+
|
147
|
+
mp --> PARMA TILD_SUP_S
|
148
|
+
[{VOWELS}] mp --> PARMA TILD_SUP_S [{TEHTA__L}]
|
149
|
+
|
150
|
+
\** ======================== **\
|
151
|
+
|
152
|
+
n --> NUMEN
|
153
|
+
[{VOWELS}]n --> NUMEN [{TEHTA_XL}]
|
154
|
+
|
155
|
+
n{K} --> QUESSE TILD_SUP_S
|
156
|
+
[{VOWELS}]n{K} --> QUESSE TILD_SUP_S [{TEHTA__S}]
|
157
|
+
|
158
|
+
\** ======================== **\
|
159
|
+
|
160
|
+
p --> PARMA
|
161
|
+
|
162
|
+
r --> ROMEN
|
163
|
+
r_ --> ORE
|
164
|
+
[{VOWELS}]r --> ORE [{TEHTA__S}]
|
165
|
+
|
166
|
+
[{VOWELS}]rb --> ORE [{TEHTA__L}] UMBAR
|
167
|
+
[{LVOWELS}]rb --> ARA [{TEHTA_XS}] ORE UMBAR
|
168
|
+
|
169
|
+
[{VOWELS}]rz --> ORE [{TEHTA__L}] ESSE
|
170
|
+
[{LVOWELS}]rz --> ARA [{TEHTA_XS}] ORE ESSE
|
171
|
+
|
172
|
+
\** ======================== **\
|
173
|
+
|
174
|
+
s --> SILME
|
175
|
+
|
176
|
+
\** ======================== **\
|
177
|
+
|
178
|
+
sh --> AHA
|
179
|
+
[{VOWELS}] sh --> AHA_EXT [{TEHTA__L}] \** BEWARE. **\
|
180
|
+
|
181
|
+
t --> TINCO
|
182
|
+
[{VOWELS}]t --> TINCO [{TEHTA__L}]
|
183
|
+
|
184
|
+
th --> SULE
|
185
|
+
|
186
|
+
y --> ANNA
|
187
|
+
|
188
|
+
z --> ESSE
|
189
|
+
[{VOWELS}] z --> ESSE_NUQUERNA [{TEHTA__L}]
|
190
|
+
|
191
|
+
\end
|
192
|
+
|
193
|
+
\beg rules punctuation
|
194
|
+
. --> PUNCT_DDOT
|
195
|
+
.. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
|
196
|
+
… --> PUNCT_TILD
|
197
|
+
... --> PUNCT_TILD
|
198
|
+
.... --> PUNCT_TILD
|
199
|
+
..... --> PUNCT_TILD
|
200
|
+
...... --> PUNCT_TILD
|
201
|
+
....... --> PUNCT_TILD
|
202
|
+
|
203
|
+
, --> PUNCT_DOT
|
204
|
+
: --> PUNCT_DOT
|
205
|
+
; --> PUNCT_DOT
|
206
|
+
! --> PUNCT_EXCLAM
|
207
|
+
? --> PUNCT_INTERR
|
208
|
+
· --> PUNCT_DOT
|
209
|
+
|
210
|
+
\** Apostrophe **\
|
211
|
+
|
212
|
+
' --> {NULL}
|
213
|
+
’ --> {NULL}
|
214
|
+
|
215
|
+
\** Quotes **\
|
216
|
+
|
217
|
+
“ --> DQUOT_OPEN
|
218
|
+
” --> DQUOT_CLOSE
|
219
|
+
« --> DQUOT_OPEN
|
220
|
+
» --> DQUOT_CLOSE
|
221
|
+
|
222
|
+
- --> {NULL}
|
223
|
+
– --> PUNCT_TILD
|
224
|
+
— --> PUNCT_TILD
|
225
|
+
|
226
|
+
[ --> PUNCT_PAREN_L
|
227
|
+
] --> PUNCT_PAREN_R
|
228
|
+
( --> PUNCT_PAREN_L
|
229
|
+
) --> PUNCT_PAREN_R
|
230
|
+
{ --> PUNCT_PAREN_L
|
231
|
+
} --> PUNCT_PAREN_R
|
232
|
+
< --> PUNCT_PAREN_L
|
233
|
+
> --> PUNCT_PAREN_R
|
234
|
+
|
235
|
+
\** Not universal between fonts ... **\
|
236
|
+
$ --> BOOKMARK_SIGN
|
237
|
+
≤ --> RING_MARK_L \** Left ornamental mark of the Ring inscription **\
|
238
|
+
≥ --> RING_MARK_R \** Right ornamental mark of the Ring inscription **\
|
239
|
+
|
240
|
+
\end
|
241
|
+
|
242
|
+
\beg rules numbers
|
243
|
+
0 --> NUM_0
|
244
|
+
1 --> NUM_1
|
245
|
+
2 --> NUM_2
|
246
|
+
3 --> NUM_3
|
247
|
+
4 --> NUM_4
|
248
|
+
5 --> NUM_5
|
249
|
+
6 --> NUM_6
|
250
|
+
7 --> NUM_7
|
251
|
+
8 --> NUM_8
|
252
|
+
9 --> NUM_9
|
253
|
+
A --> NUM_10
|
254
|
+
B --> NUM_11
|
255
|
+
\end
|
256
|
+
|
257
|
+
\end
|
258
|
+
|
259
|
+
|
260
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\language "Gothic"
|
26
|
+
\writing "Gothic Alphabet"
|
27
|
+
\mode "Standard"
|
28
|
+
\version "0.0.1"
|
29
|
+
\authors "Talagan (Benjamin Babut)"
|
30
|
+
|
31
|
+
\charset unicode_gothic true
|
32
|
+
|
33
|
+
\beg preprocessor
|
34
|
+
\downcase
|
35
|
+
\end
|
36
|
+
|
37
|
+
\beg processor
|
38
|
+
|
39
|
+
\beg rules litteral
|
40
|
+
(a,ā) --> AZA
|
41
|
+
b --> BERCNA
|
42
|
+
d --> DAAZ
|
43
|
+
(e,ē) --> EYZ
|
44
|
+
f --> FE
|
45
|
+
g --> GEUUA
|
46
|
+
h --> HAAL
|
47
|
+
i --> IIZ
|
48
|
+
j --> GAAR
|
49
|
+
k --> CHOZMA
|
50
|
+
l --> LAAZ
|
51
|
+
m --> MANNA
|
52
|
+
n --> NOICZ
|
53
|
+
(o,ō) --> UTAL
|
54
|
+
p --> PERTRA
|
55
|
+
q --> QUETRA
|
56
|
+
r --> REDA
|
57
|
+
s --> SUGIL
|
58
|
+
t --> TYZ
|
59
|
+
þ --> THYTH
|
60
|
+
u --> URAZ
|
61
|
+
x --> ENGUZ
|
62
|
+
(w,y) --> UUINNE
|
63
|
+
z --> EZEC
|
64
|
+
ƕ --> UUAER
|
65
|
+
ï --> IIZ_TREMA
|
66
|
+
_i --> IIZ_TREMA
|
67
|
+
\end
|
68
|
+
|
69
|
+
\beg rules punctuation
|
70
|
+
, --> COMA
|
71
|
+
. --> PERIOD
|
72
|
+
; --> SEMICOLON
|
73
|
+
: --> COLON
|
74
|
+
\end
|
75
|
+
|
76
|
+
\end
|
77
|
+
|
78
|
+
|
@@ -0,0 +1,141 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\** Khuzdul mode for glaemscribe **\
|
26
|
+
|
27
|
+
\language "Khuzdul"
|
28
|
+
\writing "Cirth"
|
29
|
+
\mode "Angerthas Moria"
|
30
|
+
\version "0.0.1"
|
31
|
+
\authors "Talagan (Benjamin Babut)"
|
32
|
+
|
33
|
+
\charset cirth_ds true
|
34
|
+
|
35
|
+
\beg preprocessor
|
36
|
+
\** Work exclusively in lowercase **\
|
37
|
+
\downcase
|
38
|
+
|
39
|
+
\** Simplify trema vowels **\
|
40
|
+
\substitute "ä" "a"
|
41
|
+
\substitute "ë" "e"
|
42
|
+
\substitute "ï" "i"
|
43
|
+
\substitute "ö" "o"
|
44
|
+
\substitute "ü" "u"
|
45
|
+
\substitute "ÿ" "y"
|
46
|
+
|
47
|
+
\** Disambiguate long vowels **\
|
48
|
+
\rxsubstitute "(ā|â|aa)" "â"
|
49
|
+
\rxsubstitute "(ē|ê|ee)" "ê"
|
50
|
+
\rxsubstitute "(ī|î|ii)" "î"
|
51
|
+
\rxsubstitute "(ō|ô|oo)" "ô"
|
52
|
+
\rxsubstitute "(ū|û|uu)" "û"
|
53
|
+
\rxsubstitute "(ȳ|ŷ|yy)" "ŷ"
|
54
|
+
\end
|
55
|
+
|
56
|
+
\beg processor
|
57
|
+
|
58
|
+
\** We redefine the output space to have something beautiful, especially with erebor1 and erebor2 **\
|
59
|
+
\outspace CIRTH_SPACE_BIG
|
60
|
+
|
61
|
+
\beg rules litteral
|
62
|
+
a --> CIRTH_48
|
63
|
+
â --> CIRTH_49
|
64
|
+
e --> CIRTH_46
|
65
|
+
ê --> CIRTH_47
|
66
|
+
|
67
|
+
i --> CIRTH_39
|
68
|
+
î --> CIRTH_39 CIRTH_39
|
69
|
+
|
70
|
+
o --> CIRTH_50
|
71
|
+
|
72
|
+
u --> CIRTH_42
|
73
|
+
û --> CIRTH_43
|
74
|
+
|
75
|
+
b --> CIRTH_2
|
76
|
+
d --> CIRTH_9
|
77
|
+
f --> CIRTH_3
|
78
|
+
g --> CIRTH_19
|
79
|
+
h --> CIRTH_34
|
80
|
+
gh --> CIRTH_19 CIRTH_34
|
81
|
+
k --> CIRTH_18
|
82
|
+
l --> CIRTH_31
|
83
|
+
m --> CIRTH_6
|
84
|
+
n --> CIRTH_22
|
85
|
+
nd --> CIRTH_33
|
86
|
+
r --> CIRTH_12
|
87
|
+
s --> CIRTH_54
|
88
|
+
t --> CIRTH_8
|
89
|
+
sh --> CIRTH_15
|
90
|
+
th --> CIRTH_8 CIRTH_59
|
91
|
+
z --> CIRTH_17
|
92
|
+
|
93
|
+
k --> CIRTH_18
|
94
|
+
kh --> CIRTH_18 CIRTH_59
|
95
|
+
\end
|
96
|
+
|
97
|
+
\beg rules punctuation
|
98
|
+
. --> CIRTH_PUNCT_THREE_DOTS
|
99
|
+
.. --> CIRTH_PUNCT_THREE_DOTS
|
100
|
+
... --> CIRTH_PUNCT_THREE_DOTS
|
101
|
+
… --> CIRTH_PUNCT_THREE_DOTS
|
102
|
+
.... --> CIRTH_PUNCT_THREE_DOTS
|
103
|
+
..... --> CIRTH_PUNCT_THREE_DOTS
|
104
|
+
...... --> CIRTH_PUNCT_THREE_DOTS
|
105
|
+
....... --> CIRTH_PUNCT_THREE_DOTS
|
106
|
+
|
107
|
+
, --> CIRTH_PUNCT_MID_DOT
|
108
|
+
: --> CIRTH_PUNCT_TWO_DOTS
|
109
|
+
; --> CIRTH_PUNCT_TWO_DOTS
|
110
|
+
! --> CIRTH_PUNCT_THREE_DOTS
|
111
|
+
? --> CIRTH_PUNCT_THREE_DOTS
|
112
|
+
· --> CIRTH_PUNCT_MID_DOT
|
113
|
+
|
114
|
+
- --> CIRTH_PUNCT_MID_DOT
|
115
|
+
– --> CIRTH_PUNCT_TWO_DOTS
|
116
|
+
— --> CIRTH_PUNCT_TWO_DOTS
|
117
|
+
|
118
|
+
\** Apostrophe **\
|
119
|
+
|
120
|
+
' --> {NULL}
|
121
|
+
’ --> {NULL}
|
122
|
+
|
123
|
+
\** Quotes **\
|
124
|
+
|
125
|
+
“ --> {NULL}
|
126
|
+
” --> {NULL}
|
127
|
+
« --> {NULL}
|
128
|
+
» --> {NULL}
|
129
|
+
|
130
|
+
[ --> CIRTH_PUNCT_THREE_DOTS_L
|
131
|
+
] --> CIRTH_PUNCT_THREE_DOTS_L
|
132
|
+
( --> CIRTH_PUNCT_THREE_DOTS_L
|
133
|
+
) --> CIRTH_PUNCT_THREE_DOTS_L
|
134
|
+
{ --> CIRTH_PUNCT_THREE_DOTS_L
|
135
|
+
} --> CIRTH_PUNCT_THREE_DOTS_L
|
136
|
+
< --> CIRTH_PUNCT_THREE_DOTS_L
|
137
|
+
> --> CIRTH_PUNCT_THREE_DOTS_L
|
138
|
+
|
139
|
+
/ --> CIRTH_PUNCT_FOUR_DOTS
|
140
|
+
\end
|
141
|
+
\end
|