glaemscribe 1.1.14 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/glaemscribe +21 -17
- data/glaemresources/charsets/cirth_ds.cst +540 -0
- data/glaemresources/charsets/eldamar.cst +210 -0
- data/glaemresources/charsets/sarati_eldamar.cst +256 -0
- data/glaemresources/charsets/tengwar_ds_annatar.cst +2868 -0
- data/glaemresources/charsets/tengwar_ds_eldamar.cst +2729 -0
- data/glaemresources/charsets/tengwar_ds_elfica.cst +2742 -0
- data/glaemresources/charsets/tengwar_ds_parmaite.cst +2726 -0
- data/glaemresources/charsets/tengwar_ds_sindarin.cst +2722 -0
- data/glaemresources/charsets/tengwar_freemono.cst +217 -0
- data/glaemresources/charsets/tengwar_guni_annatar.cst +2948 -0
- data/glaemresources/charsets/tengwar_guni_eldamar.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_elfica.cst +2809 -0
- data/glaemresources/charsets/tengwar_guni_parmaite.cst +2813 -0
- data/glaemresources/charsets/tengwar_guni_sindarin.cst +2808 -0
- data/glaemresources/charsets/tengwar_telcontar.cst +225 -0
- data/glaemresources/charsets/unicode_gothic.cst +64 -0
- data/glaemresources/charsets/unicode_runes.cst +121 -0
- data/glaemresources/modes/{adunaic.glaem → adunaic-tengwar-glaemscrafu.glaem} +14 -2
- data/glaemresources/modes/{blackspeech.glaem → blackspeech-tengwar-general_use.glaem} +13 -3
- data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
- data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
- data/glaemresources/modes/japanese-tengwar.glaem +776 -0
- data/glaemresources/modes/{khuzdul.glaem → khuzdul-cirth-moria.glaem} +4 -1
- data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
- data/glaemresources/modes/{futhorc.glaem → old_english-futhorc.glaem} +0 -0
- data/glaemresources/modes/{mercian.glaem → old_english-tengwar-mercian.glaem} +22 -12
- data/glaemresources/modes/{westsaxon.glaem → old_english-tengwar-westsaxon.glaem} +20 -11
- data/glaemresources/modes/{futhark-runicus.glaem → old_norse-futhark-runicus.glaem} +0 -0
- data/glaemresources/modes/{futhark-younger.glaem → old_norse-futhark-younger.glaem} +0 -0
- data/glaemresources/modes/{quenya.glaem → quenya-tengwar-classical.glaem} +32 -50
- data/glaemresources/modes/raw-cirth.glaem +154 -0
- data/glaemresources/modes/raw-tengwar.glaem +46 -23
- data/glaemresources/modes/{rlyehian.glaem → rlyehian-tengwar.glaem} +14 -3
- data/glaemresources/modes/{sindarin-daeron.glaem → sindarin-cirth-daeron.glaem} +55 -14
- data/glaemresources/modes/{sindarin-beleriand.glaem → sindarin-tengwar-beleriand.glaem} +154 -28
- data/glaemresources/modes/{sindarin.glaem → sindarin-tengwar-general_use.glaem} +86 -25
- data/glaemresources/modes/{telerin.glaem → telerin-tengwar-glaemscrafu.glaem} +16 -6
- data/glaemresources/modes/{westron.glaem → westron-tengwar-glaemscrafu.glaem} +18 -8
- data/lib/api/charset.rb +67 -7
- data/lib/api/charset_parser.rb +14 -1
- data/lib/api/constants.rb +3 -4
- data/lib/api/fragment.rb +26 -5
- data/lib/api/if_tree.rb +70 -8
- data/lib/api/macro.rb +40 -0
- data/lib/api/mode.rb +66 -19
- data/lib/api/mode_parser.rb +117 -14
- data/lib/api/object_additions.rb +23 -1
- data/lib/api/option.rb +17 -2
- data/lib/api/post_processor/outspace.rb +44 -0
- data/lib/api/post_processor/resolve_virtuals.rb +25 -9
- data/lib/api/resource_manager.rb +1 -0
- data/lib/api/rule_group.rb +170 -26
- data/lib/api/sheaf_chain_iterator.rb +1 -1
- data/lib/api/transcription_pre_post_processor.rb +8 -5
- data/lib/api/transcription_processor.rb +15 -12
- data/lib/api/tts.rb +51 -0
- data/lib/glaemscribe.rb +36 -31
- data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +35 -0
- data/lib_espeak/glaemscribe_tts.js +505 -0
- metadata +76 -24
@@ -0,0 +1,154 @@
|
|
1
|
+
\**
|
2
|
+
|
3
|
+
Glǽmscribe (also written Glaemscribe) is a software dedicated to
|
4
|
+
the transcription of texts between writing systems, and more
|
5
|
+
specifically dedicated to the transcription of J.R.R. Tolkien's
|
6
|
+
invented languages to some of his devised writing systems.
|
7
|
+
|
8
|
+
Copyright (C) 2015 Benjamin Babut (Talagan).
|
9
|
+
|
10
|
+
This program is free software: you can redistribute it and/or modify
|
11
|
+
it under the terms of the GNU Affero General Public License as published by
|
12
|
+
the Free Software Foundation, either version 3 of the License, or
|
13
|
+
any later version.
|
14
|
+
|
15
|
+
This program is distributed in the hope that it will be useful,
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
GNU Affero General Public License for more details.
|
19
|
+
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
21
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
22
|
+
|
23
|
+
**\
|
24
|
+
|
25
|
+
\** very limited mode, mainly used for writing some doc for glaemscribe. **\
|
26
|
+
|
27
|
+
\beg changelog
|
28
|
+
\entry "0.0.1" "Initial version."
|
29
|
+
\end
|
30
|
+
|
31
|
+
\language "Raw Cirth"
|
32
|
+
\writing "Cirth"
|
33
|
+
\mode "Raw Cirth"
|
34
|
+
\version "0.0.1"
|
35
|
+
\authors "Talagan (Benjamin Babut)"
|
36
|
+
|
37
|
+
\world arda
|
38
|
+
\invention experimental
|
39
|
+
|
40
|
+
\metamode true
|
41
|
+
|
42
|
+
\charset cirth_ds true
|
43
|
+
|
44
|
+
\beg preprocessor
|
45
|
+
\** Work exclusively downcase **\
|
46
|
+
\downcase
|
47
|
+
\end
|
48
|
+
|
49
|
+
\beg processor
|
50
|
+
|
51
|
+
\beg rules litteral
|
52
|
+
_c1_ --> CIRTH_1
|
53
|
+
_c2_ --> CIRTH_2
|
54
|
+
_c3_ --> CIRTH_3
|
55
|
+
_c4_ --> CIRTH_4
|
56
|
+
_c5_ --> CIRTH_5
|
57
|
+
_c6_ --> CIRTH_6
|
58
|
+
_c7_ --> CIRTH_7
|
59
|
+
_c8_ --> CIRTH_8
|
60
|
+
_c9_ --> CIRTH_9
|
61
|
+
_c10_ --> CIRTH_10
|
62
|
+
_c11_ --> CIRTH_11
|
63
|
+
_c12_ --> CIRTH_12
|
64
|
+
_c13_ --> CIRTH_13
|
65
|
+
_c14_ --> CIRTH_14
|
66
|
+
_c15_ --> CIRTH_15
|
67
|
+
_c16_ --> CIRTH_16
|
68
|
+
_c17_ --> CIRTH_17
|
69
|
+
_c18_ --> CIRTH_18
|
70
|
+
_c19_ --> CIRTH_19
|
71
|
+
_c20_ --> CIRTH_20
|
72
|
+
_c21_ --> CIRTH_21
|
73
|
+
_c22_ --> CIRTH_22
|
74
|
+
_c23_ --> CIRTH_23
|
75
|
+
_c24_ --> CIRTH_24
|
76
|
+
_c25_ --> CIRTH_25
|
77
|
+
_c26_ --> CIRTH_26
|
78
|
+
_c27_ --> CIRTH_27
|
79
|
+
_c28_ --> CIRTH_28
|
80
|
+
_c29_ --> CIRTH_29
|
81
|
+
_c30_ --> CIRTH_30
|
82
|
+
_c31_ --> CIRTH_31
|
83
|
+
_c32_ --> CIRTH_32
|
84
|
+
_c33_ --> CIRTH_33
|
85
|
+
_c34_ --> CIRTH_34
|
86
|
+
_c35_ --> CIRTH_35
|
87
|
+
_c36_ --> CIRTH_36
|
88
|
+
_c37_ --> CIRTH_37
|
89
|
+
_c38_ --> CIRTH_38
|
90
|
+
_c39_ --> CIRTH_39
|
91
|
+
_c40_ --> CIRTH_40
|
92
|
+
_c41_ --> CIRTH_41
|
93
|
+
_c42_ --> CIRTH_42
|
94
|
+
_c43_ --> CIRTH_43
|
95
|
+
_c44_ --> CIRTH_44
|
96
|
+
_c45_ --> CIRTH_45
|
97
|
+
_c46_ --> CIRTH_46
|
98
|
+
_c47_ --> CIRTH_47
|
99
|
+
_c48_ --> CIRTH_48
|
100
|
+
_c49_ --> CIRTH_49
|
101
|
+
_c50_ --> CIRTH_50
|
102
|
+
_c51_ --> CIRTH_51
|
103
|
+
_c52_ --> CIRTH_52
|
104
|
+
_c53_ --> CIRTH_53
|
105
|
+
_c54_ --> CIRTH_54
|
106
|
+
_c55_ --> CIRTH_55
|
107
|
+
_c56_ --> CIRTH_56
|
108
|
+
_c57_ --> CIRTH_57
|
109
|
+
_c58_ --> CIRTH_58
|
110
|
+
_c59_ --> CIRTH_59
|
111
|
+
_c60_ --> CIRTH_60
|
112
|
+
|
113
|
+
_c38alt_ --> CIRTH_38_ALT
|
114
|
+
_c45alt_ --> CIRTH_45_ALT
|
115
|
+
_c51alt_ --> CIRTH_51_ALT
|
116
|
+
_c52alt_ --> CIRTH_52_ALT
|
117
|
+
_c55alt_ --> CIRTH_55_ALT
|
118
|
+
_c56alt_ --> CIRTH_56_ALT
|
119
|
+
|
120
|
+
_ce1_ --> CIRTH_EREB_1
|
121
|
+
_ce2_ --> CIRTH_EREB_2
|
122
|
+
_ce3_ --> CIRTH_EREB_3
|
123
|
+
_ce4_ --> CIRTH_EREB_4
|
124
|
+
_ce5_ --> CIRTH_EREB_5
|
125
|
+
_ce6_ --> CIRTH_EREB_6
|
126
|
+
_ce7_ --> CIRTH_EREB_7
|
127
|
+
|
128
|
+
_1_ --> CIRTH_NUMERAL_1
|
129
|
+
_2_ --> CIRTH_NUMERAL_2
|
130
|
+
_3_ --> CIRTH_NUMERAL_3
|
131
|
+
_4_ --> CIRTH_NUMERAL_4
|
132
|
+
_5_ --> CIRTH_NUMERAL_5
|
133
|
+
|
134
|
+
_{UNDERSCORE}_ --> TEHTA_UNDERLINE
|
135
|
+
_sdot_ --> TEHTA_SUB_DOT
|
136
|
+
_(^,circ)_ --> TEHTA_CIRCUM
|
137
|
+
|
138
|
+
{NBSP} --> NBSP
|
139
|
+
\end
|
140
|
+
|
141
|
+
\beg rules punctuation
|
142
|
+
, --> CIRTH_PUNCT_DOT
|
143
|
+
. --> CIRTH_PUNCT_MID_DOT
|
144
|
+
(..,:) --> CIRTH_PUNCT_TWO_DOTS
|
145
|
+
... --> CIRTH_PUNCT_THREE_DOTS
|
146
|
+
(....,::) --> CIRTH_PUNCT_FOUR_DOTS
|
147
|
+
\end
|
148
|
+
|
149
|
+
\end
|
150
|
+
|
151
|
+
\beg postprocessor
|
152
|
+
\resolve_virtuals
|
153
|
+
\end
|
154
|
+
|
@@ -25,13 +25,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
25
25
|
\beg changelog
|
26
26
|
\entry "0.0.1" "Initial version."
|
27
27
|
\entry "0.0.2" "Added missing extended tengwar."
|
28
|
-
\entry "0.0.3" "Added support for non-breaking spaces"
|
28
|
+
\entry "0.0.3" "Added support for non-breaking spaces"
|
29
|
+
\entry "0.0.4" "Added support for new unicode charsets"
|
30
|
+
\entry "0.0.5" "Added support for the Tengwar Telcontar font"
|
31
|
+
\entry "0.0.6" "Added support for Bombadil W/HW"
|
29
32
|
\end
|
30
33
|
|
31
34
|
\language "Raw Tengwar"
|
32
35
|
\writing "Tengwar"
|
33
36
|
\mode "Raw Tengwar"
|
34
|
-
\version "0.0.
|
37
|
+
\version "0.0.6"
|
35
38
|
\authors "Talagan (Benjamin Babut)"
|
36
39
|
|
37
40
|
\world arda
|
@@ -44,7 +47,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
44
47
|
\charset tengwar_ds_eldamar false
|
45
48
|
\charset tengwar_ds_annatar false
|
46
49
|
\charset tengwar_ds_elfica false
|
50
|
+
|
51
|
+
\charset tengwar_guni_sindarin false
|
52
|
+
\charset tengwar_guni_parmaite false
|
53
|
+
\charset tengwar_guni_eldamar false
|
54
|
+
\charset tengwar_guni_annatar false
|
55
|
+
\charset tengwar_guni_elfica false
|
56
|
+
|
47
57
|
\charset tengwar_freemono false
|
58
|
+
\charset tengwar_telcontar false
|
48
59
|
|
49
60
|
|
50
61
|
\beg options
|
@@ -164,6 +175,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
164
175
|
_(mh)_ --> TW_MH
|
165
176
|
_(mhbeleriandic,mhb)_ --> TW_MH_BELERIANDIC
|
166
177
|
_(hwlowdham,hwl)_ --> TW_HW_LOWDHAM
|
178
|
+
_(hwbombadil,hwbom)_ --> BOMBADIL_HW
|
179
|
+
_(wbombadil,wbom)_ --> BOMBADIL_W
|
167
180
|
|
168
181
|
\** ligatures **\
|
169
182
|
_(harmasilme,ahasilme,silmeharma,silmeaha)_ --> ANCA_CLOSED
|
@@ -179,7 +192,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
179
192
|
_(geminate)_ --> {GEMINATE}
|
180
193
|
_(nasal)_ --> {NASAL}
|
181
194
|
_(palatal)_ --> PALATAL_SIGN
|
182
|
-
_(labial)_ -->
|
195
|
+
_(labial)_ --> WA_TEHTA
|
196
|
+
|
197
|
+
\** SA-Rincer to be enhanced ... **\
|
198
|
+
_(sarince)_ --> SARINCE
|
199
|
+
_(arrince,sarincef)_ --> SARINCE_FLOURISHED
|
183
200
|
|
184
201
|
_0_ --> NUM_0
|
185
202
|
_1_ --> NUM_1
|
@@ -193,32 +210,42 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
193
210
|
_9_ --> NUM_9
|
194
211
|
_10_ --> NUM_10
|
195
212
|
_11_ --> NUM_11
|
213
|
+
_12_ --> NUM_12
|
196
214
|
|
197
215
|
_(lsd)_ --> CIRC_TEHTA_INF
|
198
216
|
|
199
217
|
\end
|
200
218
|
|
201
219
|
\beg rules punctuation
|
220
|
+
|
221
|
+
\** Allow / as word breaker **\
|
202
222
|
/ --> {NULL}
|
223
|
+
- --> {NULL}
|
224
|
+
|
225
|
+
· --> PUNCT_DOT
|
226
|
+
, --> PUNCT_DOT
|
227
|
+
: --> PUNCT_DOT
|
228
|
+
; --> PUNCT_DOT
|
203
229
|
|
204
|
-
.
|
205
|
-
..
|
230
|
+
. --> PUNCT_DDOT
|
231
|
+
.. --> PUNCT_DDOT
|
232
|
+
|
233
|
+
... --> PUSTA_3
|
234
|
+
.... --> PUSTA_4
|
235
|
+
|
236
|
+
:: --> PUSTA_4_SQUARED
|
237
|
+
\** PUSTA_4_HALFED ? **\
|
238
|
+
|
239
|
+
..... --> PUSTA_5
|
240
|
+
|
206
241
|
… --> PUNCT_TILD
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
....... --> PUNCT_TILD
|
212
|
-
|
213
|
-
, --> PUNCT_DOT
|
214
|
-
: --> PUNCT_DOT
|
215
|
-
; --> PUNCT_DOT
|
242
|
+
~ --> PUNCT_TILD
|
243
|
+
– --> PUNCT_TILD
|
244
|
+
— --> PUNCT_TILD
|
245
|
+
|
216
246
|
! --> PUNCT_EXCLAM
|
217
247
|
? --> PUNCT_INTERR
|
218
|
-
|
219
|
-
|
220
|
-
:: --> PUSTA_3
|
221
|
-
|
248
|
+
|
222
249
|
\** Apostrophe **\
|
223
250
|
|
224
251
|
' --> {NULL}
|
@@ -234,10 +261,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
234
261
|
« --> DQUOT_OPEN
|
235
262
|
» --> DQUOT_CLOSE
|
236
263
|
|
237
|
-
- --> {NULL}
|
238
|
-
– --> PUNCT_TILD
|
239
|
-
— --> PUNCT_TILD
|
240
|
-
|
241
264
|
[ --> PUNCT_PAREN_L
|
242
265
|
] --> PUNCT_PAREN_R
|
243
266
|
( --> PUNCT_PAREN_L
|
@@ -248,7 +271,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
248
271
|
\** > --> PUNCT_PAREN_R : cannot be used since it is already used as a letter in group litteral **\
|
249
272
|
|
250
273
|
\** Not universal between fonts ... **\
|
251
|
-
$ -->
|
274
|
+
$ --> ELVISH_PAREN
|
252
275
|
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
253
276
|
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
254
277
|
|
@@ -35,12 +35,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
35
35
|
\entry "0.0.2" "Ported to virtual chars"
|
36
36
|
\entry "0.0.3" "Ported to various charsets"
|
37
37
|
\entry "0.1.1" "Added support for inlined raw tengwar"
|
38
|
+
\entry "0.1.2" "Added support for new unicode charsets"
|
39
|
+
\entry "0.1.3" "Added support for the Tengwar Telcontar font"
|
38
40
|
\end
|
39
41
|
|
40
42
|
\language "R'lyehian"
|
41
43
|
\writing "Tengwar"
|
42
44
|
\mode "R'lyehian Tengwar - G*"
|
43
|
-
\version "0.1.
|
45
|
+
\version "0.1.3"
|
44
46
|
\authors "H.P.Lovecraft & The Great Ancient Gods, impl. Fthalagn"
|
45
47
|
|
46
48
|
\world other_world
|
@@ -53,7 +55,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
53
55
|
\charset tengwar_ds_eldamar false
|
54
56
|
\charset tengwar_ds_annatar false
|
55
57
|
\charset tengwar_ds_elfica false
|
58
|
+
|
59
|
+
\charset tengwar_guni_sindarin false
|
60
|
+
\charset tengwar_guni_parmaite false
|
61
|
+
\charset tengwar_guni_eldamar false
|
62
|
+
\charset tengwar_guni_annatar false
|
63
|
+
\charset tengwar_guni_elfica false
|
64
|
+
|
56
65
|
\charset tengwar_freemono false
|
66
|
+
\charset tengwar_telcontar false
|
67
|
+
|
57
68
|
|
58
69
|
\beg options
|
59
70
|
\end
|
@@ -178,7 +189,7 @@ y palatal semi vowel ?
|
|
178
189
|
{O_LOOP} === O_TEHTA
|
179
190
|
{U_LOOP} === U_TEHTA
|
180
191
|
|
181
|
-
{TEHTAR} === A_TEHTA * E_TEHTA * I_TEHTA * O_TEHTA * U_TEHTA *
|
192
|
+
{TEHTAR} === A_TEHTA * E_TEHTA * I_TEHTA * O_TEHTA * U_TEHTA * WA_TEHTA
|
182
193
|
|
183
194
|
[{VOWELS}] --> TELCO [{TEHTAR}] \** Replace isolated short vowels **\
|
184
195
|
|
@@ -268,7 +279,7 @@ y palatal semi vowel ?
|
|
268
279
|
> --> PUNCT_PAREN_R
|
269
280
|
|
270
281
|
\** Not universal between fonts ... **\
|
271
|
-
$ -->
|
282
|
+
$ --> ELVISH_PAREN
|
272
283
|
≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
|
273
284
|
≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
|
274
285
|
\end
|
@@ -28,12 +28,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
28
28
|
\entry 0.0.2 "Added thorn as equivalent for th"
|
29
29
|
\entry 0.0.3 "Moved out space to general element"
|
30
30
|
\entry 0.0.4 "Fixed wrong ch, hw, h"
|
31
|
+
\entry 0.0.5 "Added disambiguations from the tengwar modes. Reworked median point behaviour, and ng."
|
31
32
|
\end
|
32
33
|
|
33
34
|
\language "Sindarin"
|
34
35
|
\writing "Cirth"
|
35
36
|
\mode "Sindarin Cirth - Angerthas Daeron"
|
36
|
-
\version "0.0.
|
37
|
+
\version "0.0.5"
|
37
38
|
\authors "J.R.R. Tolkien, impl. Talagan (Benjamin Babut)"
|
38
39
|
|
39
40
|
\world arda
|
@@ -44,10 +45,29 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
44
45
|
\** We redefine the output space to have something beautiful, especially with erebor1 and erebor2 **\
|
45
46
|
\outspace CIRTH_SPACE_BIG
|
46
47
|
|
48
|
+
\beg options
|
49
|
+
\beg option hyphen HYPHEN_WORD_BREAKER
|
50
|
+
\value HYPHEN_WORD_BREAKER 0
|
51
|
+
\value HYPHEN_WORD_JOINER 1
|
52
|
+
\end
|
53
|
+
\end
|
54
|
+
|
55
|
+
|
47
56
|
\beg preprocessor
|
48
57
|
\** Work exclusively downcase **\
|
49
58
|
\downcase
|
50
59
|
|
60
|
+
\if "hyphen == HYPHEN_WORD_JOINER"
|
61
|
+
\** Replace hyphen by median point **\
|
62
|
+
\substitute "-" "·"
|
63
|
+
\else
|
64
|
+
\** Replace hyphen by glaemscribe's word breaker **\
|
65
|
+
\substitute "-" "|"
|
66
|
+
\endif
|
67
|
+
|
68
|
+
\** Add keyboard friendly word joiner **\
|
69
|
+
\substitute "*" "·"
|
70
|
+
|
51
71
|
\** Simplify trema vowels **\
|
52
72
|
\substitute ä a
|
53
73
|
\substitute ë e
|
@@ -63,6 +83,25 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
63
83
|
\rxsubstitute "(ō|ô|oo)" "ó"
|
64
84
|
\rxsubstitute "(ū|û|uu)" "ú"
|
65
85
|
\rxsubstitute "(ȳ|ŷ|yy)" "ý"
|
86
|
+
|
87
|
+
\** Special case of starting 'i' before vowels, replace i by j **\
|
88
|
+
\rxsubstitute "\\bi([aeouyáāâéēêíīîóōôúūûýȳŷ])" "j\\1"
|
89
|
+
|
90
|
+
\** Special case of diphthong aw. Before vowels, do not treat 'aw' as diphthong,
|
91
|
+
since it seems more logical that aw would behave as a semi vowel **\
|
92
|
+
\rxsubstitute "aw([aeouyáāâéēêíīîóōôúūûýȳŷ])" "a|w\\1"
|
93
|
+
|
94
|
+
\** Special case for ng : before the vast majority of consonants, treat as ŋ **\
|
95
|
+
\** Don't include r / l / lh / w **\
|
96
|
+
\rxsubstitute "ng([tpckbdfðvnmhs])" "ŋ\\1"
|
97
|
+
|
98
|
+
\** Prevent mutated ng from being treated as strong middle word n|g (ex : i·ngelaidh [iŋɛlaið] ) **\
|
99
|
+
\substitute "·ng" "·ŋ"
|
100
|
+
\** But avoid losing the strong g in nasal mutation of g (ex : in·Gelydh [iŋgɛlyð] ) **\
|
101
|
+
\substitute "n·g" "·ŋg"
|
102
|
+
\** Use median dot as word joiner **\
|
103
|
+
\substitute "·" ""
|
104
|
+
|
66
105
|
\end
|
67
106
|
|
68
107
|
\beg processor
|
@@ -115,16 +154,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
115
154
|
(k,c) --> CIRTH_18
|
116
155
|
(kh,ch) --> CIRTH_20
|
117
156
|
|
118
|
-
ghw --> CIRTH_26
|
119
|
-
gw --> CIRTH_24
|
120
157
|
|
121
158
|
h --> CIRTH_54 \** 13 is more eng. ch like in chin and 15 is more eng. sh like in shoe **\
|
122
|
-
hw --> CIRTH_5
|
123
159
|
|
124
160
|
j --> CIRTH_14
|
125
161
|
|
126
|
-
khw --> CIRTH_25
|
127
|
-
kw --> CIRTH_23
|
128
162
|
l --> CIRTH_31
|
129
163
|
lh --> CIRTH_32
|
130
164
|
m --> CIRTH_6
|
@@ -133,14 +167,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
133
167
|
n --> CIRTH_12
|
134
168
|
nc_ --> CIRTH_22 CIRTH_18 \** equals ŋc **\
|
135
169
|
nd --> CIRTH_38
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
ng_ --> CIRTH_22
|
140
|
-
ŋ --> CIRTH_22
|
170
|
+
|
171
|
+
\** Normalisation of ng **\
|
172
|
+
(ng,ngg,ŋg,ñg) --> CIRTH_33 \** strong **\
|
173
|
+
(ng_,_ng,ŋ,ñ) --> CIRTH_22 \** weak **\
|
141
174
|
|
142
|
-
nw --> CIRTH_28
|
143
|
-
ngw --> CIRTH_27
|
144
175
|
nj --> CIRTH_17
|
145
176
|
r --> CIRTH_29
|
146
177
|
rh --> CIRTH_30
|
@@ -148,8 +179,18 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
148
179
|
sh --> CIRTH_15
|
149
180
|
ss --> CIRTH_36
|
150
181
|
(þ,th) --> CIRTH_10
|
151
|
-
w --> CIRTH_44
|
152
182
|
zh --> CIRTH_16
|
183
|
+
|
184
|
+
\** Labials **\
|
185
|
+
hw --> CIRTH_5
|
186
|
+
ghw --> CIRTH_26
|
187
|
+
gw --> CIRTH_24
|
188
|
+
(ng,ngg,ŋg,ñg)w --> CIRTH_27 \** STRONG NG + W **\
|
189
|
+
khw --> CIRTH_25
|
190
|
+
kw --> CIRTH_23
|
191
|
+
nw --> CIRTH_28
|
192
|
+
w --> CIRTH_44
|
193
|
+
|
153
194
|
\end
|
154
195
|
|
155
196
|
\beg rules punctuation
|