glaemscribe 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset.rb +124 -57
  24. data/lib/api/charset_parser.rb +39 -26
  25. data/lib/api/mode.rb +35 -10
  26. data/lib/api/mode_parser.rb +21 -12
  27. data/lib/api/post_processor/outspace.rb +44 -0
  28. data/lib/api/post_processor/resolve_virtuals.rb +41 -19
  29. data/lib/api/rule_group.rb +1 -1
  30. data/lib/api/transcription_pre_post_processor.rb +51 -45
  31. data/lib/api/transcription_processor.rb +12 -9
  32. data/lib/glaemscribe.rb +2 -0
  33. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  34. data/lib_espeak/glaemscribe_tts.js +363 -223
  35. metadata +12 -6
@@ -22,11 +22,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
22
 
23
23
  **\
24
24
 
25
+ \beg changelog \** Release history of this mode: one entry per version (version string, then summary) **\
26
+ \entry "0.0.1" "Initial version"
27
+ \entry "0.0.2" "Corrected 億 to 10^8, added support for セィ and ゼィ. Thanks to Roman Rausch for this feedback!"
28
+ \end
29
+
25
30
  \language "Japanese"
26
31
  \writing "Tengwar"
27
32
  \mode "Japanese Tengwar - G*"
28
33
  \authors "Talagan (Benjamin Babut)"
29
- \version "0.0.1"
34
+ \version "0.0.2"
30
35
 
31
36
  \world primary
32
37
  \invention experimental
@@ -531,14 +536,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
531
536
  \deploy serie {SERIE} {TENGWA} {_PALATAL_} {__SV__} {__LV__} {NULL}
532
537
 
533
538
  \** si **\
534
- {SERIE} === [(スィ,si)]
539
+ {SERIE} === [(スィ,セィ,si)]
535
540
  {TENGWA} === SILME_NUQUERNA
536
541
  {__SV__} === [{_I_}]
537
542
  {__LV__} === [{_II_}]
538
543
  \deploy serie {SERIE} {TENGWA} {_CANCELLER_} {__SV__} {__LV__} {NULL}
539
544
 
540
545
  \** zi **\
541
- {SERIE} === [(ズィ,zi)]
546
+ {SERIE} === [(ズィ,ゼィ,zi)]
542
547
  {TENGWA} === ESSE_NUQUERNA
543
548
  {__SV__} === [{_I_}]
544
549
  {__LV__} === [{_II_}]
@@ -749,7 +754,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
749
754
  百 --> NUM_2 CIRC_TEHTA_INF
750
755
  千 --> NUM_3 CIRC_TEHTA_INF
751
756
  万 --> NUM_4 CIRC_TEHTA_INF
752
- 億 --> NUM_9 CIRC_TEHTA_INF
757
+ 億 --> NUM_8 CIRC_TEHTA_INF
753
758
  0 --> NUM_0
754
759
  1 --> NUM_1
755
760
  2 --> NUM_2
@@ -0,0 +1,248 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \beg changelog \** Release history of this mode: one entry per version (version string, then summary) **\
26
+ \entry "0.0.1" "Ported from Westron"
27
+ \entry "0.0.2" "Simplified"
28
+ \entry "0.0.3" "Yaterash poetic forms"
29
+ \entry "0.0.4" "English numeric defaults, use lsd for nasal, poetic finals"
30
+ \entry "0.0.5" "Bikang option: beacon final hints"
31
+ \entry "0.0.6" "Lowng & kuwendi options"
32
+ \entry "0.1.0" "clean up pass"
33
+ \end
34
+ \version "0.1.0" \** Same version string as the newest changelog entry **\
35
+
36
+ \** Lang Belta mode for glaemscribe: identification metadata (language, script, display name, authors) followed by classification, raw mode and supported charsets **\
37
+ \language "Lang Belta"
38
+ \writing Tengwar
39
+ \mode "Lang Belta (The Expanse) Tengwar - G*"
40
+ \authors "Da Def, based on Talagan's Westron"
41
+
42
+ \world other_world
43
+ \invention experimental
44
+
45
+ \raw_mode "raw-tengwar" \** NOTE(review): presumably names the companion mode used for raw tengwar input; the mechanism is not visible in this file **\
46
+
47
+ \charset tengwar_ds_sindarin false \** Charset list: the trailing flag is true for exactly one entry (tengwar_guni_annatar), presumably the default charset; confirm against the mode parser **\
48
+ \charset tengwar_ds_parmaite false
49
+ \charset tengwar_ds_eldamar false
50
+ \charset tengwar_ds_annatar false
51
+ \charset tengwar_ds_elfica false
52
+
53
+ \charset tengwar_guni_sindarin false
54
+ \charset tengwar_guni_parmaite false
55
+ \charset tengwar_guni_eldamar false
56
+ \charset tengwar_guni_annatar true
57
+ \charset tengwar_guni_elfica false
58
+
59
+ \charset tengwar_freemono false
60
+ \charset tengwar_telcontar false
61
+
62
+ \beg options \** User-tunable switches: tested by the if/else blocks of the literal rules and evaluated by the preprocessor **\
63
+ \option wit_yaterash false \** Off by default; gates the whole extra block of literal rules tested on wit_yaterash **\
64
+
65
+ \beg option bikang true \** On by default; selects between per-consonant final marks and a single GEMINATE_SIGN_TILD in the literal rules **\
66
+ \visible_when "wit_yaterash == true" \** Only relevant while wit_yaterash is on, matching the nesting of the rules **\
67
+ \end
68
+
69
+ \beg option lowng false \** Off by default; selects the branch of literal rules that uses the _EXT tengwar **\
70
+ \visible_when "wit_yaterash == true"
71
+ \end
72
+
73
+ \beg option kuwendi false \** Off by default; picks the sign stored in the NASAL variable **\
74
+ \visible_when "wit_yaterash == true && lowng == false" \** The rules only test kuwendi in the else branch of lowng **\
75
+ \end
76
+
77
+ \option implicit_a false \** When on, the literal rules leave a unmarked and flag vowel-less consonants instead **\
78
+ \option reverse_numbers false \** Passed to elvish_numbers in the preprocessor **\
79
+ \beg option numbers_base BASE_10 \** Enumerated option, default BASE_10; passed to elvish_numbers in the preprocessor **\
80
+ \value BASE_10 10
81
+ \value BASE_12 12
82
+ \end
83
+ \end
84
+
85
+ \beg preprocessor
86
+ \** Lowercase the whole input first: the literal rules only declare lowercase sources **\
87
+ \downcase
88
+
89
+ \** Fold the digraphs ny and ng into the single characters ɲ and ŋ, which the literal rules map to NOLDO and NWALME, so that they are not transcribed as n followed by y or g **\
90
+ \rxsubstitute "(ny)" "ɲ"
91
+ \rxsubstitute "(ng)" "ŋ"
92
+
93
+ \** Preprocess numbers according to the numbers_base and reverse_numbers options. NOTE(review): the exact rewriting done by elvish_numbers is not visible in this file **\
94
+ \elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
95
+ \end
96
+
97
+ \beg processor
98
+ \beg rules literal \** Letter rules: map source consonants and vowels to tengwar and tehtar **\
99
+
100
+ \if implicit_a
101
+ {_A_} === {NULL} \** With implicit_a the vowel a gets no tehta **\
102
+ {_X_} === NO_VOWEL_DOT \** ... and a consonant without a following vowel is flagged by the bare-consonant rule below **\
103
+ \else
104
+ {_A_} === A_TEHTA \** Default: a is written explicitly **\
105
+ {_X_} === {NULL} \** ... and bare consonants get no extra mark **\
106
+ \endif
107
+
108
+ {PENNAR} === a * e * i * ow * o * u \** Source vowels; TEHTAR lists their signs in the same order **\
109
+ {TEHTAR} === {_A_} * E_TEHTA * I_TEHTA * A_TEHTA_REVERSED * O_TEHTA * U_TEHTA
110
+
111
+ \** ã is appended as a seventh vowel for the word shãsa, which is a hapax for now **\
112
+ {PENNAR} === {PENNAR} * ã
113
+ {TEHTAR} === {TEHTAR} * E_TEHTA_GRAVE
114
+
115
+ \** tentative
116
+ {PENNAR} === {PENNAR} * ẽ * ĩ * õw * õ * ũ
117
+ {TEHTAR} === {TEHTAR} * E_TEHTA_DOUBLE * I_TEHTA_DOUBLE * WA_TEHTA * O_TEHTA_DOUBLE * U_TEHTA_DOUBLE
118
+ **\
119
+
120
+ {L1_S} === t * p * ch * k \** Each _S list of source consonants is paired, position by position, with the _T list of tengwar that follows it **\
121
+ {L1_T} === TINCO * PARMA * CALMA * QUESSE
122
+ {L2_S} === d * b * dzh * g
123
+ {L2_T} === ANDO * UMBAR * ANGA * UNGWE
124
+ {L3_S} === f * sh * x
125
+ {L3_T} === FORMEN * AHA * HWESTA
126
+ {L4_S} === v
127
+ {L4_T} === AMPA
128
+ {L5_S} === n * m * ɲ * ŋ
129
+ {L5_T} === NUMEN * MALTA * NOLDO * NWALME
130
+ {L6_S} === w * y
131
+ {L6_T} === VALA * ANNA
132
+ {IR_S} === r * l * s * z
133
+ {IR_T} === ROMEN * LAMBE * SILME_NUQUERNA * ESSE_NUQUERNA
134
+
135
+ {TAPTAR} === {L1_S} * {L2_S} * {L3_S} * {L4_S} * {L5_S} * {L6_S} * {IR_S} \** All source consonants, concatenated in series order **\
136
+ {TENGWAR} === {L1_T} * {L2_T} * {L3_T} * {L4_T} * {L5_T} * {L6_T} * {IR_T} \** Matching tengwar, in the same order as TAPTAR **\
137
+
138
+ [{TAPTAR}] --> [{TENGWAR}] {_X_} \** Consonant alone: its tengwa, plus the no-vowel mark when implicit_a is on **\
139
+ [{TAPTAR}][{PENNAR}] --> [{TENGWAR}][{TEHTAR}] \** Consonant followed by a vowel: tengwa followed by that vowel's tehta **\
140
+ [{PENNAR}] --> TELCO [{TEHTAR}] \** Vowel with no preceding consonant: TELCO followed by the tehta **\
141
+
142
+ \if wit_yaterash
143
+
144
+ \if lowng
145
+ {TAPTARN} === nt * nd * mp * mb * nsh * ns \** Clusters that get a single dedicated sign in this branch; paired by position with TENGWARN **\
146
+ {TENGWARN} === TINCO_EXT * ANDO_EXT * PARMA_EXT * UMBAR_EXT * AHA_EXT * TW_HW_LOWDHAM
147
+
148
+ tn[{PENNAR}] --> TW_MH [{TEHTAR}]
149
+ lt[{PENNAR}] --> ALDA [{TEHTAR}]
150
+
151
+ [{TAPTARN}][{PENNAR}] --> [{TENGWARN}][{TEHTAR}]
152
+ \else
153
+ tn[{PENNAR}] --> NUMEN THINNAS [{TEHTAR}]
154
+ l[{TAPTAR}][{PENNAR}] --> [{TENGWAR}] PALATAL_SIGN [{TEHTAR}] \** l before a consonant is not written as its own tengwa: PALATAL_SIGN is attached to the following consonant **\
155
+
156
+ \if kuwendi
157
+ {NASAL} === NASALIZE_SIGN_TILD
158
+ \else
159
+ {NASAL} === CIRC_TEHTA_INF
160
+ \endif
161
+
162
+ (m,n)[{TAPTAR}][{PENNAR}] --> [{TENGWAR}] {NASAL} [{TEHTAR}] \** m or n before a consonant is written as the NASAL sign on that consonant **\
163
+ \endif
164
+
165
+ \if bikang
166
+ (m,t)_ --> O_TEHTA_INF \** Sources ending in an underscore. NOTE(review): presumably the underscore stands for a word boundary, i.e. these are word-final forms; confirm against the rule syntax **\
167
+ (ŋ)_ --> E_TEHTA_DOUBLE_INF
168
+ (f,k,p)_ --> U_TEHTA_INF
169
+ (l,x,sh)_ --> E_TEHTA_INF
170
+ \else
171
+ (t,k,p,f,x,sh,m,ŋ,l)_ --> GEMINATE_SIGN_TILD \** Without bikang the same nine consonants all share one sign **\
172
+ \endif
173
+
174
+ \endif
175
+
176
+ \end
177
+
178
+ \beg rules punctutation \** NOTE(review): the group name is spelled punctutation (sic); left untouched because the name identifies the group **\
179
+ . --> PUNCT_DDOT
180
+ .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
181
+ … --> PUNCT_TILD \** The ellipsis character and every run of three to seven dots give the same sign **\
182
+ ... --> PUNCT_TILD
183
+ .... --> PUNCT_TILD
184
+ ..... --> PUNCT_TILD
185
+ ...... --> PUNCT_TILD
186
+ ....... --> PUNCT_TILD
187
+
188
+ , --> PUNCT_DOT
189
+ : --> PUNCT_DOT
190
+ ; --> PUNCT_DOT
191
+ ! --> PUNCT_EXCLAM
192
+ ? --> PUNCT_INTERR
193
+ · --> PUNCT_DOT
194
+
195
+ \** Apostrophes, straight and curly, produce no output **\
196
+
197
+ ' --> {NULL}
198
+ ’ --> {NULL}
199
+
200
+ \** Non-breaking space is passed through as NBSP **\
201
+ {NBSP} --> NBSP
202
+
203
+ \** Quotes: curly double quotes and guillemets share the same opening and closing signs **\
204
+
205
+ “ --> DQUOT_OPEN
206
+ ” --> DQUOT_CLOSE
207
+ « --> DQUOT_OPEN
208
+ » --> DQUOT_CLOSE
209
+
210
+ - --> PUNCT_DOT
211
+ – --> PUNCT_TILD
212
+ — --> PUNCT_DTILD
213
+
214
+ [ --> PUNCT_PAREN_L \** Every kind of bracket collapses to the same left and right parenthesis signs **\
215
+ ] --> PUNCT_PAREN_R
216
+ ( --> PUNCT_PAREN_L
217
+ ) --> PUNCT_PAREN_R
218
+ { --> PUNCT_PAREN_L
219
+ } --> PUNCT_PAREN_R
220
+ < --> PUNCT_PAREN_L
221
+ > --> PUNCT_PAREN_R
222
+
223
+ \** The signs below are not available in every font **\
224
+ $ --> ELVISH_PAREN
225
+ ≤ --> RING_MARK_L \** Left-hand ornamental mark of the Ring inscription **\
226
+ ≥ --> RING_MARK_R \** Right-hand ornamental mark of the Ring inscription **\
227
+ \end
228
+
229
+ \beg rules numbers
230
+ 0 --> NUM_0 \** Digits 0 to 9 map one-to-one onto NUM_0 to NUM_9 **\
231
+ 1 --> NUM_1
232
+ 2 --> NUM_2
233
+ 3 --> NUM_3
234
+ 4 --> NUM_4
235
+ 5 --> NUM_5
236
+ 6 --> NUM_6
237
+ 7 --> NUM_7
238
+ 8 --> NUM_8
239
+ 9 --> NUM_9
240
+ A --> NUM_10 \** A and B stand for the values ten and eleven. NOTE(review): presumably emitted by elvish_numbers when numbers_base is BASE_12, since the preprocessor lowercases user input before it runs; confirm **\
241
+ B --> NUM_11
242
+ \end
243
+
244
+ \end
245
+
246
+ \beg postprocessor \** Single post-processing step. NOTE(review): resolve_virtuals presumably replaces virtual characters with concrete glyphs of the selected charset; confirm in the post-processor implementation **\
247
+ \resolve_virtuals
248
+ \end
@@ -0,0 +1,154 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \** Very limited mode, mainly used for writing Glaemscribe documentation. **\
26
+
27
+ \beg changelog \** Release history of this mode: one entry per version (version string, then summary) **\
28
+ \entry "0.0.1" "Initial version."
29
+ \end
30
+
31
+ \language "Raw Cirth" \** Identification metadata: language, script, display name, version and authors **\
32
+ \writing "Cirth"
33
+ \mode "Raw Cirth"
34
+ \version "0.0.1" \** Same version string as the only changelog entry **\
35
+ \authors "Talagan (Benjamin Babut)"
36
+
37
+ \world arda
38
+ \invention experimental
39
+
40
+ \metamode true \** NOTE(review): the effect of this flag is not visible in this file; confirm in the mode parser **\
41
+
42
+ \charset cirth_ds true \** Only charset declared; the flag is true, presumably marking it as the default **\
43
+
44
+ \beg preprocessor
45
+ \** Lowercase the whole input first: every token declared in the rules is lowercase **\
46
+ \downcase
47
+ \end
48
+
49
+ \beg processor
50
+
51
+ \beg rules litteral \** Raw access to the charset: every sign is addressed by a token wrapped in underscores. The group name is spelled litteral (sic) and is left untouched because it identifies the group **\
52
+ _c1_ --> CIRTH_1 \** Numbered cirth: token _cN_ gives CIRTH_N, for N from 1 to 60 **\
53
+ _c2_ --> CIRTH_2
54
+ _c3_ --> CIRTH_3
55
+ _c4_ --> CIRTH_4
56
+ _c5_ --> CIRTH_5
57
+ _c6_ --> CIRTH_6
58
+ _c7_ --> CIRTH_7
59
+ _c8_ --> CIRTH_8
60
+ _c9_ --> CIRTH_9
61
+ _c10_ --> CIRTH_10
62
+ _c11_ --> CIRTH_11
63
+ _c12_ --> CIRTH_12
64
+ _c13_ --> CIRTH_13
65
+ _c14_ --> CIRTH_14
66
+ _c15_ --> CIRTH_15
67
+ _c16_ --> CIRTH_16
68
+ _c17_ --> CIRTH_17
69
+ _c18_ --> CIRTH_18
70
+ _c19_ --> CIRTH_19
71
+ _c20_ --> CIRTH_20
72
+ _c21_ --> CIRTH_21
73
+ _c22_ --> CIRTH_22
74
+ _c23_ --> CIRTH_23
75
+ _c24_ --> CIRTH_24
76
+ _c25_ --> CIRTH_25
77
+ _c26_ --> CIRTH_26
78
+ _c27_ --> CIRTH_27
79
+ _c28_ --> CIRTH_28
80
+ _c29_ --> CIRTH_29
81
+ _c30_ --> CIRTH_30
82
+ _c31_ --> CIRTH_31
83
+ _c32_ --> CIRTH_32
84
+ _c33_ --> CIRTH_33
85
+ _c34_ --> CIRTH_34
86
+ _c35_ --> CIRTH_35
87
+ _c36_ --> CIRTH_36
88
+ _c37_ --> CIRTH_37
89
+ _c38_ --> CIRTH_38
90
+ _c39_ --> CIRTH_39
91
+ _c40_ --> CIRTH_40
92
+ _c41_ --> CIRTH_41
93
+ _c42_ --> CIRTH_42
94
+ _c43_ --> CIRTH_43
95
+ _c44_ --> CIRTH_44
96
+ _c45_ --> CIRTH_45
97
+ _c46_ --> CIRTH_46
98
+ _c47_ --> CIRTH_47
99
+ _c48_ --> CIRTH_48
100
+ _c49_ --> CIRTH_49
101
+ _c50_ --> CIRTH_50
102
+ _c51_ --> CIRTH_51
103
+ _c52_ --> CIRTH_52
104
+ _c53_ --> CIRTH_53
105
+ _c54_ --> CIRTH_54
106
+ _c55_ --> CIRTH_55
107
+ _c56_ --> CIRTH_56
108
+ _c57_ --> CIRTH_57
109
+ _c58_ --> CIRTH_58
110
+ _c59_ --> CIRTH_59
111
+ _c60_ --> CIRTH_60
112
+
113
+ _c38alt_ --> CIRTH_38_ALT \** Alternate forms, available for cirth 38, 45, 51, 52, 55 and 56 only **\
114
+ _c45alt_ --> CIRTH_45_ALT
115
+ _c51alt_ --> CIRTH_51_ALT
116
+ _c52alt_ --> CIRTH_52_ALT
117
+ _c55alt_ --> CIRTH_55_ALT
118
+ _c56alt_ --> CIRTH_56_ALT
119
+
120
+ _ce1_ --> CIRTH_EREB_1 \** Token _ceN_ gives CIRTH_EREB_N, for N from 1 to 7 **\
121
+ _ce2_ --> CIRTH_EREB_2
122
+ _ce3_ --> CIRTH_EREB_3
123
+ _ce4_ --> CIRTH_EREB_4
124
+ _ce5_ --> CIRTH_EREB_5
125
+ _ce6_ --> CIRTH_EREB_6
126
+ _ce7_ --> CIRTH_EREB_7
127
+
128
+ _1_ --> CIRTH_NUMERAL_1 \** Numeral signs: token _N_ gives CIRTH_NUMERAL_N, for N from 1 to 5 **\
129
+ _2_ --> CIRTH_NUMERAL_2
130
+ _3_ --> CIRTH_NUMERAL_3
131
+ _4_ --> CIRTH_NUMERAL_4
132
+ _5_ --> CIRTH_NUMERAL_5
133
+
134
+ _{UNDERSCORE}_ --> TEHTA_UNDERLINE \** NOTE(review): UNDERSCORE presumably stands for a literal underscore, making the token three underscores in a row; confirm **\
135
+ _sdot_ --> TEHTA_SUB_DOT
136
+ _(^,circ)_ --> TEHTA_CIRCUM \** Two alternative spellings of the token: _^_ or _circ_ **\
137
+
138
+ {NBSP} --> NBSP
139
+ \end
140
+
141
+ \beg rules punctuation
142
+ , --> CIRTH_PUNCT_DOT \** Note that the comma, not the full stop, gives the plain dot **\
143
+ . --> CIRTH_PUNCT_MID_DOT
144
+ (..,:) --> CIRTH_PUNCT_TWO_DOTS \** Two alternative sources for the same sign: two full stops or a colon **\
145
+ ... --> CIRTH_PUNCT_THREE_DOTS
146
+ (....,::) --> CIRTH_PUNCT_FOUR_DOTS \** Four full stops or two colons **\
147
+ \end
148
+
149
+ \end
150
+
151
+ \beg postprocessor \** Single post-processing step. NOTE(review): resolve_virtuals presumably replaces virtual characters with concrete glyphs of the selected charset; confirm in the post-processor implementation **\
152
+ \resolve_virtuals
153
+ \end
154
+