glaemscribe 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2452 -130
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2319 -125
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2317 -126
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2319 -127
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2318 -127
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2451 -131
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2317 -126
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2316 -127
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2319 -127
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2317 -126
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset_parser.rb +7 -1
  24. data/lib/api/mode.rb +35 -10
  25. data/lib/api/mode_parser.rb +21 -12
  26. data/lib/api/post_processor/outspace.rb +44 -0
  27. data/lib/api/rule_group.rb +1 -1
  28. data/lib/api/transcription_pre_post_processor.rb +8 -5
  29. data/lib/api/transcription_processor.rb +12 -9
  30. data/lib/glaemscribe.rb +2 -0
  31. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  32. data/lib_espeak/glaemscribe_tts.js +363 -223
  33. metadata +12 -6
@@ -0,0 +1,210 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \version 0.0.1
26
+
27
+ \beg changelog
28
+ \entry "0.0.1" "Copy pasted from FreeMono charset."
29
+ \end
30
+
31
+ \char 20 SPACE
32
+ \char a0 NBSP
33
+
34
+ \** PUSTAR: take them from standard Unicode, not from the private use area (deprecated) **\
35
+ \char 2e31 PUNCT_DOT
36
+ \char 3a PUNCT_DDOT
37
+ \char 205D PUNCT_TDOT PUSTA_3
38
+ \char 2058 PUSTA_4
39
+ \char 10FB PUSTA_4_HALFED
40
+ \char 2E2C PUSTA_4_SQUARED
41
+ \char 2E2D PUSTA_5
42
+
43
+ \char e000 TW_11 TINCO
44
+ \char e001 TW_12 PARMA
45
+ \char e002 TW_13 CALMA
46
+ \char e003 TW_14 QUESSE
47
+
48
+ \char e004 TW_21 ANDO
49
+ \char e005 TW_22 UMBAR
50
+ \char e006 TW_23 ANGA
51
+ \char e007 TW_24 UNGWE
52
+
53
+ \char e008 TW_31 SULE THULE
54
+ \char e009 TW_32 FORMEN
55
+ \char e00A TW_33 AHA
56
+ \char e00B TW_34 HWESTA
57
+
58
+ \char e00C TW_41 ANTO
59
+ \char e00D TW_42 AMPA
60
+ \char e00E TW_43 ANCA
61
+ \char e00F TW_44 UNQUE
62
+
63
+ \char e010 TW_51 NUMEN
64
+ \char e011 TW_52 MALTA
65
+ \char e012 TW_53 NOLDO
66
+ \char e013 TW_54 NWALME
67
+
68
+ \char e014 TW_61 ORE
69
+ \char e015 TW_62 VALA
70
+ \char e016 TW_63 ANNA
71
+ \char e017 TW_64 VILYA
72
+
73
+ \char e018 TW_EXT_11 TINCO_EXT SULE_EXT THULE_EXT
74
+ \char e019 TW_EXT_12 PARMA_EXT FORMEN_EXT
75
+ \char e01A TW_EXT_13 CALMA_EXT AHA_EXT
76
+ \char e01B TW_EXT_14 QUESSE_EXT HWESTA_EXT
77
+
78
+ \char e01C TW_EXT_21 ANDO_EXT ANTO_EXT
79
+ \char e01D TW_EXT_22 UMBAR_EXT AMPA_EXT
80
+ \char e01E TW_EXT_23 ANGA_EXT ANCA_EXT
81
+ \char e01F TW_EXT_24 UNGWE_EXT UNQUE_EXT
82
+
83
+ \char e020 TW_71 ROMEN
84
+ \char e021 TW_72 ARDA
85
+ \char e022 TW_73 LAMBE
86
+ \char e023 TW_74 ALDA
87
+
88
+ \char e024 TW_81 SILME
89
+ \** Unfortunately, monotengwar is missing the silme nuquerna used for y in Beleriand **\
90
+ \char e025 TW_82 SILME_NUQUERNA SILME_NUQUERNA_ALT
91
+ \char e026 TW_83 ESSE
92
+ \char e027 TW_84 ESSE_NUQUERNA
93
+
94
+ \char e028 TW_91 HYARMEN
95
+ \char e029 TW_92 HWESTA_SINDARINWA
96
+ \char e02A TW_93 YANTA
97
+ \char e02B TW_94 URE
98
+
99
+ \char e02C ARA
100
+ \char e02D HALLA
101
+ \char e02E TELCO
102
+ \char e02F ?
103
+
104
+ \char e030 REVERSED_OSSE
105
+ \char e031 BOMBADIL_W
106
+ \char e032 OSSE
107
+ \char e033 ?
108
+
109
+ \char e034 LIGATING_SHORT_CARRIER
110
+ \char e035 ?
111
+ \char e036 ANNA_OPEN
112
+ \char e037 CHRISTOPHER_QU
113
+
114
+ \char e038 ?
115
+ \char e039 BOMBADIL_HW
116
+ \char e03A TW_MH MALTA_W_HOOK
117
+ \char e03B TW_MH_BELERIANDIC VALA_W_HOOK
118
+
119
+ \char e03C TW_HW_LOWDHAM HARP_SHAPED
120
+ \char e03D VAIA WAIA VAIYA
121
+ \char e03E ?
122
+ \char e03F ?
123
+
124
+ \char e040 A_TEHTA
125
+ \char e041 A_TEHTA_INF
126
+ \char e042 I_TEHTA_DOUBLE Y_TEHTA
127
+ \char e043 I_TEHTA_DOUBLE_INF PALATAL_SIGN Y_TEHTA_INF
128
+
129
+ \char e044 I_TEHTA
130
+ \char e045 I_TEHTA_INF NO_VOWEL_DOT UNUTIXE
131
+ \char e046 E_TEHTA
132
+ \char e047 E_TEHTA_INF
133
+
134
+ \char e048 E_TEHTA_DOUBLE
135
+ \char e049 E_TEHTA_DOUBLE_INF GEMINATE_DOUBLE
136
+ \char e04A O_TEHTA
137
+ \char e04B O_TEHTA_INF
138
+
139
+ \char e04C U_TEHTA
140
+ \char e04D U_TEHTA_INF
141
+ \char e04E O_TEHTA_DOUBLE
142
+ \char e04F U_TEHTA_DOUBLE
143
+
144
+ \char e050 NASALIZE_SIGN NASALIZE_SIGN_TILD
145
+ \char e051 GEMINATE_SIGN GEMINATE_SIGN_TILD
146
+ \char e052 WA_TEHTA SEV_TEHTA
147
+ \char e053 TEHTA_BREVE A_TEHTA_CIRCUM_REVERSED
148
+
149
+ \char e054 E_TEHTA_GRAVE
150
+ \char e055 A_TEHTA_CIRCUM
151
+ \char e056 A_TEHTA_REVERSED A_TEHTA_DOUBLE
152
+ \char e057 THINNAS THINF_STROKE
153
+
154
+ \** THE FOLLOWING TWO ARE HIGHLY DEBATABLE; MAYBE WE SHOULD ADD A VIRTUAL CHAR FOR THESE **\
155
+ \char e058 SARINCE_ENDING_LONG SARINCE_FLOURISHED
156
+ \char e059 SARINCE COMBINING_SARINCE SHOOK_RIGHT_L SHOOK_LEFT_L
157
+ \char e05A ?
158
+ \char e05B ?
159
+
160
+ \char e065 PUNCT_EXCLAM
161
+ \char e066 PUNCT_INTERR
162
+ \char e067 ELVISH_PAREN PUNCT_PAREN_L PUNCT_PAREN_R PUNCT_PAREN_L_ALT PUNCT_PAREN_R_ALT BOOKMARK_SIGN
163
+ \char e068 PUNCT_TILD
164
+
165
+ \char e069 PUNCT_DTILD RING_MARK_L RING_MARK_R
166
+
167
+ \char e06A DQUOT_OPEN
168
+ \char e06B DQUOT_CLOSE
169
+ \char e06C THORIN_EXCLAMATION
170
+
171
+ \char e070 NUM_0
172
+ \char e071 NUM_1
173
+ \char e072 NUM_2
174
+ \char e073 NUM_3
175
+ \char e074 NUM_4
176
+ \char e075 NUM_5
177
+ \char e076 NUM_6
178
+ \char e077 NUM_7
179
+ \char e078 NUM_8
180
+ \char e079 NUM_9
181
+ \char e07A NUM_10
182
+ \char e07B NUM_11
183
+ \char e07C NUM_12
184
+
185
+ \char e07D CIRC_TEHTA_INF
186
+
187
+ \** Ligatured results are mapped here **\
188
+ \**
189
+ \char 10037 AHA_TINCO
190
+ \char 10038 HWESTA_TINCO
191
+ \char 10039 ANCA_CLOSED SILME_AHA
192
+ **\
193
+
194
+ \char 204A OLD_ENGLISH_AND
195
+
196
+ \char 200d ZWJ
197
+
198
+ \** Ligatured sequences used as one entity in modes **\
199
+ \beg seq AHA_TINCO
200
+ AHA ZWJ TINCO
201
+ \end
202
+
203
+ \beg seq HWESTA_TINCO
204
+ HWESTA ZWJ TINCO
205
+ \end
206
+
207
+ \beg seq ANCA_CLOSED SILME_AHA
208
+ SILME ZWJ AHA
209
+ \end
210
+