glaemscribe 1.2.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/bin/glaemscribe +2 -2
  3. data/glaemresources/charsets/cirth_ds.cst +514 -179
  4. data/glaemresources/charsets/eldamar.cst +210 -0
  5. data/glaemresources/charsets/tengwar_ds_annatar.cst +2776 -348
  6. data/glaemresources/charsets/tengwar_ds_eldamar.cst +2648 -351
  7. data/glaemresources/charsets/tengwar_ds_elfica.cst +2639 -346
  8. data/glaemresources/charsets/tengwar_ds_parmaite.cst +2648 -351
  9. data/glaemresources/charsets/tengwar_ds_sindarin.cst +2642 -348
  10. data/glaemresources/charsets/tengwar_freemono.cst +1 -1
  11. data/glaemresources/charsets/tengwar_guni_annatar.cst +2725 -300
  12. data/glaemresources/charsets/tengwar_guni_eldamar.cst +2589 -295
  13. data/glaemresources/charsets/tengwar_guni_elfica.cst +2592 -298
  14. data/glaemresources/charsets/tengwar_guni_parmaite.cst +2592 -297
  15. data/glaemresources/charsets/tengwar_guni_sindarin.cst +2591 -297
  16. data/glaemresources/charsets/tengwar_telcontar.cst +7 -0
  17. data/glaemresources/modes/blackspeech-tengwar-general_use.glaem +1 -1
  18. data/glaemresources/modes/english-cirth-espeak.glaem +687 -0
  19. data/glaemresources/modes/english-tengwar-espeak.glaem +814 -0
  20. data/glaemresources/modes/japanese-tengwar.glaem +9 -4
  21. data/glaemresources/modes/lang_belta-tengwar-dadef.glaem +248 -0
  22. data/glaemresources/modes/raw-cirth.glaem +154 -0
  23. data/lib/api/charset.rb +124 -57
  24. data/lib/api/charset_parser.rb +39 -26
  25. data/lib/api/mode.rb +35 -10
  26. data/lib/api/mode_parser.rb +21 -12
  27. data/lib/api/post_processor/outspace.rb +44 -0
  28. data/lib/api/post_processor/resolve_virtuals.rb +41 -19
  29. data/lib/api/rule_group.rb +1 -1
  30. data/lib/api/transcription_pre_post_processor.rb +51 -45
  31. data/lib/api/transcription_processor.rb +12 -9
  32. data/lib/glaemscribe.rb +2 -0
  33. data/lib_espeak/espeakng.for.glaemscribe.nowasm.sync.js +25 -11
  34. data/lib_espeak/glaemscribe_tts.js +363 -223
  35. metadata +12 -6
@@ -0,0 +1,210 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \version 0.0.1
26
+
27
+ \beg changelog
28
+ \entry "0.0.1" "Copy pasted from FreeMono charset."
29
+ \end
30
+
31
+ \char 20 SPACE
32
+ \char a0 NBSP
33
+
34
+ \** PUSTAR: take them from standard Unicode, not from the Private Use Area (deprecated) **\
35
+ \char 2e31 PUNCT_DOT
36
+ \char 3a PUNCT_DDOT
37
+ \char 205D PUNCT_TDOT PUSTA_3
38
+ \char 2058 PUSTA_4
39
+ \char 10FB PUSTA_4_HALFED
40
+ \char 2E2C PUSTA_4_SQUARED
41
+ \char 2E2D PUSTA_5
42
+
43
+ \char e000 TW_11 TINCO
44
+ \char e001 TW_12 PARMA
45
+ \char e002 TW_13 CALMA
46
+ \char e003 TW_14 QUESSE
47
+
48
+ \char e004 TW_21 ANDO
49
+ \char e005 TW_22 UMBAR
50
+ \char e006 TW_23 ANGA
51
+ \char e007 TW_24 UNGWE
52
+
53
+ \char e008 TW_31 SULE THULE
54
+ \char e009 TW_32 FORMEN
55
+ \char e00A TW_33 AHA
56
+ \char e00B TW_34 HWESTA
57
+
58
+ \char e00C TW_41 ANTO
59
+ \char e00D TW_42 AMPA
60
+ \char e00E TW_43 ANCA
61
+ \char e00F TW_44 UNQUE
62
+
63
+ \char e010 TW_51 NUMEN
64
+ \char e011 TW_52 MALTA
65
+ \char e012 TW_53 NOLDO
66
+ \char e013 TW_54 NWALME
67
+
68
+ \char e014 TW_61 ORE
69
+ \char e015 TW_62 VALA
70
+ \char e016 TW_63 ANNA
71
+ \char e017 TW_64 VILYA
72
+
73
+ \char e018 TW_EXT_11 TINCO_EXT SULE_EXT THULE_EXT
74
+ \char e019 TW_EXT_12 PARMA_EXT FORMEN_EXT
75
+ \char e01A TW_EXT_13 CALMA_EXT AHA_EXT
76
+ \char e01B TW_EXT_14 QUESSE_EXT HWESTA_EXT
77
+
78
+ \char e01C TW_EXT_21 ANDO_EXT ANTO_EXT
79
+ \char e01D TW_EXT_22 UMBAR_EXT AMPA_EXT
80
+ \char e01E TW_EXT_23 ANGA_EXT ANCA_EXT
81
+ \char e01F TW_EXT_24 UNGWE_EXT UNQUE_EXT
82
+
83
+ \char e020 TW_71 ROMEN
84
+ \char e021 TW_72 ARDA
85
+ \char e022 TW_73 LAMBE
86
+ \char e023 TW_74 ALDA
87
+
88
+ \char e024 TW_81 SILME
89
+ \** Unfortunately, monotengwar is missing silme nuquerna used for y in Beleriand. NOTE(review): comment inherited from the FreeMono charset - confirm it still applies here. **\
90
+ \char e025 TW_82 SILME_NUQUERNA SILME_NUQUERNA_ALT
91
+ \char e026 TW_83 ESSE
92
+ \char e027 TW_84 ESSE_NUQUERNA
93
+
94
+ \char e028 TW_91 HYARMEN
95
+ \char e029 TW_92 HWESTA_SINDARINWA
96
+ \char e02A TW_93 YANTA
97
+ \char e02B TW_94 URE
98
+
99
+ \char e02C ARA
100
+ \char e02D HALLA
101
+ \char e02E TELCO
102
+ \char e02F ?
103
+
104
+ \char e030 REVERSED_OSSE
105
+ \char e031 BOMBADIL_W
106
+ \char e032 OSSE
107
+ \char e033 ?
108
+
109
+ \char e034 LIGATING_SHORT_CARRIER
110
+ \char e035 ?
111
+ \char e036 ANNA_OPEN
112
+ \char e037 CHRISTOPHER_QU
113
+
114
+ \char e038 ?
115
+ \char e039 BOMBADIL_HW
116
+ \char e03A TW_MH MALTA_W_HOOK
117
+ \char e03B TW_MH_BELERIANDIC VALA_W_HOOK
118
+
119
+ \char e03C TW_HW_LOWDHAM HARP_SHAPED
120
+ \char e03D VAIA WAIA VAIYA
121
+ \char e03E ?
122
+ \char e03F ?
123
+
124
+ \char e040 A_TEHTA
125
+ \char e041 A_TEHTA_INF
126
+ \char e042 I_TEHTA_DOUBLE Y_TEHTA
127
+ \char e043 I_TEHTA_DOUBLE_INF PALATAL_SIGN Y_TEHTA_INF
128
+
129
+ \char e044 I_TEHTA
130
+ \char e045 I_TEHTA_INF NO_VOWEL_DOT UNUTIXE
131
+ \char e046 E_TEHTA
132
+ \char e047 E_TEHTA_INF
133
+
134
+ \char e048 E_TEHTA_DOUBLE
135
+ \char e049 E_TEHTA_DOUBLE_INF GEMINATE_DOUBLE
136
+ \char e04A O_TEHTA
137
+ \char e04B O_TEHTA_INF
138
+
139
+ \char e04C U_TEHTA
140
+ \char e04D U_TEHTA_INF
141
+ \char e04E O_TEHTA_DOUBLE
142
+ \char e04F U_TEHTA_DOUBLE
143
+
144
+ \char e050 NASALIZE_SIGN NASALIZE_SIGN_TILD
145
+ \char e051 GEMINATE_SIGN GEMINATE_SIGN_TILD
146
+ \char e052 WA_TEHTA SEV_TEHTA
147
+ \char e053 TEHTA_BREVE A_TEHTA_CIRCUM_REVERSED
148
+
149
+ \char e054 E_TEHTA_GRAVE
150
+ \char e055 A_TEHTA_CIRCUM
151
+ \char e056 A_TEHTA_REVERSED A_TEHTA_DOUBLE
152
+ \char e057 THINNAS THINF_STROKE
153
+
154
+ \** THE TWO FOLLOWING MAPPINGS ARE HIGHLY DEBATABLE; MAYBE WE SHOULD ADD A VIRTUAL CHAR FOR THESE **\
155
+ \char e058 SARINCE_ENDING_LONG SARINCE_FLOURISHED
156
+ \char e059 SARINCE COMBINING_SARINCE SHOOK_RIGHT_L SHOOK_LEFT_L
157
+ \char e05A ?
158
+ \char e05B ?
159
+
160
+ \char e065 PUNCT_EXCLAM
161
+ \char e066 PUNCT_INTERR
162
+ \char e067 ELVISH_PAREN PUNCT_PAREN_L PUNCT_PAREN_R PUNCT_PAREN_L_ALT PUNCT_PAREN_R_ALT BOOKMARK_SIGN
163
+ \char e068 PUNCT_TILD
164
+
165
+ \char e069 PUNCT_DTILD RING_MARK_L RING_MARK_R
166
+
167
+ \char e06A DQUOT_OPEN
168
+ \char e06B DQUOT_CLOSE
169
+ \char e06C THORIN_EXCLAMATION
170
+
171
+ \char e070 NUM_0
172
+ \char e071 NUM_1
173
+ \char e072 NUM_2
174
+ \char e073 NUM_3
175
+ \char e074 NUM_4
176
+ \char e075 NUM_5
177
+ \char e076 NUM_6
178
+ \char e077 NUM_7
179
+ \char e078 NUM_8
180
+ \char e079 NUM_9
181
+ \char e07A NUM_10
182
+ \char e07B NUM_11
183
+ \char e07C NUM_12
184
+
185
+ \char e07D CIRC_TEHTA_INF
186
+
187
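+ \** Ligatured results would be mapped here; the block below is commented out, and the same names are defined as ZWJ sequences at the end of this file **\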
188
+ \**
189
+ \char 10037 AHA_TINCO
190
+ \char 10038 HWESTA_TINCO
191
+ \char 10039 ANCA_CLOSED SILME_AHA
192
+ **\
193
+
194
+ \char 204A OLD_ENGLISH_AND
195
+
196
+ \char 200d ZWJ
197
+
198
+ \** Ligatured sequences used as one entity in modes **\
199
+ \beg seq AHA_TINCO
200
+ AHA ZWJ TINCO
201
+ \end
202
+
203
+ \beg seq HWESTA_TINCO
204
+ HWESTA ZWJ TINCO
205
+ \end
206
+
207
+ \beg seq ANCA_CLOSED SILME_AHA
208
+ SILME ZWJ AHA
209
+ \end
210
+