glaemscribe 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
@@ -0,0 +1,342 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \language "Old English"
26
+ \writing "Tengwar"
27
+ \mode "West Saxon"
28
+ \version "0.0.1"
29
+ \authors "Talagan (Benjamin Babut)"
30
+
31
+ \charset tengwar_ds true
32
+
33
+ \beg preprocessor
34
+ \** Work exclusively downcase **\
35
+ \downcase
36
+
37
+ \** Simplify trema vowels **\
38
+ \substitute ä a
39
+ \substitute ë e
40
+ \substitute ï i
41
+ \substitute ö o
42
+ \substitute ü u
43
+ \substitute ÿ y
44
+
45
+ \substitute "ae" "æ"
46
+ \substitute "ea" "æa"
47
+ \substitute "éa" "ǽa"
48
+ \substitute "7" "⁊"
49
+
50
+ \** Dis-ambiguate long vowels **\
51
+ \rxsubstitute "(ā|â|aa)" "á"
52
+ \rxsubstitute "(ē|ê|ee)" "é"
53
+ \rxsubstitute "(ī|î|ii)" "í"
54
+ \rxsubstitute "(ō|ô|oo)" "ó"
55
+ \rxsubstitute "(ū|û|uu)" "ú"
56
+ \rxsubstitute "(ȳ|ŷ|yy)" "ý"
57
+
58
+ \up_down_tehta_split "æ,ǽ,a,ä,á,e,ë,é,i,ï,í,o,ö,ó,u,ü,ú,y,ÿ,ý,ø,ǿ,œ,œ́" "t,p,ċ,c,d,b,ġ,g,þ,f,ç,χ,ħ,ð,v,j,ȝ,n,m,r,ĭ,w,l,ld,s,z,h,x,sċ,hw,çt,χt,ħt"
59
+ \end
60
+
61
+ \beg processor
62
+
63
+ \beg rules litteral
64
+ {A} === a
65
+ {AA} === á
66
+ {E} === e
67
+ {EE} === é
68
+ {I} === i
69
+ {II} === í
70
+ {O} === o
71
+ {OO} === ó
72
+ {U} === u
73
+ {UU} === ú
74
+ {Y} === y
75
+ {YY} === ý
76
+
77
+ {AE} === (æ,ae)
78
+ {AEAE} === (ǽ,ǣ)
79
+
80
+ {OE} === (ø,œ)
81
+ {OEOE} === (ǿ,œ́)
82
+
83
+ \** Diphthongs are always split, so consider that we don't have any. **\
84
+ \** @ is the phantom vowel **\
85
+
86
+ {S_VOWELS_NP} === {A} * {AE} * {OE} * {E} * {I} * {O} * {U} * {Y} * {A}_
87
+ {S_VOWELS} === {S_VOWELS_NP} * @
88
+
89
+ \** UP TEHTAS **\
90
+ {UTEHTA_NP_XS} === E_TEHTA_XS * A_TEHTA_XS * E_TEHTA_DOUBLE_XS * Y_TEHTA_XS * I_TEHTA_XS * O_TEHTA_XS * U_TEHTA_XS * THSUP_SEV_XS * VILYA
91
+ {UTEHTA_NP__S} === E_TEHTA_S * A_TEHTA_S * E_TEHTA_DOUBLE_S * Y_TEHTA_S * I_TEHTA_S * O_TEHTA_S * U_TEHTA_S * THSUP_SEV_S * VILYA
92
+ {UTEHTA_NP__L} === E_TEHTA_L * A_TEHTA_L * E_TEHTA_DOUBLE_L * Y_TEHTA_L * I_TEHTA_L * O_TEHTA_L * U_TEHTA_L * THSUP_SEV_L * VILYA
93
+ {UTEHTA_NP_XL} === E_TEHTA_XL * A_TEHTA_XL * E_TEHTA_DOUBLE_XL * Y_TEHTA_XL * I_TEHTA_XL * O_TEHTA_XL * U_TEHTA_XL * THSUP_SEV_XL * VILYA
94
+ {UTEHTA_XS} === {UTEHTA_NP_XS} * {NULL}
95
+ {UTEHTA__S} === {UTEHTA_NP__S} * {NULL}
96
+ {UTEHTA__L} === {UTEHTA_NP__L} * {NULL}
97
+ {UTEHTA_XL} === {UTEHTA_NP_XL} * {NULL}
98
+
99
+ \** FOR LONG VOWELS **\
100
+ {L_VOWELS} === {AA} * {AEAE} * {OEOE} * {EE} * {II} * {OO} * {UU} * {YY}
101
+ {L_PTEHTAS} === ARA E_TEHTA_XS * ARA A_TEHTA_XS * ARA E_TEHTA_DOUBLE_XS * ARA Y_TEHTA_XS * ARA I_TEHTA_XS * ARA O_TEHTA_XS * ARA U_TEHTA_XS * ARA THSUP_SEV_XS
102
+
103
+ \** DOWN TEHTAS **\
104
+ {DTEHTA_XS} === THINF_ACCENT_XS * THINF_TDOT_XS * TH_SUB_CIRC_XS * THINF_DDOT_XS * THINF_DOT_XS * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_XS * VILYA * {NULL}
105
+ {DTEHTA__S} === THINF_ACCENT_S * THINF_TDOT_S * TH_SUB_CIRC_S * THINF_DDOT_S * THINF_DOT_S * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_S * VILYA * {NULL}
106
+ {DTEHTA__L} === THINF_ACCENT_L * THINF_TDOT_L * TH_SUB_CIRC_L * THINF_DDOT_L * THINF_DOT_L * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_L * VILYA * {NULL}
107
+ {DTEHTA_XL} === THINF_ACCENT_XL * THINF_TDOT_XL * TH_SUB_CIRC_XL * THINF_DDOT_XL * THINF_DOT_XL * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_XL * VILYA * {NULL}
108
+
109
+ {S_VOWELS_NP_KER} === [ {S_VOWELS_NP} ]
110
+ {S_VOWELS_NP_KER_WN} === [ {S_VOWELS_NP} * {NULL} ]
111
+ {S_VOWELS_KER} === [ {S_VOWELS} ]
112
+ {S_VOWELS_KER_WN} === [ {S_VOWELS} * {NULL} ]
113
+
114
+ \** Img bundles for all vowels **\
115
+ {SU_VOWELS_IMG_XS} === [ {UTEHTA_XS} ]
116
+ {SU_VOWELS_IMG__S} === [ {UTEHTA__S} ]
117
+ {SU_VOWELS_IMG__L} === [ {UTEHTA__L} ]
118
+ {SU_VOWELS_IMG_XL} === [ {UTEHTA_XL} ]
119
+ {SD_VOWELS_IMG_XS} === [ {DTEHTA_XS} ]
120
+ {SD_VOWELS_IMG__S} === [ {DTEHTA__S} ]
121
+ {SD_VOWELS_IMG__L} === [ {DTEHTA__L} ]
122
+ {SD_VOWELS_IMG_XL} === [ {DTEHTA_XL} ]
123
+
124
+ {SU_VOWELS_IMG_XS_WN} === [ {UTEHTA_XS} * {NULL} ]
125
+ {SU_VOWELS_IMG__S_WN} === [ {UTEHTA__S} * {NULL} ]
126
+ {SU_VOWELS_IMG__L_WN} === [ {UTEHTA__L} * {NULL} ]
127
+ {SU_VOWELS_IMG_XL_WN} === [ {UTEHTA_XL} * {NULL} ]
128
+ {SD_VOWELS_IMG_XS_WN} === [ {DTEHTA_XS} * {NULL} ]
129
+ {SD_VOWELS_IMG__S_WN} === [ {DTEHTA__S} * {NULL} ]
130
+ {SD_VOWELS_IMG__L_WN} === [ {DTEHTA__L} * {NULL} ]
131
+ {SD_VOWELS_IMG_XL_WN} === [ {DTEHTA_XL} * {NULL} ]
132
+
133
+ \** Rule for long vowels **\
134
+ [ {L_VOWELS} ] --> [ {L_PTEHTAS} ]
135
+ {UU} --> VALA U_TEHTA_L \** # Special treatment **\
136
+
137
+ \** Fallback rule for short vowels **\
138
+ @ --> {NULL} \** # If found alone, put nothing **\
139
+ [ {S_VOWELS_NP} ] --> TELCO [ {UTEHTA_NP_XS} ]
140
+ {A}_ --> VILYA \** # We don't want a short carrier for a_ **\
141
+ (w,u)_ --> VALA \** # Only resolved after vowels **\
142
+
143
+ \** ############# **\
144
+ \** CONSONANTS # **\
145
+ \** ############# **\
146
+
147
+ \** ## 1st Line (Voiceless occlusives) **\
148
+ \** ## Short upper dash for nasalisation **\
149
+ {L1_KER_1} === t * p
150
+ {L1_IMG_1} === TINCO * PARMA
151
+ {L1_KER_2} === ċ * c * k
152
+ {L1_IMG_2} === CALMA * QUESSE * QUESSE
153
+
154
+ {L1_KER_1_GEMS} === tt * pp
155
+ {L1_IMG_1_GEMS} === TINCO THINF_DSTROKE_XS * PARMA THINF_DSTROKE_XS
156
+ {L1_KER_2_GEMS} === ċċ * cc * kk
157
+ {L1_IMG_2_GEMS} === CALMA THINF_DSTROKE_XL * QUESSE THINF_DSTROKE_XL * QUESSE THINF_DSTROKE_XL
158
+
159
+
160
+ {S_VOWELS_KER_WN}[{L1_KER_1}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L1_IMG_1}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG__S_WN}
161
+ {S_VOWELS_KER_WN}[{L1_KER_1_GEMS}] --> 2,1 --> [{L1_IMG_1_GEMS}]{SU_VOWELS_IMG__L_WN}
162
+ {S_VOWELS_KER_WN}[{L1_KER_2}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L1_IMG_2}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
163
+ {S_VOWELS_KER_WN}[{L1_KER_2_GEMS}] --> 2,1 --> [{L1_IMG_2_GEMS}]{SU_VOWELS_IMG__L_WN}
164
+
165
+ {S_VOWELS_KER_WN}[ nt * mp ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ TINCO TILD_SUP_S * PARMA TILD_SUP_S ]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG__S_WN}
166
+ {S_VOWELS_KER_WN}[ nċ * nc ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ CALMA TILD_SUP_S * QUESSE TILD_SUP_S ]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
167
+
168
+
169
+ \** ## 2nd Line (Voiced occlusives) **\
170
+ \** ## Long upper dash for nasalisation **\
171
+ {L2_KER} === d * b * ġ * g
172
+ {L2_IMG} === ANDO * UMBAR * ANGA * UNGWE
173
+
174
+ {L2_KER_GEMS} === dd * bb * (ċġ,ġġ) * (cg,gg)
175
+ {L2_IMG_GEMS} === ANDO THINF_DSTROKE_L * UMBAR THINF_DSTROKE_L * ANGA THINF_DSTROKE_L * UNGWE THINF_DSTROKE_L
176
+
177
+
178
+ {S_VOWELS_KER_WN}[{L2_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L2_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
179
+ {S_VOWELS_KER_WN}[{L2_KER_GEMS}] --> 2,1 --> [{L2_IMG_GEMS}]{SU_VOWELS_IMG_XL_WN}
180
+
181
+ {S_VOWELS_KER_WN}[ nd * mb * nġ * ng ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ ANDO TILD_SUP_L * UMBAR TILD_SUP_L * ANGA TILD_SUP_L * UNGWE TILD_SUP_L ]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
182
+
183
+
184
+ \** ## 3rd Line (Voiceless fricatives) **\
185
+ \** ## Short upper dash for nasalisation **\
186
+ {L3_KER_1} === þ * f
187
+ {L3_IMG_1} === SULE * FORMEN
188
+ {L3_KER_2} === ç * (χ,ħ)
189
+ {L3_IMG_2} === AHA * HWESTA
190
+
191
+ {L3_KER_1_GEMS} === þþ * ff
192
+ {L3_IMG_1_GEMS} === SULE THINF_DSTROKE_XS * FORMEN THINF_DSTROKE_XS
193
+ {L3_KER_2_GEMS} === çç * (χχ,ħħ)
194
+ {L3_IMG_2_GEMS} === AHA THINF_DSTROKE_XS * HWESTA THINF_DSTROKE_XS
195
+
196
+ {S_VOWELS_KER_WN}[{L3_KER_1}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L3_IMG_1}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__L_WN}
197
+ {S_VOWELS_KER_WN}[{L3_KER_1_GEMS}] --> 2,1 --> [{L3_IMG_1_GEMS}]{SU_VOWELS_IMG__S_WN}
198
+ {S_VOWELS_KER_WN}[{L3_KER_2}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L3_IMG_2}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
199
+ {S_VOWELS_KER_WN}[{L3_KER_2_GEMS}] --> 2,1 --> [{L3_IMG_2_GEMS}]{SU_VOWELS_IMG__L_WN}
200
+
201
+
202
+ \** ## 4th Line (Voiced fricatives) **\
203
+ \** ## Long upper dash for nasalisation **\
204
+
205
+ {L4_KER} === ð * v * j * ȝ
206
+ {L4_IMG} === ANTO * AMPA * ANCA * UNQUE
207
+
208
+ {L4_KER_GEMS} === ðð * vv * jj * ȝȝ
209
+ {L4_IMG_GEMS} === ANTO THINF_DSTROKE_L * AMPA THINF_DSTROKE_L * ANCA THINF_DSTROKE_L * UNQUE THINF_DSTROKE_L
210
+
211
+
212
+ {S_VOWELS_KER_WN}[{L4_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L4_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
213
+ {S_VOWELS_KER_WN}[{L4_KER_GEMS}] --> 2,1 --> [{L4_IMG_GEMS}]{SU_VOWELS_IMG_XL_WN}
214
+
215
+
216
+ \** ## 5th Line (Nasals) **\
217
+ \** ## Long upper dash for nasalisation (wins on gemination) **\
218
+
219
+ {L5_KER} === n * m
220
+ {L5_IMG} === NUMEN * MALTA
221
+
222
+ {S_VOWELS_KER_WN}[{L5_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L5_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
223
+ {S_VOWELS_KER_WN}[ nn * mm ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ NUMEN TILD_SUP_L * MALTA TILD_SUP_L ]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
224
+
225
+
226
+ \** ## 6th Line (Approximants == fr : Spirantes) **\
227
+ \** ## Short upper dash for nasalisation **\
228
+
229
+ {L6_KER} === r * ĭ
230
+ {L6_IMG} === ORE * ANNA
231
+ {L6_KER_GEMS} === rr * ĭĭ
232
+ {L6_IMG_GEMS} === ORE THINF_DSTROKE_XS * ANNA THINF_DSTROKE_XS
233
+
234
+ {S_VOWELS_KER_WN}[{L6_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L6_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__L_WN}
235
+ {S_VOWELS_KER_WN}[{L6_KER_GEMS}] --> 2,1 --> [{L6_IMG_GEMS}]{SU_VOWELS_IMG__S_WN}
236
+
237
+
238
+ \** ## Liquids **\
239
+ \** ## **\
240
+
241
+ {S_VOWELS_KER_WN}w{S_VOWELS_KER_WN} --> 2,1,3 --> ROMEN {SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XS_WN}
242
+ {S_VOWELS_KER_WN}ww --> 2,1 --> ROMEN THINF_DSTROKE_XS {SU_VOWELS_IMG__L_WN}
243
+ {S_VOWELS_KER_WN}[l * ll] --> 2,1 --> [LAMBE * LAMBE THINF_DSTROKE_FOR_LAMBE]{SU_VOWELS_IMG__L_WN}
244
+ {S_VOWELS_KER_WN}ld --> 2,1 --> ALDA {SU_VOWELS_IMG__L_WN}
245
+
246
+
247
+ \** ## Alveolar (sifflantes) **\
248
+ \** ## **\
249
+ {L8_KER} === s * z
250
+ {L8_IMG} === SILME_NUQUERNA * ESSE_NUQUERNA
251
+ {L8_KER_GEMS} === ss * zz
252
+ {L8_IMG_GEMS} === SILME_NUQUERNA THINF_DSTROKE_XS * ESSE_NUQUERNA THINF_DSTROKE_L
253
+
254
+ {S_VOWELS_KER_WN}[{L8_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L8_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__S_WN}
255
+ {S_VOWELS_KER_WN}[{L8_KER_GEMS}] --> 2,1 --> [{L8_IMG_GEMS}]{SU_VOWELS_IMG__S_WN}
256
+
257
+
258
+ \** ## FINAL S (Challenging!) **\
259
+ {S_VOWELS_KER_WN}s_ --> SHOOK_BEAUTIFUL {SU_VOWELS_IMG_XL_WN} \** # Final rule for s **\
260
+ _{S_VOWELS_KER_WN}s_ --> SILME_NUQUERNA {SU_VOWELS_IMG__S_WN} \** # Rule _es_ **\
261
+ [{L_VOWELS} * _ ]{S_VOWELS_KER_WN}s_ --> 2,1,3 --> [{L_PTEHTAS} * {NULL} ] SILME_NUQUERNA {SU_VOWELS_IMG__S_WN} \** # Rule for éis_, és_ **\
262
+ {NULL}[ {S_VOWELS_NP} ]{S_VOWELS_KER}s_ --> 1,3,2,4 --> TELCO [ {UTEHTA_NP_XS} ] SILME_NUQUERNA {SU_VOWELS_IMG__S} \** # Rule for ies_ **\
263
+
264
+ s --> SILME \** # Overload lonely s **\
265
+ z --> ESSE \** # Overload lonely z **\
266
+
267
+ \** ## Ligatures **\
268
+ \** ## **\
269
+ {LINE_VARIOUS_1_KER} === sċ
270
+ {LINE_VARIOUS_1_IMG} === ANCA_CLOSED
271
+ {LINE_VARIOUS_2_KER} === hw * çt * (χt,ħt)
272
+ {LINE_VARIOUS_2_IMG} === HARP_SHAPED * AHA_TINCO * HWESTA_TINCO
273
+
274
+ {S_VOWELS_KER_WN}[{LINE_VARIOUS_1_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{LINE_VARIOUS_1_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
275
+ {S_VOWELS_KER_WN}[{LINE_VARIOUS_2_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{LINE_VARIOUS_2_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__S_WN}
276
+
277
+
278
+ \** ## Various **\
279
+ \** ## **\
280
+ {S_VOWELS_KER_WN}h{S_VOWELS_KER_WN} --> 2,1,3 --> HYARMEN {SU_VOWELS_IMG_XS_WN}{SD_VOWELS_IMG__L_WN}
281
+ {S_VOWELS_KER_WN}hh --> 2,1 --> HYARMEN THINF_DSTROKE_XL {SU_VOWELS_IMG_XS_WN}
282
+
283
+
284
+ \** ## X **\
285
+ \** ## For x, due to the cedilla, we cannot put tehtas under the tengwa. **\
286
+ {S_VOWELS_KER_WN}x --> 2,1 --> QUESSE SHOOK_LEFT_L {SU_VOWELS_IMG__S_WN}
287
+ {S_VOWELS_KER_WN}xx --> 2,1 --> QUESSE SHOOK_LEFT_L THINF_DSTROKE_XL {SU_VOWELS_IMG__S_WN}
288
+ {S_VOWELS_KER_WN}nx --> 2,1 --> QUESSE SHOOK_LEFT_L TILD_SUP_S {SU_VOWELS_IMG__S_WN}
289
+ \end
290
+
291
+ \beg rules punctuation
292
+ ⁊ --> OLD_ENGLISH_AND
293
+
294
+ . --> PUNCT_DDOT
295
+ .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
296
+ … --> PUNCT_TILD
297
+ ... --> PUNCT_TILD
298
+ .... --> PUNCT_TILD
299
+ ..... --> PUNCT_TILD
300
+ ...... --> PUNCT_TILD
301
+ ....... --> PUNCT_TILD
302
+
303
+ , --> PUNCT_DOT
304
+ : --> PUNCT_DOT
305
+ ; --> PUNCT_DOT
306
+ ! --> PUNCT_EXCLAM
307
+ ? --> PUNCT_INTERR
308
+ · --> PUNCT_DOT
309
+
310
+ \** Apostrophe **\
311
+
312
+ ' --> {NULL}
313
+ ’ --> {NULL}
314
+
315
+ \** Quotes **\
316
+
317
+ “ --> DQUOT_OPEN
318
+ ” --> DQUOT_CLOSE
319
+ « --> DQUOT_OPEN
320
+ » --> DQUOT_CLOSE
321
+
322
+ - --> {NULL}
323
+ – --> PUNCT_TILD
324
+ — --> PUNCT_TILD
325
+
326
+ [ --> PUNCT_PAREN_L
327
+ ] --> PUNCT_PAREN_R
328
+ ( --> PUNCT_PAREN_L_ALT \** TODO : Remove alt ? **\
329
+ ) --> PUNCT_PAREN_R_ALT \** TODO : Remove alt ? **\
330
+ { --> PUNCT_PAREN_L
331
+ } --> PUNCT_PAREN_R
332
+ < --> PUNCT_PAREN_L
333
+ > --> PUNCT_PAREN_R
334
+
335
+ \** Not universal between fonts ... **\
336
+ $ --> BOOKMARK_SIGN
337
+ ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
338
+ ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
339
+
340
+ \end
341
+
342
+ \end
@@ -0,0 +1,84 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API
    # A named collection of characters, each reachable through one or more
    # symbolic names once #finalize has built the lookup table.
    class Charset
      attr_reader :name

      attr_accessor :errors
      attr_reader :chars

      # A single charset entry: the source line it was declared on, its
      # integer code, its list of names and its UTF-8 string form.
      class Char
        attr_accessor :line
        attr_accessor :code
        attr_accessor :names
        attr_accessor :str

        def initialize
          # A list, like the value #add_char assigns and #finalize iterates.
          @names = []
        end
      end

      # name - the charset's name, reported in the finalize log message.
      def initialize(name)
        @name         = name
        @chars        = []
        @errors       = []
        # Created up front so that #[] answers nil (instead of raising on a
        # nil table) when it is called before #finalize.
        @lookup_table = {}
      end

      # Registers a character.
      #
      # line  - line on which the character was declared (used in errors).
      # code  - Integer code of the character; converted to a UTF-8 string.
      # names - Array of String names for the character.
      #
      # Entries with no name, or with a "?" among their names, are ignored.
      def add_char(line, code, names)
        return if names.empty? || names.include?("?") # Ignore characters with '?'

        char       = Char.new
        char.line  = line
        char.code  = code
        char.names = names
        char.str   = code.chr('UTF-8')
        @chars << char
      end

      # Rebuilds the name => Char lookup table from the registered characters
      # and resets #errors. When a name is declared more than once, the first
      # declaration is kept and an error is recorded for each later one.
      def finalize
        @errors       = []
        @lookup_table = {}

        @chars.each do |char|
          char.names.each do |cname|
            if @lookup_table[cname]
              @errors << Glaeml::Error.new(char.line, "Character #{cname} found twice.")
            else
              @lookup_table[cname] = char
            end
          end
        end

        API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
      end

      # Returns the Char registered under +symbol+, or nil when the name is
      # unknown (or the table has not been built yet).
      def [](symbol)
        @lookup_table[symbol]
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
module Glaemscribe
  module API

    # Builds a Charset from a charset file parsed with Glaeml.
    class CharsetParser

      def initialize
        @charset = nil
      end

      # Parses the charset file at +file_path+ and returns the resulting
      # Charset.
      #
      # If the Glaeml document reports errors, they are copied onto the
      # charset, which is returned immediately without any character loaded
      # and without being finalized. Otherwise every "char" element is
      # registered (first argument read as a hexadecimal code, remaining
      # arguments as stripped, non-empty names) and the charset is finalized.
      def parse(file_path)
        @charset = Charset.new(ResourceManager::charset_name_from_file_path(file_path))

        # Block form so the file handle is closed as soon as it has been read.
        raw = File.open(file_path, "rb:utf-8") { |file| file.read }
        doc = Glaeml::Parser.new.parse(raw)

        if doc.errors.any?
          @charset.errors = doc.errors
          return @charset
        end

        doc.root_node.gpath("char").each do |char_element|
          code  = char_element.args[0].hex
          names = char_element.args[1..-1].map { |cname| cname.strip }.reject { |cname| cname.empty? }
          @charset.add_char(char_element.line, code, names)
        end
        @charset.finalize

        @charset
      end

    end
  end
end