glaemscribe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
@@ -0,0 +1,342 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \language "Old English"
26
+ \writing "Tengwar"
27
+ \mode "West Saxon"
28
+ \version "0.0.1"
29
+ \authors "Talagan (Benjamin Babut)"
30
+
31
+ \charset tengwar_ds true
32
+
33
+ \beg preprocessor
34
+ \** Work exclusively in lowercase **\
35
+ \downcase
36
+
37
+ \** Simplify trema vowels **\
38
+ \substitute ä a
39
+ \substitute ë e
40
+ \substitute ï i
41
+ \substitute ö o
42
+ \substitute ü u
43
+ \substitute ÿ y
44
+
45
+ \substitute "ae" "æ"
46
+ \substitute "ea" "æa"
47
+ \substitute "éa" "ǽa"
48
+ \substitute "7" "⁊"
49
+
50
+ \** Dis-ambiguate long vowels **\
51
+ \rxsubstitute "(ā|â|aa)" "á"
52
+ \rxsubstitute "(ē|ê|ee)" "é"
53
+ \rxsubstitute "(ī|î|ii)" "í"
54
+ \rxsubstitute "(ō|ô|oo)" "ó"
55
+ \rxsubstitute "(ū|û|uu)" "ú"
56
+ \rxsubstitute "(ȳ|ŷ|yy)" "ý"
57
+
58
+ \up_down_tehta_split "æ,ǽ,a,ä,á,e,ë,é,i,ï,í,o,ö,ó,u,ü,ú,y,ÿ,ý,ø,ǿ,œ,œ́" "t,p,ċ,c,d,b,ġ,g,þ,f,ç,χ,ħ,ð,v,j,ȝ,n,m,r,ĭ,w,l,ld,s,z,h,x,sċ,hw,çt,χt,ħt"
59
+ \end
60
+
61
+ \beg processor
62
+
63
+ \beg rules litteral
64
+ {A} === a
65
+ {AA} === á
66
+ {E} === e
67
+ {EE} === é
68
+ {I} === i
69
+ {II} === í
70
+ {O} === o
71
+ {OO} === ó
72
+ {U} === u
73
+ {UU} === ú
74
+ {Y} === y
75
+ {YY} === ý
76
+
77
+ {AE} === (æ,ae)
78
+ {AEAE} === (ǽ,ǣ)
79
+
80
+ {OE} === (ø,œ)
81
+ {OEOE} === (ǿ,œ́)
82
+
83
+ \** Diphthongs are always split, so consider that we don't have any. **\
84
+ \** @ is the phantom vowel **\
85
+
86
+ {S_VOWELS_NP} === {A} * {AE} * {OE} * {E} * {I} * {O} * {U} * {Y} * {A}_
87
+ {S_VOWELS} === {S_VOWELS_NP} * @
88
+
89
+ \** UP TEHTAS **\
90
+ {UTEHTA_NP_XS} === E_TEHTA_XS * A_TEHTA_XS * E_TEHTA_DOUBLE_XS * Y_TEHTA_XS * I_TEHTA_XS * O_TEHTA_XS * U_TEHTA_XS * THSUP_SEV_XS * VILYA
91
+ {UTEHTA_NP__S} === E_TEHTA_S * A_TEHTA_S * E_TEHTA_DOUBLE_S * Y_TEHTA_S * I_TEHTA_S * O_TEHTA_S * U_TEHTA_S * THSUP_SEV_S * VILYA
92
+ {UTEHTA_NP__L} === E_TEHTA_L * A_TEHTA_L * E_TEHTA_DOUBLE_L * Y_TEHTA_L * I_TEHTA_L * O_TEHTA_L * U_TEHTA_L * THSUP_SEV_L * VILYA
93
+ {UTEHTA_NP_XL} === E_TEHTA_XL * A_TEHTA_XL * E_TEHTA_DOUBLE_XL * Y_TEHTA_XL * I_TEHTA_XL * O_TEHTA_XL * U_TEHTA_XL * THSUP_SEV_XL * VILYA
94
+ {UTEHTA_XS} === {UTEHTA_NP_XS} * {NULL}
95
+ {UTEHTA__S} === {UTEHTA_NP__S} * {NULL}
96
+ {UTEHTA__L} === {UTEHTA_NP__L} * {NULL}
97
+ {UTEHTA_XL} === {UTEHTA_NP_XL} * {NULL}
98
+
99
+ \** FOR LONG VOWELS **\
100
+ {L_VOWELS} === {AA} * {AEAE} * {OEOE} * {EE} * {II} * {OO} * {UU} * {YY}
101
+ {L_PTEHTAS} === ARA E_TEHTA_XS * ARA A_TEHTA_XS * ARA E_TEHTA_DOUBLE_XS * ARA Y_TEHTA_XS * ARA I_TEHTA_XS * ARA O_TEHTA_XS * ARA U_TEHTA_XS * ARA THSUP_SEV_XS
102
+
103
+ \** DOWN TEHTAS **\
104
+ {DTEHTA_XS} === THINF_ACCENT_XS * THINF_TDOT_XS * TH_SUB_CIRC_XS * THINF_DDOT_XS * THINF_DOT_XS * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_XS * VILYA * {NULL}
105
+ {DTEHTA__S} === THINF_ACCENT_S * THINF_TDOT_S * TH_SUB_CIRC_S * THINF_DDOT_S * THINF_DOT_S * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_S * VILYA * {NULL}
106
+ {DTEHTA__L} === THINF_ACCENT_L * THINF_TDOT_L * TH_SUB_CIRC_L * THINF_DDOT_L * THINF_DOT_L * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_L * VILYA * {NULL}
107
+ {DTEHTA_XL} === THINF_ACCENT_XL * THINF_TDOT_XL * TH_SUB_CIRC_XL * THINF_DDOT_XL * THINF_DOT_XL * TELCO O_TEHTA_XS * TELCO U_TEHTA_XS * THINF_STROKE_XL * VILYA * {NULL}
108
+
109
+ {S_VOWELS_NP_KER} === [ {S_VOWELS_NP} ]
110
+ {S_VOWELS_NP_KER_WN} === [ {S_VOWELS_NP} * {NULL} ]
111
+ {S_VOWELS_KER} === [ {S_VOWELS} ]
112
+ {S_VOWELS_KER_WN} === [ {S_VOWELS} * {NULL} ]
113
+
114
+ \** Img bundles for all vowels **\
115
+ {SU_VOWELS_IMG_XS} === [ {UTEHTA_XS} ]
116
+ {SU_VOWELS_IMG__S} === [ {UTEHTA__S} ]
117
+ {SU_VOWELS_IMG__L} === [ {UTEHTA__L} ]
118
+ {SU_VOWELS_IMG_XL} === [ {UTEHTA_XL} ]
119
+ {SD_VOWELS_IMG_XS} === [ {DTEHTA_XS} ]
120
+ {SD_VOWELS_IMG__S} === [ {DTEHTA__S} ]
121
+ {SD_VOWELS_IMG__L} === [ {DTEHTA__L} ]
122
+ {SD_VOWELS_IMG_XL} === [ {DTEHTA_XL} ]
123
+
124
+ {SU_VOWELS_IMG_XS_WN} === [ {UTEHTA_XS} * {NULL} ]
125
+ {SU_VOWELS_IMG__S_WN} === [ {UTEHTA__S} * {NULL} ]
126
+ {SU_VOWELS_IMG__L_WN} === [ {UTEHTA__L} * {NULL} ]
127
+ {SU_VOWELS_IMG_XL_WN} === [ {UTEHTA_XL} * {NULL} ]
128
+ {SD_VOWELS_IMG_XS_WN} === [ {DTEHTA_XS} * {NULL} ]
129
+ {SD_VOWELS_IMG__S_WN} === [ {DTEHTA__S} * {NULL} ]
130
+ {SD_VOWELS_IMG__L_WN} === [ {DTEHTA__L} * {NULL} ]
131
+ {SD_VOWELS_IMG_XL_WN} === [ {DTEHTA_XL} * {NULL} ]
132
+
133
+ \** Rule for long vowels **\
134
+ [ {L_VOWELS} ] --> [ {L_PTEHTAS} ]
135
+ {UU} --> VALA U_TEHTA_L \** # Special treatment **\
136
+
137
+ \** Fallback rule for short vowels **\
138
+ @ --> {NULL} \** # If found alone, put nothing **\
139
+ [ {S_VOWELS_NP} ] --> TELCO [ {UTEHTA_NP_XS} ]
140
+ {A}_ --> VILYA \** # We don't want a short carrier for a_ **\
141
+ (w,u)_ --> VALA \** # Only resolved after vowels **\
142
+
143
+ \** ############# **\
144
+ \** CONSONANTS # **\
145
+ \** ############# **\
146
+
147
+ \** ## 1st Line (Voiceless occlusives) **\
148
+ \** ## Short upper dash for nasalisation **\
149
+ {L1_KER_1} === t * p
150
+ {L1_IMG_1} === TINCO * PARMA
151
+ {L1_KER_2} === ċ * c * k
152
+ {L1_IMG_2} === CALMA * QUESSE * QUESSE
153
+
154
+ {L1_KER_1_GEMS} === tt * pp
155
+ {L1_IMG_1_GEMS} === TINCO THINF_DSTROKE_XS * PARMA THINF_DSTROKE_XS
156
+ {L1_KER_2_GEMS} === ċċ * cc * kk
157
+ {L1_IMG_2_GEMS} === CALMA THINF_DSTROKE_XL * QUESSE THINF_DSTROKE_XL * QUESSE THINF_DSTROKE_XL
158
+
159
+
160
+ {S_VOWELS_KER_WN}[{L1_KER_1}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L1_IMG_1}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG__S_WN}
161
+ {S_VOWELS_KER_WN}[{L1_KER_1_GEMS}] --> 2,1 --> [{L1_IMG_1_GEMS}]{SU_VOWELS_IMG__L_WN}
162
+ {S_VOWELS_KER_WN}[{L1_KER_2}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L1_IMG_2}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
163
+ {S_VOWELS_KER_WN}[{L1_KER_2_GEMS}] --> 2,1 --> [{L1_IMG_2_GEMS}]{SU_VOWELS_IMG__L_WN}
164
+
165
+ {S_VOWELS_KER_WN}[ nt * mp ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ TINCO TILD_SUP_S * PARMA TILD_SUP_S ]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG__S_WN}
166
+ {S_VOWELS_KER_WN}[ nċ * nc ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ CALMA TILD_SUP_S * QUESSE TILD_SUP_S ]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
167
+
168
+
169
+ \** ## 2nd Line (Voiced occlusives) **\
170
+ \** ## Long upper dash for nasalisation **\
171
+ {L2_KER} === d * b * ġ * g
172
+ {L2_IMG} === ANDO * UMBAR * ANGA * UNGWE
173
+
174
+ {L2_KER_GEMS} === dd * bb * (ċġ,ġġ) * (cg,gg)
175
+ {L2_IMG_GEMS} === ANDO THINF_DSTROKE_L * UMBAR THINF_DSTROKE_L * ANGA THINF_DSTROKE_L * UNGWE THINF_DSTROKE_L
176
+
177
+
178
+ {S_VOWELS_KER_WN}[{L2_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L2_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
179
+ {S_VOWELS_KER_WN}[{L2_KER_GEMS}] --> 2,1 --> [{L2_IMG_GEMS}]{SU_VOWELS_IMG_XL_WN}
180
+
181
+ {S_VOWELS_KER_WN}[ nd * mb * nġ * ng ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ ANDO TILD_SUP_L * UMBAR TILD_SUP_L * ANGA TILD_SUP_L * UNGWE TILD_SUP_L ]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
182
+
183
+
184
+ \** ## 3rd Line (Voiceless fricatives) **\
185
+ \** ## Short upper dash for nasalisation **\
186
+ {L3_KER_1} === þ * f
187
+ {L3_IMG_1} === SULE * FORMEN
188
+ {L3_KER_2} === ç * (χ,ħ)
189
+ {L3_IMG_2} === AHA * HWESTA
190
+
191
+ {L3_KER_1_GEMS} === þþ * ff
192
+ {L3_IMG_1_GEMS} === SULE THINF_DSTROKE_XS * FORMEN THINF_DSTROKE_XS
193
+ {L3_KER_2_GEMS} === çç * (χχ,ħħ)
194
+ {L3_IMG_2_GEMS} === AHA THINF_DSTROKE_XS * HWESTA THINF_DSTROKE_XS
195
+
196
+ {S_VOWELS_KER_WN}[{L3_KER_1}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L3_IMG_1}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__L_WN}
197
+ {S_VOWELS_KER_WN}[{L3_KER_1_GEMS}] --> 2,1 --> [{L3_IMG_1_GEMS}]{SU_VOWELS_IMG__S_WN}
198
+ {S_VOWELS_KER_WN}[{L3_KER_2}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L3_IMG_2}]{SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XL_WN}
199
+ {S_VOWELS_KER_WN}[{L3_KER_2_GEMS}] --> 2,1 --> [{L3_IMG_2_GEMS}]{SU_VOWELS_IMG__L_WN}
200
+
201
+
202
+ \** ## 4th Line (Voiced fricatives) **\
203
+ \** ## Long upper dash for nasalisation **\
204
+
205
+ {L4_KER} === ð * v * j * ȝ
206
+ {L4_IMG} === ANTO * AMPA * ANCA * UNQUE
207
+
208
+ {L4_KER_GEMS} === ðð * vv * jj * ȝȝ
209
+ {L4_IMG_GEMS} === ANTO THINF_DSTROKE_L * AMPA THINF_DSTROKE_L * ANCA THINF_DSTROKE_L * UNQUE THINF_DSTROKE_L
210
+
211
+
212
+ {S_VOWELS_KER_WN}[{L4_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L4_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
213
+ {S_VOWELS_KER_WN}[{L4_KER_GEMS}] --> 2,1 --> [{L4_IMG_GEMS}]{SU_VOWELS_IMG_XL_WN}
214
+
215
+
216
+ \** ## 5th Line (Nasals) **\
217
+ \** ## Long upper dash for nasalisation (wins on gemination) **\
218
+
219
+ {L5_KER} === n * m
220
+ {L5_IMG} === NUMEN * MALTA
221
+
222
+ {S_VOWELS_KER_WN}[{L5_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L5_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
223
+ {S_VOWELS_KER_WN}[ nn * mm ]{S_VOWELS_KER_WN} --> 2,1,3 --> [ NUMEN TILD_SUP_L * MALTA TILD_SUP_L ]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
224
+
225
+
226
+ \** ## 6th Line (Approximants == fr : Spirantes) **\
227
+ \** ## Short upper dash for nasalisation **\
228
+
229
+ {L6_KER} === r * ĭ
230
+ {L6_IMG} === ORE * ANNA
231
+ {L6_KER_GEMS} === rr * ĭĭ
232
+ {L6_IMG_GEMS} === ORE THINF_DSTROKE_XS * ANNA THINF_DSTROKE_XS
233
+
234
+ {S_VOWELS_KER_WN}[{L6_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L6_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__L_WN}
235
+ {S_VOWELS_KER_WN}[{L6_KER_GEMS}] --> 2,1 --> [{L6_IMG_GEMS}]{SU_VOWELS_IMG__S_WN}
236
+
237
+
238
+ \** ## Liquids **\
239
+ \** ## **\
240
+
241
+ {S_VOWELS_KER_WN}w{S_VOWELS_KER_WN} --> 2,1,3 --> ROMEN {SU_VOWELS_IMG__L_WN}{SD_VOWELS_IMG_XS_WN}
242
+ {S_VOWELS_KER_WN}ww --> 2,1 --> ROMEN THINF_DSTROKE_XS {SU_VOWELS_IMG__L_WN}
243
+ {S_VOWELS_KER_WN}[l * ll] --> 2,1 --> [LAMBE * LAMBE THINF_DSTROKE_FOR_LAMBE]{SU_VOWELS_IMG__L_WN}
244
+ {S_VOWELS_KER_WN}ld --> 2,1 --> ALDA {SU_VOWELS_IMG__L_WN}
245
+
246
+
247
+ \** ## Alveolar (sifflantes) **\
248
+ \** ## **\
249
+ {L8_KER} === s * z
250
+ {L8_IMG} === SILME_NUQUERNA * ESSE_NUQUERNA
251
+ {L8_KER_GEMS} === ss * zz
252
+ {L8_IMG_GEMS} === SILME_NUQUERNA THINF_DSTROKE_XS * ESSE_NUQUERNA THINF_DSTROKE_L
253
+
254
+ {S_VOWELS_KER_WN}[{L8_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{L8_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__S_WN}
255
+ {S_VOWELS_KER_WN}[{L8_KER_GEMS}] --> 2,1 --> [{L8_IMG_GEMS}]{SU_VOWELS_IMG__S_WN}
256
+
257
+
258
+ \** ## FINAL S (Challenging!) **\
259
+ {S_VOWELS_KER_WN}s_ --> SHOOK_BEAUTIFUL {SU_VOWELS_IMG_XL_WN} \** # Final rule for s **\
260
+ _{S_VOWELS_KER_WN}s_ --> SILME_NUQUERNA {SU_VOWELS_IMG__S_WN} \** # Rule _es_ **\
261
+ [{L_VOWELS} * _ ]{S_VOWELS_KER_WN}s_ --> 2,1,3 --> [{L_PTEHTAS} * {NULL} ] SILME_NUQUERNA {SU_VOWELS_IMG__S_WN} \** # Rule for éis_, és_ **\
262
+ {NULL}[ {S_VOWELS_NP} ]{S_VOWELS_KER}s_ --> 1,3,2,4 --> TELCO [ {UTEHTA_NP_XS} ] SILME_NUQUERNA {SU_VOWELS_IMG__S} \** # Rule for ies_ **\
263
+
264
+ s --> SILME \** # Overload lonely s **\
265
+ z --> ESSE \** # Overload lonely z **\
266
+
267
+ \** ## Ligatures **\
268
+ \** ## **\
269
+ {LINE_VARIOUS_1_KER} === sċ
270
+ {LINE_VARIOUS_1_IMG} === ANCA_CLOSED
271
+ {LINE_VARIOUS_2_KER} === hw * çt * (χt,ħt)
272
+ {LINE_VARIOUS_2_IMG} === HARP_SHAPED * AHA_TINCO * HWESTA_TINCO
273
+
274
+ {S_VOWELS_KER_WN}[{LINE_VARIOUS_1_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{LINE_VARIOUS_1_IMG}]{SU_VOWELS_IMG_XL_WN}{SD_VOWELS_IMG_XL_WN}
275
+ {S_VOWELS_KER_WN}[{LINE_VARIOUS_2_KER}]{S_VOWELS_KER_WN} --> 2,1,3 --> [{LINE_VARIOUS_2_IMG}]{SU_VOWELS_IMG__S_WN}{SD_VOWELS_IMG__S_WN}
276
+
277
+
278
+ \** ## Various **\
279
+ \** ## **\
280
+ {S_VOWELS_KER_WN}h{S_VOWELS_KER_WN} --> 2,1,3 --> HYARMEN {SU_VOWELS_IMG_XS_WN}{SD_VOWELS_IMG__L_WN}
281
+ {S_VOWELS_KER_WN}hh --> 2,1 --> HYARMEN THINF_DSTROKE_XL {SU_VOWELS_IMG_XS_WN}
282
+
283
+
284
+ \** ## X **\
285
+ \** ## For x, due to the cedilla, we cannot put tehtas under the tengwa. **\
286
+ {S_VOWELS_KER_WN}x --> 2,1 --> QUESSE SHOOK_LEFT_L {SU_VOWELS_IMG__S_WN}
287
+ {S_VOWELS_KER_WN}xx --> 2,1 --> QUESSE SHOOK_LEFT_L THINF_DSTROKE_XL {SU_VOWELS_IMG__S_WN}
288
+ {S_VOWELS_KER_WN}nx --> 2,1 --> QUESSE SHOOK_LEFT_L TILD_SUP_S {SU_VOWELS_IMG__S_WN}
289
+ \end
290
+
291
+ \beg rules punctuation
292
+ ⁊ --> OLD_ENGLISH_AND
293
+
294
+ . --> PUNCT_DDOT
295
+ .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
296
+ … --> PUNCT_TILD
297
+ ... --> PUNCT_TILD
298
+ .... --> PUNCT_TILD
299
+ ..... --> PUNCT_TILD
300
+ ...... --> PUNCT_TILD
301
+ ....... --> PUNCT_TILD
302
+
303
+ , --> PUNCT_DOT
304
+ : --> PUNCT_DOT
305
+ ; --> PUNCT_DOT
306
+ ! --> PUNCT_EXCLAM
307
+ ? --> PUNCT_INTERR
308
+ · --> PUNCT_DOT
309
+
310
+ \** Apostrophe **\
311
+
312
+ ' --> {NULL}
313
+ ’ --> {NULL}
314
+
315
+ \** Quotes **\
316
+
317
+ “ --> DQUOT_OPEN
318
+ ” --> DQUOT_CLOSE
319
+ « --> DQUOT_OPEN
320
+ » --> DQUOT_CLOSE
321
+
322
+ - --> {NULL}
323
+ – --> PUNCT_TILD
324
+ — --> PUNCT_TILD
325
+
326
+ [ --> PUNCT_PAREN_L
327
+ ] --> PUNCT_PAREN_R
328
+ ( --> PUNCT_PAREN_L_ALT \** TODO : Remove alt ? **\
329
+ ) --> PUNCT_PAREN_R_ALT \** TODO : Remove alt ? **\
330
+ { --> PUNCT_PAREN_L
331
+ } --> PUNCT_PAREN_R
332
+ < --> PUNCT_PAREN_L
333
+ > --> PUNCT_PAREN_R
334
+
335
+ \** Not universal between fonts ... **\
336
+ $ --> BOOKMARK_SIGN
337
+ ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
338
+ ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
339
+
340
+ \end
341
+
342
+ \end
@@ -0,0 +1,84 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+   module API
25
+     # A named collection of characters, each reachable through one or more names.
26
+     class Charset
27
+       attr_reader :name
28
+       attr_accessor :errors
29
+       attr_reader :chars
30
+
31
+       # One entry of the charset; Charset#add_char fills in its fields.
32
+       class Char
33
+         attr_accessor :line
34
+         attr_accessor :code
35
+         attr_accessor :names
36
+         attr_accessor :str
37
+
38
+         def initialize
39
+           @names = {} # NOTE(review): a Hash here, while add_char documents names as an array - confirm
40
+         end
41
+       end
42
+
43
+       def initialize(name)
44
+         @name = name
45
+         @chars = []
46
+         @errors = []
47
+         @lookup_table = {} # Lets #[] answer nil instead of raising when called before #finalize
48
+       end
49
+
50
+       # Pass the source line, an integer (character code point) and an array (of name strings).
51
+       def add_char(line, code, names)
52
+         return if names.empty? || names.include?("?") # Ignore characters with '?'
53
+         c = Char.new
54
+         c.line = line
55
+         c.code = code
56
+         c.names = names
57
+         c.str = code.chr('UTF-8')
58
+         @chars << c
59
+       end
60
+
61
+       # Rebuilds the name lookup table from the registered characters and resets #errors.
62
+       # When a name is met a second time, the first character keeps it and an error is recorded.
63
+       def finalize
64
+         @errors = []
65
+         @lookup_table = {}
66
+         @chars.each { |c|
67
+           c.names.each { |cname|
68
+             if @lookup_table[cname]
69
+               @errors << Glaeml::Error.new(c.line, "Character #{cname} found twice.")
70
+             else
71
+               @lookup_table[cname] = c
72
+             end
73
+           }
74
+         }
75
+         API::Debug::log("Finalized charset '#{@name}', #{@lookup_table.count} symbols loaded.")
76
+       end
77
+
78
+       # Returns the Char registered under the given name by #finalize, or nil when there is none.
79
+       def [](symbol)
80
+         @lookup_table[symbol]
81
+       end
82
+     end
83
+   end
84
+ end
@@ -0,0 +1,55 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ # the transcription of texts between writing systems, and more
5
+ # specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ # invented languages to some of his devised writing systems.
7
+ #
8
+ # Copyright (C) 2015 Benjamin Babut (Talagan).
9
+ #
10
+ # This program is free software: you can redistribute it and/or modify
11
+ # it under the terms of the GNU Affero General Public License as published by
12
+ # the Free Software Foundation, either version 3 of the License, or
13
+ # any later version.
14
+ #
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU Affero General Public License for more details.
19
+ #
20
+ # You should have received a copy of the GNU Affero General Public License
21
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ module Glaemscribe
24
+ module API
25
+
26
+ class CharsetParser
27
+
28
+ def initialize()
29
+ @charset = nil
30
+ end
31
+
32
+ def parse(file_path)
33
+ @charset = Charset.new(ResourceManager::charset_name_from_file_path(file_path))
34
+
35
+ raw = File.open(file_path,"rb:utf-8").read
36
+ doc = Glaeml::Parser.new.parse(raw)
37
+
38
+ if(doc.errors.any?)
39
+ @charset.errors = doc.errors
40
+ return @charset
41
+ end
42
+
43
+ doc.root_node.gpath("char").each { |char_element|
44
+ code = char_element.args[0].hex
45
+ names = char_element.args[1..-1].map{|cname| cname.strip }.reject{ |cname| cname.empty? }
46
+ @charset.add_char(char_element.line,code,names)
47
+ }
48
+ @charset.finalize
49
+
50
+ @charset
51
+ end
52
+
53
+ end
54
+ end
55
+ end