glaemscribe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +19 -0
  3. data/bin/glaemscribe +307 -0
  4. data/glaemresources/charsets/cirth_ds.cst +205 -0
  5. data/glaemresources/charsets/sarati_eldamar.cst +256 -0
  6. data/glaemresources/charsets/tengwar_ds.cst +318 -0
  7. data/glaemresources/charsets/unicode_gothic.cst +64 -0
  8. data/glaemresources/charsets/unicode_runes.cst +120 -0
  9. data/glaemresources/modes/adunaic.glaem +251 -0
  10. data/glaemresources/modes/blackspeech-annatar.glaem +318 -0
  11. data/glaemresources/modes/blackspeech.glaem +260 -0
  12. data/glaemresources/modes/gothic.glaem +78 -0
  13. data/glaemresources/modes/khuzdul.glaem +141 -0
  14. data/glaemresources/modes/mercian.glaem +419 -0
  15. data/glaemresources/modes/oldnorse-medieval.glaem +127 -0
  16. data/glaemresources/modes/quenya-sarati.glaem +320 -0
  17. data/glaemresources/modes/quenya.glaem +307 -0
  18. data/glaemresources/modes/sindarin-beleriand.glaem +285 -0
  19. data/glaemresources/modes/sindarin-classical.glaem +276 -0
  20. data/glaemresources/modes/sindarin-daeron.glaem +182 -0
  21. data/glaemresources/modes/telerin.glaem +302 -0
  22. data/glaemresources/modes/valarin-sarati.glaem +210 -0
  23. data/glaemresources/modes/westron.glaem +340 -0
  24. data/glaemresources/modes/westsaxon.glaem +342 -0
  25. data/lib/api/charset.rb +84 -0
  26. data/lib/api/charset_parser.rb +55 -0
  27. data/lib/api/constants.rb +29 -0
  28. data/lib/api/debug.rb +36 -0
  29. data/lib/api/eval.rb +268 -0
  30. data/lib/api/fragment.rb +113 -0
  31. data/lib/api/glaeml.rb +200 -0
  32. data/lib/api/if_tree.rb +96 -0
  33. data/lib/api/mode.rb +112 -0
  34. data/lib/api/mode_parser.rb +314 -0
  35. data/lib/api/option.rb +64 -0
  36. data/lib/api/post_processor/reverse.rb +36 -0
  37. data/lib/api/pre_processor/downcase.rb +35 -0
  38. data/lib/api/pre_processor/elvish_numbers.rb +47 -0
  39. data/lib/api/pre_processor/rxsubstitute.rb +40 -0
  40. data/lib/api/pre_processor/substitute.rb +38 -0
  41. data/lib/api/pre_processor/up_down_tehta_split.rb +138 -0
  42. data/lib/api/resource_manager.rb +130 -0
  43. data/lib/api/rule.rb +99 -0
  44. data/lib/api/rule_group.rb +159 -0
  45. data/lib/api/sheaf.rb +70 -0
  46. data/lib/api/sheaf_chain.rb +86 -0
  47. data/lib/api/sheaf_chain_iterator.rb +108 -0
  48. data/lib/api/sub_rule.rb +40 -0
  49. data/lib/api/transcription_pre_post_processor.rb +118 -0
  50. data/lib/api/transcription_processor.rb +137 -0
  51. data/lib/api/transcription_tree_node.rb +91 -0
  52. data/lib/glaemscribe.rb +70 -0
  53. metadata +112 -0
@@ -0,0 +1,260 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \** BlackSpeech ring mode for glaemscribe (MAY BE INCOMPLETE) **\
26
+
27
+ \language "Black Speech"
28
+ \writing "Tengwar"
29
+ \mode "General Use"
30
+ \version "0.0.1"
31
+ \authors "Talagan (Benjamin Babut)"
32
+
33
+ \charset tengwar_ds true
34
+
35
+ \beg options
36
+ \option reverse_numbers true
37
+ \beg option numbers_base BASE_12
38
+ \value BASE_10 10
39
+ \value BASE_12 12
40
+ \end
41
+ \end
42
+
43
+ \beg preprocessor
44
+ \** Work exclusively in lowercase **\
45
+ \downcase
46
+
47
+ \** Simplify trema vowels **\
48
+ \substitute "ä" "a"
49
+ \substitute "ë" "e"
50
+ \substitute "ï" "i"
51
+ \substitute "ö" "o"
52
+ \substitute "ü" "u"
53
+ \substitute "ÿ" "y"
54
+
55
+ \** Disambiguate long vowels: fold macron, circumflex and doubled spellings into the acute form **\
56
+ \rxsubstitute "(ā|â|aa)" "á"
57
+ \rxsubstitute "(ē|ê|ee)" "é"
58
+ \rxsubstitute "(ī|î|ii)" "í"
59
+ \rxsubstitute "(ō|ô|oo)" "ó"
60
+ \rxsubstitute "(ū|û|uu)" "ú"
61
+
62
+ \** Preprocess numbers **\
63
+ \elvish_numbers "\\eval numbers_base" "\\eval reverse_numbers"
64
+ \end
65
+
66
+ \beg processor
67
+
68
+ \beg rules litteral
69
+ {A} === a
70
+ {AA} === á
71
+ {E} === e
72
+ {EE} === é
73
+ {I} === i
74
+ {II} === í
75
+ {O} === o
76
+ {OO} === ó
77
+ {U} === u
78
+ {UU} === ú
79
+
80
+ {AI} === {A}{I} \** attested **\
81
+ {AU} === {A}{U} \** attested **\
82
+ {OI} === {O}{I} \** Not quite sure (dushgoi) **\
83
+
84
+ {K} === (c,k)
85
+
86
+ {VOWELS} === {A} * {E} * {I} * {O} * {U}
87
+ {LVOWELS} === {AA} * {EE} * {II} * {OO} * {UU}
88
+
89
+ \** Reverse o and u: the tehta lists below put U before O, unlike {VOWELS} **\
90
+ {TEHTA_XS} === A_TEHTA_XS * E_TEHTA_XS * I_TEHTA_XS * U_TEHTA_XS * O_TEHTA_XS
91
+ {TEHTA__S} === A_TEHTA_S * E_TEHTA_S * I_TEHTA_S * U_TEHTA_S * O_TEHTA_S
92
+ {TEHTA__L} === A_TEHTA_L * E_TEHTA_L * I_TEHTA_L * U_TEHTA_L * O_TEHTA_L
93
+ {TEHTA_XL} === A_TEHTA_XL * E_TEHTA_XL * I_TEHTA_XL * U_TEHTA_XL * O_TEHTA_XL
94
+
95
+ {DIPHTHONGS} === {AI} * {AU} * {OI}
96
+ {DIPHTHENGS} === YANTA A_TEHTA_L * URE A_TEHTA_L * YANTA U_TEHTA_L
97
+
98
+ {V_D_KER} === [ {VOWELS} * {DIPHTHONGS} ] \** Short vowels and diphthongs gathered into one bracketed list. None of the V_D_ variables is referenced by a rule in this mode. **\
99
+ {V_D_IMG_XS} === [ {TEHTA_XS} * {DIPHTHENGS} ] \** XS tehtar followed by the diphthong glyph sequences **\
100
+ {V_D_IMG__S} === [ {TEHTA__L} * {DIPHTHENGS} ] \** NOTE(review): the name ends in _S but the list is built from TEHTA__L; possibly swapped with the next definition, confirm **\
101
+ {V_D_IMG__L} === [ {TEHTA__S} * {DIPHTHENGS} ] \** NOTE(review): the name ends in _L but the list is built from TEHTA__S; possibly swapped with the previous definition, confirm **\
102
+ {V_D_IMG_XL} === [ {TEHTA_XL} * {DIPHTHENGS} ] \** XL tehtar followed by the diphthong glyph sequences **\
103
+
104
+ [{VOWELS}] --> TELCO [{TEHTA_XS}] \** Replace isolated short vowels **\
105
+ [{LVOWELS}] --> ARA [{TEHTA_XS}] \** Replace long vowels **\
106
+ [{DIPHTHONGS}] --> [{DIPHTHENGS}] \** Replace diphthongs **\
107
+
108
+ b --> UMBAR
109
+ [{VOWELS}] b --> UMBAR [{TEHTA_XL}]
110
+
111
+ d --> ANDO
112
+ [{VOWELS}] d --> ANDO [{TEHTA_XL}]
113
+
114
+ f --> FORMEN
115
+ [{VOWELS}] f --> FORMEN_EXT [{TEHTA__S}] \** Beware. **\
116
+
117
+ g --> UNGWE
118
+ [{VOWELS}] g --> UNGWE [{TEHTA_XL}]
119
+
120
+ gh --> UNGWE_EXT
121
+ [{VOWELS}] gh --> UNGWE_EXT [{TEHTA_XL}]
122
+
123
+ h --> HYARMEN
124
+ [{VOWELS}] h --> HYARMEN [{TEHTA_XS}]
125
+
126
+ \** ======================== **\
127
+
128
+ {K} --> QUESSE
129
+ [{VOWELS}]{K} --> QUESSE [{TEHTA__L}]
130
+
131
+ {K}h --> HWESTA
132
+ [{VOWELS}]{K}h --> HWESTA_EXT [{TEHTA__L}] \** Take care. **\
133
+
134
+ \** ======================== **\
135
+
136
+ l --> LAMBE
137
+ [{VOWELS}] l --> LAMBE [{TEHTA__L}]
138
+
139
+ \** ======================== **\
140
+
141
+ m --> MALTA
142
+ [{VOWELS}] m --> MALTA [{TEHTA_XL}]
143
+
144
+ mb --> UMBAR TILD_SUP_L
145
+ [{VOWELS}] mb --> UMBAR TILD_SUP_L [{TEHTA_XL}]
146
+
147
+ mp --> PARMA TILD_SUP_S
148
+ [{VOWELS}] mp --> PARMA TILD_SUP_S [{TEHTA__L}]
149
+
150
+ \** ======================== **\
151
+
152
+ n --> NUMEN
153
+ [{VOWELS}]n --> NUMEN [{TEHTA_XL}]
154
+
155
+ n{K} --> QUESSE TILD_SUP_S
156
+ [{VOWELS}]n{K} --> QUESSE TILD_SUP_S [{TEHTA__S}]
157
+
158
+ \** ======================== **\
159
+
160
+ p --> PARMA
161
+
162
+ r --> ROMEN
163
+ r_ --> ORE
164
+ [{VOWELS}]r --> ORE [{TEHTA__S}]
165
+
166
+ [{VOWELS}]rb --> ORE [{TEHTA__L}] UMBAR
167
+ [{LVOWELS}]rb --> ARA [{TEHTA_XS}] ORE UMBAR
168
+
169
+ [{VOWELS}]rz --> ORE [{TEHTA__L}] ESSE
170
+ [{LVOWELS}]rz --> ARA [{TEHTA_XS}] ORE ESSE
171
+
172
+ \** ======================== **\
173
+
174
+ s --> SILME
175
+
176
+ \** ======================== **\
177
+
178
+ sh --> AHA
179
+ [{VOWELS}] sh --> AHA_EXT [{TEHTA__L}] \** BEWARE. **\
180
+
181
+ t --> TINCO
182
+ [{VOWELS}]t --> TINCO [{TEHTA__L}]
183
+
184
+ th --> SULE
185
+
186
+ y --> ANNA
187
+
188
+ z --> ESSE
189
+ [{VOWELS}] z --> ESSE_NUQUERNA [{TEHTA__L}]
190
+
191
+ \end
192
+
193
+ \beg rules punctuation
194
+ . --> PUNCT_DDOT
195
+ .. --> PUNCT_DOT PUNCT_DDOT PUNCT_DOT
196
+ … --> PUNCT_TILD
197
+ ... --> PUNCT_TILD
198
+ .... --> PUNCT_TILD
199
+ ..... --> PUNCT_TILD
200
+ ...... --> PUNCT_TILD
201
+ ....... --> PUNCT_TILD
202
+
203
+ , --> PUNCT_DOT
204
+ : --> PUNCT_DOT
205
+ ; --> PUNCT_DOT
206
+ ! --> PUNCT_EXCLAM
207
+ ? --> PUNCT_INTERR
208
+ · --> PUNCT_DOT
209
+
210
+ \** Apostrophe **\
211
+
212
+ ' --> {NULL}
213
+ ’ --> {NULL}
214
+
215
+ \** Quotes **\
216
+
217
+ “ --> DQUOT_OPEN
218
+ ” --> DQUOT_CLOSE
219
+ « --> DQUOT_OPEN
220
+ » --> DQUOT_CLOSE
221
+
222
+ - --> {NULL}
223
+ – --> PUNCT_TILD
224
+ — --> PUNCT_TILD
225
+
226
+ [ --> PUNCT_PAREN_L
227
+ ] --> PUNCT_PAREN_R
228
+ ( --> PUNCT_PAREN_L
229
+ ) --> PUNCT_PAREN_R
230
+ { --> PUNCT_PAREN_L
231
+ } --> PUNCT_PAREN_R
232
+ < --> PUNCT_PAREN_L
233
+ > --> PUNCT_PAREN_R
234
+
235
+ \** Not universal between fonts ... **\
236
+ $ --> BOOKMARK_SIGN
237
+ ≤ --> RING_MARK_L \** Ring inscription left beautiful stuff **\
238
+ ≥ --> RING_MARK_R \** Ring inscription right beautiful stuff **\
239
+
240
+ \end
241
+
242
+ \beg rules numbers
243
+ 0 --> NUM_0
244
+ 1 --> NUM_1
245
+ 2 --> NUM_2
246
+ 3 --> NUM_3
247
+ 4 --> NUM_4
248
+ 5 --> NUM_5
249
+ 6 --> NUM_6
250
+ 7 --> NUM_7
251
+ 8 --> NUM_8
252
+ 9 --> NUM_9
253
+ A --> NUM_10
254
+ B --> NUM_11
255
+ \end
256
+
257
+ \end
258
+
259
+
260
+
@@ -0,0 +1,78 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \language "Gothic"
26
+ \writing "Gothic Alphabet"
27
+ \mode "Standard"
28
+ \version "0.0.1"
29
+ \authors "Talagan (Benjamin Babut)"
30
+
31
+ \charset unicode_gothic true
32
+
33
+ \beg preprocessor
34
+ \downcase
35
+ \end
36
+
37
+ \beg processor
38
+
39
+ \beg rules litteral
40
+ (a,ā) --> AZA
41
+ b --> BERCNA
42
+ d --> DAAZ
43
+ (e,ē) --> EYZ
44
+ f --> FE
45
+ g --> GEUUA
46
+ h --> HAAL
47
+ i --> IIZ
48
+ j --> GAAR
49
+ k --> CHOZMA
50
+ l --> LAAZ
51
+ m --> MANNA
52
+ n --> NOICZ
53
+ (o,ō) --> UTAL
54
+ p --> PERTRA
55
+ q --> QUETRA
56
+ r --> REDA
57
+ s --> SUGIL
58
+ t --> TYZ
59
+ þ --> THYTH
60
+ u --> URAZ
61
+ x --> ENGUZ
62
+ (w,y) --> UUINNE
63
+ z --> EZEC
64
+ ƕ --> UUAER
65
+ ï --> IIZ_TREMA
66
+ _i --> IIZ_TREMA
67
+ \end
68
+
69
+ \beg rules punctuation
70
+ , --> COMA
71
+ . --> PERIOD
72
+ ; --> SEMICOLON
73
+ : --> COLON
74
+ \end
75
+
76
+ \end
77
+
78
+
@@ -0,0 +1,141 @@
1
+ \**
2
+
3
+ Glǽmscribe (also written Glaemscribe) is a software dedicated to
4
+ the transcription of texts between writing systems, and more
5
+ specifically dedicated to the transcription of J.R.R. Tolkien's
6
+ invented languages to some of his devised writing systems.
7
+
8
+ Copyright (C) 2015 Benjamin Babut (Talagan).
9
+
10
+ This program is free software: you can redistribute it and/or modify
11
+ it under the terms of the GNU Affero General Public License as published by
12
+ the Free Software Foundation, either version 3 of the License, or
13
+ any later version.
14
+
15
+ This program is distributed in the hope that it will be useful,
16
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ GNU Affero General Public License for more details.
19
+
20
+ You should have received a copy of the GNU Affero General Public License
21
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
22
+
23
+ **\
24
+
25
+ \** Khuzdul mode for glaemscribe **\
26
+
27
+ \language "Khuzdul"
28
+ \writing "Cirth"
29
+ \mode "Angerthas Moria"
30
+ \version "0.0.1"
31
+ \authors "Talagan (Benjamin Babut)"
32
+
33
+ \charset cirth_ds true
34
+
35
+ \beg preprocessor
36
+ \** Work exclusively in lowercase **\
37
+ \downcase
38
+
39
+ \** Simplify trema vowels **\
40
+ \substitute "ä" "a"
41
+ \substitute "ë" "e"
42
+ \substitute "ï" "i"
43
+ \substitute "ö" "o"
44
+ \substitute "ü" "u"
45
+ \substitute "ÿ" "y"
46
+
47
+ \** Disambiguate long vowels: fold macron, circumflex and doubled spellings into the circumflex form **\
48
+ \rxsubstitute "(ā|â|aa)" "â"
49
+ \rxsubstitute "(ē|ê|ee)" "ê"
50
+ \rxsubstitute "(ī|î|ii)" "î"
51
+ \rxsubstitute "(ō|ô|oo)" "ô"
52
+ \rxsubstitute "(ū|û|uu)" "û"
53
+ \rxsubstitute "(ȳ|ŷ|yy)" "ŷ"
54
+ \end
55
+
56
+ \beg processor
57
+
58
+ \** We redefine the output space to have something beautiful, especially with erebor1 and erebor2 **\
59
+ \outspace CIRTH_SPACE_BIG
60
+
61
+ \beg rules litteral
62
+ a --> CIRTH_48
63
+ â --> CIRTH_49
64
+ e --> CIRTH_46
65
+ ê --> CIRTH_47
66
+ \** Each rule in this group maps a source letter or digraph (left) to one or more glyph names (right); this mode declares the cirth_ds charset. **\
67
+ i --> CIRTH_39
68
+ î --> CIRTH_39 CIRTH_39 \** long i is emitted as the i glyph twice **\
69
+
70
+ o --> CIRTH_50
71
+ \** NOTE(review): the preprocessor rewrites ō, ô and oo to ô, yet no ô rule appears in this group; confirm whether one is missing **\
72
+ u --> CIRTH_42
73
+ û --> CIRTH_43
74
+ \** NOTE(review): the preprocessor also rewrites ȳ, ŷ and yy to ŷ, but neither y nor ŷ has a rule in this group; confirm **\
75
+ b --> CIRTH_2
76
+ d --> CIRTH_9
77
+ f --> CIRTH_3
78
+ g --> CIRTH_19
79
+ h --> CIRTH_34
80
+ gh --> CIRTH_19 CIRTH_34 \** same glyphs as g followed by h **\
81
+ k --> CIRTH_18
82
+ l --> CIRTH_31
83
+ m --> CIRTH_6
84
+ n --> CIRTH_22
85
+ nd --> CIRTH_33 \** digraph with its own single glyph **\
86
+ r --> CIRTH_12
87
+ s --> CIRTH_54
88
+ t --> CIRTH_8
89
+ sh --> CIRTH_15 \** digraph with its own single glyph **\
90
+ th --> CIRTH_8 CIRTH_59 \** the t glyph followed by CIRTH_59 **\
91
+ z --> CIRTH_17
92
+ \** NOTE(review): the k rule below repeats, with the same target, the k rule already given earlier in this group **\
93
+ k --> CIRTH_18
94
+ kh --> CIRTH_18 CIRTH_59 \** the k glyph followed by CIRTH_59, parallel to th **\
95
+ \end
96
+
97
+ \beg rules punctuation
98
+ . --> CIRTH_PUNCT_THREE_DOTS
99
+ .. --> CIRTH_PUNCT_THREE_DOTS
100
+ ... --> CIRTH_PUNCT_THREE_DOTS
101
+ … --> CIRTH_PUNCT_THREE_DOTS
102
+ .... --> CIRTH_PUNCT_THREE_DOTS
103
+ ..... --> CIRTH_PUNCT_THREE_DOTS
104
+ ...... --> CIRTH_PUNCT_THREE_DOTS
105
+ ....... --> CIRTH_PUNCT_THREE_DOTS
106
+
107
+ , --> CIRTH_PUNCT_MID_DOT
108
+ : --> CIRTH_PUNCT_TWO_DOTS
109
+ ; --> CIRTH_PUNCT_TWO_DOTS
110
+ ! --> CIRTH_PUNCT_THREE_DOTS
111
+ ? --> CIRTH_PUNCT_THREE_DOTS
112
+ · --> CIRTH_PUNCT_MID_DOT
113
+
114
+ - --> CIRTH_PUNCT_MID_DOT
115
+ – --> CIRTH_PUNCT_TWO_DOTS
116
+ — --> CIRTH_PUNCT_TWO_DOTS
117
+
118
+ \** Apostrophe **\
119
+
120
+ ' --> {NULL}
121
+ ’ --> {NULL}
122
+
123
+ \** Quotes **\
124
+
125
+ “ --> {NULL}
126
+ ” --> {NULL}
127
+ « --> {NULL}
128
+ » --> {NULL}
129
+
130
+ [ --> CIRTH_PUNCT_THREE_DOTS_L
131
+ ] --> CIRTH_PUNCT_THREE_DOTS_L
132
+ ( --> CIRTH_PUNCT_THREE_DOTS_L
133
+ ) --> CIRTH_PUNCT_THREE_DOTS_L
134
+ { --> CIRTH_PUNCT_THREE_DOTS_L
135
+ } --> CIRTH_PUNCT_THREE_DOTS_L
136
+ < --> CIRTH_PUNCT_THREE_DOTS_L
137
+ > --> CIRTH_PUNCT_THREE_DOTS_L
138
+
139
+ / --> CIRTH_PUNCT_FOUR_DOTS
140
+ \end
141
+ \end