treebank-transform 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,295 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
3
+ <xsl:import href="beta-uni-util.xsl"/>
4
+
5
+ <!--
6
+ Copyright 2008-2009 Cantus Foundation
7
+ http://alpheios.net
8
+
9
+ This file is part of Alpheios.
10
+
11
+ Alpheios is free software: you can redistribute it and/or modify
12
+ it under the terms of the GNU General Public License as published by
13
+ the Free Software Foundation, either version 3 of the License, or
14
+ (at your option) any later version.
15
+
16
+ Alpheios is distributed in the hope that it will be useful,
17
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ GNU General Public License for more details.
20
+
21
+ You should have received a copy of the GNU General Public License
22
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
23
+ -->
24
+
25
+ <!--
26
+ Test whether text is in betacode
27
+ Parameters:
28
+ $a_in string/node to be tested
29
+ Output:
30
+ 1 if encoded in betacode, else 0
31
+ (Note: Boolean return value does not seem to work
32
+ reliably, most likely because a template result captured in a variable is a result tree fragment, which XSLT 1.0 always converts to true.)
33
+ -->
34
+ <xsl:template name="is-beta">
35
+ <xsl:param name="a_in"/>
36
+
37
+ <xsl:choose>
38
+ <!-- if xml:lang says betacode, so be it -->
39
+ <xsl:when test="lang('grc-x-beta')">
40
+ <xsl:value-of select="1"/>
41
+ </xsl:when>
42
+
43
+ <!-- if no input, can't be betacode -->
44
+ <xsl:when test="string-length($a_in) = 0">
45
+ <xsl:value-of select="0"/>
46
+ </xsl:when>
47
+
48
+ <!-- otherwise, check the characters in input -->
49
+ <xsl:otherwise>
50
+ <xsl:variable name="head" select="substring($a_in, 1, 1)"/>
51
+
52
+ <xsl:choose>
53
+ <!-- if betacode base letter, assume it's betacode -->
54
+ <xsl:when
55
+ test="contains($s_betaUppers, $head) or
56
+ contains($s_betaLowers, $head)">
57
+ <xsl:value-of select="1"/>
58
+ </xsl:when>
59
+
60
+ <xsl:otherwise>
61
+ <!-- look up unicode in table -->
62
+ <xsl:variable name="beta">
63
+ <xsl:apply-templates select="$s_betaUniTable" mode="u2b">
64
+ <xsl:with-param name="a_key" select="$head"/>
65
+ </xsl:apply-templates>
66
+ </xsl:variable>
67
+
68
+ <xsl:choose>
69
+ <!-- if found in unicode table, it's not betacode -->
70
+ <xsl:when test="string-length($beta) > 0">
71
+ <xsl:value-of select="0"/>
72
+ </xsl:when>
73
+
74
+ <!-- otherwise, skip letter and check remainder of string -->
75
+ <xsl:otherwise>
76
+ <xsl:call-template name="is-beta">
77
+ <xsl:with-param name="a_in" select="substring($a_in, 2)"/>
78
+ </xsl:call-template>
79
+ </xsl:otherwise>
80
+ </xsl:choose>
81
+ </xsl:otherwise>
82
+ </xsl:choose>
83
+
84
+ </xsl:otherwise>
85
+ </xsl:choose>
86
+ </xsl:template>
87
+
88
+ <!--
89
+ Convert Greek betacode to Unicode
90
+ Parameters:
91
+ $a_in betacode input string to be converted
92
+ $a_pending character waiting to be output
93
+ $a_state diacritics associated with pending character
94
+ $a_precomposed whether to put out precomposed or decomposed Unicode
95
+ $a_partial whether this is a partial word
96
+ (If true, do not use final sigma for last letter)
97
+
98
+ Output:
99
+ $a_in transformed to equivalent Unicode
100
+
101
+ The characters in the state string are maintained in a canonical order,
102
+ which allows the lookup table to contain a single entry for each
103
+ combination of base character and diacritics. The diacritics may appear
104
+ in any order in the input.
105
+
106
+ Diacritics associated with (either preceding or following) a base
107
+ character are accumulated until either a non-diacritic character or end
108
+ of input is encountered, at which point the pending character is output.
109
+ -->
110
+ <xsl:template name="beta-to-uni">
111
+ <xsl:param name="a_in"/>
112
+ <xsl:param name="a_pending" select="''"/>
113
+ <xsl:param name="a_state" select="''"/>
114
+ <xsl:param name="a_precomposed" select="true()"/>
115
+ <xsl:param name="a_partial" select="false()"/>
116
+
117
+ <xsl:variable name="head" select="substring($a_in, 1, 1)"/>
118
+
119
+ <xsl:choose>
120
+ <!-- if no more input -->
121
+ <xsl:when test="string-length($a_in) = 0">
122
+ <!-- output last pending char -->
123
+ <xsl:choose>
124
+ <!-- final sigma: S with no state -->
125
+ <xsl:when
126
+ test="(($a_pending = 's') or ($a_pending = 'S')) and
127
+ not($a_partial) and (string-length($a_state) = 0)">
128
+ <xsl:call-template name="output-uni-char">
129
+ <xsl:with-param name="a_char" select="$a_pending"/>
130
+ <xsl:with-param name="a_state" select="'2'"/>
131
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
132
+ </xsl:call-template>
133
+ </xsl:when>
134
+
135
+ <xsl:otherwise>
136
+ <xsl:call-template name="output-uni-char">
137
+ <xsl:with-param name="a_char" select="$a_pending"/>
138
+ <xsl:with-param name="a_state" select="$a_state"/>
139
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
140
+ </xsl:call-template>
141
+ </xsl:otherwise>
142
+ </xsl:choose>
143
+ </xsl:when>
144
+
145
+ <!-- if input starts with "*" -->
146
+ <xsl:when test="$head = '*'">
147
+ <!-- output pending char -->
148
+ <xsl:call-template name="output-uni-char">
149
+ <xsl:with-param name="a_char" select="$a_pending"/>
150
+ <xsl:with-param name="a_state" select="$a_state"/>
151
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
152
+ </xsl:call-template>
153
+
154
+ <!-- recurse, capitalizing next char, erasing any saved state -->
155
+ <xsl:call-template name="beta-to-uni">
156
+ <xsl:with-param name="a_in" select="substring($a_in, 2)"/>
157
+ <xsl:with-param name="a_state" select="'*'"/>
158
+ <xsl:with-param name="a_pending" select="''"/>
159
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
160
+ <xsl:with-param name="a_partial" select="$a_partial"/>
161
+ </xsl:call-template>
162
+ </xsl:when>
163
+
164
+ <!-- if input starts with diacritic -->
165
+ <xsl:when test="contains($s_betaDiacritics, $head)">
166
+ <!-- update state with new character -->
167
+ <xsl:variable name="newstate">
168
+ <xsl:call-template name="insert-diacritic">
169
+ <xsl:with-param name="a_string" select="$a_state"/>
170
+ <xsl:with-param name="a_char" select="$head"/>
171
+ </xsl:call-template>
172
+ </xsl:variable>
173
+
174
+ <!-- recurse with updated state -->
175
+ <xsl:call-template name="beta-to-uni">
176
+ <xsl:with-param name="a_in" select="substring($a_in, 2)"/>
177
+ <xsl:with-param name="a_state" select="$newstate"/>
178
+ <xsl:with-param name="a_pending" select="$a_pending"/>
179
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
180
+ <xsl:with-param name="a_partial" select="$a_partial"/>
181
+ </xsl:call-template>
182
+ </xsl:when>
183
+
184
+ <!-- if not special char -->
185
+ <xsl:otherwise>
186
+ <!-- output pending char -->
187
+ <xsl:choose>
188
+ <!-- final sigma: S with no state followed by word break -->
189
+ <xsl:when
190
+ test="(($a_pending = 's') or ($a_pending = 'S')) and
191
+ (string-length($a_state) = 0) and
192
+ (contains($s_betaSeparators, $head) or
193
+ contains($s_betaSeparators2, $head))">
194
+ <xsl:call-template name="output-uni-char">
195
+ <xsl:with-param name="a_char" select="$a_pending"/>
196
+ <xsl:with-param name="a_state" select="'2'"/>
197
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
198
+ </xsl:call-template>
199
+ </xsl:when>
200
+
201
+ <xsl:otherwise>
202
+ <xsl:call-template name="output-uni-char">
203
+ <xsl:with-param name="a_char" select="$a_pending"/>
204
+ <xsl:with-param name="a_state" select="$a_state"/>
205
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
206
+ </xsl:call-template>
207
+ </xsl:otherwise>
208
+ </xsl:choose>
209
+
210
+ <!-- reset state if there was a pending character -->
211
+ <xsl:variable name="newstate">
212
+ <xsl:choose>
213
+ <xsl:when test="$a_pending"/>
214
+ <xsl:otherwise>
215
+ <xsl:value-of select="$a_state"/>
216
+ </xsl:otherwise>
217
+ </xsl:choose>
218
+ </xsl:variable>
219
+
220
+ <!-- recurse with head as pending char -->
221
+ <xsl:call-template name="beta-to-uni">
222
+ <xsl:with-param name="a_in" select="substring($a_in, 2)"/>
223
+ <xsl:with-param name="a_state" select="$newstate"/>
224
+ <xsl:with-param name="a_pending" select="$head"/>
225
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
226
+ <xsl:with-param name="a_partial" select="$a_partial"/>
227
+ </xsl:call-template>
228
+ </xsl:otherwise>
229
+ </xsl:choose>
230
+ </xsl:template>
231
+
232
+ <!--
233
+ Output a single character with diacritics
234
+ Parameters:
235
+ $a_char character to be output
236
+ $a_state diacritics associated with character
237
+ $a_precomposed whether to put out precomposed or decomposed Unicode
238
+ -->
239
+ <xsl:template name="output-uni-char">
240
+ <xsl:param name="a_char"/>
241
+ <xsl:param name="a_state"/>
242
+ <xsl:param name="a_precomposed"/>
243
+
244
+ <xsl:choose>
245
+ <!-- if no character pending -->
246
+ <xsl:when test="string-length($a_char) = 0">
247
+ <!-- if we have state and we're not processing a capital -->
248
+ <xsl:if
249
+ test="(string-length($a_state) > 0) and
250
+ (substring($a_state, 1, 1) != '*')">
251
+ <!-- output just the state -->
252
+ <!-- here precomposed=true means don't make it combining -->
253
+ <xsl:apply-templates select="$s_betaUniTable" mode="b2u">
254
+ <xsl:with-param name="a_key" select="$a_state"/>
255
+ <xsl:with-param name="a_precomposed" select="true()"/>
256
+ </xsl:apply-templates>
257
+ </xsl:if>
258
+ </xsl:when>
259
+
260
+ <!-- if character is pending -->
261
+ <xsl:otherwise>
262
+ <!-- compute both the lower-case and the upper-case form of the character -->
263
+ <xsl:variable name="lowerchar"
264
+ select="translate($a_char, $s_betaUppers, $s_betaLowers)"/>
265
+ <xsl:variable name="upperchar"
266
+ select="translate($a_char, $s_betaLowers, $s_betaUppers)"/>
267
+ <xsl:choose>
268
+ <!-- if upper != lower, we have a letter -->
269
+ <xsl:when test="$lowerchar != $upperchar">
270
+ <!-- use letter+state as key into table -->
271
+ <xsl:apply-templates select="$s_betaUniTable" mode="b2u">
272
+ <xsl:with-param name="a_key"
273
+ select="concat($lowerchar, $a_state)"/>
274
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
275
+ </xsl:apply-templates>
276
+ </xsl:when>
277
+
278
+ <!-- if upper = lower, we have a non-letter -->
279
+ <xsl:otherwise>
280
+ <!-- output character, if any, then use state as key into table -->
281
+ <!-- this handles the case of isolated diacritics -->
282
+ <xsl:value-of select="$a_char"/>
283
+ <xsl:if test="string-length($a_state) > 0">
284
+ <xsl:apply-templates select="$s_betaUniTable" mode="b2u">
285
+ <xsl:with-param name="a_key" select="$a_state"/>
286
+ <xsl:with-param name="a_precomposed" select="$a_precomposed"/>
287
+ </xsl:apply-templates>
288
+ </xsl:if>
289
+ </xsl:otherwise>
290
+ </xsl:choose>
291
+ </xsl:otherwise>
292
+ </xsl:choose>
293
+ </xsl:template>
294
+
295
+ </xsl:stylesheet>
@@ -0,0 +1,31 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
+ xmlns:xs="http://www.w3.org/2001/XMLSchema"
4
+ exclude-result-prefixes="xs"
5
+ version="1.0">
6
+ <xsl:output media-type="text/xml" omit-xml-declaration="no" method="xml" indent="yes"/>
7
+ <xsl:preserve-space elements="*"/>
8
+ <xsl:include href="beta2unicode.xsl"/>
9
+
10
+ <xsl:template match="@*|node()">
11
+ <xsl:copy>
12
+ <xsl:apply-templates select="@*"></xsl:apply-templates>
13
+ <xsl:apply-templates select="node()"></xsl:apply-templates>
14
+ </xsl:copy>
15
+ </xsl:template>
16
+
17
+ <xsl:template match="@span|@lemma|@form">
18
+ <xsl:choose>
19
+ <xsl:when test="ancestor::treebank[@xml:lang='grc' or @xml:lang='greek']">
20
+ <xsl:attribute name="{local-name(.)}">
21
+ <xsl:call-template name="beta-to-uni">
22
+ <xsl:with-param name="a_in" select="."/>
23
+ </xsl:call-template>
24
+ </xsl:attribute>
25
+ </xsl:when>
26
+ <xsl:otherwise><xsl:copy/></xsl:otherwise>
27
+ </xsl:choose>
28
+
29
+ </xsl:template>
30
+
31
+ </xsl:stylesheet>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treebank-transform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - LFDM
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-26 00:00:00.000000000 Z
11
+ date: 2014-12-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -111,6 +111,7 @@ files:
111
111
  - Rakefile
112
112
  - bin/treebank-transform
113
113
  - examples/cicero_catilina_sample.xml
114
+ - examples/hesiod_shield_of_heracles_sample.xml
114
115
  - lib/treebank/alphabet.rb
115
116
  - lib/treebank/elliptic_word.rb
116
117
  - lib/treebank/sentence.rb
@@ -119,6 +120,9 @@ files:
119
120
  - lib/treebank/transform/version.rb
120
121
  - spec/spec_helper.rb
121
122
  - spec/treebank/transform_spec.rb
123
+ - stylesheets/beta-uni-util.xsl
124
+ - stylesheets/beta2unicode.xsl
125
+ - stylesheets/treebank-beta-uni.xsl
122
126
  - treebank-transform.gemspec
123
127
  homepage: ''
124
128
  licenses: