proiel 1.1.0 → 1.3.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/LICENSE +1 -1
- data/README.md +2 -2
- data/lib/proiel.rb +16 -1
- data/lib/proiel/alignment.rb +3 -0
- data/lib/proiel/alignment/builder.rb +220 -0
- data/lib/proiel/annotation_schema.rb +11 -4
- data/lib/proiel/chronology.rb +80 -0
- data/lib/proiel/dictionary.rb +79 -0
- data/lib/proiel/dictionary/builder.rb +224 -0
- data/lib/proiel/div.rb +22 -3
- data/lib/proiel/language.rb +108 -0
- data/lib/proiel/lemma.rb +77 -0
- data/lib/proiel/proiel_xml/proiel-3.0/proiel-3.0.xsd +383 -0
- data/lib/proiel/proiel_xml/reader.rb +138 -2
- data/lib/proiel/proiel_xml/schema.rb +4 -2
- data/lib/proiel/proiel_xml/validator.rb +76 -9
- data/lib/proiel/sentence.rb +27 -4
- data/lib/proiel/source.rb +14 -4
- data/lib/proiel/statistics.rb +2 -2
- data/lib/proiel/token.rb +14 -6
- data/lib/proiel/tokenization.rb +5 -3
- data/lib/proiel/treebank.rb +23 -6
- data/lib/proiel/utils.rb +0 -1
- data/lib/proiel/valency.rb +5 -0
- data/lib/proiel/valency/arguments.rb +151 -0
- data/lib/proiel/valency/lexicon.rb +59 -0
- data/lib/proiel/valency/obliqueness.rb +31 -0
- data/lib/proiel/version.rb +2 -3
- data/lib/proiel/visualization.rb +1 -0
- data/lib/proiel/visualization/graphviz.rb +111 -0
- data/lib/proiel/visualization/graphviz/aligned-modern.dot.erb +83 -0
- data/lib/proiel/visualization/graphviz/classic.dot.erb +24 -0
- data/lib/proiel/visualization/graphviz/linearized.dot.erb +57 -0
- data/lib/proiel/visualization/graphviz/modern.dot.erb +39 -0
- data/lib/proiel/visualization/graphviz/packed.dot.erb +25 -0
- metadata +76 -31
@@ -0,0 +1,383 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
|
3
|
+
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
|
4
|
+
<xs:annotation>
|
5
|
+
<xs:documentation>PROIEL XML format version 3.0</xs:documentation>
|
6
|
+
</xs:annotation>
|
7
|
+
|
8
|
+
<!-- Source elements -->
|
9
|
+
<xs:complexType name="Source">
|
10
|
+
<xs:sequence>
|
11
|
+
<xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
|
12
|
+
<xs:element name="alternative-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
13
|
+
<xs:element name="author" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
14
|
+
<xs:element name="citation-part" minOccurs="1" maxOccurs="1" type="xs:string"/>
|
15
|
+
<xs:element name="principal" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
16
|
+
<xs:element name="funder" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
17
|
+
<xs:element name="distributor" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
18
|
+
<xs:element name="distributor-address" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
19
|
+
<xs:element name="address" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
20
|
+
<xs:element name="date" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
21
|
+
<xs:element name="license" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
22
|
+
<xs:element name="license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
23
|
+
<xs:element name="reference-system" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
24
|
+
<xs:element name="editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
25
|
+
<xs:element name="editorial-note" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
26
|
+
<xs:element name="annotator" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
27
|
+
<xs:element name="reviewer" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
28
|
+
<xs:element name="electronic-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
29
|
+
<xs:element name="electronic-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
30
|
+
<xs:element name="electronic-text-version" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
31
|
+
<xs:element name="electronic-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
32
|
+
<xs:element name="electronic-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
33
|
+
<xs:element name="electronic-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
34
|
+
<xs:element name="electronic-text-original-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
35
|
+
<xs:element name="electronic-text-license" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
36
|
+
<xs:element name="electronic-text-license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
37
|
+
<xs:element name="printed-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
38
|
+
<xs:element name="printed-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
39
|
+
<xs:element name="printed-text-edition" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
40
|
+
<xs:element name="printed-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
41
|
+
<xs:element name="printed-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
42
|
+
<xs:element name="printed-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
43
|
+
<xs:element name="chronology-composition" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
44
|
+
<xs:element name="chronology-manuscript" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
45
|
+
|
46
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
47
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
48
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
49
|
+
|
50
|
+
<xs:element name="div" minOccurs="1" maxOccurs="unbounded" type="Div"/>
|
51
|
+
|
52
|
+
</xs:sequence>
|
53
|
+
|
54
|
+
<xs:attribute name="id" type="xs:string" use="required"/>
|
55
|
+
<xs:attribute name="alignment-id" type="xs:string" use="optional"/>
|
56
|
+
<xs:attribute name="language" type="xs:string" use="required"/>
|
57
|
+
<xs:attribute name="dialect" type="xs:string" use="optional"/>
|
58
|
+
</xs:complexType>
|
59
|
+
|
60
|
+
<xs:complexType name="Div">
|
61
|
+
<xs:sequence>
|
62
|
+
<xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
|
63
|
+
|
64
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
65
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
66
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
67
|
+
|
68
|
+
<xs:element name="sentence" minOccurs="1" maxOccurs="unbounded" type="Sentence"/>
|
69
|
+
</xs:sequence>
|
70
|
+
|
71
|
+
<xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
|
72
|
+
<xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
|
73
|
+
<xs:attribute name="presentation-before" type="xs:string" use="optional"/>
|
74
|
+
<xs:attribute name="presentation-after" type="xs:string" use="optional"/>
|
75
|
+
</xs:complexType>
|
76
|
+
|
77
|
+
<xs:complexType name="Sentence">
|
78
|
+
<xs:sequence>
|
79
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
80
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
81
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
82
|
+
|
83
|
+
<xs:element name="token" minOccurs="1" maxOccurs="unbounded" type="Token"/>
|
84
|
+
</xs:sequence>
|
85
|
+
|
86
|
+
<xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
|
87
|
+
<xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
|
88
|
+
<xs:attribute name="presentation-before" type="xs:string" use="optional"/>
|
89
|
+
<xs:attribute name="presentation-after" type="xs:string" use="optional"/>
|
90
|
+
<xs:attribute name="status" type="SentenceStatus" use="optional"/>
|
91
|
+
<xs:attribute name="annotated-by" type="xs:string" use="optional"/>
|
92
|
+
<xs:attribute name="annotated-at" type="xs:dateTime" use="optional"/>
|
93
|
+
<xs:attribute name="reviewed-by" type="xs:string" use="optional"/>
|
94
|
+
<xs:attribute name="reviewed-at" type="xs:dateTime" use="optional"/>
|
95
|
+
</xs:complexType>
|
96
|
+
|
97
|
+
<xs:simpleType name="SentenceStatus">
|
98
|
+
<xs:restriction base="xs:string">
|
99
|
+
<xs:enumeration value="annotated"/>
|
100
|
+
<xs:enumeration value="reviewed"/>
|
101
|
+
<xs:enumeration value="unannotated"/>
|
102
|
+
</xs:restriction>
|
103
|
+
</xs:simpleType>
|
104
|
+
|
105
|
+
<xs:complexType name="Token">
|
106
|
+
<xs:sequence>
|
107
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
108
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
109
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
110
|
+
|
111
|
+
<xs:element name="slash" minOccurs="0" maxOccurs="unbounded" type="Slash"/>
|
112
|
+
</xs:sequence>
|
113
|
+
|
114
|
+
<xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
|
115
|
+
<xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
|
116
|
+
<xs:attribute name="lemma" type="xs:string" use="optional"/>
|
117
|
+
<xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
|
118
|
+
<xs:attribute name="morphology" type="xs:string" use="optional"/>
|
119
|
+
<xs:attribute name="citation-part" type="xs:string" use="optional"/>
|
120
|
+
<xs:attribute name="relation" type="xs:string" use="optional"/>
|
121
|
+
<xs:attribute name="head-id" type="xs:nonNegativeInteger" use="optional"/>
|
122
|
+
<xs:attribute name="information-status" type="xs:string" use="optional"/>
|
123
|
+
<xs:attribute name="antecedent-id" type="xs:nonNegativeInteger" use="optional"/>
|
124
|
+
<xs:attribute name="contrast-group" type="xs:string" use="optional"/>
|
125
|
+
<xs:attribute name="foreign-ids" type="xs:string" use="optional"/>
|
126
|
+
|
127
|
+
<!-- XSD does not allow us to constrain the use of these attributes properly so they are marked optional even though their occurrence depends on the value of empty-token-sort and form. -->
|
128
|
+
<xs:attribute name="empty-token-sort" type="EmptyTokenSort" use="optional"/>
|
129
|
+
<xs:attribute name="form" type="xs:string" use="optional"/>
|
130
|
+
<xs:attribute name="presentation-before" type="xs:string" use="optional"/>
|
131
|
+
<xs:attribute name="presentation-after" type="xs:string" use="optional"/>
|
132
|
+
</xs:complexType>
|
133
|
+
|
134
|
+
<xs:simpleType name="EmptyTokenSort">
|
135
|
+
<xs:restriction base="xs:string">
|
136
|
+
<xs:enumeration value="P"/>
|
137
|
+
<xs:enumeration value="C"/>
|
138
|
+
<xs:enumeration value="V"/>
|
139
|
+
</xs:restriction>
|
140
|
+
</xs:simpleType>
|
141
|
+
|
142
|
+
<xs:complexType name="Slash">
|
143
|
+
<xs:attribute name="target-id" type="xs:nonNegativeInteger" use="required"/>
|
144
|
+
<xs:attribute name="relation" type="xs:string" use="required"/>
|
145
|
+
</xs:complexType>
|
146
|
+
|
147
|
+
<!-- Shared metadata elements -->
|
148
|
+
<xs:complexType name="Tag">
|
149
|
+
<xs:attribute name="attribute" type="xs:string" use="required"/>
|
150
|
+
<xs:attribute name="value" type="xs:string" use="required"/>
|
151
|
+
<xs:attribute name="target-id" type="xs:string" use="optional"/>
|
152
|
+
<xs:attribute name="target-type" type="xs:string" use="optional"/>
|
153
|
+
</xs:complexType>
|
154
|
+
|
155
|
+
<xs:complexType name="Link">
|
156
|
+
<xs:attribute name="target" type="xs:string" use="required"/>
|
157
|
+
<xs:attribute name="type" type="xs:string" use="required"/>
|
158
|
+
</xs:complexType>
|
159
|
+
|
160
|
+
<xs:complexType name="Note">
|
161
|
+
<xs:simpleContent>
|
162
|
+
<xs:extension base="xs:string">
|
163
|
+
<xs:attribute name="originator" type="xs:string" use="required"/>
|
164
|
+
</xs:extension>
|
165
|
+
</xs:simpleContent>
|
166
|
+
</xs:complexType>
|
167
|
+
|
168
|
+
<!-- Dictionary elements -->
|
169
|
+
<xs:complexType name="DictionarySource">
|
170
|
+
<xs:attribute name="idref" type="xs:string" use="required"/>
|
171
|
+
<xs:attribute name="license" type="xs:string" use="optional"/>
|
172
|
+
<xs:attribute name="n" type="xs:nonNegativeInteger" use="optional"/>
|
173
|
+
</xs:complexType>
|
174
|
+
|
175
|
+
<xs:complexType name="DictionaryToken">
|
176
|
+
<xs:attribute name="idref" type="xs:string" use="required"/>
|
177
|
+
<xs:attribute name="flags" type="xs:string" use="optional"/>
|
178
|
+
</xs:complexType>
|
179
|
+
|
180
|
+
<xs:complexType name="DictionaryArgument">
|
181
|
+
<xs:attribute name="relation" type="xs:string" use="required"/>
|
182
|
+
<xs:attribute name="lemma" type="xs:string" use="optional"/>
|
183
|
+
<xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
|
184
|
+
<xs:attribute name="mood" type="xs:string" use="optional"/>
|
185
|
+
<xs:attribute name="case" type="xs:string" use="optional"/>
|
186
|
+
</xs:complexType>
|
187
|
+
|
188
|
+
<xs:complexType name="DictionaryTokens">
|
189
|
+
<xs:sequence>
|
190
|
+
<xs:element name="token" minOccurs="0" maxOccurs="unbounded" type="DictionaryToken"/>
|
191
|
+
</xs:sequence>
|
192
|
+
</xs:complexType>
|
193
|
+
|
194
|
+
<xs:complexType name="DictionaryArguments">
|
195
|
+
<xs:sequence>
|
196
|
+
<xs:element name="argument" minOccurs="0" maxOccurs="unbounded" type="DictionaryArgument"/>
|
197
|
+
</xs:sequence>
|
198
|
+
</xs:complexType>
|
199
|
+
|
200
|
+
<xs:complexType name="DictionaryFrame">
|
201
|
+
<xs:sequence>
|
202
|
+
<xs:element name="arguments" minOccurs="1" maxOccurs="1" type="DictionaryArguments"/>
|
203
|
+
<xs:element name="tokens" minOccurs="1" maxOccurs="unbounded" type="DictionaryTokens"/>
|
204
|
+
</xs:sequence>
|
205
|
+
</xs:complexType>
|
206
|
+
|
207
|
+
<xs:complexType name="DictionaryValency">
|
208
|
+
<xs:sequence>
|
209
|
+
<xs:element name="frame" minOccurs="1" maxOccurs="unbounded" type="DictionaryFrame"/>
|
210
|
+
</xs:sequence>
|
211
|
+
</xs:complexType>
|
212
|
+
|
213
|
+
<xs:complexType name="DictionarySlot2">
|
214
|
+
<xs:attribute name="form" type="xs:string" use="required"/>
|
215
|
+
<xs:attribute name="n" type="xs:nonNegativeInteger" use="required"/>
|
216
|
+
</xs:complexType>
|
217
|
+
|
218
|
+
<xs:complexType name="DictionarySlot1">
|
219
|
+
<xs:sequence>
|
220
|
+
<xs:element name="slot2" minOccurs="1" maxOccurs="unbounded" type="DictionarySlot2"/>
|
221
|
+
</xs:sequence>
|
222
|
+
|
223
|
+
<xs:attribute name="morphology" type="xs:string" use="required"/>
|
224
|
+
</xs:complexType>
|
225
|
+
|
226
|
+
<xs:complexType name="DictionaryParadigm">
|
227
|
+
<xs:sequence>
|
228
|
+
<xs:element name="slot1" minOccurs="1" maxOccurs="unbounded" type="DictionarySlot1"/>
|
229
|
+
</xs:sequence>
|
230
|
+
</xs:complexType>
|
231
|
+
|
232
|
+
<xs:complexType name="DictionaryHomograph">
|
233
|
+
<xs:attribute name="lemma" type="xs:string" use="required"/>
|
234
|
+
<xs:attribute name="part-of-speech" type="xs:string" use="required"/>
|
235
|
+
</xs:complexType>
|
236
|
+
|
237
|
+
<xs:complexType name="DictionaryHomographs">
|
238
|
+
<xs:sequence>
|
239
|
+
<xs:element name='homograph' minOccurs="1" maxOccurs="unbounded" type='DictionaryHomograph'/>
|
240
|
+
</xs:sequence>
|
241
|
+
</xs:complexType>
|
242
|
+
|
243
|
+
<xs:complexType name="DictionaryGloss">
|
244
|
+
<xs:simpleContent>
|
245
|
+
<xs:extension base="xs:string">
|
246
|
+
<xs:attribute name="language" type="xs:string" use="required"/>
|
247
|
+
</xs:extension>
|
248
|
+
</xs:simpleContent>
|
249
|
+
</xs:complexType>
|
250
|
+
|
251
|
+
<xs:complexType name="DictionaryGlosses">
|
252
|
+
<xs:sequence>
|
253
|
+
<xs:element name='gloss' minOccurs="1" maxOccurs="unbounded" type='DictionaryGloss'/>
|
254
|
+
</xs:sequence>
|
255
|
+
</xs:complexType>
|
256
|
+
|
257
|
+
<xs:complexType name="DictionaryDistribution">
|
258
|
+
<xs:sequence>
|
259
|
+
<xs:element name='source' minOccurs="1" maxOccurs="unbounded" type='DictionarySource'/>
|
260
|
+
</xs:sequence>
|
261
|
+
</xs:complexType>
|
262
|
+
|
263
|
+
<xs:complexType name="DictionaryLemma">
|
264
|
+
<xs:sequence>
|
265
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
266
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
267
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
268
|
+
|
269
|
+
<xs:element name="distribution" minOccurs="0" maxOccurs="1" type="DictionaryDistribution"/>
|
270
|
+
<xs:element name="glosses" minOccurs="0" maxOccurs="1" type="DictionaryGlosses"/>
|
271
|
+
<xs:element name="homographs" minOccurs="0" maxOccurs="1" type="DictionaryHomographs"/>
|
272
|
+
<xs:element name="paradigm" minOccurs="0" maxOccurs="1" type="DictionaryParadigm"/>
|
273
|
+
<xs:element name="valency" minOccurs="0" maxOccurs="1" type="DictionaryValency"/>
|
274
|
+
</xs:sequence>
|
275
|
+
|
276
|
+
<xs:attribute name="lemma" type="xs:string" use="required"/>
|
277
|
+
<xs:attribute name="part-of-speech" type="xs:string" use="required"/>
|
278
|
+
</xs:complexType>
|
279
|
+
|
280
|
+
<xs:complexType name="DictionarySources">
|
281
|
+
<xs:sequence>
|
282
|
+
<xs:element name='source' minOccurs="1" maxOccurs="unbounded" type='DictionarySource'/>
|
283
|
+
</xs:sequence>
|
284
|
+
</xs:complexType>
|
285
|
+
|
286
|
+
<xs:complexType name="DictionaryLemmata">
|
287
|
+
<xs:sequence>
|
288
|
+
<xs:element name='lemma' minOccurs="1" maxOccurs="unbounded" type='DictionaryLemma'/>
|
289
|
+
</xs:sequence>
|
290
|
+
</xs:complexType>
|
291
|
+
|
292
|
+
<xs:complexType name="Dictionary">
|
293
|
+
<xs:sequence>
|
294
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
295
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
296
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
297
|
+
|
298
|
+
<xs:element name='sources' minOccurs="0" maxOccurs="1" type='DictionarySources'/>
|
299
|
+
<xs:element name='lemmata' minOccurs="0" maxOccurs="1" type='DictionaryLemmata'/>
|
300
|
+
</xs:sequence>
|
301
|
+
|
302
|
+
<xs:attribute name="language" type="xs:string" use="required"/>
|
303
|
+
<xs:attribute name="dialect" type="xs:string" use="optional"/>
|
304
|
+
</xs:complexType>
|
305
|
+
|
306
|
+
<!-- Annotation elements -->
|
307
|
+
<xs:complexType name="PartOfSpeechValue">
|
308
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
309
|
+
<xs:attribute name="summary" type="xs:string" use="required"/>
|
310
|
+
</xs:complexType>
|
311
|
+
|
312
|
+
<xs:complexType name="PartsOfSpeech">
|
313
|
+
<xs:sequence>
|
314
|
+
<xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='PartOfSpeechValue'/>
|
315
|
+
</xs:sequence>
|
316
|
+
</xs:complexType>
|
317
|
+
|
318
|
+
<xs:complexType name="InformationStatusValue">
|
319
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
320
|
+
<xs:attribute name="summary" type="xs:string" use="required"/>
|
321
|
+
</xs:complexType>
|
322
|
+
|
323
|
+
<xs:complexType name="InformationStatuses">
|
324
|
+
<xs:sequence>
|
325
|
+
<xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='InformationStatusValue'/>
|
326
|
+
</xs:sequence>
|
327
|
+
</xs:complexType>
|
328
|
+
|
329
|
+
<xs:complexType name="RelationValue">
|
330
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
331
|
+
<xs:attribute name="summary" type="xs:string" use="required"/>
|
332
|
+
<xs:attribute name="primary" type="xs:boolean" use="required"/>
|
333
|
+
<xs:attribute name="secondary" type="xs:boolean" use="required"/>
|
334
|
+
</xs:complexType>
|
335
|
+
|
336
|
+
<xs:complexType name="Relations">
|
337
|
+
<xs:sequence>
|
338
|
+
<xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='RelationValue'/>
|
339
|
+
</xs:sequence>
|
340
|
+
</xs:complexType>
|
341
|
+
|
342
|
+
<xs:complexType name="MorphologyValue">
|
343
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
344
|
+
<xs:attribute name="summary" type="xs:string" use="required"/>
|
345
|
+
</xs:complexType>
|
346
|
+
|
347
|
+
<xs:complexType name="MorphologyField">
|
348
|
+
<xs:sequence>
|
349
|
+
<xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='MorphologyValue'/>
|
350
|
+
</xs:sequence>
|
351
|
+
|
352
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
353
|
+
</xs:complexType>
|
354
|
+
|
355
|
+
<xs:complexType name="Morphology">
|
356
|
+
<xs:sequence>
|
357
|
+
<xs:element name='field' minOccurs="1" maxOccurs="unbounded" type='MorphologyField'/>
|
358
|
+
</xs:sequence>
|
359
|
+
</xs:complexType>
|
360
|
+
|
361
|
+
<xs:complexType name="Annotation">
|
362
|
+
<xs:sequence>
|
363
|
+
<xs:element name='relations' minOccurs="1" maxOccurs="1" type='Relations'/>
|
364
|
+
<xs:element name='parts-of-speech' minOccurs="1" maxOccurs="1" type='PartsOfSpeech'/>
|
365
|
+
<xs:element name='morphology' minOccurs="1" maxOccurs="1" type='Morphology'/>
|
366
|
+
<xs:element name='information-statuses' minOccurs="1" maxOccurs="1" type='InformationStatuses'/>
|
367
|
+
</xs:sequence>
|
368
|
+
</xs:complexType>
|
369
|
+
|
370
|
+
<!-- Top-level element -->
|
371
|
+
<xs:complexType name="Proiel">
|
372
|
+
<xs:sequence>
|
373
|
+
<xs:element name='annotation' minOccurs="0" maxOccurs="1" type='Annotation'/>
|
374
|
+
<xs:element name='source' minOccurs="0" maxOccurs="unbounded" type='Source'/>
|
375
|
+
<xs:element name='dictionary' minOccurs="0" maxOccurs="unbounded" type='Dictionary'/>
|
376
|
+
</xs:sequence>
|
377
|
+
|
378
|
+
<xs:attribute name='export-time' type="xs:dateTime" use="optional"/>
|
379
|
+
<xs:attribute name="schema-version" type="xs:decimal" use="required" fixed="3.0"/>
|
380
|
+
</xs:complexType>
|
381
|
+
|
382
|
+
<xs:element name='proiel' type='Proiel'/>
|
383
|
+
</xs:schema>
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c) 2015-
|
2
|
+
# Copyright (c) 2015-2018 Marius L. Jøhndal
|
3
3
|
#
|
4
4
|
# See LICENSE in the top-level source directory for licensing terms.
|
5
5
|
#++
|
@@ -7,6 +7,95 @@ module PROIEL
|
|
7
7
|
module PROIELXML
|
8
8
|
# @api private
|
9
9
|
module Reader
|
10
|
+
class DictionarySource
|
11
|
+
include SAXMachine
|
12
|
+
|
13
|
+
attribute :idref, required: true
|
14
|
+
attribute :license, required: false
|
15
|
+
attribute :n, required: false
|
16
|
+
end
|
17
|
+
|
18
|
+
class DictionaryGloss
|
19
|
+
include SAXMachine
|
20
|
+
|
21
|
+
attribute :language, required: true
|
22
|
+
value :gloss
|
23
|
+
end
|
24
|
+
|
25
|
+
class DictionaryHomograph
|
26
|
+
include SAXMachine
|
27
|
+
|
28
|
+
attribute :lemma, required: true
|
29
|
+
attribute :'part-of-speech', as: :part_of_speech, required: true
|
30
|
+
end
|
31
|
+
|
32
|
+
class DictionarySlot2
|
33
|
+
include SAXMachine
|
34
|
+
|
35
|
+
attribute :form, required: true
|
36
|
+
attribute :n, required: true
|
37
|
+
end
|
38
|
+
|
39
|
+
class DictionarySlot1
|
40
|
+
include SAXMachine
|
41
|
+
|
42
|
+
elements :slot2, as: :slot2s, class: DictionarySlot2
|
43
|
+
|
44
|
+
attribute :morphology, required: true
|
45
|
+
end
|
46
|
+
|
47
|
+
class DictionaryArgument
|
48
|
+
include SAXMachine
|
49
|
+
|
50
|
+
attribute :relation, required: true
|
51
|
+
attribute :lemma, required: false
|
52
|
+
attribute :'part-of-speech', as: :part_of_speech, required: false
|
53
|
+
attribute :mood, required: false
|
54
|
+
attribute :case, required: false
|
55
|
+
end
|
56
|
+
|
57
|
+
class DictionaryToken
|
58
|
+
include SAXMachine
|
59
|
+
|
60
|
+
attribute :idref, required: true
|
61
|
+
attribute :flags, required: false
|
62
|
+
end
|
63
|
+
|
64
|
+
class DictionaryFrame
|
65
|
+
include SAXMachine
|
66
|
+
|
67
|
+
# We skip the intermediate grouping elements 'arguments' and 'tokens'
|
68
|
+
elements :argument, as: :arguments, class: DictionaryArgument
|
69
|
+
elements :token, as: :tokens, class: DictionaryToken
|
70
|
+
end
|
71
|
+
|
72
|
+
class DictionaryLemma
|
73
|
+
include SAXMachine
|
74
|
+
|
75
|
+
attribute :lemma, required: true
|
76
|
+
attribute :'part-of-speech', as: :part_of_speech, required: true
|
77
|
+
attribute :n, required: false
|
78
|
+
|
79
|
+
# We skip the intermediate grouping elements 'distribution', 'glosses', 'homographs', 'paradigm' and 'valency'
|
80
|
+
elements :source, as: :distribution, class: DictionarySource
|
81
|
+
elements :gloss, as: :glosses, class: DictionaryGloss
|
82
|
+
elements :homograph, as: :homographs, class: DictionaryHomograph
|
83
|
+
elements :slot1, as: :paradigm, class: DictionarySlot1
|
84
|
+
elements :frame, as: :valency, class: DictionaryFrame
|
85
|
+
end
|
86
|
+
|
87
|
+
# Parsing class for `dictionary` elements.
|
88
|
+
class Dictionary
|
89
|
+
include SAXMachine
|
90
|
+
|
91
|
+
attribute :language, required: true
|
92
|
+
attribute :dialect, required: false
|
93
|
+
|
94
|
+
# We skip the intermediate grouping elements 'sources' and 'lemmata'
|
95
|
+
elements :source, as: :sources, class: DictionarySource
|
96
|
+
elements :lemma, as: :lemmata, class: DictionaryLemma
|
97
|
+
end
|
98
|
+
|
10
99
|
# Parsing class for `slash` elements.
|
11
100
|
class Slash
|
12
101
|
include SAXMachine
|
@@ -15,6 +104,22 @@ module PROIEL
|
|
15
104
|
attribute :relation, required: true
|
16
105
|
end
|
17
106
|
|
107
|
+
# Parsing class for `semantic-tag` elements.
|
108
|
+
class SemanticTag
|
109
|
+
include SAXMachine
|
110
|
+
|
111
|
+
attribute :attribute, required: true
|
112
|
+
attribute :value, required: true
|
113
|
+
end
|
114
|
+
|
115
|
+
# Parsing class for `note` elements.
|
116
|
+
class Note
|
117
|
+
include SAXMachine
|
118
|
+
|
119
|
+
attribute :originator, required: true
|
120
|
+
value :content
|
121
|
+
end
|
122
|
+
|
18
123
|
# Parsing class for `token` elements.
|
19
124
|
class Token
|
20
125
|
include SAXMachine
|
@@ -37,6 +142,8 @@ module PROIEL
|
|
37
142
|
attribute :'foreign-ids', as: :foreign_ids
|
38
143
|
|
39
144
|
elements :slash, as: :slashes, class: Slash
|
145
|
+
elements :'semantic-tag', as: :semantic_tags, class: SemanticTag
|
146
|
+
elements :note, as: :notes, class: Note
|
40
147
|
end
|
41
148
|
|
42
149
|
# Parsing class for `sentence` elements.
|
@@ -54,6 +161,7 @@ module PROIEL
|
|
54
161
|
attribute :'presentation-after', as: :presentation_after
|
55
162
|
|
56
163
|
elements :token, as: :tokens, class: Token
|
164
|
+
elements :note, as: :notes, class: Note
|
57
165
|
end
|
58
166
|
|
59
167
|
# Parsing class for `div` elements.
|
@@ -67,6 +175,7 @@ module PROIEL
|
|
67
175
|
|
68
176
|
element :title
|
69
177
|
elements :sentence, as: :sentences, class: Sentence
|
178
|
+
elements :note, as: :notes, class: Note
|
70
179
|
end
|
71
180
|
|
72
181
|
# Parsing class for `source` elements.
|
@@ -74,10 +183,12 @@ module PROIEL
|
|
74
183
|
include SAXMachine
|
75
184
|
|
76
185
|
attribute :id, required: true
|
77
|
-
attribute :'alignment-id', as: :alignment_id, required: false
|
186
|
+
attribute :'alignment-id', as: :alignment_id, class: String, required: false
|
78
187
|
attribute :language, required: true
|
188
|
+
attribute :dialect, required: false
|
79
189
|
|
80
190
|
element :title
|
191
|
+
element :alternative_title
|
81
192
|
element :author
|
82
193
|
element :citation_part
|
83
194
|
element :principal
|
@@ -107,7 +218,11 @@ module PROIEL
|
|
107
218
|
element :printed_text_publisher
|
108
219
|
element :printed_text_place
|
109
220
|
element :printed_text_date
|
221
|
+
element :chronology_composition
|
222
|
+
element :chronology_manuscript
|
223
|
+
|
110
224
|
elements :div, as: :divs, class: Div
|
225
|
+
elements :note, as: :notes, class: Note
|
111
226
|
end
|
112
227
|
|
113
228
|
# Parsing class for `relations/value` elements.
|
@@ -174,6 +289,25 @@ module PROIEL
|
|
174
289
|
attribute :summary, required: true
|
175
290
|
end
|
176
291
|
|
292
|
+
# Parsing class for `lemma` elements.
|
293
|
+
class Lemma
|
294
|
+
include SAXMachine
|
295
|
+
|
296
|
+
attribute :form, required: true
|
297
|
+
attribute :'part-of-speech', as: :part_of_speech, required: true
|
298
|
+
attribute :gloss, required: false
|
299
|
+
|
300
|
+
elements :'semantic-tag', as: :semantic_tags, class: SemanticTag
|
301
|
+
elements :note, as: :notes, class: Note
|
302
|
+
end
|
303
|
+
|
304
|
+
# Parsing class for `dictionary` elements.
|
305
|
+
class Dictionary
|
306
|
+
include SAXMachine
|
307
|
+
|
308
|
+
elements :lemma, as: :lemmas, class: Lemma
|
309
|
+
end
|
310
|
+
|
177
311
|
# Parsing class for `information_statuses` elements.
|
178
312
|
class InformationStatuses
|
179
313
|
include SAXMachine
|
@@ -189,6 +323,7 @@ module PROIEL
|
|
189
323
|
element :parts_of_speech, as: :parts_of_speech, class: PartsOfSpeech
|
190
324
|
element :morphology, class: Morphology
|
191
325
|
element :information_statuses, as: :information_statuses, class: InformationStatuses
|
326
|
+
element :dictionary, as: :dictionary, class: Dictionary
|
192
327
|
end
|
193
328
|
|
194
329
|
# Parsing class for `proiel` elements.
|
@@ -199,6 +334,7 @@ module PROIEL
|
|
199
334
|
attribute :'schema-version', as: :schema_version, required: true
|
200
335
|
|
201
336
|
elements :source, as: :sources, class: Source
|
337
|
+
elements :dictionary, as: :dictionaries, class: Dictionary
|
202
338
|
element :annotation, class: Annotation
|
203
339
|
end
|
204
340
|
|