proiel 1.1.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +5 -5
  2. data/LICENSE +1 -1
  3. data/README.md +2 -2
  4. data/lib/proiel.rb +16 -1
  5. data/lib/proiel/alignment.rb +3 -0
  6. data/lib/proiel/alignment/builder.rb +220 -0
  7. data/lib/proiel/annotation_schema.rb +11 -4
  8. data/lib/proiel/chronology.rb +80 -0
  9. data/lib/proiel/dictionary.rb +79 -0
  10. data/lib/proiel/dictionary/builder.rb +224 -0
  11. data/lib/proiel/div.rb +22 -3
  12. data/lib/proiel/language.rb +108 -0
  13. data/lib/proiel/lemma.rb +77 -0
  14. data/lib/proiel/proiel_xml/proiel-3.0/proiel-3.0.xsd +383 -0
  15. data/lib/proiel/proiel_xml/reader.rb +138 -2
  16. data/lib/proiel/proiel_xml/schema.rb +4 -2
  17. data/lib/proiel/proiel_xml/validator.rb +76 -9
  18. data/lib/proiel/sentence.rb +27 -4
  19. data/lib/proiel/source.rb +14 -4
  20. data/lib/proiel/statistics.rb +2 -2
  21. data/lib/proiel/token.rb +14 -6
  22. data/lib/proiel/tokenization.rb +5 -3
  23. data/lib/proiel/treebank.rb +23 -6
  24. data/lib/proiel/utils.rb +0 -1
  25. data/lib/proiel/valency.rb +5 -0
  26. data/lib/proiel/valency/arguments.rb +151 -0
  27. data/lib/proiel/valency/lexicon.rb +59 -0
  28. data/lib/proiel/valency/obliqueness.rb +31 -0
  29. data/lib/proiel/version.rb +2 -3
  30. data/lib/proiel/visualization.rb +1 -0
  31. data/lib/proiel/visualization/graphviz.rb +111 -0
  32. data/lib/proiel/visualization/graphviz/aligned-modern.dot.erb +83 -0
  33. data/lib/proiel/visualization/graphviz/classic.dot.erb +24 -0
  34. data/lib/proiel/visualization/graphviz/linearized.dot.erb +57 -0
  35. data/lib/proiel/visualization/graphviz/modern.dot.erb +39 -0
  36. data/lib/proiel/visualization/graphviz/packed.dot.erb +25 -0
  37. metadata +76 -31
@@ -0,0 +1,383 @@
1
+ <?xml version="1.0"?>
2
+
3
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
4
+ <xs:annotation>
5
+ <xs:documentation>PROIEL XML format version 3.0</xs:documentation>
6
+ </xs:annotation>
7
+
8
+ <!-- Source elements -->
9
+ <xs:complexType name="Source">
10
+ <xs:sequence>
11
+ <xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
12
+ <xs:element name="alternative-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
13
+ <xs:element name="author" minOccurs="0" maxOccurs="1" type="xs:string"/>
14
+ <xs:element name="citation-part" minOccurs="1" maxOccurs="1" type="xs:string"/>
15
+ <xs:element name="principal" minOccurs="0" maxOccurs="1" type="xs:string"/>
16
+ <xs:element name="funder" minOccurs="0" maxOccurs="1" type="xs:string"/>
17
+ <xs:element name="distributor" minOccurs="0" maxOccurs="1" type="xs:string"/>
18
+ <xs:element name="distributor-address" minOccurs="0" maxOccurs="1" type="xs:string"/>
19
+ <xs:element name="address" minOccurs="0" maxOccurs="1" type="xs:string"/>
20
+ <xs:element name="date" minOccurs="0" maxOccurs="1" type="xs:string"/>
21
+ <xs:element name="license" minOccurs="0" maxOccurs="1" type="xs:string"/>
22
+ <xs:element name="license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
23
+ <xs:element name="reference-system" minOccurs="0" maxOccurs="1" type="xs:string"/>
24
+ <xs:element name="editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
25
+ <xs:element name="editorial-note" minOccurs="0" maxOccurs="1" type="xs:string"/>
26
+ <xs:element name="annotator" minOccurs="0" maxOccurs="1" type="xs:string"/>
27
+ <xs:element name="reviewer" minOccurs="0" maxOccurs="1" type="xs:string"/>
28
+ <xs:element name="electronic-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
29
+ <xs:element name="electronic-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
30
+ <xs:element name="electronic-text-version" minOccurs="0" maxOccurs="1" type="xs:string"/>
31
+ <xs:element name="electronic-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
32
+ <xs:element name="electronic-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
33
+ <xs:element name="electronic-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
34
+ <xs:element name="electronic-text-original-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
35
+ <xs:element name="electronic-text-license" minOccurs="0" maxOccurs="1" type="xs:string"/>
36
+ <xs:element name="electronic-text-license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
37
+ <xs:element name="printed-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
38
+ <xs:element name="printed-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
39
+ <xs:element name="printed-text-edition" minOccurs="0" maxOccurs="1" type="xs:string"/>
40
+ <xs:element name="printed-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
41
+ <xs:element name="printed-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
42
+ <xs:element name="printed-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
43
+ <xs:element name="chronology-composition" minOccurs="0" maxOccurs="1" type="xs:string"/>
44
+ <xs:element name="chronology-manuscript" minOccurs="0" maxOccurs="1" type="xs:string"/>
45
+
46
+ <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
47
+ <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
48
+ <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
49
+
50
+ <xs:element name="div" minOccurs="1" maxOccurs="unbounded" type="Div"/>
51
+
52
+ </xs:sequence>
53
+
54
+ <xs:attribute name="id" type="xs:string" use="required"/>
55
+ <xs:attribute name="alignment-id" type="xs:string" use="optional"/>
56
+ <xs:attribute name="language" type="xs:string" use="required"/>
57
+ <xs:attribute name="dialect" type="xs:string" use="optional"/>
58
+ </xs:complexType>
59
+
60
+ <xs:complexType name="Div">
61
+ <xs:sequence>
62
+ <xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
63
+
64
+ <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
65
+ <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
66
+ <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
67
+
68
+ <xs:element name="sentence" minOccurs="1" maxOccurs="unbounded" type="Sentence"/>
69
+ </xs:sequence>
70
+
71
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
72
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
73
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
74
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
75
+ </xs:complexType>
76
+
77
+ <xs:complexType name="Sentence">
78
+ <xs:sequence>
79
+ <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
80
+ <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
81
+ <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
82
+
83
+ <xs:element name="token" minOccurs="1" maxOccurs="unbounded" type="Token"/>
84
+ </xs:sequence>
85
+
86
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
87
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
88
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
89
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
90
+ <xs:attribute name="status" type="SentenceStatus" use="optional"/>
91
+ <xs:attribute name="annotated-by" type="xs:string" use="optional"/>
92
+ <xs:attribute name="annotated-at" type="xs:dateTime" use="optional"/>
93
+ <xs:attribute name="reviewed-by" type="xs:string" use="optional"/>
94
+ <xs:attribute name="reviewed-at" type="xs:dateTime" use="optional"/>
95
+ </xs:complexType>
96
+
97
+ <xs:simpleType name="SentenceStatus">
98
+ <xs:restriction base="xs:string">
99
+ <xs:enumeration value="annotated"/>
100
+ <xs:enumeration value="reviewed"/>
101
+ <xs:enumeration value="unannotated"/>
102
+ </xs:restriction>
103
+ </xs:simpleType>
104
+
105
+ <xs:complexType name="Token">
106
+ <xs:sequence>
107
+ <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
108
+ <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
109
+ <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
110
+
111
+ <xs:element name="slash" minOccurs="0" maxOccurs="unbounded" type="Slash"/>
112
+ </xs:sequence>
113
+
114
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
115
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
116
+ <xs:attribute name="lemma" type="xs:string" use="optional"/>
117
+ <xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
118
+ <xs:attribute name="morphology" type="xs:string" use="optional"/>
119
+ <xs:attribute name="citation-part" type="xs:string" use="optional"/>
120
+ <xs:attribute name="relation" type="xs:string" use="optional"/>
121
+ <xs:attribute name="head-id" type="xs:nonNegativeInteger" use="optional"/>
122
+ <xs:attribute name="information-status" type="xs:string" use="optional"/>
123
+ <xs:attribute name="antecedent-id" type="xs:nonNegativeInteger" use="optional"/>
124
+ <xs:attribute name="contrast-group" type="xs:string" use="optional"/>
125
+ <xs:attribute name="foreign-ids" type="xs:string" use="optional"/>
126
+
127
+ <!-- XSD does not allow us to constrain the use of these attributes properly so they are marked optional even though their occurrence depends on the value of empty-token-sort and form. -->
128
+ <xs:attribute name="empty-token-sort" type="EmptyTokenSort" use="optional"/>
129
+ <xs:attribute name="form" type="xs:string" use="optional"/>
130
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
131
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
132
+ </xs:complexType>
133
+
134
+ <xs:simpleType name="EmptyTokenSort">
135
+ <xs:restriction base="xs:string">
136
+ <xs:enumeration value="P"/>
137
+ <xs:enumeration value="C"/>
138
+ <xs:enumeration value="V"/>
139
+ </xs:restriction>
140
+ </xs:simpleType>
141
+
142
+ <xs:complexType name="Slash">
143
+ <xs:attribute name="target-id" type="xs:nonNegativeInteger" use="required"/>
144
+ <xs:attribute name="relation" type="xs:string" use="required"/>
145
+ </xs:complexType>
146
+
147
+ <!-- Shared metadata elements -->
148
+ <xs:complexType name="Tag">
149
+ <xs:attribute name="attribute" type="xs:string" use="required"/>
150
+ <xs:attribute name="value" type="xs:string" use="required"/>
151
+ <xs:attribute name="target-id" type="xs:string" use="optional"/>
152
+ <xs:attribute name="target-type" type="xs:string" use="optional"/>
153
+ </xs:complexType>
154
+
155
+ <xs:complexType name="Link">
156
+ <xs:attribute name="target" type="xs:string" use="required"/>
157
+ <xs:attribute name="type" type="xs:string" use="required"/>
158
+ </xs:complexType>
159
+
160
+ <xs:complexType name="Note">
161
+ <xs:simpleContent>
162
+ <xs:extension base="xs:string">
163
+ <xs:attribute name="originator" type="xs:string" use="required"/>
164
+ </xs:extension>
165
+ </xs:simpleContent>
166
+ </xs:complexType>
167
+
168
+ <!-- Dictionary elements -->
169
+ <xs:complexType name="DictionarySource">
170
+ <xs:attribute name="idref" type="xs:string" use="required"/>
171
+ <xs:attribute name="license" type="xs:string" use="optional"/>
172
+ <xs:attribute name="n" type="xs:nonNegativeInteger" use="optional"/>
173
+ </xs:complexType>
174
+
175
+ <xs:complexType name="DictionaryToken">
176
+ <xs:attribute name="idref" type="xs:string" use="required"/>
177
+ <xs:attribute name="flags" type="xs:string" use="optional"/>
178
+ </xs:complexType>
179
+
180
+ <xs:complexType name="DictionaryArgument">
181
+ <xs:attribute name="relation" type="xs:string" use="required"/>
182
+ <xs:attribute name="lemma" type="xs:string" use="optional"/>
183
+ <xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
184
+ <xs:attribute name="mood" type="xs:string" use="optional"/>
185
+ <xs:attribute name="case" type="xs:string" use="optional"/>
186
+ </xs:complexType>
187
+
188
+ <xs:complexType name="DictionaryTokens">
189
+ <xs:sequence>
190
+ <xs:element name="token" minOccurs="0" maxOccurs="unbounded" type="DictionaryToken"/>
191
+ </xs:sequence>
192
+ </xs:complexType>
193
+
194
+ <xs:complexType name="DictionaryArguments">
195
+ <xs:sequence>
196
+ <xs:element name="argument" minOccurs="0" maxOccurs="unbounded" type="DictionaryArgument"/>
197
+ </xs:sequence>
198
+ </xs:complexType>
199
+
200
+ <xs:complexType name="DictionaryFrame">
201
+ <xs:sequence>
202
+ <xs:element name="arguments" minOccurs="1" maxOccurs="1" type="DictionaryArguments"/>
203
+ <xs:element name="tokens" minOccurs="1" maxOccurs="unbounded" type="DictionaryTokens"/>
204
+ </xs:sequence>
205
+ </xs:complexType>
206
+
207
+ <xs:complexType name="DictionaryValency">
208
+ <xs:sequence>
209
+ <xs:element name="frame" minOccurs="1" maxOccurs="unbounded" type="DictionaryFrame"/>
210
+ </xs:sequence>
211
+ </xs:complexType>
212
+
213
+ <xs:complexType name="DictionarySlot2">
214
+ <xs:attribute name="form" type="xs:string" use="required"/>
215
+ <xs:attribute name="n" type="xs:nonNegativeInteger" use="required"/>
216
+ </xs:complexType>
217
+
218
+ <xs:complexType name="DictionarySlot1">
219
+ <xs:sequence>
220
+ <xs:element name="slot2" minOccurs="1" maxOccurs="unbounded" type="DictionarySlot2"/>
221
+ </xs:sequence>
222
+
223
+ <xs:attribute name="morphology" type="xs:string" use="required"/>
224
+ </xs:complexType>
225
+
226
+ <xs:complexType name="DictionaryParadigm">
227
+ <xs:sequence>
228
+ <xs:element name="slot1" minOccurs="1" maxOccurs="unbounded" type="DictionarySlot1"/>
229
+ </xs:sequence>
230
+ </xs:complexType>
231
+
232
+ <xs:complexType name="DictionaryHomograph">
233
+ <xs:attribute name="lemma" type="xs:string" use="required"/>
234
+ <xs:attribute name="part-of-speech" type="xs:string" use="required"/>
235
+ </xs:complexType>
236
+
237
+ <xs:complexType name="DictionaryHomographs">
238
+ <xs:sequence>
239
+ <xs:element name='homograph' minOccurs="1" maxOccurs="unbounded" type='DictionaryHomograph'/>
240
+ </xs:sequence>
241
+ </xs:complexType>
242
+
243
+ <xs:complexType name="DictionaryGloss">
244
+ <xs:simpleContent>
245
+ <xs:extension base="xs:string">
246
+ <xs:attribute name="language" type="xs:string" use="required"/>
247
+ </xs:extension>
248
+ </xs:simpleContent>
249
+ </xs:complexType>
250
+
251
+ <xs:complexType name="DictionaryGlosses">
252
+ <xs:sequence>
253
+ <xs:element name='gloss' minOccurs="1" maxOccurs="unbounded" type='DictionaryGloss'/>
254
+ </xs:sequence>
255
+ </xs:complexType>
256
+
257
+ <xs:complexType name="DictionaryDistribution">
258
+ <xs:sequence>
259
+ <xs:element name='source' minOccurs="1" maxOccurs="unbounded" type='DictionarySource'/>
260
+ </xs:sequence>
261
+ </xs:complexType>
262
+
263
+ <xs:complexType name="DictionaryLemma">
264
+ <xs:sequence>
265
+ <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
266
+ <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
267
+ <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
268
+
269
+ <xs:element name="distribution" minOccurs="0" maxOccurs="1" type="DictionaryDistribution"/>
270
+ <xs:element name="glosses" minOccurs="0" maxOccurs="1" type="DictionaryGlosses"/>
271
+ <xs:element name="homographs" minOccurs="0" maxOccurs="1" type="DictionaryHomographs"/>
272
+ <xs:element name="paradigm" minOccurs="0" maxOccurs="1" type="DictionaryParadigm"/>
273
+ <xs:element name="valency" minOccurs="0" maxOccurs="1" type="DictionaryValency"/>
274
+ </xs:sequence>
275
+
276
+ <xs:attribute name="lemma" type="xs:string" use="required"/>
277
+ <xs:attribute name="part-of-speech" type="xs:string" use="required"/>
278
+ </xs:complexType>
279
+
280
+ <xs:complexType name="DictionarySources">
281
+ <xs:sequence>
282
+ <xs:element name='source' minOccurs="1" maxOccurs="unbounded" type='DictionarySource'/>
283
+ </xs:sequence>
284
+ </xs:complexType>
285
+
286
+ <xs:complexType name="DictionaryLemmata">
287
+ <xs:sequence>
288
+ <xs:element name='lemma' minOccurs="1" maxOccurs="unbounded" type='DictionaryLemma'/>
289
+ </xs:sequence>
290
+ </xs:complexType>
291
+
292
+ <xs:complexType name="Dictionary">
293
+ <xs:sequence>
294
+ <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
295
+ <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
296
+ <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
297
+
298
+ <xs:element name='sources' minOccurs="0" maxOccurs="1" type='DictionarySources'/>
299
+ <xs:element name='lemmata' minOccurs="0" maxOccurs="1" type='DictionaryLemmata'/>
300
+ </xs:sequence>
301
+
302
+ <xs:attribute name="language" type="xs:string" use="required"/>
303
+ <xs:attribute name="dialect" type="xs:string" use="optional"/>
304
+ </xs:complexType>
305
+
306
+ <!-- Annotation elements -->
307
+ <xs:complexType name="PartOfSpeechValue">
308
+ <xs:attribute name="tag" type="xs:string" use="required"/>
309
+ <xs:attribute name="summary" type="xs:string" use="required"/>
310
+ </xs:complexType>
311
+
312
+ <xs:complexType name="PartsOfSpeech">
313
+ <xs:sequence>
314
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='PartOfSpeechValue'/>
315
+ </xs:sequence>
316
+ </xs:complexType>
317
+
318
+ <xs:complexType name="InformationStatusValue">
319
+ <xs:attribute name="tag" type="xs:string" use="required"/>
320
+ <xs:attribute name="summary" type="xs:string" use="required"/>
321
+ </xs:complexType>
322
+
323
+ <xs:complexType name="InformationStatuses">
324
+ <xs:sequence>
325
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='InformationStatusValue'/>
326
+ </xs:sequence>
327
+ </xs:complexType>
328
+
329
+ <xs:complexType name="RelationValue">
330
+ <xs:attribute name="tag" type="xs:string" use="required"/>
331
+ <xs:attribute name="summary" type="xs:string" use="required"/>
332
+ <xs:attribute name="primary" type="xs:boolean" use="required"/>
333
+ <xs:attribute name="secondary" type="xs:boolean" use="required"/>
334
+ </xs:complexType>
335
+
336
+ <xs:complexType name="Relations">
337
+ <xs:sequence>
338
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='RelationValue'/>
339
+ </xs:sequence>
340
+ </xs:complexType>
341
+
342
+ <xs:complexType name="MorphologyValue">
343
+ <xs:attribute name="tag" type="xs:string" use="required"/>
344
+ <xs:attribute name="summary" type="xs:string" use="required"/>
345
+ </xs:complexType>
346
+
347
+ <xs:complexType name="MorphologyField">
348
+ <xs:sequence>
349
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='MorphologyValue'/>
350
+ </xs:sequence>
351
+
352
+ <xs:attribute name="tag" type="xs:string" use="required"/>
353
+ </xs:complexType>
354
+
355
+ <xs:complexType name="Morphology">
356
+ <xs:sequence>
357
+ <xs:element name='field' minOccurs="1" maxOccurs="unbounded" type='MorphologyField'/>
358
+ </xs:sequence>
359
+ </xs:complexType>
360
+
361
+ <xs:complexType name="Annotation">
362
+ <xs:sequence>
363
+ <xs:element name='relations' minOccurs="1" maxOccurs="1" type='Relations'/>
364
+ <xs:element name='parts-of-speech' minOccurs="1" maxOccurs="1" type='PartsOfSpeech'/>
365
+ <xs:element name='morphology' minOccurs="1" maxOccurs="1" type='Morphology'/>
366
+ <xs:element name='information-statuses' minOccurs="1" maxOccurs="1" type='InformationStatuses'/>
367
+ </xs:sequence>
368
+ </xs:complexType>
369
+
370
+ <!-- Top-level element -->
371
+ <xs:complexType name="Proiel">
372
+ <xs:sequence>
373
+ <xs:element name='annotation' minOccurs="0" maxOccurs="1" type='Annotation'/>
374
+ <xs:element name='source' minOccurs="0" maxOccurs="unbounded" type='Source'/>
375
+ <xs:element name='dictionary' minOccurs="0" maxOccurs="unbounded" type='Dictionary'/>
376
+ </xs:sequence>
377
+
378
+ <xs:attribute name='export-time' type="xs:dateTime" use="optional"/>
379
+ <xs:attribute name="schema-version" type="xs:decimal" use="required" fixed="3.0"/>
380
+ </xs:complexType>
381
+
382
+ <xs:element name='proiel' type='Proiel'/>
383
+ </xs:schema>
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015-2016 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2018 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -7,6 +7,95 @@ module PROIEL
7
7
  module PROIELXML
8
8
  # @api private
9
9
  module Reader
10
+ class DictionarySource
11
+ include SAXMachine
12
+
13
+ attribute :idref, required: true
14
+ attribute :license, required: false
15
+ attribute :n, required: false
16
+ end
17
+
18
+ class DictionaryGloss
19
+ include SAXMachine
20
+
21
+ attribute :language, required: true
22
+ value :gloss
23
+ end
24
+
25
+ class DictionaryHomograph
26
+ include SAXMachine
27
+
28
+ attribute :lemma, required: true
29
+ attribute :'part-of-speech', as: :part_of_speech, required: true
30
+ end
31
+
32
+ class DictionarySlot2
33
+ include SAXMachine
34
+
35
+ attribute :form, required: true
36
+ attribute :n, required: true
37
+ end
38
+
39
+ class DictionarySlot1
40
+ include SAXMachine
41
+
42
+ elements :slot2, as: :slot2s, class: DictionarySlot2
43
+
44
+ attribute :morphology, required: true
45
+ end
46
+
47
+ class DictionaryArgument
48
+ include SAXMachine
49
+
50
+ attribute :relation, required: true
51
+ attribute :lemma, required: false
52
+ attribute :'part-of-speech', as: :part_of_speech, required: false
53
+ attribute :mood, required: false
54
+ attribute :case, required: false
55
+ end
56
+
57
+ class DictionaryToken
58
+ include SAXMachine
59
+
60
+ attribute :idref, required: true
61
+ attribute :flags, required: false
62
+ end
63
+
64
+ class DictionaryFrame
65
+ include SAXMachine
66
+
67
+ # We skip the intermediate grouping elements 'arguments' and 'tokens'
68
+ elements :argument, as: :arguments, class: DictionaryArgument
69
+ elements :token, as: :tokens, class: DictionaryToken
70
+ end
71
+
72
+ class DictionaryLemma
73
+ include SAXMachine
74
+
75
+ attribute :lemma, required: true
76
+ attribute :'part-of-speech', as: :part_of_speech, required: true
77
+ attribute :n, required: false
78
+
79
+ # We skip the intermediate grouping elements 'distribution', 'glosses', 'homographs', 'paradigm' and 'valency'
80
+ elements :source, as: :distribution, class: DictionarySource
81
+ elements :gloss, as: :glosses, class: DictionaryGloss
82
+ elements :homograph, as: :homographs, class: DictionaryHomograph
83
+ elements :slot1, as: :paradigm, class: DictionarySlot1
84
+ elements :frame, as: :valency, class: DictionaryFrame
85
+ end
86
+
87
+ # Parsing class for `dictionary` elements.
88
+ class Dictionary
89
+ include SAXMachine
90
+
91
+ attribute :language, required: true
92
+ attribute :dialect, required: false
93
+
94
+ # We skip the intermediate grouping elements 'sources' and 'lemmata'
95
+ elements :source, as: :sources, class: DictionarySource
96
+ elements :lemma, as: :lemmata, class: DictionaryLemma
97
+ end
98
+
10
99
  # Parsing class for `slash` elements.
11
100
  class Slash
12
101
  include SAXMachine
@@ -15,6 +104,22 @@ module PROIEL
15
104
  attribute :relation, required: true
16
105
  end
17
106
 
107
+ # Parsing class for `semantic-tag` elements.
108
+ class SemanticTag
109
+ include SAXMachine
110
+
111
+ attribute :attribute, required: true
112
+ attribute :value, required: true
113
+ end
114
+
115
+ # Parsing class for `note` elements.
116
+ class Note
117
+ include SAXMachine
118
+
119
+ attribute :originator, required: true
120
+ value :content
121
+ end
122
+
18
123
  # Parsing class for `token` elements.
19
124
  class Token
20
125
  include SAXMachine
@@ -37,6 +142,8 @@ module PROIEL
37
142
  attribute :'foreign-ids', as: :foreign_ids
38
143
 
39
144
  elements :slash, as: :slashes, class: Slash
145
+ elements :'semantic-tag', as: :semantic_tags, class: SemanticTag
146
+ elements :note, as: :notes, class: Note
40
147
  end
41
148
 
42
149
  # Parsing class for `sentence` elements.
@@ -54,6 +161,7 @@ module PROIEL
54
161
  attribute :'presentation-after', as: :presentation_after
55
162
 
56
163
  elements :token, as: :tokens, class: Token
164
+ elements :note, as: :notes, class: Note
57
165
  end
58
166
 
59
167
  # Parsing class for `div` elements.
@@ -67,6 +175,7 @@ module PROIEL
67
175
 
68
176
  element :title
69
177
  elements :sentence, as: :sentences, class: Sentence
178
+ elements :note, as: :notes, class: Note
70
179
  end
71
180
 
72
181
  # Parsing class for `source` elements.
@@ -74,10 +183,12 @@ module PROIEL
74
183
  include SAXMachine
75
184
 
76
185
  attribute :id, required: true
77
- attribute :'alignment-id', as: :alignment_id, required: false
186
+ attribute :'alignment-id', as: :alignment_id, class: String, required: false
78
187
  attribute :language, required: true
188
+ attribute :dialect, required: false
79
189
 
80
190
  element :title
191
+ element :alternative_title
81
192
  element :author
82
193
  element :citation_part
83
194
  element :principal
@@ -107,7 +218,11 @@ module PROIEL
107
218
  element :printed_text_publisher
108
219
  element :printed_text_place
109
220
  element :printed_text_date
221
+ element :chronology_composition
222
+ element :chronology_manuscript
223
+
110
224
  elements :div, as: :divs, class: Div
225
+ elements :note, as: :notes, class: Note
111
226
  end
112
227
 
113
228
  # Parsing class for `relations/value` elements.
@@ -174,6 +289,25 @@ module PROIEL
174
289
  attribute :summary, required: true
175
290
  end
176
291
 
292
+ # Parsing class for `lemma` elements.
293
+ class Lemma
294
+ include SAXMachine
295
+
296
+ attribute :form, required: true
297
+ attribute :'part-of-speech', as: :part_of_speech, required: true
298
+ attribute :gloss, required: false
299
+
300
+ elements :'semantic-tag', as: :semantic_tags, class: SemanticTag
301
+ elements :note, as: :notes, class: Note
302
+ end
303
+
304
+ # Parsing class for `dictionary` elements.
305
+ class Dictionary
306
+ include SAXMachine
307
+
308
+ elements :lemma, as: :lemmas, class: Lemma
309
+ end
310
+
177
311
  # Parsing class for `information_statuses` elements.
178
312
  class InformationStatuses
179
313
  include SAXMachine
@@ -189,6 +323,7 @@ module PROIEL
189
323
  element :parts_of_speech, as: :parts_of_speech, class: PartsOfSpeech
190
324
  element :morphology, class: Morphology
191
325
  element :information_statuses, as: :information_statuses, class: InformationStatuses
326
+ element :dictionary, as: :dictionary, class: Dictionary
192
327
  end
193
328
 
194
329
  # Parsing class for `proiel` elements.
@@ -199,6 +334,7 @@ module PROIEL
199
334
  attribute :'schema-version', as: :schema_version, required: true
200
335
 
201
336
  elements :source, as: :sources, class: Source
337
+ elements :dictionary, as: :dictionaries, class: Dictionary
202
338
  element :annotation, class: Annotation
203
339
  end
204
340