proiel 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/proiel.rb +8 -2
- data/lib/proiel/alignment.rb +3 -0
- data/lib/proiel/alignment/builder.rb +220 -0
- data/lib/proiel/annotation_schema.rb +11 -4
- data/lib/proiel/dictionary.rb +78 -2
- data/lib/proiel/dictionary/builder.rb +60 -36
- data/lib/proiel/div.rb +5 -2
- data/lib/proiel/language.rb +108 -0
- data/lib/proiel/lemma.rb +78 -0
- data/lib/proiel/proiel_xml/proiel-3.0/proiel-3.0.xsd +383 -0
- data/lib/proiel/proiel_xml/reader.rb +138 -2
- data/lib/proiel/proiel_xml/schema.rb +4 -2
- data/lib/proiel/sentence.rb +5 -2
- data/lib/proiel/source.rb +10 -3
- data/lib/proiel/treebank.rb +21 -4
- data/lib/proiel/version.rb +1 -1
- data/lib/proiel/visualization/graphviz.rb +9 -5
- data/lib/proiel/visualization/graphviz/aligned-modern.dot.erb +83 -0
- data/lib/proiel/visualization/graphviz/classic.dot.erb +2 -1
- data/lib/proiel/visualization/graphviz/linearized.dot.erb +7 -4
- data/lib/proiel/visualization/graphviz/modern.dot.erb +39 -0
- data/lib/proiel/visualization/graphviz/packed.dot.erb +5 -3
- metadata +22 -16
data/lib/proiel/div.rb
CHANGED
@@ -89,10 +89,13 @@ module PROIEL
|
|
89
89
|
# Returns the printable form of the div with all token forms and any
|
90
90
|
# presentation data.
|
91
91
|
#
|
92
|
+
# @param custom_token_formatter [Lambda] formatting function for tokens
|
93
|
+
# which is passed the token as its sole argument
|
94
|
+
#
|
92
95
|
# @return [String] the printable form of the div
|
93
|
-
def printable_form(
|
96
|
+
def printable_form(custom_token_formatter: nil)
|
94
97
|
[presentation_before,
|
95
|
-
@children.map { |s| s.printable_form(
|
98
|
+
@children.map { |s| s.printable_form(custom_token_formatter: custom_token_formatter) },
|
96
99
|
presentation_after].compact.join
|
97
100
|
end
|
98
101
|
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2019 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
  # Registry of the languages known to the library, with helpers for
  # validating language tags and looking up display names.
  module Language
    # Supported language tags mapped to their display names. The tags are a
    # subset of the language codes from ISO 639-3 and Glottolog.
    SUPPORTED_LANGUAGES = {
      ang: 'Old English (ca. 450-1100)',
      ave: 'Avestan',
      axm: 'Middle Armenian',
      chu: 'Church Slavic',
      cms: 'Messapic',
      cnx: 'Middle Cornish',
      dum: 'Middle Dutch',
      enm: 'Middle English',
      frk: 'Old Frankish',
      frm: 'Middle French',
      fro: 'Old French (842-ca. 1400)',
      ghc: 'Hiberno-Scottish Gaelic',
      gmh: 'Middle High German',
      gml: 'Middle Low German',
      gmy: 'Mycenaean Greek',
      goh: 'Old High German (ca. 750-1050)',
      got: 'Gothic',
      grc: 'Ancient Greek (to 1453)',
      hit: 'Hittite',
      hlu: 'Hieroglyphic Luwian',
      htx: 'Middle Hittite',
      lat: 'Latin',
      lng: 'Langobardic',
      mga: 'Middle Irish (10-12th century)',
      non: 'Old Norse',
      nrp: 'North Picene',
      obt: 'Old Breton',
      oco: 'Old Cornish',
      odt: 'Old Dutch-Old Frankish',
      ofs: 'Old Frisian',
      oht: 'Old Hittite',
      olt: 'Old Lithuanian',
      orv: 'Old Russian',
      osc: 'Oscan',
      osp: 'Old Spanish',
      osx: 'Old Saxon',
      owl: 'Old-Middle Welsh',
      peo: 'Old Persian (ca. 600-400 B.C.)',
      pka: 'Ardhamāgadhī Prākrit',
      pmh: 'Maharastri Prakrit',
      por: 'Portuguese',
      pro: 'Old Provençal',
      psu: 'Sauraseni Prakrit',
      rus: 'Russian',
      san: 'Sanskrit',
      sga: 'Early Irish',
      sog: 'Sogdian',
      spa: 'Spanish',
      spx: 'South Picene',
      txb: 'Tokharian B',
      txh: 'Thracian',
      wlm: 'Middle Welsh',
      xbm: 'Middle Breton',
      xcb: 'Cumbric',
      xce: 'Celtiberian',
      xcg: 'Cisalpine Gaulish',
      xcl: 'Classical Armenian',
      xum: 'Umbrian',
      xve: 'Venetic',
    }.freeze

    # Checks if a language is supported.
    #
    # @param language_tag [String, Symbol] language tag of language to check
    #
    # @return [Boolean]
    #
    # @raise [ArgumentError] if the tag is neither a String nor a Symbol
    #
    # @example
    #   language_supported?(:lat) # => true
    #   language_supported?('grc') # => true
    def self.language_supported?(language_tag)
      case language_tag
      when Symbol, String
        SUPPORTED_LANGUAGES.key?(language_tag.to_sym)
      else
        raise ArgumentError, 'language tag must be a String or Symbol'
      end
    end

    # Returns the display name for a language.
    #
    # @param language_tag [String, Symbol] language tag of language
    #
    # @return [String]
    #
    # @raise [ArgumentError] if the tag is neither a String nor a Symbol, or
    #   if the language is not supported
    #
    # @example
    #   get_display_name(:lat) # => "Latin"
    def self.get_display_name(language_tag)
      # language_supported? also rejects tags of the wrong type, so no
      # separate type check is needed here.
      raise ArgumentError, 'unsupported language' unless language_supported?(language_tag)

      SUPPORTED_LANGUAGES[language_tag.to_sym]
    end

    # Returns the tags of all supported languages.
    #
    # @return [Array<Symbol>]
    def self.supported_language_tags
      SUPPORTED_LANGUAGES.keys
    end
  end
end
|
data/lib/proiel/lemma.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2018 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
|
7
|
+
class Lemma < TreebankObject
|
8
|
+
# @return [Dictionary] dictionary that the lemma belongs to
|
9
|
+
attr_reader :dictionary
|
10
|
+
|
11
|
+
attr_reader :n
|
12
|
+
|
13
|
+
# @return [Hash{String => Integer}] distribution of lemmata in sources. The
|
14
|
+
# keys are IDs of sources, the values give the frequency of the lemma per
|
15
|
+
# source.
|
16
|
+
attr_reader :distribution
|
17
|
+
|
18
|
+
# @return [Array<Array(String, String)>] identified homographs of this lemma. The
|
19
|
+
# array contains pairs of lemma form (which will be homographs of this
|
20
|
+
# lemma form under the orthographic conventions of the language) and parts
|
21
|
+
# of speech.
|
22
|
+
attr_reader :homographs
|
23
|
+
|
24
|
+
# @return [Hash{Symbol => String}] glosses for the current lemma. The keys
|
25
|
+
# are language tags and the values the glosses.
|
26
|
+
attr_reader :glosses
|
27
|
+
attr_reader :paradigm
|
28
|
+
attr_reader :valency
|
29
|
+
|
30
|
+
# Creates a new lemma object.
|
31
|
+
def initialize(parent, xml = nil)
  # Back-reference to the owner, exposed through the `dictionary` reader.
  @dictionary = parent

  # Empty defaults; from_xml overwrites them when XML is supplied.
  @n = nil
  @distribution, @glosses, @paradigm = {}, {}, {}
  @homographs, @valency = [], []

  return unless xml

  from_xml(xml)
end
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
# Populates the lemma's attributes from a parsed XML node.
#
# @param xml [Object] parsed lemma node, read through the accessors `n`,
#   `distribution`, `glosses`, `homographs`, `paradigm` and `valency`
def from_xml(xml)
  @n = nullify(xml.n, :int)

  # Source ID => count for that source.
  distribution_pairs = xml.distribution.map do |entry|
    [entry.idref, nullify(entry.n, :int)]
  end
  @distribution = distribution_pairs.to_h

  # Language tag (as a symbol) => gloss.
  gloss_pairs = xml.glosses.map do |entry|
    [entry.language.to_sym, entry.gloss]
  end
  @glosses = gloss_pairs.to_h

  # Pairs of lemma form and part of speech.
  @homographs = xml.homographs.map do |entry|
    [entry.lemma, entry.part_of_speech]
  end

  # Morphology => { form => n }.
  paradigm_pairs = xml.paradigm.map do |outer|
    morphology = outer.morphology
    forms = outer.slot2s.map { |inner| [inner.form, nullify(inner.n, :int)] }
    [morphology, forms.to_h]
  end
  @paradigm = paradigm_pairs.to_h

  # One hash per frame, listing its arguments and the tokens attesting it.
  @valency = xml.valency.map do |frame|
    arguments = frame.arguments.map do |a|
      { relation: a.relation, lemma: a.lemma, part_of_speech: a.part_of_speech, mood: a.mood, case: a.case }
    end
    tokens = frame.tokens.map do |t|
      { flags: t.flags, idref: t.idref }
    end

    { arguments: arguments, tokens: tokens }
  end
end
|
62
|
+
|
63
|
+
# Normalises a raw value read from XML.
#
# `nil` and strings that are empty or consist solely of whitespace are
# treated as missing and become `nil`. Any other value is converted
# according to `type`.
#
# @param s [Object, nil] the raw value
# @param type [Symbol, nil] `:int` to convert with `to_i`; any other value
#   converts with `to_s`
#
# @return [Integer, String, nil] the converted value, or `nil` if the input
#   was missing or blank
def nullify(s, type = nil)
  case s
  # \A and \z anchor the whole string. The line anchors ^ and $ would also
  # match a single blank line inside a multi-line value and wrongly discard
  # the entire value.
  when nil, /\A\s*\z/
    nil
  else
    type == :int ? s.to_i : s.to_s
  end
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
@@ -0,0 +1,383 @@
|
|
1
|
+
<?xml version="1.0"?>

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:annotation>
    <xs:documentation>PROIEL XML format version 3.0</xs:documentation>
  </xs:annotation>

  <!-- Source elements -->

  <!-- A source: metadata fields, then tags/notes/links, then one or more divs. -->
  <xs:complexType name="Source">
    <xs:sequence>
      <xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
      <xs:element name="alternative-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="author" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="citation-part" minOccurs="1" maxOccurs="1" type="xs:string"/>
      <xs:element name="principal" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="funder" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="distributor" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="distributor-address" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="address" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="date" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="license" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="reference-system" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="editorial-note" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="annotator" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="reviewer" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-version" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-original-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-license" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="electronic-text-license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="printed-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="printed-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="printed-text-edition" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="printed-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="printed-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="printed-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="chronology-composition" minOccurs="0" maxOccurs="1" type="xs:string"/>
      <xs:element name="chronology-manuscript" minOccurs="0" maxOccurs="1" type="xs:string"/>

      <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
      <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
      <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>

      <xs:element name="div" minOccurs="1" maxOccurs="unbounded" type="Div"/>

    </xs:sequence>

    <xs:attribute name="id" type="xs:string" use="required"/>
    <xs:attribute name="alignment-id" type="xs:string" use="optional"/>
    <xs:attribute name="language" type="xs:string" use="required"/>
    <xs:attribute name="dialect" type="xs:string" use="optional"/>
  </xs:complexType>

  <!-- A div: a title, optional tags/notes/links, then one or more sentences. -->
  <xs:complexType name="Div">
    <xs:sequence>
      <xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>

      <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
      <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
      <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>

      <xs:element name="sentence" minOccurs="1" maxOccurs="unbounded" type="Sentence"/>
    </xs:sequence>

    <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
    <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
    <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
    <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
  </xs:complexType>

  <!-- A sentence: optional tags/notes/links, then one or more tokens. -->
  <xs:complexType name="Sentence">
    <xs:sequence>
      <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
      <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
      <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>

      <xs:element name="token" minOccurs="1" maxOccurs="unbounded" type="Token"/>
    </xs:sequence>

    <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
    <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
    <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
    <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
    <xs:attribute name="status" type="SentenceStatus" use="optional"/>
    <xs:attribute name="annotated-by" type="xs:string" use="optional"/>
    <xs:attribute name="annotated-at" type="xs:dateTime" use="optional"/>
    <xs:attribute name="reviewed-by" type="xs:string" use="optional"/>
    <xs:attribute name="reviewed-at" type="xs:dateTime" use="optional"/>
  </xs:complexType>

  <!-- Permitted values of the status attribute on a sentence. -->
  <xs:simpleType name="SentenceStatus">
    <xs:restriction base="xs:string">
      <xs:enumeration value="annotated"/>
      <xs:enumeration value="reviewed"/>
      <xs:enumeration value="unannotated"/>
    </xs:restriction>
  </xs:simpleType>

  <!-- A token: optional tags/notes/links and slashes; all annotation is carried by attributes. -->
  <xs:complexType name="Token">
    <xs:sequence>
      <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
      <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
      <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>

      <xs:element name="slash" minOccurs="0" maxOccurs="unbounded" type="Slash"/>
    </xs:sequence>

    <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
    <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
    <xs:attribute name="lemma" type="xs:string" use="optional"/>
    <xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
    <xs:attribute name="morphology" type="xs:string" use="optional"/>
    <xs:attribute name="citation-part" type="xs:string" use="optional"/>
    <xs:attribute name="relation" type="xs:string" use="optional"/>
    <xs:attribute name="head-id" type="xs:nonNegativeInteger" use="optional"/>
    <xs:attribute name="information-status" type="xs:string" use="optional"/>
    <xs:attribute name="antecedent-id" type="xs:nonNegativeInteger" use="optional"/>
    <xs:attribute name="contrast-group" type="xs:string" use="optional"/>
    <xs:attribute name="foreign-ids" type="xs:string" use="optional"/>

    <!-- XSD does not allow us to constrain the use of these attributes properly so they are marked optional even though their occurrence depends on the value of empty-token-sort and form. -->
    <xs:attribute name="empty-token-sort" type="EmptyTokenSort" use="optional"/>
    <xs:attribute name="form" type="xs:string" use="optional"/>
    <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
    <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
  </xs:complexType>

  <!-- Permitted values of the empty-token-sort attribute on a token. -->
  <xs:simpleType name="EmptyTokenSort">
    <xs:restriction base="xs:string">
      <xs:enumeration value="P"/>
      <xs:enumeration value="C"/>
      <xs:enumeration value="V"/>
    </xs:restriction>
  </xs:simpleType>

  <!-- A slash: a labelled reference from a token to a target ID. -->
  <xs:complexType name="Slash">
    <xs:attribute name="target-id" type="xs:nonNegativeInteger" use="required"/>
    <xs:attribute name="relation" type="xs:string" use="required"/>
  </xs:complexType>

  <!-- Shared metadata elements -->

  <!-- An attribute/value pair, optionally pointing at a target. -->
  <xs:complexType name="Tag">
    <xs:attribute name="attribute" type="xs:string" use="required"/>
    <xs:attribute name="value" type="xs:string" use="required"/>
    <xs:attribute name="target-id" type="xs:string" use="optional"/>
    <xs:attribute name="target-type" type="xs:string" use="optional"/>
  </xs:complexType>

  <xs:complexType name="Link">
    <xs:attribute name="target" type="xs:string" use="required"/>
    <xs:attribute name="type" type="xs:string" use="required"/>
  </xs:complexType>

  <!-- Free-text note with a mandatory originator. -->
  <xs:complexType name="Note">
    <xs:simpleContent>
      <xs:extension base="xs:string">
        <xs:attribute name="originator" type="xs:string" use="required"/>
      </xs:extension>
    </xs:simpleContent>
  </xs:complexType>

  <!-- Dictionary elements -->

  <!-- Reference to a source by ID, with an optional licence and count. -->
  <xs:complexType name="DictionarySource">
    <xs:attribute name="idref" type="xs:string" use="required"/>
    <xs:attribute name="license" type="xs:string" use="optional"/>
    <xs:attribute name="n" type="xs:nonNegativeInteger" use="optional"/>
  </xs:complexType>

  <!-- Reference to a token by ID, with optional flags. -->
  <xs:complexType name="DictionaryToken">
    <xs:attribute name="idref" type="xs:string" use="required"/>
    <xs:attribute name="flags" type="xs:string" use="optional"/>
  </xs:complexType>

  <!-- One argument of a valency frame; only the relation is mandatory. -->
  <xs:complexType name="DictionaryArgument">
    <xs:attribute name="relation" type="xs:string" use="required"/>
    <xs:attribute name="lemma" type="xs:string" use="optional"/>
    <xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
    <xs:attribute name="mood" type="xs:string" use="optional"/>
    <xs:attribute name="case" type="xs:string" use="optional"/>
  </xs:complexType>

  <xs:complexType name="DictionaryTokens">
    <xs:sequence>
      <xs:element name="token" minOccurs="0" maxOccurs="unbounded" type="DictionaryToken"/>
    </xs:sequence>
  </xs:complexType>

  <xs:complexType name="DictionaryArguments">
    <xs:sequence>
      <xs:element name="argument" minOccurs="0" maxOccurs="unbounded" type="DictionaryArgument"/>
    </xs:sequence>
  </xs:complexType>

  <!-- A valency frame: one list of arguments followed by one or more token lists. -->
  <xs:complexType name="DictionaryFrame">
    <xs:sequence>
      <xs:element name="arguments" minOccurs="1" maxOccurs="1" type="DictionaryArguments"/>
      <xs:element name="tokens" minOccurs="1" maxOccurs="unbounded" type="DictionaryTokens"/>
    </xs:sequence>
  </xs:complexType>

  <xs:complexType name="DictionaryValency">
    <xs:sequence>
      <xs:element name="frame" minOccurs="1" maxOccurs="unbounded" type="DictionaryFrame"/>
    </xs:sequence>
  </xs:complexType>

  <!-- Inner paradigm slot: a form and its count. -->
  <xs:complexType name="DictionarySlot2">
    <xs:attribute name="form" type="xs:string" use="required"/>
    <xs:attribute name="n" type="xs:nonNegativeInteger" use="required"/>
  </xs:complexType>

  <!-- Outer paradigm slot: a morphology value grouping one or more inner slots. -->
  <xs:complexType name="DictionarySlot1">
    <xs:sequence>
      <xs:element name="slot2" minOccurs="1" maxOccurs="unbounded" type="DictionarySlot2"/>
    </xs:sequence>

    <xs:attribute name="morphology" type="xs:string" use="required"/>
  </xs:complexType>

  <xs:complexType name="DictionaryParadigm">
    <xs:sequence>
      <xs:element name="slot1" minOccurs="1" maxOccurs="unbounded" type="DictionarySlot1"/>
    </xs:sequence>
  </xs:complexType>

  <xs:complexType name="DictionaryHomograph">
    <xs:attribute name="lemma" type="xs:string" use="required"/>
    <xs:attribute name="part-of-speech" type="xs:string" use="required"/>
  </xs:complexType>

  <xs:complexType name="DictionaryHomographs">
    <xs:sequence>
      <xs:element name="homograph" minOccurs="1" maxOccurs="unbounded" type="DictionaryHomograph"/>
    </xs:sequence>
  </xs:complexType>

  <!-- Gloss text with a mandatory language attribute. -->
  <xs:complexType name="DictionaryGloss">
    <xs:simpleContent>
      <xs:extension base="xs:string">
        <xs:attribute name="language" type="xs:string" use="required"/>
      </xs:extension>
    </xs:simpleContent>
  </xs:complexType>

  <xs:complexType name="DictionaryGlosses">
    <xs:sequence>
      <xs:element name="gloss" minOccurs="1" maxOccurs="unbounded" type="DictionaryGloss"/>
    </xs:sequence>
  </xs:complexType>

  <xs:complexType name="DictionaryDistribution">
    <xs:sequence>
      <xs:element name="source" minOccurs="1" maxOccurs="unbounded" type="DictionarySource"/>
    </xs:sequence>
  </xs:complexType>

  <!-- A dictionary entry, identified by lemma and part of speech; every child section is optional. -->
  <xs:complexType name="DictionaryLemma">
    <xs:sequence>
      <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
      <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
      <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>

      <xs:element name="distribution" minOccurs="0" maxOccurs="1" type="DictionaryDistribution"/>
      <xs:element name="glosses" minOccurs="0" maxOccurs="1" type="DictionaryGlosses"/>
      <xs:element name="homographs" minOccurs="0" maxOccurs="1" type="DictionaryHomographs"/>
      <xs:element name="paradigm" minOccurs="0" maxOccurs="1" type="DictionaryParadigm"/>
      <xs:element name="valency" minOccurs="0" maxOccurs="1" type="DictionaryValency"/>
    </xs:sequence>

    <xs:attribute name="lemma" type="xs:string" use="required"/>
    <xs:attribute name="part-of-speech" type="xs:string" use="required"/>
  </xs:complexType>

  <xs:complexType name="DictionarySources">
    <xs:sequence>
      <xs:element name="source" minOccurs="1" maxOccurs="unbounded" type="DictionarySource"/>
    </xs:sequence>
  </xs:complexType>

  <xs:complexType name="DictionaryLemmata">
    <xs:sequence>
      <xs:element name="lemma" minOccurs="1" maxOccurs="unbounded" type="DictionaryLemma"/>
    </xs:sequence>
  </xs:complexType>

  <!-- A dictionary for one language: optional tags/notes/links, sources and lemmata. -->
  <xs:complexType name="Dictionary">
    <xs:sequence>
      <xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
      <xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
      <xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>

      <xs:element name="sources" minOccurs="0" maxOccurs="1" type="DictionarySources"/>
      <xs:element name="lemmata" minOccurs="0" maxOccurs="1" type="DictionaryLemmata"/>
    </xs:sequence>

    <xs:attribute name="language" type="xs:string" use="required"/>
    <xs:attribute name="dialect" type="xs:string" use="optional"/>
  </xs:complexType>

  <!-- Annotation elements -->

  <xs:complexType name="PartOfSpeechValue">
    <xs:attribute name="tag" type="xs:string" use="required"/>
    <xs:attribute name="summary" type="xs:string" use="required"/>
  </xs:complexType>

  <xs:complexType name="PartsOfSpeech">
    <xs:sequence>
      <xs:element name="value" minOccurs="1" maxOccurs="unbounded" type="PartOfSpeechValue"/>
    </xs:sequence>
  </xs:complexType>

  <xs:complexType name="InformationStatusValue">
    <xs:attribute name="tag" type="xs:string" use="required"/>
    <xs:attribute name="summary" type="xs:string" use="required"/>
  </xs:complexType>

  <xs:complexType name="InformationStatuses">
    <xs:sequence>
      <xs:element name="value" minOccurs="1" maxOccurs="unbounded" type="InformationStatusValue"/>
    </xs:sequence>
  </xs:complexType>

  <!-- Relation tags additionally declare the required boolean attributes primary and secondary. -->
  <xs:complexType name="RelationValue">
    <xs:attribute name="tag" type="xs:string" use="required"/>
    <xs:attribute name="summary" type="xs:string" use="required"/>
    <xs:attribute name="primary" type="xs:boolean" use="required"/>
    <xs:attribute name="secondary" type="xs:boolean" use="required"/>
  </xs:complexType>

  <xs:complexType name="Relations">
    <xs:sequence>
      <xs:element name="value" minOccurs="1" maxOccurs="unbounded" type="RelationValue"/>
    </xs:sequence>
  </xs:complexType>

  <xs:complexType name="MorphologyValue">
    <xs:attribute name="tag" type="xs:string" use="required"/>
    <xs:attribute name="summary" type="xs:string" use="required"/>
  </xs:complexType>

  <!-- A tagged morphology field listing its permitted values. -->
  <xs:complexType name="MorphologyField">
    <xs:sequence>
      <xs:element name="value" minOccurs="1" maxOccurs="unbounded" type="MorphologyValue"/>
    </xs:sequence>

    <xs:attribute name="tag" type="xs:string" use="required"/>
  </xs:complexType>

  <xs:complexType name="Morphology">
    <xs:sequence>
      <xs:element name="field" minOccurs="1" maxOccurs="unbounded" type="MorphologyField"/>
    </xs:sequence>
  </xs:complexType>

  <!-- The annotation schema: all four sections are mandatory and must appear in this order. -->
  <xs:complexType name="Annotation">
    <xs:sequence>
      <xs:element name="relations" minOccurs="1" maxOccurs="1" type="Relations"/>
      <xs:element name="parts-of-speech" minOccurs="1" maxOccurs="1" type="PartsOfSpeech"/>
      <xs:element name="morphology" minOccurs="1" maxOccurs="1" type="Morphology"/>
      <xs:element name="information-statuses" minOccurs="1" maxOccurs="1" type="InformationStatuses"/>
    </xs:sequence>
  </xs:complexType>

  <!-- Top-level element -->

  <!-- Document root: an optional annotation schema, then any number of sources, then any number of dictionaries. -->
  <xs:complexType name="Proiel">
    <xs:sequence>
      <xs:element name="annotation" minOccurs="0" maxOccurs="1" type="Annotation"/>
      <xs:element name="source" minOccurs="0" maxOccurs="unbounded" type="Source"/>
      <xs:element name="dictionary" minOccurs="0" maxOccurs="unbounded" type="Dictionary"/>
    </xs:sequence>

    <xs:attribute name="export-time" type="xs:dateTime" use="optional"/>
    <xs:attribute name="schema-version" type="xs:decimal" use="required" fixed="3.0"/>
  </xs:complexType>

  <xs:element name="proiel" type="Proiel"/>
</xs:schema>
|