proiel 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/proiel.rb +8 -2
- data/lib/proiel/alignment.rb +3 -0
- data/lib/proiel/alignment/builder.rb +220 -0
- data/lib/proiel/annotation_schema.rb +11 -4
- data/lib/proiel/dictionary.rb +78 -2
- data/lib/proiel/dictionary/builder.rb +60 -36
- data/lib/proiel/div.rb +5 -2
- data/lib/proiel/language.rb +108 -0
- data/lib/proiel/lemma.rb +78 -0
- data/lib/proiel/proiel_xml/proiel-3.0/proiel-3.0.xsd +383 -0
- data/lib/proiel/proiel_xml/reader.rb +138 -2
- data/lib/proiel/proiel_xml/schema.rb +4 -2
- data/lib/proiel/sentence.rb +5 -2
- data/lib/proiel/source.rb +10 -3
- data/lib/proiel/treebank.rb +21 -4
- data/lib/proiel/version.rb +1 -1
- data/lib/proiel/visualization/graphviz.rb +9 -5
- data/lib/proiel/visualization/graphviz/aligned-modern.dot.erb +83 -0
- data/lib/proiel/visualization/graphviz/classic.dot.erb +2 -1
- data/lib/proiel/visualization/graphviz/linearized.dot.erb +7 -4
- data/lib/proiel/visualization/graphviz/modern.dot.erb +39 -0
- data/lib/proiel/visualization/graphviz/packed.dot.erb +5 -3
- metadata +22 -16
data/lib/proiel/div.rb
CHANGED
@@ -89,10 +89,13 @@ module PROIEL
|
|
89
89
|
# Returns the printable form of the div with all token forms and any
|
90
90
|
# presentation data.
|
91
91
|
#
|
92
|
+
# @param custom_token_formatter [Proc, nil] formatting function for tokens
|
93
|
+
# which is passed the token as its sole argument
|
94
|
+
#
|
92
95
|
# @return [String] the printable form of the div
|
93
|
-
def printable_form(
|
96
|
+
def printable_form(custom_token_formatter: nil)
|
94
97
|
[presentation_before,
|
95
|
-
@children.map { |s| s.printable_form(
|
98
|
+
@children.map { |s| s.printable_form(custom_token_formatter: custom_token_formatter) },
|
96
99
|
presentation_after].compact.join
|
97
100
|
end
|
98
101
|
|
data/lib/proiel/language.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2019 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
|
7
|
+
module Language
|
8
|
+
SUPPORTED_LANGUAGES = {
|
9
|
+
# This is a subset of language codes from ISO 639-3 and Glottolog.
|
10
|
+
ang: 'Old English (ca. 450-1100)',
|
11
|
+
ave: 'Avestan',
|
12
|
+
axm: 'Middle Armenian',
|
13
|
+
chu: 'Church Slavic',
|
14
|
+
cms: 'Messapic',
|
15
|
+
cnx: 'Middle Cornish',
|
16
|
+
dum: 'Middle Dutch',
|
17
|
+
enm: 'Middle English',
|
18
|
+
frk: 'Old Frankish',
|
19
|
+
frm: 'Middle French',
|
20
|
+
fro: 'Old French (842-ca. 1400)',
|
21
|
+
ghc: 'Hiberno-Scottish Gaelic',
|
22
|
+
gmh: 'Middle High German',
|
23
|
+
gml: 'Middle Low German',
|
24
|
+
gmy: 'Mycenaean Greek',
|
25
|
+
goh: 'Old High German (ca. 750-1050)',
|
26
|
+
got: 'Gothic',
|
27
|
+
grc: 'Ancient Greek (to 1453)',
|
28
|
+
hit: 'Hittite',
|
29
|
+
hlu: 'Hieroglyphic Luwian',
|
30
|
+
htx: 'Middle Hittite',
|
31
|
+
lat: 'Latin',
|
32
|
+
lng: 'Langobardic',
|
33
|
+
mga: 'Middle Irish (10-12th century)',
|
34
|
+
non: 'Old Norse',
|
35
|
+
nrp: 'North Picene',
|
36
|
+
obt: 'Old Breton',
|
37
|
+
oco: 'Old Cornish',
|
38
|
+
odt: 'Old Dutch-Old Frankish',
|
39
|
+
ofs: 'Old Frisian',
|
40
|
+
oht: 'Old Hittite',
|
41
|
+
olt: 'Old Lithuanian',
|
42
|
+
orv: 'Old Russian',
|
43
|
+
osc: 'Oscan',
|
44
|
+
osp: 'Old Spanish',
|
45
|
+
osx: 'Old Saxon',
|
46
|
+
owl: 'Old-Middle Welsh',
|
47
|
+
peo: 'Old Persian (ca. 600-400 B.C.)',
|
48
|
+
pka: 'Ardhamāgadhī Prākrit',
|
49
|
+
pmh: 'Maharastri Prakrit',
|
50
|
+
por: 'Portuguese',
|
51
|
+
pro: 'Old Provençal',
|
52
|
+
psu: 'Sauraseni Prakrit',
|
53
|
+
rus: 'Russian',
|
54
|
+
san: 'Sanskrit',
|
55
|
+
sga: 'Early Irish',
|
56
|
+
sog: 'Sogdian',
|
57
|
+
spa: 'Spanish',
|
58
|
+
spx: 'South Picene',
|
59
|
+
txb: 'Tokharian B',
|
60
|
+
txh: 'Thracian',
|
61
|
+
wlm: 'Middle Welsh',
|
62
|
+
xbm: 'Middle Breton',
|
63
|
+
xcb: 'Cumbric',
|
64
|
+
xce: 'Celtiberian',
|
65
|
+
xcg: 'Cisalpine Gaulish',
|
66
|
+
xcl: 'Classical Armenian',
|
67
|
+
xum: 'Umbrian',
|
68
|
+
xve: 'Venetic',
|
69
|
+
}.freeze
|
70
|
+
|
71
|
+
# Checks if a language is supported.
|
72
|
+
#
|
73
|
+
# @param language_tag [String, Symbol] language tag of language to check
|
74
|
+
#
|
75
|
+
# @return [Boolean]
|
76
|
+
#
|
77
|
+
# @example
|
78
|
+
# language_supported?(:lat) # => true
|
79
|
+
# language_supported?('grc') # => true
|
80
|
+
def self.language_supported?(language_tag)
|
81
|
+
raise ArgumentError unless language_tag.is_a?(Symbol) or language_tag.is_a?(String)
|
82
|
+
|
83
|
+
SUPPORTED_LANGUAGES.key?(language_tag.to_sym)
|
84
|
+
end
|
85
|
+
|
86
|
+
# Returns the display name for a language.
|
87
|
+
#
|
88
|
+
# @param language_tag [String, Symbol] language tag of language
|
89
|
+
#
|
90
|
+
# @return [String]
|
91
|
+
#
|
92
|
+
# @example
|
93
|
+
# get_display_name(:lat) # => "Latin"
|
94
|
+
def self.get_display_name(language_tag)
|
95
|
+
raise ArgumentError unless language_tag.is_a?(Symbol) or language_tag.is_a?(String)
|
96
|
+
raise ArgumentError, 'unsupported language' unless language_supported?(language_tag)
|
97
|
+
|
98
|
+
SUPPORTED_LANGUAGES[language_tag.to_sym]
|
99
|
+
end
|
100
|
+
|
101
|
+
# Returns the tags of all supported languages.
|
102
|
+
#
|
103
|
+
# @return [Array<Symbol>]
|
104
|
+
def self.supported_language_tags
|
105
|
+
SUPPORTED_LANGUAGES.keys
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
data/lib/proiel/lemma.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2018 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
|
7
|
+
class Lemma < TreebankObject
|
8
|
+
# @return [Dictionary] dictionary that the lemma belongs to
|
9
|
+
attr_reader :dictionary
|
10
|
+
|
11
|
+
attr_reader :n
|
12
|
+
|
13
|
+
# @return [Hash{String => Integer}] distribution of the lemma across sources. The
|
14
|
+
# keys are IDs of sources, the values give the frequency of the lemma per
|
15
|
+
# source.
|
16
|
+
attr_reader :distribution
|
17
|
+
|
18
|
+
# @return [Array<Array(String, String)>] identified homographs of this lemma. The
|
19
|
+
# array contains pairs of lemma form (which will be homographs of this
|
20
|
+
# lemma form under the orthographic conventions of the language) and parts
|
21
|
+
# of speech.
|
22
|
+
attr_reader :homographs
|
23
|
+
|
24
|
+
# @return [Hash{Symbol => String}] glosses for the current lemma. The keys
|
25
|
+
# are language tags and the values the glosses.
|
26
|
+
attr_reader :glosses
|
27
|
+
attr_reader :paradigm
|
28
|
+
attr_reader :valency
|
29
|
+
|
30
|
+
# Creates a new lemma object.
|
31
|
+
def initialize(parent, xml = nil)
|
32
|
+
@dictionary = parent
|
33
|
+
|
34
|
+
@n = nil
|
35
|
+
|
36
|
+
@distribution = {}
|
37
|
+
@homographs = []
|
38
|
+
@glosses = {}
|
39
|
+
@paradigm = {}
|
40
|
+
@valency = []
|
41
|
+
|
42
|
+
from_xml(xml) if xml
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
def from_xml(xml)
|
48
|
+
@n = nullify(xml.n, :int)
|
49
|
+
|
50
|
+
@distribution = xml.distribution.map { |h| [h.idref, nullify(h.n, :int)] }.to_h
|
51
|
+
@glosses = xml.glosses.map { |h| [h.language.to_sym, h.gloss] }.to_h
|
52
|
+
@homographs = xml.homographs.map { |h| [h.lemma, h.part_of_speech] }
|
53
|
+
@paradigm = xml.paradigm.map { |slot1| [slot1.morphology, slot1.slot2s.map { |slot2| [slot2.form, nullify(slot2.n, :int)] }.to_h] }.to_h
|
54
|
+
@valency =
|
55
|
+
xml.valency.map do |frame|
|
56
|
+
{
|
57
|
+
arguments: frame.arguments.map { |a| { relation: a.relation, lemma: a.lemma, part_of_speech: a.part_of_speech, mood: a.mood, case: a.case } },
|
58
|
+
tokens: frame.tokens.map { |t| { flags: t.flags, idref: t.idref } },
|
59
|
+
}
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def nullify(s, type = nil)
|
64
|
+
case s
|
65
|
+
when NilClass, /^\s*$/
|
66
|
+
nil
|
67
|
+
else
|
68
|
+
case type
|
69
|
+
when :int
|
70
|
+
s.to_i
|
71
|
+
else
|
72
|
+
s.to_s
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
data/lib/proiel/proiel_xml/proiel-3.0/proiel-3.0.xsd
ADDED
@@ -0,0 +1,383 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
|
3
|
+
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
|
4
|
+
<xs:annotation>
|
5
|
+
<xs:documentation>PROIEL XML format version 3.0</xs:documentation>
|
6
|
+
</xs:annotation>
|
7
|
+
|
8
|
+
<!-- Source elements -->
|
9
|
+
<xs:complexType name="Source">
|
10
|
+
<xs:sequence>
|
11
|
+
<xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
|
12
|
+
<xs:element name="alternative-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
13
|
+
<xs:element name="author" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
14
|
+
<xs:element name="citation-part" minOccurs="1" maxOccurs="1" type="xs:string"/>
|
15
|
+
<xs:element name="principal" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
16
|
+
<xs:element name="funder" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
17
|
+
<xs:element name="distributor" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
18
|
+
<xs:element name="distributor-address" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
19
|
+
<xs:element name="address" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
20
|
+
<xs:element name="date" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
21
|
+
<xs:element name="license" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
22
|
+
<xs:element name="license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
23
|
+
<xs:element name="reference-system" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
24
|
+
<xs:element name="editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
25
|
+
<xs:element name="editorial-note" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
26
|
+
<xs:element name="annotator" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
27
|
+
<xs:element name="reviewer" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
28
|
+
<xs:element name="electronic-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
29
|
+
<xs:element name="electronic-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
30
|
+
<xs:element name="electronic-text-version" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
31
|
+
<xs:element name="electronic-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
32
|
+
<xs:element name="electronic-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
33
|
+
<xs:element name="electronic-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
34
|
+
<xs:element name="electronic-text-original-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
35
|
+
<xs:element name="electronic-text-license" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
36
|
+
<xs:element name="electronic-text-license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
37
|
+
<xs:element name="printed-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
38
|
+
<xs:element name="printed-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
39
|
+
<xs:element name="printed-text-edition" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
40
|
+
<xs:element name="printed-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
41
|
+
<xs:element name="printed-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
42
|
+
<xs:element name="printed-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
43
|
+
<xs:element name="chronology-composition" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
44
|
+
<xs:element name="chronology-manuscript" minOccurs="0" maxOccurs="1" type="xs:string"/>
|
45
|
+
|
46
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
47
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
48
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
49
|
+
|
50
|
+
<xs:element name="div" minOccurs="1" maxOccurs="unbounded" type="Div"/>
|
51
|
+
|
52
|
+
</xs:sequence>
|
53
|
+
|
54
|
+
<xs:attribute name="id" type="xs:string" use="required"/>
|
55
|
+
<xs:attribute name="alignment-id" type="xs:string" use="optional"/>
|
56
|
+
<xs:attribute name="language" type="xs:string" use="required"/>
|
57
|
+
<xs:attribute name="dialect" type="xs:string" use="optional"/>
|
58
|
+
</xs:complexType>
|
59
|
+
|
60
|
+
<xs:complexType name="Div">
|
61
|
+
<xs:sequence>
|
62
|
+
<xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
|
63
|
+
|
64
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
65
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
66
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
67
|
+
|
68
|
+
<xs:element name="sentence" minOccurs="1" maxOccurs="unbounded" type="Sentence"/>
|
69
|
+
</xs:sequence>
|
70
|
+
|
71
|
+
<xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
|
72
|
+
<xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
|
73
|
+
<xs:attribute name="presentation-before" type="xs:string" use="optional"/>
|
74
|
+
<xs:attribute name="presentation-after" type="xs:string" use="optional"/>
|
75
|
+
</xs:complexType>
|
76
|
+
|
77
|
+
<xs:complexType name="Sentence">
|
78
|
+
<xs:sequence>
|
79
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
80
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
81
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
82
|
+
|
83
|
+
<xs:element name="token" minOccurs="1" maxOccurs="unbounded" type="Token"/>
|
84
|
+
</xs:sequence>
|
85
|
+
|
86
|
+
<xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
|
87
|
+
<xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
|
88
|
+
<xs:attribute name="presentation-before" type="xs:string" use="optional"/>
|
89
|
+
<xs:attribute name="presentation-after" type="xs:string" use="optional"/>
|
90
|
+
<xs:attribute name="status" type="SentenceStatus" use="optional"/>
|
91
|
+
<xs:attribute name="annotated-by" type="xs:string" use="optional"/>
|
92
|
+
<xs:attribute name="annotated-at" type="xs:dateTime" use="optional"/>
|
93
|
+
<xs:attribute name="reviewed-by" type="xs:string" use="optional"/>
|
94
|
+
<xs:attribute name="reviewed-at" type="xs:dateTime" use="optional"/>
|
95
|
+
</xs:complexType>
|
96
|
+
|
97
|
+
<xs:simpleType name="SentenceStatus">
|
98
|
+
<xs:restriction base="xs:string">
|
99
|
+
<xs:enumeration value="annotated"/>
|
100
|
+
<xs:enumeration value="reviewed"/>
|
101
|
+
<xs:enumeration value="unannotated"/>
|
102
|
+
</xs:restriction>
|
103
|
+
</xs:simpleType>
|
104
|
+
|
105
|
+
<xs:complexType name="Token">
|
106
|
+
<xs:sequence>
|
107
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
108
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
109
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
110
|
+
|
111
|
+
<xs:element name="slash" minOccurs="0" maxOccurs="unbounded" type="Slash"/>
|
112
|
+
</xs:sequence>
|
113
|
+
|
114
|
+
<xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
|
115
|
+
<xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
|
116
|
+
<xs:attribute name="lemma" type="xs:string" use="optional"/>
|
117
|
+
<xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
|
118
|
+
<xs:attribute name="morphology" type="xs:string" use="optional"/>
|
119
|
+
<xs:attribute name="citation-part" type="xs:string" use="optional"/>
|
120
|
+
<xs:attribute name="relation" type="xs:string" use="optional"/>
|
121
|
+
<xs:attribute name="head-id" type="xs:nonNegativeInteger" use="optional"/>
|
122
|
+
<xs:attribute name="information-status" type="xs:string" use="optional"/>
|
123
|
+
<xs:attribute name="antecedent-id" type="xs:nonNegativeInteger" use="optional"/>
|
124
|
+
<xs:attribute name="contrast-group" type="xs:string" use="optional"/>
|
125
|
+
<xs:attribute name="foreign-ids" type="xs:string" use="optional"/>
|
126
|
+
|
127
|
+
<!-- XSD does not allow us to constrain the use of these attributes properly so they are marked optional even though their occurrence depends on the value of empty-token-sort and form. -->
|
128
|
+
<xs:attribute name="empty-token-sort" type="EmptyTokenSort" use="optional"/>
|
129
|
+
<xs:attribute name="form" type="xs:string" use="optional"/>
|
130
|
+
<xs:attribute name="presentation-before" type="xs:string" use="optional"/>
|
131
|
+
<xs:attribute name="presentation-after" type="xs:string" use="optional"/>
|
132
|
+
</xs:complexType>
|
133
|
+
|
134
|
+
<xs:simpleType name="EmptyTokenSort">
|
135
|
+
<xs:restriction base="xs:string">
|
136
|
+
<xs:enumeration value="P"/>
|
137
|
+
<xs:enumeration value="C"/>
|
138
|
+
<xs:enumeration value="V"/>
|
139
|
+
</xs:restriction>
|
140
|
+
</xs:simpleType>
|
141
|
+
|
142
|
+
<xs:complexType name="Slash">
|
143
|
+
<xs:attribute name="target-id" type="xs:nonNegativeInteger" use="required"/>
|
144
|
+
<xs:attribute name="relation" type="xs:string" use="required"/>
|
145
|
+
</xs:complexType>
|
146
|
+
|
147
|
+
<!-- Shared metadata elements -->
|
148
|
+
<xs:complexType name="Tag">
|
149
|
+
<xs:attribute name="attribute" type="xs:string" use="required"/>
|
150
|
+
<xs:attribute name="value" type="xs:string" use="required"/>
|
151
|
+
<xs:attribute name="target-id" type="xs:string" use="optional"/>
|
152
|
+
<xs:attribute name="target-type" type="xs:string" use="optional"/>
|
153
|
+
</xs:complexType>
|
154
|
+
|
155
|
+
<xs:complexType name="Link">
|
156
|
+
<xs:attribute name="target" type="xs:string" use="required"/>
|
157
|
+
<xs:attribute name="type" type="xs:string" use="required"/>
|
158
|
+
</xs:complexType>
|
159
|
+
|
160
|
+
<xs:complexType name="Note">
|
161
|
+
<xs:simpleContent>
|
162
|
+
<xs:extension base="xs:string">
|
163
|
+
<xs:attribute name="originator" type="xs:string" use="required"/>
|
164
|
+
</xs:extension>
|
165
|
+
</xs:simpleContent>
|
166
|
+
</xs:complexType>
|
167
|
+
|
168
|
+
<!-- Dictionary elements -->
|
169
|
+
<xs:complexType name="DictionarySource">
|
170
|
+
<xs:attribute name="idref" type="xs:string" use="required"/>
|
171
|
+
<xs:attribute name="license" type="xs:string" use="optional"/>
|
172
|
+
<xs:attribute name="n" type="xs:nonNegativeInteger" use="optional"/>
|
173
|
+
</xs:complexType>
|
174
|
+
|
175
|
+
<xs:complexType name="DictionaryToken">
|
176
|
+
<xs:attribute name="idref" type="xs:string" use="required"/>
|
177
|
+
<xs:attribute name="flags" type="xs:string" use="optional"/>
|
178
|
+
</xs:complexType>
|
179
|
+
|
180
|
+
<xs:complexType name="DictionaryArgument">
|
181
|
+
<xs:attribute name="relation" type="xs:string" use="required"/>
|
182
|
+
<xs:attribute name="lemma" type="xs:string" use="optional"/>
|
183
|
+
<xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
|
184
|
+
<xs:attribute name="mood" type="xs:string" use="optional"/>
|
185
|
+
<xs:attribute name="case" type="xs:string" use="optional"/>
|
186
|
+
</xs:complexType>
|
187
|
+
|
188
|
+
<xs:complexType name="DictionaryTokens">
|
189
|
+
<xs:sequence>
|
190
|
+
<xs:element name="token" minOccurs="0" maxOccurs="unbounded" type="DictionaryToken"/>
|
191
|
+
</xs:sequence>
|
192
|
+
</xs:complexType>
|
193
|
+
|
194
|
+
<xs:complexType name="DictionaryArguments">
|
195
|
+
<xs:sequence>
|
196
|
+
<xs:element name="argument" minOccurs="0" maxOccurs="unbounded" type="DictionaryArgument"/>
|
197
|
+
</xs:sequence>
|
198
|
+
</xs:complexType>
|
199
|
+
|
200
|
+
<xs:complexType name="DictionaryFrame">
|
201
|
+
<xs:sequence>
|
202
|
+
<xs:element name="arguments" minOccurs="1" maxOccurs="1" type="DictionaryArguments"/>
|
203
|
+
<xs:element name="tokens" minOccurs="1" maxOccurs="unbounded" type="DictionaryTokens"/>
|
204
|
+
</xs:sequence>
|
205
|
+
</xs:complexType>
|
206
|
+
|
207
|
+
<xs:complexType name="DictionaryValency">
|
208
|
+
<xs:sequence>
|
209
|
+
<xs:element name="frame" minOccurs="1" maxOccurs="unbounded" type="DictionaryFrame"/>
|
210
|
+
</xs:sequence>
|
211
|
+
</xs:complexType>
|
212
|
+
|
213
|
+
<xs:complexType name="DictionarySlot2">
|
214
|
+
<xs:attribute name="form" type="xs:string" use="required"/>
|
215
|
+
<xs:attribute name="n" type="xs:nonNegativeInteger" use="required"/>
|
216
|
+
</xs:complexType>
|
217
|
+
|
218
|
+
<xs:complexType name="DictionarySlot1">
|
219
|
+
<xs:sequence>
|
220
|
+
<xs:element name="slot2" minOccurs="1" maxOccurs="unbounded" type="DictionarySlot2"/>
|
221
|
+
</xs:sequence>
|
222
|
+
|
223
|
+
<xs:attribute name="morphology" type="xs:string" use="required"/>
|
224
|
+
</xs:complexType>
|
225
|
+
|
226
|
+
<xs:complexType name="DictionaryParadigm">
|
227
|
+
<xs:sequence>
|
228
|
+
<xs:element name="slot1" minOccurs="1" maxOccurs="unbounded" type="DictionarySlot1"/>
|
229
|
+
</xs:sequence>
|
230
|
+
</xs:complexType>
|
231
|
+
|
232
|
+
<xs:complexType name="DictionaryHomograph">
|
233
|
+
<xs:attribute name="lemma" type="xs:string" use="required"/>
|
234
|
+
<xs:attribute name="part-of-speech" type="xs:string" use="required"/>
|
235
|
+
</xs:complexType>
|
236
|
+
|
237
|
+
<xs:complexType name="DictionaryHomographs">
|
238
|
+
<xs:sequence>
|
239
|
+
<xs:element name='homograph' minOccurs="1" maxOccurs="unbounded" type='DictionaryHomograph'/>
|
240
|
+
</xs:sequence>
|
241
|
+
</xs:complexType>
|
242
|
+
|
243
|
+
<xs:complexType name="DictionaryGloss">
|
244
|
+
<xs:simpleContent>
|
245
|
+
<xs:extension base="xs:string">
|
246
|
+
<xs:attribute name="language" type="xs:string" use="required"/>
|
247
|
+
</xs:extension>
|
248
|
+
</xs:simpleContent>
|
249
|
+
</xs:complexType>
|
250
|
+
|
251
|
+
<xs:complexType name="DictionaryGlosses">
|
252
|
+
<xs:sequence>
|
253
|
+
<xs:element name='gloss' minOccurs="1" maxOccurs="unbounded" type='DictionaryGloss'/>
|
254
|
+
</xs:sequence>
|
255
|
+
</xs:complexType>
|
256
|
+
|
257
|
+
<xs:complexType name="DictionaryDistribution">
|
258
|
+
<xs:sequence>
|
259
|
+
<xs:element name='source' minOccurs="1" maxOccurs="unbounded" type='DictionarySource'/>
|
260
|
+
</xs:sequence>
|
261
|
+
</xs:complexType>
|
262
|
+
|
263
|
+
<xs:complexType name="DictionaryLemma">
|
264
|
+
<xs:sequence>
|
265
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
266
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
267
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
268
|
+
|
269
|
+
<xs:element name="distribution" minOccurs="0" maxOccurs="1" type="DictionaryDistribution"/>
|
270
|
+
<xs:element name="glosses" minOccurs="0" maxOccurs="1" type="DictionaryGlosses"/>
|
271
|
+
<xs:element name="homographs" minOccurs="0" maxOccurs="1" type="DictionaryHomographs"/>
|
272
|
+
<xs:element name="paradigm" minOccurs="0" maxOccurs="1" type="DictionaryParadigm"/>
|
273
|
+
<xs:element name="valency" minOccurs="0" maxOccurs="1" type="DictionaryValency"/>
|
274
|
+
</xs:sequence>
|
275
|
+
|
276
|
+
<xs:attribute name="lemma" type="xs:string" use="required"/>
|
277
|
+
<xs:attribute name="part-of-speech" type="xs:string" use="required"/>
|
278
|
+
</xs:complexType>
|
279
|
+
|
280
|
+
<xs:complexType name="DictionarySources">
|
281
|
+
<xs:sequence>
|
282
|
+
<xs:element name='source' minOccurs="1" maxOccurs="unbounded" type='DictionarySource'/>
|
283
|
+
</xs:sequence>
|
284
|
+
</xs:complexType>
|
285
|
+
|
286
|
+
<xs:complexType name="DictionaryLemmata">
|
287
|
+
<xs:sequence>
|
288
|
+
<xs:element name='lemma' minOccurs="1" maxOccurs="unbounded" type='DictionaryLemma'/>
|
289
|
+
</xs:sequence>
|
290
|
+
</xs:complexType>
|
291
|
+
|
292
|
+
<xs:complexType name="Dictionary">
|
293
|
+
<xs:sequence>
|
294
|
+
<xs:element name="tag" minOccurs="0" maxOccurs="unbounded" type="Tag"/>
|
295
|
+
<xs:element name="note" minOccurs="0" maxOccurs="unbounded" type="Note"/>
|
296
|
+
<xs:element name="link" minOccurs="0" maxOccurs="unbounded" type="Link"/>
|
297
|
+
|
298
|
+
<xs:element name='sources' minOccurs="0" maxOccurs="1" type='DictionarySources'/>
|
299
|
+
<xs:element name='lemmata' minOccurs="0" maxOccurs="1" type='DictionaryLemmata'/>
|
300
|
+
</xs:sequence>
|
301
|
+
|
302
|
+
<xs:attribute name="language" type="xs:string" use="required"/>
|
303
|
+
<xs:attribute name="dialect" type="xs:string" use="optional"/>
|
304
|
+
</xs:complexType>
|
305
|
+
|
306
|
+
<!-- Annotation elements -->
|
307
|
+
<xs:complexType name="PartOfSpeechValue">
|
308
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
309
|
+
<xs:attribute name="summary" type="xs:string" use="required"/>
|
310
|
+
</xs:complexType>
|
311
|
+
|
312
|
+
<xs:complexType name="PartsOfSpeech">
|
313
|
+
<xs:sequence>
|
314
|
+
<xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='PartOfSpeechValue'/>
|
315
|
+
</xs:sequence>
|
316
|
+
</xs:complexType>
|
317
|
+
|
318
|
+
<xs:complexType name="InformationStatusValue">
|
319
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
320
|
+
<xs:attribute name="summary" type="xs:string" use="required"/>
|
321
|
+
</xs:complexType>
|
322
|
+
|
323
|
+
<xs:complexType name="InformationStatuses">
|
324
|
+
<xs:sequence>
|
325
|
+
<xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='InformationStatusValue'/>
|
326
|
+
</xs:sequence>
|
327
|
+
</xs:complexType>
|
328
|
+
|
329
|
+
<xs:complexType name="RelationValue">
|
330
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
331
|
+
<xs:attribute name="summary" type="xs:string" use="required"/>
|
332
|
+
<xs:attribute name="primary" type="xs:boolean" use="required"/>
|
333
|
+
<xs:attribute name="secondary" type="xs:boolean" use="required"/>
|
334
|
+
</xs:complexType>
|
335
|
+
|
336
|
+
<xs:complexType name="Relations">
|
337
|
+
<xs:sequence>
|
338
|
+
<xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='RelationValue'/>
|
339
|
+
</xs:sequence>
|
340
|
+
</xs:complexType>
|
341
|
+
|
342
|
+
<xs:complexType name="MorphologyValue">
|
343
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
344
|
+
<xs:attribute name="summary" type="xs:string" use="required"/>
|
345
|
+
</xs:complexType>
|
346
|
+
|
347
|
+
<xs:complexType name="MorphologyField">
|
348
|
+
<xs:sequence>
|
349
|
+
<xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='MorphologyValue'/>
|
350
|
+
</xs:sequence>
|
351
|
+
|
352
|
+
<xs:attribute name="tag" type="xs:string" use="required"/>
|
353
|
+
</xs:complexType>
|
354
|
+
|
355
|
+
<xs:complexType name="Morphology">
|
356
|
+
<xs:sequence>
|
357
|
+
<xs:element name='field' minOccurs="1" maxOccurs="unbounded" type='MorphologyField'/>
|
358
|
+
</xs:sequence>
|
359
|
+
</xs:complexType>
|
360
|
+
|
361
|
+
<xs:complexType name="Annotation">
|
362
|
+
<xs:sequence>
|
363
|
+
<xs:element name='relations' minOccurs="1" maxOccurs="1" type='Relations'/>
|
364
|
+
<xs:element name='parts-of-speech' minOccurs="1" maxOccurs="1" type='PartsOfSpeech'/>
|
365
|
+
<xs:element name='morphology' minOccurs="1" maxOccurs="1" type='Morphology'/>
|
366
|
+
<xs:element name='information-statuses' minOccurs="1" maxOccurs="1" type='InformationStatuses'/>
|
367
|
+
</xs:sequence>
|
368
|
+
</xs:complexType>
|
369
|
+
|
370
|
+
<!-- Top-level element -->
|
371
|
+
<xs:complexType name="Proiel">
|
372
|
+
<xs:sequence>
|
373
|
+
<xs:element name='annotation' minOccurs="0" maxOccurs="1" type='Annotation'/>
|
374
|
+
<xs:element name='source' minOccurs="0" maxOccurs="unbounded" type='Source'/>
|
375
|
+
<xs:element name='dictionary' minOccurs="0" maxOccurs="unbounded" type='Dictionary'/>
|
376
|
+
</xs:sequence>
|
377
|
+
|
378
|
+
<xs:attribute name='export-time' type="xs:dateTime" use="optional"/>
|
379
|
+
<xs:attribute name="schema-version" type="xs:decimal" use="required" fixed="3.0"/>
|
380
|
+
</xs:complexType>
|
381
|
+
|
382
|
+
<xs:element name='proiel' type='Proiel'/>
|
383
|
+
</xs:schema>
|