proiel 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +19 -0
- data/README.md +99 -0
- data/bin/console +6 -0
- data/bin/setup +5 -0
- data/lib/proiel/annotation_schema.rb +127 -0
- data/lib/proiel/citations.rb +84 -0
- data/lib/proiel/div.rb +133 -0
- data/lib/proiel/positional_tag.rb +127 -0
- data/lib/proiel/proiel_xml/proiel-1.0/proiel-1.0.xsd +172 -0
- data/lib/proiel/proiel_xml/proiel-1.0/teilite.xsd +7387 -0
- data/lib/proiel/proiel_xml/proiel-1.0/xml.xsd +287 -0
- data/lib/proiel/proiel_xml/proiel-2.0/proiel-2.0.xsd +185 -0
- data/lib/proiel/proiel_xml/reader.rb +237 -0
- data/lib/proiel/proiel_xml/schema.rb +81 -0
- data/lib/proiel/proiel_xml/validator.rb +177 -0
- data/lib/proiel/sentence.rb +191 -0
- data/lib/proiel/source.rb +114 -0
- data/lib/proiel/statistics.rb +41 -0
- data/lib/proiel/token.rb +407 -0
- data/lib/proiel/tokenization.rb +90 -0
- data/lib/proiel/treebank.rb +214 -0
- data/lib/proiel/treebank_object.rb +21 -0
- data/lib/proiel/version.rb +9 -0
- data/lib/proiel.rb +28 -0
- metadata +210 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
|
3
|
+
<!-- PROIEL XML format version 1.0 -->
|
4
|
+
|
5
|
+
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
6
|
+
xmlns:tei="http://www.tei-c.org/ns/1.0">
|
7
|
+
<xs:import namespace="http://www.w3.org/XML/1998/namespace"
|
8
|
+
schemaLocation="http://www.w3.org/2001/xml.xsd"/>
|
9
|
+
<xs:import namespace="http://www.tei-c.org/ns/1.0"
|
10
|
+
schemaLocation="http://www.tei-c.org/release/xml/tei/custom/schema/xsd/teilite.xsd"/>
|
11
|
+
|
12
|
+
<xs:annotation>
|
13
|
+
<xs:documentation xml:lang="en">PROIEL XML format version 1.0</xs:documentation>
|
14
|
+
</xs:annotation>
|
15
|
+
|
16
|
+
<!-- Source: an ordered sequence of title, author, edition, citation-part,
     tei-header and div children. author and edition may be omitted; div
     must appear at least once and may repeat; the remaining children occur
     exactly once. The id and language attributes are both mandatory. -->
<xs:complexType name="Source">
  <xs:sequence>
    <xs:element name="title" type="xs:string"/>
    <xs:element name="author" type="xs:string" minOccurs="0"/>
    <xs:element name="edition" type="xs:string" minOccurs="0"/>
    <xs:element name="citation-part" type="xs:string"/>
    <xs:element name="tei-header" type="TeiHeader"/>
    <xs:element name="div" type="Div" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="id" type="xs:string" use="required"/>
  <xs:attribute name="language" type="xs:string" use="required"/>
</xs:complexType>
|
28
|
+
|
29
|
+
<!-- TeiHeader: a container that may hold at most one teiHeader element
     taken from the imported TEI namespace (prefix tei). -->
<xs:complexType name="TeiHeader">
  <xs:sequence>
    <xs:element ref="tei:teiHeader" minOccurs="0"/>
  </xs:sequence>
</xs:complexType>
|
34
|
+
|
35
|
+
<!-- Div: exactly one title followed by one or more sentence elements.
     The two presentation attributes are optional strings. -->
<xs:complexType name="Div">
  <xs:sequence>
    <xs:element name="title" type="xs:string"/>
    <xs:element name="sentence" type="Sentence" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="presentation-before" type="xs:string"/>
  <xs:attribute name="presentation-after" type="xs:string"/>
</xs:complexType>
|
43
|
+
|
44
|
+
<!-- Sentence: one or more token children. All attributes are optional:
     a non-negative integer id, two presentation strings, and a status
     restricted to the SentenceStatus values. -->
<xs:complexType name="Sentence">
  <xs:sequence>
    <xs:element name="token" type="Token" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="id" type="xs:nonNegativeInteger"/>
  <xs:attribute name="presentation-before" type="xs:string"/>
  <xs:attribute name="presentation-after" type="xs:string"/>
  <xs:attribute name="status" type="SentenceStatus"/>
</xs:complexType>
|
53
|
+
|
54
|
+
<!-- SentenceStatus: string limited to the three literals listed below. -->
<xs:simpleType name="SentenceStatus">
  <xs:restriction base="xs:string">
    <xs:enumeration value="annotated"/>
    <xs:enumeration value="reviewed"/>
    <xs:enumeration value="unannotated"/>
  </xs:restriction>
</xs:simpleType>
|
61
|
+
|
62
|
+
<!-- Token: zero or more slash children plus a set of attributes, every one
     of which is declared optional at the schema level. -->
<xs:complexType name="Token">
  <xs:sequence>
    <xs:element name="slash" type="Slash" minOccurs="0" maxOccurs="unbounded"/>
  </xs:sequence>

  <!-- Identifier and annotation attributes. -->
  <xs:attribute name="id" type="xs:nonNegativeInteger"/>
  <xs:attribute name="lemma" type="xs:string"/>
  <xs:attribute name="part-of-speech" type="xs:string"/>
  <xs:attribute name="morphology" type="xs:string"/>
  <xs:attribute name="citation-part" type="xs:string"/>
  <xs:attribute name="relation" type="xs:string"/>
  <xs:attribute name="head-id" type="xs:nonNegativeInteger"/>
  <xs:attribute name="information-status" type="xs:string"/>
  <xs:attribute name="antecedent-id" type="xs:nonNegativeInteger"/>
  <xs:attribute name="contrast-group" type="xs:string"/>
  <xs:attribute name="foreign-ids" type="xs:string"/>

  <!-- XSD cannot express the real constraint on the following attributes:
       whether they may occur depends on the value of empty-token-sort and
       form, so they are all left optional here. -->
  <xs:attribute name="empty-token-sort" type="EmptyTokenSort"/>
  <xs:attribute name="form" type="xs:string"/>
  <xs:attribute name="presentation-before" type="xs:string"/>
  <xs:attribute name="presentation-after" type="xs:string"/>
</xs:complexType>
|
85
|
+
|
86
|
+
<!-- EmptyTokenSort: string limited to the single-letter codes P, C and V. -->
<xs:simpleType name="EmptyTokenSort">
  <xs:restriction base="xs:string">
    <xs:enumeration value="P"/>
    <xs:enumeration value="C"/>
    <xs:enumeration value="V"/>
  </xs:restriction>
</xs:simpleType>
|
93
|
+
|
94
|
+
<!-- Slash: empty element carrying a mandatory non-negative integer
     target-id and a mandatory relation string. -->
<xs:complexType name="Slash">
  <xs:attribute name="target-id" type="xs:nonNegativeInteger" use="required"/>
  <xs:attribute name="relation" type="xs:string" use="required"/>
</xs:complexType>
|
98
|
+
|
99
|
+
<!-- PartOfSpeechValue: empty element with mandatory tag and summary strings. -->
<xs:complexType name="PartOfSpeechValue">
  <xs:attribute name="tag" type="xs:string" use="required"/>
  <xs:attribute name="summary" type="xs:string" use="required"/>
</xs:complexType>
|
103
|
+
|
104
|
+
<!-- PartsOfSpeech: a non-empty list of value elements of type PartOfSpeechValue. -->
<xs:complexType name="PartsOfSpeech">
  <xs:sequence>
    <xs:element name="value" type="PartOfSpeechValue" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
|
109
|
+
|
110
|
+
<!-- InformationStatusValue: empty element with mandatory tag and summary strings. -->
<xs:complexType name="InformationStatusValue">
  <xs:attribute name="tag" type="xs:string" use="required"/>
  <xs:attribute name="summary" type="xs:string" use="required"/>
</xs:complexType>
|
114
|
+
|
115
|
+
<!-- InformationStatuses: a non-empty list of value elements of type
     InformationStatusValue. -->
<xs:complexType name="InformationStatuses">
  <xs:sequence>
    <xs:element name="value" type="InformationStatusValue" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
|
120
|
+
|
121
|
+
<!-- RelationValue: empty element with four mandatory attributes: tag and
     summary strings plus the boolean flags primary and secondary. -->
<xs:complexType name="RelationValue">
  <xs:attribute name="tag" type="xs:string" use="required"/>
  <xs:attribute name="summary" type="xs:string" use="required"/>
  <xs:attribute name="primary" type="xs:boolean" use="required"/>
  <xs:attribute name="secondary" type="xs:boolean" use="required"/>
</xs:complexType>
|
127
|
+
|
128
|
+
<!-- Relations: a non-empty list of value elements of type RelationValue. -->
<xs:complexType name="Relations">
  <xs:sequence>
    <xs:element name="value" type="RelationValue" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
|
133
|
+
|
134
|
+
<!-- MorphologyValue: empty element with mandatory tag and summary strings. -->
<xs:complexType name="MorphologyValue">
  <xs:attribute name="tag" type="xs:string" use="required"/>
  <xs:attribute name="summary" type="xs:string" use="required"/>
</xs:complexType>
|
138
|
+
|
139
|
+
<!-- MorphologyField: a non-empty list of value elements of type
     MorphologyValue, labelled by a mandatory tag attribute. -->
<xs:complexType name="MorphologyField">
  <xs:sequence>
    <xs:element name="value" type="MorphologyValue" maxOccurs="unbounded"/>
  </xs:sequence>

  <xs:attribute name="tag" type="xs:string" use="required"/>
</xs:complexType>
|
146
|
+
|
147
|
+
<!-- Morphology: a non-empty list of field elements of type MorphologyField. -->
<xs:complexType name="Morphology">
  <xs:sequence>
    <xs:element name="field" type="MorphologyField" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
|
152
|
+
|
153
|
+
<!-- Annotation: exactly one each of relations, parts-of-speech, morphology
     and information-statuses, in that order. -->
<xs:complexType name="Annotation">
  <xs:sequence>
    <xs:element name="relations" type="Relations"/>
    <xs:element name="parts-of-speech" type="PartsOfSpeech"/>
    <xs:element name="morphology" type="Morphology"/>
    <xs:element name="information-statuses" type="InformationStatuses"/>
  </xs:sequence>
</xs:complexType>
|
161
|
+
|
162
|
+
<!-- Proiel: an optional annotation element followed by one or more source
     elements. export-time is an optional xs:dateTime; schema-version is a
     mandatory decimal whose value is fixed to 1.0. -->
<xs:complexType name="Proiel">
  <xs:sequence>
    <xs:element name="annotation" type="Annotation" minOccurs="0"/>
    <xs:element name="source" type="Source" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="export-time" type="xs:dateTime"/>
  <xs:attribute name="schema-version" type="xs:decimal" use="required" fixed="1.0"/>
</xs:complexType>
|
170
|
+
|
171
|
+
<!-- Global element declaration: proiel, of type Proiel. -->
<xs:element name="proiel" type="Proiel"/>
|
172
|
+
</xs:schema>
|