proiel 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14098ee93fc7b4faf6ce3a3f0c4cb17ffab29e31
4
- data.tar.gz: 8a010d9fd1a146d2e4091018b6357ba156792afd
3
+ metadata.gz: 39313c422eb3b2d2f3ad565c0cde3cbd3ddb5271
4
+ data.tar.gz: 1edadad95bbaad82d4d7ab1a9cc409f8e80d3a74
5
5
  SHA512:
6
- metadata.gz: b8f4164437ce019c8f86122bbe15e8af12b5ae2ef6e37fc05fe6b180f5771d98e4b92db56bd845f9ac6551e97b5a6733dbe8b6eb034081e0dac6c4d2d27d7366
7
- data.tar.gz: 48c062decf07fff6149b413cdf96489d03ddffd916d92d6952a7eb15d6043fa26fada9902367dd3d1f50bba77479bf9ad875e000a3a1b32866ea64c845aac87e
6
+ metadata.gz: 41bf1b5bcb3c8d8318128ea146b2609d02942d711553876d71c29cafc948312e79e8cd2e448fef751ca25c685c3f0d57a924004a46bdb3496a8f9913772e3e48
7
+ data.tar.gz: add1511098c62bdd4ee59fdd53e55b4b331595a5a5e02320e97dadff194e8b0b96fb24bd48511a02933d70a83208e0f5d40093a49eec959917177bf59589cbb7
data/lib/proiel/div.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -28,8 +28,11 @@ module PROIEL
28
28
  # @return [nil, String] presentation material after form
29
29
  attr_reader :presentation_after
30
30
 
31
+ # @return [nil, Integer] ID of the div that this div is aligned to
32
+ attr_reader :alignment_id
33
+
31
34
  # Creates a new div object.
32
- def initialize(parent, id, title, presentation_before, presentation_after, &block)
35
+ def initialize(parent, id, title, presentation_before, presentation_after, alignment_id, &block)
33
36
  @source = parent
34
37
 
35
38
  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
@@ -44,6 +47,9 @@ module PROIEL
44
47
  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? or presentation_after.is_a?(String)
45
48
  @presentation_after = presentation_after.freeze
46
49
 
50
+ raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? or alignment_id.is_a?(Integer)
51
+ @alignment_id = alignment_id
52
+
47
53
  @children = block.call(self) if block_given?
48
54
  end
49
55
 
@@ -0,0 +1,198 @@
1
+ <?xml version="1.0"?>
2
+
3
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
4
+ <xs:annotation>
5
+ <xs:documentation>PROIEL XML format version 2.1</xs:documentation>
6
+ </xs:annotation>
7
+
8
+ <xs:complexType name="Source">
9
+ <xs:sequence>
10
+ <xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
11
+ <xs:element name="author" minOccurs="0" maxOccurs="1" type="xs:string"/>
12
+ <xs:element name="citation-part" minOccurs="1" maxOccurs="1" type="xs:string"/>
13
+ <xs:element name="principal" minOccurs="0" maxOccurs="1" type="xs:string"/>
14
+ <xs:element name="funder" minOccurs="0" maxOccurs="1" type="xs:string"/>
15
+ <xs:element name="distributor" minOccurs="0" maxOccurs="1" type="xs:string"/>
16
+ <xs:element name="distributor-address" minOccurs="0" maxOccurs="1" type="xs:string"/>
17
+ <xs:element name="address" minOccurs="0" maxOccurs="1" type="xs:string"/>
18
+ <xs:element name="date" minOccurs="0" maxOccurs="1" type="xs:string"/>
19
+ <xs:element name="license" minOccurs="0" maxOccurs="1" type="xs:string"/>
20
+ <xs:element name="license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
21
+ <xs:element name="reference-system" minOccurs="0" maxOccurs="1" type="xs:string"/>
22
+ <xs:element name="editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
23
+ <xs:element name="editorial-note" minOccurs="0" maxOccurs="1" type="xs:string"/>
24
+ <xs:element name="annotator" minOccurs="0" maxOccurs="1" type="xs:string"/>
25
+ <xs:element name="reviewer" minOccurs="0" maxOccurs="1" type="xs:string"/>
26
+ <xs:element name="electronic-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
27
+ <xs:element name="electronic-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
28
+ <xs:element name="electronic-text-version" minOccurs="0" maxOccurs="1" type="xs:string"/>
29
+ <xs:element name="electronic-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
30
+ <xs:element name="electronic-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
31
+ <xs:element name="electronic-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
32
+ <xs:element name="electronic-text-original-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
33
+ <xs:element name="electronic-text-license" minOccurs="0" maxOccurs="1" type="xs:string"/>
34
+ <xs:element name="electronic-text-license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
35
+ <xs:element name="printed-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
36
+ <xs:element name="printed-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
37
+ <xs:element name="printed-text-edition" minOccurs="0" maxOccurs="1" type="xs:string"/>
38
+ <xs:element name="printed-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
39
+ <xs:element name="printed-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
40
+ <xs:element name="printed-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
41
+
42
+ <xs:element name="div" minOccurs="1" maxOccurs="unbounded" type="Div"/>
43
+ </xs:sequence>
44
+
45
+ <xs:attribute name="id" type="xs:string" use="required"/>
46
+ <xs:attribute name="alignment-id" type="xs:string" use="optional"/>
47
+ <xs:attribute name="language" type="xs:string" use="required"/>
48
+ </xs:complexType>
49
+
50
+ <xs:complexType name="Div">
51
+ <xs:sequence>
52
+ <xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
53
+ <xs:element name="sentence" minOccurs="1" maxOccurs="unbounded" type="Sentence"/>
54
+ </xs:sequence>
55
+
56
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
57
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
58
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
59
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
60
+ </xs:complexType>
61
+
62
+ <xs:complexType name="Sentence">
63
+ <xs:sequence>
64
+ <xs:element name="token" minOccurs="1" maxOccurs="unbounded" type="Token"/>
65
+ </xs:sequence>
66
+
67
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
68
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
69
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
70
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
71
+ <xs:attribute name="status" type="SentenceStatus" use="optional"/>
72
+ <xs:attribute name="annotated-by" type="xs:string" use="optional"/>
73
+ <xs:attribute name="annotated-at" type="xs:dateTime" use="optional"/>
74
+ <xs:attribute name="reviewed-by" type="xs:string" use="optional"/>
75
+ <xs:attribute name="reviewed-at" type="xs:dateTime" use="optional"/>
76
+ </xs:complexType>
77
+
78
+ <xs:simpleType name="SentenceStatus">
79
+ <xs:restriction base="xs:string">
80
+ <xs:enumeration value="annotated"/>
81
+ <xs:enumeration value="reviewed"/>
82
+ <xs:enumeration value="unannotated"/>
83
+ </xs:restriction>
84
+ </xs:simpleType>
85
+
86
+ <xs:complexType name="Token">
87
+ <xs:sequence>
88
+ <xs:element name="slash" minOccurs="0" maxOccurs="unbounded" type="Slash"/>
89
+ </xs:sequence>
90
+
91
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
92
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
93
+ <xs:attribute name="lemma" type="xs:string" use="optional"/>
94
+ <xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
95
+ <xs:attribute name="morphology" type="xs:string" use="optional"/>
96
+ <xs:attribute name="citation-part" type="xs:string" use="optional"/>
97
+ <xs:attribute name="relation" type="xs:string" use="optional"/>
98
+ <xs:attribute name="head-id" type="xs:nonNegativeInteger" use="optional"/>
99
+ <xs:attribute name="information-status" type="xs:string" use="optional"/>
100
+ <xs:attribute name="antecedent-id" type="xs:nonNegativeInteger" use="optional"/>
101
+ <xs:attribute name="contrast-group" type="xs:string" use="optional"/>
102
+ <xs:attribute name="foreign-ids" type="xs:string" use="optional"/>
103
+
104
+ <!-- XSD does not allow us to constrain the use of these attributes properly so they are marked optional even though their occurrence depends on the value of empty-token-sort and form. -->
105
+ <xs:attribute name="empty-token-sort" type="EmptyTokenSort" use="optional"/>
106
+ <xs:attribute name="form" type="xs:string" use="optional"/>
107
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
108
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
109
+ </xs:complexType>
110
+
111
+ <xs:simpleType name="EmptyTokenSort">
112
+ <xs:restriction base="xs:string">
113
+ <xs:enumeration value="P"/>
114
+ <xs:enumeration value="C"/>
115
+ <xs:enumeration value="V"/>
116
+ </xs:restriction>
117
+ </xs:simpleType>
118
+
119
+ <xs:complexType name="Slash">
120
+ <xs:attribute name="target-id" type="xs:nonNegativeInteger" use="required"/>
121
+ <xs:attribute name="relation" type="xs:string" use="required"/>
122
+ </xs:complexType>
123
+
124
+ <xs:complexType name="PartOfSpeechValue">
125
+ <xs:attribute name="tag" type="xs:string" use="required"/>
126
+ <xs:attribute name="summary" type="xs:string" use="required"/>
127
+ </xs:complexType>
128
+
129
+ <xs:complexType name="PartsOfSpeech">
130
+ <xs:sequence>
131
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='PartOfSpeechValue'/>
132
+ </xs:sequence>
133
+ </xs:complexType>
134
+
135
+ <xs:complexType name="InformationStatusValue">
136
+ <xs:attribute name="tag" type="xs:string" use="required"/>
137
+ <xs:attribute name="summary" type="xs:string" use="required"/>
138
+ </xs:complexType>
139
+
140
+ <xs:complexType name="InformationStatuses">
141
+ <xs:sequence>
142
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='InformationStatusValue'/>
143
+ </xs:sequence>
144
+ </xs:complexType>
145
+
146
+ <xs:complexType name="RelationValue">
147
+ <xs:attribute name="tag" type="xs:string" use="required"/>
148
+ <xs:attribute name="summary" type="xs:string" use="required"/>
149
+ <xs:attribute name="primary" type="xs:boolean" use="required"/>
150
+ <xs:attribute name="secondary" type="xs:boolean" use="required"/>
151
+ </xs:complexType>
152
+
153
+ <xs:complexType name="Relations">
154
+ <xs:sequence>
155
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='RelationValue'/>
156
+ </xs:sequence>
157
+ </xs:complexType>
158
+
159
+ <xs:complexType name="MorphologyValue">
160
+ <xs:attribute name="tag" type="xs:string" use="required"/>
161
+ <xs:attribute name="summary" type="xs:string" use="required"/>
162
+ </xs:complexType>
163
+
164
+ <xs:complexType name="MorphologyField">
165
+ <xs:sequence>
166
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='MorphologyValue'/>
167
+ </xs:sequence>
168
+
169
+ <xs:attribute name="tag" type="xs:string" use="required"/>
170
+ </xs:complexType>
171
+
172
+ <xs:complexType name="Morphology">
173
+ <xs:sequence>
174
+ <xs:element name='field' minOccurs="1" maxOccurs="unbounded" type='MorphologyField'/>
175
+ </xs:sequence>
176
+ </xs:complexType>
177
+
178
+ <xs:complexType name="Annotation">
179
+ <xs:sequence>
180
+ <xs:element name='relations' minOccurs="1" maxOccurs="1" type='Relations'/>
181
+ <xs:element name='parts-of-speech' minOccurs="1" maxOccurs="1" type='PartsOfSpeech'/>
182
+ <xs:element name='morphology' minOccurs="1" maxOccurs="1" type='Morphology'/>
183
+ <xs:element name='information-statuses' minOccurs="1" maxOccurs="1" type='InformationStatuses'/>
184
+ </xs:sequence>
185
+ </xs:complexType>
186
+
187
+ <xs:complexType name="Proiel">
188
+ <xs:sequence>
189
+ <xs:element name='annotation' minOccurs="0" maxOccurs="1" type='Annotation'/>
190
+ <xs:element name='source' minOccurs="1" maxOccurs="unbounded" type='Source'/>
191
+ </xs:sequence>
192
+
193
+ <xs:attribute name='export-time' type="xs:dateTime" use="optional"/>
194
+ <xs:attribute name="schema-version" type="xs:decimal" use="required" fixed="2.1"/>
195
+ </xs:complexType>
196
+
197
+ <xs:element name='proiel' type='Proiel'/>
198
+ </xs:schema>
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -20,6 +20,7 @@ module PROIEL
20
20
  include SAXMachine
21
21
 
22
22
  attribute :id, class: Integer, required: true
23
+ attribute :'alignment-id', as: :alignment_id, class: Integer, required: false
23
24
  attribute :'head-id', as: :head_id, class: Integer
24
25
  attribute :form
25
26
  attribute :lemma
@@ -43,7 +44,12 @@ module PROIEL
43
44
  include SAXMachine
44
45
 
45
46
  attribute :id, class: Integer, required: true
47
+ attribute :'alignment-id', as: :alignment_id, class: Integer, required: false
46
48
  attribute :status, class: Symbol, default: :unannotated
49
+ attribute :'annotated-by', as: :annotated_by, required: false
50
+ attribute :'reviewed-by', as: :reviewed_by, required: false
51
+ attribute :'annotated-at', as: :annotated_at, required: false
52
+ attribute :'reviewed-at', as: :reviewed_at, required: false
47
53
  attribute :'presentation-before', as: :presentation_before
48
54
  attribute :'presentation-after', as: :presentation_after
49
55
 
@@ -54,7 +60,8 @@ module PROIEL
54
60
  class Div
55
61
  include SAXMachine
56
62
 
57
- attribute :id
63
+ attribute :id, class: Integer, required: false
64
+ attribute :'alignment-id', as: :alignment_id, class: Integer, required: false
58
65
  attribute :'presentation-before', as: :presentation_before
59
66
  attribute :'presentation-after', as: :presentation_after
60
67
 
@@ -67,6 +74,7 @@ module PROIEL
67
74
  include SAXMachine
68
75
 
69
76
  attribute :id, required: true
77
+ attribute :'alignment-id', as: :alignment_id, required: false
70
78
  attribute :language, required: true
71
79
 
72
80
  element :title
@@ -16,7 +16,7 @@ module PROIEL
16
16
  # @return [String] schema version number
17
17
  #
18
18
  def self.current_proiel_xml_schema_version
19
- '2.0'
19
+ '2.1'
20
20
  end
21
21
 
22
22
  # Invalid PROIEL XML schema version error.
@@ -39,6 +39,8 @@ module PROIEL
39
39
  case doc.root.attr('schema-version')
40
40
  when '2.0'
41
41
  '2.0'
42
+ when '2.1'
43
+ '2.1'
42
44
  when NilClass
43
45
  '1.0'
44
46
  else
@@ -68,7 +70,7 @@ module PROIEL
68
70
  # @raise ArgumentError
69
71
  #
70
72
  def self.proiel_xml_schema_filename(schema_version)
71
- if schema_version == '1.0' or schema_version == '2.0'
73
+ if schema_version == '1.0' or schema_version == '2.0' or schema_version == '2.1'
72
74
  File.join(File.dirname(__FILE__),
73
75
  "proiel-#{schema_version}",
74
76
  "proiel-#{schema_version}.xsd")
@@ -145,6 +145,16 @@ module PROIEL
145
145
  # Pass 3: verify that all features are defined
146
146
  # TBD
147
147
 
148
+ # Pass 4: alignment_id on div, sentence or token requires an alignment_id on source
149
+ tb.sources.each do |source|
150
+ if source.alignment_id.nil?
151
+ if source.divs.any?(&:alignment_id) or source.sentences.any?(&:alignment_id) or source.tokens.any?(&:alignment_id)
152
+ errors << "Alignment ID(s) on divs, sentences or tokens without alignment ID on source"
153
+ end
154
+ end
155
+ end
156
+
157
+ # Decide if there were any errors
148
158
  if errors.empty?
149
159
  true
150
160
  else
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -23,8 +23,23 @@ module PROIEL
23
23
  # @return [nil, String] presentation material after sentence
24
24
  attr_reader :presentation_after
25
25
 
26
+ # @return [nil, Integer] ID of the sentence that this sentence is aligned to
27
+ attr_reader :alignment_id
28
+
29
+ # @return [nil, String] annotator of sentence
30
+ attr_reader :annotated_by
31
+
32
+ # @return [nil, String] reviewer of sentence
33
+ attr_reader :reviewed_by
34
+
35
+ # @return [nil, DateTime] time of annotation
36
+ attr_reader :annotated_at
37
+
38
+ # @return [nil, DateTime] time of review
39
+ attr_reader :reviewed_at
40
+
26
41
  # Creates a new sentence object.
27
- def initialize(parent, id, status, presentation_before, presentation_after, &block)
42
+ def initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block)
28
43
  @div = parent
29
44
 
30
45
  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
@@ -39,6 +54,21 @@ module PROIEL
39
54
  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? or presentation_after.is_a?(String)
40
55
  @presentation_after = presentation_after.freeze
41
56
 
57
+ raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? or alignment_id.is_a?(Integer)
58
+ @alignment_id = alignment_id
59
+
60
+ raise ArgumentError, 'XML schema date time or nil expected' unless annotated_at.nil? or PROIEL::Utilities.xmlschema_datetime?(annotated_at)
61
+ @annotated_at = annotated_at ? DateTime.xmlschema(annotated_at).freeze : nil
62
+
63
+ raise ArgumentError, 'XML schema date time or nil expected' unless reviewed_at.nil? or PROIEL::Utilities.xmlschema_datetime?(reviewed_at)
64
+ @reviewed_at = reviewed_at ? DateTime.xmlschema(reviewed_at).freeze : nil
65
+
66
+ raise ArgumentError, 'string or nil expected' unless annotated_by.nil? or annotated_by.is_a?(String)
67
+ @annotated_by = annotated_by.freeze
68
+
69
+ raise ArgumentError, 'string or nil expected' unless reviewed_by.nil? or reviewed_by.is_a?(String)
70
+ @reviewed_by = reviewed_by.freeze
71
+
42
72
  @children = block.call(self) if block_given?
43
73
  end
44
74
 
data/lib/proiel/source.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -9,7 +9,7 @@ module PROIEL
9
9
  # @return [String] ID of the source
10
10
  attr_reader :id
11
11
 
12
- # @return [Treebank] treebank that the div belongs to
12
+ # @return [Treebank] treebank that this source belongs to
13
13
  attr_reader :treebank
14
14
 
15
15
  # @return [String] language of the source as an ISO 639-3 language tag
@@ -22,13 +22,20 @@ module PROIEL
22
22
  # @see PROIEL::Treebank::METADATA_ELEMENTS
23
23
  attr_reader :metadata
24
24
 
25
+ # @return [nil, String] ID of the source that this source is aligned to
26
+ attr_reader :alignment_id
27
+
25
28
  # Creates a new source object.
26
- def initialize(parent, id, export_time, language, metadata, &block)
29
+ def initialize(parent, id, export_time, language, metadata, alignment_id, &block)
27
30
  @treebank = parent
28
31
  @id = id.freeze
29
32
  @export_time = DateTime.parse(export_time).freeze
30
33
  @language = language.freeze
31
34
  @metadata = metadata.freeze
35
+
36
+ raise ArgumentError, 'string or nil expected' unless alignment_id.nil? or alignment_id.is_a?(String)
37
+ @alignment_id = alignment_id.freeze
38
+
32
39
  @children = block.call(self) if block_given?
33
40
  end
34
41
 
data/lib/proiel/token.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -63,11 +63,15 @@ module PROIEL
63
63
  # @return [Array<Array<String,Fixnum>>] secondary edges as an array of pairs of relation tag and target token ID
64
64
  attr_reader :slashes
65
65
 
66
+ # @return [nil, Integer] ID of the token that this token is aligned to
67
+ attr_reader :alignment_id
68
+
66
69
  # Creates a new token object.
67
70
  def initialize(parent, id, head_id, form, lemma, part_of_speech,
68
71
  morphology, relation, empty_token_sort, citation_part,
69
72
  presentation_before, presentation_after, antecedent_id,
70
- information_status, contrast_group, foreign_ids, slashes)
73
+ information_status, contrast_group, foreign_ids, slashes,
74
+ alignment_id)
71
75
  @sentence = parent
72
76
 
73
77
  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
@@ -117,6 +121,9 @@ module PROIEL
117
121
 
118
122
  raise ArgumentError, 'array expected' unless slashes.is_a?(Array)
119
123
  @slashes = slashes.map { |s| [s.relation.freeze, s.target_id] }
124
+
125
+ raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? or alignment_id.is_a?(Integer)
126
+ @alignment_id = alignment_id
120
127
  end
121
128
 
122
129
  # @return [Div] parent div object
@@ -86,7 +86,7 @@ module PROIEL
86
86
 
87
87
  tf.proiel.sources.each do |s|
88
88
  @sources << Source.new(self, s.id, tf.proiel.export_time, s.language,
89
- bundle_metadata(s)) do |source|
89
+ bundle_metadata(s), s.alignment_id) do |source|
90
90
  build_divs(s, source)
91
91
  end
92
92
 
@@ -165,10 +165,11 @@ module PROIEL
165
165
  end
166
166
 
167
167
  def build_divs(s, source)
168
- # FIXME: for PROIEL XML > 2.0, we should respect d.id
168
+ # For PROIEL XML 2.0 we generate an ID, for PROIEL XML >= 2.1 we respect the ID
169
+ # from the XML file.
169
170
  s.divs.each_with_index.map do |d, i|
170
- Div.new(source, i + 1, d.title, d.presentation_before,
171
- d.presentation_after) do |div|
171
+ Div.new(source, d.id || i + 1, d.title, d.presentation_before,
172
+ d.presentation_after, d.alignment_id) do |div|
172
173
  build_sentences(d, div)
173
174
  end
174
175
  end
@@ -177,7 +178,9 @@ module PROIEL
177
178
  def build_sentences(d, div)
178
179
  d.sentences.map do |e|
179
180
  Sentence.new(div, e.id, e.status, e.presentation_before,
180
- e.presentation_after) do |sentence|
181
+ e.presentation_after, e.alignment_id,
182
+ e.annotated_by, e.reviewed_by, e.annotated_at,
183
+ e.reviewed_at) do |sentence|
181
184
  build_tokens(e, sentence)
182
185
  end
183
186
  end
@@ -191,7 +194,7 @@ module PROIEL
191
194
  t.presentation_before, t.presentation_after,
192
195
  t.antecedent_id, t.information_status,
193
196
  t.contrast_group, t.foreign_ids,
194
- t.slashes)
197
+ t.slashes, t.alignment_id)
195
198
  end
196
199
  end
197
200
 
@@ -0,0 +1,16 @@
1
+ module PROIEL
2
+ module Utilities
3
+ def self.xmlschema_datetime?(s)
4
+ DateTime.xmlschema(s)
5
+
6
+ true
7
+ rescue ArgumentError => e
8
+ if e.message == 'invalid date'
9
+ false
10
+ else
11
+ raise e
12
+ end
13
+ end
14
+ end
15
+ end
16
+
@@ -5,5 +5,5 @@
5
5
  #++
6
6
  module PROIEL
7
7
  # Gem version
8
- VERSION = '1.0.1'
8
+ VERSION = '1.1.0'
9
9
  end
data/lib/proiel.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -12,6 +12,7 @@ require 'memoist'
12
12
  require 'nokogiri'
13
13
 
14
14
  require 'proiel/version'
15
+ require 'proiel/utils'
15
16
  require 'proiel/citations'
16
17
  require 'proiel/statistics'
17
18
  require 'proiel/tokenization'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proiel
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marius L. Jøhndal
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-15 00:00:00.000000000 Z
11
+ date: 2016-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -167,15 +167,11 @@ files:
167
167
  - lib/proiel/citations.rb
168
168
  - lib/proiel/div.rb
169
169
  - lib/proiel/positional_tag.rb
170
- - lib/proiel/proiel_xml/proiel-1.0/proiel-1.0.md
171
170
  - lib/proiel/proiel_xml/proiel-1.0/proiel-1.0.xsd
172
171
  - lib/proiel/proiel_xml/proiel-1.0/teilite.xsd
173
172
  - lib/proiel/proiel_xml/proiel-1.0/xml.xsd
174
- - lib/proiel/proiel_xml/proiel-2.0/XMLSchema.xsd
175
- - lib/proiel/proiel_xml/proiel-2.0/make
176
- - lib/proiel/proiel_xml/proiel-2.0/proiel-2.0.html
177
173
  - lib/proiel/proiel_xml/proiel-2.0/proiel-2.0.xsd
178
- - lib/proiel/proiel_xml/proiel-2.0/xs3p.xsl
174
+ - lib/proiel/proiel_xml/proiel-2.1/proiel-2.1.xsd
179
175
  - lib/proiel/proiel_xml/reader.rb
180
176
  - lib/proiel/proiel_xml/schema.rb
181
177
  - lib/proiel/proiel_xml/validator.rb
@@ -186,6 +182,7 @@ files:
186
182
  - lib/proiel/tokenization.rb
187
183
  - lib/proiel/treebank.rb
188
184
  - lib/proiel/treebank_object.rb
185
+ - lib/proiel/utils.rb
189
186
  - lib/proiel/version.rb
190
187
  homepage: http://proiel.github.com
191
188
  licenses:
@@ -1,16 +0,0 @@
1
- # The PROIEL XML format
2
-
3
- ## `token`
4
-
5
- ### `lemma` attribute (string, optional)
6
-
7
- When it is necessary to distinguish lemmas with the same textual form, the
8
- PROIEL XML convention is to use the associated part of speech to distinguish them.
9
-
10
- If there are multiple lemmas with the same textual form and the same part of
11
- speech, the convention is to append `#` and a positive, non-zero integer:
12
-
13
- ```
14
- quod#1
15
- quod#2
16
- ```