proiel 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14098ee93fc7b4faf6ce3a3f0c4cb17ffab29e31
4
- data.tar.gz: 8a010d9fd1a146d2e4091018b6357ba156792afd
3
+ metadata.gz: 39313c422eb3b2d2f3ad565c0cde3cbd3ddb5271
4
+ data.tar.gz: 1edadad95bbaad82d4d7ab1a9cc409f8e80d3a74
5
5
  SHA512:
6
- metadata.gz: b8f4164437ce019c8f86122bbe15e8af12b5ae2ef6e37fc05fe6b180f5771d98e4b92db56bd845f9ac6551e97b5a6733dbe8b6eb034081e0dac6c4d2d27d7366
7
- data.tar.gz: 48c062decf07fff6149b413cdf96489d03ddffd916d92d6952a7eb15d6043fa26fada9902367dd3d1f50bba77479bf9ad875e000a3a1b32866ea64c845aac87e
6
+ metadata.gz: 41bf1b5bcb3c8d8318128ea146b2609d02942d711553876d71c29cafc948312e79e8cd2e448fef751ca25c685c3f0d57a924004a46bdb3496a8f9913772e3e48
7
+ data.tar.gz: add1511098c62bdd4ee59fdd53e55b4b331595a5a5e02320e97dadff194e8b0b96fb24bd48511a02933d70a83208e0f5d40093a49eec959917177bf59589cbb7
data/lib/proiel/div.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -28,8 +28,11 @@ module PROIEL
28
28
  # @return [nil, String] presentation material after form
29
29
  attr_reader :presentation_after
30
30
 
31
+ # @return [nil, Integer] ID of the div that this div is aligned to
32
+ attr_reader :alignment_id
33
+
31
34
  # Creates a new div object.
32
- def initialize(parent, id, title, presentation_before, presentation_after, &block)
35
+ def initialize(parent, id, title, presentation_before, presentation_after, alignment_id, &block)
33
36
  @source = parent
34
37
 
35
38
  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
@@ -44,6 +47,9 @@ module PROIEL
44
47
  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? or presentation_after.is_a?(String)
45
48
  @presentation_after = presentation_after.freeze
46
49
 
50
+ raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? or alignment_id.is_a?(Integer)
51
+ @alignment_id = alignment_id
52
+
47
53
  @children = block.call(self) if block_given?
48
54
  end
49
55
 
@@ -0,0 +1,198 @@
1
+ <?xml version="1.0"?>
2
+
3
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
4
+ <xs:annotation>
5
+ <xs:documentation>PROIEL XML format version 2.1</xs:documentation>
6
+ </xs:annotation>
7
+
8
+ <xs:complexType name="Source">
9
+ <xs:sequence>
10
+ <xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
11
+ <xs:element name="author" minOccurs="0" maxOccurs="1" type="xs:string"/>
12
+ <xs:element name="citation-part" minOccurs="1" maxOccurs="1" type="xs:string"/>
13
+ <xs:element name="principal" minOccurs="0" maxOccurs="1" type="xs:string"/>
14
+ <xs:element name="funder" minOccurs="0" maxOccurs="1" type="xs:string"/>
15
+ <xs:element name="distributor" minOccurs="0" maxOccurs="1" type="xs:string"/>
16
+ <xs:element name="distributor-address" minOccurs="0" maxOccurs="1" type="xs:string"/>
17
+ <xs:element name="address" minOccurs="0" maxOccurs="1" type="xs:string"/>
18
+ <xs:element name="date" minOccurs="0" maxOccurs="1" type="xs:string"/>
19
+ <xs:element name="license" minOccurs="0" maxOccurs="1" type="xs:string"/>
20
+ <xs:element name="license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
21
+ <xs:element name="reference-system" minOccurs="0" maxOccurs="1" type="xs:string"/>
22
+ <xs:element name="editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
23
+ <xs:element name="editorial-note" minOccurs="0" maxOccurs="1" type="xs:string"/>
24
+ <xs:element name="annotator" minOccurs="0" maxOccurs="1" type="xs:string"/>
25
+ <xs:element name="reviewer" minOccurs="0" maxOccurs="1" type="xs:string"/>
26
+ <xs:element name="electronic-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
27
+ <xs:element name="electronic-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
28
+ <xs:element name="electronic-text-version" minOccurs="0" maxOccurs="1" type="xs:string"/>
29
+ <xs:element name="electronic-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
30
+ <xs:element name="electronic-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
31
+ <xs:element name="electronic-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
32
+ <xs:element name="electronic-text-original-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
33
+ <xs:element name="electronic-text-license" minOccurs="0" maxOccurs="1" type="xs:string"/>
34
+ <xs:element name="electronic-text-license-url" minOccurs="0" maxOccurs="1" type="xs:string"/>
35
+ <xs:element name="printed-text-editor" minOccurs="0" maxOccurs="1" type="xs:string"/>
36
+ <xs:element name="printed-text-title" minOccurs="0" maxOccurs="1" type="xs:string"/>
37
+ <xs:element name="printed-text-edition" minOccurs="0" maxOccurs="1" type="xs:string"/>
38
+ <xs:element name="printed-text-publisher" minOccurs="0" maxOccurs="1" type="xs:string"/>
39
+ <xs:element name="printed-text-place" minOccurs="0" maxOccurs="1" type="xs:string"/>
40
+ <xs:element name="printed-text-date" minOccurs="0" maxOccurs="1" type="xs:string"/>
41
+
42
+ <xs:element name="div" minOccurs="1" maxOccurs="unbounded" type="Div"/>
43
+ </xs:sequence>
44
+
45
+ <xs:attribute name="id" type="xs:string" use="required"/>
46
+ <xs:attribute name="alignment-id" type="xs:string" use="optional"/>
47
+ <xs:attribute name="language" type="xs:string" use="required"/>
48
+ </xs:complexType>
49
+
50
+ <xs:complexType name="Div">
51
+ <xs:sequence>
52
+ <xs:element name="title" minOccurs="1" maxOccurs="1" type="xs:string"/>
53
+ <xs:element name="sentence" minOccurs="1" maxOccurs="unbounded" type="Sentence"/>
54
+ </xs:sequence>
55
+
56
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
57
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
58
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
59
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
60
+ </xs:complexType>
61
+
62
+ <xs:complexType name="Sentence">
63
+ <xs:sequence>
64
+ <xs:element name="token" minOccurs="1" maxOccurs="unbounded" type="Token"/>
65
+ </xs:sequence>
66
+
67
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
68
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
69
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
70
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
71
+ <xs:attribute name="status" type="SentenceStatus" use="optional"/>
72
+ <xs:attribute name="annotated-by" type="xs:string" use="optional"/>
73
+ <xs:attribute name="annotated-at" type="xs:dateTime" use="optional"/>
74
+ <xs:attribute name="reviewed-by" type="xs:string" use="optional"/>
75
+ <xs:attribute name="reviewed-at" type="xs:dateTime" use="optional"/>
76
+ </xs:complexType>
77
+
78
+ <xs:simpleType name="SentenceStatus">
79
+ <xs:restriction base="xs:string">
80
+ <xs:enumeration value="annotated"/>
81
+ <xs:enumeration value="reviewed"/>
82
+ <xs:enumeration value="unannotated"/>
83
+ </xs:restriction>
84
+ </xs:simpleType>
85
+
86
+ <xs:complexType name="Token">
87
+ <xs:sequence>
88
+ <xs:element name="slash" minOccurs="0" maxOccurs="unbounded" type="Slash"/>
89
+ </xs:sequence>
90
+
91
+ <xs:attribute name="id" type="xs:nonNegativeInteger" use="optional"/>
92
+ <xs:attribute name="alignment-id" type="xs:nonNegativeInteger" use="optional"/>
93
+ <xs:attribute name="lemma" type="xs:string" use="optional"/>
94
+ <xs:attribute name="part-of-speech" type="xs:string" use="optional"/>
95
+ <xs:attribute name="morphology" type="xs:string" use="optional"/>
96
+ <xs:attribute name="citation-part" type="xs:string" use="optional"/>
97
+ <xs:attribute name="relation" type="xs:string" use="optional"/>
98
+ <xs:attribute name="head-id" type="xs:nonNegativeInteger" use="optional"/>
99
+ <xs:attribute name="information-status" type="xs:string" use="optional"/>
100
+ <xs:attribute name="antecedent-id" type="xs:nonNegativeInteger" use="optional"/>
101
+ <xs:attribute name="contrast-group" type="xs:string" use="optional"/>
102
+ <xs:attribute name="foreign-ids" type="xs:string" use="optional"/>
103
+
104
+ <!-- XSD does not allow us to constrain the use of these attributes properly so they are marked optional even though their occurrence depends on the value of empty-token-sort and form. -->
105
+ <xs:attribute name="empty-token-sort" type="EmptyTokenSort" use="optional"/>
106
+ <xs:attribute name="form" type="xs:string" use="optional"/>
107
+ <xs:attribute name="presentation-before" type="xs:string" use="optional"/>
108
+ <xs:attribute name="presentation-after" type="xs:string" use="optional"/>
109
+ </xs:complexType>
110
+
111
+ <xs:simpleType name="EmptyTokenSort">
112
+ <xs:restriction base="xs:string">
113
+ <xs:enumeration value="P"/>
114
+ <xs:enumeration value="C"/>
115
+ <xs:enumeration value="V"/>
116
+ </xs:restriction>
117
+ </xs:simpleType>
118
+
119
+ <xs:complexType name="Slash">
120
+ <xs:attribute name="target-id" type="xs:nonNegativeInteger" use="required"/>
121
+ <xs:attribute name="relation" type="xs:string" use="required"/>
122
+ </xs:complexType>
123
+
124
+ <xs:complexType name="PartOfSpeechValue">
125
+ <xs:attribute name="tag" type="xs:string" use="required"/>
126
+ <xs:attribute name="summary" type="xs:string" use="required"/>
127
+ </xs:complexType>
128
+
129
+ <xs:complexType name="PartsOfSpeech">
130
+ <xs:sequence>
131
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='PartOfSpeechValue'/>
132
+ </xs:sequence>
133
+ </xs:complexType>
134
+
135
+ <xs:complexType name="InformationStatusValue">
136
+ <xs:attribute name="tag" type="xs:string" use="required"/>
137
+ <xs:attribute name="summary" type="xs:string" use="required"/>
138
+ </xs:complexType>
139
+
140
+ <xs:complexType name="InformationStatuses">
141
+ <xs:sequence>
142
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='InformationStatusValue'/>
143
+ </xs:sequence>
144
+ </xs:complexType>
145
+
146
+ <xs:complexType name="RelationValue">
147
+ <xs:attribute name="tag" type="xs:string" use="required"/>
148
+ <xs:attribute name="summary" type="xs:string" use="required"/>
149
+ <xs:attribute name="primary" type="xs:boolean" use="required"/>
150
+ <xs:attribute name="secondary" type="xs:boolean" use="required"/>
151
+ </xs:complexType>
152
+
153
+ <xs:complexType name="Relations">
154
+ <xs:sequence>
155
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='RelationValue'/>
156
+ </xs:sequence>
157
+ </xs:complexType>
158
+
159
+ <xs:complexType name="MorphologyValue">
160
+ <xs:attribute name="tag" type="xs:string" use="required"/>
161
+ <xs:attribute name="summary" type="xs:string" use="required"/>
162
+ </xs:complexType>
163
+
164
+ <xs:complexType name="MorphologyField">
165
+ <xs:sequence>
166
+ <xs:element name='value' minOccurs="1" maxOccurs="unbounded" type='MorphologyValue'/>
167
+ </xs:sequence>
168
+
169
+ <xs:attribute name="tag" type="xs:string" use="required"/>
170
+ </xs:complexType>
171
+
172
+ <xs:complexType name="Morphology">
173
+ <xs:sequence>
174
+ <xs:element name='field' minOccurs="1" maxOccurs="unbounded" type='MorphologyField'/>
175
+ </xs:sequence>
176
+ </xs:complexType>
177
+
178
+ <xs:complexType name="Annotation">
179
+ <xs:sequence>
180
+ <xs:element name='relations' minOccurs="1" maxOccurs="1" type='Relations'/>
181
+ <xs:element name='parts-of-speech' minOccurs="1" maxOccurs="1" type='PartsOfSpeech'/>
182
+ <xs:element name='morphology' minOccurs="1" maxOccurs="1" type='Morphology'/>
183
+ <xs:element name='information-statuses' minOccurs="1" maxOccurs="1" type='InformationStatuses'/>
184
+ </xs:sequence>
185
+ </xs:complexType>
186
+
187
+ <xs:complexType name="Proiel">
188
+ <xs:sequence>
189
+ <xs:element name='annotation' minOccurs="0" maxOccurs="1" type='Annotation'/>
190
+ <xs:element name='source' minOccurs="1" maxOccurs="unbounded" type='Source'/>
191
+ </xs:sequence>
192
+
193
+ <xs:attribute name='export-time' type="xs:dateTime" use="optional"/>
194
+ <xs:attribute name="schema-version" type="xs:decimal" use="required" fixed="2.1"/>
195
+ </xs:complexType>
196
+
197
+ <xs:element name='proiel' type='Proiel'/>
198
+ </xs:schema>
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -20,6 +20,7 @@ module PROIEL
20
20
  include SAXMachine
21
21
 
22
22
  attribute :id, class: Integer, required: true
23
+ attribute :'alignment-id', as: :alignment_id, class: Integer, required: false
23
24
  attribute :'head-id', as: :head_id, class: Integer
24
25
  attribute :form
25
26
  attribute :lemma
@@ -43,7 +44,12 @@ module PROIEL
43
44
  include SAXMachine
44
45
 
45
46
  attribute :id, class: Integer, required: true
47
+ attribute :'alignment-id', as: :alignment_id, class: Integer, required: false
46
48
  attribute :status, class: Symbol, default: :unannotated
49
+ attribute :'annotated-by', as: :annotated_by, required: false
50
+ attribute :'reviewed-by', as: :reviewed_by, required: false
51
+ attribute :'annotated-at', as: :annotated_at, required: false
52
+ attribute :'reviewed-at', as: :reviewed_at, required: false
47
53
  attribute :'presentation-before', as: :presentation_before
48
54
  attribute :'presentation-after', as: :presentation_after
49
55
 
@@ -54,7 +60,8 @@ module PROIEL
54
60
  class Div
55
61
  include SAXMachine
56
62
 
57
- attribute :id
63
+ attribute :id, class: Integer, required: false
64
+ attribute :'alignment-id', as: :alignment_id, class: Integer, required: false
58
65
  attribute :'presentation-before', as: :presentation_before
59
66
  attribute :'presentation-after', as: :presentation_after
60
67
 
@@ -67,6 +74,7 @@ module PROIEL
67
74
  include SAXMachine
68
75
 
69
76
  attribute :id, required: true
77
+ attribute :'alignment-id', as: :alignment_id, required: false
70
78
  attribute :language, required: true
71
79
 
72
80
  element :title
@@ -16,7 +16,7 @@ module PROIEL
16
16
  # @return [String] schema version number
17
17
  #
18
18
  def self.current_proiel_xml_schema_version
19
- '2.0'
19
+ '2.1'
20
20
  end
21
21
 
22
22
  # Invalid PROIEL XML schema version error.
@@ -39,6 +39,8 @@ module PROIEL
39
39
  case doc.root.attr('schema-version')
40
40
  when '2.0'
41
41
  '2.0'
42
+ when '2.1'
43
+ '2.1'
42
44
  when NilClass
43
45
  '1.0'
44
46
  else
@@ -68,7 +70,7 @@ module PROIEL
68
70
  # @raise ArgumentError
69
71
  #
70
72
  def self.proiel_xml_schema_filename(schema_version)
71
- if schema_version == '1.0' or schema_version == '2.0'
73
+ if schema_version == '1.0' or schema_version == '2.0' or schema_version == '2.1'
72
74
  File.join(File.dirname(__FILE__),
73
75
  "proiel-#{schema_version}",
74
76
  "proiel-#{schema_version}.xsd")
@@ -145,6 +145,16 @@ module PROIEL
145
145
  # Pass 3: verify that all features are defined
146
146
  # TBD
147
147
 
148
+ # Pass 4: alignment_id on div, sentence or token requires an alignment_id on source
149
+ tb.sources.each do |source|
150
+ if source.alignment_id.nil?
151
+ if source.divs.any?(&:alignment_id) or source.sentences.any?(&:alignment_id) or source.tokens.any?(&:alignment_id)
152
+ errors << "Alignment ID(s) on divs, sentences or tokens without alignment ID on source"
153
+ end
154
+ end
155
+ end
156
+
157
+ # Decide if there were any errors
148
158
  if errors.empty?
149
159
  true
150
160
  else
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -23,8 +23,23 @@ module PROIEL
23
23
  # @return [nil, String] presentation material after sentence
24
24
  attr_reader :presentation_after
25
25
 
26
+ # @return [nil, Integer] ID of the sentence that this sentence is aligned to
27
+ attr_reader :alignment_id
28
+
29
+ # @return [nil, String] annotator of sentence
30
+ attr_reader :annotated_by
31
+
32
+ # @return [nil, String] reviewer of sentence
33
+ attr_reader :reviewed_by
34
+
35
+ # @return [nil, DateTime] time of annotation
36
+ attr_reader :annotated_at
37
+
38
+ # @return [nil, DateTime] time of review
39
+ attr_reader :reviewed_at
40
+
26
41
  # Creates a new sentence object.
27
- def initialize(parent, id, status, presentation_before, presentation_after, &block)
42
+ def initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block)
28
43
  @div = parent
29
44
 
30
45
  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
@@ -39,6 +54,21 @@ module PROIEL
39
54
  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? or presentation_after.is_a?(String)
40
55
  @presentation_after = presentation_after.freeze
41
56
 
57
+ raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? or alignment_id.is_a?(Integer)
58
+ @alignment_id = alignment_id
59
+
60
+ raise ArgumentError, 'XML schema date time or nil expected' unless annotated_at.nil? or PROIEL::Utilities.xmlschema_datetime?(annotated_at)
61
+ @annotated_at = annotated_at ? DateTime.xmlschema(annotated_at).freeze : nil
62
+
63
+ raise ArgumentError, 'XML schema date time or nil expected' unless reviewed_at.nil? or PROIEL::Utilities.xmlschema_datetime?(reviewed_at)
64
+ @reviewed_at = reviewed_at ? DateTime.xmlschema(reviewed_at).freeze : nil
65
+
66
+ raise ArgumentError, 'string or nil expected' unless annotated_by.nil? or annotated_by.is_a?(String)
67
+ @annotated_by = annotated_by.freeze
68
+
69
+ raise ArgumentError, 'string or nil expected' unless reviewed_by.nil? or reviewed_by.is_a?(String)
70
+ @reviewed_by = reviewed_by.freeze
71
+
42
72
  @children = block.call(self) if block_given?
43
73
  end
44
74
 
data/lib/proiel/source.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -9,7 +9,7 @@ module PROIEL
9
9
  # @return [String] ID of the source
10
10
  attr_reader :id
11
11
 
12
- # @return [Treebank] treebank that the div belongs to
12
+ # @return [Treebank] treebank that this source belongs to
13
13
  attr_reader :treebank
14
14
 
15
15
  # @return [String] language of the source as an ISO 639-3 language tag
@@ -22,13 +22,20 @@ module PROIEL
22
22
  # @see PROIEL::Treebank::METADATA_ELEMENTS
23
23
  attr_reader :metadata
24
24
 
25
+ # @return [nil, String] ID of the source that this source is aligned to
26
+ attr_reader :alignment_id
27
+
25
28
  # Creates a new source object.
26
- def initialize(parent, id, export_time, language, metadata, &block)
29
+ def initialize(parent, id, export_time, language, metadata, alignment_id, &block)
27
30
  @treebank = parent
28
31
  @id = id.freeze
29
32
  @export_time = DateTime.parse(export_time).freeze
30
33
  @language = language.freeze
31
34
  @metadata = metadata.freeze
35
+
36
+ raise ArgumentError, 'string or nil expected' unless alignment_id.nil? or alignment_id.is_a?(String)
37
+ @alignment_id = alignment_id.freeze
38
+
32
39
  @children = block.call(self) if block_given?
33
40
  end
34
41
 
data/lib/proiel/token.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -63,11 +63,15 @@ module PROIEL
63
63
  # @return [Array<Array<String,Fixnum>>] secondary edges as an array of pairs of relation tag and target token ID
64
64
  attr_reader :slashes
65
65
 
66
+ # @return [nil, Integer] ID of the token that this token is aligned to
67
+ attr_reader :alignment_id
68
+
66
69
  # Creates a new token object.
67
70
  def initialize(parent, id, head_id, form, lemma, part_of_speech,
68
71
  morphology, relation, empty_token_sort, citation_part,
69
72
  presentation_before, presentation_after, antecedent_id,
70
- information_status, contrast_group, foreign_ids, slashes)
73
+ information_status, contrast_group, foreign_ids, slashes,
74
+ alignment_id)
71
75
  @sentence = parent
72
76
 
73
77
  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
@@ -117,6 +121,9 @@ module PROIEL
117
121
 
118
122
  raise ArgumentError, 'array expected' unless slashes.is_a?(Array)
119
123
  @slashes = slashes.map { |s| [s.relation.freeze, s.target_id] }
124
+
125
+ raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? or alignment_id.is_a?(Integer)
126
+ @alignment_id = alignment_id
120
127
  end
121
128
 
122
129
  # @return [Div] parent div object
@@ -86,7 +86,7 @@ module PROIEL
86
86
 
87
87
  tf.proiel.sources.each do |s|
88
88
  @sources << Source.new(self, s.id, tf.proiel.export_time, s.language,
89
- bundle_metadata(s)) do |source|
89
+ bundle_metadata(s), s.alignment_id) do |source|
90
90
  build_divs(s, source)
91
91
  end
92
92
 
@@ -165,10 +165,11 @@ module PROIEL
165
165
  end
166
166
 
167
167
  def build_divs(s, source)
168
- # FIXME: for PROIEL XML > 2.0, we should respect d.id
168
+ # For PROIEL XML 2.0 we generate an ID, for PROIEL XML >= 2.1 we respect the ID
169
+ # from the XML file.
169
170
  s.divs.each_with_index.map do |d, i|
170
- Div.new(source, i + 1, d.title, d.presentation_before,
171
- d.presentation_after) do |div|
171
+ Div.new(source, d.id || i + 1, d.title, d.presentation_before,
172
+ d.presentation_after, d.alignment_id) do |div|
172
173
  build_sentences(d, div)
173
174
  end
174
175
  end
@@ -177,7 +178,9 @@ module PROIEL
177
178
  def build_sentences(d, div)
178
179
  d.sentences.map do |e|
179
180
  Sentence.new(div, e.id, e.status, e.presentation_before,
180
- e.presentation_after) do |sentence|
181
+ e.presentation_after, e.alignment_id,
182
+ e.annotated_by, e.reviewed_by, e.annotated_at,
183
+ e.reviewed_at) do |sentence|
181
184
  build_tokens(e, sentence)
182
185
  end
183
186
  end
@@ -191,7 +194,7 @@ module PROIEL
191
194
  t.presentation_before, t.presentation_after,
192
195
  t.antecedent_id, t.information_status,
193
196
  t.contrast_group, t.foreign_ids,
194
- t.slashes)
197
+ t.slashes, t.alignment_id)
195
198
  end
196
199
  end
197
200
 
@@ -0,0 +1,16 @@
1
+ module PROIEL
2
+ module Utilities
3
+ def self.xmlschema_datetime?(s)
4
+ DateTime.xmlschema(s)
5
+
6
+ true
7
+ rescue ArgumentError => e
8
+ if e.message == 'invalid date'
9
+ false
10
+ else
11
+ raise e
12
+ end
13
+ end
14
+ end
15
+ end
16
+
@@ -5,5 +5,5 @@
5
5
  #++
6
6
  module PROIEL
7
7
  # Gem version
8
- VERSION = '1.0.1'
8
+ VERSION = '1.1.0'
9
9
  end
data/lib/proiel.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2015 Marius L. Jøhndal
2
+ # Copyright (c) 2015-2016 Marius L. Jøhndal
3
3
  #
4
4
  # See LICENSE in the top-level source directory for licensing terms.
5
5
  #++
@@ -12,6 +12,7 @@ require 'memoist'
12
12
  require 'nokogiri'
13
13
 
14
14
  require 'proiel/version'
15
+ require 'proiel/utils'
15
16
  require 'proiel/citations'
16
17
  require 'proiel/statistics'
17
18
  require 'proiel/tokenization'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proiel
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marius L. Jøhndal
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-15 00:00:00.000000000 Z
11
+ date: 2016-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -167,15 +167,11 @@ files:
167
167
  - lib/proiel/citations.rb
168
168
  - lib/proiel/div.rb
169
169
  - lib/proiel/positional_tag.rb
170
- - lib/proiel/proiel_xml/proiel-1.0/proiel-1.0.md
171
170
  - lib/proiel/proiel_xml/proiel-1.0/proiel-1.0.xsd
172
171
  - lib/proiel/proiel_xml/proiel-1.0/teilite.xsd
173
172
  - lib/proiel/proiel_xml/proiel-1.0/xml.xsd
174
- - lib/proiel/proiel_xml/proiel-2.0/XMLSchema.xsd
175
- - lib/proiel/proiel_xml/proiel-2.0/make
176
- - lib/proiel/proiel_xml/proiel-2.0/proiel-2.0.html
177
173
  - lib/proiel/proiel_xml/proiel-2.0/proiel-2.0.xsd
178
- - lib/proiel/proiel_xml/proiel-2.0/xs3p.xsl
174
+ - lib/proiel/proiel_xml/proiel-2.1/proiel-2.1.xsd
179
175
  - lib/proiel/proiel_xml/reader.rb
180
176
  - lib/proiel/proiel_xml/schema.rb
181
177
  - lib/proiel/proiel_xml/validator.rb
@@ -186,6 +182,7 @@ files:
186
182
  - lib/proiel/tokenization.rb
187
183
  - lib/proiel/treebank.rb
188
184
  - lib/proiel/treebank_object.rb
185
+ - lib/proiel/utils.rb
189
186
  - lib/proiel/version.rb
190
187
  homepage: http://proiel.github.com
191
188
  licenses:
@@ -1,16 +0,0 @@
1
- # The PROIEL XML format
2
-
3
- ## `token`
4
-
5
- ### `lemma` attribute (string, optional)
6
-
7
- When it is necessary to distinguish lemmas with the same textual form, the
8
- PROIEL XML convention is to use the associated part of speech to distinguish them.
9
-
10
- If there are multiple lemmas with the same textual form and the same part of
11
- speech, the convention is to append `#` and a positive, non-zero integer:
12
-
13
- ```
14
- quod#1
15
- quod#2
16
- ```