proiel 1.1.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/LICENSE +1 -1
- data/README.md +2 -2
- data/lib/proiel.rb +16 -1
- data/lib/proiel/alignment.rb +3 -0
- data/lib/proiel/alignment/builder.rb +220 -0
- data/lib/proiel/annotation_schema.rb +11 -4
- data/lib/proiel/chronology.rb +80 -0
- data/lib/proiel/dictionary.rb +79 -0
- data/lib/proiel/dictionary/builder.rb +224 -0
- data/lib/proiel/div.rb +22 -3
- data/lib/proiel/language.rb +108 -0
- data/lib/proiel/lemma.rb +77 -0
- data/lib/proiel/proiel_xml/proiel-3.0/proiel-3.0.xsd +383 -0
- data/lib/proiel/proiel_xml/reader.rb +138 -2
- data/lib/proiel/proiel_xml/schema.rb +4 -2
- data/lib/proiel/proiel_xml/validator.rb +76 -9
- data/lib/proiel/sentence.rb +27 -4
- data/lib/proiel/source.rb +14 -4
- data/lib/proiel/statistics.rb +2 -2
- data/lib/proiel/token.rb +14 -6
- data/lib/proiel/tokenization.rb +5 -3
- data/lib/proiel/treebank.rb +23 -6
- data/lib/proiel/utils.rb +0 -1
- data/lib/proiel/valency.rb +5 -0
- data/lib/proiel/valency/arguments.rb +151 -0
- data/lib/proiel/valency/lexicon.rb +59 -0
- data/lib/proiel/valency/obliqueness.rb +31 -0
- data/lib/proiel/version.rb +2 -3
- data/lib/proiel/visualization.rb +1 -0
- data/lib/proiel/visualization/graphviz.rb +111 -0
- data/lib/proiel/visualization/graphviz/aligned-modern.dot.erb +83 -0
- data/lib/proiel/visualization/graphviz/classic.dot.erb +24 -0
- data/lib/proiel/visualization/graphviz/linearized.dot.erb +57 -0
- data/lib/proiel/visualization/graphviz/modern.dot.erb +39 -0
- data/lib/proiel/visualization/graphviz/packed.dot.erb +25 -0
- metadata +76 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b0df361b20a949a5a7c51f1055470507b8b152af79737a896762ac269ac62c20
|
4
|
+
data.tar.gz: 2346771429bd177c4233e470c8e1830871001b2a511e311ef4cb70520f19687f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c2195b08451ee0208aec4f80d7c6edfba1b0ecd2c0405d8a797a5e9fc8a8b135c6a9997d7a1b7b5a52519a3d8275e2be1d0b9389bd698b371372180254202a9
|
7
|
+
data.tar.gz: eb23ab51a1e7607dd4453e84820aa558636e5ebef7c7caf96824cb0f6dd17af7edaa34ad03efc663d1e66b60b3d63ea2d8053726c86b6435efe3a27f50ea53ba
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -12,7 +12,7 @@ PROIEL annotation scheme and the PROIEL XML-based interchange format.
|
|
12
12
|
|
13
13
|
## Installation
|
14
14
|
|
15
|
-
|
15
|
+
This library requires Ruby >= 2.4. Install as
|
16
16
|
|
17
17
|
```shell
|
18
18
|
gem install proiel
|
@@ -35,7 +35,7 @@ bundle
|
|
35
35
|
```
|
36
36
|
|
37
37
|
To download a sample treebank, initialize a new git repository and add the
|
38
|
-
[PROIEL treebank](
|
38
|
+
[PROIEL treebank](https://proiel.github.io) as a submodule:
|
39
39
|
|
40
40
|
```shell
|
41
41
|
git init
|
data/lib/proiel.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c) 2015-
|
2
|
+
# Copyright (c) 2015-2018 Marius L. Jøhndal
|
3
3
|
#
|
4
4
|
# See LICENSE in the top-level source directory for licensing terms.
|
5
5
|
#++
|
@@ -10,6 +10,12 @@ require 'ostruct'
|
|
10
10
|
require 'sax-machine'
|
11
11
|
require 'memoist'
|
12
12
|
require 'nokogiri'
|
13
|
+
require 'singleton'
|
14
|
+
require 'erb'
|
15
|
+
require 'open3'
|
16
|
+
require 'set'
|
17
|
+
require 'builder'
|
18
|
+
require 'csv'
|
13
19
|
|
14
20
|
require 'proiel/version'
|
15
21
|
require 'proiel/utils'
|
@@ -27,3 +33,12 @@ require 'proiel/source'
|
|
27
33
|
require 'proiel/div'
|
28
34
|
require 'proiel/sentence'
|
29
35
|
require 'proiel/token'
|
36
|
+
require 'proiel/dictionary'
|
37
|
+
require 'proiel/dictionary/builder'
|
38
|
+
require 'proiel/lemma'
|
39
|
+
require 'proiel/visualization'
|
40
|
+
require 'proiel/chronology'
|
41
|
+
require 'proiel/valency'
|
42
|
+
require 'proiel/dictionary/builder'
|
43
|
+
require 'proiel/alignment'
|
44
|
+
require 'proiel/language'
|
@@ -0,0 +1,220 @@
|
|
1
|
+
module PROIEL
|
2
|
+
module Alignment
|
3
|
+
module Builder
|
4
|
+
# This computes a matrix of original and translation sentences that are
|
5
|
+
# aligned. For now, this function does not handle translation sentences that
|
6
|
+
# are unaligned (this is tricky to handle robustly!). As the current treebank
|
7
|
+
# collection stands this is an issue that *should* not arise so this is for
|
8
|
+
# now a reasonable approximation.
|
9
|
+
def self.compute_matrix(alignment, source, blacklist = [], log_directory = nil)
  # Builds the combined alignment matrix for +source+ (the translation) and
  # +alignment+ (the text it is aligned to) by merging the backward grouping
  # (original order) with the forward grouping (translation order).
  #
  # @param alignment [#sentences] text that +source+ is aligned to
  # @param source [#sentences, #id] the aligned text
  # @param blacklist [Array] IDs of sentences in +source+ whose alignments
  #   are ignored
  # @param log_directory [String, nil] if given, the backward, forward and
  #   merged matrices are dumped to files named +source.id+ followed by 1, 2
  #   and 3 respectively
  #
  # @return [Array<Hash>] rows of the form
  #   `{ original: [ids], translation: [ids] }`
  #
  # @raise [RuntimeError] if a grouping does not preserve sentence order or
  #   the two groupings cannot be reconciled
  original_ids = alignment.sentences.map(&:id)
  translation_ids = source.sentences.map(&:id)

  ids_in = ->(rows, field) { rows.map { |row| row[field] }.flatten.compact }

  dump = lambda do |rows, suffix|
    next unless log_directory

    File.open(File.join(log_directory, "#{source.id}#{suffix}"), 'w') do |f|
      rows.each { |row| f.puts row.inspect }
    end
  end

  matrix1 = group_backwards(alignment, source, blacklist)
  unless ids_in.call(matrix1, :original) == original_ids
    raise 'backward grouping does not preserve the order of the original sentences'
  end

  matrix2 = group_forwards(alignment, source, blacklist)
  unless ids_in.call(matrix2, :translation) == translation_ids
    raise 'forward grouping does not preserve the order of the translation sentences'
  end

  # Verify that both texts are still in the correct sequence
  dump.call(matrix1, 1)
  dump.call(matrix2, 2)

  matrix = []
  iter1 = { i: 0, m: matrix1 }
  iter2 = { i: 0, m: matrix2 }

  # Window sizes to try when the current rows do not match outright, ordered
  # from the smallest repair to the largest:
  # (0,0), (1,0), (0,1), (1,1), (2,0), (0,2), (2,1), (1,2), (2,2), ... (4,4)
  deltas = (0..4).flat_map do |k|
    (0...k).flat_map { |j| [[k, j], [j, k]] } << [k, k]
  end

  loop do
    # Take from matrix1 unless we have a translation
    while iter1[:i] < iter1[:m].length && iter1[:m][iter1[:i]][:translation].empty?
      matrix << iter1[:m][iter1[:i]]
      iter1[:i] += 1
    end

    # Take from matrix2 unless we have an original
    while iter2[:i] < iter2[:m].length && iter2[:m][iter2[:i]][:original].empty?
      matrix << iter2[:m][iter2[:i]]
      iter2[:i] += 1
    end

    unless iter1[:i] < iter1[:m].length && iter2[:i] < iter2[:m].length
      # At least one side is exhausted; both must be for the merge to be complete.
      unless iter1[:i] == iter1[:m].length && iter2[:i] == iter2[:m].length
        raise 'one alignment matrix was exhausted before the other'
      end

      break
    end

    # Now the two should match provided alignments are sorted the same way,
    # so take one from each. If they don't match outright, we may have a case
    # of swapped sentence orders or a gap (one sentence unaligned in one of
    # the texts surrounded by two sentences that are aligned to the same
    # sentence in the other text). We'll try to repair this by merging bits
    # from the next rows in various combinations.
    #
    # When adding to the new matrix, repair picks original from matrix1 and
    # translation from matrix2 so that the original textual order is
    # preserved.
    next if deltas.any? { |d1, d2| repair(matrix, iter1, d1, iter2, d2) }

    STDERR.puts iter1[:i], iter1[:m][iter1[:i]].inspect
    STDERR.puts iter2[:i], iter2[:m][iter2[:i]].inspect
    raise 'unable to reconcile the backward and forward alignment matrices'
  end

  dump.call(matrix, 3)

  unless ids_in.call(matrix, :original) == original_ids
    raise 'merged matrix does not preserve the order of the original sentences'
  end

  unless ids_in.call(matrix, :translation) == translation_ids
    raise 'merged matrix does not preserve the order of the translation sentences'
  end

  matrix
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
def self.group_forwards(alignment, source, blacklist = [])
  # Groups the sentences of +source+ (in their own order) into m-to-n rows
  # with the sentences of +alignment+ they are aligned to.
  #
  # @param alignment [Object] passed through to +inferred_alignment+
  # @param source [#sentences] text whose sentences respond to +id+ and
  #   +inferred_alignment+
  # @param blacklist [Array] IDs of sentences in +source+ that are treated
  #   as unaligned
  #
  # @return [Array<Hash>] rows of the form
  #   `{ original: [ids], translation: [ids] }`

  # Make a translation to original ID mapping. Blacklisted sentences keep an
  # empty list so that they still occupy their position in the sequence.
  originals_by_translation = source.sentences.each_with_object({}) do |sentence, mapping|
    mapping[sentence.id] =
      if blacklist.include?(sentence.id)
        []
      else
        sentence.inferred_alignment(alignment).map(&:id)
      end
  end

  rows = originals_by_translation.map do |translation_id, original_ids|
    { original: original_ids, translation: [translation_id] }
  end

  # Chunk adjacent translation sentences that share at least one original
  # ID, then collapse each chunk into a single m-to-n row.
  rows.chunk_while { |x, y| !(x[:original] & y[:original]).empty? }.map do |chunk|
    {
      original: chunk.flat_map { |row| row[:original] }.uniq,
      translation: chunk.flat_map { |row| row[:translation] }
    }
  end
end
|
144
|
+
|
145
|
+
def self.group_backwards(alignment, source, blacklist = [])
  # Groups the sentences of +alignment+ (in their own order) into m-to-n
  # rows with the sentences of +source+ that are aligned to them.
  #
  # @param alignment [#sentences] text that +source+ is aligned to
  # @param source [#sentences] text whose sentences respond to +id+ and
  #   +inferred_alignment+
  # @param blacklist [Array] IDs of sentences in +source+ whose alignments
  #   are ignored
  #
  # @return [Array<Hash>] rows of the form
  #   `{ original: [ids], translation: [ids] }`
  #
  # @raise [KeyError] if a sentence in +source+ is aligned to an ID that
  #   does not occur in +alignment.sentences+

  # Make an original to translation ID mapping. Every original sentence gets
  # an entry up front so that unaligned ones keep their position.
  translations_by_original = alignment.sentences.each_with_object({}) do |sentence, mapping|
    mapping[sentence.id] = []
  end

  source.sentences.each do |sentence|
    next if blacklist.include?(sentence.id)

    sentence.inferred_alignment(alignment).map(&:id).each do |original_id|
      # fetch makes an unknown original ID fail with a message naming it
      translations_by_original.fetch(original_id) << sentence.id
    end
  end

  rows = translations_by_original.map do |original_id, translation_ids|
    { original: [original_id], translation: translation_ids }
  end

  # Chunk adjacent original sentences that share at least one translation
  # ID, then collapse each chunk into a single m-to-n row.
  rows.chunk_while { |x, y| !(x[:translation] & y[:translation]).empty? }.map do |chunk|
    {
      original: chunk.flat_map { |row| row[:original] },
      translation: chunk.flat_map { |row| row[:translation] }.uniq
    }
  end
end
|
180
|
+
|
181
|
+
# Concatenates the +field+ cells of the rows from the current position of
# +iter+ up to and including +delta+ rows ahead.
def self.repair_merge_cells(iter, delta, field)
  rows, start = iter[:m], iter[:i]
  (0..delta).flat_map { |offset| rows[start + offset][field] }
end

# Collects the +field+ cells of those rows in the same window whose
# +check_field+ cell is empty, i.e. the rows that are unaligned.
def self.select_unaligned(iter, delta, field, check_field)
  rows, start = iter[:m], iter[:i]
  (0..delta)
    .select { |offset| rows[start + offset][check_field].empty? }
    .flat_map { |offset| rows[start + offset][field] }
end

# Tries to reconcile the next +delta1 + 1+ rows of +iter1+ with the next
# +delta2 + 1+ rows of +iter2+. On success a merged row is appended to
# +matrix+ (original IDs from +iter1+, translation IDs from +iter2+), both
# iterators are advanced past the consumed rows and true is returned.
# Otherwise nothing is changed and false is returned.
#
# @param matrix [Array<Hash>] output matrix that is appended to
# @param iter1 [Hash] `{ i: position, m: rows }` over the backward grouping
# @param delta1 [Integer] number of extra rows to merge from +iter1+
# @param iter2 [Hash] `{ i: position, m: rows }` over the forward grouping
# @param delta2 [Integer] number of extra rows to merge from +iter2+
#
# @return [Boolean]
def self.repair(matrix, iter1, delta1, iter2, delta2)
  # A window that runs past the end of either matrix can never match. Bail
  # out instead of indexing beyond the last row.
  return false if iter1[:i] + delta1 >= iter1[:m].length
  return false if iter2[:i] + delta2 >= iter2[:m].length

  o1 = repair_merge_cells(iter1, delta1, :original)
  o2 = repair_merge_cells(iter2, delta2, :original)

  t1 = repair_merge_cells(iter1, delta1, :translation)
  t2 = repair_merge_cells(iter2, delta2, :translation)

  u1 = select_unaligned(iter1, delta1, :original, :translation)
  u2 = select_unaligned(iter2, delta2, :translation, :original)

  # Both windows must cover the same IDs once unaligned sentences, which only
  # show up on one side, are left out of the comparison.
  return false unless o1.sort - u1 == o2.sort.uniq && t1.sort.uniq == t2.sort - u2

  unless delta1.zero? && delta2.zero?
    STDERR.puts "Assuming #{delta1 + 1}/#{delta2 + 1} swapped sentence order:"
    STDERR.puts ' * ' + (0..delta1).map { |j| iter1[:m][iter1[:i] + j].inspect }.join(' + ')
    STDERR.puts ' * ' + (0..delta2).map { |j| iter2[:m][iter2[:i] + j].inspect }.join(' + ')
  end

  matrix << { original: o1, translation: t2 }

  iter1[:i] += delta1 + 1
  iter2[:i] += delta2 + 1

  true
end
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
@@ -22,10 +22,17 @@ module PROIEL
|
|
22
22
|
|
23
23
|
# Creates a new annotation schema object.
|
24
24
|
def initialize(xml_object)
  # Each tag set is built from +xml_object+ when one is given and is an empty
  # hash otherwise. All four are frozen.
  @part_of_speech_tags = (xml_object ? make_part_of_speech_tags(xml_object) : {}).freeze
  @relation_tags = (xml_object ? make_relation_tags(xml_object) : {}).freeze
  @morphology_tags = (xml_object ? make_morphology_tags(xml_object) : {}).freeze
  @information_status_tags = (xml_object ? make_information_status_tags(xml_object) : {}).freeze
end
|
30
37
|
|
31
38
|
# @return [Hash<String,RelationTagDefinition>] definition of primary relation tags
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2016-2017 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
|
7
|
+
# Methods for parsing chronological descriptions. Extra care is taken to get
|
8
|
+
# the interpretation of centuries and ranges involving the transition between 1
|
9
|
+
# BC and AD 1 correct.
|
10
|
+
module PROIEL::Chronology
|
11
|
+
# Computes the chronological midpoint of a chronological description.
|
12
|
+
#
|
13
|
+
# @param s [String] chronological description
|
14
|
+
#
|
15
|
+
# @return [Integer]
|
16
|
+
#
|
17
|
+
# @example
|
18
|
+
# midpoint('1000') # => 1000
|
19
|
+
# midpoint('1000 BC') # => -1000
|
20
|
+
# midpoint('1000-1020') # => 1010
|
21
|
+
def self.midpoint(s)
  # Parses +s+ and reduces it to a single year: a single year is returned
  # as is, a range is reduced to its middle year.
  parsed = parse(s)

  case parsed
  when Array
    from = parsed.first
    to = parsed.last

    if from < 0 && to > 0
      # The range spans 1 BC/AD 1. There is no year 0, so shift the AD end
      # down by one before averaging and shift a non-negative result back up.
      shifted = (from + to - 1) / 2.0
      shifted < 0 ? shifted.floor : (shifted + 1).floor
    else
      # a non-integer midpoint is within the year of the integer part
      ((from + to) / 2.0).floor
    end
  when Integer
    parsed
  else
    raise ArgumentError, 'integer or array expected'
  end
end
|
43
|
+
|
44
|
+
# Parses a chronological description. The syntax of chronological
|
45
|
+
# descriptions is explained in the [PROIEL XML
|
46
|
+
# documentation](http://proiel.github.io/handbook/developer/proielxml.html#chronological-data).
|
47
|
+
#
|
48
|
+
# @param s [String] chronological description
|
49
|
+
#
|
50
|
+
# @return [Integer, Array<Integer,Integer>]
|
51
|
+
#
|
52
|
+
# @example
|
53
|
+
# parse('1000') # => 1000
|
54
|
+
# parse('1000 BC') # => -1000
|
55
|
+
# parse('1000-1020') # => [1000,1020]
|
56
|
+
# parse('1000 BC-1020') # => [-1000,1020]
|
57
|
+
def self.parse(s)
  # Parses a chronological description into a year or a range of years.
  # Years BC are negative. An optional "c. " (circa) prefix on a year is
  # accepted and ignored.
  #
  # All patterns are anchored to the whole string (\A and \z), so text on
  # additional lines is rejected rather than silently ignored.
  #
  # @param s [String] chronological description
  #
  # @return [Integer, Array<Integer,Integer>] a year, or `[from, to]`
  #
  # @raise [ArgumentError] if the format is not recognised, a year or
  #   century is zero, or a range does not run from earlier to later
  case s
  when /\A\s*(?:c\.\s+)?(\d+)(\s+BC)?\s*\z/
    year = Regexp.last_match(1).to_i
    year = -year if Regexp.last_match(2)

    # There is no year zero in the Julian calendar
    raise ArgumentError, 'invalid year' if year.zero?

    year
  when /\A\s*((?:\d*[02-9])?(?:1st|2nd|3rd)|\d+th)\s+c\.(\s+BC)?\s*\z/
    # Accepts proper English ordinals (1st, 2nd, 3rd, 21st, ...) and, as
    # before, any number followed by "th".
    century = Regexp.last_match(1).to_i

    # A "0th" century would produce a range that includes year zero
    raise ArgumentError, 'invalid century' if century.zero?

    last_year = century * 100
    Regexp.last_match(2) ? [-last_year, -last_year + 99] : [last_year - 99, last_year]
  when /\A\s*(?:c\.\s+)?\d+(?:\s+BC)?\s*-\s*(?:c\.\s+)?\d+(?:\s+BC)?\s*\z/
    s.split('-').map { |part| parse(part) }.tap do |from, to|
      raise ArgumentError, 'invalid range' unless from < to
    end
  else
    raise ArgumentError, 'unexpected format'
  end
end
|
80
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2018 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
|
7
|
+
class Dictionary < TreebankObject
|
8
|
+
# @return [Treebank] treebank that this dictionary belongs to
|
9
|
+
attr_reader :treebank
|
10
|
+
|
11
|
+
# @return [String] language of the dictionary as an ISO 639-3 language tag
|
12
|
+
attr_reader :language
|
13
|
+
|
14
|
+
# @return [String] dialect of the dictionary
|
15
|
+
attr_reader :dialect
|
16
|
+
|
17
|
+
# @return [DateTime] export time for the dictionary
|
18
|
+
attr_reader :export_time
|
19
|
+
|
20
|
+
# @return [Hash] all lemmata in the dictionary
|
21
|
+
attr_reader :lemmata
|
22
|
+
|
23
|
+
# @return [Integer] number of lemmata in the dictionary
|
24
|
+
attr_reader :n
|
25
|
+
|
26
|
+
# @return [Hash] all sources in the dictionary
|
27
|
+
attr_reader :sources
|
28
|
+
|
29
|
+
# Creates a new dictionary object.
|
30
|
+
def initialize(parent, export_time, language, dialect, xml = nil)
  # Sets up an empty dictionary and, when +xml+ is given, fills it from that
  # object. +export_time+ must be nil or a string that DateTime can parse.
  @treebank = parent

  unless export_time.nil? || export_time.is_a?(String)
    raise ArgumentError, 'string or nil expected'
  end

  @export_time = export_time && DateTime.parse(export_time).freeze

  @language = language.freeze
  @dialect = (dialect.freeze if dialect)

  @lemmata = {}
  @sources = {}
  @n = 0

  return unless xml

  from_xml(xml)
end
|
45
|
+
|
46
|
+
# FIXME
|
47
|
+
def id
  # Returns the language tag, which doubles as the dictionary's identifier.
  @language
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def from_xml(xml)
  # Records licence and count for every source reference, then indexes each
  # lemma entry by lemma form and part of speech, counting entries in @n.
  xml.sources.each do |source|
    @sources[source.idref] = {
      license: nullify(source.license),
      n: nullify(source.n, :int)
    }
  end

  xml.lemmata.each do |entry|
    by_part_of_speech = (@lemmata[entry.lemma] ||= {})
    by_part_of_speech[entry.part_of_speech] = Lemma.new(self, entry)
    @n += 1
  end
end
|
64
|
+
|
65
|
+
def nullify(s, type = nil)
  # Normalises a value: nil and whitespace-only strings become nil, anything
  # else is converted to an integer (+type+ is :int) or to a string.
  #
  # The blank check is anchored to the whole string (\A and \z). Line
  # anchors would also match any text that merely contains a blank line and
  # turn it into nil.
  #
  # @param s [Object, nil] value to normalise
  # @param type [Symbol, nil] :int for integer conversion, otherwise string
  #
  # @return [Integer, String, nil]
  case s
  when NilClass, /\A\s*\z/
    nil
  else
    case type
    when :int
      s.to_i
    else
      s.to_s
    end
  end
end
|
78
|
+
end
|
79
|
+
end
|