llt-review 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/.travis.yml +8 -0
- data/Gemfile +14 -0
- data/LICENSE.txt +22 -0
- data/README.md +35 -0
- data/Rakefile +6 -0
- data/config.ru +2 -0
- data/lib/llt/review/alignment/comparison.rb +6 -0
- data/lib/llt/review/alignment/data.rb +7 -0
- data/lib/llt/review/alignment/difference/nrefs.rb +29 -0
- data/lib/llt/review/alignment/difference/sentence.rb +6 -0
- data/lib/llt/review/alignment/difference/translation.rb +27 -0
- data/lib/llt/review/alignment/difference/word.rb +27 -0
- data/lib/llt/review/alignment/difference.rb +11 -0
- data/lib/llt/review/alignment/gold.rb +9 -0
- data/lib/llt/review/alignment/parser/helper.rb +43 -0
- data/lib/llt/review/alignment/parser/nokogiri_handler.rb +44 -0
- data/lib/llt/review/alignment/parser/ox_handler.rb +68 -0
- data/lib/llt/review/alignment/parser.rb +13 -0
- data/lib/llt/review/alignment/report/generic.rb +10 -0
- data/lib/llt/review/alignment/report/translation.rb +7 -0
- data/lib/llt/review/alignment/report/word.rb +7 -0
- data/lib/llt/review/alignment/report.rb +15 -0
- data/lib/llt/review/alignment/reviewable.rb +6 -0
- data/lib/llt/review/alignment/sentence.rb +18 -0
- data/lib/llt/review/alignment/translation.rb +13 -0
- data/lib/llt/review/alignment/word.rb +38 -0
- data/lib/llt/review/alignment.rb +23 -0
- data/lib/llt/review/api.rb +37 -0
- data/lib/llt/review/common/comparison.rb +91 -0
- data/lib/llt/review/common/difference/sentence.rb +31 -0
- data/lib/llt/review/common/difference/word.rb +22 -0
- data/lib/llt/review/common/difference.rb +9 -0
- data/lib/llt/review/common/golden.rb +14 -0
- data/lib/llt/review/common/report/generic.rb +32 -0
- data/lib/llt/review/common/report.rb +66 -0
- data/lib/llt/review/common/reviewable.rb +34 -0
- data/lib/llt/review/common/sentence.rb +41 -0
- data/lib/llt/review/common.rb +12 -0
- data/lib/llt/review/helpers/diff_reporter.rb +45 -0
- data/lib/llt/review/helpers/parsing/helper/for_nokogiri.rb +9 -0
- data/lib/llt/review/helpers/parsing/helper.rb +30 -0
- data/lib/llt/review/helpers/parsing/result.rb +7 -0
- data/lib/llt/review/helpers/parsing.rb +29 -0
- data/lib/llt/review/helpers/reportable.rb +81 -0
- data/lib/llt/review/helpers.rb +10 -0
- data/lib/llt/review/treebank/comparison.rb +6 -0
- data/lib/llt/review/treebank/difference/attribute.rb +11 -0
- data/lib/llt/review/treebank/difference/datapoint.rb +28 -0
- data/lib/llt/review/treebank/difference/generic.rb +32 -0
- data/lib/llt/review/treebank/difference/head.rb +14 -0
- data/lib/llt/review/treebank/difference/lemma.rb +9 -0
- data/lib/llt/review/treebank/difference/postag.rb +37 -0
- data/lib/llt/review/treebank/difference/relation.rb +11 -0
- data/lib/llt/review/treebank/difference/sentence.rb +6 -0
- data/lib/llt/review/treebank/difference/word.rb +8 -0
- data/lib/llt/review/treebank/difference.rb +15 -0
- data/lib/llt/review/treebank/gold.rb +8 -0
- data/lib/llt/review/treebank/parser/helper.rb +9 -0
- data/lib/llt/review/treebank/parser/nokogiri_handler.rb +30 -0
- data/lib/llt/review/treebank/parser/ox_handler.rb +42 -0
- data/lib/llt/review/treebank/parser.rb +13 -0
- data/lib/llt/review/treebank/postag.rb +65 -0
- data/lib/llt/review/treebank/report/datapoints.rb +12 -0
- data/lib/llt/review/treebank/report/generic.rb +10 -0
- data/lib/llt/review/treebank/report/lemma.rb +8 -0
- data/lib/llt/review/treebank/report/postag/datapoint.rb +16 -0
- data/lib/llt/review/treebank/report/postag.rb +7 -0
- data/lib/llt/review/treebank/report/postags.rb +27 -0
- data/lib/llt/review/treebank/report/relation.rb +7 -0
- data/lib/llt/review/treebank/report.rb +19 -0
- data/lib/llt/review/treebank/reviewable.rb +6 -0
- data/lib/llt/review/treebank/sentence.rb +25 -0
- data/lib/llt/review/treebank/word.rb +53 -0
- data/lib/llt/review/treebank.rb +22 -0
- data/lib/llt/review/version.rb +5 -0
- data/lib/llt/review.rb +118 -0
- data/llt-review.gemspec +25 -0
- data/spec/lib/llt/review/alignment/parser_spec.rb +72 -0
- data/spec/lib/llt/review/alignment_spec.rb +242 -0
- data/spec/lib/llt/review/helpers/reportable_spec.rb +26 -0
- data/spec/lib/llt/review/treebank/parser_spec.rb +21 -0
- data/spec/lib/llt/review/treebank/postag_spec.rb +22 -0
- data/spec/lib/llt/review/treebank_spec.rb +222 -0
- data/spec/lib/llt/review_spec.rb +7 -0
- data/spec/spec_helper.rb +22 -0
- metadata +195 -0
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Review::Alignment::Parser do
|
4
|
+
let(:parser) { LLT::Review::Alignment::Parser.new }
|
5
|
+
|
6
|
+
# Orgetorix rex fuit. => Orgetorix was a king. - Actually, he wasn't.
|
7
|
+
let(:data) { <<-EOF }
|
8
|
+
<aligned-text xmlns="http://alpheios.net/namespaces/aligned-text">
|
9
|
+
<language lnum="L1" xml:lang="lat"/>
|
10
|
+
<language lnum="L2" xml:lang="eng"/>
|
11
|
+
<sentence id="1">
|
12
|
+
<wds lnum="L1">
|
13
|
+
<w n="1-1">
|
14
|
+
<text>Orgetorix</text>
|
15
|
+
<refs nrefs="1-1"/>
|
16
|
+
</w>
|
17
|
+
<w n="1-2">
|
18
|
+
<text>rex</text>
|
19
|
+
<refs nrefs="1-3 1-4"/>
|
20
|
+
</w>
|
21
|
+
<w n="1-3">
|
22
|
+
<text>fuit</text>
|
23
|
+
<refs nrefs="1-2"/>
|
24
|
+
</w>
|
25
|
+
<w n="1-4">
|
26
|
+
<text>.</text>
|
27
|
+
<refs nrefs="1-5"/>
|
28
|
+
</w>
|
29
|
+
</wds>
|
30
|
+
<wds lnum="L2">
|
31
|
+
<w n="1-1">
|
32
|
+
<text>Orgetorix</text>
|
33
|
+
<refs nrefs="1-1"/>
|
34
|
+
</w>
|
35
|
+
<w n="1-2">
|
36
|
+
<text>was</text>
|
37
|
+
<refs nrefs="1-3"/>
|
38
|
+
</w>
|
39
|
+
<w n="1-3">
|
40
|
+
<text>a</text>
|
41
|
+
<refs nrefs="1-2"/>
|
42
|
+
</w>
|
43
|
+
<w n="1-4">
|
44
|
+
<text>king</text>
|
45
|
+
<refs nrefs="1-2"/>
|
46
|
+
</w>
|
47
|
+
<w n="1-5">
|
48
|
+
<text>.</text>
|
49
|
+
<refs nrefs="1-4"/>
|
50
|
+
</w>
|
51
|
+
</wds>
|
52
|
+
</sentence>
|
53
|
+
</aligned-text>
|
54
|
+
EOF
|
55
|
+
|
56
|
+
describe "#parse" do
|
57
|
+
it "returns parsed sentences" do
|
58
|
+
result = parser.parse(data)
|
59
|
+
s1= result[1]
|
60
|
+
s1.should be_true
|
61
|
+
s1.lang1.should == 'lat'
|
62
|
+
s1.lang2.should == 'eng'
|
63
|
+
s1.should have(4).items
|
64
|
+
rex = s1[2]
|
65
|
+
rex.should have(2).items
|
66
|
+
rex.translation.should == 'a king'
|
67
|
+
rex[3].to_s.should == 'a'
|
68
|
+
rex[4].to_s.should == 'king'
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
@@ -0,0 +1,242 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Review::Alignment do
|
4
|
+
let(:alignment) { LLT::Review::Alignment.new }
|
5
|
+
|
6
|
+
let(:g1) { <<-EOF }
|
7
|
+
<aligned-text xmlns="http://alpheios.net/namespaces/aligned-text">
|
8
|
+
<language lnum="L1" xml:lang="lat"/>
|
9
|
+
<language lnum="L2" xml:lang="eng"/>
|
10
|
+
<sentence id="1">
|
11
|
+
<wds lnum="L1">
|
12
|
+
<w n="1-1">
|
13
|
+
<text>Orgetorix</text>
|
14
|
+
<refs nrefs="1-1"/>
|
15
|
+
</w>
|
16
|
+
<w n="1-2">
|
17
|
+
<text>rex</text>
|
18
|
+
<refs nrefs="1-3 1-4"/>
|
19
|
+
</w>
|
20
|
+
<w n="1-3">
|
21
|
+
<text>fuit</text>
|
22
|
+
<refs nrefs="1-2"/>
|
23
|
+
</w>
|
24
|
+
<w n="1-4">
|
25
|
+
<text>.</text>
|
26
|
+
<refs nrefs="1-5"/>
|
27
|
+
</w>
|
28
|
+
</wds>
|
29
|
+
<wds lnum="L2">
|
30
|
+
<w n="1-1">
|
31
|
+
<text>Orgetorix</text>
|
32
|
+
<refs nrefs="1-1"/>
|
33
|
+
</w>
|
34
|
+
<w n="1-2">
|
35
|
+
<text>was</text>
|
36
|
+
<refs nrefs="1-3"/>
|
37
|
+
</w>
|
38
|
+
<w n="1-3">
|
39
|
+
<text>a</text>
|
40
|
+
<refs nrefs="1-2"/>
|
41
|
+
</w>
|
42
|
+
<w n="1-4">
|
43
|
+
<text>king</text>
|
44
|
+
<refs nrefs="1-2"/>
|
45
|
+
</w>
|
46
|
+
<w n="1-5">
|
47
|
+
<text>.</text>
|
48
|
+
<refs nrefs="1-4"/>
|
49
|
+
</w>
|
50
|
+
</wds>
|
51
|
+
</sentence>
|
52
|
+
</aligned-text>
|
53
|
+
EOF
|
54
|
+
|
55
|
+
let(:r1) { <<-EOF }
|
56
|
+
<aligned-text xmlns="http://alpheios.net/namespaces/aligned-text">
|
57
|
+
<language lnum="L1" xml:lang="lat"/>
|
58
|
+
<language lnum="L2" xml:lang="eng"/>
|
59
|
+
<sentence id="1">
|
60
|
+
<wds lnum="L1">
|
61
|
+
<w n="1-1">
|
62
|
+
<text>Orgetorix</text>
|
63
|
+
<refs nrefs="1-1"/>
|
64
|
+
</w>
|
65
|
+
<w n="1-2">
|
66
|
+
<text>rex</text>
|
67
|
+
<refs nrefs="1-3 1-4"/>
|
68
|
+
</w>
|
69
|
+
<w n="1-3">
|
70
|
+
<text>fuit</text>
|
71
|
+
<refs nrefs="1-2"/>
|
72
|
+
</w>
|
73
|
+
<w n="1-4">
|
74
|
+
<text>.</text>
|
75
|
+
<refs nrefs="1-5"/>
|
76
|
+
</w>
|
77
|
+
</wds>
|
78
|
+
<wds lnum="L2">
|
79
|
+
<w n="1-1">
|
80
|
+
<text>Orgetorix</text>
|
81
|
+
<refs nrefs="1-1"/>
|
82
|
+
</w>
|
83
|
+
<w n="1-2">
|
84
|
+
<text>was</text>
|
85
|
+
<refs nrefs="1-2"/>
|
86
|
+
</w>
|
87
|
+
<w n="1-3">
|
88
|
+
<text>a</text>
|
89
|
+
<refs nrefs="1-3"/>
|
90
|
+
</w>
|
91
|
+
<w n="1-4">
|
92
|
+
<text>king</text>
|
93
|
+
<refs nrefs="1-3"/>
|
94
|
+
</w>
|
95
|
+
<w n="1-5">
|
96
|
+
<text>.</text>
|
97
|
+
<refs nrefs="1-4"/>
|
98
|
+
</w>
|
99
|
+
</wds>
|
100
|
+
</sentence>
|
101
|
+
</aligned-text>
|
102
|
+
EOF
|
103
|
+
|
104
|
+
describe "#diff" do
|
105
|
+
describe "creates a diff report of a gold and review annotation" do
|
106
|
+
it "contains all differences in detail" do
|
107
|
+
allow(alignment).to receive(:get_from_uri).with(:uri_for_g1) { g1 }
|
108
|
+
allow(alignment).to receive(:get_from_uri).with(:uri_for_r1) { r1 }
|
109
|
+
|
110
|
+
result = alignment.diff([:uri_for_g1], [:uri_for_r1])
|
111
|
+
result.should have(1).item # we had one reviewable annotation
|
112
|
+
result[0].should have(2).item # one sentence with differences & the report
|
113
|
+
result[0][1].should have(2).items # and 2 words with differences
|
114
|
+
wrong_words = result[0][1]
|
115
|
+
w2, w3 = wrong_words.take(2, 3)
|
116
|
+
|
117
|
+
w2.original.should == 'a king'
|
118
|
+
w2.new.should == 'was'
|
119
|
+
|
120
|
+
w3.original.should == 'was'
|
121
|
+
w3.new.should == 'a king'
|
122
|
+
end
|
123
|
+
|
124
|
+
it "contains a full report section" do
|
125
|
+
allow(alignment).to receive(:get_from_uri).with(:uri_for_g1) { g1 }
|
126
|
+
allow(alignment).to receive(:get_from_uri).with(:uri_for_r1) { r1 }
|
127
|
+
|
128
|
+
result = alignment.diff([:uri_for_g1], [:uri_for_r1])
|
129
|
+
report = result.first.report
|
130
|
+
report.should_not be_empty
|
131
|
+
|
132
|
+
sentences = report[:sentences]
|
133
|
+
sentences.total.should == 1
|
134
|
+
sentences.right.should == 0
|
135
|
+
sentences.wrong.should == 1
|
136
|
+
sentences.unique.should == 1
|
137
|
+
|
138
|
+
words = report[:words]
|
139
|
+
words.total.should == 4
|
140
|
+
words.right.should == 2
|
141
|
+
words.wrong.should == 2
|
142
|
+
words.unique.should == 2
|
143
|
+
|
144
|
+
rex = words['rex']
|
145
|
+
rex.total.should == 1
|
146
|
+
rex.right.should == 0
|
147
|
+
rex.wrong.should == 1
|
148
|
+
rex.unique.should == 1
|
149
|
+
|
150
|
+
fuit = words['fuit']
|
151
|
+
fuit.total.should == 1
|
152
|
+
fuit.right.should == 0
|
153
|
+
fuit.wrong.should == 1
|
154
|
+
fuit.unique.should == 1
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
let(:rep) { <<-EOF }
|
160
|
+
<aligned-text xmlns="http://alpheios.net/namespaces/aligned-text">
|
161
|
+
<language lnum="L1" xml:lang="lat"/>
|
162
|
+
<language lnum="L2" xml:lang="eng"/>
|
163
|
+
<sentence id="1">
|
164
|
+
<wds lnum="L1">
|
165
|
+
<w n="1-1">
|
166
|
+
<text>Orgetorix</text>
|
167
|
+
<refs nrefs="1-1"/>
|
168
|
+
</w>
|
169
|
+
<w n="1-2">
|
170
|
+
<text>rex</text>
|
171
|
+
<refs nrefs="1-3 1-4"/>
|
172
|
+
</w>
|
173
|
+
<w n="1-3">
|
174
|
+
<text>fuit</text>
|
175
|
+
<refs nrefs="1-2"/>
|
176
|
+
</w>
|
177
|
+
<w n="1-4">
|
178
|
+
<text>,</text>
|
179
|
+
<refs nrefs="1-5"/>
|
180
|
+
</w>
|
181
|
+
<w n="1-5">
|
182
|
+
<text>rex</text>
|
183
|
+
<refs nrefs="1-6 1-7"/>
|
184
|
+
</w>
|
185
|
+
<w n="1-6">
|
186
|
+
<text>.</text>
|
187
|
+
<refs nrefs="1-8"/>
|
188
|
+
</w>
|
189
|
+
</wds>
|
190
|
+
<wds lnum="L2">
|
191
|
+
<w n="1-1">
|
192
|
+
<text>Orgetorix</text>
|
193
|
+
<refs nrefs="1-1"/>
|
194
|
+
</w>
|
195
|
+
<w n="1-2">
|
196
|
+
<text>was</text>
|
197
|
+
<refs nrefs="1-3"/>
|
198
|
+
</w>
|
199
|
+
<w n="1-3">
|
200
|
+
<text>a</text>
|
201
|
+
<refs nrefs="1-2"/>
|
202
|
+
</w>
|
203
|
+
<w n="1-4">
|
204
|
+
<text>king</text>
|
205
|
+
<refs nrefs="1-2"/>
|
206
|
+
</w>
|
207
|
+
<w n="1-5">
|
208
|
+
<text>,</text>
|
209
|
+
<refs nrefs="1-4"/>
|
210
|
+
</w>
|
211
|
+
<w n="1-6">
|
212
|
+
<text>a</text>
|
213
|
+
<refs nrefs="1-5"/>
|
214
|
+
</w>
|
215
|
+
<w n="1-7">
|
216
|
+
<text>king</text>
|
217
|
+
<refs nrefs="1-5"/>
|
218
|
+
</w>
|
219
|
+
<w n="1-8">
|
220
|
+
<text>.</text>
|
221
|
+
<refs nrefs="1-6"/>
|
222
|
+
</w>
|
223
|
+
</wds>
|
224
|
+
</sentence>
|
225
|
+
</aligned-text>
|
226
|
+
EOF
|
227
|
+
|
228
|
+
describe "#report" do
|
229
|
+
it "reports about how every word was translated" do
|
230
|
+
allow(alignment).to receive(:get_from_uri).with(:uri_for_rep) { rep }
|
231
|
+
result = alignment.report(:uri_for_rep)
|
232
|
+
result.should have(1).item
|
233
|
+
report = result.first
|
234
|
+
report[:sentences].total.should == 1
|
235
|
+
words = report[:words]
|
236
|
+
words.total.should == 6
|
237
|
+
rex = words['rex']
|
238
|
+
rex.total.should == 2
|
239
|
+
rex['a king'].total.should == 2
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Review::Helpers::Reportable do
|
4
|
+
let(:dummy) { Class.new { include LLT::Review::Helpers::Reportable } }
|
5
|
+
|
6
|
+
describe "#clone" do
|
7
|
+
it "performs a deep copy including all container contents" do
|
8
|
+
d = dummy.new('id')
|
9
|
+
5.times { |i| d.add(dummy.new("id#{i}", 1)) }
|
10
|
+
d['id1'].add(dummy.new('id5'))
|
11
|
+
|
12
|
+
cloned = d.clone
|
13
|
+
cloned.increment
|
14
|
+
cloned_id5 = cloned['id1']['id5']
|
15
|
+
d_id5 = d['id1']['id5']
|
16
|
+
cloned_id5.increment
|
17
|
+
cloned_id5.add(dummy.new('id6'))
|
18
|
+
|
19
|
+
cloned.should_not == d
|
20
|
+
cloned.total.should == 2
|
21
|
+
d.total.should == 1
|
22
|
+
d_id5.total.should_not == cloned_id5.total
|
23
|
+
d_id5['id6'].should_not == cloned_id5['id6']
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Review::Treebank::Parser do
|
4
|
+
let(:parser) { LLT::Review::Treebank::Parser.new() }
|
5
|
+
let(:data) { <<-EOF }
|
6
|
+
<sentence id="21" document_id="Perseus:text:1999.02.0002" subdoc="Book=2:chapter=5" span="In3:erat0">
|
7
|
+
<word id="1" form="In" lemma="in1" postag="r--------" head="5" relation="AuxP"/>
|
8
|
+
<word id="2" form="eo" lemma="is1" postag="p-s---nb-" head="3" relation="ATR"/>
|
9
|
+
<word id="3" form="flumine" lemma="flumen1" postag="n-s---nb-" head="1" relation="ADV"/>
|
10
|
+
<word id="4" form="pons" lemma="pons1" postag="n-s---mn-" head="5" relation="SBJ"/>
|
11
|
+
<word id="5" form="erat" lemma="sum1" postag="v3siia---" head="0" relation="PRED"/>
|
12
|
+
</sentence>
|
13
|
+
EOF
|
14
|
+
|
15
|
+
describe "#parse" do
|
16
|
+
it "returns parsed sentences" do
|
17
|
+
result = parser.parse(data)
|
18
|
+
result[21].should be_true
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Review::Treebank::Postag do
|
4
|
+
let(:postag) { LLT::Review::Treebank::Postag.new('v3siia---') }
|
5
|
+
|
6
|
+
describe "#analysis" do
|
7
|
+
it "decodes the postag into a hash" do
|
8
|
+
res = {
|
9
|
+
part_of_speech: 'v',
|
10
|
+
person: '3',
|
11
|
+
number: 's',
|
12
|
+
tense: 'i',
|
13
|
+
mood: 'i',
|
14
|
+
voice: 'a',
|
15
|
+
gender: '-',
|
16
|
+
case: '-',
|
17
|
+
degree: '-',
|
18
|
+
}
|
19
|
+
postag.analysis.should == res
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,222 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Review::Treebank do
|
4
|
+
let(:differ) { LLT::Review::Treebank.new }
|
5
|
+
|
6
|
+
let(:g1) do
|
7
|
+
<<-EOF
|
8
|
+
<treebank>
|
9
|
+
<sentence id="21" document_id="Perseus:text:1999.02.0002" subdoc="Book=2:chapter=5" span="In3:erat0">
|
10
|
+
<word id="1" form="In" lemma="in1" postag="r--------" head="5" relation="AuxP"/>
|
11
|
+
<word id="2" form="eo" lemma="is1" postag="p-s---nb-" head="3" relation="ATR"/>
|
12
|
+
<word id="3" form="flumine" lemma="flumen1" postag="n-s---nb-" head="1" relation="ADV"/>
|
13
|
+
<word id="4" form="pons" lemma="pons1" postag="n-s---mn-" head="5" relation="SBJ"/>
|
14
|
+
<word id="5" form="erat" lemma="sum1" postag="v3siia---" head="0" relation="PRED"/>
|
15
|
+
</sentence>
|
16
|
+
</treebank>
|
17
|
+
EOF
|
18
|
+
end
|
19
|
+
|
20
|
+
let(:r1) do
|
21
|
+
<<-EOF
|
22
|
+
<treebank>
|
23
|
+
<sentence id="21" document_id="Perseus:text:1999.02.0002" subdoc="Book=2:chapter=5" span="In3:erat0">
|
24
|
+
<word id="1" form="In" lemma="in1" postag="r--------" head="4" relation="AuxP"/>
|
25
|
+
<word id="2" form="eo" lemma="is1" postag="p-s---nd-" head="3" relation="ATR"/>
|
26
|
+
<word id="3" form="flumine" lemma="flumen2" postag="n-s---nd-" head="1" relation="ADV"/>
|
27
|
+
<word id="4" form="pons" lemma="pons1" postag="n-s---mn-" head="5" relation="OBJ"/>
|
28
|
+
<word id="5" form="erat" lemma="sum1" postag="v3siia---" head="0" relation="PRED"/>
|
29
|
+
</sentence>
|
30
|
+
</treebank>
|
31
|
+
EOF
|
32
|
+
end
|
33
|
+
|
34
|
+
let(:g2) do
|
35
|
+
<<-EOF
|
36
|
+
<treebank>
|
37
|
+
<sentence id="21" document_id="Perseus:text:1999.02.0002" subdoc="Book=2:chapter=5" span="In3:erat0">
|
38
|
+
<word id="1" form="In" lemma="in1" postag="r--------" head="5" relation="AuxP"/>
|
39
|
+
<word id="2" form="eo" lemma="is1" postag="p-s---nb-" head="3" relation="ATR"/>
|
40
|
+
<word id="3" form="flumine" lemma="flumen1" postag="n-s---nb-" head="1" relation="ADV"/>
|
41
|
+
<word id="4" form="pons" lemma="pons1" postag="n-s---mn-" head="5" relation="SBJ"/>
|
42
|
+
<word id="5" form="erat" lemma="sum1" postag="v3siia---" head="0" relation="PRED"/>
|
43
|
+
</sentence>
|
44
|
+
<sentence id="22" document_id="Perseus:text:1999.02.0002" subdoc="Book=2:chapter=5" span="In3:erat0">
|
45
|
+
<word id="1" form="In" lemma="in1" postag="r--------" head="5" relation="AuxP"/>
|
46
|
+
<word id="2" form="eo" lemma="is1" postag="p-s---nb-" head="3" relation="ATR"/>
|
47
|
+
<word id="3" form="flumine" lemma="flumen1" postag="n-s---nb-" head="1" relation="ADV"/>
|
48
|
+
<word id="4" form="pons" lemma="pons1" postag="n-s---mn-" head="5" relation="SBJ"/>
|
49
|
+
<word id="5" form="erat" lemma="sum1" postag="v3siia---" head="0" relation="PRED"/>
|
50
|
+
</sentence>
|
51
|
+
</treebank>
|
52
|
+
EOF
|
53
|
+
end
|
54
|
+
|
55
|
+
describe "#diff" do
|
56
|
+
describe "creates a diff report of a gold and review annotation" do
|
57
|
+
it "contains all differences in detail" do
|
58
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_g1) { g1 }
|
59
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_r1) { r1 }
|
60
|
+
|
61
|
+
result = differ.diff([:uri_for_g1], [:uri_for_r1])
|
62
|
+
result.should have(1).item # we had one reviewable annotation
|
63
|
+
result[0].should have(2).items # one sentence with differences, report also sitting here
|
64
|
+
result[0][21].should have(4).items # and 4 words with differences
|
65
|
+
diff = result[0][21]
|
66
|
+
w1, w2, w3, w4 = diff.take(1, 2, 3, 4).map(&:diff)
|
67
|
+
|
68
|
+
w1[:head].original.should == '5'
|
69
|
+
w1[:head].new.should == '4'
|
70
|
+
|
71
|
+
w2[:postag].original.should == 'p-s---nb-'
|
72
|
+
w2[:postag].new.should == 'p-s---nd-'
|
73
|
+
w2[:postag].unique.should == 1
|
74
|
+
w2[:postag][:case].original.should == 'b'
|
75
|
+
w2[:postag][:case].new.should == 'd'
|
76
|
+
w2[:postag][:case].unique.should == 1 # first occurrence of this difference
|
77
|
+
|
78
|
+
w3[:lemma].original.should == 'flumen1'
|
79
|
+
w3[:lemma].new.should == 'flumen2'
|
80
|
+
w3[:postag].original.should == 'n-s---nb-'
|
81
|
+
w3[:postag].new.should == 'n-s---nd-'
|
82
|
+
w3[:postag][:case].original.should == 'b'
|
83
|
+
w3[:postag][:case].new.should == 'd'
|
84
|
+
w3[:postag][:case].unique.should == 0 # second occurrence of this difference
|
85
|
+
|
86
|
+
w4[:relation].original.should == 'SBJ'
|
87
|
+
w4[:relation].new.should == 'OBJ'
|
88
|
+
w4[:relation].unique.should == 1
|
89
|
+
end
|
90
|
+
|
91
|
+
it "contains a full report section" do
|
92
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_g1) { g1 }
|
93
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_r1) { r1 }
|
94
|
+
|
95
|
+
result = differ.diff([:uri_for_g1], [:uri_for_r1])
|
96
|
+
report = result.first.report
|
97
|
+
report.should_not be_empty
|
98
|
+
|
99
|
+
sentences = report[:sentences]
|
100
|
+
sentences.total.should == 1
|
101
|
+
sentences.right.should == 0
|
102
|
+
sentences.wrong.should == 1
|
103
|
+
sentences.unique.should == 1
|
104
|
+
|
105
|
+
# TODO
|
106
|
+
# Add a couple more assertions just to be safe
|
107
|
+
|
108
|
+
postags = report[:postags]
|
109
|
+
datapoints = postags[:datapoints]
|
110
|
+
cases = datapoints[:cases]
|
111
|
+
ablative = cases['b']
|
112
|
+
ablative.total.should == 2
|
113
|
+
ablative.right.should == 0
|
114
|
+
ablative.wrong.should == 2
|
115
|
+
ablative.unique.should == 1
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
it "takes multiple gold and review files" do
|
120
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_g1) { g1 }
|
121
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_g2) { g1 }
|
122
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_r1) { r1 }
|
123
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_r2) { r1 }
|
124
|
+
|
125
|
+
result = differ.diff(%i{ uri_for_g1 uri_for_g2 }, %i{ uri_for_r1 uri_for_r2 })
|
126
|
+
result.should have(4).items # we have two times two reviewable annotations now
|
127
|
+
end
|
128
|
+
|
129
|
+
it "takes an optional array of elements to compare with each other" do
|
130
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_g1) { g1 }
|
131
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_r1) { r1 }
|
132
|
+
|
133
|
+
result = differ.diff([:uri_for_g1], [:uri_for_r1], [:head, :relation])
|
134
|
+
result.should have(1).item # we had one reviewable annotation
|
135
|
+
result[0].should have(2).items # one sentence with differences, report also sitting here
|
136
|
+
result[0][21].should have(2).items # and 2 words with differences
|
137
|
+
diff = result[0][21]
|
138
|
+
|
139
|
+
diff[1].should be_true
|
140
|
+
diff[2].should be_nil # has only postag differences
|
141
|
+
diff[3].should be_nil # has only lemma and postag differences
|
142
|
+
diff[4].should be_true
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
describe "#report" do
|
147
|
+
it "analyses occurences of lemmata, head, relation, postags... of passed uris" do
|
148
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_g1) { g2 }
|
149
|
+
allow(differ).to receive(:get_from_uri).with(:uri_for_g2) { g2 }
|
150
|
+
result = differ.report(:uri_for_g1)
|
151
|
+
result.should have(1).item
|
152
|
+
report = result.first
|
153
|
+
report[:sentences].total.should == 2
|
154
|
+
report[:words].total.should == 10
|
155
|
+
report[:heads].total.should == 10
|
156
|
+
|
157
|
+
relations = report[:relations]
|
158
|
+
relations.total.should == 10
|
159
|
+
relations['ADV'].total.should == 2
|
160
|
+
relations['ATR'].total.should == 2
|
161
|
+
relations['AuxP'].total.should == 2
|
162
|
+
relations['PRED'].total.should == 2
|
163
|
+
relations['SBJ'].total.should == 2
|
164
|
+
|
165
|
+
lemmata = report[:lemmata]
|
166
|
+
lemmata.total.should == 10
|
167
|
+
lemmata['flumen1'].total.should == 2
|
168
|
+
lemmata['in1'].total.should == 2
|
169
|
+
lemmata['is1'].total.should == 2
|
170
|
+
lemmata['pons1'].total.should == 2
|
171
|
+
lemmata['sum1'].total.should == 2
|
172
|
+
|
173
|
+
postags = report[:postags]
|
174
|
+
postags.total.should == 10
|
175
|
+
postags["r--------"].total.should == 2
|
176
|
+
postags["p-s---nb-"].total.should == 2
|
177
|
+
postags["n-s---nb-"].total.should == 2
|
178
|
+
postags["n-s---mn-"].total.should == 2
|
179
|
+
postags["v3siia---"].total.should == 2
|
180
|
+
|
181
|
+
datapoints = postags[:datapoints]
|
182
|
+
datapoints.total.should == 90
|
183
|
+
|
184
|
+
pos = datapoints[:parts_of_speech]
|
185
|
+
pos.total.should == 10
|
186
|
+
pos['r'].total.should == 2
|
187
|
+
pos['p'].total.should == 2
|
188
|
+
pos['n'].total.should == 4
|
189
|
+
pos['v'].total.should == 2
|
190
|
+
|
191
|
+
persons = datapoints[:persons]
|
192
|
+
persons.total.should == 2
|
193
|
+
persons['3'].total.should == 2
|
194
|
+
|
195
|
+
numbers = datapoints[:numbers]
|
196
|
+
numbers.total.should == 8
|
197
|
+
numbers['s'].total.should == 8
|
198
|
+
|
199
|
+
tenses = datapoints[:tenses]
|
200
|
+
tenses.total.should == 2
|
201
|
+
tenses['i'].total.should == 2
|
202
|
+
|
203
|
+
moods = datapoints[:moods]
|
204
|
+
moods.total.should == 2
|
205
|
+
moods['i'].total.should == 2
|
206
|
+
|
207
|
+
voices= datapoints[:voices]
|
208
|
+
voices.total.should == 2
|
209
|
+
voices['a'].total.should == 2
|
210
|
+
|
211
|
+
genders = datapoints[:genders]
|
212
|
+
genders.total.should == 6
|
213
|
+
genders['n'].total.should == 4
|
214
|
+
genders['m'].total.should == 2
|
215
|
+
|
216
|
+
cases = datapoints[:cases]
|
217
|
+
cases.total.should == 6
|
218
|
+
cases['b'].total.should == 4
|
219
|
+
cases['n'].total.should == 2
|
220
|
+
end
|
221
|
+
end
|
222
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'simplecov'
|
2
|
+
require 'coveralls'
|
3
|
+
|
4
|
+
Coveralls.wear!
|
5
|
+
|
6
|
+
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
|
7
|
+
SimpleCov::Formatter::HTMLFormatter,
|
8
|
+
Coveralls::SimpleCov::Formatter
|
9
|
+
]
|
10
|
+
|
11
|
+
SimpleCov.start do
|
12
|
+
add_filter '/spec/'
|
13
|
+
end
|
14
|
+
|
15
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
16
|
+
require 'llt/review'
|
17
|
+
|
18
|
+
RSpec.configure do |config|
|
19
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
20
|
+
config.run_all_when_everything_filtered = true
|
21
|
+
config.filter_run :focus
|
22
|
+
end
|