shalmaneser 0.0.1.alpha → 1.2.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +2 -2
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +49 -0
- data/bin/fred +18 -0
- data/bin/frprep +34 -0
- data/bin/rosy +17 -0
- data/lib/common/AbstractSynInterface.rb +35 -33
- data/lib/common/Mallet.rb +236 -0
- data/lib/common/Maxent.rb +26 -12
- data/lib/common/Parser.rb +5 -5
- data/lib/common/SynInterfaces.rb +13 -6
- data/lib/common/TabFormat.rb +7 -6
- data/lib/common/Tiger.rb +4 -4
- data/lib/common/Timbl.rb +144 -0
- data/lib/common/{FrprepHelper.rb → frprep_helper.rb} +14 -8
- data/lib/common/headz.rb +1 -1
- data/lib/common/ruby_class_extensions.rb +3 -3
- data/lib/fred/FredBOWContext.rb +14 -2
- data/lib/fred/FredDetermineTargets.rb +4 -9
- data/lib/fred/FredEval.rb +1 -1
- data/lib/fred/FredFeatureExtractors.rb +4 -3
- data/lib/fred/FredFeaturize.rb +1 -1
- data/lib/frprep/CollinsInterface.rb +6 -6
- data/lib/frprep/MiniparInterface.rb +5 -5
- data/lib/frprep/SleepyInterface.rb +7 -7
- data/lib/frprep/TntInterface.rb +1 -1
- data/lib/frprep/TreetaggerInterface.rb +29 -5
- data/lib/frprep/do_parses.rb +1 -0
- data/lib/frprep/frprep.rb +36 -32
- data/lib/{common/BerkeleyInterface.rb → frprep/interfaces/berkeley_interface.rb} +69 -95
- data/lib/frprep/interfaces/stanford_interface.rb +353 -0
- data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
- data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
- data/lib/frprep/opt_parser.rb +2 -2
- data/lib/rosy/AbstractFeatureAndExternal.rb +5 -3
- data/lib/rosy/RosyIterator.rb +11 -10
- data/lib/rosy/rosy.rb +1 -0
- data/lib/shalmaneser/version.rb +1 -1
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +1 -1
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +1 -1
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +2 -2
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +2 -2
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +2 -2
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +2 -2
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +2 -2
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +2 -2
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +1 -1
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +7 -7
- data/test/functional/test_frprep.rb +3 -3
- data/test/functional/test_rosy.rb +20 -0
- metadata +215 -224
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/CollinsInterface.rb +0 -1165
- data/lib/common/MiniparInterface.rb +0 -1388
- data/lib/common/SleepyInterface.rb +0 -384
- data/lib/common/TntInterface.rb +0 -44
- data/lib/common/TreetaggerInterface.rb +0 -303
- data/lib/frprep/AbstractSynInterface.rb +0 -1227
- data/lib/frprep/BerkeleyInterface.rb +0 -375
- data/lib/frprep/ConfigData.rb +0 -694
- data/lib/frprep/FixSynSemMapping.rb +0 -196
- data/lib/frprep/FrPrepConfigData.rb +0 -66
- data/lib/frprep/FrprepHelper.rb +0 -1324
- data/lib/frprep/ISO-8859-1.rb +0 -24
- data/lib/frprep/Parser.rb +0 -213
- data/lib/frprep/SalsaTigerRegXML.rb +0 -2347
- data/lib/frprep/SalsaTigerXMLHelper.rb +0 -99
- data/lib/frprep/SynInterfaces.rb +0 -275
- data/lib/frprep/TabFormat.rb +0 -720
- data/lib/frprep/Tiger.rb +0 -1448
- data/lib/frprep/Tree.rb +0 -61
- data/lib/frprep/headz.rb +0 -338
data/lib/frprep/Tree.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
require 'frprep/Graph'
|
2
|
-
|
3
|
-
# A graph node that keeps at most one parent.
#
# The plural accessors used below (parents, parent_labels,
# parents_with_edgelabel, each_parent_with_edgelabel, remove_parent) and
# the @parents list are not defined in this class; presumably they are
# inherited from GraphNode -- confirm against frprep/Graph.
class TreeNode < GraphNode

  def initialize(id)
    super(id)
  end

  # redo the ancestor-related methods,
  # since here we only have one parent per node

  # Returns the first entry of parents(), or nil if parents() is nil.
  def parent()
    all_parents = parents()
    all_parents.nil? ? nil : all_parents.first
  end

  # Returns the first entry of parent_labels(), or nil if that list is nil.
  def parent_label()
    all_labels = parent_labels()
    all_labels.nil? ? nil : all_labels.first
  end

  # Returns the first entry of parents_with_edgelabel(), or nil if that
  # list is nil.
  def parent_with_edgelabel()
    all_pairs = parents_with_edgelabel()
    all_pairs.nil? ? nil : all_pairs.first
  end

  # Same as set_parent: adding a parent replaces the previous one.
  def add_parent(parent, edgelabel, varhash={})
    set_parent(parent, edgelabel, varhash)
  end

  # Makes +parent+ the parent of this node, reached via +edgelabel+.
  #
  # parent::    the new parent node
  # edgelabel:: label of the edge from the parent to this node
  # varhash::   options; if varhash["pointer_insteadof_edge"] is set,
  #             this node is not registered as a child of the parent
  def set_parent(parent, edgelabel, varhash={})
    # Remove the old parent. The block parameters must not reuse the name
    # "parent": with Ruby 1.8 block semantics that would overwrite the
    # method parameter, and the old parent would be re-added below.
    # NOTE(review): remove_parent presumably modifies @parents while
    # each_parent_with_edgelabel is iterating -- confirm that this is safe.
    each_parent_with_edgelabel { |old_label, old_parent|
      remove_parent(old_parent, old_label, varhash)
    }

    # set new parent
    @parents << [edgelabel, parent]

    # and vice versa: add self as child to parent
    return nil if varhash["pointer_insteadof_edge"]

    unless parent.children_with_edgelabel().include? [edgelabel, self]
      parent.add_child(self, edgelabel)
    end
  end
end
|
data/lib/frprep/headz.rb
DELETED
@@ -1,338 +0,0 @@
|
|
1
|
-
# name: Module Headz
|
2
|
-
# auth: albu@coli.uni-sb.de
|
3
|
-
#
|
4
|
-
# modified KE Sept 04:
|
5
|
-
# changed from old Sentence pkg to new SalsaTigerSentence pkg
|
6
|
-
#
|
7
|
-
# modified KE April 05:
|
8
|
-
# suppress the flood of warnings
|
9
|
-
#
|
10
|
-
# modified SP June 05: added some more cases; changed to SalsaTigerRegXML
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# INIT: REXML TIGER sentence,
|
14
|
-
# FUNC: syn_nodes(term/non_term) -> heads
|
15
|
-
#
|
16
|
-
#
|
17
|
-
# usage:
|
18
|
-
#
|
19
|
-
# h = Headz.new()
|
20
|
-
#
|
21
|
-
# hash = h.get_sem_head(node) # node is a SalsaTigerXmlNode obj
|
22
|
-
#
|
23
|
-
# head = hash["head"]
|
24
|
-
# prep = hash["prep"]
|
25
|
-
#
|
26
|
-
# if h.complex(head)
|
27
|
-
# print "preposition or conjunction involved"
|
28
|
-
# end
|
29
|
-
|
30
|
-
require "frprep/SalsaTigerRegXML"
|
31
|
-
|
32
|
-
# Determines heads of syntax nodes by category-specific rules.
#
# All public methods work on "head hashes": a Hash with the head node
# under 'head' and, depending on the rule that fired, further entries
# such as 'prep', 'conj', 'pncs', 'svp', 'oc', 'da', 'oa'.
#
# Nodes must respond to is_terminal?, category, children,
# children_by_edgelabels, terminals_sorted, part_of_speech and outdeg.
class Headz

  def initialize()
    @Helpers = HeadzHelpers.new()
    @Verbose = false # KE 13.4.05: keep the number of messages down
  end

  # head of one node
  def get_sem_head(node)
    gsh(node)
  end

  # all heads of the top nodes covering fe
  #
  # Returns one head hash per child of +fe+, or [] (after a message on
  # stderr) if fe.children() is nil/false.
  def get_fe_heads(fe)
    if (const = fe.children())
      const.map { |node| get_sem_head(node) }
    else
      $stderr.puts "Headz.get_fe_heads: no children for FE #{fe}"
      []
    end
  end

  # Computes the head hash for +node+.
  #
  # Returns {} for a missing node or a category without a rule,
  # {'head' => node} for a terminal, and otherwise whatever the
  # category-specific rule yields (which may be nil).
  def gsh(node)
    unless node
      $stderr.puts "Headz.gsh: no input node" if @Verbose
      return {}
    end

    return Hash['head' => node] if node.is_terminal?

    case node.category
    when 'AP', 'AVP', 'VZ'
      gsh(@Helpers.get_dtr(node, 'HD'))

    when 'CAP', 'CAVP', 'CNP', 'CPP', 'CS', 'CVP'
      # First conjunct supplies the head, the others go into 'conj'.
      # Destructure instead of shift so the list handed out by the
      # helper is left untouched.
      first_conj, *other_conjs = @Helpers.get_conjuncts(node)
      head = gsh(first_conj)
      head.update(Hash["conj" => gsh_conjs(other_conjs)]) if head
      head

    when 'NM'
      gsh(@Helpers.get_rightmost_dtr(node, 'NMC'))

    when 'NP'
      nk = @Helpers.get_rightmost_dtr(node, 'NK')
      gsh(nk || @Helpers.get_rightmost_dtr(node, "NN"))

    when 'PN'
      pncs = @Helpers.get_dtrs(node, 'PNC')
      head = gsh(pncs.last)
      head.update(Hash["pncs" => pncs]) if head
      head

    when 'PP'
      pp(node)

    when 'S'
      s(node)

    when 'VROOT'
      dtrs = @Helpers.get_dtrs(node, '--')

      # discourse level node with sentence nodes below?
      # or conjunction with sentence nodes below?
      container = dtrs.detect { |n| n.category == "DL" } ||
                  dtrs.detect { |n| n.category == "CO" }
      dtrs = container.children() if container

      # take first sentence node
      sent_dtr = dtrs.detect { |n| n.category =~ /^C?S/ }
      # (no warning is printed when no sentence is found below VROOT)
      sent_dtr ? gsh(sent_dtr) : nil

    when 'VP'
      vp(node)

    when 'MTA'
      gsh(@Helpers.get_rightmost_dtr(node, 'ADC'))

    else
      $stderr.puts " Headz.gsh: no rule for #{node.category}" if @Verbose
      {}
    end
  end

  # flatten the processed conjs to a list of (head) Hashes
  # containing no conj features themselves
  def gsh_conjs(conjs)
    flat = []
    conjs.each { |conj| @Helpers.descend(gsh(conj), flat) }
    flat
  end

  # Rule for PP nodes. NOTE: within this class this method replaces
  # Kernel#pp.
  #
  # The head comes from the rightmost 'NK' daughter, or else from the
  # 'RE' daughter; the first terminal whose part of speech starts with
  # APPR, PWAV, PP or CPP is stored under 'prep'. Returns nil if neither
  # daughter exists.
  def pp(node)
    prep = node.terminals_sorted().detect { |n|
      pt = n.part_of_speech()
      pt && pt =~ /^(?:APPR|PWAV|C?PP)/
    }

    content = @Helpers.get_rightmost_dtr(node, 'NK') ||
              @Helpers.get_dtr(node, 'RE')
    unless content
      $stderr.puts " pp: no rule for #{node}" if @Verbose
      return nil
    end

    head = gsh(content)
    head.update(Hash['prep' => prep]) if head && prep
    head
  end

  # Rule for S nodes.
  #
  # Returns {} if there is no 'HD' daughter, the head hash of the 'HD'
  # daughter if it has outdeg 0, and nil if no rule matches.
  def s(node)
    head = @Helpers.get_dtr(node, 'HD')
    return Hash[] unless head
    return gsh(head) if head.outdeg() == 0

    oc = @Helpers.get_dtr(node, 'OC')

    case head.category
    when 'VVFIN'
      svp = @Helpers.get_dtr(node, 'SVP')
      h = gsh(head)
      h.update(Hash['svp' => gsh(svp), 'oc' => gsh(oc)]) if svp && h
      return h

    when 'VAFIN'
      # The assignments are parenthesized on purpose: without the
      # parentheses "pd = a && head = b" assigns the whole conjunction to
      # pd, and pd is still nil while the right-hand side is evaluated.
      if oc && (oc_head = @Helpers.get_dtr(oc, 'HD'))
        h = gsh(oc_head)
        h.update(Hash['oc' => gsh(oc)]) if h
        return h
      elsif (pd = @Helpers.get_dtr(node, 'PD')) &&
            (pd_head = @Helpers.get_dtr(pd, 'HD'))
        return gsh(pd_head)
      end
    end

    $stderr.puts " s: no rule for #{node}" if @Verbose
    nil
  end

  # Rule for VP nodes: head hash of the 'HD' daughter, extended by the
  # head hashes of the 'DA' and 'OA' daughters under 'da' and 'oa'.
  def vp(node)
    head = gsh(@Helpers.get_dtr(node, 'HD'))

    # Silence this object's messages while looking at the objects, and
    # restore the setting even if a lookup raises.
    saved_verbose = @Verbose
    @Verbose = false
    objects = {}
    begin
      ["da", "oa"].each { |type|
        if (dtr = @Helpers.get_dtr(node, type.upcase))
          objects[type] = gsh(dtr)
        end
      }
    ensure
      @Verbose = saved_verbose
    end

    head ? head.update(objects) : objects
  end

  ################
  # Access to the entries of a head hash

  def head(h)
    h['head']
  end

  # true-ish if the head hash carries a preposition or conjuncts
  def complex(h)
    prep(h) || conj(h)
  end

  def prep(h)
    h['prep']
  end

  def conj(h)
    h['conj']
  end

end # Class Headz
|
269
|
-
|
270
|
-
|
271
|
-
# Daughter-lookup and flattening helpers used by the head rules.
#
# Nodes passed in must respond to children_by_edgelabels(labels).
class HeadzHelpers

  # verbose:: if true, lookups that find nothing are reported on stderr.
  #           Defaults to false. (An assignment to @Verbose in the class
  #           body is not visible to instance methods, so instances were
  #           always silent; the default keeps it that way.)
  def initialize(verbose = false)
    @Verbose = verbose
  end

  # Conjunction

  # Returns the daughters of +node+ reached via a 'CJ' edge.
  def get_conjuncts(node)
    get_dtrs(node, 'CJ')
  end

  # flatten
  #
  # Appends +current+ and, recursively, every hash stored under its
  # "conj" key to +flat+. The "conj" key is removed from each hash on the
  # way. Returns +flat+.
  def descend(current, flat)
    return flat if current.nil?

    nested = current.delete("conj")
    flat.push current
    nested.each { |item| descend(item, flat) } if nested
    flat
  end

  # Access

  # Returns the first daughter of +node+ reached via +label+, or nil.
  def get_dtr(node, label)
    dtrs = node.children_by_edgelabels([label])
    dtr = dtrs ? dtrs.first : nil
    report_missing("dtr", label, node) unless dtr
    dtr
  end

  # Returns the list of daughters of +node+ reached via +label+ exactly
  # as children_by_edgelabels delivers it, or nil if that is nil/false.
  def get_dtrs(node, label)
    dtrs = node.children_by_edgelabels([label])
    return dtrs if dtrs

    report_missing("dtr", label, node)
    nil
  end

  # Returns the last daughter of +node+ reached via +label+, or nil.
  def get_rightmost_dtr(node, label)
    children = node.children_by_edgelabels([label])
    rightmost = children ? children.last : nil
    report_missing("dtrs", label, node) unless rightmost
    rightmost
  end

  # def l2h(list)
  #   h = Hash.new
  #   while (list.length > 1) do
  #     h[list.shift] = list.shift
  #   end
  #   if list.length == 1 then
  #     $stderr.puts "l2h: odd number of elems: " + list.join(" / ")
  #   end
  #   h
  # end

  private

  # Writes the "nothing found" message to stderr when verbose.
  def report_missing(what, label, node)
    $stderr.puts " SelectHeadDtr: no #{label} #{what} for #{node}" if @Verbose
  end

end # Class HeadzHelpers
|
336
|
-
|
337
|
-
|
338
|
-
|