shalmaneser 0.0.1.alpha → 1.2.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +2 -2
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +49 -0
  6. data/bin/fred +18 -0
  7. data/bin/frprep +34 -0
  8. data/bin/rosy +17 -0
  9. data/lib/common/AbstractSynInterface.rb +35 -33
  10. data/lib/common/Mallet.rb +236 -0
  11. data/lib/common/Maxent.rb +26 -12
  12. data/lib/common/Parser.rb +5 -5
  13. data/lib/common/SynInterfaces.rb +13 -6
  14. data/lib/common/TabFormat.rb +7 -6
  15. data/lib/common/Tiger.rb +4 -4
  16. data/lib/common/Timbl.rb +144 -0
  17. data/lib/common/{FrprepHelper.rb → frprep_helper.rb} +14 -8
  18. data/lib/common/headz.rb +1 -1
  19. data/lib/common/ruby_class_extensions.rb +3 -3
  20. data/lib/fred/FredBOWContext.rb +14 -2
  21. data/lib/fred/FredDetermineTargets.rb +4 -9
  22. data/lib/fred/FredEval.rb +1 -1
  23. data/lib/fred/FredFeatureExtractors.rb +4 -3
  24. data/lib/fred/FredFeaturize.rb +1 -1
  25. data/lib/frprep/CollinsInterface.rb +6 -6
  26. data/lib/frprep/MiniparInterface.rb +5 -5
  27. data/lib/frprep/SleepyInterface.rb +7 -7
  28. data/lib/frprep/TntInterface.rb +1 -1
  29. data/lib/frprep/TreetaggerInterface.rb +29 -5
  30. data/lib/frprep/do_parses.rb +1 -0
  31. data/lib/frprep/frprep.rb +36 -32
  32. data/lib/{common/BerkeleyInterface.rb → frprep/interfaces/berkeley_interface.rb} +69 -95
  33. data/lib/frprep/interfaces/stanford_interface.rb +353 -0
  34. data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
  35. data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
  36. data/lib/frprep/opt_parser.rb +2 -2
  37. data/lib/rosy/AbstractFeatureAndExternal.rb +5 -3
  38. data/lib/rosy/RosyIterator.rb +11 -10
  39. data/lib/rosy/rosy.rb +1 -0
  40. data/lib/shalmaneser/version.rb +1 -1
  41. data/test/functional/sample_experiment_files/fred_test.salsa.erb +1 -1
  42. data/test/functional/sample_experiment_files/fred_train.salsa.erb +1 -1
  43. data/test/functional/sample_experiment_files/prp_test.salsa.erb +2 -2
  44. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +2 -2
  45. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +2 -2
  46. data/test/functional/sample_experiment_files/prp_train.salsa.erb +2 -2
  47. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +2 -2
  48. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +2 -2
  49. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +1 -1
  50. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +7 -7
  51. data/test/functional/test_frprep.rb +3 -3
  52. data/test/functional/test_rosy.rb +20 -0
  53. metadata +215 -224
  54. data/CHANGELOG.rdoc +0 -0
  55. data/LICENSE.rdoc +0 -0
  56. data/README.rdoc +0 -0
  57. data/lib/common/CollinsInterface.rb +0 -1165
  58. data/lib/common/MiniparInterface.rb +0 -1388
  59. data/lib/common/SleepyInterface.rb +0 -384
  60. data/lib/common/TntInterface.rb +0 -44
  61. data/lib/common/TreetaggerInterface.rb +0 -303
  62. data/lib/frprep/AbstractSynInterface.rb +0 -1227
  63. data/lib/frprep/BerkeleyInterface.rb +0 -375
  64. data/lib/frprep/ConfigData.rb +0 -694
  65. data/lib/frprep/FixSynSemMapping.rb +0 -196
  66. data/lib/frprep/FrPrepConfigData.rb +0 -66
  67. data/lib/frprep/FrprepHelper.rb +0 -1324
  68. data/lib/frprep/ISO-8859-1.rb +0 -24
  69. data/lib/frprep/Parser.rb +0 -213
  70. data/lib/frprep/SalsaTigerRegXML.rb +0 -2347
  71. data/lib/frprep/SalsaTigerXMLHelper.rb +0 -99
  72. data/lib/frprep/SynInterfaces.rb +0 -275
  73. data/lib/frprep/TabFormat.rb +0 -720
  74. data/lib/frprep/Tiger.rb +0 -1448
  75. data/lib/frprep/Tree.rb +0 -61
  76. data/lib/frprep/headz.rb +0 -338
data/lib/frprep/Tree.rb DELETED
@@ -1,61 +0,0 @@
1
- require 'frprep/Graph'
2
-
3
- class TreeNode < GraphNode
4
-
5
- def initialize(id)
6
- super(id)
7
- end
8
-
9
- # redo the ancestor-related methods,
10
- # since here we only have one parent per node
11
- def parent()
12
- retv = parents()
13
- if retv.nil?
14
- return nil
15
- else
16
- return retv.first
17
- end
18
- end
19
-
20
- def parent_label()
21
- retv = parent_labels()
22
- if retv.nil?
23
- return nil
24
- else
25
- return retv.first
26
- end
27
- end
28
-
29
-
30
- def parent_with_edgelabel()
31
- retv = parents_with_edgelabel()
32
-
33
- if retv.nil?
34
- return nil
35
- else
36
- return retv.first
37
- end
38
- end
39
-
40
-
41
- def add_parent(parent, edgelabel, varhash={})
42
- set_parent(parent, edgelabel, varhash)
43
- end
44
-
45
- def set_parent(parent, edgelabel, varhash={})
46
- # remove old parent
47
- each_parent_with_edgelabel { |label, parent|
48
- remove_parent(parent, label, varhash)
49
- }
50
-
51
- # set new parent
52
- @parents << [edgelabel, parent]
53
-
54
- # and vice versa: add self as child to parent
55
- unless varhash["pointer_insteadof_edge"]
56
- unless parent.children_with_edgelabel().include? [edgelabel, self]
57
- parent.add_child(self, edgelabel)
58
- end
59
- end
60
- end
61
- end
data/lib/frprep/headz.rb DELETED
@@ -1,338 +0,0 @@
1
- # name: Module Headz
2
- # auth: albu@coli.uni-sb.de
3
- #
4
- # modified KE Sept 04:
5
- # changed from old Sentence pkg to new SalsaTigerSentence pkg
6
- #
7
- # modified KE April 05:
8
- # suppress the flood of warnings
9
- #
10
- # modified SP June 05: added some more cases; change to SalsTigerRegXML
11
- #
12
- #
13
- # INIT: REXML TIGER sentence,
14
- # FUNC: syn_nodes(term/non_term) -> heads
15
- #
16
- #
17
- # usage:
18
- #
19
- # h = Headz.new()
20
- #
21
- # hash = h.get_sem_head(node) # node is a SalsaTigerXmlNode obj
22
- #
23
- # head = hash["head"]
24
- # prep = hash["prep"]
25
- #
26
- # if h.complex(head)
27
- # print "preposition of conjunction involved"
28
- # end
29
-
30
- require "frprep/SalsaTigerRegXML"
31
-
32
- class Headz
33
-
34
- def initialize()
35
- @Helpers = HeadzHelpers.new()
36
- @Verbose = false #KE 13.4.05: please not that many messages!
37
- end
38
-
39
- # head of one node
40
- def get_sem_head(node)
41
- gsh(node)
42
- end
43
-
44
- # all headz of top-nodes covering fe
45
- def get_fe_heads(fe)
46
- if (const = fe.children())
47
- const.map { |node|
48
- get_sem_head(node)
49
- }
50
- else
51
- $stderr.puts "Headz.get_sem_head: no children for FE #{fe}"
52
- []
53
- end
54
- end
55
-
56
- def gsh (node)
57
- if !node then
58
- if @Verbose then $stderr.puts "Headz.gsh: no input node" end
59
- return {}
60
-
61
- elsif node.is_terminal? then return Hash['head'=>node]
62
-
63
- else
64
- case node.category
65
- when 'AP'
66
- return gsh(@Helpers.get_dtr(node,'HD'))
67
-
68
- when 'AVP'
69
- return gsh(@Helpers.get_dtr(node,'HD'))
70
- when 'CAP', 'CAVP', 'CNP', 'CPP', 'CS', 'CVP'
71
- conjs = @Helpers.get_conjuncts(node)
72
- head = gsh(conjs.shift)
73
- if head
74
- head.update(Hash["conj"=>gsh_conjs(conjs)])
75
- end
76
- return head
77
-
78
- when 'NM'
79
- return gsh(@Helpers.get_rightmost_dtr(node,'NMC'))
80
- when 'NP'
81
- nk = @Helpers.get_rightmost_dtr(node,'NK')
82
- if nk
83
- return gsh(nk)
84
- else
85
- return gsh(@Helpers.get_rightmost_dtr(node, "NN"))
86
- end
87
-
88
- when 'PN'
89
- pncs = @Helpers.get_dtrs(node,'PNC')
90
- head = gsh(pncs.last)
91
- if head
92
- head.update(Hash["pncs"=>pncs])
93
- end
94
- return head
95
-
96
- when 'PP'
97
- return pp(node)
98
-
99
- when 'S'
100
- return s(node)
101
- when 'VROOT'
102
- dtrs = @Helpers.get_dtrs(node,'--')
103
-
104
- # discourse level node with sentence nodes below?
105
- # or conjunction with sentence nodes below?
106
- discourselevel_dtr = dtrs.detect { |n| n.category == "DL"}
107
- co_dtr = dtrs.detect { |n| n.category == "CO" }
108
- if discourselevel_dtr
109
- dtrs = discourselevel_dtr.children()
110
- elsif co_dtr
111
- dtrs = co_dtr.children()
112
- end
113
-
114
-
115
- # take first sentence node
116
- sent_dtr = dtrs.detect {|n| n.category =~ /^C?S/}
117
- if sent_dtr
118
- return gsh(sent_dtr)
119
- else
120
- # $stderr.puts "headz Warning: no sentence found below VROOT! Node #{node.id()}"
121
- return nil
122
- end
123
-
124
- when 'VP'
125
- return vp(node)
126
-
127
- when 'MTA'
128
- return gsh(@Helpers.get_rightmost_dtr(node,'ADC'))
129
-
130
- when 'VZ'
131
- return gsh(@Helpers.get_dtr(node,'HD'))
132
- else
133
- if @Verbose
134
- $stderr.puts " Headz.gsh: no rule for #{node.category}"
135
- end
136
- {}
137
- end
138
- end
139
- end
140
-
141
- # flatten the processed conjs to a list of (head) Hashes
142
- # containing no conj features themselves
143
- def gsh_conjs(conjs)
144
- flat = Array.new
145
-
146
- conjs.each {|conj|
147
- current = gsh(conj)
148
- @Helpers.descend(current,flat)
149
- }
150
-
151
- flat
152
- end
153
-
154
- #####################################3
155
- def pp(node)
156
-
157
- prep = node.terminals_sorted().detect { |n|
158
- (pt = n.part_of_speech()) and
159
- (pt =~ /^APPR/ or
160
- pt =~ /^PWAV/ or
161
- pt =~ /^C?PP/
162
- )
163
- }
164
-
165
- if (lastnk = @Helpers.get_rightmost_dtr(node,'NK'))
166
- head = gsh(lastnk)
167
- if head and prep
168
- head.update(Hash['prep'=>prep])
169
- end
170
-
171
- elsif (re = @Helpers.get_dtr(node,'RE'))
172
- head = gsh(re)
173
- if head and prep
174
- head.update(Hash['prep'=>prep])
175
- end
176
- else
177
- if @Verbose then $stderr.puts " pp: no rule for #{node}" end
178
- end
179
-
180
- head
181
- end
182
-
183
- ################
184
- def s(node)
185
- head = @Helpers.get_dtr(node,'HD')
186
- if !head
187
- # $stderr.puts " s: no head for #{node}"
188
- return Hash[]
189
- end
190
-
191
- if head.outdeg() == 0
192
- return gsh(head)
193
- end
194
-
195
- oc = @Helpers.get_dtr(node,'OC')
196
- case head.category
197
- when 'VVFIN'
198
- if svp = @Helpers.get_dtr(node,'SVP') then
199
- h = gsh(head)
200
- if h
201
- return h.update(Hash['svp'=>gsh(svp), 'oc'=>gsh(oc)])
202
- else
203
- return h
204
- end
205
- else
206
- return gsh(head)
207
- end
208
-
209
- when 'VAFIN'
210
- if oc && headd = @Helpers.get_dtr(oc,'HD')
211
- h = gsh(headd)
212
- if h
213
- return h.update(Hash['oc'=>gsh(oc)])
214
- else
215
- return h
216
- end
217
-
218
- elsif pd = @Helpers.get_dtr(node,'PD') && head = @Helpers.get_dtr(pd,'HD')
219
- return gsh(head)
220
-
221
- else
222
- if @Verbose then $stderr.puts " s: no rule for #{node}" end
223
- end
224
- else
225
- if @Verbose then $stderr.puts " s: no rule for #{node}" end
226
- end
227
- end
228
-
229
- ################
230
- def vp(node)
231
- head = gsh(@Helpers.get_dtr(node,'HD'))
232
- tmp = @Verbose
233
- @Verbose = false
234
- newHash = Hash.new
235
- ["da","oa"].each { |type|
236
- if (dtr = @Helpers.get_dtr(node,type.upcase))
237
- newHash[type] = gsh(dtr)
238
- end
239
- }
240
- @Verbose = tmp
241
- if head
242
- return head.update(newHash)
243
- else
244
- return newHash
245
- end
246
- end
247
-
248
- ################
249
- # Access
250
- def head(h)
251
- return h['head']
252
- end
253
-
254
- def complex(h)
255
- prep(h) or conj(h)
256
- end
257
-
258
- def prep(h)
259
- return h['prep']
260
- end
261
-
262
- def conj(h)
263
- return h['conj']
264
- end
265
-
266
-
267
-
268
- end # Class Headz
269
-
270
-
271
- class HeadzHelpers
272
- @Verbose = true
273
-
274
- # Conjunction
275
-
276
- def get_conjuncts(node)
277
- conjuncts = get_dtrs(node,'CJ')
278
- end
279
-
280
- # flatten
281
- def descend(current,flat)
282
- if current.nil?
283
- return flat
284
- end
285
-
286
- if current.has_key?("conj") then
287
- tmp = current.delete("conj")
288
- flat.push current
289
- tmp.each {|item|
290
- descend(item,flat)}
291
- else
292
- flat.push current
293
- end
294
- end
295
-
296
- # Zugriff
297
-
298
- def get_dtr(node,label)
299
- if (dtrs = node.children_by_edgelabels([label]))
300
- dtrs.first
301
- else
302
- if @Verbose then $stderr.puts " SelectHeadDtr: no #{label} dtr for #{node}" end
303
- nil
304
- end
305
- end
306
-
307
- def get_dtrs(node,label)
308
- if ! dtrs = node.children_by_edgelabels([label])
309
- if @Verbose then $stderr.puts " SelectHeadDtr: no #{label} dtr for #{node}" end
310
- else
311
- dtrs
312
- end
313
- end
314
-
315
- def get_rightmost_dtr(node,label)
316
- children = node.children_by_edgelabels([label])
317
- if re = children.last then re
318
- else
319
- if @Verbose then $stderr.puts " SelectHeadDtr: no #{label} dtrs for #{node}" end
320
- nil
321
- end
322
- end
323
-
324
- # def l2h(list)
325
- # h = Hash.new
326
- # while (list.length > 1) do
327
- # h[list.shift] = list.shift
328
- # end
329
- # if list.length == 1 then
330
- # $stderr.puts "l2h: odd number of elems: " + list.join(" / ")
331
- # end
332
- # h
333
- # end
334
-
335
- end # Class HeadzHelpers
336
-
337
-
338
-