frprep 0.0.1.prealpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,338 @@
1
+ # name: Module Headz
2
+ # auth: albu@coli.uni-sb.de
3
+ #
4
+ # modified KE Sept 04:
5
+ # changed from old Sentence pkg to new SalsaTigerSentence pkg
6
+ #
7
+ # modified KE April 05:
8
+ # suppress the flood of warnings
9
+ #
10
+ # modified SP June 05: added some more cases; change to SalsaTigerRegXML
11
+ #
12
+ #
13
+ # INIT: REXML TIGER sentence,
14
+ # FUNC: syn_nodes(term/non_term) -> heads
15
+ #
16
+ #
17
+ # usage:
18
+ #
19
+ # h = Headz.new()
20
+ #
21
+ # hash = h.get_sem_head(node) # node is a SalsaTigerXmlNode obj
22
+ #
23
+ # head = hash["head"]
24
+ # prep = hash["prep"]
25
+ #
26
+ # if h.complex(head)
27
+ # print "preposition or conjunction involved"
28
+ # end
29
+
30
+ require "frprep/SalsaTigerRegXML"
31
+
32
# Semantic head finder for syntax nodes.
#
# Starting from a node, Headz descends through category-specific edge labels
# (looked up via HeadzHelpers) until it reaches a terminal. Results are
# "head hashes": a Hash with the key "head" (the head terminal) and, depending
# on the construction, the extra keys "prep", "conj", "pncs", "svp", "oc",
# "da" and "oa".
#
# Nodes only need to respond to the methods used below: is_terminal?,
# category, children, outdeg, terminals_sorted and part_of_speech, plus
# children_by_edgelabels for the helper class.
class Headz

  def initialize()
    @helpers = HeadzHelpers.new()
    @verbose = false # KE 13.4.05: please not that many messages!
  end

  # Head of one node; public entry point, see gsh for the result format.
  def get_sem_head(node)
    gsh(node)
  end

  # All heads of the top nodes covering a frame element.
  #
  # fe:: object whose children() are the nodes to analyse
  # Returns an array with one head hash per child, or [] (after a warning
  # on stderr) if fe.children() is nil/false.
  def get_fe_heads(fe)
    const = fe.children()
    unless const
      $stderr.puts "Headz.get_fe_heads: no children for FE #{fe}"
      return []
    end

    const.map { |node| get_sem_head(node) }
  end

  # Recursive work horse: determine the head hash for a node.
  #
  # Returns
  # * {} for a nil node or a category without a rule,
  # * {'head' => node} for a terminal,
  # * nil for a VROOT without a sentence node below it,
  # * otherwise whatever the category-specific rule yields (which may be nil
  #   for PP and S nodes without a matching rule).
  def gsh(node)
    unless node
      $stderr.puts "Headz.gsh: no input node" if @verbose
      return {}
    end

    return { 'head' => node } if node.is_terminal?

    case node.category
    when 'AP', 'AVP', 'VZ'
      gsh(@helpers.get_dtr(node, 'HD'))

    when 'CAP', 'CAVP', 'CNP', 'CPP', 'CS', 'CVP'
      # First conjunct supplies the head, the remaining ones go into "conj".
      # The helper's array is not modified (the original shifted it in place).
      conjs = @helpers.get_conjuncts(node) || []
      head = gsh(conjs.first)
      head.update('conj' => gsh_conjs(conjs[1..-1] || [])) if head
      head

    when 'NM'
      gsh(@helpers.get_rightmost_dtr(node, 'NMC'))

    when 'NP'
      # rightmost NK daughter, falling back to the rightmost NN daughter
      gsh(@helpers.get_rightmost_dtr(node, 'NK') ||
          @helpers.get_rightmost_dtr(node, "NN"))

    when 'PN'
      pncs = @helpers.get_dtrs(node, 'PNC') || []
      head = gsh(pncs.last)
      head.update('pncs' => pncs) if head
      head

    when 'PP'
      pp(node)

    when 'S'
      s(node)

    when 'VROOT'
      vroot_head(node)

    when 'VP'
      vp(node)

    when 'MTA'
      gsh(@helpers.get_rightmost_dtr(node, 'ADC'))

    else
      $stderr.puts " Headz.gsh: no rule for #{node.category}" if @verbose
      {}
    end
  end

  # Flatten the processed conjs to a list of (head) hashes
  # containing no "conj" features themselves.
  def gsh_conjs(conjs)
    flat = []
    conjs.each { |conj| @helpers.descend(gsh(conj), flat) }
    flat
  end

  #####################################
  # Rule for PP nodes.
  #
  # The head comes from the rightmost NK daughter or, failing that, from the
  # RE daughter. The first terminal whose part of speech starts with APPR,
  # PWAV or (C)PP is attached under "prep". Returns nil if neither daughter
  # exists.
  def pp(node)
    prep = node.terminals_sorted().detect { |term|
      pos = term.part_of_speech()
      pos && (pos =~ /^APPR/ || pos =~ /^PWAV/ || pos =~ /^C?PP/)
    }

    complement = @helpers.get_rightmost_dtr(node, 'NK') ||
                 @helpers.get_dtr(node, 'RE')
    unless complement
      $stderr.puts " pp: no rule for #{node}" if @verbose
      return nil
    end

    head = gsh(complement)
    head.update('prep' => prep) if head && prep
    head
  end

  ################
  # Rule for S nodes.
  #
  # Returns {} if the node has no HD daughter, the head hash of the HD
  # daughter if that daughter has no outgoing edges, and otherwise applies
  # the VVFIN/VAFIN rules below. Returns nil if no rule matches.
  def s(node)
    head = @helpers.get_dtr(node, 'HD')
    return {} unless head
    return gsh(head) if head.outdeg() == 0

    oc = @helpers.get_dtr(node, 'OC')

    case head.category
    when 'VVFIN'
      svp = @helpers.get_dtr(node, 'SVP')
      return gsh(head) unless svp

      h = gsh(head)
      h.update('svp' => gsh(svp), 'oc' => gsh(oc)) if h
      return h

    when 'VAFIN'
      if oc && (oc_head = @helpers.get_dtr(oc, 'HD'))
        h = gsh(oc_head)
        h.update('oc' => gsh(oc)) if h
        return h
      end

      # The original condition `pd = A && head = get_dtr(pd, 'HD')` bound as
      # `pd = (A && ...)`, so pd was still nil when handed to get_dtr.
      # Look up the PD daughter first, then its HD daughter.
      pd = @helpers.get_dtr(node, 'PD')
      pd_head = pd && @helpers.get_dtr(pd, 'HD')
      return gsh(pd_head) if pd_head
    end

    $stderr.puts " s: no rule for #{node}" if @verbose
    nil
  end

  ################
  # Rule for VP nodes: head hash of the HD daughter, extended by "da" and
  # "oa" entries for DA and OA daughters if present. Messages are
  # suppressed while the objects are analysed.
  def vp(node)
    head = gsh(@helpers.get_dtr(node, 'HD'))

    objects = {}
    saved_verbose = @verbose
    @verbose = false
    begin
      ["da", "oa"].each { |type|
        dtr = @helpers.get_dtr(node, type.upcase)
        objects[type] = gsh(dtr) if dtr
      }
    ensure
      # restore the flag even if the analysis of a daughter raises
      @verbose = saved_verbose
    end

    head ? head.update(objects) : objects
  end

  ################
  # Access

  # Head terminal stored in a head hash.
  def head(h)
    h['head']
  end

  # Truthy if the head hash involves a preposition or a conjunction.
  def complex(h)
    prep(h) || conj(h)
  end

  # Preposition stored in a head hash (nil if none).
  def prep(h)
    h['prep']
  end

  # List of further conjunct head hashes (nil if none).
  def conj(h)
    h['conj']
  end

  private

  # Rule for VROOT nodes: use the "--" daughters, or the children of a DL
  # (checked first) or CO daughter among them, and analyse the first node
  # whose category starts with S or CS. Returns nil if there is none.
  def vroot_head(node)
    dtrs = @helpers.get_dtrs(node, '--') || []

    container = dtrs.detect { |n| n.category == "DL" } ||
                dtrs.detect { |n| n.category == "CO" }
    dtrs = container.children() if container

    sent_dtr = dtrs.detect { |n| n.category =~ /^C?S/ }
    sent_dtr ? gsh(sent_dtr) : nil
  end

end # Class Headz
269
+
270
+
271
# Tree access helpers used by Headz.
#
# Nodes only need to respond to children_by_edgelabels(labels), which is
# called with a one-element array holding the edge label.
class HeadzHelpers

  # verbose:: print a message to stderr whenever a requested daughter is
  #           missing. Defaults to false: the former class-body assignment
  #           `@Verbose = true` set a class-level instance variable that
  #           instances never saw, so the helpers have always been silent.
  def initialize(verbose = false)
    @verbose = verbose
  end

  # Conjunction

  # All daughters attached with edge label CJ (possibly an empty array).
  def get_conjuncts(node)
    get_dtrs(node, 'CJ')
  end

  # Flatten: append the head hash +current+ and, recursively, every hash
  # nested under its "conj" key to +flat+. The "conj" key is removed from
  # each hash that is visited. Always returns +flat+.
  def descend(current, flat)
    return flat if current.nil?

    nested = current.delete("conj")
    flat.push current
    (nested || []).each { |item| descend(item, flat) }
    flat
  end

  # Access

  # First daughter of node with the given edge label, or nil.
  def get_dtr(node, label)
    dtr = labelled_children(node, label).first
    report_missing(label, "dtr", node) unless dtr
    dtr
  end

  # All daughters of node with the given edge label. Always an array, so
  # callers can use first/last/detect without checking for nil.
  def get_dtrs(node, label)
    dtrs = labelled_children(node, label)
    report_missing(label, "dtr", node) if dtrs.empty?
    dtrs
  end

  # Last daughter of node with the given edge label, or nil.
  def get_rightmost_dtr(node, label)
    dtr = labelled_children(node, label).last
    report_missing(label, "dtrs", node) unless dtr
    dtr
  end

  # def l2h(list)
  #   h = Hash.new
  #   while (list.length > 1) do
  #     h[list.shift] = list.shift
  #   end
  #   if list.length == 1 then
  #     $stderr.puts "l2h: odd number of elems: " + list.join(" / ")
  #   end
  #   h
  # end

  private

  # Daughters with the given edge label; [] for a nil node or when the node
  # reports nil/false.
  def labelled_children(node, label)
    (node && node.children_by_edgelabels([label])) || []
  end

  # Message for a missing daughter, only printed in verbose mode.
  def report_missing(label, what, node)
    $stderr.puts " SelectHeadDtr: no #{label} #{what} for #{node}" if @verbose
  end

end # Class HeadzHelpers
336
+
337
+
338
+
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # AB, 2010-11-25
3
+
4
+
5
+ ##############################
6
+ # class for managing the parses of one file
7
class OneParsedFile
  # core of the filename for the parse file
  attr_reader :filename

  # filename::          string: core of filename for the parse file
  # complete_filename:: string: complete filename of parse file
  # obj_with_iterator:: object with an each_sentence(filename) method that
  #                     yields three values per sentence
  def initialize(filename, complete_filename, obj_with_iterator)
    @obj_with_iterator = obj_with_iterator
    @filename = filename
    @complete_filename = complete_filename
  end

  # Yield each parsed sentence as one array
  #   [salsa/tiger xml sentence, tab format sentence, mapping]
  # of a SalsaTigerSentence object, a FNTabSentence object,
  # and a hash: FNTab sentence lineno(integer) -> array:SynNode
  # pointing each tab word to one or more SalsaTigerSentence terminals.
  #
  # Without a block an Enumerator over the same arrays is returned.
  def each_sentence()
    return enum_for(:each_sentence) unless block_given?

    @obj_with_iterator.each_sentence(@complete_filename) { |st_sent, tab_sent, mapping|
      yield [st_sent, tab_sent, mapping]
    }
  end
end
@@ -0,0 +1,94 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ # AB, 2010-11-25
4
+
5
+ require 'optparse'
6
+ require 'frprep/FrPrepConfigData'
7
+ require 'frprep/SynInterfaces'
8
module FrPrep

  # This class parses options for FrPrep.
  class OptParser

    # Main class method.
    # OP expects cmd_args to be an array like ARGV.
    #
    # Returns the FrPrepConfigData object built from the experiment file
    # given with -e/--expfile. Prints a message to stderr and exits with
    # status 1 if the arguments are empty, invalid or incomplete; raises a
    # RuntimeError if the experiment ID contains characters other than
    # A-Za-z0-9_.
    def self.parse(cmd_args)
      @prg_name = 'frprep'
      @@options = {}

      parser = create_parser

      # If no options provided print the help.
      if cmd_args.empty?
        $stderr.puts('You have to provide some options.',
                     "Please start with <#{@prg_name} --help>.")
        exit(1)
      end

      # Parse ARGV and provide the options hash.
      # Check if everything is correct and handle exceptions.
      # Anything not rescued here simply propagates to the caller.
      begin
        parser.parse(cmd_args)
      rescue OptionParser::InvalidArgument => e
        arg = e.message.split.last
        $stderr.puts "The provided argument #{arg} is currently not supported!"
        $stderr.puts "Please consult <#{@prg_name} --help>."
        exit(1)
      rescue OptionParser::InvalidOption => e
        $stderr.puts "You have provided an #{e.message}."
        $stderr.puts "Please consult <#{@prg_name} --help>."
        exit(1)
      rescue OptionParser::MissingArgument => e
        # e.g. "-e" given without a filename
        opt = e.message.split.last
        $stderr.puts "The option #{opt} requires an argument!"
        $stderr.puts "Please consult <#{@prg_name} --help>."
        exit(1)
      end

      # The experiment file is mandatory; fail early with a readable message
      # instead of handing nil to FrPrepConfigData.
      unless @@options[:exp_file]
        $stderr.puts 'You have to provide an experiment file via -e|--expfile.'
        $stderr.puts "Please consult <#{@prg_name} --help>."
        exit(1)
      end

      exp = FrPrepConfigData.new(@@options[:exp_file])

      # AB: this stuff should be moved into FrPrepConfigData.
      # sanity checks
      # \A and \z anchor the whole string; ^ and $ would accept a multi-line
      # value as long as one of its lines looks valid.
      unless exp.get("prep_experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
        raise "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
      end

      SynInterfaces.check_interfaces_abort_if_missing(exp)

      exp
    end

    # Builds the OptionParser instance used by parse.
    #
    # Meant for internal use. A bare `private` does not apply to methods
    # defined with `def self.`, so this method has always been callable from
    # outside and is kept that way.
    def self.create_parser
      OptionParser.new do |opts|
        opts.banner = <<-STOP
Fred Preprocessor <FrPrep>. Preprocessing stage before Fred and Rosy
for further frame/word sense assignment and semantic role assignment.

Usage: frprep -h|-e FILENAME
        STOP
        opts.separator ''
        opts.separator 'Program specific options:'

        opts.on('-e', '--expfile FILENAME',
                'Provide the path to an experiment file.',
                'FrPrep will preprocess data according to the specifications',
                'given in your experiment file.',
                'This option is required!',
                'Also consider the documentation on format and features.'
                ) do |exp_file|
          @@options[:exp_file] = File.expand_path(exp_file)
        end

        opts.separator ''
        opts.separator 'Common options:'

        opts.on_tail('-h', '--help', 'Show this help message.') do
          puts opts
          exit
        end

      end

    end # def self.create_parser

  end # class OptParser
end # module FrPrep