shalmaneser 0.0.1.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +284 -0
@@ -0,0 +1,99 @@
1
+ # sp jul 05 05
2
+ #
3
+ # Static helper methods for SalsaTigerRegXML:
4
+
5
+ # - provide header and footer for Salsa/Tiger XML files
6
+ # - escape and unescape HTML entities
7
+ #
8
+ # changed KE nov 05:
9
+ # many methods moved to FrprepHelper
10
+
11
+ require "common/SalsaTigerRegXML"
12
+ require "common/headz"
13
+ require "common/Parser"
14
+ require "tempfile"
15
+
16
# Static helper methods for Salsa/Tiger XML output:
# - get_header / get_footer return the fixed text that opens and closes
#   a Salsa/Tiger XML corpus file
# - escape / unescape convert between raw text and the entity-encoded
#   form used inside the XML
class SalsaTigerXMLHelper

  ###
  # get header of SalsaTigerXML files (as string)
  #
  # The returned text ends inside the opened <body> element; the matching
  # closing tags are provided by get_footer.
  def SalsaTigerXMLHelper.get_header

    header = <<ENDOFHEADER
<?xml version="1.0" encoding="UTF-8"?>
<corpus corpusname="corpus" target="">
<head>
<meta>
<format>
NeGra format, version 3</format>
</meta>
<frames xmlns="http://www.clt-st.de/framenet/frame-database">
</frames>
<wordtags xmlns="http://www.clt-st.de/salsa/wordtags">
</wordtags>
<flags>
</flags>
<annotation>
<edgelabel>
</edgelabel>
<secedgelabel>
</secedgelabel>
</annotation>
</head>
<body>
ENDOFHEADER

    return header
  end

  ###
  # get footer of SalsaTigerXML files (as string)
  #
  # Closes the <body> and <corpus> elements opened by get_header.
  def SalsaTigerXMLHelper.get_footer

    footer = <<ENDOFFOOTER
</body>
</corpus>
ENDOFFOOTER

    return footer
  end

  ###
  # escape and unescape strings for representation in XML
  #
  # Each entry is a pair [unescaped, escaped]. escape applies the pairs
  # top to bottom, unescape applies them bottom to top.
  #
  # Note that the mapping is not one-to-one: both a double quote and a
  # pair of backticks are escaped to "&apos;&apos;". When unescaping, the
  # backtick entry is tried first, so an escaped double quote comes back
  # as two backticks.
  @@replacements = [
    ["&", "&amp;"],             # must be first for escaping, last for unescaping
    ["<", "&lt;"],
    [">", "&gt;"],
    ["\"", "&apos;&apos;"],
    ["'", "&apos;"],
    ["``", "&apos;&apos;"]
  ].freeze

  ###
  # Replace XML-critical characters in a string by their entities.
  #
  # string: the text to escape. An unfrozen string is modified in place
  #         (callers may rely on this); a frozen string is left untouched
  #         and an escaped copy is produced instead of raising an error.
  #
  # returns: the escaped string
  def SalsaTigerXMLHelper.escape(string)
    # gsub! cannot operate on frozen strings (e.g. frozen literals)
    string = string.dup if string.frozen?

    @@replacements.each {|unescaped, escaped|
      string.gsub!(unescaped, escaped)
    }
    return string
  end

  ###
  # Inverse of escape: replace entities by the characters they stand for.
  #
  # string: the text to unescape. An unfrozen string is modified in place;
  #         a frozen string is copied first.
  #
  # returns: the unescaped string
  def SalsaTigerXMLHelper.unescape(string)
    string = string.dup if string.frozen?

    # reverse replacements to replace &amp; last; otherwise "&amp;lt;"
    # would wrongly be turned into "<"
    @@replacements.reverse.each {|unescaped, escaped|
      string.gsub!(escaped, unescaped)
    }
    return string
  end

end
@@ -0,0 +1,384 @@
1
+ ####
2
+ # sp 21 07 05
3
+ #
4
+ # modified ke 30 10 05: adapted to fit into SynInterface
5
+ #
6
+ # represents a file containing Sleepy parses
7
+ #
8
+ # underlying data structure for individual sentences: SalsaTigerSentence
9
+ require "tempfile"
10
+
11
+ require "common/SalsaTigerRegXML"
12
+ require "common/SalsaTigerXMLHelper"
13
+ require "common/TabFormat"
14
+ require "common/Counter"
15
+
16
+ require "common/AbstractSynInterface"
17
+ require "common/Tiger.rb"
18
+
19
+ ################################################
20
+ # Interface class
21
# Wrapper around the Sleepy parser: runs the parser on tab-format files
# (process_dir), reads its bracketed output back in sentence by sentence
# (each_sentence) and writes it out as Salsa/Tiger XML (to_stxml_file).
class SleepyInterface < SynInterfaceSTXML
  SleepyInterface.announce_me()

  ###
  # name of the system handled by this interface
  def SleepyInterface.system()
    return "sleepy"
  end

  ###
  # service that this system provides
  def SleepyInterface.service()
    return "parser"
  end

  ###
  # initialize to set values for all subsequent processing
  #
  # Recognized keys of var_hash: "pos_suffix", "lemma_suffix", "tab_dir".
  # "tab_dir" is required by each_sentence.
  def initialize(program_path,  # string: path to system
                 insuffix,      # string: suffix of tab files
                 outsuffix,     # string: suffix for parsed files
                 stsuffix,      # string: suffix for Salsa/TIGER XML files
                 var_hash = {}) # optional arguments in a hash

    super(program_path, insuffix, outsuffix, stsuffix, var_hash)

    # the program path is used as a prefix below, so it must end in "/"
    unless @program_path =~ /\/$/
      @program_path = @program_path + "/"
    end

    # new: evaluate var hash
    @pos_suffix = var_hash["pos_suffix"]
    @lemma_suffix = var_hash["lemma_suffix"]
    @tab_dir = var_hash["tab_dir"]
  end

  ####
  # parse a directory with TabFormat files and write the parse trees to outputdir
  # I assume that the files in inputdir are smaller than
  # the maximum number of sentences that
  # Sleepy can parse in one go (i.e. that they are split)
  #
  # Both directory names are used as plain string prefixes, so they are
  # expected to end in a path separator.
  def process_dir(in_dir,  # string: input directory name
                  out_dir) # string: output directory name

    sleepy_prog = "#{@program_path}sleepy --beam 1000 --model-file #{@program_path}negra.model --parse "

    Dir[in_dir + "*" + @insuffix].each {|inputfilename|
      STDERR.puts "*** Parsing #{inputfilename} with Sleepy"
      corpusfilename = File.basename(inputfilename, @insuffix)
      parsefilename = out_dir + corpusfilename + @outsuffix
      tempfile = Tempfile.new(corpusfilename)

      begin
        # we need neither lemmata nor POS tags; sleepy can do with the words
        corpusfile = FNTabFormatFile.new(inputfilename, nil, nil)
        corpusfile.each_sentence {|sentence|
          tempfile.puts sentence.to_s
        }
        # flush the parser input to disk before the parser reads it
        tempfile.close

        # parse and remove comments in the parser output
        Kernel.system(sleepy_prog+" "+tempfile.path+" 2>&1 | grep -v \"Span:\" > "+parsefilename)
      ensure
        # remove the temporary parser input right away instead of
        # leaving it around until garbage collection
        tempfile.close!
      end
    }
  end

  ###
  # for a given parsed file:
  # yield each sentence as a triple
  # [SalsaTigerSentence object, FNTabFormatSentence object, mapping]
  # of the sentence in SalsaTigerXML, the matching tab format sentence,
  # and the result of SleepyInterface.standard_mapping for the two
  #
  # If a parse has failed, yields
  # [failed_sentence (flat SalsaTigerSentence), FNTabFormatSentence, mapping]
  # to allow more detailed accounting for failed parses
  # (basically just a flat structure with a failed=true attribute
  # at the sentence node)
  #
  # Terminates the program (exit 1) if no tab directory was configured or
  # if parser output and tab file do not line up.
  def each_sentence(parsefilename)
    # sanity checks
    unless @tab_dir
      $stderr.puts "SleepyInterface error: Need to set tab directory on initialization"
      exit 1
    end

    parsefile = File.new(parsefilename)

    begin
      # get matching tab file for this parser output file
      tabfilename = @tab_dir+File.basename(parsefilename, @outsuffix)+ @insuffix
      # use the POS suffix stored by initialize (@pos_suffix); the
      # previously used @postag_suffix is never assigned in this class
      tabfile = FNTabFormatFile.new(tabfilename, @pos_suffix, @lemma_suffix)

      sentid = 0

      tabfile.each_sentence {|tab_sent| # iterate over corpus sentences

        sentence_str = ""
        parse_ok = true # false once a "Parse failed" marker was seen

        # assemble next sentence in Sleepy file by reading lines from parsefile
        while true
          line = parsefile.gets
          case line
          when /% Parse failed/
            parse_ok = false
            break
          when nil # end of file: nothing more to read
            break
          when /^%/, /^\s*$/ # empty lines, other comments: end of current sentence
            # only break if you have read something
            break unless sentence_str.empty?
          else
            # collect line of current parse and continue reading
            sentence_str << line.chomp
          end
        end

        # we have reached some kind of end
        sentid += 1

        if sentence_str.empty?
          # we don't have a sentence: hopefully, this is because parsing has failed
          # if this is not the case, we are in trouble
          if parse_ok
            # this may not happen: we need some sentence for the current
            # TabFile sentence
            $stderr.puts "SleepyInterface error: premature end of parser file!"
            exit 1
          end

          # return a SalsaTigerSentence object for the failed sentence
          # with a virtual top node and one terminal per word.
          my_sent_id = sleepy_sent_id(tab_sent, parsefilename, sentid)
          sent = SleepyInterface.failed_sentence(tab_sent, my_sent_id)
          yield [sent, tab_sent, SleepyInterface.standard_mapping(sent, tab_sent)]

        else
          # if we are here, we have a sentence_str to work on
          # hopefully, our status is OK
          unless parse_ok
            $stderr.puts "SleepyInterface error: failed parse, but parse tree exists??"
            exit 1
          end

          my_sent_id = sleepy_sent_id(tab_sent, parsefilename, sentid)
          # the parse string is padded with spaces because the patterns in
          # build_salsatiger expect a space after each closing bracket
          st_sent = build_salsatiger(" " + sentence_str + " ", 0,
                                     Array.new, Counter.new(0),
                                     Counter.new(500),
                                     SalsaTigerSentence.empty_sentence(my_sent_id.to_s))
          yield [st_sent, tab_sent, SleepyInterface.standard_mapping(st_sent, tab_sent)]
        end
      }

      # all TabFile sentences are consumed:
      # now we may just encounter comments, garbage, empty lines etc.

      while not parsefile.eof?
        case parsefile.gets
        when nil, /^%/, /^\s*$/ # empty lines, comments, end of input indicate end of current parse
        else
          $stderr.puts "SleepyInterface error: premature end of tab file"
          exit 1
        end
      end
    ensure
      # release the file handle, also when the program exits via exit 1
      # or when the caller's block raises
      parsefile.close
    end
  end


  ###
  # write Salsa/TIGER XML output to file
  def to_stxml_file(infilename,  # string: name of parse file
                    outfilename) # string: name of output stxml file

    # block form: the output file is closed even if conversion fails
    File.open(outfilename, "w") {|outfile|
      outfile.puts SalsaTigerXMLHelper.get_header()
      each_sentence(infilename) { |st_sent, tabsent|
        outfile.puts st_sent.get()
      }
      outfile.puts SalsaTigerXMLHelper.get_footer()
    }
  end



  ########################
  private

  ###
  # Determine the ID for a sentence: the ID recorded in the tab format
  # sentence if there is one (and it is not the placeholder "--"),
  # otherwise the parse file's base name plus the running sentence number.
  def sleepy_sent_id(tab_sent,      # FNTabFormatSentence
                     parsefilename, # string: name of parse file
                     sentid)        # integer: running number of the sentence

    tab_id = tab_sent.get_sent_id()
    if tab_id and tab_id != "--"
      return tab_id
    else
      return File.basename(parsefilename, @outsuffix) + "_" + sentid.to_s
    end
  end

  ###
  # Parse a Sleepy parse tree (bracketed string) and
  # build a SalsaTigerSentence from it
  #
  # Algorithm: manage stack which contains, for the current constituent,
  # child constituents (if a nonterminal), and the category label.
  # When the end of a constituent is reached, a new SynNode (TigerSalsa node) is created.
  # All children and the category label are popped from the stack and integrated into the
  # TigerSalsa data structure. The new node is re-pushed onto the stack.
  #
  # The string is consumed left to right in a loop, one token per
  # iteration, so that long sentences do not deepen the call stack.
  #
  # returns: sent_obj, once the whole string has been consumed.
  # Terminates the program (exit 1) on malformed input.
  def build_salsatiger(sentence, # string
                       pos,      # position in string (index): integer
                       stack,    # stack with incomplete nodes: Array
                       termc,    # terminal counter
                       nontc,    # nonterminal counter
                       sent_obj) # SalsaTigerSentence

    while true

      # main case distinction: match the beginning of our string
      # (i.e. what follows our current position in the string)

      case sentence[pos..-1]

      when /^ *$/ # nothing -> whole sentence parsed
        if stack.length == 1
          # sleepy always delivers one "top" node; if we don't get just one
          # node, something has gone wrong
          node = stack.pop
          node.del_attribute("gf")
          return sent_obj
        else
          $stderr.puts "SleepyINterface Error: more than one root node (stack length #{stack.length}). Full sentence: \n#{sentence}"
          exit 1
        end

      when /^\s*\(([^ )]+) /
        # match the beginning of a new constituent
        # (opening bracket + category + space, may not contain closing bracket)
        cat = $1
        consumed = $&.length
        if cat.nil? or cat == ""
          $stderr.puts "SleepyInterface Error: found category nil in sentence #{sentence[pos,10]}, full sentence\n#{sentence}"
          exit 1
        end
        stack.push cat # throw the category label on the stack
        pos += consumed

      when /^\s*(\S+)\) /
        # match the end of a terminal constituent (something before a closing bracket + space)
        word = $1
        consumed = $&.length
        comb_cat = stack.pop
        if comb_cat.to_s == ""
          $stderr.puts "SleepyInterface error: Empty cat at position #{sentence[pos,10]}, full sentence\n#{sentence}"
          exit 1
        end
        cat, gf = split_cat(comb_cat)
        node = sent_obj.add_syn("t",
                                nil,  # cat (doesn't matter here)
                                SalsaTigerXMLHelper.escape(word), # word
                                cat,  # pos
                                termc.next.to_s)
        # the grammatical function is kept on the node until its parent
        # is built, which turns it into an edge label
        node.set_attribute("gf", gf)
        stack.push node
        pos += consumed

      when /^\s*\)/ # match the end of a nonterminal (nothing before a closing bracket)
        consumed = $&.length
        # now collect children:
        # pop items from the stack until you find the category
        children = Array.new
        while true
          if stack.empty?
            $stderr.puts "SleepyInterface Error: stack empty; cannot find more children"
            exit 1
          end
          item = stack.pop
          case item.class.to_s
          when "SynNode" # this is a child
            children.push item
          when "String" # this is the category label
            if item.to_s == ""
              $stderr.puts "SleepyInterface error: Empty cat at position #{sentence[pos,10]}, full sentence\n#{sentence}"
              exit 1
            end
            cat, gf = split_cat(item)
            break
          else
            $stderr.puts "SleepyInterface Error: unknown item class #{item.class.to_s}"
            exit 1
          end
        end
        # now add a nonterminal node to the sentence object and
        # register the children nodes
        node = sent_obj.add_syn("nt",
                                cat, # cat
                                nil, # word (doesn't matter)
                                nil, # pos (doesn't matter)
                                nontc.next.to_s)
        children.each {|child|
          # move the child's grammatical function from the node attribute
          # to the edge between parent and child
          child_gf = child.get_attribute("gf")
          child.del_attribute("gf")
          node.add_child(child, child_gf)
          child.add_parent(node, child_gf)
        }
        node.set_attribute("gf", gf)
        stack.push node
        pos += consumed

      else

        if sentence =~ /Fatal error: exception Out_of_memory/
          $stderr.puts "SleepyInterface error: Sleepy parser ran out of memory."
          $stderr.puts "Try reducing the max. sentence length"
          $stderr.puts "in the experiment file."
          exit 1
        end

        $stderr.puts "SleepyInterface Error: cannot analyse sentence at pos #{pos}:\n #{sentence[pos..-1]}\n Complete sentence: \n#{sentence}"
        exit 1
      end
    end
  end

  ###
  # Sleepy delivers node labels as "phrase type"-"grammatical function"
  # but the GF may not be present.
  #
  # returns: pair [phrase type, grammatical function]; the grammatical
  # function is the empty string if the label has none.
  # Terminates the program (exit 1) if the label does not have this shape
  # (e.g. if it contains more than one hyphen).
  def split_cat(cat)

    cat =~ /^([^-]*)(-([^-]*))?$/
    unless $1
      $stderr.puts "SleepyInterface Error: could not identify category in #{cat}"
      exit 1
    end

    proper_cat = $1
    gf = $3 || ""

    return [proper_cat, gf]
  end
end
355
+
356
+
357
+
358
+ ################################################
359
+ # Interpreter class
360
# Interpreter for Sleepy parser output; behaviour beyond the two
# declarations below comes from the Tiger superclass.
class SleepyInterpreter < Tiger
  SleepyInterpreter.announce_me()

  class << self
    ###
    # Systems whose output this class interprets.
    #
    # returns: hash mapping service name (string) to system name (string),
    # in the style of { "parser" => "collins", "lemmatizer" => "treetagger" }
    def systems
      { "parser" => "sleepy" }
    end

    ###
    # Additional systems whose output this class may interpret.
    #
    # returns: hash mapping service name (string) to system name (string),
    # same format as the result of systems
    def optional_systems
      { "lemmatizer" => "treetagger" }
    end
  end

end