frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,99 @@
1
+ # sp jul 05 05
2
+ #
3
+ # Static helper methods for SalsaTigerRegXML:
4
+
5
+ # - provide header and footer for Salsa/Tiger XML files
6
+ # - escape and unescape HTML entities
7
+ #
8
+ # changed KE nov 05:
9
+ # many methods moved to FrprepHelper
10
+
11
+ require "common/SalsaTigerRegXML"
12
+ require "common/headz"
13
+ require "common/Parser"
14
+ require "tempfile"
15
+
16
###
# Static helper methods for Salsa/Tiger XML files:
# - get_header / get_footer deliver the fixed text that surrounds the
#   sentences of a Salsa/Tiger XML corpus file
# - escape / unescape convert between raw text and its XML representation
class SalsaTigerXMLHelper

  ###
  # get header of SalsaTigerXML files (as string)
  #
  # The header ends with the opening <body> tag, so sentences can be
  # appended directly after it. The matching closing tags are in get_footer.
  #
  # returns: string
  def SalsaTigerXMLHelper.get_header
    return <<ENDOFHEADER
<?xml version="1.0" encoding="UTF-8"?>
<corpus corpusname="corpus" target="">
<head>
<meta>
<format>
NeGra format, version 3</format>
</meta>
<frames xmlns="http://www.clt-st.de/framenet/frame-database">
</frames>
<wordtags xmlns="http://www.clt-st.de/salsa/wordtags">
</wordtags>
<flags>
</flags>
<annotation>
<edgelabel>
</edgelabel>
<secedgelabel>
</secedgelabel>
</annotation>
</head>
<body>
ENDOFHEADER
  end

  ###
  # get footer of SalsaTigerXML files (as string)
  #
  # Closes the <body> and <corpus> elements opened by get_header.
  #
  # returns: string
  def SalsaTigerXMLHelper.get_footer
    return <<ENDOFFOOTER
</body>
</corpus>
ENDOFFOOTER
  end

  ###
  # Replacement table for escape and unescape:
  # pairs [unescaped text, escaped text].
  #
  # Order matters: "&" must be first for escaping (otherwise the ampersands
  # of entities produced by earlier pairs would be escaped again) and hence
  # last for unescaping, which walks the table in reverse.
  #
  # Note that both a double quote and two backticks are escaped to
  # "&apos;&apos;". Unescaping walks the table backwards, so "&apos;&apos;"
  # always comes back as two backticks: unescape(escape(s)) is not the
  # identity for strings containing a double quote.
  @@replacements = [
    ["&", "&amp;"],
    ["<", "&lt;"],
    [">", "&gt;"],
    ["\"", "&apos;&apos;"],
    ["'", "&apos;"],
    ["``", "&apos;&apos;"]
  ]

  ###
  # escape a string for representation in XML
  #
  # string: the text to escape. An unfrozen string is modified in place
  #         and returned (callers may rely on either the side effect or
  #         the return value). A frozen string cannot be modified, so in
  #         that case an escaped copy is returned instead of raising.
  #
  # returns: the escaped string
  def SalsaTigerXMLHelper.escape(string)
    string = string.dup if string.frozen?
    @@replacements.each { |unescaped, escaped|
      string.gsub!(unescaped, escaped)
    }
    return string
  end

  ###
  # undo the replacements made by escape
  #
  # string: the text to unescape; in-place for unfrozen strings,
  #         a copy for frozen ones (same convention as escape)
  #
  # returns: the unescaped string
  def SalsaTigerXMLHelper.unescape(string)
    string = string.dup if string.frozen?
    # reverse replacements to replace &amp; last
    @@replacements.reverse.each { |unescaped, escaped|
      string.gsub!(escaped, unescaped)
    }
    return string
  end

end
@@ -0,0 +1,384 @@
1
+ ####
2
+ # sp 21 07 05
3
+ #
4
+ # modified ke 30 10 05: adapted to fit into SynInterface
5
+ #
6
+ # represents a file containing Sleepy parses
7
+ #
8
+ # underlying data structure for individual sentences: SalsaTigerSentence
9
+ require "tempfile"
10
+
11
+ require "common/SalsaTigerRegXML"
12
+ require "common/SalsaTigerXMLHelper"
13
+ require "common/TabFormat"
14
+ require "common/Counter"
15
+
16
+ require "common/AbstractSynInterface"
17
+ require "common/Tiger.rb"
18
+
19
+ ################################################
20
+ # Interface class
21
################################################
# Interface class
#
# Runs the Sleepy parser on tab format files and converts its bracketed
# output into SalsaTigerSentence objects / Salsa/Tiger XML files.
class SleepyInterface < SynInterfaceSTXML
  SleepyInterface.announce_me()

  ###
  # returns: string, name of the system this interface handles
  def SleepyInterface.system()
    return "sleepy"
  end

  ###
  # returns: string, name of the service this interface provides
  def SleepyInterface.service()
    return "parser"
  end

  ###
  # initialize to set values for all subsequent processing
  #
  # var_hash keys read here: "pos_suffix", "lemma_suffix", "tab_dir".
  # "tab_dir" is required by each_sentence.
  def initialize(program_path, # string: path to system
                 insuffix,     # string: suffix of tab files
                 outsuffix,    # string: suffix for parsed files
                 stsuffix,     # string: suffix for Salsa/TIGER XML files
                 var_hash = {}) # optional arguments in a hash

    super(program_path, insuffix, outsuffix, stsuffix, var_hash)

    # the program path is used as a prefix below, so it has to end in a slash
    # (@program_path is presumably set by the superclass constructor)
    unless @program_path =~ /\/$/
      @program_path = @program_path + "/"
    end

    # new: evaluate var hash
    @pos_suffix = var_hash["pos_suffix"]
    @lemma_suffix = var_hash["lemma_suffix"]
    @tab_dir = var_hash["tab_dir"]
  end

  ####
  # parse a directory with TabFormat files and write the parse trees to outputdir
  # I assume that the files in inputdir are smaller than
  # the maximum number of sentences that
  # Sleepy can parse in one go (i.e. that they are split)
  #
  # Both directory names are used as plain string prefixes,
  # so they must end in a path separator.
  def process_dir(in_dir,  # string: input directory name
                  out_dir) # string: output directory name

    sleepy_prog = "#{@program_path}sleepy --beam 1000 --model-file #{@program_path}negra.model --parse "

    Dir[in_dir + "*" + @insuffix].each { |inputfilename|
      STDERR.puts "*** Parsing #{inputfilename} with Sleepy"
      corpusfilename = File.basename(inputfilename, @insuffix)
      parsefilename = out_dir + corpusfilename + @outsuffix
      tempfile = Tempfile.new(corpusfilename)

      begin
        # we need neither lemmata nor POS tags; sleepy can do with the words
        corpusfile = FNTabFormatFile.new(inputfilename, nil, nil)
        corpusfile.each_sentence { |sentence|
          tempfile.puts sentence.to_s
        }
        # close before parsing so that everything is flushed to disk
        tempfile.close

        # parse and remove comments in the parser output
        # NOTE(review): the exit status of this pipeline is that of grep,
        # not of the parser, so it is not checked here.
        Kernel.system(sleepy_prog+" "+tempfile.path+" 2>&1 | grep -v \"Span:\" > "+parsefilename)
      ensure
        # remove the temporary file right away instead of
        # leaving it around until garbage collection
        tempfile.close!
      end
    }
  end

  ###
  # for a given parsed file:
  # yield each sentence as a triple
  #  [SalsaTigerSentence object, FNTabFormatSentence object, mapping]
  # of the sentence in SalsaTigerXML, the matching tab format sentence,
  # and the result of SleepyInterface.standard_mapping for the two
  #
  # If a parse has failed, yields
  #  [failed_sentence (flat SalsaTigerSentence), FNTabFormatSentence, mapping]
  # to allow more detailed accounting for failed parses
  # (basically just a flat structure with a failed=true attribute
  # at the sentence node)
  #
  # Terminates the program (exit 1) if no tab directory was set or if
  # parser output and tab file do not fit together.
  def each_sentence(parsefilename)
    # sanity checks
    unless @tab_dir
      $stderr.puts "SleepyInterface error: Need to set tab directory on initialization"
      exit 1
    end

    # block form: the parse file is closed again on every way out
    File.open(parsefilename) { |parsefile|
      # get matching tab file for this parser output file
      tabfilename = @tab_dir+File.basename(parsefilename, @outsuffix)+ @insuffix
      # uses @pos_suffix as set in initialize
      tabfile = FNTabFormatFile.new(tabfilename, @pos_suffix, @lemma_suffix)

      sentid = 0

      tabfile.each_sentence { |tab_sent| # iterate over corpus sentences

        # assemble next sentence in Sleepy file by reading lines from parsefile
        sentence_str, parse_ok = read_next_parse(parsefile)

        # we have reached some kind of end
        sentid += 1

        if sentence_str == ""
          # we don't have a sentence: hopefully, this is because parsing has failed
          # if this is not the case, we are in trouble
          if parse_ok
            # this may not happen: we need some sentence for the current
            # TabFile sentence
            $stderr.puts "SleepyInterface error: premature end of parser file!"
            exit 1
          end

          # return a SalsaTigerSentence object for the failed sentence
          # with a virtual top node and one terminal per word.
          my_sent_id = sentence_id_for(tab_sent, parsefilename, sentid)
          sent = SleepyInterface.failed_sentence(tab_sent, my_sent_id)
          yield [sent, tab_sent, SleepyInterface.standard_mapping(sent, tab_sent)]

        else
          # if we are here, we have a sentence_str to work on
          # hopefully, our status is OK
          unless parse_ok
            $stderr.puts "SleepyInterface error: failed parse, but parse tree exists??"
            exit 1
          end

          my_sent_id = sentence_id_for(tab_sent, parsefilename, sentid)
          st_sent = build_salsatiger(" " + sentence_str + " ", 0,
                                     Array.new, Counter.new(0),
                                     Counter.new(500),
                                     SalsaTigerSentence.empty_sentence(my_sent_id.to_s))
          yield [st_sent, tab_sent, SleepyInterface.standard_mapping(st_sent, tab_sent)]
        end
      }

      # all TabFile sentences are consumed:
      # now we may just encounter comments, garbage, empty lines etc.
      while not parsefile.eof?
        case parsefile.gets
        when nil, /^%/, /^\s*$/ # empty lines, comments, end of input indicate end of current parse
        else
          $stderr.puts "SleepyInterface error: premature end of tab file"
          exit 1
        end
      end
    }
  end

  ###
  # write Salsa/TIGER XML output to file
  def to_stxml_file(infilename,  # string: name of parse file
                    outfilename) # string: name of output stxml file

    # block form: the output file is closed even if reading the parses fails
    File.open(outfilename, "w") { |outfile|
      outfile.puts SalsaTigerXMLHelper.get_header()
      each_sentence(infilename) { |st_sent, _tab_sent|
        outfile.puts st_sent.get()
      }
      outfile.puts SalsaTigerXMLHelper.get_footer()
    }
  end

  ########################
  private

  ###
  # Read the lines of the next parse from the parser output file.
  #
  # Reading stops
  # - at a "% Parse failed" line (parse failed),
  # - at the end of the file,
  # - at a comment or empty line, but only once some text has been read.
  #
  # returns: pair [string, boolean]:
  #   the parse collected so far, with its lines concatenated
  #   (possibly the empty string), and false iff a "Parse failed" line was seen
  def read_next_parse(parsefile) # open File object: parser output
    sentence_str = ""

    while true
      line = parsefile.gets
      case line
      when /% Parse failed/
        return [sentence_str, false]
      when nil # end of file: nothing more to read
        return [sentence_str, true]
      when /^%/, /^\s*$/ # empty lines, other comments: end of current sentence
        # only stop if you have read something
        return [sentence_str, true] unless sentence_str == ""
      else
        sentence_str += line.chomp # collect line of current parse and continue reading
      end
    end
  end

  ###
  # Determine the ID for a sentence: the ID from the tab file if there
  # is one (and it is not the placeholder "--"), else an ID made from
  # the name of the parse file and the running number of the sentence.
  def sentence_id_for(tab_sent,      # FNTabFormatSentence object
                      parsefilename, # string: name of parse file
                      sentid)        # integer: running number of the sentence
    tab_id = tab_sent.get_sent_id()
    if tab_id and tab_id != "--"
      return tab_id
    end
    return File.basename(parsefilename, @outsuffix) + "_" + sentid.to_s
  end

  ###
  # Function for parsing a Sleepy parse tree and
  # building a SalsaTigerSentence
  #
  # Algorithm: manage stack which contains, for the current constituent,
  # child constituents (if a nonterminal), and the category label.
  # When the end of a constituent is reached, a new SynNode (TigerSalsa node) is created.
  # All children and the category label are popped from the stack and integrated into the
  # TigerSalsa data structure. The new node is re-pushed onto the stack.
  #
  # The string is consumed token by token in a loop (rather than by one
  # recursive call per token), so long sentences cannot exhaust the call stack.
  #
  # returns: sent_obj, once the whole string has been consumed.
  # Terminates the program (exit 1) if the string cannot be analysed.
  def build_salsatiger(sentence, # string
                       pos,      # position in string (index): integer
                       stack,    # stack with incomplete nodes: Array
                       termc,    # terminal counter
                       nontc,    # nonterminal counter
                       sent_obj) # SalsaTigerSentence

    while true
      # main case distinction: match the beginning of our string
      # (i.e. what follows our current position in the string)
      case sentence[pos..-1]

      when /^ *$/ # nothing -> whole sentence parsed
        # sleepy always delivers one "top" node; if we don't get just one
        # node, something has gone wrong
        unless stack.length == 1
          $stderr.puts "SleepyINterface Error: more than one root node (stack length #{stack.length}). Full sentence: \n#{sentence}"
          exit 1
        end
        node = stack.pop
        node.del_attribute("gf")
        return sent_obj

      when /^\s*\(([^ )]+) /
        # match the beginning of a new constituent
        # (opening bracket + category + space, may not contain closing bracket)
        cat = $1
        consumed = $&.length
        if cat.nil? or cat == ""
          $stderr.puts "SleepyInterface Error: found category nil in sentence #{sentence[pos,10]}, full sentence\n#{sentence}"
          exit 1
        end
        stack.push cat # throw the category label on the stack
        pos += consumed

      when /^\s*(\S+)\) /
        # match the end of a terminal constituent (something before a closing bracket + space)
        word = $1
        consumed = $&.length
        comb_cat = stack.pop
        if comb_cat.to_s == ""
          $stderr.puts "SleepyInterface error: Empty cat at position #{sentence[pos,10]}, full sentence\n#{sentence}"
          exit 1
        end
        cat, gf = split_cat(comb_cat)
        node = sent_obj.add_syn("t",
                                nil,  # cat (doesn't matter here)
                                SalsaTigerXMLHelper.escape(word), # word
                                cat,  # pos
                                termc.next.to_s)
        # keep the grammatical function at the node until
        # the parent constituent is completed
        node.set_attribute("gf", gf)
        stack.push node
        pos += consumed

      when /^\s*\)/ # match the end of a nonterminal (nothing before a closing bracket)
        consumed = $&.length

        # now collect children:
        # pop items from the stack until you find the category
        children = Array.new
        label = nil
        while label.nil?
          if stack.empty?
            $stderr.puts "SleepyInterface Error: stack empty; cannot find more children"
            exit 1
          end
          item = stack.pop
          case item.class.to_s
          when "SynNode" # this is a child
            children.push item
          when "String" # this is the category label
            if item.to_s == ""
              $stderr.puts "SleepyInterface error: Empty cat at position #{sentence[pos,10]}, full sentence\n#{sentence}"
              exit 1
            end
            label = item
          else
            $stderr.puts "SleepyInterface Error: unknown item class #{item.class.to_s}"
            exit 1
          end
        end
        cat, gf = split_cat(label)

        # now add a nonterminal node to the sentence object and
        # register the children nodes
        node = sent_obj.add_syn("nt",
                                cat,  # cat
                                nil,  # word (doesn't matter)
                                nil,  # pos (doesn't matter)
                                nontc.next.to_s)
        children.each { |child|
          # the grammatical function moves from the child node to the edge
          child_gf = child.get_attribute("gf")
          child.del_attribute("gf")
          node.add_child(child, child_gf)
          child.add_parent(node, child_gf)
        }
        node.set_attribute("gf", gf)
        stack.push node
        pos += consumed

      else
        if sentence =~ /Fatal error: exception Out_of_memory/
          $stderr.puts "SleepyInterface error: Sleepy parser ran out of memory."
          $stderr.puts "Try reducing the max. sentence length"
          $stderr.puts "in the experiment file."
          exit 1
        end

        $stderr.puts "SleepyInterface Error: cannot analyse sentence at pos #{pos}:\n #{sentence[pos..-1]}\n Complete sentence: \n#{sentence}"
        exit 1
      end
    end
  end

  ###
  # Sleepy delivers node labels as "phrase type"-"grammatical function"
  # but the GF may not be present.
  #
  # returns: pair [phrase type, grammatical function] of strings;
  # the grammatical function is the empty string if the label has none.
  # Terminates the program (exit 1) if the label does not have this form,
  # e.g. if it contains more than one dash.
  def split_cat(cat) # string: node label
    cat =~ /^([^-]*)(-([^-]*))?$/
    unless $1
      $stderr.puts "SleepyInterface Error: could not identify category in #{cat}"
      exit 1
    end

    proper_cat = $1
    gf = $3 || ""

    return [proper_cat, gf]
  end
end
355
+
356
+
357
+
358
+ ################################################
359
+ # Interpreter class
360
################################################
# Interpreter class
#
# Declares which systems' output this interpreter can handle.
class SleepyInterpreter < Tiger
  SleepyInterpreter.announce_me()

  ###
  # names of the systems interpreted by this class:
  # returns a hash service(string) -> system name (string),
  # here: the service "parser" provided by the system "sleepy"
  def self.systems
    { "parser" => "sleepy" }
  end

  ###
  # names of additional systems that may be interpreted by this class
  # returns a hash service(string) -> system name(string),
  # in the same format as the hash returned by systems()
  def self.optional_systems
    { "lemmatizer" => "treetagger" }
  end

end