frprep 0.0.1.prealpha
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,275 @@
|
|
1
|
+
# SynInterfaces.rb
|
2
|
+
#
|
3
|
+
# ke oct/nov 2005
|
4
|
+
#
|
5
|
+
# Store all known interfaces to
|
6
|
+
# systems that do syntactic analysis
|
7
|
+
#
|
8
|
+
# Given the name of a system and the service that the
|
9
|
+
# system performs, return the appropriate interface
|
10
|
+
#
|
11
|
+
# There are two types of interfaces to syntactic analysis systems:
|
12
|
+
# - interfaces:
|
13
|
+
# offer methods for syntactic analysis,
|
14
|
+
# and the transformation to Salsa/Tiger XML and SalsaTigerSentence objects
|
15
|
+
# - interpreters:
|
16
|
+
# interpret the resulting Salsa/Tiger XML (represented as
|
17
|
+
# SalsaTigerSentence and SynNode objects), e.g.
|
18
|
+
# generalize over part of speech;
|
19
|
+
# describe the path between a pair of nodes both as a path
|
20
|
+
# and (potentially) as a grammatical function of one of the nodes;
|
21
|
+
# determine whether a node describes a verb, and in which voice;
|
22
|
+
# determine the head of a constituent
|
23
|
+
#
|
24
|
+
# Abstract classes for both interfaces and interpreters
|
25
|
+
# are in AbstractSynInterface.rb
|
26
|
+
|
27
|
+
require "frprep/ruby_class_extensions"
|
28
|
+
# Mix the EnumerableBool helpers into every Array.
# NOTE(review): EnumerableBool is not defined in this file; presumably it
# comes from the "frprep/ruby_class_extensions" require just above -- confirm.
Array.send(:include, EnumerableBool)
|
31
|
+
|
32
|
+
# The list of available interface packages
|
33
|
+
# is at the end of this file.
|
34
|
+
# Please enter additional interfaces there.
|
35
|
+
|
36
|
+
# Registry of all known interfaces to, and interpreters for, systems that
# do syntactic analysis.
#
# Interface and interpreter classes are registered through add_interface()
# and add_interpreter(). The lookup methods map the service => system pairs
# requested in an experiment file to the matching registered class.
# Everything here is class-level; no instance of SynInterfaces is created
# in this file.
class SynInterfaces

  ###
  # class variable:
  # list of all known interface classes
  # add to it using add_interface()
  @@interfaces = []

  ###
  # class variable:
  # list of all known interpreter classes
  # add to it using add_interpreter()
  @@interpreters = []

  ###
  # Register an interface class.
  #
  # class_name: the interface class object itself; the lookups below call
  #             .system and .service on it
  #
  # returns: the list of registered interface classes
  def SynInterfaces.add_interface(class_name)
    $stderr.puts "Initializing interface #{class_name}" if $DEBUG
    @@interfaces << class_name
  end

  ###
  # Register an interpreter class.
  #
  # class_name: the interpreter class object itself; the lookups below call
  #             .systems and .optional_systems on it
  #
  # returns: the list of registered interpreter classes
  def SynInterfaces.add_interpreter(class_name)
    $stderr.puts "Initializing interpreter #{class_name}" if $DEBUG
    @@interpreters << class_name
  end

  # AB: fake method to preview the interfaces table.
  # Dumps both registries to stderr.
  def SynInterfaces.explore
    $stderr.puts "Exploring..."
    $stderr.puts @@interfaces
    $stderr.puts @@interpreters
  end

  ###
  # check_interfaces_abort_if_missing:
  #
  # Given an experiment file, use some_system_missing? to
  # determine whether the system can be run with the requested
  # syntactic processing. If that is not possible, print what is missing
  # and what is available to stderr, then terminate with exit status 1.
  #
  # returns: nil if nothing is missing
  def SynInterfaces.check_interfaces_abort_if_missing(exp) # FrPrepConfigData object
    missing = SynInterfaces.some_system_missing?(exp)
    return unless missing

    interwhat, services = missing

    # renders a service => system mapping as "service X, system Y; ..."
    describe = lambda { |systems|
      systems.to_a.map { |service, system_name|
        "service #{service}, system #{system_name}"
      }.join("; ")
    }

    $stderr.puts
    $stderr.puts "ERROR: I am missing an #{interwhat} for "
    services.each_pair { |service, system_name|
      $stderr.puts "\tservice #{service}, system #{system_name}"
    }
    $stderr.puts
    $stderr.puts "I have the following interfaces:"
    @@interfaces.each { |interface_class|
      $stderr.puts "\tservice #{interface_class.service}, system #{interface_class.system}"
    }
    $stderr.puts "I have the following interpreters:"
    @@interpreters.each { |interpreter_class|
      $stderr.print "\t"
      $stderr.print describe.call(interpreter_class.systems)
      unless interpreter_class.optional_systems.empty?
        $stderr.print ", optional: "
        $stderr.print describe.call(interpreter_class.optional_systems)
      end
      $stderr.puts
    }
    $stderr.puts
    $stderr.puts "Please adapt your experiment file."
    exit 1
  end

  ###
  # some_system_missing?
  # returns nil if I have interfaces and interpreters
  # for all services requested in the given experiment file
  # else:
  # returns pair [interface or interpreter, info]
  # where the 1st element is either 'interface' or 'interpreter',
  # and the 2nd element is a hash mapping services to system names:
  # the services that could not be provided.
  # For a missing interface only the first offending service is reported;
  # for a missing interpreter all requested services are reported.
  def SynInterfaces.some_system_missing?(exp) # FrPrepConfigData object
    services = SynInterfaces.requested_services(exp)

    # check interfaces
    services.each_pair { |service, system_name|
      unless SynInterfaces.get_interface(service, system_name)
        return ["interface", { service => system_name }]
      end
    }

    # check interpreter
    unless SynInterfaces.get_interpreter_according_to_exp(exp)
      return ["interpreter", services]
    end

    # everything okay
    nil
  end

  ###
  # given the name of a system and the service that it
  # performs, find the matching interface class
  #
  # service: string: service, e.g. parser
  # system: string: name of system, e.g. collins
  #
  # returns: the first registered interface class whose .system and
  #          .service both match, or nil if there is none
  def SynInterfaces.get_interface(service,
                                  system)
    @@interfaces.find { |interface_class|
      interface_class.system == system &&
        interface_class.service == service
    }
  end

  ###
  # helper for get_interpreter:
  # look up the interpreter for the services requested in an experiment file
  def SynInterfaces.get_interpreter_according_to_exp(exp)
    SynInterfaces.get_interpreter(SynInterfaces.requested_services(exp))
  end

  ###
  # given the names and services of a set of systems,
  # find the matching interpreter class
  #
  # an interpreter class has both obligatory systems
  # (they need to be present for this class to apply)
  # and optional systems (they may or may not be present
  # for the class to apply, but no other system performing
  # the same service may)
  #
  # systems:
  # hash: service(string) -> system name(string)
  #
  # returns: the first registered interpreter class that fits,
  #          or nil if there is none
  def SynInterfaces.get_interpreter(systems)
    @@interpreters.find { |interpreter_class|
      # all obligatory entries of interpreter_class are in systems
      interpreter_class.systems.to_a.all? { |service, system|
        systems[service] == system
      } &&
        # all optional entries of interpreter_class are
        # either in systems, or the service isn't in systems at all
        interpreter_class.optional_systems.to_a.all? { |service, system|
          systems[service].nil? || systems[service] == system
        } &&
        # all entries in systems are in either
        # the obligatory or optional set for interpreter_class
        systems.to_a.all? { |service, system|
          interpreter_class.systems[service] == system ||
            interpreter_class.optional_systems[service] == system
        }
    }
  end

  ################
  # NOTE: a bare `protected` marker used to precede the method below. It
  # only affects instance methods, so it never restricted this class-level
  # method; requested_services is (and stays) publicly callable.

  ###
  # knows about possible services that can be set in
  # the experiment file, and where the names of
  # the matching systems will be found in the experiment file data structure
  #
  # WARNING: adapt this when you introduce new services!
  #
  # exp: object answering get(key); read once for each "do_..." flag and
  #      once for each service whose flag is set
  #
  # returns: a hash
  # <service> => system_name
  #
  # with one entry for each service whose flag is set in the experiment
  # file; the value is whatever exp.get(<service>) returns
  def SynInterfaces.requested_services(exp)
    [
      ["do_postag", "pos_tagger"],
      ["do_lemmatize", "lemmatizer"],
      ["do_parse", "parser"]
    ].each_with_object({}) { |(flag, service), requested|
      # flag set: yes, perform this service
      requested[service] = exp.get(service) if exp.get(flag)
    }
  end
end
|
246
|
+
|
247
|
+
|
248
|
+
require "frprep/CollinsInterface"
|
249
|
+
require "frprep/BerkeleyInterface"
|
250
|
+
require "frprep/SleepyInterface"
|
251
|
+
require "frprep/MiniparInterface"
|
252
|
+
require "frprep/TntInterface"
|
253
|
+
require "frprep/TreetaggerInterface"
|
254
|
+
|
255
|
+
|
256
|
+
# Interpreter that declares neither obligatory nor optional systems.
# Both class methods below return an empty hash.
class EmptyInterpreter < SynInterpreter
  # NOTE(review): announce_me is not defined in this file; presumably it is
  # inherited from SynInterpreter and registers this class -- confirm.
  EmptyInterpreter.announce_me()

  ###
  # systems interpreted by this class:
  # returns a hash service(string) -> system name (string),
  # e.g.
  # { "parser" => "collins", "lemmatizer" => "treetagger" }
  #
  # For this class the hash is empty.
  def EmptyInterpreter.systems()
    {}
  end

  ###
  # names of additional systems that may be interpreted by this class
  # returns a hash service(string) -> system name(string)
  # same format as systems()
  #
  # This used to be written as `def SynInterpreter.optional_systems`, which
  # replaced the parent class's method (and therefore the inherited default
  # of every other interpreter) instead of defining it for this class only.
  def EmptyInterpreter.optional_systems()
    {}
  end
end
|
@@ -0,0 +1,720 @@
|
|
1
|
+
# TabFormat.rb
|
2
|
+
# Katrin Erk, Jan 2004
|
3
|
+
#
|
4
|
+
# classes to be used with tabular format text files.
|
5
|
+
# originally CoNLL2.rb
|
6
|
+
# Original: Katrin Erk, Jan 2004 for CoNLL '04 data
|
7
|
+
# Rewrite: Sebastian Pado, Mar 2004 for Gemmas FrameNet data (no NEs etc.)
|
8
|
+
|
9
|
+
# Extensions SP Jun/Jul 04
|
10
|
+
# renamed GemmaCorpus to FNTabFormat
|
11
|
+
|
12
|
+
# partial rewrite SP 250804: made things cleaner & leaner: no RawFormat, for example
|
13
|
+
|
14
|
+
# sp 04/05: add a "frame" column to FNTabFormat
|
15
|
+
#
|
16
|
+
# Substantial changes KE 12/06:
|
17
|
+
# variable number of columns to accommodate more than one frame per sentence
|
18
|
+
|
19
|
+
#################################################
|
20
|
+
# class for reading a file
|
21
|
+
# containing data in tabular format
|
22
|
+
|
23
|
+
require "tempfile"
|
24
|
+
|
25
|
+
require "frprep/ISO-8859-1"
|
26
|
+
require "frprep/ruby_class_extensions"
|
27
|
+
|
28
|
+
#######################
|
29
|
+
# This function takes a variable number of arguments and
|
30
|
+
# returns them as an array
|
31
|
+
# Idea: make formulation of tab format entries easier to read,
|
32
|
+
# enclose variable arguments in a repeat() call,
|
33
|
+
# which immediately gets transformed into a list
|
34
|
+
# Collect any number of arguments into a single array.
#
# Lets a tab format description mark its repeated column group readably:
#   ["word", repeat("frame", "target"), "ne"]
#
# returns: array holding the arguments in the order given (empty if none)
def repeat(*args)
  args
end
|
37
|
+
|
38
|
+
#######################
|
39
|
+
#######################
# Reads one or more parallel text files in tabular format and yields
# their content sentence by sentence.
class TabFormatFile

  #######
  # initialize:
  # open files for reading.
  #
  # fp is a flat list [filename, format, filename, format, ...]
  # where format is a list of strings that will be used
  # to address columns of the file, the 1st string for the 1st column
  #
  # format may contain _one_ entry that is an array (or a call to repeat())
  # e.g.:
  # ["word", "pos", "lemma", repeat("frame", "target", "gf", "pt")]
  #
  # The formats of all files are concatenated, in order, into one pattern.
  #
  # raises RuntimeError if one of the files cannot be opened; files opened
  # up to that point are closed again first.
  def initialize(fp)
    @files = []
    @patterns = []
    @no_of_read_lines = 0
    # set once EOF has been seen, see each_sentence
    @read_completely = false

    fp.each_index { |ix|
      if ix.even?
        # filename
        begin
          @files << File.new(fp[ix])
        rescue
          # do not leak the handles that were opened successfully
          close
          raise 'Sorry, could not read input file ' + fp[ix] + "\n"
        end
      else
        # pattern
        @patterns += fp[ix]
      end
    }

    @my_sentence_class = TabFormatSentence
  end

  ########
  # close:
  # close all input files that are still open.
  # Called automatically when each_sentence reaches EOF.
  def close
    @files.each { |f| f.close unless f.closed? }
  end

  ########
  # each_sentence:
  # yield each sentence of the files in turn.
  # sentences are expected to be separated
  # by a line containing nothing but whitespace.
  # the last sentence may or may not be followed by
  # an empty line.
  # Reading ends as soon as EOF is encountered on any of the files; they
  # are expected to all have the same number of lines.
  # An empty line in one file must be matched by an empty line in all
  # others, otherwise the program terminates with exit status 1.
  #
  # Each sentence is yielded as one object of @my_sentence_class
  # (TabFormatSentence unless a subclass changes it); each of its lines is
  # the tab-joined concatenation of the corresponding lines of all files.
  #
  # The files are consumed (and closed) by the first complete pass; later
  # calls yield nothing. Nothing is yielded either if no files were given.
  def each_sentence
    return if @read_completely || @files.empty?

    sentence = @my_sentence_class.new(@patterns)
    begin
      while true
        # read the next line of every file; EOFError ends the pass
        linearray = @files.map { |f| f.readline().chomp() }
        @no_of_read_lines += 1

        if linearray.any? { |x| x.strip == "" }
          if linearray.any? { |x| x.strip != "" }
            STDERR.puts "Error: Mismatching empty lines!"
            exit(1)
          end

          # sentence finished. yield it and start a new one
          yield sentence unless sentence.empty?
          sentence = @my_sentence_class.new(@patterns)
        else
          # sentence not yet finished.
          # add this line to it
          sentence.add_line(linearray.join("\t"))
        end
      end
    rescue EOFError
      # maybe we haven't yielded the last sentence yet.
      yield sentence unless sentence.empty?
      close
      @read_completely = true
    end
  end

end
|
131
|
+
|
132
|
+
#################################################
|
133
|
+
# class for keeping one line,
|
134
|
+
# parsed.
|
135
|
+
# The line is kept as follows:
|
136
|
+
# - normal features: in a hash @f mapping feature names to values
|
137
|
+
# - features of the repeated group: in an array @r of
|
138
|
+
# TabFormatNamedArgs objects, one per group
|
139
|
+
#
|
140
|
+
# each feature of the line is available by name
|
141
|
+
# via the method "get".
|
142
|
+
# Additional features (from other input files) can be
|
143
|
+
# added to the TabFormatNamedArgs object via the method
|
144
|
+
# add_feature
|
145
|
+
#
|
146
|
+
# methods:
|
147
|
+
#
|
148
|
+
# new: initialize.
|
149
|
+
# values: array of strings
|
150
|
+
# features: how to access the strings by name
|
151
|
+
# 'features' is an array of strings
|
152
|
+
# later the i-th feature will be used to access
|
153
|
+
# the i-th value,
|
154
|
+
# except for repeated groups
|
155
|
+
#
|
156
|
+
# get: returns one feature by its name
|
157
|
+
# name: a string
|
158
|
+
#
|
159
|
+
# add_feature: add another feature to this object,
|
160
|
+
# which can be accessed via "get"
|
161
|
+
# name: name for the new feature, should be distinct
|
162
|
+
# from the ones already used in new()
|
163
|
+
# feature: a string, the value of the feature
|
164
|
+
##
|
165
|
+
|
166
|
+
# One parsed line of a tab format file.
# - normal features: kept in the hash @f, feature name -> value
# - features of the repeated group: kept in the array @r, one nested
#   TabFormatNamedArgs object per occurrence of the group in the line
class TabFormatNamedArgs
  ############
  # values: array of strings, the columns of the line
  # features: array of column names; at most one entry may itself be an
  #           array, naming the columns of a group that can repeat any
  #           number of times in the line
  # group: number of the group occurrence that get() falls back to for
  #        names that are not normal features (nil: no fallback)
  #
  # Columns are assigned as follows: the features listed before the group
  # take the first columns, the features listed after the group take the
  # last columns, and everything in between is cut into group occurrences.
  #
  # raises RuntimeError if 'features' contains more than one group
  def initialize(values, features, group = nil)
    @f = {}
    @r = []
    @group = group

    # record the feature names, give special attention to a group
    # if we have one
    @group_feature_names = nil
    @feature_names = features.map { |feature|
      if feature.instance_of? Array
        # found a group
        @group_feature_names = feature
        "GROUP"
      else
        feature
      end
    }

    if @feature_names.count("GROUP") > 1
      $stderr.puts "More than one group in feature set:" + features.join(" ")
      raise "Cannot handle this."
    end

    # group_index: position of group in overall feature list
    # (one past the end if there is no group)
    group_index = @feature_names.index("GROUP") || @feature_names.length
    num_features_after_group = [0,
                                (@feature_names.length - 1) - group_index].max
    # position of the first column belonging to a feature after the group;
    # negative if the line has fewer columns than there are such features
    index_after_groups = values.length - num_features_after_group

    # features before group: put feature/value pairs in @f hash
    features.first(group_index).each_with_index { |feature, i|
      @f[feature] = values[i]
    }

    # group: store each occurrence of the group in the @r array,
    # numbered from 0 in order of appearance
    if @group_feature_names
      group_length = @group_feature_names.length
      group_index.step(index_after_groups - 1, group_length) { |group_start|
        @r << TabFormatNamedArgs.new(values.slice(group_start, group_length),
                                     @group_feature_names,
                                     @r.length)
      }
    end

    # features after group: put feature/value pairs in @f hash.
    # They are aligned with the end of the line. A feature whose column
    # would lie before the start of the line stays unset; indexing values
    # with that negative position would wrap around to the end of the line
    # and hand the feature an unrelated column.
    features_after_group = features[(group_index + 1)..-1] || []
    features_after_group.each_with_index { |feature, offset|
      position = index_after_groups + offset
      @f[feature] = values[position] if position >= 0
    }
  end

  ############
  # return feature/value pairs as a tab format line,
  # order of features as given in the 'features' list
  # Features not set in the hash: their entry will be "-"
  #
  # If the feature list includes a group,
  # assume zero entries for that group
  #
  # Keys of the hash that are not in the feature list are reported on
  # stderr and ignored. Returns "" if features is nil.
  def TabFormatNamedArgs.format_str(hash, # hash: feature -> value
                                    features) # feature list, as for new()
    return "" if features.nil?

    # sanity check: does the hash contain keys that are not in the feature list?
    hash.keys.reject { |f| features.include? f }.each { |bad_feature|
      $stderr.puts "Error: unknown feature #{bad_feature} in format_str: ignoring."
    }

    features.reject { |f|
      # remove the group feature, if it's there
      f.instance_of? Array
    }.map { |feature|
      hash[feature] || "-"
    }.join("\t")
  end

  #############
  # add another normal feature, which can then be accessed via get()
  # name: name for the new feature
  # feature: the value of the feature
  #
  # raises RuntimeError if a normal feature of that name already exists
  def add_feature(name, feature)
    if @f.has_key? name
      raise "Trying to add a feature twice: "+name
    end

    @f[name] = feature
  end

  #############
  # get feature value, identified by feature name
  # Normal features take precedence; otherwise the name is looked up in the
  # group occurrence selected at construction time.
  # return: feature value, or nil if there is none
  def get(name)
    get_nongroup(name) || get_from_group(name, @group)
  end

  #############
  # set (or overwrite) a normal feature
  def set(name, feature)
    @f[name] = feature
  end

  #############
  # number of occurrences of the repeated group found in the line
  def num_groups()
    @r.length
  end

  #############
  # return line as string, entries connected by tab,
  # in the order that the entries were in originally
  # (features added later via add_feature/set under new names are not
  # included)
  def to_s()
    @feature_names.map { |feature|
      if feature == "GROUP"
        @r.map { |group_obj| group_obj.to_s }.join("\t")
      else
        @f[feature]
      end
    }.join("\t")
  end

  # protected rather than private: get_from_group calls get_nongroup on the
  # nested group objects
  protected

  # get feature, non-group
  # return: feature value, nil if not set
  def get_nongroup(feature)
    @f[feature]
  end

  # get feature from one of the groups
  # return: feature value, nil if there is no group with that number
  def get_from_group(name, group_no)
    return nil if !group_no || group_no >= @r.length

    @r[group_no].get_nongroup(name)
  end
end
|
322
|
+
|
323
|
+
|
324
|
+
#################################################
|
325
|
+
# class for keeping and yielding one sentence
|
326
|
+
# in tabular format
|
327
|
+
# One sentence of a tab format file: an ordered list of raw lines (one per
# word) plus the column pattern needed to parse them.
class TabFormatSentence
  ############
  # initialize:
  # start out with no lines.
  # pattern: column description, handed on unchanged to TabFormatNamedArgs
  #          whenever a line is parsed
  def initialize(pattern)
    @pattern = pattern
    @lines = []

    # this is just for inheritance; FNTabFormatSentence will need this
    @group_no = nil
  end

  #####
  # length: number of lines (words) stored for the sentence
  def length
    @lines.length
  end

  #################
  # add_line:
  # append one raw line, i.e. the information for one word
  # of the sentence.
  def add_line(line)
    @lines << line
  end

  ###################
  # empty?:
  # true if no lines are currently stored, else false
  def empty?
    @lines.empty?
  end

  ######################
  # empty!:
  # discard all stored lines
  def empty!
    @lines.clear
  end

  #####################
  # each_line:
  # yields each stored line, in order, as the raw string
  def each_line
    @lines.each { |stored| yield stored }
  end

  ######################
  # each_line_parsed:
  # yields each line of the sentence as a TabFormatNamedArgs object:
  # the line is split at tabs and interpreted according to the pattern
  # given to new(). Each object additionally carries the feature "lineno",
  # the 0-based position of the line within the sentence.
  def each_line_parsed
    @lines.each_with_index { |raw, position|
      parsed = TabFormatNamedArgs.new(raw.split("\t"), @pattern, @group_no)
      parsed.add_feature("lineno", position)
      yield parsed
    }
  end

  ###
  # read_one_line:
  # return the raw line with the given number (nil if there is none)
  def read_one_line(number)
    @lines[number]
  end

  ###
  # read_one_line_parsed:
  # like read_one_line, but the line is returned parsed,
  # as in each_line_parsed (nil if there is no such line)
  def read_one_line_parsed(number)
    raw = @lines[number]
    return nil if raw.nil?

    parsed = TabFormatNamedArgs.new(raw.split("\t"), @pattern, @group_no)
    parsed.add_feature("lineno", number)
    parsed
  end

  # set line no of first line of present sentence
  # no longer supported: always raises
  def set_starting_line(n)
    raise "Deprecated"
  end

  # returns line no of first line of present sentence
  # no longer supported: always raises
  def get_starting_line()
    raise "Deprecated"
  end
end
|
440
|
+
|
441
|
+
########################################################
|
442
|
+
# TabFormat files containing everything that's in the FN lexunit files
|
443
|
+
#
|
444
|
+
# one target per sentence
|
445
|
+
|
446
|
+
# Tab format file laid out according to fntab_format(), optionally read in
# parallel with a lemma file and a part-of-speech file.
class FNTabFormatFile < TabFormatFile

  # filename: the main corpus file
  # tag_suffix: if given, a file named <corpus><tag_suffix> is read in
  #             parallel as the single column "pos"
  # lemma_suffix: if given, a file named <corpus><lemma_suffix> is read in
  #               parallel as the single column "lemma"
  # <corpus> is filename with a trailing ".tab" removed.
  # The lemma column precedes the pos column in the combined pattern.
  #
  # Opening the files is left to the superclass constructor, which receives
  # the flat list [filename, format, filename, format, ...].
  def initialize(filename, tag_suffix = nil, lemma_suffix = nil)
    corpus_stem = "#{File.dirname(filename)}/#{File.basename(filename, ".tab")}"

    sources = [filename, FNTabFormatFile.fntab_format]
    sources.push(corpus_stem + lemma_suffix, ["lemma"]) if lemma_suffix
    sources.push(corpus_stem + tag_suffix, ["pos"]) if tag_suffix
    super(sources)

    # sentences of this file type are FNTabSentence objects
    @my_sentence_class = FNTabSentence
  end

  # column layout of the main file: the word, then the frame columns of
  # frametab_format() as one nested (group) entry, then "ne" and "sent_id"
  def FNTabFormatFile.fntab_format()
    ["word", FNTabFormatFile.frametab_format, "ne", "sent_id"]
  end

  # names of the columns that describe one frame
  def FNTabFormatFile.frametab_format()
    ["pt", "gf", "role", "target", "frame", "stuff"]
  end

  ##########
  # given a hash mapping features to values,
  # format according to fntab_format
  # (delegates to TabFormatNamedArgs.format_str)
  def FNTabFormatFile.format_str(hash)
    TabFormatNamedArgs.format_str(hash, FNTabFormatFile.fntab_format)
  end
end
|
485
|
+
|
486
|
+
############################################
|
487
|
+
class FNTabSentence < TabFormatSentence

  ####
  # Feature lookup hook used by markables(): returns l.get(feature_name)
  # for the parsed line l.
  # Overwrite this to get a feature from
  # a group rather than from the main feature list.
  def get_this(l, feature_name)
    return l.get(feature_name)
  end

  ####
  # Raises a RuntimeError if the first parsed line of the sentence has no
  # "sent_id" feature. Only the first line is inspected: the method
  # returns as soon as that line has passed the check.
  def sanity_check()
    each_line_parsed {|l|
      if l.get("sent_id").nil?
        raise "Error: corpus file does not conform to FN format."
      else
        return
      end
    }
  end

  ####
  # returns the sentence ID, a string, as set by FrameNet
  # (read from the "sent_id" feature of the first line)
  def get_sent_id()
    sanity_check
    each_line_parsed {|l|
      return l.get("sent_id")
    }
  end

  ####
  # iterator, yields each frame of the sentence as a FNTabFrame
  # object. They contain the complete sentence, but provide
  # access to exactly one frame of that sentence.
  #
  # Yields nothing for a sentence without lines.
  def each_frame()
    # how many frames? assume that each line has the same
    # number of frames
    first_line = read_one_line_parsed(0)
    # read_one_line_parsed returns nil when there is no line 0:
    # an empty sentence has no frames to yield
    return if first_line.nil?

    num_frames = first_line.num_groups()
    0.upto(num_frames - 1) { |frame_no|
      frame_obj = FNTabFrame.new(@pattern, frame_no)
      each_line { |l| frame_obj.add_line(l) }
      yield frame_obj
    }
  end

  ####
  # computes a mapping from word indices to labels on these words
  #
  # returns a hash: index_list(array:integer) -> label(string)
  # An entry il->label means that all the lines whose line
  # numbers are listed in il are labeled with label.
  #
  # Line numbers correspond to words of the sentence. Counting starts at 0.
  #
  # By default, "markables" looks for role labels, i.e. labels in the
  # column "role", but it can also look in another column.
  # To change the default, give the column name as a parameter.
  #
  # Column entries are expected to look like "-" or "--" (no change),
  # "B-<label>" (markable starts), "E-<label>" (markable ends), or
  # "B-<label>:E-<label>" (one-word markable). Anything else, as well as
  # overlapping or unclosed markables, is reported on $stderr and skipped;
  # no exception is raised for malformed markup.
  def markables(use_this_column="role")
    # returns hash of {index list} -> {markup label}

    sanity_check()

    idlist_to_annotation_list = Hash.new

    # (disabled in the original) add entry for the target word:
    # idlist_to_annotation_list[get_target_indices()] = "target"

    # determine span of each frame element
    # if we find overlapping FEs, we write a warning to STDERR,
    # ignore the 2nd label and attempt to "close" the 1st label

    # state of the markable that is currently open:
    # label is nil while no markable is open
    ids = Array.new
    label = nil

    each_line_parsed { |l|

      this_id = get_this(l, "lineno")

      # start of FE?
      this_col = get_this(l, use_this_column)
      unless this_col
        $stderr.puts "nil entry #{use_this_column} in line #{this_id} of sent #{get_sent_id()}. Skipping."
        next
      end
      this_fe_ann = this_col.split(":")

      case this_fe_ann.length
      when 1 # nothing at all, or a single begin or end
        markup = this_fe_ann.first
        if markup == "-" or markup == "--" # no change
          if label
            ids << this_id
          end
        elsif markup =~ /^B-(\S+)$/
          new_label = $1
          if label # are we within a markable right now?
            # one message, one line: puts with several arguments would
            # spread the warning over several lines
            $stderr.puts "[TabFormat] Warning: Markable #{new_label} starts while within markable #{label}"
            $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
          else
            label = new_label
            ids << this_id
          end
        elsif markup =~ /^E-(\S+)$/
          closed_label = $1
          if label == closed_label # we close the markable we've opened before
            ids << this_id
            # store information
            idlist_to_annotation_list[ids] = label
            # reset memory; a fresh array, since the old one is now a hash key
            label = nil
            ids = Array.new
          else
            $stderr.puts "[TabFormat] Warning: Markable "+closed_label.to_s+" closes while within markable "+ label.to_s
            $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
          end
        else
          $stderr.puts "[TabFormat] Warning: cannot analyse markup "+markup
          $stderr.puts "Debug data: Sentence id #{get_sent_id()}"
        end
      when 2 # this should be a one-word markable
        b_markup = this_fe_ann[0]
        e_markup = this_fe_ann[1]
        if label
          $stderr.puts "[TabFormat] Warning: Finding new markable at word #{this_id} while within markable #{label}"
          $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
        else
          if b_markup =~ /^B-(\S+)$/
            b_label = $1
            if e_markup =~ /^E-(\S+)$/
              e_label = $1
              if b_label == e_label
                idlist_to_annotation_list[[this_id]] = b_label
              else
                $stderr.puts "[TabFormat] Warning: Starting markable "+b_label+", closing markable "+e_label
                $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
              end
            else
              $stderr.puts "[TabFormat] Warning: Unknown end markup "+e_markup
              $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
            end
          else
            $stderr.puts "[TabFormat] Warning: Unknown start markup "+b_markup
            $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
          end
        end
      else
        # fixed: this used to reference the undefined variable this_fee_ann
        # and died with a NameError instead of printing the warning
        $stderr.puts "Warning: cannot analyse markup with more than two colon-separated parts like "+this_fe_ann.join(":")
        $stderr.puts "Debug data: Sentence id #{get_sent_id()}"
      end
    }

    unless label.nil?
      $stderr.puts "[TabFormat] Warning: Markable #{label} did not end in sentence."
      $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
    end

    return idlist_to_annotation_list
  end

  #######
  # returns the sentence as a string: the "word" features of all lines,
  # joined by single spaces
  def to_s
    sanity_check
    array = Array.new
    each_line_parsed {|l|
      array << l.get("word")
    }
    return array.join(" ")
  end

end
|
654
|
+
|
655
|
+
class FNTabFrame < FNTabSentence

  ############
  # initialize:
  # as parent, except that we also get a frame number
  # such that we can access the features of ``our'' frame
  #
  # The frame number is stored in @group_no, the value that is handed to
  # each TabFormatNamedArgs object built for a line of this sentence
  # (see read_one_line_parsed), so that feature lookups on those objects
  # address this frame's group.
  def initialize(pattern, frameno)
    super(pattern)
    @group_no = frameno
  end

  ####
  # returns the frame introduced by the target word(s)
  # of this frame group, a string
  # (the "frame" feature of the first line)
  def get_frame
    sanity_check
    each_line_parsed do |line|
      return line.get("frame")
    end
  end

  ####
  # returns an array of integers: the indices of the target of
  # the frame
  # These are the line numbers, which start counting at 0
  #
  # a target may span more than one word: every line whose "target"
  # feature is not "-" contributes its "lineno"
  def get_target_indices
    sanity_check
    positions = []
    each_line_parsed do |line|
      next if line.get("target") == "-"

      positions << line.get("lineno")
    end
    positions
  end

  ####
  # returns a string: the target
  # in the case of multiword targets,
  # we find the complete target at all
  # indices, i.e. we can just take the first one we find
  #
  # If no line carries a target, the result is whatever each_line_parsed
  # itself returns.
  def get_target
    each_line_parsed do |line|
      candidate = line.get("target")
      return candidate unless candidate == "-"
    end
  end

  ####
  # get the target POS, according to FrameNet:
  # the word characters after the single "." of a target of the form
  # <lemma>.<pos>, or nil if the target does not have that form
  def get_target_fn_pos
    get_target =~ /^[^\.]+\.(\w+)$/
    Regexp.last_match(1)
  end

end
|