shalmaneser 0.0.1.alpha → 1.2.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +2 -2
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +49 -0
- data/bin/fred +18 -0
- data/bin/frprep +34 -0
- data/bin/rosy +17 -0
- data/lib/common/AbstractSynInterface.rb +35 -33
- data/lib/common/Mallet.rb +236 -0
- data/lib/common/Maxent.rb +26 -12
- data/lib/common/Parser.rb +5 -5
- data/lib/common/SynInterfaces.rb +13 -6
- data/lib/common/TabFormat.rb +7 -6
- data/lib/common/Tiger.rb +4 -4
- data/lib/common/Timbl.rb +144 -0
- data/lib/common/{FrprepHelper.rb → frprep_helper.rb} +14 -8
- data/lib/common/headz.rb +1 -1
- data/lib/common/ruby_class_extensions.rb +3 -3
- data/lib/fred/FredBOWContext.rb +14 -2
- data/lib/fred/FredDetermineTargets.rb +4 -9
- data/lib/fred/FredEval.rb +1 -1
- data/lib/fred/FredFeatureExtractors.rb +4 -3
- data/lib/fred/FredFeaturize.rb +1 -1
- data/lib/frprep/CollinsInterface.rb +6 -6
- data/lib/frprep/MiniparInterface.rb +5 -5
- data/lib/frprep/SleepyInterface.rb +7 -7
- data/lib/frprep/TntInterface.rb +1 -1
- data/lib/frprep/TreetaggerInterface.rb +29 -5
- data/lib/frprep/do_parses.rb +1 -0
- data/lib/frprep/frprep.rb +36 -32
- data/lib/{common/BerkeleyInterface.rb → frprep/interfaces/berkeley_interface.rb} +69 -95
- data/lib/frprep/interfaces/stanford_interface.rb +353 -0
- data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
- data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
- data/lib/frprep/opt_parser.rb +2 -2
- data/lib/rosy/AbstractFeatureAndExternal.rb +5 -3
- data/lib/rosy/RosyIterator.rb +11 -10
- data/lib/rosy/rosy.rb +1 -0
- data/lib/shalmaneser/version.rb +1 -1
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +1 -1
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +1 -1
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +2 -2
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +2 -2
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +2 -2
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +2 -2
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +2 -2
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +2 -2
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +1 -1
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +7 -7
- data/test/functional/test_frprep.rb +3 -3
- data/test/functional/test_rosy.rb +20 -0
- metadata +215 -224
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/CollinsInterface.rb +0 -1165
- data/lib/common/MiniparInterface.rb +0 -1388
- data/lib/common/SleepyInterface.rb +0 -384
- data/lib/common/TntInterface.rb +0 -44
- data/lib/common/TreetaggerInterface.rb +0 -303
- data/lib/frprep/AbstractSynInterface.rb +0 -1227
- data/lib/frprep/BerkeleyInterface.rb +0 -375
- data/lib/frprep/ConfigData.rb +0 -694
- data/lib/frprep/FixSynSemMapping.rb +0 -196
- data/lib/frprep/FrPrepConfigData.rb +0 -66
- data/lib/frprep/FrprepHelper.rb +0 -1324
- data/lib/frprep/ISO-8859-1.rb +0 -24
- data/lib/frprep/Parser.rb +0 -213
- data/lib/frprep/SalsaTigerRegXML.rb +0 -2347
- data/lib/frprep/SalsaTigerXMLHelper.rb +0 -99
- data/lib/frprep/SynInterfaces.rb +0 -275
- data/lib/frprep/TabFormat.rb +0 -720
- data/lib/frprep/Tiger.rb +0 -1448
- data/lib/frprep/Tree.rb +0 -61
- data/lib/frprep/headz.rb +0 -338
@@ -1,99 +0,0 @@
|
|
1
|
-
# sp jul 05 05
|
2
|
-
#
|
3
|
-
# Static helper methods for SalsaTigerRegXML:
|
4
|
-
|
5
|
-
# - provide header and footer for Salsa/Tiger XML files
|
6
|
-
# - escape and unescape HTML entities
|
7
|
-
#
|
8
|
-
# changed KE nov 05:
|
9
|
-
# many methods moved to FrprepHelper
|
10
|
-
|
11
|
-
require "frprep/SalsaTigerRegXML"
|
12
|
-
require "frprep/headz"
|
13
|
-
require "frprep/Parser"
|
14
|
-
require "tempfile"
|
15
|
-
|
16
|
-
# Static helper methods for Salsa/Tiger XML files:
# - provide the fixed header and footer of a Salsa/Tiger XML document
# - escape and unescape characters that are special in XML
class SalsaTigerXMLHelper

  ###
  # get header of SalsaTigerXML files (as string)
  #
  # returns: string, everything up to and including the opening <body> tag
  def SalsaTigerXMLHelper.get_header
    return <<ENDOFHEADER
<?xml version="1.0" encoding="UTF-8"?>
<corpus corpusname="corpus" target="">
<head>
<meta>
<format>
NeGra format, version 3</format>
</meta>
<frames xmlns="http://www.clt-st.de/framenet/frame-database">
</frames>
<wordtags xmlns="http://www.clt-st.de/salsa/wordtags">
</wordtags>
<flags>
</flags>
<annotation>
<edgelabel>
</edgelabel>
<secedgelabel>
</secedgelabel>
</annotation>
</head>
<body>
ENDOFHEADER
  end

  ###
  # get footer of SalsaTigerXML files (as string)
  #
  # returns: string, the closing </body> and </corpus> tags
  def SalsaTigerXMLHelper.get_footer
    return <<ENDOFFOOTER
</body>
</corpus>
ENDOFFOOTER
  end

  ###
  # escape and unescape strings for representation in XML
  #
  # Table of pairs [unescaped, escaped].
  # Order matters: "&" must be first for escaping and last for unescaping,
  # otherwise the ampersands of freshly written entities get escaped again.
  #
  # Note that the mapping is not one-to-one: double quotes and
  # double backticks are both written as two apostrophe entities, so
  # unescape(escape(s)) does not necessarily give back s.
  @@replacements = [
    ["&", "&amp;"], # must be first for escaping, last for unescaping
    ["<", "&lt;"],
    [">", "&gt;"],
    ["\"", "&apos;&apos;"],
    ["'", "&apos;"],
    ["``", "&apos;&apos;"]
  ]

  ###
  # escape:
  # replace XML special characters in the given string by entities.
  #
  # string: the string to escape. It is modified IN PLACE
  #         (callers may rely on that), so it must not be frozen.
  #
  # returns: the same (modified) string object
  def SalsaTigerXMLHelper.escape(string)
    @@replacements.each { |unescaped, escaped|
      string.gsub!(unescaped, escaped)
    }
    return string
  end

  ###
  # unescape:
  # replace the entities written by escape() by plain characters again.
  #
  # string: the string to unescape. It is modified IN PLACE.
  #
  # returns: the same (modified) string object
  def SalsaTigerXMLHelper.unescape(string)
    # walk the table backwards so that "&amp;" is resolved last
    @@replacements.reverse.each { |unescaped, escaped|
      string.gsub!(escaped, unescaped)
    }
    return string
  end

end
|
data/lib/frprep/SynInterfaces.rb
DELETED
@@ -1,275 +0,0 @@
|
|
1
|
-
# SynInterfaces.rb
|
2
|
-
#
|
3
|
-
# ke oct/nov 2005
|
4
|
-
#
|
5
|
-
# Store all known interfaces to
|
6
|
-
# systems that do syntactic analysis
|
7
|
-
#
|
8
|
-
# Given the name of a system and the service that the
|
9
|
-
# system performs, return the appropriate interface
|
10
|
-
#
|
11
|
-
# There are two types of interfaces to syntactic analysis systems:
|
12
|
-
# - interfaces:
|
13
|
-
# offer methods for syntactic analysis,
|
14
|
-
# and the transformation to Salsa/Tiger XML and SalsaTigerSentence objects
|
15
|
-
# - interpreters:
|
16
|
-
# interpret the resulting Salsa/Tiger XML (represented as
|
17
|
-
# SalsaTigerSentence and SynNode objects), e.g.
|
18
|
-
# generalize over part of speech;
|
19
|
-
# describe the path between a pair of nodes both as a path
|
20
|
-
# and (potentially) as a grammatical function of one of the nodes;
|
21
|
-
# determine whether a node describes a verb, and in which voice;
|
22
|
-
# determine the head of a constituent
|
23
|
-
#
|
24
|
-
# Abstract classes for both interfaces and interpreters
|
25
|
-
# are in AbstractSynInterface.rb
|
26
|
-
|
27
|
-
require "frprep/ruby_class_extensions"
|
28
|
-
# Mix the EnumerableBool helpers into every Array.
# NOTE(review): EnumerableBool is presumably defined in
# frprep/ruby_class_extensions, required just above -- confirm.
Array.send(:include, EnumerableBool)
|
31
|
-
|
32
|
-
# The list of available interface packages
|
33
|
-
# is at the end of this file.
|
34
|
-
# Please enter additional interfaces there.
|
35
|
-
|
36
|
-
# Registry of all known interfaces to, and interpreters for,
# systems that do syntactic analysis.
#
# Interface and interpreter classes register themselves via
# add_interface() / add_interpreter(); clients look them up by
# service (e.g. "parser") and system name (e.g. "collins").
class SynInterfaces

  ###
  # class variable:
  # list of all known interface classes
  # add to it using add_interface()
  @@interfaces = Array.new

  ###
  # class variable:
  # list of all known interpreter classes
  # add to it using add_interpreter()
  @@interpreters = Array.new

  ###
  # add interface/interpreter
  #
  # class_name: the interface class itself. It is expected to answer
  #             the class methods system() and service().
  def SynInterfaces.add_interface(class_name)
    $stderr.puts "Initializing interface #{class_name}" if $DEBUG
    @@interfaces << class_name
  end

  # class_name: the interpreter class itself. It is expected to answer
  #             the class methods systems() and optional_systems(),
  #             both returning a hash service -> system name.
  def SynInterfaces.add_interpreter(class_name)
    $stderr.puts "Initializing interpreter #{class_name}" if $DEBUG
    @@interpreters << class_name
  end

  # AB: fake method to preview the interfaces table.
  # Prints all registered interfaces and interpreters to stderr.
  def SynInterfaces.explore
    $stderr.puts "Exploring..."
    $stderr.puts @@interfaces
    $stderr.puts @@interpreters
  end

  ###
  # check_interfaces_abort_if_missing:
  #
  # Given an experiment file, use some_system_missing? to
  # determine whether the system can be run with the requested
  # syntactic processing. If not, print what is missing and what is
  # available to stderr and terminate the process with exit status 1.
  #
  # exp: experiment file object answering get(key)
  #
  # returns: nil if nothing is missing
  def SynInterfaces.check_interfaces_abort_if_missing(exp) #FrPrepConfigData object
    missing = SynInterfaces.some_system_missing?(exp)
    return nil unless missing

    interwhat, services = missing

    # renders a service -> system hash as "service a, system b; ..."
    describe = lambda { |systems|
      systems.to_a.map { |service, system_name|
        "service #{service}, system #{system_name}"
      }.join("; ")
    }

    $stderr.puts
    $stderr.puts "ERROR: I am missing an #{interwhat} for "
    services.each_pair { |service, system_name|
      $stderr.puts "\tservice #{service}, system #{system_name}"
    }
    $stderr.puts
    $stderr.puts "I have the following interfaces:"
    @@interfaces.each { |interface_class|
      $stderr.puts "\tservice #{interface_class.service}, system #{interface_class.system}"
    }
    $stderr.puts "I have the following interpreters:"
    @@interpreters.each { |interpreter_class|
      $stderr.print "\t"
      $stderr.print describe.call(interpreter_class.systems)
      unless interpreter_class.optional_systems.empty?
        $stderr.print ", optional: "
        $stderr.print describe.call(interpreter_class.optional_systems)
      end
      $stderr.puts
    }
    $stderr.puts
    $stderr.puts "Please adapt your experiment file."
    exit 1
  end

  ###
  # some_system_missing?
  # returns nil if I have interfaces and interpreters
  # for all services requested in the given experiment file
  # else:
  # returns pair [interface or interpreter, info]
  # where the 1st element is either 'interface' or 'interpreter',
  # and the 2nd element is a hash mapping services to system names:
  # the services that could not be provided
  # (for a missing interface: only the first one found missing)
  def SynInterfaces.some_system_missing?(exp) # FrPrepConfigData object
    services = SynInterfaces.requested_services(exp)

    # check interfaces
    services.each_pair { |service, system_name|
      unless SynInterfaces.get_interface(service, system_name)
        return ["interface", { service => system_name }]
      end
    }

    # check interpreter
    unless SynInterfaces.get_interpreter_according_to_exp(exp)
      return ["interpreter", services]
    end

    # everything okay
    return nil
  end

  ###
  # given the name of a system and the service that it
  # performs, find the matching interface class
  #
  # service: string: service, e.g. parser
  # system: string: name of system, e.g. collins
  #
  # returns: SynInterface class, or nil if none is registered
  def SynInterfaces.get_interface(service,
                                  system)
    return @@interfaces.find { |interface_class|
      interface_class.system == system && interface_class.service == service
    }
  end

  ###
  # helper for get_interpreter:
  # look up the interpreter for the services requested in
  # the given experiment file
  def SynInterfaces.get_interpreter_according_to_exp(exp)
    return SynInterfaces.get_interpreter(SynInterfaces.requested_services(exp))
  end

  ###
  # given the names and services of a set of systems,
  # find the matching interpreter class
  #
  # an interpreter class has both obligatory systems
  # (they need to be present for this class to apply)
  # and optional systems (they may or may not be present
  # for the class to apply, but no other system performing
  # the same service may)
  #
  # systems:
  #   hash: service(string) -> system name(string)
  #
  # returns: SynInterpreter class (the first registered one that
  #   matches), or nil if none matches
  def SynInterfaces.get_interpreter(systems)
    return @@interpreters.find { |interpreter_class|
      obligatory = interpreter_class.systems
      optional = interpreter_class.optional_systems

      # all obligatory entries of interpreter_class are in systems
      obligatory.to_a.all? { |service, system|
        systems[service] == system
      } &&
        # all optional entries of interpreter_class are
        # either in systems, or the service isn't in systems at all
        optional.to_a.all? { |service, system|
          systems[service].nil? || systems[service] == system
        } &&
        # all entries in systems are in either
        # the obligatory or optional set for interpreter_class
        systems.to_a.all? { |service, system|
          obligatory[service] == system || optional[service] == system
        }
    }
  end

  ################
  # (The original had a 'protected' marker here; it has no effect on
  # methods defined as 'def SynInterfaces.xxx', so requested_services
  # has always been publicly callable and stays that way.)

  ###
  # knows about possible services that can be set in
  # the experiment file, and where the names of
  # the matching systems will be found in the experiment file data structure
  #
  # WARNING: adapt this when you introduce new services!
  #
  # returns: a hash
  #   <service> => system_name
  #
  # such that for each service/system name pair:
  # the service with the given name has been requested in
  # the experiment file (its "do_..." flag is set), and system_name
  # is what the experiment file stores under the service's name
  def SynInterfaces.requested_services(exp)
    retv = Hash.new

    [
      { "flag" => "do_postag", "service" => "pos_tagger" },
      { "flag" => "do_lemmatize", "service" => "lemmatizer" },
      { "flag" => "do_parse", "service" => "parser" }
    ].each { |hash|
      if exp.get(hash["flag"]) # yes, perform this service
        retv[hash["service"]] = exp.get(hash["service"])
      end
    }

    return retv
  end
end
|
246
|
-
|
247
|
-
|
248
|
-
require "frprep/CollinsInterface"
|
249
|
-
require "frprep/BerkeleyInterface"
|
250
|
-
require "frprep/SleepyInterface"
|
251
|
-
require "frprep/MiniparInterface"
|
252
|
-
require "frprep/TntInterface"
|
253
|
-
require "frprep/TreetaggerInterface"
|
254
|
-
|
255
|
-
|
256
|
-
# Interpreter that applies when no syntactic analysis system is
# requested at all: both its obligatory and its optional systems are empty.
class EmptyInterpreter < SynInterpreter
  EmptyInterpreter.announce_me()

  ###
  # systems interpreted by this class:
  # returns a hash service(string) -> system name (string),
  # e.g.
  # { "parser" => "collins", "lemmatizer" => "treetagger" }
  # For this class the hash is empty.
  def EmptyInterpreter.systems()
    return {}
  end

  ###
  # names of additional systems that may be interpreted by this class
  # returns a hash service(string) -> system name(string),
  # same format as systems(). For this class the hash is empty.
  #
  # NOTE(review): the original defined this as
  # SynInterpreter.optional_systems, i.e. it replaced the method of the
  # base class for every interpreter instead of defining it for
  # EmptyInterpreter. Confirm that no other interpreter relied on
  # that accidental override.
  def EmptyInterpreter.optional_systems()
    return {}
  end
end
|
data/lib/frprep/TabFormat.rb
DELETED
@@ -1,720 +0,0 @@
|
|
1
|
-
# TabFormat.rb
|
2
|
-
# Katrin Erk, Jan 2004
|
3
|
-
#
|
4
|
-
# classes to be used with tabular format text files.
|
5
|
-
# originally CoNLL2.rb
|
6
|
-
# Original: Katrin Erk, Jan 2004 for CoNLL '04 data
|
7
|
-
# Rewrite: Sebastian Pado, Mar 2004 for Gemmas FrameNet data (no NEs etc.)
|
8
|
-
|
9
|
-
# Extensions SP Jun/Jul 04
|
10
|
-
# renamed GemmaCorpus to FNTabFormat
|
11
|
-
|
12
|
-
# partial rewrite SP 250804: made things cleaner & leaner: no RawFormat, for example
|
13
|
-
|
14
|
-
# sp 04/05: add a "frame" column to FNTabFormat
|
15
|
-
#
|
16
|
-
# Substantial changes KE 12/06:
|
17
|
-
# variable number of columns to accommodate more than one frame per sentence
|
18
|
-
|
19
|
-
#################################################
|
20
|
-
# class for reading a file
|
21
|
-
# containing data in tabular format
|
22
|
-
|
23
|
-
require "tempfile"
|
24
|
-
|
25
|
-
require "frprep/ISO-8859-1"
|
26
|
-
require "frprep/ruby_class_extensions"
|
27
|
-
|
28
|
-
#######################
|
29
|
-
# This function takes a variable number of arguments and
|
30
|
-
# returns them as an array
|
31
|
-
# Idea: make formulation of tab format entries easier to read,
|
32
|
-
# enclose variable arguments in a repeat() call,
|
33
|
-
# which immediately gets transformed into a list
|
34
|
-
# Collects its arguments into an array.
# Used to mark the repeated column group in a tab format description, e.g.
#   ["word", "pos", "lemma", repeat("frame", "target", "gf", "pt")]
#
# returns: array of the given arguments (empty if there are none)
def repeat(*args)
  return args
end
|
37
|
-
|
38
|
-
#######################
|
39
|
-
# Reader for one or more parallel text files in tabular format.
# The files are read line by line in lockstep; the columns of all
# files are joined into one line per word.
class TabFormatFile

  #######
  # initialize:
  # open files for reading.
  #
  # fp is a flat list alternating filename, format, filename, format, ...
  # where format is a list of strings that will be used
  # to address columns of the file, the 1st string for the 1st column
  #
  # format may contain _one_ entry that is an array (or a call to repeat())
  # e.g.:
  # ["word", "pos", "lemma", repeat("frame", "target", "gf", "pt")]
  #
  # raises: RuntimeError if one of the files cannot be opened
  def initialize(fp)
    @files = Array.new
    @patterns = Array.new
    @no_of_read_lines = 0
    @read_completely = false

    fp.each_index { |ix|
      if ix.modulo(2) == 0
        # even position: filename
        begin
          @files << File.new(fp[ix])
        rescue
          # do not leave the files opened so far dangling
          @files.each { |f| f.close }
          raise "Sorry, could not read input file #{fp[ix]}\n"
        end
      else
        # odd position: column pattern of the preceding file
        @patterns += fp[ix]
      end
    }

    @my_sentence_class = TabFormatSentence
  end

  ########
  # each_sentence:
  # yield each sentence of the files in turn.
  # sentences are expected to be separated
  # by a line containing nothing but whitespace.
  # the last sentence may or may not be followed by
  # an empty line.
  # each_sentence ends when EOF is encountered on any of the files;
  # it expects all the files to be the same length
  # (in terms of number of lines).
  # each sentence is yielded as an object of the sentence class
  # (TabFormatSentence unless a subclass chose another one).
  #
  # The files can be read only once: they are closed at EOF, and
  # later calls of each_sentence yield nothing.
  # If one file has an empty line where another has not,
  # an error is printed and the process exits with status 1.
  def each_sentence
    # nothing (more) to read
    return nil if @read_completely or @files.empty?

    sentence = @my_sentence_class.new(@patterns)
    begin
      loop do
        # one line from each file; readline raises EOFError at the end
        linearray = @files.map { |f| f.readline().chomp() }
        @no_of_read_lines += 1

        if linearray.any? { |x| x.strip == "" }
          unless linearray.all? { |x| x.strip == "" }
            STDERR.puts "Error: Mismatching empty lines!"
            exit(1)
          end

          # sentence finished. yield it and start a new one
          yield sentence unless sentence.empty?
          sentence = @my_sentence_class.new(@patterns)
        else
          # sentence not yet finished.
          # add this line to it
          sentence.add_line(linearray.join("\t"))
        end
      end
    rescue EOFError
      # maybe we haven't yielded the last sentence yet.
      yield sentence unless sentence.empty?
      @files.each { |f| f.close unless f.closed? }
      @read_completely = true
    end
  end

end
|
131
|
-
|
132
|
-
#################################################
|
133
|
-
# class for keeping one line,
|
134
|
-
# parsed.
|
135
|
-
# The line is kept as follows:
|
136
|
-
# - normal features: in a hash @f mapping feature names to values
|
137
|
-
# - features of the repeated group: in an array @r of
|
138
|
-
# TabFormatNamedArgs objects, one per group
|
139
|
-
#
|
140
|
-
# each feature of the line is available by name
|
141
|
-
# via the method "get".
|
142
|
-
# Additional features (from other input files) can be
|
143
|
-
# added to the TabFormatNamedArgs object via the method
|
144
|
-
# add_feature
|
145
|
-
#
|
146
|
-
# methods:
|
147
|
-
#
|
148
|
-
# new: initialize.
|
149
|
-
# values: array of strings
|
150
|
-
# features: how to access the strings by name
|
151
|
-
# 'features' is an array of strings
|
152
|
-
# later the i-th feature will be used to access
|
153
|
-
# the i-th value,
|
154
|
-
# except for repeated groups
|
155
|
-
#
|
156
|
-
# get: returns one feature by its name
|
157
|
-
# name: a string
|
158
|
-
#
|
159
|
-
# add_feature: add another feature to this object,
|
160
|
-
# which can be accessed via "get"
|
161
|
-
# name: name for the new feature, should be distinct
|
162
|
-
# from the ones already used in new()
|
163
|
-
# feature: a string, the value of the feature
|
164
|
-
##
|
165
|
-
|
166
|
-
# One parsed line of a tab format file.
# Plain columns are kept in the hash @f (feature name -> value),
# the columns of the repeated group in the array @r
# (one TabFormatNamedArgs object per repetition of the group).
class TabFormatNamedArgs
  ############
  # values: array of strings, the columns of the line
  # features: array of feature names for the columns; at most one
  #   entry may itself be an array: the names of a column group
  #   that can be repeated any number of times
  # group: number of the group that get() falls back to for
  #   features that are not plain columns (nil: no fallback)
  #
  # raises: RuntimeError if features contains more than one group
  def initialize(values, features, group = nil)
    @f = Hash.new
    @r = Array.new
    @group = group

    # record the feature names, give special attention to a group
    # if we have one
    @group_feature_names = nil
    @feature_names = features.map { |feature|
      if feature.instance_of? Array
        # found a group
        @group_feature_names = feature
        "GROUP"
      else
        feature
      end
    }

    if @feature_names.count("GROUP") > 1
      $stderr.puts "More than one group in feature set:" + features.join(" ")
      raise "Cannot handle this."
    end

    # group_index: position of group in overall feature list
    # (one past the end if there is no group)
    group_index = @feature_names.index("GROUP") || @feature_names.length()
    num_features_after_group = [0,
                                (@feature_names.length() - 1) - group_index].max()
    # the plain columns behind the group are counted from the end of the line
    index_after_groups = values.length() - num_features_after_group

    # features before group: put feature/value pairs in @f hash
    0.upto(group_index - 1) { |i|
      @f[features[i]] = values[i]
    }

    # group: store each repetition of the group in the @r array
    if @group_feature_names
      group_size = @group_feature_names.length()
      group_no = 0
      group_index.step(index_after_groups - 1, group_size) { |group_start|
        @r << TabFormatNamedArgs.new(values.slice(group_start, group_size),
                                     @group_feature_names,
                                     group_no)
        group_no += 1
      }
    end

    # features after group: put feature/value pairs in @f hash
    feature_index = group_index + 1
    index_after_groups.upto(values.length() - 1) { |i|
      @f[features[feature_index]] = values[i]
      feature_index += 1
    }
  end

  ############
  # return feature/value pairs as a tab format line,
  # order of features as given in the 'features' list
  # Features not set in the hash: their entry will be "-"
  #
  # If the feature list includes a group,
  # assume zero entries for that group
  #
  # Keys of the hash that are not in the feature list are
  # reported on stderr and ignored.
  #
  # returns: string, "" if features is nil
  def TabFormatNamedArgs.format_str(hash, # hash: feature -> value
                                    features) # feature list, as for new()
    return "" if features.nil?

    # sanity check: does the hash contain keys that are not in the feature list?
    hash.keys().reject { |f| features.include? f }.each { |bad_feature|
      $stderr.puts "Error: unknown feature #{bad_feature} in format_str: ignoring."
    }

    return features.reject { |f|
      # remove the group feature, if it's there
      f.instance_of? Array
    }.map { |feature|
      hash[feature] || "-"
    }.join("\t")
  end

  #############
  # add another feature to this object, which can be accessed via get()
  #
  # name: name of the new feature, must not be in use as
  #   a plain feature of this line yet
  # feature: the value of the feature
  #
  # raises: RuntimeError if the feature name is already in use
  def add_feature(name, feature)
    if @f.has_key? name
      raise "Trying to add a feature twice: #{name}"
    end

    @f[name] = feature
  end

  #############
  # get feature value, identified by feature name
  # Plain features take precedence; otherwise the feature is looked up
  # in the group given to new(), if any.
  #
  # return: feature value as string, nil if not found
  def get(name)
    if (retv = get_nongroup(name))
      return retv
    else
      return get_from_group(name, @group)
    end
  end

  #############
  # set (or overwrite) the value of a plain feature
  def set(name, feature)
    @f[name] = feature
  end

  #############
  # number of repetitions of the group found in this line
  def num_groups()
    return @r.length()
  end

  #############
  # return line as string, entries connected by tab,
  # in the order that the entries were in originally
  # (features added later via add_feature are not included)
  def to_s()
    return @feature_names.map { |feature|
      case feature
      when "GROUP"
        @r.map { |group_obj| group_obj.to_s }.join("\t")
      else
        @f[feature]
      end
    }.join("\t")
  end

  protected

  # get feature, non-group
  # return: feature value (string), nil if there is none
  def get_nongroup(feature)
    return @f[feature]
  end

  # get feature from one of the groups
  # return: feature value (string),
  #   nil if there is no group with that number
  def get_from_group(name, group_no)
    if not(group_no) or group_no >= @r.length()
      # no group with that number
      return nil
    else
      return @r[group_no].get_nongroup(name)
    end
  end
end
|
322
|
-
|
323
|
-
|
324
|
-
#################################################
|
325
|
-
# class for keeping and yielding one sentence
|
326
|
-
# in tabular format
|
327
|
-
# One sentence in tabular format: a list of lines, one word
# (plus additional information on that word) per line.
class TabFormatSentence
  ############
  # initialize:
  # the sentence will be stored one word (plus additional info
  # for that word) per line. Each line will be stored in a cell of
  # the array @lines. the 'initialize' method starts with an empty
  # array of lines.
  #
  # pattern: list of column names, as used by TabFormatNamedArgs
  def initialize(pattern)
    @lines = Array.new
    @pattern = pattern

    # this is just for inheritance; subclasses may set it to have
    # features looked up in one particular column group
    @group_no = nil
  end

  #####
  # length: number of words in the sentence
  def length
    return @lines.length
  end

  #################
  # add_line:
  # add one entry to the @lines array, i.e. information for one word
  # of the sentence.
  #
  # line: string, the columns separated by tab
  def add_line(line)
    @lines << line
  end

  ###################
  # empty?:
  # returns true if there are currently no lines stored in this
  # TabFormatSentence object
  # else false
  def empty?
    return @lines.empty?
  end

  ######################
  # empty!:
  # discards all entries to the @lines array,
  # i.e. empties this TabFormatSentence object of all
  # data
  def empty!
    @lines.clear
  end

  #####################
  # each_line:
  # yields each line of the sentence
  # as a string
  def each_line
    @lines.each { |l| yield l }
  end

  ######################
  # each_line_parsed:
  # yields each line of the sentence as a TabFormatNamedArgs object:
  # the line is split at tabs, and the columns are made accessible
  # by the names given in the pattern passed to new().
  # In addition, each object has the feature "lineno":
  # the number of the line within the sentence, starting at 0.
  def each_line_parsed
    @lines.each_with_index { |l, lineno|
      f = TabFormatNamedArgs.new(l.split("\t"), @pattern, @group_no)
      f.add_feature("lineno", lineno)
      yield f
    }
  end

  ###
  # read_one_line:
  # return a line of the sentence specified by its number
  # (a string, or nil if there is no such line)
  def read_one_line(number)
    return @lines[number]
  end

  ###
  # read_one_line_parsed:
  # like read_one_line, but the features in the line are returned
  # separately,
  # as in each_line_parsed
  #
  # returns: TabFormatNamedArgs object, or nil if there is no such line
  def read_one_line_parsed(number)
    return nil if @lines[number].nil?

    f = TabFormatNamedArgs.new(@lines[number].split("\t"), @pattern, @group_no)
    f.add_feature("lineno", number)
    return f
  end

  # set line no of first line of present sentence
  # Deprecated: always raises.
  def set_starting_line(n)
    raise "Deprecated"
  end

  # returns line no of first line of present sentence
  # Deprecated: always raises.
  def get_starting_line()
    raise "Deprecated"
  end
end
|
440
|
-
|
441
|
-
########################################################
|
442
|
-
# TabFormat files containing everything that's in the FN lexunit files
|
443
|
-
#
|
444
|
-
# one target per sentence
|
445
|
-
|
446
|
-
# TabFormat file containing everything that's in the FN lexunit files,
# optionally combined with parallel files holding lemmas and
# part of speech tags.
class FNTabFormatFile < TabFormatFile

  ###
  # filename: name of the main file, in fntab_format();
  #   usually ending in ".tab"
  # tag_suffix: if given, the file <filename without .tab><tag_suffix>
  #   is read in parallel as column "pos"
  # lemma_suffix: if given, the file <filename without .tab><lemma_suffix>
  #   is read in parallel as column "lemma"
  #
  # raises: RuntimeError (from TabFormatFile#initialize) if one of the
  #   files cannot be opened, e.g. because lemmatisation or
  #   tagging has not been done
  def initialize(filename, tag_suffix = nil, lemma_suffix = nil)
    # path of the main file without its ".tab" extension
    corpusname = File.dirname(filename) + "/" + File.basename(filename, ".tab")

    filename_label_pairs = [filename, FNTabFormatFile.fntab_format()]
    if lemma_suffix
      # the lemma file must exist, else the superclass raises an exception
      filename_label_pairs.concat [corpusname + lemma_suffix, ["lemma"]]
    end
    if tag_suffix
      # the tag file must exist, else the superclass raises an exception
      filename_label_pairs.concat [corpusname + tag_suffix, ["pos"]]
    end
    super(filename_label_pairs)

    @my_sentence_class = FNTabSentence
  end

  ###
  # column names of an FN tab format file:
  # the word, any number of frame column groups
  # (see frametab_format()), named entity, sentence ID
  def FNTabFormatFile.fntab_format()
    return [
      "word",
      FNTabFormatFile.frametab_format(),
      "ne", "sent_id"
    ]
  end

  ###
  # column names of the group that is repeated once per frame
  def FNTabFormatFile.frametab_format()
    return ["pt", "gf", "role", "target", "frame", "stuff"]
  end

  ##########
  # given a hash mapping features to values,
  # format according to fntab_format
  # (tab-separated string without the frame column group)
  def FNTabFormatFile.format_str(hash)
    return TabFormatNamedArgs.format_str(hash, FNTabFormatFile.fntab_format())
  end
end
|
485
|
-
|
486
|
-
############################################
|
487
|
-
class FNTabSentence < TabFormatSentence
  ####
  # Feature lookup used by markables(). Overwrite this to get a feature
  # from a group rather than from the main feature list.
  #
  # l:            parsed line object (as yielded by each_line_parsed)
  # feature_name: name of the column to read
  def get_this(l, feature_name)
    l.get(feature_name)
  end

  ####
  # Raises a RuntimeError if the first line of the sentence has no
  # "sent_id" feature. Only the first line is inspected.
  def sanity_check
    each_line_parsed do |l|
      if l.get("sent_id").nil?
        raise "Error: corpus file does not conform to FN format."
      end

      # the first line is enough, stop iterating
      return
    end
  end

  ####
  # returns the sentence ID, a string, as set by FrameNet
  # (taken from the first line), or nil if the sentence has no lines
  def get_sent_id
    sanity_check
    each_line_parsed { |l| return l.get("sent_id") }
    # no lines at all: do not leak the iterator's own return value
    nil
  end

  ####
  # iterator, yields each frame of the sentence as a FNTabFrame
  # object. They contain the complete sentence, but provide
  # access to exactly one frame of that sentence.
  #
  # Yields nothing for a sentence without lines.
  def each_frame
    # how many frames? assume that each line has the same
    # number of frames
    first_line = read_one_line_parsed(0)
    return if first_line.nil?

    num_frames = first_line.num_groups
    0.upto(num_frames - 1) do |frame_no|
      frame_obj = FNTabFrame.new(@pattern, frame_no)
      each_line { |l| frame_obj.add_line(l) }
      yield frame_obj
    end
  end

  ####
  # computes a mapping from word indices to labels on these words
  #
  # returns a hash: index_list(array:integer) -> label(string)
  # An entry il->label means that all the lines whose line
  # numbers are listed in il are labeled with label.
  #
  # Line numbers correspond to words of the sentence. Counting starts at 0.
  #
  # By default, "markables" looks for role labels, i.e. labels in the
  # column "role", but it can also look in another column.
  # To change the default, give the column name as a parameter.
  #
  # Markup per line is "-" or "--" (no change), "B-<label>" (markable
  # starts), "E-<label>" (markable ends) or "B-<label>:E-<label>"
  # (one-word markable). Anything else, and any overlap of markables,
  # only produces a warning on $stderr. A markable still open at the
  # end of the sentence is reported but not stored.
  #
  # Raises a RuntimeError (via sanity_check) if the first line has no
  # "sent_id".
  def markables(use_this_column = "role")
    sanity_check

    idlist_to_annotation_list = {}

    ids = []    # line numbers of the markable that is currently open
    label = nil # label of the markable that is currently open, if any

    each_line_parsed do |l|
      this_id = get_this(l, "lineno")

      this_col = get_this(l, use_this_column)
      unless this_col
        $stderr.puts "nil entry #{use_this_column} in line #{this_id} of sent #{get_sent_id}. Skipping."
        next
      end
      this_fe_ann = this_col.split(":")

      case this_fe_ann.length
      when 1 # nothing at all, or a single begin or end
        markup = this_fe_ann.first
        if markup == "-" || markup == "--" # no change
          ids << this_id if label
        elsif markup =~ /^B-(\S+)$/
          if label # are we within a markable right now?
            # overlapping markables: ignore the new one, keep the open one
            $stderr.puts "[TabFormat] Warning: Markable #{$1} starts while within markable #{label}"
            $stderr.puts "Debug data: Sentence id #{get_sent_id}, current ID list #{ids.join(" ")}"
          else
            label = $1
            ids << this_id
          end
        elsif markup =~ /^E-(\S+)$/
          if label == $1 # we close the markable we've opened before
            ids << this_id
            idlist_to_annotation_list[ids] = label
            # reset memory; ids must become a NEW array because the old
            # one is now a key of the result hash
            label = nil
            ids = []
          else
            $stderr.puts "[TabFormat] Warning: Markable #{$1} closes while within markable #{label}"
            $stderr.puts "Debug data: Sentence id #{get_sent_id}, current ID list #{ids.join(" ")}"
          end
        else
          $stderr.puts "[TabFormat] Warning: cannot analyse markup #{markup}"
          $stderr.puts "Debug data: Sentence id #{get_sent_id}"
        end

      when 2 # this should be a one-word markable
        b_markup = this_fe_ann[0]
        e_markup = this_fe_ann[1]
        if label
          $stderr.puts "[TabFormat] Warning: Finding new markable at word #{this_id} while within markable #{label}"
          $stderr.puts "Debug data: Sentence id #{get_sent_id}, current ID list #{ids.join(" ")}"
        elsif b_markup =~ /^B-(\S+)$/
          b_label = $1
          if e_markup =~ /^E-(\S+)$/
            e_label = $1
            if b_label == e_label
              idlist_to_annotation_list[[this_id]] = b_label
            else
              $stderr.puts "[TabFormat] Warning: Starting markable #{b_label}, closing markable #{e_label}"
              $stderr.puts "Debug data: Sentence id #{get_sent_id}, current ID list #{ids.join(" ")}"
            end
          else
            $stderr.puts "[TabFormat] Warning: Unknown end markup #{e_markup}"
            $stderr.puts "Debug data: Sentence id #{get_sent_id}, current ID list #{ids.join(" ")}"
          end
        else
          $stderr.puts "[TabFormat] Warning: Unknown start markup #{b_markup}"
          $stderr.puts "Debug data: Sentence id #{get_sent_id}, current ID list #{ids.join(" ")}"
        end

      else
        # also reached when the split yields no parts at all (e.g. an
        # empty column)
        $stderr.puts "Warning: cannot analyse markup with more than two colon-separated parts like #{this_fe_ann.join(":")}"
        $stderr.puts "Debug data: Sentence id #{get_sent_id}"
      end
    end

    unless label.nil?
      $stderr.puts "[TabFormat] Warning: Markable #{label} did not end in sentence."
      $stderr.puts "Debug data: Sentence id #{get_sent_id}, current ID list #{ids.join(" ")}"
    end

    idlist_to_annotation_list
  end

  #######
  # returns the sentence as a string: the "word" features of all lines,
  # joined by single spaces
  def to_s
    sanity_check
    words = []
    each_line_parsed { |l| words << l.get("word") }
    words.join(" ")
  end
end
|
654
|
-
|
655
|
-
class FNTabFrame < FNTabSentence
  ############
  # initialize:
  # as parent, except that we also get a frame number
  # such that we can access the features of ``our'' frame
  #
  # pattern: column pattern, passed on unchanged to the parent class
  # frameno: number of the frame (group) this object gives access to
  def initialize(pattern, frameno)
    super(pattern)
    # @group_no is what gets passed to each TabFormatNamedArgs object
    # built for a line of this sentence, so that calls to get() on the
    # parsed lines access the features of this frame's group.
    @group_no = frameno
  end

  ####
  # returns the frame introduced by the target word(s)
  # of this frame group, a string (read from the first line),
  # or nil if the sentence has no lines
  def get_frame
    sanity_check
    each_line_parsed { |l| return l.get("frame") }
    # no lines at all: do not leak the iterator's own return value
    nil
  end

  ####
  # returns an array of integers: the indices of the target of
  # the frame
  # These are the line numbers, which start counting at 0
  #
  # a target may span more than one word
  def get_target_indices
    sanity_check
    idx = []
    each_line_parsed do |l|
      idx << l.get("lineno") unless l.get("target") == "-"
    end
    idx
  end

  ####
  # returns a string: the target
  # in the case of multiword targets,
  # we find the complete target at all
  # indices, i.e. we can just take the first one we find
  #
  # returns nil if no line carries a target
  def get_target
    each_line_parsed do |l|
      t = l.get("target")
      return t unless t == "-"
    end
    # every line had "-": there is no target in this frame group
    nil
  end

  ####
  # get the target POS, according to FrameNet:
  # the part after the dot of a target of the form <lemma>.<pos>
  #
  # returns nil if there is no target or it does not have that form
  def get_target_fn_pos
    target = get_target
    return nil if target.nil?

    target[/^[^\.]+\.(\w+)$/, 1]
  end
end
|