shalmaneser-lib 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
Binary file
|
Binary file
|
@@ -0,0 +1,251 @@
|
|
1
|
+
# ExternalSystems.rb
|
2
|
+
#
|
3
|
+
# ke oct/nov 2005
|
4
|
+
#
|
5
|
+
# Store all known interfaces to
|
6
|
+
# systems that do syntactic analysis
|
7
|
+
#
|
8
|
+
# Given the name of a system and the service that the
|
9
|
+
# system performs, return the appropriate interface
|
10
|
+
#
|
11
|
+
# There are two types of interfaces to syntactic analysis systems:
|
12
|
+
# - interfaces:
|
13
|
+
# offer methods for syntactic analysis,
|
14
|
+
# and the transformation to Salsa/Tiger XML and SalsaTigerSentence objects
|
15
|
+
# - interpreters:
|
16
|
+
# interpret the resulting Salsa/Tiger XML (represented as
|
17
|
+
# SalsaTigerSentence and SynNode objects), e.g.
|
18
|
+
# generalize over part of speech;
|
19
|
+
# describe the path between a pair of nodes both as a path
|
20
|
+
# and (potentially) as a grammatical function of one of the nodes;
|
21
|
+
# determine whether a node describes a verb, and in which voice;
|
22
|
+
# determine the head of a constituent
|
23
|
+
#
|
24
|
+
# Abstract classes for both interfaces and interpreters
|
25
|
+
# are in AbstractSynInterface.rb
|
26
|
+
|
27
|
+
require "ruby_class_extensions"
|
28
|
+
require 'logging'
|
29
|
+
|
30
|
+
# The list of available interface packages
|
31
|
+
# is at the end of this file.
|
32
|
+
# Please enter additional interfaces there.
|
33
|
+
|
34
|
+
# @todo AB: [2015-12-16 Wed 01:03]
|
35
|
+
# After decoupling in OptParser and ConfigData classes move this
|
36
|
+
# to Frappe.
|
37
|
+
module Shalmaneser
  # Registry of the known interfaces to, and interpreters for, external
  # systems that perform syntactic analysis (POS tagging, lemmatization,
  # parsing).
  #
  # Interface and interpreter classes register themselves through
  # add_interface / add_interpreter; clients then look them up by
  # service name and system name.
  class ExternalSystems
    ###
    # class-level instance variable (not a @@class variable):
    # list of all known interface classes
    # add to it using add_interface()
    @interfaces = []

    ###
    # class-level instance variable (not a @@class variable):
    # list of all known interpreter classes
    # add to it using add_interpreter()
    @interpreters = []

    ###
    # Maps the experiment-file flag that switches a service on
    # to the name of that service.
    #
    # WARNING: adapt this when you introduce new services!
    SERVICE_FLAGS = {
      "do_postag" => "pos_tagger",
      "do_lemmatize" => "lemmatizer",
      "do_parse" => "parser"
    }.freeze

    ###
    # Register an interface class.
    #
    # @param class_name an object responding to +service+ and +system+
    # @return [Array] the updated list of registered interfaces
    def self.add_interface(class_name)
      LOGGER.debug "Initializing interface <#{class_name}>."
      @interfaces << class_name
    end

    ###
    # Register an interpreter class.
    #
    # @param class_name an object responding to +systems+ and
    #   +optional_systems+ (hashes: service -> system name)
    # @return [Array] the updated list of registered interpreters
    def self.add_interpreter(class_name)
      LOGGER.debug "Initializing interpreter <#{class_name}>."
      @interpreters << class_name
    end

    ###
    # check_interfaces_abort_if_missing:
    #
    # Given an experiment file, use some_system_missing? to
    # determine whether the system can be run with the requested
    # syntactic processing. If not, print what is missing and what is
    # available to $stderr and terminate the process with exit status 1.
    #
    # @param [FrappeConfigData] exp Experiment description.
    # @return [nil] if nothing is missing
    def self.check_interfaces_abort_if_missing(exp)
      missing = some_system_missing?(exp)
      return unless missing

      interwhat, services = missing

      $stderr.puts
      $stderr.puts "ERROR: I am missing an #{interwhat} for "
      services.each_pair do |service, system_name|
        $stderr.puts "\tservice #{service}, system #{system_name}"
      end
      $stderr.puts
      $stderr.puts "I have the following interfaces:"
      @interfaces.each do |interface_class|
        $stderr.puts "\tservice #{interface_class.service}, system #{interface_class.system}"
      end
      $stderr.puts "I have the following interpreters:"
      @interpreters.each do |interpreter_class|
        $stderr.print "\t"
        $stderr.print describe_systems(interpreter_class.systems)
        unless interpreter_class.optional_systems.empty?
          $stderr.print ", optional: "
          $stderr.print describe_systems(interpreter_class.optional_systems)
        end
        $stderr.puts
      end
      $stderr.puts
      $stderr.puts "Please adapt your experiment file."
      exit 1
    end

    ###
    # given the name of a system and the service that it
    # performs, find the matching interface class
    #
    # @param service [String] service, e.g. parser
    # @param system [String] name of system, e.g. collins
    # @return the first registered interface class that matches
    # @raise [RuntimeError] if no registered interface matches
    def self.get_interface(service, system)
      interface = find_interface(service, system)
      return interface if interface

      raise "I've been requested an interface for #{service} and #{system}, "\
            'but I cannot find any. Please correct your experiment files.'
    end

    ###
    # helper for get_interpreter:
    # look up the interpreter for the services requested in +exp+.
    #
    # @param exp experiment description responding to +get+
    # @return the matching interpreter class, or nil
    def self.get_interpreter_according_to_exp(exp)
      ExternalSystems.get_interpreter(ExternalSystems.requested_services(exp))
    end

    ###
    # given the names and services of a set of systems,
    # find the matching interpreter class
    #
    # an interpreter class has both obligatory systems
    # (they need to be present for this class to apply)
    # and optional systems (they may or may not be present
    # for the class to apply, but no other system performing
    # the same service may)
    #
    # @param systems [Hash] service(string) -> system name(string)
    # @return the first matching interpreter class, or nil if none matches
    def self.get_interpreter(systems)
      @interpreters.find do |interpreter_class|
        obligatory = interpreter_class.systems
        optional = interpreter_class.optional_systems

        # all obligatory entries of interpreter_class are in systems
        obligatory.to_a.big_and { |service, system| systems[service] == system } &&
          # all optional entries of interpreter_class are either
          # in systems, or the service isn't in systems at all
          optional.to_a.big_and { |service, system|
            systems[service].nil? || systems[service] == system
          } &&
          # all entries in systems are in either the obligatory
          # or the optional set of interpreter_class
          systems.to_a.big_and { |service, system|
            obligatory[service] == system || optional[service] == system
          }
      end
    end

    ################
    # NOTE: the original file had a bare `private` here. It has no effect
    # on methods defined with `def self.`, and requested_services is called
    # with an explicit receiver above, so both methods below are (and stay)
    # publicly callable.

    ###
    # knows about possible services that can be set in
    # the experiment file (see SERVICE_FLAGS), and where the names of
    # the matching systems will be found in the experiment file data structure
    #
    # @param exp experiment description responding to +get+
    # @return [Hash] <service> => system_name, for each service whose
    #   flag is set in the experiment file
    def self.requested_services(exp)
      SERVICE_FLAGS.each_with_object({}) do |(flag, service), services|
        # yes, perform this service
        services[service] = exp.get(service) if exp.get(flag)
      end
    end

    ###
    # some_system_missing?
    # returns nil if I have interfaces and interpreters
    # for all services requested in the given experiment file
    # else:
    # returns pair [interface or interpreter, info]
    # where the 1st element is either 'interface' or 'interpreter',
    # and the 2nd element is a hash mapping services to system names:
    # the services that could not be provided.
    # A missing interpreter takes precedence over missing interfaces.
    #
    # @param [FrappeConfigData] exp FrappeConfigData object to check all the systems.
    # @return [Array, nil]
    def self.some_system_missing?(exp)
      requested = requested_services(exp)
      missing_systems = nil

      # check interfaces; use the non-raising lookup so that a missing
      # interface is reported here instead of aborting with an exception
      without_interface = requested.reject do |service, system_name|
        find_interface(service, system_name)
      end
      missing_systems = ["interface", without_interface] unless without_interface.empty?

      # check interpreter
      missing_systems = ["interpreter", requested] unless get_interpreter(requested)

      # nil here means: everything okay
      missing_systems
    end

    ###
    # Non-raising interface lookup.
    #
    # @return the first registered interface for service/system, or nil
    def self.find_interface(service, system)
      @interfaces.find do |interface_class|
        interface_class.system == system && interface_class.service == service
      end
    end
    private_class_method :find_interface

    ###
    # Render a service -> system hash as "service X, system Y; ...".
    def self.describe_systems(systems)
      systems.to_a.map { |service, system_name|
        "service #{service}, system #{system_name}"
      }.join("; ")
    end
    private_class_method :describe_systems
  end
end
|
238
|
+
|
239
|
+
# @todo AB: We should require programmatically all files in
|
240
|
+
# <frappe/interpreters> and <frappe/interfaces>.
|
241
|
+
require 'frappe/interfaces/collins_interface'
|
242
|
+
require 'frappe/interpreters/collins_treetagger_interpreter'
|
243
|
+
require 'frappe/interpreters/collins_tnt_interpreter'
|
244
|
+
require 'frappe/interfaces/berkeley_interface'
|
245
|
+
require 'frappe/interpreters/berkeley_interpreter'
|
246
|
+
require 'frappe/interfaces/stanford_interface'
|
247
|
+
require 'frappe/interpreters/stanford_interpreter'
|
248
|
+
require 'frappe/interfaces/treetagger_interface'
|
249
|
+
require 'frappe/interfaces/treetagger_pos_interface'
|
250
|
+
require 'frappe/interpreters/treetagger_interpreter'
|
251
|
+
require 'frappe/interpreters/empty_interpreter'
|
@@ -0,0 +1,209 @@
|
|
1
|
+
#####################
|
2
|
+
# class to keep data for one annotation set (a "frame" or a named-entity "NER" annotation)
|
3
|
+
class FNCorpusAset
  # Data extracted from one annotation set:
  #
  # layers:     hash: layer type (FE, GF, PT, Target, NER) -> [offset, "start"/"stop"] -> list of labels
  #             string -> int*string -> array:string
  # aset_type:  "frame" or "NER" (nil if the set could not be classified)
  # aset_id:    ID of the annotation set
  # frame_name: frame name for "frame" type
  # lu:         LU for "frame" type
  attr_reader :layers, :aset_type, :aset_id, :frame_name, :lu

  #######
  # Analyze a RegXML annotation set and store the result in the
  # object variables listed above. Problems in the input are reported
  # on $stderr and leave the object partially filled; nothing is raised.
  #
  # @param aset RegXML object for the annotation set
  # @param charidx [Array] pairs [start index, stop index] (int*int) of words
  def initialize(aset, charidx)
    @layers = {}
    @frame_name = nil
    @lu = nil
    @aset_type = nil

    attributes = aset.attributes
    @aset_id = attributes["ID"]

    if attributes["frameName"]
      read_frame_aset(aset, attributes, charidx)
    else
      read_ner_aset(aset, charidx)
    end
  end

  #############
  # Analyze one <layer> RegXML object and record its labels in @layers.
  #
  # Skipped entirely: layers without a name, and FE layers of rank 2.
  # Skipped labels: null instantiations (itype matching /NI/) and labels
  # that have neither a start nor an end attribute.
  # Label spans are first aligned to word boundaries (verify_annotation);
  # of two overlapping labels only one is kept.
  #
  # @param layer RegXML object
  # @param charidx [Array] int*int pairs: start/end index of words
  def analyze_layer(layer, charidx)
    layer_name = layer.attributes["name"]
    unless layer_name
      warn_with_node("FNCorpusAset warning: cannot determine layer name", layer)
      return
    end

    # FN-specific: skip 2nd layer FEs for now
    return if layer_name == "FE" && layer.attributes["rank"] == "2"

    @layers[layer_name] ||= {}

    labels = layer.first_child_matching("labels")
    # nothing to record for this layer
    return unless labels

    # taking over much of analyse_layer from class FrameXML
    spans = []
    labels.each_child_matching("label") do |label|
      attributes = label.attributes
      # null instantiation, ignore
      next if attributes["itype"] =~ /NI/
      # no start and end labels
      next if !attributes["start"] && !attributes["end"]

      spans << [attributes["name"], attributes["start"].to_i, attributes["end"].to_i]
    end

    # sanity check: do indices match word start and end indices?
    spans = verify_annotation(spans, charidx)

    # sanity check: verify that we don't have overlapping labels
    deleted = {} # index into spans -> true for labels which are to be dropped

    spans.each_index do |i|
      # efficiency: skip already deleted labels
      next if deleted[i]

      this_label, this_from, this_to = spans[i]

      # compare with all remaining labels
      ((i + 1)...spans.length).each do |other_i|
        other_label, other_from, other_to = spans[other_i]

        if this_from <= other_from && other_from <= this_to
          # the later label starts inside this one: throw out the later one
          $stderr.puts "Warning: Label overlap, deleting #{other_label}"
          deleted[other_i] = true
        elsif this_from <= other_to && other_to <= this_to
          # the later label ends inside this one: throw out this one
          $stderr.puts "Warning: Label overlap, deleting #{this_label}"
          deleted[i] = true
        end
      end

      # matched with all other labels; record only if still kept
      next if deleted[i]

      [[this_from, "start"], [this_to, "stop"]].each do |offset, start_or_stop|
        (@layers[layer_name][[offset, start_or_stop]] ||= []) << this_label
      end
    end
  end

  ##############
  # verify found triples label/from_index/to_index
  # against given start/end indices of words: an index that falls inside
  # a word, or into the gap before a word, is moved to a word boundary.
  # Every change is reported on $stderr.
  #
  # @param found [Array] triples label/from/to, string*int*int
  # @param charidx [Array] pairs from/to, int*int
  # @return [Array] the triples, possibly changed
  def verify_annotation(found, charidx)
    found.map do |element, start, stop|
      newstart = start
      newstop = stop

      # compare against word start/stop indices
      charidx.each_index do |j|
        # stop index of the previous word; stays nil for the first word
        _pstartidx, pstopidx = charidx[j - 1] unless j.zero?
        startidx, stopidx = charidx[j]

        if (start > startidx && start <= stopidx) ||
           (j != 0 && start > pstopidx && start < startidx)
          newstart = startidx
        end

        if stop >= startidx && stop < stopidx
          newstop = stopidx
        elsif j != 0 && stop > pstopidx && stop < startidx
          newstop = pstopidx
        end
      end

      if start != newstart || stop != newstop
        # report change
        $stderr.puts "FNCorpusXML warning: Heuristics has changed element #{element}"
        $stderr.puts "\tfrom [#{[start, stop].join(',')}] to [#{[newstart, newstop].join(',')}]"
      end

      [element, newstart, newstop]
    end
  end

  private

  # Handle an annotation set that carries a frameName attribute.
  def read_frame_aset(aset, attributes, charidx)
    unless attributes["luName"]
      warn_with_node("FNCorpusAset warning: cannot determine LU name", aset)
      return
    end

    @aset_type = "frame"
    @frame_name = attributes["frameName"]
    @lu = attributes["luName"]

    layers = aset.first_child_matching("layers")
    unless layers
      warn_with_node("FNCorpusAset warning: unexpectedly no layers found", aset)
      return
    end

    layers.each_child_matching("layer") { |l| analyze_layer(l, charidx) }
  end

  # Handle an annotation set without a frameName attribute;
  # only a single NER layer is expected.
  def read_ner_aset(aset, charidx)
    @aset_type = "NER"

    layers = aset.first_child_matching("layers")
    unless layers
      warn_with_node("FNCorpusAset Warning: unexpectedly no layers found", aset)
      return
    end

    layer = layers.first_child_matching("layer")
    unless layer
      warn_with_node("FNCorpusAset Warning: unexpectedly no layers found", aset)
      return
    end

    unless layer.attributes["name"] == "NER"
      warn_with_node("FNCorpusAset Warning: unexpected layer #{layer.attributes["name"]}, was expecting only an NER layer.", aset)
      return
    end

    analyze_layer(layer, charidx)
  end

  # Print a warning followed by the string form of the offending node.
  def warn_with_node(message, node)
    $stderr.puts message
    $stderr.puts node.to_s
  end
end
|