shalmaneser-lib 1.2.rc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
Binary file
|
Binary file
|
@@ -0,0 +1,251 @@
|
|
1
|
+
# ExternalSystems.rb
|
2
|
+
#
|
3
|
+
# ke oct/nov 2005
|
4
|
+
#
|
5
|
+
# Store all known interfaces to
|
6
|
+
# systems that do syntactic analysis
|
7
|
+
#
|
8
|
+
# Given the name of a system and the service that the
|
9
|
+
# system performs, return the appropriate interface
|
10
|
+
#
|
11
|
+
# There are two types of interfaces to syntactic analysis systems:
|
12
|
+
# - interfaces:
|
13
|
+
# offer methods for syntactic analysis,
|
14
|
+
# and the transformation to Salsa/Tiger XML and SalsaTigerSentence objects
|
15
|
+
# - interpreters:
|
16
|
+
# interpret the resulting Salsa/Tiger XML (represented as
|
17
|
+
# SalsaTigerSentence and SynNode objects), e.g.
|
18
|
+
# generalize over part of speech;
|
19
|
+
# describe the path between a pair of nodes both as a path
|
20
|
+
# and (potentially) as a grammatical function of one of the nodes;
|
21
|
+
# determine whether a node describes a verb, and in which voice;
|
22
|
+
# determine the head of a constituent
|
23
|
+
#
|
24
|
+
# Abstract classes for both interfaces and interpreters
|
25
|
+
# are in AbstractSynInterface.rb
|
26
|
+
|
27
|
+
require "ruby_class_extensions"
|
28
|
+
require 'logging'
|
29
|
+
|
30
|
+
# The list of available interface packages
|
31
|
+
# is at the end of this file.
|
32
|
+
# Please enter additional interfaces there.
|
33
|
+
|
34
|
+
# @todo AB: [2015-12-16 Wed 01:03]
|
35
|
+
# After decoupling in OptParser and ConfigData classes move this
|
36
|
+
# to Frappe.
|
37
|
+
module Shalmaneser
  # Registry of the known interfaces to, and interpreters for, external
  # syntactic analysis systems (POS taggers, lemmatizers, parsers).
  #
  # Interface and interpreter classes register themselves through
  # add_interface / add_interpreter; the remaining class methods look up the
  # registered class matching a service/system combination.
  class ExternalSystems
    # Experiment-file flag => name of the service it switches on.
    # The service name doubles as the experiment-file key holding the name
    # of the system that performs the service.
    #
    # WARNING: adapt this when you introduce new services!
    SERVICE_FLAGS = {
      "do_postag" => "pos_tagger",
      "do_lemmatize" => "lemmatizer",
      "do_parse" => "parser"
    }.freeze

    ###
    # class-level instance variable:
    # list of all known interface classes
    # add to it using add_interface()
    @interfaces = []

    ###
    # class-level instance variable:
    # list of all known interpreter classes
    # add to it using add_interpreter()
    @interpreters = []

    ###
    # Register an interface class.
    #
    # class_name: the class object; it has to respond to +system+ and +service+
    def self.add_interface(class_name)
      LOGGER.debug "Initializing interface <#{class_name}>."
      @interfaces << class_name
    end

    ###
    # Register an interpreter class.
    #
    # class_name: the class object; it has to respond to +systems+ and
    #             +optional_systems+
    def self.add_interpreter(class_name)
      LOGGER.debug "Initializing interpreter <#{class_name}>."
      @interpreters << class_name
    end

    ###
    # check_interfaces_abort_if_missing:
    #
    # Given an experiment file, use some_system_missing? to
    # determine whether the system can be run with the requested
    # syntactic processing. If that is not possible, print what is missing
    # and what is available to $stderr and exit with status 1.
    # @param [FrappeConfigData] exp Experiment description.
    # @return [nil] if nothing is missing
    def self.check_interfaces_abort_if_missing(exp)
      missing = some_system_missing?(exp)
      return unless missing

      interwhat, services = missing

      $stderr.puts
      $stderr.puts "ERROR: I am missing an #{interwhat} for "
      services.each_pair do |service, system_name|
        $stderr.puts "\tservice #{service}, system #{system_name}"
      end
      $stderr.puts
      $stderr.puts "I have the following interfaces:"
      @interfaces.each do |interface_class|
        $stderr.puts "\tservice #{interface_class.service}, system #{interface_class.system}"
      end
      $stderr.puts "I have the following interpreters:"
      @interpreters.each do |interpreter_class|
        $stderr.print "\t"
        $stderr.print systems_description(interpreter_class.systems)
        unless interpreter_class.optional_systems.empty?
          $stderr.print ", optional: "
          $stderr.print systems_description(interpreter_class.optional_systems)
        end
        $stderr.puts
      end
      $stderr.puts
      $stderr.puts "Please adapt your experiment file."
      exit 1
    end

    ###
    # given the name of a system and the service that it
    # performs, find the matching interface class
    #
    # service: string: service, e.g. parser
    # system: string: name of system, e.g. collins
    #
    # returns: SynInterface class
    # raises: RuntimeError if no registered interface matches
    def self.get_interface(service, system)
      # @todo AB: Actually it's bad logic, but no idea for now how to handle it.
      # If several interfaces match, the one registered first wins.
      find_interface(service, system) ||
        raise("I've been requested an interface for #{service} and #{system}, "\
              'but I cannot find any. Please correct your experiment files.')
    end

    ###
    # helper for get_interpreter:
    # look up the interpreter for the services requested in the experiment file
    def self.get_interpreter_according_to_exp(exp)
      get_interpreter(requested_services(exp))
    end

    ###
    # given the names and services of a set of systems,
    # find the matching interpreter class
    #
    # an interpreter class has both obligatory systems
    # (they need to be present for this class to apply)
    # and optional systems (they may or may not be present
    # for the class to apply, but no other system performing
    # the same service may)
    #
    # systems:
    #    hash: service(string) -> system name(string)
    #
    # returns: SynInterpreter class, or nil if no registered interpreter fits
    def self.get_interpreter(systems)
      @interpreters.find do |interpreter_class|
        obligatory = interpreter_class.systems
        optional = interpreter_class.optional_systems

        # all obligatory entries of interpreter_class are in systems
        obligatory.all? { |service, system| systems[service] == system } &&
          # all optional entries of interpreter_class are either in systems,
          # or the service isn't in systems at all
          optional.all? { |service, system| systems[service].nil? || systems[service] == system } &&
          # all entries in systems are in either the obligatory
          # or the optional set of interpreter_class
          systems.all? { |service, system| obligatory[service] == system || optional[service] == system }
      end
    end

    ################
    # Helpers. They stay public class methods: a bare `private` does not
    # apply to methods defined with `def self.`.

    ###
    # knows about possible services that can be set in
    # the experiment file, and where the names of
    # the matching systems will be found in the experiment file data structure
    # (see SERVICE_FLAGS)
    #
    # returns: a hash
    #  <service> => system_name
    #
    # containing one entry per service whose flag is set in the experiment
    # file, mapped to the name of the system that is to perform the service
    def self.requested_services(exp)
      SERVICE_FLAGS.each_with_object({}) do |(flag, service), services|
        services[service] = exp.get(service) if exp.get(flag)
      end
    end

    ###
    # some_system_missing?
    # returns nil if I have interfaces and interpreters
    # for all services requested in the given experiment file
    # else:
    # returns pair [interface or interpreter, info]
    # where the 1st element is either 'interface' or 'interpreter',
    # and the 2nd element is a hash mapping services to system names:
    # the services that could not be provided.
    # Missing interfaces are reported before a missing interpreter.
    # @param [FrappeConfigData] exp FrappeConfigData object to check all the systems.
    def self.some_system_missing?(exp)
      services = requested_services(exp)

      # check interfaces; uses the non-raising lookup so that every missing
      # interface can be collected and reported
      without_interface = services.reject do |service, system_name|
        find_interface(service, system_name)
      end
      return ["interface", without_interface] unless without_interface.empty?

      # check interpreter
      return ["interpreter", services] unless get_interpreter(services)

      # everything okay
      nil
    end

    ###
    # Non-raising lookup: first registered interface class for the given
    # service/system pair, or nil if there is none.
    def self.find_interface(service, system)
      @interfaces.find do |interface_class|
        interface_class.system == system && interface_class.service == service
      end
    end
    private_class_method :find_interface

    ###
    # Render a service => system name hash as one line of the error report.
    def self.systems_description(systems)
      systems.map { |service, system_name| "service #{service}, system #{system_name}" }.join("; ")
    end
    private_class_method :systems_description
  end
end
|
238
|
+
|
239
|
+
# @todo AB: We should require programmatically all files in
|
240
|
+
# <frappe/interpreters> and <frappe/interfaces>.
|
241
|
+
require 'frappe/interfaces/collins_interface'
|
242
|
+
require 'frappe/interpreters/collins_treetagger_interpreter'
|
243
|
+
require 'frappe/interpreters/collins_tnt_interpreter'
|
244
|
+
require 'frappe/interfaces/berkeley_interface'
|
245
|
+
require 'frappe/interpreters/berkeley_interpreter'
|
246
|
+
require 'frappe/interfaces/stanford_interface'
|
247
|
+
require 'frappe/interpreters/stanford_interpreter'
|
248
|
+
require 'frappe/interfaces/treetagger_interface'
|
249
|
+
require 'frappe/interfaces/treetagger_pos_interface'
|
250
|
+
require 'frappe/interpreters/treetagger_interpreter'
|
251
|
+
require 'frappe/interpreters/empty_interpreter'
|
@@ -0,0 +1,209 @@
|
|
1
|
+
#####################
# Class to keep the data of one annotation set of a FrameNet corpus file:
# either the annotation of one frame or a set of named entity labels.
class FNCorpusAset
  attr_reader :layers, :aset_type, :aset_id, :frame_name, :lu

  #######
  # Analyze RegXML object, store in object variables:
  #
  # @aset_type: "frame" or "NER" (nil if a "frame" set lacks its LU name)
  # @frame_name: frame name for "frame" type
  # @lu: LU for "frame" type
  # @aset_id: ID of the annotation set
  # @layers: hash: layer type (FE, GF, PT, Target, NER) -> [offset, "start"/"stop"] -> list of labels
  #                string -> int*string -> array:string
  #
  # Problems in the input are reported on $stderr and end the analysis early;
  # no exception is raised for them.
  #
  # aset:    RegXML object of the annotation set
  # charidx: array of pairs [start index, stop index] int*int
  def initialize(aset, charidx)
    @layers = {}
    @frame_name = nil
    @lu = nil
    @aset_type = nil

    attributes = aset.attributes

    @aset_id = attributes["ID"]

    if attributes["frameName"]
      # all of these seem to be frames
      unless attributes["luName"]
        $stderr.puts "FNCorpusAset warning: cannot determine LU name"
        $stderr.puts aset.to_s
        return
      end
      @aset_type = "frame"
      @frame_name = attributes["frameName"]
      @lu = attributes["luName"]

      unless (layers = aset.first_child_matching("layers"))
        $stderr.puts "FNCorpusAset warning: unexpectedly no layers found"
        $stderr.puts aset.to_s
        return
      end

      layers.each_child_matching("layer") { |l| analyze_layer(l, charidx) }
    else
      # all we seem to get here are named entity labels.
      @aset_type = "NER"

      unless (layers = aset.first_child_matching("layers"))
        $stderr.puts "FNCorpusAset Warning: unexpectedly no layers found"
        $stderr.puts aset.to_s
        return
      end
      unless (layer = layers.first_child_matching("layer"))
        $stderr.puts "FNCorpusAset Warning: unexpectedly no layers found"
        $stderr.puts aset.to_s
        return
      end

      unless layer.attributes["name"] == "NER"
        $stderr.puts "FNCorpusAset Warning: unexpected layer #{layer.attributes["name"]}, was expecting only an NER layer."
        $stderr.puts aset.to_s
        return
      end

      analyze_layer(layer, charidx)
    end
  end

  #############
  # input: <layer> RegXML object
  # analyze this, put into @layers data structure
  #
  # layer:   RegXML object
  # charidx: array:int*int pairs start/end index of words
  def analyze_layer(layer, charidx)
    layer_name = layer.attributes["name"]
    unless layer_name
      $stderr.puts "FNCorpusAset warning: cannot determine layer name"
      $stderr.puts layer.to_s
      return
    end

    # FN-specific: skip 2nd layer FEs for now
    return if layer_name == "FE" && layer.attributes["rank"] == "2"

    @layers[layer_name] ||= {}

    # without a <labels> child there is nothing to record for this layer
    labels = layer.first_child_matching("labels")
    return unless labels

    # collect triples [label name, start offset, end offset]
    this_layer = []
    labels.each_child_matching("label") do |label|
      attributes = label.attributes
      # null instantiation, ignore
      next if attributes["itype"] =~ /NI/
      # neither start nor end offset given, ignore
      next unless attributes["start"] || attributes["end"]

      this_layer << [attributes["name"], attributes["start"].to_i, attributes["end"].to_i]
    end

    # sanity check: do indices
    # match word start and end indices?
    this_layer = verify_annotation(this_layer, charidx)

    # sanity check: verify that
    # we don't have overlapping labels
    deleted = {} # index into this_layer -> true for labels that are dropped

    this_layer.each_index do |i|
      # efficiency: skip already deleted labels
      next if deleted[i]

      this_label, this_from, this_to = this_layer[i]

      # compare with all remaining labels
      ((i + 1)...this_layer.length).each do |other_i|
        other_label, other_from, other_to = this_layer[other_i]

        if this_from <= other_from && other_from <= this_to
          # the later label starts inside this one: throw out the later label
          $stderr.puts "Warning: Label overlap, deleting #{other_label}"
          deleted[other_i] = true
        elsif this_from <= other_to && other_to <= this_to
          # the later label starts before this one and ends inside it:
          # throw out this label
          $stderr.puts "Warning: Label overlap, deleting #{this_label}"
          deleted[i] = true
        end
      end

      # matched with all other labels; record the label only if it survived
      next if deleted[i]

      [[this_from, "start"], [this_to, "stop"]].each do |offset, start_or_stop|
        (@layers[layer_name][[offset, start_or_stop]] ||= []) << this_label
      end
    end
  end

  ##############
  # verify found triples label/from_index/to_index
  # against given start/end indices of words:
  # offsets falling inside a word or into the gap between two words
  # are moved to a word boundary, and every change is reported on $stderr
  #
  # found:   array: label/from/to, string*int*int
  # charidx: array: from/to, int*int
  #
  # returns: triples, possibly changed
  def verify_annotation(found, charidx)
    found.map do |element, start, stop|
      newstart = start
      newstop = stop

      # compare against word start/stop indices
      charidx.each_with_index do |(startidx, stopidx), j|
        # stop index of the preceding word; there is none for the first word
        _prev_start, prev_stop = charidx[j - 1] unless j.zero?

        # start inside this word, or in the gap before it: move to word start
        if (start > startidx && start <= stopidx) ||
           (j != 0 && start > prev_stop && start < startidx)
          newstart = startidx
        end

        if stop >= startidx && stop < stopidx
          # stop inside this word: move to word end
          newstop = stopidx
        elsif j != 0 && stop > prev_stop && stop < startidx
          # stop in the gap before this word: move back to previous word end
          newstop = prev_stop
        end
      end

      # report change
      if start != newstart || stop != newstop
        $stderr.puts "FNCorpusXML warning: Heuristics has changed element #{element}"
        $stderr.puts "\tfrom [#{start},#{stop}] to [#{newstart},#{newstop}]"
      end

      [element, newstart, newstop]
    end
  end
end
|