shalmaneser-lib 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
@@ -0,0 +1,333 @@
|
|
1
|
+
require_relative 'xml_node'
|
2
|
+
require_relative 'string_terminals_in_right_order'
|
3
|
+
require_relative 'reg_xml'
|
4
|
+
|
5
|
+
module STXML
|
6
|
+
#############
|
7
|
+
class SalsaTigerSentenceGraph < XMLNode
|
8
|
+
include StringTerminalsInRightOrder
|
9
|
+
|
10
|
+
attr_reader :node
|
11
|
+
|
12
|
+
# Build the syntactic graph of one sentence.
# Given a RegXML object, nodes are made for the contents of its
# "terminals" and "nonterminals" children and then linked by edges;
# any other child is stored as kith so that it is kept for output.
# Without a RegXML object an empty "graph" element is recorded.
def initialize(xml_obj,     # RegXML object, or nil/false if there is no syntax
               sentence_id) # string: ID of this sentence
  # global data:
  # node: hash node_id -> node object,
  # maps node IDs to the nodes with that ID
  @node = {}
  @sentence_id = sentence_id

  unless xml_obj
    # we have no syntactic information: record it anyway
    super("graph", {}, sentence_id + "_graph", false)
    return
  end

  # initialize this object as an XML node, i.e. remember the outermost
  # element's name, attributes and ID, and specify that it is not a text
  # but an XML object
  super(xml_obj.name, xml_obj.attributes, sentence_id + "_graph", false)

  # first pass: make the nodes and remember their IDs
  xml_obj.children_and_text.each do |section|
    case section.name
    when "terminals"
      make_nodes(section, "t", "s/graph/terminals", "all_children_kith")
    when "nonterminals"
      make_nodes(section, "nt", "s/graph/nonterminals")
    else
      # additional info that we don't need for now: keep for output
      add_kith(section)
    end
  end

  # second pass: add edges between nodes
  nt_section = xml_obj.children_and_text.detect { |child| child.name == "nonterminals" }
  return unless nt_section

  nt_section.children_and_text.each do |nt|
    # anything that is not an "nt" was already warned about in make_nodes
    next unless nt.name == "nt"

    syn_add_children(@node[SalsaTigerXmlNode.xmlel_id(nt)], nt)
  end
end
|
68
|
+
|
69
|
+
|
70
|
+
###
|
71
|
+
# Read a "splitwords" element: each child named "splitword" describes a
# split for one of the terminals we already know. Nodes are made for the
# parts of each split word and linked to the terminal that is being split.
# Children with another name are reported through warn_child_ignored.
def add_splitwords(xml_obj) # RegXML object, may be nil
  return if xml_obj.nil?

  xml_obj.children_and_text.each do |splitword|
    if splitword.name == "splitword"
      # make nodes for the splitword parts
      make_nodes(splitword, "part", "s/sem/splitwords/splitword", "all_children_kith")

      # this is the terminal that is being split: add links to its new children
      syn_add_children(@node[SalsaTigerXmlNode.xmlel_id(splitword)], splitword)
    else
      warn_child_ignored("s/sem/splitwords/", splitword)
    end
  end
end
|
91
|
+
|
92
|
+
###
|
93
|
+
def to_s
|
94
|
+
string_for_nodes(syn_roots)
|
95
|
+
end
|
96
|
+
|
97
|
+
###
|
98
|
+
# Returns the result of the superclass's get for this graph element.
#
# Before delegating, make sure that the graph element has a 'root'
# attribute, since the Salsa tool needs this. The attribute is set to the
# ID of the first root node. A graph without any nodes (e.g. one built
# without syntactic information) has no root; in that case the attribute
# is left untouched instead of failing on a nil root.
def get
  root = syn_roots.first
  set_attribute("root", root.id) if root
  super()
end
|
104
|
+
|
105
|
+
#####
|
106
|
+
# access methods
|
107
|
+
|
108
|
+
###
|
109
|
+
# Iterate over all nodes of the graph.
# With a block: yields each node object and returns the node hash.
# Without a block: returns an Enumerator over the same nodes.
def each_node
  return enum_for(:each_node) unless block_given?

  @node.each_value { |n| yield n }
end
|
114
|
+
|
115
|
+
###
|
116
|
+
# Returns an array with all node objects of the graph.
def nodes
  @node.each_value.to_a
end
|
119
|
+
|
120
|
+
###
|
121
|
+
# Iterate over the nodes for which is_terminal? holds.
# With a block: yields each such node and returns the node hash.
# Without a block: returns an Enumerator over the same nodes.
def each_terminal
  return enum_for(:each_terminal) unless block_given?

  @node.each_value { |node| yield node if node.is_terminal? }
end
|
128
|
+
|
129
|
+
###
|
130
|
+
# Iterate over the terminals in the order produced by
# sort_terminals_and_splitwords_left_to_right.
# With a block: yields each node and returns the sorted list.
# Without a block: returns an Enumerator over the same nodes.
def each_terminal_sorted
  return enum_for(:each_terminal_sorted) unless block_given?

  sort_terminals_and_splitwords_left_to_right(terminals).each { |node_obj| yield node_obj }
end
|
135
|
+
|
136
|
+
###
|
137
|
+
# Returns an array of the nodes for which is_terminal? holds.
def terminals
  @node.each_value.select(&:is_terminal?)
end
|
140
|
+
|
141
|
+
###
|
142
|
+
# Returns the terminals in the order produced by
# sort_terminals_and_splitwords_left_to_right.
def terminals_sorted
  unsorted = terminals
  sort_terminals_and_splitwords_left_to_right(unsorted)
end
|
145
|
+
|
146
|
+
###
|
147
|
+
# Iterate over the nodes for which is_nonterminal? holds.
# With a block: yields each such node and returns the node hash.
# Without a block: returns an Enumerator over the same nodes.
def each_nonterminal
  return enum_for(:each_nonterminal) unless block_given?

  @node.each_value { |node| yield node if node.is_nonterminal? }
end
|
154
|
+
|
155
|
+
###
|
156
|
+
# Returns an array of the nodes for which is_nonterminal? holds.
def nonterminals
  @node.each_value.select(&:is_nonterminal?)
end
|
159
|
+
|
160
|
+
###
|
161
|
+
# Returns an array of the root nodes, i.e. all nodes whose parent is nil.
def syn_roots
  @node.each_value.select { |candidate| candidate.parent.nil? }
end
|
166
|
+
###
|
167
|
+
|
168
|
+
######################3
|
169
|
+
# adding nodes
|
170
|
+
|
171
|
+
###
|
172
|
+
# Not supported on the sentence graph itself: always raises a RuntimeError.
def add_child(arg1, arg2, varhash={})
  raise RuntimeError, "Not implemented for this class"
end
|
175
|
+
|
176
|
+
###
|
177
|
+
# Not supported on the sentence graph itself: always raises a RuntimeError.
def remove_child(arg1, arg2, varhash={})
  raise RuntimeError, "Not implemented for this class"
end
|
180
|
+
|
181
|
+
###
|
182
|
+
# Make a new syntactic node and register it under its ID.
# The node is built from an XML element named after the label, carrying
# the attributes id, cat, word and pos (only those that were given).
# Raises a RuntimeError if the label is neither "t" nor "nt".
# Returns the new SynNode.
def add_node(sentid,       # string: sentence ID
             label,        # string: t or nt
             cat = nil,    # string: category
             word = nil,   # string: word
             pos = nil,    # string: part of speech
             syn_id = nil) # string: ID for the new node
  unless %w[t nt].include?(label)
    raise "Unknown node label #{label} for new syntactic node. Must be either t or nt."
  end

  # node ID: sentence ID plus the given ID,
  # or plus an ID generated from the system time
  id_suffix = syn_id || Time.new.to_f.to_s
  new_id = sentid + "_" + id_suffix

  attr_text = [["id", new_id], ["cat", cat], ["word", word], ["pos", pos]]
              .select { |_lbl, content| content }
              .map { |lbl, content| " #{lbl}=\"#{xml_secure_val(content)}\"" }
              .join

  new_node = SynNode.new(RegXML.new("<#{label}#{attr_text}/>"))
  @node[new_node.id] = new_node
  new_node
end
|
212
|
+
|
213
|
+
###
|
214
|
+
# Remove a node from the graph.
# The node is taken out of the node hash and unlinked from its parent
# and from its children; its children are then attached to its former
# parent. The new edges keep the labels of the edges between the removed
# node and its children, so the label of the removed node's incoming
# edge is dropped.
def remove_node(node) # SynNode
  # remove node from list
  @node.delete(node.id)

  incoming = node.parent_with_edgelabel
  if incoming
    # delete incoming edge for deleted node
    in_label, parent = incoming
    parent.remove_child(node, in_label)
  end

  # delete outgoing edges for deleted node
  node.each_child_with_edgelabel do |out_label, child|
    child.remove_parent(node, out_label)
  end

  # without a parent there is nothing to glue the children to
  return unless incoming

  # glue deleted node's children to its parent
  node.each_child_with_edgelabel do |out_label, child|
    parent.add_child(child, out_label)
  end
end
|
239
|
+
|
240
|
+
######################
|
241
|
+
protected
|
242
|
+
|
243
|
+
###
|
244
|
+
# Returns the XML string for the contents of the graph element:
# a "terminals" section with the output of get for each terminal (in
# the order of each_terminal_sorted), followed by a "nonterminals"
# section with the output of get for each nonterminal.
def get_xml_ofchildren
  xml = String.new("<terminals>\n")
  each_terminal_sorted { |term| xml << term.get }
  xml << "</terminals>\n"

  xml << "<nonterminals>\n"
  each_nonterminal { |nonterm| xml << nonterm.get }
  xml << "</nonterminals>\n"
end
|
262
|
+
|
263
|
+
# Make a SynNode for every child of xml_obj that has the expected name
# and register it under its ID. Children with another name are reported
# through warn_child_ignored.
def make_nodes(xml_obj,                 # RegXML object
               expected_obj_name,       # string
               where,                   # string
               all_children_kith = nil) # object: if non-nil, keep all children
                                        # of the new nodes as kith
  xml_obj.children_and_text.each do |elt|
    unless elt.name == expected_obj_name
      warn_child_ignored(where, elt)
      next
    end

    # this is the kind of child we were expecting to see
    new_node = SynNode.new(elt)
    @node[new_node.id] = new_node
    next unless all_children_kith

    elt.children_and_text.each { |elt_child| new_node.add_kith(elt_child) }
  end
end
|
288
|
+
|
289
|
+
# Link a node to the nodes that the children of xml_obj refer to.
# "edge" and "part" children become child edges carrying the "label"
# attribute; "other_edge" children become links, with the "label"
# attribute taken out of the attributes and passed separately; anything
# else is kept as kith of the node for output.
# Raises a RuntimeError if node is nil/false or if a referenced node ID
# is not in the node hash.
def syn_add_children(node, xml_obj)
  raise "Shouldn't be here" unless node

  xml_obj.children_and_text.each do |edge|
    case edge.name
    when "edge", "part"
      # add an edge to this child, looked up by the ID the element refers to
      target = @node[SalsaTigerXmlNode.xmlel_id(edge)]
      raise "Sentence #{@sentence_id}: I cannot find a node for " + edge.to_s unless target

      node.add_child(target, edge.attributes["label"])

    when "other_edge"
      # add a link to this node, looked up by the ID the element refers to
      target_id = SalsaTigerXmlNode.xmlel_id(edge)
      target = @node[target_id]
      unless target
        raise "Sentence #{@sentence_id}: I cannot find a node for other_edge #{target_id} : " + edge.to_s
      end

      attrs = edge.attributes
      # the label is removed from the attributes object itself
      link_label = attrs ? attrs.delete("label") : nil
      node.add_link(target, link_label, attrs)

    else
      # something other than an edge: keep for output
      node.add_kith(edge)
    end
  end
end
|
332
|
+
end
|
333
|
+
end
|
@@ -0,0 +1,438 @@
|
|
1
|
+
require_relative 'xml_node'
|
2
|
+
require_relative 'ts_syn_node'
|
3
|
+
require_relative 'salsa_tiger_xml_node'
|
4
|
+
require_relative 'usp_node'
|
5
|
+
require_relative 'frame_node'
|
6
|
+
require_relative 'fe_node'
|
7
|
+
require_relative 'reg_xml'
|
8
|
+
|
9
|
+
module STXML
|
10
|
+
#############
|
11
|
+
class SalsaTigerSentenceSem < XMLNode
|
12
|
+
|
13
|
+
attr_reader :node
|
14
|
+
|
15
|
+
###
|
16
|
+
# Class method: returns the first child of xml_obj that is named
# "splitwords", or nil if there is none.
def self.get_splitwords(xml_obj)
  xml_obj.children_and_text.find { |child| child.name == "splitwords" }
end
|
21
|
+
|
22
|
+
###
|
23
|
+
# Build the semantic layer of one sentence.
# Given a RegXML object, its "globals", "frames" and "usp" children are
# read; any other child is stored as kith. Without a RegXML object an
# empty "sem" element is recorded.
def initialize(xml_obj,     # RegXML object, or nil/false if there is no semantics
               sentence_id, # string: sentence ID
               id_to_node)  # hash: syn_node_id(string) -> SynNode object
  # global data:
  # node: hash node_id -> node object,
  #   maps node IDs to the nodes with that ID
  # frame_id, uspframe_id, uspfe_id: arrays of node IDs,
  #   listing all frame nodes, frame underspecification nodes,
  #   and FE underspecification nodes respectively
  # globals: array of RegXML objects, each representing one sentence flag
  @node = {}
  @frame_id = []
  @uspframe_id = []
  @uspfe_id = []
  @globals = []

  unless xml_obj
    # we have no semantic information: record it anyway
    super("sem", {}, sentence_id + "_sem", false)
    return
  end

  # we actually have semantic information: read it
  super(xml_obj.name, xml_obj.attributes, sentence_id + "_sem", false)

  # sort the children into the sections we know; the last one of a kind wins
  sections = { "globals" => nil, "frames" => nil, "usp" => nil }
  xml_obj.children_and_text.each do |obj|
    if sections.key?(obj.name)
      sections[obj.name] = obj
    else
      add_kith(obj)
    end
  end

  # handle globals
  if sections["globals"]
    sections["globals"].children_and_text.each { |flag_xml| @globals << flag_xml }
  end

  # index frames
  if sections["frames"]
    sections["frames"].children_and_text.each do |frame|
      if frame.name == "frame"
        # make a node for the frame
        frame_node = FrameNode.new(frame)
        semnode_add_flags(frame_node, frame)
        @node[frame_node.id] = frame_node
        @frame_id << frame_node.id
        # add FEs
        frame_add_children(frame_node, frame, id_to_node)
      else
        warn_child_ignored("s/sem/frames/", frame)
      end
    end
  end

  # index underspecification
  return unless sections["usp"]

  sections["usp"].children_and_text.each do |usp_section|
    case usp_section.name
    when "uspframes" then initialize_usp(usp_section, "frame")
    when "uspfes" then initialize_usp(usp_section, "fe")
    else warn_child_ignored("s/sem/usp/", usp_section)
    end
  end
end
|
108
|
+
|
109
|
+
################################################3
|
110
|
+
# access methods
|
111
|
+
|
112
|
+
###
|
113
|
+
# Iterate over the frame nodes in the order of their IDs in @frame_id.
# With a block: yields each frame node and returns the ID list.
# Without a block: returns an Enumerator over the same nodes.
def each_frame
  return enum_for(:each_frame) unless block_given?

  @frame_id.each { |node_id| yield @node[node_id] }
end
|
118
|
+
|
119
|
+
###
|
120
|
+
# Returns an array of the frame nodes, in the order of @frame_id.
def frames
  @node.values_at(*@frame_id)
end
|
123
|
+
|
124
|
+
###
|
125
|
+
# Iterate over the frame underspecification nodes, in the order of
# their IDs in @uspframe_id.
# With a block: yields each node and returns the ID list.
# Without a block: returns an Enumerator over the same nodes.
def each_usp_frameblock
  return enum_for(:each_usp_frameblock) unless block_given?

  @uspframe_id.each { |node_id| yield @node[node_id] }
end
|
130
|
+
|
131
|
+
###
|
132
|
+
# Returns an array of the frame underspecification nodes,
# in the order of @uspframe_id.
def usp_frameblocks
  @node.values_at(*@uspframe_id)
end
|
135
|
+
|
136
|
+
###
|
137
|
+
# Iterate over the FE underspecification nodes, in the order of
# their IDs in @uspfe_id.
# With a block: yields each node and returns the ID list.
# Without a block: returns an Enumerator over the same nodes.
def each_usp_feblock
  return enum_for(:each_usp_feblock) unless block_given?

  @uspfe_id.each { |node_id| yield @node[node_id] }
end
|
142
|
+
|
143
|
+
###
|
144
|
+
# Returns an array of the FE underspecification nodes,
# in the order of @uspfe_id.
def usp_feblocks
  @node.values_at(*@uspfe_id)
end
|
147
|
+
|
148
|
+
###
|
149
|
+
# Returns the sentence flags as an array of hashes with the keys
# "type" and "param" (the attributes of that name) and "text"
# (the concatenated string forms of the flag element's children).
def flags
  @globals.map do |flag_xml|
    attrs = flag_xml.attributes
    {
      "type" => attrs["type"],
      "param" => attrs["param"],
      "text" => flag_xml.children_and_text.map(&:to_s).join
    }
  end
end
|
157
|
+
|
158
|
+
################################################3
|
159
|
+
# adding and removing things
|
160
|
+
|
161
|
+
###
|
162
|
+
# Make a new frame node and register it as a frame of this sentence.
# The node ID is sem_id if given; otherwise it is the sentence ID plus
# "_f" plus an ID generated from the system time.
# The ID and the frame name are passed through xml_secure_val before
# they are placed into the attribute values of the new element.
# Returns the new FrameNode.
def add_frame(sentid,       # string: sentence ID
              name,         # string: name of the frame
              sem_id = nil) # string: ID for the new node
  frameid = sem_id || (sentid + "_f" + Time.new.to_f.to_s)

  elt = "<frame id=\"#{xml_secure_val(frameid)}\" name=\"#{xml_secure_val(name)}\"/>"
  n = FrameNode.new(RegXML.new(elt))
  @node[n.id] = n
  @frame_id << n.id

  n
end
|
178
|
+
|
179
|
+
###
|
180
|
+
# Take a frame node out of the node hash and out of the list of frame IDs.
def remove_frame(frame_node)
  frame_key = frame_node.id
  @node.delete(frame_key)
  @frame_id.delete(frame_key)
end
|
184
|
+
|
185
|
+
###
|
186
|
+
# Let the frame node make a new FE and register that FE under its ID.
# Returns the new FE node.
def add_fe(frame_node,   # FrameNode
           fe_name,      # string: name of new FE
           fe_children,  # array:SynNode, children of new FE
           sem_id = nil) # optional: ID of new FE
  frame_node.add_fe(fe_name, fe_children, sem_id).tap do |created_fe|
    @node[created_fe.id] = created_fe
  end
end
|
196
|
+
|
197
|
+
###
|
198
|
+
# Take an FE node out of the node hash and remove it as a child
# of its parent.
def remove_fe(fe_node)
  owner = fe_node.parent
  @node.delete(fe_node.id)
  owner.remove_child(fe_node)
end
|
202
|
+
|
203
|
+
###
|
204
|
+
# Make a new, empty underspecification block and register it under its
# ID and in the ID list that matches its kind.
# Raises a RuntimeError if frame_or_fe is neither "frame" nor "fe"
# (the node has been put into the node hash by then).
# Returns the new UspNode.
def add_usp(frame_or_fe) # string: "frame" or "fe"
  usp_block = UspNode.new(RegXML.new("<uspblock/>"), frame_or_fe)
  @node[usp_block.id] = usp_block

  id_list =
    case frame_or_fe
    when "frame" then @uspframe_id
    when "fe" then @uspfe_id
    else raise "Shouldn't be here"
    end
  id_list << usp_block.id

  usp_block
end
|
219
|
+
|
220
|
+
###
|
221
|
+
# Remove an underspecification block: detach all of its children, take it
# out of the node hash and out of the ID list that matches its kind.
# Raises a RuntimeError if the node's i_am is neither "frame" nor "fe".
def remove_usp(usp_node)
  # iterate over a copy: remove_child may modify the very list that
  # children returns, and removing from a list while iterating over it
  # skips elements
  usp_node.children.dup.each { |child| usp_node.remove_child(child) }

  @node.delete(usp_node.id)
  case usp_node.i_am
  when "frame"
    @uspframe_id.delete(usp_node.id)
  when "fe"
    @uspfe_id.delete(usp_node.id)
  else
    raise "Shouldn't be here"
  end
end
|
235
|
+
|
236
|
+
|
237
|
+
###
|
238
|
+
# Not supported on the semantics element itself: always raises a RuntimeError.
def add_child(arg1, arg2)
  raise RuntimeError, "Not implemented for this class"
end
|
241
|
+
|
242
|
+
###
|
243
|
+
# Not supported on the semantics element itself: always raises a RuntimeError.
def remove_child(arg1, arg2)
  raise RuntimeError, "Not implemented for this class"
end
|
246
|
+
|
247
|
+
###
|
248
|
+
# Add a sentence flag: a "global" element with a type attribute, an
# optional param attribute and optional text content.
# The type is not checked against a list of known flag types.
# Returns the new RegXML object.
def add_flag(type, param=nil, text=nil)
  markup = "<global type='#{xml_secure_val(type)}'"
  markup << " param='#{xml_secure_val(param)}'" if param
  markup << (text ? "> #{text} </global>" : "/>")

  flag_xml = RegXML.new(markup)
  @globals << flag_xml
  flag_xml
end
|
267
|
+
|
268
|
+
###
|
269
|
+
# Remove the first sentence flag that matches and return it.
# A flag matches if its "type" attribute equals type and, where param
# and text are given, its "param" attribute equals param and the
# concatenated string forms of its children equal text.
# Returns the removed RegXML object, or nil if no flag matches.
def remove_flag(type, param=nil, text=nil)
  remove_ix = @globals.index do |glob|
    # attributes is a hash accessor, as in flags: index it with []
    attrs = glob.attributes
    attrs["type"] == type &&
      (param.nil? || attrs["param"] == param) &&
      (text.nil? || glob.children_and_text.map(&:to_s).join == text)
  end

  remove_ix ? @globals.delete_at(remove_ix) : nil
end
|
290
|
+
|
291
|
+
############################3
|
292
|
+
protected
|
293
|
+
|
294
|
+
# Returns the XML string for the contents of the sem element:
# the globals, the frames, and the underspecification blocks for
# frames and for FEs, each wrapped in its section element.
def get_xml_ofchildren
  # globals
  xml = String.new("<globals>\n")
  @globals.each { |glob| xml << glob.to_s << "\n" }
  xml << "</globals>\n"

  # frames
  xml << "<frames>\n"
  each_frame { |frame_node| xml << frame_node.get }
  xml << "</frames>\n"

  # underspecification
  xml << "<usp>\n"
  xml << "<uspframes>\n"
  each_usp_frameblock { |block| xml << block.get }
  xml << "</uspframes>\n"
  xml << "<uspfes>\n"
  each_usp_feblock { |block| xml << block.get }
  xml << "</uspfes>\n"
  xml << "</usp>\n"
end
|
327
|
+
|
328
|
+
###
|
329
|
+
# Record on a semantic node every flag found among the children of
# xml_obj: each child named "flag" passes its "name" attribute to the
# node's add_flag. A flag without a name only produces a warning on
# standard error.
def semnode_add_flags(sem_node, # SemNode object
                      xml_obj)  # RegXML object
  xml_obj.children_and_text.each do |child|
    next unless child.name == "flag"

    # found a flag, record it
    flag_name = child.attributes["name"]
    if flag_name
      sem_node.add_flag(flag_name)
    else
      $stderr.puts "Warning: flag without a name"
    end
  end
end
|
344
|
+
|
345
|
+
# Read the children of a frame element.
# "fe" and "target" children become FeNode objects that are registered
# under their ID, added as children of the frame node and given their
# flags; their own "fenode" children are linked to syntactic nodes.
# "flag" children are skipped here; everything else is kept as kith.
def frame_add_children(frame_node, # FrameNode object
                       xml_obj,    # RegXML object
                       id_to_node) # hash: syn_node_id(string) -> SynNode object
  xml_obj.children_and_text.each do |fe|
    case fe.name
    when "fe", "target"
      # make a node for this, and add it as child of this frame node
      fe_node = FeNode.new(fe)
      @node[fe_node.id] = fe_node
      frame_node.add_child(fe_node)

      semnode_add_flags(fe_node, fe)

      # add the FE's children
      fe.children_and_text.each do |fechild|
        case fechild.name
        when "fenode"
          syn_id = SalsaTigerXmlNode.xmlel_id(fechild)
          syn_node = id_to_node[syn_id]
          if syn_node
            # normal syntactic node, which the id_to_node mapping knows
            fe_node.add_child(syn_node, fechild)
            syn_node.add_sem(fe_node)
          else
            # must be a node in a different sentence:
            # make a dummy graph node for it
            fe_node.add_child(TSSynNode.new(syn_id), fechild)
          end
        when "flag"
          # nothing to do, we've handled that already
        else
          fe_node.add_kith(fechild)
        end
      end

    when "flag"
      # nothing to do, we've handled that already

    else
      # keep for output
      frame_node.add_kith(fe)
    end
  end
end
|
395
|
+
|
396
|
+
###
|
397
|
+
# Read the underspecification blocks below xml_obj.
# Every "uspblock" child becomes a UspNode that is registered under its
# ID and in the ID list matching frame_or_fe; every "uspitem" child of
# the block adds the node it refers to as a child of the block.
# Children with other names are reported through warn_child_ignored;
# items whose node cannot be found are reported on standard error and
# skipped. Raises a RuntimeError if frame_or_fe is neither "frame"
# nor "fe".
def initialize_usp(xml_obj,     # RegXML object
                   frame_or_fe) # string: "frame" or "fe"
  xml_obj.children_and_text.each do |uspblock|
    if uspblock.name != "uspblock"
      warn_child_ignored("s/sem/usp/uspframe|uspfe", uspblock)
      next
    end

    # node for this underspecified block
    block_node = UspNode.new(uspblock, frame_or_fe)
    @node[block_node.id] = block_node

    id_list =
      case frame_or_fe
      when "frame" then @uspframe_id
      when "fe" then @uspfe_id
      else raise "Shouldn't be here"
      end
    id_list << block_node.id

    # add its children
    uspblock.children_and_text.each do |uspitem|
      if uspitem.name != "uspitem"
        warn_child_ignored("s/sem/usp/uspframe|uspfe/uspblock", uspitem)
        next
      end

      # drop everything up to the last "_s" from the referenced ID
      usp_id = SalsaTigerXmlNode.xmlel_id(uspitem).gsub(/.*_s/, "s")
      member = @node[usp_id]
      if member
        block_node.add_child(member)
      else
        $stderr.puts "Error: Underspecification: could not find node with ID #{usp_id}. Skipping."
      end
    end
  end
end
|
437
|
+
end
|
438
|
+
end
|