shalmaneser-lib 1.2.rc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
@@ -0,0 +1,333 @@
|
|
1
|
+
require_relative 'xml_node'
|
2
|
+
require_relative 'string_terminals_in_right_order'
|
3
|
+
require_relative 'reg_xml'
|
4
|
+
|
5
|
+
module STXML
|
6
|
+
#############
|
7
|
+
class SalsaTigerSentenceGraph < XMLNode
|
8
|
+
include StringTerminalsInRightOrder
|
9
|
+
|
10
|
+
attr_reader :node
|
11
|
+
|
12
|
+
# Builds the syntactic graph of one sentence.
#
# xml_obj:     RegXML object for the graph element, or nil/false when the
#              sentence has no syntactic information
# sentence_id: string, ID of this sentence; the graph's own ID is
#              sentence_id + "_graph"
def initialize(xml_obj, sentence_id)
  # @node: hash node_id -> node object, one entry per node created here
  @node = {}
  @sentence_id = sentence_id
  graph_id = sentence_id + "_graph"

  unless xml_obj
    # no syntactic information: record an empty "graph" element anyway
    super("graph", {}, graph_id, false)
    return
  end

  # initialize this object as an XML node: remember the outermost
  # element's name and attributes, and mark it as XML rather than text
  super(xml_obj.name, xml_obj.attributes, graph_id, false)

  # first pass: create the node objects and remember their IDs
  xml_obj.children_and_text.each do |section|
    case section.name
    when "terminals"
      make_nodes(section, "t", "s/graph/terminals", "all_children_kith")
    when "nonterminals"
      make_nodes(section, "nt", "s/graph/nonterminals")
    else
      # additional info that is not needed for now; kept for output
      add_kith(section)
    end
  end

  # second pass: add the edges, now that every node can be looked up by ID
  nt_section = xml_obj.children_and_text.find { |section| section.name == "nonterminals" }
  return unless nt_section

  nt_section.children_and_text.each do |nt|
    # anything that is not an "nt" was already warned about in make_nodes
    next unless nt.name == "nt"

    syn_add_children(@node[SalsaTigerXmlNode.xmlel_id(nt)], nt)
  end
end
|
68
|
+
|
69
|
+
|
70
|
+
###
|
71
|
+
# Registers the parts of split words and links each part to the terminal
# that is being split.
#
# xml_obj: RegXML object for the "splitwords" element, or nil (nothing is
#          done then). Its "splitword" children each describe a split of
#          one terminal that is already known; other children are reported
#          through warn_child_ignored and skipped.
def add_splitwords(xml_obj)
  return if xml_obj.nil?

  xml_obj.children_and_text.each do |splitword|
    unless splitword.name == "splitword"
      warn_child_ignored("s/sem/splitwords/", splitword)
      next
    end

    # make nodes for the splitword parts
    make_nodes(splitword, "part", "s/sem/splitwords/splitword", "all_children_kith")

    # the splitword's ID names the terminal being split:
    # add links from that terminal to its new children
    split_terminal = @node[SalsaTigerXmlNode.xmlel_id(splitword)]
    syn_add_children(split_terminal, splitword)
  end
end
|
91
|
+
|
92
|
+
###
|
93
|
+
def to_s
|
94
|
+
string_for_nodes(syn_roots)
|
95
|
+
end
|
96
|
+
|
97
|
+
###
|
98
|
+
# Returns the XML for the graph element, as produced by the superclass.
#
# Before serializing, the element's "root" attribute is set to the ID of
# the first parentless node, since the Salsa tool needs this attribute.
# A graph without any nodes (e.g. one built without syntactic
# information) has no root node; the attribute is then left untouched
# instead of failing with NoMethodError on nil.
def get
  root = syn_roots.first
  set_attribute("root", root.id) if root
  super()
end
|
104
|
+
|
105
|
+
#####
|
106
|
+
# access methods
|
107
|
+
|
108
|
+
###
|
109
|
+
# Yields every node stored in @node, in the hash's iteration order.
def each_node
  @node.each_value do |node_obj|
    yield node_obj
  end
end
|
114
|
+
|
115
|
+
###
|
116
|
+
# Returns a new array holding every node stored in @node.
def nodes
  @node.values
end
|
119
|
+
|
120
|
+
###
|
121
|
+
# Yields each stored node whose is_terminal? is true, in hash order
# (unsorted; see each_terminal_sorted for sentence order).
def each_terminal
  @node.each_value do |candidate|
    yield candidate if candidate.is_terminal?
  end
end
|
128
|
+
|
129
|
+
###
|
130
|
+
# Yields the terminals in the order computed by
# sort_terminals_and_splitwords_left_to_right.
def each_terminal_sorted
  ordered = sort_terminals_and_splitwords_left_to_right(terminals)
  ordered.each do |terminal|
    yield terminal
  end
end
|
135
|
+
|
136
|
+
###
|
137
|
+
# Returns an array of all stored nodes whose is_terminal? is true
# (hash order, not sentence order).
def terminals
  @node.values.select(&:is_terminal?)
end
|
140
|
+
|
141
|
+
###
|
142
|
+
# Returns the terminals as ordered by
# sort_terminals_and_splitwords_left_to_right.
def terminals_sorted
  unsorted = terminals
  sort_terminals_and_splitwords_left_to_right(unsorted)
end
|
145
|
+
|
146
|
+
###
|
147
|
+
# Yields each stored node whose is_nonterminal? is true, in hash order.
def each_nonterminal
  @node.each_value do |candidate|
    yield candidate if candidate.is_nonterminal?
  end
end
|
154
|
+
|
155
|
+
###
|
156
|
+
# Returns an array of all stored nodes whose is_nonterminal? is true.
def nonterminals
  @node.values.select(&:is_nonterminal?)
end
|
159
|
+
|
160
|
+
###
|
161
|
+
# Returns an array of the stored nodes that have no parent
# (node.parent is nil).
def syn_roots
  @node.values.select { |candidate| candidate.parent.nil? }
end
|
166
|
+
###
|
167
|
+
|
168
|
+
######################3
|
169
|
+
# adding nodes
|
170
|
+
|
171
|
+
###
|
172
|
+
# Deliberately unsupported on the sentence graph: always raises a
# RuntimeError. All arguments are ignored.
def add_child(arg1, arg2, varhash={})
  raise RuntimeError, "Not implemented for this class"
end
|
175
|
+
|
176
|
+
###
|
177
|
+
# Deliberately unsupported on the sentence graph: always raises a
# RuntimeError. All arguments are ignored.
def remove_child(arg1, arg2, varhash={})
  raise RuntimeError, "Not implemented for this class"
end
|
180
|
+
|
181
|
+
###
|
182
|
+
# Creates a new syntactic node, stores it in @node and returns it.
#
# sentid: string, sentence ID (prefix of the new node's ID)
# label:  string, "t" or "nt"; anything else raises a RuntimeError
# cat:    string, category (optional)
# word:   string, word (optional)
# pos:    string, part of speech (optional)
# syn_id: string, ID suffix for the new node (optional); when omitted the
#         suffix is generated from the current system time
def add_node(sentid, label, cat = nil, word = nil, pos = nil, syn_id = nil)
  unless %w[t nt].include?(label)
    raise "Unknown node label #{label} for new syntactic node. Must be either t or nt."
  end

  # node ID: sentence ID plus either the given suffix or a timestamp
  suffix = syn_id || Time.new.to_f.to_s
  new_id = sentid + "_" + suffix

  # only attributes that were actually given end up in the element
  given = { "id" => new_id, "cat" => cat, "word" => word, "pos" => pos }.select { |_, value| value }
  attr_text = given.map { |attr, value| " #{attr}=\"#{xml_secure_val(value)}\"" }.join

  n = SynNode.new(RegXML.new("<#{label}#{attr_text}/>"))
  @node[n.id] = n
  n
end
|
212
|
+
|
213
|
+
###
|
214
|
+
# Removes a node (SynNode) from the graph and splices its children into
# its place.
#
# The node is dropped from @node, detached from its parent and from its
# children, and, if it had a parent, its children are attached to that
# parent. The new edges carry the labels of the edges between the removed
# node and its children; the label of the removed node's incoming edge is
# discarded.
def remove_node(node)
  # remove node from the ID index
  @node.delete(node.id)

  incoming = node.parent_with_edgelabel

  # delete the incoming edge of the removed node
  if incoming
    in_label, old_parent = incoming
    old_parent.remove_child(node, in_label)
  end

  # delete the outgoing edges of the removed node (child side)
  node.each_child_with_edgelabel do |out_label, child|
    child.remove_parent(node, out_label)
  end

  # glue the removed node's children to its former parent
  if incoming
    _dropped_label, new_parent = incoming
    node.each_child_with_edgelabel do |out_label, child|
      new_parent.add_child(child, out_label)
    end
  end
end
|
239
|
+
|
240
|
+
######################
|
241
|
+
protected
|
242
|
+
|
243
|
+
###
|
244
|
+
# Returns the XML for the graph's content: a <terminals> section with the
# terminals in sorted order, followed by a <nonterminals> section. Each
# node contributes whatever its own get returns.
def get_xml_ofchildren
  xml = "<terminals>\n"
  each_terminal_sorted { |terminal| xml << terminal.get }
  xml << "</terminals>\n"

  xml << "<nonterminals>\n"
  each_nonterminal { |nonterminal| xml << nonterminal.get }
  xml << "</nonterminals>\n"

  xml
end
|
262
|
+
|
263
|
+
# Creates one SynNode per child of xml_obj that has the expected element
# name and indexes it in @node under its ID.
#
# xml_obj:           RegXML object whose children are inspected
# expected_obj_name: string, element name of the children to turn into nodes
# where:             string, location passed on to warn_child_ignored
#                    for children with any other name
# all_children_kith: object; if non-nil (truthy), all children of each new
#                    node's element are kept on the node as kith
def make_nodes(xml_obj, expected_obj_name, where, all_children_kith = nil)
  xml_obj.children_and_text.each do |elt|
    unless elt.name == expected_obj_name
      warn_child_ignored(where, elt)
      next
    end

    # this is the kind of child we were expecting to see
    syn = SynNode.new(elt)
    @node[syn.id] = syn
    next unless all_children_kith

    elt.children_and_text.each { |grandchild| syn.add_kith(grandchild) }
  end
end
|
288
|
+
|
289
|
+
# Connects a node to the nodes referenced by the children of its XML
# element.
#
# node:    the node receiving the edges; a nil/false value raises
# xml_obj: RegXML object describing node
#
# "edge" and "part" children become child edges, "other_edge" children
# become links, and everything else is kept on the node as kith. A
# reference to an ID that is not in @node raises a RuntimeError.
def syn_add_children(node, xml_obj)
  raise "Shouldn't be here" unless node

  xml_obj.children_and_text.each do |edge|
    case edge.name
    when "edge", "part"
      # add an edge to the node with the referenced ID
      child = @node[SalsaTigerXmlNode.xmlel_id(edge)]
      raise "Sentence #{@sentence_id}: I cannot find a node for " + edge.to_s unless child

      node.add_child(child, edge.attributes["label"])

    when "other_edge"
      # add a link to the node with the referenced ID
      child = @node[SalsaTigerXmlNode.xmlel_id(edge)]
      unless child
        raise "Sentence #{@sentence_id}: I cannot find a node for other_edge #{SalsaTigerXmlNode.xmlel_id(edge)} : " + edge.to_s
      end

      # the label is taken out of the attribute hash; whatever remains is
      # handed to add_link
      attributes = edge.attributes
      edgelabel = attributes ? attributes.delete("label") : nil
      node.add_link(child, edgelabel, attributes)

    else
      # something other than an edge: keep for output
      node.add_kith(edge)
    end
  end
end
|
332
|
+
end
|
333
|
+
end
|
@@ -0,0 +1,438 @@
|
|
1
|
+
require_relative 'xml_node'
|
2
|
+
require_relative 'ts_syn_node'
|
3
|
+
require_relative 'salsa_tiger_xml_node'
|
4
|
+
require_relative 'usp_node'
|
5
|
+
require_relative 'frame_node'
|
6
|
+
require_relative 'fe_node'
|
7
|
+
require_relative 'reg_xml'
|
8
|
+
|
9
|
+
module STXML
|
10
|
+
#############
|
11
|
+
class SalsaTigerSentenceSem < XMLNode
|
12
|
+
|
13
|
+
attr_reader :node
|
14
|
+
|
15
|
+
###
|
16
|
+
# Class method: returns the first child of xml_obj whose element name is
# "splitwords", or nil if there is none.
#
# xml_obj: RegXML object for the semantics element, or nil. A nil
#          xml_obj (a sentence without semantic information, which
#          initialize accepts as well) yields nil instead of raising.
def self.get_splitwords(xml_obj)
  return nil unless xml_obj

  xml_obj.children_and_text.find { |child| child.name == "splitwords" }
end
|
21
|
+
|
22
|
+
###
|
23
|
+
# Builds the semantic layer of one sentence.
#
# xml_obj:     RegXML object for the semantics element, or nil/false when
#              the sentence has no semantic information
# sentence_id: string, sentence ID; this object's ID is sentence_id + "_sem"
# id_to_node:  hash syn_node_id (string) -> SynNode object
def initialize(xml_obj, sentence_id, id_to_node)
  # @node:        hash node_id -> node object for all semantic nodes
  # @frame_id:    IDs of all frame nodes
  # @uspframe_id: IDs of all frame underspecification nodes
  # @uspfe_id:    IDs of all FE underspecification nodes
  # @globals:     RegXML objects, each representing one sentence flag
  @node = {}
  @frame_id = []
  @uspframe_id = []
  @uspfe_id = []
  @globals = []
  sem_id = sentence_id + "_sem"

  unless xml_obj
    # no semantic information: record an empty "sem" element anyway
    super("sem", {}, sem_id, false)
    return
  end

  super(xml_obj.name, xml_obj.attributes, sem_id, false)

  # sort the children into the three sections we understand;
  # everything else is kept as kith
  globals_obj = nil
  frames_obj = nil
  usp_obj = nil
  xml_obj.children_and_text.each do |section|
    case section.name
    when "globals" then globals_obj = section
    when "frames" then frames_obj = section
    when "usp" then usp_obj = section
    else add_kith(section)
    end
  end

  # handle globals
  if globals_obj
    globals_obj.children_and_text.each { |flag_xml| @globals << flag_xml }
  end

  # index frames
  if frames_obj
    frames_obj.children_and_text.each do |frame|
      unless frame.name == "frame"
        warn_child_ignored("s/sem/frames/", frame)
        next
      end

      # make a node for the frame, then add its FEs
      frame_node = FrameNode.new(frame)
      semnode_add_flags(frame_node, frame)
      @node[frame_node.id] = frame_node
      @frame_id << frame_node.id
      frame_add_children(frame_node, frame, id_to_node)
    end
  end

  # index underspecification
  return unless usp_obj

  usp_obj.children_and_text.each do |block_list|
    case block_list.name
    when "uspframes" then initialize_usp(block_list, "frame")
    when "uspfes" then initialize_usp(block_list, "fe")
    else warn_child_ignored("s/sem/usp/", block_list)
    end
  end
end
|
108
|
+
|
109
|
+
################################################3
|
110
|
+
# access methods
|
111
|
+
|
112
|
+
###
|
113
|
+
# Yields the frame nodes, in the order their IDs are stored in @frame_id.
def each_frame
  @frame_id.each do |frame_id|
    yield @node[frame_id]
  end
end
|
118
|
+
|
119
|
+
###
|
120
|
+
# Returns an array of the frame nodes, in @frame_id order.
def frames
  @node.values_at(*@frame_id)
end
|
123
|
+
|
124
|
+
###
|
125
|
+
# Yields the frame underspecification nodes, in @uspframe_id order.
def each_usp_frameblock
  @uspframe_id.each do |block_id|
    yield @node[block_id]
  end
end
|
130
|
+
|
131
|
+
###
|
132
|
+
# Returns an array of the frame underspecification nodes, in
# @uspframe_id order.
def usp_frameblocks
  @node.values_at(*@uspframe_id)
end
|
135
|
+
|
136
|
+
###
|
137
|
+
# Yields the FE underspecification nodes, in @uspfe_id order.
def each_usp_feblock
  @uspfe_id.each do |block_id|
    yield @node[block_id]
  end
end
|
142
|
+
|
143
|
+
###
|
144
|
+
# Returns an array of the FE underspecification nodes, in @uspfe_id order.
def usp_feblocks
  @node.values_at(*@uspfe_id)
end
|
147
|
+
|
148
|
+
###
|
149
|
+
# Returns the sentence flags as an array of hashes, one per entry of
# @globals, with the string keys "type" and "param" (the element's
# attributes of those names) and "text" (the string forms of the
# element's children, concatenated).
def flags
  @globals.map do |flag_xml|
    attrs = flag_xml.attributes
    {
      "type" => attrs["type"],
      "param" => attrs["param"],
      "text" => flag_xml.children_and_text.map(&:to_s).join
    }
  end
end
|
157
|
+
|
158
|
+
################################################3
|
159
|
+
# adding and removing things
|
160
|
+
|
161
|
+
###
|
162
|
+
# Creates a new frame node, registers it in @node and @frame_id, and
# returns it.
#
# sentid: string, sentence ID; only used to generate an ID when sem_id is
#         not given (sentid + "_f" + a timestamp)
# name:   string, name of the frame
# sem_id: string, ID for the new node (optional); used as is
#
# The ID and the frame name are escaped with xml_secure_val before they
# are placed into the XML attributes, as add_node and add_flag do for
# their attribute values, so that a quote or ampersand in a name cannot
# produce a malformed element.
def add_frame(sentid, name, sem_id = nil)
  frameid = sem_id || (sentid + "_f" + Time.new.to_f.to_s)

  frame_xml = "<frame id=\"#{xml_secure_val(frameid)}\" name=\"#{xml_secure_val(name)}\"/>"
  n = FrameNode.new(RegXML.new(frame_xml))
  @node[n.id] = n
  @frame_id << n.id

  n
end
|
178
|
+
|
179
|
+
###
|
180
|
+
# Forgets a frame node: drops it from the node index and from the list
# of frame IDs. The node object itself is not touched.
def remove_frame(frame_node)
  doomed_id = frame_node.id
  @node.delete(doomed_id)
  @frame_id.delete(frame_node.id)
end
|
184
|
+
|
185
|
+
###
|
186
|
+
# Adds a frame element to a frame and indexes the new FE node in @node.
#
# frame_node:  FrameNode that receives the FE (its add_fe does the work)
# fe_name:     string, name of the new FE
# fe_children: array of SynNode, children of the new FE
# sem_id:      optional ID of the new FE
#
# Returns the new FE node.
def add_fe(frame_node, fe_name, fe_children, sem_id = nil)
  frame_node.add_fe(fe_name, fe_children, sem_id).tap do |created|
    @node[created.id] = created
  end
end
|
196
|
+
|
197
|
+
###
|
198
|
+
# Removes a frame element node: drops it from the node index, then asks
# its parent to remove it as a child.
def remove_fe(fe_node)
  @node.delete(fe_node.id)
  owner = fe_node.parent
  owner.remove_child(fe_node)
end
|
202
|
+
|
203
|
+
###
|
204
|
+
# Creates an empty underspecification block and registers it.
#
# frame_or_fe: string, "frame" or "fe" — selects the ID list the block is
#              recorded in. Any other value raises a RuntimeError; note
#              that the new node has already been stored in @node by then.
#
# Returns the new UspNode.
def add_usp(frame_or_fe)
  n = UspNode.new(RegXML.new("<uspblock/>"), frame_or_fe)
  @node[n.id] = n

  id_list =
    case frame_or_fe
    when "frame" then @uspframe_id
    when "fe" then @uspfe_id
    else raise "Shouldn't be here"
    end
  id_list << n.id

  n
end
|
219
|
+
|
220
|
+
###
|
221
|
+
# Removes an underspecification block: detaches all of its children,
# drops it from the node index and from the ID list that matches its
# i_am value ("frame" or "fe"). Any other i_am value raises a
# RuntimeError after the node has already been detached and unindexed.
def remove_usp(usp_node)
  usp_node.children.each do |member|
    usp_node.remove_child(member)
  end
  @node.delete(usp_node.id)

  id_list =
    case usp_node.i_am
    when "frame" then @uspframe_id
    when "fe" then @uspfe_id
    else raise "Shouldn't be here"
    end
  id_list.delete(usp_node.id)
end
|
235
|
+
|
236
|
+
|
237
|
+
###
|
238
|
+
# Deliberately unsupported on the semantics container: always raises a
# RuntimeError. Both arguments are ignored.
def add_child(arg1, arg2)
  raise RuntimeError, "Not implemented for this class"
end
|
241
|
+
|
242
|
+
###
|
243
|
+
# Deliberately unsupported on the semantics container: always raises a
# RuntimeError. Both arguments are ignored.
def remove_child(arg1, arg2)
  raise RuntimeError, "Not implemented for this class"
end
|
246
|
+
|
247
|
+
###
|
248
|
+
# Adds a sentence flag (a <global> element) and returns its RegXML object.
#
# type:  string, flag type; written (escaped) into the "type" attribute.
#        The type is not validated: the original check against a fixed
#        list of types is disabled.
# param: optional string, written (escaped) into the "param" attribute
# text:  optional string, element content, surrounded by single spaces
# NOTE(review): text is inserted without escaping, unlike type and param —
# confirm whether callers may pass markup here on purpose.
def add_flag(type, param=nil, text=nil)
  pieces = ["<global type='#{xml_secure_val(type)}'"]
  pieces << " param='#{xml_secure_val(param)}'" if param
  pieces << (text ? "> #{text} </global>" : "/>")

  flag_xml = RegXML.new(pieces.join)
  @globals << flag_xml
  flag_xml
end
|
267
|
+
|
268
|
+
###
|
269
|
+
# Removes the first sentence flag that matches and returns its RegXML
# object, or nil if no flag matches.
#
# type:  string, must equal the flag's "type" attribute
# param: optional string; when given, must equal the "param" attribute
# text:  optional string; when given, must equal the concatenated string
#        forms of the flag element's children
#
# Attributes are read with attributes["..."], the same way flags reads
# them; the previous attributes("...") form passed an argument to the
# attribute reader.
def remove_flag(type, param=nil, text=nil)
  remove_ix = @globals.index do |glob|
    attrs = glob.attributes
    attrs["type"] == type &&
      (param.nil? || attrs["param"] == param) &&
      (text.nil? || glob.children_and_text.map { |c| c.to_s }.join == text)
  end

  remove_ix ? @globals.delete_at(remove_ix) : nil
end
|
290
|
+
|
291
|
+
############################3
|
292
|
+
protected
|
293
|
+
|
294
|
+
# Returns the XML for the content of the semantics element: a <globals>
# section (one line per flag), a <frames> section, and a <usp> section
# holding <uspframes> and <uspfes>. Frames and underspecification blocks
# contribute whatever their own get returns.
def get_xml_ofchildren
  # globals
  xml = "<globals>\n"
  @globals.each { |glob| xml << glob.to_s << "\n" }
  xml << "</globals>\n"

  # frames
  xml << "<frames>\n"
  each_frame { |frame_node| xml << frame_node.get }
  xml << "</frames>\n"

  # underspecification
  xml << "<usp>\n"
  xml << "<uspframes>\n"
  each_usp_frameblock { |block| xml << block.get }
  xml << "</uspframes>\n"
  xml << "<uspfes>\n"
  each_usp_feblock { |block| xml << block.get }
  xml << "</uspfes>\n"
  xml << "</usp>\n"

  xml
end
|
327
|
+
|
328
|
+
###
|
329
|
+
# Copies the flags found in an XML element onto a semantic node.
#
# sem_node: SemNode object that receives the flags (via add_flag)
# xml_obj:  RegXML object whose "flag" children are read; a flag without
#           a "name" attribute only produces a warning on $stderr
def semnode_add_flags(sem_node, xml_obj)
  xml_obj.children_and_text.each do |child|
    next unless child.name == "flag"

    # found a flag, record it
    flag_name = child.attributes["name"]
    if flag_name
      sem_node.add_flag(flag_name)
    else
      $stderr.puts "Warning: flag without a name"
    end
  end
end
|
344
|
+
|
345
|
+
# Creates the FE/target nodes of a frame and links them to syntax.
#
# frame_node: FrameNode object that receives the new FE nodes
# xml_obj:    RegXML object of the frame
# id_to_node: hash syn_node_id (string) -> SynNode object
#
# "fe" and "target" children become FeNode objects (indexed in @node);
# "flag" children are skipped here, and anything else is kept on the
# frame as kith.
def frame_add_children(frame_node, xml_obj, id_to_node)
  xml_obj.children_and_text.each do |fe|
    case fe.name
    when "fe", "target"
      # make a node for this and add it as child of the frame node
      fe_node = FeNode.new(fe)
      @node[fe_node.id] = fe_node
      frame_node.add_child(fe_node)

      semnode_add_flags(fe_node, fe)

      # add the FE's children
      fe.children_and_text.each do |fechild|
        case fechild.name
        when "fenode"
          syn_id = SalsaTigerXmlNode.xmlel_id(fechild)
          syn_node = id_to_node[syn_id]
          if syn_node
            # normal syntactic node, which the id_to_node mapping knows
            fe_node.add_child(syn_node, fechild)
            syn_node.add_sem(fe_node)
          else
            # must be a node in a different sentence:
            # make a dummy graph node for it
            fe_node.add_child(TSSynNode.new(syn_id), fechild)
          end
        when "flag"
          # nothing to do, semnode_add_flags handled that already
        else
          fe_node.add_kith(fechild)
        end
      end

    when "flag"
      # nothing to do, we handled that already

    else
      # keep for output
      frame_node.add_kith(fe)
    end
  end
end
|
395
|
+
|
396
|
+
###
|
397
|
+
# Reads one list of underspecification blocks and creates a UspNode for
# each "uspblock" child.
#
# xml_obj:     RegXML object whose children are the blocks
# frame_or_fe: string, "frame" or "fe" — selects the ID list each block
#              is recorded in; any other value raises a RuntimeError
#
# Each "uspitem" of a block refers to a node by ID. Everything in that ID
# up to and including the last "_s" is replaced by "s" before the lookup
# in @node; items whose node is unknown are reported on $stderr and
# skipped.
def initialize_usp(xml_obj, frame_or_fe)
  xml_obj.children_and_text.each do |uspblock|
    unless uspblock.name == "uspblock"
      warn_child_ignored("s/sem/usp/uspframe|uspfe", uspblock)
      next
    end

    # node for this underspecified block
    n = UspNode.new(uspblock, frame_or_fe)
    @node[n.id] = n

    id_list =
      case frame_or_fe
      when "frame" then @uspframe_id
      when "fe" then @uspfe_id
      else raise "Shouldn't be here"
      end
    id_list << n.id

    # add its children
    uspblock.children_and_text.each do |uspitem|
      unless uspitem.name == "uspitem"
        warn_child_ignored("s/sem/usp/uspframe|uspfe/uspblock", uspitem)
        next
      end

      usp_id = SalsaTigerXmlNode.xmlel_id(uspitem).gsub(/.*_s/, "s")
      member = @node[usp_id]
      unless member
        $stderr.puts "Error: Underspecification: could not find node with ID #{usp_id}. Skipping."
        next
      end

      n.add_child(member)
    end
  end
end
|
437
|
+
end
|
438
|
+
end
|