shalmaneser-lib 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,333 @@
1
+ require_relative 'xml_node'
2
+ require_relative 'string_terminals_in_right_order'
3
+ require_relative 'reg_xml'
4
+
5
+ module STXML
6
+ #############
7
+ class SalsaTigerSentenceGraph < XMLNode
8
+ include StringTerminalsInRightOrder
9
+
10
+ attr_reader :node
11
+
12
# Builds the syntactic graph of one sentence.
#
# xml_obj     - RegXML object for the graph element; when it is nil/false
#               an empty "graph" element is recorded instead
# sentence_id - string: ID of this sentence (the graph's own ID is
#               sentence_id + "_graph")
def initialize(xml_obj, sentence_id)
  # node: hash node_id -> node object, maps node IDs to the nodes with that ID
  @node = {}
  @sentence_id = sentence_id

  unless xml_obj
    # no syntactic information: record an empty graph anyway
    super("graph", {}, sentence_id + "_graph", false)
    return
  end

  # initialize this object as an XML node: keep the outermost element's
  # name and attributes, and mark it as an XML object rather than text
  super(xml_obj.name, xml_obj.attributes, sentence_id + "_graph", false)

  # first pass: create the nodes and remember their IDs
  xml_obj.children_and_text.each do |section|
    case section.name
    when "terminals"
      make_nodes(section, "t", "s/graph/terminals", "all_children_kith")
    when "nonterminals"
      make_nodes(section, "nt", "s/graph/nonterminals")
    else
      # additional info that is not interpreted here; keep it for output
      add_kith(section)
    end
  end

  # second pass: add the edges between nodes
  nt_section = xml_obj.children_and_text.detect { |section| section.name == "nonterminals" }
  return unless nt_section

  nt_section.children_and_text.each do |nt|
    # anything that is not an "nt" was already reported by make_nodes
    next unless nt.name == "nt"

    syn_add_children(@node[SalsaTigerXmlNode.xmlel_id(nt)], nt)
  end
end
68
+
69
+
70
+ ###
71
# Adds split-word information to the graph.
#
# xml_obj - RegXML object (or nil, in which case nothing happens): an
#           element whose children named "splitword" each describe a split
#           for one of the terminals that is already known
def add_splitwords(xml_obj)
  return if xml_obj.nil?

  xml_obj.children_and_text.each do |entry|
    if entry.name == "splitword"
      # make nodes for the parts of the split word
      make_nodes(entry, "part", "s/sem/splitwords/splitword", "all_children_kith")
      # the terminal being split gets links to its new children
      syn_add_children(@node[SalsaTigerXmlNode.xmlel_id(entry)], entry)
    else
      warn_child_ignored("s/sem/splitwords/", entry)
    end
  end
end
91
+
92
+ ###
93
# Returns the result of string_for_nodes applied to the root nodes of
# this graph.
def to_s
  roots = syn_roots
  string_for_nodes(roots)
end
96
+
97
+ ###
98
# Returns the XML for this graph as produced by the superclass.
#
# Before delegating, the graph element is given a 'root' attribute naming
# the first root node, since the Salsa tool needs it. A graph without any
# nodes (e.g. one built without syntactic information) has no root; in
# that case the attribute is left alone instead of failing on nil.
def get
  top = syn_roots.first
  set_attribute("root", top.id) if top
  super()
end
104
+
105
+ #####
106
+ # access methods
107
+
108
+ ###
109
# Yields every node stored in this graph, one at a time.
def each_node
  @node.each_value do |member|
    yield member
  end
end
114
+
115
+ ###
116
# Returns an array with all nodes stored in this graph.
def nodes
  @node.values
end
119
+
120
+ ###
121
# Yields each stored node that answers true to is_terminal?.
def each_terminal
  @node.each_value do |candidate|
    yield candidate if candidate.is_terminal?
  end
end
128
+
129
+ ###
130
# Yields the terminals in the order produced by
# sort_terminals_and_splitwords_left_to_right.
def each_terminal_sorted
  ordered = sort_terminals_and_splitwords_left_to_right(terminals)
  ordered.each do |terminal|
    yield terminal
  end
end
135
+
136
+ ###
137
# Returns an array of the stored nodes that answer true to is_terminal?.
def terminals
  @node.values.select(&:is_terminal?)
end
140
+
141
+ ###
142
# Returns the terminals in the order produced by
# sort_terminals_and_splitwords_left_to_right.
def terminals_sorted
  unsorted = terminals
  sort_terminals_and_splitwords_left_to_right(unsorted)
end
145
+
146
+ ###
147
# Yields each stored node that answers true to is_nonterminal?.
def each_nonterminal
  @node.each_value do |candidate|
    yield candidate if candidate.is_nonterminal?
  end
end
154
+
155
+ ###
156
# Returns an array of the stored nodes that answer true to is_nonterminal?.
def nonterminals
  @node.values.select(&:is_nonterminal?)
end
159
+
160
+ ###
161
# Returns an array of the stored nodes whose parent is nil.
def syn_roots
  @node.values.select do |candidate|
    candidate.parent.nil?
  end
end
166
+ ###
167
+
168
+ ######################3
169
+ # adding nodes
170
+
171
+ ###
172
# Not supported on this class: always raises a RuntimeError.
def add_child(arg1, arg2, varhash={})
  raise RuntimeError, "Not implemented for this class"
end
175
+
176
+ ###
177
# Not supported on this class: always raises a RuntimeError.
def remove_child(arg1, arg2, varhash={})
  raise RuntimeError, "Not implemented for this class"
end
180
+
181
+ ###
182
# Creates a new syntactic node, stores it under its ID and returns it.
#
# sentid - string: sentence ID
# label  - string: "t" or "nt"; anything else raises a RuntimeError
# cat    - string: category (attribute omitted when nil/false)
# word   - string: word (attribute omitted when nil/false)
# pos    - string: part of speech (attribute omitted when nil/false)
# syn_id - string: ID for the new node; when missing, a string derived
#          from the current system time is used instead
#
# The node ID is sentid + "_" + syn_id (or the time-based string).
def add_node(sentid, label, cat = nil, word = nil, pos = nil, syn_id = nil)
  unless %w[t nt].include?(label)
    raise "Unknown node label #{label} for new syntactic node. Must be either t or nt."
  end

  new_id = sentid + "_" + (syn_id || Time.new.to_f.to_s)

  # only attributes that actually have a value end up in the element
  attr_text = [["id", new_id], ["cat", cat], ["word", word], ["pos", pos]]
              .select { |_lbl, content| content }
              .map { |lbl, content| " #{lbl}=\"#{xml_secure_val(content)}\"" }
              .join

  created = SynNode.new(RegXML.new("<#{label}#{attr_text}/>"))
  @node[created.id] = created
  created
end
212
+
213
+ ###
214
# Removes a node (SynNode) from the graph and re-attaches its children to
# its former parent.
#
# The edge labels of the new parent->child edges are the labels of the
# edges between the removed node and its children; the label of the
# removed node's own incoming edge is dropped.
def remove_node(node)
  # forget the node itself
  @node.delete(node.id)

  incoming = node.parent_with_edgelabel
  in_label, parent = incoming if incoming

  # delete the incoming edge of the removed node
  parent.remove_child(node, in_label) if incoming

  # delete the outgoing edges of the removed node
  node.each_child_with_edgelabel do |out_label, child|
    child.remove_parent(node, out_label)
  end

  return nil unless incoming

  # glue the removed node's children to its parent
  node.each_child_with_edgelabel do |out_label, child|
    parent.add_child(child, out_label)
  end
end
239
+
240
+ ######################
241
+ protected
242
+
243
+ ###
244
# Returns the XML text for the content of the graph element: a
# <terminals> section (in each_terminal_sorted order) followed by a
# <nonterminals> section, each filled with the nodes' own get output.
def get_xml_ofchildren
  xml = "<terminals>\n".dup
  each_terminal_sorted { |terminal| xml << terminal.get }
  xml << "</terminals>\n"

  xml << "<nonterminals>\n"
  each_nonterminal { |nonterminal| xml << nonterminal.get }
  xml << "</nonterminals>\n"

  xml
end
262
+
263
# Creates a SynNode for every child of xml_obj that has the expected name
# and stores it under its ID; other children are reported via
# warn_child_ignored.
#
# xml_obj           - RegXML object whose children are inspected
# expected_obj_name - string: element name that becomes a node
# where             - string: location passed on to warn_child_ignored
# all_children_kith - object: if non-nil (and not false), all children of
#                     each new node's element are kept as kith of the node
def make_nodes(xml_obj, expected_obj_name, where, all_children_kith = nil)
  xml_obj.children_and_text.each do |elt|
    unless elt.name == expected_obj_name
      warn_child_ignored(where, elt)
      next
    end

    # this is the kind of child we were expecting to see
    created = SynNode.new(elt)
    @node[created.id] = created
    next unless all_children_kith

    elt.children_and_text.each { |sub_elt| created.add_kith(sub_elt) }
  end
end
288
+
289
# Connects a node to the nodes referenced by the children of xml_obj.
#
# node    - the node that receives the edges; a missing node raises
# xml_obj - RegXML object whose children are processed:
#           "edge"/"part"  -> child edge, labelled with the "label" attribute
#           "other_edge"   -> link; the "label" entry is deleted from the
#                             attributes and the rest is passed to add_link
#           anything else  -> kept as kith of the node for output
#
# Raises a RuntimeError when a referenced node ID is not known.
def syn_add_children(node, xml_obj)
  raise "Shouldn't be here" unless node

  xml_obj.children_and_text.each do |edge|
    case edge.name
    when "edge", "part"
      # look up the node the edge points to
      target = @node[SalsaTigerXmlNode.xmlel_id(edge)]
      raise "Sentence #{@sentence_id}: I cannot find a node for " + edge.to_s unless target

      node.add_child(target, edge.attributes["label"])
    when "other_edge"
      # look up the node the link points to
      target = @node[SalsaTigerXmlNode.xmlel_id(edge)]
      unless target
        raise "Sentence #{@sentence_id}: I cannot find a node for other_edge #{SalsaTigerXmlNode.xmlel_id(edge)} : " + edge.to_s
      end

      extra = edge.attributes
      link_label = extra ? extra.delete("label") : nil
      node.add_link(target, link_label, extra)
    else
      # something other than an edge: keep for output
      node.add_kith(edge)
    end
  end
end
332
+ end
333
+ end
@@ -0,0 +1,438 @@
1
+ require_relative 'xml_node'
2
+ require_relative 'ts_syn_node'
3
+ require_relative 'salsa_tiger_xml_node'
4
+ require_relative 'usp_node'
5
+ require_relative 'frame_node'
6
+ require_relative 'fe_node'
7
+ require_relative 'reg_xml'
8
+
9
+ module STXML
10
+ #############
11
+ class SalsaTigerSentenceSem < XMLNode
12
+
13
+ attr_reader :node
14
+
15
+ ###
16
# Class-level helper: returns the first child of xml_obj whose name is
# "splitwords", or nil when there is none.
def self.get_splitwords(xml_obj)
  xml_obj.children_and_text.find { |child| child.name == "splitwords" }
end
21
+
22
+ ###
23
# Builds the semantic layer of one sentence.
#
# xml_obj     - RegXML object for the semantics element; when it is
#               nil/false an empty "sem" element is recorded instead
# sentence_id - string: sentence ID (this node's ID is sentence_id + "_sem")
# id_to_node  - hash: syn_node_id(string) -> SynNode object
def initialize(xml_obj, sentence_id, id_to_node)
  # node: hash node_id -> node object
  # frame_id, uspframe_id, uspfe_id: arrays of node IDs listing all frame
  #   nodes, frame underspecification nodes and FE underspecification nodes
  # globals: array of RegXML objects, each representing one sentence flag
  @node = {}
  @frame_id = []
  @uspframe_id = []
  @uspfe_id = []
  @globals = []

  unless xml_obj
    # no semantic information: record an empty element anyway
    super("sem", {}, sentence_id + "_sem", false)
    return
  end

  super(xml_obj.name, xml_obj.attributes, sentence_id + "_sem", false)

  # sort the top-level children; unknown ones are kept as kith
  sections = {}
  xml_obj.children_and_text.each do |part|
    part_name = part.name
    case part_name
    when "globals", "frames", "usp"
      sections[part_name] = part
    else
      add_kith(part)
    end
  end
  globals_obj = sections["globals"]
  frames_obj = sections["frames"]
  usp_obj = sections["usp"]

  # handle globals
  globals_obj.children_and_text.each { |flag| @globals << flag } if globals_obj

  # index frames
  if frames_obj
    frames_obj.children_and_text.each do |frame|
      if frame.name == "frame"
        # make a node for the frame, then add its FEs
        frame_node = FrameNode.new(frame)
        semnode_add_flags(frame_node, frame)
        @node[frame_node.id] = frame_node
        @frame_id << frame_node.id
        frame_add_children(frame_node, frame, id_to_node)
      else
        warn_child_ignored("s/sem/frames/", frame)
      end
    end
  end

  # index underspecification
  return unless usp_obj

  usp_obj.children_and_text.each do |block_list|
    case block_list.name
    when "uspframes"
      initialize_usp(block_list, "frame")
    when "uspfes"
      initialize_usp(block_list, "fe")
    else
      warn_child_ignored("s/sem/usp/", block_list)
    end
  end
end
108
+
109
+ ################################################3
110
+ # access methods
111
+
112
+ ###
113
# Yields the node registered for each frame ID, in registration order.
def each_frame
  @frame_id.each do |frame_key|
    yield @node[frame_key]
  end
end
118
+
119
+ ###
120
# Returns an array with the node registered for each frame ID.
def frames
  @node.values_at(*@frame_id)
end
123
+
124
+ ###
125
# Yields the node registered for each frame underspecification ID.
def each_usp_frameblock
  @uspframe_id.each do |block_key|
    yield @node[block_key]
  end
end
130
+
131
+ ###
132
# Returns an array with the node registered for each frame
# underspecification ID.
def usp_frameblocks
  @node.values_at(*@uspframe_id)
end
135
+
136
+ ###
137
# Yields the node registered for each FE underspecification ID.
def each_usp_feblock
  @uspfe_id.each do |block_key|
    yield @node[block_key]
  end
end
142
+
143
+ ###
144
# Returns an array with the node registered for each FE
# underspecification ID.
def usp_feblocks
  @node.values_at(*@uspfe_id)
end
147
+
148
+ ###
149
# Returns one hash per sentence flag with the keys "type" and "param"
# (taken from the flag's attributes) and "text" (the string forms of the
# flag's children, concatenated).
def flags
  @globals.map do |flag|
    {
      "type" => flag.attributes["type"],
      "param" => flag.attributes["param"],
      "text" => flag.children_and_text.map(&:to_s).join
    }
  end
end
157
+
158
+ ################################################3
159
+ # adding and removing things
160
+
161
+ ###
162
# Creates a new frame node, registers it and returns it.
#
# sentid - string: sentence ID; only used to build an ID when sem_id is
#          missing (sentid + "_f" + a string derived from the system time)
# name   - string: name of the frame
# sem_id - string: ID for the new node, used as given
#
# Both the ID and the frame name are passed through xml_secure_val before
# they are placed into the element's attributes, as add_node and add_flag
# already do, so that quotes or markup characters in either value cannot
# break the generated element.
def add_frame(sentid, name, sem_id = nil)
  frameid = sem_id || (sentid + "_f" + Time.new.to_f.to_s)

  markup = "<frame id=\"#{xml_secure_val(frameid)}\" name=\"#{xml_secure_val(name)}\"/>"
  n = FrameNode.new(RegXML.new(markup))
  @node[n.id] = n
  @frame_id << n.id

  n
end
178
+
179
+ ###
180
# Unregisters a frame node: drops it from the node hash and from the list
# of frame IDs.
def remove_frame(frame_node)
  @node.delete(frame_node.id)
  @frame_id.delete(frame_node.id)
end
184
+
185
+ ###
186
# Adds a new frame element to a frame and registers it under its ID.
#
# frame_node  - FrameNode that receives the new FE
# fe_name     - string: name of the new FE
# fe_children - array of SynNode: children of the new FE
# sem_id      - optional: ID of the new FE
#
# Returns the object produced by frame_node.add_fe.
def add_fe(frame_node, fe_name, fe_children, sem_id = nil)
  frame_node.add_fe(fe_name, fe_children, sem_id).tap do |created|
    @node[created.id] = created
  end
end
196
+
197
+ ###
198
# Unregisters a frame element node and detaches it from its parent.
def remove_fe(fe_node)
  @node.delete(fe_node.id)
  owner = fe_node.parent
  owner.remove_child(fe_node)
end
202
+
203
+ ###
204
# Creates an empty underspecification block, registers it and returns it.
#
# frame_or_fe - string: "frame" or "fe"; decides which ID list the new
#               block is recorded in. Any other value raises a
#               RuntimeError (after the node has been stored in the node hash).
def add_usp(frame_or_fe)
  created = UspNode.new(RegXML.new("<uspblock/>"), frame_or_fe)
  @node[created.id] = created

  id_list = case frame_or_fe
            when "frame" then @uspframe_id
            when "fe" then @uspfe_id
            else raise "Shouldn't be here"
            end
  id_list << created.id

  created
end
219
+
220
+ ###
221
# Removes an underspecification block: detaches all its children, drops it
# from the node hash and from the ID list matching usp_node.i_am
# ("frame" or "fe"; any other value raises a RuntimeError).
def remove_usp(usp_node)
  # Iterate over a snapshot: if children hands out the node's live
  # collection, removing entries while walking it would skip elements.
  usp_node.children.dup.each do |child|
    usp_node.remove_child(child)
  end

  @node.delete(usp_node.id)

  case usp_node.i_am
  when "frame"
    @uspframe_id.delete(usp_node.id)
  when "fe"
    @uspfe_id.delete(usp_node.id)
  else
    raise "Shouldn't be here"
  end
end
235
+
236
+
237
+ ###
238
# Not supported on this class: always raises a RuntimeError.
def add_child(arg1, arg2)
  raise RuntimeError, "Not implemented for this class"
end
241
+
242
+ ###
243
# Not supported on this class: always raises a RuntimeError.
def remove_child(arg1, arg2)
  raise RuntimeError, "Not implemented for this class"
end
246
+
247
+ ###
248
# Adds a sentence flag and returns the RegXML object created for it.
#
# type  - flag type; no fixed list of types is enforced
# param - optional value for the flag's "param" attribute
# text  - optional text content, placed between single spaces
#
# NOTE(review): type and param go through xml_secure_val but text is
# inserted as given -- confirm that callers only pass XML-safe text.
def add_flag(type, param=nil, text=nil)
  markup = "<global type='#{xml_secure_val(type)}'"
  markup << " param='#{xml_secure_val(param)}'" if param
  markup << (text ? "> #{text} </global>" : "/>")

  flag = RegXML.new(markup)
  @globals << flag
  flag
end
267
+
268
+ ###
269
# Removes the first sentence flag that matches and returns it, or returns
# nil when no flag matches.
#
# type  - must equal the flag's "type" attribute
# param - when non-nil, must equal the flag's "param" attribute
# text  - when non-nil, must equal the concatenated string forms of the
#         flag's children
#
# The attributes are read with attributes[...], the same way the flags
# method reads them; the accessor is not called with an argument.
def remove_flag(type, param=nil, text=nil)
  hit = @globals.find_index do |glob|
    attrs = glob.attributes
    attrs["type"] == type &&
      (param.nil? || attrs["param"] == param) &&
      (text.nil? || glob.children_and_text.map { |c| c.to_s }.join == text)
  end

  # index 0 is truthy in Ruby, so a match in first position is handled too
  hit ? @globals.delete_at(hit) : nil
end
290
+
291
+ ############################3
292
+ protected
293
+
294
# Returns the XML text for the content of the semantics element: the
# <globals> section (one flag per line), the <frames> section, and the
# <usp> section with its <uspframes> and <uspfes> parts.
def get_xml_ofchildren
  # globals
  xml = "<globals>\n".dup
  @globals.each { |flag| xml << flag.to_s << "\n" }
  xml << "</globals>\n"

  # frames
  xml << "<frames>\n"
  each_frame { |frame_node| xml << frame_node.get }
  xml << "</frames>\n"

  # underspecification
  xml << "<usp>\n"
  xml << "<uspframes>\n"
  each_usp_frameblock { |block| xml << block.get }
  xml << "</uspframes>\n"
  xml << "<uspfes>\n"
  each_usp_feblock { |block| xml << block.get }
  xml << "</uspfes>\n"
  xml << "</usp>\n"

  xml
end
327
+
328
+ ###
329
# Copies the flags found among the children of xml_obj onto a semantic node.
#
# sem_node - SemNode object that receives add_flag(name) for each flag
# xml_obj  - RegXML object whose children named "flag" are inspected
#
# A flag without a "name" attribute is reported on $stderr and skipped.
def semnode_add_flags(sem_node, xml_obj)
  xml_obj.children_and_text.each do |child|
    next unless child.name == "flag"

    flag_name = child.attributes["name"]
    if flag_name
      sem_node.add_flag(flag_name)
    else
      $stderr.puts "Warning: flag without a name"
    end
  end
end
344
+
345
# Adds the frame elements (and the target) described by xml_obj to a frame.
#
# frame_node - FrameNode object that receives the new children
# xml_obj    - RegXML object of the frame; children named "fe" or "target"
#              become FeNode objects, "flag" children are skipped here,
#              everything else is kept as kith of the frame for output
# id_to_node - hash: syn_node_id(string) -> SynNode object
def frame_add_children(frame_node, xml_obj, id_to_node)
  xml_obj.children_and_text.each do |elt|
    case elt.name
    when "fe", "target"
      # make a node for this and add it as child of the frame node
      fe_node = FeNode.new(elt)
      @node[fe_node.id] = fe_node
      frame_node.add_child(fe_node)

      semnode_add_flags(fe_node, elt)

      # add the FE's children
      elt.children_and_text.each do |part|
        case part.name
        when "fenode"
          ref_id = SalsaTigerXmlNode.xmlel_id(part)
          syn_node = id_to_node[ref_id]
          if syn_node
            # normal syntactic node, known to the id_to_node mapping
            fe_node.add_child(syn_node, part)
            syn_node.add_sem(fe_node)
          else
            # must be a node in a different sentence:
            # make a dummy graph node for it
            fe_node.add_child(TSSynNode.new(ref_id), part)
          end
        when "flag"
          # nothing to do, flags were handled by semnode_add_flags
        else
          fe_node.add_kith(part)
        end
      end
    when "flag"
      # nothing to do, the caller handles the frame's own flags
    else
      # keep for output
      frame_node.add_kith(elt)
    end
  end
end
395
+
396
+ ###
397
# Reads the underspecification blocks below xml_obj.
#
# xml_obj     - RegXML object whose children named "uspblock" each become
#               a UspNode; other children are reported and skipped
# frame_or_fe - string: "frame" or "fe"; decides which ID list the blocks
#               are recorded in (any other value raises a RuntimeError)
#
# Each "uspitem" child of a block refers to an already known node; its ID
# is rewritten with gsub(/.*_s/, "s") before the lookup. Items whose node
# cannot be found are reported on $stderr and skipped.
def initialize_usp(xml_obj, frame_or_fe)
  xml_obj.children_and_text.each do |block|
    if block.name != "uspblock"
      warn_child_ignored("s/sem/usp/uspframe|uspfe", block)
      next
    end

    # node for this underspecified block
    usp_node = UspNode.new(block, frame_or_fe)
    @node[usp_node.id] = usp_node

    id_list = case frame_or_fe
              when "frame" then @uspframe_id
              when "fe" then @uspfe_id
              else raise "Shouldn't be here"
              end
    id_list << usp_node.id

    # add its children
    block.children_and_text.each do |item|
      if item.name != "uspitem"
        warn_child_ignored("s/sem/usp/uspframe|uspfe/uspblock", item)
        next
      end

      member_id = SalsaTigerXmlNode.xmlel_id(item).gsub(/.*_s/, "s")
      member = @node[member_id]
      if member
        usp_node.add_child(member)
      else
        $stderr.puts "Error: Underspecification: could not find node with ID #{member_id}. Skipping."
      end
    end
  end
end
437
+ end
438
+ end