shalmaneser-lib 1.2.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,333 @@
1
+ require_relative 'xml_node'
2
+ require_relative 'string_terminals_in_right_order'
3
+ require_relative 'reg_xml'
4
+
5
+ module STXML
6
+ #############
7
+ class SalsaTigerSentenceGraph < XMLNode
8
+ include StringTerminalsInRightOrder
9
+
10
+ attr_reader :node
11
+
12
# Set up the syntactic graph of one sentence.
#
# xml_obj:     RegXML object for the graph element; when it is nil or
#              false an empty "graph" element is recorded instead
# sentence_id: string: ID of this sentence (the graph itself is
#              registered under sentence_id + "_graph")
def initialize(xml_obj, sentence_id)
  # node: hash node_id -> node object,
  # maps node IDs to the nodes with that ID
  @node = {}
  @sentence_id = sentence_id

  unless xml_obj
    # we have no syntactic information; record it anyway
    super("graph", {}, sentence_id + "_graph", false)
    return
  end

  # initialize this object as an XML node: remember the outermost
  # element's name, attributes and ID, and specify that it is not a
  # text but an XML object
  super(xml_obj.name, xml_obj.attributes, sentence_id + "_graph", false)

  # first pass: create the nodes and remember their IDs
  xml_obj.children_and_text.each do |section|
    tag = section.name
    case tag
    when "terminals"
      make_nodes(section, "t", "s/graph/terminals", "all_children_kith")
    when "nonterminals"
      make_nodes(section, "nt", "s/graph/nonterminals")
    else
      # additional info that we don't need for now; keep for output
      add_kith(section)
    end
  end

  # second pass: add edges between nodes
  nt_section = xml_obj.children_and_text.find { |child| child.name == "nonterminals" }
  return unless nt_section

  nt_section.children_and_text.each do |nt|
    # anything that is not an "nt" was already warned about in make_nodes
    next unless nt.name == "nt"

    syn_add_children(@node[SalsaTigerXmlNode.xmlel_id(nt)], nt)
  end
end
68
+
69
+
70
+ ###
71
# Register the parts of split words.
#
# xml_obj: RegXML object for an element whose children are named
#          "splitword", each describing a split of one terminal that is
#          already known; nil means there is nothing to do
def add_splitwords(xml_obj)
  return if xml_obj.nil?

  xml_obj.children_and_text.each do |entry|
    if entry.name == "splitword"
      # make nodes for the splitword parts
      make_nodes(entry, "part", "s/sem/splitwords/splitword", "all_children_kith")

      # the element's ID names the terminal that is being split:
      # add links from it to its new children
      syn_add_children(@node[SalsaTigerXmlNode.xmlel_id(entry)], entry)
    else
      warn_child_ignored("s/sem/splitwords/", entry)
    end
  end
end
91
+
92
+ ###
93
# String form of the sentence: whatever string_for_nodes produces for
# the root nodes of the graph.
def to_s
  roots = syn_roots
  string_for_nodes(roots)
end
96
+
97
+ ###
98
# Return the XML representation of the graph (delegates to the
# superclass implementation).
#
# Before delegating, the graph element is given a 'root' attribute
# pointing at the first root node, since the Salsa tool needs this.
# A graph without any nodes (e.g. one built without syntactic
# information) has no root; in that case the attribute is left alone
# instead of failing on a nil root.
def get
  root = syn_roots.first
  set_attribute("root", root.id) if root
  super()
end
104
+
105
+ #####
106
+ # access methods
107
+
108
+ ###
109
# Yield every node of the graph, in the order of the ID -> node hash.
def each_node
  @node.each_pair do |_node_id, graph_node|
    yield graph_node
  end
end
114
+
115
+ ###
116
# Array of all nodes of the graph.
def nodes
  @node.each_value.to_a
end
119
+
120
+ ###
121
# Yield each node that answers true to is_terminal?, in hash order.
def each_terminal
  @node.each_pair do |_node_id, candidate|
    next unless candidate.is_terminal?

    yield candidate
  end
end
128
+
129
+ ###
130
# Yield the terminals in the order produced by
# sort_terminals_and_splitwords_left_to_right.
def each_terminal_sorted
  ordered = sort_terminals_and_splitwords_left_to_right(terminals)
  ordered.each do |terminal|
    yield terminal
  end
end
135
+
136
+ ###
137
# Array of all nodes that answer true to is_terminal?.
def terminals
  @node.each_value.select { |candidate| candidate.is_terminal? }
end
140
+
141
+ ###
142
# The terminals, passed through
# sort_terminals_and_splitwords_left_to_right.
def terminals_sorted
  unsorted = terminals
  sort_terminals_and_splitwords_left_to_right(unsorted)
end
145
+
146
+ ###
147
# Yield each node that answers true to is_nonterminal?, in hash order.
def each_nonterminal
  @node.each_pair do |_node_id, candidate|
    next unless candidate.is_nonterminal?

    yield candidate
  end
end
154
+
155
+ ###
156
# Array of all nodes that answer true to is_nonterminal?.
def nonterminals
  @node.each_value.select { |candidate| candidate.is_nonterminal? }
end
159
+
160
+ ###
161
# Array of all nodes whose parent is nil, i.e. the roots of the graph.
def syn_roots
  @node.each_value.select { |candidate| candidate.parent.nil? }
end
166
+ ###
167
+
168
+ ######################3
169
+ # adding nodes
170
+
171
+ ###
172
# Not supported on the graph object itself: always raises a
# RuntimeError, whatever the arguments.
def add_child(arg1, arg2, varhash={})
  raise RuntimeError, "Not implemented for this class"
end
175
+
176
+ ###
177
# Not supported on the graph object itself: always raises a
# RuntimeError, whatever the arguments.
def remove_child(arg1, arg2, varhash={})
  raise RuntimeError, "Not implemented for this class"
end
180
+
181
+ ###
182
# Create a new syntactic node and register it in the graph.
#
# sentid: string: sentence ID, used as prefix of the new node's ID
# label:  string: "t" or "nt"
# cat:    string: category (attribute omitted when nil)
# word:   string: word (attribute omitted when nil)
# pos:    string: part of speech (attribute omitted when nil)
# syn_id: string: ID suffix for the new node; when absent the current
#         system time is used instead
#
# Returns the new SynNode. Raises a RuntimeError for any other label.
def add_node(sentid, label, cat = nil, word = nil, pos = nil, syn_id = nil)
  unless %w[t nt].include?(label)
    raise "Unknown node label #{label} for new syntactic node. Must be either t or nt."
  end

  # node ID: sentence ID plus either the given ID or the system time
  suffix = syn_id || Time.new.to_f.to_s
  new_id = sentid + "_" + suffix

  # only attributes that actually have a value are written
  attributes = { "id" => new_id, "cat" => cat, "word" => word, "pos" => pos }
  attribute_text = attributes
                   .select { |_lbl, content| content }
                   .map { |lbl, content| " #{lbl}=\"#{xml_secure_val(content)}\"" }
                   .join

  new_node = SynNode.new(RegXML.new("<#{label}#{attribute_text}/>"))
  @node[new_node.id] = new_node

  new_node
end
212
+
213
+ ###
214
# Remove a node (SynNode) from the graph.
#
# The node is dropped from the ID -> node hash, its incoming edge is
# removed from its parent, and each child forgets the node as parent.
# If the node had a parent, its children are then attached to that
# parent, each keeping the label of the edge that used to lead to it
# from the removed node; the label of the removed node's own incoming
# edge is discarded.
def remove_node(node)
  # remove node from list
  @node.delete(node.id)

  incoming = node.parent_with_edgelabel
  parent = nil
  if incoming
    # delete incoming edge for deleted node
    in_label, parent = incoming
    parent.remove_child(node, in_label)
  end

  # delete outgoing edges for deleted node
  node.each_child_with_edgelabel do |edge_label, child|
    child.remove_parent(node, edge_label)
  end

  # without a parent there is nothing to glue the children to
  return unless incoming

  node.each_child_with_edgelabel do |edge_label, child|
    parent.add_child(child, edge_label)
  end
end
239
+
240
+ ######################
241
+ protected
242
+
243
+ ###
244
# XML string for the graph's content: a <terminals> section holding
# the terminals in sorted order, followed by a <nonterminals> section.
def get_xml_ofchildren
  xml = ""

  xml << "<terminals>\n"
  each_terminal_sorted { |terminal| xml << terminal.get }
  xml << "</terminals>\n"

  xml << "<nonterminals>\n"
  each_nonterminal { |nonterminal| xml << nonterminal.get }
  xml << "</nonterminals>\n"

  xml
end
262
+
263
# Create a SynNode for each child of xml_obj that has the expected
# element name and register it under its ID.
#
# xml_obj:           RegXML object whose children are inspected
# expected_obj_name: string: element name a child must have
# where:             string: location passed to warn_child_ignored for
#                    children with any other name
# all_children_kith: object: if non-nil, all children of each new node
#                    are kept as kith
def make_nodes(xml_obj, expected_obj_name, where, all_children_kith = nil)
  xml_obj.children_and_text.each do |elt|
    unless elt.name == expected_obj_name
      # not the kind of child we were expecting to see
      warn_child_ignored(where, elt)
      next
    end

    new_node = SynNode.new(elt)
    @node[new_node.id] = new_node
    next unless all_children_kith

    elt.children_and_text.each { |elt_child| new_node.add_kith(elt_child) }
  end
end
288
+
289
# Connect a node to the nodes referenced by the children of xml_obj.
#
# node:    the node that receives the edges; a RuntimeError is raised
#          when it is nil or false
# xml_obj: RegXML object whose children are processed one by one:
#          "edge"/"part"  -> add_child with the "label" attribute
#          "other_edge"   -> add_link with the "label" attribute split
#                            off from the remaining attributes
#          anything else  -> kept as kith for output
#
# Raises a RuntimeError when a referenced node ID is not registered.
def syn_add_children(node, xml_obj)
  raise "Shouldn't be here" unless node

  xml_obj.children_and_text.each do |edge|
    kind = edge.name
    case kind
    when "edge", "part"
      # look up the node this edge points to
      target = @node[SalsaTigerXmlNode.xmlel_id(edge)]
      raise "Sentence #{@sentence_id}: I cannot find a node for " + edge.to_s unless target

      node.add_child(target, edge.attributes["label"])

    when "other_edge"
      # look up the node this link points to
      target_id = SalsaTigerXmlNode.xmlel_id(edge)
      target = @node[target_id]
      unless target
        raise "Sentence #{@sentence_id}: I cannot find a node for other_edge #{target_id} : " + edge.to_s
      end

      # the label travels separately from the other attributes
      link_attributes = edge.attributes
      link_label = link_attributes ? link_attributes.delete("label") : nil
      node.add_link(target, link_label, link_attributes)

    else
      # something other than an edge: keep for output
      node.add_kith(edge)
    end
  end
end
332
+ end
333
+ end
@@ -0,0 +1,438 @@
1
+ require_relative 'xml_node'
2
+ require_relative 'ts_syn_node'
3
+ require_relative 'salsa_tiger_xml_node'
4
+ require_relative 'usp_node'
5
+ require_relative 'frame_node'
6
+ require_relative 'fe_node'
7
+ require_relative 'reg_xml'
8
+
9
+ module STXML
10
+ #############
11
+ class SalsaTigerSentenceSem < XMLNode
12
+
13
+ attr_reader :node
14
+
15
+ ###
16
# Class method: the first child of xml_obj named "splitwords",
# or nil when there is none.
def self.get_splitwords(xml_obj)
  xml_obj.children_and_text.find { |child| child.name == "splitwords" }
end
21
+
22
+ ###
23
# Set up the semantic layer of one sentence.
#
# xml_obj:     RegXML object for the semantics element; when it is nil
#              or false an empty "sem" element is recorded instead
# sentence_id: string: sentence ID (this object is registered under
#              sentence_id + "_sem")
# id_to_node:  hash: syn_node_id(string) -> SynNode object
def initialize(xml_obj, sentence_id, id_to_node)
  # node: hash node_id -> node object,
  #   maps node IDs to the nodes with that ID
  # frame_id, uspframe_id, uspfe_id: arrays of node IDs, listing all
  #   frame nodes, frame underspecification nodes and FE
  #   underspecification nodes respectively
  # globals: array of RegXML objects, each representing one sentence flag
  @node = {}
  @frame_id = []
  @uspframe_id = []
  @uspfe_id = []
  @globals = []

  unless xml_obj
    # we have no semantic information; record it anyway
    super("sem", {}, sentence_id + "_sem", false)
    return
  end

  super(xml_obj.name, xml_obj.attributes, sentence_id + "_sem", false)

  # sort the children into the three known sections (a later section of
  # the same name replaces an earlier one); everything else is kept as kith
  sections = {}
  xml_obj.children_and_text.each do |obj|
    tag = obj.name
    case tag
    when "globals", "frames", "usp"
      sections[tag] = obj
    else
      add_kith(obj)
    end
  end

  # handle globals
  globals_obj = sections["globals"]
  if globals_obj
    globals_obj.children_and_text.each { |flag_obj| @globals.push(flag_obj) }
  end

  # index frames
  frames_obj = sections["frames"]
  if frames_obj
    frames_obj.children_and_text.each do |frame|
      if frame.name == "frame"
        # make a node for the frame, then add its FEs
        frame_node = FrameNode.new(frame)
        semnode_add_flags(frame_node, frame)
        @node[frame_node.id] = frame_node
        @frame_id << frame_node.id
        frame_add_children(frame_node, frame, id_to_node)
      else
        warn_child_ignored("s/sem/frames/", frame)
      end
    end
  end

  # index underspecification
  usp_obj = sections["usp"]
  return unless usp_obj

  usp_obj.children_and_text.each do |usp_section|
    case usp_section.name
    when "uspframes"
      initialize_usp(usp_section, "frame")
    when "uspfes"
      initialize_usp(usp_section, "fe")
    else
      warn_child_ignored("s/sem/usp/", usp_section)
    end
  end
end
108
+
109
+ ################################################3
110
+ # access methods
111
+
112
+ ###
113
# Yield the node of every frame, in the order the frames were
# registered.
def each_frame
  @frame_id.each do |frame_id|
    yield @node[frame_id]
  end
end
118
+
119
+ ###
120
# Array of all frame nodes, in the order the frames were registered.
def frames
  @node.values_at(*@frame_id)
end
123
+
124
+ ###
125
# Yield the node of every frame underspecification block, in
# registration order.
def each_usp_frameblock
  @uspframe_id.each do |block_id|
    yield @node[block_id]
  end
end
130
+
131
+ ###
132
# Array of all frame underspecification block nodes, in registration
# order.
def usp_frameblocks
  @node.values_at(*@uspframe_id)
end
135
+
136
+ ###
137
# Yield the node of every FE underspecification block, in
# registration order.
def each_usp_feblock
  @uspfe_id.each do |block_id|
    yield @node[block_id]
  end
end
142
+
143
+ ###
144
# Array of all FE underspecification block nodes, in registration
# order.
def usp_feblocks
  @node.values_at(*@uspfe_id)
end
147
+
148
+ ###
149
# Describe every sentence flag as a hash with the string keys
# "type" and "param" (the attributes of those names) and "text"
# (the concatenated string forms of the flag's children).
def flags
  @globals.map do |glob|
    attrs = glob.attributes
    content = glob.children_and_text.map(&:to_s).join
    { "type" => attrs["type"], "param" => attrs["param"], "text" => content }
  end
end
157
+
158
+ ################################################3
159
+ # adding and removing things
160
+
161
+ ###
162
# Create a new frame node and register it.
#
# sentid: string: sentence ID, used to build an ID when sem_id is absent
# name:   string: name of the frame
# sem_id: string: ID for the new node; when absent an ID is built from
#         the sentence ID, "_f" and the current system time
#
# Both attribute values are passed through xml_secure_val before being
# placed into the element text, as is done for the attributes written by
# add_flag, so that a quote or similar character in the name or ID
# cannot break the generated element.
#
# Returns the new FrameNode.
def add_frame(sentid, name, sem_id = nil)
  # make a node for the frame
  frameid = sem_id || (sentid + "_f" + Time.new.to_f.to_s)

  markup = "<frame id=\"#{xml_secure_val(frameid)}\" name=\"#{xml_secure_val(name)}\"/>"
  n = FrameNode.new(RegXML.new(markup))
  @node[n.id] = n
  @frame_id << n.id

  n
end
178
+
179
+ ###
180
# Unregister a frame node: its ID is removed from the ID -> node hash
# and from the list of frame IDs.
def remove_frame(frame_node)
  removed_id = frame_node.id
  @node.delete(removed_id)
  @frame_id.delete(removed_id)
end
184
+
185
+ ###
186
# Add a new frame element to a frame and register it.
#
# frame_node:  FrameNode that creates the FE (via its own add_fe)
# fe_name:     string: name of new FE
# fe_children: array:SynNode, children of new FE
# sem_id:      optional: ID of new FE
#
# Returns the new FE node.
def add_fe(frame_node, fe_name, fe_children, sem_id = nil)
  frame_node.add_fe(fe_name, fe_children, sem_id).tap do |created|
    @node[created.id] = created
  end
end
196
+
197
+ ###
198
# Unregister a frame element node and remove it from its parent.
def remove_fe(fe_node)
  @node.delete(fe_node.id)
  owner = fe_node.parent
  owner.remove_child(fe_node)
end
202
+
203
+ ###
204
# Create an empty underspecification block and register it.
#
# frame_or_fe: string: "frame" or "fe"; decides into which list of
#              block IDs the new block goes. Any other value raises a
#              RuntimeError (at that point the block has already been
#              entered into the ID -> node hash).
#
# Returns the new UspNode.
def add_usp(frame_or_fe)
  block = UspNode.new(RegXML.new("<uspblock/>"), frame_or_fe)
  @node[block.id] = block

  id_list =
    case frame_or_fe
    when "frame" then @uspframe_id
    when "fe" then @uspfe_id
    else raise "Shouldn't be here"
    end
  id_list << block.id

  block
end
219
+
220
+ ###
221
# Remove an underspecification block: detach all of its children,
# unregister it from the ID -> node hash, and drop its ID from the
# list matching its kind.
#
# usp_node: the block to remove; its i_am must be "frame" or "fe",
#           otherwise a RuntimeError is raised
def remove_usp(usp_node)
  # Walk over a copy of the child list: remove_child may shrink the very
  # array that `children` hands back, and deleting from an array while
  # iterating over it makes `each` skip elements.
  usp_node.children.dup.each do |child|
    usp_node.remove_child(child)
  end

  @node.delete(usp_node.id)
  case usp_node.i_am
  when "frame"
    @uspframe_id.delete(usp_node.id)
  when "fe"
    @uspfe_id.delete(usp_node.id)
  else
    raise "Shouldn't be here"
  end
end
235
+
236
+
237
+ ###
238
# Not supported on the semantics object itself: always raises a
# RuntimeError, whatever the arguments.
def add_child(arg1, arg2)
  raise RuntimeError, "Not implemented for this class"
end
241
+
242
+ ###
243
# Not supported on the semantics object itself: always raises a
# RuntimeError, whatever the arguments.
def remove_child(arg1, arg2)
  raise RuntimeError, "Not implemented for this class"
end
246
+
247
+ ###
248
# Add a sentence flag.
#
# type:  value of the flag's "type" attribute
# param: optional value of the flag's "param" attribute
# text:  optional text content; it is written between single spaces
#        and, unlike the attributes, not passed through xml_secure_val
#
# Returns the RegXML object built for the flag, which is also appended
# to the list of globals.
def add_flag(type, param=nil, text=nil)
  markup = "<global type='#{xml_secure_val(type)}'"
  markup += " param='#{xml_secure_val(param)}'" if param
  markup += text ? "> #{text} </global>" : "/>"

  flag = RegXML.new(markup)
  @globals << flag
  flag
end
267
+
268
+ ###
269
# Remove the first sentence flag that matches.
#
# type:  the flag's "type" attribute must equal this
# param: when given, the flag's "param" attribute must equal this
# text:  when given, the concatenated string forms of the flag's
#        children must equal this
#
# Attributes are looked up in the hash returned by `attributes`, the
# same way `flags` reads them.
#
# Returns the removed flag object, or nil when no flag matched.
def remove_flag(type, param=nil, text=nil)
  remove_ix = @globals.index do |glob|
    attrs = glob.attributes
    attrs["type"] == type &&
      (param.nil? || attrs["param"] == param) &&
      (text.nil? || glob.children_and_text.map(&:to_s).join == text)
  end

  remove_ix && @globals.delete_at(remove_ix)
end
290
+
291
+ ############################3
292
+ protected
293
+
294
# XML string for the content of the semantics element: a <globals>
# section (one flag per line), a <frames> section, and a <usp> section
# holding <uspframes> followed by <uspfes>.
def get_xml_ofchildren
  xml = ""

  # globals
  xml << "<globals>\n"
  @globals.each { |glob| xml << glob.to_s << "\n" }
  xml << "</globals>\n"

  # frames
  xml << "<frames>\n"
  each_frame { |frame_node| xml << frame_node.get }
  xml << "</frames>\n"

  # underspecification
  xml << "<usp>\n"
  xml << "<uspframes>\n"
  each_usp_frameblock { |block| xml << block.get }
  xml << "</uspframes>\n"
  xml << "<uspfes>\n"
  each_usp_feblock { |block| xml << block.get }
  xml << "</uspfes>\n"
  xml << "</usp>\n"

  xml
end
327
+
328
+ ###
329
# Copy the flags found among the children of xml_obj onto a node.
#
# sem_node: SemNode object that receives add_flag(name) for each flag
# xml_obj:  RegXML object; each child named "flag" contributes the
#           value of its "name" attribute. A flag without a name is
#           reported on stderr and skipped.
def semnode_add_flags(sem_node, xml_obj)
  xml_obj.children_and_text.each do |child|
    next unless child.name == "flag"

    flag_name = child.attributes["name"]
    if flag_name
      sem_node.add_flag(flag_name)
    else
      $stderr.puts "Warning: flag without a name"
    end
  end
end
344
+
345
# Build the FE and target nodes of one frame.
#
# frame_node: FrameNode object that receives the new children
# xml_obj:    RegXML object of the frame; children named "fe" or
#             "target" become FeNode objects, "flag" children are
#             skipped here, everything else is kept as kith of the frame
# id_to_node: hash: syn_node_id(string) -> SynNode object, used to
#             resolve the nodes an FE points to
def frame_add_children(frame_node, xml_obj, id_to_node)
  xml_obj.children_and_text.each do |fe|
    tag = fe.name

    # flags: nothing to do, we've handled that already
    next if tag == "flag"

    unless ["fe", "target"].include?(tag)
      # keep for output
      frame_node.add_kith(fe)
      next
    end

    # make a node for this, and add it as child of this frame node
    fe_node = FeNode.new(fe)
    @node[fe_node.id] = fe_node
    frame_node.add_child(fe_node)

    semnode_add_flags(fe_node, fe)

    # add the FE's children
    fe.children_and_text.each do |fechild|
      case fechild.name
      when "fenode"
        ref_id = SalsaTigerXmlNode.xmlel_id(fechild)
        syn_node = id_to_node[ref_id]
        if syn_node
          # normal syntactic node, which the id_to_node mapping knows
          fe_node.add_child(syn_node, fechild)
          syn_node.add_sem(fe_node)
        else
          # must be a node in a different sentence:
          # make a dummy graph node for it
          fe_node.add_child(TSSynNode.new(ref_id), fechild)
        end
      when "flag"
        # nothing to do, we've handled that already
      else
        fe_node.add_kith(fechild)
      end
    end
  end
end
395
+
396
+ ###
397
# Read the underspecification blocks of one kind.
#
# xml_obj:     RegXML object whose children named "uspblock" each
#              become a UspNode; other children are reported through
#              warn_child_ignored
# frame_or_fe: string: "frame" or "fe"; decides into which list of
#              block IDs the new blocks go (anything else raises a
#              RuntimeError once the first block has been registered)
#
# Within a block, each "uspitem" child names an already registered
# node by ID; everything in that ID up to the last "_s" is replaced
# by "s" before the lookup. Items whose node cannot be found are
# reported on stderr and skipped.
def initialize_usp(xml_obj, frame_or_fe)
  xml_obj.children_and_text.each do |uspblock|
    if uspblock.name != "uspblock"
      warn_child_ignored("s/sem/usp/uspframe|uspfe", uspblock)
      next
    end

    # node for this underspecified block
    block_node = UspNode.new(uspblock, frame_or_fe)
    @node[block_node.id] = block_node

    id_list =
      case frame_or_fe
      when "frame" then @uspframe_id
      when "fe" then @uspfe_id
      else raise "Shouldn't be here"
      end
    id_list << block_node.id

    # add its children
    uspblock.children_and_text.each do |uspitem|
      if uspitem.name != "uspitem"
        warn_child_ignored("s/sem/usp/uspframe|uspfe/uspblock", uspitem)
        next
      end

      member_id = SalsaTigerXmlNode.xmlel_id(uspitem).gsub(/.*_s/, "s")
      member = @node[member_id]
      if member
        block_node.add_child(member)
      else
        $stderr.puts "Error: Underspecification: could not find node with ID #{member_id}. Skipping."
      end
    end
  end
end
437
+ end
438
+ end