shalmaneser-lib 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,145 @@
1
+ require_relative 'sem_node'
2
+
3
module STXML
  # FrameNode: a SemNode that describes one frame.
  #
  # Methods added to or changed from SemNode:
  #
  # name      returns the name of the frame
  # set_name  changes the name of the frame to a new name
  # target    returns the target (as a FeNode object)
  #
  # each_child() iterates through FEs, children() returns all FEs;
  # each_fe and fes are synonyms for these two.
  #
  # each_fe_by_name
  #   A frame node may have several FE children with the same frame element
  #   label. While each_child yields them separately, each_fe_by_name lumps
  #   FE children with the same frame element label into one FeNode.
  #   Warnings:
  #   - the REXML object of the FeNode is that of the first FE child
  #     with that frame element label.
  #   - Underspecification is ignored! If you have the same FE twice,
  #     and there is underspecification regarding the extent of the FE,
  #     the two FE children will be lumped together anyway.
  #     If you don't want that, use each_child instead.
  #
  # add_fe
  #   CAUTION: please do not call this method directly from outside,
  #   use SalsaTigerSentence.add_fe instead; otherwise the node and its ID
  #   will not be recorded in the node list and the node cannot be
  #   retrieved via its ID.
  class FrameNode < SemNode
    # Returns the single child attached with the edge label "target".
    # Prints a warning to $stderr and returns nil when there is none;
    # raises a RuntimeError when there is more than one.
    def target
      targets = children_by_edgelabels(["target"])

      if targets.empty?
        $stderr.puts "SalsaTigerRegXML warning: Frame #{id}: No target, but I got: \n" + child_labels.join(", ")
        return nil
      end

      raise "Target: more than one target to frame #{id}." if targets.length != 1

      targets.first
    end

    # Returns the value of this node's "name" attribute (the frame name).
    def name
      get_attribute("name")
    end

    # Sets this node's "name" attribute to new_name.
    def set_name(new_name)
      set_attribute("name", new_name)
    end

    # each_fe: synonym for each_child
    def each_fe
      each_child { |fe| yield fe }
    end

    # fes: synonym for children
    def fes
      children
    end

    # Yields one node per distinct child edge label other than "target".
    # A label carried by exactly one child yields that child itself;
    # otherwise a fresh FeNode with the ID "<frame id>_<label>" is built,
    # the children of all children with that label are added to it,
    # and the combined node is yielded.
    def each_fe_by_name
      child_labels.uniq.each do |fe_name|
        next if fe_name == "target"

        same_label_fes = children_by_edgelabels([fe_name])

        if same_label_fes.length == 1
          # one frame element with that name
          yield same_label_fes.first
        else
          # several frame elements with that name: combine them
          combined_fe = FeNode.new(fe_name, "#{id}_#{fe_name}")
          same_label_fes.each do |fe|
            fe.each_child { |child| combined_fe.add_child(child) }
          end
          yield combined_fe
        end
      end
    end

    # Adds fe_node as a child, using the node's own name as the edge label.
    # Raises a RuntimeError if fe_node is named "target" and this frame
    # already has a target.
    def add_child(fe_node)
      refuse_second_target if fe_node.name == "target"

      super(fe_node, fe_node.name)
    end

    # Removes fe_node from the children, using its name as the edge label.
    def remove_child(fe_node)
      super(fe_node, fe_node.name)
    end

    # Creates a new FeNode, attaches it to this frame and returns it.
    #
    # fe_name   - string: name of the FE to add
    # syn_nodes - array:SynNode, syntactic nodes that this FE should point to
    # fe_id     - string: ID for the new FE; when omitted an ID is built
    #             from the frame ID, "_fe" and the current time
    #
    # Raises a RuntimeError if fe_name is "target" and this frame
    # already has a target.
    def add_fe(fe_name, syn_nodes, fe_id = nil)
      refuse_second_target if fe_name == "target"

      # no FE ID given, make one myself
      fe_id ||= id + "_fe" + Time.new.to_f.to_s

      # make FE node and list as this frame's child
      new_fe = FeNode.new(fe_name, fe_id)
      add_child(new_fe)

      # add syn nodes
      syn_nodes.each { |syn_node| new_fe.add_child(syn_node) }

      new_fe
    end

    private

    # Reports the existing target(s) on $stderr and raises a RuntimeError
    # if this frame already has a child with the edge label "target".
    def refuse_second_target
      existing = children_by_edgelabels(["target"])
      return if existing.empty?

      $stderr.puts "Adding second target to frame #{id}"
      $stderr.puts "I already have: " + existing.map(&:id).join(",")
      raise "More than one target."
    end
  end
end
@@ -0,0 +1,347 @@
1
module STXML
  # GraphNode: describes one node in a graph.
  #
  # A node may have an arbitrary number of parents (sources of incoming edges)
  # and an arbitrary number of children (targets of outgoing edges).
  #
  # All edges are labeled and directed. Internally every edge is stored
  # as a pair [edgelabel, node].
  #
  # The add_parent, add_child, remove_parent, remove_child methods
  # take care of both ends of an edge
  # (i.e. n1.add_child(n2, label) also adds n1 as parent of n2
  # with edge label 'label').
  #
  # It is possible to create a 'pointer' rather than an edge:
  #   n1.add_child(n2, label, "pointer_insteadof_edge" => true)
  # will create an edge from n1 to n2 labeled 'label' that is
  # listed under the outgoing edges of n1, but not among
  # the incoming edges of n2.
  # The same option is available for add_parent, remove_parent, remove_child.
  # Note that the option key is the String "pointer_insteadof_edge".
  class GraphNode
    # Separators of the string format produced by _dump and read by
    # fill_from_pickle.
    DUMP_FIELD_SEPARATOR = "QQSEPVALUESQQ".freeze
    DUMP_PAIR_SEPARATOR = "QQPAIRQQ".freeze
    DUMP_LABEL_SEPARATOR = "QQSEPQQ".freeze

    # The ID of this node.
    attr_reader :id

    # Creates a node with the given ID and no edges or features.
    def initialize(id)
      @id = id
      @children = []
      @parents = []
      @features = {}
    end

    # for Marshalling:
    # Dump just IDs instead of actual nodes from Parents and Children lists.
    # Otherwise the Marshaller will go crazy following
    # all the links to objects mentioned.
    # After loading: replace IDs by actual objects with a little help
    # from the caller (see recover_from_dump).
    # @deprecated This method seems to be useless.
    def _dump(_depth)
      @id.to_s +
        DUMP_FIELD_SEPARATOR +
        Marshal.dump(@features) +
        DUMP_FIELD_SEPARATOR +
        dump_edges(@children) +
        DUMP_FIELD_SEPARATOR +
        dump_edges(@parents)
    end

    # Counterpart of _dump: builds a GraphNode from the dumped string.
    # The edge lists of the result hold node IDs instead of nodes
    # until recover_from_dump is called.
    def self._load(string)
      id = string.split(DUMP_FIELD_SEPARATOR).first

      result = GraphNode.new(id)
      result.fill_from_pickle(string)

      result
    end

    # Restores features and edge lists from a string produced by _dump.
    # Edges are restored as [label, node_id] pairs.
    def fill_from_pickle(string)
      _id, features_s, children_s, parents_s = string.split(DUMP_FIELD_SEPARATOR)

      # NOTE(review): Marshal.load must only be fed trusted data;
      # it can instantiate arbitrary objects.
      @features = Marshal.load(features_s)
      @children = load_edges(children_s)
      @parents = load_edges(parents_s)
    end

    # Replaces the node IDs left in the edge lists after loading by the
    # objects that node_by_id.call(id) returns for them.
    def recover_from_dump(node_by_id)
      @children = @children.map { |label, node_id| [label, node_by_id.call(node_id)] }
      @parents = @parents.map { |label, node_id| [label, node_by_id.call(node_id)] }
    end

    # ID-related things

    # Two GraphNodes are equal if they have the same ID;
    # a GraphNode never equals an object of another kind.
    def ==(other)
      other.is_a?(GraphNode) && @id == other.id
    end

    # Changes the ID of this node. Returns the new ID.
    def chid(newid)
      @id = newid
    end

    # setting and retrieving features

    # Returns the value stored for feature, or nil if there is none.
    def get_f(feature)
      @features[feature]
    end

    # Stores value for feature, overwriting any previous value.
    def set_f(feature, value)
      @features[feature] = value
    end

    # Stores value for feature; raises a RuntimeError if the feature
    # already has a non-nil value.
    def add_f(feature, value)
      raise "Feature #{feature} already set." unless @features[feature].nil?

      set_f(feature, value)
    end

    # ancestors

    # Returns the parent nodes, one entry per incoming edge.
    def parents
      @parents.map { |_label, parent| parent }
    end

    # Returns the labels of the incoming edges.
    def parent_labels
      @parents.map { |label, _parent| label }
    end

    # Returns the label of the first incoming edge that comes from parent,
    # or nil if there is no such edge.
    def parent_label(parent)
      edge = @parents.find { |_label, node| node == parent }
      edge && edge.first
    end

    # Returns the internal list of [edgelabel, parent] pairs (not a copy).
    def parents_with_edgelabel
      @parents
    end

    # Yields each parent node.
    def each_parent
      @parents.each { |_label, parent| yield parent }
    end

    # Yields each [edgelabel, parent] pair.
    def each_parent_with_edgelabel
      @parents.each { |label_parent| yield label_parent }
    end

    # Returns the parents reached via an edge whose label is in labels.
    def parents_by_edgelabels(labels)
      @parents
        .select { |label, _parent| labels.include?(label) }
        .map { |_label, parent| parent }
    end

    # Adds an incoming edge from parent labeled edgelabel.
    # Unless varhash["pointer_insteadof_edge"] is set, self is also
    # registered as a child of parent (if it is not registered already).
    def add_parent(parent, edgelabel, varhash = {})
      @parents << [edgelabel, parent]

      # and vice versa: add self as child to parent
      return if varhash["pointer_insteadof_edge"]
      return if parent.children_with_edgelabel.include?([edgelabel, self])

      parent.add_child(self, edgelabel)
    end

    # Removes all incoming edges from parent labeled edgelabel.
    # Unless varhash["pointer_insteadof_edge"] is set, self is also
    # removed from the children of parent.
    def remove_parent(parent, edgelabel, varhash = {})
      @parents = @parents.reject do |label, node|
        label == edgelabel && node == parent
      end

      # and vice versa: remove self as child from parent
      return if varhash["pointer_insteadof_edge"]
      return unless parent.children_with_edgelabel.include?([edgelabel, self])

      parent.remove_child(self, edgelabel)
    end

    # Number of incoming edges.
    def indeg
      @parents.length
    end

    # Returns all nodes reachable by following incoming edges, without
    # duplicates.
    def ancestors
      ancestors_noduplicates([], [])
    end

    # Like ancestors, but follows only edges whose label is in labels
    # (an empty list means: follow all edges).
    def ancestors_by_edgelabels(labels)
      ancestors_noduplicates([], labels)
    end

    # descendants

    # Returns the child nodes, one entry per outgoing edge.
    def children
      @children.map { |_label, child| child }
    end

    # Returns the labels of the outgoing edges.
    def child_labels
      @children.map { |label, _child| label }
    end

    # Returns the label of the first outgoing edge that leads to child,
    # or nil if there is no such edge.
    def child_label(child)
      edge = @children.find { |_label, node| node == child }
      edge && edge.first
    end

    # Returns the internal list of [edgelabel, child] pairs (not a copy).
    def children_with_edgelabel
      @children
    end

    # Yields each child node.
    def each_child
      @children.each { |_label, child| yield child }
    end

    # Yields each [edgelabel, child] pair.
    def each_child_with_edgelabel
      @children.each { |label_child| yield label_child }
    end

    # Returns the children reached via an edge whose label is in labels.
    def children_by_edgelabels(labels)
      @children
        .select { |label, _child| labels.include?(label) }
        .map { |_label, child| child }
    end

    # Adds an outgoing edge to child labeled edgelabel.
    # Unless varhash["pointer_insteadof_edge"] is set, self is also
    # registered as a parent of child (if it is not registered already).
    def add_child(child, edgelabel, varhash = {})
      @children << [edgelabel, child]

      # and vice versa: add self as parent to child
      return if varhash["pointer_insteadof_edge"]
      return if child.parents_with_edgelabel.include?([edgelabel, self])

      child.add_parent(self, edgelabel)
    end

    # Removes all outgoing edges to child labeled edgelabel.
    # Unless varhash["pointer_insteadof_edge"] is set, self is also
    # removed from the parents of child.
    def remove_child(child, edgelabel, varhash = {})
      @children = @children.reject do |label, node|
        label == edgelabel && node == child
      end

      # and vice versa: remove self as parent from child
      return if varhash["pointer_insteadof_edge"]
      return unless child.parents_with_edgelabel.include?([edgelabel, self])

      child.remove_parent(self, edgelabel)
    end

    # Relabels the edge to child from oldlabel to newlabel.
    # Does nothing if there is no edge [oldlabel, child].
    def change_child_label(child, oldlabel, newlabel, varhash = {})
      return unless @children.include?([oldlabel, child])

      remove_child(child, oldlabel, varhash)
      add_child(child, newlabel, varhash)
    end

    # Removes every outgoing edge (see remove_child for varhash).
    # remove_child replaces @children by a new array, so the list being
    # iterated here is not modified during the iteration.
    def remove_all_children(varhash = {})
      each_child_with_edgelabel do |label, child|
        remove_child(child, label, varhash)
      end
    end

    #### CAUTION: set_children must be called with an "internal format"
    #### list of children: instead of using [node, edgelabel],
    #### use [edgelabel, node]
    #
    # Removes all current children, then installs list as the new child
    # list. The list is stored as given; self is NOT added as a parent
    # to the nodes in it.
    def set_children(list, varhash = {})
      remove_all_children(varhash)

      @children = list
    end

    # Number of outgoing edges.
    def outdeg
      @children.length
    end

    # Returns the nodes without outgoing edges that are found by following
    # child edges from this node, in child order. A node without outgoing
    # edges returns itself. Nodes reachable via several paths are listed
    # once per path.
    def yield_nodes
      leaves = []
      leaves << self if outdeg == 0

      each_child do |child|
        if child.outdeg == 0
          leaves << child
        else
          leaves.concat(child.yield_nodes)
        end
      end

      leaves
    end

    # Returns all nodes reachable by following outgoing edges, without
    # duplicates.
    def descendants
      descendants_noduplicates([], [])
    end

    # Like descendants, but follows only edges whose label is in labels
    # (an empty list means: follow all edges).
    def descendants_by_edgelabels(labels)
      descendants_noduplicates([], labels)
    end

    protected

    # Depth-first collection of descendants into nodes. Nodes already in
    # the list are neither added nor expanded again, which also stops
    # the recursion on cycles.
    def descendants_noduplicates(nodes, labels)
      each_child_with_edgelabel do |label, child|
        next unless labels.empty? || labels.include?(label)
        next if nodes.include?(child)

        nodes = child.descendants_noduplicates(nodes << child, labels)
      end

      nodes
    end

    # Depth-first collection of ancestors into nodes; mirror image of
    # descendants_noduplicates.
    def ancestors_noduplicates(nodes, labels)
      each_parent_with_edgelabel do |label, parent|
        next unless labels.empty? || labels.include?(label)
        next if nodes.include?(parent)

        nodes = parent.ancestors_noduplicates(nodes << parent, labels)
      end

      nodes
    end

    #### CAUTION: set_parents must be called with an "internal format"
    #### list of parents: instead of using [node, edgelabel],
    #### use [edgelabel, node]
    #
    # Removes all current parents, then adds every [edgelabel, parent] pair
    # of list via add_parent, passing varhash on to both steps.
    def set_parents(list, varhash = {})
      each_parent_with_edgelabel do |label, parent|
        remove_parent(parent, label, varhash)
      end

      list.each do |label, parent|
        add_parent(parent, label, varhash)
      end
    end

    private

    # Serializes a list of [label, node] pairs as
    # "label QQSEPQQ node-id" items joined by QQPAIRQQ (without the spaces).
    def dump_edges(edges)
      edges.map { |label, node| label + DUMP_LABEL_SEPARATOR + node.id }
           .join(DUMP_PAIR_SEPARATOR)
    end

    # Inverse of dump_edges; returns [label, node_id] pairs.
    # A nil or empty string yields an empty list.
    def load_edges(serialized)
      return [] if serialized.nil? || serialized.empty?

      serialized.split(DUMP_PAIR_SEPARATOR).map do |pair|
        pair.split(DUMP_LABEL_SEPARATOR)
      end
    end
  end
end