shalmaneser-lib 1.2.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70):
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,145 @@
1
+ require_relative 'sem_node'
2
+
3
module STXML
  #############
  # class FrameNode
  #
  # Inherits from SemNode and adds methods specific to nodes
  # that describe a frame.
  #
  # Additional/changed methods:
  #
  # name      returns the name of the frame
  # set_name  changes the name of the frame to a new name
  # target    returns the target (as a FeNode object)
  #
  # each_child() iterates through FEs, children() returns all FEs;
  # each_fe and fes are synonyms for them.
  #
  # each_fe_by_name  A frame node may have several FE children with the same
  #    frame element label. While each_child returns them separately,
  #    each_fe_by_name lumps FE children with the same frame element label
  #    into one FeNode.
  #    Warnings:
  #    - the REXML object of the FeNode is that of the first FE child
  #      with that frame element label.
  #    - Underspecification is ignored! If you have the same FE twice,
  #      and there is underspecification regarding the extent of the FE,
  #      the two FE children will be lumped together anyway.
  #      If you don't want that, use each_child instead.
  #
  # add_fe    CAUTION: please do not call this method directly externally,
  #    use SalsaTigerSentence.add_fe, otherwise the node and its ID
  #    will not be recorded in the node list and the node cannot be retrieved
  #    via its ID.
  class FrameNode < SemNode
    ###
    # Returns the single child attached with the edge label "target".
    #
    # Prints a warning to $stderr and returns nil when there is no such child.
    # Raises a RuntimeError when more than one target child exists.
    def target
      targets = children_by_edgelabels(["target"])
      if targets.empty?
        $stderr.puts "SalsaTigerRegXML warning: Frame #{id}: No target, but I got: \n" + child_labels.join(", ")
        return nil
      end

      raise "Target: more than one target to frame #{id}." unless targets.length == 1

      targets.first
    end

    ###
    # Returns the value of this node's "name" attribute (the frame name).
    def name
      get_attribute("name")
    end

    ###
    # Sets this node's "name" attribute to new_name.
    def set_name(new_name)
      set_attribute("name", new_name)
    end

    ###
    # each_fe: synonym for each_child.
    # Returns an Enumerator when called without a block.
    def each_fe
      return enum_for(:each_fe) unless block_given?

      each_child { |fe| yield fe }
    end

    ###
    # fes: synonym for children
    def fes
      children
    end

    ###
    # Yields one FE node per distinct child edge label, skipping "target".
    #
    # If exactly one child carries a label, that child is yielded as is.
    # Otherwise a new FeNode with the ID "<frame id>_<label>" is created,
    # the children of all FEs with that label are added to it, and the
    # combined node is yielded.
    # Returns an Enumerator when called without a block.
    #
    # NOTE(review): FeNode#add_child is not visible in this file; if it
    # registers both ends of the edge, building the combined node also adds
    # the temporary node as a parent of each child -- confirm.
    def each_fe_by_name
      return enum_for(:each_fe_by_name) unless block_given?

      child_labels.uniq.each do |fe_name|
        next if fe_name == "target"

        same_label_fes = children_by_edgelabels([fe_name])

        if same_label_fes.length == 1
          # one frame element with that name
          yield same_label_fes.first
        else
          # several frame elements with that name: combine them
          combined_fe = FeNode.new(fe_name, "#{id}_#{fe_name}")
          same_label_fes.each do |fe|
            fe.each_child { |child| combined_fe.add_child(child) }
          end
          yield combined_fe
        end
      end
    end

    ###
    # Adds fe_node as a child, using fe_node.name as the edge label.
    # Raises a RuntimeError if fe_node is named "target" and this frame
    # already has a target child.
    def add_child(fe_node)
      ensure_no_second_target(fe_node.name)

      super(fe_node, fe_node.name)
    end

    ###
    # Removes the child edge to fe_node that is labeled with fe_node.name.
    def remove_child(fe_node)
      super(fe_node, fe_node.name)
    end

    ###
    # Creates a new FeNode, adds it as a child of this frame and lets it
    # point to the given syntactic nodes. Returns the new FeNode.
    #
    # fe_name   - string: name of FE to add
    # syn_nodes - array:SynNode, syntactic nodes that this FE should point to
    # fe_id     - string: ID for the new FE; when omitted, an ID is built
    #             from this frame's ID, "_fe" and the current time
    #
    # Raises a RuntimeError if fe_name is "target" and this frame already
    # has a target child.
    def add_fe(fe_name, syn_nodes, fe_id = nil)
      ensure_no_second_target(fe_name)

      # no FE ID given, make one myself
      fe_id ||= id + "_fe" + Time.new.to_f.to_s

      # make FE node and list as this frame's child
      fe = FeNode.new(fe_name, fe_id)
      add_child(fe)

      # add syn nodes
      syn_nodes.each { |syn_node| fe.add_child(syn_node) }

      fe
    end

    private

    # Raises a RuntimeError (after reporting the existing target IDs on
    # $stderr) when fe_name is "target" and a target child already exists.
    def ensure_no_second_target(fe_name)
      return unless fe_name == "target"

      existing_targets = children_by_edgelabels(["target"])
      return if existing_targets.empty?

      $stderr.puts "Adding second target to frame #{id}"
      $stderr.puts "I already have: " + existing_targets.map(&:id).join(",")
      raise "More than one target."
    end
  end
end
@@ -0,0 +1,347 @@
1
module STXML
  # GraphNode: describes one node in a graph.
  #
  # A node may have an arbitrary number of parents (sources of incoming edges)
  # and an arbitrary number of children (targets of outgoing edges).
  #
  # All edges are labeled and directed. Internally each edge is kept as a
  # two-element array [edgelabel, node].
  #
  # The add_parent, add_child, remove_parent, remove_child methods
  # take care of both ends of an edge
  # (i.e. n1.add_child(n2, label) also adds n1 as parent of n2 with edge
  # label 'label').
  #
  # It is possible to create a 'pointer' rather than an edge:
  #   n1.add_child(n2, label, "pointer_insteadof_edge" => true)
  # will create an edge from n1 to n2 labeled 'label' that is
  # listed under the outgoing edges of n1, but not among
  # the incoming edges of n2.
  # The same option is available for add_parent, remove_parent, remove_child.
  class GraphNode
    # Separators used by the string format that _dump writes and
    # fill_from_pickle reads.
    DUMP_FIELD_SEPARATOR = "QQSEPVALUESQQ".freeze
    DUMP_PAIR_SEPARATOR = "QQPAIRQQ".freeze
    DUMP_LABEL_SEPARATOR = "QQSEPQQ".freeze

    # Key of the options hash that requests a one-sided 'pointer'.
    POINTER_OPTION = "pointer_insteadof_edge".freeze

    # The ID of this node.
    attr_reader :id

    def initialize(id)
      @id = id
      @children = []
      @parents = []
      @features = {}
    end

    # for Marshalling:
    # Dump just IDs instead of actual nodes from Parents and Children lists.
    # Otherwise the Marshaller will go crazy following
    # all the links to objects mentioned.
    # After loading: replace IDs by actual objects with a little help
    # from the caller (see recover_from_dump).
    # @deprecated This method seems to be useless.
    def _dump(depth)
      [
        @id.to_s,
        Marshal.dump(@features),
        dump_edges(@children),
        dump_edges(@parents)
      ].join(DUMP_FIELD_SEPARATOR)
    end

    # Counterpart of _dump: builds a GraphNode whose children and parents
    # lists hold [label, id] pairs until recover_from_dump is called.
    def self._load(string)
      node = GraphNode.new(string.split(DUMP_FIELD_SEPARATOR).first)
      node.fill_from_pickle(string)

      node
    end

    # Restores features, children and parents from a string written by _dump.
    # Children and parents are restored as [label, id] pairs.
    #
    # NOTE: the features are read with Marshal.load, which must never be
    # applied to data from an untrusted source.
    def fill_from_pickle(string)
      _id, features_s, children_s, parents_s = string.split(DUMP_FIELD_SEPARATOR)

      @features = Marshal.load(features_s)
      @children = load_edges(children_s)
      @parents = load_edges(parents_s)
    end

    # Replaces the IDs left in the children and parents lists after loading
    # by whatever node_by_id.call(id) returns for them.
    def recover_from_dump(node_by_id)
      @children = @children.map { |label, node_id| [label, node_by_id.call(node_id)] }
      @parents = @parents.map { |label, node_id| [label, node_by_id.call(node_id)] }
    end

    # ID-related things

    # Two GraphNodes are equal iff their IDs are equal;
    # a GraphNode never equals an object of another kind.
    def ==(other)
      other.is_a?(GraphNode) && @id == other.id
    end

    # Changes the ID of this node.
    def chid(newid)
      @id = newid
    end

    # setting and retrieving features

    def get_f(feature)
      @features[feature]
    end

    def set_f(feature, value)
      @features[feature] = value
    end

    # Like set_f, but raises a RuntimeError if the feature already has
    # a non-nil value.
    def add_f(feature, value)
      raise "Feature #{feature} already set." unless @features[feature].nil?

      set_f(feature, value)
    end

    # ancestors

    def parents
      @parents.map { |_label, parent| parent }
    end

    def parent_labels
      @parents.map { |label, _parent| label }
    end

    # Returns the label of the first incoming edge that comes from parent,
    # or nil if there is none.
    def parent_label(parent)
      edge = @parents.find { |_label, node| node == parent }
      edge && edge[0]
    end

    # Returns the internal list of [edgelabel, parent] pairs.
    def parents_with_edgelabel
      @parents
    end

    # Yields each parent node. Returns an Enumerator without a block.
    def each_parent
      return enum_for(:each_parent) unless block_given?

      @parents.each { |_label, parent| yield parent }
    end

    # Yields each [edgelabel, parent] pair. Returns an Enumerator without
    # a block.
    def each_parent_with_edgelabel
      return enum_for(:each_parent_with_edgelabel) unless block_given?

      @parents.each { |edge| yield edge }
    end

    # Returns the parents whose edge label is contained in labels.
    def parents_by_edgelabels(labels)
      @parents.select { |label, _parent| labels.include?(label) }
              .map { |_label, parent| parent }
    end

    # Adds an incoming edge from parent and, unless the pointer option is
    # set, also registers self as a child of parent.
    def add_parent(parent, edgelabel, varhash = {})
      @parents << [edgelabel, parent]

      # and vice versa: add self as child to parent
      return if varhash[POINTER_OPTION]
      return if parent.children_with_edgelabel.include?([edgelabel, self])

      parent.add_child(self, edgelabel)
    end

    # Removes the incoming edge from parent with the given label and, unless
    # the pointer option is set, also removes self from parent's children.
    def remove_parent(parent, edgelabel, varhash = {})
      @parents = @parents.reject { |label, node| label == edgelabel && node == parent }

      # and vice versa: remove self as child from parent
      return if varhash[POINTER_OPTION]
      return unless parent.children_with_edgelabel.include?([edgelabel, self])

      parent.remove_child(self, edgelabel)
    end

    def indeg
      @parents.length
    end

    # All nodes reachable via incoming edges, without duplicates.
    def ancestors
      ancestors_noduplicates([], [])
    end

    # Like ancestors, but follows only edges whose label is in labels.
    def ancestors_by_edgelabels(labels)
      ancestors_noduplicates([], labels)
    end

    # descendants

    def children
      @children.map { |_label, child| child }
    end

    def child_labels
      @children.map { |label, _child| label }
    end

    # Returns the label of the first outgoing edge that leads to child,
    # or nil if there is none.
    def child_label(child)
      edge = @children.find { |_label, node| node == child }
      edge && edge[0]
    end

    # Returns the internal list of [edgelabel, child] pairs.
    def children_with_edgelabel
      @children
    end

    # Yields each child node. Returns an Enumerator without a block.
    def each_child
      return enum_for(:each_child) unless block_given?

      @children.each { |_label, child| yield child }
    end

    # Yields each [edgelabel, child] pair. Returns an Enumerator without
    # a block.
    def each_child_with_edgelabel
      return enum_for(:each_child_with_edgelabel) unless block_given?

      @children.each { |edge| yield edge }
    end

    # Returns the children whose edge label is contained in labels.
    def children_by_edgelabels(labels)
      @children.select { |label, _child| labels.include?(label) }
               .map { |_label, child| child }
    end

    # Adds an outgoing edge to child and, unless the pointer option is set,
    # also registers self as a parent of child.
    def add_child(child, edgelabel, varhash = {})
      @children << [edgelabel, child]

      # and vice versa: add self as parent to child
      return if varhash[POINTER_OPTION]
      return if child.parents_with_edgelabel.include?([edgelabel, self])

      child.add_parent(self, edgelabel)
    end

    # Removes the outgoing edge to child with the given label and, unless
    # the pointer option is set, also removes self from child's parents.
    def remove_child(child, edgelabel, varhash = {})
      @children = @children.reject { |label, node| label == edgelabel && node == child }

      # and vice versa: remove self as parent from child
      return if varhash[POINTER_OPTION]
      return unless child.parents_with_edgelabel.include?([edgelabel, self])

      child.remove_parent(self, edgelabel)
    end

    # Relabels the edge to child from oldlabel to newlabel;
    # does nothing if there is no such edge.
    def change_child_label(child, oldlabel, newlabel, varhash = {})
      return unless @children.include?([oldlabel, child])

      remove_child(child, oldlabel, varhash)
      add_child(child, newlabel, varhash)
    end

    # Removes every outgoing edge via remove_child.
    # remove_child replaces @children with a new array on each call, so the
    # list being iterated here is not modified during the iteration.
    def remove_all_children(varhash = {})
      each_child_with_edgelabel { |label, child| remove_child(child, label, varhash) }
    end

    #### CAUTION: set_children must be called with an "internal format" list of children:
    #### instead of using [node, edgelabel], use [edgelabel, node]
    #
    # NOTE(review): the old children are detached via remove_child, but the
    # new list is stored as is; self is not registered as a parent of the new
    # children here (set_parents, in contrast, goes through add_parent).
    # Confirm that callers rely on this before changing it.
    def set_children(list, varhash = {})
      remove_all_children(varhash)

      @children = list
    end

    def outdeg
      @children.length
    end

    # Returns the nodes without children found below this node
    # (self alone if self has no children), in child order.
    def yield_nodes
      leaves = []
      leaves << self if outdeg == 0
      each_child do |child|
        if child.outdeg == 0
          leaves << child
        else
          leaves.concat(child.yield_nodes)
        end
      end
      leaves
    end

    # All nodes reachable via outgoing edges, without duplicates.
    def descendants
      descendants_noduplicates([], [])
    end

    # Like descendants, but follows only edges whose label is in labels.
    def descendants_by_edgelabels(labels)
      descendants_noduplicates([], labels)
    end

    protected

    # Depth-first collection of descendants into nodes. An empty labels list
    # means "follow every edge". Nodes already collected are not visited again.
    def descendants_noduplicates(nodes, labels)
      each_child_with_edgelabel do |label, child|
        next unless labels.empty? || labels.include?(label)
        next if nodes.include?(child)

        nodes = child.descendants_noduplicates(nodes << child, labels)
      end
      nodes
    end

    # Depth-first collection of ancestors into nodes. An empty labels list
    # means "follow every edge". Nodes already collected are not visited again.
    def ancestors_noduplicates(nodes, labels)
      each_parent_with_edgelabel do |label, parent|
        next unless labels.empty? || labels.include?(label)
        next if nodes.include?(parent)

        nodes = parent.ancestors_noduplicates(nodes << parent, labels)
      end
      nodes
    end

    #### CAUTION: set_parents must be called with an "internal format" list of parents:
    #### instead of using [node, edgelabel], use [edgelabel, node]
    #
    # Removes all current parents and adds the listed ones, going through
    # remove_parent/add_parent so that both ends of every edge are updated
    # (subject to the pointer option in varhash).
    def set_parents(list, varhash = {})
      each_parent_with_edgelabel { |label, parent| remove_parent(parent, label, varhash) }

      # add_parent expects (parent, edgelabel), i.e. the reverse of the
      # internal [edgelabel, parent] order of the list entries.
      list.each { |label, parent| add_parent(parent, label, varhash) }
    end

    private

    # Serializes a list of [label, node] pairs as label/ID pairs for _dump.
    def dump_edges(edges)
      edges.map { |label, node| "#{label}#{DUMP_LABEL_SEPARATOR}#{node.id}" }
           .join(DUMP_PAIR_SEPARATOR)
    end

    # Parses one edge section written by dump_edges into [label, id] pairs;
    # a missing or empty section yields an empty list.
    def load_edges(edges_s)
      return [] if edges_s.nil? || edges_s.empty?

      edges_s.split(DUMP_PAIR_SEPARATOR).map { |pair| pair.split(DUMP_LABEL_SEPARATOR) }
    end
  end
end