shalmaneser-lib 1.2.rc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
require_relative 'sem_node'
|
2
|
+
|
3
|
+
module STXML
|
4
|
+
#############
|
5
|
+
# class FrameNode
|
6
|
+
#
|
7
|
+
# inherits from SemNode
|
8
|
+
# adds to it methods specific to nodes
|
9
|
+
# that describe a frame
|
10
|
+
#
|
11
|
+
# additional/changed methods:
|
12
|
+
#
|
13
|
+
# name returns the name of the frame
|
14
|
+
# set_name changes the name of the frame to a new name
|
15
|
+
# target returns the target (as a FeNode object)
|
16
|
+
#
|
17
|
+
# each_child() iterates through FEs, children() returns all FEs
|
18
|
+
#
|
19
|
+
# each_fe_by_name A frame node may have several FE children with the same
|
20
|
+
# frame element label. While each_child returns them separately,
|
21
|
+
# each_fe_by_name lumps FE children with the same frame element label
|
22
|
+
# into one FeNode.
|
23
|
+
# Warnings:
|
24
|
+
# - the REXML object of the FeNode is that of the first FE child
|
25
|
+
# with that frame element label.
|
26
|
+
# - Underspecification is ignored! If you have the same FE twice,
|
27
|
+
# and there is underspecification regarding the extent of the FE,
|
28
|
+
# the two FE children will be lumped together anyway.
|
29
|
+
# If you don't want that, use each_child instead.
|
30
|
+
#
|
31
|
+
#
|
32
|
+
# add_fe CAUTION: please do not call this method directly externally,
|
33
|
+
# use SalsaTigerSentence.add_fe, otherwise the node and its ID
|
34
|
+
# will not be recorded in the node list and the node cannot be retrieved
|
35
|
+
# via its ID
|
36
|
+
|
37
|
+
class FrameNode < SemNode
  # Returns the target of this frame, i.e. the single child attached
  # with the edge label "target".
  #
  # @return [Object, nil] the target child, or nil (after printing a
  #   warning to stderr) if the frame has no target
  # @raise [RuntimeError] if more than one target is attached
  def target
    targets = children_by_edgelabels(["target"])

    if targets.empty?
      $stderr.puts "SalsaTigerRegXML warning: Frame #{id}: No target, but I got: \n" + child_labels.join(", ")
      return nil
    end

    raise "Target: more than one target to frame #{id}." unless targets.length == 1

    targets.first
  end

  # @return the value of this node's "name" attribute (the frame name)
  def name
    get_attribute("name")
  end

  # Changes the frame name by overwriting the "name" attribute.
  #
  # @param new_name the new frame name
  def set_name(new_name)
    set_attribute("name", new_name)
  end

  # Synonym for each_child: yields every child of this frame.
  def each_fe(&block)
    each_child(&block)
  end

  # Synonym for children.
  def fes
    children
  end

  # Yields one node per distinct frame element label, skipping "target".
  #
  # A label carried by exactly one child yields that child itself.
  # A label carried by several children yields a freshly built FeNode
  # with ID "<frame id>_<label>" that collects the children of all of them.
  # That combined node is created on the fly and is not attached to
  # this frame.
  #
  # @yieldparam fe the FE child, or the combined FeNode
  # @return [Enumerator] if no block is given
  def each_fe_by_name
    return enum_for(:each_fe_by_name) unless block_given?

    child_labels.uniq.each do |fe_name|
      next if fe_name == "target"

      same_name_fes = children_by_edgelabels([fe_name])

      if same_name_fes.length == 1
        # one frame element with that name
        yield same_name_fes.first
      else
        # several frame elements with that name: combine them
        combined_fe = FeNode.new(fe_name, "#{id}_#{fe_name}")
        same_name_fes.each do |fe|
          fe.each_child { |child| combined_fe.add_child(child) }
        end
        yield combined_fe
      end
    end
  end

  # Attaches an FE node to this frame, using the node's name as edge label.
  #
  # @param fe_node the node to attach; must respond to #name
  # @raise [RuntimeError] if the node is a target and this frame already has one
  def add_child(fe_node)
    reject_second_target(fe_node.name)

    super(fe_node, fe_node.name)
  end

  # Detaches an FE node from this frame; the edge label is the node's name.
  #
  # @param fe_node the node to detach; must respond to #name
  def remove_child(fe_node)
    super(fe_node, fe_node.name)
  end

  # Creates a new FeNode, attaches it to this frame and lets it point
  # to the given syntactic nodes.
  #
  # CAUTION: please do not call this method directly from outside,
  # use SalsaTigerSentence.add_fe instead; otherwise the node and its ID
  # are not recorded in the node list and the node cannot be retrieved
  # via its ID.
  #
  # @param fe_name [String] name of the FE to add
  # @param syn_nodes [Array] syntactic nodes that this FE should point to
  # @param fe_id [String, nil] ID for the new FE; generated if omitted
  # @return the newly created FeNode
  # @raise [RuntimeError] if fe_name is "target" and this frame already has one
  def add_fe(fe_name, syn_nodes, fe_id = nil)
    # Fail before constructing the node when a second target is requested.
    reject_second_target(fe_name)

    # no FE ID given, make one myself
    fe_id ||= unused_fe_id

    fe_node = FeNode.new(fe_name, fe_id)
    add_child(fe_node)

    # add syn nodes
    syn_nodes.each { |syn_node| fe_node.add_child(syn_node) }

    fe_node
  end

  private

  # Raises if attaching a child called +fe_name+ would give this frame
  # a second target. The IDs of the existing targets go to stderr first.
  def reject_second_target(fe_name)
    return unless fe_name == "target"

    targets = children_by_edgelabels(["target"])
    return if targets.empty?

    $stderr.puts "Adding second target to frame #{id}"
    $stderr.puts "I already have: " + targets.map(&:id).join(",")
    raise "More than one target."
  end

  # Builds an FE ID of the form "<frame id>_fe<timestamp>".
  # The timestamp alone can repeat when two FEs are added within the
  # clock resolution, so a numeric suffix is appended whenever the ID
  # is already used by one of this frame's children.
  # NOTE(review): uniqueness is only checked against this frame's own
  # children, not against the whole sentence.
  def unused_fe_id
    base = "#{id}_fe#{Time.new.to_f}"
    taken = children.map(&:id)
    return base unless taken.include?(base)

    suffix = 1
    suffix += 1 while taken.include?("#{base}_#{suffix}")
    "#{base}_#{suffix}"
  end
end
|
145
|
+
end
|
@@ -0,0 +1,347 @@
|
|
1
|
+
module STXML
|
2
|
+
# GraphNode: describes one node in a graph.
|
3
|
+
#
|
4
|
+
# A node may have an arbitrary number of parents (sources of incoming edges)
|
5
|
+
# and an arbitrary number of children (targets of outgoing edges)
|
6
|
+
#
|
7
|
+
# All edges are labeled and directed
|
8
|
+
#
|
9
|
+
# The add_parent, add_child, remove_parent, remove_child methods
|
10
|
+
# take care of both ends of an edge
|
11
|
+
# (i.e. n1.add_child(n2, label) also adds n1 as parent of n2 with edge label 'label')
|
12
|
+
#
|
13
|
+
# It is possible to create a 'pointer' rather than an edge:
|
14
|
+
# n1.add_child(n2, label, "pointer_insteadof_edge" => true)
|
15
|
+
# will create an edge from n1 to n2 labeled 'label' that is
|
16
|
+
# listed under the outgoing edges of n1, but not among
|
17
|
+
# the incoming edges of n2
|
18
|
+
# The same option is available for add_parent, remove_parent, remove_child.
|
19
|
+
|
20
|
+
class GraphNode
  # Separators of the hand-rolled string format written by _dump.
  DUMP_FIELD_SEP = "QQSEPVALUESQQ".freeze
  DUMP_PAIR_SEP = "QQPAIRQQ".freeze
  DUMP_LABEL_SEP = "QQSEPQQ".freeze

  # @return the ID of this node
  attr_reader :id

  # @param id the ID of the new node
  def initialize(id)
    @id = id
    # Edges are stored in "internal format": [edgelabel, node] pairs.
    @children = []
    @parents = []
    @features = {}
  end

  # for Marshalling:
  # Dump just IDs instead of actual nodes from Parents and Children lists.
  # Otherwise the Marshaller will go crazy following
  # all the links to objects mentioned.
  # After loading: replace IDs by actual objects with a little help
  # from the caller (see recover_from_dump).
  #
  # Labels and IDs are written in their string form, so nil or
  # non-String values do not abort the dump (they come back as Strings).
  #
  # @deprecated This method seems to be useless.
  # @param depth unused, required by the Marshal protocol
  # @return [String] ID, marshalled features, child edges and parent edges
  def _dump(depth)
    [
      @id.to_s,
      Marshal.dump(@features),
      dump_edges(@children),
      dump_edges(@parents)
    ].join(DUMP_FIELD_SEP)
  end

  # Counterpart of _dump. Always builds a plain GraphNode whose edge
  # lists still hold IDs; call recover_from_dump to resolve them.
  #
  # @param string [String] a string produced by _dump
  # @return [GraphNode]
  def self._load(string)
    id = string.split(DUMP_FIELD_SEP).first

    result = GraphNode.new(id)
    result.fill_from_pickle(string)

    result
  end

  # Restores features and (ID-only) edge lists from a _dump string.
  #
  # @param string [String] a string produced by _dump
  def fill_from_pickle(string)
    _id, features_s, children_s, parents_s = string.split(DUMP_FIELD_SEP)

    # NOTE(review): Marshal.load can instantiate arbitrary objects;
    # only feed this method data from a trusted source.
    @features = Marshal.load(features_s)
    @children = load_edges(children_s)
    @parents = load_edges(parents_s)
  end

  # Replaces the IDs left in the edge lists after loading by real nodes.
  #
  # @param node_by_id [#call] maps a node ID to the node object
  def recover_from_dump(node_by_id)
    @children = @children.map { |label, node_id| [label, node_by_id.call(node_id)] }
    @parents = @parents.map { |label, node_id| [label, node_by_id.call(node_id)] }
  end

  # ID-related things

  # Two graph nodes are equal iff their IDs are equal.
  def ==(other)
    other.is_a?(GraphNode) ? @id == other.id : false
  end

  # Changes the ID of this node.
  def chid(newid)
    @id = newid
  end

  # setting and retrieving features

  # @return the value stored for +feature+, or nil
  def get_f(feature)
    @features[feature]
  end

  # Stores +value+ for +feature+, overwriting any previous value.
  def set_f(feature, value)
    @features[feature] = value
  end

  # Like set_f, but refuses to overwrite an existing non-nil value.
  #
  # @raise [RuntimeError] if the feature already has a value
  def add_f(feature, value)
    raise "Feature #{feature} already set." unless @features[feature].nil?

    set_f(feature, value)
  end

  # ancestors

  # @return [Array] parent nodes, without edge labels
  def parents
    @parents.map { |_label, node| node }
  end

  # @return [Array] labels of all incoming edges
  def parent_labels
    @parents.map { |label, _node| label }
  end

  # @return the label of the first edge coming from +parent+, or nil
  def parent_label(parent)
    edge = @parents.find { |_label, node| node == parent }
    edge && edge[0]
  end

  # @return [Array] the internal list of [edgelabel, parent] pairs
  def parents_with_edgelabel
    @parents
  end

  # Yields each parent node.
  def each_parent
    @parents.each { |_label, node| yield node }
  end

  # Yields each [edgelabel, parent] pair.
  def each_parent_with_edgelabel
    @parents.each { |edge| yield edge }
  end

  # @param labels [Array] accepted edge labels
  # @return [Array] parents reached via an edge with one of the labels
  def parents_by_edgelabels(labels)
    @parents.select { |label, _node| labels.include?(label) }
            .map { |_label, node| node }
  end

  # Adds an incoming edge from +parent+. Unless
  # varhash["pointer_insteadof_edge"] is set, self is also registered
  # as a child of +parent+.
  def add_parent(parent, edgelabel, varhash = {})
    # Record the edge first: the reciprocal call below checks this list
    # to stop the mutual recursion with add_child.
    @parents << [edgelabel, parent]

    # and vice versa: add self as child to parent
    return if varhash["pointer_insteadof_edge"]
    return if parent.children_with_edgelabel.include?([edgelabel, self])

    parent.add_child(self, edgelabel)
  end

  # Removes the incoming edge(s) from +parent+ labeled +edgelabel+.
  # Unless varhash["pointer_insteadof_edge"] is set, self is also
  # removed from the children of +parent+.
  def remove_parent(parent, edgelabel, varhash = {})
    @parents = @parents.reject { |label, node| label == edgelabel && node == parent }

    # and vice versa: remove self as child from parent
    return if varhash["pointer_insteadof_edge"]

    parent.remove_child(self, edgelabel) if parent.children_with_edgelabel.include?([edgelabel, self])
  end

  # @return [Integer] number of incoming edges
  def indeg
    @parents.length
  end

  # @return [Array] all nodes reachable via incoming edges, no duplicates
  def ancestors
    ancestors_noduplicates([], [])
  end

  # Like ancestors, but only follows edges with one of the given labels.
  def ancestors_by_edgelabels(labels)
    ancestors_noduplicates([], labels)
  end

  # descendants

  # @return [Array] child nodes, without edge labels
  def children
    @children.map { |_label, node| node }
  end

  # @return [Array] labels of all outgoing edges
  def child_labels
    @children.map { |label, _node| label }
  end

  # @return the label of the first edge leading to +child+, or nil
  def child_label(child)
    edge = @children.find { |_label, node| node == child }
    edge && edge[0]
  end

  # @return [Array] the internal list of [edgelabel, child] pairs
  def children_with_edgelabel
    @children
  end

  # Yields each child node.
  def each_child
    @children.each { |_label, node| yield node }
  end

  # Yields each [edgelabel, child] pair.
  def each_child_with_edgelabel
    @children.each { |edge| yield edge }
  end

  # @param labels [Array] accepted edge labels
  # @return [Array] children reached via an edge with one of the labels
  def children_by_edgelabels(labels)
    @children.select { |label, _node| labels.include?(label) }
             .map { |_label, node| node }
  end

  # Adds an outgoing edge to +child+. Unless
  # varhash["pointer_insteadof_edge"] is set, self is also registered
  # as a parent of +child+.
  def add_child(child, edgelabel, varhash = {})
    # Record the edge first: the reciprocal call below checks this list
    # to stop the mutual recursion with add_parent.
    @children << [edgelabel, child]

    # and vice versa: add self as parent to child
    return if varhash["pointer_insteadof_edge"]
    return if child.parents_with_edgelabel.include?([edgelabel, self])

    child.add_parent(self, edgelabel)
  end

  # Removes the outgoing edge(s) to +child+ labeled +edgelabel+.
  # Unless varhash["pointer_insteadof_edge"] is set, self is also
  # removed from the parents of +child+.
  def remove_child(child, edgelabel, varhash = {})
    @children = @children.reject { |label, node| label == edgelabel && node == child }

    # and vice versa: remove self as parent from child
    return if varhash["pointer_insteadof_edge"]

    child.remove_parent(self, edgelabel) if child.parents_with_edgelabel.include?([edgelabel, self])
  end

  # Relabels the edge to +child+ from +oldlabel+ to +newlabel+;
  # does nothing if no such edge exists.
  def change_child_label(child, oldlabel, newlabel, varhash = {})
    return unless @children.include?([oldlabel, child])

    remove_child(child, oldlabel, varhash)
    add_child(child, newlabel, varhash)
  end

  # Removes every outgoing edge via remove_child.
  def remove_all_children(varhash = {})
    # remove_child replaces @children by a new array, so iterating the
    # current one while removing is safe.
    each_child_with_edgelabel { |label, child| remove_child(child, label, varhash) }
  end

  # Replaces the outgoing edges by +list+.
  #
  #### CAUTION: set_children must be called with an "internal format" list:
  #### instead of using [node, edgelabel], use [edgelabel, node]
  #
  # The new list is stored as is: this node is NOT registered as a
  # parent of the new children.
  def set_children(list, varhash = {})
    remove_all_children(varhash)

    @children = list
  end

  # @return [Integer] number of outgoing edges
  def outdeg
    @children.length
  end

  # @return [Array] the leaves (nodes without children) below this node,
  #   or [self] if this node is a leaf itself
  def yield_nodes
    leaves = []
    leaves << self if outdeg == 0
    each_child do |child|
      if child.outdeg == 0
        leaves << child
      else
        leaves.concat(child.yield_nodes)
      end
    end
    leaves
  end

  # @return [Array] all nodes reachable via outgoing edges, no duplicates
  def descendants
    descendants_noduplicates([], [])
  end

  # Like descendants, but only follows edges with one of the given labels.
  def descendants_by_edgelabels(labels)
    descendants_noduplicates([], labels)
  end

  protected

  # Collects descendants depth-first into +nodes+, skipping nodes that
  # are already in it. An empty +labels+ list accepts every edge.
  def descendants_noduplicates(nodes, labels)
    each_child_with_edgelabel do |label, child|
      next unless labels.empty? || labels.include?(label)
      next if nodes.include?(child)

      nodes = child.descendants_noduplicates(nodes << child, labels)
    end
    nodes
  end

  # Collects ancestors depth-first into +nodes+, skipping nodes that
  # are already in it. An empty +labels+ list accepts every edge.
  def ancestors_noduplicates(nodes, labels)
    each_parent_with_edgelabel do |label, parent|
      next unless labels.empty? || labels.include?(label)
      next if nodes.include?(parent)

      nodes = parent.ancestors_noduplicates(nodes << parent, labels)
    end
    nodes
  end

  # Replaces the incoming edges by +list+, going through remove_parent
  # and add_parent.
  #
  #### CAUTION: set_parents must be called with an "internal format" list of parents:
  #### instead of using [node, edgelabel], use [edgelabel, node]
  def set_parents(list, varhash = {})
    each_parent_with_edgelabel { |label, parent| remove_parent(parent, label, varhash) }

    # add_parent expects (parent, edgelabel), i.e. the reverse of the
    # internal pair order.
    list.each { |label, parent| add_parent(parent, label, varhash) }
  end

  private

  # Serializes a list of [edgelabel, node] pairs as label/ID pairs.
  def dump_edges(edges)
    edges.map { |label, node| "#{label}#{DUMP_LABEL_SEP}#{node.id}" }.join(DUMP_PAIR_SEP)
  end

  # Parses the output of dump_edges into [edgelabel, node ID] pairs.
  def load_edges(edges_s)
    return [] if edges_s.nil? || edges_s.empty?

    edges_s.split(DUMP_PAIR_SEP).map { |pair| pair.split(DUMP_LABEL_SEP) }
  end
end
|
347
|
+
end
|