shalmaneser-lib 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
require_relative 'sem_node'
|
2
|
+
|
3
|
+
module STXML
|
4
|
+
#############
|
5
|
+
# class FrameNode
|
6
|
+
#
|
7
|
+
# inherits from SemNode
|
8
|
+
# adds to it methods specific to nodes
|
9
|
+
# that describe a frame
|
10
|
+
#
|
11
|
+
# additional/changed methods:
|
12
|
+
#
|
13
|
+
# name returns the name of the frame
|
14
|
+
# set_name changes the name of the frame to a new name
|
15
|
+
# target returns the target (as a FeNode object)
|
16
|
+
#
|
17
|
+
# each_child() iterates through FEs, children() returns all FEs
|
18
|
+
#
|
19
|
+
# each_fe_by_name A frame node may have several FE children with the same
|
20
|
+
# frame element label. While each_child returns them separately,
|
21
|
+
# each_fe_by_name lumps FE children with the same frame element label
|
22
|
+
# into one FeNode.
|
23
|
+
# Warnings:
|
24
|
+
# - the REXML object of the FeNode is that of the first FE child
|
25
|
+
# with that frame element label.
|
26
|
+
# - Underspecification is ignored! If you have the same FE twice,
|
27
|
+
# and there is underspecification regarding the extent of the FE,
|
28
|
+
# the two FE children will be lumped together anyway.
|
29
|
+
# If you don't want that, use each_child instead.
|
30
|
+
#
|
31
|
+
#
|
32
|
+
# add_fe CAUTION: please do not call this method directly externally,
|
33
|
+
# use SalsaTigerSentence.add_fe, otherwise the node and its ID
|
34
|
+
# will not be recorded in the node list and the node cannot be retrieved
|
35
|
+
# via its ID
|
36
|
+
|
37
|
+
class FrameNode < SemNode
  # Returns the single child attached with the edge label "target".
  #
  # Prints a warning to stderr and returns nil when there is no such child;
  # raises a RuntimeError when there is more than one.
  def target
    targets = children_by_edgelabels(["target"])

    if targets.empty?
      $stderr.puts "SalsaTigerRegXML warning: Frame #{id}: No target, but I got: \n" + child_labels.join(", ")
      return nil
    end

    raise "Target: more than one target to frame #{id}." unless targets.length == 1

    targets.first
  end

  # Name of the frame, read from the "name" attribute.
  def name
    get_attribute("name")
  end

  # Overwrites the "name" attribute with new_name.
  def set_name(new_name)
    set_attribute("name", new_name)
  end

  # Synonym for each_child: yields every child of this frame in turn.
  def each_fe
    each_child { |fe| yield fe }
  end

  # Synonym for children.
  def fes
    children
  end

  # Yields one node per distinct child edge label, skipping "target".
  #
  # A label carried by exactly one child yields that child itself. A label
  # carried by several children yields a freshly built FeNode (ID
  # "<frame id>_<label>") that collects the children of all of them.
  def each_fe_by_name
    child_labels.uniq.each do |fe_name|
      next if fe_name == "target"

      same_label = children_by_edgelabels([fe_name])

      if same_label.length == 1
        # only one frame element carries this label: hand it out as is
        yield same_label.first
      else
        # several frame elements share the label: merge them into one node
        merged = FeNode.new(fe_name, "#{id}_#{fe_name}")
        same_label.each do |fe|
          fe.each_child { |child| merged.add_child(child) }
        end
        yield merged
      end
    end
  end

  # Attaches fe_node as a child, using fe_node.name as the edge label.
  #
  # Raises a RuntimeError (after reporting on stderr) when fe_node is named
  # "target" and this frame already has a "target" child.
  def add_child(fe_node)
    if fe_node.name == "target"
      existing_targets = children_by_edgelabels(["target"])
      unless existing_targets.empty?
        $stderr.puts "Adding second target to frame #{id}"
        $stderr.puts "I already have: " + existing_targets.map { |t| t.id }.join(",")
        raise "More than one target."
      end
    end

    super(fe_node, fe_node.name)
  end

  # Detaches fe_node, using fe_node.name as the edge label.
  def remove_child(fe_node)
    super(fe_node, fe_node.name)
  end

  # Creates a new FeNode, attaches it to this frame and links it to syn_nodes.
  #
  # fe_name   - string: name of the FE to add
  # syn_nodes - array of nodes that the new FE should get as children
  # fe_id     - string: ID for the new FE; when omitted, one is generated from
  #             this frame's ID and the current time
  #
  # Returns the new FeNode. Raises a RuntimeError when fe_name is "target"
  # and this frame already has a "target" child.
  def add_fe(fe_name, syn_nodes, fe_id = nil)
    if fe_name == "target"
      existing_targets = children_by_edgelabels(["target"])
      unless existing_targets.empty?
        $stderr.puts "Adding second target to frame #{id}"
        $stderr.puts "I already have: " + existing_targets.map(&:id).join(",")
        raise "More than one target."
      end
    end

    # no FE ID given: make one up
    fe_id ||= id + "_fe" + Time.new.to_f.to_s

    # build the FE node and register it as this frame's child
    new_fe = FeNode.new(fe_name, fe_id)
    add_child(new_fe)

    # hang the given nodes below the new FE
    syn_nodes.each { |syn_node| new_fe.add_child(syn_node) }

    new_fe
  end
end
|
145
|
+
end
|
@@ -0,0 +1,347 @@
|
|
1
|
+
module STXML
|
2
|
+
# GraphNode: describes one node in a graph.
|
3
|
+
#
|
4
|
+
# A node may have an arbitrary number of parents (sources of incoming edges)
|
5
|
+
# and an arbitrary number of children (targets of outgoing edges)
|
6
|
+
#
|
7
|
+
# All edges are labeled and directed
|
8
|
+
#
|
9
|
+
# The add_parent, add_child, remove_parent, remove_child methods
|
10
|
+
# take care of both ends of an edge
|
11
|
+
# (i.e. n1.add_child(n2, label) also adds n1 as parent of n2 with edge label 'label')
|
12
|
+
#
|
13
|
+
# It is possible to create a 'pointer' rather than an edge:
|
14
|
+
# n1.add_child(n2, label, "pointer_insteadof_edge" => true)
|
15
|
+
# will create an edge from n1 to n2 labeled 'label' that is
|
16
|
+
# listed under the outgoing edges of n1, but not among
|
17
|
+
# the incoming edges of n2
|
18
|
+
# The same option is available for add_parent, remove_parent, remove_child.
|
19
|
+
|
20
|
+
class GraphNode
  # Separators of the hand-rolled serialisation format used by
  # #_dump / #fill_from_pickle.
  VALUE_SEPARATOR = "QQSEPVALUESQQ".freeze
  PAIR_SEPARATOR = "QQPAIRQQ".freeze
  LABEL_SEPARATOR = "QQSEPQQ".freeze

  # ID-related things

  # The node's ID as passed to the constructor (or changed via #chid).
  attr_reader :id

  # id - identifier of the node; nodes are compared by this value (see #==).
  def initialize(id)
    @id = id
    # Edges are stored in "internal format": [edgelabel, node] pairs.
    @children = []
    @parents = []
    @features = {}
  end

  # for Marshalling:
  # Dump just IDs instead of actual nodes from the parent and child lists.
  # Otherwise the Marshaller would follow all the links to the objects
  # mentioned.
  # After loading, the IDs have to be replaced by actual objects with a
  # little help from the caller (see #recover_from_dump).
  # @deprecated This method seems to be useless.
  def _dump(_depth)
    @id.to_s +
      VALUE_SEPARATOR +
      Marshal.dump(@features) +
      VALUE_SEPARATOR +
      dump_edges(@children) +
      VALUE_SEPARATOR +
      dump_edges(@parents)
  end

  # Counterpart of #_dump. Always builds a plain GraphNode whose child and
  # parent lists contain IDs instead of nodes.
  def self._load(string)
    id = string.split(VALUE_SEPARATOR).first

    result = GraphNode.new(id)
    result.fill_from_pickle(string)

    result
  end

  # Restores features, children and parents from a string produced by #_dump.
  # The ID part of the string is ignored. Children and parents are restored
  # as [edgelabel, id] pairs.
  def fill_from_pickle(string)
    _id, features_s, children_s, parents_s = string.split(VALUE_SEPARATOR)

    # NOTE(review): Marshal.load can instantiate arbitrary objects; only feed
    # this method data from a trusted source.
    @features = Marshal.load(features_s)
    @children = load_edges(children_s)
    @parents = load_edges(parents_s)
  end

  # Replaces the IDs left in the child and parent lists by #fill_from_pickle
  # with whatever node_by_id.call(id) returns.
  #
  # node_by_id - an object responding to #call, mapping an ID to a node
  def recover_from_dump(node_by_id)
    @children = @children.map { |label, node_id| [label, node_by_id.call(node_id)] }
    @parents = @parents.map { |label, node_id| [label, node_by_id.call(node_id)] }
  end

  # Two nodes are equal when both are GraphNodes with equal IDs.
  def ==(other)
    other.is_a?(GraphNode) && @id == other.id
  end

  # Changes the ID of this node. Edge lists of other nodes are not touched.
  def chid(newid)
    @id = newid
  end

  # setting and retrieving features

  # Returns the value stored for feature, or nil.
  def get_f(feature)
    @features[feature]
  end

  # Stores value under feature, overwriting any previous value.
  def set_f(feature, value)
    @features[feature] = value
  end

  # Like #set_f, but raises a RuntimeError when the feature already has a
  # non-nil value.
  def add_f(feature, value)
    raise "Feature #{feature} already set." unless @features[feature].nil?

    set_f(feature, value)
  end

  # ancestors

  # All parent nodes, without edge labels.
  def parents
    @parents.map { |_label, parent| parent }
  end

  # Edge labels of all incoming edges, in the same order as #parents.
  def parent_labels
    @parents.map { |label, _parent| label }
  end

  # Label of the first incoming edge that comes from parent, or nil.
  def parent_label(parent)
    edge = @parents.find { |_label, node| node == parent }
    edge && edge.first
  end

  # The internal list of [edgelabel, parent] pairs (not a copy).
  def parents_with_edgelabel
    @parents
  end

  # Yields each parent node.
  def each_parent
    @parents.each { |_label, parent| yield parent }
  end

  # Yields each [edgelabel, parent] pair.
  def each_parent_with_edgelabel
    @parents.each { |label_parent| yield label_parent }
  end

  # Parents reached via an edge whose label is included in labels.
  def parents_by_edgelabels(labels)
    matching = @parents.select { |label, _parent| labels.include?(label) }
    matching.map { |_label, parent| parent }
  end

  # Adds an incoming edge from parent and, unless
  # varhash["pointer_insteadof_edge"] is set, the matching outgoing edge
  # on parent.
  def add_parent(parent, edgelabel, varhash = {})
    @parents << [edgelabel, parent]

    # and vice versa: add self as child to parent
    return if varhash["pointer_insteadof_edge"]
    return if parent.children_with_edgelabel.include?([edgelabel, self])

    parent.add_child(self, edgelabel)
  end

  # Removes every incoming edge from parent labeled edgelabel and, unless
  # varhash["pointer_insteadof_edge"] is set, the matching outgoing edge
  # on parent.
  def remove_parent(parent, edgelabel, varhash = {})
    @parents = @parents.reject do |label, node|
      label == edgelabel && node == parent
    end

    # and vice versa: remove self as child from parent
    return if varhash["pointer_insteadof_edge"]
    return unless parent.children_with_edgelabel.include?([edgelabel, self])

    parent.remove_child(self, edgelabel)
  end

  # Number of incoming edges.
  def indeg
    @parents.length
  end

  # All nodes reachable by following incoming edges, without duplicates.
  def ancestors
    ancestors_noduplicates([], [])
  end

  # Like #ancestors, but follows only edges whose label is in labels.
  def ancestors_by_edgelabels(labels)
    ancestors_noduplicates([], labels)
  end

  # descendants

  # All child nodes, without edge labels.
  def children
    @children.map { |_label, child| child }
  end

  # Edge labels of all outgoing edges, in the same order as #children.
  def child_labels
    @children.map { |label, _child| label }
  end

  # Label of the first outgoing edge that leads to child, or nil.
  def child_label(child)
    edge = @children.find { |_label, node| node == child }
    edge && edge.first
  end

  # The internal list of [edgelabel, child] pairs (not a copy).
  def children_with_edgelabel
    @children
  end

  # Yields each child node.
  def each_child
    @children.each { |_label, child| yield child }
  end

  # Yields each [edgelabel, child] pair.
  def each_child_with_edgelabel
    @children.each { |label_child| yield label_child }
  end

  # Children reached via an edge whose label is included in labels.
  def children_by_edgelabels(labels)
    matching = @children.select { |label, _child| labels.include?(label) }
    matching.map { |_label, child| child }
  end

  # Adds an outgoing edge to child and, unless
  # varhash["pointer_insteadof_edge"] is set, the matching incoming edge
  # on child.
  def add_child(child, edgelabel, varhash = {})
    @children << [edgelabel, child]

    # and vice versa: add self as parent to child
    return if varhash["pointer_insteadof_edge"]
    return if child.parents_with_edgelabel.include?([edgelabel, self])

    child.add_parent(self, edgelabel)
  end

  # Removes every outgoing edge to child labeled edgelabel and, unless
  # varhash["pointer_insteadof_edge"] is set, the matching incoming edge
  # on child.
  def remove_child(child, edgelabel, varhash = {})
    @children = @children.reject do |label, node|
      label == edgelabel && node == child
    end

    # and vice versa: remove self as parent from child
    return if varhash["pointer_insteadof_edge"]
    return unless child.parents_with_edgelabel.include?([edgelabel, self])

    child.remove_parent(self, edgelabel)
  end

  # Relabels the edge to child from oldlabel to newlabel.
  # Does nothing when there is no such edge.
  def change_child_label(child, oldlabel, newlabel, varhash = {})
    return unless @children.include?([oldlabel, child])

    remove_child(child, oldlabel, varhash)
    add_child(child, newlabel, varhash)
  end

  # Removes all outgoing edges (see #remove_child for the role of varhash).
  def remove_all_children(varhash = {})
    # remove_child replaces @children with a new array, so iterating over
    # the old one here is safe.
    each_child_with_edgelabel do |label, child|
      remove_child(child, label, varhash)
    end
  end

  #### CAUTION: set_children must be called with an "internal format" list:
  #### instead of using [node, edgelabel], use [edgelabel, node]
  #
  # Removes all current children, then installs list as the new child list.
  # Note that, unlike #add_child, this does not register self as parent of
  # the new children.
  def set_children(list, varhash = {})
    remove_all_children(varhash)

    @children = list
  end

  # Number of outgoing edges.
  def outdeg
    @children.length
  end

  # Nodes without outgoing edges below this node, in child order;
  # the node itself if it has no outgoing edges.
  def yield_nodes
    leaves = []
    leaves << self if outdeg == 0

    each_child do |child|
      if child.outdeg == 0
        leaves << child
      else
        leaves.concat(child.yield_nodes)
      end
    end

    leaves
  end

  # All nodes reachable by following outgoing edges, without duplicates.
  def descendants
    descendants_noduplicates([], [])
  end

  # Like #descendants, but follows only edges whose label is in labels.
  def descendants_by_edgelabels(labels)
    descendants_noduplicates([], labels)
  end

  protected

  # Depth-first collection of descendants into nodes.
  # An empty labels list means "follow every edge".
  def descendants_noduplicates(nodes, labels)
    each_child_with_edgelabel do |label, child|
      next unless labels.empty? || labels.include?(label)
      next if nodes.include?(child)

      nodes = child.descendants_noduplicates(nodes << child, labels)
    end

    nodes
  end

  # Depth-first collection of ancestors into nodes.
  # An empty labels list means "follow every edge".
  def ancestors_noduplicates(nodes, labels)
    each_parent_with_edgelabel do |label, parent|
      next unless labels.empty? || labels.include?(label)
      next if nodes.include?(parent)

      nodes = parent.ancestors_noduplicates(nodes << parent, labels)
    end

    nodes
  end

  #### CAUTION: set_parents must be called with an "internal format" list of parents:
  #### instead of using [node, edgelabel], use [edgelabel, node]
  #
  # Removes all current parents, then adds every [edgelabel, parent] pair of
  # list via #add_parent, passing varhash on to both steps.
  def set_parents(list, varhash = {})
    # remove_parent replaces @parents with a new array, so iterating over
    # the old one here is safe.
    each_parent_with_edgelabel do |label, parent|
      remove_parent(parent, label, varhash)
    end

    list.each do |label, parent|
      # add_parent expects (parent, edgelabel), not the internal pair order
      add_parent(parent, label, varhash)
    end
  end

  private

  # Serialises a list of [edgelabel, node] pairs as label/ID pairs.
  def dump_edges(edges)
    edges.map { |label, node| label + LABEL_SEPARATOR + node.id }.join(PAIR_SEPARATOR)
  end

  # Parses the output of #dump_edges back into [edgelabel, id] pairs.
  def load_edges(edges_s)
    return [] if edges_s.nil? || edges_s.empty?

    edges_s.split(PAIR_SEPARATOR).map { |pair| pair.split(LABEL_SEPARATOR) }
  end
end
|
347
|
+
end
|