shalmaneser-lib 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
@@ -0,0 +1,169 @@
|
|
1
|
+
require_relative 'salsa_tiger_xml_node'
|
2
|
+
|
3
|
+
module STXML
|
4
|
+
#############
|
5
|
+
# class SynNode
|
6
|
+
#
|
7
|
+
# inherits from SalsaTigerXmlNode,
|
8
|
+
# adds to it methods specific to nodes
|
9
|
+
# that describe the syntactic structure
|
10
|
+
#
|
11
|
+
# additional/changed methods:
|
12
|
+
#
|
13
|
+
# part_of_speech part_of_speech information as a string,
|
14
|
+
# nil for anything but terminal nodes
|
15
|
+
#
|
16
|
+
# word word information for this node as a string,
|
17
|
+
# nil for anything but terminal nodes
|
18
|
+
#
|
19
|
+
# category category information for this node as a string,
|
20
|
+
# nil for anything but nonterminal nodes
|
21
|
+
#
|
22
|
+
# is_punct? true if this is a terminal node and it is a punctuation sign
|
23
|
+
#
|
24
|
+
# get_sem return (a copy of the list of) the semantic nodes linked to this syntactic node by non-tree edges
|
25
|
+
# Idea: this is basically the inverse of the edge pointing from
|
26
|
+
# the FeNode to this SynNode, so you can fetch a node's semantics directly
|
27
|
+
#
|
28
|
+
# add_sem add non-tree edge from this syntactic node to a FeNode
|
29
|
+
class SynNode < SalsaTigerXmlNode
  # Builds a syntactic node around the given XML object. The node starts
  # out with no semantic nodes and no additional links attached.
  #
  # @param xml XML object, handed unchanged to the superclass constructor
  def initialize(xml)
    super(xml)

    @other_links = []
    @sem = []
  end

  # Records an additional labeled link from this node to another node.
  #
  # @param other_node [SynNode] target of the link
  # @param link_label [String] edge label
  # @param attributes [Hash{String => String}] further attribute-value pairs for the edge
  # @return [Array] the internal link list; each entry is [label, node, attributes]
  def add_link(other_node, link_label, attributes = {})
    @other_links.push([link_label, other_node, attributes])
  end

  # Returns the additional links of this node.
  #
  # @param label [String, nil] if given, only links carrying this label are returned
  # @return [Array] triples [label, node, attributes]; without a label this is
  #   the internal list itself, not a copy
  def get_linked(label = nil)
    return @other_links unless label

    @other_links.select { |link_label, _node, _attrs| link_label == label }
  end

  # @return [String, nil] the stripped "pos" attribute, or nil if it is not set
  def part_of_speech
    pos = get_attribute("pos")
    pos ? pos.strip : nil
  end

  # @return [String, nil] the stripped "cat" attribute, or nil if it is not set
  def category
    cat = get_attribute("cat")
    cat ? cat.strip : nil
  end

  # @return [String, nil] the stripped "word" attribute, or nil if it is not set
  def word
    form = get_attribute("word")
    form ? form.strip : nil
  end

  # Tells whether this node is a punctuation sign.
  # Nonterminals never are. Otherwise the part of speech is checked first
  # (this works at least for TIGER corpus annotation), then the word itself
  # is compared against a list of known punctuation signs.
  #
  # @return [Boolean]
  def is_punct?
    # only terminals can be punctuation signs
    return false if is_nonterminal?

    pos = part_of_speech
    return true if ['$.', '$,', '$('].include?(pos)
    return true if pos =~ /^PUNC/

    # no luck with part of speech: check the word.
    # If this fails too, the node is not a punctuation sign
    # by any of the tests we have applied.
    [".", ";", ",", ":", "?", "!", "(", ")", "[", "]", "{", "}", "-", "''", "``", "\"", "'"].include?(word)
  end

  # @return the word for terminal nodes, the superclass string representation otherwise
  def to_s
    is_terminal? ? word : super()
  end

  # @return [Array] a copy of the list of semantic nodes attached to this node
  def get_sem
    @sem.clone
  end

  # Attaches a semantic node to this syntactic node.
  #
  # @param fe_node [FeNode] must be an instance of exactly FeNode
  # @raise [RuntimeError] if fe_node is of any other class
  def add_sem(fe_node)
    raise "Unexpected class of semantic node: was expecting an FeNode" unless fe_node.instance_of?(FeNode)

    @sem << fe_node
  end

  #############
  protected

  # Renders the XML for everything hanging off this node: one <edge> element
  # per child that is not a splitword, followed by one <other_edge> element
  # per additional link. A missing label is written as "-".
  #
  # @return [String]
  def get_xml_ofchildren
    xml = ""

    each_child_with_edgelabel do |edge_label, child_node|
      # terminal or nonterminal children only:
      # splitwords are handled separately in the "sem" part of the sentence
      next if child_node.is_splitword?

      shown_label = edge_label ? xml_secure_val(edge_label) : "-"
      xml << "<edge label='#{shown_label}' idref='#{xml_secure_val(child_node.id)}'/>\n"
    end

    @other_links.each do |link_label, target, attrs|
      shown_label = link_label ? xml_secure_val(link_label) : "-"
      xml << "<other_edge label='#{shown_label}'"
      xml << " idref='#{xml_secure_val(target.id)}'"
      if attrs
        rendered = attrs.to_a.map { |attr, val| "#{xml_secure_val(attr)}='#{xml_secure_val(val)}'" }
        xml << " " + rendered.join(" ")
      end
      xml << "/>\n"
    end

    xml
  end
end
|
169
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require_relative 'graph_node'
|
2
|
+
|
3
|
+
module STXML
|
4
|
+
class TreeNode < GraphNode
  # @param id node ID, passed on to the superclass constructor
  def initialize(id)
    super
  end

  # The ancestor-related methods are redone here,
  # since a tree node has only one parent.

  # @return the first entry of #parents, or nil if #parents is nil
  def parent
    all_parents = parents
    all_parents.nil? ? nil : all_parents.first
  end

  # @return the first entry of #parent_labels, or nil if #parent_labels is nil
  def parent_label
    all_labels = parent_labels
    all_labels.nil? ? nil : all_labels.first
  end

  # @return the first entry of #parents_with_edgelabel, or nil if that is nil
  def parent_with_edgelabel
    all_pairs = parents_with_edgelabel
    all_pairs.nil? ? nil : all_pairs.first
  end

  # Same as #set_parent: adding a parent replaces the current one.
  def add_parent(parent, edgelabel, varhash = {})
    set_parent(parent, edgelabel, varhash)
  end

  # Makes +parent+ the parent of this node, reached via +edgelabel+.
  #
  # @param parent new parent node
  # @param edgelabel label of the edge to the parent
  # @param varhash [Hash] options; if "pointer_insteadof_edge" is set,
  #   this node is not registered as a child of the parent
  def set_parent(parent, edgelabel, varhash = {})
    # remove old parent
    # NOTE(review): remove_parent is defined outside this file; if it edits
    # the collection each_parent_with_edgelabel walks, entries could be
    # skipped -- confirm against GraphNode.
    each_parent_with_edgelabel { |old_label, old_parent| remove_parent(old_parent, old_label, varhash) }

    # set new parent
    @parents << [edgelabel, parent]

    # and vice versa: add self as child to parent,
    # unless this is only a pointer or the parent already lists us
    return if varhash["pointer_insteadof_edge"]
    return if parent.children_with_edgelabel.include?([edgelabel, self])

    parent.add_child(self, edgelabel)
  end
end
|
59
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require_relative 'syn_node'
|
2
|
+
require_relative 'reg_xml'
|
3
|
+
|
4
|
+
module STXML
|
5
|
+
#############
|
6
|
+
# class TSSynNode
|
7
|
+
#
|
8
|
+
# inherits from SynNode
|
9
|
+
#
|
10
|
+
# describes a syntactic node that isn't really there:
|
11
|
+
# a reference to a node in another sentence
|
12
|
+
#
|
13
|
+
# contains that node's ID, but an empty RegXML object,
|
14
|
+
# its string is "<unknown>", and you cannot add
|
15
|
+
# a child to it
|
16
|
+
#
|
17
|
+
# new or changed methods:
|
18
|
+
#-----------------------
|
19
|
+
#
|
20
|
+
# is_outside_sentence? returns true
|
21
|
+
#
|
22
|
+
# word returns "<unknown>"
|
23
|
+
#
|
24
|
+
# add_child raises an error
|
25
|
+
class TSSynNode < SynNode
  # Creates a placeholder node that carries only the ID of the node it
  # refers to, wrapped in an otherwise empty OTHER_SENTENCE element.
  #
  # @param id_string [String] ID of the referenced node
  def initialize(id_string)
    placeholder_xml = "<OTHER_SENTENCE id='" + id_string + "'/>"
    super(RegXML.new(placeholder_xml))
  end

  # @return [true] always
  def is_outside_sentence?
    true
  end

  # The word of this node is not known.
  #
  # @return [String] always "<unknown>"
  def word
    "<unknown>"
  end

  # Adding children is not possible for this class.
  #
  # @raise [RuntimeError] always
  def add_child(arg1, arg2)
    raise "Not implemented for this class"
  end
end
|
47
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require_relative 'salsa_tiger_xml_node'
|
2
|
+
|
3
|
+
module STXML
|
4
|
+
#############
|
5
|
+
# class UspNode
|
6
|
+
#
|
7
|
+
# inherits from SalsaTigerXmlNode,
|
8
|
+
# adds to it methods specific to nodes
|
9
|
+
# that describe a frame underspecification or frame element underspecification
|
10
|
+
#
|
11
|
+
# additional/changed methods:
|
12
|
+
#----------------------------
|
13
|
+
#
|
14
|
+
# new initializes the object
|
15
|
+
# rexml_object: underlying XML object for this node
|
16
|
+
# frame_or_fe: string, either "frame" for frame underspecification
|
17
|
+
# or "fe" for frame element underspecification
|
18
|
+
#
|
19
|
+
# add_child, remove_child add, remove underspecification entry
|
20
|
+
class UspNode < SalsaTigerXmlNode
  # @return [String] "frame" or "fe", the kind of underspecification
  attr_reader :i_am

  # @param xml_obj RegXML object, passed on to the superclass constructor
  # @param frame_or_fe [String] "frame" for frame underspecification,
  #   "fe" for frame element underspecification
  # @raise [RuntimeError] if frame_or_fe is neither of the two
  def initialize(xml_obj, frame_or_fe)
    super(xml_obj)

    @i_am = case frame_or_fe
            when "frame" then "frame"
            when "fe" then "fe"
            else raise "new: neither frame nor fe??"
            end
  end

  # Adds an underspecification entry and marks the child with usp="yes".
  # The child is always added without an edge label and with the
  # "pointer_insteadof_edge" option; +varhash+ is not used.
  #
  # @param node the node to add; must not be nil
  # @raise [RuntimeError] if node is nil
  def add_child(node, varhash={})
    raise "Got nil for a node." unless node

    super(node, nil, "pointer_insteadof_edge" => true)

    # set usp. attribute on child
    node.set_attribute("usp", "yes")
  end

  # Removes an underspecification entry and deletes the "usp" attribute
  # of the child, printing a warning to stderr. +varhash+ is not used.
  #
  # @param node the node to remove
  def remove_child(node, varhash={})
    super(node, nil, "pointer_insteadof_edge" => true)

    # Deleting the "usp" attribute will be wrong if the child is involved
    # in more than one instance of underspecification!
    $stderr.puts "Warning: unsafe removal of attribute 'usp'"
    node.del_attribute("usp")
  end

  #############
  protected

  # @return [String] one <uspitem> element per child, each on its own line
  def get_xml_ofchildren
    items = children.map do |child|
      "<uspitem idref='#{xml_secure_val(child.id)}'/>\n"
    end
    items.join
  end
end
|
72
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require_relative 'tree_node'
|
2
|
+
|
3
|
+
module STXML
|
4
|
+
#############
|
5
|
+
# class XMLNode
|
6
|
+
#
|
7
|
+
# node with entries pointing to its children
|
8
|
+
# as well as its parent.
|
9
|
+
# all edges may be labeled.
|
10
|
+
# each node has a unique ID.
|
11
|
+
#
|
12
|
+
# indexes a string with XML data representing the same node,
|
13
|
+
# but does not look into it, just keeps it
|
14
|
+
#
|
15
|
+
# methods:
|
16
|
+
# This class inherits from TreeNode and GraphNode.
|
17
|
+
# See tree_node.rb and graph_node.rb for the methods they offer.
|
18
|
+
#
|
19
|
+
# new initializes the object
|
20
|
+
#
|
21
|
+
# get returns the XML object representing
|
22
|
+
# the same node as this node object
|
23
|
+
#
|
24
|
+
class XMLNode < TreeNode
  # @param name [String] element name; or, for text, the whole text
  # @param attribute [Hash, nil] attr_name(string) -> attr_value(string)
  # @param id [String, nil] node ID; if nil, an ID is derived from the system time
  # @param i_am_text [false, true] set to anything but false or nil to represent
  #   not an xml element but text
  # @raise [RuntimeError] if a text node is given a non-empty attribute hash
  def initialize(name, attribute, id, i_am_text = false)
    # No ID given: take the system time, with fractions of seconds (to_f),
    # so that nodes made within the same second still get different IDs.
    id = Time.new.to_f.to_s if id.nil?

    super(id)

    # remember values for this element
    set_f("name", name)
    set_f("attributes", attribute)
    set_f("i_am_text", i_am_text)

    # sanity check (tests the +attribute+ parameter; an empty hash
    # counts as "no attributes")
    if i_am_text && attribute && !attribute.empty?
      raise "A text element cannot have attributes"
    end

    @kith = []
  end

  # Adds a child after a sanity check: text cannot have children.
  #
  # @raise [RuntimeError] if this node represents text rather than an xml element
  def add_child(child, edgelabel, varhash={})
    raise "A text element cannot have children" if get_f("i_am_text")

    super(child, edgelabel, varhash)
  end

  # Keeps an additional XML object that is written out after the children.
  #
  # @param xml RegXML object
  def add_kith(xml)
    @kith << xml
  end

  # Sets an attribute, creating the attribute hash if there is none yet.
  #
  # @param name attribute name
  # @param value [String] attribute value
  # @raise [RuntimeError] if value is not a string
  def set_attribute(name, value)
    unless value.is_a?(String)
      raise "I can only set attribute values to strings. Got: #{value.class}."
    end

    set_f("attributes", {}) if get_f("attributes").nil?
    get_f("attributes")[name] = value
  end

  # @return the value stored for attribute +name+, or nil if there is none
  def get_attribute(name)
    attrs = get_f("attributes")
    attrs ? attrs[name] : nil
  end

  # Deletes attribute +name+, if there are any attributes at all.
  def del_attribute(name)
    attrs = get_f("attributes")
    attrs.delete(name) if attrs
  end

  # Returns the XML as a string.
  # If this is a text, just the text (which is stored in "name") is returned.
  # If this is an XML element, a start tag is made from its name and
  # attributes, followed by the embedded XML (children, then kith),
  # followed by an end tag.
  #
  # @return [String]
  def get
    # text rather than XML element
    return get_f("name") if get_f("i_am_text")

    element_name = get_f("name")
    string = "<" + element_name

    attrs = get_f("attributes")
    if attrs
      string << attrs.to_a.map { |name, value| " #{name}='#{xml_secure_val(value)}'" }.join
    end

    string << ">\n"
    string << get_xml_embedded
    string << "</#{element_name}>\n"
    string
  end

  #############
  protected

  # @return [String] XML of the children followed by XML of the kith
  def get_xml_embedded
    get_xml_ofchildren + get_xml_ofkith
  end

  # @return [String] concatenation of the XML (#get) of every child
  def get_xml_ofchildren
    children.map { |child| child.get }.join
  end

  # @return [String] string form of every kith object, one per line
  def get_xml_ofkith
    @kith.map { |thing| thing.to_s + "\n" }.join
  end

  # Prints a warning to stderr that +xml_node+, found in +where+, is ignored.
  def warn_child_ignored(where, xml_node)
    $stderr.puts "WARNING: additional material found in #{where}, will be ignored:"
    $stderr.puts "\t" + xml_node.to_s
  end

  # Makes a string safe for use as a single-quoted XML attribute value.
  # An apostrophe becomes the entity &apos;. A double quote becomes two
  # such entities, keeping the existing convention of writing a double
  # quote as two apostrophes. Ampersands are left alone so that entities
  # already present in the value are not escaped twice.
  #
  # @param value [String] value of an attribute
  # @return [String] a new string
  def xml_secure_val(value)
    value.gsub("'", "&apos;").gsub('"', "&apos;&apos;")
  end
end
|
163
|
+
end
|