shalmaneser-lib 1.2.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,169 @@
1
+ require_relative 'salsa_tiger_xml_node'
2
+
3
module STXML
  #############
  # class SynNode
  #
  # Inherits from SalsaTigerXmlNode and adds methods specific to nodes
  # that describe the syntactic structure.
  #
  # additional/changed methods:
  #
  # part_of_speech  value of the "pos" attribute (whitespace-stripped),
  #                 nil if the node has no such attribute
  #
  # word            value of the "word" attribute (whitespace-stripped),
  #                 nil if the node has no such attribute
  #
  # category        value of the "cat" attribute (whitespace-stripped),
  #                 nil if the node has no such attribute
  #
  # is_punct?       true if this is not a nonterminal and its part of speech
  #                 or word marks it as a punctuation sign
  #
  # add_link        record a labeled non-tree edge to another node
  # get_linked      return the recorded non-tree edges, optionally by label
  #
  # add_sem         remember an FeNode as belonging to this syntactic node.
  #                 Idea: this is basically the inverse of the edge pointing
  #                 from the FeNode to this SynNode, so you can fetch a
  #                 node's semantics directly
  #
  # get_sem         return a copy of the list of FeNodes stored via add_sem
  class SynNode < SalsaTigerXmlNode
    ###
    # @param xml the XML object for this node; passed unchanged to the superclass
    def initialize(xml)
      super(xml)

      @sem = []
      @other_links = []
    end

    ###
    # Record a non-tree edge from this node to another node.
    #
    # @param other_node [SynNode] target of the edge
    # @param link_label [String] edge label
    # @param attributes [Hash] string => string, further attribute-value pairs for the edge
    # @return [Array] the updated internal list of links
    def add_link(other_node, link_label, attributes = {})
      @other_links << [link_label, other_node, attributes]
    end

    ###
    # @param label [String, nil] if given, only links with this label are returned
    # @return [Array<Array>] triples [label, node, attributes].
    #   Without a label this is the internal list itself, not a copy.
    def get_linked(label = nil)
      return @other_links unless label

      @other_links.select { |label_node_attr| label_node_attr.first == label }
    end

    ###
    # @return [String, nil] stripped "pos" attribute, nil if absent
    def part_of_speech
      stripped_attribute("pos")
    end

    ###
    # @return [String, nil] stripped "cat" attribute, nil if absent
    def category
      stripped_attribute("cat")
    end

    ###
    # @return [String, nil] stripped "word" attribute, nil if absent
    def word
      stripped_attribute("word")
    end

    ###
    # Decide whether this node is a punctuation sign.
    # First the part of speech is consulted, then the word form.
    #
    # @return [Boolean]
    def is_punct?
      # only terminals can be punctuation signs
      return false if is_nonterminal?

      # part-of-speech test; this works at least for TIGER corpus annotation
      pos = part_of_speech
      case pos
      when '$.', '$,', '$('
        return true
      end
      return true if pos =~ /^PUNC/

      # no luck with part of speech: check the word form against the
      # known punctuation signs
      case word
      when ".", ";", ",", ":", "?", "!", "(", ")", "[", "]", "{", "}", "-", "''", "``", "\"", "'"
        return true
      end

      # not a punctuation sign by any of the tests we have applied
      false
    end

    ###
    # @return the word for terminals (may be nil if the node has no "word"
    #   attribute), otherwise whatever the superclass returns
    def to_s
      return word if is_terminal?

      super()
    end

    ###
    # @return [Array] a shallow copy of the FeNodes registered via add_sem
    def get_sem
      @sem.clone
    end

    ###
    # Register a semantic node for this syntactic node.
    #
    # @param fe_node [FeNode]
    # @raise [RuntimeError] if fe_node is not an FeNode
    def add_sem(fe_node)
      # is_a? also admits subclasses of FeNode, which the former
      # exact-class comparison rejected
      unless fe_node.is_a?(FeNode)
        raise "Unexpected class of semantic node: was expecting an FeNode"
      end

      @sem << fe_node
    end

    #############
    protected

    # Build the XML for the outgoing edges of this node: one <edge/> element
    # per child that is not a splitword, followed by one <other_edge/>
    # element per link recorded with add_link.
    #
    # @return [String]
    def get_xml_ofchildren
      string = ""

      each_child_with_edgelabel do |label, child|
        # terminal or nonterminal child.
        # splitwords are handled separately in the "sem" part of the sentence
        next if child.is_splitword?

        string << "<edge label='#{edge_label_value(label)}' idref='#{xml_secure_val(child.id)}'/>\n"
      end

      @other_links.each do |label, node, attributes|
        string << "<other_edge label='#{edge_label_value(label)}'"
        string << " idref='#{xml_secure_val(node.id)}'"
        # skip empty attribute hashes so that no stray blank ends up before "/>"
        if attributes && !attributes.empty?
          pairs = attributes.to_a.map { |attr, val| "#{xml_secure_val(attr)}='#{xml_secure_val(val)}'" }
          string << " " << pairs.join(" ")
        end
        string << "/>\n"
      end

      string
    end

    private

    # Fetch an attribute once and strip surrounding whitespace.
    #
    # @param name [String] attribute name
    # @return [String, nil] nil if the attribute is not set
    def stripped_attribute(name)
      value = get_attribute(name)
      value ? value.strip : nil
    end

    # Label text for an edge element: the escaped label, or "-" for unlabeled edges.
    def edge_label_value(label)
      label ? xml_secure_val(label) : "-"
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require_relative 'graph_node'
2
+
3
module STXML
  # A graph node restricted to a single parent.
  # The plural ancestor accessors (parents, parent_labels,
  # parents_with_edgelabel) are not defined here; this class wraps each of
  # them in a singular variant that returns the first entry.
  class TreeNode < GraphNode
    # @param id node ID, handed to the superclass constructor
    def initialize(id)
      super(id)
    end

    # @return the first entry of `parents`, or nil if `parents` is nil
    def parent
      all_parents = parents
      return nil if all_parents.nil?

      all_parents.first
    end

    # @return the first entry of `parent_labels`, or nil if that list is nil
    def parent_label
      labels = parent_labels
      return nil if labels.nil?

      labels.first
    end

    # @return the first entry of `parents_with_edgelabel`, or nil if that list is nil
    def parent_with_edgelabel
      pairs = parents_with_edgelabel
      return nil if pairs.nil?

      pairs.first
    end

    # In a tree, adding a parent is the same as setting the parent.
    def add_parent(parent, edgelabel, varhash = {})
      set_parent(parent, edgelabel, varhash)
    end

    # Replace the current parent by the given one.
    #
    # @param parent the new parent node
    # @param edgelabel label of the edge to the new parent
    # @param varhash [Hash] options; a truthy "pointer_insteadof_edge"
    #   suppresses registering self as a child of the parent
    def set_parent(parent, edgelabel, varhash = {})
      # detach from whatever parent is currently registered
      each_parent_with_edgelabel do |old_label, old_parent|
        remove_parent(old_parent, old_label, varhash)
      end

      # register the new parent
      @parents << [edgelabel, parent]

      # and vice versa: make self a child of the parent, unless this is only
      # a pointer or the parent already lists this edge
      return if varhash["pointer_insteadof_edge"]
      return if parent.children_with_edgelabel.include?([edgelabel, self])

      parent.add_child(self, edgelabel)
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require_relative 'syn_node'
2
+ require_relative 'reg_xml'
3
+
4
module STXML
  #############
  # class TSSynNode
  #
  # inherits from SynNode
  #
  # describes a syntactic node that isn't really there:
  # a reference to a node in another sentence
  #
  # It is built from a childless <OTHER_SENTENCE id='...'/> RegXML element
  # carrying that node's ID, its word is "<unknown>", and you cannot add
  # a child to it.
  #
  # new or changed methods:
  #-----------------------
  #
  # is_outside_sentence?  returns true
  #
  # word                  returns "<unknown>"
  #
  # add_child             raises an error
  class TSSynNode < SynNode
    ###
    # @param id_string [String] ID of the referenced node. It is inserted
    #   verbatim into the placeholder element, so it must not contain a
    #   single quote.
    def initialize(id_string)
      super(RegXML.new("<OTHER_SENTENCE id='" + id_string + "'/>"))
    end

    ###
    # @return [true] this node always refers to another sentence
    def is_outside_sentence?
      true
    end

    ###
    # word of this node: <unknown>
    #
    # @return [String]
    def word
      "<unknown>"
    end

    ###
    # Children cannot be attached to a placeholder node.
    #
    # The optional third parameter mirrors the
    # add_child(child, edgelabel, varhash) form used elsewhere in this
    # class hierarchy, so that such calls fail with the message below
    # rather than with an ArgumentError about the argument count.
    #
    # @raise [RuntimeError] always
    def add_child(arg1, arg2, _varhash = {})
      raise "Not implemented for this class"
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require_relative 'salsa_tiger_xml_node'
2
+
3
module STXML
  #############
  # class UspNode
  #
  # inherits from SalsaTigerXmlNode,
  # adds to it methods specific to nodes
  # that describe a frame underspecification or frame element underspecification
  #
  # additional/changed methods:
  #----------------------------
  #
  # new                      initializes the object
  #                          xml_obj: underlying XML object for this node
  #                          frame_or_fe: string, either "frame" for frame
  #                          underspecification or "fe" for frame element
  #                          underspecification
  #
  # add_child, remove_child  add, remove underspecification entry
  class UspNode < SalsaTigerXmlNode
    # "frame" or "fe", as chosen at construction time
    attr_reader :i_am

    ###
    # @param xml_obj RegXML object, handed to the superclass constructor
    # @param frame_or_fe [String] "frame" or "fe"
    # @raise [RuntimeError] for any other value of frame_or_fe
    def initialize(xml_obj, frame_or_fe)
      super(xml_obj)

      @i_am =
        case frame_or_fe
        when "frame" then "frame"
        when "fe" then "fe"
        else raise "new: neither frame nor fe??"
        end
    end

    ###
    # Add an underspecification entry. The child is attached without an
    # edge label and with the "pointer_insteadof_edge" option, then marked
    # with the attribute usp="yes".
    #
    # @param node the node to attach; must not be nil
    # @param varhash [Hash] accepted but not used here
    # @raise [RuntimeError] if node is nil or false
    def add_child(node, varhash={})
      raise "Got nil for a node." unless node

      super(node, nil, "pointer_insteadof_edge" => true)

      # set usp. attribute on child
      node.set_attribute("usp", "yes")
    end

    ###
    # Remove an underspecification entry and drop the child's "usp" attribute.
    #
    # @param node the node to detach
    # @param varhash [Hash] accepted but not used here
    def remove_child(node, varhash={})
      super(node, nil, "pointer_insteadof_edge" => true)

      # Dropping "usp" is wrong if the child takes part in more than one
      # instance of underspecification, hence the warning.
      $stderr.puts "Warning: unsafe removal of attribute 'usp'"
      node.del_attribute("usp")
    end

    #############
    protected

    # @return [String] one <uspitem/> element per child, each on its own line
    def get_xml_ofchildren
      items = children.map do |usp_child|
        "<uspitem idref=\'#{xml_secure_val(usp_child.id)}\'/>\n"
      end
      items.join
    end
  end
end
@@ -0,0 +1,163 @@
1
+ require_relative 'tree_node'
2
+
3
module STXML
  #############
  # class XMLNode
  #
  # node with entries pointing to its children
  # as well as its parent.
  # all edges may be labeled.
  # each node has a unique ID.
  #
  # Stores an element name, an attribute hash and a text flag, plus a list
  # of additional XML objects ("kith") that are kept and printed verbatim
  # without being looked into.
  #
  # methods:
  # This class inherits from TreeNode (tree_node.rb), which in turn
  # inherits from GraphNode (graph_node.rb); see those files for the
  # methods they offer. set_f/get_f used below are not defined in this
  # class and come from one of those ancestors.
  #
  # new   initializes the object
  #
  # get   returns the XML string representing
  #       the same node as this node object
  #
  class XMLNode < TreeNode
    # @param name [String] element name; or, for text, the whole text
    # @param attribute [Hash, nil] attr_name(string) -> attr_value(string)
    # @param id [String, nil] node ID; if nil, an ID is derived from the system time
    # @param i_am_text [false, true] set to anything but false or nil to represent
    #   not an xml element but text
    # @raise [RuntimeError] if a text node is given a non-nil attribute hash
    def initialize(name, attribute, id, i_am_text = false)
      # I wasn't given any ID: take system time for an ID.
      # to_f keeps the fractions of a second, so nodes made within the
      # same second usually still get different IDs.
      # NOTE(review): two nodes created within the clock's resolution would
      # still share an ID -- confirm whether callers rely on uniqueness.
      id = Time.new.to_f.to_s if id.nil?

      super(id)

      # remember values for this element
      set_f("name", name)
      set_f("attributes", attribute)
      set_f("i_am_text", i_am_text)

      # sanity check; this has to test the constructor argument `attribute`
      # (the former `attributes` is not a local variable of this method)
      if i_am_text && attribute
        raise "A text element cannot have attributes"
      end

      @kith = []
    end

    ###
    # add sanity check:
    # if this is text rather than an xml element,
    # it cannot have children
    #
    # @raise [RuntimeError] if this node represents text
    def add_child(child, edgelabel, varhash={})
      raise "A text element cannot have children" if get_f("i_am_text")

      super(child, edgelabel, varhash)
    end

    ###
    # Remember an additional XML object; it is printed after the children by #get.
    #
    # @param xml RegXML object
    def add_kith(xml)
      @kith << xml
    end

    ###
    # set attribute
    #
    # @param name attribute name
    # @param value [String]
    # @raise [RuntimeError] if value is not a String
    def set_attribute(name, value)
      unless value.is_a?(String)
        raise "I can only set attribute values to strings. Got: #{value.class}."
      end

      # create the attribute hash on first use
      set_f("attributes", {}) if get_f("attributes").nil?
      get_f("attributes")[name] = value
    end

    ###
    # @return the value stored for the attribute, nil if the attribute or
    #   the whole attribute hash is missing
    def get_attribute(name)
      attrs = get_f("attributes")
      attrs ? attrs[name] : nil
    end

    ###
    # delete attribute
    #
    # @return the deleted value, nil if there was nothing to delete
    def del_attribute(name)
      attrs = get_f("attributes")
      attrs.delete(name) if attrs
    end

    ###
    # return XML as string:
    # If this is a text, just return the text
    # which is stored in "name".
    # If this is an XML element,
    # make a tag from its name and attributes,
    # then add the XML of all its children and kith,
    # then add an end tag.
    #
    # @return [String]
    def get
      # text rather than XML element
      return get_f("name") if get_f("i_am_text")

      # XML element, not text
      string = "<#{get_f('name')}"
      attrs = get_f("attributes")
      if attrs
        string << attrs.to_a.map { |name, value| " #{name}='#{xml_secure_val(value)}'" }.join
      end
      string << ">\n"
      string << get_xml_embedded
      string << "</#{get_f('name')}>\n"
      string
    end

    #############
    protected

    # @return [String] XML of the children followed by the XML of the kith
    def get_xml_embedded
      get_xml_ofchildren + get_xml_ofkith
    end

    # @return [String] concatenation of #get for every child
    def get_xml_ofchildren
      children.map { |child| child.get }.join
    end

    # @return [String] every kith object as a string, one per line
    def get_xml_ofkith
      @kith.map { |thing| thing.to_s + "\n" }.join
    end

    ###
    # Report on stderr that some XML material is being skipped.
    #
    # @param where [String] description of the place the material was found in
    # @param xml_node the ignored object; printed via to_s
    def warn_child_ignored(where, xml_node)
      $stderr.puts "WARNING: additional material found in #{where}, will be ignored:"
      $stderr.puts "\t" + xml_node.to_s
    end

    ###
    # Make a string safe for use inside a single-quoted attribute value.
    #
    # NOTE(review): a double quote is written as two &apos; entities, so it
    # cannot be told apart from two single quotes on reading, and "&" / "<"
    # are passed through unescaped. Left unchanged here because readers of
    # this output may depend on the current form -- confirm before changing.
    #
    # @param value [String] value of an attribute
    # @return [String]
    def xml_secure_val(value)
      value.gsub(/'/, "&apos;").gsub(/"/, "&apos;&apos;")
    end
  end
end