shalmaneser-lib 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,169 @@
1
+ require_relative 'salsa_tiger_xml_node'
2
+
3
module STXML
  #############
  # class SynNode
  #
  # Inherits from SalsaTigerXmlNode and adds methods specific to nodes
  # that describe the syntactic structure.
  #
  # Additional/changed methods:
  #
  # add_link        record a labeled link from this node to another node
  # get_linked      the recorded links, optionally only those with a given label
  # part_of_speech  part-of-speech information as a string ("pos" attribute),
  #                 nil if the attribute is not set
  #                 (expected for anything but terminal nodes)
  # word            word information for this node as a string ("word" attribute),
  #                 nil if the attribute is not set
  #                 (expected for anything but terminal nodes)
  # category        category information for this node as a string ("cat" attribute),
  #                 nil if the attribute is not set
  #                 (expected for anything but nonterminal nodes)
  # is_punct?       true if this is a terminal node and it is a punctuation sign
  # to_s            the word for terminal nodes, the superclass's to_s otherwise
  # get_sem         returns a copy of the list of semantic nodes attached via add_sem.
  #                 Idea: this is basically the inverse of the edge pointing from
  #                 the FeNode to this SynNode, so you can fetch a node's semantics directly
  # add_sem         add a non-tree edge from this syntactic node to an FeNode
  class SynNode < SalsaTigerXmlNode
    # Part-of-speech tags that mark punctuation
    # (this works at least for TIGER corpus annotation).
    PUNCT_POS_TAGS = ['$.', '$,', '$('].freeze

    # Known punctuation signs, checked against the word itself
    # when the part of speech did not settle the question.
    PUNCT_WORDS = [
      ".", ";", ",", ":", "?", "!", "(", ")", "[", "]", "{", "}", "-", "''", "``", "\"", "'"
    ].freeze

    ###
    # @param xml passed through unchanged to the superclass constructor
    def initialize(xml)
      super(xml)

      # semantic nodes attached via add_sem
      @sem = []
      # [link_label, other_node, attributes] triples recorded via add_link
      @other_links = []
    end

    ###
    # Record a link from this node to another node.
    #
    # @param other_node the link target (a SynNode)
    # @param link_label [String] edge label
    # @param attributes [Hash] string -> string: further attribute-value pairs for the edge
    def add_link(other_node, link_label, attributes = {})
      @other_links << [link_label, other_node, attributes]
    end

    ###
    # @param label [String, nil] if given, return only the links with this link_label
    # @return [Array] [link_label, other_node, attributes] triples;
    #   without a label this is the internal list itself, not a copy
    def get_linked(label = nil)
      return @other_links unless label

      @other_links.select { |link| link.first == label }
    end

    ###
    # @return [String, nil] stripped "pos" attribute, nil if it is not set
    def part_of_speech
      stripped_attribute("pos")
    end

    ###
    # @return [String, nil] stripped "cat" attribute, nil if it is not set
    def category
      stripped_attribute("cat")
    end

    ###
    # @return [String, nil] stripped "word" attribute, nil if it is not set
    def word
      stripped_attribute("word")
    end

    ###
    # @return [Boolean] true if this is a terminal node whose part of speech
    #   or word identifies it as a punctuation sign
    def is_punct?
      # only terminals can be punctuation signs
      return false if is_nonterminal?

      # first check the part of speech
      pos = part_of_speech
      return true if PUNCT_POS_TAGS.include?(pos)
      return true if pos =~ /^PUNC/

      # no luck with part of speech: check the word
      PUNCT_WORDS.include?(word)
    end

    ###
    # @return the word for a terminal node, otherwise whatever the superclass returns
    def to_s
      is_terminal? ? word : super()
    end

    ###
    # @return [Array] a copy of the list of attached semantic nodes,
    #   so callers cannot modify the internal list
    def get_sem
      @sem.clone
    end

    ###
    # Attach a semantic node to this syntactic node.
    #
    # @param fe_node [FeNode] an FeNode (or an instance of a subclass of it)
    # @raise [RuntimeError] if fe_node is not an FeNode
    def add_sem(fe_node)
      unless fe_node.is_a?(FeNode)
        raise "Unexpected class of semantic node: was expecting an FeNode"
      end

      @sem << fe_node
    end

    #############
    protected

    # Build the XML for the edges leaving this node:
    # one <edge> element per child that is not a splitword,
    # followed by one <other_edge> element per link recorded with add_link.
    # A missing edge label is written as "-".
    #
    # @return [String]
    def get_xml_ofchildren
      xml = ""

      each_child_with_edgelabel do |label, child|
        # terminal or nonterminal child.
        # splitwords are handled separately in the "sem" part of the sentence
        next if child.is_splitword?

        edge_label = label ? xml_secure_val(label) : "-"
        xml << "<edge label='#{edge_label}' idref='#{xml_secure_val(child.id)}'/>\n"
      end

      @other_links.each do |label, node, attributes|
        edge_label = label ? xml_secure_val(label) : "-"
        xml << "<other_edge label='#{edge_label}'"
        xml << " idref='#{xml_secure_val(node.id)}'"
        if attributes
          pairs = attributes.to_a.map { |attr, val| "#{xml_secure_val(attr)}='#{xml_secure_val(val)}'" }
          xml << " " + pairs.join(" ")
        end
        xml << "/>\n"
      end

      xml
    end

    private

    # Look up an attribute once and strip surrounding whitespace.
    #
    # @param name [String] attribute name
    # @return [String, nil] nil if the attribute is not set
    def stripped_attribute(name)
      value = get_attribute(name)
      value ? value.strip : nil
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require_relative 'graph_node'
2
+
3
module STXML
  # A graph node restricted to a single parent.
  #
  # The ancestor-related accessors are redone in singular form,
  # since here we only have one parent per node, and setting
  # a parent replaces whatever parent was there before.
  class TreeNode < GraphNode

    # @param id node ID, handed on unchanged to the superclass constructor
    def initialize(id)
      super(id)
    end

    # @return the first entry of `parents`, or nil if `parents` is nil
    def parent
      all_parents = parents
      all_parents.nil? ? nil : all_parents.first
    end

    # @return the first entry of `parent_labels`, or nil if `parent_labels` is nil
    def parent_label
      all_labels = parent_labels
      all_labels.nil? ? nil : all_labels.first
    end

    # @return the first entry of `parents_with_edgelabel`,
    #   or nil if `parents_with_edgelabel` is nil
    def parent_with_edgelabel
      all_pairs = parents_with_edgelabel
      all_pairs.nil? ? nil : all_pairs.first
    end

    # With a single parent, adding a parent is the same as setting it.
    def add_parent(parent, edgelabel, varhash = {})
      set_parent(parent, edgelabel, varhash)
    end

    # Replace the current parent by the given one.
    #
    # @param parent the new parent node
    # @param edgelabel label of the edge between the new parent and this node
    # @param varhash [Hash] options; if "pointer_insteadof_edge" is set,
    #   this node is not registered as a child of the new parent
    def set_parent(parent, edgelabel, varhash = {})
      # remove old parent
      each_parent_with_edgelabel do |old_label, old_parent|
        remove_parent(old_parent, old_label, varhash)
      end

      # set new parent
      @parents << [edgelabel, parent]

      # and vice versa: add self as child to parent,
      # unless only a pointer was asked for or the parent already lists us
      return if varhash["pointer_insteadof_edge"]
      return if parent.children_with_edgelabel.include?([edgelabel, self])

      parent.add_child(self, edgelabel)
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require_relative 'syn_node'
2
+ require_relative 'reg_xml'
3
+
4
module STXML
  #############
  # class TSSynNode
  #
  # Inherits from SynNode.
  #
  # Describes a syntactic node that isn't really there:
  # a reference to a node in another sentence.
  #
  # It is built from an otherwise empty <OTHER_SENTENCE/> RegXML element
  # that carries only that node's ID, its word is "<unknown>",
  # and you cannot add a child to it.
  #
  # New or changed methods:
  #-----------------------
  #
  # is_outside_sentence?  returns true
  #
  # word                  returns "<unknown>"
  #
  # add_child             raises an error
  class TSSynNode < SynNode

    ###
    # @param id_string [String] ID of the referenced node; it is inserted
    #   into the XML as given, without escaping
    def initialize(id_string)
      super(RegXML.new("<OTHER_SENTENCE id='" + id_string + "'/>"))
    end

    ###
    # @return [true] always: this node stands for a node of another sentence
    def is_outside_sentence?
      true
    end

    ###
    # word of this node: <unknown>
    # @return [String]
    def word
      "<unknown>"
    end

    ###
    # Children cannot be added to a reference node.
    #
    # The optional third parameter mirrors the (child, edgelabel, varhash)
    # form of add_child used by other node classes, so that a three-argument
    # call fails with the message below rather than with an ArgumentError.
    #
    # @raise [RuntimeError] always
    def add_child(arg1, arg2, varhash = {})
      raise "Not implemented for this class"
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require_relative 'salsa_tiger_xml_node'
2
+
3
module STXML
  #############
  # class UspNode
  #
  # Inherits from SalsaTigerXmlNode and adds methods specific to nodes
  # that describe a frame underspecification or
  # a frame element underspecification.
  #
  # Additional/changed methods:
  #----------------------------
  #
  # new                      initializes the object from its underlying XML object
  #                          and a string saying which kind of
  #                          underspecification it stands for
  #
  # i_am                     "frame" or "fe", as given to new
  #
  # add_child, remove_child  add, remove an underspecification entry
  class UspNode < SalsaTigerXmlNode

    attr_reader :i_am

    ###
    # @param xml_obj underlying RegXML object for this node
    # @param frame_or_fe [String] "frame" for frame underspecification,
    #   "fe" for frame element underspecification
    # @raise [RuntimeError] for any other value of frame_or_fe
    def initialize(xml_obj, frame_or_fe)
      super(xml_obj)

      @i_am = case frame_or_fe
              when "frame" then "frame"
              when "fe" then "fe"
              else raise "new: neither frame nor fe??"
              end
    end

    ###
    # Add an underspecification entry.
    # The child is attached as a pointer rather than an edge,
    # and its "usp" attribute is set to "yes".
    #
    # @param node the node to add; must not be nil
    # @param varhash [Hash] not used; the superclass is always called
    #   with "pointer_insteadof_edge" => true
    # @raise [RuntimeError] if node is nil
    def add_child(node, varhash={})
      raise "Got nil for a node." unless node

      super(node, nil, "pointer_insteadof_edge" => true)

      # set usp. attribute on child
      node.set_attribute("usp", "yes")
    end

    ###
    # Remove an underspecification entry and delete the child's "usp" attribute.
    #
    # Deleting the attribute will be wrong if the child is involved in more
    # than one instance of underspecification, hence the warning on stderr.
    #
    # @param node the node to remove
    # @param varhash [Hash] not used; the superclass is always called
    #   with "pointer_insteadof_edge" => true
    def remove_child(node, varhash={})
      super(node, nil, "pointer_insteadof_edge" => true)

      $stderr.puts "Warning: unsafe removal of attribute 'usp'"
      node.del_attribute("usp")
    end

    #############
    protected

    # @return [String] one <uspitem/> element per child, referring to the child by ID
    def get_xml_ofchildren
      usp_items = children.map do |usp_child|
        "<uspitem idref='#{xml_secure_val(usp_child.id)}'/>\n"
      end
      usp_items.join
    end
  end
end
@@ -0,0 +1,163 @@
1
+ require_relative 'tree_node'
2
+
3
module STXML
  #############
  # class XMLNode
  #
  # Node with entries pointing to its children
  # as well as its parent.
  # All edges may be labeled.
  # Each node has a unique ID.
  #
  # Represents either an XML element (name plus attributes) or a piece of text.
  # Additional XML objects ("kith") can be attached; they are not looked into,
  # just kept and written out after the children.
  #
  # Methods:
  # This class inherits from TreeNode, which in turn inherits from GraphNode.
  # See those classes for the methods they offer.
  #
  # new            initializes the object
  #
  # add_child      like the inherited method, but refuses children on text nodes
  #
  # add_kith       attach an additional XML object
  #
  # set_attribute, get_attribute, del_attribute
  #                write, read, delete an attribute of the element
  #
  # get            returns the XML representing this node as a string
  #
  class XMLNode < TreeNode

    # @param name [String] element name; or, for text, the whole text
    # @param attribute [Hash] attr_name(string) -> attr_value(string)
    # @param id [String] node ID
    # @param i_am_text [false, true] set to anything but false or nil to represent
    #   not an xml element but text
    # @raise [RuntimeError] if a text node is given a non-empty set of attributes
    def initialize(name, attribute, id, i_am_text = false)
      # I wasn't given any ID: take the system time for an ID.
      # to_f keeps the fractions of seconds, so that several nodes
      # made in the same second still get different IDs.
      id = Time.new.to_f.to_s if id.nil?

      super(id)

      # remember values for this element
      set_f("name", name)
      set_f("attributes", attribute)
      set_f("i_am_text", i_am_text)

      # sanity check on the attributes that were passed in;
      # nil and an empty hash both count as "no attributes"
      if i_am_text && attribute && !attribute.empty?
        raise "A text element cannot have attributes"
      end

      @kith = []
    end

    ###
    # Adds a sanity check to the inherited method:
    # if this is text rather than an xml element,
    # it cannot have children.
    #
    # @raise [RuntimeError] if this node represents text
    def add_child(child, edgelabel, varhash={})
      raise "A text element cannot have children" if get_f("i_am_text")

      super(child, edgelabel, varhash)
    end

    ###
    # Attach an additional XML object; it is written out after the children.
    #
    # @param xml RegXML object
    def add_kith(xml)
      @kith << xml
    end

    ###
    # set attribute
    # @param name attribute name
    # @param value [String]
    # @raise [RuntimeError] if value is not a string
    def set_attribute(name, value)
      unless value.is_a?(String)
        raise "I can only set attribute values to strings. Got: #{value.class}."
      end

      # create the attribute hash on first use
      set_f("attributes", {}) if get_f("attributes").nil?
      get_f("attributes")[name] = value
    end

    ###
    # @return the value stored for the attribute,
    #   nil if there are no attributes or this one is not set
    def get_attribute(name)
      attrs = get_f("attributes")
      attrs ? attrs[name] : nil
    end

    ###
    # delete attribute
    # @return the deleted value, nil if there was nothing to delete
    def del_attribute(name)
      attrs = get_f("attributes")
      attrs.delete(name) if attrs
    end

    ###
    # return XML as string:
    # If this is a text, just return the text
    # which is stored in "name".
    # If this is an XML element,
    # make a tag from its name and attributes,
    # then add the XML for all its children and kith,
    # then add an end tag.
    # Attribute values are passed through xml_secure_val; names are written as they are.
    #
    # @return [String]
    def get
      # text rather than XML element
      return get_f("name") if get_f("i_am_text")

      # XML element, not text
      xml = "<" + get_f("name")
      attrs = get_f("attributes")
      if attrs
        xml << attrs.to_a.map { |attr_name, value|
          " " + attr_name + "='" + xml_secure_val(value) + "'"
        }.join
      end
      xml << ">\n"
      xml << get_xml_embedded
      xml << "</#{get_f("name")}>\n"
      xml
    end

    #############
    protected

    # @return [String] everything that goes between start and end tag:
    #   the children's XML followed by the kith's XML
    def get_xml_embedded
      get_xml_ofchildren + get_xml_ofkith
    end

    # @return [String] concatenated XML of all children
    def get_xml_ofchildren
      children.map { |child| child.get }.join
    end

    # @return [String] the string form of each kith object, one per line
    def get_xml_ofkith
      @kith.map { |thing| thing.to_s + "\n" }.join
    end

    ###
    # Report on stderr that part of the XML input is being ignored.
    #
    # @param where description of the place where the material was found
    # @param xml_node the ignored material; printed via to_s
    def warn_child_ignored(where, xml_node)
      $stderr.puts "WARNING: additional material found in #{where}, will be ignored:"
      $stderr.puts "\t" + xml_node.to_s
    end

    ###
    # Make an attribute value safe for writing between single quotes:
    # ' becomes &apos; and " becomes &apos;&apos;.
    #
    # NOTE(review): a double quote is written as two apostrophe entities
    # rather than &quot;, and &, < and > are left alone. This may be what
    # the code reading the files back expects; confirm before changing.
    #
    # @param value [String] value of an attribute
    # @return [String]
    def xml_secure_val(value)
      value.gsub(/'/, "&apos;").gsub(/"/, "&apos;&apos;")
    end
  end
end