shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,196 +0,0 @@
1
- ###
2
- # FixSynSemMapping:
3
- # Given a SalsaTigerRegXML sentence with semantic role annotation,
4
- # simplify the mapping of semantic roles to syntactic constituents
5
- #
6
- # The following is lifted from the LREC06 paper on Shalmaneser:
7
- # During preprocessing, the span of semantic roles in the training corpora is
8
- # projected onto the output of the syntactic parser by assigning each
9
- # role to the set of maximal constituents covering its word span.
10
- # If the word span of a role does not coincide
11
- # with parse tree constituents, e.g. due to misparses,
12
- # the role is ``spread out'' across several constituents. This leads to
13
- # idiosyncratic paths between predicate and semantic role in the parse
14
- # tree.
15
- #
16
- # [The following span standardization algorithm is used to make the
17
- # syntax-semantics mapping more uniform:]
18
- # Given a role r that has been assigned, let N be the set of
19
- # terminal nodes of the syntactic structure that are covered by r.
20
- #
21
- # Iteratively compute the maximal projection of N in the syntactic
22
- # structure:
23
- # 1) If n is a node such that all of n's children are in N,
24
- # then remove n's children from N and add n instead.
25
- # 2) If n is a node with 3 or more children, and all of n's
26
- # children except one are in N, then remove n's children from N
27
- # and add n instead.
28
- # 3) If n is an NP with 2 children, and one of them, another NP,
29
- # is in N, and the other, a relative clause, is not, then remove
30
- # n's children from N and add n instead.
31
- #
32
- # If none of the rules is applicable to N anymore, assign r to the
33
- # nodes in N.
34
- #
35
- # Rule 1 implements normal maximal projection. Rule 2 ``repairs'' parser
36
- # errors where all children of a node but one have been assigned the
37
- # same role. Rule 3 addresses a problem of the FrameNet data, where
38
- # relative clauses have been omitted from roles assigned to NPs.
39
-
40
- # KE Feb 08: rule 3 currently out of commission!
41
-
42
- require "common/SalsaTigerRegXML"
43
-
44
module FixSynSemMapping
  # Part-of-speech tags (as returned by the interpreter's simplified_pt)
  # that are treated as relative pronouns. The whole alternation is
  # anchored at the start of the tag; the earlier pattern
  # /^(WDT)|(WP\$?)|(WRB)/ anchored only its first alternative, so "WP"
  # and "WRB" matched anywhere inside a tag.
  RELATIVE_PRONOUN_PT = /^(WDT|WP\$?|WRB)/

  ##
  # Simplify the mapping of frame elements (FEs) to syntactic nodes,
  # in place, for every frame of the given sentence.
  #
  # Relevant settings in the experiment file:
  #
  # fe_syn_repair:
  #   If there is a node that would be a max. constituent for the
  #   words covered by the given FE, except that it has one child
  #   whose words are not in the FE, use the node as max constituent anyway.
  #   This is to repair cases where the parser has made an attachment choice
  #   that differs from the one in the gold annotation.
  #
  # fe_rel_repair:
  #   As implemented here: if the last syntactic node an FE points to is
  #   tagged as a relative pronoun, make the FE point to that node only.
  #
  # Only FEs/targets that currently point to two or more syntactic nodes
  # are touched. Nothing happens when neither setting is enabled or when
  # sent is nil.
  #
  # sent::              SalsaTigerSentence object (must offer each_frame)
  # exp::               experiment file object (must offer get(key))
  # interpreter_class:: SynInterpreter class (must offer simplified_pt
  #                     and max_constituents)
  def self.fixit(sent, exp, interpreter_class)
    return unless exp.get("fe_syn_repair") || exp.get("fe_rel_repair")
    return if sent.nil?

    # "repair" FEs:
    sent.each_frame do |frame|
      frame.each_child do |fe_or_target|
        # repair only if the FE currently points to more than one syn node
        next if fe_or_target.children.length < 2

        if exp.get("fe_rel_repair")
          lastfe = fe_or_target.children.last
          if lastfe && interpreter_class.simplified_pt(lastfe) =~ RELATIVE_PRONOUN_PT
            # point only to the last previous node, the relative pronoun
            detach_children(fe_or_target)
            fe_or_target.add_child(lastfe)
          end
        end

        if exp.get("fe_syn_repair")
          # The old nodes are detached before the recomputation, in the
          # same order as the original implementation.
          old_fe_syn = detach_children(fe_or_target)

          # recompute the maximal constituents from the terminals covered
          terminals = old_fe_syn.map { |t| t.yield_nodes }.flatten.uniq
          new_fe_syn = interpreter_class.max_constituents(terminals,
                                                          sent,
                                                          exp.get("fe_syn_repair"))

          # make the FE point to the new nodes
          new_fe_syn.each { |syn_node| fe_or_target.add_child(syn_node) }
        end
      end # each FE
    end # each frame
  end

  # Remove every syntactic node the FE points to and return the list of
  # nodes that were removed.
  def self.detach_children(fe_or_target)
    old_fe_syn = fe_or_target.children
    old_fe_syn.each { |child| fe_or_target.remove_child(child) }
    old_fe_syn
  end
  private_class_method :detach_children
end # module
124
-
125
-
126
- #########3
127
- # old code
128
-
129
- # if exp.get("fe_rel_repair")
130
- # # repair relative clauses:
131
- # # then make a procedure to pass on to max constituents
132
- # # that will recognize the relevant cases
133
-
134
- # accept_anyway_proc = Proc.new { |node, children_in, children_out|
135
-
136
- # # node: SynNode
137
- # # children_in, children_out: array:SynNode. children_in are the children
138
- # # that are already covered by the FE, children_out the ones that aren't
139
-
140
- # # if node is an NP,
141
- # # and only one of its children is out,
142
- # # and one node in children_in is an NP, and the missing child is an SBAR
143
- # # with a child that is a relative pronoun, then consider the child in children_out as covered
144
- # if interpreter_class.category(node) == "noun" and
145
- # children_out.length() == 1 and
146
- # children_in.select { |n| interpreter_class.category(n) == "noun" } and
147
- # interpreter_class.category(children_out.first) == "sent" and
148
- # (ch = children_out.first.children) and
149
- # ch.select { |n| interpreter_class.relative_pronoun?(n) }
150
- # true
151
- # else
152
- # false
153
- # end
154
- # }
155
-
156
- # else
157
- # accept_anyway_proc = nil
158
- # end
159
-
160
-
161
- # # "repair" FEs:
162
- # sent.each_frame { |frame|
163
-
164
- # frame.each_child { |fe_or_target|
165
-
166
- # # repair only if the FE currently
167
- # # points to more than one syn node, or
168
- # # if it is a noun with a non-covered sentence sister
169
- # if fe_or_target.children.length() > 1 or
170
- # (exp.get("fe_rel_repair") and (curr_marked = fe_or_target.children.first()) and
171
- # interpreter_class.category(curr_marked) == "noun" and
172
- # (p = curr_marked.parent) and
173
- # p.children.select { |n| n != curr_marked and interpreter_class.category(n) == "sent" } )
174
-
175
- # # remember nodes covered by the FE
176
- # old_fe_syn = fe_or_target.children()
177
-
178
- # # remove syn nodes that the FE points to
179
- # old_fe_syn.each { |child|
180
- # fe_or_target.remove_child(child)
181
- # }
182
-
183
- # # and recompute
184
- # new_fe_syn = interpreter_class.max_constituents(old_fe_syn.map { |t| t.yield_nodes}.flatten.uniq,
185
- # sent,
186
- # exp.get("fe_syn_repair"),
187
- # accept_anyway_proc)
188
-
189
- # # make the FE point to the new nodes
190
- # new_fe_syn.each { |syn_node|
191
- # fe_or_target.add_child(syn_node)
192
- # }
193
-
194
- # end # if FE points to more than one syn node
195
- # } # each FE
196
- # } # each frame
@@ -1,345 +0,0 @@
1
- # GraphNode: describes one node in a graph.
2
- #
3
- # A node may have an arbitrary number of parents (sources of incoming edges)
4
- # and an arbitrary number of children (targets of outgoing edges)
5
- #
6
- # All edges are labeled and directed
7
- #
8
- # The add_parent, add_child, remove_parent, remove_child methods
9
- # take care of both ends of an edge
10
- # (i.e. n1.add_child(n2, label) also adds n1 as parent of n2 with edge label 'label')
11
- #
12
- # It is possible to create a 'pointer' rather than an edge:
13
- # n1.add_child(n2, label, "pointer_insteadof_edge" => true)
14
- # will create an edge from n1 to n2 labeled 'label' that is
15
- # listed under the outgoing edges of n1, but not among
16
- # the incoming edges of n2
17
- # The same option is available for add_parent, remove_parent, remove_child.
18
-
19
# One node in a directed graph with labeled edges.
#
# Edges are stored internally as [edgelabel, node] pairs, once in the
# source node's children list and once in the target node's parents list.
# The add_*/remove_* methods keep both ends in sync unless the option
# hash contains "pointer_insteadof_edge" => true, in which case only the
# receiver's own list is changed.
class GraphNode
  # Separators used by the custom Marshal format (_dump / _load).
  VALUE_SEPARATOR = "QQSEPVALUESQQ"
  PAIR_SEPARATOR = "QQPAIRQQ"
  LABEL_SEPARATOR = "QQSEPQQ"

  # The node's identifier; two nodes compare equal (==) iff their ids do.
  attr_reader :id

  def initialize(id)
    @id = id
    @children = []
    @parents = []
    @features = {}
  end

  # for Marshalling:
  # Dump just IDs instead of actual nodes from Parents and Children lists.
  # Otherwise the Marshaller will go crazy following
  # all the links to objects mentioned.
  # After loading: replace IDs by actual objects with a little help
  # from the caller (see recover_from_dump).
  #
  # Labels and ids are coerced to strings, so nodes with non-String ids
  # (or nil labels) can be dumped too.
  def _dump(depth)
    @id.to_s +
      VALUE_SEPARATOR +
      Marshal.dump(@features) +
      VALUE_SEPARATOR +
      dump_edges(@children) +
      VALUE_SEPARATOR +
      dump_edges(@parents)
  end

  # Counterpart of _dump: builds a node whose id is the dumped id string
  # and whose children/parents lists hold [label, id-string] pairs until
  # recover_from_dump is called.
  def self._load(string)
    id = string.split(VALUE_SEPARATOR).first

    result = GraphNode.new(id)
    result.fill_from_pickle(string)
    result
  end

  # Restore features and the (still id-based) edge lists from a string
  # produced by _dump.
  def fill_from_pickle(string)
    _id, features_s, children_s, parents_s = string.split(VALUE_SEPARATOR)

    # NOTE(review): Marshal.load must only ever see trusted dumps;
    # it is unsafe on data from untrusted sources.
    @features = Marshal.load(features_s)
    @children = load_edges(children_s)
    @parents = load_edges(parents_s)
  end

  # Replace the ids left in the edge lists after loading by real nodes.
  # node_by_id:: callable (responds to call) mapping an id to a node
  def recover_from_dump(node_by_id)
    @children = @children.map { |label, node_id| [label, node_by_id.call(node_id)] }
    @parents = @parents.map { |label, node_id| [label, node_by_id.call(node_id)] }
  end

  # ID-related things

  # Nodes are equal iff the other object is a GraphNode with the same id.
  def ==(other_node)
    return false unless other_node.kind_of? GraphNode

    @id == other_node.id
  end

  # Change this node's id.
  def chid(newid)
    @id = newid
  end

  # setting and retrieving features

  def get_f(feature)
    @features[feature]
  end

  def set_f(feature, value)
    @features[feature] = value
  end

  # Like set_f, but raises a RuntimeError if the feature already has a
  # non-nil value.
  def add_f(feature, value)
    raise "Feature #{feature} already set." unless @features[feature].nil?

    set_f(feature, value)
  end

  # ancestors

  # Parent nodes, without edge labels.
  def parents
    @parents.map { |_label, parent| parent }
  end

  # Labels of the incoming edges, in the same order as parents.
  def parent_labels
    @parents.map { |label, _parent| label }
  end

  # Label of the first edge coming from the given parent, or nil.
  def parent_label(parent)
    @parents.each do |label, node|
      return label if node == parent
    end
    nil
  end

  # The internal list of [edgelabel, parent] pairs (not a copy).
  def parents_with_edgelabel
    @parents
  end

  def each_parent
    @parents.each { |label_parent| yield label_parent[1] }
  end

  # Yields each [edgelabel, parent] pair.
  def each_parent_with_edgelabel
    @parents.each { |label_parent| yield label_parent }
  end

  # Parents reached via an edge whose label is contained in labels.
  def parents_by_edgelabels(labels)
    @parents.select { |label, _parent| labels.include? label }
            .map { |_label, parent| parent }
  end

  # Add an incoming edge from parent; unless "pointer_insteadof_edge" is
  # set, also register self as child of parent (if not yet present).
  def add_parent(parent, edgelabel, varhash = {})
    @parents << [edgelabel, parent]

    # and vice versa: add self as child to parent
    return if varhash["pointer_insteadof_edge"]
    return if parent.children_with_edgelabel.include? [edgelabel, self]

    parent.add_child(self, edgelabel)
  end

  # Remove the incoming edge(s) from parent with the given label; unless
  # "pointer_insteadof_edge" is set, also remove self from parent's children.
  def remove_parent(parent, edgelabel, varhash = {})
    @parents = @parents.reject do |label, node|
      label == edgelabel && node == parent
    end

    # and vice versa: remove self as child from parent
    return if varhash["pointer_insteadof_edge"]
    return unless parent.children_with_edgelabel.include? [edgelabel, self]

    parent.remove_child(self, edgelabel)
  end

  # Number of incoming edges.
  def indeg
    @parents.length
  end

  # All nodes reachable by following incoming edges, without duplicates.
  def ancestors
    ancestors_noduplicates([], [])
  end

  # Like ancestors, but following only edges with one of the given labels.
  def ancestors_by_edgelabels(labels)
    ancestors_noduplicates([], labels)
  end

  # descendants

  # Child nodes, without edge labels.
  def children
    @children.map { |_label, child| child }
  end

  # Labels of the outgoing edges, in the same order as children.
  def child_labels
    @children.map { |label, _child| label }
  end

  # Label of the first edge leading to the given child, or nil.
  def child_label(child)
    @children.each do |label, node|
      return label if node == child
    end
    nil
  end

  # The internal list of [edgelabel, child] pairs (not a copy).
  def children_with_edgelabel
    @children
  end

  def each_child
    @children.each { |label_child| yield label_child[1] }
  end

  # Yields each [edgelabel, child] pair.
  def each_child_with_edgelabel
    @children.each { |label_child| yield label_child }
  end

  # Children reached via an edge whose label is contained in labels.
  def children_by_edgelabels(labels)
    @children.select { |label, _child| labels.include? label }
             .map { |_label, child| child }
  end

  # Add an outgoing edge to child; unless "pointer_insteadof_edge" is
  # set, also register self as parent of child (if not yet present).
  def add_child(child, edgelabel, varhash = {})
    @children << [edgelabel, child]

    # and vice versa: add self as parent to child
    return if varhash["pointer_insteadof_edge"]
    return if child.parents_with_edgelabel.include? [edgelabel, self]

    child.add_parent(self, edgelabel)
  end

  # Remove the outgoing edge(s) to child with the given label; unless
  # "pointer_insteadof_edge" is set, also remove self from child's parents.
  def remove_child(child, edgelabel, varhash = {})
    @children = @children.reject do |label, node|
      label == edgelabel && node == child
    end

    # and vice versa: remove self as parent from child
    return if varhash["pointer_insteadof_edge"]
    return unless child.parents_with_edgelabel.include? [edgelabel, self]

    child.remove_parent(self, edgelabel)
  end

  # Relabel the edge to child from oldlabel to newlabel; does nothing if
  # no such edge exists.
  def change_child_label(child, oldlabel, newlabel, varhash = {})
    return unless @children.include? [oldlabel, child]

    remove_child(child, oldlabel, varhash)
    add_child(child, newlabel, varhash)
  end

  def remove_all_children(varhash = {})
    # remove_child replaces @children with a new array, so iterating
    # over the old one here is safe.
    each_child_with_edgelabel do |label, child|
      remove_child(child, label, varhash)
    end
  end

  #### CAUTION: set_children must be called with an "internal format" list:
  #### instead of using [node, edgelabel], use [edgelabel, node]
  #
  # The old children are removed via remove_child, but the new list is
  # assigned directly: the new children do NOT get self added as parent.
  # NOTE(review): callers presumably maintain the reverse edges
  # themselves -- confirm before changing this.
  def set_children(list, varhash = {})
    remove_all_children(varhash)

    @children = list
  end

  # Number of outgoing edges.
  def outdeg
    @children.length
  end

  # The leaves (nodes without children) below this node, collected by
  # following child edges in order; a childless node yields itself.
  def yield_nodes
    arr = []
    arr << self if outdeg == 0
    each_child do |c|
      if c.outdeg == 0
        arr << c
      else
        arr.concat c.yield_nodes
      end
    end
    arr
  end

  # All nodes reachable by following outgoing edges, without duplicates.
  def descendants
    descendants_noduplicates([], [])
  end

  # Like descendants, but following only edges with one of the given labels.
  def descendants_by_edgelabels(labels)
    descendants_noduplicates([], labels)
  end

  protected

  # Collect descendants into nodes (which is extended in place), skipping
  # nodes already seen. An empty labels list means "follow every edge".
  def descendants_noduplicates(nodes, labels)
    each_child_with_edgelabel do |label, child|
      next unless labels.empty? || labels.include?(label)
      next if nodes.include? child

      nodes = child.descendants_noduplicates(nodes << child, labels)
    end
    nodes
  end

  # Collect ancestors into nodes (which is extended in place), skipping
  # nodes already seen. An empty labels list means "follow every edge".
  def ancestors_noduplicates(nodes, labels)
    each_parent_with_edgelabel do |label, parent|
      next unless labels.empty? || labels.include?(label)
      next if nodes.include? parent

      nodes = parent.ancestors_noduplicates(nodes << parent, labels)
    end
    nodes
  end

  #### CAUTION: set_parents must be called with an "internal format" list of parents:
  #### instead of using [node, edgelabel], use [edgelabel, node]
  #
  # Replaces all incoming edges by the ones in list. The previous version
  # called add_parent(label, parent) with swapped arguments and did not
  # forward varhash, so it raised for any non-empty list.
  def set_parents(list, varhash = {})
    each_parent_with_edgelabel do |label, parent|
      remove_parent(parent, label, varhash)
    end

    list.each do |label, parent|
      add_parent(parent, label, varhash)
    end
  end

  private

  # Serialize an edge list as "label<SEP>id" pairs joined by PAIR_SEPARATOR.
  def dump_edges(edges)
    edges.map { |label, node| label.to_s + LABEL_SEPARATOR + node.id.to_s }
         .join(PAIR_SEPARATOR)
  end

  # Parse the output of dump_edges back into [label, id-string] pairs;
  # a missing or empty section yields an empty list.
  def load_edges(edges_s)
    return [] if edges_s.nil? || edges_s.empty?

    edges_s.split(PAIR_SEPARATOR).map { |pair| pair.split(LABEL_SEPARATOR) }
  end
end