shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,22 +0,0 @@
1
- # AB: 2013-12-25
2
- class BerkeleyInterpreter < Tiger
3
- BerkeleyInterpreter.announce_me
4
-
5
- ###
6
- # names of the systems interpreted by this class:
7
- # returns a hash service(string) -> system name (string),
8
- # e.g.
9
- # { "parser" => "collins", "lemmatizer" => "treetagger" }
10
- def self.systems
11
- {"parser" => "berkeley"}
12
- end
13
-
14
- ###
15
- # names of additional systems that may be interpreted by this class
16
- # returns a hash service(string) -> system name(string)
17
- # same as names()
18
- def self.optional_systems
19
- {"lemmatizer" => "treetagger", 'pos_tagger' => 'treetagger'}
20
- end
21
-
22
- end
@@ -1,22 +0,0 @@
1
- # AB: 2013-12-25
2
- class StanfordInterpreter < Tiger
3
- StanfordInterpreter.announce_me
4
-
5
- ###
6
- # names of the systems interpreted by this class:
7
- # returns a hash service(string) -> system name (string),
8
- # e.g.
9
- # { "parser" => "collins", "lemmatizer" => "treetagger" }
10
- def self.systems
11
- {"parser" => "stanford"}
12
- end
13
-
14
- ###
15
- # names of additional systems that may be interpreted by this class
16
- # returns a hash service(string) -> system name(string)
17
- # same as names()
18
- def self.optional_systems
19
- {"lemmatizer" => "treetagger", 'pos_tagger' => 'treetagger'}
20
- end
21
-
22
- end
@@ -1,28 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- # AB, 2010-11-25
3
-
4
-
5
- ##############################
6
- # class for managing the parses of one file
7
- class OneParsedFile
8
- attr_reader :filename
9
-
10
- def initialize(filename, # string: core of filename for the parse file
11
- complete_filename, # string: complete filename of parse file
12
- obj_with_iterator) # object with each_sentence method, see above
13
- @obj_with_iterator = obj_with_iterator
14
- @filename = filename
15
- @complete_filename = complete_filename
16
- end
17
-
18
- # yield each parse sentence as a tuple
19
- # [ salsa/tiger xml sentence, tab format sentence, mapping]
20
- # of a SalsaTigerSentence object, a FNTabSentence object,
21
- # and a hash: FNTab sentence lineno(integer) -> array:SynNode
22
- # pointing each tab word to one or more SalsaTigerSentence terminals
23
- def each_sentence()
24
- @obj_with_iterator.each_sentence(@complete_filename) { |st_sent, tab_sent, mapping|
25
- yield [st_sent, tab_sent, mapping]
26
- }
27
- end
28
- end
@@ -1,94 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- # AB, 2010-11-25
4
-
5
- require 'optparse'
6
- require 'common/prep_config_data'
7
- require 'common/SynInterfaces'
8
- module FrPrep
9
-
10
- # This class parses options for FrPrep.
11
- class OptParser
12
-
13
- # Main class method.
14
- # OP expects cmd_args to be an array like ARGV.
15
- def self.parse(cmd_args)
16
- @prg_name = 'frprep'
17
- @@options = {}
18
-
19
- parser = create_parser
20
-
21
- # If no options provided print the help.
22
- if cmd_args.empty?
23
- $stderr.puts('You have to provide some options.',
24
- "Please start with <#{@prg_name} --help>.")
25
- exit(1)
26
- end
27
-
28
- # Parse ARGV and provide the options hash.
29
- # Check if everything is correct and handle exceptions
30
- begin
31
- parser.parse(cmd_args)
32
- rescue OptionParser::InvalidArgument => e
33
- arg = e.message.split.last
34
- $stderr.puts "The provided argument #{arg} is currently not supported!"
35
- $stderr.puts "Please colsult <#{@prg_name} --help>."
36
- exit(1)
37
- rescue OptionParser::InvalidOption => e
38
- $stderr.puts "You have provided an #{e.message}."
39
- $stderr.puts "Please colsult <#{@prg_name} --help>."
40
- exit(1)
41
- rescue
42
- raise
43
- end
44
-
45
-
46
- exp = FrPrepConfigData.new(@@options[:exp_file])
47
-
48
- # AB: this stuff should be move into FrPrepConfigData.
49
- # sanity checks
50
- unless exp.get("prep_experiment_ID") =~ /^[A-Za-z0-9_]+$/
51
- raise "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
52
- end
53
-
54
- SynInterfaces.check_interfaces_abort_if_missing(exp)
55
-
56
- exp
57
- end
58
-
59
- private
60
- def self.create_parser
61
- OptionParser.new do |opts|
62
- opts.banner = <<STOP
63
- Fred Preprocessor <FrPrep>. Preprocessing stage before Fred and Rosy
64
- for further frame/word sense assignment and semantic role assignment.
65
-
66
- Usage: frprep -h|-e FILENAME'
67
- STOP
68
- opts.separator ''
69
- opts.separator 'Program specific options:'
70
-
71
- opts.on('-e', '--expfile FILENAME',
72
- 'Provide the path to an experiment file.',
73
- 'FrPrep will preprocess data according to the specifications',
74
- 'given in your experiment file.',
75
- 'This option is required!',
76
- 'Also consider the documentation on format and features.'
77
- ) do |exp_file|
78
- @@options[:exp_file] = File.expand_path(exp_file)
79
- end
80
-
81
- opts.separator ''
82
- opts.separator 'Common options:'
83
-
84
- opts.on_tail('-h', '--help', 'Show this help message.') do
85
- puts opts
86
- exit
87
- end
88
-
89
- end
90
-
91
- end # def self.parse
92
-
93
- end # class OptParser
94
- end # module FrPrep
@@ -1,310 +0,0 @@
1
- # Katrin Erk Oct 05
2
- #
3
- # useful extensions to standard classes
4
-
5
- require 'fileutils'
6
-
7
- class String
8
- def startswith(other_string)
9
- self[0..other_string.length() - 1] == other_string
10
- end
11
-
12
- def endswith(other_string)
13
- not(other_string.length() > self.length()) and
14
- self[self.length() - other_string.length()..-1] == other_string
15
- end
16
- end
17
-
18
- class File
19
- ########
20
- # check whether a given path exists,
21
- # and if it doesn't, make sure it is created.
22
- #
23
- # piece together the strings in 'pieces' to make the path,
24
- # appending "/" to all strings if necessary
25
- #
26
- # returns: the path pieced together
27
- def File.new_dir(*pieces) # strings, to be pieced together
28
-
29
- dir_path, dummy = File.make_path(pieces, true)
30
- unless File.exists? dir_path
31
- FileUtils.mkdir_p dir_path
32
- end
33
- # check that all went well in creating the directory
34
- File.existing_dir(dir_path)
35
-
36
- return dir_path
37
- end
38
-
39
- ########
40
- # same as new_dir, but last piece is a filename
41
- def File.new_filename(*pieces)
42
- dir_path, whole_path = File.make_path(pieces, false)
43
- unless File.exists? dir_path
44
- FileUtils.mkdir_p dir_path
45
- end
46
- # check that all went well in creating the directory
47
- File.existing_dir(dir_path)
48
-
49
- return whole_path
50
- end
51
-
52
-
53
- #####
54
- # check whether a given path exists,
55
- # and report failure if it does not exist.
56
- #
57
- # piece together the strings in 'pieces' to make the path,
58
- # appending "/" to all strings if necessary
59
- #
60
- # returns: the path pieced together
61
- def File.existing_dir(*pieces) # strings
62
-
63
- dir_path, dummy = File.make_path(pieces, true)
64
-
65
- unless File.exists? dir_path and File.directory? dir_path
66
- $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting."
67
- exit(1)
68
- end
69
- unless File.executable? dir_path
70
- $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
71
- exit(1)
72
- end
73
-
74
- return dir_path
75
- end
76
-
77
- ####
78
- # like existing_dir, but last bit is filename
79
- def File.existing_filename(*pieces) # strings
80
-
81
- dir_path, whole_path = File.make_path(pieces, false)
82
-
83
- unless File.exists? dir_path and File.directory? dir_path
84
- $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting"
85
- exit(1)
86
- end
87
- unless File.executable? dir_path
88
- $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
89
- exit(1)
90
- end
91
-
92
- return whole_path
93
- end
94
-
95
- ####
96
- # piece together the strings in 'pieces' to make a path,
97
- # appending "/" to all but the last string if necessary
98
- #
99
- # if 'pieces' is already a string, take that as a one-piece path
100
- #
101
- # if dir is true, also append "/" to the last piece of the string
102
- #
103
- # the resulting path is expanded: For example, initial
104
- # ~ is expanded to the setting of $HOME
105
- #
106
- # returns: pair of strings (directory_part, whole_path)
107
- #
108
- def File.make_path(pieces, # string or array:string
109
- is_dir = false) # Boolean: is the path a directory?
110
-
111
- if pieces.kind_of? String
112
- pieces = [ pieces ]
113
- end
114
-
115
- dir = ""
116
- # iterate over all but the filename
117
- if is_dir
118
- last_dir_index = -1
119
- else
120
- last_dir_index = -2
121
- end
122
- pieces[0..last_dir_index].each { |piece|
123
- if piece.nil?
124
- # whoops, nil entry in name of path!
125
- $stderr.puts "File.make_path ERROR: nil for piece of path name."
126
- next
127
- end
128
- if piece =~ /\/$/
129
- dir << piece
130
- else
131
- dir << piece << "/"
132
- end
133
- }
134
- dir = File.expand_path(dir)
135
- # expand_path removes the final "/" again
136
- unless dir =~ /\/$/
137
- dir = dir + "/"
138
- end
139
-
140
- if is_dir
141
- return [dir, dir]
142
- else
143
- return [dir, dir + pieces[-1]]
144
- end
145
- end
146
-
147
- end
148
-
149
- #############################################
150
- class Array
151
-
152
- ###
153
- # interleave N arrays:
154
- # given arrays [a1... an], [b1,...,bn], ..[z1, ...,zn]
155
- # return [[a1,b1, .., z1]...,[an,bn, .., zn]]
156
- #
157
- # if one array is longer than the other,
158
- # e.g. [a1...an], [b1,...,bm] with n> m
159
- # the result is
160
- # [[a1,b1],...[am, bm], [am+1, nil], ..., [an, nil]]
161
- # and analogously for m>n
162
- def interleave(*arrays)
163
- len = [length(), arrays.map { |a| a.length() }.max()].max()
164
- (0..len-1).to_a.map { |ix|
165
- [at(ix)] + arrays.map { |a| a[ix] }
166
- }
167
- end
168
-
169
- ###
170
- # prepend: prepend element to array
171
- # because I can never remember which is 'shift'
172
- # and which is 'unshift'
173
- def prepend(element)
174
- unshift(element)
175
- end
176
-
177
- ###
178
- # count the number of occurrences of element in this array
179
- def count(element)
180
- num = 0
181
- each { |my_element|
182
- if my_element == element
183
- num += 1
184
- end
185
- }
186
- return num
187
- end
188
-
189
- ###
190
- # count the number of occurrences of
191
- # elements from list in this array
192
- def counts(list)
193
- num = 0
194
- each { |my_element|
195
- if list.include? my_element
196
- num += 1
197
- end
198
- }
199
- return num
200
- end
201
-
202
- ###
203
- # draw a random sample of size N
204
- # from this array
205
- def sample(size)
206
- if size < 0
207
- return nil
208
- elsif size == 0
209
- return []
210
- elsif size >= length()
211
- return self.clone()
212
- end
213
-
214
- rank = Hash.new()
215
- each { |my_element|
216
- rank[my_element] = rand()
217
- }
218
- return self.sort { |a, b| rank[a] <=> rank[b] }[0..size-1]
219
- end
220
- end
221
-
222
- class Float
223
- ###
224
- # round a float to the given number of decimal points
225
- def round_to_decpts(n)
226
- if self.nan?
227
- return self
228
- else
229
- return (self * 10**n).round.to_f / 10**n
230
- end
231
- end
232
- end
233
-
234
- ################
235
- module EnumerableBool
236
- ###
237
- # And_{x \in X} block(x)
238
- def big_and(&block)
239
- each { |x|
240
- unless block.call(x)
241
- return false
242
- end
243
- }
244
- return true
245
- end
246
-
247
- ###
248
- # Or_{x \in X} block(x)
249
- def big_or(&block)
250
- each { |x|
251
- if block.call(x)
252
- return true
253
- end
254
- }
255
- return false
256
- end
257
-
258
- ###
259
- # Sum_{x \in X} block(x)
260
- def big_sum(init = 0, &block)
261
- sum = init
262
- unless block_given?
263
- block = Proc.new { |x| x}
264
- end
265
- each { |x|
266
- sum += block.call(x)
267
- }
268
- return sum
269
- end
270
- end
271
-
272
- ################
273
- # Given an enumerable, distribute its items into two bins (arrays)
274
- # depending on whether the block returns true
275
- module EnumerableDistribute
276
- def distribute(&block)
277
- retv1 = Array.new
278
- retv2 = Array.new
279
- each { |x|
280
- if block.call(x)
281
- retv1 << x
282
- else
283
- retv2 << x
284
- end
285
- }
286
- return [retv1, retv2]
287
- end
288
- end
289
-
290
- #####################
291
- # map with index
292
- module MapWithIndex
293
- def map_with_index(&block)
294
- retv = Array.new
295
-
296
- each_with_index { |x, index|
297
- retv << block.call(x, index)
298
- }
299
-
300
- return retv
301
- end
302
- end
303
-
304
- # include new Mixins into array already.
305
- # for other classes, do this when requiring StandardPkgExtensions
306
- class Array
307
- include EnumerableBool
308
- include EnumerableDistribute
309
- include MapWithIndex
310
- end