shalmaneser-prep 1.2.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ # AB: 2013-12-25
2
class BerkeleyInterpreter < Tiger
  # Called once while the class body is evaluated; announce_me is not
  # defined in this class (presumably inherited from Tiger -- confirm).
  BerkeleyInterpreter.announce_me

  class << self
    # Names of the systems interpreted by this class.
    #
    # @return [Hash{String => String}] service name -> system name,
    #   in the form { "parser" => "collins", "lemmatizer" => "treetagger" }
    def systems
      { "parser" => "berkeley" }
    end

    # Names of additional systems that may be interpreted by this class.
    #
    # @return [Hash{String => String}] service name -> system name,
    #   in the same format as the hash returned by systems
    def optional_systems
      { "lemmatizer" => "treetagger", 'pos_tagger' => 'treetagger' }
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # AB: 2013-12-25
2
class StanfordInterpreter < Tiger
  # Called once while the class body is evaluated; announce_me is not
  # defined in this class (presumably inherited from Tiger -- confirm).
  StanfordInterpreter.announce_me

  class << self
    # Names of the systems interpreted by this class.
    #
    # @return [Hash{String => String}] service name -> system name,
    #   in the form { "parser" => "collins", "lemmatizer" => "treetagger" }
    def systems
      { "parser" => "stanford" }
    end

    # Names of additional systems that may be interpreted by this class.
    #
    # @return [Hash{String => String}] service name -> system name,
    #   in the same format as the hash returned by systems
    def optional_systems
      { "lemmatizer" => "treetagger", 'pos_tagger' => 'treetagger' }
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # AB, 2010-11-25
3
+
4
+
5
+ ##############################
6
+ # class for managing the parses of one file
7
class OneParsedFile
  # Bundles a parse file name with the object that knows how to iterate
  # over the sentences stored in that file.

  attr_reader :filename

  # @param filename [String] core of the filename for the parse file
  # @param complete_filename [String] complete filename of the parse file
  # @param obj_with_iterator [#each_sentence] object whose each_sentence
  #   method takes a filename and yields (st_sent, tab_sent, mapping)
  def initialize(filename, complete_filename, obj_with_iterator)
    @obj_with_iterator = obj_with_iterator
    @filename = filename
    @complete_filename = complete_filename
  end

  # Yield each parsed sentence as a tuple
  #   [salsa/tiger xml sentence, tab format sentence, mapping]
  # of a SalsaTigerSentence object, a FNTabSentence object,
  # and a hash: FNTab sentence lineno (integer) -> array:SynNode
  # pointing each tab word to one or more SalsaTigerSentence terminals.
  #
  # @yieldparam triple [Array] the three-element tuple described above
  # @return [Enumerator] when called without a block; otherwise whatever
  #   the wrapped object's each_sentence returns
  def each_sentence
    # Without this guard a block-less call fails with LocalJumpError
    # as soon as the wrapped iterator produces its first sentence.
    return enum_for(:each_sentence) unless block_given?

    @obj_with_iterator.each_sentence(@complete_filename) do |st_sent, tab_sent, mapping|
      yield [st_sent, tab_sent, mapping]
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ # AB, 2010-11-25
4
+
5
+ require 'optparse'
6
+ require 'common/prep_config_data'
7
+ require 'common/SynInterfaces'
8
module FrPrep

  # This class parses options for FrPrep.
  class OptParser

    # Main class method.
    # OP expects cmd_args to be an array like ARGV.
    #
    # Parses the command line, loads the experiment file named by
    # -e/--expfile into a FrPrepConfigData object, validates the
    # experiment ID and lets SynInterfaces verify the interfaces.
    #
    # Prints a message to $stderr and exits with status 1 when no options
    # are given, when an option or argument is invalid or missing, or when
    # no experiment file was named.
    #
    # @param cmd_args [Array<String>] command line arguments
    # @return [FrPrepConfigData] the loaded experiment configuration
    # @raise [RuntimeError] if the experiment ID contains characters
    #   other than A-Za-z0-9_
    def self.parse(cmd_args)
      @prg_name = 'frprep'
      @options = {}

      parser = create_parser

      # If no options provided print the help.
      if cmd_args.empty?
        $stderr.puts('You have to provide some options.',
                     "Please start with <#{@prg_name} --help>.")
        exit(1)
      end

      # Parse the arguments and fill the options hash.
      # Known option errors are reported in a user-friendly way;
      # anything else propagates unchanged.
      begin
        parser.parse(cmd_args)
      rescue OptionParser::InvalidArgument => e
        arg = e.message.split.last
        $stderr.puts "The provided argument #{arg} is currently not supported!"
        $stderr.puts "Please consult <#{@prg_name} --help>."
        exit(1)
      rescue OptionParser::InvalidOption => e
        $stderr.puts "You have provided an #{e.message}."
        $stderr.puts "Please consult <#{@prg_name} --help>."
        exit(1)
      rescue OptionParser::MissingArgument => e
        # e.g. "-e" given without a filename
        $stderr.puts "A required argument is missing (#{e.message})."
        $stderr.puts "Please consult <#{@prg_name} --help>."
        exit(1)
      end

      # The experiment file is mandatory; without this check a nil path
      # would be handed on to FrPrepConfigData.
      if @options[:exp_file].nil?
        $stderr.puts 'You have to provide an experiment file (-e/--expfile FILENAME).'
        $stderr.puts "Please consult <#{@prg_name} --help>."
        exit(1)
      end

      exp = FrPrepConfigData.new(@options[:exp_file])

      # AB: this stuff should be moved into FrPrepConfigData.
      # sanity checks
      # \A and \z anchor the whole string; ^ and $ would accept a
      # multi-line value as soon as one of its lines is well-formed.
      unless exp.get("prep_experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
        raise "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
      end

      SynInterfaces.check_interfaces_abort_if_missing(exp)

      exp
    end

    # Build the OptionParser instance for frprep.
    # The option handlers store their results in the class-level
    # @options hash that parse initializes before calling this method.
    #
    # @return [OptionParser]
    def self.create_parser
      OptionParser.new do |opts|
        opts.banner = [
          'Fred Preprocessor <FrPrep>. Preprocessing stage before Fred and Rosy',
          'for further frame/word sense assignment and semantic role assignment.',
          '',
          'Usage: frprep -h|-e FILENAME'
        ].join("\n") + "\n"
        opts.separator ''
        opts.separator 'Program specific options:'

        opts.on('-e', '--expfile FILENAME',
                'Provide the path to an experiment file.',
                'FrPrep will preprocess data according to the specifications',
                'given in your experiment file.',
                'This option is required!',
                'Also consider the documentation on format and features.'
               ) do |exp_file|
          @options[:exp_file] = File.expand_path(exp_file)
        end

        opts.separator ''
        opts.separator 'Common options:'

        opts.on_tail('-h', '--help', 'Show this help message.') do
          puts opts
          exit
        end
      end
    end
    # A bare `private` does not affect methods defined with `def self.`,
    # so the class method has to be hidden explicitly.
    private_class_method :create_parser

  end # class OptParser
end # module FrPrep
@@ -0,0 +1,310 @@
1
+ # Katrin Erk Oct 05
2
+ #
3
+ # useful extensions to standard classes
4
+
5
+ require 'fileutils'
6
+
7
class String
  # Does this string begin with other_string?
  #
  # Delegates to the built-in String#start_with?. The former slice-based
  # comparison (self[0..other_string.length - 1]) wrongly returned false
  # for an empty other_string, because the range 0..-1 selects the whole
  # string.
  #
  # @param other_string [String] prefix to look for
  # @return [Boolean]
  def startswith(other_string)
    start_with?(other_string)
  end

  # Does this string end with other_string?
  #
  # @param other_string [String] suffix to look for
  # @return [Boolean]
  def endswith(other_string)
    end_with?(other_string)
  end
end
17
+
18
class File
  ########
  # Check whether a given path exists,
  # and if it doesn't, make sure it is created.
  #
  # Piece together the strings in 'pieces' to make the path,
  # appending "/" to all strings if necessary.
  #
  # Exits with status 1 (via existing_dir) if the directory is still
  # missing or not accessible afterwards.
  #
  # @param pieces [Array<String>] strings, to be pieced together
  # @return [String] the path pieced together, with a trailing "/"
  def self.new_dir(*pieces)
    dir_path = File.make_path(pieces, true).first
    # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2
    FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
    # check that all went well in creating the directory
    File.existing_dir(dir_path)

    dir_path
  end

  ########
  # Same as new_dir, but the last piece is a filename: only the directory
  # part is created, the file itself is not touched.
  #
  # @param pieces [Array<String>] directory pieces followed by a filename
  # @return [String] the complete path including the filename
  def self.new_filename(*pieces)
    dir_path, whole_path = File.make_path(pieces, false)
    FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
    # check that all went well in creating the directory
    File.existing_dir(dir_path)

    whole_path
  end

  #####
  # Check whether a given path exists,
  # and report failure if it does not exist.
  #
  # Piece together the strings in 'pieces' to make the path,
  # appending "/" to all strings if necessary.
  #
  # Prints an error to $stderr and exits with status 1 if the path is not
  # an existing directory or cannot be accessed.
  #
  # @param pieces [Array<String>] strings, to be pieced together
  # @return [String] the path pieced together, with a trailing "/"
  def self.existing_dir(*pieces)
    dir_path = File.make_path(pieces, true).first

    # File.directory? is false for non-existing paths as well
    unless File.directory?(dir_path)
      $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting."
      exit(1)
    end
    unless File.executable?(dir_path)
      $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
      exit(1)
    end

    dir_path
  end

  ####
  # Like existing_dir, but the last bit is a filename.
  # Only the directory part is checked; the file itself need not exist.
  #
  # @param pieces [Array<String>] directory pieces followed by a filename
  # @return [String] the complete path including the filename
  def self.existing_filename(*pieces)
    dir_path, whole_path = File.make_path(pieces, false)

    unless File.directory?(dir_path)
      $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting"
      exit(1)
    end
    unless File.executable?(dir_path)
      $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
      exit(1)
    end

    whole_path
  end

  ####
  # Piece together the strings in 'pieces' to make a path,
  # appending "/" to all but the last string if necessary.
  #
  # If 'pieces' is already a string, take that as a one-piece path.
  #
  # If is_dir is true, also append "/" to the last piece of the string.
  #
  # The resulting path is expanded: for example, an initial
  # ~ is expanded to the setting of $HOME.
  #
  # nil entries among the directory pieces are reported on $stderr
  # and skipped.
  #
  # @param pieces [String, Array<String>] the parts of the path
  # @param is_dir [Boolean] is the path a directory?
  # @return [Array(String, String)] pair (directory_part, whole_path)
  def self.make_path(pieces, is_dir = false)
    pieces = [pieces] if pieces.kind_of? String

    # all pieces are directories, or all but the trailing filename
    dir_pieces = is_dir ? pieces : pieces[0..-2]

    usable_pieces = []
    dir_pieces.each do |piece|
      if piece.nil?
        # whoops, nil entry in name of path!
        $stderr.puts "File.make_path ERROR: nil for piece of path name."
        next
      end
      usable_pieces << (piece.end_with?('/') ? piece : piece + '/')
    end

    dir = File.expand_path(usable_pieces.join)
    # expand_path removes the final "/" again
    dir += '/' unless dir.end_with?('/')

    if is_dir
      [dir, dir]
    else
      [dir, dir + pieces[-1]]
    end
  end

end
148
+
149
+ #############################################
150
class Array

  ###
  # interleave N arrays:
  # given arrays [a1... an], [b1,...,bn], ..[z1, ...,zn]
  # return [[a1,b1, .., z1]...,[an,bn, .., zn]]
  #
  # if one array is longer than the other,
  # e.g. [a1...an], [b1,...,bm] with n > m
  # the result is
  # [[a1,b1],...[am, bm], [am+1, nil], ..., [an, nil]]
  # and analogously for m > n
  #
  # Called without arguments, every element is wrapped on its own:
  # [a1, a2].interleave => [[a1], [a2]]
  #
  # @param arrays [Array<Array>] the arrays to interleave with this one
  # @return [Array<Array>]
  def interleave(*arrays)
    # Collect all lengths in one list so that max() also works when no
    # other arrays are given (max of an empty list would be nil).
    len = ([length] + arrays.map { |a| a.length }).max
    (0...len).map { |ix| [at(ix)] + arrays.map { |a| a[ix] } }
  end

  ###
  # prepend: prepend element(s) to array
  # because I can never remember which is 'shift'
  # and which is 'unshift'
  #
  # Accepts any number of elements, like the built-in Array#prepend of
  # newer Ruby versions that this definition replaces.
  #
  # @return [Array] self
  def prepend(*elements)
    unshift(*elements)
  end

  ###
  # count: deliberately NOT redefined here.
  # The built-in Array#count(element) already counts the occurrences of
  # element using ==. The previous one-argument override additionally
  # broke the built-in forms `count` (no argument) and `count { ... }`.

  ###
  # count the number of occurrences of
  # elements from list in this array
  #
  # @param list [#include?] the elements to look for
  # @return [Integer]
  def counts(list)
    count { |my_element| list.include?(my_element) }
  end

  ###
  # draw a random sample of size N
  # from this array
  #
  # - size < 0:        nil
  # - size == 0:       []
  # - size >= length:  a copy of the whole array, in original order
  # - otherwise:       size elements drawn at random without replacement
  #
  # This definition replaces the built-in Array#sample, so the built-in
  # no-argument form is supported as well: without size, one random
  # element is returned (nil for an empty array).
  #
  # @param size [Integer, nil] number of elements to draw
  # @return [Array, Object, nil]
  def sample(size = nil)
    if size.nil?
      return empty? ? nil : at(rand(length))
    end

    return nil if size < 0
    return [] if size == 0
    return clone if size >= length

    # shuffle works on positions, so duplicate elements are sampled
    # independently of each other
    shuffle.first(size)
  end
end
221
+
222
class Float
  ###
  # round a float to the given number of decimal points
  #
  # NaN and infinite values are returned unchanged, as are values so
  # large that scaling them by 10**n overflows: Float#round would raise
  # FloatDomainError for them.
  #
  # @param n [Integer] number of decimal points to keep
  # @return [Float]
  def round_to_decpts(n)
    factor = 10**n
    scaled = self * factor
    # not finite: self is NaN or infinite, or the scaling overflowed
    return self unless scaled.finite?

    scaled.round.to_f / factor
  end
end
233
+
234
+ ################
235
module EnumerableBool
  # Boolean and arithmetic folds for any class that provides each.
  # All three methods fall back to the identity function when no block
  # is given, i.e. they then operate on the elements themselves.

  ###
  # And_{x \in X} block(x)
  #
  # @return [Boolean] false as soon as the block yields a false value for
  #   some element (remaining elements are not visited), true otherwise;
  #   true for an empty collection
  def big_and(&block)
    block ||= lambda { |x| x }
    each { |x| return false unless block.call(x) }
    true
  end

  ###
  # Or_{x \in X} block(x)
  #
  # @return [Boolean] true as soon as the block yields a true value for
  #   some element (remaining elements are not visited), false otherwise;
  #   false for an empty collection
  def big_or(&block)
    block ||= lambda { |x| x }
    each { |x| return true if block.call(x) }
    false
  end

  ###
  # Sum_{x \in X} block(x)
  #
  # @param init [Object] start value of the sum; must support +
  # @return [Object] init plus the block results of all elements
  def big_sum(init = 0, &block)
    block ||= lambda { |x| x }
    sum = init
    each { |x| sum += block.call(x) }
    sum
  end
end
271
+
272
+ ################
273
+ # Given an enumerable, distribute its items into two bins (arrays)
274
+ # depending on whether the block returns true
275
module EnumerableDistribute
  # Distribute the items of the including collection (which must provide
  # each) into two bins, depending on whether the block returns true.
  #
  # @yieldparam x [Object] one item of the collection
  # @return [Array(Array, Array)] pair [items for which the block was
  #   true, all other items], each bin in iteration order
  # @return [Enumerator] when called without a block, in the manner of
  #   the built-in Enumerable#partition
  def distribute(&block)
    return enum_for(:distribute) unless block

    accepted = []
    rejected = []
    each { |x| (block.call(x) ? accepted : rejected) << x }
    [accepted, rejected]
  end
end
289
+
290
+ #####################
291
+ # map with index
292
module MapWithIndex
  # Like map, but the block also receives the index of the element.
  # The including class must provide each_with_index.
  #
  # @yieldparam x [Object] one item of the collection
  # @yieldparam index [Integer] position of that item
  # @return [Array] the block results, in iteration order
  # @return [Enumerator] when called without a block
  def map_with_index(&block)
    return enum_for(:map_with_index) unless block

    mapped = []
    each_with_index { |x, index| mapped << block.call(x, index) }
    mapped
  end
end
303
+
304
+ # include new Mixins into array already.
305
+ # for other classes, do this when requiring StandardPkgExtensions
306
class Array
  # Mix the helper modules into Array one after the other, in this order
  # (each include places its module directly behind Array in the
  # ancestor chain, so the order is kept as in the original listing).
  [EnumerableBool, EnumerableDistribute, MapWithIndex].each do |mixin|
    include mixin
  end
end