shalmaneser-prep 1.2.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
+ # AB: 2013-12-25
2
# Tiger subclass that declares the Berkeley parser as the system it
# interprets.
class BerkeleyInterpreter < Tiger
  # announce_me is not defined in this file.
  # NOTE(review): presumably inherited via Tiger and used to register this
  # class with the interface lookup -- confirm.
  BerkeleyInterpreter.announce_me

  class << self
    ###
    # Systems interpreted by this class.
    #
    # returns: hash service (string) -> system name (string),
    #          shaped like { "parser" => "collins", "lemmatizer" => "treetagger" }
    def systems
      { "parser" => "berkeley" }
    end

    ###
    # Additional systems whose output this class may also interpret.
    #
    # returns: hash service (string) -> system name (string),
    #          same shape as the result of systems
    def optional_systems
      {
        "lemmatizer" => "treetagger",
        'pos_tagger' => 'treetagger'
      }
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # AB: 2013-12-25
2
# Tiger subclass that declares the Stanford parser as the system it
# interprets.
class StanfordInterpreter < Tiger
  # announce_me is not defined in this file.
  # NOTE(review): presumably inherited via Tiger and used to register this
  # class with the interface lookup -- confirm.
  StanfordInterpreter.announce_me

  class << self
    ###
    # Systems interpreted by this class.
    #
    # returns: hash service (string) -> system name (string),
    #          shaped like { "parser" => "collins", "lemmatizer" => "treetagger" }
    def systems
      { "parser" => "stanford" }
    end

    ###
    # Additional systems whose output this class may also interpret.
    #
    # returns: hash service (string) -> system name (string),
    #          same shape as the result of systems
    def optional_systems
      {
        "lemmatizer" => "treetagger",
        'pos_tagger' => 'treetagger'
      }
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # AB, 2010-11-25
3
+
4
+
5
##############################
# Manages the parses of one file: remembers which parse file to read and
# delegates the actual iteration to a helper object.
class OneParsedFile
  # string: core of the filename of the parse file
  attr_reader :filename

  # filename:          string: core of filename for the parse file
  # complete_filename: string: complete filename of the parse file
  # obj_with_iterator: object responding to each_sentence(filename) and
  #                    yielding three values per sentence
  def initialize(filename, complete_filename, obj_with_iterator)
    @obj_with_iterator = obj_with_iterator
    @filename = filename
    @complete_filename = complete_filename
  end

  # Yields each parsed sentence as ONE array
  #   [salsa/tiger xml sentence, tab format sentence, mapping]
  # i.e. a SalsaTigerSentence object, a FNTabSentence object,
  # and a hash: FNTab sentence lineno (integer) -> array:SynNode
  # pointing each tab word to one or more SalsaTigerSentence terminals.
  #
  # Without a block an Enumerator over the same arrays is returned
  # instead of failing with LocalJumpError on the first sentence.
  def each_sentence
    return enum_for(:each_sentence) unless block_given?

    @obj_with_iterator.each_sentence(@complete_filename) do |st_sent, tab_sent, mapping|
      yield [st_sent, tab_sent, mapping]
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ # AB, 2010-11-25
4
+
5
+ require 'optparse'
6
+ require 'common/prep_config_data'
7
+ require 'common/SynInterfaces'
8
module FrPrep

  # Parses the command-line options of the frprep executable.
  class OptParser

    # Main class method.
    #
    # cmd_args: array of strings shaped like ARGV
    #
    # returns: the FrPrepConfigData object created from the experiment file
    #          named by -e/--expfile, once it has passed the checks below
    #
    # Writes a message to $stderr and exits with status 1 when
    # * no arguments are given,
    # * an option is unknown, lacks its argument or has an unsupported one,
    # * the required -e/--expfile option is absent.
    # Raises RuntimeError when the experiment ID contains anything but
    # A-Za-z0-9_.
    def self.parse(cmd_args)
      @prg_name = 'frprep'
      # Class-level instance variable rather than @@options, so the state is
      # not shared with subclasses.
      @options = {}

      parser = create_parser

      # If no options are provided point the user to the help.
      if cmd_args.empty?
        $stderr.puts('You have to provide some options.',
                     "Please start with <#{@prg_name} --help>.")
        exit(1)
      end

      # Parse the arguments; the -e handler fills @options.
      # Any other exception propagates unchanged.
      begin
        parser.parse(cmd_args)
      rescue OptionParser::InvalidArgument => e
        arg = e.message.split.last
        exit_with_help_hint("The provided argument #{arg} is currently not supported!")
      rescue OptionParser::InvalidOption => e
        exit_with_help_hint("You have provided an #{e.message}.")
      rescue OptionParser::MissingArgument => e
        # e.g. a bare "-e" without a filename
        exit_with_help_hint("An option is incomplete (#{e.message}).")
      end

      # The help text declares -e mandatory; enforce it here instead of
      # handing nil to FrPrepConfigData.
      if @options[:exp_file].nil?
        exit_with_help_hint('You have to provide an experiment file via -e FILENAME.')
      end

      exp = FrPrepConfigData.new(@options[:exp_file])

      # AB: this stuff should be moved into FrPrepConfigData.
      # Sanity check. \A and \z anchor the whole string; ^ and $ would
      # accept an ID with embedded newlines as long as one line matches.
      unless exp.get("prep_experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
        raise "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
      end

      # NOTE(review): going by its name this aborts when a configured
      # interface is missing -- defined outside this file, confirm.
      SynInterfaces.check_interfaces_abort_if_missing(exp)

      exp
    end

    # Builds the OptionParser for frprep. The -e handler stores the
    # expanded path of the experiment file under @options[:exp_file].
    def self.create_parser
      OptionParser.new do |opts|
        # Heredoc body and terminator must stay at column 0 for <<STOP.
        opts.banner = <<STOP
Fred Preprocessor <FrPrep>. Preprocessing stage before Fred and Rosy
for further frame/word sense assignment and semantic role assignment.

Usage: frprep -h|-e FILENAME
STOP
        opts.separator ''
        opts.separator 'Program specific options:'

        opts.on('-e', '--expfile FILENAME',
                'Provide the path to an experiment file.',
                'FrPrep will preprocess data according to the specifications',
                'given in your experiment file.',
                'This option is required!',
                'Also consider the documentation on format and features.'
               ) do |exp_file|
          @options[:exp_file] = File.expand_path(exp_file)
        end

        opts.separator ''
        opts.separator 'Common options:'

        opts.on_tail('-h', '--help', 'Show this help message.') do
          puts opts
          exit
        end
      end
    end # def self.create_parser

    # Prints message plus a pointer to --help on $stderr, then exits with
    # status 1.
    def self.exit_with_help_hint(message)
      $stderr.puts message
      $stderr.puts "Please consult <#{@prg_name} --help>."
      exit(1)
    end

    # A bare `private` does not affect methods defined with `def self.`;
    # class methods have to be hidden explicitly.
    private_class_method :create_parser, :exit_with_help_hint

  end # class OptParser
end # module FrPrep
@@ -0,0 +1,310 @@
1
+ # Katrin Erk Oct 05
2
+ #
3
+ # useful extensions to standard classes
4
+
5
+ require 'fileutils'
6
+
7
class String
  # True if this string begins with other_string.
  #
  # Delegates to the core String#start_with?. The former slice comparison
  # self[0..other_string.length - 1] became self[0..-1] for an empty
  # prefix, i.e. the whole string, and so wrongly returned false for
  # "abc".startswith("").
  def startswith(other_string)
    start_with?(other_string)
  end

  # True if this string ends with other_string
  # (false when other_string is longer than this string).
  def endswith(other_string)
    end_with?(other_string)
  end
end
17
+
18
class File
  ########
  # Check whether a given directory exists, and create it if it doesn't.
  #
  # The strings in 'pieces' are pieced together to make the path,
  # appending "/" to each of them if necessary.
  #
  # Exits with status 1 (via File.existing_dir) if the directory is still
  # missing or not accessible afterwards.
  #
  # returns: the path pieced together (expanded, with trailing "/")
  def File.new_dir(*pieces) # strings, to be pieced together
    dir_path, = File.make_path(pieces, true)
    # File.exist? instead of File.exists?: the latter was deprecated for
    # years and no longer exists in Ruby 3.2.
    FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
    # check that all went well in creating the directory
    File.existing_dir(dir_path)

    dir_path
  end

  ########
  # Same as new_dir, but the last piece is a filename: only the directory
  # part is created, the file itself is not touched.
  #
  # returns: the whole path including the filename
  def File.new_filename(*pieces)
    dir_path, whole_path = File.make_path(pieces, false)
    FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
    # check that all went well in creating the directory
    File.existing_dir(dir_path)

    whole_path
  end

  #####
  # Check whether a given directory exists and can be accessed;
  # print an error on $stderr and exit with status 1 if not.
  #
  # The strings in 'pieces' are pieced together to make the path,
  # appending "/" to each of them if necessary.
  #
  # returns: the path pieced together (expanded, with trailing "/")
  def File.existing_dir(*pieces) # strings
    dir_path, = File.make_path(pieces, true)

    # File.directory? is false for missing paths, so no separate
    # existence test is needed.
    unless File.directory?(dir_path)
      $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting."
      exit(1)
    end
    unless File.executable?(dir_path)
      $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
      exit(1)
    end

    dir_path
  end

  ####
  # Like existing_dir, but the last piece is a filename. Only the
  # directory part is checked; the file itself need not exist.
  #
  # returns: the whole path including the filename
  def File.existing_filename(*pieces) # strings
    dir_path, whole_path = File.make_path(pieces, false)

    unless File.directory?(dir_path)
      $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting"
      exit(1)
    end
    unless File.executable?(dir_path)
      $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
      exit(1)
    end

    whole_path
  end

  ####
  # Piece together the strings in 'pieces' to make a path,
  # appending "/" to all but the last string if necessary.
  #
  # If 'pieces' is already a string, it is taken as a one-piece path.
  #
  # If is_dir is true, "/" is also appended to the last piece; otherwise
  # the last piece is the filename and is appended to the directory part
  # unchanged.
  #
  # The directory part is expanded with File.expand_path: for example,
  # an initial ~ is expanded to the setting of $HOME.
  #
  # nil pieces in the directory part are reported on $stderr and skipped.
  #
  # returns: pair of strings [directory_part, whole_path];
  #          directory_part always ends in "/"
  def File.make_path(pieces,         # string or array:string
                     is_dir = false) # Boolean: is the path a directory?
    pieces = [pieces] if pieces.kind_of?(String)

    # everything but the filename belongs to the directory part
    dir_pieces = is_dir ? pieces : pieces[0..-2]

    dir = ""
    dir_pieces.each do |piece|
      if piece.nil?
        # whoops, nil entry in name of path!
        $stderr.puts "File.make_path ERROR: nil for piece of path name."
        next
      end
      dir << piece
      dir << "/" unless piece =~ /\/$/
    end

    dir = File.expand_path(dir)
    # expand_path removes the final "/" again (except for the root "/")
    dir += "/" unless dir =~ /\/$/

    is_dir ? [dir, dir] : [dir, dir + pieces[-1]]
  end
end
148
+
149
+ #############################################
150
class Array

  ###
  # interleave N arrays:
  # given arrays [a1... an], [b1,...,bn], ..[z1, ...,zn]
  # return [[a1,b1, .., z1]...,[an,bn, .., zn]]
  #
  # if one array is longer than the other,
  # e.g. [a1...an], [b1,...,bm] with n > m
  # the result is
  # [[a1,b1],...[am, bm], [am+1, nil], ..., [an, nil]]
  # and analogously for m > n
  #
  # called without arguments, each element is wrapped on its own:
  # [a1, a2].interleave => [[a1], [a2]]
  def interleave(*arrays)
    # Collect all lengths in one list before taking the maximum; with no
    # arguments the former arrays.map{...}.max was nil and the comparison
    # with length raised ArgumentError.
    len = ([length] + arrays.map { |a| a.length }).max
    (0...len).map { |ix| [at(ix)] + arrays.map { |a| a[ix] } }
  end

  ###
  # prepend: prepend element(s) to array
  # because I can never remember which is 'shift'
  # and which is 'unshift'
  #
  # Only defined where core Ruby does not already provide Array#prepend,
  # so that the core multi-argument form is not replaced by a
  # single-argument one.
  unless method_defined?(:prepend)
    def prepend(*elements)
      unshift(*elements)
    end
  end

  # count(element): the number of occurrences of element in this array is
  # provided by core Array#count. It is deliberately not redefined here:
  # a one-argument override breaks the core forms `count` and
  # `count { |x| ... }` for every user of Array in the process.

  ###
  # count the number of occurrences of
  # elements from list in this array
  def counts(list)
    count { |my_element| list.include?(my_element) }
  end

  ###
  # draw a random sample of size N from this array
  #
  # size < 0:        returns nil
  # size == 0:       returns []
  # size >= length:  returns a clone of this array, order unchanged
  # otherwise:       returns size elements from distinct positions,
  #                  in random order
  #
  # Without an argument a single random element is returned (nil for an
  # empty array), as with the core method this one replaces.
  def sample(size = nil)
    if size.nil?
      return empty? ? nil : self[rand(length)]
    end

    if size < 0
      return nil
    elsif size == 0
      return []
    elsif size >= length
      return clone
    end

    # shuffle picks by position; ranking by element value (as done
    # before) gave equal elements the same rank, so duplicates always
    # ended up next to each other in the sample.
    shuffle.first(size)
  end
end
221
+
222
class Float
  ###
  # round a float to the given number of decimal points
  #
  # n: integer number of decimal places
  #
  # returns: the rounded Float. NaN and +/-Infinity are returned
  # unchanged, as is any value too large to be scaled by 10**n without
  # overflowing (Float#round raises FloatDomainError on non-finite
  # values).
  def round_to_decpts(n)
    return self unless finite?

    factor = 10**n
    scaled = self * factor
    return self unless scaled.finite?

    scaled.round.to_f / factor
  end
end
233
+
234
+ ################
235
# Boolean and arithmetic folds for any class that provides each.
#
# All three methods default to the identity block when called without a
# block, so e.g. big_and tests the elements themselves for truth.
module EnumerableBool
  ###
  # And_{x \in X} block(x)
  #
  # returns false as soon as the block is false/nil for an element,
  # true otherwise (also for an empty collection)
  def big_and(&block)
    block = Proc.new { |x| x } unless block_given?
    each { |x| return false unless block.call(x) }
    true
  end

  ###
  # Or_{x \in X} block(x)
  #
  # returns true as soon as the block is true for an element,
  # false otherwise (also for an empty collection)
  def big_or(&block)
    block = Proc.new { |x| x } unless block_given?
    each { |x| return true if block.call(x) }
    false
  end

  ###
  # Sum_{x \in X} block(x)
  #
  # init: start value of the sum; block results are added with +
  def big_sum(init = 0, &block)
    block = Proc.new { |x| x } unless block_given?
    sum = init
    each { |x| sum += block.call(x) }
    sum
  end
end
271
+
272
################
# Splits the items of any class that provides each into two arrays,
# according to the truth value the block returns for each item.
module EnumerableDistribute
  # returns: [items for which the block was true,
  #           items for which it was false or nil],
  #          each in iteration order
  def distribute(&block)
    accepted = []
    rejected = []

    each do |item|
      bin = block.call(item) ? accepted : rejected
      bin << item
    end

    [accepted, rejected]
  end
end
289
+
290
#####################
# map with index: like map, but the block also receives the position of
# each item. Relies on each_with_index of the including class.
module MapWithIndex
  # returns: array of the block results, in iteration order
  def map_with_index(&block)
    results = []
    each_with_index do |item, position|
      results.push(block.call(item, position))
    end
    results
  end
end
303
+
304
# Mix the new modules into Array right away.
# For other classes, do this when requiring StandardPkgExtensions.
class Array
  # One include per module, in this order, so the ancestor chain is the
  # same as with three separate include statements.
  [EnumerableBool, EnumerableDistribute, MapWithIndex].each do |mixin|
    include mixin
  end
end