shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/rosy +14 -7
  4. data/lib/rosy/FailedParses.rb +22 -20
  5. data/lib/rosy/FeatureInfo.rb +35 -31
  6. data/lib/rosy/GfInduce.rb +132 -130
  7. data/lib/rosy/GfInduceFeature.rb +86 -68
  8. data/lib/rosy/InputData.rb +59 -55
  9. data/lib/rosy/RosyConfusability.rb +47 -40
  10. data/lib/rosy/RosyEval.rb +55 -55
  11. data/lib/rosy/RosyFeatureExtractors.rb +295 -290
  12. data/lib/rosy/RosyFeaturize.rb +54 -67
  13. data/lib/rosy/RosyInspect.rb +52 -50
  14. data/lib/rosy/RosyIterator.rb +73 -67
  15. data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
  16. data/lib/rosy/RosyPruning.rb +39 -31
  17. data/lib/rosy/RosyServices.rb +116 -115
  18. data/lib/rosy/RosySplit.rb +55 -53
  19. data/lib/rosy/RosyTask.rb +7 -3
  20. data/lib/rosy/RosyTest.rb +174 -191
  21. data/lib/rosy/RosyTrain.rb +46 -50
  22. data/lib/rosy/RosyTrainingTestTable.rb +101 -99
  23. data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
  24. data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
  25. data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
  26. data/lib/rosy/external_feature_extractor.rb +35 -0
  27. data/lib/rosy/opt_parser.rb +231 -201
  28. data/lib/rosy/rosy.rb +63 -64
  29. data/lib/rosy/rosy_conventions.rb +66 -0
  30. data/lib/rosy/rosy_error.rb +15 -0
  31. data/lib/rosy/var_var_restriction.rb +16 -0
  32. data/lib/shalmaneser/rosy.rb +1 -0
  33. metadata +26 -19
  34. data/lib/rosy/ExternalConfigData.rb +0 -58
  35. data/lib/rosy/View.rb +0 -418
  36. data/lib/rosy/rosy_config_data.rb +0 -121
  37. data/test/frprep/test_opt_parser.rb +0 -94
  38. data/test/functional/functional_test_helper.rb +0 -58
  39. data/test/functional/test_fred.rb +0 -47
  40. data/test/functional/test_frprep.rb +0 -99
  41. data/test/functional/test_rosy.rb +0 -40
@@ -8,38 +8,44 @@
8
8
  # Pruning currently available:
9
9
  # Both Xue/Palmer original and a modified version for FrameNet
10
10
 
11
- require "common/ruby_class_extensions"
11
+ require "ruby_class_extensions"
12
12
 
13
13
  require "rosy/RosyFeatureExtractors"
14
- require "common/RosyConventions"
15
- require "rosy/rosy_config_data"
14
+ # require "RosyConventions"
15
+ require 'value_restriction'
16
+ require 'configuration/rosy_config_data'
16
17
  require "rosy/RosyIterator"
17
18
 
18
19
  ###
19
20
  # Pruning, derived from the Xue/Palmer algorithm
20
21
  #
21
22
  # implemented in the Interpreter Class of each individual parser
23
+ module Shalmaneser
24
+ module Rosy
22
25
  class PruneFeature < RosySingleFeatureExtractor
23
- PruneFeature.announce_me()
26
+ PruneFeature.announce_me
24
27
 
25
- def PruneFeature.feature_name()
26
- return "prune"
28
+ def self.feature_name
29
+ "prune"
27
30
  end
28
- def PruneFeature.sql_type()
29
- return "TINYINT"
31
+
32
+ def self.sql_type
33
+ "TINYINT"
30
34
  end
31
- def PruneFeature.feature_type()
32
- return "syn"
35
+
36
+ def self.feature_type
37
+ 'syn'
33
38
  end
34
- def PruneFeature.info()
39
+
40
+ def self.info
35
41
  # additional info: I am an index feature
36
- return super().concat(["index"])
42
+ super().concat(["index"])
37
43
  end
38
44
 
39
45
  ################
40
46
  private
41
47
 
42
- def compute_feature_instanceOK()
48
+ def compute_feature_instanceOK
43
49
  retv = @@interpreter_class.prune?(@@node, @@paths, @@terminals_ordered)
44
50
  if [0, 1].include? retv
45
51
  return retv
@@ -52,18 +58,18 @@ end
52
58
  ####################
53
59
  # HIER changeme
54
60
  class TigerPruneFeature < RosySingleFeatureExtractor
55
- TigerPruneFeature.announce_me()
61
+ TigerPruneFeature.announce_me
56
62
 
57
- def TigerPruneFeature.feature_name()
63
+ def TigerPruneFeature.feature_name
58
64
  return "tiger_prune"
59
65
  end
60
- def TigerPruneFeature.sql_type()
66
+ def TigerPruneFeature.sql_type
61
67
  return "TINYINT"
62
68
  end
63
- def TigerPruneFeature.feature_type()
69
+ def TigerPruneFeature.feature_type
64
70
  return "syn"
65
71
  end
66
- def TigerPruneFeature.info()
72
+ def TigerPruneFeature.info
67
73
  # additional info: I am an index feature
68
74
  return super().concat(["index"])
69
75
  end
@@ -71,7 +77,7 @@ class TigerPruneFeature < RosySingleFeatureExtractor
71
77
  ################
72
78
  private
73
79
 
74
- def compute_feature_instanceOK()
80
+ def compute_feature_instanceOK
75
81
  if @@changeme_tiger_include.include? @@node
76
82
  return 1
77
83
  else
@@ -84,9 +90,9 @@ end
84
90
 
85
91
 
86
92
  #######################3
87
- # Pruning:
88
- # packaging all methods that will be needed to
89
- # implement it,
93
+ # Pruning:
94
+ # packaging all methods that will be needed to
95
+ # implement it,
90
96
  # given that the xp_prune feature defined above
91
97
  # has been computed for each constituent during featurization.
92
98
  class Pruning
@@ -110,14 +116,14 @@ class Pruning
110
116
  return exp.get("prune")
111
117
  else
112
118
  return nil
113
- end
119
+ end
114
120
  end
115
121
 
116
122
  ###
117
- # make ValueRestriction according to the pruning option set in
123
+ # make ValueRestriction according to the pruning option set in
118
124
  # the experiment file:
119
125
  # WHERE <pruning_column_name> = 1
120
- # where <pruning_column_name> is the name of one of the
126
+ # where <pruning_column_name> is the name of one of the
121
127
  # pruning features defined above, the same name that has
122
128
  # been set as the value of the pruning parameter in the experiment file
123
129
  #
@@ -133,10 +139,10 @@ class Pruning
133
139
 
134
140
  ###
135
141
  # given the name of a DB table column and an iterator that
136
- # iterates over some data,
142
+ # iterates over some data,
137
143
  # assuming that the column describes some classifier run results,
138
144
  # choose all rows where the pruning column is 0 (i.e. all instances
139
- # that have been pruned away) and set the value of the given column
145
+ # that have been pruned away) and set the value of the given column
140
146
  # to noval for them all, marking them as "not assigned any role".
141
147
  def Pruning.integrate_pruning_into_run(run_column, # string: run column name
142
148
  iterator, # RosyIterator object
@@ -145,21 +151,23 @@ class Pruning
145
151
  # no pruning activated
146
152
  return
147
153
  end
148
-
154
+
149
155
  iterator.each_group { |group_descr_hash, group|
150
156
  # get a view of all instances for which prune == 0, i.e. that have been pruned away
151
157
  view = iterator.get_a_view_for_current_group(
152
- [run_column],
158
+ [run_column],
153
159
  [ValueRestriction.new(Pruning.colname(exp), 0)]
154
160
  )
155
161
  # make a list of column values that are all noval
156
- all_noval = Array.new
162
+ all_noval = []
157
163
  view.each_instance_s { |inst|
158
164
  all_noval << exp.get("noval")
159
165
  }
160
166
  # and set all selected instances to noval
161
167
  view.update_column(run_column, all_noval)
162
- view.close()
168
+ view.close
163
169
  }
164
170
  end
165
171
  end
172
+ end
173
+ end
@@ -5,25 +5,26 @@
5
5
  # remove database tables and experiments,
6
6
  # dump experiment to files and load from files
7
7
 
8
- require "common/ruby_class_extensions"
8
+ require "ruby_class_extensions"
9
9
 
10
10
  # Rosy packages
11
- require "common/RosyConventions"
11
+ require 'rosy/rosy_conventions'
12
12
  require "rosy/RosyIterator"
13
13
  require "rosy/RosySplit"
14
14
  require "rosy/RosyTask"
15
15
  require "rosy/RosyTrainingTestTable"
16
- require "rosy/View"
16
+ # require "rosy/View"
17
17
 
18
18
  # Frprep packages
19
- require "common/prep_config_data"
20
-
19
+ require 'configuration/frappe_config_data'
20
+ module Shalmaneser
21
+ module Rosy
21
22
  ###################################################
22
23
  class RosyServices < RosyTask
23
24
 
24
25
  def initialize(exp, # RosyConfigData object: experiment description
25
- opts, # hash: runtime argument option (string) -> value (string)
26
- ttt_obj) # RosyTrainingTestTable object
26
+ opts, # hash: runtime argument option (string) -> value (string)
27
+ ttt_obj) # RosyTrainingTestTable object
27
28
 
28
29
  ##
29
30
  # remember the experiment description
@@ -34,27 +35,24 @@ class RosyServices < RosyTask
34
35
  ##
35
36
  # check runtime options
36
37
 
37
- @tasks = Array.new
38
+ @tasks = []
38
39
  # defaults:
39
40
  @step = "onestep"
40
41
  @splitID = nil
41
- @testID = default_test_ID()
42
+ @testID = Rosy.default_test_ID
42
43
 
43
44
 
44
45
  opts.each do |opt,arg|
45
46
  case opt
46
47
  when "--deltable", "--delexp", "--delruns", "--delsplit", "--deltables"
47
- #####
48
- # In enduser mode, you cannot delete things
49
- in_enduser_mode_unavailable()
50
- @tasks << [opt, arg]
48
+ @tasks << [opt, arg]
51
49
  when "--dump", "--load", "--writefeatures"
52
- @tasks << [opt, arg]
50
+ @tasks << [opt, arg]
53
51
  when "--step"
54
- unless ["argrec", "arglab", "both", "onestep"].include? arg
55
- raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
56
- end
57
- @step = arg
52
+ unless ["argrec", "arglab", "both", "onestep"].include? arg
53
+ raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
54
+ end
55
+ @step = arg
58
56
 
59
57
  when "--logID"
60
58
  @splitID = arg
@@ -63,8 +61,8 @@ class RosyServices < RosyTask
63
61
  @testID = arg
64
62
 
65
63
  else
66
- # this is an option that is okay but has already been read and used by rosy.rb
67
- end
64
+ # this is an option that is okay but has already been read and used by rosy.rb
65
+ end
68
66
  end
69
67
  # announce the task
70
68
  $stderr.puts "---------"
@@ -76,17 +74,17 @@ class RosyServices < RosyTask
76
74
  # perform
77
75
  #
78
76
  # do each of the inspection tasks set as options
79
- def perform()
77
+ def perform
80
78
  @tasks.each { |opt, arg|
81
79
  case opt
82
80
  when "--deltable"
83
81
  del_table(arg)
84
82
  when "--deltables"
85
- del_tables()
83
+ del_tables
86
84
  when "--delexp"
87
- del_experiment()
85
+ del_experiment
88
86
  when "--delruns"
89
- del_runs()
87
+ del_runs
90
88
  when "--delsplit"
91
89
  del_split(arg)
92
90
  when "--dump"
@@ -94,14 +92,14 @@ class RosyServices < RosyTask
94
92
  when "--load"
95
93
  load_experiment(arg)
96
94
  when "--writefeatures"
97
- write_features(arg)
95
+ write_features(arg)
98
96
  end
99
97
  }
100
98
  end
101
99
 
102
100
  ################################
103
101
  private
104
-
102
+
105
103
  #####
106
104
  # del_table
107
105
  #
@@ -110,14 +108,14 @@ class RosyServices < RosyTask
110
108
  # If the user gives an answer starting in "y", the table is deleted.
111
109
  def del_table(table_name) # string: name of DB table
112
110
  # check if we have this table
113
- unless @ttt_obj.database.list_tables().include? table_name
111
+ unless @ttt_obj.database.list_tables.include? table_name
114
112
  $stderr.puts "Cannot find DB table #{table_name}."
115
113
  return
116
114
  end
117
115
 
118
116
  # really delete?
119
117
  $stderr.print "Really delete DB table #{table_name}? [y/n] "
120
- answer = gets().chomp()
118
+ answer = gets.chomp
121
119
  unless answer =~ /^y/
122
120
  return
123
121
  end
@@ -139,12 +137,12 @@ class RosyServices < RosyTask
139
137
  # for all the tables in the database, present their name and size,
140
138
  # and ask if it should be deleted.
141
139
  # this is good for cleaning up!
142
-
143
- def del_tables()
144
- @ttt_obj.database.list_tables().each { |table_name|
140
+
141
+ def del_tables
142
+ @ttt_obj.database.list_tables.each { |table_name|
145
143
 
146
144
  STDERR.print "Delete table #{table_name} (num. rows #{@ttt_obj.database.num_rows(table_name)})? [y/n] "
147
- answer = gets().chomp()
145
+ answer = gets.chomp
148
146
 
149
147
  if answer =~ /^y/
150
148
  deletion_worked = false
@@ -154,10 +152,10 @@ class RosyServices < RosyTask
154
152
  rescue
155
153
  deletion_worked = false
156
154
  end
157
- if deletion_worked
155
+ if deletion_worked
158
156
  STDERR.puts "Table #{name} removed."
159
157
  else
160
- $stderr.puts "Error: Removal of #{name} failed."
158
+ $stderr.puts "Error: Removal of #{name} failed."
161
159
  end
162
160
  end
163
161
  }
@@ -169,31 +167,31 @@ class RosyServices < RosyTask
169
167
  # remove the experiment described by the experiment file @exp
170
168
  # The method verifies whether the experiment should be deleted.
171
169
  # If the user gives an answer starting in "y", the experiment is deleted.
172
- def del_experiment()
170
+ def del_experiment
173
171
  data_dir = File.new_dir(@exp.instantiate("rosy_dir", "exp_ID" => @exp.get("experiment_ID")))
174
172
 
175
173
  # no data? then don't do anything
176
174
  if not(@ttt_obj.train_table_exists?) and
177
- @ttt_obj.testIDs().empty? and
178
- @ttt_obj.splitIDs().empty? and
175
+ @ttt_obj.testIDs.empty? and
176
+ @ttt_obj.splitIDs.empty? and
179
177
  Dir[data_dir + "*"].empty?
180
178
  $stderr.puts "No data to delete for experiment #{@exp.get("experiment_ID")}."
181
179
  # we have just made the directory data_dir by calling @exp.new_dir
182
180
  # undo that
183
181
  %x{rmdir #{data_dir}}
184
182
  return
185
- end
186
-
183
+ end
184
+
187
185
 
188
186
  # really delete?
189
187
  $stderr.print "Really delete experiment #{@exp.get("experiment_ID")}? [y/n] "
190
- answer = gets().chomp()
188
+ answer = gets.chomp
191
189
  unless answer =~ /^y/
192
190
  return
193
191
  end
194
192
 
195
193
  # remove main table
196
- @ttt_obj.remove_train_table()
194
+ @ttt_obj.remove_train_table
197
195
 
198
196
  # remove test tables
199
197
  @ttt_obj.testIDs.each { |testID|
@@ -218,17 +216,17 @@ class RosyServices < RosyTask
218
216
  # del_runs
219
217
  #
220
218
  # interactively remove runs from the current experiment
221
- def del_runs()
219
+ def del_runs
222
220
  # iterate through all tables and runs
223
- @ttt_obj.runlog_to_s_list().each { |table_descr|
221
+ @ttt_obj.runlog_to_s_list.each { |table_descr|
224
222
  unless table_descr["runlist"].empty?
225
223
  # print description of the table
226
224
  $stderr.puts table_descr["header"]
227
-
225
+
228
226
  table_descr["runlist"].each { |run_id, run_descr|
229
227
  $stderr.puts run_descr
230
228
  $stderr.puts "Delete this run? [y/n] "
231
- answer = gets().chomp()
229
+ answer = gets.chomp
232
230
  if answer =~ /^[yY]/
233
231
  @ttt_obj.delete_runlog(table_descr["table_name"], run_id)
234
232
  end
@@ -239,7 +237,7 @@ class RosyServices < RosyTask
239
237
 
240
238
  ##############
241
239
  # del_split
242
- #
240
+ #
243
241
  # remove the split with the given ID
244
242
  # from the current experiment:
245
243
  # delete split tables, remove from list of test and split tables
@@ -253,7 +251,7 @@ class RosyServices < RosyTask
253
251
 
254
252
  # really delete?
255
253
  $stderr.print "Really delete split #{splitID} of experiment #{@exp.get("experiment_ID")}? [y/n] "
256
- answer = gets().chomp()
254
+ answer = gets.chomp
257
255
  unless answer =~ /^y/
258
256
  return
259
257
  end
@@ -264,7 +262,7 @@ class RosyServices < RosyTask
264
262
 
265
263
  # remove classifiers for split
266
264
  ["argrec", "arglab", "onestep"].each { |step|
267
- classif_dir = classifier_directory_name(@exp,step, splitID)
265
+ classif_dir = Rosy::classifier_directory_name(@exp,step, splitID)
268
266
  %x{rm -rf #{classif_dir}}
269
267
  }
270
268
  end
@@ -283,7 +281,7 @@ class RosyServices < RosyTask
283
281
  dir = File.new_dir(directory)
284
282
  else
285
283
  # use the default directory: <rosy_dir>/tables
286
- dir = File.new_dir(@exp.instantiate("rosy_dir",
284
+ dir = File.new_dir(@exp.instantiate("rosy_dir",
287
285
  "exp_ID" => @exp.get("experiment_ID")),
288
286
  "your_feature_files")
289
287
  end
@@ -292,7 +290,7 @@ class RosyServices < RosyTask
292
290
  ##
293
291
  # check: if this is about a split, do we have it?
294
292
  if @splitID
295
- unless @ttt_obj.splitIDs().include?(@splitID)
293
+ unless @ttt_obj.splitIDs.include?(@splitID)
296
294
  $stderr.puts "Sorry, I have no data for split ID #{@splitID}."
297
295
  exit 1
298
296
  end
@@ -304,30 +302,30 @@ class RosyServices < RosyTask
304
302
  $stderr.puts "Writing data according to split '#{@splitID}'"
305
303
  elsif @testID
306
304
  # do we have this test set? else write only training set
307
- if @ttt_obj.testIDs().include?(@testID)
308
- $stderr.puts "Writing training data, and test data with ID '#{@testID}'"
305
+ if @ttt_obj.testIDs.include?(@testID)
306
+ $stderr.puts "Writing training data, and test data with ID '#{@testID}'"
309
307
  else
310
308
  $stderr.puts "Warning: no data for test ID '#{@testID}', writing only training data."
311
- @testID = nil
309
+ @testID = nil
312
310
  end
313
311
  end
314
-
312
+
315
313
  $stderr.puts "Writing data for classification step '#{@step}'."
316
314
  $stderr.puts
317
315
 
318
316
  ##
319
317
  # write training data
320
318
  $stderr.puts "Writing training sets"
321
- iterator = RosyIterator.new(@ttt_obj, @exp, "train",
322
- "step" => @step,
323
- "splitID" => @splitID,
324
- "prune" => true)
319
+ iterator = RosyIterator.new(@ttt_obj, @exp, "train",
320
+ "step" => @step,
321
+ "splitID" => @splitID,
322
+ "prune" => true)
325
323
 
326
324
  # get the list of relevant features,
327
- # remove the features that describe the unit by which we train,
325
+ # remove the features that describe the unit by which we train,
328
326
  # since they are going to be constant throughout the training file
329
- features = @ttt_obj.feature_info.get_model_features(@step) -
330
- iterator.get_xwise_column_names()
327
+ features = @ttt_obj.feature_info.get_model_features(@step) -
328
+ iterator.get_xwise_column_names
331
329
 
332
330
  # but add the gold feature
333
331
  unless features.include? "gold"
@@ -337,14 +335,14 @@ class RosyServices < RosyTask
337
335
 
338
336
  write_features_aux(dir, "training", @step, iterator, features)
339
337
 
340
- ##
338
+ ##
341
339
  # write test data
342
340
  if @testID
343
341
  $stderr.puts "Writing test sets"
344
342
  filename = dir + "test.data"
345
- iterator = RosyIterator.new(@ttt_obj, @exp, "test",
346
- "step" => @step,
347
- "testID" => @testID,
343
+ iterator = RosyIterator.new(@ttt_obj, @exp, "test",
344
+ "step" => @step,
345
+ "testID" => @testID,
348
346
  "splitID" => @splitID,
349
347
  "prune" => true)
350
348
  write_features_aux(dir, "test", @step, iterator, features)
@@ -354,39 +352,39 @@ class RosyServices < RosyTask
354
352
  ########
355
353
  # write_features_aux: actually do the writing
356
354
  def write_features_aux(dir, # string: directory to write to
357
- dataset, # string: training or test
358
- step, # string: argrec, arglab, onestep
359
- iterator, # RosyIterator tuned to what we're writing
360
- features) # array:string: list of features to include in views
355
+ dataset, # string: training or test
356
+ step, # string: argrec, arglab, onestep
357
+ iterator, # RosyIterator tuned to what we're writing
358
+ features) # array:string: list of features to include in views
361
359
 
362
360
  # proceed one group at a time
363
361
  iterator.each_group { |group_descr_hash, group|
364
362
  # get data for this group
365
363
  view = iterator.get_a_view_for_current_group(features)
366
-
364
+
367
365
  #filename: e.g. directory/training.Statement.data
368
- filename = dir + dataset + "." +
369
- step + "." +
370
- group.gsub(/\s/, "_") + ".data"
366
+ filename = dir + dataset + "." +
367
+ step + "." +
368
+ group.gsub(/\s/, "_") + ".data"
371
369
 
372
370
  begin
373
- file = File.new(filename, "w")
371
+ file = File.new(filename, "w")
374
372
  rescue
375
- $stderr.puts "Error: Could not write to file #{filename}, exiting."
376
- exit 1
373
+ $stderr.puts "Error: Could not write to file #{filename}, exiting."
374
+ exit 1
377
375
  end
378
376
 
379
377
  view.each_instance_s { |instance_string|
380
- # change punctuation to _PUNCT_
381
- # and change empty space to _
382
- # because otherwise some classifiers may spit
383
- file.puts prepare_output_for_classifiers(instance_string)
378
+ # change punctuation to _PUNCT_
379
+ # and change empty space to _
380
+ # because otherwise some classifiers may spit
381
+ file.puts Rosy::prepare_output_for_classifiers(instance_string)
384
382
  }
385
- file.close()
386
- view.close()
383
+ file.close
384
+ view.close
387
385
  }
388
386
  end
389
-
387
+
390
388
  ##############3
391
389
  # dump_experiment
392
390
  #
@@ -412,7 +410,7 @@ class RosyServices < RosyTask
412
410
  dir = File.new_dir(directory)
413
411
  else
414
412
  # use the default directory: <rosy_dir>/tables
415
- dir = File.new_dir(@exp.instantiate("rosy_dir",
413
+ dir = File.new_dir(@exp.instantiate("rosy_dir",
416
414
  "exp_ID" => @exp.get("experiment_ID")),
417
415
  "tables")
418
416
  end
@@ -420,7 +418,7 @@ class RosyServices < RosyTask
420
418
 
421
419
  ###
422
420
  # dump main table
423
-
421
+
424
422
  $stderr.puts "Dumping main table"
425
423
  filename = dir + "main"
426
424
  begin
@@ -432,13 +430,13 @@ class RosyServices < RosyTask
432
430
 
433
431
  if @ttt_obj.train_table_exists?
434
432
  iterator = RosyIterator.new(@ttt_obj, @exp, "train", "xwise" => "frame")
435
- table_obj = @ttt_obj.existing_train_table()
433
+ table_obj = @ttt_obj.existing_train_table
436
434
  aux_dump(iterator, file, table_obj)
437
435
  end
438
436
 
439
437
  ###
440
438
  # dump test tables
441
-
439
+
442
440
  unless @ttt_obj.testIDs.empty?
443
441
  $stderr.print "Dumping test tables: "
444
442
  end
@@ -452,7 +450,7 @@ class RosyServices < RosyTask
452
450
  $stderr.puts "Sorry, couldn't write to #{filename}"
453
451
  return
454
452
  end
455
-
453
+
456
454
  if @ttt_obj.test_table_exists?(testID)
457
455
  iterator = RosyIterator.new(@ttt_obj, @exp, "test", "testID" => testID, "xwise" => "frame")
458
456
  table_obj = @ttt_obj.existing_test_table(testID)
@@ -469,7 +467,7 @@ class RosyServices < RosyTask
469
467
  end
470
468
  @ttt_obj.splitIDs.each { |splitID|
471
469
  ["train", "test"].each { |dataset|
472
-
470
+
473
471
  filename = dir + "split." + dataset + "." + splitID
474
472
  $stderr.print filename, " "
475
473
  begin
@@ -481,7 +479,7 @@ class RosyServices < RosyTask
481
479
 
482
480
  if @ttt_obj.split_table_exists?(splitID, dataset)
483
481
  iterator = RosyIterator.new(@ttt_obj, @exp, dataset, "splitID" => splitID, "xwise" => "frame")
484
- table_obj = @ttt_obj.existing_split_table(splitID, dataset, RosySplit.split_index_colname())
482
+ table_obj = @ttt_obj.existing_split_table(splitID, dataset, RosySplit.split_index_colname)
485
483
  aux_dump(iterator, file, table_obj)
486
484
  end
487
485
  }
@@ -492,7 +490,7 @@ class RosyServices < RosyTask
492
490
 
493
491
  ###
494
492
  # dump classification run logs
495
- @ttt_obj.to_file(dir)
493
+ @ttt_obj.to_file(dir)
496
494
  end
497
495
 
498
496
  ################3
@@ -502,10 +500,10 @@ class RosyServices < RosyTask
502
500
  def aux_dump(iterator, # RosyIterator object, refers to table to write
503
501
  file, # stream: write to this file
504
502
  table_obj) # DB table to be written
505
-
503
+
506
504
  # write all columns except the autoincrement index
507
505
  # columns_to_write: array:string*string column name, column SQL type
508
- columns_to_write = Array.new()
506
+ columns_to_write = []
509
507
  @ttt_obj.database.list_column_formats(table_obj.table_name).each { |column_name, column_type|
510
508
  unless column_name == table_obj.index_name
511
509
  # check: when loading we make assumptions on the field types that can happen.
@@ -520,27 +518,27 @@ class RosyServices < RosyTask
520
518
  end
521
519
  }
522
520
  columns_as_array = columns_to_write.map { |name, type| name}
523
-
521
+
524
522
  # write column names and types
525
523
  file.puts columns_to_write.map { |name, type| name }.join(",")
526
524
  file.puts columns_to_write.map { |name, type| type }.join(",")
527
-
525
+
528
526
  # access groups and write data
529
-
527
+
530
528
  iterator.each_group { |hash, framename|
531
529
  view = iterator.get_a_view_for_current_group(columns_as_array)
532
530
 
533
531
  # write instances
534
532
  view.each_hash { |instance|
535
- file.puts columns_to_write.map { |name, type|
533
+ file.puts columns_to_write.map { |name, type|
536
534
  # get column entries in order of column names
537
- instance[name]
535
+ instance[name]
538
536
  }.map { |entry|
539
537
  # remove commas
540
- entry.to_s.gsub(/,/, "COMMA")
538
+ entry.to_s.gsub(/,/, "COMMA")
541
539
  }.join(",")
542
540
  }
543
- view.close()
541
+ view.close
544
542
  }
545
543
  end
546
544
 
@@ -567,7 +565,7 @@ class RosyServices < RosyTask
567
565
  $stderr.puts "Load experiment data from files into the current experiment:"
568
566
  $stderr.puts "This will overwrite existing data of experiment #{@exp.get("experiment_ID")}."
569
567
  $stderr.print "Proceed? [y/n] "
570
- answer = gets().chomp()
568
+ answer = gets.chomp
571
569
  unless answer =~ /^y/
572
570
  return
573
571
  end
@@ -586,7 +584,8 @@ class RosyServices < RosyTask
586
584
  $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
587
585
  exit 1
588
586
  end
589
- preproc_exp = FrPrepConfigData.new(preproc_expname)
587
+ # @note Remove this dependency.
588
+ preproc_exp = ::Shalmaneser::Configuration::FrappeConfigData.new(preproc_expname)
590
589
  @exp.adjoin(preproc_exp)
591
590
 
592
591
  ###
@@ -597,8 +596,8 @@ class RosyServices < RosyTask
597
596
  dir = File.existing_dir(directory)
598
597
  else
599
598
  # default: <rosy_dir>/tables
600
- dir = File.existing_dir(@exp.instantiate("rosy_dir",
601
- "exp_ID" => @exp.get("experiment_ID")),
599
+ dir = File.existing_dir(@exp.instantiate("rosy_dir",
600
+ "exp_ID" => @exp.get("experiment_ID")),
602
601
  "tables")
603
602
  end
604
603
  $stderr.puts "Reading experiment data from directory " + dir
@@ -639,13 +638,13 @@ class RosyServices < RosyTask
639
638
 
640
639
  file = File.new(dir + filename)
641
640
  col_names, col_types = aux_read_colnames(file, nil)
642
- table_obj = @ttt_obj.new_split_table(splitID, dataset, RosySplit.split_index_colname())
641
+ table_obj = @ttt_obj.new_split_table(splitID, dataset, RosySplit.split_index_colname)
643
642
  # write file contents to the DB table
644
643
  aux_transfer_to_table(file, table_obj, col_names, col_types)
645
644
 
646
645
  else
647
646
  # not a filename we recognize
648
- # don't do anything with it
647
+ # don't do anything with it
649
648
  end
650
649
  }
651
650
 
@@ -672,11 +671,11 @@ class RosyServices < RosyTask
672
671
  # sanity check: features here the same as in the experiment file?
673
672
  if exp_colnames
674
673
  feature_colnames = colnames.select { |c| c !~ /^#{@exp.get("classif_column_name")}/ }
675
- unless feature_colnames.sort() == exp_colnames.sort()
674
+ unless feature_colnames.sort == exp_colnames.sort
676
675
  raise "Feature name mismatch!\nIn the experiment file, you have specified:\n" +
677
- exp_colnames.sort().join(",") +
676
+ exp_colnames.sort.join(",") +
678
677
  "\nIn the table I'm reading from file I got:\n" +
679
- feature_colnames.sort().join(",")
678
+ feature_colnames.sort.join(",")
680
679
  end
681
680
  else
682
681
  # no check of column name match requested
@@ -684,16 +683,16 @@ class RosyServices < RosyTask
684
683
  coltypes = aux_read_columns(file)
685
684
  return [colnames, coltypes]
686
685
  end
687
-
686
+
688
687
 
689
688
  ##
690
689
  # aux_transfer_columns
691
- #
690
+ #
692
691
  # auxiliary method for load_experiment:
693
692
  # read a line from file, split it at commas
694
693
  # to arrive at the contents
695
694
  def aux_read_columns(file) # stream: file
696
- line = file.gets()
695
+ line = file.gets
697
696
  if line.nil?
698
697
  return nil
699
698
  end
@@ -724,12 +723,12 @@ class RosyServices < RosyTask
724
723
  }
725
724
 
726
725
  # write file contents to the DB table
727
- names_and_values = Array.new
726
+ names_and_values = []
728
727
  while row = aux_read_columns(file)
729
- names_and_values.clear()
728
+ names_and_values.clear
730
729
  col_names.each_with_index { |name, ix|
731
730
  unless row[ix].nil?
732
- if col_types[ix] =~ /^(TINYINT|tinyint)/
731
+ if col_types[ix] =~ /^(TINYINT|tinyint)/
733
732
  # integer value: map!
734
733
  names_and_values << [name, row[ix].to_i]
735
734
  else
@@ -742,3 +741,5 @@ class RosyServices < RosyTask
742
741
  end
743
742
  end
744
743
  end
744
+ end
745
+ end