frprep 0.0.1.prealpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138)
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,280 @@
1
+ # RosyFeaturize
2
+ # KE, SP April 05
3
+ #
4
+ # One of the main task modules of Rosy:
5
+ # featurize data and store it in the database
6
+
7
+ # Salsa packages
8
+ require "common/SynInterfaces"
9
+ require "common/ruby_class_extensions"
10
+
11
+ # Frprep packages
12
+ require "common/FrPrepConfigData"
13
+
14
+ # Rosy packages
15
+ require "rosy/FailedParses"
16
+ require "rosy/FeatureInfo"
17
+ require "rosy/InputData"
18
+ require "rosy/RosyConfigData"
19
+ require "common/RosyConventions"
20
+ require "rosy/RosySplit"
21
+ require "rosy/RosyTask"
22
+ require "rosy/RosyTrainingTestTable"
23
+ require "rosy/View"
24
+
25
# RosyFeaturize: task object that featurizes a dataset and stores the
# resulting feature rows in a database table.
#
# Usage as visible in this class: construct with the experiment
# description, the runtime options and a RosyTrainingTestTable object,
# then call perform().
class RosyFeaturize < RosyTask

  ###
  # new
  #
  # exp:     RosyConfigData object: experiment description
  # opts:    hash: runtime argument option (string) -> value (string)
  # ttt_obj: RosyTrainingTestTable object
  #
  # Reads the runtime options, adjoins the preprocessing experiment file
  # to the experiment description, and -- if a dataset was chosen --
  # prepares the input data and the DB table to write to.
  #
  # Exits with status 1 if neither --dataset nor --logID is given, or if
  # the preprocessing experiment file is not set or not readable.
  # Raises if --dataset has a value other than 'train' or 'test', or if
  # the input directory for the dataset is not set.
  def initialize(exp, opts, ttt_obj)

    ##
    # remember the experiment description
    @exp = exp
    @ttt_obj = ttt_obj

    ##
    # check runtime options;
    # in enduser mode the dataset defaults to "test"
    @dataset = $ENDUSER_MODE ? "test" : nil
    @testID = default_test_ID()
    @splitID = nil
    @append_rather_than_overwrite = false

    opts.each do |opt, arg|
      case opt
      when "--dataset"
        unless ["train", "test"].include?(arg)
          raise "--dataset needs to be either 'train' or 'test'"
        end
        @dataset = arg
      when "--logID"
        @splitID = arg
      when "--testID"
        @testID = arg
      when "--append"
        @append_rather_than_overwrite = true
      else
        # this is an option that is okay but has already been read and used by rosy.rb
      end
    end

    # further sanity checks
    if @dataset.nil? && @splitID.nil?
      $stderr.puts "I need either a dataset ('train' or 'test', option --dataset) or a splitID (option --logID) in the command line."
      exit 1
    end

    #####
    # Enduser mode: featurization only of test data
    in_enduser_mode_ensure(@dataset == "test")
    in_enduser_mode_ensure(@append_rather_than_overwrite == false)

    # announce the task
    $stderr.puts "---------"
    $stderr.puts "Rosy experiment #{@exp.get("experiment_ID")}: Featurization of dataset #{@dataset}"
    $stderr.puts "---------"

    ##
    # add preprocessing information to the experiment file object;
    # for split data (no dataset given) the training preprocessing file is used
    preproc_parameter =
      if @dataset
        "preproc_descr_file_" + @dataset
      else
        "preproc_descr_file_train"
      end

    preproc_expname = @exp.get(preproc_parameter)
    if !preproc_expname
      $stderr.puts "Please set the name of the preprocessing exp. file name"
      $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
      exit 1
    elsif !File.readable?(preproc_expname)
      $stderr.puts "Error in the experiment file:"
      $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
      exit 1
    end
    preproc_exp = FrPrepConfigData.new(preproc_expname)
    @exp.adjoin(preproc_exp)

    ###
    # find appropriate class for interpreting syntactic structures
    @interpreter_class = SynInterfaces.get_interpreter_according_to_exp(@exp)

    ###
    # prepare featurization (only when a main/test table is to be filled)
    if @dataset
      input_dir_parameter = "directory_input_" + @dataset
      unless @exp.get(input_dir_parameter)
        raise "Please set 'directory_input_train' and/or 'directory_input_test' in your experiment file."
      end
      prepare_main_featurization(File.existing_dir(@exp.get(input_dir_parameter)),
                                 @testID)
    end
  end

  #####
  # perform
  #
  # compute features and write them to the DB table.
  # Does nothing when no dataset was chosen (i.e. only a splitID was given).
  def perform()
    if @dataset
      # compute features for main or test table
      perform_main_featurization()
    end
  end

  #####################
  private

  ###
  # prepare_main_featurization
  #
  # this is an auxiliary of the new() method:
  # the part of the initialization that is performed
  # if we start a new main/test table,
  # but not if we only re-featurize the split tables
  #
  # datapath: string: name of directory with SalsaTigerXML input data
  # testID:   string: name of this testset, or nil for no testset
  #
  # Sets @input_obj and @db_table, and stores gzipped copies of the
  # input XML files below the experiment's rosy directory.
  # Raises if datapath is missing or not a directory.
  def prepare_main_featurization(datapath, testID)

    # sanity check
    unless datapath
      raise "No input path given in the preprocessing experiment file.\n" +
            "Please set 'directory_preprocessed there."
    end
    # File.directory? is false for nonexistent paths, so no separate
    # existence test is needed (File.exists? is gone in current Ruby)
    unless File.directory?(datapath)
      raise "I cannot read the input path " + datapath
    end

    ##
    # determine features and feature formats

    # create feature extraction wrapper object
    @input_obj = InputData.new(@exp, @dataset, @ttt_obj.feature_info, @interpreter_class, datapath)

    # zip and store input data
    rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
                                             "exp_ID" => @exp.get("experiment_ID")))
    zipped_input_dir = File.new_dir(rosy_dir, "input_dir", @dataset)

    unless @append_rather_than_overwrite
      # remove old input data
      Dir[File.join(zipped_input_dir, "*.gz")].each do |old_archive|
        File.delete(old_archive)
      end
    end

    # store new input data;
    # gzip is called with an argument list (no shell involved), so file
    # names containing blanks or shell metacharacters are handled correctly
    Dir[File.join(datapath, "*.xml")].each do |xml_filename|
      gz_filename = File.join(zipped_input_dir, File.basename(xml_filename) + ".gz")
      unless system("gzip", "-c", xml_filename, :out => gz_filename)
        $stderr.puts "Warning: could not store gzipped copy of #{xml_filename} in #{gz_filename}"
      end
    end

    ##
    # open appropriate DB table:
    # with --append add to the existing table, otherwise start a new one
    case @dataset
    when "train"
      @db_table =
        if @append_rather_than_overwrite
          @ttt_obj.existing_train_table()
        else
          @ttt_obj.new_train_table()
        end

    when "test"
      @db_table =
        if @append_rather_than_overwrite
          @ttt_obj.existing_test_table(testID)
        else
          @ttt_obj.new_test_table(testID)
        end

    else
      raise "Shouldn't be here"
    end
  end

  ##########
  # helper method of perform():
  # the part of featurization that is performed
  # if we start a new main/test table,
  # but not if we only re-featurize the split tables
  #
  # Phase 1 writes one DB row per instance, then the failed-parses
  # object is saved, then phase 2 updates columns group by group.
  # Progress is appended to featurize.log in the rosy directory.
  def perform_main_featurization()

    rosy_dir = @exp.instantiate("rosy_dir",
                                "exp_ID" => @exp.get("experiment_ID"))

    ###########
    # write state to log
    log_filename = File.new_filename(rosy_dir, "featurize.log")

    ##############
    # input object, compute features for **PHASE 1**:
    #
    # make features for each instance:
    # features that can be computed from this instance alone

    append_to_log(log_filename, "Featurize: Start phase 1 feature extraction")

    # feature_list: list of pairs [column_name(string), value(whatever)]
    @input_obj.each_instance_phase1 do |feature_list|
      # write instance to @db_table
      @db_table.insert_row(feature_list)
    end

    # during featurisation, an Object with info about failed parses has been created
    # now get this object and store it in a file in the datadir

    failed_parses_obj = @input_obj.get_failed_parses()

    failed_parses_filename =
      File.new_filename(rosy_dir,
                        @exp.instantiate("failed_file",
                                         "exp_ID" => @exp.get("experiment_ID"),
                                         "split_ID" => "none",
                                         "dataset" => "none"))

    failed_parses_obj.save(failed_parses_filename)

    ################
    # input object, compute features for **PHASE 2**:
    #
    # based on all features from Phase 1, make additional features

    append_to_log(log_filename, "Featurize: Start phase 2 feature extraction")

    iterator = RosyIterator.new(@ttt_obj, @exp, @dataset,
                                "testID" => @testID,
                                "splitID" => @splitID,
                                "xwise" => "frame")
    iterator.each_group do |_unused1, _unused2|
      view = iterator.get_a_view_for_current_group("*")
      begin
        @input_obj.each_phase2_column(view) do |feature_name, feature_values|
          view.update_column(feature_name, feature_values)
        end
      ensure
        # release the view even if a column update fails
        view.close()
      end
    end

    #########
    # finished!!
    #
    append_to_log(log_filename, "Featurize: Finished")
  end

  ###
  # append_to_log
  #
  # append one timestamped line "[<time>] <message>" to the given log file
  #
  # log_filename: string: name of the log file (created if missing)
  # message:      string: text to log
  def append_to_log(log_filename, message)
    File.open(log_filename, "a") do |log|
      log.puts "[#{Time.now}] #{message}"
    end
  end
end
@@ -0,0 +1,336 @@
1
+ # RosyInspect
2
+ # KE May 05
3
+ #
4
+ # One of the main task modules of Rosy:
5
+ # inspect global data and experiment-specific data of the system
6
+
7
+ # Rosy packages
8
+ require "common/RosyConventions"
9
+ require "rosy/RosySplit"
10
+ require "rosy/RosyTask"
11
+ require "rosy/RosyTrainingTestTable"
12
+ require "rosy/View"
13
+
14
+ # Frprep packages
15
+ require "common/FrPrepConfigData"
16
+
17
# RosyInspect: task object that prints information about the database
# tables, classifier runs and splits of an experiment.
class RosyInspect < RosyTask

  ###
  # new
  #
  # exp:     RosyConfigData object: experiment description
  # opts:    hash: runtime argument option (string) -> value (string)
  # ttt_obj: RosyTrainingTestTable object
  #
  # Collects the inspection tasks requested on the command line
  # (in the order in which opts yields them) and the optional test ID.
  def initialize(exp, opts, ttt_obj)

    ##
    # remember the experiment description
    @exp = exp
    @ttt_obj = ttt_obj

    ##
    # check runtime options
    @tasks = []
    @test_id = nil

    opts.each do |opt, arg|
      if ["--tables", "--tablecont", "--runs", "--split"].include?(opt)
        @tasks << [opt, arg]
      elsif opt == "--testID"
        @test_id = arg
      end
      # any other option is okay but has already been read and used by rosy.rb
    end

    ##
    # preprocessing information in the experiment file: doesn't seem to be needed,
    # disabling for now
    # ##
    # # add preprocessing information to the experiment file object
    # if @test_id
    #   # use test data
    #   preproc_parameter = "preproc_descr_file_test"
    # else
    #   # use training data
    #   preproc_parameter = "preproc_descr_file_train"
    # end
    # preproc_expname = @exp.get(preproc_parameter)
    # if not(preproc_expname)
    #   $stderr.puts "Please set the name of the preprocessing exp. file name"
    #   $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
    #   exit 1
    # elsif not(File.readable?(preproc_expname))
    #   $stderr.puts "Error in the experiment file:"
    #   $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
    #   exit 1
    # end
    # preproc_exp = FrPrepConfigData.new(preproc_expname)
    # @exp.adjoin(preproc_exp)

    # announce the task
    $stderr.puts "---------"
    $stderr.puts "Rosy experiment #{@exp.get("experiment_ID")}: Inspecting data."
    $stderr.puts "---------"
  end

  #####
  # perform
  #
  # run every inspection task that was requested as an option;
  # if none was requested, print the experiment overview instead
  def perform()
    @tasks.each do |opt, arg|
      case opt
      when "--tables"    then inspect_tables()
      when "--tablecont" then inspect_tablecont(arg)
      when "--runs"      then inspect_runs()
      when "--split"     then inspect_split(arg)
      end
    end

    inspect_experiment() if @tasks.empty?
  end

  ################################
  private

  # print to stdout:
  # for every table in the database, its name and its columns
  # (name and format), four columns per output line
  def inspect_tables()
    puts
    puts "-----------------------------------------------"
    puts "List of all tables in the database"
    puts "-----------------------------------------------"
    puts

    database = @ttt_obj.database
    database.list_tables().each do |table_name|
      puts "Table " + table_name
      puts "\tColumns: "
      print "\t"

      columns_printed = 0
      database.list_column_formats(table_name).each do |column_name, column_format|
        columns_printed += 1
        print "#{column_name} (#{column_format})\t"
        # line break (plus indentation) after every fourth column
        print "\n\t" if (columns_printed % 4).zero?
      end
      2.times { puts }
    end
    puts
  end

  # print to stdout:
  # contents of the training table (and of the test table, if a test ID
  # was given on the command line), or contents of just the table with
  # the given ID
  #
  # id_numlines: string or nil: "<table ID>:<number of lines>",
  #              "<table ID>:", or just "<number of lines>"
  def inspect_tablecont(id_numlines)
    table_id, num_lines = split_tablecont_argument(id_numlines)
    experiment_id = @exp.get("experiment_ID")

    if table_id
      # sanity check: existing table ID?
      unless @ttt_obj.database.list_tables().include?(table_id)
        $stderr.puts "Error: I don't know a table with ID #{table_id}"
        return
      end

      # handle table with given table ID
      puts
      puts "-----------------------------------------------"
      puts "Experiment #{experiment_id} table #{table_id}"
      puts "-----------------------------------------------"
      puts

      given_table = DBTable.new(@ttt_obj.database,
                                table_id,
                                "open",
                                "addcol_prefix" => @exp.get("classif_column_name"))
      inspect_tablecont_aux(given_table, num_lines)

    else
      # handle training data
      puts
      puts "-----------------------------------------------"
      puts "Experiment #{experiment_id} training data"
      puts "-----------------------------------------------"
      puts

      if @ttt_obj.train_table_exists?
        inspect_tablecont_aux(@ttt_obj.existing_train_table(), num_lines)
      else
        $stderr.puts "(No main table.)"
      end

      # handle test data
      if @test_id
        puts
        puts "-----------------------------------------------"
        puts "Experiment #{experiment_id} test data (#{@test_id})"
        puts "-----------------------------------------------"
        puts

        if @ttt_obj.test_table_exists?(@test_id)
          inspect_tablecont_aux(@ttt_obj.existing_test_table(@test_id), num_lines)
        else
          $stderr.puts "(No test table #{@test_id}.)"
        end
      end
    end
  end

  # auxiliary method for inspect_tablecont:
  # take the --tablecont argument apart
  #
  # returns: pair [table ID or nil, number of lines or nil];
  # the number of lines is passed on as the string that was given
  #
  # NOTE(review): an argument like ":5" yields the empty string as
  # table ID, which is then reported as an unknown table -- confirm
  # whether that is intended
  def split_tablecont_argument(id_numlines)
    return [nil, nil] unless id_numlines

    unless id_numlines.include?(":")
      # only number of lines given (or nothing at all)
      return [nil, id_numlines.empty? ? nil : id_numlines]
    end

    pieces = id_numlines.split(":")
    # only table ID given after all (argument ended in ':')
    return [pieces.first, nil] if pieces.length == 1

    # last piece: number of lines. Rest: table ID
    # (re-joined in case the table ID includes a ':')
    *id_pieces, line_count = pieces
    [id_pieces.join(":"), line_count]
  end

  # auxiliary method for inspect_tablecont:
  # print the column names, then the actual lines
  #
  # table_obj: DBTable object
  # num_lines: number of lines to read, or nil
  def inspect_tablecont_aux(table_obj, num_lines)

    # collect column names, with the "gold" column moved to the end
    # NOTE(review): "gold" is appended even if the table has no such
    # column -- presumably every inspected table has one; verify
    column_names = @ttt_obj.database.list_column_names(table_obj.table_name)
    column_names.delete("gold")
    column_names.push("gold")

    # print column names
    puts column_names.map { |name| "[#{name}]" }.join(" ")
    puts

    # select rows to print
    selection = [SelectTableAndColumns.new(table_obj, column_names)]
    view = DBView.new(selection,
                      [],                         # no restrictions on rows to pick
                      @ttt_obj.database,          # database access
                      "gold" => "gold",           # name of gold feature
                      "line_limit" => num_lines)  # number of lines to read

    # and print them
    view.write_to_file($stdout)
    view.close()
  end

  # print to stdout: all classification runs for the current experiment ID
  def inspect_runs()
    puts @ttt_obj.runlog_to_s()
  end

  # print to stdout: rows of the train and the test split table
  # for the given split, three rows per output line
  def inspect_split(splitID)
    puts
    puts "-----------------------------------------------"
    puts "Split #{splitID}"
    puts "-----------------------------------------------"
    puts

    ["train", "test"].each do |dataset|
      puts "Dataset " + dataset
      puts "==========="
      puts

      split_table = @ttt_obj.existing_split_table(splitID, dataset, RosySplit.split_index_colname())
      view = DBView.new([SelectTableAndColumns.new(split_table, "*")], [], @ttt_obj.database)

      rows_printed = 0
      view.each_array do |row|
        print "#{row.join(",")} "
        rows_printed += 1
        # line break after every third row
        puts if (rows_printed % 3).zero?
      end
      puts
    end
  end

  # print to stdout: overview of the current experiment --
  # name and size of main, test and split tables,
  # the feature names, and the classifier run log
  def inspect_experiment()
    puts "------------------------------------"
    puts "Experiment #{@exp.get("experiment_ID")}"
    puts "------------------------------------"
    puts

    # main table
    aux_tableinfo(@ttt_obj.maintable_name, "main table")

    # test tables
    @ttt_obj.testIDs.each do |testID|
      aux_tableinfo(@ttt_obj.testtable_name(testID), "test table #{testID}")
    end

    # split tables
    @ttt_obj.splitIDs.each do |splitID|
      aux_tableinfo(@ttt_obj.splittable_name(splitID, "train"), "split table (training data) #{splitID}")
      aux_tableinfo(@ttt_obj.splittable_name(splitID, "test"), "split table (test data) #{splitID}")
    end

    # features
    puts "-----------------------"
    puts "Features computed in this experiment:"
    puts "-----------------------"

    # four feature names per line, each line started by a line break
    @ttt_obj.feature_names.sort.each_slice(4) do |names_on_line|
      puts
      names_on_line.each { |feature_name| print feature_name, " " }
    end
    puts
    puts

    # Runs
    puts "-----------------------"
    puts "Classifier runs for this experiment:"
    puts "-----------------------"
    puts
    puts @ttt_obj.runlog_to_s()
    puts
  end

  # auxiliary method for inspect_experiment:
  # print description, name and number of rows of one table
  #
  # table_name:  string: name of DB table
  # table_descr: string: which table is it?
  def aux_tableinfo(table_name, table_descr)
    row_count = @ttt_obj.database.num_rows(table_name)

    puts "--------------------------"
    puts table_descr
    puts "--------------------------"

    puts "Name: #{table_name}"
    puts "Rows: #{row_count}"
    puts
  end

end