frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,280 @@
1
+ # RosyFeaturize
2
+ # KE, SP April 05
3
+ #
4
+ # One of the main task modules of Rosy:
5
+ # featurize data and store it in the database
6
+
7
+ # Salsa packages
8
+ require "common/SynInterfaces"
9
+ require "common/ruby_class_extensions"
10
+
11
+ # Frprep packages
12
+ require "common/FrPrepConfigData"
13
+
14
+ # Rosy packages
15
+ require "rosy/FailedParses"
16
+ require "rosy/FeatureInfo"
17
+ require "rosy/InputData"
18
+ require "rosy/RosyConfigData"
19
+ require "common/RosyConventions"
20
+ require "rosy/RosySplit"
21
+ require "rosy/RosyTask"
22
+ require "rosy/RosyTrainingTestTable"
23
+ require "rosy/View"
24
+
25
+ class RosyFeaturize < RosyTask
26
+
27
+ def initialize(exp, # RosyConfigData object: experiment description
28
+ opts, # hash: runtime argument option (string) -> value (string)
29
+ ttt_obj) # RosyTrainingTestTable object
30
+
31
+ ##
32
+ # remember the experiment description
33
+
34
+ @exp = exp
35
+ @ttt_obj = ttt_obj
36
+
37
+ ##
38
+ # check runtime options
39
+ if $ENDUSER_MODE
40
+ @dataset = "test"
41
+ else
42
+ @dataset = nil
43
+ end
44
+ @testID = default_test_ID()
45
+ @splitID = nil
46
+ @append_rather_than_overwrite = false
47
+
48
+ opts.each do |opt,arg|
49
+ case opt
50
+ when "--dataset"
51
+ unless ["train", "test"].include? arg
52
+ raise "--dataset needs to be either 'train' or 'test'"
53
+ end
54
+ @dataset = arg
55
+ when "--logID"
56
+ @splitID = arg
57
+ when "--testID"
58
+ @testID = arg
59
+ when "--append"
60
+ @append_rather_than_overwrite = true
61
+ else
62
+ # this is an option that is okay but has already been read and used by rosy.rb
63
+ end
64
+ end
65
+
66
+ # further sanity checks
67
+ if @dataset.nil? and @splitID.nil?
68
+ $stderr.puts "I need either a dataset ('train' or 'test', option --dataset) or a splitID (option --logID) in the command line."
69
+ exit 1
70
+ end
71
+
72
+ #####
73
+ # Enduser mode: featurization only of test data
74
+ in_enduser_mode_ensure(@dataset == "test")
75
+ in_enduser_mode_ensure(@append_rather_than_overwrite == false)
76
+
77
+ # announce the task
78
+ $stderr.puts "---------"
79
+ $stderr.puts "Rosy experiment #{@exp.get("experiment_ID")}: Featurization of dataset #{@dataset}"
80
+ $stderr.puts "---------"
81
+
82
+ ##
83
+ # add preprocessing information to the experiment file object
84
+ if @dataset
85
+ preproc_parameter = "preproc_descr_file_" + @dataset
86
+ else
87
+ # split data
88
+ preproc_parameter = "preproc_descr_file_train"
89
+ end
90
+ preproc_expname = @exp.get(preproc_parameter)
91
+ if not(preproc_expname)
92
+ $stderr.puts "Please set the name of the preprocessing exp. file name"
93
+ $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
94
+ exit 1
95
+ elsif not(File.readable?(preproc_expname))
96
+ $stderr.puts "Error in the experiment file:"
97
+ $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
98
+ exit 1
99
+ end
100
+ preproc_exp = FrPrepConfigData.new(preproc_expname)
101
+ @exp.adjoin(preproc_exp)
102
+
103
+ ###
104
+ # find appropriate class for interpreting syntactic structures
105
+ @interpreter_class = SynInterfaces.get_interpreter_according_to_exp(@exp)
106
+
107
+ ###
108
+ # prepare featurization
109
+ if @dataset
110
+ unless @exp.get("directory_input_" + @dataset)
111
+ raise "Please set 'directory_input_train' and/or 'directory_input_test' in your experiment file."
112
+ end
113
+ prepare_main_featurization(File.existing_dir(@exp.get("directory_input_" + @dataset)),
114
+ @testID)
115
+ end
116
+ end
117
+
118
+ #####
119
+ # perform
120
+ #
121
+ # compute features and write them to the DB table
122
# Compute features and write them to the DB table.
# Does nothing when no dataset was selected (i.e. only a split ID was given).
def perform
  # compute features for main or test table
  perform_main_featurization if @dataset
end
128
+
129
+ #####################
130
+ private
131
+
132
+ ###
133
+ # prepare_main_featurization
134
+ #
135
+ # this is an auxiliary of the new() method:
136
+ # the part of the initialization that is performed
137
+ # if we start a new main/test table,
138
+ # but not if we only re-featurize the split tables
139
# Auxiliary of the constructor: the part of the initialization that is
# performed if we start a new main/test table, but not if we only
# re-featurize the split tables.
#
# Creates the InputData wrapper (@input_obj), stores gzipped copies of the
# *.xml input files below <rosy_dir>/input_dir/<dataset>/ (removing older
# *.gz files first unless in append mode), and opens the DB table that
# matches @dataset and the append setting (@db_table).
#
# Raises RuntimeError if datapath is nil or not an existing directory, or if
# @dataset is neither "train" nor "test".
def prepare_main_featurization(datapath, # string: name of directory with SalsaTigerXML input data
                               testID)   # string: name of this testset, or nil for no testset
  # sanity check
  unless datapath
    raise "No input path given in the preprocessing experiment file.\n" +
          "Please set 'directory_preprocessed' there."
  end
  # File.directory? is false for a missing path as well, so one test suffices
  # (File.exists? is gone as of Ruby 3.2)
  unless File.directory?(datapath)
    raise "I cannot read the input path " + datapath
  end

  # create feature extraction wrapper object
  @input_obj = InputData.new(@exp, @dataset, @ttt_obj.feature_info, @interpreter_class, datapath)

  # zip and store input data
  rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
                                           "exp_ID" => @exp.get("experiment_ID")))
  zipped_input_dir = File.new_dir(rosy_dir, "input_dir", @dataset)

  unless @append_rather_than_overwrite
    # remove old input data
    Dir[File.join(zipped_input_dir, "*.gz")].each { |filename| File.delete(filename) }
  end

  # store new input data; gzip is invoked without a shell so that file names
  # containing spaces or shell metacharacters are passed through verbatim
  Dir[File.join(datapath, "*.xml")].each do |filename|
    archive = File.join(zipped_input_dir, File.basename(filename) + ".gz")
    unless system("gzip", "-c", filename, :out => archive)
      # best effort, as before: report the failure but carry on
      $stderr.puts "Warning: could not gzip #{filename} to #{archive}"
    end
  end

  # open appropriate DB table: existing one in append mode, a fresh one otherwise
  @db_table =
    case @dataset
    when "train"
      if @append_rather_than_overwrite
        @ttt_obj.existing_train_table
      else
        @ttt_obj.new_train_table
      end
    when "test"
      if @append_rather_than_overwrite
        @ttt_obj.existing_test_table(testID)
      else
        @ttt_obj.new_test_table(testID)
      end
    else
      raise "Shouldn't be here"
    end
end
208
+
209
+
210
+ ##########
211
+ # helper method of perform():
212
+ # the part of featurization that is performed
213
+ # if we start a new main/test table,
214
+ # but not if we only re-featurize the split tables
215
# Helper method of perform(): the part of featurization that is performed
# if we start a new main/test table, but not if we only re-featurize the
# split tables.
#
# Phase 1 inserts one DB row per instance delivered by @input_obj, then the
# failed-parses object is saved to a file, and phase 2 updates columns
# group by group (grouped by frame). Progress is appended to
# <rosy_dir>/featurize.log.
def perform_main_featurization
  rosy_dir = @exp.instantiate("rosy_dir",
                              "exp_ID" => @exp.get("experiment_ID"))

  # write state to log: append directly instead of spawning `echo` in a
  # shell, so that log paths containing spaces or metacharacters work
  log_filename = File.new_filename(rosy_dir, "featurize.log")
  log_state = lambda do |message|
    File.open(log_filename, "a") { |log| log.puts "[#{Time.now}] Featurize: #{message}" }
  end

  ##############
  # input object, compute features for **PHASE 1**:
  #
  # make features for each instance:
  # features that can be computed from this instance alone
  log_state.call("Start phase 1 feature extraction")

  @input_obj.each_instance_phase1 do |feature_list| # list of pairs [column_name(string), value(whatever)]
    # write instance to @db_table
    @db_table.insert_row(feature_list)
  end

  # during featurisation, an object with info about failed parses has been
  # created; now get this object and store it in a file in the datadir
  failed_parses_obj = @input_obj.get_failed_parses
  failed_parses_filename =
    File.new_filename(rosy_dir,
                      @exp.instantiate("failed_file",
                                       "exp_ID" => @exp.get("experiment_ID"),
                                       "split_ID" => "none",
                                       "dataset" => "none"))
  failed_parses_obj.save(failed_parses_filename)

  ################
  # input object, compute features for **PHASE 2**:
  #
  # based on all features from Phase 1, make additional features
  log_state.call("Start phase 2 feature extraction")

  iterator = RosyIterator.new(@ttt_obj, @exp, @dataset,
                              "testID" => @testID,
                              "splitID" => @splitID,
                              "xwise" => "frame")
  # the values yielded per group are not needed here, only the current view
  iterator.each_group do |_group_descr, _group_value|
    view = iterator.get_a_view_for_current_group("*")

    @input_obj.each_phase2_column(view) do |feature_name, feature_values|
      view.update_column(feature_name, feature_values)
    end

    view.close
  end

  # finished!!
  log_state.call("Finished")
end
280
+ end
@@ -0,0 +1,336 @@
1
+ # RosyInspect
2
+ # KE May 05
3
+ #
4
+ # One of the main task modules of Rosy:
5
+ # inspect global data and experiment-specific data of the system
6
+
7
+ # Rosy packages
8
+ require "common/RosyConventions"
9
+ require "rosy/RosySplit"
10
+ require "rosy/RosyTask"
11
+ require "rosy/RosyTrainingTestTable"
12
+ require "rosy/View"
13
+
14
+ # Frprep packages
15
+ require "common/FrPrepConfigData"
16
+
17
+ class RosyInspect < RosyTask
18
+
19
+ def initialize(exp, # RosyConfigData object: experiment description
20
+ opts, # hash: runtime argument option (string) -> value (string)
21
+ ttt_obj) # RosyTrainingTestTable object
22
+
23
+ ##
24
+ # remember the experiment description
25
+
26
+ @exp = exp
27
+ @ttt_obj = ttt_obj
28
+
29
+ ##
30
+ # check runtime options
31
+
32
+ @tasks = Array.new
33
+ @test_id = nil
34
+
35
+ opts.each do |opt,arg|
36
+ case opt
37
+ when "--tables", "--tablecont", "--runs", "--split"
38
+ @tasks << [opt, arg]
39
+ when "--testID"
40
+ @test_id = arg
41
+ else
42
+ # this is an option that is okay but has already been read and used by rosy.rb
43
+ end
44
+ end
45
+
46
+ ##
47
+ # preprocessing information in the experiment file: doesn't seem to be needed,
48
+ # disabling for now
49
+ # ##
50
+ # # add preprocessing information to the experiment file object
51
+ # if @test_id
52
+ # # use test data
53
+ # preproc_parameter = "preproc_descr_file_test"
54
+ # else
55
+ # # use training data
56
+ # preproc_parameter = "preproc_descr_file_train"
57
+ # end
58
+ # preproc_expname = @exp.get(preproc_parameter)
59
+ # if not(preproc_expname)
60
+ # $stderr.puts "Please set the name of the preprocessing exp. file name"
61
+ # $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
62
+ # exit 1
63
+ # elsif not(File.readable?(preproc_expname))
64
+ # $stderr.puts "Error in the experiment file:"
65
+ # $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
66
+ # exit 1
67
+ # end
68
+ # preproc_exp = FrPrepConfigData.new(preproc_expname)
69
+ # @exp.adjoin(preproc_exp)
70
+
71
+ # announce the task
72
+ $stderr.puts "---------"
73
+ $stderr.puts "Rosy experiment #{@exp.get("experiment_ID")}: Inspecting data."
74
+ $stderr.puts "---------"
75
+ end
76
+
77
+ #####
78
+ # perform
79
+ #
80
+ # do each of the inspection tasks set as options
81
# Do each of the inspection tasks set as options, in the order collected;
# with no task at all, print the general experiment overview instead.
def perform
  @tasks.each do |opt, arg|
    case opt
    when "--tables"    then inspect_tables
    when "--tablecont" then inspect_tablecont(arg)
    when "--runs"      then inspect_runs
    when "--split"     then inspect_split(arg)
    end
  end

  inspect_experiment if @tasks.empty?
end
99
+
100
+ ################################
101
+ private
102
+
103
+ # print to stdout:
104
+ # name and column names of each table
105
+ # in this database
106
# Print to stdout the name and the columns (with their formats) of each
# table in this database, four columns per output line.
def inspect_tables
  rule = "-----------------------------------------------"
  puts "", rule, "List of all tables in the database", rule, ""

  database = @ttt_obj.database
  database.list_tables.each do |table_name|
    puts "Table " + table_name
    puts "\tColumns: "
    print "\t"

    columns_shown = 0
    database.list_column_formats(table_name).each do |column_name, column_format|
      columns_shown += 1
      print "#{column_name} (#{column_format})\t"
      # start a fresh, indented line after every fourth column
      print "\n\t" if (columns_shown % 4).zero?
    end

    puts "", ""
  end
  puts
end
130
+
131
+ # print to stdout:
132
+ # contents of both the training and the test table
133
+ # up to line N (if N is given)
134
+ # or contents of just the table with the given ID
135
# Print to stdout the contents of both the training and the test table
# up to line N (if N is given), or the contents of just the table with the
# given ID.
#
# id_numlines: nil, "N", "TABLE_ID:" or "TABLE_ID:N". The last ':' separates
# the table ID from the number of lines, so table IDs may contain ':'.
# An empty table ID (as in ":N" or ":") counts as "no table ID given".
#
# The test table is only shown if a test ID was set (@test_id). An unknown
# table ID is reported on stderr and nothing is printed.
def inspect_tablecont(id_numlines)
  table_id = nil
  num_lines = nil

  if id_numlines && !id_numlines.empty?
    if id_numlines.include?(":")
      parts = id_numlines.split(":")
      if parts.length > 1
        # last part: number of lines. Rest: table ID
        # (re-join in case the table ID includes a ':')
        num_lines = parts.pop
        table_id = parts.join(":")
      else
        # only table ID given after all (nil if the argument was just ':')
        table_id = parts.first
      end
    else
      # only number of lines given
      num_lines = id_numlines
    end
  end
  # an empty ID cannot name a table: fall back to the train/test listing
  table_id = nil if table_id && table_id.empty?

  rule = "-----------------------------------------------"
  heading = lambda do |what|
    puts
    puts rule
    puts "Experiment " + @exp.get("experiment_ID").to_s + what
    puts rule
    puts
  end

  if table_id
    # sanity check: existing table ID?
    unless @ttt_obj.database.list_tables.include?(table_id)
      $stderr.puts "Error: I don't know a table with ID #{table_id}"
      return
    end

    # handle table with given table ID
    heading.call(" table " + table_id)
    db_table = DBTable.new(@ttt_obj.database,
                           table_id,
                           "open",
                           "addcol_prefix" => @exp.get("classif_column_name"))
    inspect_tablecont_aux(db_table, num_lines)
    return
  end

  # handle training data
  heading.call(" training data")
  if @ttt_obj.train_table_exists?
    inspect_tablecont_aux(@ttt_obj.existing_train_table, num_lines)
  else
    $stderr.puts "(No main table.)"
  end

  # handle test data
  return unless @test_id

  heading.call(" test data (#{@test_id})")
  if @ttt_obj.test_table_exists?(@test_id)
    inspect_tablecont_aux(@ttt_obj.existing_test_table(@test_id), num_lines)
  else
    $stderr.puts "(No test table #{@test_id}.)"
  end
end
217
+
218
+ # auxiliary method for inspect_tablecont:
219
+ # print the actual lines
220
# Auxiliary method for inspect_tablecont: print the bracketed column names
# (with "gold" moved to the end) and then the actual table lines to stdout.
def inspect_tablecont_aux(table_obj, # DBTable object
                          num_lines) # integer: number of lines to read
  database = @ttt_obj.database

  # collect column names, with the "gold" column moved to the end
  column_names = database.list_column_names(table_obj.table_name)
  column_names.delete("gold")
  column_names << "gold"

  # print column names, followed by an empty line
  puts column_names.map { |name| "[#{name}]" }.join(" ")
  puts

  # select rows to print
  selection = SelectTableAndColumns.new(table_obj, column_names)
  view = DBView.new([selection],
                    [],                        # no restrictions on rows to pick
                    database,                  # database access
                    "gold" => "gold",          # name of gold feature
                    "line_limit" => num_lines) # number of lines to read

  # and print them
  view.write_to_file($stdout)
  view.close
end
246
+
247
+ # print to stdout: all classification runs for the current experiment ID
248
# Print to stdout: all classification runs for the current experiment ID,
# as rendered by the table object's run log.
def inspect_runs
  $stdout.puts(@ttt_obj.runlog_to_s)
end
251
+
252
+ # print to stdout: train, test sentence ID for given split
253
# Print to stdout: the rows of the train and of the test table of the given
# split (presumably sentence IDs - confirm against RosySplit), three rows
# per output line.
#
# splitID: ID of the split to show.
def inspect_split(splitID)
  rule = "-----------------------------------------------"
  puts "", rule, "Split " + splitID.to_s, rule, ""

  ["train", "test"].each do |dataset|
    puts "Dataset " + dataset
    puts "==========="
    puts

    table = @ttt_obj.existing_split_table(splitID, dataset, RosySplit.split_index_colname)
    view = DBView.new([SelectTableAndColumns.new(table, "*")], [], @ttt_obj.database)

    rows_printed = 0
    view.each_array do |row|
      rows_printed += 1
      print row.join(","), " "
      # line break after every third row
      puts if (rows_printed % 3).zero?
    end
    # release the view once its rows are printed, as the other users of
    # DBView in this code do
    view.close
    puts
  end
end
280
+
281
# Print an overview of the current experiment to stdout: name and size of
# the main table, of every test table and of every split table, the sorted
# feature names (four per line) and the log of classifier runs.
def inspect_experiment
  puts "------------------------------------"
  puts "Experiment #{@exp.get("experiment_ID").to_s}"
  puts "------------------------------------"
  puts

  # main table first, then test tables, then both halves of each split
  tables = [[@ttt_obj.maintable_name, "main table"]]
  @ttt_obj.testIDs.each do |testID|
    tables << [@ttt_obj.testtable_name(testID), "test table #{testID}"]
  end
  @ttt_obj.splitIDs.each do |splitID|
    tables << [@ttt_obj.splittable_name(splitID, "train"), "split table (training data) #{splitID}"]
    tables << [@ttt_obj.splittable_name(splitID, "test"), "split table (test data) #{splitID}"]
  end
  tables.each { |table_name, table_descr| aux_tableinfo(table_name, table_descr) }

  # features
  section_rule = "-----------------------"
  puts section_rule, "Features computed in this experiment:", section_rule

  # every group of four names starts on a new line
  @ttt_obj.feature_names.sort.each_slice(4) do |names|
    puts
    print names.map { |feature_name| "#{feature_name} " }.join
  end
  puts "", ""

  # Runs
  puts section_rule, "Classifier runs for this experiment:", section_rule
  puts
  puts @ttt_obj.runlog_to_s
  puts
end
323
+
324
# Print a short summary of one DB table to stdout: its description, its
# name and the number of rows the database reports for it.
def aux_tableinfo(table_name,  # string: name of DB table
                  table_descr) # string: which table is it?
  rule = "--------------------------"
  puts rule, table_descr, rule

  row_count = @ttt_obj.database.num_rows(table_name)
  puts "Name: #{table_name}"
  puts "Rows: #{row_count}"
  puts
end
335
+
336
+ end