shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/rosy +14 -7
  4. data/lib/rosy/FailedParses.rb +22 -20
  5. data/lib/rosy/FeatureInfo.rb +35 -31
  6. data/lib/rosy/GfInduce.rb +132 -130
  7. data/lib/rosy/GfInduceFeature.rb +86 -68
  8. data/lib/rosy/InputData.rb +59 -55
  9. data/lib/rosy/RosyConfusability.rb +47 -40
  10. data/lib/rosy/RosyEval.rb +55 -55
  11. data/lib/rosy/RosyFeatureExtractors.rb +295 -290
  12. data/lib/rosy/RosyFeaturize.rb +54 -67
  13. data/lib/rosy/RosyInspect.rb +52 -50
  14. data/lib/rosy/RosyIterator.rb +73 -67
  15. data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
  16. data/lib/rosy/RosyPruning.rb +39 -31
  17. data/lib/rosy/RosyServices.rb +116 -115
  18. data/lib/rosy/RosySplit.rb +55 -53
  19. data/lib/rosy/RosyTask.rb +7 -3
  20. data/lib/rosy/RosyTest.rb +174 -191
  21. data/lib/rosy/RosyTrain.rb +46 -50
  22. data/lib/rosy/RosyTrainingTestTable.rb +101 -99
  23. data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
  24. data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
  25. data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
  26. data/lib/rosy/external_feature_extractor.rb +35 -0
  27. data/lib/rosy/opt_parser.rb +231 -201
  28. data/lib/rosy/rosy.rb +63 -64
  29. data/lib/rosy/rosy_conventions.rb +66 -0
  30. data/lib/rosy/rosy_error.rb +15 -0
  31. data/lib/rosy/var_var_restriction.rb +16 -0
  32. data/lib/shalmaneser/rosy.rb +1 -0
  33. metadata +26 -19
  34. data/lib/rosy/ExternalConfigData.rb +0 -58
  35. data/lib/rosy/View.rb +0 -418
  36. data/lib/rosy/rosy_config_data.rb +0 -121
  37. data/test/frprep/test_opt_parser.rb +0 -94
  38. data/test/functional/functional_test_helper.rb +0 -58
  39. data/test/functional/test_fred.rb +0 -47
  40. data/test/functional/test_frprep.rb +0 -99
  41. data/test/functional/test_rosy.rb +0 -40
@@ -1,50 +1,54 @@
1
1
  # RosyIterator
2
2
  # KE May 2005
3
3
  #
4
- # RosyIterator is a class that
5
- # * reads the "xwise" parameters in the experiment file to
4
+ # RosyIterator is a class that
5
+ # * reads the "xwise" parameters in the experiment file to
6
6
  # determine the portions in which data is to be fed to classifiers,
7
- # and offers an iterator that iterates through every group to
7
+ # and offers an iterator that iterates through every group to
8
8
  # be trained/tested on
9
9
  # * constructs views matching the given "xwise" group.
10
- #
10
+ #
11
11
  # RosyIterator incorporates the following services:
12
- # - choosing the right DB table, depending on
12
+ # - choosing the right DB table, depending on
13
13
  # whether training/test data is being accessed,
14
14
  # and with or without a splitlog
15
15
  # - making and adding all currently available Dynamic Gold objects
16
- # (i.e. objects that are capable of mapping the gold column to
16
+ # (i.e. objects that are capable of mapping the gold column to
17
17
  # something else)
18
18
  # - initializing a view, potentially modified depending on the assignment step:
19
19
  # argrec -> use dynamic gold, mapping gold labels to "FE" or "NONE"
20
20
  # arglab -> use only those rows that have "FE" assigned from the argrec step
21
21
  #
22
22
  # Setting "xwise": An "xwise" entry in the hash passed on to RosyIterator.new()
23
- # overrides all other settings. If that isn't given, the "xwise_" + step
23
+ # overrides all other settings. If that isn't given, the "xwise_" + step
24
24
  # (xwise_argrec, xwise_arglab, xwise_onestep) from the experiment file is read.
25
25
  # If that hasn't been set either, the default is frame-wise.
26
26
 
27
- require 'common/ruby_class_extensions'
27
+ require 'ruby_class_extensions'
28
28
 
29
- require 'rosy/View'
30
- require "common/RosyConventions"
31
- require "rosy/RosyPruning"
29
+ # require 'rosy/View'
30
+ # require "RosyConventions"
31
+ require 'value_restriction'
32
+ require 'db/select_table_and_columns'
33
+ require 'db/db_view'
32
34
  require "rosy/RosySplit"
33
35
  require "rosy/RosyTrainingTestTable"
34
36
 
37
+ module Shalmaneser
38
+ module Rosy
35
39
  class RosyIterator
36
40
 
37
41
  ###
38
42
  # new
39
43
  #
40
- # open the correct database table,
44
+ # open the correct database table,
41
45
  # initialize Dynamic Gold objects
42
46
 
43
47
 
44
- def initialize(ttt_obj, # RosyTrainingTestTable object
45
- exp, # RosyConfigData object: experiment file
46
- dataset, # string: train/test
47
- var_hash = {}) # further arguments:
48
+ def initialize(ttt_obj, # RosyTrainingTestTable object
49
+ exp, # RosyConfigData object: experiment file
50
+ dataset, # string: train/test
51
+ var_hash = {}) # further arguments:
48
52
  # step: string: argrec/arglab/onestep, or nil (= no manipulation of the view)
49
53
  # testID: string: ID of test set, or nil
50
54
  # splitID string: splitlog ID, or nil if no split is to be used
@@ -59,7 +63,7 @@ class RosyIterator
59
63
  @splitID = var_hash["splitID"]
60
64
  @step = var_hash["step"]
61
65
  @testID = var_hash["testID"]
62
-
66
+
63
67
  # object variables we are going to use below
64
68
  @db_table = nil # DB table we are working on
65
69
  @allcolnames = nil # names of all columns of first and potentially second table
@@ -80,19 +84,19 @@ class RosyIterator
80
84
  ##
81
85
  # open the right database table
82
86
  if @dataset == "train" or @splitID
83
- @db_table = @ttt_obj.existing_train_table()
87
+ @db_table = @ttt_obj.existing_train_table
84
88
 
85
89
  else
86
90
  unless @testID
87
- raise "cannot open the test table without test ID"
91
+ raise "cannot open the test table without test ID"
88
92
  end
89
93
  @db_table = @ttt_obj.existing_test_table(@testID)
90
94
  end
91
- @allcolnames = @db_table.list_column_names()
95
+ @allcolnames = @db_table.list_column_names
92
96
 
93
97
  ##
94
98
  # make dynamic gold objects
95
- @dyn_gold_objects = Array.new
99
+ @dyn_gold_objects = []
96
100
  @dyn_gold_objects << DynGoldBinary.new(@exp.get("noval"))
97
101
 
98
102
  ###
@@ -101,38 +105,38 @@ class RosyIterator
101
105
  # argument recognition: distinguish just "FE", "NONE" as gold
102
106
  @standard_dyngold_id = "binary_gold"
103
107
  end
104
-
108
+
105
109
  ##
106
- # if splitID has been set,
110
+ # if splitID has been set,
107
111
  # make additional restrictions on the column values
108
112
  if @splitID
109
113
  # get split table name
110
- @second_table = @ttt_obj.existing_split_table(@splitID, @dataset, RosySplit.split_index_colname())
114
+ @second_table = @ttt_obj.existing_split_table(@splitID, @dataset, RosySplit.split_index_colname)
111
115
 
112
116
  # additional value restriction:
113
117
  # only use rows whose sentence ID also appears in the split table
114
118
  # (i.e. rows included in the split)
115
- @standard_value_restrictions << RosySplit.make_join_restriction(@splitID,
119
+ @standard_value_restrictions << RosySplit.make_join_restriction(@splitID,
116
120
  @db_table,
117
121
  @dataset,
118
122
  @ttt_obj)
119
123
 
120
124
  # additional column names:
121
125
  # those of the second table (but remove duplicates)
122
- @allcolnames.concat @ttt_obj.existing_split_table(@splitID, @dataset, RosySplit.split_index_colname()).list_column_names()
126
+ @allcolnames.concat @ttt_obj.existing_split_table(@splitID, @dataset, RosySplit.split_index_colname).list_column_names
123
127
  @allcolnames.uniq!
124
128
 
125
129
 
126
130
  # if we're using a split, read the phase 2 features and the classification results
127
131
  # from the split table rather than from the main table:
128
- # @use_cols_from_second_table is a list of column names (strings)
132
+ # @use_cols_from_second_table is a list of column names (strings)
129
133
  # to take from the 2nd table
130
134
  # @second_table_colprefix is a string: all columns starting with this prefix
131
135
  # are taken from the 2nd table
132
- @use_cols_from_second_table = [ RosySplit.split_index_colname() ]
136
+ @use_cols_from_second_table = [ RosySplit.split_index_colname ]
133
137
  @second_table_colprefix = @exp.get("classif_column_name")
134
138
  end
135
-
139
+
136
140
  ###
137
141
  # Any (row) value restrictions to be imposed
138
142
  # on all views we generate?
@@ -141,14 +145,14 @@ class RosyIterator
141
145
  # for which argrec-label is "FE"
142
146
 
143
147
  if @exp.get("assume_argrec_perfect")
144
- # assume perfect argrec step:
145
- # take all rows where gold is not "noval"
146
- @standard_value_restrictions << ValueRestriction.new(@db_table.table_name + ".gold",
147
- @exp.get("noval"),
148
+ # assume perfect argrec step:
149
+ # take all rows where gold is not "noval"
150
+ @standard_value_restrictions << ValueRestriction.new(@db_table.table_name + ".gold",
151
+ @exp.get("noval"),
148
152
  "posneg" => "!=")
149
153
  else
150
- # use argrec step as is:
151
- # take all rows where the argrec result is "FE"
154
+ # use argrec step as is:
155
+ # take all rows where the argrec result is "FE"
152
156
 
153
157
  case @dataset
154
158
  when "train"
@@ -159,10 +163,10 @@ class RosyIterator
159
163
  raise "Shouldn't be here"
160
164
  end
161
165
 
162
- if run_column_name.nil?
166
+ if run_column_name.nil?
163
167
  $stderr.puts "Missing: argrec classification results on #{@dataset} data."
164
168
  $stderr.puts "I have logs of the following runs: "
165
- $stderr.puts @ttt_obj.runlog_to_s()
169
+ $stderr.puts @ttt_obj.runlog_to_s
166
170
  raise "Problem"
167
171
  end
168
172
 
@@ -173,7 +177,7 @@ class RosyIterator
173
177
  run_column_name = @db_table.table_name + "." + run_column_name
174
178
  end
175
179
 
176
- @standard_value_restrictions << ValueRestriction.new(run_column_name, "FE")
180
+ @standard_value_restrictions << ValueRestriction.new(run_column_name, "FE")
177
181
  end
178
182
  end
179
183
 
@@ -192,9 +196,9 @@ class RosyIterator
192
196
  @xwise = var_hash["xwise"]
193
197
  unless @xwise
194
198
  if @step
195
- # read xwise from experiment file,
196
- # if we know what training/test step we're in
197
- @xwise = @exp.get("xwise_" + @step)
199
+ # read xwise from experiment file,
200
+ # if we know what training/test step we're in
201
+ @xwise = @exp.get("xwise_" + @step)
198
202
  end
199
203
  end
200
204
  if @xwise.nil?
@@ -202,10 +206,10 @@ class RosyIterator
202
206
  @xwise = "frame"
203
207
  end
204
208
 
205
- # xwise is a string consisting of any subset of
209
+ # xwise is a string consisting of any subset of
206
210
  # "frame", "target_pos", "target" joined by spaces.
207
211
  # transform to an array by splitting at spaces
208
- @xwise = @xwise.split()
212
+ @xwise = @xwise.split
209
213
  @xwise.each { |xwise_entry|
210
214
  unless @ttt_obj.feature_names.include? xwise_entry
211
215
  # sanity check: valid xwise value?
@@ -226,17 +230,17 @@ class RosyIterator
226
230
  #
227
231
  # get the column names used for determining the groups
228
232
  #
229
- # returns: an array of strings, ["frame"] or ["frame", "target"],
233
+ # returns: an array of strings, ["frame"] or ["frame", "target"],
230
234
  # or ["target_pos"]
231
- def get_xwise_column_names()
235
+ def get_xwise_column_names
232
236
  return @xwise
233
237
  end
234
238
 
235
239
  ####
236
240
  # num_groups
237
241
  # returns: integer
238
- def num_groups()
239
- return @groups.length()
242
+ def num_groups
243
+ return @groups.length
240
244
  end
241
245
 
242
246
  ####
@@ -250,7 +254,7 @@ class RosyIterator
250
254
  # - the hash describing the group, as returned by unique_values_of_column
251
255
  # - plus an ID for the group, made up of its hash values concatenated into a string
252
256
  # (values are connected by spaces)
253
- def each_group()
257
+ def each_group
254
258
  @groups.each { |hash|
255
259
  # hash is a hash column_name(string)-> value(object)
256
260
  # this is the unique description of the current group
@@ -269,12 +273,12 @@ class RosyIterator
269
273
  # (or "*" for all columns) and a list of value restrictions
270
274
  # on the rows (ValueRestriction objects, equalities or inequalities
271
275
  # column_name = value, columnb_name != value), which may be omitted
272
- #
276
+ #
273
277
  # returns: DBView object
274
278
  # @param columns [Array] array:string, column names to include
275
279
  # or string: "*" for all columns
276
280
  # @param value_restrictions [Array] array:ValueRestriction objects
277
- def get_a_view_for_current_group(columns, value_restrictions = [])
281
+ def get_a_view_for_current_group(columns, value_restrictions = [])
278
282
  get_a_view_for_group(@current_group, columns, value_restrictions)
279
283
  end
280
284
 
@@ -290,7 +294,7 @@ class RosyIterator
290
294
  # (or "*" for all columns) and a list of value restrictions
291
295
  # on the rows (ValueRestriction objects, equalities or inequalities
292
296
  # column_name = value, columnb_name != value), which may be omitted
293
- #
297
+ #
294
298
  # returns: DBView object
295
299
  # @param group [Hash] column(string)->value(object)
296
300
  # describing the group
@@ -311,7 +315,7 @@ class RosyIterator
311
315
  # the second table
312
316
 
313
317
  # separate group column names into two groups
314
- first_columns, second_columns =
318
+ first_columns, second_columns =
315
319
  separate_into_1st_and_2nd_table_cols(group.keys)
316
320
 
317
321
  # make separate value restrictions for the two groups
@@ -323,12 +327,12 @@ class RosyIterator
323
327
  raise "Cannot use second table columns without second table"
324
328
  end
325
329
  value_restrictions.concat second_columns.map { |column_name|
326
- ValueRestriction.new(@second_table.table_name + "." + column_name,
330
+ ValueRestriction.new(@second_table.table_name + "." + column_name,
327
331
  group[column_name],
328
332
  "table_name_included" => true)
329
333
  }
330
334
  end
331
-
335
+
332
336
  # get a view with the given columns, given value restrictions
333
337
  # plus add more value restrictions: must be the current group
334
338
  return get_a_view(columns,value_restrictions)
@@ -345,18 +349,18 @@ class RosyIterator
345
349
  # (or "*" for all columns) and a list of value restrictions
346
350
  # on the rows (ValueRestriction objects, equalities or inequalities
347
351
  # column_name = value, columnb_name != value), which may be omitted
348
- #
352
+ #
349
353
  # returns: DBView object
350
354
  def get_a_view(columns, # array:strings, list of column names
351
- # or string "*" (all columns)
352
- value_restrictions = []) # array: ValueRestriction objects
355
+ # or string "*" (all columns)
356
+ value_restrictions = []) # array: ValueRestriction objects
353
357
  # or [], nil for no restrictions
354
358
 
355
359
  if value_restrictions.nil?
356
360
  value_restrictions = []
357
361
  end
358
- return get_a_view_aux(columns, value_restrictions,
359
- "gold" => "gold",
362
+ return get_a_view_aux(columns, value_restrictions,
363
+ "gold" => "gold",
360
364
  "dynamic_feature_list" => @dyn_gold_objects,
361
365
  "standard_dyngold_id" => @standard_dyngold_id,
362
366
  "sentence_id_feature" => "sentid")
@@ -371,15 +375,15 @@ class RosyIterator
371
375
  #
372
376
  # returns: a list of hashes, one for each unique set of values
373
377
  def unique_values_of_columns(columns) # array:string, several column names
374
- retv = Array.new
378
+ retv = []
375
379
 
376
380
  view = get_a_view_aux(columns, [],
377
381
  "distinct" => true)
378
382
 
379
- view.each_hash() { |row|
383
+ view.each_hash { |row|
380
384
  retv << row
381
385
  }
382
- view.close()
386
+ view.close
383
387
  return retv
384
388
  end
385
389
 
@@ -387,7 +391,7 @@ class RosyIterator
387
391
  private
388
392
 
389
393
  ###
390
- # given a list of column names,
394
+ # given a list of column names,
391
395
  # separate them into first table and second table columns
392
396
  #
393
397
  # columns may be either an array of string (column names)
@@ -440,10 +444,10 @@ class RosyIterator
440
444
 
441
445
 
442
446
  # and get a view
443
- return DBView.new(tables_and_cols,
444
- value_restrictions + @standard_value_restrictions,
447
+ return DBView.new(tables_and_cols,
448
+ value_restrictions + @standard_value_restrictions,
445
449
  @ttt_obj.database,
446
- var_hash)
450
+ var_hash)
447
451
  end
448
452
 
449
453
  end
@@ -472,7 +476,9 @@ class DynGoldBinary
472
476
  end
473
477
  end
474
478
 
475
- def id()
479
+ def id
476
480
  return "binary_gold"
477
481
  end
478
482
  end
483
+ end
484
+ end
@@ -1,5 +1,5 @@
1
1
  ####
2
- # ke & sp
2
+ # ke & sp
3
3
  # adapted to new feature extractor class,
4
4
  # Collins and Tiger features combined:
5
5
  # SP November 2005
@@ -9,7 +9,7 @@
9
9
  # These are features that are computed on the basis of the Phase 1 feature set
10
10
  #
11
11
  # This consists of all features which have to know feature values for other nodes
12
- # (e.g. am I the nearest node to the target?) or similar.
12
+ # (e.g. am I the nearest node to the target?) or similar.
13
13
  #
14
14
  # Contract: each feature extractor inherits from the RosyPhase2FeatureExtractor class
15
15
  #
@@ -17,16 +17,17 @@
17
17
 
18
18
 
19
19
  # Salsa packages
20
- require 'rosy/AbstractFeatureAndExternal'
21
- require 'common/SalsaTigerRegXML'
20
+ require 'rosy/abstract_feature_extractor'
21
+ # require 'SalsaTigerRegXML'
22
22
 
23
23
  # Fred and Rosy packages
24
- require "common/RosyConventions"
24
+ # require "RosyConventions"
25
25
 
26
26
 
27
27
  ################################
28
28
  # base class for all following feature extractors
29
-
29
+ module Shalmaneser
30
+ module Rosy
30
31
  class RosyPhase2FeatureExtractor < AbstractFeatureExtractor
31
32
 
32
33
  ###
@@ -41,15 +42,15 @@ class RosyPhase2FeatureExtractor < AbstractFeatureExtractor
41
42
  # computed for the training set
42
43
  #
43
44
  # Here: all features in this package are phase 2
44
- def RosyPhase2FeatureExtractor.phase()
45
- return "phase 2"
45
+ def self.phase
46
+ "phase 2"
46
47
  end
47
48
 
48
49
  ###
49
50
  # returns an array of strings, providing information about
50
51
  # the feature extractor
51
- def RosyPhase2FeatureExtractor.info()
52
- return super().concat(["rosy"])
52
+ def self.info
53
+ super().concat(["rosy"])
53
54
  end
54
55
 
55
56
  ###
@@ -57,23 +58,23 @@ class RosyPhase2FeatureExtractor < AbstractFeatureExtractor
57
58
  # feature computation using compute_feature_value()
58
59
  # such that computations that stay the same for
59
60
  # several features can be done in advance
60
- def RosyPhase2FeatureExtractor.set(var_hash)
61
+ def self.set(var_hash)
61
62
  @@split_nones = var_hash["split_nones"]
62
63
  return true
63
64
  end
64
65
 
65
- # check if the current feature is computable, i.e. if all the necessary
66
+ # check if the current feature is computable, i.e. if all the necessary
66
67
  # Phase 1 features are in the present model.
67
68
  def RosyPhase2FeatureExtractor.is_computable(given_extractor_list)
68
- return (eval(self.name()).extractor_list - given_extractor_list).empty?
69
+ (extractor_list - given_extractor_list).empty?
69
70
  end
70
-
71
+
71
72
  # this probably has to be done for each feature:
72
- # identify sentences and the target, and recombine into a large array
73
+ # identify sentences and the target, and recombine into a large array
73
74
  def compute_features_on_view(view)
74
- result = Array.new(eval(self.class.name()).feature_names.length)
75
+ result = Array.new(self.class.feature_names.length)
75
76
  result.each_index {|i|
76
- result[i] = Array.new
77
+ result[i] = []
77
78
  }
78
79
  view.each_sentence {|instance_features|
79
80
  sentence_result = compute_features_for_sentence(instance_features)
@@ -94,7 +95,7 @@ class RosyPhase2FeatureExtractor < AbstractFeatureExtractor
94
95
  private
95
96
 
96
97
  # list of all the Phase 1 extractors that a particular feature extractor presupposes
97
- def RosyPhase2FeatureExtractor.extractor_list()
98
+ def RosyPhase2FeatureExtractor.extractor_list
98
99
  return []
99
100
  end
100
101
 
@@ -105,8 +106,6 @@ class RosyPhase2FeatureExtractor < AbstractFeatureExtractor
105
106
  def compute_features_for_sentence(instance_features) # array of hashes features -> values
106
107
  raise "Overwrite me"
107
108
  end
108
-
109
-
110
109
  end
111
110
 
112
111
 
@@ -117,65 +116,65 @@ end
117
116
  ####################
118
117
  # nearestNode
119
118
  #
120
- # compute whether my head word is the nearest word to the target,
119
+ # compute whether my head word is the nearest word to the target,
121
120
  # according to some criterion
122
121
 
123
122
  class NearestNodeFeature < RosyPhase2FeatureExtractor
124
- NearestNodeFeature.announce_me()
125
-
126
- def NearestNodeFeature.designator()
123
+ NearestNodeFeature.announce_me
124
+
125
+ def NearestNodeFeature.designator
127
126
  return "nearest_node"
128
127
  end
129
- def NearestNodeFeature.feature_names()
130
- return ["nearest_pt_path", # the nearest node with a specific pt_path
131
- "neareststring_pt",# the nearest pt (string distance)
128
+ def NearestNodeFeature.feature_names
129
+ return ["nearest_pt_path", # the nearest node with a specific pt_path
130
+ "neareststring_pt",# the nearest pt (string distance)
132
131
  "nearestpath_pt"] # the nearest pt (path length) ]
133
132
  end
134
- def NearestNodeFeature.sql_type()
133
+ def NearestNodeFeature.sql_type
135
134
  return "TINYINT"
136
135
  end
137
- def NearestNodeFeature.feature_type()
136
+ def NearestNodeFeature.feature_type
138
137
  return "syn"
139
138
  end
140
139
 
141
140
  #####
142
141
  private
143
142
 
144
- def NearestNodeFeature.extractor_list()
143
+ def NearestNodeFeature.extractor_list
145
144
  return ["worddistance","pt_path","pt","path_length"]
146
145
  end
147
-
146
+
148
147
  def compute_features_for_sentence(instance_features)
149
-
148
+
150
149
  # for each "interesting" feature, compute a hash map value -> index
151
150
  # also compute a hashmap index -> distance
152
- # so we efficiently compute, for each feature value, the index with min distance
153
-
154
- dist_hash = Hash.new # node id -> word distance
155
- pl_hash = Hash.new # node id -> path length
156
- path_hash = Hash.new # path -> node id array
157
- pt_hash = Hash.new # pt -> node id array
158
-
151
+ # so we efficiently compute, for each feature value, the index with min distance
152
+
153
+ dist_hash = {} # node id -> word distance
154
+ pl_hash = {} # node id -> path length
155
+ path_hash = {} # path -> node id array
156
+ pt_hash = {} # pt -> node id array
157
+
159
158
  result = [Array.new(instance_features.length),
160
159
  Array.new(instance_features.length),
161
160
  Array.new(instance_features.length)]
162
-
161
+
163
162
  instance_features.each_index {|inst_id|
164
163
  instance_hash = instance_features[inst_id]
165
164
  dist_hash[inst_id] = instance_hash["worddistance"]
166
165
  pl_hash[inst_id] = instance_hash["path_length"]
167
-
166
+
168
167
  # record paths
169
168
  pt_path = instance_hash["pt_path"]
170
169
  unless path_hash.key? pt_path
171
- path_hash[pt_path] = Array.new
170
+ path_hash[pt_path] = []
172
171
  end
173
172
  path_hash[pt_path] << inst_id
174
173
 
175
174
  # record pts
176
175
  pt = instance_hash["pt"]
177
176
  unless pt_hash.key? pt
178
- pt_hash[pt] = Array.new
177
+ pt_hash[pt] = []
179
178
  end
180
179
  pt_hash[pt] << inst_id
181
180
 
@@ -208,8 +207,8 @@ class NearestNodeFeature < RosyPhase2FeatureExtractor
208
207
  result[1][inst_id] = 0
209
208
  end
210
209
  }
211
- }
212
-
210
+ }
211
+
213
212
  # nearest-pt (path length) feature is feature 2 of the extractor
214
213
  pt_hash.each{|pt,inst_ids|
215
214
  path_lengths = inst_ids.map {|inst_id| pl_hash[inst_id]}
@@ -222,9 +221,10 @@ class NearestNodeFeature < RosyPhase2FeatureExtractor
222
221
  result[2][inst_id] = 0
223
222
  end
224
223
  }
225
- }
224
+ }
226
225
 
227
226
  return result
228
- end
227
+ end
228
+ end
229
+ end
229
230
  end
230
-