shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/rosy +14 -7
  4. data/lib/rosy/FailedParses.rb +22 -20
  5. data/lib/rosy/FeatureInfo.rb +35 -31
  6. data/lib/rosy/GfInduce.rb +132 -130
  7. data/lib/rosy/GfInduceFeature.rb +86 -68
  8. data/lib/rosy/InputData.rb +59 -55
  9. data/lib/rosy/RosyConfusability.rb +47 -40
  10. data/lib/rosy/RosyEval.rb +55 -55
  11. data/lib/rosy/RosyFeatureExtractors.rb +295 -290
  12. data/lib/rosy/RosyFeaturize.rb +54 -67
  13. data/lib/rosy/RosyInspect.rb +52 -50
  14. data/lib/rosy/RosyIterator.rb +73 -67
  15. data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
  16. data/lib/rosy/RosyPruning.rb +39 -31
  17. data/lib/rosy/RosyServices.rb +116 -115
  18. data/lib/rosy/RosySplit.rb +55 -53
  19. data/lib/rosy/RosyTask.rb +7 -3
  20. data/lib/rosy/RosyTest.rb +174 -191
  21. data/lib/rosy/RosyTrain.rb +46 -50
  22. data/lib/rosy/RosyTrainingTestTable.rb +101 -99
  23. data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
  24. data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
  25. data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
  26. data/lib/rosy/external_feature_extractor.rb +35 -0
  27. data/lib/rosy/opt_parser.rb +231 -201
  28. data/lib/rosy/rosy.rb +63 -64
  29. data/lib/rosy/rosy_conventions.rb +66 -0
  30. data/lib/rosy/rosy_error.rb +15 -0
  31. data/lib/rosy/var_var_restriction.rb +16 -0
  32. data/lib/shalmaneser/rosy.rb +1 -0
  33. metadata +26 -19
  34. data/lib/rosy/ExternalConfigData.rb +0 -58
  35. data/lib/rosy/View.rb +0 -418
  36. data/lib/rosy/rosy_config_data.rb +0 -121
  37. data/test/frprep/test_opt_parser.rb +0 -94
  38. data/test/functional/functional_test_helper.rb +0 -58
  39. data/test/functional/test_fred.rb +0 -47
  40. data/test/functional/test_frprep.rb +0 -99
  41. data/test/functional/test_rosy.rb +0 -40
data/lib/rosy/rosy.rb CHANGED
@@ -8,71 +8,70 @@ require 'rosy/RosyTrain'
8
8
  require 'rosy/RosyInspect'
9
9
  require 'rosy/RosyEval'
10
10
  require 'rosy/RosyServices'
11
+ require 'logging'
12
+ require 'rosy/rosy_error'
11
13
 
12
- module Rosy
13
- class Rosy
14
+ module Shalmaneser
15
+ module Rosy
16
+ class Rosy
14
17
 
15
- def initialize(options)
16
- @exp, @opts = options
17
- @task = @opts['--task']
18
- end
19
-
20
- def assign
21
-
22
- # make rosy directory pattern:
23
- # main rosy directory name (data_dir) plus subdirectory
24
- # named after the experiment ID
25
- rosy_dir_pattern = File.new_dir(@exp.get("data_dir")) + "<exp_ID>/"
26
- @exp.set_entry("rosy_dir", rosy_dir_pattern)
27
-
28
- ##
29
- # open database
30
-
31
- rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
32
- "exp_ID" => @exp.get("experiment_ID")))
33
- database = get_db_interface(@exp, rosy_dir, "features")
34
-
35
- table_obj = RosyTrainingTestTable.new(@exp, database)
36
-
37
- ##
38
- # start the actual processing,
39
- # according to given arguments
40
-
41
- # initialize task object
42
- #begin
43
- case @task
44
- when "featurize"
45
- task_obj = RosyFeaturize.new(@exp, @opts, table_obj)
46
- when "split"
47
- task_obj = RosySplit.new(@exp, @opts, table_obj)
48
- when "train"
49
- task_obj = RosyTrain.new(@exp, @opts, table_obj)
50
- when "test"
51
- task_obj = RosyTest.new(@exp, @opts, table_obj)
52
- when "eval"
53
- task_obj = RosyEvalTask.new(@exp, @opts, table_obj)
54
- when "inspect"
55
- task_obj = RosyInspect.new(@exp, @opts, table_obj)
56
- when "services"
57
- task_obj = RosyServices.new(@exp, @opts, table_obj)
58
- else
59
- raise "Shouldn't be here"
18
+ def initialize(options)
19
+ @exp, @opts = options
20
+ @task = @opts['--task']
60
21
  end
61
-
62
-
63
- # execute task
64
- begin
65
- task_obj.perform
66
- rescue => e
67
- puts e.backtrace
68
- fail "Error during task execution: #{e.class}=>#{e.message}"
69
- ensure
70
- database.close
22
+
23
+ def assign
24
+
25
+ # make rosy directory pattern:
26
+ # main rosy directory name (data_dir) plus subdirectory
27
+ # named after the experiment ID
28
+ rosy_dir_pattern = File.new_dir(@exp.get("data_dir")) + "<exp_ID>/"
29
+ @exp.set_entry("rosy_dir", rosy_dir_pattern)
30
+
31
+ ##
32
+ # open database
33
+
34
+ rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
35
+ "exp_ID" => @exp.get("experiment_ID")))
36
+ database = DBInterface.get_db_interface(@exp, rosy_dir, "features")
37
+
38
+ table_obj = RosyTrainingTestTable.new(@exp, database)
39
+
40
+ ##
41
+ # start the actual processing,
42
+ # according to given arguments
43
+
44
+ # initialize task object
45
+ task = case @task
46
+ when "featurize"
47
+ RosyFeaturize.new(@exp, @opts, table_obj)
48
+ when "split"
49
+ RosySplit.new(@exp, @opts, table_obj)
50
+ when "train"
51
+ RosyTrain.new(@exp, @opts, table_obj)
52
+ when "test"
53
+ RosyTest.new(@exp, @opts, table_obj)
54
+ when "eval"
55
+ RosyEvalTask.new(@exp, @opts, table_obj)
56
+ when "inspect"
57
+ RosyInspect.new(@exp, @opts, table_obj)
58
+ when "services"
59
+ RosyServices.new(@exp, @opts, table_obj)
60
+ else
61
+ raise "Shouldn't be here"
62
+ end
63
+
64
+ # execute task
65
+ begin
66
+ task.perform
67
+ rescue => e
68
+ raise RosyError.new 'Rosy is doing bad!', e
69
+ ensure
70
+ database.close
71
+ end
72
+
73
+ LOGGER.info "#{PROGRAM_NAME} finished performing the task: #{task}!"
71
74
  end
72
-
73
-
74
- $stderr.puts "Rosy: done."
75
- end
76
-
77
- end # class Rosy
78
- end # module Rosy
75
+ end # class Rosy
76
+ end # module Rosy
77
+ end
@@ -0,0 +1,66 @@
1
+ require 'monkey_patching/file'
2
+
3
module Shalmaneser
  module Rosy
    # Naming and formatting conventions shared by the Rosy components.
    # Every method is exposed through +module_function+, so it can be called
    # as +Shalmaneser::Rosy.method_name+.
    module_function

    # Transforms feature output into a format that classifiers can handle:
    # each of the characters . " : ' ; ` becomes "_PUNCT_" and every
    # whitespace character becomes "_".
    #
    # @note Used only under Rosy.
    # @param string [String] raw feature value
    # @return [String] a new, sanitized string
    def prepare_output_for_classifiers(string)
      string.gsub(/[.":';`]/, '_PUNCT_').gsub(/\s/, '_')
    end

    # Builds the classifier directory name: either the user-given
    # "classifier_dir" or the experiment's instantiated "rosy_dir", then
    # "classif_dir", then the step name, suffixed with ".<splitID>" when a
    # split ID is given.
    #
    # @note Needs the extended File class (File.new_dir).
    # @note Used only under Rosy.
    # @param exp [#get, #instantiate] RosyConfigData object
    # @param step [#to_s] argrec, arglab or onestep
    # @param splitID [#to_s, nil] split ID, or nil when there is none
    # @return whatever File.new_dir returns for the final directory
    def classifier_directory_name(exp, step, splitID)
      user_dir = exp.get('classifier_dir')
      base_dir =
        if user_dir
          File.new_dir(user_dir)
        else
          File.new_dir(exp.instantiate('rosy_dir',
                                       'exp_ID' => exp.get('experiment_ID')))
        end
      classif_base_dir = File.new_dir(base_dir, 'classif_dir')

      # Interpolation (rather than String#+) also accepts a non-String step.
      File.new_dir(classif_base_dir, splitID ? "#{step}.#{splitID}" : step)
    end

    # Builds an instance ID: sentence ID plus frame ID, joined by "---".
    #
    # @note Used only under Rosy.
    # @param sentence_id [#to_s]
    # @param frame_id [#to_s]
    # @return [String]
    def construct_instance_id(sentence_id, frame_id)
      "#{sentence_id}---#{frame_id}"
    end

    # Splits an instance ID on the "---" separator.
    #
    # @note Not used anywhere.
    # @param instance_id [String]
    # @return [Array<String>] the parts between the separators
    def deconstruct_instance_id(instance_id)
      instance_id.split('---')
    end

    # Default test ID given when the user didn't specify one.
    #
    # @note Used only under Rosy.
    # @return [String]
    def default_test_ID
      'apply'
    end
  end
end
@@ -0,0 +1,15 @@
1
module Shalmaneser
  module Rosy
    # Error raised by Rosy. It can wrap another exception, in which case the
    # wrapped exception's class, message and backtrace are folded into this
    # error's message.
    class RosyError < StandardError
      # @param msg [String, nil] A custom message for this exception.
      # @param nested_exception [Exception, nil] An external exception
      #   which is reused to provide more information.
      def initialize(msg = nil, nested_exception = nil)
        if nested_exception
          lines = ["#{nested_exception.class}: #{nested_exception.message}", msg]
          # An exception that was instantiated but never raised has a nil
          # backtrace; Array() turns that into an empty list instead of
          # failing on nil.join.
          lines.concat(Array(nested_exception.backtrace))
          # One entry per line, so the custom message is no longer glued to
          # the first backtrace line; a nil msg is simply dropped.
          msg = lines.compact.join("\n")
        end
        super(msg)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'value_restriction'
2
+ ###
3
+ # value restrictions saying that variable1 = variable2:
4
+ # here, value is a variable name, and the table names
5
+ # must be already included
6
module Shalmaneser
  module Rosy
    # Value restriction stating that variable1 = variable2: the value is
    # itself a variable name, and table names must already be included in
    # the names passed in.
    class VarVarRestriction < ValueRestriction
      # @param column the restricted column (variable1)
      # @param value the variable name compared against (variable2)
      # @param var_hash [Hash] forwarded unchanged to ValueRestriction
      def initialize(column, value, var_hash = {})
        # Bare super forwards column, value and var_hash as received.
        super
        @table_name_included = true
        @val_is_variable = true
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ # A dummy file to require for now.
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shalmaneser-rosy
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0.rc4
4
+ version: 1.2.rc5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Beliankou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-04 00:00:00.000000000 Z
11
+ date: 2016-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: shalmaneser-lib
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.2.rc5
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.2.rc5
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: mysql
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -24,7 +38,7 @@ dependencies:
24
38
  - - "~>"
25
39
  - !ruby/object:Gem::Version
26
40
  version: '2.9'
27
- description: ROSY - Role Assignment System.
41
+ description: ROSY - ROle assignment SYstem.
28
42
  email: arbox@yandex.ru
29
43
  executables:
30
44
  - rosy
@@ -39,8 +53,6 @@ files:
39
53
  - LICENSE.md
40
54
  - README.md
41
55
  - bin/rosy
42
- - lib/rosy/AbstractFeatureAndExternal.rb
43
- - lib/rosy/ExternalConfigData.rb
44
56
  - lib/rosy/FailedParses.rb
45
57
  - lib/rosy/FeatureInfo.rb
46
58
  - lib/rosy/GfInduce.rb
@@ -61,15 +73,15 @@ files:
61
73
  - lib/rosy/RosyTrain.rb
62
74
  - lib/rosy/RosyTrainingTestTable.rb
63
75
  - lib/rosy/TargetsMostFrequentFrame.rb
64
- - lib/rosy/View.rb
76
+ - lib/rosy/abstract_feature_extractor.rb
77
+ - lib/rosy/abstract_single_feature_extractor.rb
78
+ - lib/rosy/external_feature_extractor.rb
65
79
  - lib/rosy/opt_parser.rb
66
80
  - lib/rosy/rosy.rb
67
- - lib/rosy/rosy_config_data.rb
68
- - test/frprep/test_opt_parser.rb
69
- - test/functional/functional_test_helper.rb
70
- - test/functional/test_fred.rb
71
- - test/functional/test_frprep.rb
72
- - test/functional/test_rosy.rb
81
+ - lib/rosy/rosy_conventions.rb
82
+ - lib/rosy/rosy_error.rb
83
+ - lib/rosy/var_var_restriction.rb
84
+ - lib/shalmaneser/rosy.rb
73
85
  homepage: https://github.com/arbox/shalmaneser
74
86
  licenses:
75
87
  - GPL-2.0
@@ -92,14 +104,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
92
104
  version: 1.3.1
93
105
  requirements: []
94
106
  rubyforge_project:
95
- rubygems_version: 2.4.5
107
+ rubygems_version: 2.5.1
96
108
  signing_key:
97
109
  specification_version: 4
98
110
  summary: ROSY
99
- test_files:
100
- - test/frprep/test_opt_parser.rb
101
- - test/functional/functional_test_helper.rb
102
- - test/functional/test_fred.rb
103
- - test/functional/test_frprep.rb
104
- - test/functional/test_rosy.rb
111
+ test_files: []
105
112
  has_rdoc:
@@ -1,58 +0,0 @@
1
- # ExternalConfigData
2
- # Katrin Erk January 2006
3
- #
4
- # All scripts that compute additional external knowledge sources
5
- # for Fred and Rosy:
6
- # access to configuration and experiment description file
7
-
8
- require 'common/config_data'
9
-
10
- ##############################
11
- # Class ExternalConfigData
12
- #
13
- # inherits from ConfigData,
14
- # sets variable names appropriate to tasks of external knowledge sources
15
-
16
- class ExternalConfigData < ConfigData
17
- def initialize(filename)
18
-
19
- # initialize config data object
20
- super(filename, # config file
21
- { "directory" => "string", # features
22
-
23
- "experiment_id" => "string",
24
-
25
- "gfmap_restrict_to_downpath" => "bool",
26
- "gfmap_restrict_pathlen" => "integer",
27
- "gfmap_remove_gf" => "list"
28
- },
29
- [] # variables
30
- )
31
-
32
- # set access functions for list features
33
- set_list_feature_access("gfmap_remove_gf",
34
- method("access_as_stringlist"))
35
- end
36
-
37
- ###
38
- protected
39
-
40
- #####
41
- # access_as_stringlist
42
- #
43
- # assumed format:
44
- #
45
- # lhs = rhs1 rhs2 ... rhsN
46
- #
47
- # given in val_list as string tuples [rhs1,...,rhsN]
48
- #
49
- # join the rhs strings by spaces, return as string
50
- # "rhs1 rhs2 ... rhsN"
51
- #
52
- def access_as_stringlist(val_list) # array:array:string
53
- return val_list.map { |rhs| rhs.join(" ") }
54
- end
55
- end
56
-
57
-
58
-
data/lib/rosy/View.rb DELETED
@@ -1,418 +0,0 @@
1
- # class DBView
2
- # KE, SP 27.1.05
3
- #
4
- # builds on class DBTable, which offers access to a database table
5
- # extract views of the table (select columns, select rows)
6
- # and offers access methods for these views.
7
- # Rows of the table can be returned either as hashes or as arrays.
8
- #
9
- # There is a special column of the table (the name of which we get in the new() method),
10
- # the gold column.
11
- # It can be returned directly, or modified by some "dynamic feature object",
12
- # and its value (modified or unmodified) will always be last in the array representation of a row.
13
-
14
- require 'db/sql_query'
15
- require "common/ruby_class_extensions"
16
- require "common/RosyConventions"
17
-
18
- class DBView
19
-
20
- ################
21
- # new
22
- #
23
- # prepare a view.
24
- # given a list of DB tables to access, each with its
25
- # set of features to be returned in the view,
26
- # a set of value restrictions,
27
- # the name of the gold feature,
28
- # and a list of objects that manipulate the gold feature into alternate
29
- # gold features.
30
- #
31
- # value_restrictions restricts the view to those rows for which the value restrictions hold,
32
- # e.g. only those rows where frame = Bla, or only those rows where partofspeech = Blupp
33
- #
34
- # The view remembers the indices of the _first_ table in the list of tables
35
- # it is given.
36
- #
37
- # A standard dynamic ID can be given: DynGold objects all have an id() method,
38
- # which returns a string, by which the use of the object can be requested
39
- # of the view. If no dynamic ID is given below in methods each_array,
40
- # each_hash, each_sentence, the system falls back to the standard dynamic ID.
41
- # if none is given here, the standard DynGold object is the one that doesn't
42
- # change the gold column. If one is given here, it will be used by default
43
- # when no ID is given in each_hash, each_array, each_sentence
44
- #
45
- # The last parameter is a hash with the following optional entries:
46
- # "gold":
47
- # string: name of the gold feature
48
- # If you want the gold feature to be mapped using a DynGold object,
49
- # you need to specify this parameter -- and you need to include
50
- # the gold feature in some feature_list.
51
- # Warning: if a feature of this name appears in several of the
52
- # feature lists, only the first one is mapped
53
- # "dynamic_feature_list":
54
- # array:DynGold objects, list of objects that map the gold feature
55
- # to a different feature value (e.g. to "FE", "NONE")
56
- # DynGold objects have one method make: string -> string
57
- # that maps one gold feature,
58
- # and one method id: -> string that gives an ID unique to this DynGold class
59
- # and by which this DynGold class can be chosen.
60
- # "standard_dyngold_id":
61
- # string: standard DynGold object ID (see above)
62
- # "sentence_id_feature":
63
- # string: feature name for the sentence ID column, needed for each_sentence()
64
- #
65
- # further parameters that are passed on to SQLQuery.select: see there
66
-
67
- def initialize(table_col_pairs, # array:SelectTableAndColumns objects
68
- value_restrictions, # array:ValueRestriction objects
69
- db_obj, # MySql object (from mysql.rb) that already has access to the correct database
70
- parameters = {}) # hash with further parameters: see above
71
-
72
- @db_obj = db_obj
73
- @table_col_pairs = table_col_pairs
74
- @parameters = parameters
75
-
76
- # view empty?
77
- if @table_col_pairs.empty? or
78
- @table_col_pairs.big_and { |tc| tc.columns.class.to_s == "Array" and tc.columns.empty? }
79
- @view_empty = true
80
- return
81
- else
82
- @view_empty = false
83
- end
84
-
85
- # okay, we can make the view, it contains at least one table and
86
- # at least one column:
87
- # do one view for all columns requested, and one for the indices of each table
88
- #
89
- # @main_table is a DBResult object
90
- @main_table = execute_command(SQLQuery.select(@table_col_pairs,
91
- value_restrictions, parameters))
92
-
93
- # index_tables: Hash: table name => DBResult object
94
- @index_tables = Hash.new
95
- table_col_pairs.each_with_index { |tc, index|
96
- # read index column of this table, add all the other tables
97
- # with empty column lists
98
- index_table_col_pairs = @table_col_pairs.map_with_index { |other_tc, other_index|
99
- if other_index == index
100
- # the current table
101
- SelectTableAndColumns.new(tc.table_obj,
102
- [tc.table_obj.index_name])
103
- else
104
- # other table: keep just the table, not the columns
105
- SelectTableAndColumns.new(other_tc.table_obj, nil)
106
- end
107
- }
108
- @index_tables[tc.table_obj.table_name] = execute_command(SQLQuery.select(index_table_col_pairs,
109
- value_restrictions, parameters))
110
- }
111
-
112
- # map gold to something else?
113
- # yes, if parameters[gold] has been set
114
- if @parameters["gold"]
115
- @map_gold = true
116
- # remember which column in the DB table is the gold column
117
- @gold_index = column_names().index(@parameters["gold"])
118
- else
119
- @map_gold = false
120
- end
121
- end
122
-
123
- ################
124
- # close
125
- #
126
- # to be called when the view is no longer needed:
127
- # frees the DBResult objects underlying this view
128
- def close()
129
- unless @view_empty
130
- @main_table.free()
131
- @index_tables.each_value { |t| t.free() }
132
- end
133
- end
134
-
135
- ################
136
- # write_to_file
137
- #
138
- # writes instances to a file
139
- # each instance given as a comma-separated list of features
140
- # The features are the ones given in my_feature_list
141
- # (parameter to the new() method) above, in that order,
142
- # plus (dynamic) gold, which is last.
143
- #
144
- # guarantees that comma is used only to separate features -- but no other
145
- # changes in the feature values
146
- def write_to_file(file, # stream to write to
147
- dyn_gold_id=nil) #string: ID of a DynGold object from the dynamic_feature_list.
148
- # if nil, main gold is used
149
-
150
- each_instance_s(dyn_gold_id) { |instance_string|
151
- file.puts instance_string
152
- }
153
- end
154
-
155
-
156
- ################
157
- # each_instance_s
158
- #
159
- # yields each instance as a string:
160
- # a comma-separated list of features
161
- # The features are the ones given in my_feature_list
162
- # (parameter to the new() method) above, in that order,
163
- # plus (dynamic) gold, which is last.
164
- #
165
- # guarantees that comma is used only to separate features -- but no other
166
- # changes in the feature values
167
- def each_instance_s(dyn_gold_id=nil) #string: ID of a DynGold object from the dynamic_feature_list.
168
- # if nil, main gold is used
169
- each_array(dyn_gold_id) {|array|
170
- yield array.map { |entry| entry.to_s.gsub(/,/, "COMMA") }.join(",")
171
- }
172
- end
173
-
174
- ################
175
- # each_hash
176
- #
177
- # iterates over hashes representing rows
178
- # in each row, there is a gold key/value pair
179
- # specified by the optional argument dyn_gold_id.
180
- # which is the string ID of a DynGold object
181
- # from the dynamic_feature_list.
182
- # If arg is not present, main gold is used
183
- #
184
- # The key for the gold is the dyn_gold_id
185
- # If that is nil, the key is 'gold'
186
- #
187
- # yields: hashes column_name -> column_value
188
- def each_hash(dyn_gold_id=nil) #string: ID of a DynGold object from the dynamic_feature_list, or nil
189
- if @view_empty
190
- return
191
- end
192
- if @map_gold
193
- dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id)
194
- end
195
- @main_table.reset()
196
-
197
- @main_table.each_hash { |row_hash|
198
- if @map_gold
199
- row_hash[@parameters["gold"]] = dyn_gold_obj.make(row_hash[@parameters["gold"]])
200
- end
201
-
202
- yield row_hash
203
- }
204
- end
205
-
206
- ################
207
- # each_array
208
- #
209
- # iterates over arrays representing rows
210
- # the last item of each row is the gold column
211
- # selected by the optional argument dyn_gold_id.
212
- # which is the string ID of a DynGold object
213
- # from the dynamic_feature_list.
214
- # If arg is not present, main gold is used
215
- #
216
- # yields: arrays of column values,
217
- # values are in the order of my_feature_list given
218
- # to the new() method, (dynamic) gold is last
219
- def each_array(dyn_gold_id=nil) #string: ID of a DynGold object from the dynamic_feature_list, or nil
220
-
221
- if @view_empty
222
- return
223
- end
224
- if @map_gold
225
- dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id)
226
- end
227
- @main_table.reset()
228
-
229
- @main_table.each {|row|
230
- if @gold_index
231
- gold = row.delete_at(@gold_index)
232
- if @map_gold
233
- row.push dyn_gold_obj.make(gold)
234
- else
235
- row.push gold
236
- end
237
- end
238
-
239
- yield row
240
- }
241
- end
242
-
243
- ################
244
- # update_column
245
- #
246
- # update a column for all rows of this view
247
- #
248
- # Given a column name to be updated, and a list of value tuples,
249
- # update each row of the view, or rather the appropriate column of each row of the view,
250
- # with values for that row.
251
- #
252
- # the list has the same length as the view, as there must be a value tuple
253
- # for each row of the view.
254
- #
255
- # returns: nothing
256
- def update_column(name, # string: column name
257
- values) # array of Objects
258
-
259
- if @view_empty
260
- raise "Cannot update empty view"
261
- end
262
-
263
- # find the first table in @table_col_pairs that has
264
- # a column with this name
265
- # and update that column
266
- @table_col_pairs.each { |tc|
267
- if (tc.columns.class.to_s == "Array" and tc.columns.include? name) or
268
- (tc.columns == "*" and tc.table_obj.list_column_names().include? name)
269
-
270
- table_name = tc.table_obj.table_name
271
-
272
- # sanity check: number of update entries must match
273
- # number of entries in this view
274
- unless values.length() == @index_tables[table_name].num_rows()
275
- $stderr.puts "Error: length of value array (#{values.length}) is not equal to length of view (#{@index_tables[table_name].num_rows})!"
276
- exit 1
277
- end
278
-
279
- @index_tables[tc.table_obj.table_name].reset()
280
-
281
- values.each { |value|
282
- index = @index_tables[table_name].fetch_row().first
283
- tc.table_obj.update_row(index, [[name, value]])
284
- }
285
-
286
- return
287
- end
288
- }
289
-
290
- # no match found
291
- $stderr.puts "View.rb Error: cannot update a column that is not in this view: #{name}"
292
- exit 1
293
- end
294
-
295
-
296
- ################
297
- # each_sentence
298
- #
299
- # like each_hash, but it groups the row hashes sentence-wise
300
- # sentence boundaries in the view are detected by the change in a
301
- # special column describing sentence IDs
302
- #
303
- # also needs a dyngold object id
304
- #
305
- # returns: an array of hashes column_name -> column_value
306
- def each_sentence(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
307
-
308
- # sanity check 1: need to know what the sentence ID is
309
- unless @parameters["sentence_id_feature"]
310
- raise "I need the name of the sentence ID feature for each_sentence()"
311
- end
312
- # sanity check 2: the view needs to include the sentence ID
313
- unless column_names().include? @parameters["sentence_id_feature"]
314
- raise "View.each_sentence: Cannot do this without sentence ID in the view"
315
- end
316
-
317
- last_sent_id = nil
318
- sentence = Array.new
319
- each_hash(dyn_gold_id) {|row_hash|
320
- if last_sent_id != row_hash[@parameters["sentence_id_feature"]] and
321
- (!(last_sent_id.nil?))
322
- yield sentence
323
- sentence = Array.new
324
- end
325
- last_sent_id = row_hash[@parameters["sentence_id_feature"]]
326
- sentence << row_hash
327
- }
328
- unless sentence.empty?
329
- yield sentence
330
- end
331
- end
332
-
333
- ######################
334
- # length
335
- #
336
- # returns the length of the view: the number of its rows
337
- def length()
338
- return @index_tables[@table_col_pairs.first.table_obj.table_name].num_rows
339
- end
340
-
341
- ###
342
- private
343
-
344
- ################
345
- # column_names
346
- #
347
- # returns: array:string
348
- # the list of column names for this view
349
- # in the right order
350
- def column_names()
351
- if @view_empty
352
- return []
353
- else
354
- return @main_table.list_column_names()
355
- end
356
- end
357
-
358
- ######
359
- # fetch_dyn_gold_obj
360
- #
361
- # given an ID of a gold object, look for the DynGold object
362
- # with this ID in the dynamic_feature_list and return it
363
- # If the ID is nil, use the standard dynamic gold ID that
364
- # has been set in the new() method.
365
- # If that is nil too, take the non-modified gold as a
366
- # default: return a dummy object with a make() method
367
- # that just returns its parameter.
368
- #
369
- # returns: object offering a make() method
370
-
371
- def fetch_dyn_gold_obj(dyn_gold_id) # string or nil
372
- # find a DynGold object that will transform the gold column
373
- if dyn_gold_id.nil?
374
- dyn_gold_id = @parameters["standard_dyngold_id"]
375
- end
376
-
377
- dyn_gold_obj = "we need an object that can do 'make'"
378
- if dyn_gold_id
379
- unless @parameters["dynamic_feature_list"]
380
- raise "No dynamic features given"
381
- end
382
-
383
- dyn_gold_obj = @parameters["dynamic_feature_list"].detect { |obj|
384
- obj.id() == dyn_gold_id
385
- }
386
- if dyn_gold_obj.nil?
387
- $stderr.puts "View.rb: Unknown DynGold ID " + dyn_gold_id
388
- $stderr.puts "Using unchanged gold"
389
- dyn_gold_id = nil
390
- end
391
- end
392
-
393
- unless dyn_gold_id
394
- # no dynamic gold ID: use unchanged gold by default
395
- class << dyn_gold_obj
396
- def make(x)
397
- x
398
- end
399
- def id()
400
- return "gold"
401
- end
402
- end
403
- end
404
- return dyn_gold_obj
405
- end
406
-
407
- def execute_command(command)
408
- begin
409
- return @db_obj.query(command)
410
- rescue MysqlError => e
411
- $stderr.puts "Error executing SQL query. Command was:\n" + command
412
- $stderr.puts "Error code: #{e.errno}"
413
- $stderr.puts "Error message: #{e.error}"
414
- raise e
415
- end
416
- end
417
-
418
- end