shalmaneser 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/shalmaneser +8 -2
- data/doc/index.md +1 -0
- data/lib/shalmaneser/opt_parser.rb +68 -67
- metadata +49 -119
- data/bin/fred +0 -16
- data/bin/frprep +0 -34
- data/bin/rosy +0 -17
- data/lib/common/AbstractSynInterface.rb +0 -1229
- data/lib/common/Counter.rb +0 -18
- data/lib/common/EnduserMode.rb +0 -27
- data/lib/common/Eval.rb +0 -480
- data/lib/common/FixSynSemMapping.rb +0 -196
- data/lib/common/Graph.rb +0 -345
- data/lib/common/ISO-8859-1.rb +0 -24
- data/lib/common/ML.rb +0 -186
- data/lib/common/Mallet.rb +0 -236
- data/lib/common/Maxent.rb +0 -229
- data/lib/common/Optimise.rb +0 -195
- data/lib/common/Parser.rb +0 -213
- data/lib/common/RegXML.rb +0 -269
- data/lib/common/RosyConventions.rb +0 -171
- data/lib/common/STXmlTerminalOrder.rb +0 -194
- data/lib/common/SalsaTigerRegXML.rb +0 -2347
- data/lib/common/SalsaTigerXMLHelper.rb +0 -99
- data/lib/common/SynInterfaces.rb +0 -282
- data/lib/common/TabFormat.rb +0 -721
- data/lib/common/Tiger.rb +0 -1448
- data/lib/common/Timbl.rb +0 -144
- data/lib/common/Tree.rb +0 -61
- data/lib/common/config_data.rb +0 -470
- data/lib/common/config_format_element.rb +0 -220
- data/lib/common/headz.rb +0 -338
- data/lib/common/option_parser.rb +0 -13
- data/lib/common/prep_config_data.rb +0 -62
- data/lib/common/prep_helper.rb +0 -1330
- data/lib/common/ruby_class_extensions.rb +0 -310
- data/lib/db/db_interface.rb +0 -48
- data/lib/db/db_mysql.rb +0 -145
- data/lib/db/db_sqlite.rb +0 -280
- data/lib/db/db_table.rb +0 -239
- data/lib/db/db_wrapper.rb +0 -176
- data/lib/db/sql_query.rb +0 -243
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredConventions.rb +0 -232
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred.rb +0 -47
- data/lib/fred/fred_config_data.rb +0 -185
- data/lib/fred/md5.rb +0 -23
- data/lib/fred/opt_parser.rb +0 -250
- data/lib/frprep/Ampersand.rb +0 -39
- data/lib/frprep/CollinsInterface.rb +0 -1165
- data/lib/frprep/Counter.rb +0 -18
- data/lib/frprep/FNCorpusXML.rb +0 -643
- data/lib/frprep/FNDatabase.rb +0 -144
- data/lib/frprep/FrameXML.rb +0 -513
- data/lib/frprep/Graph.rb +0 -345
- data/lib/frprep/MiniparInterface.rb +0 -1388
- data/lib/frprep/RegXML.rb +0 -269
- data/lib/frprep/STXmlTerminalOrder.rb +0 -194
- data/lib/frprep/SleepyInterface.rb +0 -384
- data/lib/frprep/TntInterface.rb +0 -44
- data/lib/frprep/TreetaggerInterface.rb +0 -327
- data/lib/frprep/do_parses.rb +0 -143
- data/lib/frprep/frprep.rb +0 -693
- data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
- data/lib/frprep/interfaces/stanford_interface.rb +0 -353
- data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
- data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
- data/lib/frprep/one_parsed_file.rb +0 -28
- data/lib/frprep/opt_parser.rb +0 -94
- data/lib/frprep/ruby_class_extensions.rb +0 -310
- data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
- data/lib/rosy/ExternalConfigData.rb +0 -58
- data/lib/rosy/FailedParses.rb +0 -130
- data/lib/rosy/FeatureInfo.rb +0 -242
- data/lib/rosy/GfInduce.rb +0 -1115
- data/lib/rosy/GfInduceFeature.rb +0 -148
- data/lib/rosy/InputData.rb +0 -294
- data/lib/rosy/RosyConfusability.rb +0 -338
- data/lib/rosy/RosyEval.rb +0 -465
- data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
- data/lib/rosy/RosyFeaturize.rb +0 -281
- data/lib/rosy/RosyInspect.rb +0 -336
- data/lib/rosy/RosyIterator.rb +0 -478
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
- data/lib/rosy/RosyPruning.rb +0 -165
- data/lib/rosy/RosyServices.rb +0 -744
- data/lib/rosy/RosySplit.rb +0 -232
- data/lib/rosy/RosyTask.rb +0 -19
- data/lib/rosy/RosyTest.rb +0 -829
- data/lib/rosy/RosyTrain.rb +0 -234
- data/lib/rosy/RosyTrainingTestTable.rb +0 -787
- data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
- data/lib/rosy/View.rb +0 -418
- data/lib/rosy/opt_parser.rb +0 -379
- data/lib/rosy/rosy.rb +0 -78
- data/lib/rosy/rosy_config_data.rb +0 -121
- data/lib/shalmaneser/version.rb +0 -3
data/lib/db/db_wrapper.rb
DELETED
@@ -1,176 +0,0 @@
|
|
1
|
-
###########################
|
2
|
-
# DBWrapper:
|
3
|
-
# abstract class wrapping database interfaces,
|
4
|
-
# so we can have both an interface to an SQL server
|
5
|
-
# and an interface to SQLite in Shalmaneser
|
6
|
-
class DBWrapper
  # Name of the table this object operates on by default.
  # nil unless the object was produced by make_temp_table.
  attr_reader :table_name

  # Sequence number used to keep temp table names unique within one process.
  @temp_table_counter = 0

  class << self
    # Returns the next number of the temp-table sequence (1, 2, 3, ...).
    # The counter lives on DBWrapper itself, so all subclasses share it.
    def next_temp_table_number
      @temp_table_counter += 1
    end
  end

  ###
  # exp: experiment file object (RosyConfigData according to the original
  #      comment); it is only stored here.
  def initialize(exp)
    # remember experiment file
    @exp = exp

    # database handle: subclasses are expected to set this
    # to a real connection in their own initialize
    @database = nil

    # name of default table to access: none
    @table_name = nil
  end

  ###
  # close DB access
  #
  # Does nothing (and returns nil) if no database handle was ever set,
  # instead of failing with NoMethodError on nil.
  def close
    @database.close if @database
  end

  ####
  # querying the database (abstract):
  # subclasses return a DBResult object
  def query(query)
    raise "Overwrite me"
  end

  ####
  # querying the database (abstract):
  # no result value
  def query_noretv(query)
    raise "Overwrite me"
  end

  ###
  # list all tables in the database (abstract, no default here)
  #
  # returns: list of strings
  def list_tables
    raise "Overwrite me"
  end

  ###
  # make a table (abstract)
  #
  # table_name:         string
  # column_formats:     array of pairs [column_name, column_format] (strings)
  # index_column_names: array of column names (strings)
  # indexname:          string: name of automatically created index column
  #
  # returns: nothing
  def create_table(table_name, column_formats, index_column_names, indexname)
    raise "overwrite me"
  end

  ###
  # remove a table
  # (issues a DROP TABLE statement through query_noretv)
  def drop_table(table_name)
    query_noretv("DROP TABLE " + table_name)
  end

  ###
  # list all column names of a table,
  # derived from list_column_formats
  #
  # returns: array of strings
  def list_column_names(table_name)
    list_column_formats(table_name).map { |col_name, _col_format| col_name }
  end

  #####
  # list_column_formats (abstract)
  #
  # list column names and column types of this table
  #
  # returns: array of pairs [column name, column format] (strings)
  def list_column_formats(table_name)
    raise "Overwrite me"
  end

  ####
  # num_rows (abstract)
  #
  # determine the number of rows in a table
  # returns: integer
  def num_rows(table_name)
    raise "Overwrite me"
  end

  ####
  # make a temporary table: basically just make a table
  #
  # column_formats:     array of pairs [column_name, column_format] (strings)
  # index_column_names: array of column names (strings)
  # indexname:          string: name of autoincrement primary index
  #
  # returns: a clone of this object (so: same subclass) whose @table_name
  # is set to the name of the freshly created table. The receiver itself
  # is left untouched. clone is shallow, so the copy refers to the same
  # @database object as the receiver.
  def make_temp_table(column_formats, index_column_names, indexname)
    temp_obj = clone
    temp_obj.initialize_temp_table(column_formats, index_column_names, indexname)
    temp_obj
  end

  ###
  # drop the table created by make_temp_table
  #
  # raises a RuntimeError if this object has no table name set
  def drop_temp_table
    unless @table_name
      raise "can only do drop_temp_table() for objects that have a temp table"
    end
    drop_table(@table_name)
  end

  ##############################
  protected

  # Picks a name for the temp table and creates the table.
  #
  # The name is "t" followed by digits only: the current time with the
  # decimal point removed, the process id, and a per-process sequence
  # number. The time stamp alone is not enough: two tables requested within
  # the same clock tick, or by two processes using the same database,
  # would otherwise end up with the same name.
  def initialize_temp_table(column_formats, index_column_names, indexname)
    @table_name = "t" +
                  Time.new.to_f.to_s.delete(".") +
                  Process.pid.to_s +
                  DBWrapper.next_temp_table_number.to_s
    create_table(@table_name, column_formats, index_column_names, indexname)
  end
end
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
######################################################################
|
129
|
-
# DBResult:
|
130
|
-
# abstract class keeping query results
|
131
|
-
#
|
132
|
-
# instantiate for the DB package used
|
133
|
-
class DBResult
  ###
  # Wraps a query result and keeps it for later access.
  # value: the result object handed back by the DB package in use
  def initialize(value)
    @result = value
  end

  # Column names of the result: there is no default,
  # subclasses have to provide this.
  def list_column_names
    raise "Overwrite me"
  end

  # Number of rows, as reported by the wrapped result object.
  def num_rows
    @result.num_rows
  end

  # Hands every row the wrapped result yields on to the caller's block.
  # (The original comment describes a row as an array of values.)
  def each
    @result.each do |row|
      yield row
    end
  end

  # Like each, but goes through the wrapped result's each_hash
  # (the original comment describes a row as column name => column value).
  def each_hash
    @result.each_hash do |row_hash|
      yield row_hash
    end
  end

  # Meant to rewind the object so that each() can be run again.
  # THE DEFAULT DOES NOTHING, PLEASE OVERWRITE
  def reset
  end

  # Frees the wrapped result object.
  def free
    @result.free
  end

  # Fetches one row from the wrapped result
  # (the original comment describes it as an array of column contents).
  def fetch_row
    @result.fetch_row
  end
end
|
176
|
-
|
data/lib/db/sql_query.rb
DELETED
@@ -1,243 +0,0 @@
|
|
1
|
-
# class SQLQuery
|
2
|
-
# KE, SP 27.1.05
|
3
|
-
#
|
4
|
-
# provides static methods that generate SQL queries as strings
|
5
|
-
# that can then be passed on to the database
|
6
|
-
|
7
|
-
require "common/ruby_class_extensions"
|
8
|
-
|
9
|
-
require "common/RosyConventions"
|
10
|
-
|
11
|
-
class SQLQuery
  #####
  # SQLQuery.insert
  #
  # query created: insert a new row into a given database table
  # the new row is given as a list of pairs [column_name, value]
  #
  # table_name:        string: table name
  # field_value_pairs: array of pairs [column_name (string), cell_value (object)]
  #
  # example:
  #   insert into table01 (field01,field02,field03,field04,field05) values
  #   (2, 'second', 'another', '1999-10-23', '10:30:00');
  #
  # returns: string
  # raises a RuntimeError if one of the cell values is nil
  def self.insert(table_name, field_value_pairs)
    column_list = field_value_pairs.map { |column_name, _cell_value| column_name }.join(",")

    value_list = field_value_pairs.map { |column_name, cell_value|
      if cell_value.nil?
        raise "SQL query construction error: Nil value for column #{column_name}"
      end
      SQLQuery.stringify_value(cell_value)
    }.join(",")

    "INSERT INTO " + table_name + "(" + column_list + ") VALUES (" + value_list + ");"
  end

  #####
  # SQLQuery.select
  #
  # query created: select from given database tables
  # all column entries that conform to the given description:
  # - names of the columns to be selected (or the string "*")
  # - only those column entries where the row matches the given
  #   row restrictions: each restriction contributes "name eqsymb value"
  #   to the WHERE clause, all of them joined by AND
  # - optionally, at most N lines => LIMIT N
  # - If more than one DB table is named, make a join
  # - Value restrictions: If it doesn't say which DB table to use,
  #   use the first one listed in table_col_pairs
  #
  # Use with only one database table creates queries like e.g.
  #   SELECT column1, column2 FROM table WHERE column3=val3 AND column4!=val4
  # or:
  #   SELECT DISTINCT column1, column2 FROM table WHERE column3=val3 AND column4!=val4 LIMIT 10
  #
  # Use with 2 table/column entries creates queries like
  #   SELECT table1.column1, table1.column2 FROM table1, table2 WHERE table1.column1=val3 AND table1.id=table2.id
  #
  # table_col_pairs:  array of objects responding to table_obj (which in turn
  #                   responds to table_name) and columns ("*" or array of strings)
  # row_restrictions: nil, or array of objects responding to get()
  #                   (=> [name, eqsymb, value]), val_is_variable and
  #                   table_name_included
  # var_hash:         further parameters, looked up by STRING key:
  #   "line_limit": integer: select at most N lines. if nil, all lines are chosen
  #   "distinct":   boolean: return each tuple only once. if nil or false, duplicates are kept
  #
  # returns: string.
  # raises a RuntimeError if there are no tables, if no columns at all are
  # selected, if a column name is nil/empty, or if a restriction value is nil
  def self.select(table_col_pairs, row_restrictions, var_hash = {})
    raise "Zero tables to select from" if table_col_pairs.empty?

    ## column names to select: one entry per table that contributes columns
    selected_columns = table_col_pairs.map { |tc|
      if tc.columns == "*"
        # all columns from this table
        SQLQuery.prepend_tablename(tc.table_obj.table_name, "*")

      elsif tc.columns.is_a?(Array) && !tc.columns.empty?
        # at least one column from this table
        tc.columns.map { |c|
          if c.nil? || c.empty?
            raise "Got nil/empty value within the column name list"
          end
          SQLQuery.prepend_tablename(tc.table_obj.table_name, c)
        }.join(", ")
      end
      # anything else: no columns from this table (nil, dropped by compact)
    }.compact

    raise "Empty select: zero columns selected" if selected_columns.empty?

    ## SELECT [DISTINCT] columns
    query = "SELECT ".dup
    query << "DISTINCT " if var_hash["distinct"]
    query << selected_columns.join(", ")

    ## FROM table name(s)
    query << " FROM " << table_col_pairs.map { |tc| tc.table_obj.table_name }.join(", ")

    ## WHERE row_restrictions
    unless row_restrictions.nil? || row_restrictions.empty?
      conditions = row_restrictions.map { |restr_obj|
        # get the actual restriction out of its object
        # form: name(string) eqsymb(string: =, !=) value(object)
        name, eqsymb, value = restr_obj.get
        if value.nil?
          raise "SQL query construction error: Nil value for column #{name}"
        end

        # a plain value gets quoted/escaped, a variable name is used verbatim
        value = SQLQuery.stringify_value(value) unless restr_obj.val_is_variable

        # unless the name already carries its table name:
        # prepend name of first table in table_col_pairs
        unless restr_obj.table_name_included
          name = SQLQuery.prepend_tablename(table_col_pairs.first.table_obj.table_name, name)
        end

        name + eqsymb + value
      }
      query << " WHERE " << conditions.join(" AND ")
    end

    ## LIMIT at_most_that_many_lines
    query << " LIMIT " << var_hash["line_limit"].to_s if var_hash["line_limit"]

    query << ";"
  end

  #####
  # SQLQuery.update
  #
  # query created: overwrite several cells in possibly multiple rows of a
  # database table with new values
  # rows are selected via row restrictions
  #
  # example:
  #   update table01 set field04=19991022, field05=062218 where field01=1;
  #
  # table_name:        string: table name
  # field_value_pairs: array of pairs [column name (string), new value (object)]
  # row_restrictions:  nil, or array of objects responding to get()
  #                    (=> [name, eqsymb, value]). If nil or empty, the query
  #                    has no WHERE clause and therefore addresses ALL rows,
  #                    the same convention SQLQuery.select uses.
  #                    Unlike select, restriction names are used as given and
  #                    restriction values are always treated as plain values.
  #
  # returns: string
  # raises a RuntimeError if a new value or a restriction value is nil
  def self.update(table_name, field_value_pairs, row_restrictions)
    assignments = field_value_pairs.map { |field, value|
      if value.nil?
        raise "SQL query construction error: Nil value for column #{field}"
      end
      field + "=" + SQLQuery.stringify_value(value)
    }.join(", ")

    where_clause = ""
    unless row_restrictions.nil? || row_restrictions.empty?
      conditions = row_restrictions.map { |restr_obj|
        # get the actual restriction out of its object
        # form: name(string) eqsymb(string: =, !=) value(object)
        name, eqsymb, value = restr_obj.get
        if value.nil?
          raise "SQL query construction error: Nil value for column #{name}"
        end
        name + eqsymb + SQLQuery.stringify_value(value)
      }
      where_clause = " WHERE " + conditions.join(" AND ")
    end

    "UPDATE " + table_name + " SET " + assignments + where_clause + ";"
  end

  #####
  # SQLQuery.add_columns
  #
  # query created: extend given table by
  # one or more columns given by their names and formats
  #
  # table_name:     string: table name
  # column_formats: array of pairs [column_name, column_format] (strings)
  #
  # returns: string
  def self.add_columns(table_name, column_formats)
    additions = column_formats.map { |column_name, column_format|
      " ADD COLUMN " + column_name + " " + column_format
    }.join(", ")

    "ALTER TABLE " + table_name + additions + ";"
  end

  #####
  # SQLQuery.stringify_value turns a value into its SQL literal:
  # strings (including instances of String subclasses) are put in single
  # quotes, with the quote characters " ' ` replaced by the placeholders
  # QQUOT0, QQUOT1, QQUOT2; every other object is rendered via to_s.
  #
  # NOTE(review): backslashes are passed through unchanged; whether that is
  # safe depends on the database in use -- confirm before feeding this
  # untrusted text.
  #
  # returns: string
  def self.stringify_value(value)
    return value.to_s unless value.is_a?(String)

    "'" + value.gsub(/"/, "QQUOT0").gsub(/'/, "QQUOT1").gsub(/`/, "QQUOT2") + "'"
  end

  #####
  # SQLQuery.unstringify_value undoes the placeholder replacement of
  # stringify_value (it does not remove surrounding quotes)
  # please apply only to strings
  def self.unstringify_value(value)
    value.gsub(/QQUOT0/, '"').gsub(/QQUOT1/, "'").gsub(/QQUOT2/, "`")
  end

  ####
  # SQLQuery.prepend_tablename
  #
  # auxiliary method for select:
  # prepend table name to column name,
  # but only if the column name does not already include a table name
  # (i.e. does not contain a ".")
  def self.prepend_tablename(table_name, column_name)
    return column_name if column_name.include?(".")

    table_name + "." + column_name
  end
end
|
Binary file
|
data/lib/ext/maxent/Train.class
DELETED
Binary file
|
data/lib/fred/Baseline.rb
DELETED
@@ -1,150 +0,0 @@
|
|
1
|
-
# Baseline
|
2
|
-
# Katrin Erk April 05
|
3
|
-
#
|
4
|
-
# baseline for WSD:
|
5
|
-
# always assign most frequent sense
|
6
|
-
# The baseline doesn't do binary classifiers.
|
7
|
-
|
8
|
-
require "fred/FredConventions"
|
9
|
-
require "fred/FredSplitPkg"
|
10
|
-
require "fred/FredFeatures"
|
11
|
-
require "fred/FredDetermineTargets"
|
12
|
-
|
13
|
-
class Baseline
|
14
|
-
###
|
15
|
-
# new
|
16
|
-
#
|
17
|
-
# get splitlog dir (if any) along with everything else
|
18
|
-
# because we are only evaluating the training data
|
19
|
-
# at test time
|
20
|
-
#
|
21
|
-
def initialize(exp, # FredConfigData object
|
22
|
-
split_id = nil) # string: split ID
|
23
|
-
@exp = exp
|
24
|
-
@split_id = split_id
|
25
|
-
|
26
|
-
# for each lemma: remember prevalent sense
|
27
|
-
@lemma_to_sense = Hash.new()
|
28
|
-
|
29
|
-
if @split_id
|
30
|
-
split_obj = FredSplitPkg.new(@exp)
|
31
|
-
end
|
32
|
-
|
33
|
-
lemma_done = Hash.new()
|
34
|
-
|
35
|
-
# iterate through lemmas
|
36
|
-
@target_obj = Targets.new(@exp, nil, "r")
|
37
|
-
unless @target_obj.targets_okay
|
38
|
-
# error during initialization
|
39
|
-
$stderr.puts "Error: Could not read list of known targets, bailing out."
|
40
|
-
exit 1
|
41
|
-
end
|
42
|
-
|
43
|
-
@target_obj.get_lemmas().each { |lemmapos|
|
44
|
-
|
45
|
-
if @split_id
|
46
|
-
# read training split of answer keys
|
47
|
-
answer_obj = AnswerKeyAccess.new(@exp, "train", lemmapos, "r", @split_id, "train")
|
48
|
-
else
|
49
|
-
# read full answer key file of training data
|
50
|
-
answer_obj = AnswerKeyAccess.new(@exp, "train", lemmapos, "r")
|
51
|
-
end
|
52
|
-
|
53
|
-
count_senses = Hash.new(0)
|
54
|
-
|
55
|
-
answer_obj.each { |lemma, pos, ids, sid, senses_all, senses_this|
|
56
|
-
# senses_this may include more than one sense for multi-label assignment
|
57
|
-
senses_this.each { |sense|
|
58
|
-
count_senses[sense] += 1
|
59
|
-
}
|
60
|
-
}
|
61
|
-
|
62
|
-
@lemma_to_sense[lemmapos] = count_senses.keys().max { |a, b|
|
63
|
-
count_senses[a] <=> count_senses[b]
|
64
|
-
}
|
65
|
-
}
|
66
|
-
|
67
|
-
|
68
|
-
@lemma = nil
|
69
|
-
end
|
70
|
-
|
71
|
-
###
|
72
|
-
def train(infilename)
|
73
|
-
# no training here
|
74
|
-
end
|
75
|
-
|
76
|
-
###
|
77
|
-
def write(classifier_file)
|
78
|
-
# no classifiers to write
|
79
|
-
end
|
80
|
-
|
81
|
-
def exists?(classifier_file)
|
82
|
-
return true
|
83
|
-
end
|
84
|
-
|
85
|
-
def read(classifier_file)
|
86
|
-
values = deconstruct_fred_classifier_filename(File.basename(classifier_file))
|
87
|
-
@lemma = values["lemma"]
|
88
|
-
if @lemma
|
89
|
-
return true
|
90
|
-
else
|
91
|
-
$stderr.puts "Warning: couldn't determine lemma name in #{classifier_file}, skipping"
|
92
|
-
return false
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
|
97
|
-
def read_resultfile(filename)
|
98
|
-
retv = Array.new()
|
99
|
-
begin
|
100
|
-
f = File.new(filename)
|
101
|
-
rescue
|
102
|
-
raise "Could not read baseline result file #{filename}"
|
103
|
-
end
|
104
|
-
|
105
|
-
f.each { |line|
|
106
|
-
retv << [[ line.chomp(), 1.0 ]]
|
107
|
-
}
|
108
|
-
|
109
|
-
return retv
|
110
|
-
end
|
111
|
-
|
112
|
-
def apply(infilename, outfilename)
|
113
|
-
# open input and output file
|
114
|
-
begin
|
115
|
-
out_f = File.new(outfilename, "w")
|
116
|
-
rescue
|
117
|
-
$stderr.puts "Error: cannot write to classification output file #{outfilename}."
|
118
|
-
exit 1
|
119
|
-
end
|
120
|
-
begin
|
121
|
-
f = File.new(infilename)
|
122
|
-
rescue
|
123
|
-
$stderr.puts "Error: cannot read feature file #{infilename}."
|
124
|
-
exit 1
|
125
|
-
end
|
126
|
-
|
127
|
-
# deconstruct input filename to determine lemma
|
128
|
-
unless @lemma
|
129
|
-
# something went wrong in read()
|
130
|
-
return false
|
131
|
-
end
|
132
|
-
|
133
|
-
# do we have a sense for this?
|
134
|
-
unless (sense = @lemma_to_sense[@lemma])
|
135
|
-
# nope: assign "NONE" (or whatever the null label is here)
|
136
|
-
sense = @exp.get("negsense")
|
137
|
-
unless sense
|
138
|
-
sense = "NONE"
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
|
-
f.each { |line|
|
143
|
-
out_f.puts sense
|
144
|
-
}
|
145
|
-
out_f.close()
|
146
|
-
f.close()
|
147
|
-
|
148
|
-
return true
|
149
|
-
end
|
150
|
-
end
|