shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,176 +0,0 @@
1
###########################
# DBWrapper:
# abstract class wrapping database interfaces,
# so we can have both an interface to an SQL server
# and an interface to SQLite in Shalmaneser.
#
# Subclasses are expected to set @database in their own initializer and to
# override every method that raises "Overwrite me" here.
class DBWrapper
  # Name of the table this object is bound to; nil unless the object was
  # produced by make_temp_table.
  attr_reader :table_name

  ###
  # exp: experiment file object (RosyConfigData), kept for subclasses
  def initialize(exp)
    # remember experiment file
    @exp = exp

    # database handle: none yet,
    # please set to some other value in subclass initialization
    @database = nil

    # name of default table to access: none
    @table_name = nil
  end

  ###
  # close DB access.
  # Does nothing if no database handle has been set, so it is safe to call
  # on an object whose subclass never opened a connection.
  def close
    @database.close if @database
  end

  ####
  # querying the database:
  # returns a DBResult object (to be implemented by subclasses)
  def query(query)
    raise "Overwrite me"
  end

  ####
  # querying the database:
  # no result value (to be implemented by subclasses)
  def query_noretv(query)
    raise "Overwrite me"
  end

  ###
  # list all tables in the database:
  # no default here
  #
  # returns: list of strings
  def list_tables
    raise "Overwrite me"
  end

  ###
  # make a table
  #
  # table_name:         string
  # column_formats:     array of pairs [column_name, column_format]
  # index_column_names: array of column names
  # indexname:          string: name of automatically created index column
  #
  # returns: nothing
  def create_table(table_name, column_formats, index_column_names, indexname)
    raise "overwrite me"
  end

  ###
  # remove a table (issued through query_noretv)
  def drop_table(table_name)
    query_noretv("DROP TABLE " + table_name)
  end

  ###
  # list all column names of a table;
  # derived from list_column_formats, which has no default
  #
  # returns: array of strings
  def list_column_names(table_name)
    list_column_formats(table_name).map { |col_name, _col_format| col_name }
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table
  #
  # returns: array of pairs [column name, column format]
  def list_column_formats(table_name)
    raise "Overwrite me"
  end

  ####
  # num_rows
  #
  # determine the number of rows in a table
  # returns: integer
  def num_rows(table_name)
    raise "Overwrite me"
  end

  ####
  # make a temporary table: basically just make a table
  #
  # column_formats:     array of pairs [column_name, column_format]
  # index_column_names: array of column names
  # indexname:          string: name of autoincrement primary index
  #
  # returns: DBWrapper object (or object of current subclass)
  # that has the @table_name attribute set to the name of a temporary table.
  # The returned object is a shallow clone of self, so it refers to the same
  # @database object as the receiver.
  def make_temp_table(column_formats, index_column_names, indexname)
    temp_obj = clone
    temp_obj.initialize_temp_table(column_formats, index_column_names, indexname)
    temp_obj
  end

  ###
  # drop the temporary table this object was created for;
  # raises if this object has no temp table
  def drop_temp_table
    unless @table_name
      raise "can only do drop_temp_table() for objects that have a temp table"
    end

    drop_table(@table_name)
  end

  ##############################
  protected

  # Picks a table name from the current time ("t" followed by the digits of
  # the float timestamp) and creates that table.
  # NOTE(review): two calls within the same timestamp resolution would pick
  # the same name; kept as in the original.
  def initialize_temp_table(column_formats, index_column_names, indexname)
    @table_name = "t" + Time.new.to_f.to_s.gsub(/\./, "")
    create_table(@table_name, column_formats, index_column_names, indexname)
  end
end
124
-
125
-
126
-
127
-
128
######################################################################
# DBResult:
# abstract class keeping query results
#
# instantiate for the DB package used.
# All methods except list_column_names and reset simply forward to the
# wrapped result object, which must therefore respond to
# num_rows, each, each_hash, free and fetch_row.
class DBResult
  ###
  # initialize with query result, and keep it
  def initialize(value)
    @result = value
  end

  # column names: NO DEFAULT
  def list_column_names
    raise "Overwrite me"
  end

  # number of rows: returns an integer
  def num_rows
    @result.num_rows
  end

  # yields each row as an array of values;
  # without a block, returns an Enumerator over the rows
  def each
    return enum_for(:each) unless block_given?

    @result.each { |row| yield row }
  end

  # yields each row as a hash: column name => column value;
  # without a block, returns an Enumerator over the row hashes
  def each_hash
    return enum_for(:each_hash) unless block_given?

    @result.each_hash { |row_hash| yield row_hash }
  end

  # reset object, such that each() can be run again
  # DEFAULT DOES NOTHING, PLEASE OVERWRITE
  def reset
  end

  # free result object
  def free
    @result.free
  end

  # returns row as an array of column contents
  def fetch_row
    @result.fetch_row
  end
end
176
-
@@ -1,243 +0,0 @@
1
- # class SQLQuery
2
- # KE, SP 27.1.05
3
- #
4
- # provides static methods that generate SQL queries as strings
5
- # that can then be passed on to the database
6
-
7
- require "common/ruby_class_extensions"
8
-
9
- require "common/RosyConventions"
10
-
11
# Collection of class-level methods that build SQL statements as strings.
# Nothing here talks to a database; the returned strings are meant to be
# handed to a database wrapper.
class SQLQuery
  # Quote characters are not escaped SQL-style; stringify_value replaces them
  # with these marker tokens and unstringify_value maps the tokens back.
  QUOTE_TO_TOKEN = { '"' => "QQUOT0", "'" => "QQUOT1", "`" => "QQUOT2" }.freeze
  TOKEN_TO_QUOTE = QUOTE_TO_TOKEN.invert.freeze

  #####
  # SQLQuery.insert
  #
  # query created: insert a new row into a given database table
  # the new row is given as a list of pairs [column_name, value]
  #
  # table_name:        string
  # field_value_pairs: array of pairs [column_name, cell_value]
  #
  # example:
  # INSERT INTO table01(field01,field02) VALUES (2,'second');
  #
  # returns: string
  # raises an error if any cell value is nil
  def self.insert(table_name, field_value_pairs)
    column_list = field_value_pairs.map { |column_name, _cell_value| column_name }.join(",")

    value_list = field_value_pairs.map { |column_name, cell_value|
      if cell_value.nil?
        raise "SQL query construction error: Nil value for column #{column_name}"
      end

      stringify_value(cell_value)
    }.join(",")

    "INSERT INTO #{table_name}(#{column_list}) VALUES (#{value_list});"
  end

  #####
  # SQLQuery.select
  #
  # query created: select from given database tables
  # all column entries that conform to the given description:
  # - names of the columns to be selected (or the string "*")
  # - only rows matching the given row restrictions
  # - optionally, at most N lines => LIMIT N
  # - If more than one DB table is named, all of them are listed after FROM
  # - Restrictions that do not carry their own table name are attributed
  #   to the first table listed in table_col_pairs
  #
  # table_col_pairs:  array of objects responding to table_obj (which in turn
  #                   responds to table_name) and columns ("*" or array of strings)
  # row_restrictions: nil, or array of objects responding to get (returning
  #                   [name, eqsymb, value]), val_is_variable and table_name_included
  # var_hash:         further parameters, string keys:
  #   "line_limit": integer: select at most N lines. if nil, all lines are chosen
  #   "distinct":   boolean: return each tuple only once
  #
  # Produces queries like
  #   SELECT DISTINCT t.column1, t.column2 FROM t WHERE t.column3=val3 LIMIT 10;
  #
  # returns: string.
  # raises an error if no tables are given, if no columns at all are selected,
  # if a column name is nil/empty, or if a restriction value is nil
  def self.select(table_col_pairs, row_restrictions, var_hash = {})
    raise "Zero tables to select from" if table_col_pairs.empty?

    # one entry per table that contributes columns; tables without columns drop out
    column_lists = table_col_pairs.map { |tc|
      if tc.columns == "*"
        prepend_tablename(tc.table_obj.table_name, "*")
      elsif tc.columns.is_a?(Array) && !tc.columns.empty?
        tc.columns.map { |c|
          if c.nil? || c.empty?
            raise "Got nil/empty value within the column name list"
          end

          prepend_tablename(tc.table_obj.table_name, c)
        }.join(", ")
      end
    }.compact

    raise "Empty select: zero columns selected" if column_lists.empty?

    sql = "SELECT "
    sql += "DISTINCT " if var_hash["distinct"]
    sql += column_lists.join(", ")

    ## FROM table name(s)
    sql += " FROM " + table_col_pairs.map { |tc| tc.table_obj.table_name }.join(", ")

    ## WHERE row_restrictions
    unless row_restrictions.nil? || row_restrictions.empty?
      conditions = row_restrictions.map { |restr_obj|
        # form: name(string) eqsymb(string: =, !=) value(object)
        name, eqsymb, value = restr_obj.get
        if value.nil?
          raise "SQL query construction error: Nil value for column #{name}"
        end

        # a variable name is inserted verbatim, a plain value gets quoted
        value = stringify_value(value) unless restr_obj.val_is_variable

        if restr_obj.table_name_included
          name + eqsymb + value
        else
          # prepend name of first table in table_col_pairs
          prepend_tablename(table_col_pairs.first.table_obj.table_name, name) + eqsymb + value
        end
      }
      sql += " WHERE " + conditions.join(" AND ")
    end

    ## LIMIT at_most_that_many_lines
    sql += " LIMIT " + var_hash["line_limit"].to_s if var_hash["line_limit"]

    sql + ";"
  end

  #####
  # SQLQuery.update
  #
  # query created: overwrite several cells in possibly multiple rows of a
  # database table with new values
  # rows are selected via row restrictions
  #
  # table_name:        string
  # field_value_pairs: array of pairs [column_name, new_value]
  # row_restrictions:  nil, or array of objects responding to get
  #                    (returning [name, eqsymb, value])
  #
  # example:
  # UPDATE table01 SET field04=19991022, field05=062218 WHERE field01=1;
  #
  # When row_restrictions is nil or empty, no WHERE clause is emitted (same
  # convention as SQLQuery.select), i.e. the statement addresses every row.
  #
  # returns: string
  # raises an error if any new value or restriction value is nil
  def self.update(table_name, field_value_pairs, row_restrictions)
    assignments = field_value_pairs.map { |field, value|
      if value.nil?
        raise "SQL query construction error: Nil value for column #{field}"
      end

      "#{field}=#{stringify_value(value)}"
    }.join(", ")

    sql = "UPDATE #{table_name} SET #{assignments}"

    unless row_restrictions.nil? || row_restrictions.empty?
      conditions = row_restrictions.map { |restr_obj|
        # form: name(string) eqsymb(string: =, !=) value(object)
        name, eqsymb, value = restr_obj.get
        if value.nil?
          raise "SQL query construction error: Nil value for column #{name}"
        end

        name + eqsymb + stringify_value(value)
      }
      sql += " WHERE " + conditions.join(" AND ")
    end

    sql + ";"
  end

  #####
  # SQLQuery.add_columns
  #
  # query created: extend given table by
  # one or more columns given by their names and formats
  #
  # table_name:     string
  # column_formats: array of pairs [column_name, column_format]
  #
  # returns: string
  def self.add_columns(table_name, column_formats)
    additions = column_formats.map { |column_name, column_format|
      " ADD COLUMN #{column_name} #{column_format}"
    }.join(", ")

    "ALTER TABLE #{table_name}#{additions};"
  end

  #####
  # SQLQuery.stringify_value turns a value into its SQL literal.
  # Strings (including String subclasses) are wrapped in single quotes and
  # have the characters " ' ` replaced by the tokens QQUOT0/QQUOT1/QQUOT2;
  # every other object is rendered with to_s.
  #
  # NOTE(review): backslashes are passed through unchanged, and a string that
  # already contains one of the tokens will not round-trip through
  # unstringify_value.
  #
  # returns: string
  def self.stringify_value(value)
    return value.to_s unless value.is_a?(String)

    "'" + value.gsub(/["'`]/, QUOTE_TO_TOKEN) + "'"
  end

  #####
  # SQLQuery.unstringify_value undoes the token replacement done by
  # stringify_value (it does not strip surrounding quotes)
  # please apply only to strings
  def self.unstringify_value(value)
    value.gsub(/QQUOT[012]/, TOKEN_TO_QUOTE)
  end

  ####
  # SQLQuery.prepend_tablename
  #
  # auxiliary method for select:
  # prepend table name to column name,
  # unless the column name already contains a "." (taken to mean that it
  # already includes a table name)
  def self.prepend_tablename(table_name, column_name)
    return column_name if column_name.include?(".")

    "#{table_name}.#{column_name}"
  end
end
Binary file
@@ -1,150 +0,0 @@
1
- # Baseline
2
- # Katrin Erk April 05
3
- #
4
- # baseline for WSD:
5
- # always assign most frequent sense
6
- # The baseline doesn't do binary classifiers.
7
-
8
- require "fred/FredConventions"
9
- require "fred/FredSplitPkg"
10
- require "fred/FredFeatures"
11
- require "fred/FredDetermineTargets"
12
-
13
# Most-frequent-sense baseline classifier.
#
# At construction time the training answer keys of every known lemma are read
# and the sense seen most often is remembered per lemma. "Training" and
# "writing" are no-ops; apply() emits the remembered sense once for every
# line of the feature file.
class Baseline
  ###
  # new
  #
  # get splitlog dir (if any) along with everything else
  # because we are only evaluating the training data
  # at test time
  #
  # exp:      FredConfigData object
  # split_id: string: split ID, or nil to use the full training data
  #
  # Exits the process with status 1 if the list of known targets
  # cannot be read.
  def initialize(exp, split_id = nil)
    @exp = exp
    @split_id = split_id

    # for each lemma: remember prevalent sense
    @lemma_to_sense = {}

    # NOTE(review): the created object is never used; the call is kept in case
    # the FredSplitPkg constructor has side effects - confirm and remove.
    FredSplitPkg.new(@exp) if @split_id

    # iterate through lemmas
    @target_obj = Targets.new(@exp, nil, "r")
    unless @target_obj.targets_okay
      # error during initialization
      $stderr.puts "Error: Could not read list of known targets, bailing out."
      exit 1
    end

    @target_obj.get_lemmas.each do |lemmapos|
      answer_obj =
        if @split_id
          # read training split of answer keys
          AnswerKeyAccess.new(@exp, "train", lemmapos, "r", @split_id, "train")
        else
          # read full answer key file of training data
          AnswerKeyAccess.new(@exp, "train", lemmapos, "r")
        end

      count_senses = Hash.new(0)
      answer_obj.each do |_lemma, _pos, _ids, _sid, _senses_all, senses_this|
        # senses_this may include more than one sense for multi-label assignment
        senses_this.each { |sense| count_senses[sense] += 1 }
      end

      # most frequent sense; nil if no sense was seen for this lemma
      @lemma_to_sense[lemmapos] = count_senses.keys.max_by { |sense| count_senses[sense] }
    end

    # lemma of the "classifier" currently loaded via read()
    @lemma = nil
  end

  ###
  # no training here
  def train(infilename)
  end

  ###
  # no classifiers to write
  def write(classifier_file)
  end

  # the baseline needs no classifier file, so it always "exists"
  def exists?(classifier_file)
    true
  end

  # Determine the lemma from the classifier file name and remember it
  # for the next call to apply().
  #
  # returns: true if a lemma could be determined, false otherwise
  def read(classifier_file)
    values = deconstruct_fred_classifier_filename(File.basename(classifier_file))
    @lemma = values["lemma"]
    return true if @lemma

    $stderr.puts "Warning: couldn't determine lemma name in #{classifier_file}, skipping"
    false
  end

  # Read a file written by apply().
  #
  # returns: array with one entry per line, each entry being
  #          [[label, 1.0]] (a single label with confidence 1.0)
  # raises an error if the file cannot be read
  def read_resultfile(filename)
    begin
      # readlines opens and closes the file itself, so no handle is left open
      lines = File.readlines(filename)
    rescue StandardError
      raise "Could not read baseline result file #{filename}"
    end

    lines.map { |line| [[line.chomp, 1.0]] }
  end

  # Write the baseline sense of the current lemma to outfilename,
  # once for every line of infilename.
  #
  # If no sense is known for the lemma, the experiment setting "negsense"
  # is used, or "NONE" if that is not set either.
  #
  # returns: true on success, false if read() did not determine a lemma.
  # Exits the process with status 1 if either file cannot be opened.
  def apply(infilename, outfilename)
    # open input and output file
    begin
      out_f = File.new(outfilename, "w")
    rescue StandardError
      $stderr.puts "Error: cannot write to classification output file #{outfilename}."
      exit 1
    end
    begin
      f = File.new(infilename)
    rescue StandardError
      $stderr.puts "Error: cannot read feature file #{infilename}."
      exit 1
    end

    begin
      # something went wrong in read()
      return false unless @lemma

      # do we have a sense for this?
      # if not: assign the null label configured for the experiment, or "NONE"
      sense = @lemma_to_sense[@lemma] || @exp.get("negsense") || "NONE"

      f.each { |_line| out_f.puts sense }
      true
    ensure
      # close both handles on every exit path, including the early return
      out_f.close
      f.close
    end
  end
end