shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,280 +0,0 @@
1
- # DBSQLite: a subclass of DBWrapper.
2
- #
3
- # Use SQLite to access a database.
4
- # Use the Ruby sqlite3 interface package for that.
5
-
6
- require 'sqlite3'
7
- require 'tempfile'
8
-
9
- require 'db/db_wrapper'
10
-
11
- #################
12
class DBSQLiteResult < DBResult
  # Wraps a SQLite query result so it can be iterated more than once.
  #
  # value: the raw result object; it is handed to the superclass constructor.
  #        NOTE(review): the methods below read it back as @result, which is
  #        presumably assigned by DBResult#initialize -- confirm there.
  #        It must respond to columns, each, next, reset and close.
  def initialize(value)
    super(value)
    # number of rows consumed so far via each/each_hash/fetch_row
    @counter = 0
  end

  ###
  # Column names of the result, as reported by the underlying result object.
  def list_column_names
    @result.columns
  end

  # Number of rows in the result: returns an integer.
  #
  # Counting requires a full pass over the result, so the current
  # iteration position is saved first and restored afterwards.
  def num_rows
    saved_position = @counter

    # rewind and count every row
    reset
    total = 0
    each { |_row| total += 1 }

    # rewind again and step forward to where the caller had been
    reset
    until @counter >= saved_position
      @result.next
      @counter += 1
    end

    total
  end

  # Yields each row as an array of strings (every cell goes through to_s).
  def each
    @result.each do |row|
      @counter += 1
      yield(row.map { |cell| cell.to_s })
    end
  end

  # Yields each row as a hash: column name => column value (as a string).
  # Column names are taken from the row's own fields list.
  def each_hash
    @result.each do |row|
      @counter += 1

      record = {}
      row.fields.each_with_index do |column, position|
        record[column] = row[position].to_s
      end
      yield record
    end
  end

  ###
  # Rewind the result so that each() can be run again on it.
  def reset
    @result.reset
    @counter = 0
  end

  # Release the underlying result object.
  def free
    @result.close
  end

  # Returns the next row as delivered by the underlying result object
  # (cells are NOT converted to strings here, unlike in each()).
  def fetch_row
    @counter += 1
    @result.next
  end
end
92
-
93
- #################
94
class DBSQLite < DBWrapper
  ###
  # initialization:
  #
  # Opens the database file "<dir><identifier>.db".
  #
  # exp:        RosyConfigData experiment file object (passed to the superclass)
  # dir:        string: directory for Shalmaneser internal data, ends in "/"
  # identifier: string: identifier to use for the database
  #
  # dir and identifier may both be nil if this object is only created
  # in order to make temp databases; @database is nil in that case and
  # the query methods below return nil without doing anything.
  def initialize(exp, dir = nil, identifier = nil)
    super(exp)

    @database = if dir && identifier
                  SQLite3::Database.new(dir + identifier.to_s + ".db")
                end

    # temp file backing a temp database (set by initialize_temp_table)
    @tf = nil
  end

  ###
  # make a table
  #
  # table_name:         string
  # column_formats:     array of pairs [column_name, column_format]
  # index_column_names: array of column names; these columns get the
  #                     extra keyword KEY in their declaration
  # indexname:          string: name of the INTEGER PRIMARY KEY column
  #
  # returns: nothing
  def create_table(table_name, column_formats, index_column_names, indexname)
    column_decls = column_formats.map do |name, format|
      if index_column_names.include?(name)
        "#{name} KEY #{format}"
      else
        "#{name} #{format}"
      end
    end

    # the primary key column always comes first
    statement = "CREATE TABLE #{table_name} (#{indexname} INTEGER PRIMARY KEY"
    statement << ", " << column_decls.join(",") unless column_decls.empty?
    statement << ");"

    query_noretv(statement)
  end

  ###
  # remove a table
  def drop_table(table_name)
    query_noretv("DROP TABLE #{table_name}")
  end

  ###
  # Run a query and return its result wrapped in a DBSQLiteResult,
  # or nil if no database is open.
  def query(query)
    return nil unless @database

    DBSQLiteResult.new(@database.query(query))
  end

  ####
  # querying the database:
  # no result value (always returns nil)
  def query_noretv(query)
    @database.execute(query) if @database
    nil
  end

  ###
  # list all names recorded in sqlite_master
  #
  # returns: array of strings, or nil if no database is open
  def list_tables
    return nil unless @database

    # execute() returns one array per row. Calling to_s on such a row yields
    # its inspect form (e.g. '["name"]') on Ruby >= 1.9, which never equals a
    # plain table name. Flatten the single-column rows before converting.
    @database.execute("select name from sqlite_master;").flatten.map { |name| name.to_s }
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table, parsed out of the
  # CREATE statement that SQLite stores for it
  #
  # returns: array:string*string, list of pairs [column name, column format];
  #          nil if no database is open.
  # Prints an error and exits with status 1 if the table description
  # cannot be read or parsed.
  def list_column_formats(table_name)
    return nil unless @database

    # the column called 'sql' in sqlite_master holds the 'create' statement
    create_stmt = @database.get_first_value(
      "select sql from sqlite_master where name=='#{table_name}';"
    )
    unless create_stmt
      $stderr.puts "SQLite error: could not read description of table #{table_name}"
      exit 1
    end

    match = /^\s*create table \S+\s*\((.*)\)\s*$/i.match(create_stmt)
    unless match
      $stderr.puts "SQLite error: cannot read column names"
      exit 1
    end

    # we now have something of shape ' a key varchar2(30), b varchar2(30)':
    # split at the comma, strip whitespace, drop the KEY/PRIMARY keywords,
    # and what is left should be the pair [column name, column format].
    # NOTE(review): a format that itself contains a comma would be split
    # in the wrong place here.
    match[1].split(",").map do |col_descrip|
      pieces = col_descrip.strip.split.reject do |entry|
        entry =~ /^key$/i || entry =~ /^primary$/i
      end
      if pieces.length > 2
        $stderr.puts "Warning: problematic column format in #{col_descrip}, may be parsed wrong."
      end
      pieces
    end
  end

  ####
  # num_rows
  #
  # determine the number of rows in a table
  # returns: integer or nil
  def num_rows(table_name)
    return nil unless @database

    rows_s = @database.get_first_value("select count(*) from #{table_name}")
    rows_s ? rows_s.to_i : nil
  end

  ####
  # make_temp_table (make a table in a new, temporary file) is not
  # redefined here: the superclass implementation is used and calls
  # initialize_temp_table below.

  # Discard the temporary database.
  # Safe to call even if no temp table was ever created.
  def drop_temp_table
    # close(true) also unlinks the temp file
    @tf.close(true) if @tf
    @database = nil
  end

  ##############################
  protected

  # Create a fresh temp file, open it as the database of this object,
  # and create the table "temptable" in it.
  def initialize_temp_table(column_formats, index_column_names, indexname)
    @table_name = "temptable"
    @tf = Tempfile.new("temp_table")
    # only the path is needed; SQLite opens the file itself
    @tf.close
    @database = SQLite3::Database.new(@tf.path)
    create_table(@table_name, column_formats, index_column_names, indexname)
  end
end
@@ -1,239 +0,0 @@
1
- # class DBTable
2
- # KE, SP 27.1.05
3
- #
4
- # Manages one table in a (given) SQL database
5
- # Doesn't know anything about the ROSY application
6
- # Just creating a table, changing the table, and accessing it.
7
- #
8
-
9
- require 'db/sql_query'
10
- require "common/RosyConventions"
11
-
12
class DBTable
  attr_reader :index_name, :table_name

  #####
  # new
  #
  # Creates or opens the table managed by this object.
  # The name of the table (given as parameter) can be new, in which case the
  # table is created, or old, in which case we check whether its column names
  # match the ones given in the parameters.
  #
  # The table format is given in the form of column formats (column names and
  # column formats, formats are the usual SQLy things). Additionally, a subset
  # of the column names can be designated index columns.
  #
  # DBTable reserves the column name "XXindexXX" for the primary index column.
  #
  # Columns that are added later using change_format_add_columns must carry
  # the 'addcol_prefix' in their names; these columns are not checked against
  # the column names when opening an existing table.
  #
  # db_obj:     database object; must respond to list_tables, create_table,
  #             drop_table, list_column_names, list_column_formats and
  #             query_noretv
  # table_name: string: name of DB table (existing/new)
  # mode:       'new': starts new DB table, removes old if it exists.
  #             'open': reopens existing DB table
  # hash:       parameter name => parameter value, depending on mode
  #   mode 'new' needs exactly:
  #     'col_formats':   array of pairs [column_name, column_format]
  #     'index_cols':    array of column names used to index the table
  #     'addcol_prefix': string: prefix for names of additional columns
  #   mode 'open' accepts:
  #     'col_names':     array of column names; may be missing/nil, in that
  #                      case the column name match isn't tested
  #     'addcol_prefix': string: prefix for names of additional columns
  #
  # raises RuntimeError on any inconsistency in the parameters.
  def initialize(db_obj, table_name, mode, hash={})
    @index_name = "XXindexXX"
    @db_obj = db_obj
    @table_name = table_name
    # remembered so that change_format_add_columns can enforce it
    @addcol_prefix = hash['addcol_prefix']

    case mode
    when 'new'
      create_new_table(hash)
    when 'open'
      open_existing_table(hash)
    else
      raise "Parameter 'mode' needs to be either 'new' or 'open'! I got " + mode.to_s
    end
  end

  #####
  # list_column_names
  #
  # list column names of this table
  #
  # returns: array:string, list of column names
  def list_column_names
    @db_obj.list_column_names(@table_name)
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table
  #
  # returns: array:string*string, list of pairs [column name, column format]
  def list_column_formats
    @db_obj.list_column_formats(@table_name)
  end

  #####
  # change_format_add_columns
  #
  # adds one or more columns to the table managed by this object
  # columns are given as pairs [column_name, column_format], as above;
  # every name must start with the 'addcol_prefix' given at construction
  # (if no prefix was given, any name is accepted)
  #
  # returns: nothing
  def change_format_add_columns(column_formats)
    if column_formats.nil? || column_formats.empty?
      raise "Need nonempty column_formats list"
    end

    prefix = @addcol_prefix.to_s
    column_formats.each do |col_name, _col_format|
      unless col_name.to_s.start_with?(prefix)
        raise "Columns that are added need to have prefix #{@addcol_prefix}!"
      end
    end

    execute_command(SQLQuery.add_columns(@table_name, column_formats))
  end

  #####
  # change_format_remove_column
  #
  # removes one column from the table managed by this object;
  # only prints a warning if the table has no such column
  #
  # column_name: string: name of the column to remove
  #
  # returns: nothing
  def change_format_remove_column(column_name)
    unless list_column_names.include?(column_name)
      $stderr.puts "WARNING: Cannot remove column #{column_name}: I don't have it"
      return
    end

    execute_command("ALTER TABLE #{@table_name} DROP COLUMN #{column_name}")
  end

  #####
  # insert_row
  #
  # inserts a new row into the table and fills cells with values, as specified
  # by the column_value_pairs (array of pairs [column_name, column_value])
  #
  # returns: nothing
  def insert_row(column_value_pairs)
    if column_value_pairs.nil? || column_value_pairs.empty?
      raise "Need nonempty column_value_pairs list"
    end

    execute_command(SQLQuery.insert(@table_name, column_value_pairs))
  end

  #####
  # update_row
  #
  # update column values for a given row which is identified
  # via its index (content of the primary index column)
  #
  # index:              content of the index column for the row to change
  # column_value_pairs: array of pairs [column_name, column_value]
  #
  # returns: nothing
  def update_row(index, column_value_pairs)
    if column_value_pairs.nil? || column_value_pairs.empty?
      raise "Need nonempty column_value_pairs list"
    end

    restriction = [ValueRestriction.new(@index_name, index)]
    execute_command(SQLQuery.update(@table_name, column_value_pairs, restriction))
  end

  ####
  private

  ###
  # mode 'new': validate the parameters, drop any old table of the same
  # name, and create the table afresh
  def create_new_table(hash)
    # sanity check: exactly the required parameters present?
    unless hash.keys.sort == ['addcol_prefix', 'col_formats', 'index_cols']
      raise "Expecting hash parameters 'addcol_prefix', 'col_formats', 'index_cols'.\n" +
            "I got: " + hash.keys.join(", ")
    end

    # sanity check: main index column name should be unique
    all_column_names = hash['col_formats'].map { |name, _format| name }
    if all_column_names.include?(@index_name)
      raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
    end

    # sanity check: index_column_names should be included in column_names
    hash['index_cols'].each do |name|
      unless all_column_names.include?(name)
        raise "[DBTable] #{name} is in the list of index names, but it isn't in the list of column names."
      end
    end

    # an existing table of this name is replaced
    @db_obj.drop_table(@table_name) if @db_obj.list_tables.include?(@table_name)

    @db_obj.create_table(@table_name, hash['col_formats'],
                         hash['index_cols'], @index_name)
  end

  ###
  # mode 'open': validate the parameters and check that the existing
  # table has the expected columns
  def open_existing_table(hash)
    # sanity check: no parameters other than the known ones present?
    unless (hash.keys - ['addcol_prefix', 'col_names']).empty?
      raise "Expecting hash parameters 'addcol_prefix', 'col_names'.\n" +
            "I got: " + hash.keys.join(", ")
    end

    expected_names = hash['col_names']

    # sanity check: main index column name should be unique
    if expected_names && expected_names.include?(@index_name)
      raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
    end

    # does a table with name table_name exist?
    unless @db_obj.list_tables.include?(@table_name)
      raise "[DBTable] Sorry, I cannot find a database table named #{@table_name}."
    end

    # no column names given: nothing more to check
    return unless expected_names

    # compare only the "core" columns: skip the index column and
    # everything that was added later under the addcol prefix
    existing_fields = @db_obj.list_column_names(@table_name).reject do |col|
      added_column?(col) || col == @index_name
    end

    unless existing_fields.sort == expected_names.sort
      raise "[DBTable] Column names in the DB table #{@table_name}\n" +
            "don't match feature specification in the experiment file.\n" +
            "Table:\n\t" + existing_fields.sort.join(", ") +
            "\n\nExp. file:\n\t" + expected_names.sort.join(", ")
    end
  end

  ###
  # true if the column name carries the addcol prefix.
  # Without a (nonempty) prefix no column counts as an added column;
  # otherwise every column would be filtered out of the name comparison.
  def added_column?(col_name)
    prefix = @addcol_prefix.to_s
    !prefix.empty? && col_name.to_s.start_with?(prefix)
  end

  ###
  # execute_command:
  # execute DB command
  #
  # returns nil: the commands in this package are all
  # not of the kind that requires a return value.
  # On failure, reports the command and the reason on stderr
  # and exits with status 1.
  def execute_command(command)
    @db_obj.query_noretv(command)
  rescue StandardError => e
    $stderr.puts "Error executing SQL query. Command was:\n" + command.to_s
    $stderr.puts "Reason: #{e.message}"
    exit 1
  end
end