frprep 0.0.1.prealpha
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
data/lib/rosy/DBMySQL.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
|
2
|
+
# DBMysql: a subclass of DBWrapper.
|
3
|
+
#
|
4
|
+
# Use a MySQL server to access a database.
|
5
|
+
# Use the Ruby mysql interface package for that.
|
6
|
+
|
7
|
+
require 'mysql'
|
8
|
+
|
9
|
+
|
10
|
+
require 'rosy/DBWrapper'
|
11
|
+
|
12
|
+
#################
|
13
|
+
class DBMySQLResult < DBResult
  # Wraps the result object handed back by Mysql#query.
  #
  # The row offset that is current at construction time is stored
  # so that reset() can later rewind to it.
  # NOTE(review): @result is presumably assigned by DBResult#initialize,
  # which is not visible in this file.
  def initialize(value)
    super(value)
    @row_first = @result.row_tell
  end

  ###
  # Rewind to the remembered first row, so that each() can be run again.
  def reset
    @result.row_seek(@row_first)
  end

  ###
  # Column names of this result.
  #
  # The current row offset is saved before the field descriptions are
  # fetched and restored afterwards.
  #
  # returns: list of strings
  def list_column_names
    saved_offset = @result.row_tell
    names = @result.fetch_fields.map(&:name)
    @result.row_seek(saved_offset)
    names
  end
end
|
41
|
+
|
42
|
+
|
43
|
+
#################
|
44
|
+
class DBMySQL < DBWrapper
  ###
  # initialization:
  #
  # Opens a connection to the MySQL server. Host, user, password and
  # database name are read from the experiment file object under the keys
  # 'host', 'user', 'passwd' and 'dbname'.
  # NOTE(review): @exp is presumably set by DBWrapper#initialize (not visible here).
  def initialize(exp) # RosyConfigData experiment file object
    super(exp)

    @database = Mysql.real_connect(@exp.get('host'), @exp.get('user'),
                                   @exp.get('passwd'), @exp.get('dbname'))
  end

  ###
  # make a table
  #
  # Builds and runs a single CREATE TABLE statement consisting of
  # - the auto-increment column <indexname>,
  # - one declaration per entry of column_formats,
  # - a PRIMARY KEY on <indexname>,
  # - one KEY(...) per entry of index_column_names.
  #
  # returns: nothing
  def create_table(table_name, # string
                   column_formats, # array: array: string*string [column_name,column_format]
                   index_column_names, # array: string: column_name
                   indexname) # string: name of automatically created index column
    # The clauses are joined with ", "; entries within the column list and
    # within the key list are joined with "," (no blank).
    clauses = ["#{indexname} INT NOT NULL AUTO_INCREMENT"]

    # column declarations
    unless column_formats.empty?
      clauses << column_formats.map { |name, format| name.to_s + " " + format.to_s }.join(",")
    end

    # primary key
    clauses << "PRIMARY KEY(#{indexname})"

    # other keys
    unless index_column_names.empty?
      clauses << index_column_names.map { |name| "KEY(#{name})" }.join(",")
    end

    query_noretv("CREATE TABLE #{table_name} (" + clauses.join(", ") + ");")
  end

  ####
  # querying the database:
  # returns a DBMySQLResult object wrapping the server's result,
  # or nil if the query produced no result object
  def query(query)
    result = @database.query(query)
    return nil unless result

    return DBMySQLResult.new(result)
  end

  ####
  # querying the database:
  # no result value (always returns nil)
  def query_noretv(query)
    @database.query(query)
    return nil
  end

  ###
  # list all tables in the database
  #
  # returns whatever the connection's list_tables reports
  # (documented in the original as an array of strings)
  def list_tables()
    return @database.list_tables()
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table,
  # read from the "Field" and "Type" columns of DESCRIBE <table_name>
  #
  # returns: array:string*string, list of pairs [column name, column format]
  def list_column_formats(table_name)
    retv = Array.new
    @database.query("DESCRIBE #{table_name}").each_hash { |field|
      retv << [field["Field"], field["Type"]]
    }
    return retv
  end

  ####
  # num_rows
  #
  # determine the number of rows in a table, as reported in the
  # "Rows" column of SHOW TABLE STATUS
  #
  # returns: integer, or nil if the table is not listed or the
  # server reports no row count for it
  def num_rows(table_name)
    @database.query("SHOW TABLE STATUS").each_hash { |status|
      next unless status["Name"] == table_name

      # The cell may arrive as a string; convert it so that the
      # documented integer is returned, as DBSQLite#num_rows does.
      rows = status["Rows"]
      return rows.nil? ? nil : rows.to_i
    }
    return nil
  end
end
|
@@ -0,0 +1,280 @@
|
|
1
|
+
# DBSQLite: a subclass of DBWrapper.
|
2
|
+
#
|
3
|
+
# Use SQLite to access a database.
|
4
|
+
# Use the Ruby sqlite3 interface package for that.
|
5
|
+
|
6
|
+
require 'sqlite3'
|
7
|
+
require 'tempfile'
|
8
|
+
|
9
|
+
require "DBWrapper"
|
10
|
+
|
11
|
+
#################
|
12
|
+
class DBSQLiteResult < DBResult
  # Wraps the result set handed back by SQLite3::Database#query.
  #
  # @counter tracks how many rows have been consumed so far, so that
  # num_rows() can restore the iteration position after counting.
  # NOTE(review): @result is presumably assigned by DBResult#initialize,
  # which is not visible in this file.
  def initialize(value)
    super(value)
    @counter = 0
  end

  ###
  # column names: list of strings
  def list_column_names()
    return @result.columns
  end

  # number of rows: returns an integer
  #
  # Counts by iterating over the whole result, then moves the
  # result back to the row position it had before the call.
  def num_rows()
    # remember where we were in iterating over items
    saved_position = @counter

    # reset, and iterate over all rows to count
    reset()
    row_count = 0
    each { |_row| row_count += 1 }

    # return to where we were in iterating over items
    reset()
    while @counter < saved_position
      @result.next()
      @counter += 1
    end

    return row_count
  end

  # yields each row as an array of values, each converted to a string
  def each()
    @result.each { |row|
      @counter += 1
      yield row.map { |x| x.to_s() }
    }
  end

  # yields each row as a hash: column name => column value (as string)
  def each_hash()
    # Take the column names from the result set itself, as
    # list_column_names() does, instead of relying on a per-row
    # 'fields' accessor.
    column_names = @result.columns

    @result.each { |row|
      @counter += 1

      row_hash = Hash.new()
      column_names.each_with_index { |key, index|
        row_hash[key] = row[index].to_s()
      }
      yield row_hash
    }
  end

  ###
  # reset such that each() can be run again on the result object
  def reset()
    @result.reset()
    @counter = 0
  end

  # free object
  def free()
    @result.close()
  end

  # returns the next row as an array of column contents
  # (whatever @result.next() returns once the rows are exhausted
  # is passed through unchanged)
  def fetch_row()
    row = @result.next()
    # only count rows that were actually delivered, so that the
    # position restored by num_rows() stays meaningful
    @counter += 1 if row
    return row
  end
end
|
92
|
+
|
93
|
+
#################
|
94
|
+
class DBSQLite < DBWrapper

  ###
  # initialization:
  #
  # open database file according to the given identifier:
  # the file used is <dir><identifier>.db
  def initialize(exp, # RosyConfigData experiment file object
                 dir = nil, # string: directory for Shalmaneser internal data, ends in "/"
                 identifier = nil) # string: identifier to use for the database
    super(exp)

    # dir and identifier may be nil, if we're only opening this object
    # in order to make temp databases
    if dir and identifier
      @database = SQLite3::Database.new(dir + identifier.to_s + ".db")
    else
      @database = nil
    end

    # temp file for temp database
    @tf = nil
  end

  ###
  # make a table
  #
  # Index columns get the word KEY inserted between column name and
  # column format; list_column_formats() strips that word again when it
  # parses the stored CREATE statement.
  #
  # returns: nothing
  def create_table(table_name, # string
                   column_formats, # array: array: string*string [column_name,column_format]
                   index_column_names, # array: string: column_name
                   indexname) # string: name of automatically created index column

    # primary key and auto-increment column
    string = "CREATE TABLE #{table_name} (" +
             "#{indexname} INTEGER PRIMARY KEY"

    # column declarations
    unless column_formats.empty?
      string << ", "
      string << column_formats.map { |name, format|
        # include other keys
        if index_column_names.include? name
          name.to_s + " KEY " + format.to_s
        else
          name.to_s + " " + format.to_s
        end
      }.join(",")
    end
    string << ");"

    query_noretv(string)
  end

  ###
  # remove a table
  def drop_table(table_name)
    query_noretv("DROP TABLE " + table_name)
  end

  ###
  # querying the database:
  # returns a DBSQLiteResult object, or nil if no database is open
  def query(query)
    return nil unless @database

    return DBSQLiteResult.new(@database.query(query))
  end

  ####
  # querying the database:
  # no result value (always returns nil; does nothing if no database is open)
  def query_noretv(query)
    @database.execute(query) if @database
    return nil
  end

  ###
  # list all tables in the database
  #
  # returns: array of strings (the 'name' column of sqlite_master),
  # or nil if no database is open
  def list_tables()
    return nil unless @database

    # Each result row is a one-element array. Take its element rather
    # than calling to_s on the row itself: Array#to_s yields the
    # inspect form (e.g. '["name"]') on Ruby >= 1.9, which would never
    # compare equal to a table name.
    return @database.execute("select name from sqlite_master;").map { |row|
      row.first.to_s()
    }
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table,
  # parsed out of the CREATE statement stored in sqlite_master
  #
  # returns: array:string*string, list of pairs [column name, column format],
  # or nil if no database is open.
  # Prints an error and exits with status 1 if the table description
  # cannot be read or parsed.
  def list_column_formats(table_name)
    return nil unless @database

    # the 'sql' column of sqlite_master holds the 'create' statement
    descr_rows = @database.execute("select sql from sqlite_master where name=='#{table_name}';")
    if descr_rows.empty?
      $stderr.puts "SQLite error: could not read description of table #{table_name}"
      exit 1
    end
    create_statement = descr_rows[0][0]

    # try to parse column names out of the 'create' statement
    if create_statement.to_s =~ /^\s*create table \S+\s*\((.*)\)\s*$/i
      # we now have something of shape ' a key varchar2(30), b varchar2(30)'
      # split at the comma, remove whitespace at beginning and end
      # then split again to get pairs [column name, column format]
      return $1.split(",").map { |col_descrip|
        pieces = col_descrip.strip().split().reject { |entry|
          entry =~ /^key$/i or entry =~ /^primary$/i
        }
        if pieces.length() > 2
          $stderr.puts "Warning: problematic column format in #{col_descrip}, may be parsed wrong."
        end
        pieces
      }
    else
      $stderr.puts "SQLite error: cannot read column names"
      exit 1
    end
  end

  ####
  # num_rows
  #
  # determine the number of rows in a table
  # returns: integer or nil
  def num_rows(table_name)
    return nil unless @database

    rows_s = @database.get_first_value( "select count(*) from #{table_name}" )
    return rows_s ? rows_s.to_i() : nil
  end

  ####
  # make a temporary table: make a table in a new, temporary file
  #
  # make_temp_table() is not overridden here; per the original note it is
  # the same as in the superclass.
  # NOTE(review): the superclass is not visible in this file; presumably its
  # make_temp_table() clones this object and calls initialize_temp_table()
  # on the clone -- confirm against DBWrapper.

  ###
  # drop the temporary table: close and delete the temp file (if there is
  # one) and forget the database handle
  def drop_temp_table()
    # @tf stays nil until initialize_temp_table() has run
    @tf.close(true) if @tf
    @database = nil
  end

  ##############################
  protected

  # Set up this object as a temporary database: create a temp file,
  # open it as the SQLite database, and create the table "temptable" in it.
  def initialize_temp_table(column_formats, index_column_names, indexname)
    @table_name = "temptable"
    @tf = Tempfile.new("temp_table")
    # close the handle; only the path is handed to SQLite
    @tf.close()
    @database = SQLite3::Database.new(@tf.path())
    create_table(@table_name, column_formats, index_column_names, indexname)
  end

end
|
data/lib/rosy/DBTable.rb
ADDED
@@ -0,0 +1,239 @@
|
|
1
|
+
# class DBTable
|
2
|
+
# KE, SP 27.1.05
|
3
|
+
#
|
4
|
+
# Manages one table in a (given) SQL database
|
5
|
+
# Doesn't know anything about the ROSY application
|
6
|
+
# Just creating a table, changing the table, and accessing it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require "common/SQLQuery"
|
10
|
+
require "common/RosyConventions"
|
11
|
+
|
12
|
+
class DBTable
  attr_reader :index_name, :table_name

  #####
  # new
  #
  # creates the table for this object.
  # The name of the table (given as parameter) can be new, in which case the table
  # is created, or old, in which case we check whether its column names match the
  # names given in the parameters.
  #
  # The table format is given in the form of column formats (column names and column formats,
  # formats are the usual SQLy things). Additionally, a subset of the column names can be
  # designated index columns, which means that the table is indexed (and can be searched quickly)
  # for them.
  #
  # DBTable internally constructs a "Primary index" feature that is called "XXindexXX" (autoincrement column)
  #
  # Columns that are added later using change_format_add_columns must carry the
  # 'addcol_prefix' prefix in their names; such columns are not checked against
  # the expected column names when opening an existing table;
  # this can be used to store experiment-specific data.
  #
  # Raises a RuntimeError if the parameters are inconsistent, if mode is neither
  # 'new' nor 'open', or (mode 'open') if the table is missing or its columns
  # do not match.
  def initialize(db_obj, # DBWrapper object
                 table_name, # string: name of DB table (existing/new)
                 mode, # new: starts new DB table, removes old if it exists. open: reopens existing DB table
                 hash={}) # hash: parameter name => parameter value, depending on mode
    # mode= new needs:
    # 'col_formats': array:array len 2: string*string, [column_name, column_format]
    # 'index_cols': array:string: column_names that should be used to index the table
    # 'addcol_prefix': string: prefix for names of additional columns
    # mode='open' accepts:
    # 'col_names': array: string: column names
    #    May be nil, in that case column name match isn't tested
    # 'addcol_prefix': string: prefix for names of additional columns

    @index_name = "XXindexXX"
    @db_obj = db_obj
    @table_name = table_name
    # remembered for the prefix check in change_format_add_columns
    @addcol_prefix = hash['addcol_prefix']

    case mode
    when 'new'
      ###
      # open new database

      # sanity check: exactly the required parameters present?
      unless hash.keys.sort == ['addcol_prefix', 'col_formats', 'index_cols']
        raise "Expecting hash parameters 'addcol_prefix', 'col_formats', 'index_cols'.\n" +
              "I got: " + hash.keys.join(", ")
      end

      # sanity check: main index column name should be unique
      all_column_names = hash['col_formats'].map { |name, format| name }
      if all_column_names.include? @index_name
        raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
      end

      # sanity check: index_column_names should be included in column_names
      hash['index_cols'].each { |name|
        unless all_column_names.include? name
          raise "[DBTable] #{name} is in the list of index names, but it isn't in the list of column names."
        end
      }

      # does a table with name table_name exist? if so, remove it
      if @db_obj.list_tables().include? table_name
        @db_obj.drop_table(table_name)
      end

      @db_obj.create_table(table_name, hash['col_formats'],
                           hash['index_cols'], @index_name)
    when 'open'
      ###
      # open existing database table

      # sanity check: only known parameters present?
      hash.keys.each { |key|
        unless ['addcol_prefix', 'col_names'].include? key
          raise "Expecting hash parameters 'addcol_prefix', 'col_names'.\n" +
                "I got: " + hash.keys.join(", ")
        end
      }
      # sanity check: main index column name should be unique
      if hash['col_names'] and hash['col_names'].include? @index_name
        raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
      end

      # does a table with name table_name exist?
      unless @db_obj.list_tables().include? table_name
        raise "[DBTable] Sorry, I cannot find a database table named #{table_name}."
      end

      # check if all column names match
      # (if no column names are given, nothing is checked)
      if hash['col_names']
        # ignore the primary index and all columns added later
        existing_fields = @db_obj.list_column_names(table_name).reject { |col|
          added_column_name?(col) or col == @index_name
        }

        unless existing_fields.sort() == hash["col_names"].sort()
          raise "[DBTable] Column names in the DB table #{table_name}\n" +
                "don't match feature specification in the experiment file.\n" +
                "Table:\n\t" + existing_fields.sort.join(", ") +
                "\n\nExp. file:\n\t" + hash["col_names"].sort.join(", ")
        end
      end

    else
      raise "Parameter 'mode' needs to be either 'new' or 'open'! I got " + mode.to_s
    end
  end

  #####
  # list_column_names
  #
  # list column names of this table
  #
  # returns: array:string, list of column names
  def list_column_names()
    return @db_obj.list_column_names(@table_name)
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table
  #
  # returns: array:string*string, list of pairs [column name, column format]
  def list_column_formats()
    return @db_obj.list_column_formats(@table_name)
  end

  #####
  # change_format_add_columns
  #
  # adds one or more columns to the table managed by this object
  # columns are given by their names and formats, as above.
  # If an 'addcol_prefix' was given to new(), every added column name
  # has to start with it; otherwise a RuntimeError is raised.
  #
  # returns: nothing
  def change_format_add_columns(column_formats) # array: string*string [column_name,column_format]

    if column_formats.nil? or column_formats.empty?
      raise "Need nonempty column_formats list"
    end

    if addcol_prefix_given?
      column_formats.each { |col_name, col_format|
        unless added_column_name?(col_name)
          raise "Columns that are added need to have prefix #{@addcol_prefix}!"
        end
      }
    end

    execute_command(SQLQuery.add_columns(@table_name, column_formats))
  end

  #####
  # change_format_remove_column
  #
  # removes one column from the table managed by this object;
  # prints a warning and does nothing if the table has no such column
  #
  # returns: nothing
  def change_format_remove_column(column_name) # string:name of the column to remove
    # list_column_names() of this class takes no argument
    unless list_column_names().include? column_name
      $stderr.puts "WARNING: Cannot remove column #{column_name}: I don't have it"
      return
    end

    execute_command("ALTER TABLE #{@table_name} DROP COLUMN #{column_name}")
  end

  #####
  # insert_row
  #
  # inserts a new row into the table and fills cells with values, as specified
  # by the column_value_pairs
  #
  # returns: nothing
  def insert_row(column_value_pairs) # array: string*Object [column_name,column_value]
    if column_value_pairs.nil? or column_value_pairs.empty?
      raise "Need nonempty column_value_pairs list"
    end
    execute_command(SQLQuery.insert(@table_name, column_value_pairs))
  end

  #####
  # update_row
  #
  # update column values for a given row which is identified
  # via its (autoincrement) index
  #
  # returns: nothing
  def update_row(index, # index, content of autoincrement column
                 column_value_pairs) # array: string*Object [column_name, column_value]

    if column_value_pairs.nil? or column_value_pairs.empty?
      raise "Need nonempty column_value_pairs list"
    end
    execute_command(SQLQuery.update(@table_name,
                                    column_value_pairs,
                                    [ValueRestriction.new(@index_name, index)]))
  end

  ####
  private

  ###
  # true if a non-empty prefix for additional columns was configured
  def addcol_prefix_given?()
    return (not @addcol_prefix.nil? and not @addcol_prefix.to_s.empty?)
  end

  ###
  # true if a prefix is configured and the given column name starts with it
  def added_column_name?(col_name)
    return (addcol_prefix_given?() and col_name.to_s.index(@addcol_prefix.to_s) == 0)
  end

  ###
  # execute_command:
  # execute DB command
  #
  # returns nil: the commands in this package are all
  # not of the kind that requires a return value
  #
  # On failure the command and the error message are printed to stderr
  # and the program exits with status 1.
  def execute_command(command)
    begin
      @db_obj.query_noretv(command)
    rescue => e
      $stderr.puts "Error executing SQL query. Command was:\n" + command
      $stderr.puts "Error was: #{e.message}"
      exit 1
    end
  end
end
|
+
|
34
|
+
def initialize(db_obj, # DBWrapper object
|
35
|
+
table_name, # string: name of DB table (existing/new)
|
36
|
+
mode, # new: starts new DB table, removes old if it exists. open: reopens existing DB table
|
37
|
+
hash={}) # hash: parameter name => parameter value, depending on mode
|
38
|
+
# mode= new needs:
|
39
|
+
# 'col_formats': array:array len 2: string*string, [column_name, column_format]
|
40
|
+
# 'index_cols': array:string: column_names that should be used to index the table
|
41
|
+
# 'addcol_prefix': string: prefix for names of additional columns
|
42
|
+
# mode='open' needs:
|
43
|
+
# 'col_formats': array: string*string: column names/formats
|
44
|
+
# May be nil, in that case column name match isn't tested
|
45
|
+
|
46
|
+
@index_name = "XXindexXX"
|
47
|
+
@db_obj = db_obj
|
48
|
+
@table_name = table_name
|
49
|
+
|
50
|
+
case mode
|
51
|
+
when 'new'
|
52
|
+
###
|
53
|
+
# open new database
|
54
|
+
|
55
|
+
# sanity check: exactly the required parameters present?
|
56
|
+
unless hash.keys.sort == ['addcol_prefix', 'col_formats', 'index_cols']
|
57
|
+
raise "Expecting hash parameters 'addcol_prefix', 'col_formats', 'index_cols'.\n" +
|
58
|
+
"I got: " + hash.keys.join(", ")
|
59
|
+
end
|
60
|
+
|
61
|
+
# sanity check: main index column name should be unique
|
62
|
+
all_column_names = hash['col_formats'].map { |name, format| name}
|
63
|
+
if all_column_names.include? @index_name
|
64
|
+
raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
|
65
|
+
end
|
66
|
+
|
67
|
+
# sanity check: index_column_names should be included in column_names
|
68
|
+
hash['index_cols'].each { |name|
|
69
|
+
unless all_column_names.include? name
|
70
|
+
raise "[DBTable] #{name} is in the list of index names, but it isn't in the list of column names."
|
71
|
+
end
|
72
|
+
}
|
73
|
+
|
74
|
+
# does a table with name table_name exist? if so, remove it
|
75
|
+
if @db_obj.list_tables().include? table_name
|
76
|
+
# this table exists
|
77
|
+
# remove old table
|
78
|
+
@db_obj.drop_table(table_name)
|
79
|
+
end
|
80
|
+
|
81
|
+
@db_obj.create_table(table_name,hash['col_formats'],
|
82
|
+
hash['index_cols'], @index_name)
|
83
|
+
when 'open'
|
84
|
+
|
85
|
+
###
|
86
|
+
# open existing database table
|
87
|
+
|
88
|
+
# sanity check: exactly the required parameters present?
|
89
|
+
hash.keys.each { |key|
|
90
|
+
unless ['addcol_prefix', 'col_names'].include? key
|
91
|
+
raise "Expecting hash parameters 'addcol_prefix', 'col_names'.\n" +
|
92
|
+
"I got: " + hash.keys.join(", ")
|
93
|
+
end
|
94
|
+
}
|
95
|
+
# sanity check: main index column name should be unique
|
96
|
+
if hash['col_names'] and hash['col_names'].include? @index_name
|
97
|
+
raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
|
98
|
+
end
|
99
|
+
|
100
|
+
|
101
|
+
# does a table with name table_name exist?
|
102
|
+
unless @db_obj.list_tables().include? table_name
|
103
|
+
raise "[DBTable] Sorry, I cannot find a database table named #{table_name}."
|
104
|
+
end
|
105
|
+
|
106
|
+
# check if all column formats match
|
107
|
+
|
108
|
+
if hash['col_names']
|
109
|
+
|
110
|
+
existing_fields = @db_obj.list_column_names(table_name).reject { |col|
|
111
|
+
col =~ /^#{hash["addcol_prefix"]}/ or
|
112
|
+
col == @index_name
|
113
|
+
}
|
114
|
+
|
115
|
+
unless existing_fields.sort() == hash["col_names"].sort()
|
116
|
+
raise "[DBTable] Column names in the DB table #{table_name}\n" +
|
117
|
+
"don't match feature specification in the experiment file.\n" +
|
118
|
+
"Table:\n\t" + existing_fields.sort.join(", ") +
|
119
|
+
"\n\nExp. file:\n\t" + hash["col_names"].sort.join(", ")
|
120
|
+
end
|
121
|
+
|
122
|
+
else
|
123
|
+
# no column names given, no check of column formats
|
124
|
+
end
|
125
|
+
|
126
|
+
else
|
127
|
+
raise "Parameter 'mode' needs to be either 'new' or 'open'! I got " + mode.to_s
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
#####
|
132
|
+
# list_column_names
|
133
|
+
#
|
134
|
+
# list column names of this table
|
135
|
+
#
|
136
|
+
# returns: array:string, list of column names
|
137
|
+
def list_column_names()
|
138
|
+
return @db_obj.list_column_names(@table_name)
|
139
|
+
end
|
140
|
+
|
141
|
+
#####
|
142
|
+
# list_column_formats
|
143
|
+
#
|
144
|
+
# list column names and column types of this table
|
145
|
+
#
|
146
|
+
# returns: array:string*string, list of pairs [column name, column format]
|
147
|
+
def list_column_formats()
|
148
|
+
return @db_obj.list_column_formats(@table_name)
|
149
|
+
end
|
150
|
+
|
151
|
+
#####
|
152
|
+
# change_format_add_columns
|
153
|
+
#
|
154
|
+
# adds one or more columns to the table managed by this object
|
155
|
+
# columns are given by their names and formats, as above
|
156
|
+
#
|
157
|
+
# returns: nothing
|
158
|
+
def change_format_add_columns(column_formats) # array: string*string [column_name,column_format]
|
159
|
+
|
160
|
+
if column_formats.nil? or column_formats.empty?
|
161
|
+
raise "Need nonempty column_formats list"
|
162
|
+
end
|
163
|
+
|
164
|
+
column_formats.each {|col_name,col_format|
|
165
|
+
unless col_name =~ /^#{@addcol_prefix}/
|
166
|
+
raise "Columns that are added need to have prefix #{@addcol_prefix}!"
|
167
|
+
end
|
168
|
+
}
|
169
|
+
|
170
|
+
execute_command(SQLQuery.add_columns(@table_name, column_formats))
|
171
|
+
end
|
172
|
+
|
173
|
+
#####
|
174
|
+
# change_format_remove_column
|
175
|
+
#
|
176
|
+
# removes one column from the table managed by this object
|
177
|
+
#
|
178
|
+
# returns: nothing
|
179
|
+
def change_format_remove_column(column_name) # string:name of the column to remove
|
180
|
+
unless list_column_names(@table_name).include? column_name
|
181
|
+
$stderr.puts "WARNING: Cannot remove column #{column_name}: I don't have it"
|
182
|
+
return
|
183
|
+
end
|
184
|
+
|
185
|
+
execute_command("ALTER TABLE #{@table_name} DROP COLUMN #{column_name}")
|
186
|
+
end
|
187
|
+
|
188
|
+
|
189
|
+
#####
|
190
|
+
# insert_row
|
191
|
+
#
|
192
|
+
# inserts a new row into the table and fills cells with values, as specified
|
193
|
+
# by the column_value_pairs
|
194
|
+
#
|
195
|
+
# returns: nothing
|
196
|
+
def insert_row(column_value_pairs) # array: string*Object [column_name,column_value]
|
197
|
+
if column_value_pairs.nil? or column_value_pairs.empty?
|
198
|
+
raise "Need nonempty column_value_pairs list"
|
199
|
+
end
|
200
|
+
execute_command(SQLQuery.insert(@table_name,column_value_pairs))
|
201
|
+
end
|
202
|
+
|
203
|
+
#####
|
204
|
+
# update_row
|
205
|
+
#
|
206
|
+
# update column values for a given row which is identified
|
207
|
+
# via its (autoincrement) index
|
208
|
+
#
|
209
|
+
# returns: nothing
|
210
|
+
def update_row(index, # index, content of autoincrement column
|
211
|
+
column_value_pairs) # array: string*Object [column_name, column_value]
|
212
|
+
|
213
|
+
if column_value_pairs.nil? or column_value_pairs.empty?
|
214
|
+
raise "Need nonempty column_value_pairs list"
|
215
|
+
end
|
216
|
+
execute_command(SQLQuery.update(@table_name,
|
217
|
+
column_value_pairs,
|
218
|
+
[ValueRestriction.new(@index_name, index)]))
|
219
|
+
end
|
220
|
+
|
221
|
+
|
222
|
+
####
|
223
|
+
private
|
224
|
+
|
225
|
+
###
|
226
|
+
# execute_command:
|
227
|
+
# execute DB command
|
228
|
+
#
|
229
|
+
# returns nil: the commands in this package are all
|
230
|
+
# not of the kind that requires a return value
|
231
|
+
def execute_command(command)
|
232
|
+
begin
|
233
|
+
@db_obj.query_noretv(command)
|
234
|
+
rescue
|
235
|
+
$stderr.puts "Error executing SQL query. Command was:\n" + command
|
236
|
+
exit 1
|
237
|
+
end
|
238
|
+
end
|
239
|
+
end
|