frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
data/lib/rosy/DBMySQL.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
|
2
|
+
# DBMysql: a subclass of DBWrapper.
|
3
|
+
#
|
4
|
+
# Use a MySQL server to access a database.
|
5
|
+
# Use the Ruby mysql interface package for that.
|
6
|
+
|
7
|
+
require 'mysql'
|
8
|
+
|
9
|
+
|
10
|
+
require 'rosy/DBWrapper'
|
11
|
+
|
12
|
+
#################
|
13
|
+
class DBMySQLResult < DBResult
  # Wraps the value returned by Mysql::query (a MysqlResult object).
  #
  # The value is handed to the superclass constructor (which presumably
  # stores it in @result -- confirm in DBResult). The row offset at
  # construction time is remembered so that reset() can seek back to it.
  def initialize(value)
    super(value)
    @row_first = @result.row_tell
  end

  ###
  # Seek back to the row offset recorded at construction time,
  # such that each() can be run again.
  def reset
    @result.row_seek(@row_first)
  end

  ###
  # Column names of this result: list of strings.
  #
  # The current row offset is saved before the field descriptions are
  # fetched and restored afterwards.
  def list_column_names
    saved_offset = @result.row_tell
    names = @result.fetch_fields.map(&:name)
    @result.row_seek(saved_offset)
    names
  end
end
|
41
|
+
|
42
|
+
|
43
|
+
#################
|
44
|
+
class DBMySQL < DBWrapper
  ###
  # initialization:
  #
  # Opens a connection to the MySQL server, using the entries
  # 'host', 'user', 'passwd' and 'dbname' of the experiment file object.
  def initialize(exp) # RosyConfigData experiment file object
    super(exp)

    @database = Mysql.real_connect(@exp.get('host'), @exp.get('user'),
                                   @exp.get('passwd'), @exp.get('dbname'))
  end

  ###
  # make a table
  #
  # The table gets an auto-increment integer column named indexname as
  # its primary key, followed by the given columns; every name in
  # index_column_names is additionally declared as a KEY.
  #
  # returns: nothing
  def create_table(table_name,         # string
                   column_formats,     # array: array: string*string [column_name,column_format]
                   index_column_names, # array: string: column_name
                   indexname)          # string: name of automatically created index column
    # Each entry is one top-level clause of the statement; clauses are
    # separated by ", ", entries inside a clause by ",".
    clauses = ["#{indexname} INT NOT NULL AUTO_INCREMENT"]

    # column declarations
    unless column_formats.empty?
      clauses << column_formats.map { |name, format| "#{name} #{format}" }.join(",")
    end

    # primary key
    clauses << "PRIMARY KEY(#{indexname})"

    # other keys
    unless index_column_names.empty?
      clauses << index_column_names.map { |name| "KEY(#{name})" }.join(",")
    end

    query_noretv("CREATE TABLE #{table_name} (#{clauses.join(', ')});")
  end

  ####
  # querying the database:
  # returns a DBMySQLResult object wrapping the result,
  # or nil if the query produced no result object
  def query(query)
    result = @database.query(query)
    result ? DBMySQLResult.new(result) : nil
  end

  ####
  # querying the database:
  # no result value
  def query_noretv(query)
    @database.query(query)
    nil
  end

  ###
  # list all tables in the database
  #
  # array of strings
  def list_tables
    @database.list_tables
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table,
  # as reported by "DESCRIBE <table>"
  #
  # returns: array:string*string, list of pairs [column name, column format]
  def list_column_formats(table_name)
    formats = []
    @database.query("DESCRIBE #{table_name}").each_hash do |field|
      formats << [field["Field"], field["Type"]]
    end
    formats
  end

  ####
  # num_rows
  #
  # determine the number of rows in a table, as reported by
  # "SHOW TABLE STATUS"
  #
  # NOTE(review): the MySQL manual describes this figure as an estimate
  # for some storage engines -- confirm that callers do not need an
  # exact count.
  #
  # returns: integer, or nil if the table is not listed or no row count
  # is reported for it
  def num_rows(table_name)
    @database.query("SHOW TABLE STATUS").each_hash do |status|
      next unless status["Name"] == table_name

      # The cell may arrive as a string, depending on the MySQL binding;
      # normalise it so that the documented integer is returned.
      rows = status["Rows"]
      return rows.nil? ? nil : rows.to_i
    end
    nil
  end
end
|
@@ -0,0 +1,280 @@
|
|
1
|
+
# DBSQLite: a subclass of DBWrapper.
|
2
|
+
#
|
3
|
+
# Use SQLite to access a database.
|
4
|
+
# Use the Ruby sqlite3 interface package for that.
|
5
|
+
|
6
|
+
require 'sqlite3'
|
7
|
+
require 'tempfile'
|
8
|
+
|
9
|
+
require "DBWrapper"
|
10
|
+
|
11
|
+
#################
|
12
|
+
class DBSQLiteResult < DBResult
  # initialize with a result set as returned by SQLite3::Database#query.
  #
  # The value is handed to the superclass constructor (which presumably
  # stores it in @result -- confirm in DBResult). @counter tracks how
  # many rows have been consumed so far, so that num_rows() can restore
  # the iteration position.
  def initialize(value)
    super(value)
    @counter = 0
  end

  ###
  # column names: list of strings
  def list_column_names
    @result.columns
  end

  # number of rows: returns an integer
  #
  # Counting requires a full pass over the result set, so the current
  # iteration position is remembered and restored afterwards.
  def num_rows
    # remember where we were in iterating over items
    position = @counter

    # reset, and iterate over all rows to count
    reset
    total = 0
    each { |_row| total += 1 }

    # return to where we were in iterating over items
    reset
    while @counter < position
      @result.next
      @counter += 1
    end

    total
  end

  # yields each row as an array of values, each converted to a string
  def each
    @result.each do |row|
      @counter += 1
      yield row.map { |value| value.to_s }
    end
  end

  # yields each row as a hash: column name => column value (as string)
  #
  # Column names are taken from the result set itself, as in
  # list_column_names(), rather than from a per-row 'fields' accessor,
  # which is not available on plain array rows.
  def each_hash
    column_names = @result.columns
    @result.each do |row|
      @counter += 1

      row_hash = {}
      column_names.each_with_index do |key, index|
        row_hash[key] = row[index].to_s
      end
      yield row_hash
    end
  end

  ###
  # reset such that each() can be run again on the result object
  def reset
    @result.reset
    @counter = 0
  end

  # free object
  def free
    @result.close
  end

  # returns the next row as an array of column contents,
  # or whatever the result set returns once it is exhausted
  def fetch_row
    row = @result.next
    # only count rows that were actually delivered, so that @counter
    # keeps reflecting the real iteration position
    @counter += 1 if row
    row
  end
end
|
92
|
+
|
93
|
+
#################
|
94
|
+
class DBSQLite < DBWrapper
  ###
  # initialization:
  #
  # open database file according to the given identifier:
  # the file used is dir + identifier + ".db"
  def initialize(exp,              # RosyConfigData experiment file object
                 dir = nil,        # string: directory for Shalmaneser internal data, ends in "/"
                 identifier = nil) # string: identifier to use for the database
    super(exp)

    # dir and identifier may be nil, if we're only opening this object
    # in order to make temp databases
    @database = if dir and identifier
                  SQLite3::Database.new(dir + identifier.to_s + ".db")
                end

    # temp file for temp database
    @tf = nil
  end

  ###
  # make a table
  #
  # The table gets an integer primary key column named indexname,
  # followed by the given columns; columns listed in index_column_names
  # get the word KEY inserted between name and format.
  #
  # returns: nothing
  def create_table(table_name,         # string
                   column_formats,     # array: array: string*string [column_name,column_format]
                   index_column_names, # array: string: column_name
                   indexname)          # string: name of automatically created index column
    # column declarations, including other keys
    declarations = column_formats.map do |name, format|
      if index_column_names.include? name
        "#{name} KEY #{format}"
      else
        "#{name} #{format}"
      end
    end

    # primary key and auto-increment column come first
    statement = "CREATE TABLE #{table_name} (#{indexname} INTEGER PRIMARY KEY"
    statement += ", " + declarations.join(",") unless declarations.empty?
    statement += ");"

    query_noretv(statement)
  end

  ###
  # remove a table
  def drop_table(table_name)
    query_noretv("DROP TABLE " + table_name)
  end

  ###
  # querying the database:
  # returns a DBSQLiteResult object, or nil if no database is open
  def query(query)
    return nil unless @database

    DBSQLiteResult.new(@database.query(query))
  end

  ####
  # querying the database:
  # no result value
  def query_noretv(query)
    @database.execute(query) if @database
    nil
  end

  ###
  # list all names recorded in sqlite_master
  #
  # array of strings, or nil if no database is open
  def list_tables
    return nil unless @database

    # Each row is a one-element array. Take the element itself: calling
    # to_s on the row would yield its inspect form ('["name"]') on
    # Ruby >= 1.9, which never equals a table name.
    @database.execute("select name from sqlite_master;").map do |row|
      row.first.to_s
    end
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table, parsed out of the
  # 'create table' statement that SQLite stores in sqlite_master
  #
  # Prints a message to stderr and exits with status 1 if the statement
  # cannot be found or parsed.
  #
  # returns: array:string*string, list of pairs [column name, column format];
  # nil if no database is open
  def list_column_formats(table_name)
    return nil unless @database

    # the column called 'sql' holds the 'create' statement; fetch it
    # directly, with the table name passed as a bound parameter
    create_statement = @database.get_first_value(
      "select sql from sqlite_master where name = ?", table_name.to_s
    )
    unless create_statement
      $stderr.puts "SQLite error: could not read description of table #{table_name}"
      exit 1
    end

    # try to parse column names out of the 'create' statement
    unless create_statement =~ /^\s*create table \S+\s*\((.*)\)\s*$/i
      $stderr.puts "SQLite error: cannot read column names"
      exit 1
    end
    column_list = $1

    # we now have something of shape ' a key varchar2(30), b varchar2(30)'
    # split at the comma, remove whitespace at beginning and end
    # then split again to get pairs [column name, column format]
    # NOTE(review): a format that itself contains a comma would be split
    # in two here.
    column_list.split(",").map do |col_descrip|
      pieces = col_descrip.strip.split.reject do |entry|
        entry =~ /^key$/i || entry =~ /^primary$/i
      end
      if pieces.length > 2
        $stderr.puts "Warning: problematic column format in #{col_descrip}, may be parsed wrong."
      end
      pieces
    end
  end

  ####
  # num_rows
  #
  # determine the number of rows in a table
  # returns: integer or nil
  def num_rows(table_name)
    return nil unless @database

    rows_s = @database.get_first_value("select count(*) from #{table_name}")
    rows_s ? rows_s.to_i : nil
  end

  ####
  # make a temporary table: make a table in a new, temporary file
  #
  # returns: DBWrapper object (or object of current subclass)
  # that has the @table_name attribute set to the name of a temporary DB
  #
  # make_temp_table is not overridden here; according to the original
  # note it is the same as in the superclass, and it is expected to call
  # initialize_temp_table (below) on a clone of this object.

  ###
  # remove the temporary database: delete its temp file (if one was
  # created) and forget the database handle
  def drop_temp_table
    @tf.close(true) if @tf
    @database = nil
  end

  ##############################
  protected

  ###
  # create a temporary file, open it as this object's database
  # and create the table "temptable" in it
  def initialize_temp_table(column_formats, index_column_names, indexname)
    @table_name = "temptable"
    @tf = Tempfile.new("temp_table")
    # only the path is needed; SQLite opens the file itself
    @tf.close
    @database = SQLite3::Database.new(@tf.path)
    create_table(@table_name, column_formats, index_column_names, indexname)
  end
end
|
data/lib/rosy/DBTable.rb
ADDED
@@ -0,0 +1,239 @@
|
|
1
|
+
# class DBTable
|
2
|
+
# KE, SP 27.1.05
|
3
|
+
#
|
4
|
+
# Manages one table in a (given) SQL database
|
5
|
+
# Doesn't know anything about the ROSY application
|
6
|
+
# Just creating a table, changing the table, and accessing it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require "common/SQLQuery"
|
10
|
+
require "common/RosyConventions"
|
11
|
+
|
12
|
+
class DBTable
  attr_reader :index_name, :table_name

  #####
  # new
  #
  # creates the table for this object.
  # The name of the table (given as parameter) can be new, in which case the table
  # is created, or old, in which case we check whether its columns match the
  # column names given in the parameters.
  #
  # The table format is given in the form of column formats (column names and column formats,
  # formats are the usual SQLy things). Additionally, a subset of the column names can be
  # designated index columns, which means that the table is indexed (and can be searched quickly)
  # for them.
  #
  # DBTable internally constructs a "Primary index" feature that is called "XXindexXX" (autoincrement column)
  #
  # For all columns that are added later using change_format_add_columns, DBTable requires
  # a prefix on the column names; these columns are not checked against the column names
  # when opening an existing table; this can be used to store experiment-specific data.
  #
  # Raises a RuntimeError if the parameters are inconsistent, if mode is
  # neither 'new' nor 'open', or if a table to be opened does not exist
  # or has non-matching columns.
  def initialize(db_obj,     # DBWrapper object
                 table_name, # string: name of DB table (existing/new)
                 mode,       # new: starts new DB table, removes old if it exists. open: reopens existing DB table
                 hash={})    # hash: parameter name => parameter value, depending on mode
    # mode='new' needs:
    #  'col_formats': array:array len 2: string*string, [column_name, column_format]
    #  'index_cols': array:string: column_names that should be used to index the table
    #  'addcol_prefix': string: prefix for names of additional columns
    # mode='open' accepts:
    #  'col_names': array:string: column names
    #     May be nil, in that case column name match isn't tested
    #  'addcol_prefix': string: prefix for names of additional columns

    @index_name = "XXindexXX"
    @db_obj = db_obj
    @table_name = table_name
    # remembered for the prefix check in change_format_add_columns
    @addcol_prefix = hash['addcol_prefix']

    case mode
    when 'new'
      start_new_table(hash)
    when 'open'
      open_existing_table(hash)
    else
      raise "Parameter 'mode' needs to be either 'new' or 'open'! I got " + mode.to_s
    end
  end

  #####
  # list_column_names
  #
  # list column names of this table
  #
  # returns: array:string, list of column names
  def list_column_names
    @db_obj.list_column_names(@table_name)
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table
  #
  # returns: array:string*string, list of pairs [column name, column format]
  def list_column_formats
    @db_obj.list_column_formats(@table_name)
  end

  #####
  # change_format_add_columns
  #
  # adds one or more columns to the table managed by this object
  # columns are given by their names and formats, as above.
  # If an additional-column prefix was given at construction time,
  # every new column name has to start with it.
  #
  # returns: nothing
  def change_format_add_columns(column_formats) # array: string*string [column_name,column_format]
    if column_formats.nil? or column_formats.empty?
      raise "Need nonempty column_formats list"
    end

    column_formats.each do |col_name, _col_format|
      unless added_column?(col_name) or !prefix_given?
        raise "Columns that are added need to have prefix #{@addcol_prefix}!"
      end
    end

    execute_command(SQLQuery.add_columns(@table_name, column_formats))
  end

  #####
  # change_format_remove_column
  #
  # removes one column from the table managed by this object;
  # only prints a warning if the table has no such column
  #
  # returns: nothing
  def change_format_remove_column(column_name) # string:name of the column to remove
    unless list_column_names.include? column_name
      $stderr.puts "WARNING: Cannot remove column #{column_name}: I don't have it"
      return
    end

    execute_command("ALTER TABLE #{@table_name} DROP COLUMN #{column_name}")
  end

  #####
  # insert_row
  #
  # inserts a new row into the table and fills cells with values, as specified
  # by the column_value_pairs
  #
  # returns: nothing
  def insert_row(column_value_pairs) # array: string*Object [column_name,column_value]
    if column_value_pairs.nil? or column_value_pairs.empty?
      raise "Need nonempty column_value_pairs list"
    end
    execute_command(SQLQuery.insert(@table_name, column_value_pairs))
  end

  #####
  # update_row
  #
  # update column values for a given row which is identified
  # via its (autoincrement) index
  #
  # returns: nothing
  def update_row(index,              # index, content of autoincrement column
                 column_value_pairs) # array: string*Object [column_name, column_value]
    if column_value_pairs.nil? or column_value_pairs.empty?
      raise "Need nonempty column_value_pairs list"
    end
    execute_command(SQLQuery.update(@table_name,
                                    column_value_pairs,
                                    [ValueRestriction.new(@index_name, index)]))
  end

  ####
  private

  ###
  # mode 'new': validate the parameters, remove an existing table of the
  # same name and create the table afresh
  def start_new_table(hash)
    # sanity check: exactly the required parameters present?
    unless hash.keys.sort == ['addcol_prefix', 'col_formats', 'index_cols']
      raise "Expecting hash parameters 'addcol_prefix', 'col_formats', 'index_cols'.\n" +
            "I got: " + hash.keys.join(", ")
    end

    # sanity check: main index column name should be unique
    all_column_names = hash['col_formats'].map { |name, _format| name }
    if all_column_names.include? @index_name
      raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
    end

    # sanity check: index_column_names should be included in column_names
    hash['index_cols'].each do |name|
      unless all_column_names.include? name
        raise "[DBTable] #{name} is in the list of index names, but it isn't in the list of column names."
      end
    end

    # does a table with name table_name exist? if so, remove it
    @db_obj.drop_table(@table_name) if @db_obj.list_tables.include? @table_name

    @db_obj.create_table(@table_name, hash['col_formats'],
                         hash['index_cols'], @index_name)
  end

  ###
  # mode 'open': validate the parameters and check that the existing
  # table has exactly the expected columns (if column names were given)
  def open_existing_table(hash)
    # sanity check: only known parameters present?
    hash.keys.each do |key|
      unless ['addcol_prefix', 'col_names'].include? key
        raise "Expecting hash parameters 'addcol_prefix', 'col_names'.\n" +
              "I got: " + hash.keys.join(", ")
      end
    end

    # sanity check: main index column name should be unique
    if hash['col_names'] and hash['col_names'].include? @index_name
      raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
    end

    # does a table with name table_name exist?
    unless @db_obj.list_tables.include? @table_name
      raise "[DBTable] Sorry, I cannot find a database table named #{@table_name}."
    end

    # no column names given: no check of columns
    return unless hash['col_names']

    # check if all column names match, ignoring the index column and
    # columns carrying the additional-column prefix
    existing_fields = @db_obj.list_column_names(@table_name).reject do |col|
      added_column?(col) or col == @index_name
    end

    unless existing_fields.sort == hash["col_names"].sort
      raise "[DBTable] Column names in the DB table #{@table_name}\n" +
            "don't match feature specification in the experiment file.\n" +
            "Table:\n\t" + existing_fields.sort.join(", ") +
            "\n\nExp. file:\n\t" + hash["col_names"].sort.join(", ")
    end
  end

  ###
  # true if a nonempty additional-column prefix was given at construction
  def prefix_given?
    !(@addcol_prefix.nil? or @addcol_prefix.to_s.empty?)
  end

  ###
  # true if the column name starts with the additional-column prefix.
  # Without a prefix, no column counts as an added column. The prefix
  # is compared literally, not interpreted as a regular expression.
  def added_column?(col_name)
    prefix_given? and col_name.to_s.start_with?(@addcol_prefix.to_s)
  end

  ###
  # execute_command:
  # execute DB command
  #
  # On failure, prints the command and the error message to stderr and
  # exits with status 1.
  #
  # returns nil: the commands in this package are all
  # not of the kind that requires a return value
  def execute_command(command)
    @db_obj.query_noretv(command)
  rescue => e
    $stderr.puts "Error executing SQL query. Command was:\n" + command
    $stderr.puts "Error was: #{e.message}"
    exit 1
  end
end
|