shalmaneser-lib 1.2.rc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
@@ -0,0 +1,126 @@
|
|
1
|
+
require_relative 'config_data'
|
2
|
+
require_relative 'configuration_error'
|
3
|
+
|
4
|
+
##############################
|
5
|
+
# Class RosyConfigData
|
6
|
+
#
|
7
|
+
# inherits from ConfigData,
|
8
|
+
# sets features for ROSY
|
9
|
+
module Shalmaneser
|
10
|
+
module Configuration
|
11
|
+
# Configuration data for ROSY.
#
# Declares the settings ROSY recognises together with the name of each
# setting's value type, registers the accessor used for the list-valued
# settings 'feature' and 'classifier', and validates the experiment ID.
class RosyConfigData < ConfigData
  # Setting name => name of its value type ("string", "bool", "list" or
  # "pattern"). How each type is parsed is decided by ConfigData.
  CONFIG_DEFS = {
    "feature" => "list",
    "classifier" => "list",
    "verbose" => "bool",
    "experiment_ID" => "string",
    "directory_input_train" => "string",
    "directory_input_test" => "string",
    "directory_output" => "string",
    "preproc_descr_file_train" => "string",
    "preproc_descr_file_test" => "string",
    "external_descr_file" => "string",
    "dbtype" => "string", # "mysql" or "sqlite"

    # DB access: these four are read by DBMySQL when it connects;
    # DBSQLite does not read them.
    "host" => "string",
    "user" => "string",
    "passwd" => "string",
    "dbname" => "string",

    "data_dir" => "string", # for external use
    "rosy_dir" => "pattern", # for internal use only, set by rosy.rb

    "classifier_dir" => "string", # if present, special directory for classifiers

    "classif_column_name" => "string",
    "main_table_name" => "pattern",
    "test_table_name" => "pattern",

    "eval_file" => "pattern",
    "log_file" => "pattern",
    "failed_file" => "pattern",
    "classifier_file" => "pattern",
    "classifier_output_file" => "pattern",
    "noval" => "string",
    "split_nones" => "bool",
    "print_eval_log" => "bool",
    "assume_argrec_perfect" => "bool",
    "xwise_argrec" => "string",
    "xwise_arglab" => "string",
    "xwise_onestep" => "string",
    "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
    "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
    "prune" => "string", # pruning prior to argrec?

    # Imported from PrepConfigData
    'do_postag' => 'bool',
    'do_lemmatize' => 'bool',
    'do_parse' => 'bool',
    'pos_tagger' => 'string',
    'lemmatizer' => 'string',
    'parser' => 'string'
  }

  # Read the configuration and validate it.
  #
  # @param filename name of the configuration file, handed on to ConfigData
  # @raise [ConfigurationError] if the experiment ID is not alphanumeric
  def initialize(filename)
    # The third argument is a list of names handed to ConfigData
    # (presumably the variables usable in "pattern" settings -- confirm
    # against ConfigData).
    super(filename, CONFIG_DEFS, ["exp_ID", "test_ID", "split_ID",
                                  "feature_name", "classif", "step",
                                  "group", "dataset", "mode"])

    # Both list-valued settings share the same access function.
    set_list_feature_access("feature", method("access_feature"))
    set_list_feature_access("classifier", method("access_feature"))

    validate
  end

  #####
  # access_feature
  #
  # Access function for the list settings 'feature' and 'classifier'.
  #
  # Assumed format in the config file:
  #
  #   feature = path [option]*
  #
  # i.e. first the name of the feature type to use, then optionally
  # options associated with that feature,
  # e.g. 'argrec': use that feature only when computing argrec.
  #
  # @param val_list [Array<Array<String>>, nil] one string tuple per
  #   defined feature; the first string is the feature name, the rest
  #   are its options
  # @return [Array<Array>] list of pairs [feature_name, options];
  #   empty when val_list is nil
  def access_feature(val_list)
    return [] if val_list.nil?

    val_list.map do |feature_descr_tuple|
      [feature_descr_tuple.first, feature_descr_tuple[1..-1]]
    end
  end

  private

  # Check the settings that must be well-formed before ROSY can run.
  #
  # @raise [ConfigurationError] listing every problem found
  def validate
    msg = []

    # \A and \z anchor the whole string. ^ and $ anchor each line, so a
    # multi-line value containing one alphanumeric line would be accepted.
    unless get("experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
      msg << 'Please choose an alphanumeric experiment ID! '\
             "You provided: #{get('experiment_ID')}"
    end

    raise(ConfigurationError, msg.join("\n")) if msg.any?
  end
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# DBInterface
|
2
|
+
#
|
3
|
+
# Okay, things are getting somewhat complicated here with all
|
4
|
+
# the DB classes, but this is how it all fits together:
|
5
|
+
#
|
6
|
+
# - DBWrapper: abstract class describing the DB interface
|
7
|
+
# - DBMySQL, DBSQLite: subclasses of DBWrapper, for MySQL
|
8
|
+
# and SQLite, respectively
|
9
|
+
# - DBInterface: class to be used from outside,
|
10
|
+
# decides ( based on the experiment file) whether to use
|
11
|
+
# MySQL or SQLite and makes an object of the right kind,
|
12
|
+
# 'require'-ing either DBMySQL or DBSQLite, but not both,
|
13
|
+
# because the right ruby packages might not be installed
|
14
|
+
# for both SQL systems
|
15
|
+
# @note This class will be obsolete if we delete MySQL.
|
16
|
+
# Factory for the database backend named in the experiment file.
#
# The backend file is required only once it has been selected, so only the
# Ruby package for the chosen SQL system needs to be installed.
class DBInterface
  # Build the database object selected by the 'dbtype' entry of +exp+.
  #
  # Terminates the process with exit status 1 (after printing a hint to
  # standard error) if the backend cannot be loaded or if 'dbtype' is
  # neither "mysql" nor "sqlite".
  #
  # @param exp experiment file object with a 'dbtype' entry; must respond
  #   to get(key)
  # @param dir [String, nil] Shalmaneser directory (used by SQLite only)
  # @param identifier [String, nil] identifier of the data (SQLite only)
  # @return [DBMySQL, DBSQLite] the backend object
  def self.get_db_interface(exp, dir = nil, identifier = nil)
    case exp.get('dbtype')
    when 'mysql'
      load_backend('db/db_mysql', 'MySQL')
      DBMySQL.new(exp)
    when 'sqlite'
      load_backend('db/db_sqlite', 'SQLite')
      DBSQLite.new(exp, dir, identifier)
    else
      $stderr.puts 'Error: database type needs to be either "mysql" or "sqlite".'
      $stderr.puts 'Please set parameter "dbtype" in the experiment file accordingly.'
      exit 1
    end
  end

  # Require the backend file +feature+, or exit with status 1 and a hint
  # naming the missing Ruby package.
  #
  # LoadError is listed explicitly: it descends from ScriptError, not
  # StandardError, so a bare `rescue` would not catch a missing package.
  def self.load_backend(feature, package_name)
    require feature
  rescue LoadError, StandardError => e
    $stderr.puts 'Error loading DB interface.'
    $stderr.puts "Make sure you have the Ruby #{package_name} package installed."
    $stderr.puts "(#{e.class}: #{e.message})"
    exit 1
  end
  private_class_method :load_backend
end
|
data/lib/db/db_mysql.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
# DBMysql: a subclass of DBWrapper.
|
2
|
+
#
|
3
|
+
# Use a MySQL server to access a database.
|
4
|
+
# Use the Ruby mysql interface package for that.
|
5
|
+
|
6
|
+
require 'mysql'
|
7
|
+
require 'db/db_wrapper'
|
8
|
+
|
9
|
+
#################
|
10
|
+
# Result of a MySQL query.
#
# Wraps the object returned by Mysql#query (a MysqlResult). The wrapped
# object is read through @result, which is presumably stored by
# DBResult#initialize -- confirm against db_wrapper.rb.
class DBMySQLResult < DBResult
  # Wrap the query result and note the offset of its first row, so that
  # reset() can later seek back to it.
  def initialize(value)
    super
    @row_first = @result.row_tell
  end

  ###
  # Seek back to the first row so that each() can be run again.
  def reset
    @result.row_seek(@row_first)
  end

  ###
  # Column names of the result, as a list of strings.
  #
  # The row position is saved before reading the field descriptions and
  # restored afterwards, so an iteration in progress is not disturbed.
  def list_column_names
    saved_offset = @result.row_tell
    @result.fetch_fields.map { |field| field.name }.tap do
      @result.row_seek(saved_offset)
    end
  end
end
|
37
|
+
|
38
|
+
#################
|
39
|
+
# MySQL backend: a DBWrapper subclass that talks to a MySQL server through
# the Ruby mysql interface package.
class DBMySQL < DBWrapper
  ###
  # Open the connection to the MySQL server.
  #
  # exp: RosyConfigData experiment file object; its 'host', 'user',
  #      'passwd' and 'dbname' entries are used to connect.
  def initialize(exp)
    super(exp)

    credentials = %w[host user passwd dbname].map { |key| @exp.get(key) }
    @database = Mysql.real_connect(*credentials)
  end

  ###
  # Make a table with an auto-increment primary key column.
  #
  # table_name:         string
  # column_formats:     array of pairs [column_name, column_format]
  # index_column_names: array of column names that get a KEY
  # indexname:          string, name of the automatically created index column
  #
  # returns: nothing
  def create_table(table_name, column_formats, index_column_names, indexname)
    # The sections of the statement are separated by ", ", while the
    # entries inside one section are separated by a bare ",".
    sections = ["#{indexname} INT NOT NULL AUTO_INCREMENT"]

    unless column_formats.empty?
      sections << column_formats.map { |name, format| "#{name} #{format}" }.join(",")
    end

    sections << "PRIMARY KEY(#{indexname})"

    unless index_column_names.empty?
      sections << index_column_names.map { |name| "KEY(#{name})" }.join(",")
    end

    query_noretv("CREATE TABLE #{table_name} (#{sections.join(', ')});")
  end

  ####
  # Run a query.
  #
  # returns: a DBMySQLResult wrapping the result, or nil if the server
  # call produced no result object
  def query(query)
    raw_result = @database.query(query)
    raw_result ? DBMySQLResult.new(raw_result) : nil
  end

  ####
  # Run a query and discard whatever it returns.
  def query_noretv(query)
    @database.query(query)
    nil
  end

  ###
  # List all tables in the database, as an array of strings.
  def list_tables
    @database.list_tables
  end

  #####
  # list_column_formats
  #
  # List the column names and column types of a table.
  #
  # returns: array of pairs [column name, column format]
  def list_column_formats(table_name)
    formats = []
    @database.query("DESCRIBE #{table_name}").each_hash do |field|
      formats << field.values_at("Field", "Type")
    end
    formats
  end

  ####
  # num_rows
  #
  # Look up the row count of a table in the output of SHOW TABLE STATUS.
  #
  # returns: the "Rows" entry of the matching status row, or nil if no
  # table of that name is listed
  def num_rows(table_name)
    @database.query("SHOW TABLE STATUS").each_hash do |status|
      return status["Rows"] if status["Name"] == table_name
    end
    nil
  end
end
|
data/lib/db/db_sqlite.rb
ADDED
@@ -0,0 +1,280 @@
|
|
1
|
+
# DBSQLite: a subclass of DBWrapper.
|
2
|
+
#
|
3
|
+
# Use SQLite to access a database.
|
4
|
+
# Use the Ruby sqlite3 interface package for that.
|
5
|
+
|
6
|
+
require 'sqlite3'
|
7
|
+
require 'tempfile'
|
8
|
+
|
9
|
+
require 'db/db_wrapper'
|
10
|
+
|
11
|
+
#################
|
12
|
+
# Result of an SQLite query.
#
# Wraps the result set returned by SQLite3::Database#query. The wrapped
# object is read through @result, which is presumably stored by
# DBResult#initialize -- confirm against db_wrapper.rb.
#
# @counter tracks how many rows have been consumed, so that num_rows can
# count all rows and then return to the same position.
class DBSQLiteResult < DBResult
  # Wrap the query result; no row has been consumed yet.
  def initialize(value)
    super(value)
    @counter = 0
  end

  ###
  # Column names of the result, as a list of strings.
  def list_column_names
    @result.columns
  end

  # Number of rows in the result, as an integer.
  #
  # Counting needs a full pass over the result, so the current position
  # is remembered first and re-established afterwards.
  def num_rows
    resume_at = @counter

    # rewind and count every row
    reset
    total = 0
    each { |_row| total += 1 }

    # rewind again and step forward to where the caller was
    reset
    while @counter < resume_at
      @result.next
      @counter += 1
    end

    total
  end

  # Yields each remaining row as an array of strings.
  def each
    @result.each do |row|
      @counter += 1
      yield row.map(&:to_s)
    end
  end

  # Yields each remaining row as a hash: column name => column value
  # (values converted to strings).
  #
  # The column names come from the result set rather than from the row,
  # so this does not depend on the per-row `fields` accessor that only
  # older releases of the sqlite3 gem provide.
  def each_hash
    column_names = @result.columns
    @result.each do |row|
      @counter += 1

      row_hash = {}
      column_names.each_with_index do |key, index|
        row_hash[key] = row[index].to_s
      end
      yield row_hash
    end
  end

  ###
  # Rewind so that each() can be run again on the result object.
  def reset
    @result.reset
    @counter = 0
  end

  # Release the underlying result set.
  def free
    @result.close
  end

  # Returns the next row as an array of column contents, or whatever the
  # result set returns once the rows are exhausted.
  #
  # Only rows actually returned are counted; otherwise reading past the
  # end would make the saved position in num_rows overshoot.
  def fetch_row
    row = @result.next
    @counter += 1 if row
    row
  end
end
|
92
|
+
|
93
|
+
#################
|
94
|
+
# SQLite backend: a DBWrapper subclass that stores the database in a file
# and accesses it through the Ruby sqlite3 interface package.
class DBSQLite < DBWrapper
  ###
  # Open the database file that belongs to the given identifier.
  #
  # exp:        RosyConfigData experiment file object
  # dir:        string, directory for Shalmaneser internal data; must end
  #             in "/" because the file name is appended directly
  # identifier: string, identifier to use for the database
  #
  # dir and identifier may be nil if the object is only opened in order
  # to make temp databases; no database is attached in that case.
  def initialize(exp, dir = nil, identifier = nil)
    super(exp)

    @database = if dir && identifier
                  SQLite3::Database.new(dir + identifier.to_s + ".db")
                end

    # temp file for temp database
    @tf = nil
  end

  ###
  # Make a table with an integer primary key column.
  #
  # table_name:         string
  # column_formats:     array of pairs [column_name, column_format]
  # index_column_names: array of column names to be marked with KEY
  # indexname:          string, name of the automatically created index column
  #
  # returns: nothing
  def create_table(table_name, column_formats, index_column_names, indexname)
    # primary key and auto-increment column
    string = "CREATE TABLE #{table_name} (#{indexname} INTEGER PRIMARY KEY"

    # column declarations; key columns get the word KEY between name and
    # format, which list_column_formats strips again when parsing
    unless column_formats.empty?
      string << ", "
      string << column_formats.map { |name, format|
        if index_column_names.include? name
          "#{name} KEY #{format}"
        else
          "#{name} #{format}"
        end
      }.join(",")
    end
    string << ");"

    query_noretv(string)
  end

  ###
  # Remove a table.
  def drop_table(table_name)
    query_noretv("DROP TABLE " + table_name)
  end

  ###
  # Run a query.
  #
  # returns: a DBSQLiteResult, or nil if no database is attached
  def query(query)
    return nil unless @database

    DBSQLiteResult.new(@database.query(query))
  end

  ####
  # Run a query and discard whatever it returns.
  # Does nothing if no database is attached.
  def query_noretv(query)
    @database.execute(query) if @database
    nil
  end

  ###
  # List the names stored in sqlite_master.
  #
  # returns: array of strings, or nil if no database is attached
  def list_tables
    return nil unless @database

    # execute returns one array per row. Take its single column instead
    # of calling to_s on the row, which would give '["name"]'.
    @database.execute("select name from sqlite_master;").map { |row| row.first.to_s }
  end

  #####
  # list_column_formats
  #
  # List the column names and column types of a table by parsing the
  # CREATE statement that sqlite_master stores for it.
  #
  # Prints a message to standard error and exits with status 1 if the
  # table description cannot be read or parsed.
  #
  # returns: array of pairs [column name, column format],
  #          or nil if no database is attached
  def list_column_formats(table_name)
    return nil unless @database

    # The 'sql' column of sqlite_master holds the 'create' statement.
    # The table name is passed as a bound parameter, not interpolated.
    descr_row = @database.get_first_row(
      "select sql from sqlite_master where name==?;", [table_name.to_s]
    )
    unless descr_row
      $stderr.puts "SQLite error: could not read description of table #{table_name}"
      exit 1
    end

    # try to parse column names out of the 'create' statement
    unless descr_row.first =~ /^\s*create table \S+\s*\((.*)\)\s*$/i
      $stderr.puts "SQLite error: cannot read column names"
      exit 1
    end

    # we now have something of shape ' a key varchar2(30), b varchar2(30)'
    # split at the comma, remove whitespace at beginning and end,
    # then split again to get pairs [column name, column format]
    Regexp.last_match(1).split(",").map do |col_descrip|
      pieces = col_descrip.strip.split.reject do |entry|
        entry =~ /^key$/i || entry =~ /^primary$/i
      end
      if pieces.length > 2
        $stderr.puts "Warning: problematic column format in #{col_descrip}, may be parsed wrong."
      end
      pieces
    end
  end

  ####
  # num_rows
  #
  # Determine the number of rows in a table.
  #
  # returns: integer, or nil if no database is attached or the count
  # query yields no value
  def num_rows(table_name)
    return nil unless @database

    rows_s = @database.get_first_value("select count(*) from #{table_name}")
    rows_s ? rows_s.to_i : nil
  end

  ####
  # make_temp_table (make a table in a new, temporary file) is not
  # redefined here: the original note says it is the same as in the
  # superclass.

  # Delete the temporary database file, if there is one, and detach from
  # the database.
  def drop_temp_table
    # @tf is nil unless initialize_temp_table has run on this object
    @tf.close(true) if @tf
    @tf = nil
    @database = nil
  end

  ##############################
  protected

  # Attach this object to a fresh database stored in a temporary file and
  # create the table "temptable" in it.
  def initialize_temp_table(column_formats, index_column_names, indexname)
    @table_name = "temptable"
    # The Tempfile only reserves a path; it is closed right away and
    # SQLite opens the file by its path.
    @tf = Tempfile.new("temp_table")
    @tf.close
    @database = SQLite3::Database.new(@tf.path)
    create_table(@table_name, column_formats, index_column_names, indexname)
  end
end
|