shalmaneser-lib 1.2.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,175 @@
1
###########################
# DBWrapper:
# abstract base class wrapping database interfaces,
# so that Shalmaneser can use both an interface to an SQL server
# and an interface to SQLite through the same set of methods.
#
# Subclasses are expected to set @database in their own initialize
# and to override every method that raises "Overwrite me" here.
class DBWrapper
  # name of the table this object is bound to;
  # nil unless the object was produced by make_temp_table
  attr_reader :table_name

  ###
  # exp: experiment file object (a RosyConfigData object, per the original comment)
  def initialize(exp)
    # remember experiment file
    @exp = exp

    # database handle: none here,
    # please set to some other value in subclass initialization
    @database = nil

    # name of default table to access: none
    @table_name = nil
  end

  ###
  # close DB access
  #
  # Does nothing if no database handle has been set, so that closing
  # a wrapper that never opened a database is not an error.
  def close
    @database.close if @database
  end

  ####
  # querying the database (abstract):
  # returns a DBResult object
  def query(query)
    raise "Overwrite me"
  end

  ####
  # querying the database (abstract):
  # no result value
  def query_noretv(query)
    raise "Overwrite me"
  end

  ###
  # list all tables in the database (abstract):
  # no default here
  #
  # returns: list of strings
  def list_tables
    raise "Overwrite me"
  end

  ###
  # make a table (abstract)
  #
  # table_name:         string
  # column_formats:     array: array: string*string [column_name, column_format]
  # index_column_names: array: string: column_name
  # indexname:          string: name of automatically created index column
  #
  # returns: nothing
  def create_table(table_name, column_formats, index_column_names, indexname)
    raise "overwrite me"
  end

  ###
  # remove a table by issuing a DROP TABLE statement through query_noretv
  def drop_table(table_name)
    query_noretv("DROP TABLE " + table_name)
  end

  ###
  # list all column names of a table
  #
  # default: takes the first entry of each pair
  # returned by list_column_formats
  #
  # returns: array of strings
  def list_column_names(table_name)
    list_column_formats(table_name).map { |col_name, _col_format| col_name }
  end

  #####
  # list_column_formats (abstract)
  #
  # list column names and column types of this table
  #
  # returns: array:string*string, list of pairs [column name, column format]
  def list_column_formats(table_name)
    raise "Overwrite me"
  end

  ####
  # num_rows (abstract)
  #
  # determine the number of rows in a table
  # returns: integer
  def num_rows(table_name)
    raise "Overwrite me"
  end

  ####
  # make a temporary table: basically just make a table
  #
  # column_formats:     array: string*string [column_name, column_format]
  # index_column_names: array: string: column_name
  # indexname:          string: name of autoincrement primary index
  #
  # returns: a clone of this object (so an object of the current subclass)
  # whose @table_name attribute is set to the name of the new table.
  # The receiver itself is left unchanged.
  def make_temp_table(column_formats, index_column_names, indexname)
    temp_obj = clone
    temp_obj.initialize_temp_table(column_formats, index_column_names, indexname)
    temp_obj
  end

  ###
  # drop the table this object is bound to
  #
  # raises an error if this object has no table name set,
  # i.e. it did not come out of make_temp_table
  def drop_temp_table
    unless @table_name
      raise "can only do drop_temp_table() for objects that have a temp table"
    end

    drop_table(@table_name)
  end

  ##############################
  protected

  ###
  # pick a table name ("t" followed by the digits of the current
  # time as a float) and create that table.
  # protected rather than private because make_temp_table
  # calls it on the clone, not on self
  def initialize_temp_table(column_formats, index_column_names, indexname)
    @table_name = "t" + Time.new.to_f.to_s.delete(".")
    create_table(@table_name, column_formats, index_column_names, indexname)
  end
end
124
+
125
+
126
+
127
+
128
######################################################################
# DBResult:
# abstract class keeping query results
#
# instantiate for the DB package used.
# The defaults below simply forward to the wrapped result object,
# which therefore has to respond to num_rows, each, each_hash,
# free and fetch_row unless the subclass overrides these methods.
class DBResult
  ###
  # initialize with query result, and keep it
  def initialize(value)
    @result = value
  end

  # column names: NO DEFAULT
  def list_column_names
    raise "Overwrite me"
  end

  # number of rows: returns an integer
  def num_rows
    @result.num_rows
  end

  # yields each row as an array of values
  #
  # without a block, returns an Enumerator over the rows
  # instead of failing on the yield
  def each
    return enum_for(:each) unless block_given?

    @result.each { |row| yield row }
  end

  # yields each row as a hash: column name => column value
  #
  # without a block, returns an Enumerator over the row hashes
  def each_hash
    return enum_for(:each_hash) unless block_given?

    @result.each_hash { |row_hash| yield row_hash }
  end

  # reset object, such that each() can be run again
  # DEFAULT DOES NOTHING, PLEASE OVERWRITE
  def reset
  end

  # free result object
  def free
    @result.free
  end

  # returns row as an array of column contents
  def fetch_row
    @result.fetch_row
  end
end
@@ -0,0 +1,10 @@
1
#################################################################
#################################################################
# SelectTableAndColumns:
# pairs a DB table with the columns to read from it,
# for passing on to a view / SQLQuery.
#
# table_obj: DBTable object or DBWrapper object, the table to access.
#            All that matters is that it has a table_name attribute.
# columns:   either the string "*" for all columns,
#            or an array of strings naming the columns to view
#
# The string given as first argument makes Struct register the class
# as Struct::SelectTableAndColumns as well.
SelectTableAndColumns = Struct.new(
  "SelectTableAndColumns",
  :table_obj,
  :columns
)
@@ -0,0 +1,243 @@
1
+ # class SQLQuery
2
+ # KE, SP 27.1.05
3
+ #
4
+ # provides static methods that generate SQL queries as strings
5
+ # that can then be passed on to the database
6
+
7
+ require "ruby_class_extensions"
8
+
9
+ # require "RosyConventions"
10
+
11
class SQLQuery
  #####
  # SQLQuery.insert
  #
  # query created: insert a new row into a given database table
  # the new row is given as a list of pairs [column_name, value]
  #
  # table_name:        string: table name
  # field_value_pairs: array: string*object [column_name, cell_value]
  #
  # example:
  #  insert into table01 (field01,field02,field03,field04,field05) values
  #  (2, 'second', 'another', '1999-10-23', '10:30:00');
  #
  # returns: string
  # raises an error if one of the cell values is nil
  def self.insert(table_name, field_value_pairs)
    column_list = field_value_pairs.map { |column_name, _cell_value| column_name }.join(",")

    value_list = field_value_pairs.map { |column_name, cell_value|
      if cell_value.nil?
        # interpolation, so that a non-string column name cannot mask this error
        raise "SQL query construction error: Nil value for column #{column_name}"
      end

      stringify_value(cell_value)
    }.join(",")

    "INSERT INTO " + table_name + "(" + column_list + ") VALUES (" + value_list + ");"
  end

  #####
  # SQLQuery.select
  #
  # query created: select from given database tables
  # all column entries that conform to the given description:
  # - names of the columns to be selected (or the string "*")
  # - only those column entries where the row matches the given
  #   row restrictions: [column_name, column_value] => WHERE column_name IS column_value
  # - optionally, at most N lines => LIMIT N
  # - If more than one DB table is named, make a join
  # - Value restrictions: If it doesn't say which DB table to use,
  #   use the first one listed in table_col_pairs
  #
  # Use with only one database table creates queries like e.g.
  #   SELECT column1, column2 FROM table WHERE column3=val3 AND column4!=val4
  #
  # or:
  #   SELECT DISTINCT column1, column2 FROM table WHERE column3=val3 AND column4!=val4 LIMIT 10
  #
  # Use with 2 SelectTableAndColumns entries creates queries like
  #   SELECT table1.column1, table1.column2 FROM table1, table2 WHERE table1.column1=val3 AND table1.id=table2.id
  #
  # table_col_pairs:  Array: SelectTableAndColumns
  # row_restrictions: array: ValueRestriction objects (may be nil or empty);
  #                   each has to respond to get, val_is_variable
  #                   and table_name_included
  # var_hash:         further parameters, with string keys:
  #   "line_limit": integer: select at most N lines. if nil, all lines are chosen
  #   "distinct":   boolean: return each tuple only once. if nil or false, duplicates are kept
  #
  # returns: string.
  # raises an error if there are no tables, if no columns at all are selected,
  # if a column name is nil/empty, or if a restriction value is nil
  def self.select(table_col_pairs, row_restrictions, var_hash = {})
    raise "Zero tables to select from" if table_col_pairs.empty?

    ## column names to select: iterate through table/col pairs.
    ## Entries that select nothing from their table come out as nil
    ## and are dropped; they still take part in the FROM clause.
    column_exprs = table_col_pairs.map { |tc|
      if tc.columns == "*"
        # all columns from this table
        prepend_tablename(tc.table_obj.table_name, "*")
      elsif tc.columns.is_a?(Array) && !tc.columns.empty?
        # at least one column from this table
        tc.columns.map { |c|
          if c.nil? || c.empty?
            raise "Got nil/empty value within the column name list"
          end

          prepend_tablename(tc.table_obj.table_name, c)
        }.join(", ")
      end
    }.compact

    raise "Empty select: zero columns selected" if column_exprs.empty?

    ## SELECT, with unique return values if requested
    query = "SELECT "
    query += "DISTINCT " if var_hash["distinct"]
    query += column_exprs.join(", ")

    ## FROM table name(s)
    query += " FROM " + table_col_pairs.map { |tc| tc.table_obj.table_name }.join(", ")

    ## WHERE row_restrictions
    unless row_restrictions.nil? || row_restrictions.empty?
      conditions = row_restrictions.map { |restr_obj|
        # get the actual restriction out of its object
        # form: name(string) eqsymb(string: =, !=) value(object)
        name, eqsymb, value = restr_obj.get
        if value.nil?
          raise "SQL query construction error: Nil value for column #{name}"
        end

        # a variable name (e.g. another table's column) goes in verbatim,
        # an actual value is quoted
        value = stringify_value(value) unless restr_obj.val_is_variable

        unless restr_obj.table_name_included
          # prepend name of first table in table_col_pairs
          name = prepend_tablename(table_col_pairs.first.table_obj.table_name, name)
        end

        "#{name}#{eqsymb}#{value}"
      }
      query += " WHERE " + conditions.join(" AND ")
    end

    ## LIMIT at_most_that_many_lines
    query += " LIMIT " + var_hash["line_limit"].to_s if var_hash["line_limit"]

    query + ";"
  end

  #####
  # SQLQuery.update
  #
  # query created: overwrite several cells in possibly multiple rows of a
  # database table with new values
  # rows are selected via row restrictions
  #
  # table_name:        string: table name
  # field_value_pairs: array: string*Object: column name and value
  # row_restrictions:  array: ValueRestriction objects: column name and value restriction.
  #                    If nil or empty, no WHERE clause is generated,
  #                    so the resulting query updates every row of the table.
  #
  # example:
  #  update table01 set field04=19991022, field05=062218 where field01=1;
  #
  # returns: string
  # raises an error if a new value or a restriction value is nil
  def self.update(table_name, field_value_pairs, row_restrictions)
    assignments = field_value_pairs.map { |field, value|
      if value.nil?
        raise "SQL query construction error: Nil value for column #{field}"
      end

      "#{field}=#{stringify_value(value)}"
    }

    query = "UPDATE " + table_name + " SET " + assignments.join(", ")

    # as in select: only emit WHERE if there is something to restrict by,
    # rather than producing the invalid "... WHERE ;"
    unless row_restrictions.nil? || row_restrictions.empty?
      conditions = row_restrictions.map { |restr_obj|
        # get the actual restriction out of its object
        # form: name(string) eqsymb(string: =, !=) value(object)
        name, eqsymb, value = restr_obj.get
        if value.nil?
          raise "SQL query construction error: Nil value for column #{name}"
        end

        "#{name}#{eqsymb}#{stringify_value(value)}"
      }
      query += " WHERE " + conditions.join(" AND ")
    end

    query + ";"
  end

  #####
  # SQLQuery.add_columns
  #
  # query created: extend given table by
  # one or more columns given by their names and formats
  #
  # table_name:     string: table name
  # column_formats: array: array: string*string [column_name, column_format]
  #
  # returns: string
  def self.add_columns(table_name, column_formats)
    # each clause keeps its leading blank, so clauses after the first
    # are separated by a comma and two blanks
    additions = column_formats.map { |column_name, column_format|
      " ADD COLUMN " + column_name + " " + column_format
    }

    "ALTER TABLE " + table_name + additions.join(", ") + ";"
  end

  #####
  # SQLQuery.stringify_value turns a value into the form
  # in which it is put into a query:
  # - strings (including instances of String subclasses) are wrapped
  #   in single quotes, with the three quote characters " ' `
  #   replaced by the placeholders QQUOT0, QQUOT1, QQUOT2
  # - anything else is converted with to_s and left unquoted
  #
  # No other characters (e.g. backslashes) are escaped.
  #
  # returns: string
  def self.stringify_value(value)
    return value.to_s unless value.is_a?(String)

    "'" + value.gsub(/"/, "QQUOT0").gsub(/'/, "QQUOT1").gsub(/`/, "QQUOT2") + "'"
  end

  #####
  # SQLQuery.unstringify_value puts the quote characters back in
  # that stringify_value replaced by placeholders.
  # It does not remove the enclosing single quotes.
  # please apply only to strings
  def self.unstringify_value(value)
    value.gsub(/QQUOT0/, '"').gsub(/QQUOT1/, "'").gsub(/QQUOT2/, "`")
  end

  ####
  # SQLQuery.prepend_tablename
  #
  # auxiliary method for select:
  # prepend table name to column name,
  # but only if the column name does not already include a table name
  # (that is, does not contain a ".")
  #
  # returns: string
  def self.prepend_tablename(table_name, column_name)
    return column_name if column_name.include?(".")

    table_name + "." + column_name
  end
end
@@ -0,0 +1,19 @@
1
# Program names, version numbers and license shared by the
# Shalmaneser programs. All strings are frozen, so that the
# constants cannot be modified in place by accident.
module Shalmaneser
  LICENSE = 'GPL-2.0'.freeze

  module Frappe
    PROGRAM_NAME = 'Frappe'.freeze
    VERSION = '1.2.rc5'.freeze
  end

  module Fred
    PROGRAM_NAME = 'Fred'.freeze
    VERSION = '1.2.rc5'.freeze
  end

  module Rosy
    PROGRAM_NAME = 'Rosy'.freeze
    VERSION = '1.2.rc5'.freeze
  end

  # NOTE: nested inside the outer module of the same name,
  # so this is Shalmaneser::Shalmaneser.
  module Shalmaneser
    PROGRAM_NAME = 'Shalmaneser'.freeze
    VERSION = '1.2.rc5'.freeze
  end
end