frprep 0.0.1.prealpha
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
# RosyConventions
|
2
|
+
# KE May 05
|
3
|
+
#
|
4
|
+
# Conventions to be used throughout the Rosy system
|
5
|
+
# for greater consistency
|
6
|
+
|
7
|
+
require "common/ruby_class_extensions"
|
8
|
+
|
9
|
+
require "common/EnduserMode"
|
10
|
+
|
11
|
+
#################################################################
|
12
|
+
#################################################################
|
13
|
+
###
|
14
|
+
# value restriction (to pass on to a view):
|
15
|
+
# some column is restricted to be equal or unequal to some value
|
16
|
+
class ValueRestriction
  # val_is_variable: false here; true means the value is another column name
  # table_name_included: does the column name already carry its table name?
  attr_reader :val_is_variable, :table_name_included

  ###
  # new(): remember column, value and the optional settings.
  #
  # column:   string, column name
  # value:    value this column is to be restricted to
  # var_hash: hash of additional settings (string keys). Possible entries:
  #   "posneg": string, "=" or "!=": equality or inequality restriction
  #             (default: "=")
  #   "table_name_included": boolean, is the table name already included
  #             in the column name? (default: false)
  #
  # raises a RuntimeError if "posneg" is given but is neither "=" nor "!="
  def initialize(column, value, var_hash = {})
    @column = column
    @value = value

    operator = var_hash["posneg"]
    unless operator.nil? || ["=", "!="].include?(operator)
      raise "posneg should be either '=' or '!='. I got: " + operator.to_s
    end
    # equality restriction unless told otherwise
    @posneg = operator.nil? ? "=" : operator

    # by default the table name is not yet part of the column name
    included = var_hash["table_name_included"]
    @table_name_included = included.nil? ? false : included

    # by default the value is a plain value, not another column name
    @val_is_variable = false
  end

  ###
  # get(): returns the triple [column name (string), "=" or "!=" (string), value (object)]
  def get
    [@column, @posneg, @value]
  end
end
|
60
|
+
|
61
|
+
###
|
62
|
+
# value restrictions saying that variable1 = variable2:
|
63
|
+
# here, value is a variable name, and the table names
|
64
|
+
# must be already included
|
65
|
+
class VarVarRestriction < ValueRestriction
  # Same parameters as ValueRestriction#initialize, but 'value' names another
  # column (a variable) and both names are flagged as already containing
  # their table names, whatever var_hash says.
  def initialize(column, value, var_hash = {})
    # bare super forwards column, value and var_hash unchanged
    super
    @table_name_included = true
    @val_is_variable = true
  end
end
|
72
|
+
|
73
|
+
#################################################################
|
74
|
+
#################################################################
|
75
|
+
# Table and column names to pass on to a view / SQLQuery:
|
76
|
+
# which DB table to access, which columns to view?
|
77
|
+
#
|
78
|
+
# table_obj: DBTable object or DBWrapper object, table to access.
|
79
|
+
# The important thing is that the object must have a table_name attribute.
|
80
|
+
# columns: string|array:string, list of column names, or "*" for all columns
|
81
|
+
|
82
|
+
# Struct with the two members table_obj and columns. Because a string name is
# passed to Struct.new, the class is also registered as Struct::SelectTableAndColumns.
SelectTableAndColumns = Struct.new("SelectTableAndColumns", :table_obj, :columns)
|
83
|
+
|
84
|
+
#################################################################
|
85
|
+
#################################################################
|
86
|
+
|
87
|
+
###
|
88
|
+
# transforming feature output to a format that classifiers can handle
|
89
|
+
def prepare_output_for_classifiers(string)
  # Every one of the characters . " : ' ; ` becomes _PUNCT_ ...
  punctuation_masked = string.gsub(/[.":';`]/) { "_PUNCT_" }
  # ... and every whitespace character becomes _, so that a feature value
  # is a single token without characters classifiers may stumble over.
  punctuation_masked.gsub(/\s/) { "_" }
end
|
95
|
+
|
96
|
+
#################################################################
|
97
|
+
#################################################################
|
98
|
+
|
99
|
+
###
|
100
|
+
# classifier directory:
|
101
|
+
# either user-given classifier_dir or our own default classifier directory,
|
102
|
+
# then argrec/arglab/onestep, plus the splitID, if there is one
|
103
|
+
# exp:     configuration object answering get() and instantiate()
# step:    string: argrec, arglab, onestep
# splitID: string or nil
#
# returns: whatever File.new_dir returns for
#          <base dir>/classif_dir/<step>[.<splitID>]
def classifier_directory_name(exp, step, splitID)
  # user-given classifier directory wins over the experiment's rosy_dir
  root =
    if exp.get("classifier_dir")
      File.new_dir(exp.get("classifier_dir"))
    else
      File.new_dir(exp.instantiate("rosy_dir",
                                   "exp_ID" => exp.get("experiment_ID")))
    end

  classif_root = File.new_dir(root, "classif_dir")

  # the split ID, if any, is appended to the step name
  leaf = splitID ? step + "." + splitID.to_s : step
  File.new_dir(classif_root, leaf)
end
|
121
|
+
|
122
|
+
#################################################################
|
123
|
+
#################################################################
|
124
|
+
|
125
|
+
###
|
126
|
+
# instance ID: sentence ID plus frame ID
|
127
|
+
# Joins the string forms of sentence ID and frame ID with "---".
def construct_instance_id(sentence_id, frame_id)
  "#{sentence_id}---#{frame_id}"
end
|
130
|
+
|
131
|
+
# Splits an instance ID at every "---" and returns the array of parts.
def deconstruct_instance_id(instance_id)
  separator = "---"
  instance_id.split(separator)
end
|
134
|
+
|
135
|
+
#################################################################
|
136
|
+
#################################################################
|
137
|
+
|
138
|
+
# default test ID given when the user didn't specify one
|
139
|
+
# Returns the fixed string "apply".
def default_test_ID
  "apply"
end
|
142
|
+
|
143
|
+
|
144
|
+
#################################################################
|
145
|
+
#################################################################
|
146
|
+
|
147
|
+
###
|
148
|
+
# extend Array class by subsumption
|
149
|
+
module Subsumed
  # True if every element of self can be matched (via ==) with a distinct
  # element of array2, i.e. duplicates in self need as many duplicates in
  # array2. array2 itself is not modified; a clone is consumed instead.
  def subsumed_by?(array2)
    unmatched = array2.clone
    each do |element|
      # position of the first not-yet-used element equal to this one
      position = unmatched.index { |candidate| element == candidate }
      return false if position.nil?
      # use each element of array2 at most once
      unmatched.delete_at(position)
    end
    true
  end
end
|
168
|
+
|
169
|
+
# Mix subsumed_by? into every Array.
Array.send(:include, Subsumed)
|
@@ -0,0 +1,243 @@
|
|
1
|
+
# class SQLQuery
|
2
|
+
# KE, SP 27.1.05
|
3
|
+
#
|
4
|
+
# provides static methods that generate SQL queries as strings
|
5
|
+
# that can then be passed on to the database
|
6
|
+
|
7
|
+
require "common/ruby_class_extensions"
|
8
|
+
|
9
|
+
require "common/RosyConventions"
|
10
|
+
|
11
|
+
class SQLQuery

  #####
  # SQLQuery.insert
  #
  # query created: insert a new row into a given database table.
  # The new row is given as a list of pairs [column_name, value].
  #
  # example:
  #   insert into table01 (field01,field02,field03,field04,field05) values
  #   (2, 'second', 'another', '1999-10-23', '10:30:00');
  #
  # table_name:        string: table name
  # field_value_pairs: array: string*object [column_name, cell_value]
  #
  # returns: string
  # raises an error if a cell value is nil
  def SQLQuery.insert(table_name, field_value_pairs)
    columns = field_value_pairs.map { |column_name, _cell_value| column_name }
    values = field_value_pairs.map { |column_name, cell_value|
      if cell_value.nil?
        raise "SQL query construction error: Nil value for column " + column_name.to_s
      end
      SQLQuery.stringify_value(cell_value)
    }

    return "INSERT INTO " + table_name + "(" + columns.join(",") +
           ") VALUES (" + values.join(",") + ");"
  end

  #####
  # SQLQuery.select
  #
  # query created: select from given database tables
  # all column entries that conform to the given description:
  # - names of the columns to be selected (or the string "*")
  # - only those column entries where the row matches the given
  #   row restrictions
  # - optionally, at most N lines => LIMIT N
  # - If more than one DB table is named, make a join
  # - Value restrictions: If it doesn't say which DB table to use,
  #   use the first one listed in table_col_pairs
  #
  # Use with only one database table creates queries like e.g.
  #   SELECT column1, column2 FROM table WHERE column3=val3 AND column4!=val4
  # or:
  #   SELECT DISTINCT column1, column2 FROM table WHERE column3=val3 AND column4!=val4 LIMIT 10
  #
  # Use with 2 SelectTableAndColumns entries creates queries like
  #   SELECT table1.column1, table1.column2 FROM table1, table2 WHERE table1.column1=val3 AND table1.id=table2.id
  #
  # table_col_pairs:  array of objects with table_obj and columns
  # row_restrictions: array of ValueRestriction objects, or nil
  # var_hash:         further parameters (string keys):
  #   "line_limit": integer: select at most N lines. if nil, all lines are chosen
  #   "distinct":   boolean: return each tuple only once. if nil or false, duplicates are kept
  #
  # returns: string.
  # raises an error if there are no tables, if no columns at all are selected,
  # if a column name is nil/empty, or if a restriction value is nil
  def SQLQuery.select(table_col_pairs, row_restrictions, var_hash = {})
    if table_col_pairs.empty?
      raise "Zero tables to select from"
    end

    ## column names to select: one entry per table that contributes columns
    selected = table_col_pairs.map { |tc|
      if tc.columns == "*"
        # all columns from this table
        SQLQuery.prepend_tablename(tc.table_obj.table_name, "*")

      elsif tc.columns.is_a?(Array) && !tc.columns.empty?
        # at least one column from this table
        tc.columns.map { |c|
          if c.nil? || c.empty?
            raise "Got nil/empty value within the column name list"
          end
          SQLQuery.prepend_tablename(tc.table_obj.table_name, c)
        }.join(", ")

      else
        # no columns from this table
        nil
      end
    }.compact

    if selected.empty?
      raise "Empty select: zero columns selected"
    end

    ## SELECT [DISTINCT] columns
    string = "SELECT "
    string += "DISTINCT " if var_hash["distinct"]
    string += selected.join(", ")

    ## FROM table name(s)
    string += " FROM " + table_col_pairs.map { |tc| tc.table_obj.table_name }.join(", ")

    ## WHERE row_restrictions
    unless row_restrictions.nil? || row_restrictions.empty?
      # restrictions without a table name refer to the first table listed
      first_table = table_col_pairs.first.table_obj.table_name
      string += " WHERE " + row_restrictions.map { |restr_obj|
        restriction_to_sql(restr_obj, first_table)
      }.join(" AND ")
    end

    ## LIMIT at_most_that_many_lines
    if var_hash["line_limit"]
      string += " LIMIT " + var_hash["line_limit"].to_s
    end

    return string + ";"
  end

  #####
  # SQLQuery.update
  #
  # query created: overwrite several cells in possibly multiple rows of a
  # database table with new values.
  # Rows are selected via row restrictions.
  #
  # example:
  #   update table01 set field04=19991022, field05=062218 where field01=1;
  #
  # table_name:        string: table name
  # field_value_pairs: array: string*object: column name and new value
  # row_restrictions:  array: ValueRestriction objects; must not be empty.
  #                    Column names are used as given (no table name is
  #                    prepended); a restriction whose val_is_variable is
  #                    set has its value inserted unquoted, as in select.
  #
  # returns: string
  # raises an error if a value is nil or if no row restriction is given
  def SQLQuery.update(table_name, field_value_pairs, row_restrictions)
    if row_restrictions.nil? || row_restrictions.empty?
      # without this check the query would end in a dangling "WHERE ;"
      raise "SQL query construction error: update without row restrictions"
    end

    assignments = field_value_pairs.map { |field, value|
      if value.nil?
        raise "SQL query construction error: Nil value for column " + field.to_s
      end
      field + "=" + SQLQuery.stringify_value(value)
    }
    conditions = row_restrictions.map { |restr_obj| restriction_to_sql(restr_obj) }

    return "UPDATE " + table_name + " SET " + assignments.join(", ") +
           " WHERE " + conditions.join(" AND ") + ";"
  end

  #####
  # SQLQuery.add_columns
  #
  # query created: extend given table by
  # one or more columns given by their names and formats
  #
  # table_name:     string: table name
  # column_formats: array: array: string*string [column_name, column_format]
  #
  # returns: string
  def SQLQuery.add_columns(table_name, column_formats)
    additions = column_formats.map { |column_name, column_format|
      " ADD COLUMN " + column_name + " " + column_format
    }
    return "ALTER TABLE " + table_name + additions.join(", ") + ";"
  end

  #####
  # SQLQuery.stringify_value turns a value into its SQL literal:
  # strings are wrapped in single quotes, with ", ' and ` replaced by the
  # placeholders QQUOT0, QQUOT1, QQUOT2; anything else is rendered via to_s.
  #
  # NOTE(review): backslashes are passed through unchanged; whether that is
  # safe depends on the database's escaping rules -- confirm.
  #
  # returns: string
  def SQLQuery.stringify_value(value)
    if value.is_a?(String)
      return "'" + value.gsub(/"/, "QQUOT0").gsub(/'/, "QQUOT1").gsub(/`/, "QQUOT2") + "'"
    else
      return value.to_s
    end
  end

  #####
  # SQLQuery.unstringify_value replaces the placeholders introduced by
  # stringify_value by the original quote characters.
  # It does not remove surrounding single quotes.
  # Please apply only to strings.
  def SQLQuery.unstringify_value(value)
    value.gsub(/QQUOT0/, '"').gsub(/QQUOT1/, "'").gsub(/QQUOT2/, "`")
  end

  ####
  # SQLQuery.prepend_tablename
  #
  # auxiliary method for select:
  # prepend table name to column name
  # unless the column name already contains a "."
  def SQLQuery.prepend_tablename(table_name, column_name)
    if column_name.include?(".")
      return column_name
    else
      return table_name + "." + column_name
    end
  end

  ####
  # SQLQuery.restriction_to_sql (private)
  #
  # auxiliary method for select and update:
  # renders one restriction object as "name<eqsymb>value".
  #
  # restr_obj:     object answering get(), val_is_variable, table_name_included
  # default_table: string or nil: table name to prepend to the column name
  #                when the restriction does not include one; nil: never prepend
  #
  # raises an error if the restriction's value is nil
  def SQLQuery.restriction_to_sql(restr_obj, default_table = nil)
    # form: name(string) eqsymb(string: =, !=) value(object)
    name, eqsymb, value = restr_obj.get()
    if value.nil?
      raise "SQL query construction error: Nil value for column " + name.to_s
    end

    unless restr_obj.val_is_variable
      # value is a value, not a variable name
      value = SQLQuery.stringify_value(value)
    end

    unless default_table.nil? || restr_obj.table_name_included
      name = SQLQuery.prepend_tablename(default_table, name)
    end

    return name + eqsymb + value
  end
  private_class_method :restriction_to_sql
end
|
@@ -0,0 +1,194 @@
|
|
1
|
+
#########
|
2
|
+
# module StringTerminalsInRightOrder
|
3
|
+
#
|
4
|
+
# returns the yield of a node, or a list of nodes, as a string
|
5
|
+
# of " "-separated words
|
6
|
+
#
|
7
|
+
# Words are put into the right order, left to right,
|
8
|
+
# under the assumption that their node IDs reflect that order
|
9
|
+
#
|
10
|
+
# Terminal nodes are assumed to have IDs ending in a number,
|
11
|
+
# numbered from left to right
|
12
|
+
#
|
13
|
+
# Splitword nodes are assumed to have IDs ending in N_sM
|
14
|
+
# for numbers N and M, where N orders terminals left to right
|
15
|
+
# and M orders the splitword parts left to right
|
16
|
+
#
|
17
|
+
# If the yield of the node/the list of nodes contains all splitwords of a terminal,
|
18
|
+
# the whole terminal is taken instead
|
19
|
+
#
|
20
|
+
# methods:
|
21
|
+
#
|
22
|
+
# string_for_node returns the string for the yield of a node
|
23
|
+
# node: a node object
|
24
|
+
#
|
25
|
+
# string_for_nodes returns the string for the yield of a list of nodes
|
26
|
+
# nodes: a list of node objects
|
27
|
+
|
28
|
+
module StringTerminalsInRightOrder
  # string_for_node: returns the string for the yield of a node
  #   node: a node object
  def string_for_node(node)
    string_for_nodes([node])
  end

  # string_for_nodes: returns the string for the yield of a list of nodes:
  # the words in left-to-right order, each followed by one " "
  #   nodes: a list of node objects
  def string_for_nodes(nodes)
    a = right_level_terminals_for_nodes(nodes)
    a = sort_terminals_and_splitwords_left_to_right(a)
    return node_array_to_string(a)
  end

  #####
  private

  # right_level_terminals_for_nodes:
  # - compute the yield for each element of 'nodes'
  # - then consider all splitwords in the yield:
  #   if all splitwords of a terminal are in the yield,
  #   then use the terminal rather than its splitwords
  #
  # returns: array of nodes without duplicates
  def right_level_terminals_for_nodes(nodes)
    a = nodes.map { |n| n.yield_nodes() }.flatten
    b = []
    a.each { |n|
      if n.is_splitword?
        # see if a contains all parts of this splitword
        # if so, take into b the splitword's parent, the terminal,
        # rather than the individual splitwords
        parent = n.parent

        if parent.nil?
          # splitword without a parent
          b << n
        elsif b.include?(parent) || a.include?(parent)
          # the splitword's parent is already in b, or is in 'a' itself:
          # then we're done
        else
          # check if all children of the parent are in 'a'
          all_in = true
          parent.each_child { |nsibling|
            unless a.include? nsibling
              all_in = false
              break
            end
          }

          if all_in
            # yes, all children of the parent are in 'a'
            b << parent
          else
            # no, some sibling of n is not in 'a'
            b << n
          end
        end
      elsif n.is_terminal?
        # n is a terminal
        b << n
        # if n is anything but a splitword or a terminal,
        # ignore it
      end
    }
    return b.uniq
  end

  # sort_terminals_and_splitwords_left_to_right:
  # take an array of nodes that consists of terminals and splitwords
  # and sort them using the following comparison:
  # - when comparing two terminals, use the
  #   last numbers in their respective IDs
  # - when comparing two splitwords, their IDs end in _N_sM
  #   for numbers N and M.
  #   If they coincide in N, compare them by M,
  #   else compare them by N
  # - when comparing a terminal and a splitword,
  #   compare the terminal's last number to the splitword's N
  def sort_terminals_and_splitwords_left_to_right(nodes)
    nodes.sort { |a, b|
      if a.is_splitword? && b.is_splitword?
        compare_splitwords(a, b)
      elsif a.is_terminal? && b.is_terminal?
        compare_terminals(a, b)
      else
        compare_mixed(a, b)
      end
    }
  end

  # node_array_to_string:
  # 'nodes' is an array of node objects, each of which offers a "word" method
  # string their words together, each word followed by " "
  # (so a non-empty result ends in a space)
  def node_array_to_string(nodes)
    return nodes.map { |n| n.word + " " }.join
  end

  # - when comparing two terminals, use the
  #   last numbers in their respective IDs
  def compare_terminals(a, b)
    last_i(a) <=> last_i(b)
  end

  # - when comparing two splitwords, their IDs end in _N_sM
  #   for numbers N and M.
  #   If they coincide in N, compare them by M,
  #   else compare them by N
  def compare_splitwords(a, b)
    terminal_a = splitword_terminal_i(a)
    terminal_b = splitword_terminal_i(b)
    if terminal_a == terminal_b
      # parts of same terminal?
      # compare parts
      last_i(a) <=> last_i(b)
    else
      # not parts of same terminal?
      # compare terminals
      terminal_a <=> terminal_b
    end
  end

  # - when comparing a terminal and a splitword,
  #   compare the terminal's last number to the splitword's N
  # - anything else: warn on stderr and treat the two nodes as equal
  def compare_mixed(a, b)
    if a.is_splitword? && b.is_terminal?
      splitword_terminal_i(a) <=> last_i(b)

    elsif a.is_terminal? && b.is_splitword?
      last_i(a) <=> splitword_terminal_i(b)

    else
      # not one terminal, one splitword?
      # then what?
      $stderr.print "SalsaTigerSentence, compare_mixed: confused by "
      $stderr.print a.id, ", ", b.id, "\n"
      # a sort block must return a number: returning the nil that print
      # yields would make Array#sort raise an ArgumentError
      0
    end
  end

  # return last number of the ID of a node
  # exits the program if the ID contains no suitable digits
  def last_i(n)
    # match final string of digits
    match = /(\d+)$/.match(n.id)
    if match.nil?
      # this shouldn't happen _in principle_
      # but we might get weird node IDs for splitwords;
      # so we act gracefully and catch the case where there
      # is one final letter behind the digits
      match = /(\d+)\w$/.match(n.id)
    end
    if match.nil? # this shouldn't ever happen
      $stderr.print "SalsaTigerSentence, last_i: Couldn't extract digits from: "
      $stderr.print n.id, "\n"
      exit 1
    end
    return match[1].to_i # and return it as number
  end

  # assume the ID of the node includes N_sM
  # return N
  # exits the program if the ID does not have that form
  def splitword_terminal_i(n)
    # match string of digits before splitword ID
    match = /(\d+)_s\d*/.match(n.id)
    if match.nil? # this shouldn't ever happen
      $stderr.print "SalsaTigerSentence, splitword_terminal_i: Couldn't extract digits from: "
      $stderr.print n.id, "\n"
      exit 1
    end
    return match[1].to_i # and return it as number
  end

end
|
193
|
+
|
194
|
+
|