frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
# RosyConventions
|
2
|
+
# KE May 05
|
3
|
+
#
|
4
|
+
# Conventions to be used throughout the Rosy system
|
5
|
+
# for greater consistency
|
6
|
+
|
7
|
+
require "common/ruby_class_extensions"
|
8
|
+
|
9
|
+
require "common/EnduserMode"
|
10
|
+
|
11
|
+
#################################################################
|
12
|
+
#################################################################
|
13
|
+
###
|
14
|
+
# value restriction (to pass on to a view):
|
15
|
+
# some column is restricted to be equal or unequal to some value
|
16
|
+
class ValueRestriction

  attr_reader :val_is_variable, :table_name_included

  ###
  # new(): remember the column, the value and the optional settings
  #
  # column:   string: column name
  # value:    value this column is to be restricted to
  # var_hash: hash: additional settings, string keys. Possible entries:
  #   "posneg": string: "=" or "!=": equality or inequality restriction
  #             (default: "=")
  #   "table_name_included": boolean: is the table name already included
  #             in the column name? (default: false)
  #
  # raises a RuntimeError if "posneg" is given but is neither "=" nor "!="
  def initialize(column, value, var_hash = {})
    @column = column
    @value = value

    comparison = var_hash["posneg"]
    if comparison.nil?
      # no operator given: fall back to an equality restriction
      comparison = "="
    elsif !["=", "!="].include?(comparison)
      raise "posneg should be either '=' or '!='. I got: #{comparison}"
    end
    @posneg = comparison

    # unless told otherwise, the column name does not carry its table name yet
    included = var_hash["table_name_included"]
    @table_name_included = included.nil? ? false : included

    # in this class the value is a literal value, never another column name
    @val_is_variable = false
  end

  ###
  # get(): returns a triple [column name(string), eq(string), value(object)]
  def get
    [@column, @posneg, @value]
  end
end
|
60
|
+
|
61
|
+
###
|
62
|
+
# value restrictions saying that variable1 = variable2:
|
63
|
+
# here, value is a variable name, and the table names
|
64
|
+
# must be already included
|
65
|
+
class VarVarRestriction < ValueRestriction
  ###
  # new(): same arguments as ValueRestriction.new, but afterwards the
  # restriction is marked as "value is a variable (column) name" and
  # "table name is already part of the column name", whatever var_hash said
  def initialize(column, value, var_hash = {})
    # bare super forwards column, value and var_hash unchanged
    super
    @table_name_included = true
    @val_is_variable = true
  end
end
|
72
|
+
|
73
|
+
#################################################################
|
74
|
+
#################################################################
|
75
|
+
# Table and column names to pass on to a view / SQLQuery:
|
76
|
+
# which DB table to access, which columns to view?
|
77
|
+
#
|
78
|
+
# table_obj: DBTable object or DBWrapper object, table to access.
|
79
|
+
# The important thing is that the object must have a table_name attribute.
|
80
|
+
# columns: string|array:string, list of column names, or "*" for all columns
|
81
|
+
|
82
|
+
# Struct with two members: table_obj (the table to access) and columns
# (the column names to read from it, or "*").
# NOTE: because a String is passed as the first argument, Struct.new also
# registers the generated class as Struct::SelectTableAndColumns.
SelectTableAndColumns = Struct.new("SelectTableAndColumns", :table_obj, :columns)
|
83
|
+
|
84
|
+
#################################################################
|
85
|
+
#################################################################
|
86
|
+
|
87
|
+
###
|
88
|
+
# transforming feature output to a format that classifiers can handle
|
89
|
+
def prepare_output_for_classifiers(string)
  # step 1: each of the characters . " : ' ; ` becomes the token _PUNCT_
  without_punctuation = string.gsub(/[.":';`]/, "_PUNCT_")

  # step 2: each whitespace character becomes an underscore
  # (the original author notes that some classifiers otherwise have
  # trouble with the feature values)
  without_punctuation.gsub(/\s/, "_")
end
|
95
|
+
|
96
|
+
#################################################################
|
97
|
+
#################################################################
|
98
|
+
|
99
|
+
###
|
100
|
+
# classifier directory:
|
101
|
+
# either user-given classifier_dir or our own default classifier directory,
|
102
|
+
# then argrec/arglab/onestep, plus the splitID, if there is one
|
103
|
+
# exp:     RosyConfigData object (must answer get() and instantiate())
# step:    string: argrec, arglab, onestep
# splitID: string or nil
#
# returns: whatever File.new_dir returns for the innermost directory.
# NOTE(review): File.new_dir is not part of Ruby core; presumably it is
# supplied by common/ruby_class_extensions -- confirm there what it does.
def classifier_directory_name(exp, step, splitID)
  # base directory: the user-given classifier_dir wins over the
  # experiment's rosy_dir
  root =
    if exp.get("classifier_dir")
      File.new_dir(exp.get("classifier_dir"))
    else
      File.new_dir(exp.instantiate("rosy_dir",
                                   "exp_ID" => exp.get("experiment_ID")))
    end

  classif_root = File.new_dir(root, "classif_dir")

  # innermost directory: the step name, suffixed by ".<splitID>" if given
  leaf = splitID ? step + "." + splitID.to_s : step
  File.new_dir(classif_root, leaf)
end
|
121
|
+
|
122
|
+
#################################################################
|
123
|
+
#################################################################
|
124
|
+
|
125
|
+
###
|
126
|
+
# instance ID: sentence ID plus frame ID
|
127
|
+
# joins the string forms of sentence ID and frame ID with "---"
def construct_instance_id(sentence_id, frame_id)
  [sentence_id.to_s, frame_id.to_s].join("---")
end
|
130
|
+
|
131
|
+
# splits an instance ID at every "---" separator
# returns: array of strings (sentence ID, frame ID for an ID built by
# construct_instance_id from parts that do not themselves contain "---")
def deconstruct_instance_id(instance_id)
  id_parts = instance_id.split("---")
  id_parts
end
|
134
|
+
|
135
|
+
#################################################################
|
136
|
+
#################################################################
|
137
|
+
|
138
|
+
# default test ID given when the user didn't specify one
|
139
|
+
# returns: string, the fixed test ID "apply"
def default_test_ID
  "apply"
end
|
142
|
+
|
143
|
+
|
144
|
+
#################################################################
|
145
|
+
#################################################################
|
146
|
+
|
147
|
+
###
|
148
|
+
# extend Array class by subsumption
|
149
|
+
module Subsumed
  ###
  # subsumed_by?: true iff every element of self can be paired with a
  # distinct, == element of array2 (i.e. duplicates in self need
  # matching duplicates in array2). array2 itself is left untouched.
  def subsumed_by?(array2)
    # work on a copy, since matched elements are removed
    unmatched = array2.clone

    each do |element|
      position = unmatched.index { |candidate| element == candidate }
      return false if position.nil?

      # consume the match so it cannot be paired with another element
      unmatched.delete_at(position)
    end

    true
  end
end
|
168
|
+
|
169
|
+
# mix Subsumed into the core Array class, so that every array
# answers subsumed_by?
Array.send(:include, Subsumed)
|
@@ -0,0 +1,243 @@
|
|
1
|
+
# class SQLQuery
|
2
|
+
# KE, SP 27.1.05
|
3
|
+
#
|
4
|
+
# provides static methods that generate SQL queries as strings
|
5
|
+
# that can then be passed on to the database
|
6
|
+
|
7
|
+
require "common/ruby_class_extensions"
|
8
|
+
|
9
|
+
require "common/RosyConventions"
|
10
|
+
|
11
|
+
class SQLQuery

  #####
  # SQLQuery.insert
  #
  # query created: insert a new row into a given database table
  # the new row is given as a list of pairs [column_name, value]
  #
  # example:
  # insert into table01 (field01,field02,field03,field04,field05) values
  # (2, 'second', 'another', '1999-10-23', '10:30:00');
  #
  # table_name:        string: table name
  # field_value_pairs: array: string*object [column_name, cell_value]
  #
  # returns: string
  # raises a RuntimeError if some cell value is nil
  def self.insert(table_name, field_value_pairs)
    columns = field_value_pairs.map { |column_name, _cell_value| column_name }

    values = field_value_pairs.map do |column_name, cell_value|
      if cell_value.nil?
        raise "SQL query construction error: Nil value for column #{column_name}"
      end

      stringify_value(cell_value)
    end

    "INSERT INTO #{table_name}(#{columns.join(',')}) VALUES (#{values.join(',')});"
  end

  #####
  # SQLQuery.select
  #
  # query created: select from given database tables
  # all column entries that conform to the given description:
  # - names of the columns to be selected (or the string "*")
  # - only those column entries where the row matches the given
  #   row restrictions
  # - optionally, at most N lines => LIMIT N
  # - If more than one DB table is named, make a join
  # - Value restrictions: If it doesn't say which DB table to use,
  #   use the first one listed in table_col_pairs
  #
  # Use with only one database table creates queries like e.g.
  # SELECT table.column1, table.column2 FROM table WHERE table.column3=val3
  #
  # or:
  # SELECT DISTINCT table.column1 FROM table WHERE table.column3=val3 LIMIT 10
  #
  # Use with 2 SelectTableAndColumns entries creates queries like
  # SELECT table1.column1 FROM table1, table2 WHERE table1.column1=val3 AND table1.id=table2.id
  #
  # table_col_pairs:  array: SelectTableAndColumns
  # row_restrictions: array: ValueRestriction objects, or nil
  # var_hash:         further parameters, string keys:
  #   "line_limit": integer: select at most N lines. if nil, all lines are chosen
  #   "distinct":   boolean: return each tuple only once. if nil or false, duplicates are kept
  #
  # returns: string.
  # raises a RuntimeError if there are no tables, if no columns at all
  # are selected, if a column name is nil/empty, or if a restriction
  # value is nil
  def self.select(table_col_pairs, row_restrictions, var_hash = {})
    raise "Zero tables to select from" if table_col_pairs.empty?

    ## column names to select: one entry per table that contributes columns
    column_lists = table_col_pairs.map do |tc|
      if tc.columns == "*"
        # all columns from this table
        prepend_tablename(tc.table_obj.table_name, "*")
      elsif tc.columns.is_a?(Array) && !tc.columns.empty?
        # at least one column from this table
        tc.columns.map do |c|
          if c.nil? || c.empty?
            raise "Got nil/empty value within the column name list"
          end

          prepend_tablename(tc.table_obj.table_name, c)
        end.join(", ")
      end
      # any other case: no columns from this table (nil, dropped below)
    end.compact

    raise "Empty select: zero columns selected" if column_lists.empty?

    ## SELECT [DISTINCT] columns FROM table name(s)
    query = "SELECT " + (var_hash["distinct"] ? "DISTINCT " : "") +
            column_lists.join(", ")
    query += " FROM " + table_col_pairs.map { |tc| tc.table_obj.table_name }.join(", ")

    ## WHERE row_restrictions: unqualified column names get the first table's name
    query += where_clause(row_restrictions, table_col_pairs.first.table_obj.table_name)

    ## LIMIT at_most_that_many_lines
    query += " LIMIT " + var_hash["line_limit"].to_s if var_hash["line_limit"]

    query + ";"
  end

  #####
  # SQLQuery.update
  #
  # query created: overwrite several cells in possibly multiple rows of a
  # database table with new values
  # rows are selected via row restrictions; with nil or empty
  # row restrictions no WHERE part is generated (as in SQLQuery.select)
  #
  # example:
  # update table01 set field04=19991022, field05=062218 where field01=1;
  #
  # table_name:        string: table name
  # field_value_pairs: array: string*Object: column name and value
  # row_restrictions:  array: ValueRestriction objects, or nil
  #
  # returns: string
  # raises a RuntimeError if a new cell value or a restriction value is nil
  def self.update(table_name, field_value_pairs, row_restrictions)
    assignments = field_value_pairs.map do |field, value|
      if value.nil?
        raise "SQL query construction error: Nil value for column #{field}"
      end

      "#{field}=#{stringify_value(value)}"
    end

    # nil as default table: column names are used exactly as given
    "UPDATE #{table_name} SET #{assignments.join(', ')}" +
      where_clause(row_restrictions, nil) + ";"
  end

  #####
  # SQLQuery.add_columns
  #
  # query created: extend given table by
  # one or more columns given by their names and formats
  #
  # table_name:     string: table name
  # column_formats: array: array: string*string [column_name, column_format]
  #
  # returns: string
  def self.add_columns(table_name, column_formats)
    additions = column_formats.map do |column_name, column_format|
      " ADD COLUMN #{column_name} #{column_format}"
    end

    "ALTER TABLE #{table_name}#{additions.join(', ')};"
  end

  #####
  # SQLQuery.stringify_value
  #
  # strings (including instances of String subclasses) are put into
  # single quotes, with the characters " ' ` replaced by the markers
  # QQUOT0, QQUOT1, QQUOT2; any other object is rendered with to_s
  #
  # NOTE(review): only these three quote characters are masked;
  # backslashes pass through unchanged -- check whether the target
  # database treats a trailing backslash as an escape character.
  #
  # returns: string
  def self.stringify_value(value)
    return value.to_s unless value.is_a?(String)

    "'" + value.gsub('"', "QQUOT0").gsub("'", "QQUOT1").gsub("`", "QQUOT2") + "'"
  end

  #####
  # SQLQuery.unstringify_value replaces the QQUOT markers written by
  # stringify_value by the original quote characters
  # (the surrounding single quotes are not touched)
  # please apply only to strings
  #
  # returns: string
  def self.unstringify_value(value)
    value.gsub("QQUOT0", '"').gsub("QQUOT1", "'").gsub("QQUOT2", "`")
  end

  ####
  # SQLQuery.prepend_tablename
  #
  # auxiliary method for select:
  # prepend table name to column name,
  # unless the column name already contains a "."
  #
  # returns: string
  def self.prepend_tablename(table_name, column_name)
    column_name.include?(".") ? column_name : "#{table_name}.#{column_name}"
  end

  ####
  # SQLQuery.where_clause
  #
  # auxiliary method for select and update:
  # returns " WHERE cond1 AND cond2 ...", or "" for nil/empty restrictions
  #
  # row_restrictions: array: ValueRestriction objects, or nil
  # default_table:    string or nil: table name to put in front of column
  #                   names whose restriction does not include one yet;
  #                   nil leaves all column names as they are
  #
  # raises a RuntimeError if a restriction value is nil
  def self.where_clause(row_restrictions, default_table)
    return "" if row_restrictions.nil? || row_restrictions.empty?

    conditions = row_restrictions.map do |restr_obj|
      # form: name(string) eqsymb(string: =, !=) value(object)
      name, eqsymb, value = restr_obj.get
      if value.nil?
        raise "SQL query construction error: Nil value for column #{name}"
      end

      # a variable (column) name must not be quoted like a value
      value = stringify_value(value) unless restr_obj.val_is_variable

      if default_table && !restr_obj.table_name_included
        name = prepend_tablename(default_table, name)
      end

      "#{name}#{eqsymb}#{value}"
    end

    " WHERE " + conditions.join(" AND ")
  end
  private_class_method :where_clause
end
|
@@ -0,0 +1,194 @@
|
|
1
|
+
#########
|
2
|
+
# module StringTerminalsInRightOrder
|
3
|
+
#
|
4
|
+
# returns the yield of a node, or a list of nodes, as a string
|
5
|
+
# of " "-separated words
|
6
|
+
#
|
7
|
+
# Words are put into the right order, left to right,
|
8
|
+
# under the assumption that their node IDs reflect that order
|
9
|
+
#
|
10
|
+
# Terminal nodes are assumed to have IDs ending in a number,
|
11
|
+
# numbered from left to right
|
12
|
+
#
|
13
|
+
# Splitword nodes are assumed to have IDs ending in N_sM
|
14
|
+
# for numbers N and M, where N orders terminals left to right
|
15
|
+
# and M orders the splitword parts left to right
|
16
|
+
#
|
17
|
+
# If the yield of the node/the list of nodes contains all splitwords of a terminal,
|
18
|
+
# the whole terminal is taken instead
|
19
|
+
#
|
20
|
+
# methods:
|
21
|
+
#
|
22
|
+
# string_for_node returns the string for the yield of a node
|
23
|
+
# node: a node object
|
24
|
+
#
|
25
|
+
# string_for_nodes returns the string for the yield of a list of nodes
|
26
|
+
# nodes: a list of node objects
|
27
|
+
|
28
|
+
module StringTerminalsInRightOrder
  # string_for_node: the yield of a single node as a string;
  # each word is followed by one " "
  def string_for_node(node)
    string_for_nodes([node])
  end

  # string_for_nodes: the yield of a list of nodes as a string;
  # words are ordered by the numbers in their node IDs,
  # and each word is followed by one " "
  def string_for_nodes(nodes)
    chosen = right_level_terminals_for_nodes(nodes)
    node_array_to_string(sort_terminals_and_splitwords_left_to_right(chosen))
  end

  #####
  private

  # right_level_terminals_for_nodes:
  # - compute the yield for each element of 'nodes'
  # - then consider all splitwords in the yield:
  #   if all splitwords of a terminal are in the yield,
  #   then use the terminal rather than its splitwords
  # - nodes that are neither splitword nor terminal are dropped
  #
  # returns: array of nodes without duplicates
  def right_level_terminals_for_nodes(nodes)
    yield_list = nodes.map { |n| n.yield_nodes }.flatten
    result = []

    yield_list.each do |n|
      if n.is_splitword?
        parent = n.parent

        if parent.nil?
          # splitword without a parent: keep it as it is
          result << n
        elsif result.include?(parent) || yield_list.include?(parent)
          # the terminal itself is taken already (or is part of the
          # yield): nothing to add for this splitword
        else
          # are all splitwords of this terminal part of the yield?
          complete = true
          parent.each_child do |sibling|
            next if yield_list.include?(sibling)

            complete = false
            break
          end

          # yes: take the whole terminal; no: take just this splitword
          result << (complete ? parent : n)
        end
      elsif n.is_terminal?
        result << n
      end
    end

    result.uniq
  end

  # sort_terminals_and_splitwords_left_to_right:
  # take an array of nodes that consists of terminals and splitwords
  # and sort them using the following comparison:
  # - when comparing two terminals, use the
  #   last numbers in their respective IDs
  # - when comparing two splitwords, their IDs end in _N_sM
  #   for numbers N and M.
  #   If they coincide in N, compare them by M,
  #   else compare them by N
  # - when comparing a terminal and a splitword,
  #   compare the terminal's last number to the splitword's N
  def sort_terminals_and_splitwords_left_to_right(nodes)
    nodes.sort do |a, b|
      if a.is_splitword? && b.is_splitword?
        compare_splitwords(a, b)
      elsif a.is_terminal? && b.is_terminal?
        compare_terminals(a, b)
      else
        compare_mixed(a, b)
      end
    end
  end

  # node_array_to_string:
  # 'nodes' is an array of node objects, each of which offers a "word" method
  # string their words together, each word followed by " "
  # (so a non-empty result ends in a blank)
  def node_array_to_string(nodes)
    nodes.inject("") { |text, n| text + n.word + " " }
  end

  # - when comparing two terminals, use the
  #   last numbers in their respective IDs
  def compare_terminals(a, b)
    last_i(a) <=> last_i(b)
  end

  # - when comparing two splitwords, their IDs end in _N_sM
  #   for numbers N and M.
  #   If they coincide in N, compare them by M,
  #   else compare them by N
  def compare_splitwords(a, b)
    terminal_a = splitword_terminal_i(a)
    terminal_b = splitword_terminal_i(b)

    if terminal_a == terminal_b
      # parts of the same terminal: order by part number
      last_i(a) <=> last_i(b)
    else
      # parts of different terminals: order by terminal number
      terminal_a <=> terminal_b
    end
  end

  # - when comparing a terminal and a splitword,
  #   compare the terminal's last number to the splitword's N
  # - for any other combination, warn on stderr and treat the two nodes
  #   as equal: a sort block has to return a number, and the nil
  #   that $stderr.print returns would make sort raise
  def compare_mixed(a, b)
    if a.is_splitword? && b.is_terminal?
      splitword_terminal_i(a) <=> last_i(b)
    elsif a.is_terminal? && b.is_splitword?
      last_i(a) <=> splitword_terminal_i(b)
    else
      # not one terminal, one splitword?
      $stderr.print "SalsaTigerSentence, compare_mixed: confused by "
      $stderr.print a.id, ", ", b.id, "\n"
      0
    end
  end

  # return last number of the ID of a node
  # prints a message to stderr and exits with status 1
  # if no digits can be found
  def last_i(n)
    id = n.id.to_s

    # final string of digits; failing that (weird splitword IDs),
    # digits followed by exactly one final word character
    digits = id[/(\d+)$/, 1] || id[/(\d+)\w$/, 1]

    if digits.nil? # this shouldn't ever happen
      $stderr.print "SalsaTigerSentence, last_i: Couldn't extract digits from: "
      $stderr.print n.id, "\n"
      exit 1
    end

    digits.to_i
  end

  # assume the ID of the node includes N_sM
  # return N
  # prints a message to stderr and exits with status 1
  # if the ID has no such part
  def splitword_terminal_i(n)
    # string of digits before the splitword ID
    digits = n.id.to_s[/(\d+)_s\d*/, 1]

    if digits.nil? # this shouldn't ever happen
      $stderr.print "SalsaTigerSentence, splitword_terminal_i: Couldn't extract digits from: "
      $stderr.print n.id, "\n"
      exit 1
    end

    digits.to_i
  end
end
|
193
|
+
|
194
|
+
|