shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,310 +0,0 @@
1
- # Katrin Erk Oct 05
2
- #
3
- # useful extensions to standard classes
4
-
5
- require 'fileutils'
6
-
7
class String
  ###
  # check whether this string begins with other_string
  #
  # other_string: string, the prefix to look for
  #
  # returns: true or false. The empty string counts as a prefix of
  # every string, mirroring what endswith does for suffixes.
  def startswith(other_string)
    # the former slice self[0..other_string.length - 1] degenerated to
    # self[0..-1] (the whole string) for an empty prefix and so answered
    # false for every non-empty receiver
    start_with?(other_string)
  end

  ###
  # check whether this string ends with other_string
  #
  # other_string: string, the suffix to look for
  #
  # returns: true or false; a suffix longer than the string never matches
  def endswith(other_string)
    end_with?(other_string)
  end
end
17
-
18
class File
  ########
  # make sure that a directory exists, creating it (including any
  # missing parent directories) if it doesn't.
  #
  # piece together the strings in 'pieces' to make the path,
  # appending "/" to all strings if necessary
  #
  # exits the process with status 1 (via File.existing_dir) if the
  # directory is still missing or inaccessible afterwards
  #
  # returns: the path pieced together
  def self.new_dir(*pieces) # strings, to be pieced together
    dir_path = File.make_path(pieces, true).first
    # File.exist? rather than File.exists?: the latter was deprecated
    # for a long time and removed in Ruby 3.2
    FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
    # check that all went well in creating the directory
    File.existing_dir(dir_path)

    dir_path
  end

  ########
  # same as new_dir, but the last piece is a filename:
  # only the directory part is created, the file itself is not touched
  #
  # returns: the whole path (directory plus filename) pieced together
  def self.new_filename(*pieces)
    dir_path, whole_path = File.make_path(pieces, false)
    FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
    # check that all went well in creating the directory
    File.existing_dir(dir_path)

    whole_path
  end

  #####
  # check whether a given directory exists and can be entered;
  # report the failure on stderr and exit with status 1 if not.
  #
  # piece together the strings in 'pieces' to make the path,
  # appending "/" to all strings if necessary
  #
  # returns: the path pieced together
  def self.existing_dir(*pieces) # strings
    dir_path = File.make_path(pieces, true).first

    # File.directory? is false for missing paths, so it covers the
    # existence test as well
    unless File.directory?(dir_path)
      $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting."
      exit(1)
    end
    # for a directory the executable bit means "may be entered"
    unless File.executable?(dir_path)
      $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
      exit(1)
    end

    dir_path
  end

  ####
  # like existing_dir, but the last piece is a filename:
  # only the directory part is checked, not the file itself
  #
  # returns: the whole path (directory plus filename) pieced together
  def self.existing_filename(*pieces) # strings
    dir_path, whole_path = File.make_path(pieces, false)

    unless File.directory?(dir_path)
      $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting"
      exit(1)
    end
    unless File.executable?(dir_path)
      $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
      exit(1)
    end

    whole_path
  end

  ####
  # piece together the strings in 'pieces' to make a path,
  # appending "/" to all but the last string if necessary
  #
  # if 'pieces' is already a string, take that as a one-piece path
  #
  # if is_dir is true, also append "/" to the last piece of the string
  #
  # the resulting path is expanded with File.expand_path: for example,
  # an initial ~ is expanded to the home directory, and a relative path
  # is resolved against the current working directory
  #
  # nil entries among the directory pieces are reported on stderr
  # and skipped
  #
  # returns: pair of strings (directory_part, whole_path);
  # directory_part always ends in "/"
  def self.make_path(pieces,         # string or array:string
                     is_dir = false) # Boolean: is the path a directory?
    pieces = [pieces] if pieces.kind_of?(String)

    # every piece belongs to the directory part, except for the
    # trailing filename when the path names a file
    dir_pieces = is_dir ? pieces : pieces[0..-2]

    dir = ""
    dir_pieces.each do |piece|
      if piece.nil?
        # whoops, nil entry in name of path!
        $stderr.puts "File.make_path ERROR: nil for piece of path name."
        next
      end
      dir += piece
      dir += "/" unless piece.end_with?("/")
    end

    dir = File.expand_path(dir)
    # expand_path removes the final "/" again (except for the root "/")
    dir += "/" unless dir.end_with?("/")

    is_dir ? [dir, dir] : [dir, dir + pieces[-1]]
  end
end
148
-
149
- #############################################
150
class Array
  ###
  # interleave N arrays:
  # given arrays [a1... an], [b1,...,bn], ..[z1, ...,zn]
  # return [[a1,b1, .., z1]...,[an,bn, .., zn]]
  #
  # if one array is longer than the other,
  # e.g. [a1...an], [b1,...,bm] with n> m
  # the result is
  # [[a1,b1],...[am, bm], [am+1, nil], ..., [an, nil]]
  # and analogously for m>n
  #
  # called without arguments, every element is wrapped in a
  # one-element array
  def interleave(*arrays)
    # collect all lengths in one list so that max() also works when
    # no other arrays are given
    len = ([length] + arrays.map { |a| a.length }).max
    (0...len).map { |ix|
      [at(ix)] + arrays.map { |a| a[ix] }
    }
  end

  ###
  # prepend: prepend element(s) to array
  # because I can never remember which is 'shift'
  # and which is 'unshift'
  #
  # accepts any number of elements, like the Array#prepend that
  # ships with Ruby 2.5 and later
  #
  # returns: this array
  def prepend(*elements)
    unshift(*elements)
  end

  # NOTE: count(element) is deliberately not redefined here.
  # The built-in Array#count(element) already counts the occurrences
  # of element using ==, and a one-argument redefinition breaks the
  # argument-less and block forms of count.

  ###
  # count the number of occurrences of
  # elements from list in this array
  #
  # list: any object responding to include?
  #
  # returns: integer
  def counts(list)
    num = 0
    each { |my_element| num += 1 if list.include?(my_element) }
    num
  end

  ###
  # draw a random sample of size N
  # from this array
  #
  # size: integer, or nil/omitted to draw a single element
  #       like the built-in Array#sample
  # random: random number generator to use
  #
  # returns:
  # - a single element (nil for an empty array) if size is omitted
  # - nil if size is negative
  # - [] if size is zero
  # - a copy of this array, in its original order,
  #   if size is at least the length of this array
  # - otherwise an array of 'size' elements drawn without
  #   replacement, in random order
  def sample(size = nil, random: Random)
    if size.nil?
      return empty? ? nil : self[random.rand(length)]
    end

    return nil if size < 0
    return [] if size == 0
    return clone if size >= length

    # shuffling positions instead of ranking by element value gives
    # duplicate elements independent chances of being drawn
    shuffle(random: random).first(size)
  end
end
221
-
222
class Float
  ###
  # round a float to the given number of decimal points
  #
  # n: integer, number of decimal places to keep
  #
  # returns: float. NaN and +/-Infinity are returned unchanged, and so
  # is a value whose scaled form self * 10**n is no longer finite.
  def round_to_decpts(n)
    # round() raises FloatDomainError on NaN and Infinity
    return self unless finite?

    factor = 10**n
    scaled = self * factor
    # scaling can overflow to Infinity for large n
    return self unless scaled.finite?

    scaled.round.to_f / factor
  end
end
233
-
234
- ################
235
module EnumerableBool
  ###
  # And_(x \in X) block(x)
  #
  # relies only on the including class providing each()
  #
  # without a block, the elements themselves are tested for truth,
  # in the same way that big_sum sums the elements themselves
  #
  # returns: false as soon as the block yields a false value for
  # some element, true otherwise (also for an empty collection)
  def big_and(&block)
    block ||= proc { |x| x }
    each { |x| return false unless block.call(x) }
    true
  end

  ###
  # Or_(x \in X) block(x)
  #
  # without a block, the elements themselves are tested for truth
  #
  # returns: true as soon as the block yields a true value for
  # some element, false otherwise (also for an empty collection)
  def big_or(&block)
    block ||= proc { |x| x }
    each { |x| return true if block.call(x) }
    false
  end

  ###
  # Sum_(x \in X) block(x)
  #
  # init: start value of the sum
  #
  # without a block, the elements themselves are summed
  #
  # returns: init plus the block values of all elements, added with +
  def big_sum(init = 0, &block)
    block ||= proc { |x| x }
    sum = init
    each { |x| sum += block.call(x) }
    sum
  end
end
271
-
272
- ################
273
- # Given an enumerable, distribute its items into two bins (arrays)
274
- # depending on whether the block returns true
275
module EnumerableDistribute
  ###
  # split the items of this collection into two arrays
  #
  # relies only on the including class providing each()
  #
  # returns: pair [accepted, rejected] of arrays: the items for which
  # the block yields a true value, and all remaining items,
  # each in iteration order
  def distribute(&block)
    accepted = []
    rejected = []
    each do |item|
      bin = block.call(item) ? accepted : rejected
      bin << item
    end
    [accepted, rejected]
  end
end
289
-
290
- #####################
291
- # map with index
292
module MapWithIndex
  ###
  # like map, but the block also receives the position of the item
  #
  # relies on the including class providing each_with_index()
  #
  # returns: array of the block values for each (item, index) pair,
  # in iteration order
  def map_with_index(&block)
    results = []
    each_with_index do |item, position|
      results.push(block.call(item, position))
    end
    results
  end
end
303
-
304
- # include new Mixins into array already.
305
- # for other classes, do this when requiring StandardPkgExtensions
306
class Array
  # mix the helper modules into Array, one after the other in this
  # order; other classes have to include them on their own
  [EnumerableBool, EnumerableDistribute, MapWithIndex].each do |mixin|
    include mixin
  end
end
@@ -1,48 +0,0 @@
1
- # DBInterface
2
- #
3
- # Okay, things are getting somewhat complicated here with all
4
- # the DB classes, but this is how it all fits together:
5
- #
6
- # - DBWrapper: abstract class describing the DB interface
7
- # - DBMySQL, DBSQLite: subclasses of DBWrapper, for MySQL
8
- # and SQLite, respectively
9
- # - DBInterface: class to be used from outside,
10
- # decides ( based on the experiment file) whether to use
11
- # MySQL or SQLite and makes an object of the right kind,
12
- # 'require'-ing either DBMySQL or DBSQLite, but not both,
13
- # because the right ruby packages might not be installed
14
- # for both SQL systems
15
-
16
###
# make a database object of the kind requested by the experiment file
#
# Only the backend that is actually requested is 'require'-d, because
# the Ruby packages for the other one might not be installed.
#
# Reports the problem on stderr and exits with status 1 if the backend
# cannot be loaded, or if 'dbtype' is neither 'mysql' nor 'sqlite'.
#
# returns: a DBMySQL or DBSQLite object
def get_db_interface(exp,               # experiment file object with 'dbtype' entry
                     dir = nil,         # string: Shalmaneser directory (used by SQLite only)
                     identifier = nil)  # string: identifier of the data (SQLite)

  # load a backend file, or explain what is missing and give up
  load_backend = lambda do |library, package|
    begin
      require library
    rescue LoadError, StandardError
      # LoadError has to be named explicitly: it is a ScriptError,
      # which a bare 'rescue' (StandardError only) does not catch,
      # and it is what 'require' raises for a missing package
      $stderr.puts "Error loading DB interface."
      $stderr.puts "Make sure you have the Ruby #{package} package installed."
      exit 1
    end
  end

  case exp.get("dbtype")
  when "mysql"
    load_backend.call('db/db_mysql', "MySQL")
    DBMySQL.new(exp)

  when "sqlite"
    load_backend.call('db/db_sqlite', "SQLite")
    DBSQLite.new(exp, dir, identifier)

  else
    $stderr.puts "Error: database type needs to be either 'mysql' or 'sqlite'."
    $stderr.puts "Please set parameter 'dbtype' in the experiment file accordingly."
    exit 1
  end
end
47
-
48
-
@@ -1,145 +0,0 @@
1
-
2
- # DBMysql: a subclass of DBWrapper.
3
- #
4
- # Use a MySQL server to access a database.
5
- # Use the Ruby mysql interface package for that.
6
-
7
- require 'mysql'
8
-
9
- require 'db/db_wrapper'
10
-
11
- #################
12
class DBMySQLResult < DBResult
  ###
  # value: the result of Mysql::query, a MysqlResult object
  #
  # Besides handing the value to DBResult, note the row offset the
  # result starts out with, so that reset() can go back to it.
  def initialize(value)
    super(value)
    @row_first = @result.row_tell
  end

  ###
  # go back to the row offset noted at construction time,
  # such that each() can be run again
  def reset
    @result.row_seek(@row_first)
  end

  ###
  # column names of the result
  #
  # The row offset is noted before the fields are fetched and
  # restored afterwards.
  #
  # returns: list of strings
  def list_column_names
    saved_offset = @result.row_tell
    names = @result.fetch_fields.map(&:name)
    @result.row_seek(saved_offset)
    names
  end
end
40
-
41
-
42
- #################
43
class DBMySQL < DBWrapper
  ###
  # initialization:
  #
  # open a connection to the MySQL server, using the entries
  # 'host', 'user', 'passwd' and 'dbname' of the experiment file
  def initialize(exp) # RosyConfigData experiment file object
    super(exp)

    host, user, passwd, dbname = %w[host user passwd dbname].map { |key| @exp.get(key) }
    @database = Mysql.real_connect(host, user, passwd, dbname)
  end

  ###
  # make a table with an auto-incremented integer primary key column
  # named indexname, the given columns, and one KEY per index column
  #
  # returns: nothing
  def create_table(table_name,          # string
                   column_formats,      # array: array: string*string [column_name,column_format]
                   index_column_names,  # array: string: column_name
                   indexname)           # string: name of automatically created index column

    sql = "CREATE TABLE #{table_name} (#{indexname} INT NOT NULL AUTO_INCREMENT"

    # column declarations
    unless column_formats.empty?
      declarations = column_formats.map { |name, format| "#{name} #{format}" }
      sql << ", " << declarations.join(",")
    end

    # primary key
    sql << ", PRIMARY KEY(#{indexname})"

    # other keys
    unless index_column_names.empty?
      keys = index_column_names.map { |name| "KEY(#{name})" }
      sql << ", " << keys.join(",")
    end
    sql << ");"

    query_noretv(sql)
  end

  ####
  # querying the database:
  #
  # returns: the result wrapped in a DBMySQLResult object,
  # or nil if the underlying query call yields no result
  def query(query)
    raw_result = @database.query(query)
    raw_result ? DBMySQLResult.new(raw_result) : nil
  end

  ####
  # querying the database:
  # whatever the underlying query call yields is discarded
  #
  # returns: nil
  def query_noretv(query)
    @database.query(query)
    nil
  end

  ###
  # list all tables in the database
  #
  # returns: whatever the connection's list_tables yields,
  # described as an array of strings
  def list_tables
    @database.list_tables
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table,
  # taken from the "Field" and "Type" entries of DESCRIBE
  #
  # returns: array:string*string, list of pairs [column name, column format]
  def list_column_formats(table_name)
    formats = []
    @database.query("DESCRIBE #{table_name}").each_hash do |field|
      formats << [field["Field"], field["Type"]]
    end
    formats
  end

  ####
  # num_rows
  #
  # determine the number of rows in a table by looking up its
  # entry in the output of SHOW TABLE STATUS
  #
  # returns: the "Rows" entry of the table, or nil if no table
  # of that name is listed
  def num_rows(table_name)
    @database.query("SHOW TABLE STATUS").each_hash do |status|
      return status["Rows"] if status["Name"] == table_name
    end
    nil
  end
end