delphin 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +17 -0
- data/bin/cleanup-profile +67 -0
- data/bin/score-profile +61 -0
- data/lib/delphin.rb +377 -0
- data/test/test_delphin.rb +77 -0
- metadata +66 -0
data/README
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= Ruby Utilities for DELPH-IN
|
2
|
+
|
3
|
+
This is a set of Ruby utilities for the {Delphin}[http://www.delph-in.net/] HPSG processing project.
|
4
|
+
|
5
|
+
= History
|
6
|
+
|
7
|
+
0.0.1:: Profile data structures
|
8
|
+
|
9
|
+
= Copyright
|
10
|
+
|
11
|
+
Copyright 2009, William Patrick McNeill
|
12
|
+
|
13
|
+
This program is distributed under the GNU General Public License.
|
14
|
+
|
15
|
+
= Author
|
16
|
+
|
17
|
+
W.P. McNeill mailto:billmcn@gmail.com
|
data/bin/cleanup-profile
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "delphin"
|
4
|
+
require "fileutils"
|
5
|
+
require "optparse"
|
6
|
+
|
7
|
+
|
8
|
+
# Report a usage problem and terminate the script.
#
# Writes +error+ and then the option parser's help text to standard output,
# and exits the process with +exit_code+ (-1 unless the caller says otherwise).
#
# [_parser_]    Object whose +help+ method returns the usage text
# [_error_]     Message (or exception) describing what went wrong
# [_exit_code_] Process exit status
def error_exit(parser, error, exit_code = -1)
  $stdout.puts(error)
  $stdout.puts(parser.help)
  Kernel.exit(exit_code)
end
|
14
|
+
|
15
|
+
|
16
|
+
# Set by --delete: remove invalid profiles instead of only listing them.
delete = false

# Names of the Logger severity constants accepted by --logging.
log_levels = %w[DEBUG INFO WARN ERROR FATAL UNKNOWN]

parser = OptionParser.new do |opts|
  opts.banner = <<-EOTEXT
#{File.basename(__FILE__)} [OPTION] glob [table table...]

List all invalid TSDB profiles matching the specified file glob and optionally delete them.

A directory is a valid profile if it contains a non-empty relations file. Other required non-empty table files may be specified as command line arguments.

By default this script merely prints the names of invalid profiles. It will also delete them if the delete switch is specified.

Bracket characters in globs must be escaped with two backslashes, e.g. \\\\[
  EOTEXT
  opts.on("-l", "--logging LEVEL", "Logging level") do |level|
    # Look the level up by name rather than eval-ing command-line text; an
    # unknown level becomes an ordinary option-parsing error.
    level_name = level.upcase
    unless log_levels.include?(level_name)
      raise OptionParser::InvalidArgument, level
    end
    Delphin.set_log_level(Logger.const_get(level_name))
  end

  opts.on("-d", "--delete", "Delete invalid profiles") do
    delete = true
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  error_exit(parser, e)
end

if ARGV.length < 1
  error_exit(parser, "Incorrect number of arguments.")
end

# First positional argument is the glob; any that remain are table names whose
# data files must be non-empty.
glob = ARGV.shift
table_names = ARGV

Pathname.glob(glob).select {|d| d.directory?}.each do |d|
  Delphin::LOGGER.debug("Profile directory #{d}")
  begin
    profile = Delphin::Profile.new(d)
    table_names.each do |table_name|
      table = profile[table_name]
      if File.zero?(table.filename)
        raise Delphin::EmptyDataFile.new(table_name, profile)
      end
    end
  rescue Delphin::InvalidProfileException => e
    # Report the invalid profile, and remove it only when --delete was given.
    puts e.message
    FileUtils.rm_rf(d) if delete
  end
end
|
data/bin/score-profile
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "delphin"
|
4
|
+
require "optparse"
|
5
|
+
require "yaml"
|
6
|
+
|
7
|
+
|
8
|
+
# Report a usage problem and terminate the script.
#
# Writes +error+ and then the option parser's help text to standard output,
# and exits the process with +exit_code+ (-1 unless the caller says otherwise).
#
# [_parser_]    Object whose +help+ method returns the usage text
# [_error_]     Message (or exception) describing what went wrong
# [_exit_code_] Process exit status
def error_exit(parser, error, exit_code = -1)
  $stdout.puts(error)
  $stdout.puts(parser.help)
  Kernel.exit(exit_code)
end
|
14
|
+
|
15
|
+
|
16
|
+
# Set by --full-name: print whole directory paths instead of basenames.
full_name = false
# Set by --yaml: dump the statistics table as YAML instead of plain text.
to_yaml = false

# Names of the Logger severity constants accepted by --logging.
log_levels = %w[DEBUG INFO WARN ERROR FATAL UNKNOWN]

parser = OptionParser.new do |opts|
  opts.banner = <<-EOTEXT
#{File.basename(__FILE__)} [OPTION] glob

Print a summary of the scores in all the profiles matching the specified file
glob.

Bracket characters in paths must be escaped with two backslashes, e.g. \\\\[
  EOTEXT
  opts.on("-l", "--logging LEVEL", "Logging level") do |level|
    # Look the level up by name rather than eval-ing command-line text; an
    # unknown level becomes an ordinary option-parsing error.
    level_name = level.upcase
    unless log_levels.include?(level_name)
      raise OptionParser::InvalidArgument, level
    end
    Delphin.set_log_level(Logger.const_get(level_name))
  end

  opts.on("-f", "--full-name", "Print full directory name") do
    full_name = true
  end

  opts.on("-y", "--yaml", "Output results as YAML") do
    to_yaml = true
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  error_exit(parser, e)
end

unless ARGV.length == 1
  error_exit(parser, "Incorrect number of arguments.")
end

# Each row of stats is [mean, sdev, range, directory].
stats = Delphin.summarize_folds(File.expand_path(ARGV.first))
s = if to_yaml
      YAML::dump(stats)
    else
      stats.collect do |stat|
        stat[3] = File.basename(stat[3]) if not full_name
        stat.join(" ")
      end.join("\n")
    end
puts s
|
data/lib/delphin.rb
ADDED
@@ -0,0 +1,377 @@
|
|
1
|
+
# Copyright 2009 William Patrick McNeill
|
2
|
+
#
|
3
|
+
# This file is part of the DELPH-IN Ruby Utility Package.
|
4
|
+
#
|
5
|
+
# The DELPH-IN Ruby Utility Package is free software; you can redistribute it
|
6
|
+
# and/or modify it under the terms of the GNU General Public License as
|
7
|
+
# published by the Free Software Foundation; either version 2 of the License,
|
8
|
+
# or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# The DELPH-IN Ruby Utility Package is distributed in the hope that it will
|
11
|
+
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
13
|
+
# Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License along with
|
16
|
+
# this package; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
17
|
+
# St, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
|
19
|
+
require "logger"
require "pathname"
require "set"
require "zlib"
|
22
|
+
|
23
|
+
# Utilities for use with the DELPH-IN project.
|
24
|
+
module Delphin
|
25
|
+
VERSION = "0.0.1"
|
26
|
+
|
27
|
+
# Build the module-wide logger. It writes to STDERR, starts at the ERROR
# level, and uses the timestamp format "%Y-%m-%d %H:%M:%S". This function is
# called once, while the module is being loaded.
def self.initialize_logger
  Logger.new(STDERR).tap do |log|
    log.level = Logger::ERROR
    log.datetime_format = "%Y-%m-%d %H:%M:%S"
  end
end

private_class_method :initialize_logger

# Logger shared by all objects in this module. It is created at module load
# time with a default log level of ERROR.
LOGGER = initialize_logger
|
41
|
+
|
42
|
+
# Change the severity threshold of the shared module logger. For example:
#
#   > Delphin.set_log_level(Logger::DEBUG)
#
# [_level_] A Logger severity constant
def self.set_log_level(level)
  LOGGER.level = level
end
|
48
|
+
|
49
|
+
# Abstract base class for all exceptions raised by this module.
#
# It derives from StandardError rather than Exception so that a plain
# +rescue+ in calling code handles profile errors like any other application
# error.
class InvalidProfileException < StandardError
end

# A table has neither a plain nor a gzipped data file in the profile.
class MissingDataFile < InvalidProfileException
  # [_name_]    Table name
  # [_profile_] The profile that was searched
  def initialize(name, profile)
    @name = name
    @profile = profile
  end

  # Human-readable description; also what +message+ returns.
  def to_s
    "Missing data file for table #{@name} in #{@profile}."
  end
end

# A table's data file exists but holds no records.
class EmptyDataFile < InvalidProfileException
  # [_name_]    Table name
  # [_profile_] The profile containing the table
  def initialize(name, profile)
    @name = name
    @profile = profile
  end

  # Human-readable description; also what +message+ returns.
  def to_s
    "Empty data file for table #{@name} in #{@profile}."
  end
end

# A profile directory has no relations file.
class MissingRelationsFile < InvalidProfileException
  # [_directory_] The profile directory
  def initialize(directory)
    @directory = directory
  end

  # Human-readable description; also what +message+ returns.
  def to_s
    "Missing relations file in #{@directory}."
  end
end

# A line in a relations file could not be parsed.
class InvalidRelationsFile < InvalidProfileException
  # [_filename_] Name of the relations file
  # [_linenum_]  1-based number of the offending line
  # [_line_]     Text of the offending line
  def initialize(filename, linenum, line)
    @filename = filename
    @linenum = linenum
    @line = line
  end

  # Human-readable description; also what +message+ returns.
  def to_s
    "Invalid line #{@linenum} #{@filename}\n#{@line}"
  end
end
|
96
|
+
|
97
|
+
# Summarize the "f-accuracy" field of the "fold" table for every profile
# directory matching a file glob.
#
# Returns an array of [mean, sdev, range, directory] rows sorted by
# descending mean. Directories that are not valid profiles are logged at the
# error level and left out of the result.
#
# [_glob_] A file glob
def self.summarize_folds(glob)
  profile_dirs = Pathname.glob(glob).select { |path| path.directory? }
  rows = []
  profile_dirs.each do |dir|
    LOGGER.debug("Profile directory #{dir}")
    begin
      stats = Profile.new(dir).statistics("fold", "f-accuracy")
    rescue InvalidProfileException => e
      LOGGER.error(e.message)
    else
      rows << [stats.mean, stats.sdev, stats.range, dir.to_s]
    end
  end
  rows.sort_by { |row| -row.first }
end
|
114
|
+
|
115
|
+
|
116
|
+
# The name of a Logon TSDB output profile.
#
# Such a name is a long string of bracketed settings. The settings that are
# captured become the members of this struct; the rest must be present but
# are not stored.
class OutputProfileName < Struct.new(:prefix, :grandparenting,
                                     :constituent_weight, :active_edges,
                                     :ngram_size, :ngram_back_off,
                                     :relative_tolerance, :variance)
  # Layout of a profile name. The numbered comments mark the capture groups.
  NAME_PATTERN = /\[(\S+)\]\s # 1 Prefix
                  GP\[(\d+)\]\s # 2 Grandparenting
                  [+-]PT\s
                  [+-]LEX\s
                  CW\[(\d*)\]\s # 3 Constituent weight
                  ([+-])AE\s # 4 Active edges
                  NS\[(\d+)\]\s # 5 N-gram size
                  NT\[\w*\]\s
                  ([+-])NB\s # 6 N-gram backoff
                  LM\[\d+\]\s
                  FT\[:::\d+\]\s
                  RS\[\]\s
                  MM\[\S+\]\s
                  MI\[\d+\]\s
                  RT\[(\d+(?:\.\d+)?e[+-]\d+)\]\s # 7 Relative tolerance
                  AT\[\d+(?:\.\d+)?e[+-]\d+\]\s
                  VA\[(\d+(?:\.\d+)?e[+-]\d+)?\]\s # 8 Variance
                  PC\[\d+\]
                 /x

  # Struct members that describe the machine learning configuration, i.e.
  # every member except the prefix.
  LEARNER_FEATURES = [:grandparenting, :constituent_weight, :active_edges,
                      :ngram_size, :ngram_back_off, :relative_tolerance,
                      :variance]

  # The basename of the profile
  attr_reader :name

  # Parse an output profile name out of a directory name.
  #
  # Path information is stripped from the name parameter before parsing.
  # Raises ArgumentError when the basename does not have the expected layout.
  #
  # [_name_] A directory name
  def initialize(name)
    @name = File.basename(name)
    parsed = NAME_PATTERN.match(@name)
    raise ArgumentError.new("Invalid profile name #{name}") if parsed.nil?
    pfx, gp, cw, ae, ns, nb, rt, va = parsed.captures
    # The +/- switches become booleans; an empty variance becomes nil.
    super(pfx, gp.to_i, cw.to_i, ae == "+", ns.to_i, nb == "+", rt.to_f,
          va.nil? ? nil : va.to_f)
  end

  # The prefix and the feature string, separated by a colon.
  def to_s
    [prefix, feature_string].join(":")
  end

  # Do this profile name and other have all the same machine learning
  # features?
  def equal_learner_features?(other)
    LEARNER_FEATURES.all? { |feature| send(feature) == other.send(feature) }
  end

  # A compact and readable representation of all the feature values, as
  # comma-separated name=value pairs.
  def feature_string
    LEARNER_FEATURES.map { |feature| "#{feature}=#{send(feature)}" }.join(",")
  end

end # OutputProfileName
|
180
|
+
|
181
|
+
|
182
|
+
# A TSDB profile: a directory holding a relations file, which defines the
# table schemas, and one data file per table.
class Profile
  # Summary statistics returned by #statistics.
  Statistics = Struct.new(:mean, :sdev, :range)

  attr_reader :directory, :relations

  # Load the relations file found in the profile directory.
  #
  # Raises MissingRelationsFile when the directory has no relations file.
  #
  # [_directory_] The profile directory
  def initialize(directory)
    @directory = directory
    begin
      # File.open, unlike Kernel#open, always treats its argument as a path.
      @relations = File.open(File.join(directory, "relations")) do |file|
        RelationsFile.new(file)
      end
    rescue Errno::ENOENT
      raise MissingRelationsFile.new(directory)
    end
  end

  def inspect
    "#{self.class}(#{directory})"
  end

  def to_s
    inspect
  end

  # A list of all the tables in the profile
  def tables
    @relations.keys
  end

  # Build the ProfileTable for the named table, using the schema the
  # relations file defines for it.
  def [](name)
    ProfileTable.new(self, name, @relations[name])
  end

  # Return the mean, standard deviation and range of the numeric values in
  # the specified field.
  #
  # Field values are converted with to_f. The standard deviation is the
  # sample standard deviation (divisor n-1), except that a single value
  # yields a deviation of zero.
  #
  # Raises EmptyDataFile when the table has no records.
  #
  # [_table_] Table name
  # [_field_] Field label
  def statistics(table, field)
    s = self[table].collect {|r| r[field].to_f}
    n = s.length
    raise EmptyDataFile.new(table, self) if n.zero?
    mean = (s.inject(0) {|sum, x| sum + x})/n
    n = n-1 if n > 1
    sdev = Math.sqrt((s.inject(0) {|sum, x| sum + (x-mean)**2})/n)
    range = s.max - s.min
    Statistics.new(mean, sdev, range)
  end
end
|
228
|
+
|
229
|
+
|
230
|
+
# A data table in a TSDB profile.
#
# The table's records live in a file named after the table inside the
# profile directory, optionally gzipped (with a ".gz" suffix). The object is
# Enumerable over those records.
class ProfileTable
  include Enumerable

  attr_reader :profile, :name, :schema, :filename

  # Locate the data file for a table.
  #
  # Raises MissingDataFile when neither the plain nor the gzipped file exists.
  #
  # [_profile_] The profile containing the table; must respond to +directory+
  # [_name_]    Table name
  # [_schema_]  Object whose +record+ method turns a line of text into a record
  def initialize(profile, name, schema)
    @profile = profile
    @name = name
    @schema = schema
    # Find the file containing this table. It may be gzipped.
    plain = File.join(profile.directory, name)
    gzipped = plain + ".gz"
    if File.exist?(plain)
      @filename = plain
      @gzipped = false
    elsif File.exist?(gzipped)
      @filename = gzipped
      @gzipped = true
    else
      raise MissingDataFile.new(@name, @profile)
    end
  end

  def inspect
    "#{self.class}(#{name}) in #{profile}"
  end

  def to_s
    inspect
  end

  # Enumerate the records in this table.
  #
  # The data file is opened for each enumeration and closed when it
  # finishes, so a table can be enumerated repeatedly and leaves no file
  # handle open. Without a block an Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?
    open_data_file do |file|
      file.each_line do |line|
        # Non-destructive strip: strip! returns nil when there is nothing to
        # remove, e.g. on a final line that lacks a newline.
        yield @schema.record(line.strip)
      end
    end
    self
  end

  private

  # Open the data file with the reader matching its compression, yield it to
  # the block, and close it afterwards.
  def open_data_file(&block)
    if @gzipped
      Zlib::GzipReader.open(@filename, &block)
    else
      File.open(@filename, &block)
    end
  end
end # ProfileTable
|
269
|
+
|
270
|
+
|
271
|
+
# A database schema table in a profile.
#
# This is a list of field labels and their types, plus the sets of labels
# flagged as keys and as partials.
class ProfileTableSchema < Array
  # One column of the table: its label and its declared type.
  Field = Struct.new(:label, :type)

  attr_reader :name, :keys, :partials

  # Create an empty schema.
  #
  # [_init_name_] Table name
  def initialize(init_name)
    super()
    @name = init_name
    @keys = Set.new
    @partials = Set.new
  end

  # The string representation: the table name followed by one line per field
  # giving its label, type and any :key / :partial flags.
  def to_s
    lines = collect do |field|
      s = " #{field.label} :#{field.type}"
      s += " :key" if is_key?(field.label)
      s += " :partial" if is_partial?(field.label)
      s
    end
    "#{name}:\n" + lines.join("\n")
  end

  # Generate a data record from a line of text.
  #
  # A data record is a hash of field labels to values. The text is split on
  # "@" and the pieces are matched to the schema fields by position. Values
  # of integer fields are converted with to_i; all others stay strings. A
  # field with no corresponding piece of text gets nil.
  def record(text)
    values = text.split(/@/)
    pairs = zip(values).collect do |field, value|
      [field.label, convert(field.type, value)]
    end
    Hash[pairs]
  end

  # Add a new field and type.
  #
  # [_label_]   Field label
  # [_type_]    Field type, as a String or Symbol (e.g. "integer")
  # [_key_]     Is the field a key?
  # [_partial_] Is the field a partial?
  def add_field(label, type, key = false, partial = false)
    self.push(Field.new(label, type))
    @keys.add(label) if key
    @partials.add(label) if partial
  end

  # Is the specified label a key?
  def is_key?(label)
    @keys.member?(label)
  end

  # Is the specified label a partial?
  def is_partial?(label)
    @partials.member?(label)
  end

  private

  # Convert a raw text value according to its field type. The type is
  # compared by name so that both "integer" and :integer are recognized.
  def convert(type, value)
    return value if value.nil?
    type.to_s == "integer" ? value.to_i : value
  end
end # ProfileTableSchema
|
331
|
+
|
332
|
+
|
333
|
+
# A file that contains a set of database schema tables.
#
# This object is a hash of ProfileTableSchema objects indexed by table name.
class RelationsFile < Hash
  # Parse a relations file.
  #
  # A table starts with a "name:" line and continues with one field line
  # each ("label :type", optionally followed by ":key" and ":partial") until
  # the next blank line. Text from a "#" to the end of the line is a comment.
  #
  # Raises InvalidRelationsFile on a line that fits neither form.
  #
  # [_file_] An open IO-like object that yields the lines of the file
  def initialize(file)
    super()
    state = :outside_table
    table_name = nil
    # Name used in error messages: the path when the object has one,
    # otherwise its inspect string.
    filename = (file.path if file.respond_to?(:path)) || file.inspect
    file.each_with_index do |raw_line, i|
      # Remove comments and surrounding whitespace.
      line = raw_line.sub(/#.*/, "").strip
      case state
      when :inside_table
        if line.empty?
          state = :outside_table
        elsif line =~ /^(\S+)\s+:(\w+)(\s+:key)?(\s+:partial)?$/
          # E.g. parse-id :integer :key
          self[table_name].add_field($1, $2, !$3.nil?, !$4.nil?)
        else
          raise InvalidRelationsFile.new(filename, i+1, line)
        end
      when :outside_table
        next if line.empty?
        if line =~ /(\S+):/
          # E.g. item:
          table_name = $1
          self[table_name] = ProfileTableSchema.new(table_name)
          state = :inside_table
        else
          raise InvalidRelationsFile.new(filename, i+1, line)
        end
      end
    end # each_with_index
  end # initialize

  # Print out a relations file: the table schemas separated by blank lines.
  def to_s
    values.map {|t| t.to_s}.join("\n\n")
  end # to_s
end # RelationsFile
|
375
|
+
|
376
|
+
|
377
|
+
end # Delphin
|
@@ -0,0 +1,77 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
|
5
|
+
# Copyright 2009 William Patrick McNeill
|
6
|
+
#
|
7
|
+
# This file is part of the DELPH-IN Ruby Utility Package.
|
8
|
+
#
|
9
|
+
# The DELPH-IN Ruby Utility Package is free software; you can redistribute it
|
10
|
+
# and/or modify it under the terms of the GNU General Public License as
|
11
|
+
# published by the Free Software Foundation; either version 2 of the License,
|
12
|
+
# or (at your option) any later version.
|
13
|
+
#
|
14
|
+
# The DELPH-IN Ruby Utility Package is distributed in the hope that it will be
|
15
|
+
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
17
|
+
# Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License along with
|
20
|
+
# this package; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
21
|
+
# St, Fifth Floor, Boston, MA 02110-1301 USA
|
22
|
+
#
|
23
|
+
#++
|
24
|
+
|
25
|
+
# Test cases for the Delphin module
|
26
|
+
|
27
|
+
require "test/unit"
|
28
|
+
require "delphin"
|
29
|
+
|
30
|
+
|
31
|
+
# Smoke test for the table schema class.
class Schema < Test::Unit::TestCase
  # A schema can be constructed from nothing but a table name.
  def test_stub
    Delphin::ProfileTableSchema.new("schema")
  end
end
|
36
|
+
|
37
|
+
# Tests for parsing Logon TSDB output profile names.
class TestOutputProfileName < Test::Unit::TestCase
  # Two valid names with different learner feature values.
  def setup
    @name1 = "[tanaka-train] GP[0] +PT -LEX CW[1] +AE NS[4] NT[type] +NB LM[0] FT[:::1] RS[] MM[tao_lmvm] MI[5000] RT[1.0e-8] AT[1.0e-20] VA[1.0e+0] PC[100]"
    @name2 = "[jhpstg] GP[0] +PT -LEX CW[] -AE NS[3] NT[type] +NB LM[0] FT[:::1] RS[] MM[tao_lmvm] MI[5000] RT[1.0e-6] AT[1.0e-20] VA[1.0e-4] PC[100]"
    @profile1 = Delphin::OutputProfileName.new(@name1)
    @profile2 = Delphin::OutputProfileName.new(@name2)
  end

  # Every captured setting of a valid name is exposed with the right value.
  def test_valid_name
    assert_equal(@name1, @profile1.name)
    expected_members = [
      [:prefix, "tanaka-train"],
      [:grandparenting, 0],
      [:constituent_weight, 1],
      [:active_edges, true],
      [:ngram_size, 4],
      [:ngram_back_off, true],
      [:relative_tolerance, 1e-8],
      [:variance, 1]
    ]
    expected_members.each do |member, value|
      assert_equal(value, @profile1.send(member))
    end
  end

  # An empty VA[] setting is parsed as a nil variance.
  def test_empty_variance
    name = "[jhpstg] GP[0] +PT -LEX CW[2] +AE NS[2] NT[type] +NB LM[0] FT[:::1] RS[] MM[tao_lmvm] MI[5000] RT[1.0e-6] AT[1.0e-20] VA[] PC[100]"
    assert_nil(Delphin::OutputProfileName.new(name).variance)
  end

  # The feature string lists the learner features as name=value pairs.
  def test_feature_string
    expected = "grandparenting=0,constituent_weight=1,active_edges=true,ngram_size=4,ngram_back_off=true,relative_tolerance=1.0e-08,variance=1.0"
    assert_equal(expected, @profile1.feature_string)
  end

  # A profile matches its own features but not those of a different setup.
  def test_feature_equivalence
    assert(@profile1.equal_learner_features?(@profile1))
    assert(!@profile1.equal_learner_features?(@profile2))
  end

  # A string without the expected layout is rejected.
  def test_invalid_name
    assert_raise(ArgumentError) { Delphin::OutputProfileName.new("bogus") }
  end

end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: delphin
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- W.P. McNeill
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-09-23 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: |
|
17
|
+
This module is a Ruby wrapper for the DELPH-IN project.
|
18
|
+
|
19
|
+
email: billmcn@gmail.com
|
20
|
+
executables: []
|
21
|
+
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files:
|
25
|
+
- README
|
26
|
+
files:
|
27
|
+
- test/test_delphin.rb
|
28
|
+
- lib/delphin.rb
|
29
|
+
- bin/cleanup-profile
|
30
|
+
- bin/score-profile
|
31
|
+
- README
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://delphin.rubyforge.org/
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- - --title
|
39
|
+
- Delphin -- DELPH-IN utilities
|
40
|
+
- --main
|
41
|
+
- README
|
42
|
+
- --line-numbers
|
43
|
+
- --inline-source
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
version:
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project: delphin
|
61
|
+
rubygems_version: 1.3.5
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: Ruby utilities for the DELPH-IN project
|
65
|
+
test_files:
|
66
|
+
- test/test_delphin.rb
|