delphin 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +17 -0
- data/bin/cleanup-profile +67 -0
- data/bin/score-profile +61 -0
- data/lib/delphin.rb +377 -0
- data/test/test_delphin.rb +77 -0
- metadata +66 -0
data/README
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= Ruby Utilities for DELPH-IN
|
2
|
+
|
3
|
+
This is a set of Ruby utilities for the {DELPH-IN}[http://www.delph-in.net/] HPSG processing project.
|
4
|
+
|
5
|
+
= History
|
6
|
+
|
7
|
+
0.0.1:: Profile data structures
|
8
|
+
|
9
|
+
= Copyright
|
10
|
+
|
11
|
+
Copyright 2009, William Patrick McNeill
|
12
|
+
|
13
|
+
This program is distributed under the GNU General Public License.
|
14
|
+
|
15
|
+
= Author
|
16
|
+
|
17
|
+
W.P. McNeill mailto:billmcn@gmail.com
|
data/bin/cleanup-profile
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "delphin"
|
4
|
+
require "fileutils"
|
5
|
+
require "optparse"
|
6
|
+
|
7
|
+
|
8
|
+
# Display an error message and the generic help message and exit.
#
# Both texts are written to standard error so they never mix with the
# profile names this script prints on standard output.
#
# [_parser_] An object responding to +help+ (the script's OptionParser)
# [_error_] The message or exception to display
# [_exit_code_] Exit status handed to +exit+; defaults to -1
def error_exit(parser, error, exit_code = -1)
  $stderr.puts error
  $stderr.puts parser.help
  exit(exit_code)
end
|
14
|
+
|
15
|
+
|
16
|
+
# Set by --delete: remove invalid profiles instead of only listing them.
delete = false

# Command line definition for this script.
parser = OptionParser.new do |opts|
  opts.banner =<<-EOTEXT
#{File.basename(__FILE__)} [OPTION] glob [table table...]

List all invalid TSDB profiles matching the specified file glob and optionally delete them.

A directory is a valid profile if it contains a non-empty relations file. Other required non-empty table files may be specified as command line arguments.

By default this script merely prints the names of invalid profiles. It will also delete them if the delete switch is specified.

Bracket characters in globs must be escaped with two backslashes, e.g. \\\\[
  EOTEXT
  opts.on("-l", "--logging LEVEL", "Logging level") do |level|
    # Resolve the level by name against a fixed list rather than eval-ing
    # command line text. An unknown name becomes an ordinary option parsing
    # error, which the caller of parse! already reports.
    level_name = level.upcase
    unless %w[DEBUG INFO WARN ERROR FATAL UNKNOWN].include?(level_name)
      raise OptionParser::InvalidArgument, level
    end
    Delphin.set_log_level(Logger.const_get(level_name))
  end

  opts.on("-d", "--delete", "Delete invalid profiles") do
    delete = true
  end
end
|
38
|
+
|
39
|
+
# Parse the switches; whatever remains in ARGV is the glob followed by the
# names of the tables that must be non-empty.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  error_exit(parser, e)
end

error_exit(parser, "Incorrect number of arguments.") if ARGV.empty?

glob = ARGV.shift
table_names = ARGV

# Examine every directory matched by the glob. A profile that cannot be
# loaded, or that has a missing or zero-length required table, is reported
# and, when --delete was given, removed.
Pathname.glob(glob).select { |d| d.directory? }.each do |d|
  Delphin::LOGGER.debug("Profile directory #{d}")
  begin
    profile = Delphin::Profile.new(d)
    table_names.each do |table_name|
      table = profile[table_name]
      if File.zero?(table.filename)
        raise Delphin::EmptyDataFile.new(table_name, profile)
      end
    end
  rescue Delphin::InvalidProfileException => e
    puts e.message
    FileUtils.rm_rf(d) if delete
  end
end
|
data/bin/score-profile
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "delphin"
|
4
|
+
require "optparse"
|
5
|
+
require "yaml"
|
6
|
+
|
7
|
+
|
8
|
+
# Display an error message and the generic help message and exit.
#
# Both texts are written to standard error so they never mix with the
# score summary this script prints on standard output.
#
# [_parser_] An object responding to +help+ (the script's OptionParser)
# [_error_] The message or exception to display
# [_exit_code_] Exit status handed to +exit+; defaults to -1
def error_exit(parser, error, exit_code = -1)
  $stderr.puts error
  $stderr.puts parser.help
  exit(exit_code)
end
|
14
|
+
|
15
|
+
|
16
|
+
# Set by --full-name: print whole profile paths instead of base names.
full_name = false
# Set by --yaml: emit the statistics as YAML instead of plain text.
to_yaml = false

# Command line definition for this script.
parser = OptionParser.new do |opts|
  opts.banner =<<-EOTEXT
#{File.basename(__FILE__)} [OPTION] glob

Print a summary of the scores in all the profiles matching the specified file
glob.

Bracket characters in paths must be escaped with two backslashes, e.g. \\\\[
  EOTEXT
  opts.on("-l", "--logging LEVEL", "Logging level") do |level|
    # Resolve the level by name against a fixed list rather than eval-ing
    # command line text. An unknown name becomes an ordinary option parsing
    # error, which the caller of parse! already reports.
    level_name = level.upcase
    unless %w[DEBUG INFO WARN ERROR FATAL UNKNOWN].include?(level_name)
      raise OptionParser::InvalidArgument, level
    end
    Delphin.set_log_level(Logger.const_get(level_name))
  end

  opts.on("-f", "--full-name", "Print full directory name") do
    full_name = true
  end

  opts.on("-y", "--yaml", "Output results as YAML") do
    to_yaml = true
  end
end
|
41
|
+
|
42
|
+
# Parse the switches; exactly one argument, the profile glob, must remain.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  error_exit(parser, e)
end

error_exit(parser, "Incorrect number of arguments.") unless ARGV.length == 1

stats = Delphin.summarize_folds(File.expand_path(ARGV.first))
if to_yaml
  puts YAML::dump(stats)
else
  # One line per profile; the fourth column is the profile directory, which
  # is cut down to its base name unless --full-name was given.
  lines = stats.collect do |stat|
    stat[3] = File.basename(stat[3]) unless full_name
    stat.join(" ")
  end
  puts lines.join("\n")
end
|
data/lib/delphin.rb
ADDED
@@ -0,0 +1,377 @@
|
|
1
|
+
# Copyright 2009 William Patrick McNeill
|
2
|
+
#
|
3
|
+
# This file is part of the DELPH-IN Ruby Utility Package.
|
4
|
+
#
|
5
|
+
# The DELPH-IN Ruby Utility Package is free software; you can redistribute it
|
6
|
+
# and/or modify it under the terms of the GNU General Public License as
|
7
|
+
# published by the Free Software Foundation; either version 2 of the License,
|
8
|
+
# or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# The DELPH-IN Ruby Utility Package is distributed in the hope that it will
|
11
|
+
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
13
|
+
# Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License along with
|
16
|
+
# this package; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
17
|
+
# St, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
|
19
|
+
require "logger"
require "pathname"
require "set"
require "zlib"
|
22
|
+
|
23
|
+
# Utilities for use with the DELPH-IN project.
|
24
|
+
module Delphin
|
25
|
+
VERSION = "0.0.1"
|
26
|
+
|
27
|
+
# Build the logger used throughout this module. It writes to standard error,
# starts at the ERROR level, and stamps entries as "YYYY-MM-DD HH:MM:SS".
# This function is called when the module is loaded.
def self.initialize_logger
  Logger.new(STDERR).tap do |log|
    log.level = Logger::ERROR
    log.datetime_format = "%Y-%m-%d %H:%M:%S"
  end
end
|
35
|
+
|
36
|
+
private_class_method :initialize_logger
|
37
|
+
|
38
|
+
# Logger used by all objects in this module. This is initialized at module
|
39
|
+
# load time. The default log level is ERROR.
|
40
|
+
LOGGER = initialize_logger
|
41
|
+
|
42
|
+
# Change the verbosity of the module-wide logger. For example:
#
# > Delphin.set_log_level(Logger::DEBUG)
#
# [_level_] The new level, e.g. one of the Logger severity constants
def self.set_log_level(level)
  LOGGER.level = level
end
|
48
|
+
|
49
|
+
# Abstract base class for all exceptions raised by this module.
#
# It derives from StandardError rather than Exception, so a plain +rescue+
# catches it and it is not lumped together with interpreter-level
# conditions such as SystemExit or Interrupt. Code that rescues
# InvalidProfileException by name is unaffected.
class InvalidProfileException < StandardError
end

# A table has neither a plain nor a gzipped data file in its profile.
class MissingDataFile < InvalidProfileException
  # [_name_] The table name
  # [_profile_] The profile that was searched
  def initialize(name, profile)
    super()
    @name = name
    @profile = profile
  end

  def to_s
    "Missing data file for table #{@name} in #{@profile}."
  end
end

# A table's data file exists but contains no records.
class EmptyDataFile < InvalidProfileException
  # [_name_] The table name
  # [_profile_] The profile the table belongs to
  def initialize(name, profile)
    super()
    @name = name
    @profile = profile
  end

  def to_s
    "Empty data file for table #{@name} in #{@profile}."
  end
end

# A profile directory has no relations file.
class MissingRelationsFile < InvalidProfileException
  # [_directory_] The profile directory
  def initialize(directory)
    super()
    @directory = directory
  end

  def to_s
    "Missing relations file in #{@directory}."
  end
end

# A relations file contains a line that cannot be parsed.
class InvalidRelationsFile < InvalidProfileException
  # [_filename_] The relations file
  # [_linenum_] The 1-based number of the offending line
  # [_line_] The text of the offending line
  def initialize(filename, linenum, line)
    super()
    @filename = filename
    @linenum = linenum
    @line = line
  end

  def to_s
    "Invalid line #{@linenum} #{@filename}\n#{@line}"
  end
end
|
96
|
+
|
97
|
+
# Given a file glob that picks out profiles, extract the specified numerical
# information, calculate statistics, and return a table sorted by mean.
#
# Each row of the result is [mean, sdev, range, directory]. Rows are ordered
# from highest to lowest mean. Matches that are not directories are ignored.
# Profiles that raise InvalidProfileException are logged at ERROR level and
# left out of the result.
#
# [_glob_] A file glob
# [_table_] Name of the table to read; defaults to "fold"
# [_field_] Name of the numeric field to summarize; defaults to "f-accuracy"
def self.summarize_folds(glob, table = "fold", field = "f-accuracy")
  rows = Pathname.glob(glob).select { |d| d.directory? }.collect do |d|
    LOGGER.debug("Profile directory #{d}")
    begin
      s = Profile.new(d).statistics(table, field)
      [s.mean, s.sdev, s.range, d.to_s]
    rescue InvalidProfileException => e
      LOGGER.error(e.message)
      nil
    end
  end
  # Drop the nils left behind by profiles that could not be summarized.
  rows.compact.sort_by { |r| -r.first }
end
|
114
|
+
|
115
|
+
|
116
|
+
# The name of a Logon TSDB output profile.
#
# Such a name is a long string in which each feature used to generate the
# profile appears as a tag, most of them with a bracketed value. The
# features this class keeps are exposed as struct members.
class OutputProfileName < Struct.new(:prefix, :grandparenting,
                                     :constituent_weight, :active_edges,
                                     :ngram_size, :ngram_back_off,
                                     :relative_tolerance, :variance)
  # The basename of the profile
  attr_reader :name

  # Create an output profile name from a directory name.
  #
  # Path information will be stripped from the name parameter. An
  # ArgumentError is raised when the basename does not have the expected
  # layout.
  #
  # [_name_] A directory name
  def initialize(name)
    @name = File.basename(name)
    fields = /\[(\S+)\]\s                        # 1 Prefix
              GP\[(\d+)\]\s                      # 2 Grandparenting
              [+-]PT\s
              [+-]LEX\s
              CW\[(\d*)\]\s                      # 3 Constituent weight
              ([+-])AE\s                         # 4 Active edges
              NS\[(\d+)\]\s                      # 5 N-gram size
              NT\[\w*\]\s
              ([+-])NB\s                         # 6 N-gram backoff
              LM\[\d+\]\s
              FT\[:::\d+\]\s
              RS\[\]\s
              MM\[\S+\]\s
              MI\[\d+\]\s
              RT\[(\d+(?:\.\d+)?e[+-]\d+)\]\s    # 7 Relative tolerance
              AT\[\d+(?:\.\d+)?e[+-]\d+\]\s
              VA\[(\d+(?:\.\d+)?e[+-]\d+)?\]\s   # 8 Variance
              PC\[\d+\]
             /x.match(@name)
    raise ArgumentError.new("Invalid profile name #{name}") if fields.nil?
    # The variance bracket may be empty, in which case the member is nil.
    variance_value = fields[8].nil? ? nil : fields[8].to_f
    super(fields[1],
          fields[2].to_i,
          fields[3].to_i,
          fields[4] == "+",
          fields[5].to_i,
          fields[6] == "+",
          fields[7].to_f,
          variance_value)
  end

  # The prefix followed by the feature string.
  def to_s
    "#{prefix}:#{feature_string}"
  end

  # Does this profile name have exactly the same machine learning feature
  # values as the other one? The prefix is not compared.
  def equal_learner_features?(other)
    learner_features.all? do |feature|
      send(feature) == other.send(feature)
    end
  end

  # A compact and readable representation of all the feature values.
  def feature_string
    pairs = learner_features.collect do |feature|
      "#{feature.to_s}=#{send(feature)}"
    end
    pairs.join(",")
  end

  private

  # The members that describe the learner, i.e. every member except prefix.
  def learner_features
    [:grandparenting, :constituent_weight, :active_edges, :ngram_size,
     :ngram_back_off, :relative_tolerance, :variance]
  end
end # OutputProfileName
|
180
|
+
|
181
|
+
|
182
|
+
# A TSDB profile
#
# A profile is a directory holding a relations file, which describes the
# table schemas, and one data file per table.
class Profile
  # Result type of #statistics. Defined once here rather than building a
  # fresh anonymous Struct class on every call.
  Statistics = Struct.new(:mean, :sdev, :range)

  attr_reader :directory, :relations

  # Load the relations file found in the profile directory.
  #
  # Raises MissingRelationsFile if the directory has no relations file.
  #
  # [_directory_] The profile directory
  def initialize(directory)
    @directory = directory
    # File.open, unlike Kernel#open, never treats a path beginning with "|"
    # as a command to run.
    relations_path = File.join(directory, "relations")
    begin
      @relations = File.open(relations_path) do |file|
        RelationsFile.new(file)
      end
    rescue Errno::ENOENT
      raise MissingRelationsFile.new(directory)
    end
  end

  def inspect
    "#{self.class}(#{directory})"
  end

  def to_s
    inspect
  end

  # A list of all the tables in the profile
  def tables
    @relations.keys
  end

  # Open the specified table file.
  #
  # [_name_] The table name
  def [](name)
    ProfileTable.new(self, name, @relations[name])
  end

  # Return mean, standard deviation and range for the numeric values in the
  # specified field.
  #
  # Values are converted with to_f. The standard deviation divides by n-1,
  # except for a single value where it divides by 1 and is therefore 0.
  # Raises EmptyDataFile if the table has no records.
  #
  # [_table_] The table name
  # [_field_] The field label
  def statistics(table, field)
    values = self[table].collect { |r| r[field].to_f }
    count = values.length
    raise EmptyDataFile.new(table, self) if count.zero?
    mean = (values.inject(0) { |sum, x| sum + x }) / count
    divisor = count > 1 ? count - 1 : count
    squares = values.inject(0) { |sum, x| sum + (x - mean)**2 }
    sdev = Math.sqrt(squares / divisor)
    Statistics.new(mean, sdev, values.max - values.min)
  end
end
|
228
|
+
|
229
|
+
|
230
|
+
# A data table in a TSDB profile.
#
# The table's records live in a file named after the table inside the
# profile directory, optionally gzipped with a ".gz" suffix. The file is
# located when the object is created but is only opened while the records
# are being enumerated, so a table can be enumerated any number of times
# and no file handle is left open.
class ProfileTable
  include Enumerable

  attr_reader :profile, :name, :schema, :filename

  # Locate the data file for a table.
  #
  # Raises MissingDataFile if neither the plain nor the gzipped file exists.
  #
  # [_profile_] The owning profile; its +directory+ is searched
  # [_name_] The table name
  # [_schema_] The object whose +record+ method turns a line into a record
  def initialize(profile, name, schema)
    @profile = profile
    @name = name
    @schema = schema
    # Find the file containing this table. It may be gzipped.
    plain_name = File.join(profile.directory, name)
    gz_name = plain_name + ".gz"
    if File.exist?(plain_name)
      @filename = plain_name
      @gzipped = false
    elsif File.exist?(gz_name)
      @filename = gz_name
      @gzipped = true
    else
      raise MissingDataFile.new(@name, @profile)
    end
  end

  def inspect
    "#{self.class}(#{name}) in #{profile}"
  end

  def to_s
    inspect
  end

  # Enumerate the records in this table.
  #
  # Each line of the data file is stripped of surrounding whitespace and
  # handed to the schema's +record+ method. Without a block an enumerator
  # is returned.
  def each
    return to_enum(:each) unless block_given?
    with_data_file do |file|
      file.each_line do |line|
        # strip, not strip!: the latter returns nil when there is nothing
        # to remove, e.g. for a final line without a newline.
        yield @schema.record(line.strip)
      end
    end
    self
  end

  private

  # Open the data file, transparently decompressing it if it is gzipped,
  # pass it to the block, and close it afterwards.
  def with_data_file(&block)
    if @gzipped
      Zlib::GzipReader.open(@filename, &block)
    else
      File.open(@filename, &block)
    end
  end
end # ProfileTable
|
269
|
+
|
270
|
+
|
271
|
+
# A database schema table in a profile.
#
# This is a list of field labels and their types. The labels of key and
# partial fields are additionally tracked in sets.
class ProfileTableSchema < Array
  # One schema entry. Defined once here rather than building a fresh
  # anonymous Struct class for every field.
  Field = Struct.new(:label, :type)

  attr_reader :name, :keys, :partials

  # [_init_name_] The table name
  def initialize(init_name)
    super()
    @name = init_name
    @keys = Set.new
    @partials = Set.new
  end

  # The string representation is identical to what appears in the relations
  # file.
  def to_s
    field_lines = collect do |field|
      s = " #{field.label} :#{field.type}"
      s += " :key" if is_key?(field.label)
      s += " :partial" if is_partial?(field.label)
      s
    end
    "#{name}:\n" + field_lines.join("\n")
  end

  # Generate a data record from a line of text.
  #
  # A data record is a hash of field labels to values. The text is split on
  # "@" and the pieces are paired with the fields in schema order. Pieces
  # beyond the last field are ignored.
  #
  # [_text_] One line from the table's data file
  def record(text)
    data_fields = text.split(/@/)
    pairs = zip(data_fields).collect do |field, data|
      # Do type conversion if the field is of type integer. The type is
      # compared by name because it may be stored as a String or a Symbol.
      value = field.type.to_s == "integer" ? data.to_i : data
      [field.label, value]
    end
    Hash[pairs]
  end

  # Add a new field and type.
  #
  # [_label_] The field label
  # [_type_] The field type, e.g. "integer"
  # [_key_] Is this a key field?
  # [_partial_] Is this a partial field?
  def add_field(label, type, key = false, partial = false)
    push(Field.new(label, type))
    @keys.add(label) if key
    @partials.add(label) if partial
  end

  # Is the specified label a key?
  def is_key?(label)
    @keys.member?(label)
  end

  # Is the specified label a partial?
  def is_partial?(label)
    @partials.member?(label)
  end
end # ProfileTableSchema
|
331
|
+
|
332
|
+
|
333
|
+
# A file that contains a set of database schema tables.
#
# This object is a hash of ProfileTableSchema objects indexed by table name.
class RelationsFile < Hash
  # Parse an open relations file.
  #
  # Text from "#" to the end of a line is a comment. A table definition
  # starts with a line containing "name:", continues with one
  # "label :type [:key] [:partial]" line per field, and ends at the next
  # blank line. Any other line raises InvalidRelationsFile.
  #
  # [_file_] An object that yields lines from +each_with_index+
  def initialize(file)
    super()
    # Only used in error reports. Not every line source has a path.
    filename = file.respond_to?(:path) ? file.path : file.inspect
    state = :outside_table
    table_name = nil
    file.each_with_index do |raw_line, i|
      # Remove comments and surrounding whitespace.
      line = raw_line.sub(/#.*/, "").strip
      case state
      when :inside_table
        if line.empty?
          state = :outside_table
        elsif line =~ /^(\S+)\s+:(\w+)(\s+:key)?(\s+:partial)?$/
          # E.g. parse-id :integer :key
          self[table_name].add_field($1, $2, !$3.nil?, !$4.nil?)
        else
          raise InvalidRelationsFile.new(filename, i + 1, line)
        end
      when :outside_table
        next if line.empty?
        if line =~ /(\S+):/
          # E.g. item:
          table_name = $1
          self[table_name] = ProfileTableSchema.new(table_name)
          state = :inside_table
        else
          raise InvalidRelationsFile.new(filename, i + 1, line)
        end
      end
    end # each_with_index
  end # initialize

  # Print out a relations file
  def to_s
    values.map { |t| t.to_s }.join("\n\n")
  end # to_s
end # RelationsFile
|
375
|
+
|
376
|
+
|
377
|
+
end # Delphin
|
@@ -0,0 +1,77 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
|
5
|
+
# Copyright 2009 William Patrick McNeill
|
6
|
+
#
|
7
|
+
# This file is part of the DELPH-IN Ruby Utility Package.
|
8
|
+
#
|
9
|
+
# The DELPH-IN Ruby Utility Package is free software; you can redistribute it
|
10
|
+
# and/or modify it under the terms of the GNU General Public License as
|
11
|
+
# published by the Free Software Foundation; either version 2 of the License,
|
12
|
+
# or (at your option) any later version.
|
13
|
+
#
|
14
|
+
# The DELPH-IN Ruby Utility Package is distributed in the hope that it will be
|
15
|
+
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
17
|
+
# Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License along with
|
20
|
+
# this package; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
21
|
+
# St, Fifth Floor, Boston, MA 02110-1301 USA
|
22
|
+
#
|
23
|
+
#++
|
24
|
+
|
25
|
+
# Test cases for the Delphin module
|
26
|
+
|
27
|
+
require "test/unit"
|
28
|
+
require "delphin"
|
29
|
+
|
30
|
+
|
31
|
+
# Tests for Delphin::ProfileTableSchema.
class Schema < Test::Unit::TestCase
  # A freshly created schema keeps the name it was given and has no fields,
  # keys or partials yet.
  def test_stub
    schema = Delphin::ProfileTableSchema.new("schema")
    assert_equal("schema", schema.name)
    assert(schema.empty?)
    assert(schema.keys.empty?)
    assert(schema.partials.empty?)
  end
end
|
36
|
+
|
37
|
+
# Tests for Delphin::OutputProfileName.
class TestOutputProfileName < Test::Unit::TestCase
  # Two well-formed profile names that differ in several learner features.
  def setup
    @name1 = "[tanaka-train] GP[0] +PT -LEX CW[1] +AE NS[4] NT[type] +NB LM[0] FT[:::1] RS[] MM[tao_lmvm] MI[5000] RT[1.0e-8] AT[1.0e-20] VA[1.0e+0] PC[100]"
    @name2 = "[jhpstg] GP[0] +PT -LEX CW[] -AE NS[3] NT[type] +NB LM[0] FT[:::1] RS[] MM[tao_lmvm] MI[5000] RT[1.0e-6] AT[1.0e-20] VA[1.0e-4] PC[100]"
    @profile1 = Delphin::OutputProfileName.new(@name1)
    @profile2 = Delphin::OutputProfileName.new(@name2)
  end

  # Every tracked feature is extracted from a valid name.
  def test_valid_name
    assert_equal(@name1, @profile1.name)
    assert_equal("tanaka-train", @profile1.prefix)
    assert_equal(0, @profile1.grandparenting)
    assert_equal(1, @profile1.constituent_weight)
    assert_equal(true, @profile1.active_edges)
    assert_equal(4, @profile1.ngram_size)
    assert_equal(true, @profile1.ngram_back_off)
    assert_equal(1e-8, @profile1.relative_tolerance)
    assert_equal(1, @profile1.variance)
  end

  # An empty VA[] bracket yields a nil variance.
  def test_empty_variance
    name = "[jhpstg] GP[0] +PT -LEX CW[2] +AE NS[2] NT[type] +NB LM[0] FT[:::1] RS[] MM[tao_lmvm] MI[5000] RT[1.0e-6] AT[1.0e-20] VA[] PC[100]"
    profile = Delphin::OutputProfileName.new(name)
    assert_nil(profile.variance)
  end

  # The feature string lists every learner feature as name=value.
  def test_feature_string
    expected = "grandparenting=0,constituent_weight=1,active_edges=true,ngram_size=4,ngram_back_off=true,relative_tolerance=1.0e-08,variance=1.0"
    assert_equal(expected, @profile1.feature_string)
  end

  # A profile matches itself but not one with different feature values.
  def test_feature_equivalence
    assert(@profile1.equal_learner_features?(@profile1))
    assert(!@profile1.equal_learner_features?(@profile2))
  end

  # A name without the expected layout is rejected.
  def test_invalid_name
    assert_raise(ArgumentError) { Delphin::OutputProfileName.new("bogus") }
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: delphin
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- W.P. McNeill
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-09-23 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: |
|
17
|
+
This module is a Ruby wrapper for the DELPH-IN project.
|
18
|
+
|
19
|
+
email: billmcn@gmail.com
|
20
|
+
executables: []
|
21
|
+
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files:
|
25
|
+
- README
|
26
|
+
files:
|
27
|
+
- test/test_delphin.rb
|
28
|
+
- lib/delphin.rb
|
29
|
+
- bin/cleanup-profile
|
30
|
+
- bin/score-profile
|
31
|
+
- README
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://delphin.rubyforge.org/
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- - --title
|
39
|
+
- Delphin -- DELPH-IN utilities
|
40
|
+
- --main
|
41
|
+
- README
|
42
|
+
- --line-numbers
|
43
|
+
- --inline-source
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
version:
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project: delphin
|
61
|
+
rubygems_version: 1.3.5
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: Ruby utilities for the DELPH-IN project
|
65
|
+
test_files:
|
66
|
+
- test/test_delphin.rb
|