GECS 1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +10 -0
- data/lib/GECS.rb +754 -0
- data/test/test_GECS.rb +46 -0
- metadata +84 -0
data/Rakefile
ADDED
data/lib/GECS.rb
ADDED
@@ -0,0 +1,754 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# Gem for Experimental Computer Science
|
3
|
+
|
4
|
+
# Gem for Experimental Computer Science
|
5
|
+
#
|
6
|
+
# Author:: David Flater <dflater@nist.gov>
|
7
|
+
# Copyright:: Public domain
|
8
|
+
# License:: Unlicense
|
9
|
+
#
|
10
|
+
# This software is experimental. NIST assumes no responsibility whatsoever
|
11
|
+
# for its use by other parties and makes no guarantees, expressed or
|
12
|
+
# implied, about its quality, reliability, or any other characteristic.
|
13
|
+
#
|
14
|
+
# == Conventions used within the GECS module
|
15
|
+
#
|
16
|
+
# The following abbreviations are used:
|
17
|
+
# Parm:: parameter
|
18
|
+
# Est:: estimation or estimate
|
19
|
+
# Inv:: interval
|
20
|
+
# Ind:: independent
|
21
|
+
# Dep:: dependent
|
22
|
+
# Var:: variable
|
23
|
+
# Treat:: treatment (a vector of factor levels)
|
24
|
+
#
|
25
|
+
# Arrays of character strings (or in one case, ParmDef structs) are used as a
|
26
|
+
# kind of "enum." Values of the enum pseudo-type are unsigned integers that
|
27
|
+
# simply index the array to provide a concise identifier for whatever the
|
28
|
+
# referenced character string (or ParmDef) describes.
|
29
|
+
#
|
30
|
+
# A good many helper methods that should not be exposed through the GECS API
|
31
|
+
# unfortunately are exposed and get listed by rdoc. These have been tagged
|
32
|
+
# (Private) in their comments.
|
33
|
+
#
|
34
|
+
# Rdoc lists struct definitions in the Constants section.
|
35
|
+
#
|
36
|
+
# == Dependencies
|
37
|
+
#
|
38
|
+
# To use the ::bootstrapMeans method:
|
39
|
+
# * The Ruby gem parallel[rubygems.org/gems/parallel] must be installed.
|
40
|
+
# Tested version: 0.9.2.
|
41
|
+
# * The R[www.r-project.org] environment for statistical computing must be
|
42
|
+
# runnable from a shell command line. Tested version: 3.1.0.
|
43
|
+
# * The R package bootBCa[bootbca.r-forge.r-project.org] must be installed.
|
44
|
+
# Tested version: 1.0.
|
45
|
+
#
|
46
|
+
# To use the ::quickMeans method:
|
47
|
+
# * The Ruby gem statistics2[rubygems.org/gems/statistics2] must be installed.
|
48
|
+
# Tested version: 0.54.
|
49
|
+
#
|
50
|
+
# Both ::bootstrapMeans and ::quickMeans use set, a standard
|
51
|
+
# pre-installed class.
|
52
|
+
#
|
53
|
+
# == Bad behaviors
|
54
|
+
#
|
55
|
+
# ::bootstrapMeans writes temporary files into the current working directory.
|
56
|
+
# Normally, they will be deleted when no longer in use.
|
57
|
+
#
|
58
|
+
# The inverse t distribution in statistics2 that is used by ::quickMeans
|
59
|
+
# agrees with R and Octave only to 4 decimals or so.
|
60
|
+
|
61
|
+
module GECS

  # Integer constant indicating the version of GECS that has been loaded.
  # There is no major/minor/patchlevel encoding.  It just increments.
  Version = 1

  # Array of character strings (pseudo-enum definition) used to identify
  # parameters.
  #
  # Parameters are theoretically fixed but unknown metrics of a population
  # that is bigger than the sample.  They can only be estimated, and with
  # estimation methods potentially being computationally expensive and
  # complex, it may be important to preserve those estimates.
  #
  # - mean
  # - standard deviation
  # - variance
  #
  # This is a non-prescriptive definition for example or default use.
  # Each export file encapsulates its own definitions.
  #
  # The array is deliberately left unfrozen:  the getOrAdd* methods of
  # BagOfHolding push onto whatever array a bag holds, so pass a copy if
  # this default should stay pristine.
  Parms = [
    "mean",
    "standard deviation",
    "variance"
  ]

  # Array of character strings (pseudo-enum definition) used to identify
  # estimation methods.
  #
  # - original
  # - bootstrap, percentile interval
  # - bootstrap, BCa interval
  # - bootstrap-t interval
  #
  # This is a non-prescriptive definition for example or default use.
  # Each export file encapsulates its own definitions.
  EstMethods = [
    "original",                       # Standard formulae using normal approximations.
    "bootstrap, percentile interval",
    "bootstrap, BCa interval",
    "bootstrap-t interval"            # A.k.a. studentized bootstrap
  ]

  # Array of character strings (pseudo-enum definition) used to identify
  # parameters of estimation methods.
  #
  # Different estimation methods will have different parameters.
  # Interval type matters for asymmetrical distributions.
  # Nested bootstrap replica count is used for bootstrap-t.
  # An adaptive bootstrap may vary the replica count to achieve a precision
  # specified as a numerical tolerance.
  #
  # - coverage probability
  # - interval type
  # - bootstrap replica count
  # - nested bootstrap replica count
  # - numerical tolerance of adaptive bootstrap
  # - estimated attained precision of adaptive bootstrap
  #
  # This is a non-prescriptive definition for example or default use.
  # Each export file encapsulates its own definitions.
  EstMethodParms = [
    "coverage probability",
    "interval type",
    "bootstrap replica count",
    "nested bootstrap replica count",
    "numerical tolerance of adaptive bootstrap",
    "estimated attained precision of adaptive bootstrap"
  ]

  # Array of character strings (pseudo-enum definition) used to identify
  # types of confidence intervals.
  #
  # - probabilistically symmetric
  # - shortest
  #
  # This is a non-prescriptive definition for example or default use.
  # Each export file encapsulates its own definitions.
  InvTypes = [
    "probabilistically symmetric",    # A.k.a. equi-tailed.
    "shortest"
  ]

  # --------------------------------------------------------------------------

  # Normative struct definition.
  #
  # A ParmDef is a "parameterized parameter" providing additional context to
  # disambiguate alternative ways of estimating the parameter.  Additional
  # result-specific context can be included in the ParmData if necessary.
  #
  # parm::            Index into parms.
  # estMethod::       Index into estMethods.
  # estMethodParms::  Hash from estMethodParms index to values.
  ParmDef = Struct.new(
    :parm,
    :estMethod,
    :estMethodParms
  )

  # Normative struct definition.
  #
  # est::             Value (or, if necessary, an array of values).
  # lo::              Bounds are presumed inclusive unless infinite (or nil).
  # hi::              Bounds are presumed inclusive unless infinite (or nil).
  # estMethodParms::  Result-specific context (or nil).
  ParmData = Struct.new(
    :est,
    :lo, :hi,
    :estMethodParms
  )

  # Normative struct definition.
  #
  # id::           Primary key.
  # indVars::      Array of factor identifiers (enum style).
  # depVars::      Array of output variable identifiers (enum style).
  # description::  Everything else as verbose text.
  Experiment = Struct.new(
    :id,
    :indVars,
    :depVars,
    :description
  ) do
    # Multi-line human-readable summary of the experiment.
    def to_s
      "Experiment id " + id.to_s + ": " + description.to_s + "\n" +
        " Independent variables: " + indVars.to_s + "\n" +
        " Dependent variables: " + depVars.to_s
    end
  end

  # Normative struct definition.
  #
  # A Key is used to retrieve either data or parameter estimates.
  #
  # experimentId::  References experiments.
  # treat::  Array of factor values ordered per indVars.  For data, all values
  #          must be specified.  For parms, nil works like a wildcard.  E.g.,
  #          for main effects, only one factor will have a specified level and
  #          all others will be nil.  Factor values (levels) are not
  #          necessarily numeric.
  Key = Struct.new(
    :experimentId,
    :treat
  )

  # Normative struct definition.
  #
  # BagOfHolding is a bag containing everything that is loaded from or saved
  # to an export file except for the GECS version number.
  #
  # parms::           Array of strings defining pseudo-enum as used in a given data file.  See Parms.
  # estMethods::      Array of strings defining pseudo-enum as used in a given data file.  See EstMethods.
  # estMethodParms::  Array of strings defining pseudo-enum as used in a given data file.  See EstMethodParms.
  # invTypes::        Array of strings defining pseudo-enum as used in a given data file.  See InvTypes.
  # parmDefs::        Array of ParmDef structs defining pseudo-enum as used in a given data file.
  # experiments::     Array of Experiment structs (index by experiment id).
  # data::            Hash from Key to array (per depVars) of arrays (measurement values in chronological order).
  # ests::            Hash from Key to array (per depVars) of hashes (from parmDefs index to ParmData).
  BagOfHolding = Struct.new(
    :parms, :estMethods, :estMethodParms, :invTypes,
    :parmDefs,
    :experiments,
    :data,
    :ests
  ) do
    # The descriptions of all experiments, one after another.  A bag without
    # experiments renders as the empty string.
    def to_s
      (experiments || []).join("\n")
    end

    # Methods to retrieve or create enum values.  If a requested value does
    # not already exist, it is added without fanfare.
    #
    # array::  The enum array to search (and possibly extend in place).
    # name::   The value to look up.
    #
    # Returns the index of name within array.  Raises RuntimeError if array
    # is nil.
    def getOrAdd(array, name)
      raise "Can't add to nil" if array.nil?
      found = array.index(name)
      return found unless found.nil?
      array.push(name)
      array.length - 1
    end

    # Index of name in parms, creating the enum and/or the entry as needed.
    def getOrAddParm(name)
      self.parms ||= Array.new
      getOrAdd(parms, name)
    end

    # Index of name in estMethods, creating the enum and/or the entry as needed.
    def getOrAddEstMethod(name)
      self.estMethods ||= Array.new
      getOrAdd(estMethods, name)
    end

    # Index of name in estMethodParms, creating the enum and/or the entry as
    # needed.
    def getOrAddEstMethodParm(name)
      self.estMethodParms ||= Array.new
      getOrAdd(estMethodParms, name)
    end

    # Index of name in invTypes, creating the enum and/or the entry as needed.
    def getOrAddInvType(name)
      self.invTypes ||= Array.new
      getOrAdd(invTypes, name)
    end

    # Index of parmDef in parmDefs (compared by field values), creating the
    # enum and/or the entry as needed.
    def getOrAddParmDef(parmDef)
      self.parmDefs ||= Array.new
      getOrAdd(parmDefs, parmDef)
    end
  end

  # Normative struct definition.
  #
  # DoubleBag is a bag containing a GECS version number and a BagOfHolding.
  # The format version identifier is added/removed by save/load.
  DoubleBag = Struct.new(:version, :bagOfHolding)

  # --------------------------------------------------------------------------

  # Convenience methods.

  # (Private) Validate the bag and the experiment id that nearly every public
  # method receives, and return the referenced Experiment.
  #
  # Raises ArgumentError if the bag or its experiment list is nil or if id is
  # out of range.  Negative ids are rejected because Array#[] would silently
  # count them from the end.
  def GECS.lookupExperiment(bagOfHolding, id)
    raise ArgumentError, "Bag is nil" if bagOfHolding.nil?
    raise ArgumentError, "Experiments are nil" if bagOfHolding.experiments.nil?
    if id < 0 || id >= bagOfHolding.experiments.length
      raise ArgumentError, "No such experiment"
    end
    bagOfHolding.experiments[id]
  end
  private_class_method :lookupExperiment

  # (Private) Like lookupExperiment, but additionally require that the
  # experiment exists and declares at least one independent and one
  # dependent variable.  Raises ArgumentError otherwise.
  def GECS.lookupUsableExperiment(bagOfHolding, id)
    exp = lookupExperiment(bagOfHolding, id)
    raise ArgumentError, "Null experiment" if exp.nil?
    raise ArgumentError, "Null indvars" if exp.indVars.nil?
    raise ArgumentError, "Null depvars" if exp.depVars.nil?
    raise ArgumentError, "Not enough indvars" if exp.indVars.length < 1
    raise ArgumentError, "Not enough depvars" if exp.depVars.length < 1
    exp
  end
  private_class_method :lookupUsableExperiment

  # Save a GECS database to a file.
  #
  # filename::      Path of the file to (over)write.
  # bagOfHolding::  A BagOfHolding.
  def GECS.save(filename, bagOfHolding)
    # Marshal output is binary, and the block form guarantees that the data
    # are flushed and the handle is closed before returning.
    File.open(filename, "wb") { |f|
      Marshal.dump(DoubleBag.new(Version, bagOfHolding), f)
    }
  end

  # Load a GECS database from a file.  Returns a BagOfHolding.
  #
  # Raises RuntimeError if the file was written by a later version of GECS.
  #
  # SECURITY NOTE:  Marshal.load can instantiate arbitrary objects.  Only load
  # files that come from a trusted source.
  def GECS.load(filename)
    temp = File.open(filename, "rb") { |f| Marshal.load(f) }
    if temp.version > Version
      raise "File format version is later than GECS.rb version"
    end
    temp.bagOfHolding
  end

  # Simplify creation of a new database by nilling out the enums.  Since
  # enums are looked up using a find-or-create pattern, there is no harm in
  # starting with nils.
  #
  # experiments::  Array of Experiment structs (index by experiment id).
  # data::         Hash from Key to array (per depVars) of arrays (measurement values in chronological order).
  def GECS.newBag(experiments, data)
    BagOfHolding.new(nil, nil, nil, nil, nil, experiments, data, nil)
  end

  # Dump an experiment's raw data (not parameter estimates) as an R table
  # (with header) with a column for each dependent variable and N rows for
  # each treatment.  Short and missing series are padded with NAs.
  #
  # Returns the table as a String.  Raises ArgumentError for a nil bag or an
  # unknown experiment id, and RuntimeError for an experiment without
  # variables or for malformed treatment data.
  def GECS.dumpData(bagOfHolding, id)
    exp = lookupExperiment(bagOfHolding, id)
    raise "Experiment has no independent variables!" if exp.indVars.empty?
    raise "Experiment has no dependent variables!" if exp.depVars.empty?
    dump = quotesome(exp.indVars) + " " + quotesome(exp.depVars) + "\n"
    results = bagOfHolding.data.select { |k, _v| k.experimentId == id }
    results.each { |k, cellarray|
      raise "Null treatment data" if cellarray.nil?
      raise "Bad treatment data" if cellarray.length != exp.depVars.length
      # A nil series still gets one row (of NA) so the treatment shows up.
      maxlen = cellarray.map { |cell| cell.nil? ? 1 : cell.length }.max
      treatColumns = quotesome(k.treat)
      maxlen.times { |iteration|
        dump << treatColumns
        cellarray.each { |cell|
          dump << " " << quotemaybe(cell.nil? ? nil : cell[iteration]).to_s
        }
        dump << "\n"
      }
    }
    dump
  end

  # Dump parameter estimates for an experiment as an R table (with header)
  # with crudely constructed column names:  depVar X parmDefId X [est, lo,
  # hi, optionally prec].  Estimates are assumed to be scalars.  If prec is
  # true, add a prec column for each parameter containing the value of the
  # estMethodParm "estimated attained precision of adaptive bootstrap".
  #
  # The set of parameter columns is taken from the first populated cell of
  # the experiment.  Cells that are nil or lack one of those parameters are
  # written as NA, and an experiment without any estimates yields just the
  # header lines.
  #
  # Returns the table as a String.  Raises ArgumentError for a nil bag or an
  # unknown experiment id.
  def GECS.dumpParms(bagOfHolding, id, prec=false)
    experiment = lookupExperiment(bagOfHolding, id)
    ests = (bagOfHolding.ests || {}).select { |k, _v| k.experimentId == id }
    firstCell = ests.values.flat_map { |cells| cells || [] }.compact.first
    parms = firstCell.nil? ? [] : firstCell.keys.sort
    precparm = bagOfHolding.getOrAddEstMethodParm("estimated attained precision of adaptive bootstrap") if prec

    dump = String.new("# Key to parameter ID numbers:\n")
    parms.each { |parmId|
      parmDef = bagOfHolding.parmDefs[parmId]
      dump << "# " << parmId.to_s << " = " << bagOfHolding.parms[parmDef.parm].to_s <<
        ", " << bagOfHolding.estMethods[parmDef.estMethod].to_s
      (parmDef.estMethodParms || {}).each { |k, v|
        dump << ", " << bagOfHolding.estMethodParms[k].to_s << "=" << v.to_s
      }
      dump << "\n"
    }

    # Header row.
    suffixes = prec ? ["est", "lo", "hi", "prec"] : ["est", "lo", "hi"]
    dump << quotesome(experiment.indVars)
    experiment.depVars.each { |d|
      parms.each { |parmId|
        suffixes.each { |suffix|
          dump << " \"" << d.to_s << " " << parmId.to_s << " " << suffix << "\""
        }
      }
    }
    dump << "\n"

    # One row per treatment or effect.
    ests.each { |k, cells|
      cells ||= Array.new(experiment.depVars.length)
      dump << quotesome(k.treat)
      cells.each { |cell|
        parms.each { |parmId|
          parm = cell.nil? ? nil : cell[parmId]
          if parm.nil?
            dump << " NA NA NA"
            dump << " NA" if prec
          else
            dump << " " << quotemaybe(parm.est).to_s <<
              " " << quotemaybe(parm.lo).to_s <<
              " " << quotemaybe(parm.hi).to_s
            if prec
              # quotemaybe turns a missing precision into NA.
              precValue = parm.estMethodParms.nil? ? nil : parm.estMethodParms[precparm]
              dump << " " << quotemaybe(precValue).to_s
            end
          end
        }
      }
      dump << "\n"
    }
    dump
  end

  # (Private) Helper method to quote values that aren't numeric.
  #
  # oneval::  A single value to be quoted or not.  Nil becomes NA.
  def GECS.quotemaybe(oneval)
    if oneval.nil?
      "NA"
    elsif oneval.is_a?(String)
      # R 3.0.2 looks like it is re-escaping strings on input so that \\
      # turns into \\\\, yet this is the minimum amount of escaping that gets
      # everything through read.table without choking.  allowEscapes=F only
      # makes it choke even more.  The worst case seems to be when a string
      # ends with a backslash.
      "\""+oneval.gsub('\\'){'\\\\'}.gsub("\"","\\\"")+"\""
    else
      oneval
    end
  end

  # (Private) Helper method to quote values that aren't numeric.  Were they
  # always well-behaved values, k.treat.join(" ") would suffice.
  #
  # treat::  An array of values that might need to be quoted.
  def GECS.quotesome(treat)
    treat.map { |level| quotemaybe(level) }.join(" ")
  end

  # Print out parameter metadata for an experiment.
  #
  # Writes to standard output.  Nothing is printed when the bag holds no
  # estimates for the experiment.
  def GECS.describeParms(bagOfHolding, id)
    experiment = bagOfHolding.experiments[id]
    ests = (bagOfHolding.ests || {}).select { |k, _v| k.experimentId == id }
    ests.each { |k, v|
      print "Treatment " + k.treat.to_s + "\n"
      v.each_index { |di|
        print " Depvar ", experiment.depVars[di], "\n"
        if v[di].nil?
          print " Not applicable\n"
        else
          v[di].each { |pk, parmData|
            parmDef = bagOfHolding.parmDefs[pk]
            print " Parameter: ", bagOfHolding.parms[parmDef.parm], "\n"
            print " Estimation method: ", bagOfHolding.estMethods[parmDef.estMethod], "\n"
            print " Global estimation method parameters:", "\n"
            printEstMethodParms(bagOfHolding, parmDef.estMethodParms)
            print " Local estimation method parameters:", "\n"
            printEstMethodParms(bagOfHolding, parmData.estMethodParms)
          }
        end
      }
    }
  end

  # (Private) Helper method for ::describeParms.
  #
  # estMethodParms::  Hash from estMethodParms index to value, or nil.
  def GECS.printEstMethodParms(bagOfHolding, estMethodParms)
    if estMethodParms.nil?
      puts " nil"
    else
      estMethodParms.each { |k, v|
        print " ", bagOfHolding.estMethodParms[k], " = ", v, "\n"
      }
    end
  end

  # --------------------------------------------------------------------------

  # Helper functions to deal with main effects and interactions.

  # (Private) Equality test for treatments that implements nil as wildcard.
  #
  # Raises ArgumentError if either treatment is nil or the lengths differ.
  def GECS.treatEq(a, b)
    raise ArgumentError, "Nil treatment passed to treatEq" if a.nil? || b.nil?
    raise ArgumentError, "Length mismatch" if a.length != b.length
    a.zip(b).all? { |x, y| x.nil? || y.nil? || x == y }
  end

  # (Private) Refactor the data of an experiment according to a specified
  # effect and extract the data from the specified cell.  If there are no
  # nils in key, this is just a slow way of doing bagOfHolding.data[key][di].
  #
  # di::  depvar index
  #
  # Returns the concatenated measurements of every matching treatment, or nil
  # if no matching treatment has a series for this depvar.
  def GECS.refactorExtract(bagOfHolding, key, di)
    r = nil
    bagOfHolding.data.each { |k, v|
      next unless k.experimentId == key.experimentId
      next unless treatEq(key.treat, k.treat)
      next if v.nil? || v[di].nil?
      r ||= Array.new
      r.concat(v[di])
    }
    r
  end

  # (Private) Return true if a given key matches any data at all.
  def GECS.matchesSomething(bagOfHolding, key)
    # Need to check every v[di] too?
    bagOfHolding.data.any? { |k, v|
      k.experimentId == key.experimentId && treatEq(key.treat, k.treat) && !v.nil?
    }
  end

  # (Private) Make a list of the Keys for all treatments, main effects, and
  # 2-way interactions for an experiment.  An attempt is made to suppress
  # interactions for which there are no data at all (combinations of levels
  # that don't occur).
  #
  # Raises ArgumentError for a nil bag, an unknown experiment id, or an
  # experiment without independent or dependent variables.
  def GECS.enumerateKeys(bagOfHolding, id)
    require 'set'
    exp = lookupUsableExperiment(bagOfHolding, id)
    numfacs = exp.indVars.length

    treatments = bagOfHolding.data.keys.select { |k| k.experimentId == id }
    # Short cut for single-factor experiments.
    return treatments if numfacs == 1

    # Enumerate the levels of all of the factors.
    levels = Array.new(numfacs) { Set.new }
    treatments.each { |k|
      numfacs.times { |fac| levels[fac].add(k.treat[fac]) }
    }

    r = treatments
    # Main effects.  Single-factor experiments were already excluded.
    numfacs.times { |fac|
      levels[fac].each { |lvl|
        treat = Array.new(numfacs, nil)
        treat[fac] = lvl
        r.push(Key.new(id, treat))
      }
    }
    # 2-way interactions.
    if numfacs > 2  # Don't duplicate the treatments when numfacs==2.
      (0...numfacs).to_a.combination(2) { |fac1, fac2|
        levels[fac1].each { |lvl1|
          levels[fac2].each { |lvl2|
            treat = Array.new(numfacs, nil)
            treat[fac1] = lvl1
            treat[fac2] = lvl2
            key = Key.new(id, treat)
            r.push(key) if matchesSomething(bagOfHolding, key)
          }
        }
      }
    end
    r
  end

  # --------------------------------------------------------------------------

  # rubygems.org/gems/statistics2 is required for the inverse t distribution.
  # Unfortunately, as of version 0.54, its agreement with R and Octave is
  # only to 4 decimals or so:

  # R version 3.0.2 (2013-09-25) -- "Frisbee Sailing"
  # > qt(0.975,5)
  # [1] 2.570582

  # GNU Octave, version 3.6.3
  # octave:1> printf ("%f\n", tinv(0.975,5))
  # 2.570582

  # irb(main):002:0> puts Gem.loaded_specs["statistics2"].version,
  # irb(main):003:0* Statistics2::ptdist(5,0.975)
  # 0.54
  # 2.57051

  # (Private) Calculate the quick interval for the mean of some data.
  # Returns a ParmData.
  #
  # Nil or empty data yield nil.  A single observation yields its value as
  # the estimate with nil bounds, since no sample variance can be computed.
  # The statistics2 gem is only loaded when an interval is really computed.
  def GECS.quickInterval(data)
    return nil if data.nil? || data.empty?
    count = data.length
    mean = data.reduce(:+).to_f / count
    return ParmData.new(mean, nil, nil, nil) if count < 2

    require 'statistics2'
    variance = data.map { |x| (mean - x)**2 }.reduce(:+) / (count - 1.0)
    meanU = Math.sqrt(variance / count) * Statistics2.ptdist(count - 1, 0.975)
    ParmData.new(mean, mean - meanU, mean + meanU, nil)
  end

  # Add the following parameter to the data and effects of a specified
  # experiment:  mean, original method, 95% confidence.  This is a quick way
  # to summarize results when more complicated options are not needed.  All
  # values are computed as Floats with no respect for the original data type
  # or its precision.
  #
  # bagOfHolding::  A BagOfHolding.
  # id::            Experiment id.
  #
  # Raises ArgumentError for a nil bag, an unknown experiment id, or an
  # experiment without independent or dependent variables.
  def GECS.quickMeans(bagOfHolding, id)
    exp = lookupUsableExperiment(bagOfHolding, id)
    numdeps = exp.depVars.length

    parmDef = ParmDef.new(bagOfHolding.getOrAddParm("mean"),
                          bagOfHolding.getOrAddEstMethod("original"),
                          {bagOfHolding.getOrAddEstMethodParm("coverage probability")=>0.95})
    meanId = bagOfHolding.getOrAddParmDef(parmDef)
    bagOfHolding.ests ||= Hash.new
    enumerateKeys(bagOfHolding, id).each { |key|
      bagOfHolding.ests[key] ||= Array.new(numdeps, nil)
      numdeps.times { |di|
        interval = quickInterval(refactorExtract(bagOfHolding, key, di))
        # No data, no estimate:  never store a nil ParmData.
        next if interval.nil?
        bagOfHolding.ests[key][di] ||= Hash.new
        bagOfHolding.ests[key][di][meanId] = interval
      }
    }
  end

  # --------------------------------------------------------------------------

  # (Private) Unit of parallelization.
  ParPod = Struct.new(
    :key,     # A Key to data or ests, as applicable.
    :di,      # Depvar index.
    :sernum,  # Unique integer.
    :out      # Returned interval.
  )

  # (Private) Create ParmData from ParPod return.
  #
  # pod.out holds the whitespace-separated fields printed by the R script:
  # replica count, attained precision, sample mean, lower and upper bound.
  # Returns nil if the pod produced no output.
  def GECS.parmRet(bagOfHolding, delta, pod)
    if pod.out.nil?
      nil
    else
      ParmData.new(pod.out[2].to_f, pod.out[3].to_f, pod.out[4].to_f, {
        bagOfHolding.getOrAddEstMethodParm("bootstrap replica count")=>pod.out[0].to_i,
        bagOfHolding.getOrAddEstMethodParm("numerical tolerance of adaptive bootstrap")=>delta[pod.di],
        bagOfHolding.getOrAddEstMethodParm("estimated attained precision of adaptive bootstrap")=>pod.out[1].to_f})
    end
  end

  # Add the following parameter to the results and effects of a specified
  # experiment:  mean, bootstrap method, BCa interval, 95% confidence,
  # adaptive determination of bootstrap replica count to achieve the
  # requested numerical tolerances for each depvar.  The estimate provided is
  # the sample mean, not the bootstrap estimate.  Count will be at least
  # 50000.
  #
  # This function is parallelized.  All available CPUs will be used to run
  # bootstrap calculations in R.
  #
  # bagOfHolding::  A BagOfHolding.
  # id::            Experiment id.
  # delta::  Array of numbers specifying desired numerical tolerances for each
  #          depvar.  To reduce variation below the resolution of a typical
  #          plot of height 1000 pixels, you'd want delta something like
  #          (max(y)-min(y))/2000 for whatever range of y is being plotted.
  #
  # Raises ArgumentError for a nil bag, an unknown experiment id, an
  # experiment without variables, a wrong number of deltas, a delta that is
  # not numeric, or a failed bootstrap run.
  def GECS.bootstrapMeans(bagOfHolding, id, delta)
    require 'parallel'
    exp = lookupUsableExperiment(bagOfHolding, id)
    numdeps = exp.depVars.length
    raise ArgumentError, "Wrong number of deltas" if delta.length != numdeps
    # The tolerances are pasted into a shell command line below, so insist
    # that they really are numbers.  Float() raises on anything else.
    tolerances = delta.map { |d| Float(d) }

    parmDef = ParmDef.new(bagOfHolding.getOrAddParm("mean"),
                          bagOfHolding.getOrAddEstMethod("bootstrap, BCa interval"),
                          {bagOfHolding.getOrAddEstMethodParm("coverage probability")=>0.95})
    meanId = bagOfHolding.getOrAddParmDef(parmDef)

    # Make a list of the data and effects that need fixing with a unique
    # serial number assigned to each.
    sernum = 0
    pods = Array.new
    enumerateKeys(bagOfHolding, id).each { |key|
      numdeps.times { |di|
        pods.push(ParPod.new(key, di, (sernum += 1), nil))
      }
    }

    # Run numCPUs instances of BCa.R in parallel.
    pods = Parallel.map(pods) do |pod|
      poddata = refactorExtract(bagOfHolding, pod.key, pod.di)
      unless poddata.nil?
        descript = "Treatment " + pod.key.treat.to_s + " depvar " + pod.di.to_s
        fnam = "bootstrap-in-" + pod.sernum.to_s + ".txt"
        begin
          File.open(fnam, "w") { |fp| fp.puts(poddata) }

          # This R script that has been mangled onto the command line to avoid
          # adding another external dependency is mostly just a wrapper for
          # BCa.R, but the bootstrap estimate of the mean is replaced by the
          # sample mean.
          cmd = "Rscript " +
                "-e 'library(\"bootBCa\")' " +
                "-e 'data <- unlist(read.table(\"" + fnam + "\",header=F,colClasses=\"numeric\"))' " +
                "-e 'out <- BCa(data,as.numeric(" + tolerances[pod.di].to_s + "),mean)' " +
                "-e 'cat(sprintf(\"%d %0.16f %0.16f %0.16f %0.16f\\n\",out[1],out[2],mean(data),out[4],out[5]))'"

          pod.out = `#{cmd}`.split
        ensure
          # Remove the temporary input file even if Rscript could not be run.
          File.delete(fnam) if File.exist?(fnam)
        end
        if pod.out.length < 5
          # This should never happen.
          print "Bootstrap failure\n"
          print " ", descript, "\n"
          print " Depvar: ", bagOfHolding.experiments[id].depVars[pod.di], "\n"
          raise ArgumentError, "Bootstrap failure"
        end

        # Verbose progress reporting
        print descript + ", " + pod.out[0] + " iterations done\n"
      end
      pod
    end

    # Copy back parameter estimates.
    bagOfHolding.ests ||= Hash.new
    pods.each { |pod|
      pd = parmRet(bagOfHolding, delta, pod)
      unless pd.nil?
        bagOfHolding.ests[pod.key] ||= Array.new(numdeps, nil)
        bagOfHolding.ests[pod.key][pod.di] ||= Hash.new
        bagOfHolding.ests[pod.key][pod.di][meanId] = pd
      end
    }
  end

end
|
data/test/test_GECS.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'GECS'
|
3
|
+
|
4
|
+
# End-to-end smoke test:  bootstraps the means of two small normal samples
# through R and checks that each interval contains the mean the sample was
# drawn with.
class GECSTest < Test::Unit::TestCase

  # Requires R and the bootBCa package to be installed (see the messages
  # printed below); without them GECS.bootstrapMeans fails.
  def test1
    assert_nothing_raised do
      assert_nothing_thrown do
        puts "This test requires you to install R and the bootBCa package."
        puts "R: www.r-project.org"
        puts "bootBCa: bootbca.r-forge.r-project.org\n\n"
        xid = 0
        experiment = GECS::Experiment.new(xid, ["input"], ["output"],
          "A silly, simple, small-sample test is better than nothing")
        results = Hash.new
        key5 = GECS::Key.new(xid,[5])
        key10 = GECS::Key.new(xid,[10])
        # rnorm(n=30,mean=5,sd=5)
        results[key5] = [[
          8.1239862, 0.7075978, 18.3996805, 1.3803859, 8.1125496, 0.5903406,
          0.8996330, 4.8486252, -2.8038801, 13.7444565, 6.5451796, 6.8989424,
          7.2075758, 19.0412270, 2.6637848, -1.7874951, 9.7959101, -0.4751056,
          9.1401820, 9.8164959, 12.0606408, 6.0108636, -0.5488287, 12.0728692,
          6.7253890, 6.4999955, 1.9586040, 2.2619639, 2.2761585, 3.5316891]]
        # rnorm(n=30,mean=10,sd=5)
        results[key10] = [[
          10.225934, 7.991545, 8.665374, 5.550484, 6.315061, 17.274522,
          11.291662, 19.324338, 8.760387, 5.561073, 10.225028, 7.939054,
          13.094538, 7.893948, 2.026159, 19.918737, 11.955975, 7.056023,
          2.883466, -2.054165, 8.938372, 3.319929, 18.280414, 5.495548,
          4.556700, 1.883278, 7.677032, 7.741751, 15.817214, 16.886633]]
        bag = GECS.newBag([experiment],results)
        # Fail here because they didn't install bootBCa, or R.
        GECS.bootstrapMeans(bag, xid, [0.1])
        puts "\n", bag, "\n", GECS.dumpParms(bag,xid,true)
        # [key][depvar index][parmDef index]
        # We have only one depvar and only one parmDef.
        parm = bag.ests[key5][0][0]
        assert(parm.lo < 5 && 5 < parm.hi,
               "interval for the mean=5 sample should contain 5")
        parm = bag.ests[key10][0][0]
        assert(parm.lo < 10 && 10 < parm.hi,
               "interval for the mean=10 sample should contain 10")
      end
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: GECS
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '1'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- David Flater
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-07-31 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: parallel
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.9.2
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.9.2
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: statistics2
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0.54'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.54'
|
46
|
+
description: This software is experimental. NIST assumes no responsibility whatsoever
|
47
|
+
for its use by other parties and makes no guarantees, expressed or implied, about
|
48
|
+
its quality, reliability, or any other characteristic.
|
49
|
+
email: dflater@nist.gov
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- lib/GECS.rb
|
55
|
+
- test/test_GECS.rb
|
56
|
+
- Rakefile
|
57
|
+
homepage: http://www.nist.gov/itl/ssd/cs/software-performance.cfm
|
58
|
+
licenses:
|
59
|
+
- Unlicense
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
requirements:
|
77
|
+
- ! 'R: www.r-project.org'
|
78
|
+
- ! 'bootBCa: bootbca.r-forge.r-project.org'
|
79
|
+
rubyforge_project:
|
80
|
+
rubygems_version: 1.8.23
|
81
|
+
signing_key:
|
82
|
+
specification_version: 3
|
83
|
+
summary: Gem for Experimental Computer Science
|
84
|
+
test_files: []
|