ent 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +3 -0
- data/Gemfile +14 -0
- data/LICENSE.txt +3 -0
- data/README.rdoc +8 -0
- data/Rakefile +58 -0
- data/VERSION +1 -0
- data/ext/ent_native/chisq.c +136 -0
- data/ext/ent_native/ent_native.c +277 -0
- data/ext/ent_native/extconf.rb +8 -0
- data/ext/ent_native/iso8859.c +19 -0
- data/ext/ent_native/iso8859.h +17 -0
- data/ext/ent_native/randtest.c +167 -0
- data/ext/ent_native/randtest.h +35 -0
- data/lib/ent.rb +2 -0
- data/spec/ent_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- metadata +149 -0
data/.document
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.3.0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.5.2"
  gem "rcov", ">= 0"
  gem "rake-compiler", ">= 0"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,3 @@
|
|
1
|
+
Copyright (c) 2011 Eric Monti
|
2
|
+
|
3
|
+
This software is in the public domain. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, without any conditions or restrictions. This software is provided “as is” without express or implied warranty.
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'rubygems'
require 'rake'
require 'rake/clean' # defines CLEAN explicitly instead of relying on another library to load it
require 'rake/extensiontask'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "ent"
  gem.homepage = "http://github.com/emonti/ent"
  # NOTE(review): LICENSE.txt describes a public-domain grant, not MIT -- confirm which is intended.
  gem.license = "MIT"
  gem.summary = %Q{Calculate the entropy of data}
  gem.description = %Q{Calculate the entropy of data}
  gem.email = "esmonti@gmail.com"
  gem.authors = ["Eric Monti"]

  # Native extension build scripts and the C sources to expose to rdoc.
  gem.extensions = FileList['ext/**/extconf.rb']
  gem.extra_rdoc_files += FileList['ext/**/*.c']
end

Jeweler::RubygemsDotOrgTasks.new

Rake::ExtensionTask.new("ent_native")

# Build products and generated directories removed by `rake clean`.
CLEAN.include(
  "lib/*.bundle",
  "ext/ent_native/*.bundle",
  "ext/ent_native/*.o",
  "ext/ent_native/Makefile",
  "ext/ent_native/mkmf.log",
  "ext/ent_native/extconf.h",
  "lib/*.so",
  "tmp/",
  "doc/",
  "rdoc/",
  "coverage/"
)

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# Newer Rake releases no longer ship rake/rdoctask; prefer the task class
# provided by RDoc itself and fall back to the legacy one on old installs.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ent #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.2
|
@@ -0,0 +1,136 @@
|
|
1
|
+
/*
|
2
|
+
|
3
|
+
Compute probability of measured Chi Square value.
|
4
|
+
|
5
|
+
This code was developed by Gary Perlman of the Wang
|
6
|
+
Institute (full citation below) and has been minimally
|
7
|
+
modified for use in this program.
|
8
|
+
|
9
|
+
*/
|
10
|
+
|
11
|
+
#include <math.h>
|
12
|
+
|
13
|
+
/*HEADER
|
14
|
+
Module: z.c
|
15
|
+
Purpose: compute approximations to normal z distribution probabilities
|
16
|
+
Programmer: Gary Perlman
|
17
|
+
Organization: Wang Institute, Tyngsboro, MA 01879
|
18
|
+
Copyright: none
|
19
|
+
Tabstops: 4
|
20
|
+
*/
|
21
|
+
|
22
|
+
#define Z_MAX 6.0 /* maximum meaningful z value */

/*FUNCTION poz: probability of normal z value */
/*ALGORITHM
	Adapted from a polynomial approximation in:
		Ibbetson D, Algorithm 209
		Collected Algorithms of the CACM 1963 p. 616
	Note:
		The polynomials are only evaluated for absolute z values < Z_MAX.
		At or beyond Z_MAX the result saturates: 1.0 for z >= Z_MAX and
		0.0 for z <= -Z_MAX.
*/

/* Evaluates a polynomial by Horner's rule; coef[] is ordered from the
   highest power down to the constant term. */
static double poz_horner(const double *coef, int count, double t)
{
    double acc = coef[0];
    int k;

    for (k = 1; k < count; k++) {
        acc = acc * t + coef[k];
    }
    return acc;
}

static double           /*VAR returns cumulative probability from -oo to z */
poz(const double z)     /*VAR normal z value */
{
    /* Used while 0.5*|z| < 1; the polynomial variable is (0.5*|z|)^2. */
    static const double near_coef[] = {
         0.000124818987, -0.001075204047,  0.005198775019,
        -0.019198292004,  0.059054035642, -0.151968751364,
         0.319152932694, -0.531923007300,  0.797884560593
    };
    /* Used for 1 <= 0.5*|z| < Z_MAX/2; the polynomial variable is 0.5*|z| - 2. */
    static const double far_coef[] = {
        -0.000045255659,  0.000152529290, -0.000019538132,
        -0.000676904986,  0.001390604284, -0.000794620820,
        -0.002034254874,  0.006549791214, -0.010557625006,
         0.011630447319, -0.009279453341,  0.005353579108,
        -0.002141268741,  0.000535310849,  0.999936657524
    };
    double half_abs, x;

    if (z == 0.0) {
        x = 0.0;
    } else {
        half_abs = 0.5 * fabs(z);
        if (half_abs >= (Z_MAX * 0.5)) {
            x = 1.0;
        } else if (half_abs < 1.0) {
            x = poz_horner(near_coef,
                           (int) (sizeof(near_coef) / sizeof(near_coef[0])),
                           half_abs * half_abs) * half_abs * 2.0;
        } else {
            x = poz_horner(far_coef,
                           (int) (sizeof(far_coef) / sizeof(far_coef[0])),
                           half_abs - 2.0);
        }
    }

    /* x is the symmetric mass around zero; fold it onto the requested side */
    if (z > 0.0) {
        return (x + 1.0) * 0.5;
    }
    return (1.0 - x) * 0.5;
}
|
65
|
+
|
66
|
+
/*
|
67
|
+
Module: chisq.c
|
68
|
+
Purpose: compute approximations to chisquare distribution probabilities
|
69
|
+
Contents: pochisq()
|
70
|
+
Uses: poz() in z.c (Algorithm 209)
|
71
|
+
Programmer: Gary Perlman
|
72
|
+
Organization: Wang Institute, Tyngsboro, MA 01879
|
73
|
+
Copyright: none
|
74
|
+
Tabstops: 4
|
75
|
+
*/
|
76
|
+
|
77
|
+
#define LOG_SQRT_PI 0.5723649429247000870717135 /* log (sqrt (pi)) */
#define I_SQRT_PI 0.5641895835477562869480795 /* 1 / sqrt (pi) */
#define BIGX 20.0 /* max value to represent exp (x) */
#define ex(x) (((x) < -BIGX) ? 0.0 : exp(x))

/*FUNCTION pochisq: probability of chi square value */
/*ALGORITHM Compute probability of chi square value.
	Adapted from:
		Hill, I. D. and Pike, M. C. Algorithm 299
		Collected Algorithms for the CACM 1967 p. 243
	Updated for rounding errors based on remark in
		ACM TOMS June 1985, page 185
*/

/*
 * ax: obtained chi-square value
 * df: degrees of freedom
 *
 * Returns 1.0 when ax <= 0 or df < 1; otherwise the value accumulated by
 * the series below (built on exp(-ax/2) for even df and on poz() for odd df).
 */
double pochisq(
    const double ax, /* obtained chi-square value */
    const int df     /* degrees of freedom */
    )
{
    double x = ax;
    double a, s;
    double y = 0.0; /* exp(-a); only computed when df > 1, so give it a defined value */
    double e, c, z;
    int even; /* true if df is an even number */

    if (x <= 0.0 || df < 1) {
        return 1.0;
    }

    a = 0.5 * x;
    even = (2 * (df / 2)) == df;
    if (df > 1) {
        y = ex(-a);
    }
    /* even df start from exp(-a); odd df start from the two-sided normal tail */
    s = (even ? y : (2.0 * poz(-sqrt(x))));
    if (df > 2) {
        x = 0.5 * (df - 1.0);
        z = (even ? 1.0 : 0.5);
        if (a > BIGX) {
            /* large a: accumulate terms in log space and clamp them through ex() */
            e = (even ? 0.0 : LOG_SQRT_PI);
            c = log(a);
            while (z <= x) {
                e = log(z) + e;
                s += ex(c * z - a - e);
                z += 1.0;
            }
            return (s);
        } else {
            e = (even ? 1.0 : (I_SQRT_PI / sqrt(a)));
            c = 0.0;
            while (z <= x) {
                e = e * (a / z);
                c = c + e;
                z += 1.0;
            }
            return (c * y + s);
        }
    } else {
        return s;
    }
}
|
@@ -0,0 +1,277 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
|
3
|
+
#include "randtest.h"
|
4
|
+
#include "iso8859.h"
|
5
|
+
|
6
|
+
|
7
|
+
#ifndef DBL2NUM
|
8
|
+
#define DBL2NUM(dbl) rb_float_new(dbl)
|
9
|
+
#endif
|
10
|
+
|
11
|
+
extern void rt_init();
|
12
|
+
|
13
|
+
extern rt_ctx *rt_new();
|
14
|
+
extern void rt_add(rt_ctx *ctx, void *buf, int bufl);
|
15
|
+
extern void rt_end(rt_ctx *ctx);
|
16
|
+
extern void rt_free(rt_ctx *ctx);
|
17
|
+
|
18
|
+
|
19
|
+
VALUE m_ent = Qnil;
|
20
|
+
VALUE c_random_test = Qnil;
|
21
|
+
|
22
|
+
/* Raises IOError ("closed stream") when the context has already been
   finalized; returns nil otherwise. */
VALUE rb_rt_assert_open(rt_ctx *ctx) {
  if (!ctx->ended)
    return Qnil;

  rb_raise(rb_eIOError, "closed stream");
  return Qnil; /* not reached: rb_raise does not return */
}
|
28
|
+
|
29
|
+
/* GC mark callback passed to Data_Wrap_Struct; it marks nothing. */
void rb_rt_mark(rt_ctx *ctx) {
  (void) ctx;
}
|
30
|
+
|
31
|
+
/* RandomTest.new([binmode]) -- allocates a test context and wraps it in a
   Ruby object of class klass. binmode may be omitted, nil, false or true;
   any other value raises TypeError via Check_Type. Allocation failure is
   reported through rb_sys_fail. */
VALUE rb_rt_initialize(int argc, VALUE *argv, VALUE klass) {
  VALUE binmode = Qnil;
  rt_ctx *ctx;

  rb_scan_args(argc, argv, "01", &binmode);

  /* RTEST is false only for nil and false */
  if (RTEST(binmode))
    Check_Type(binmode, T_TRUE);

  ctx = rt_new();
  if (!ctx)
    rb_sys_fail(0);

  ctx->binary = RTEST(binmode) ? 1 : 0;

  return Data_Wrap_Struct(klass, rb_rt_mark, rt_free, ctx);
}
|
49
|
+
|
50
|
+
|
51
|
+
/* binary? -- true when the context was created in binary (bit) mode. */
VALUE rb_rt_get_binmode(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  return ctx->binary ? Qtrue : Qfalse;
}
|
59
|
+
|
60
|
+
|
61
|
+
/* finalize -- completes the calculation unless it has already been
   completed; always returns self. */
VALUE rb_rt_final(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  if (ctx->ended)
    return self;

  rt_end(ctx);
  return self;
}
|
70
|
+
|
71
|
+
/* read(string) -- feeds the bytes of a Ruby String into the accumulator and
   returns the number of bytes consumed. Raises TypeError for non-strings
   and IOError once the context has been finalized. */
VALUE rb_rt_read_string(VALUE self, VALUE rb_buf) {
  rt_ctx *ctx;
  long len;

  Check_Type(rb_buf, T_STRING);
  Data_Get_Struct(self, rt_ctx, ctx);

  if (ctx->ended)
    rb_raise(rb_eIOError, "data cannot be added after finalizing");

  len = RSTRING_LEN(rb_buf);
  rt_add(ctx, RSTRING_PTR(rb_buf), len);
  return LONG2NUM(len);
}
|
83
|
+
|
84
|
+
|
85
|
+
/* Placeholder for reading a file by name: it validates the argument and the
   context state, then returns nil without reading anything. */
VALUE rb_rt_read_file(VALUE self, VALUE rb_filename) {
  rt_ctx *ctx;

  Check_Type(rb_filename, T_STRING);
  Data_Get_Struct(self, rt_ctx, ctx);

  if (ctx->ended)
    rb_raise(rb_eIOError, "data cannot be added after finalizing");

  /* todo ... */
  return Qnil;
}
|
97
|
+
|
98
|
+
|
99
|
+
/* entropy -- the stored entropy result as a Float once the context has
   been finalized, nil before that. */
VALUE rb_rt_entropy(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  return ctx->ended ? DBL2NUM(ctx->r_ent) : Qnil;
}
|
108
|
+
|
109
|
+
|
110
|
+
/* mean -- the stored mean result as a Float once the context has been
   finalized, nil before that. */
VALUE rb_rt_mean(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  return ctx->ended ? DBL2NUM(ctx->r_mean) : Qnil;
}
|
119
|
+
|
120
|
+
|
121
|
+
/* chisquare -- the stored chi-square result as a Float once the context
   has been finalized, nil before that. */
VALUE rb_rt_chisquare(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  return ctx->ended ? DBL2NUM(ctx->r_chisq) : Qnil;
}
|
130
|
+
|
131
|
+
|
132
|
+
/* datasum -- the context's r_datasum field as a Float once the context has
   been finalized, nil before that. */
VALUE rb_rt_datasum(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  return ctx->ended ? DBL2NUM(ctx->r_datasum) : Qnil;
}
|
141
|
+
|
142
|
+
|
143
|
+
/* montepi -- the stored Monte Carlo pi result as a Float once the context
   has been finalized, nil before that. */
VALUE rb_rt_montepi(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  return ctx->ended ? DBL2NUM(ctx->r_montepicalc) : Qnil;
}
|
152
|
+
|
153
|
+
|
154
|
+
/* scc -- the stored serial correlation result as a Float once the context
   has been finalized, nil before that. */
VALUE rb_rt_scc(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  return ctx->ended ? DBL2NUM(ctx->r_scc) : Qnil;
}
|
163
|
+
|
164
|
+
/* entropy! -- finalizes the context if that has not happened yet, then
   returns the stored entropy result as a Float. */
VALUE rb_rt_entropy_force(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  if (ctx->ended == 0)
    rt_end(ctx);
  return DBL2NUM(ctx->r_ent);
}
|
173
|
+
|
174
|
+
|
175
|
+
/* mean! -- finalizes the context if that has not happened yet, then returns
   the stored mean result as a Float. */
VALUE rb_rt_mean_force(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  if (ctx->ended == 0)
    rt_end(ctx);
  return DBL2NUM(ctx->r_mean);
}
|
184
|
+
|
185
|
+
|
186
|
+
/* chisquare! -- finalizes the context if that has not happened yet, then
   returns the stored chi-square result as a Float. */
VALUE rb_rt_chisquare_force(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  if (ctx->ended == 0)
    rt_end(ctx);
  return DBL2NUM(ctx->r_chisq);
}
|
195
|
+
|
196
|
+
|
197
|
+
/* datasum! -- finalizes the context if that has not happened yet, then
   returns the context's r_datasum field as a Float. */
VALUE rb_rt_datasum_force(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  if (ctx->ended == 0)
    rt_end(ctx);
  return DBL2NUM(ctx->r_datasum);
}
|
206
|
+
|
207
|
+
|
208
|
+
/* montepi! -- finalizes the context if that has not happened yet, then
   returns the stored Monte Carlo pi result as a Float. */
VALUE rb_rt_montepi_force(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  if (ctx->ended == 0)
    rt_end(ctx);
  return DBL2NUM(ctx->r_montepicalc);
}
|
217
|
+
|
218
|
+
|
219
|
+
/* scc! -- finalizes the context if that has not happened yet, then returns
   the stored serial correlation result as a Float. */
VALUE rb_rt_scc_force(VALUE self) {
  rt_ctx *ctx;

  Data_Get_Struct(self, rt_ctx, ctx);
  if (ctx->ended == 0)
    rt_end(ctx);
  return DBL2NUM(ctx->r_scc);
}
|
228
|
+
|
229
|
+
/* Stores a C double as a Float under the symbol named by key. */
static void rb_rt_hash_put(VALUE hash, const char *key, double value) {
  rb_hash_aset(hash, ID2SYM(rb_intern(key)), DBL2NUM(value));
}

/* result -- finalizes the context if necessary and returns a Hash with the
   keys :entropy, :mean, :chisquare, :datasum, :montepi and :scc, each mapped
   to the corresponding stored result as a Float. */
VALUE rb_rt_result(VALUE self) {
  rt_ctx *ctx;
  VALUE summary;

  Data_Get_Struct(self, rt_ctx, ctx);
  if (ctx->ended == 0)
    rt_end(ctx);

  summary = rb_hash_new();
  rb_rt_hash_put(summary, "entropy", ctx->r_ent);
  rb_rt_hash_put(summary, "mean", ctx->r_mean);
  rb_rt_hash_put(summary, "chisquare", ctx->r_chisq);
  rb_rt_hash_put(summary, "datasum", ctx->r_datasum);
  rb_rt_hash_put(summary, "montepi", ctx->r_montepicalc);
  rb_rt_hash_put(summary, "scc", ctx->r_scc);
  return summary;
}
|
247
|
+
|
248
|
+
void Init_ent_native() {
|
249
|
+
rt_init();
|
250
|
+
|
251
|
+
m_ent = rb_define_module("Ent");
|
252
|
+
|
253
|
+
c_random_test = rb_define_class_under(m_ent, "RandomTest", rb_cObject);
|
254
|
+
|
255
|
+
rb_define_singleton_method(c_random_test, "new", rb_rt_initialize, -1);
|
256
|
+
rb_define_method(c_random_test, "binary?", rb_rt_get_binmode, 0);
|
257
|
+
|
258
|
+
rb_define_method(c_random_test, "read", rb_rt_read_string, 1);
|
259
|
+
// rb_define_method(c_random_test, "read_string", rb_rt_read_string, 1);
|
260
|
+
// rb_define_method(c_random_test, "read_file", rb_rt_read_file, 1);
|
261
|
+
rb_define_method(c_random_test, "finalize", rb_rt_final, 0);
|
262
|
+
rb_define_method(c_random_test, "result", rb_rt_result, 0);
|
263
|
+
rb_define_method(c_random_test, "entropy", rb_rt_entropy, 0);
|
264
|
+
rb_define_method(c_random_test, "mean", rb_rt_mean, 0);
|
265
|
+
rb_define_method(c_random_test, "chisquare", rb_rt_chisquare, 0);
|
266
|
+
rb_define_method(c_random_test, "datasum", rb_rt_datasum, 0);
|
267
|
+
rb_define_method(c_random_test, "montepi", rb_rt_montepi, 0);
|
268
|
+
rb_define_method(c_random_test, "scc", rb_rt_scc, 0);
|
269
|
+
rb_define_method(c_random_test, "entropy!", rb_rt_entropy_force, 0);
|
270
|
+
rb_define_method(c_random_test, "mean!", rb_rt_mean_force, 0);
|
271
|
+
rb_define_method(c_random_test, "chisquare!", rb_rt_chisquare_force, 0);
|
272
|
+
rb_define_method(c_random_test, "datasum!", rb_rt_datasum_force, 0);
|
273
|
+
rb_define_method(c_random_test, "montepi!", rb_rt_montepi_force, 0);
|
274
|
+
rb_define_method(c_random_test, "scc!", rb_rt_scc_force, 0);
|
275
|
+
}
|
276
|
+
|
277
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
/* ISO 8859/1 Latin-1 alphabetic and upper and lower case bit vector tables. */

/* LINTLIBRARY */

/* Each table holds 256 bits, one per character code. The rows below hold
   eight bytes each, i.e. 64 character codes per row. */

unsigned char isoalpha[32] = {
      0,   0,   0,   0,   0,   0,   0,   0,   /* 0x00 - 0x3F */
    127, 255, 255, 224, 127, 255, 255, 224,   /* 0x40 - 0x7F */
      0,   0,   0,   0,   0,   0,   0,   0,   /* 0x80 - 0xBF */
    255, 255, 254, 255, 255, 255, 254, 255    /* 0xC0 - 0xFF */
};

unsigned char isoupper[32] = {
      0,   0,   0,   0,   0,   0,   0,   0,   /* 0x00 - 0x3F */
    127, 255, 255, 224,   0,   0,   0,   0,   /* 0x40 - 0x7F */
      0,   0,   0,   0,   0,   0,   0,   0,   /* 0x80 - 0xBF */
    255, 255, 254, 254,   0,   0,   0,   0    /* 0xC0 - 0xFF */
};

unsigned char isolower[32] = {
      0,   0,   0,   0,   0,   0,   0,   0,   /* 0x00 - 0x3F */
      0,   0,   0,   0, 127, 255, 255, 224,   /* 0x40 - 0x7F */
      0,   0,   0,   0,   0,   0,   0,   0,   /* 0x80 - 0xBF */
      0,   0,   0,   1, 255, 255, 254, 255    /* 0xC0 - 0xFF */
};
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
/* ISO 8859/1 Latin-1 "ctype" macro replacements. */

#ifndef ISO8859_H
#define ISO8859_H

#include <ctype.h> /* isascii, isspace, toupper, tolower used by the macros below */

extern unsigned char isoalpha[32], isoupper[32], isolower[32];

/* Every macro reduces its argument to unsigned char before testing it, so a
   plain (possibly signed) char holding a Latin-1 code >= 0x80 is classified
   correctly. The argument is evaluated more than once: pass side-effect-free
   expressions only. */

/* ASCII whitespace or the no-break space 0xA0 */
#define isISOspace(x)	((isascii(((unsigned char) (x))) && isspace(((unsigned char) (x)))) || (((unsigned char) (x)) == 0xA0))
/* Table lookups: byte index is code / 8, bit mask is 0x80 >> (code % 8) */
#define isISOalpha(x)	((isoalpha[(((unsigned char) (x))) / 8] & (0x80 >> ((((unsigned char) (x))) % 8))) != 0)
#define isISOupper(x)	((isoupper[(((unsigned char) (x))) / 8] & (0x80 >> ((((unsigned char) (x))) % 8))) != 0)
#define isISOlower(x)	((isolower[(((unsigned char) (x))) / 8] & (0x80 >> ((((unsigned char) (x))) % 8))) != 0)
/* ' ' .. '~' or anything from 0xA0 upwards */
#define isISOprint(x)	(((((unsigned char) (x)) >= ' ') && (((unsigned char) (x)) <= '~')) || (((unsigned char) (x)) >= 0xA0))
/* Non-ASCII lower case letters map 0x20 down, except 0xDF and 0xFF which are returned unchanged */
#define toISOupper(x)	(isISOlower(x) ? (isascii(((unsigned char) (x))) ?	\
			    toupper(((unsigned char) (x))) : (((((unsigned char) (x)) != 0xDF) && \
			    (((unsigned char) (x)) != 0xFF)) ? \
			    (((unsigned char) (x)) - 0x20) : (x))) : (x))
/* Non-ASCII upper case letters map 0x20 up */
#define toISOlower(x)	(isISOupper(x) ? (isascii(((unsigned char) (x))) ?	\
			    tolower(((unsigned char) (x))) : (((unsigned char) (x)) + 0x20)) \
			    : (x))

#endif /* ISO8859_H */
|
@@ -0,0 +1,167 @@
|
|
1
|
+
/*
|
2
|
+
|
3
|
+
Apply various randomness tests to a stream of bytes
|
4
|
+
|
5
|
+
by John Walker -- September 1996
|
6
|
+
http://www.fourmilab.ch/
|
7
|
+
|
8
|
+
|
9
|
+
Minor modifications made for use as a library
|
10
|
+
in ruby bindings -- Eric Monti
|
11
|
+
*/
|
12
|
+
|
13
|
+
#include <math.h>
|
14
|
+
#include <stdlib.h>
|
15
|
+
#include <strings.h>
|
16
|
+
#include "randtest.h"
|
17
|
+
|
18
|
+
#define FALSE 0
|
19
|
+
#define TRUE 1
|
20
|
+
|
21
|
+
static double incirc; /* In-circle distance for Monte Carlo */
|
22
|
+
|
23
|
+
#define log2of10 3.32192809488736234787
|
24
|
+
static double rt_log2(double x)
|
25
|
+
{
|
26
|
+
return log2of10 * log10(x);
|
27
|
+
}
|
28
|
+
|
29
|
+
void rt_init() {
|
30
|
+
incirc = pow(pow(256.0, (double) (MONTEN / 2)) - 1, 2.0);
|
31
|
+
}
|
32
|
+
|
33
|
+
/* RT_INIT -- Initialise random test counters. */
|
34
|
+
rt_ctx * rt_new()
|
35
|
+
{
|
36
|
+
rt_ctx * ctx;
|
37
|
+
|
38
|
+
if (ctx=(rt_ctx *) malloc(sizeof(rt_ctx))) {
|
39
|
+
bzero(ctx, sizeof(rt_ctx));
|
40
|
+
|
41
|
+
/* Initialise for calculations */
|
42
|
+
ctx->sccfirst = TRUE; /* Mark first time for serial correlation */
|
43
|
+
}
|
44
|
+
|
45
|
+
return ctx;
|
46
|
+
}
|
47
|
+
|
48
|
+
/* RT_ADD -- Add one or more bytes to accumulation. */
|
49
|
+
|
50
|
+
void rt_add(rt_ctx * ctx, void *buf, size_t bufl)
|
51
|
+
{
|
52
|
+
unsigned char *bp = buf;
|
53
|
+
int oc, c, bean;
|
54
|
+
|
55
|
+
while (bean = 0, (bufl-- > 0)) {
|
56
|
+
oc = *bp++;
|
57
|
+
|
58
|
+
do {
|
59
|
+
if (ctx->binary) {
|
60
|
+
c = !!(oc & 0x80);
|
61
|
+
} else {
|
62
|
+
c = oc;
|
63
|
+
}
|
64
|
+
ctx->ccount[c]++; /* Update counter for this bin */
|
65
|
+
ctx->totalc++;
|
66
|
+
|
67
|
+
/* Update inside / outside circle counts for Monte Carlo
|
68
|
+
computation of PI */
|
69
|
+
|
70
|
+
if (bean == 0) {
|
71
|
+
ctx->monte[ctx->mp++] = oc; /* Save character for Monte Carlo */
|
72
|
+
if (ctx->mp >= MONTEN) { /* Calculate every MONTEN character */
|
73
|
+
int mj;
|
74
|
+
|
75
|
+
ctx->mp = 0;
|
76
|
+
ctx->mcount++;
|
77
|
+
ctx->montex = ctx->montey = 0;
|
78
|
+
for (mj = 0; mj < MONTEN / 2; mj++) {
|
79
|
+
ctx->montex = (ctx->montex * 256.0) + ctx->monte[mj];
|
80
|
+
ctx->montey = (ctx->montey * 256.0) + ctx->monte[(MONTEN / 2) + mj];
|
81
|
+
}
|
82
|
+
if ((ctx->montex * ctx->montex + ctx->montey * ctx->montey) <= incirc) {
|
83
|
+
ctx->inmont++;
|
84
|
+
}
|
85
|
+
}
|
86
|
+
}
|
87
|
+
|
88
|
+
/* Update calculation of serial correlation coefficient */
|
89
|
+
|
90
|
+
ctx->sccun = c;
|
91
|
+
if (ctx->sccfirst) {
|
92
|
+
ctx->sccfirst = FALSE;
|
93
|
+
ctx->scclast = 0;
|
94
|
+
ctx->sccu0 = ctx->sccun;
|
95
|
+
} else {
|
96
|
+
ctx->scct1 = ctx->scct1 + ctx->scclast * ctx->sccun;
|
97
|
+
}
|
98
|
+
ctx->scct2 = ctx->scct2 + ctx->sccun;
|
99
|
+
ctx->scct3 = ctx->scct3 + (ctx->sccun * ctx->sccun);
|
100
|
+
ctx->scclast = ctx->sccun;
|
101
|
+
oc <<= 1;
|
102
|
+
} while (ctx->binary && (++bean < 8));
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
/* RT_END -- Complete calculation and return results. */
|
107
|
+
|
108
|
+
void rt_end(rt_ctx * ctx)
|
109
|
+
{
|
110
|
+
int i;
|
111
|
+
double ent, chisq, datasum, montepi, scc;
|
112
|
+
ent = chisq = datasum = montepi = scc = 0.0;
|
113
|
+
|
114
|
+
/* Complete calculation of serial correlation coefficient */
|
115
|
+
|
116
|
+
ctx->scct1 = ctx->scct1 + ctx->scclast * ctx->sccu0;
|
117
|
+
ctx->scct2 = ctx->scct2 * ctx->scct2;
|
118
|
+
scc = ctx->totalc * ctx->scct3 - ctx->scct2;
|
119
|
+
if (scc == 0.0) {
|
120
|
+
scc = -100000;
|
121
|
+
} else {
|
122
|
+
scc = (ctx->totalc * ctx->scct1 - ctx->scct2) / scc;
|
123
|
+
}
|
124
|
+
|
125
|
+
/* Scan bins and calculate probability for each bin and
|
126
|
+
Chi-Square distribution. The probability will be reused
|
127
|
+
in the entropy calculation below. While we're at it,
|
128
|
+
we sum of all the data which will be used to compute the
|
129
|
+
mean. */
|
130
|
+
|
131
|
+
/* Expected count per bin */
|
132
|
+
ctx->cexp = ctx->totalc / (ctx->binary ? 2.0 : 256.0);
|
133
|
+
|
134
|
+
for (i = 0; i < (ctx->binary ? 2 : 256); i++) {
|
135
|
+
double a = ctx->ccount[i] - ctx->cexp;;
|
136
|
+
|
137
|
+
ctx->prob[i] = ((double) ctx->ccount[i]) / ctx->totalc;
|
138
|
+
chisq += (a * a) / ctx->cexp;
|
139
|
+
datasum += ((double) i) * ctx->ccount[i];
|
140
|
+
}
|
141
|
+
|
142
|
+
/* Calculate entropy */
|
143
|
+
|
144
|
+
for (i = 0; i < (ctx->binary ? 2 : 256); i++) {
|
145
|
+
if (ctx->prob[i] > 0.0) {
|
146
|
+
ent += ctx->prob[i] * rt_log2(1 / ctx->prob[i]);
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
/* Calculate Monte Carlo value for PI from percentage of hits
|
151
|
+
within the circle */
|
152
|
+
|
153
|
+
montepi = 4.0 * (((double) ctx->inmont) / ctx->mcount);
|
154
|
+
|
155
|
+
/* Return results through arguments */
|
156
|
+
|
157
|
+
ctx->r_ent = ent;
|
158
|
+
ctx->r_chisq = chisq;
|
159
|
+
ctx->r_mean = datasum / ctx->totalc;
|
160
|
+
ctx->r_montepicalc = montepi;
|
161
|
+
ctx->r_scc = scc;
|
162
|
+
ctx->ended = 1;
|
163
|
+
}
|
164
|
+
|
165
|
+
/* Releases the memory of a context by passing it to free(). */
void rt_free(rt_ctx *ctx)
{
    free(ctx);
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#ifndef RANDTEST_H
#define RANDTEST_H

#include <stddef.h> /* size_t */

#define MONTEN	6		      /* Bytes used as Monte Carlo
					 co-ordinates.	This should be no more
					 bits than the mantissa of your
                                         "double" floating point type. */

/* State of one randomness test run: running accumulators plus the results
   that become available once `ended` is set. */
typedef struct {
  int binary;       /* use byte(0) or binary(1) mode ? */
  int sccfirst;     /* first time for serial correlation ? */
  size_t totalc;    /* Total samples counted: bytes, or bits in binary mode */
  int mp;           /* Monte Carlo accumulator pointer */
  size_t inmont;    /* Monte Carlo inside count */
  size_t mcount;    /* Monte Carlo tries */
  double cexp;      /* expected count per bin */
  double montex;    /* Monte Carlo x co-ordinate */
  double montey;    /* Monte Carlo y co-ordinate */
  double sccun;     /* serial correlation: current sample */
  double sccu0;     /* serial correlation: first sample */
  double scclast;   /* serial correlation: previous sample */
  double scct1;     /* serial correlation term 1 */
  double scct2;     /* serial correlation term 2 */
  double scct3;     /* serial correlation term 3 */
  unsigned int monte[MONTEN];   /* Monte Carlo co-ordinates */
  size_t ccount[256];           /* Bins to count occurrences of values */
  double prob[256];             /* Probabilities per bin for entropy */

  int ended;            /* non-zero once the results below have been computed */
  double r_ent;         /* result: entropy */
  double r_mean;        /* result: mean */
  double r_chisq;       /* result: chi-square */
  double r_datasum;     /* result: data sum */
  double r_montepicalc; /* result: Monte Carlo value for pi */
  double r_scc;         /* result: serial correlation coefficient */
} rt_ctx;

#endif /* RANDTEST_H */
|
34
|
+
|
35
|
+
|
data/lib/ent.rb
ADDED
data/spec/ent_spec.rb
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Make the gem's lib directory and this spec directory loadable.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)

require 'rspec'
require 'ent'

# Load every support file (custom matchers, macros, etc.) found under
# ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

RSpec.configure do |config|

end
|
metadata
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ent
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Eric Monti
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-03-10 00:00:00 -06:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ~>
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 2
|
29
|
+
- 3
|
30
|
+
- 0
|
31
|
+
version: 2.3.0
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: bundler
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ~>
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 1
|
43
|
+
- 0
|
44
|
+
- 0
|
45
|
+
version: 1.0.0
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: jeweler
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ~>
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
segments:
|
56
|
+
- 1
|
57
|
+
- 5
|
58
|
+
- 2
|
59
|
+
version: 1.5.2
|
60
|
+
type: :development
|
61
|
+
version_requirements: *id003
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rcov
|
64
|
+
prerelease: false
|
65
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
segments:
|
70
|
+
- 0
|
71
|
+
version: "0"
|
72
|
+
type: :development
|
73
|
+
version_requirements: *id004
|
74
|
+
- !ruby/object:Gem::Dependency
|
75
|
+
name: rake-compiler
|
76
|
+
prerelease: false
|
77
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
segments:
|
82
|
+
- 0
|
83
|
+
version: "0"
|
84
|
+
type: :development
|
85
|
+
version_requirements: *id005
|
86
|
+
description: Calculate the entropy of data
|
87
|
+
email: esmonti@gmail.com
|
88
|
+
executables: []
|
89
|
+
|
90
|
+
extensions:
|
91
|
+
- ext/ent_native/extconf.rb
|
92
|
+
extra_rdoc_files:
|
93
|
+
- LICENSE.txt
|
94
|
+
- README.rdoc
|
95
|
+
- ext/ent_native/chisq.c
|
96
|
+
- ext/ent_native/ent_native.c
|
97
|
+
- ext/ent_native/iso8859.c
|
98
|
+
- ext/ent_native/randtest.c
|
99
|
+
files:
|
100
|
+
- .document
|
101
|
+
- .rspec
|
102
|
+
- Gemfile
|
103
|
+
- LICENSE.txt
|
104
|
+
- README.rdoc
|
105
|
+
- Rakefile
|
106
|
+
- VERSION
|
107
|
+
- ext/ent_native/chisq.c
|
108
|
+
- ext/ent_native/ent_native.c
|
109
|
+
- ext/ent_native/extconf.rb
|
110
|
+
- ext/ent_native/iso8859.c
|
111
|
+
- ext/ent_native/iso8859.h
|
112
|
+
- ext/ent_native/randtest.c
|
113
|
+
- ext/ent_native/randtest.h
|
114
|
+
- lib/ent.rb
|
115
|
+
- spec/ent_spec.rb
|
116
|
+
- spec/spec_helper.rb
|
117
|
+
has_rdoc: true
|
118
|
+
homepage: http://github.com/emonti/ent
|
119
|
+
licenses:
|
120
|
+
- MIT
|
121
|
+
post_install_message:
|
122
|
+
rdoc_options: []
|
123
|
+
|
124
|
+
require_paths:
|
125
|
+
- lib
|
126
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
segments:
|
131
|
+
- 0
|
132
|
+
version: "0"
|
133
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
segments:
|
138
|
+
- 0
|
139
|
+
version: "0"
|
140
|
+
requirements: []
|
141
|
+
|
142
|
+
rubyforge_project:
|
143
|
+
rubygems_version: 1.3.6
|
144
|
+
signing_key:
|
145
|
+
specification_version: 3
|
146
|
+
summary: Calculate the entropy of data
|
147
|
+
test_files:
|
148
|
+
- spec/ent_spec.rb
|
149
|
+
- spec/spec_helper.rb
|