szaru 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/Makefile +157 -0
- data/ext/converter.h +69 -0
- data/ext/extconf.rb +5 -0
- data/ext/rb_szaru.cc +268 -0
- data/ext/rb_szaru.o +0 -0
- data/ext/szaru.so +0 -0
- data/overview.rd +33 -0
- data/sample/sample.rb +68 -0
- data/szaru-doc.rb +189 -0
- data/test/quantile_spec.rb +72 -0
- data/test/top_spec.rb +110 -0
- data/test/unique_spec.rb +39 -0
- metadata +82 -0
data/ext/Makefile
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = .
|
7
|
+
topdir = /usr/local/lib/ruby/1.8/i686-linux
|
8
|
+
hdrdir = $(topdir)
|
9
|
+
VPATH = $(srcdir):$(topdir):$(hdrdir)
|
10
|
+
exec_prefix = $(prefix)
|
11
|
+
prefix = $(DESTDIR)/usr/local
|
12
|
+
sharedstatedir = $(prefix)/com
|
13
|
+
mandir = $(datarootdir)/man
|
14
|
+
psdir = $(docdir)
|
15
|
+
oldincludedir = $(DESTDIR)/usr/include
|
16
|
+
localedir = $(datarootdir)/locale
|
17
|
+
bindir = $(exec_prefix)/bin
|
18
|
+
libexecdir = $(exec_prefix)/libexec
|
19
|
+
sitedir = $(libdir)/ruby/site_ruby
|
20
|
+
htmldir = $(docdir)
|
21
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
22
|
+
includedir = $(prefix)/include
|
23
|
+
infodir = $(datarootdir)/info
|
24
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
25
|
+
sysconfdir = $(prefix)/etc
|
26
|
+
libdir = $(exec_prefix)/lib
|
27
|
+
sbindir = $(exec_prefix)/sbin
|
28
|
+
rubylibdir = $(libdir)/ruby/$(ruby_version)
|
29
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
30
|
+
dvidir = $(docdir)
|
31
|
+
vendordir = $(libdir)/ruby/vendor_ruby
|
32
|
+
datarootdir = $(prefix)/share
|
33
|
+
pdfdir = $(docdir)
|
34
|
+
archdir = $(rubylibdir)/$(arch)
|
35
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
36
|
+
datadir = $(datarootdir)
|
37
|
+
localstatedir = $(prefix)/var
|
38
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
39
|
+
|
40
|
+
CC = gcc
|
41
|
+
LIBRUBY = $(LIBRUBY_A)
|
42
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
43
|
+
LIBRUBYARG_SHARED = -Wl,-R -Wl,$(libdir) -L$(libdir)
|
44
|
+
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
|
45
|
+
|
46
|
+
RUBY_EXTCONF_H =
|
47
|
+
CFLAGS = -fPIC -g -O2 $(cflags)
|
48
|
+
INCFLAGS = -I. -I. -I/usr/local/lib/ruby/1.8/i686-linux -I.
|
49
|
+
DEFS = -D_FILE_OFFSET_BITS=64
|
50
|
+
CPPFLAGS = -D_FILE_OFFSET_BITS=64
|
51
|
+
CXXFLAGS = $(CFLAGS)
|
52
|
+
ldflags = -L. -rdynamic -Wl,-export-dynamic
|
53
|
+
dldflags =
|
54
|
+
archflag =
|
55
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
|
56
|
+
LDSHARED = $(CC) -shared
|
57
|
+
AR = ar
|
58
|
+
EXEEXT =
|
59
|
+
|
60
|
+
RUBY_INSTALL_NAME = ruby
|
61
|
+
RUBY_SO_NAME = ruby
|
62
|
+
arch = i686-linux
|
63
|
+
sitearch = i686-linux
|
64
|
+
ruby_version = 1.8
|
65
|
+
ruby = /usr/local/bin/ruby
|
66
|
+
RUBY = $(ruby)
|
67
|
+
RM = rm -f
|
68
|
+
MAKEDIRS = mkdir -p
|
69
|
+
INSTALL = /usr/bin/install -c
|
70
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
71
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
72
|
+
COPY = cp
|
73
|
+
|
74
|
+
#### End of system configuration section. ####
|
75
|
+
|
76
|
+
preload =
|
77
|
+
|
78
|
+
libpath = . $(libdir)
|
79
|
+
LIBPATH = -L. -L$(libdir) -Wl,-R$(libdir)
|
80
|
+
DEFFILE =
|
81
|
+
|
82
|
+
CLEANFILES = mkmf.log
|
83
|
+
DISTCLEANFILES =
|
84
|
+
|
85
|
+
extout =
|
86
|
+
extout_prefix =
|
87
|
+
target_prefix =
|
88
|
+
LOCAL_LIBS =
|
89
|
+
LIBS = -lszaru -lrt -ldl -lcrypt -lm -lc
|
90
|
+
SRCS = rb_szaru.cc
|
91
|
+
OBJS = rb_szaru.o
|
92
|
+
TARGET = szaru
|
93
|
+
DLLIB = $(TARGET).so
|
94
|
+
EXTSTATIC =
|
95
|
+
STATIC_LIB =
|
96
|
+
|
97
|
+
BINDIR = $(bindir)
|
98
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
99
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
100
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
101
|
+
|
102
|
+
TARGET_SO = $(DLLIB)
|
103
|
+
CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
|
104
|
+
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
|
105
|
+
|
106
|
+
all: $(DLLIB)
|
107
|
+
static: $(STATIC_LIB)
|
108
|
+
|
109
|
+
clean:
|
110
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
111
|
+
|
112
|
+
distclean: clean
|
113
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
114
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
115
|
+
|
116
|
+
realclean: distclean
|
117
|
+
install: install-so install-rb
|
118
|
+
|
119
|
+
install-so: $(RUBYARCHDIR)
|
120
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
121
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
122
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
123
|
+
install-rb: pre-install-rb install-rb-default
|
124
|
+
install-rb-default: pre-install-rb-default
|
125
|
+
pre-install-rb: Makefile
|
126
|
+
pre-install-rb-default: Makefile
|
127
|
+
$(RUBYARCHDIR):
|
128
|
+
$(MAKEDIRS) $@
|
129
|
+
|
130
|
+
site-install: site-install-so site-install-rb
|
131
|
+
site-install-so: install-so
|
132
|
+
site-install-rb: install-rb
|
133
|
+
|
134
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
135
|
+
|
136
|
+
.cc.o:
|
137
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
138
|
+
|
139
|
+
.cxx.o:
|
140
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
141
|
+
|
142
|
+
.cpp.o:
|
143
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
144
|
+
|
145
|
+
.C.o:
|
146
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
147
|
+
|
148
|
+
.c.o:
|
149
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
|
150
|
+
|
151
|
+
$(DLLIB): $(OBJS) Makefile
|
152
|
+
@-$(RM) $@
|
153
|
+
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
# rb_szaru.cc includes the local converter.h, so a change to that header must
# rebuild the object as well.
# NOTE(review): this Makefile looks mkmf-generated; mirror the dependency in an
# ext/depend file so it survives regeneration by extconf.rb.
$(OBJS): ruby.h defines.h converter.h
|
data/ext/converter.h
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
// Copyright 2010 Yuji Kaneda
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
// ------------------------------------------------------------------------
|
15
|
+
|
16
|
+
// specialization of Conveter only for int32_t, int64_t, double
|
17
|
+
template< typename Value >
|
18
|
+
class Converter {
|
19
|
+
public:
|
20
|
+
static inline VALUE ToRuby(Value value);
|
21
|
+
static inline Value FromRuby(VALUE value);
|
22
|
+
static inline void CheckType(VALUE value);
|
23
|
+
};
|
24
|
+
|
25
|
+
template<> inline
|
26
|
+
VALUE Converter<int32_t>::ToRuby(int32_t value){
|
27
|
+
return INT2FIX(value);
|
28
|
+
}
|
29
|
+
|
30
|
+
template<> inline
|
31
|
+
VALUE Converter<int64_t>::ToRuby(int64_t value){
|
32
|
+
return LONG2FIX(value);
|
33
|
+
}
|
34
|
+
|
35
|
+
template<> inline
|
36
|
+
VALUE Converter<double>::ToRuby(double value){
|
37
|
+
return rb_float_new(value);
|
38
|
+
}
|
39
|
+
|
40
|
+
template<> inline
|
41
|
+
int32_t Converter<int32_t>::FromRuby(VALUE value){
|
42
|
+
return FIX2INT(value);
|
43
|
+
}
|
44
|
+
|
45
|
+
template<> inline
|
46
|
+
int64_t Converter<int64_t>::FromRuby(VALUE value){
|
47
|
+
return FIX2LONG(value);
|
48
|
+
}
|
49
|
+
|
50
|
+
template<> inline
|
51
|
+
double Converter<double>::FromRuby(VALUE value){
|
52
|
+
return RFLOAT(value)->value;
|
53
|
+
}
|
54
|
+
|
55
|
+
template<> inline
|
56
|
+
void Converter<int32_t>::CheckType(VALUE value){
|
57
|
+
Check_Type(value, T_FIXNUM);
|
58
|
+
}
|
59
|
+
|
60
|
+
template<> inline
|
61
|
+
void Converter<int64_t>::CheckType(VALUE value){
|
62
|
+
Check_Type(value, T_FIXNUM);
|
63
|
+
}
|
64
|
+
|
65
|
+
template<> inline
|
66
|
+
void Converter<double>::CheckType(VALUE value){
|
67
|
+
Check_Type(value, T_FLOAT);
|
68
|
+
}
|
69
|
+
|
data/ext/extconf.rb
ADDED
data/ext/rb_szaru.cc
ADDED
@@ -0,0 +1,268 @@
|
|
1
|
+
// Copyright 2010 Yuji Kaneda
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
// ------------------------------------------------------------------------
|
15
|
+
|
16
|
+
// Ruby Binding of SZaru
|
17
|
+
|
18
|
+
#include <ruby.h>
|
19
|
+
#include <szaru.h>
|
20
|
+
// local include file
|
21
|
+
#include "converter.h"
|
22
|
+
|
23
|
+
extern "C" {
|
24
|
+
void Init_szaru(void);
|
25
|
+
}
|
26
|
+
|
27
|
+
namespace {
|
28
|
+
|
29
|
+
template<typename Value>
|
30
|
+
class RubyQuantileEstimator {
|
31
|
+
private:
|
32
|
+
static void
|
33
|
+
Free(SZaru::QuantileEstimator<Value> **ptr)
|
34
|
+
{
|
35
|
+
if (*ptr) {
|
36
|
+
delete *ptr;
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
static VALUE
|
41
|
+
Alloc(VALUE klass)
|
42
|
+
{
|
43
|
+
SZaru::QuantileEstimator<Value> **ptr = ALLOC(SZaru::QuantileEstimator<Value>*);
|
44
|
+
*ptr = NULL;
|
45
|
+
return Data_Wrap_Struct(klass, 0, Free, ptr);
|
46
|
+
}
|
47
|
+
|
48
|
+
static VALUE
|
49
|
+
Initialize(VALUE self, VALUE maxElems)
|
50
|
+
{
|
51
|
+
SZaru::QuantileEstimator<Value> **ptr;
|
52
|
+
Data_Get_Struct(self, SZaru::QuantileEstimator<Value>*, ptr);
|
53
|
+
*ptr = SZaru::QuantileEstimator<Value>::Create(NUM2LONG(maxElems));
|
54
|
+
return Qnil;
|
55
|
+
}
|
56
|
+
|
57
|
+
static VALUE
|
58
|
+
AddElem(VALUE self, VALUE elem)
|
59
|
+
{
|
60
|
+
SZaru::QuantileEstimator<Value> **qe;
|
61
|
+
Converter<Value>::CheckType(elem);
|
62
|
+
Data_Get_Struct(self, SZaru::QuantileEstimator<Value>*, qe);
|
63
|
+
(*qe)->AddElem(Converter<Value>::FromRuby(elem));
|
64
|
+
return Qnil;
|
65
|
+
}
|
66
|
+
|
67
|
+
static VALUE
|
68
|
+
Estimate(VALUE self)
|
69
|
+
{
|
70
|
+
SZaru::QuantileEstimator<Value> **qe;
|
71
|
+
Data_Get_Struct(self, SZaru::QuantileEstimator<Value>*, qe);
|
72
|
+
std::vector<Value> quantiles;
|
73
|
+
(*qe)->Estimate(quantiles);
|
74
|
+
VALUE ary = rb_ary_new2(quantiles.size());
|
75
|
+
for (int i = 0; i < quantiles.size(); i++) {
|
76
|
+
rb_ary_push(ary, Converter<Value>::ToRuby(quantiles[i]));
|
77
|
+
}
|
78
|
+
return ary;
|
79
|
+
}
|
80
|
+
|
81
|
+
public:
|
82
|
+
|
83
|
+
static VALUE
|
84
|
+
Define(VALUE superModule, const char *name)
|
85
|
+
{
|
86
|
+
VALUE cQuantileEstimator = rb_define_class_under(superModule, name, rb_cObject);
|
87
|
+
rb_define_alloc_func(cQuantileEstimator, Alloc);
|
88
|
+
rb_define_private_method(cQuantileEstimator, "initialize",
|
89
|
+
RUBY_METHOD_FUNC(Initialize), 1);
|
90
|
+
rb_define_method(cQuantileEstimator, "add_elem",
|
91
|
+
RUBY_METHOD_FUNC(AddElem), 1);
|
92
|
+
rb_define_method(cQuantileEstimator, "estimate",
|
93
|
+
RUBY_METHOD_FUNC(Estimate), 0);
|
94
|
+
return cQuantileEstimator;
|
95
|
+
}
|
96
|
+
};
|
97
|
+
|
98
|
+
|
99
|
+
template< typename Value >
|
100
|
+
class RubyTopEstimator {
|
101
|
+
private:
|
102
|
+
|
103
|
+
static void
|
104
|
+
Free(SZaru::TopEstimator<Value> **ptr)
|
105
|
+
{
|
106
|
+
if (*ptr) {
|
107
|
+
delete *ptr;
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
static VALUE
|
112
|
+
Alloc(VALUE klass)
|
113
|
+
{
|
114
|
+
SZaru::TopEstimator<Value> **ptr = ALLOC(SZaru::TopEstimator<Value>*);
|
115
|
+
*ptr = NULL;
|
116
|
+
return Data_Wrap_Struct(klass, 0, Free, ptr);
|
117
|
+
}
|
118
|
+
|
119
|
+
static VALUE
|
120
|
+
Initialize(VALUE self, VALUE maxElems)
|
121
|
+
{
|
122
|
+
SZaru::TopEstimator<Value> **ptr;
|
123
|
+
Check_Type(maxElems, T_FIXNUM);
|
124
|
+
Data_Get_Struct(self, SZaru::TopEstimator<Value>*, ptr);
|
125
|
+
*ptr = SZaru::TopEstimator<Value>::Create(NUM2LONG(maxElems));
|
126
|
+
return Qnil;
|
127
|
+
}
|
128
|
+
|
129
|
+
static VALUE
|
130
|
+
AddElem(VALUE self, VALUE elem)
|
131
|
+
{
|
132
|
+
SZaru::TopEstimator<Value> **te;
|
133
|
+
Check_Type(elem, T_STRING);
|
134
|
+
Data_Get_Struct(self, SZaru::TopEstimator<Value>*, te);
|
135
|
+
(*te)->AddElem(std::string(RSTRING_PTR(elem), RSTRING_LEN(elem)));
|
136
|
+
return Qnil;
|
137
|
+
}
|
138
|
+
|
139
|
+
static VALUE
|
140
|
+
AddWeightedElem(VALUE self, VALUE elem, VALUE weight)
|
141
|
+
{
|
142
|
+
SZaru::TopEstimator<Value> **te;
|
143
|
+
Check_Type(elem, T_STRING);
|
144
|
+
Converter<Value>::CheckType(weight);
|
145
|
+
Data_Get_Struct(self, SZaru::TopEstimator<Value>*, te);
|
146
|
+
(*te)->AddWeightedElem(std::string(RSTRING_PTR(elem), RSTRING_LEN(elem)),
|
147
|
+
Converter<Value>::FromRuby(weight));
|
148
|
+
return Qnil;
|
149
|
+
}
|
150
|
+
|
151
|
+
static VALUE
|
152
|
+
Estimate(VALUE self)
|
153
|
+
{
|
154
|
+
SZaru::TopEstimator<Value> **te;
|
155
|
+
Data_Get_Struct(self, SZaru::TopEstimator<Value>*, te);
|
156
|
+
std::vector<typename SZaru::TopEstimator<Value>::Elem> topElems;
|
157
|
+
(*te)->Estimate(topElems);
|
158
|
+
VALUE ary = rb_ary_new2(topElems.size());
|
159
|
+
for (int i = 0; i < topElems.size(); i++) {
|
160
|
+
rb_ary_push(ary, rb_ary_new3(2,
|
161
|
+
rb_str_new(topElems[i].value.c_str(), topElems[i].value.size()),
|
162
|
+
Converter<Value>::ToRuby(topElems[i].weight)));
|
163
|
+
}
|
164
|
+
return ary;
|
165
|
+
}
|
166
|
+
|
167
|
+
public:
|
168
|
+
static VALUE
|
169
|
+
Define(VALUE superModule, const char *name)
|
170
|
+
{
|
171
|
+
VALUE cTopEstimator = rb_define_class_under(superModule, name, rb_cObject);
|
172
|
+
rb_define_alloc_func(cTopEstimator, Alloc);
|
173
|
+
rb_define_private_method(cTopEstimator, "initialize",
|
174
|
+
RUBY_METHOD_FUNC(Initialize), 1);
|
175
|
+
rb_define_method(cTopEstimator, "add_elem",
|
176
|
+
RUBY_METHOD_FUNC(AddElem), 1);
|
177
|
+
rb_define_method(cTopEstimator, "add_weighted_elem",
|
178
|
+
RUBY_METHOD_FUNC(AddWeightedElem), 2);
|
179
|
+
rb_define_method(cTopEstimator, "estimate",
|
180
|
+
RUBY_METHOD_FUNC(Estimate), 0);
|
181
|
+
return cTopEstimator;
|
182
|
+
}
|
183
|
+
|
184
|
+
};
|
185
|
+
|
186
|
+
|
187
|
+
class RubyUniqueEstimator {
|
188
|
+
private:
|
189
|
+
static void
|
190
|
+
Free(SZaru::UniqueEstimator **ptr)
|
191
|
+
{
|
192
|
+
if (*ptr) {
|
193
|
+
delete *ptr;
|
194
|
+
}
|
195
|
+
}
|
196
|
+
|
197
|
+
static VALUE
|
198
|
+
Alloc(VALUE klass)
|
199
|
+
{
|
200
|
+
SZaru::UniqueEstimator **ptr = ALLOC(SZaru::UniqueEstimator*);
|
201
|
+
*ptr = NULL;
|
202
|
+
return Data_Wrap_Struct(klass, 0, Free, ptr);
|
203
|
+
}
|
204
|
+
|
205
|
+
static VALUE
|
206
|
+
Initialize(VALUE self, VALUE maxElems)
|
207
|
+
{
|
208
|
+
SZaru::UniqueEstimator **ptr;
|
209
|
+
Data_Get_Struct(self, SZaru::UniqueEstimator*, ptr);
|
210
|
+
*ptr = SZaru::UniqueEstimator::Create(NUM2LONG(maxElems));
|
211
|
+
return Qnil;
|
212
|
+
}
|
213
|
+
|
214
|
+
static VALUE
|
215
|
+
AddElem(VALUE self, VALUE elem)
|
216
|
+
{
|
217
|
+
SZaru::UniqueEstimator **ue;
|
218
|
+
Check_Type(elem, T_STRING);
|
219
|
+
Data_Get_Struct(self, SZaru::UniqueEstimator*, ue);
|
220
|
+
(*ue)->AddElemInCIF(RSTRING_PTR(elem), RSTRING_LEN(elem));
|
221
|
+
return Qnil;
|
222
|
+
}
|
223
|
+
|
224
|
+
static VALUE
|
225
|
+
Estimate(VALUE self)
|
226
|
+
{
|
227
|
+
SZaru::UniqueEstimator **ue;
|
228
|
+
Data_Get_Struct(self, SZaru::UniqueEstimator*, ue);
|
229
|
+
uint64_t unique = (*ue)->Estimate();
|
230
|
+
return LONG2NUM(unique);
|
231
|
+
}
|
232
|
+
|
233
|
+
public:
|
234
|
+
static VALUE
|
235
|
+
Define(VALUE superModule, const char *name) {
|
236
|
+
VALUE cUniqueEstimator = rb_define_class_under(superModule, name, rb_cObject);
|
237
|
+
rb_define_alloc_func(cUniqueEstimator, Alloc);
|
238
|
+
rb_define_private_method(cUniqueEstimator, "initialize",
|
239
|
+
RUBY_METHOD_FUNC(Initialize), 1);
|
240
|
+
rb_define_method(cUniqueEstimator, "add_elem",
|
241
|
+
RUBY_METHOD_FUNC(AddElem), 1);
|
242
|
+
rb_define_method(cUniqueEstimator, "estimate",
|
243
|
+
RUBY_METHOD_FUNC(Estimate), 0);
|
244
|
+
return cUniqueEstimator;
|
245
|
+
}
|
246
|
+
};
|
247
|
+
|
248
|
+
}
|
249
|
+
|
250
|
+
|
251
|
+
void
|
252
|
+
Init_szaru(void){
|
253
|
+
VALUE mSZaru = rb_define_module("SZaru");
|
254
|
+
RubyUniqueEstimator::Define(mSZaru, "UniqueEstimator");
|
255
|
+
|
256
|
+
// TopEstimator
|
257
|
+
VALUE mTopEstimator = rb_define_module_under(mSZaru, "TopEstimator");
|
258
|
+
RubyTopEstimator<double>::Define(mTopEstimator, "Double");
|
259
|
+
RubyTopEstimator<int32_t>::Define(mTopEstimator, "Int32");
|
260
|
+
RubyTopEstimator<int64_t>::Define(mTopEstimator, "Int64");
|
261
|
+
|
262
|
+
// QuantileEstimator
|
263
|
+
VALUE mQuantileEstimator = rb_define_module_under(mSZaru, "QuantileEstimator");
|
264
|
+
RubyQuantileEstimator<double>::Define(mQuantileEstimator, "Double");
|
265
|
+
RubyQuantileEstimator<int32_t>::Define(mQuantileEstimator, "Int32");
|
266
|
+
RubyQuantileEstimator<int64_t>::Define(mQuantileEstimator, "Int64");
|
267
|
+
|
268
|
+
}
|
data/ext/rb_szaru.o
ADDED
Binary file
|
data/ext/szaru.so
ADDED
Binary file
|
data/overview.rd
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
= Ruby Binding of SZaru
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
{SZaru}[http://llamerada.github.com/SZaru/] is a library to use {Sawzall}[http://code.google.com/p/szl/] aggregators in pure C++, Ruby and Python.
|
5
|
+
Currently, I have implemented the following 3 aggregators:
|
6
|
+
[Top] Statistical samplings that record the 'top N' data items based on CountSketch algorithm from "Finding Frequent Items in Data Streams", Moses Charikar, Kevin Chen and Martin Farach-Colton, 2002.
|
7
|
+
[Unique] Statistical estimators for the total number of unique data items.
|
8
|
+
[Quantile] Approximate N-tiles for data items from an ordered domain based on the following paper: Munro & Paterson, "Selection and Sorting with Limited Storage", Theoretical Computer Science, Vol 12, p 315-323, 1980.
|
9
|
+
|
10
|
+
== Example
|
11
|
+
require "szaru"
|
12
|
+
unq_est = SZaru::UniqueEstimator.new(10)
|
13
|
+
1000.times do |i|
|
14
|
+
unq_est.add_elem(i.to_s + "test")
|
15
|
+
end
|
16
|
+
puts unq_est.estimate # => 913
|
17
|
+
|
18
|
+
== License
|
19
|
+
|
20
|
+
Copyright 2010 Yuji Kaneda
|
21
|
+
|
22
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
23
|
+
you may not use this file except in compliance with the License.
|
24
|
+
You may obtain a copy of the License at
|
25
|
+
|
26
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
27
|
+
|
28
|
+
Unless required by applicable law or agreed to in writing, software
|
29
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
30
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
31
|
+
See the License for the specific language governing permissions and
|
32
|
+
limitations under the License.
|
33
|
+
|
data/sample/sample.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# Prefer the installed gem; fall back to the extension built in ../ext.
# A failed require raises LoadError, which is a ScriptError and is therefore
# NOT caught by a bare `rescue` (that only handles StandardError), so the
# exception class has to be named for the fallback to ever run.
begin
  require "rubygems"
  require "szaru"
rescue LoadError
  puts "load local library"
  require File.join(File.dirname(__FILE__), "../ext/szaru")
end
|
8
|
+
|
9
|
+
def test_unique
|
10
|
+
ue = SZaru::UniqueEstimator.new(10)
|
11
|
+
1000.times do |i|
|
12
|
+
ue.add_elem(i.to_s + "test")
|
13
|
+
end
|
14
|
+
puts ue.estimate
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_top
|
18
|
+
te = SZaru::TopEstimator::Int32.new(10)
|
19
|
+
ary = []
|
20
|
+
100.times do |i|
|
21
|
+
i.times do |j|
|
22
|
+
ary << "test#{i}"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
ary.sort_by{rand}.each do |e|
|
26
|
+
te.add_elem(e)
|
27
|
+
end
|
28
|
+
p te.estimate
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_top2
|
32
|
+
te = SZaru::TopEstimator::Double.new(10)
|
33
|
+
ary = []
|
34
|
+
100.times do |i|
|
35
|
+
ary << ["test#{i}", i]
|
36
|
+
end
|
37
|
+
ary.sort_by{rand}.each do |e, w|
|
38
|
+
te.add_weighted_elem(e, w.to_f)
|
39
|
+
end
|
40
|
+
p te.estimate
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_quantile
|
44
|
+
te = SZaru::QuantileEstimator::Int64.new(10)
|
45
|
+
ary = []
|
46
|
+
1000.times do |i|
|
47
|
+
# i.times do |j|
|
48
|
+
ary << i
|
49
|
+
end
|
50
|
+
rand_ary = ary.sort_by{rand}
|
51
|
+
# $stdout.sync = true
|
52
|
+
# puts "start"
|
53
|
+
rand_ary.each_with_index do |e, i|
|
54
|
+
te.add_elem(e)
|
55
|
+
end
|
56
|
+
p te.estimate
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
# test_topheap
|
62
|
+
# test_sketch
|
63
|
+
test_unique
|
64
|
+
test_top
|
65
|
+
test_top2
|
66
|
+
test_quantile
|
67
|
+
|
68
|
+
|
data/szaru-doc.rb
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
#--
|
2
|
+
# SZaru: Porting of excellent Sawzall aggregators.
|
3
|
+
#++
|
4
|
+
#:include:overview.rd
|
5
|
+
|
6
|
+
|
7
|
+
# SZaru namespace
|
8
|
+
module SZaru
|
9
|
+
# Statistical estimators for the total number of unique data items.
|
10
|
+
class UniqueEstimator
|
11
|
+
# Create a UniqueEstimator object.
|
12
|
+
#
|
13
|
+
# _max_elems_ is a tuning parameter.
|
14
|
+
# If _max_elems_ is bigger, the estimation becomes more accurate but consuming more memory.
|
15
|
+
def initialize(max_elems)
|
16
|
+
# (native code)
|
17
|
+
end
|
18
|
+
|
19
|
+
# Add a new element to this entry.
|
20
|
+
# _element_ must be String object.
|
21
|
+
def add_elem(elem)
|
22
|
+
# (native code)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Return the estimated number of unique entries.
|
26
|
+
def estimate
|
27
|
+
# (native code)
|
28
|
+
end
|
29
|
+
end # UniqueEstimator
|
30
|
+
|
31
|
+
# Statistical samplings that record the 'top N' data items.
|
32
|
+
module TopEstimator
|
33
|
+
# TopEstimator of that weight is int32
|
34
|
+
class Int32
|
35
|
+
# Create a TopEstimator::Int32 object.
|
36
|
+
#
|
37
|
+
# _top_elems_ is a number of top elements to be estimate.
|
38
|
+
def initialize(top_elems)
|
39
|
+
# (native code)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Add a new element to this entry.
|
43
|
+
# _element_ must be String object.
|
44
|
+
def add_elem(elem)
|
45
|
+
# (native code)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Add a new weighted element to this entry.
|
49
|
+
# _element_ must be String object.
|
50
|
+
# _weight_ must be a Fixnum object.
|
51
|
+
def add_weighted_elem(elem, weight)
|
52
|
+
# (native code)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Return a top elements with weight.
|
56
|
+
# Example: [["abc", 7], ["def", 3]]
|
57
|
+
def estimate
|
58
|
+
# (native code)
|
59
|
+
end
|
60
|
+
end # Int32
|
61
|
+
|
62
|
+
# TopEstimator of that weight is int64
|
63
|
+
class Int64
|
64
|
+
# Create a TopEstimator::Int64 object.
|
65
|
+
#
|
66
|
+
# _top_elems_ is a number of top elements to be estimate.
|
67
|
+
def initialize(top_elems)
|
68
|
+
# (native code)
|
69
|
+
end
|
70
|
+
|
71
|
+
# Add a new element to this entry.
|
72
|
+
# _element_ must be String object.
|
73
|
+
def add_elem(elem)
|
74
|
+
# (native code)
|
75
|
+
end
|
76
|
+
|
77
|
+
# Add a new weighted element to this entry.
|
78
|
+
# _element_ must be String object.
|
79
|
+
# _weight_ must be a Fixnum object.
|
80
|
+
def add_weighted_elem(elem, weight)
|
81
|
+
# (native code)
|
82
|
+
end
|
83
|
+
|
84
|
+
# Return a top elements with weight.
|
85
|
+
# Example: [["abc", 7], ["def", 3]]
|
86
|
+
def estimate
|
87
|
+
# (native code)
|
88
|
+
end
|
89
|
+
end # Int64
|
90
|
+
|
91
|
+
# TopEstimator of that weight is Double
|
92
|
+
class Double
|
93
|
+
# Create a TopEstimator::Double object.
|
94
|
+
#
|
95
|
+
# _top_elems_ is a number of top elements to be estimate.
|
96
|
+
def initialize(top_elems)
|
97
|
+
# (native code)
|
98
|
+
end
|
99
|
+
|
100
|
+
# Add a new element to this entry.
|
101
|
+
# _element_ must be String object.
|
102
|
+
def add_elem(elem)
|
103
|
+
# (native code)
|
104
|
+
end
|
105
|
+
|
106
|
+
# Add a new weighted element to this entry.
|
107
|
+
# _element_ must be String object.
|
108
|
+
# _weight_ must be a Float object.
|
109
|
+
def add_weighted_elem(elem, weight)
|
110
|
+
# (native code)
|
111
|
+
end
|
112
|
+
|
113
|
+
# Return a top elements with weight.
|
114
|
+
# Example: [["abc", 7.0], ["def", 3.0]]
|
115
|
+
def estimate
|
116
|
+
# (native code)
|
117
|
+
end
|
118
|
+
end # Double
|
119
|
+
end # TopEstimator
|
120
|
+
|
121
|
+
# Approximate N-tiles for data items from an ordered domain.
|
122
|
+
module QuantileEstimator
|
123
|
+
# QuantileEstimator whose elements are int32
|
124
|
+
class Int32
|
125
|
+
# Create a QuantileEstimator::Int32 object.
|
126
|
+
#
|
127
|
+
# _num_quantiles_ is a number of tiles to be estimate.
|
128
|
+
def initialize(num_quantiles)
|
129
|
+
# (native code)
|
130
|
+
end
|
131
|
+
|
132
|
+
# Add a new element to this entry.
|
133
|
+
# _element_ must be Fixnum object.
|
134
|
+
def add_elem(elem)
|
135
|
+
# (native code)
|
136
|
+
end
|
137
|
+
|
138
|
+
# Return a estimated N tiles.
|
139
|
+
# Example: [0, 3, 7, 9]
|
140
|
+
def estimate()
|
141
|
+
# (native code)
|
142
|
+
end
|
143
|
+
end # Int32
|
144
|
+
|
145
|
+
# QuantileEstimator whose elements are int64
|
146
|
+
class Int64
|
147
|
+
# Create a QuantileEstimator::Int64 object.
|
148
|
+
#
|
149
|
+
# _num_quantiles_ is a number of tiles to be estimate.
|
150
|
+
def initialize(num_quantiles)
|
151
|
+
# (native code)
|
152
|
+
end
|
153
|
+
|
154
|
+
# Add a new element to this entry.
|
155
|
+
# _element_ must be Fixnum object.
|
156
|
+
def add_elem(elem)
|
157
|
+
# (native code)
|
158
|
+
end
|
159
|
+
|
160
|
+
# Return a estimated N tiles.
|
161
|
+
# Example: [0, 3, 7, 9]
|
162
|
+
def estimate()
|
163
|
+
# (native code)
|
164
|
+
end
|
165
|
+
end # Int64
|
166
|
+
|
167
|
+
# QuantileEstimator whose elements are double
|
168
|
+
class Double
|
169
|
+
# Create a QuantileEstimator::Double object.
|
170
|
+
#
|
171
|
+
# _num_quantiles_ is a number of tiles to be estimate.
|
172
|
+
def initialize(num_quantiles)
|
173
|
+
# (native code)
|
174
|
+
end
|
175
|
+
|
176
|
+
# Add a new element to this entry.
|
177
|
+
# _element_ must be a Float object.
|
178
|
+
def add_elem(elem)
|
179
|
+
# (native code)
|
180
|
+
end
|
181
|
+
|
182
|
+
# Return a estimated N tiles.
|
183
|
+
# Example: [0.0, 3.2, 6.8, 9.5]
|
184
|
+
def estimate()
|
185
|
+
# (native code)
|
186
|
+
end
|
187
|
+
end # Double
|
188
|
+
end # QuantileEstimator
|
189
|
+
end # SZaru
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "../ext/szaru")
|
2
|
+
|
3
|
+
include SZaru
|
4
|
+
quantile_estimators = [QuantileEstimator::Int32, QuantileEstimator::Int64, QuantileEstimator::Double]
|
5
|
+
|
6
|
+
def convert_value_from_index(index, value)
|
7
|
+
case index
|
8
|
+
when 0, 1
|
9
|
+
value.to_i
|
10
|
+
when 2
|
11
|
+
value.to_f
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
quantile_estimators.each_with_index do |quantile_estimator, te_index|
|
16
|
+
describe quantile_estimator do
|
17
|
+
it "return [0] if no addition exists" do
|
18
|
+
te = quantile_estimator.new(10)
|
19
|
+
te.estimate.should == [0]
|
20
|
+
end
|
21
|
+
|
22
|
+
it "return [min, max] if quantile_elems is 0" do
|
23
|
+
te = quantile_estimator.new(0)
|
24
|
+
te.add_elem(convert_value_from_index(te_index, 10))
|
25
|
+
te.add_elem(convert_value_from_index(te_index, 7))
|
26
|
+
te.estimate.should == [7, 10]
|
27
|
+
end
|
28
|
+
|
29
|
+
it "return exact quantile when the number of elements is small than quantile_elems" do
|
30
|
+
te = quantile_estimator.new(10)
|
31
|
+
n_elemnts = 5
|
32
|
+
n_elemnts.times do |i|
|
33
|
+
te.add_elem(convert_value_from_index(te_index, i))
|
34
|
+
end
|
35
|
+
quantile_elements = te.estimate
|
36
|
+
expexcted_values = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]
|
37
|
+
expexcted = expexcted_values.map{|value| convert_value_from_index(te_index, value)}
|
38
|
+
quantile_elements.should == expexcted
|
39
|
+
end
|
40
|
+
|
41
|
+
it "return same result when calling estimate twice" do
|
42
|
+
te = quantile_estimator.new(10)
|
43
|
+
n_elemnts = 5
|
44
|
+
n_elemnts.times do |i|
|
45
|
+
te.add_elem(convert_value_from_index(te_index, i))
|
46
|
+
end
|
47
|
+
# first call
|
48
|
+
quantile_elements = te.estimate
|
49
|
+
# second call
|
50
|
+
quantile_elements = te.estimate
|
51
|
+
expexcted_values = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]
|
52
|
+
expexcted = expexcted_values.map{|value| convert_value_from_index(te_index, value)}
|
53
|
+
quantile_elements.should == expexcted
|
54
|
+
end
|
55
|
+
|
56
|
+
it "return approximate number when the number of elements is greater than quantile_elems" do
|
57
|
+
qe = quantile_estimator.new(11)
|
58
|
+
elems = (0 .. 1000).to_a.sort_by{rand}
|
59
|
+
elems.each do |elem|
|
60
|
+
qe.add_elem(convert_value_from_index(te_index, elem))
|
61
|
+
end
|
62
|
+
quantiles = qe.estimate
|
63
|
+
quantiles.length.should == 11
|
64
|
+
quantiles.each_with_index do |tile, index|
|
65
|
+
exact = index * 100
|
66
|
+
diff = (tile - exact).abs
|
67
|
+
diff.should < 10
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
data/test/top_spec.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "../ext/szaru")
|
2
|
+
|
3
|
+
include SZaru
|
4
|
+
top_estimators = [TopEstimator::Int32, TopEstimator::Int64, TopEstimator::Double]
|
5
|
+
|
6
|
+
def convert_value_from_index(index, value)
|
7
|
+
case index
|
8
|
+
when 0, 1
|
9
|
+
value.to_i
|
10
|
+
when 2
|
11
|
+
value.to_f
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
top_estimators.each_with_index do |top_estimator, te_index|
|
16
|
+
describe top_estimator do
|
17
|
+
it "return [] if no addition exists" do
|
18
|
+
te = top_estimator.new(10)
|
19
|
+
te.estimate.should == []
|
20
|
+
end
|
21
|
+
|
22
|
+
it "return [] if top_elems is 0" do
|
23
|
+
te = top_estimator.new(0)
|
24
|
+
te.add_elem("test")
|
25
|
+
te.estimate.should == []
|
26
|
+
end
|
27
|
+
|
28
|
+
it "return exact number when the number of elements is small than top_elems" do
|
29
|
+
te = top_estimator.new(10)
|
30
|
+
n_elemnts = 5
|
31
|
+
n_elemnts.times do |i|
|
32
|
+
te.add_elem("test#{i}")
|
33
|
+
te.add_weighted_elem("test#{i}", convert_value_from_index(te_index, i))
|
34
|
+
end
|
35
|
+
top_elements = te.estimate
|
36
|
+
top_elements.length.should == n_elemnts
|
37
|
+
n_elemnts.times do |i|
|
38
|
+
# check element
|
39
|
+
top_elements[i][0].should == "test#{n_elemnts - i - 1}"
|
40
|
+
# check weight
|
41
|
+
exact_weight = convert_value_from_index(te_index, n_elemnts - i)
|
42
|
+
top_elements[i][1].should == exact_weight
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
it "return same result when calling estimate twice" do
|
47
|
+
te = top_estimator.new(10)
|
48
|
+
n_elemnts = 5
|
49
|
+
n_elemnts.times do |i|
|
50
|
+
te.add_elem("test#{i}")
|
51
|
+
te.add_weighted_elem("test#{i}", convert_value_from_index(te_index, i))
|
52
|
+
end
|
53
|
+
# first call
|
54
|
+
top_elements = te.estimate
|
55
|
+
# second call
|
56
|
+
top_elements = te.estimate
|
57
|
+
top_elements.length.should == n_elemnts
|
58
|
+
n_elemnts.times do |i|
|
59
|
+
# check element
|
60
|
+
top_elements[i][0].should == "test#{n_elemnts - i - 1}"
|
61
|
+
# check weight
|
62
|
+
exact_weight = convert_value_from_index(te_index, n_elemnts - i)
|
63
|
+
top_elements[i][1].should == exact_weight
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
it "return approximate number when the number of elements is greater than top_elems" do
|
68
|
+
te = top_estimator.new(10)
|
69
|
+
n_large_elemnts = 30
|
70
|
+
n_small_elemnts = 1000
|
71
|
+
# create input stream
|
72
|
+
elems = []
|
73
|
+
# large element x_i occurs x_i^2 times.
|
74
|
+
n_large_elemnts.times do |i|
|
75
|
+
(i * i).times do
|
76
|
+
elems << i
|
77
|
+
end
|
78
|
+
end
|
79
|
+
# small element y_i occurs less than 5 times.
|
80
|
+
n_small_elemnts.times do |i|
|
81
|
+
rand(5).times do
|
82
|
+
elems << i
|
83
|
+
end
|
84
|
+
end
|
85
|
+
# run input stream 2 times in random order
|
86
|
+
2.times do
|
87
|
+
elems.sort_by{ rand }.each do |j|
|
88
|
+
te.add_elem("test#{j}")
|
89
|
+
end
|
90
|
+
end
|
91
|
+
# check estimation
|
92
|
+
top_elements = te.estimate
|
93
|
+
top_elements.length.should == 10
|
94
|
+
10.times do |i|
|
95
|
+
exact_index = n_large_elemnts - i - 1
|
96
|
+
# check element
|
97
|
+
top_elements[i][0] =~ /test(\d*)/
|
98
|
+
estimated_index = $1.to_i
|
99
|
+
diff = (exact_index - estimated_index).abs
|
100
|
+
diff.should < 3
|
101
|
+
# check weight
|
102
|
+
exact_weight = convert_value_from_index(te_index, 2 * exact_index * exact_index)
|
103
|
+
diff = top_elements[i][1] - exact_weight
|
104
|
+
error = (diff / exact_weight.to_f).abs
|
105
|
+
error.should < 0.1
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
end
|
110
|
+
end
|
data/test/unique_spec.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "../ext/szaru")
|
2
|
+
|
3
|
+
include SZaru
|
4
|
+
describe UniqueEstimator do
|
5
|
+
it "return 0 if no addition exists" do
|
6
|
+
ue = UniqueEstimator.new(10)
|
7
|
+
ue.estimate.should == 0
|
8
|
+
end
|
9
|
+
|
10
|
+
it "return 0 if max_elems is 0" do
|
11
|
+
ue = UniqueEstimator.new(0)
|
12
|
+
ue.add_elem("test")
|
13
|
+
ue.estimate.should == 0
|
14
|
+
end
|
15
|
+
|
16
|
+
it "return exact number when the number of elements is small than max_elems" do
|
17
|
+
ue = UniqueEstimator.new(10)
|
18
|
+
5.times do |i|
|
19
|
+
ue.add_elem("test#{i}")
|
20
|
+
end
|
21
|
+
ue.estimate.should == 5
|
22
|
+
end
|
23
|
+
|
24
|
+
it "return approximate number when the number of elements is greater than max_elems" do
|
25
|
+
ue = UniqueEstimator.new(10)
|
26
|
+
n_unique = 997
|
27
|
+
elems = Array.new(n_unique){|i| ("test#{i}") }
|
28
|
+
# add elems to ue 2 times in random order
|
29
|
+
2.times do
|
30
|
+
elems.sort_by{ rand }.each do |elm|
|
31
|
+
ue.add_elem(elm)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
diff = ue.estimate - n_unique
|
35
|
+
error_rate = ( diff / n_unique.to_f).abs
|
36
|
+
error_rate.should < 0.1
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: szaru
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 1
|
10
|
+
version: 0.1.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Yuji Kaneda
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-13 00:00:00 +09:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: |
|
23
|
+
Portings of excellent Sawzall aggregators.
|
24
|
+
|
25
|
+
email: llamerada@gmail.com
|
26
|
+
executables: []
|
27
|
+
|
28
|
+
extensions:
|
29
|
+
- ext/extconf.rb
|
30
|
+
extra_rdoc_files: []
|
31
|
+
|
32
|
+
files:
|
33
|
+
- ext/rb_szaru.cc
|
34
|
+
- ext/converter.h
|
35
|
+
- ext/szaru.so
|
36
|
+
- ext/Makefile
|
37
|
+
- ext/extconf.rb
|
38
|
+
- ext/rb_szaru.o
|
39
|
+
- test/quantile_spec.rb
|
40
|
+
- test/top_spec.rb
|
41
|
+
- test/unique_spec.rb
|
42
|
+
- sample/sample.rb
|
43
|
+
- overview.rd
|
44
|
+
- szaru-doc.rb
|
45
|
+
has_rdoc: true
|
46
|
+
homepage: http://llamerada.github.com/SZaru/
|
47
|
+
licenses: []
|
48
|
+
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options:
|
51
|
+
- szaru-doc.rb
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
hash: 3
|
60
|
+
segments:
|
61
|
+
- 0
|
62
|
+
version: "0"
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
hash: 3
|
69
|
+
segments:
|
70
|
+
- 0
|
71
|
+
version: "0"
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 1.3.7
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Portings of excellent Sawzall aggregators
|
79
|
+
test_files:
|
80
|
+
- test/quantile_spec.rb
|
81
|
+
- test/top_spec.rb
|
82
|
+
- test/unique_spec.rb
|