szaru 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/Makefile +157 -0
- data/ext/converter.h +69 -0
- data/ext/extconf.rb +5 -0
- data/ext/rb_szaru.cc +268 -0
- data/ext/rb_szaru.o +0 -0
- data/ext/szaru.so +0 -0
- data/overview.rd +33 -0
- data/sample/sample.rb +68 -0
- data/szaru-doc.rb +189 -0
- data/test/quantile_spec.rb +72 -0
- data/test/top_spec.rb +110 -0
- data/test/unique_spec.rb +39 -0
- metadata +82 -0
data/ext/Makefile
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
|
2
|
+
# NOTE(review): this looks like build output of ext/extconf.rb (it cleans up
# mkmf.log and hard-codes one machine's Ruby 1.8 / i686-linux paths) -- confirm
# whether it should be shipped at all rather than regenerated on install.
SHELL = /bin/sh

#### Start of system configuration section. ####

# Source and Ruby header locations; VPATH lets make find ruby.h/defines.h there.
srcdir = .
topdir = /usr/local/lib/ruby/1.8/i686-linux
hdrdir = $(topdir)
VPATH = $(srcdir):$(topdir):$(hdrdir)
# Installation directories. All of them derive from $(prefix), which honours
# DESTDIR for staged installs.
exec_prefix = $(prefix)
prefix = $(DESTDIR)/usr/local
sharedstatedir = $(prefix)/com
mandir = $(datarootdir)/man
psdir = $(docdir)
oldincludedir = $(DESTDIR)/usr/include
localedir = $(datarootdir)/locale
bindir = $(exec_prefix)/bin
libexecdir = $(exec_prefix)/libexec
sitedir = $(libdir)/ruby/site_ruby
htmldir = $(docdir)
vendorarchdir = $(vendorlibdir)/$(sitearch)
includedir = $(prefix)/include
infodir = $(datarootdir)/info
vendorlibdir = $(vendordir)/$(ruby_version)
sysconfdir = $(prefix)/etc
libdir = $(exec_prefix)/lib
sbindir = $(exec_prefix)/sbin
rubylibdir = $(libdir)/ruby/$(ruby_version)
docdir = $(datarootdir)/doc/$(PACKAGE)
dvidir = $(docdir)
vendordir = $(libdir)/ruby/vendor_ruby
datarootdir = $(prefix)/share
pdfdir = $(docdir)
archdir = $(rubylibdir)/$(arch)
sitearchdir = $(sitelibdir)/$(sitearch)
datadir = $(datarootdir)
localstatedir = $(prefix)/var
sitelibdir = $(sitedir)/$(ruby_version)

# Toolchain. CXX is not assigned in this section even though the C++ suffix
# rules use $(CXX).
CC = gcc
LIBRUBY = $(LIBRUBY_A)
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
LIBRUBYARG_SHARED = -Wl,-R -Wl,$(libdir) -L$(libdir)
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static

# Compiler and linker flags. CXXFLAGS reuses CFLAGS unchanged.
RUBY_EXTCONF_H =
CFLAGS = -fPIC -g -O2 $(cflags)
INCFLAGS = -I. -I. -I/usr/local/lib/ruby/1.8/i686-linux -I.
DEFS = -D_FILE_OFFSET_BITS=64
CPPFLAGS = -D_FILE_OFFSET_BITS=64
CXXFLAGS = $(CFLAGS)
ldflags = -L. -rdynamic -Wl,-export-dynamic
dldflags =
archflag =
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
# The shared object is linked with the C driver ($(CC)), not $(CXX).
LDSHARED = $(CC) -shared
AR = ar
EXEEXT =

# Ruby installation this Makefile targets, plus the helper commands used by
# the rules.
RUBY_INSTALL_NAME = ruby
RUBY_SO_NAME = ruby
arch = i686-linux
sitearch = i686-linux
ruby_version = 1.8
ruby = /usr/local/bin/ruby
RUBY = $(ruby)
RM = rm -f
MAKEDIRS = mkdir -p
INSTALL = /usr/bin/install -c
INSTALL_PROG = $(INSTALL) -m 0755
INSTALL_DATA = $(INSTALL) -m 644
COPY = cp

#### End of system configuration section. ####

preload =

# Library search path, as a plain list and as linker options.
libpath = . $(libdir)
LIBPATH = -L. -L$(libdir) -Wl,-R$(libdir)
DEFFILE =

# Extra files removed by "clean" and "distclean".
CLEANFILES = mkmf.log
DISTCLEANFILES =

# Extension-specific settings: one C++ source compiled into szaru.so and
# linked against libszaru.
extout =
extout_prefix =
target_prefix =
LOCAL_LIBS =
LIBS = -lszaru -lrt -ldl -lcrypt -lm -lc
SRCS = rb_szaru.cc
OBJS = rb_szaru.o
TARGET = szaru
DLLIB = $(TARGET).so
EXTSTATIC =
STATIC_LIB =

# Install destinations (site_ruby tree).
BINDIR = $(bindir)
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
RUBYARCHDIR = $(sitearchdir)$(target_prefix)

# Shell glob patterns handed to $(RM) by the clean targets.
TARGET_SO = $(DLLIB)
CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
|
105
|
+
|
106
|
+
# These targets are commands, not files. Declaring them phony keeps a stray
# file named "clean", "install", ... from silently disabling the target.
.PHONY: all static clean distclean realclean
.PHONY: install install-so install-rb install-rb-default
.PHONY: pre-install-rb pre-install-rb-default
.PHONY: site-install site-install-so site-install-rb

# Default goal: build the shared object.
all: $(DLLIB)
static: $(STATIC_LIB)

# Remove build products. Errors are ignored ("-") because the patterns may
# match nothing.
clean:
	@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)

# Additionally remove this Makefile and the configuration leftovers.
distclean: clean
	@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
	@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)

realclean: distclean
install: install-so install-rb

# Install the shared object into the site arch directory, creating the
# directory first if needed.
install-so: $(RUBYARCHDIR)
install-so: $(RUBYARCHDIR)/$(DLLIB)
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
	$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
# No Ruby library files are installed here; these targets only chain to their
# prerequisites.
install-rb: pre-install-rb install-rb-default
install-rb-default: pre-install-rb-default
pre-install-rb: Makefile
pre-install-rb-default: Makefile
$(RUBYARCHDIR):
	$(MAKEDIRS) $@

site-install: site-install-so site-install-rb
site-install-so: install-so
site-install-rb: install-rb

.SUFFIXES: .c .m .cc .cxx .cpp .C .o

# Suffix rules: C++ sources are compiled with $(CXX), C sources with $(CC).
.cc.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.cxx.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.cpp.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.C.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.c.o:
	$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<

# Link the extension. The old library is removed first; it is relinked
# whenever an object or this Makefile changes.
$(DLLIB): $(OBJS) Makefile
	@-$(RM) $@
	$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)



# Header dependencies; ruby.h and defines.h are located through VPATH.
$(OBJS): ruby.h defines.h
|
data/ext/converter.h
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
// Copyright 2010 Yuji Kaneda
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
// ------------------------------------------------------------------------
|
15
|
+
|
16
|
+
// specialization of Conveter only for int32_t, int64_t, double
|
17
|
+
template< typename Value >
|
18
|
+
class Converter {
|
19
|
+
public:
|
20
|
+
static inline VALUE ToRuby(Value value);
|
21
|
+
static inline Value FromRuby(VALUE value);
|
22
|
+
static inline void CheckType(VALUE value);
|
23
|
+
};
|
24
|
+
|
25
|
+
template<> inline
|
26
|
+
VALUE Converter<int32_t>::ToRuby(int32_t value){
|
27
|
+
return INT2FIX(value);
|
28
|
+
}
|
29
|
+
|
30
|
+
template<> inline
|
31
|
+
VALUE Converter<int64_t>::ToRuby(int64_t value){
|
32
|
+
return LONG2FIX(value);
|
33
|
+
}
|
34
|
+
|
35
|
+
template<> inline
|
36
|
+
VALUE Converter<double>::ToRuby(double value){
|
37
|
+
return rb_float_new(value);
|
38
|
+
}
|
39
|
+
|
40
|
+
template<> inline
|
41
|
+
int32_t Converter<int32_t>::FromRuby(VALUE value){
|
42
|
+
return FIX2INT(value);
|
43
|
+
}
|
44
|
+
|
45
|
+
template<> inline
|
46
|
+
int64_t Converter<int64_t>::FromRuby(VALUE value){
|
47
|
+
return FIX2LONG(value);
|
48
|
+
}
|
49
|
+
|
50
|
+
template<> inline
|
51
|
+
double Converter<double>::FromRuby(VALUE value){
|
52
|
+
return RFLOAT(value)->value;
|
53
|
+
}
|
54
|
+
|
55
|
+
template<> inline
|
56
|
+
void Converter<int32_t>::CheckType(VALUE value){
|
57
|
+
Check_Type(value, T_FIXNUM);
|
58
|
+
}
|
59
|
+
|
60
|
+
template<> inline
|
61
|
+
void Converter<int64_t>::CheckType(VALUE value){
|
62
|
+
Check_Type(value, T_FIXNUM);
|
63
|
+
}
|
64
|
+
|
65
|
+
template<> inline
|
66
|
+
void Converter<double>::CheckType(VALUE value){
|
67
|
+
Check_Type(value, T_FLOAT);
|
68
|
+
}
|
69
|
+
|
data/ext/extconf.rb
ADDED
data/ext/rb_szaru.cc
ADDED
@@ -0,0 +1,268 @@
|
|
1
|
+
// Copyright 2010 Yuji Kaneda
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
// ------------------------------------------------------------------------
|
15
|
+
|
16
|
+
// Ruby Binding of SZaru
|
17
|
+
|
18
|
+
#include <ruby.h>
|
19
|
+
#include <szaru.h>
|
20
|
+
// local include file
|
21
|
+
#include "converter.h"
|
22
|
+
|
23
|
+
extern "C" {
|
24
|
+
void Init_szaru(void);
|
25
|
+
}
|
26
|
+
|
27
|
+
namespace {
|
28
|
+
|
29
|
+
template<typename Value>
|
30
|
+
class RubyQuantileEstimator {
|
31
|
+
private:
|
32
|
+
static void
|
33
|
+
Free(SZaru::QuantileEstimator<Value> **ptr)
|
34
|
+
{
|
35
|
+
if (*ptr) {
|
36
|
+
delete *ptr;
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
static VALUE
|
41
|
+
Alloc(VALUE klass)
|
42
|
+
{
|
43
|
+
SZaru::QuantileEstimator<Value> **ptr = ALLOC(SZaru::QuantileEstimator<Value>*);
|
44
|
+
*ptr = NULL;
|
45
|
+
return Data_Wrap_Struct(klass, 0, Free, ptr);
|
46
|
+
}
|
47
|
+
|
48
|
+
static VALUE
|
49
|
+
Initialize(VALUE self, VALUE maxElems)
|
50
|
+
{
|
51
|
+
SZaru::QuantileEstimator<Value> **ptr;
|
52
|
+
Data_Get_Struct(self, SZaru::QuantileEstimator<Value>*, ptr);
|
53
|
+
*ptr = SZaru::QuantileEstimator<Value>::Create(NUM2LONG(maxElems));
|
54
|
+
return Qnil;
|
55
|
+
}
|
56
|
+
|
57
|
+
static VALUE
|
58
|
+
AddElem(VALUE self, VALUE elem)
|
59
|
+
{
|
60
|
+
SZaru::QuantileEstimator<Value> **qe;
|
61
|
+
Converter<Value>::CheckType(elem);
|
62
|
+
Data_Get_Struct(self, SZaru::QuantileEstimator<Value>*, qe);
|
63
|
+
(*qe)->AddElem(Converter<Value>::FromRuby(elem));
|
64
|
+
return Qnil;
|
65
|
+
}
|
66
|
+
|
67
|
+
static VALUE
|
68
|
+
Estimate(VALUE self)
|
69
|
+
{
|
70
|
+
SZaru::QuantileEstimator<Value> **qe;
|
71
|
+
Data_Get_Struct(self, SZaru::QuantileEstimator<Value>*, qe);
|
72
|
+
std::vector<Value> quantiles;
|
73
|
+
(*qe)->Estimate(quantiles);
|
74
|
+
VALUE ary = rb_ary_new2(quantiles.size());
|
75
|
+
for (int i = 0; i < quantiles.size(); i++) {
|
76
|
+
rb_ary_push(ary, Converter<Value>::ToRuby(quantiles[i]));
|
77
|
+
}
|
78
|
+
return ary;
|
79
|
+
}
|
80
|
+
|
81
|
+
public:
|
82
|
+
|
83
|
+
static VALUE
|
84
|
+
Define(VALUE superModule, const char *name)
|
85
|
+
{
|
86
|
+
VALUE cQuantileEstimator = rb_define_class_under(superModule, name, rb_cObject);
|
87
|
+
rb_define_alloc_func(cQuantileEstimator, Alloc);
|
88
|
+
rb_define_private_method(cQuantileEstimator, "initialize",
|
89
|
+
RUBY_METHOD_FUNC(Initialize), 1);
|
90
|
+
rb_define_method(cQuantileEstimator, "add_elem",
|
91
|
+
RUBY_METHOD_FUNC(AddElem), 1);
|
92
|
+
rb_define_method(cQuantileEstimator, "estimate",
|
93
|
+
RUBY_METHOD_FUNC(Estimate), 0);
|
94
|
+
return cQuantileEstimator;
|
95
|
+
}
|
96
|
+
};
|
97
|
+
|
98
|
+
|
99
|
+
template< typename Value >
|
100
|
+
class RubyTopEstimator {
|
101
|
+
private:
|
102
|
+
|
103
|
+
static void
|
104
|
+
Free(SZaru::TopEstimator<Value> **ptr)
|
105
|
+
{
|
106
|
+
if (*ptr) {
|
107
|
+
delete *ptr;
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
static VALUE
|
112
|
+
Alloc(VALUE klass)
|
113
|
+
{
|
114
|
+
SZaru::TopEstimator<Value> **ptr = ALLOC(SZaru::TopEstimator<Value>*);
|
115
|
+
*ptr = NULL;
|
116
|
+
return Data_Wrap_Struct(klass, 0, Free, ptr);
|
117
|
+
}
|
118
|
+
|
119
|
+
static VALUE
|
120
|
+
Initialize(VALUE self, VALUE maxElems)
|
121
|
+
{
|
122
|
+
SZaru::TopEstimator<Value> **ptr;
|
123
|
+
Check_Type(maxElems, T_FIXNUM);
|
124
|
+
Data_Get_Struct(self, SZaru::TopEstimator<Value>*, ptr);
|
125
|
+
*ptr = SZaru::TopEstimator<Value>::Create(NUM2LONG(maxElems));
|
126
|
+
return Qnil;
|
127
|
+
}
|
128
|
+
|
129
|
+
static VALUE
|
130
|
+
AddElem(VALUE self, VALUE elem)
|
131
|
+
{
|
132
|
+
SZaru::TopEstimator<Value> **te;
|
133
|
+
Check_Type(elem, T_STRING);
|
134
|
+
Data_Get_Struct(self, SZaru::TopEstimator<Value>*, te);
|
135
|
+
(*te)->AddElem(std::string(RSTRING_PTR(elem), RSTRING_LEN(elem)));
|
136
|
+
return Qnil;
|
137
|
+
}
|
138
|
+
|
139
|
+
static VALUE
|
140
|
+
AddWeightedElem(VALUE self, VALUE elem, VALUE weight)
|
141
|
+
{
|
142
|
+
SZaru::TopEstimator<Value> **te;
|
143
|
+
Check_Type(elem, T_STRING);
|
144
|
+
Converter<Value>::CheckType(weight);
|
145
|
+
Data_Get_Struct(self, SZaru::TopEstimator<Value>*, te);
|
146
|
+
(*te)->AddWeightedElem(std::string(RSTRING_PTR(elem), RSTRING_LEN(elem)),
|
147
|
+
Converter<Value>::FromRuby(weight));
|
148
|
+
return Qnil;
|
149
|
+
}
|
150
|
+
|
151
|
+
static VALUE
|
152
|
+
Estimate(VALUE self)
|
153
|
+
{
|
154
|
+
SZaru::TopEstimator<Value> **te;
|
155
|
+
Data_Get_Struct(self, SZaru::TopEstimator<Value>*, te);
|
156
|
+
std::vector<typename SZaru::TopEstimator<Value>::Elem> topElems;
|
157
|
+
(*te)->Estimate(topElems);
|
158
|
+
VALUE ary = rb_ary_new2(topElems.size());
|
159
|
+
for (int i = 0; i < topElems.size(); i++) {
|
160
|
+
rb_ary_push(ary, rb_ary_new3(2,
|
161
|
+
rb_str_new(topElems[i].value.c_str(), topElems[i].value.size()),
|
162
|
+
Converter<Value>::ToRuby(topElems[i].weight)));
|
163
|
+
}
|
164
|
+
return ary;
|
165
|
+
}
|
166
|
+
|
167
|
+
public:
|
168
|
+
static VALUE
|
169
|
+
Define(VALUE superModule, const char *name)
|
170
|
+
{
|
171
|
+
VALUE cTopEstimator = rb_define_class_under(superModule, name, rb_cObject);
|
172
|
+
rb_define_alloc_func(cTopEstimator, Alloc);
|
173
|
+
rb_define_private_method(cTopEstimator, "initialize",
|
174
|
+
RUBY_METHOD_FUNC(Initialize), 1);
|
175
|
+
rb_define_method(cTopEstimator, "add_elem",
|
176
|
+
RUBY_METHOD_FUNC(AddElem), 1);
|
177
|
+
rb_define_method(cTopEstimator, "add_weighted_elem",
|
178
|
+
RUBY_METHOD_FUNC(AddWeightedElem), 2);
|
179
|
+
rb_define_method(cTopEstimator, "estimate",
|
180
|
+
RUBY_METHOD_FUNC(Estimate), 0);
|
181
|
+
return cTopEstimator;
|
182
|
+
}
|
183
|
+
|
184
|
+
};
|
185
|
+
|
186
|
+
|
187
|
+
class RubyUniqueEstimator {
|
188
|
+
private:
|
189
|
+
static void
|
190
|
+
Free(SZaru::UniqueEstimator **ptr)
|
191
|
+
{
|
192
|
+
if (*ptr) {
|
193
|
+
delete *ptr;
|
194
|
+
}
|
195
|
+
}
|
196
|
+
|
197
|
+
static VALUE
|
198
|
+
Alloc(VALUE klass)
|
199
|
+
{
|
200
|
+
SZaru::UniqueEstimator **ptr = ALLOC(SZaru::UniqueEstimator*);
|
201
|
+
*ptr = NULL;
|
202
|
+
return Data_Wrap_Struct(klass, 0, Free, ptr);
|
203
|
+
}
|
204
|
+
|
205
|
+
static VALUE
|
206
|
+
Initialize(VALUE self, VALUE maxElems)
|
207
|
+
{
|
208
|
+
SZaru::UniqueEstimator **ptr;
|
209
|
+
Data_Get_Struct(self, SZaru::UniqueEstimator*, ptr);
|
210
|
+
*ptr = SZaru::UniqueEstimator::Create(NUM2LONG(maxElems));
|
211
|
+
return Qnil;
|
212
|
+
}
|
213
|
+
|
214
|
+
static VALUE
|
215
|
+
AddElem(VALUE self, VALUE elem)
|
216
|
+
{
|
217
|
+
SZaru::UniqueEstimator **ue;
|
218
|
+
Check_Type(elem, T_STRING);
|
219
|
+
Data_Get_Struct(self, SZaru::UniqueEstimator*, ue);
|
220
|
+
(*ue)->AddElemInCIF(RSTRING_PTR(elem), RSTRING_LEN(elem));
|
221
|
+
return Qnil;
|
222
|
+
}
|
223
|
+
|
224
|
+
static VALUE
|
225
|
+
Estimate(VALUE self)
|
226
|
+
{
|
227
|
+
SZaru::UniqueEstimator **ue;
|
228
|
+
Data_Get_Struct(self, SZaru::UniqueEstimator*, ue);
|
229
|
+
uint64_t unique = (*ue)->Estimate();
|
230
|
+
return LONG2NUM(unique);
|
231
|
+
}
|
232
|
+
|
233
|
+
public:
|
234
|
+
static VALUE
|
235
|
+
Define(VALUE superModule, const char *name) {
|
236
|
+
VALUE cUniqueEstimator = rb_define_class_under(superModule, name, rb_cObject);
|
237
|
+
rb_define_alloc_func(cUniqueEstimator, Alloc);
|
238
|
+
rb_define_private_method(cUniqueEstimator, "initialize",
|
239
|
+
RUBY_METHOD_FUNC(Initialize), 1);
|
240
|
+
rb_define_method(cUniqueEstimator, "add_elem",
|
241
|
+
RUBY_METHOD_FUNC(AddElem), 1);
|
242
|
+
rb_define_method(cUniqueEstimator, "estimate",
|
243
|
+
RUBY_METHOD_FUNC(Estimate), 0);
|
244
|
+
return cUniqueEstimator;
|
245
|
+
}
|
246
|
+
};
|
247
|
+
|
248
|
+
}
|
249
|
+
|
250
|
+
|
251
|
+
// Extension initializer (declared extern "C" earlier in this file).
// Defines the Ruby-visible hierarchy:
//   SZaru::UniqueEstimator
//   SZaru::TopEstimator::{Double, Int32, Int64}
//   SZaru::QuantileEstimator::{Double, Int32, Int64}
void
Init_szaru(void){
  VALUE mSZaru = rb_define_module("SZaru");
  RubyUniqueEstimator::Define(mSZaru, "UniqueEstimator");

  // TopEstimator
  // One class per supported weight type, grouped under a module.
  VALUE mTopEstimator = rb_define_module_under(mSZaru, "TopEstimator");
  RubyTopEstimator<double>::Define(mTopEstimator, "Double");
  RubyTopEstimator<int32_t>::Define(mTopEstimator, "Int32");
  RubyTopEstimator<int64_t>::Define(mTopEstimator, "Int64");

  // QuantileEstimator
  // One class per supported element type, grouped under a module.
  VALUE mQuantileEstimator = rb_define_module_under(mSZaru, "QuantileEstimator");
  RubyQuantileEstimator<double>::Define(mQuantileEstimator, "Double");
  RubyQuantileEstimator<int32_t>::Define(mQuantileEstimator, "Int32");
  RubyQuantileEstimator<int64_t>::Define(mQuantileEstimator, "Int64");

}
|
data/ext/rb_szaru.o
ADDED
Binary file
|
data/ext/szaru.so
ADDED
Binary file
|
data/overview.rd
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
= Ruby Binding of SZaru
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
{SZaru}[http://llamerada.github.com/SZaru/] is a library to use {Sawzall}[http://code.google.com/p/szl/] aggregators in pure C++, Ruby and Python.
|
5
|
+
Currently, I have implemented the following 3 aggregators:
|
6
|
+
[Top] Statistical samplings that record the 'top N' data items based on CountSketch algorithm from "Finding Frequent Items in Data Streams", Moses Charikar, Kevin Chen and Martin Farach-Colton, 2002.
|
7
|
+
[Unique] Statistical estimators for the total number of unique data items.
|
8
|
+
[Quantile] Approximate N-tiles for data items from an ordered domain based on the following paper: Munro & Paterson, "Selection and Sorting with Limited Storage", Theoretical Computer Science, Vol 12, p 315-323, 1980.
|
9
|
+
|
10
|
+
== Example
|
11
|
+
require "szaru"
|
12
|
+
unq_est = SZaru::UniqueEstimator.new(10)
|
13
|
+
1000.times do |i|
|
14
|
+
unq_est.add_elem(i.to_s + "test")
|
15
|
+
end
|
16
|
+
puts unq_est.estimate # => 913
|
17
|
+
|
18
|
+
== License
|
19
|
+
|
20
|
+
Copyright 2010 Yuji Kaneda
|
21
|
+
|
22
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
23
|
+
you may not use this file except in compliance with the License.
|
24
|
+
You may obtain a copy of the License at
|
25
|
+
|
26
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
27
|
+
|
28
|
+
Unless required by applicable law or agreed to in writing, software
|
29
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
30
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
31
|
+
See the License for the specific language governing permissions and
|
32
|
+
limitations under the License.
|
33
|
+
|
data/sample/sample.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# Prefer the installed gem; fall back to the locally built extension.
# A failed require raises LoadError, which is a ScriptError and therefore is
# NOT caught by a bare `rescue` -- it has to be named explicitly.
begin
  require "rubygems"
  require "szaru"
rescue LoadError
  puts "load local library"
  require File.join(File.dirname(__FILE__), "../ext/szaru")
end
|
8
|
+
|
9
|
+
# Feeds 1000 distinct strings ("0test" .. "999test") into a UniqueEstimator
# created with max_elems = 10 and prints the estimated unique count.
def test_unique
  estimator = SZaru::UniqueEstimator.new(10)
  (0...1000).each { |n| estimator.add_elem("#{n}test") }
  puts estimator.estimate
end
|
16
|
+
|
17
|
+
# Builds a stream in which "test<n>" occurs n times (n = 0..99), feeds it in
# random order into an Int32 TopEstimator for the top 10, and prints the
# estimate.
def test_top
  estimator = SZaru::TopEstimator::Int32.new(10)
  labels = (0...100).map { |n| Array.new(n) { "test#{n}" } }.flatten
  labels.sort_by { rand }.each { |label| estimator.add_elem(label) }
  p estimator.estimate
end
|
30
|
+
|
31
|
+
# Adds "test<n>" once with weight n.to_f (n = 0..99), in random order, to a
# Double TopEstimator for the top 10, and prints the estimate.
def test_top2
  estimator = SZaru::TopEstimator::Double.new(10)
  weighted = (0...100).map { |n| ["test#{n}", n] }
  weighted.sort_by { rand }.each do |label, weight|
    estimator.add_weighted_elem(label, weight.to_f)
  end
  p estimator.estimate
end
|
42
|
+
|
43
|
+
# Feeds the integers 0..999 in random order into an Int64 QuantileEstimator
# created with 10 and prints the estimated quantiles.
def test_quantile
  estimator = SZaru::QuantileEstimator::Int64.new(10)
  shuffled = (0...1000).to_a.sort_by { rand }
  shuffled.each { |value| estimator.add_elem(value) }
  p estimator.estimate
end
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
# Run every sample defined above. The two commented-out names below have no
# definition in this file.
# test_topheap
# test_sketch
test_unique
test_top
test_top2
test_quantile
|
67
|
+
|
68
|
+
|
data/szaru-doc.rb
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
#--
|
2
|
+
# SZaru: Porting of excellent Sawzall aggregators.
|
3
|
+
#++
|
4
|
+
#:include:overview.rd
|
5
|
+
|
6
|
+
|
7
|
+
# SZaru namespace
|
8
|
+
module SZaru
|
9
|
+
# Statistical estimators for the total number of unique data items.
|
10
|
+
class UniqueEstimator
|
11
|
+
# Create a UniqueEstimator object.
|
12
|
+
#
|
13
|
+
# _max_elems_ is a tuning parameter.
|
14
|
+
# If _max_elems_ is bigger, the estimation becomes more accurate but consumes more memory.
|
15
|
+
def initialize(max_elems)
|
16
|
+
# (native code)
|
17
|
+
end
|
18
|
+
|
19
|
+
# Add a new element to this entry.
|
20
|
+
# _element_ must be String object.
|
21
|
+
def add_elem(elem)
|
22
|
+
# (native code)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Return the estimated number of unique entries.
|
26
|
+
def estimate
|
27
|
+
# (native code)
|
28
|
+
end
|
29
|
+
end # UniqueEstimator
|
30
|
+
|
31
|
+
# Statistical samplings that record the 'top N' data items.
|
32
|
+
module TopEstimator
|
33
|
+
# TopEstimator of that weight is int32
|
34
|
+
class Int32
|
35
|
+
# Create a TopEstimator::Int32 object.
|
36
|
+
#
|
37
|
+
# _top_elems_ is a number of top elements to be estimate.
|
38
|
+
def initialize(top_elems)
|
39
|
+
# (native code)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Add a new element to this entry.
|
43
|
+
# _element_ must be String object.
|
44
|
+
def add_elem(elem)
|
45
|
+
# (native code)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Add a new weighted element to this entry.
|
49
|
+
# _element_ must be String object.
|
50
|
+
# _weight_ must be Fixnum object.
|
51
|
+
def add_weighted_elem(elem, weight)
|
52
|
+
# (native code)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Return a top elements with weight.
|
56
|
+
# Example: [["abc", 7], ["def", 3]]
|
57
|
+
def estimate
|
58
|
+
# (native code)
|
59
|
+
end
|
60
|
+
end # Int32
|
61
|
+
|
62
|
+
# TopEstimator of that weight is int64
|
63
|
+
class Int64
|
64
|
+
# Create a TopEstimator::Int64 object.
|
65
|
+
#
|
66
|
+
# _top_elems_ is a number of top elements to be estimate.
|
67
|
+
def initialize(top_elems)
|
68
|
+
# (native code)
|
69
|
+
end
|
70
|
+
|
71
|
+
# Add a new element to this entry.
|
72
|
+
# _element_ must be String object.
|
73
|
+
def add_elem(elem)
|
74
|
+
# (native code)
|
75
|
+
end
|
76
|
+
|
77
|
+
# Add a new weighted element to this entry.
|
78
|
+
# _element_ must be String object.
|
79
|
+
# _weight_ must be Fixnum object.
|
80
|
+
def add_weighted_elem(elem, weight)
|
81
|
+
# (native code)
|
82
|
+
end
|
83
|
+
|
84
|
+
# Return a top elements with weight.
|
85
|
+
# Example: [["abc", 7], ["def", 3]]
|
86
|
+
def estimate
|
87
|
+
# (native code)
|
88
|
+
end
|
89
|
+
end # Int64
|
90
|
+
|
91
|
+
# TopEstimator of that weight is Double
|
92
|
+
class Double
|
93
|
+
# Create a TopEstimator::Double object.
|
94
|
+
#
|
95
|
+
# _top_elems_ is a number of top elements to be estimate.
|
96
|
+
def initialize(top_elems)
|
97
|
+
# (native code)
|
98
|
+
end
|
99
|
+
|
100
|
+
# Add a new element to this entry.
|
101
|
+
# _element_ must be String object.
|
102
|
+
def add_elem(elem)
|
103
|
+
# (native code)
|
104
|
+
end
|
105
|
+
|
106
|
+
# Add a new weighted element to this entry.
|
107
|
+
# _element_ must be String object.
|
108
|
+
# _weight_ must be Float object.
|
109
|
+
def add_weighted_elem(elem, weight)
|
110
|
+
# (native code)
|
111
|
+
end
|
112
|
+
|
113
|
+
# Return a top elements with weight.
|
114
|
+
# Example: [["abc", 7.0], ["def", 3.0]]
|
115
|
+
def estimate
|
116
|
+
# (native code)
|
117
|
+
end
|
118
|
+
end # Double
|
119
|
+
end # TopEstimator
|
120
|
+
|
121
|
+
# Approximate N-tiles for data items from an ordered domain.
|
122
|
+
module QuantileEstimator
|
123
|
+
# QuantileEstimator whose elements are int32
|
124
|
+
class Int32
|
125
|
+
# Create a QuantileEstimator::Int32 object.
|
126
|
+
#
|
127
|
+
# _num_quantiles_ is a number of tiles to be estimate.
|
128
|
+
def initialize(num_quantiles)
|
129
|
+
# (native code)
|
130
|
+
end
|
131
|
+
|
132
|
+
# Add a new element to this entry.
|
133
|
+
# _element_ must be Fixnum object.
|
134
|
+
def add_elem(elem)
|
135
|
+
# (native code)
|
136
|
+
end
|
137
|
+
|
138
|
+
# Return a estimated N tiles.
|
139
|
+
# Example: [0, 3, 7, 9]
|
140
|
+
def estimate()
|
141
|
+
# (native code)
|
142
|
+
end
|
143
|
+
end # Int32
|
144
|
+
|
145
|
+
# QuantileEstimator whose elements are int64
|
146
|
+
class Int64
|
147
|
+
# Create a QuantileEstimator::Int64 object.
|
148
|
+
#
|
149
|
+
# _num_quantiles_ is a number of tiles to be estimate.
|
150
|
+
def initialize(num_quantiles)
|
151
|
+
# (native code)
|
152
|
+
end
|
153
|
+
|
154
|
+
# Add a new element to this entry.
|
155
|
+
# _element_ must be Fixnum object.
|
156
|
+
def add_elem(elem)
|
157
|
+
# (native code)
|
158
|
+
end
|
159
|
+
|
160
|
+
# Return a estimated N tiles.
|
161
|
+
# Example: [0, 3, 7, 9]
|
162
|
+
def estimate()
|
163
|
+
# (native code)
|
164
|
+
end
|
165
|
+
end # Int64
|
166
|
+
|
167
|
+
# QuantileEstimator whose elements are double
|
168
|
+
class Double
|
169
|
+
# Create a QuantileEstimator::Double object.
|
170
|
+
#
|
171
|
+
# _num_quantiles_ is a number of tiles to be estimate.
|
172
|
+
def initialize(num_quantiles)
|
173
|
+
# (native code)
|
174
|
+
end
|
175
|
+
|
176
|
+
# Add a new element to this entry.
|
177
|
+
# _element_ must be Float object.
|
178
|
+
def add_elem(elem)
|
179
|
+
# (native code)
|
180
|
+
end
|
181
|
+
|
182
|
+
# Return a estimated N tiles.
|
183
|
+
# Example: [0.0, 3.2, 6.8, 9.5]
|
184
|
+
def estimate()
|
185
|
+
# (native code)
|
186
|
+
end
|
187
|
+
end # Double
|
188
|
+
end # QuantileEstimator
|
189
|
+
end # SZaru
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "../ext/szaru")
|
2
|
+
|
3
|
+
include SZaru
|
4
|
+
quantile_estimators = [QuantileEstimator::Int32, QuantileEstimator::Int64, QuantileEstimator::Double]
|
5
|
+
|
6
|
+
# Converts +value+ to the element type of the estimator class at position
# +index+ in the estimator list of this spec: Integer for indexes 0 and 1,
# Float for index 2. Any other index yields nil.
def convert_value_from_index(index, value)
  return value.to_i if [0, 1].include?(index)
  return value.to_f if 2 == index
  nil
end
|
14
|
+
|
15
|
+
# Runs the same examples against every QuantileEstimator class; te_index
# selects the matching element type through convert_value_from_index.
quantile_estimators.each_with_index do |quantile_estimator, te_index|
  describe quantile_estimator do
    it "return [0] if no addition exists" do
      te = quantile_estimator.new(10)
      te.estimate.should == [0]
    end

    it "return [min, max] if quantile_elems is 0" do
      te = quantile_estimator.new(0)
      te.add_elem(convert_value_from_index(te_index, 10))
      te.add_elem(convert_value_from_index(te_index, 7))
      te.estimate.should == [7, 10]
    end

    it "return exact quantile when the number of elements is small than quantile_elems" do
      te = quantile_estimator.new(10)
      n_elemnts = 5
      n_elemnts.times do |i|
        te.add_elem(convert_value_from_index(te_index, i))
      end
      quantile_elements = te.estimate
      # 5 inputs spread over 10 quantiles: every input is expected twice
      expexcted_values = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]
      expexcted = expexcted_values.map{|value| convert_value_from_index(te_index, value)}
      quantile_elements.should == expexcted
    end

    it "return same result when calling estimate twice" do
      te = quantile_estimator.new(10)
      n_elemnts = 5
      n_elemnts.times do |i|
        te.add_elem(convert_value_from_index(te_index, i))
      end
      # first call (its result is discarded; only the second one is checked)
      quantile_elements = te.estimate
      # second call
      quantile_elements = te.estimate
      expexcted_values = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]
      expexcted = expexcted_values.map{|value| convert_value_from_index(te_index, value)}
      quantile_elements.should == expexcted
    end

    it "return approximate number when the number of elements is greater than quantile_elems" do
      qe = quantile_estimator.new(11)
      # 0..1000 in random order
      elems = (0 .. 1000).to_a.sort_by{rand}
      elems.each do |elem|
        qe.add_elem(convert_value_from_index(te_index, elem))
      end
      quantiles = qe.estimate
      quantiles.length.should == 11
      # the k-th of 11 quantiles over 0..1000 should be close to k * 100
      quantiles.each_with_index do |tile, index|
        exact = index * 100
        diff = (tile - exact).abs
        diff.should < 10
      end
    end

  end
end
|
data/test/top_spec.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "../ext/szaru")
|
2
|
+
|
3
|
+
include SZaru
|
4
|
+
top_estimators = [TopEstimator::Int32, TopEstimator::Int64, TopEstimator::Double]
|
5
|
+
|
6
|
+
# Converts +value+ to the weight type of the estimator class at position
# +index+ in the estimator list of this spec: Integer for indexes 0 and 1,
# Float for index 2. Any other index yields nil.
def convert_value_from_index(index, value)
  return value.to_i if [0, 1].include?(index)
  return value.to_f if 2 == index
  nil
end
|
14
|
+
|
15
|
+
# Runs the same examples against every TopEstimator class; te_index selects
# the matching weight type through convert_value_from_index.
top_estimators.each_with_index do |top_estimator, te_index|
  describe top_estimator do
    it "return [] if no addition exists" do
      te = top_estimator.new(10)
      te.estimate.should == []
    end

    it "return [] if top_elems is 0" do
      te = top_estimator.new(0)
      te.add_elem("test")
      te.estimate.should == []
    end

    it "return exact number when the number of elements is small than top_elems" do
      te = top_estimator.new(10)
      n_elemnts = 5
      # "test<i>" is added once unweighted and once with weight i
      n_elemnts.times do |i|
        te.add_elem("test#{i}")
        te.add_weighted_elem("test#{i}", convert_value_from_index(te_index, i))
      end
      top_elements = te.estimate
      top_elements.length.should == n_elemnts
      # results are expected heaviest first
      n_elemnts.times do |i|
        # check element
        top_elements[i][0].should == "test#{n_elemnts - i - 1}"
        # check weight
        exact_weight = convert_value_from_index(te_index, n_elemnts - i)
        top_elements[i][1].should == exact_weight
      end
    end

    it "return same result when calling estimate twice" do
      te = top_estimator.new(10)
      n_elemnts = 5
      n_elemnts.times do |i|
        te.add_elem("test#{i}")
        te.add_weighted_elem("test#{i}", convert_value_from_index(te_index, i))
      end
      # first call (its result is discarded; only the second one is checked)
      top_elements = te.estimate
      # second call
      top_elements = te.estimate
      top_elements.length.should == n_elemnts
      n_elemnts.times do |i|
        # check element
        top_elements[i][0].should == "test#{n_elemnts - i - 1}"
        # check weight
        exact_weight = convert_value_from_index(te_index, n_elemnts - i)
        top_elements[i][1].should == exact_weight
      end
    end

    it "return approximate number when the number of elements is greater than top_elems" do
      te = top_estimator.new(10)
      n_large_elemnts = 30
      n_small_elemnts = 1000
      # create input stream
      elems = []
      # large element x_i occurs x_i^2 times.
      n_large_elemnts.times do |i|
        (i * i).times do
          elems << i
        end
      end
      # small element y_i occurs fewer than 5 times.
      # NOTE(review): indexes 0..29 are shared with the large elements above,
      # so those labels receive up to 4 extra occurrences per pass.
      n_small_elemnts.times do |i|
        rand(5).times do
          elems << i
        end
      end
      # run input stream 2 times in random order
      2.times do
        elems.sort_by{ rand }.each do |j|
          te.add_elem("test#{j}")
        end
      end
      # check estimation
      top_elements = te.estimate
      top_elements.length.should == 10
      10.times do |i|
        exact_index = n_large_elemnts - i - 1
        # check element (allow the rank to be off by up to 2)
        top_elements[i][0] =~ /test(\d*)/
        estimated_index = $1.to_i
        diff = (exact_index - estimated_index).abs
        diff.should < 3
        # check weight (two passes => 2 * index^2; allow 10% relative error)
        exact_weight = convert_value_from_index(te_index, 2 * exact_index * exact_index)
        diff = top_elements[i][1] - exact_weight
        error = (diff / exact_weight.to_f).abs
        error.should < 0.1
      end
    end

  end
end
|
data/test/unique_spec.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "../ext/szaru")
|
2
|
+
|
3
|
+
include SZaru
|
4
|
+
describe UniqueEstimator do
|
5
|
+
it "return 0 if no addition exists" do
|
6
|
+
ue = UniqueEstimator.new(10)
|
7
|
+
ue.estimate.should == 0
|
8
|
+
end
|
9
|
+
|
10
|
+
it "return 0 if max_elems is 0" do
|
11
|
+
ue = UniqueEstimator.new(0)
|
12
|
+
ue.add_elem("test")
|
13
|
+
ue.estimate.should == 0
|
14
|
+
end
|
15
|
+
|
16
|
+
it "return exact number when the number of elements is small than max_elems" do
|
17
|
+
ue = UniqueEstimator.new(10)
|
18
|
+
5.times do |i|
|
19
|
+
ue.add_elem("test#{i}")
|
20
|
+
end
|
21
|
+
ue.estimate.should == 5
|
22
|
+
end
|
23
|
+
|
24
|
+
it "return approximate number when the number of elements is greater than max_elems" do
|
25
|
+
ue = UniqueEstimator.new(10)
|
26
|
+
n_unique = 997
|
27
|
+
elems = Array.new(n_unique){|i| ("test#{i}") }
|
28
|
+
# add elems to ue 2 times in random order
|
29
|
+
2.times do
|
30
|
+
elems.sort_by{ rand }.each do |elm|
|
31
|
+
ue.add_elem(elm)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
diff = ue.estimate - n_unique
|
35
|
+
error_rate = ( diff / n_unique.to_f).abs
|
36
|
+
error_rate.should < 0.1
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: szaru
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 1
|
10
|
+
version: 0.1.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Yuji Kaneda
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-13 00:00:00 +09:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: |
|
23
|
+
Portings of excellent Sawzall aggregators.
|
24
|
+
|
25
|
+
email: llamerada@gmail.com
|
26
|
+
executables: []
|
27
|
+
|
28
|
+
extensions:
|
29
|
+
- ext/extconf.rb
|
30
|
+
extra_rdoc_files: []
|
31
|
+
|
32
|
+
files:
|
33
|
+
- ext/rb_szaru.cc
|
34
|
+
- ext/converter.h
|
35
|
+
- ext/szaru.so
|
36
|
+
- ext/Makefile
|
37
|
+
- ext/extconf.rb
|
38
|
+
- ext/rb_szaru.o
|
39
|
+
- test/quantile_spec.rb
|
40
|
+
- test/top_spec.rb
|
41
|
+
- test/unique_spec.rb
|
42
|
+
- sample/sample.rb
|
43
|
+
- overview.rd
|
44
|
+
- szaru-doc.rb
|
45
|
+
has_rdoc: true
|
46
|
+
homepage: http://llamerada.github.com/SZaru/
|
47
|
+
licenses: []
|
48
|
+
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options:
|
51
|
+
- szaru-doc.rb
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
hash: 3
|
60
|
+
segments:
|
61
|
+
- 0
|
62
|
+
version: "0"
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
hash: 3
|
69
|
+
segments:
|
70
|
+
- 0
|
71
|
+
version: "0"
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 1.3.7
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Portings of excellent Sawzall aggregators
|
79
|
+
test_files:
|
80
|
+
- test/quantile_spec.rb
|
81
|
+
- test/top_spec.rb
|
82
|
+
- test/unique_spec.rb
|