stream_stats 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.rdoc +43 -0
- data/ext/stream_stats/cm_quantile.c +402 -0
- data/ext/stream_stats/cm_quantile.h +88 -0
- data/ext/stream_stats/extconf.rb +3 -0
- data/ext/stream_stats/heap.c +407 -0
- data/ext/stream_stats/heap.h +85 -0
- data/ext/stream_stats/stream_stats.c +135 -0
- data/ext/stream_stats/timer.c +165 -0
- data/ext/stream_stats/timer.h +96 -0
- data/lib/stream_stats.rb +6 -0
- data/lib/stream_stats/stream.rb +17 -0
- data/lib/stream_stats/version.rb +4 -0
- data/stream_stats.gemspec +15 -0
- metadata +57 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
/*
|
2
|
+
* Author: Armon Dadgar
|
3
|
+
*
|
4
|
+
* Header for the Heap functions and data definitions
|
5
|
+
*/
|
6
|
+
|
7
|
+
#ifndef HEAP_H
|
8
|
+
#define HEAP_H
|
9
|
+
|
10
|
+
/*
 * One slot of the heap: a key pointer paired with a value pointer.
 */
typedef struct heap_entry {
    /* Key for this entry */
    void *key;
    /* Value for this entry */
    void *value;
} heap_entry;
|
15
|
+
|
16
|
+
|
17
|
+
// Main struct for representing the heap
|
18
|
+
typedef struct heap {
|
19
|
+
int (*compare_func)(void*, void*); // The key comparison function to use
|
20
|
+
int active_entries; // The number of entries in the heap
|
21
|
+
int minimum_pages; // The minimum number of pages to maintain, based on the initial cap.
|
22
|
+
int allocated_pages; // The number of pages in memory that are allocated
|
23
|
+
heap_entry* table; // Pointer to the table, which maps to the pages
|
24
|
+
} heap;
|
25
|
+
|
26
|
+
// Functions
|
27
|
+
|
28
|
+
/**
|
29
|
+
* Creates a new heap
|
30
|
+
* @param h Pointer to a heap structure that is initialized
|
31
|
+
* @param initial_size What should the initial size of the heap be. If <= 0, then it will be set to the minimum
|
32
|
+
* permissible size of 1 page (512 entries on a 32-bit system with 4K pages).
|
33
|
+
* @param comp_func A pointer to a function that can be used to compare the keys. If NULL, it will be set
|
34
|
+
* to a function which treats keys as signed ints. This function must take two keys, given as pointers and return an int.
|
35
|
+
* It should return -1 if key 1 is smaller, 0 if they are equal, and 1 if key 2 is smaller.
|
36
|
+
*/
|
37
|
+
void heap_create(heap* h, int initial_size, int (*comp_func)(void*,void*));
|
38
|
+
|
39
|
+
/**
|
40
|
+
* Returns the size of the heap
|
41
|
+
* @param h Pointer to a heap structure
|
42
|
+
* @return The number of entries in the heap.
|
43
|
+
*/
|
44
|
+
int heap_size(heap* h);
|
45
|
+
|
46
|
+
/**
|
47
|
+
* Inserts a new element into a heap.
|
48
|
+
* @param h The heap to insert into
|
49
|
+
* @param key The key of the new entry
|
50
|
+
* @param value The value of the new entry
|
51
|
+
*/
|
52
|
+
void heap_insert(heap* h, void* key, void* value);
|
53
|
+
|
54
|
+
/**
|
55
|
+
* Returns the element with the smallest key in the heap.
|
56
|
+
* @param h Pointer to the heap structure
|
57
|
+
* @param key A pointer to a pointer, to set to the minimum key
|
58
|
+
* @param value Set to the value corresponding with the key
|
59
|
+
* @return 1 if the minimum element exists and is set, 0 if there are no elements.
|
60
|
+
*/
|
61
|
+
int heap_min(heap* h, void** key, void** value);
|
62
|
+
|
63
|
+
/**
|
64
|
+
* Deletes the element with the smallest key from the heap.
|
65
|
+
* @param h Pointer to the heap structure
|
66
|
+
* @param key A pointer to a pointer, to set to the minimum key
|
67
|
+
* @param value Set to the value corresponding with the key
|
68
|
+
* @return 1 if the minimum element exists and is deleted, 0 if there are no elements.
|
69
|
+
*/
|
70
|
+
int heap_delmin(heap* h, void** key, void** value);
|
71
|
+
|
72
|
+
/**
|
73
|
+
* Calls a function for each entry in the heap.
|
74
|
+
* @param h The heap to iterate over
|
75
|
+
* @param func The function to call on each entry. Should take a void* key and value.
|
76
|
+
*/
|
77
|
+
void heap_foreach(heap* h, void (*func)(void*,void*));
|
78
|
+
|
79
|
+
/**
|
80
|
+
* Destroys and cleans up a heap.
|
81
|
+
* @param h The heap to destroy.
|
82
|
+
*/
|
83
|
+
void heap_destroy(heap* h);
|
84
|
+
|
85
|
+
#endif
|
@@ -0,0 +1,135 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
|
4
|
+
#include "timer.h"
|
5
|
+
|
6
|
+
/* Ruby class object for StreamStats::Stream; assigned in Init_stream_stats. */
VALUE timer_class;
|
7
|
+
|
8
|
+
/**
 * Free callback registered with Data_Wrap_Struct for the wrapped timer.
 *
 * destroy_timer() only tears down the quantile state embedded in the timer;
 * the timer struct itself comes from malloc() in strstat_timer_init, so it
 * has to be released here as well or it leaks for every collected object.
 *
 * @param ptr The timer pointer that was handed to Data_Wrap_Struct.
 */
static void strstat_timer_free(void *ptr) {
    if (ptr == NULL) {
        return;
    }
    destroy_timer(ptr);
    free(ptr);
}
|
11
|
+
|
12
|
+
/* Arguments handed to strstat_fill_quantiles through rb_protect. */
struct strstat_quantile_fill {
    VALUE source;   /* Ruby array holding the quantiles */
    double *dest;   /* C buffer receiving the converted values */
    uint32_t count; /* Number of entries to convert */
};

/* Converts every array entry to a double; NUM2DBL may raise, so this runs under rb_protect. */
static VALUE strstat_fill_quantiles(VALUE arg) {
    struct strstat_quantile_fill *fill = (struct strstat_quantile_fill *) arg;
    for (uint32_t i = 0; i < fill->count; i++) {
        fill->dest[i] = NUM2DBL(rb_ary_entry(fill->source, (long) i));
    }
    return Qnil;
}

/**
 * Stream#initialize: builds the underlying timer and stores it on self.
 *
 * The array is also stored in the "@quantiles" instance variable, and the
 * wrapped timer in the "timer" instance variable.
 *
 * @param self         The object being initialized.
 * @param rb_eps       Numeric maximum error, converted with NUM2DBL.
 * @param rb_quantiles Ruby Array of numeric quantiles (at least one entry).
 * @return nil
 *
 * Raises TypeError when rb_quantiles is not an Array (or an entry is not
 * numeric), RuntimeError when the array is empty or init_timer reports a
 * non-zero status, and NoMemoryError when an allocation fails.
 */
static VALUE strstat_timer_init(VALUE self, VALUE rb_eps, VALUE rb_quantiles) {
    /* Everything that can raise without owning memory happens first, so an
     * exception at this stage has nothing to leak. */
    double eps = NUM2DBL(rb_eps);

    if (TYPE(rb_quantiles) != T_ARRAY) {
        rb_raise(rb_eTypeError, "not valid value");
    }
    rb_iv_set(self, "@quantiles", rb_quantiles);

    long len = RARRAY_LEN(rb_quantiles);
    if (len < 1) {
        rb_raise(rb_eRuntimeError, "no quantiles defined");
    }
    /* init_timer takes a uint32_t count; refuse lengths that would truncate. */
    if ((uint64_t) len > UINT32_MAX) {
        rb_raise(rb_eArgError, "too many quantiles");
    }
    uint32_t num_quantiles = (uint32_t) len;

    /* calloc checks the count * size multiplication for overflow. */
    double *quantiles = calloc(num_quantiles, sizeof *quantiles);
    if (quantiles == NULL) {
        rb_memerror();
    }

    /* Entry conversion can raise; catch the jump so the buffer is released
     * before the exception continues to propagate. */
    struct strstat_quantile_fill fill = { rb_quantiles, quantiles, num_quantiles };
    int state = 0;
    rb_protect(strstat_fill_quantiles, (VALUE) &fill, &state);
    if (state != 0) {
        free(quantiles);
        rb_jump_tag(state);
    }

    timer *i_timer = malloc(sizeof *i_timer);
    if (i_timer == NULL) {
        free(quantiles);
        rb_memerror();
    }

    /* A non-zero status means the timer was not set up; wrapping it anyway
     * would hand an unusable struct to every later method call. */
    int res = init_timer(eps, quantiles, num_quantiles, i_timer);
    if (res != 0) {
        free(i_timer);
        free(quantiles);
        rb_raise(rb_eRuntimeError, "init timer returned %d", res);
    }

    /* NOTE(review): on success the quantiles buffer is left allocated, as in
     * the original code. Whether init_timer copies or keeps the pointer is
     * not visible here -- confirm against cm_quantile.c and free it if copied. */

    VALUE data = Data_Wrap_Struct(timer_class, NULL, strstat_timer_free, i_timer);
    rb_ivar_set(self, rb_intern("timer"), data);

    return Qnil;
}
|
44
|
+
|
45
|
+
/**
 * Stream#<<: feeds one sample into the wrapped timer.
 *
 * @param self      The Stream object holding the "timer" instance variable.
 * @param rb_sample Numeric sample, converted with NUM2DBL.
 * @return nil; raises RuntimeError when timer_add_sample returns non-zero.
 */
static VALUE strstat_timer_add_sample(VALUE self, VALUE rb_sample) {
    const double value = NUM2DBL(rb_sample);

    VALUE holder = rb_ivar_get(self, rb_intern("timer"));
    timer *wrapped;
    Data_Get_Struct(holder, timer, wrapped);

    const int status = timer_add_sample(wrapped, value);
    if (0 != status) {
        rb_raise(rb_eRuntimeError, "add sample returned %d", status);
    }

    return Qnil;
}
|
61
|
+
|
62
|
+
/**
 * Stream#count: number of samples added so far.
 *
 * @param self The Stream object holding the "timer" instance variable.
 * @return The sample count as a Ruby Integer.
 */
static VALUE strstat_timer_count(VALUE self) {
    timer *i_timer;

    VALUE data = rb_ivar_get(self, rb_intern("timer"));
    Data_Get_Struct(data, timer, i_timer);

    /* timer_count returns uint64_t. LONG2NUM takes a signed long, which
     * truncates where long is 32 bits and turns values above LONG_MAX
     * negative; ULL2NUM keeps the full unsigned range. */
    return ULL2NUM(timer_count(i_timer));
}
|
70
|
+
|
71
|
+
/**
 * Stream#quantile: queries the wrapped timer for a quantile value.
 *
 * @param self     The Stream object holding the "timer" instance variable.
 * @param rb_query Numeric quantile in [0, 1], converted with NUM2DBL.
 * @return The result of timer_query as a Ruby Float; raises RuntimeError
 *         when the quantile is outside [0, 1] or is NaN.
 */
static VALUE strstat_timer_query(VALUE self, VALUE rb_query) {
    double query = NUM2DBL(rb_query);

    /* Phrased as a negated in-range test: NaN fails every comparison, so
     * "query < 0 || query > 1" would let it through to timer_query. */
    if (!(query >= 0 && query <= 1)) {
        rb_raise(rb_eRuntimeError, "invalid quantile");
    }

    timer *i_timer;

    VALUE data = rb_ivar_get(self, rb_intern("timer"));
    Data_Get_Struct(data, timer, i_timer);

    return DBL2NUM(timer_query(i_timer, query));
}
|
82
|
+
|
83
|
+
/**
 * Stream#percentile: quantile lookup expressed as an integer percentage.
 *
 * @param self          The Stream object.
 * @param rb_percentile Value converted with NUM2INT; must lie in 0..100.
 * @return Whatever strstat_timer_query returns for percentile / 100.0;
 *         raises RuntimeError when the percentile is out of range.
 */
static VALUE strstat_timer_percentile(VALUE self, VALUE rb_percentile) {
    const int pct = NUM2INT(rb_percentile);

    if (!(pct >= 0 && pct <= 100)) {
        rb_raise(rb_eRuntimeError, "invalid percentile");
    }

    VALUE rb_fraction = DBL2NUM(pct / 100.0);
    return strstat_timer_query(self, rb_fraction);
}
|
90
|
+
|
91
|
+
/**
 * Shared body of the zero-argument statistic methods: unwraps the timer
 * stored in the "timer" instance variable, applies func to it and converts
 * the double result with DBL2NUM.
 *
 * @param self The Stream object.
 * @param func Accessor taking the timer and returning a double.
 * @return The accessor's result as a Ruby Float.
 */
static VALUE strstat_timer_commoncall(VALUE self, double(*func)(timer*)) {
    VALUE holder = rb_ivar_get(self, rb_intern("timer"));

    timer *wrapped;
    Data_Get_Struct(holder, timer, wrapped);

    const double result = func(wrapped);
    return DBL2NUM(result);
}
|
98
|
+
|
99
|
+
/* Stream#min: result of timer_min, routed through the shared accessor helper. */
static VALUE strstat_timer_min(VALUE self) {
    VALUE rb_min = strstat_timer_commoncall(self, &timer_min);
    return rb_min;
}
|
102
|
+
/* Stream#max: result of timer_max, routed through the shared accessor helper. */
static VALUE strstat_timer_max(VALUE self) {
    VALUE rb_max = strstat_timer_commoncall(self, &timer_max);
    return rb_max;
}
|
105
|
+
/* Stream#mean: result of timer_mean, routed through the shared accessor helper. */
static VALUE strstat_timer_mean(VALUE self) {
    VALUE rb_mean = strstat_timer_commoncall(self, &timer_mean);
    return rb_mean;
}
|
108
|
+
/* Stream#stddev: result of timer_stddev, routed through the shared accessor helper. */
static VALUE strstat_timer_stddev(VALUE self) {
    VALUE rb_stddev = strstat_timer_commoncall(self, &timer_stddev);
    return rb_stddev;
}
|
111
|
+
/* Stream#sum: result of timer_sum, routed through the shared accessor helper. */
static VALUE strstat_timer_sum(VALUE self) {
    VALUE rb_sum = strstat_timer_commoncall(self, &timer_sum);
    return rb_sum;
}
|
114
|
+
/* Stream#squared_sum: result of timer_squared_sum, routed through the shared accessor helper. */
static VALUE strstat_timer_squared_sum(VALUE self) {
    VALUE rb_squared_sum = strstat_timer_commoncall(self, &timer_squared_sum);
    return rb_squared_sum;
}
|
117
|
+
|
118
|
+
void Init_stream_stats(void) {
|
119
|
+
VALUE module = rb_define_module("StreamStats");
|
120
|
+
|
121
|
+
timer_class = rb_define_class_under(module, "Stream", rb_cObject);
|
122
|
+
|
123
|
+
rb_define_method(timer_class, "initialize", strstat_timer_init, 2);
|
124
|
+
rb_define_method(timer_class, "<<", strstat_timer_add_sample, 1);
|
125
|
+
rb_define_method(timer_class, "count", strstat_timer_count, 0);
|
126
|
+
rb_define_method(timer_class, "quantile", strstat_timer_query, 1);
|
127
|
+
rb_define_method(timer_class, "percentile", strstat_timer_percentile, 1);
|
128
|
+
rb_define_method(timer_class, "min", strstat_timer_min, 0);
|
129
|
+
rb_define_method(timer_class, "max", strstat_timer_max, 0);
|
130
|
+
rb_define_method(timer_class, "mean", strstat_timer_mean, 0);
|
131
|
+
rb_define_method(timer_class, "stddev", strstat_timer_stddev, 0);
|
132
|
+
rb_define_method(timer_class, "sum", strstat_timer_sum, 0);
|
133
|
+
rb_define_method(timer_class, "squared_sum", strstat_timer_squared_sum, 0);
|
134
|
+
|
135
|
+
}
|
@@ -0,0 +1,165 @@
|
|
1
|
+
/*
|
2
|
+
Source: https://github.com/armon/statsite/blob/master/src/timer.c
|
3
|
+
Copyright (c) 2012, Armon Dadgar
|
4
|
+
All rights reserved.
|
5
|
+
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
8
|
+
* Redistributions of source code must retain the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer.
|
10
|
+
* Redistributions in binary form must reproduce the above copyright
|
11
|
+
notice, this list of conditions and the following disclaimer in the
|
12
|
+
documentation and/or other materials provided with the distribution.
|
13
|
+
* Neither the name of the organization nor the
|
14
|
+
names of its contributors may be used to endorse or promote products
|
15
|
+
derived from this software without specific prior written permission.
|
16
|
+
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
18
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
19
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
20
|
+
DISCLAIMED. IN NO EVENT SHALL ARMON DADGAR BE LIABLE FOR ANY
|
21
|
+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
22
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
23
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
24
|
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
25
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
26
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
27
|
+
*/
|
28
|
+
|
29
|
+
#include <math.h>
|
30
|
+
#include "timer.h"
|
31
|
+
|
32
|
+
/* Static declarations */
|
33
|
+
static void finalize_timer(timer *timer);
|
34
|
+
|
35
|
+
/**
|
36
|
+
* Initializes the timer struct
|
37
|
+
* @arg eps The maximum error for the quantiles
|
38
|
+
* @arg quantiles A sorted array of double quantile values, must be on (0, 1)
|
39
|
+
* @arg num_quants The number of entries in the quantiles array
|
40
|
+
* @arg timeer The timer struct to initialize
|
41
|
+
* @return 0 on success.
|
42
|
+
*/
|
43
|
+
int init_timer(double eps, double *quantiles, uint32_t num_quants, timer *timer) {
|
44
|
+
timer->count = 0;
|
45
|
+
timer->sum = 0;
|
46
|
+
timer->squared_sum = 0;
|
47
|
+
timer->finalized = 1;
|
48
|
+
int res = init_cm_quantile(eps, quantiles, num_quants, &timer->cm);
|
49
|
+
return res;
|
50
|
+
}
|
51
|
+
|
52
|
+
/**
|
53
|
+
* Destroy the timer struct.
|
54
|
+
* @arg timer The timer to destroy
|
55
|
+
* @return 0 on success.
|
56
|
+
*/
|
57
|
+
int destroy_timer(timer *timer) {
|
58
|
+
return destroy_cm_quantile(&timer->cm);
|
59
|
+
}
|
60
|
+
|
61
|
+
/**
|
62
|
+
* Adds a new sample to the struct
|
63
|
+
* @arg timer The timer to add to
|
64
|
+
* @arg sample The new sample value
|
65
|
+
* @return 0 on success.
|
66
|
+
*/
|
67
|
+
int timer_add_sample(timer *timer, double sample) {
|
68
|
+
timer->count += 1;
|
69
|
+
timer->sum += sample;
|
70
|
+
timer->squared_sum += pow(sample, 2);
|
71
|
+
timer->finalized = 0;
|
72
|
+
return cm_add_sample(&timer->cm, sample);
|
73
|
+
}
|
74
|
+
|
75
|
+
/**
|
76
|
+
* Queries for a quantile value
|
77
|
+
* @arg timer The timer to query
|
78
|
+
* @arg quantile The quantile to query
|
79
|
+
* @return The value on success or 0.
|
80
|
+
*/
|
81
|
+
double timer_query(timer *timer, double quantile) {
|
82
|
+
finalize_timer(timer);
|
83
|
+
return cm_query(&timer->cm, quantile);
|
84
|
+
}
|
85
|
+
|
86
|
+
/**
|
87
|
+
* Returns the number of samples in the timer
|
88
|
+
* @arg timer The timer to query
|
89
|
+
* @return The number of samples
|
90
|
+
*/
|
91
|
+
uint64_t timer_count(timer *timer) {
|
92
|
+
return timer->count;
|
93
|
+
}
|
94
|
+
|
95
|
+
/**
|
96
|
+
* Returns the minimum timer value
|
97
|
+
* @arg timer The timer to query
|
98
|
+
* @return The number of samples
|
99
|
+
*/
|
100
|
+
double timer_min(timer *timer) {
|
101
|
+
finalize_timer(timer);
|
102
|
+
if (!timer->cm.samples) return 0;
|
103
|
+
return timer->cm.samples->value;
|
104
|
+
}
|
105
|
+
|
106
|
+
/**
|
107
|
+
* Returns the mean timer value
|
108
|
+
* @arg timer The timer to query
|
109
|
+
* @return The mean value
|
110
|
+
*/
|
111
|
+
double timer_mean(timer *timer) {
|
112
|
+
return (timer->count) ? timer->sum / timer->count : 0;
|
113
|
+
}
|
114
|
+
|
115
|
+
/**
|
116
|
+
* Returns the sample standard deviation timer value
|
117
|
+
* @arg timer The timer to query
|
118
|
+
* @return The sample standard deviation
|
119
|
+
*/
|
120
|
+
double timer_stddev(timer *timer) {
|
121
|
+
double num = (timer->count * timer->squared_sum) - pow(timer->sum, 2);
|
122
|
+
double div = timer->count * (timer->count - 1);
|
123
|
+
if (div == 0) return 0;
|
124
|
+
return sqrt(num / div);
|
125
|
+
}
|
126
|
+
|
127
|
+
/**
|
128
|
+
* Returns the sum of the timer
|
129
|
+
* @arg timer The timer to query
|
130
|
+
* @return The sum of values
|
131
|
+
*/
|
132
|
+
double timer_sum(timer *timer) {
|
133
|
+
return timer->sum;
|
134
|
+
}
|
135
|
+
|
136
|
+
/**
|
137
|
+
* Returns the sum squared of the timer
|
138
|
+
* @arg timer The timer to query
|
139
|
+
* @return The sum squared of values
|
140
|
+
*/
|
141
|
+
double timer_squared_sum(timer *timer) {
|
142
|
+
return timer->squared_sum;
|
143
|
+
}
|
144
|
+
|
145
|
+
/**
|
146
|
+
* Returns the maximum timer value
|
147
|
+
* @arg timer The timer to query
|
148
|
+
* @return The maximum value
|
149
|
+
*/
|
150
|
+
double timer_max(timer *timer) {
|
151
|
+
finalize_timer(timer);
|
152
|
+
if (!timer->cm.end) return 0;
|
153
|
+
return timer->cm.end->value;
|
154
|
+
}
|
155
|
+
|
156
|
+
// Finalizes the timer for queries
|
157
|
+
static void finalize_timer(timer *timer) {
|
158
|
+
if (timer->finalized) return;
|
159
|
+
|
160
|
+
// Force the quantile to flush internal
|
161
|
+
// buffers so that queries are accurate.
|
162
|
+
cm_flush(&timer->cm);
|
163
|
+
|
164
|
+
timer->finalized = 1;
|
165
|
+
}
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#ifndef TIMER_H
|
2
|
+
#define TIMER_H
|
3
|
+
#include <stdint.h>
|
4
|
+
#include "cm_quantile.h"
|
5
|
+
|
6
|
+
typedef struct {
|
7
|
+
uint64_t count; // Count of items
|
8
|
+
double sum; // Sum of the values
|
9
|
+
double squared_sum; // Sum of the squared values
|
10
|
+
int finalized; // Is the cm_quantile finalized
|
11
|
+
cm_quantile cm; // Quantile we use
|
12
|
+
} timer;
|
13
|
+
|
14
|
+
/**
|
15
|
+
* Initializes the timer struct
|
16
|
+
* @arg eps The maximum error for the quantiles
|
17
|
+
* @arg quantiles A sorted array of double quantile values, must be on (0, 1)
|
18
|
+
* @arg num_quants The number of entries in the quantiles array
|
19
|
+
* @arg timer The timer struct to initialize
|
20
|
+
* @return 0 on success.
|
21
|
+
*/
|
22
|
+
int init_timer(double eps, double *quantiles, uint32_t num_quants, timer *timer);
|
23
|
+
|
24
|
+
/**
|
25
|
+
* Destroy the timer struct.
|
26
|
+
* @arg timer The timer to destroy
|
27
|
+
* @return 0 on success.
|
28
|
+
*/
|
29
|
+
int destroy_timer(timer *timer);
|
30
|
+
|
31
|
+
/**
|
32
|
+
* Adds a new sample to the struct
|
33
|
+
* @arg timer The timer to add to
|
34
|
+
* @arg sample The new sample value
|
35
|
+
* @return 0 on success.
|
36
|
+
*/
|
37
|
+
int timer_add_sample(timer *timer, double sample);
|
38
|
+
|
39
|
+
/**
|
40
|
+
* Queries for a quantile value
|
41
|
+
* @arg timer The timer to query
|
42
|
+
* @arg quantile The quantile to query
|
43
|
+
* @return The value on success or 0.
|
44
|
+
*/
|
45
|
+
double timer_query(timer *timer, double quantile);
|
46
|
+
|
47
|
+
/**
|
48
|
+
* Returns the number of samples in the timer
|
49
|
+
* @arg timer The timer to query
|
50
|
+
* @return The number of samples
|
51
|
+
*/
|
52
|
+
uint64_t timer_count(timer *timer);
|
53
|
+
|
54
|
+
/**
|
55
|
+
* Returns the minimum timer value
|
56
|
+
* @arg timer The timer to query
|
57
|
+
* @return The minimum value, or 0 if there are no samples
|
58
|
+
*/
|
59
|
+
double timer_min(timer *timer);
|
60
|
+
|
61
|
+
/**
|
62
|
+
* Returns the mean timer value
|
63
|
+
* @arg timer The timer to query
|
64
|
+
* @return The mean value
|
65
|
+
*/
|
66
|
+
double timer_mean(timer *timer);
|
67
|
+
|
68
|
+
/**
|
69
|
+
* Returns the sample standard deviation timer value
|
70
|
+
* @arg timer The timer to query
|
71
|
+
* @return The sample standard deviation
|
72
|
+
*/
|
73
|
+
double timer_stddev(timer *timer);
|
74
|
+
|
75
|
+
/**
|
76
|
+
* Returns the sum of the timer
|
77
|
+
* @arg timer The timer to query
|
78
|
+
* @return The sum of values
|
79
|
+
*/
|
80
|
+
double timer_sum(timer *timer);
|
81
|
+
|
82
|
+
/**
|
83
|
+
* Returns the sum squared of the timer
|
84
|
+
* @arg timer The timer to query
|
85
|
+
* @return The sum squared of values
|
86
|
+
*/
|
87
|
+
double timer_squared_sum(timer *timer);
|
88
|
+
|
89
|
+
/**
|
90
|
+
* Returns the maximum timer value
|
91
|
+
* @arg timer The timer to query
|
92
|
+
* @return The maximum value
|
93
|
+
*/
|
94
|
+
double timer_max(timer *timer);
|
95
|
+
|
96
|
+
#endif
|