statsrb 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/statsrb/statsrb.c +408 -200
- data/test/test_statsrb.rb +87 -5
- metadata +4 -4
data/ext/statsrb/statsrb.c
CHANGED
@@ -3,54 +3,358 @@
|
|
3
3
|
#include <string.h>
|
4
4
|
#include <stdlib.h>
|
5
5
|
|
6
|
+
/**
|
7
|
+
* Keeps track of a single event.
|
8
|
+
*/
|
9
|
+
typedef struct {
|
10
|
+
int ns_index;
|
11
|
+
int timestamp;
|
12
|
+
int value;
|
13
|
+
} StatsrbEvent;
|
14
|
+
|
15
|
+
/**
|
16
|
+
* Keeps track of available namespaces.
|
17
|
+
*/
|
18
|
+
typedef struct {
|
19
|
+
char *namespace[256];
|
20
|
+
} StatsrbNS;
|
21
|
+
|
22
|
+
/**
|
23
|
+
* Keeps track of internal storage.
|
24
|
+
*/
|
25
|
+
typedef struct {
|
26
|
+
StatsrbEvent *event_list;
|
27
|
+
int event_count;
|
28
|
+
int event_memory;
|
29
|
+
StatsrbNS *ns_list;
|
30
|
+
int ns_count;
|
31
|
+
int ns_memory;
|
32
|
+
} StatsrbInternal;
|
33
|
+
|
34
|
+
/**
|
35
|
+
* Internal: retrieves the internal storage.
|
36
|
+
*/
|
37
|
+
static StatsrbInternal* statsrb_get_internal(VALUE self) {
|
38
|
+
StatsrbInternal *internal;
|
39
|
+
Data_Get_Struct(self, StatsrbInternal, internal);
|
40
|
+
|
41
|
+
return internal;
|
42
|
+
}
|
43
|
+
|
44
|
+
/**
|
45
|
+
* Internal: allocates internal storage.
|
46
|
+
*/
|
47
|
+
static VALUE statsrb_alloc_internal(VALUE self) {
|
48
|
+
// Allocate internal memory for the StatsrbEvent structs.
|
49
|
+
StatsrbEvent *eventlist = (StatsrbEvent *)calloc(1, sizeof(StatsrbEvent));
|
50
|
+
|
51
|
+
// Allocate memory for the list of namespaces.
|
52
|
+
StatsrbNS *nslist = (StatsrbNS *)calloc(1, sizeof(StatsrbNS));
|
53
|
+
|
54
|
+
// Allocate memory for the pointer storage;
|
55
|
+
StatsrbInternal *internalptr = (StatsrbInternal *)calloc(1, sizeof(StatsrbInternal));
|
56
|
+
internalptr->event_list = eventlist;
|
57
|
+
internalptr->event_count = 0;
|
58
|
+
internalptr->event_memory = 0;
|
59
|
+
internalptr->ns_list = nslist;
|
60
|
+
internalptr->ns_count = 0;
|
61
|
+
internalptr->ns_memory = 0;
|
62
|
+
return Data_Wrap_Struct(self, 0, free, internalptr);
|
63
|
+
}
|
64
|
+
|
65
|
+
/**
|
66
|
+
* Clears out the internal memory.
|
67
|
+
* @return [void]
|
68
|
+
*/
|
69
|
+
static void statsrb_data_clear_events(VALUE self) {
|
70
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
71
|
+
|
72
|
+
// Shrink the event list back to room for a single StatsrbEvent.
|
73
|
+
StatsrbEvent *event_success = (StatsrbEvent *)realloc(internal->event_list, sizeof(StatsrbEvent));
|
74
|
+
|
75
|
+
// Shrink the namespace list back to room for a single StatsrbNS.
|
76
|
+
StatsrbNS *ns_success = (StatsrbNS *)realloc(internal->ns_list, sizeof(StatsrbNS));
|
77
|
+
|
78
|
+
// Only adopt the new pointers and reset the counters if both reallocations succeeded.
|
79
|
+
if (event_success && ns_success) {
|
80
|
+
StatsrbEvent *event_list = event_success;
|
81
|
+
event_success = NULL;
|
82
|
+
StatsrbNS *ns_list = ns_success;
|
83
|
+
ns_success = NULL;
|
84
|
+
internal->event_list = event_list;
|
85
|
+
internal->event_count = 0;
|
86
|
+
internal->event_memory = 0;
|
87
|
+
internal->ns_list = ns_list;
|
88
|
+
internal->ns_count = 0;
|
89
|
+
internal->ns_memory = 0;
|
90
|
+
}
|
91
|
+
else {
|
92
|
+
fprintf(stderr, "Error deallocating memory");
|
93
|
+
return;
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
|
98
|
+
/**
|
99
|
+
* Returns the length of the internal storage.
|
100
|
+
* @return [Numeric] The count of items in the internal storage.
|
101
|
+
*/
|
102
|
+
static VALUE statsrb_length(VALUE self) {
|
103
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
104
|
+
if (!internal->event_count) {
|
105
|
+
internal->event_count = 0;
|
106
|
+
}
|
107
|
+
|
108
|
+
return INT2NUM(internal->event_count);
|
109
|
+
}
|
110
|
+
|
111
|
+
/**
|
112
|
+
* Debugging function.
|
113
|
+
*/
|
114
|
+
static void statsrb_debug_print_internal(VALUE self) {
|
115
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
116
|
+
int i;
|
117
|
+
|
118
|
+
//for (i = 0; i < internal->event_count; i++) {
|
119
|
+
//fprintf(stdout, "Debug: ns: %s; ts: %d; v: %d\n", internal->ns_list[internal->event_list[i].ns_index].namespace, internal->event_list[i].timestamp, internal->event_list[i].value);
|
120
|
+
//}
|
121
|
+
fprintf(stdout, "Debug: count: %d memory: %d\n", internal->event_count, internal->event_memory);
|
122
|
+
}
|
123
|
+
|
124
|
+
|
125
|
+
/**
|
126
|
+
* Implementation of quicksort algorithm.
|
127
|
+
*/
|
128
|
+
void time_sort(int left, int right, StatsrbEvent * event_list) {
|
129
|
+
int i = left;
|
130
|
+
int j = right;
|
131
|
+
int p = (i + j) / 2;
|
132
|
+
int pv = event_list[p].timestamp;
|
133
|
+
StatsrbEvent * tmp = (StatsrbEvent *)malloc(sizeof(StatsrbEvent));
|
134
|
+
|
135
|
+
while (i <= j) {
|
136
|
+
while (event_list[i].timestamp < pv) {
|
137
|
+
i++;
|
138
|
+
}
|
139
|
+
while (event_list[j].timestamp > pv) {
|
140
|
+
j--;
|
141
|
+
}
|
142
|
+
if (i <= j) {
|
143
|
+
memcpy(tmp, &event_list[i], sizeof(StatsrbEvent));
|
144
|
+
memcpy(&event_list[i], &event_list[j], sizeof(StatsrbEvent));
|
145
|
+
memcpy(&event_list[j], tmp, sizeof(StatsrbEvent));
|
146
|
+
i++;
|
147
|
+
j--;
|
148
|
+
}
|
149
|
+
}
|
150
|
+
|
151
|
+
free(tmp);
|
152
|
+
|
153
|
+
if (left < j) {
|
154
|
+
time_sort(left, j, event_list);
|
155
|
+
}
|
156
|
+
if (i < right) {
|
157
|
+
time_sort(i, right, event_list);
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
/**
|
162
|
+
* Sorts internal data using a quicksort algorithm based on each event's timestamp.
|
163
|
+
* @return [Statsrb] A reference to the object.
|
164
|
+
*/
|
165
|
+
static VALUE statsrb_sort(VALUE self) {
|
166
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
167
|
+
if (internal->event_count > 0) {
|
168
|
+
time_sort(0, internal->event_count - 1, internal->event_list);
|
169
|
+
}
|
170
|
+
|
171
|
+
return self;
|
172
|
+
}
|
173
|
+
|
174
|
+
/**
|
175
|
+
* Internal: pushes a namespace onto the internal storage or retrieves a
|
176
|
+
* preexisting one.
|
177
|
+
*
|
178
|
+
* @param VALUE self
|
179
|
+
* @param const char *namespace
|
180
|
+
*
|
181
|
+
* @return Integer
|
182
|
+
* The pointer index of the namespace in @nslist.
|
183
|
+
*/
|
184
|
+
static int statsrb_data_push_ns(VALUE self, const char *namespace) {
|
185
|
+
int i;
|
186
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
187
|
+
|
188
|
+
for (i = 0; i < internal->ns_count; i++) {
|
189
|
+
if (strcmp(internal->ns_list[i].namespace, namespace) == 0) {
|
190
|
+
return i;
|
191
|
+
}
|
192
|
+
}
|
193
|
+
|
194
|
+
int memory = (internal->ns_count + 1) * sizeof(StatsrbNS);
|
195
|
+
StatsrbNS *success = (StatsrbNS *)realloc(internal->ns_list, memory);
|
196
|
+
|
197
|
+
if (success) {
|
198
|
+
internal->ns_list = success;
|
199
|
+
success = NULL;
|
200
|
+
strcpy(internal->ns_list[internal->ns_count].namespace, namespace);
|
201
|
+
internal->ns_count++;
|
202
|
+
return internal->ns_count - 1;
|
203
|
+
}
|
204
|
+
else {
|
205
|
+
fprintf(stderr, "Error allocating memory");
|
206
|
+
}
|
207
|
+
|
208
|
+
}
|
209
|
+
|
210
|
+
/**
|
211
|
+
* Internal: pushes a data event onto the internal storage.
|
212
|
+
*
|
213
|
+
* @param VALUE self
|
214
|
+
* @param const char *namespace
|
215
|
+
* @param int timestamp
|
216
|
+
* @param int value
|
217
|
+
*/
|
218
|
+
static void statsrb_data_push_event(VALUE self, const char *namespace, int timestamp, int value) {
|
219
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
220
|
+
|
221
|
+
// Get the index of the namespace pointer.
|
222
|
+
int ns_index = statsrb_data_push_ns(self, namespace);
|
223
|
+
|
224
|
+
// If it appears that we are approaching the end of the memory block, allocate
|
225
|
+
// some more.
|
226
|
+
// @TODO 2x memory is a little nuts, maybe throttle this back a bit?
|
227
|
+
if ((sizeof(StatsrbEvent) * internal->event_count) > (internal->event_memory * .9)) {
|
228
|
+
internal->event_memory = (2* internal->event_count) * sizeof(StatsrbEvent);
|
229
|
+
StatsrbEvent *success = (StatsrbEvent *)realloc(internal->event_list, internal->event_memory);
|
230
|
+
if (success) {
|
231
|
+
internal->event_list = success;
|
232
|
+
success = NULL;
|
233
|
+
}
|
234
|
+
else {
|
235
|
+
fprintf(stderr, "Error allocating memory");
|
236
|
+
return;
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
// Set the values;
|
241
|
+
internal->event_list[internal->event_count].timestamp = timestamp;
|
242
|
+
internal->event_list[internal->event_count].ns_index = ns_index;
|
243
|
+
internal->event_list[internal->event_count].value = value;
|
244
|
+
|
245
|
+
// Track the count by incrementing the event counter.
|
246
|
+
internal->event_count++;
|
247
|
+
}
|
248
|
+
|
249
|
+
/**
|
250
|
+
* Creates a ruby hash from event VALUEs.
|
251
|
+
*
|
252
|
+
* @param VALUE self
|
253
|
+
* @param VALUE ts
|
254
|
+
* @param VALUE ns
|
255
|
+
* @param VALUE v
|
256
|
+
*/
|
257
|
+
VALUE statsrb_create_rb_event_hash(VALUE self, VALUE ts, VALUE ns, VALUE v) {
|
258
|
+
VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
|
259
|
+
VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
|
260
|
+
VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
|
261
|
+
|
262
|
+
VALUE statsrb_event = rb_hash_new();
|
263
|
+
rb_hash_aset(statsrb_event, statsrb_key_ts, ts);
|
264
|
+
rb_hash_aset(statsrb_event, statsrb_key_ns, ns);
|
265
|
+
rb_hash_aset(statsrb_event, statsrb_key_v, v);
|
266
|
+
|
267
|
+
return statsrb_event;
|
268
|
+
}
|
269
|
+
|
270
|
+
/**
|
271
|
+
* Pushes a stat onto the statsrb object.
|
272
|
+
* @param timestamp [Number]
|
273
|
+
* @param namespace [String]
|
274
|
+
* @param value [Number]
|
275
|
+
* @return [Statsrb] A reference to the object.
|
276
|
+
*/
|
277
|
+
static VALUE statsrb_push(VALUE self, VALUE timestamp, VALUE namespace, VALUE value) {
|
278
|
+
int ts = NUM2INT(timestamp);
|
279
|
+
int v = NUM2INT(value);
|
280
|
+
const char *ns = RSTRING_PTR(namespace);
|
281
|
+
statsrb_data_push_event(self, ns, ts, v);
|
282
|
+
return self;
|
283
|
+
}
|
284
|
+
|
6
285
|
/**
|
7
286
|
* Retrieves internal data based on specified filters.
|
8
287
|
* @param namespace [String]
|
9
288
|
* @param limit [Number]
|
10
289
|
* @param start_time [Number]
|
11
290
|
* @param end_time [Number]
|
12
|
-
* @return [Array] An array of data hashes.
|
291
|
+
* @return [Array] An array of data event hashes.
|
13
292
|
*/
|
14
293
|
static VALUE statsrb_get(VALUE self, VALUE query_ns, VALUE query_limit, VALUE query_start, VALUE query_end) {
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
int
|
19
|
-
int count = 0;
|
20
|
-
int tmp_ts;
|
294
|
+
// @TODO maybe it would be sane to make a new statsrb object and then just have
|
295
|
+
// methods to dump everything to ary, json, etc.
|
296
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
297
|
+
int tmp_ts, tmp_v, tmp_i;
|
21
298
|
|
22
299
|
VALUE filtered_data = rb_ary_new();
|
23
|
-
VALUE
|
300
|
+
VALUE rb_ns_list = rb_ary_new();
|
301
|
+
VALUE statsrb_event;
|
24
302
|
|
25
|
-
|
26
|
-
|
27
|
-
VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
|
28
|
-
VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
|
303
|
+
int i = 0;
|
304
|
+
int filtered_count = 0;
|
29
305
|
|
30
|
-
// Convert into an int that ruby understands.
|
31
306
|
int limit = NUM2INT(query_limit);
|
32
307
|
int qstart = NUM2INT(query_start);
|
33
308
|
int qend = NUM2INT(query_end);
|
34
309
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
310
|
+
VALUE rb_ns;
|
311
|
+
|
312
|
+
// Create rb strings for the namespaces.
|
313
|
+
signed int found = -1;
|
314
|
+
for (i = 0; i < internal->ns_count; i++) {
|
315
|
+
rb_hash_aset(rb_ns_list, INT2NUM(i), rb_str_new2(internal->ns_list[i].namespace));
|
316
|
+
if (strcmp(RSTRING_PTR(query_ns), RSTRING_PTR(rb_hash_aref(rb_ns_list, INT2NUM(i)))) == 0) {
|
317
|
+
memcpy(&found, &i, sizeof(int));
|
318
|
+
}
|
319
|
+
}
|
320
|
+
|
321
|
+
// Return right away if the namespace doesn't exist.
|
322
|
+
if (found == -1) {
|
323
|
+
rb_ary_resize(filtered_data, (long) 0);
|
324
|
+
return filtered_data;
|
325
|
+
}
|
326
|
+
|
327
|
+
// Iterate through the in-memory data to find matches.
|
328
|
+
for (i = 0; i < internal->event_count; i++) {
|
329
|
+
if (found == internal->event_list[i].ns_index
|
330
|
+
&& (qstart == 0 || internal->event_list[i].timestamp >= qstart)
|
331
|
+
&& (qend == 0 || internal->event_list[i].timestamp <= qend)) {
|
332
|
+
|
333
|
+
memcpy(&tmp_ts, &internal->event_list[i].timestamp, sizeof(int));
|
334
|
+
memcpy(&tmp_v, &internal->event_list[i].value, sizeof(int));
|
335
|
+
|
336
|
+
statsrb_event = statsrb_create_rb_event_hash(
|
337
|
+
self,
|
338
|
+
INT2NUM(tmp_ts),
|
339
|
+
rb_hash_aref(rb_ns_list, INT2NUM(found)),
|
340
|
+
INT2NUM(tmp_v)
|
341
|
+
);
|
342
|
+
|
44
343
|
rb_ary_push(filtered_data, statsrb_event);
|
45
|
-
|
344
|
+
filtered_count++;
|
345
|
+
}
|
346
|
+
|
347
|
+
if (limit > 0 && filtered_count == limit) {
|
348
|
+
break;
|
46
349
|
}
|
47
350
|
}
|
48
351
|
|
352
|
+
rb_ary_resize(filtered_data, filtered_count);
|
49
353
|
return filtered_data;
|
50
354
|
}
|
51
355
|
|
52
356
|
/**
|
53
|
-
* Locates data from a specified file and loads into
|
357
|
+
* Locates data from a specified file and loads into internal memory.
|
54
358
|
* @param filepath [String]
|
55
359
|
* @param namespace [String]
|
56
360
|
* @param limit [Number]
|
@@ -60,28 +364,18 @@ static VALUE statsrb_get(VALUE self, VALUE query_ns, VALUE query_limit, VALUE qu
|
|
60
364
|
*/
|
61
365
|
static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query_limit, VALUE query_start, VALUE query_end) {
|
62
366
|
FILE * file;
|
63
|
-
int line_size =
|
367
|
+
int line_size = 512;
|
64
368
|
char *line = (char *) malloc(line_size);
|
369
|
+
char *tmp_ns = (char *) malloc(256);
|
65
370
|
const char *filepath = RSTRING_PTR(logfile);
|
66
371
|
const char *query_ns_char = RSTRING_PTR(query_ns);
|
67
|
-
|
68
|
-
// @data hash key symbols.
|
69
|
-
VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
|
70
|
-
VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
|
71
|
-
VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
|
72
|
-
// Create an empty string for comparison.
|
73
|
-
VALUE statsrb_str_empty = rb_str_new2("");
|
372
|
+
int tmp_v, tmp_ts;
|
74
373
|
|
75
374
|
// Convert the Ruby numbers into C ints.
|
76
375
|
int limit = NUM2INT(query_limit);
|
77
376
|
int qstart = NUM2INT(query_start);
|
78
377
|
int qend = NUM2INT(query_end);
|
79
378
|
|
80
|
-
// Return array instantiation.
|
81
|
-
VALUE statsrb_data = rb_iv_get(self, "@data");
|
82
|
-
// @TODO does this garbage collect all of the old hash data?
|
83
|
-
rb_ary_resize(statsrb_data, 0);
|
84
|
-
|
85
379
|
file = fopen(filepath, "r");
|
86
380
|
if (file == NULL) {
|
87
381
|
fprintf(stderr, "File error: could not open file %s for reading.", filepath);
|
@@ -98,29 +392,28 @@ static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query
|
|
98
392
|
|
99
393
|
// If the namespace is in the row, explode it.
|
100
394
|
if (line[0] != '\0' && line[0] != '\n' && strchr(line, query_ns_char[0]) && strstr(line, query_ns_char)) {
|
101
|
-
VALUE statsrb_event = rb_hash_new();
|
395
|
+
//VALUE statsrb_event = rb_hash_new();
|
102
396
|
|
103
397
|
// I tried sscanf for convenience, but it was predictably slower.
|
104
398
|
//int statsrb_ts, statsrb_v;
|
105
399
|
//sscanf(line, "%d\t%*s\t%d", &statsrb_ts, &statsrb_v);
|
106
400
|
|
107
401
|
// @TODO this should use something more robust than atoi.
|
108
|
-
|
402
|
+
tmp_ts = atoi(strtok(line, "\t"));
|
109
403
|
|
110
|
-
if (
|
404
|
+
if (tmp_ts != NULL && (qstart == 0 || tmp_ts >= qstart) && (qend == 0 || tmp_ts <= qend)) {
|
111
405
|
// @TODO this should probably use the actual namespace if we do wildcard queries.
|
112
|
-
|
406
|
+
strcpy(tmp_ns, strtok(NULL, "\t"));
|
113
407
|
//strtok(NULL, "\t");
|
114
|
-
|
408
|
+
tmp_v = atoi(strtok(NULL, "\0"));
|
115
409
|
|
116
410
|
// @TODO this should really query the namespace exactly instead of just relying on strstr.
|
117
411
|
//if (rb_str_cmp(query_ns, statsrb_str_empty) == 0 || rb_str_cmp(query_ns, statsrb_str_ns) == 0) {
|
118
|
-
if (
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
rb_ary_push(statsrb_data, statsrb_event);
|
412
|
+
if (tmp_ts && (tmp_v || tmp_v == 0)) {
|
413
|
+
statsrb_data_push_event(self,
|
414
|
+
tmp_ns,
|
415
|
+
tmp_ts,
|
416
|
+
tmp_v);
|
124
417
|
count++;
|
125
418
|
}
|
126
419
|
}
|
@@ -130,63 +423,14 @@ static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query
|
|
130
423
|
// terminate
|
131
424
|
fclose (file);
|
132
425
|
free (line);
|
133
|
-
|
134
|
-
//return statsrb_data;
|
135
|
-
//rb_iv_set(self, "@data", statsrb_data);
|
426
|
+
free (tmp_ns);
|
136
427
|
|
137
428
|
return self;
|
138
429
|
}
|
139
430
|
|
140
|
-
/**
|
141
|
-
* Implementation of quicksort algorithm.
|
142
|
-
*/
|
143
|
-
void time_sort(int left, int right, VALUE ary, VALUE statsrb_key_ts) {
|
144
|
-
int i = left;
|
145
|
-
int j = right;
|
146
|
-
int p = (i + j) / 2;
|
147
|
-
int pv = NUM2INT(rb_hash_aref(rb_ary_entry(ary, p), statsrb_key_ts));
|
148
|
-
VALUE tmp;
|
149
|
-
|
150
|
-
while (i <= j) {
|
151
|
-
while (NUM2INT(rb_hash_aref(rb_ary_entry(ary, i), statsrb_key_ts)) < pv) {
|
152
|
-
i++;
|
153
|
-
}
|
154
|
-
while (NUM2INT(rb_hash_aref(rb_ary_entry(ary, j), statsrb_key_ts)) > pv) {
|
155
|
-
j--;
|
156
|
-
}
|
157
|
-
if (i <= j) {
|
158
|
-
tmp = rb_ary_entry(ary, i);
|
159
|
-
rb_ary_store(ary, i, rb_ary_entry(ary, j));
|
160
|
-
rb_ary_store(ary, j, tmp);
|
161
|
-
i++;
|
162
|
-
j--;
|
163
|
-
}
|
164
|
-
}
|
165
|
-
|
166
|
-
if (left < j) {
|
167
|
-
time_sort(left, j, ary, statsrb_key_ts);
|
168
|
-
}
|
169
|
-
if (i < right) {
|
170
|
-
time_sort(i, right, ary, statsrb_key_ts);
|
171
|
-
}
|
172
|
-
}
|
173
|
-
|
174
|
-
/**
|
175
|
-
* Sorts @data using a quicksort algorithm based on the hash element's timestamp.
|
176
|
-
* @return [Hash] The sorted data
|
177
|
-
*/
|
178
|
-
static VALUE statsrb_sort(VALUE self) {
|
179
|
-
VALUE statsrb_data = rb_iv_get(self, "@data");
|
180
|
-
int len = RARRAY_LEN(statsrb_data);
|
181
|
-
if (len > 0) {
|
182
|
-
VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
|
183
|
-
time_sort(0, len - 1, statsrb_data, statsrb_key_ts);
|
184
|
-
}
|
185
|
-
return statsrb_data;
|
186
|
-
}
|
187
431
|
|
188
432
|
/**
|
189
|
-
* Writes the
|
433
|
+
* Writes the in-memory data to a specified file.
|
190
434
|
* @param filepath [String]
|
191
435
|
* @param filemode [String]
|
192
436
|
* @return [Statsrb] A reference to the object.
|
@@ -195,17 +439,9 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
|
|
195
439
|
FILE * file;
|
196
440
|
const char *filepath = RSTRING_PTR(logfile);
|
197
441
|
const char *filemode = RSTRING_PTR(mode);
|
198
|
-
VALUE statsrb_data = rb_iv_get(self, "@data");
|
199
|
-
int data_length = RARRAY_LEN(statsrb_data);
|
200
|
-
int i;
|
201
|
-
int line_size = 256;
|
202
|
-
int tmp_ts, tmp_v;
|
203
|
-
const char *tmp_ns = (char *) malloc(line_size);
|
204
442
|
|
205
|
-
|
206
|
-
|
207
|
-
VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
|
208
|
-
VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
|
443
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
444
|
+
int i;
|
209
445
|
|
210
446
|
file = fopen(filepath, filemode);
|
211
447
|
if (file==NULL) {
|
@@ -213,15 +449,15 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
|
|
213
449
|
return self;
|
214
450
|
}
|
215
451
|
|
216
|
-
// Iterate through the data
|
217
|
-
for (i = 0; i <
|
452
|
+
// Iterate through the internal data, writing as we go.
|
453
|
+
for (i = 0; i < internal->event_count; i++) {
|
218
454
|
// @TODO make sure that these values are not empty before writing.
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
455
|
+
fprintf(file,
|
456
|
+
"%d\t%s\t%d\n",
|
457
|
+
internal->event_list[i].timestamp,
|
458
|
+
internal->ns_list[internal->event_list[i].ns_index].namespace,
|
459
|
+
internal->event_list[i].value
|
460
|
+
);
|
225
461
|
}
|
226
462
|
|
227
463
|
fclose (file);
|
@@ -229,44 +465,28 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
|
|
229
465
|
}
|
230
466
|
|
231
467
|
/**
|
232
|
-
*
|
468
|
+
* Writes the in-memory data to separate files based on namespace.
|
233
469
|
* @param filepath [String]
|
234
|
-
* @param
|
235
|
-
* @param limit [Number]
|
236
|
-
* @param start_time [Number]
|
237
|
-
* @param end_time [Number]
|
470
|
+
* @param filemode [String]
|
238
471
|
* @return [Statsrb] A reference to the object.
|
239
472
|
*/
|
240
473
|
static VALUE statsrb_split_write(VALUE self, VALUE logdir, VALUE mode) {
|
241
|
-
|
242
|
-
int len = RARRAY_LEN(statsrb_data);
|
474
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
243
475
|
int i, ii, ns_len;
|
244
476
|
|
245
|
-
|
246
|
-
VALUE
|
247
|
-
VALUE
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
}
|
257
|
-
|
258
|
-
ns_len = RARRAY_LEN(ns_list);
|
259
|
-
|
260
|
-
for (i = 0; i < ns_len; i++) {
|
261
|
-
VALUE tmp = rb_obj_dup(self);
|
262
|
-
VALUE tmp_data = rb_ary_new();
|
263
|
-
for (ii = 0; ii < len; ii++) {
|
264
|
-
if (rb_str_cmp(rb_ary_entry(ns_list, i), rb_hash_aref(rb_ary_entry(statsrb_data, ii), statsrb_key_ns)) == 0) {
|
265
|
-
rb_ary_push(tmp_data, rb_ary_entry(statsrb_data, ii));
|
477
|
+
VALUE filename;
|
478
|
+
VALUE klass = rb_obj_class(self);
|
479
|
+
VALUE tmp = rb_class_new_instance(0, NULL, klass);
|
480
|
+
|
481
|
+
for (i = 0; i < internal->ns_count; i++) {
|
482
|
+
for (ii = 0; ii < internal->event_count; ii++) {
|
483
|
+
if (strcmp(internal->ns_list[i].namespace, internal->ns_list[internal->event_list[ii].ns_index].namespace) == 0) {
|
484
|
+
statsrb_data_push_event(tmp,
|
485
|
+
internal->ns_list[internal->event_list[ii].ns_index].namespace,
|
486
|
+
internal->event_list[ii].timestamp,
|
487
|
+
internal->event_list[ii].value);
|
266
488
|
}
|
267
489
|
}
|
268
|
-
//fputs (RSTRING_PTR(rb_obj_as_string(INT2NUM(RARRAY_LEN(tmp_data)))),stderr);
|
269
|
-
rb_iv_set(tmp, "@data", tmp_data);
|
270
490
|
|
271
491
|
// If there is no trailing slash on the log dir, add one.
|
272
492
|
const char *filepath = RSTRING_PTR(logdir);
|
@@ -274,7 +494,9 @@ static VALUE statsrb_split_write(VALUE self, VALUE logdir, VALUE mode) {
|
|
274
494
|
if (filepath[len - 1] != '/') {
|
275
495
|
logdir = rb_str_plus(logdir, rb_str_new2("/"));
|
276
496
|
}
|
277
|
-
|
497
|
+
filename = rb_str_new2(internal->ns_list[i].namespace);
|
498
|
+
statsrb_write(tmp, rb_str_plus(logdir, filename), mode);
|
499
|
+
statsrb_data_clear_events(tmp);
|
278
500
|
}
|
279
501
|
|
280
502
|
return self;
|
@@ -322,13 +544,6 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
|
|
322
544
|
VALUE response = rb_ary_new();
|
323
545
|
VALUE headers = rb_hash_new();
|
324
546
|
VALUE body = rb_ary_new();
|
325
|
-
VALUE statsrb_data = rb_iv_get(self, "@data");
|
326
|
-
VALUE statsrb_hash = rb_hash_new();
|
327
|
-
|
328
|
-
// @data hash key symbols.
|
329
|
-
VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
|
330
|
-
VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
|
331
|
-
VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
|
332
547
|
|
333
548
|
char *path = RSTRING_PTR(rb_hash_aref(env, rb_str_new2("PATH_INFO")));
|
334
549
|
|
@@ -344,7 +559,7 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
|
|
344
559
|
const char *method_getu = "GET";
|
345
560
|
const char *method_put = "put";
|
346
561
|
const char *method_putu = "PUT";
|
347
|
-
// Remove the leading
|
562
|
+
// Remove the leading slash.
|
348
563
|
path++;
|
349
564
|
const char *method = strtok(path, "/\0");
|
350
565
|
if (method && (strcmp(method, method_put) == 0 || strcmp(method, method_putu) == 0)) {
|
@@ -373,18 +588,16 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
|
|
373
588
|
statsrb_v = atoi(RSTRING_PTR(statsrb_v_qs));
|
374
589
|
}
|
375
590
|
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
rb_ary_push(statsrb_data, statsrb_hash);
|
591
|
+
statsrb_data_push_event(self, RSTRING_PTR(statsrb_ns), statsrb_ts, statsrb_v);
|
592
|
+
|
593
|
+
int data_length = NUM2INT(statsrb_length(self));
|
380
594
|
|
381
|
-
|
382
|
-
rb_ary_push(body, rb_obj_as_string(INT2NUM(RARRAY_LEN(statsrb_data))));
|
595
|
+
rb_ary_push(body, rb_obj_as_string(INT2NUM(data_length)));
|
383
596
|
|
384
|
-
if (data_length
|
597
|
+
if (data_length >= NUM2INT(rb_iv_get(self, "@flush_count"))) {
|
385
598
|
statsrb_sort(self);
|
386
599
|
statsrb_split_write(self, rb_iv_get(self, "@split_file_dir"), rb_str_new2("a+"));
|
387
|
-
|
600
|
+
statsrb_data_clear_events(self);
|
388
601
|
}
|
389
602
|
|
390
603
|
rb_ary_push(body, statsrb_ns);
|
@@ -400,6 +613,7 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
|
|
400
613
|
if (jsoncallback != Qnil) {
|
401
614
|
rb_ary_push(body, rb_str_plus(jsoncallback, rb_str_new("(", 1)));
|
402
615
|
}
|
616
|
+
// @TODO move this to a to_json method.
|
403
617
|
char json_start[256];
|
404
618
|
sprintf(json_start, "{\"%s\":[", statsrb_str_ns);
|
405
619
|
rb_ary_push(body, rb_str_new2(json_start));
|
@@ -440,27 +654,30 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
|
|
440
654
|
}
|
441
655
|
|
442
656
|
// Create a new Statsrb object to query from.
|
443
|
-
|
444
|
-
VALUE tmp =
|
445
|
-
|
446
|
-
rb_iv_set(tmp, "@data", tmp_data);
|
657
|
+
VALUE klass = rb_obj_class(self);
|
658
|
+
VALUE tmp = rb_class_new_instance(0, NULL, klass);
|
659
|
+
|
447
660
|
statsrb_read(tmp, rb_str_plus(rb_iv_get(self, "@split_file_dir"), statsrb_ns), statsrb_ns, INT2NUM(query_limit), INT2NUM(query_start), INT2NUM(query_end));
|
448
661
|
statsrb_sort(tmp);
|
449
662
|
|
450
|
-
int i, data_length =
|
663
|
+
int i, data_length = NUM2INT(statsrb_length(tmp));
|
664
|
+
StatsrbInternal *internal = statsrb_get_internal(tmp);
|
451
665
|
|
452
666
|
for (i = 0; i < data_length; i++) {
|
453
667
|
rb_ary_push(body, rb_str_new("[", 1));
|
454
|
-
rb_ary_push(body, rb_obj_as_string(
|
455
|
-
rb_ary_push(body, rb_str_new("
|
456
|
-
rb_ary_push(body,
|
668
|
+
rb_ary_push(body, rb_obj_as_string(INT2NUM(internal->event_list[i].timestamp)));
|
669
|
+
rb_ary_push(body, rb_str_new(",\"", 2));
|
670
|
+
rb_ary_push(body, rb_str_new2(internal->ns_list[internal->event_list[i].ns_index].namespace));
|
671
|
+
rb_ary_push(body, rb_str_new("\",", 2));
|
672
|
+
rb_ary_push(body, rb_obj_as_string(INT2NUM(internal->event_list[i].value)));
|
457
673
|
rb_ary_push(body, rb_str_new("]", 1));
|
674
|
+
|
458
675
|
if (i < data_length - 1) {
|
459
676
|
rb_ary_push(body, rb_str_new(",", 1));
|
460
677
|
}
|
461
678
|
rb_ary_push(body, rb_str_new("\n", 1));
|
462
679
|
}
|
463
|
-
|
680
|
+
statsrb_data_clear_events(tmp);
|
464
681
|
}
|
465
682
|
rb_ary_push(body, rb_str_new("]}", 2));
|
466
683
|
if (jsoncallback != Qnil) {
|
@@ -482,36 +699,27 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
|
|
482
699
|
}
|
483
700
|
|
484
701
|
/**
|
485
|
-
*
|
486
|
-
*
|
702
|
+
* Populates the internal storage with test data.
|
703
|
+
*
|
487
704
|
* @param namespace [String]
|
488
|
-
* @param
|
489
|
-
* @return [Statsrb] A reference to the object.
|
705
|
+
* @param amount [Numeric]
|
490
706
|
*/
|
491
|
-
static
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
rb_hash_aset(statsrb_event, statsrb_key_ns, namespace);
|
502
|
-
rb_hash_aset(statsrb_event, statsrb_key_v, value);
|
503
|
-
|
504
|
-
rb_ary_push(statsrb_data, statsrb_event);
|
505
|
-
|
506
|
-
return self;
|
707
|
+
static void statsrb_load_test(VALUE self, VALUE ns, VALUE amt) {
|
708
|
+
StatsrbInternal *internal = statsrb_get_internal(self);
|
709
|
+
int i, val;
|
710
|
+
srand(time(NULL));
|
711
|
+
for (i = 0; i < NUM2INT(amt); i++) {
|
712
|
+
val = rand();
|
713
|
+
statsrb_data_push_event(self, RSTRING_PTR(ns), val + 100, val + 1);
|
714
|
+
}
|
715
|
+
statsrb_debug_print_internal(self);
|
716
|
+
fprintf(stdout, "Debug: count: %d\n", internal->event_count);
|
507
717
|
}
|
508
718
|
|
509
719
|
/**
|
510
720
|
* Class constructor, sets up an instance variable.
|
511
721
|
*/
|
512
722
|
static VALUE statsrb_constructor(VALUE self) {
|
513
|
-
VALUE statsrb_data = rb_ary_new();
|
514
|
-
rb_iv_set(self, "@data", statsrb_data);
|
515
723
|
VALUE statsrb_split_file_dir = rb_str_new("/tmp", 4);
|
516
724
|
rb_iv_set(self, "@split_file_dir", statsrb_split_file_dir);
|
517
725
|
rb_iv_set(self, "@flush_count", INT2NUM(9));
|
@@ -535,21 +743,21 @@ void Init_statsrb(void) {
|
|
535
743
|
VALUE klass = rb_define_class("Statsrb", rb_cObject);
|
536
744
|
|
537
745
|
// Instance methods and properties.
|
746
|
+
rb_define_alloc_func(klass, statsrb_alloc_internal);
|
538
747
|
rb_define_method(klass, "initialize", statsrb_constructor, 0);
|
539
748
|
rb_define_method(klass, "query", statsrb_read, 5);
|
540
749
|
rb_define_method(klass, "read", statsrb_read, 5);
|
541
750
|
rb_define_method(klass, "get", statsrb_get, 4);
|
751
|
+
rb_define_method(klass, "load_test", statsrb_load_test, 2);
|
752
|
+
rb_define_method(klass, "length", statsrb_length, 0);
|
542
753
|
rb_define_method(klass, "sort", statsrb_sort, 0);
|
543
754
|
rb_define_method(klass, "write", statsrb_write, 2);
|
544
755
|
rb_define_method(klass, "split_write", statsrb_split_write, 2);
|
545
756
|
rb_define_method(klass, "push", statsrb_push, 3);
|
757
|
+
rb_define_method(klass, "clear", statsrb_data_clear_events, 0);
|
546
758
|
rb_define_method(klass, "call", statsrb_rack_call, 1);
|
547
|
-
// Define :attr_accessor (read/write instance var)
|
548
|
-
// Note that this must correspond with a call to rb_iv_self() and it's string name must be @data.
|
549
|
-
// An array of hashes keyed with :ts(timestamp), :ns(namespace) and :v(value) e.g. [!{:ts => Time.now.to_i, :ns => "test", :v => 33}]
|
550
|
-
rb_define_attr(klass, "data", 1, 1);
|
551
759
|
// The file directory to write when splitting namespaces. @see #split_write
|
552
760
|
rb_define_attr(klass, "split_file_dir", 1, 1);
|
553
|
-
// When used with a rack server, the max count of
|
761
|
+
// When used with a rack server, the max count of internal events.
|
554
762
|
rb_define_attr(klass, "flush_count", 1, 1);
|
555
763
|
}
|
data/test/test_statsrb.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
2
|
require 'statsrb'
|
3
|
+
require 'json'
|
4
|
+
require 'pp'
|
3
5
|
|
4
6
|
class TestStatsrb < MiniTest::Test
|
5
7
|
|
@@ -8,10 +10,14 @@ class TestStatsrb < MiniTest::Test
|
|
8
10
|
def setup
|
9
11
|
@s = Statsrb.new
|
10
12
|
@tmpfile = "/tmp/test.statsrb"
|
13
|
+
@s.split_file_dir = "/tmp/"
|
14
|
+
@s.flush_count = 10
|
11
15
|
end
|
12
16
|
|
13
17
|
def teardown
|
14
18
|
File.delete @tmpfile unless !File.exists? @tmpfile
|
19
|
+
rackfile = "/tmp/test"
|
20
|
+
File.delete rackfile unless !File.exists? rackfile
|
15
21
|
end
|
16
22
|
|
17
23
|
# Provides test data.
|
@@ -38,12 +44,14 @@ class TestStatsrb < MiniTest::Test
|
|
38
44
|
# Tests that the data was indeed pushed.
|
39
45
|
def test_push_data
|
40
46
|
push_data
|
41
|
-
assert_equal @s.
|
47
|
+
assert_equal @s.length, get_data.length
|
42
48
|
end
|
43
|
-
|
49
|
+
|
44
50
|
# Tests that we can filter the in-memory data.
|
45
51
|
def test_get_data
|
46
52
|
push_data
|
53
|
+
t = @s.get "test1", 100, 0, 0
|
54
|
+
assert_equal(t.length, 3);
|
47
55
|
t = @s.get "test2", 100, 0, 0
|
48
56
|
assert_equal(t.length, 2);
|
49
57
|
end
|
@@ -53,7 +61,8 @@ class TestStatsrb < MiniTest::Test
|
|
53
61
|
current = 0
|
54
62
|
push_data
|
55
63
|
@s.sort
|
56
|
-
@s.
|
64
|
+
t = @s.get "test1", 100, 0, 0
|
65
|
+
t.each do |value|
|
57
66
|
assert value[:ts] > current
|
58
67
|
current = value[:ts]
|
59
68
|
end
|
@@ -77,13 +86,86 @@ class TestStatsrb < MiniTest::Test
|
|
77
86
|
end
|
78
87
|
end
|
79
88
|
|
89
|
+
# Tests that we can clear data from memory.
|
90
|
+
def test_clear_data
|
91
|
+
push_data
|
92
|
+
assert_equal @s.length, get_data.length
|
93
|
+
@s.clear
|
94
|
+
assert_equal @s.length, 0
|
95
|
+
end
|
96
|
+
|
80
97
|
# Tests that we can read data from a file.
|
81
98
|
def test_read_data
|
82
99
|
push_data
|
83
100
|
write_data
|
101
|
+
@s.clear
|
84
102
|
@s.read @tmpfile, "test1", 100, 0, 0
|
85
|
-
assert_equal @s.
|
103
|
+
assert_equal @s.length, 3
|
104
|
+
@s.clear
|
86
105
|
@s.read @tmpfile, "test2", 100, 0, 0
|
87
|
-
assert_equal @s.
|
106
|
+
assert_equal @s.length, 2
|
107
|
+
end
|
108
|
+
|
109
|
+
# Tests that the rack interface works properly.
|
110
|
+
def test_rack_call
|
111
|
+
# Test putting data.
|
112
|
+
env = {
|
113
|
+
"PATH_INFO" => "/PUT",
|
114
|
+
"QUERY_STRING" => "name=test&value=13"
|
115
|
+
}
|
116
|
+
|
117
|
+
5.times do |i|
|
118
|
+
@s.call(env);
|
119
|
+
end
|
120
|
+
|
121
|
+
assert_equal 5, @s.length
|
122
|
+
|
123
|
+
# Write enough data to flush.
|
124
|
+
5.times do |i|
|
125
|
+
@s.call(env);
|
126
|
+
end
|
127
|
+
|
128
|
+
# Test getting data.
|
129
|
+
env = {
|
130
|
+
"PATH_INFO" => "/GET/test",
|
131
|
+
"QUERY_STRING" => ""
|
132
|
+
}
|
133
|
+
|
134
|
+
resp = @s.call(env)
|
135
|
+
data = JSON.parse(resp[2].join)
|
136
|
+
assert_equal data["test"].length, 10
|
137
|
+
end
|
138
|
+
|
139
|
+
def test_no_results
|
140
|
+
push_data
|
141
|
+
t = @s.get "noresults", 100, 0, 0
|
142
|
+
assert_equal(t.length, 0);
|
143
|
+
end
|
144
|
+
|
145
|
+
# Tests large data volumes.
|
146
|
+
def test_large_data
|
147
|
+
# Load a lot of data.
|
148
|
+
@s.load_test "kevin", 500000
|
149
|
+
@s.load_test "melissa", 500000
|
150
|
+
@s.load_test "benjamin", 500000
|
151
|
+
@s.sort
|
152
|
+
# Extract all of one namespace.
|
153
|
+
t = @s.get "melissa", 100000, 0, 0
|
154
|
+
assert_equal t.length, 100000
|
155
|
+
# Push them back to the object.
|
156
|
+
t.each do |i|
|
157
|
+
@s.push i[:ts], i[:ns], i[:v]
|
158
|
+
end
|
159
|
+
# Save it to file and clear it.
|
160
|
+
@s.write @tmpfile, "w+"
|
161
|
+
@s.clear
|
162
|
+
# Re-load the data.
|
163
|
+
@s.read @tmpfile, "melissa", 600000, 0, 0
|
164
|
+
# Try to get one that doesn't exist.
|
165
|
+
t = @s.get "kevin", 10000, 0, 0
|
166
|
+
assert_equal t.length, 0
|
167
|
+
# Try to get all of the data out.
|
168
|
+
t = @s.get "melissa", 600000, 0, 0
|
169
|
+
assert_equal t.length, 600000
|
88
170
|
end
|
89
171
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,10 +9,10 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-28 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description: A Ruby time series stats repository using flat file storage,
|
15
|
-
a Ruby API as well as a Rack compatible REST API.
|
14
|
+
description: A Ruby time series stats repository written in C, using flat file storage,
|
15
|
+
providing a Ruby API as well as a Rack compatible REST API.
|
16
16
|
email: email@kevinhankens.com
|
17
17
|
executables: []
|
18
18
|
extensions:
|