statsrb 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/ext/statsrb/statsrb.c +408 -200
  2. data/test/test_statsrb.rb +87 -5
  3. metadata +4 -4
@@ -3,54 +3,358 @@
3
3
  #include <string.h>
4
4
  #include <stdlib.h>
5
5
 
/**
 * A single recorded data point.
 *
 * The namespace name is not stored inline: ns_index refers to an entry in the
 * namespace list kept alongside the events in the internal storage.
 */
typedef struct StatsrbEvent {
  int ns_index;   /* position of the event's namespace in the namespace list */
  int timestamp;  /* time of the event; events are sorted on this field */
  int value;      /* the recorded value */
} StatsrbEvent;
14
+
15
+ /**
16
+ * Keeps track of available namespaces.
17
+ */
18
+ typedef struct {
19
+ char *namespace[256];
20
+ } StatsrbNS;
21
+
22
+ /**
23
+ * Keeps track of internal storage.
24
+ */
25
+ typedef struct {
26
+ StatsrbEvent *event_list;
27
+ int event_count;
28
+ int event_memory;
29
+ StatsrbNS *ns_list;
30
+ int ns_count;
31
+ int ns_memory;
32
+ } StatsrbInternal;
33
+
34
+ /**
35
+ * Internal: retreives the internal storage.
36
+ */
37
+ static StatsrbInternal* statsrb_get_internal(VALUE self) {
38
+ StatsrbInternal *internal;
39
+ Data_Get_Struct(self, StatsrbInternal, internal);
40
+
41
+ return internal;
42
+ }
43
+
44
+ /**
45
+ * Internal: allocates internal storage.
46
+ */
47
+ static VALUE statsrb_alloc_internal(VALUE self) {
48
+ // Allocate internal memory for the StatsrbEvent structs.
49
+ StatsrbEvent *eventlist = (StatsrbEvent *)calloc(1, sizeof(StatsrbEvent));
50
+
51
+ // Allocate memory for the list of namespaces.
52
+ StatsrbNS *nslist = (StatsrbNS *)calloc(1, sizeof(StatsrbNS));
53
+
54
+ // Allocate memory for the pointer storage;
55
+ StatsrbInternal *internalptr = (StatsrbInternal *)calloc(1, sizeof(StatsrbInternal));
56
+ internalptr->event_list = eventlist;
57
+ internalptr->event_count = 0;
58
+ internalptr->event_memory = 0;
59
+ internalptr->ns_list = nslist;
60
+ internalptr->ns_count = 0;
61
+ internalptr->ns_memory = 0;
62
+ return Data_Wrap_Struct(self, 0, free, internalptr);
63
+ }
64
+
65
+ /**
66
+ * Clears out the internal memory.
67
+ * @return [void]
68
+ */
69
+ static void statsrb_data_clear_events(VALUE self) {
70
+ StatsrbInternal *internal = statsrb_get_internal(self);
71
+
72
+ // Allocate internal memory for the StatsrbEvent structs.
73
+ StatsrbEvent *event_success = (StatsrbEvent *)realloc(internal->event_list, sizeof(StatsrbEvent));
74
+
75
+ // Allocate memory for the list of namespaces.
76
+ StatsrbNS *ns_success = (StatsrbNS *)realloc(internal->ns_list, sizeof(StatsrbNS));
77
+
78
+ // Allocate memory for the pointer storage;
79
+ if (event_success && ns_success) {
80
+ StatsrbEvent *event_list = event_success;
81
+ event_success = NULL;
82
+ StatsrbNS *ns_list = ns_success;
83
+ ns_success = NULL;
84
+ internal->event_list = event_list;
85
+ internal->event_count = 0;
86
+ internal->event_memory = 0;
87
+ internal->ns_list = ns_list;
88
+ internal->ns_count = 0;
89
+ internal->ns_memory = 0;
90
+ }
91
+ else {
92
+ fprintf(stderr, "Error deallocating memory");
93
+ return;
94
+ }
95
+ }
96
+
97
+
98
+ /**
99
+ * Returns the length of the internal storage.
100
+ * @return [Numeric] The count of items in the internal storage.
101
+ */
102
+ static VALUE statsrb_length(VALUE self) {
103
+ StatsrbInternal *internal = statsrb_get_internal(self);
104
+ if (!internal->event_count) {
105
+ internal->event_count = 0;
106
+ }
107
+
108
+ return INT2NUM(internal->event_count);
109
+ }
110
+
111
+ /**
112
+ * Debugging function.
113
+ */
114
+ static void statsrb_debug_print_internal(VALUE self) {
115
+ StatsrbInternal *internal = statsrb_get_internal(self);
116
+ int i;
117
+
118
+ //for (i = 0; i < internal->event_count; i++) {
119
+ //fprintf(stdout, "Debug: ns: %s; ts: %d; v: %d\n", internal->ns_list[internal->event_list[i].ns_index].namespace, internal->event_list[i].timestamp, internal->event_list[i].value);
120
+ //}
121
+ fprintf(stdout, "Debug: count: %d memory: %d\n", internal->event_count, internal->event_memory);
122
+ }
123
+
124
+
125
+ /**
126
+ * Implementation of quicksort algorithm.
127
+ */
128
+ void time_sort(int left, int right, StatsrbEvent * event_list) {
129
+ int i = left;
130
+ int j = right;
131
+ int p = (i + j) / 2;
132
+ int pv = event_list[p].timestamp;
133
+ StatsrbEvent * tmp = (StatsrbEvent *)malloc(sizeof(StatsrbEvent));
134
+
135
+ while (i <= j) {
136
+ while (event_list[i].timestamp < pv) {
137
+ i++;
138
+ }
139
+ while (event_list[j].timestamp > pv) {
140
+ j--;
141
+ }
142
+ if (i <= j) {
143
+ memcpy(tmp, &event_list[i], sizeof(StatsrbEvent));
144
+ memcpy(&event_list[i], &event_list[j], sizeof(StatsrbEvent));
145
+ memcpy(&event_list[j], tmp, sizeof(StatsrbEvent));
146
+ i++;
147
+ j--;
148
+ }
149
+ }
150
+
151
+ free(tmp);
152
+
153
+ if (left < j) {
154
+ time_sort(left, j, event_list);
155
+ }
156
+ if (i < right) {
157
+ time_sort(i, right, event_list);
158
+ }
159
+ }
160
+
161
+ /**
162
+ * Sorts internal data using a quicksort algorithm based on the hash element's timestamp.
163
+ * @return [Statsrb] A reference to the object.
164
+ */
165
+ static VALUE statsrb_sort(VALUE self) {
166
+ StatsrbInternal *internal = statsrb_get_internal(self);
167
+ if (internal->event_count > 0) {
168
+ time_sort(0, internal->event_count - 1, internal->event_list);
169
+ }
170
+
171
+ return self;
172
+ }
173
+
174
+ /**
175
+ * Internal: pushes a namespace onto the internal storage or retrieves a
176
+ * preexisting one.
177
+ *
178
+ * @param VALUE self
179
+ * @param const char *namespace
180
+ *
181
+ * @return Integer
182
+ * The pointer index of the namespace in @nslist.
183
+ */
184
+ static int statsrb_data_push_ns(VALUE self, const char *namespace) {
185
+ int i;
186
+ StatsrbInternal *internal = statsrb_get_internal(self);
187
+
188
+ for (i = 0; i < internal->ns_count; i++) {
189
+ if (strcmp(internal->ns_list[i].namespace, namespace) == 0) {
190
+ return i;
191
+ }
192
+ }
193
+
194
+ int memory = (internal->ns_count + 1) * sizeof(StatsrbNS);
195
+ StatsrbNS *success = (StatsrbNS *)realloc(internal->ns_list, memory);
196
+
197
+ if (success) {
198
+ internal->ns_list = success;
199
+ success = NULL;
200
+ strcpy(internal->ns_list[internal->ns_count].namespace, namespace);
201
+ internal->ns_count++;
202
+ return internal->ns_count - 1;
203
+ }
204
+ else {
205
+ fprintf(stderr, "Error allocating memory");
206
+ }
207
+
208
+ }
209
+
210
+ /**
211
+ * Internal: pushes a data event onto the internal storage.
212
+ *
213
+ * @param VALUE self
214
+ * @param const char *namespace
215
+ * @param int timestamp
216
+ * @param int value
217
+ */
218
+ static void statsrb_data_push_event(VALUE self, const char *namespace, int timestamp, int value) {
219
+ StatsrbInternal *internal = statsrb_get_internal(self);
220
+
221
+ // Get the index of the namespace pointer.
222
+ int ns_index = statsrb_data_push_ns(self, namespace);
223
+
224
+ // If it appears that we are approaching the end of the memory block, allocate
225
+ // some more.
226
+ // @TODO 2x memory is a little nuts, maybe throttle this back a bit?
227
+ if ((sizeof(StatsrbEvent) * internal->event_count) > (internal->event_memory * .9)) {
228
+ internal->event_memory = (2* internal->event_count) * sizeof(StatsrbEvent);
229
+ StatsrbEvent *success = (StatsrbEvent *)realloc(internal->event_list, internal->event_memory);
230
+ if (success) {
231
+ internal->event_list = success;
232
+ success = NULL;
233
+ }
234
+ else {
235
+ fprintf(stderr, "Error allocating memory");
236
+ return;
237
+ }
238
+ }
239
+
240
+ // Set the values;
241
+ internal->event_list[internal->event_count].timestamp = timestamp;
242
+ internal->event_list[internal->event_count].ns_index = ns_index;
243
+ internal->event_list[internal->event_count].value = value;
244
+
245
+ // Track the count by saving the new pointer.
246
+ internal->event_count++;
247
+ }
248
+
249
+ /**
250
+ * Creates a ruby hash from event VALUEs.
251
+ *
252
+ * @param VALUE self
253
+ * @param VALUE ts
254
+ * @param VALUE ns
255
+ * @param VALUE v
256
+ */
257
+ VALUE statsrb_create_rb_event_hash(VALUE self, VALUE ts, VALUE ns, VALUE v) {
258
+ VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
259
+ VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
260
+ VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
261
+
262
+ VALUE statsrb_event = rb_hash_new();
263
+ rb_hash_aset(statsrb_event, statsrb_key_ts, ts);
264
+ rb_hash_aset(statsrb_event, statsrb_key_ns, ns);
265
+ rb_hash_aset(statsrb_event, statsrb_key_v, v);
266
+
267
+ return statsrb_event;
268
+ }
269
+
270
+ /**
271
+ * Pushes a stat onto the statsrb object.
272
+ * @param timestamp [Number]
273
+ * @param namespace [String]
274
+ * @param value [Number]
275
+ * @return [Statsrb] A reference to the object.
276
+ */
277
+ static VALUE statsrb_push(VALUE self, VALUE timestamp, VALUE namespace, VALUE value) {
278
+ int ts = NUM2INT(timestamp);
279
+ int v = NUM2INT(value);
280
+ const char *ns = RSTRING_PTR(namespace);
281
+ statsrb_data_push_event(self, ns, ts, v);
282
+ return self;
283
+ }
284
+
6
285
  /**
7
286
  * Retrieves internal data based on specified filters.
8
287
  * @param namespace [String]
9
288
  * @param limit [Number]
10
289
  * @param start_time [Number]
11
290
  * @param end_time [Number]
12
- * @return [Array] An array of data hashes.
291
+ * @return [Array] An array of data event hashes.
13
292
  */
14
293
  static VALUE statsrb_get(VALUE self, VALUE query_ns, VALUE query_limit, VALUE query_start, VALUE query_end) {
15
- VALUE statsrb_data = rb_iv_get(self, "@data");
16
- VALUE statsrb_event = rb_hash_new();
17
- int data_length = RARRAY_LEN(statsrb_data);
18
- int i = 0;
19
- int count = 0;
20
- int tmp_ts;
294
+ // @TODO maybe it would be sane to make a new statsrb object and then just have
295
+ // methods to dump everything to ary, json, etc.
296
+ StatsrbInternal *internal = statsrb_get_internal(self);
297
+ int tmp_ts, tmp_v, tmp_i;
21
298
 
22
299
  VALUE filtered_data = rb_ary_new();
23
- VALUE tmp_ns;
300
+ VALUE rb_ns_list = rb_ary_new();
301
+ VALUE statsrb_event;
24
302
 
25
- // @data hash key symbols.
26
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
27
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
28
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
303
+ int i = 0;
304
+ int filtered_count = 0;
29
305
 
30
- // Convert into an int that ruby understands.
31
306
  int limit = NUM2INT(query_limit);
32
307
  int qstart = NUM2INT(query_start);
33
308
  int qend = NUM2INT(query_end);
34
309
 
35
- for (i = 0; i < data_length; i++) {
36
- tmp_ts = NUM2INT(rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ts));
37
- tmp_ns = rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns);
38
- if (rb_str_equal(query_ns, tmp_ns)
39
- && (qstart == 0 || tmp_ts >= qstart)
40
- && (qend == 0 || tmp_ts <= qend)) {
41
- rb_hash_aset(statsrb_event, statsrb_key_ts, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ts));
42
- rb_hash_aset(statsrb_event, statsrb_key_ns, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns));
43
- rb_hash_aset(statsrb_event, statsrb_key_v, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_v));
310
+ VALUE rb_ns;
311
+
312
+ // Create rb strings for the namespaces.
313
+ signed int found = -1;
314
+ for (i = 0; i < internal->ns_count; i++) {
315
+ rb_hash_aset(rb_ns_list, INT2NUM(i), rb_str_new2(internal->ns_list[i].namespace));
316
+ if (strcmp(RSTRING_PTR(query_ns), RSTRING_PTR(rb_hash_aref(rb_ns_list, INT2NUM(i)))) == 0) {
317
+ memcpy(&found, &i, sizeof(int));
318
+ }
319
+ }
320
+
321
+ // Return right away if the namespace doesn't exist.
322
+ if (found == -1) {
323
+ rb_ary_resize(filtered_data, (long) 0);
324
+ return filtered_data;
325
+ }
326
+
327
+ // Iterate through the in-memory data to find matches.
328
+ for (i = 0; i < internal->event_count; i++) {
329
+ if (found == internal->event_list[i].ns_index
330
+ && (qstart == 0 || internal->event_list[i].timestamp >= qstart)
331
+ && (qend == 0 || internal->event_list[i].timestamp <= qend)) {
332
+
333
+ memcpy(&tmp_ts, &internal->event_list[i].timestamp, sizeof(int));
334
+ memcpy(&tmp_v, &internal->event_list[i].value, sizeof(int));
335
+
336
+ statsrb_event = statsrb_create_rb_event_hash(
337
+ self,
338
+ INT2NUM(tmp_ts),
339
+ rb_hash_aref(rb_ns_list, INT2NUM(found)),
340
+ INT2NUM(tmp_v)
341
+ );
342
+
44
343
  rb_ary_push(filtered_data, statsrb_event);
45
- count++;
344
+ filtered_count++;
345
+ }
346
+
347
+ if (limit > 0 && filtered_count == limit) {
348
+ break;
46
349
  }
47
350
  }
48
351
 
352
+ rb_ary_resize(filtered_data, filtered_count);
49
353
  return filtered_data;
50
354
  }
51
355
 
52
356
  /**
53
- * Locates data from a specified file and loads into @data.
357
+ * Locates data from a specified file and loads into internal memory.
54
358
  * @param filepath [String]
55
359
  * @param namespace [String]
56
360
  * @param limit [Number]
@@ -60,28 +364,18 @@ static VALUE statsrb_get(VALUE self, VALUE query_ns, VALUE query_limit, VALUE qu
60
364
  */
61
365
  static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query_limit, VALUE query_start, VALUE query_end) {
62
366
  FILE * file;
63
- int line_size = 256;
367
+ int line_size = 512;
64
368
  char *line = (char *) malloc(line_size);
369
+ char *tmp_ns = (char *) malloc(256);
65
370
  const char *filepath = RSTRING_PTR(logfile);
66
371
  const char *query_ns_char = RSTRING_PTR(query_ns);
67
-
68
- // @data hash key symbols.
69
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
70
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
71
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
72
- // Create an empty string for comparison.
73
- VALUE statsrb_str_empty = rb_str_new2("");
372
+ int tmp_v, tmp_ts;
74
373
 
75
374
  // Convert into an int that ruby understands.
76
375
  int limit = NUM2INT(query_limit);
77
376
  int qstart = NUM2INT(query_start);
78
377
  int qend = NUM2INT(query_end);
79
378
 
80
- // Return array instantiation.
81
- VALUE statsrb_data = rb_iv_get(self, "@data");
82
- // @TODO does this garbage collect all of the old hash data?
83
- rb_ary_resize(statsrb_data, 0);
84
-
85
379
  file = fopen(filepath, "r");
86
380
  if (file == NULL) {
87
381
  fprintf(stderr, "File error: could not open file %s for reading.", filepath);
@@ -98,29 +392,28 @@ static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query
98
392
 
99
393
  // If the namespace is in the row, explode it.
100
394
  if (line[0] != '\0' && line[0] != '\n' && strchr(line, query_ns_char[0]) && strstr(line, query_ns_char)) {
101
- VALUE statsrb_event = rb_hash_new();
395
+ //VALUE statsrb_event = rb_hash_new();
102
396
 
103
397
  // I tried sscanf for convenience, but it was predictably slower.
104
398
  //int statsrb_ts, statsrb_v;
105
399
  //sscanf(line, "%d\t%*s\t%d", &statsrb_ts, &statsrb_v);
106
400
 
107
401
  // @TODO this should use something more robust than atoi.
108
- int statsrb_ts = atoi(strtok(line, "\t"));
402
+ tmp_ts = atoi(strtok(line, "\t"));
109
403
 
110
- if (statsrb_ts != NULL && (qstart == 0 || statsrb_ts >= qstart) && (qend == 0 || statsrb_ts <= qend)) {
404
+ if (tmp_ts != NULL && (qstart == 0 || tmp_ts >= qstart) && (qend == 0 || tmp_ts <= qend)) {
111
405
  // @TODO this should probably use the actual namespace if we do wildcard queries.
112
- VALUE statsrb_str_ns = rb_str_new2(strtok(NULL, "\t"));
406
+ strcpy(tmp_ns, strtok(NULL, "\t"));
113
407
  //strtok(NULL, "\t");
114
- int statsrb_v = atoi(strtok(NULL, "\0"));
408
+ tmp_v = atoi(strtok(NULL, "\0"));
115
409
 
116
410
  // @TODO this should really query the namespace exactly instead of just relying on strstr.
117
411
  //if (rb_str_cmp(query_ns, statsrb_str_empty) == 0 || rb_str_cmp(query_ns, statsrb_str_ns) == 0) {
118
- if (statsrb_ts && (statsrb_v || statsrb_v == 0)) {
119
- rb_hash_aset(statsrb_event, statsrb_key_ts, INT2NUM(statsrb_ts));
120
- rb_hash_aset(statsrb_event, statsrb_key_ns, statsrb_str_ns);
121
- //rb_hash_aset(statsrb_event, statsrb_key_ns, query_ns);
122
- rb_hash_aset(statsrb_event, statsrb_key_v, INT2NUM(statsrb_v));
123
- rb_ary_push(statsrb_data, statsrb_event);
412
+ if (tmp_ts && (tmp_v || tmp_v == 0)) {
413
+ statsrb_data_push_event(self,
414
+ tmp_ns,
415
+ tmp_ts,
416
+ tmp_v);
124
417
  count++;
125
418
  }
126
419
  }
@@ -130,63 +423,14 @@ static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query
130
423
  // terminate
131
424
  fclose (file);
132
425
  free (line);
133
-
134
- //return statsrb_data;
135
- //rb_iv_set(self, "@data", statsrb_data);
426
+ free (tmp_ns);
136
427
 
137
428
  return self;
138
429
  }
139
430
 
140
- /**
141
- * Implementation of quicksort algorithm.
142
- */
143
- void time_sort(int left, int right, VALUE ary, VALUE statsrb_key_ts) {
144
- int i = left;
145
- int j = right;
146
- int p = (i + j) / 2;
147
- int pv = NUM2INT(rb_hash_aref(rb_ary_entry(ary, p), statsrb_key_ts));
148
- VALUE tmp;
149
-
150
- while (i <= j) {
151
- while (NUM2INT(rb_hash_aref(rb_ary_entry(ary, i), statsrb_key_ts)) < pv) {
152
- i++;
153
- }
154
- while (NUM2INT(rb_hash_aref(rb_ary_entry(ary, j), statsrb_key_ts)) > pv) {
155
- j--;
156
- }
157
- if (i <= j) {
158
- tmp = rb_ary_entry(ary, i);
159
- rb_ary_store(ary, i, rb_ary_entry(ary, j));
160
- rb_ary_store(ary, j, tmp);
161
- i++;
162
- j--;
163
- }
164
- }
165
-
166
- if (left < j) {
167
- time_sort(left, j, ary, statsrb_key_ts);
168
- }
169
- if (i < right) {
170
- time_sort(i, right, ary, statsrb_key_ts);
171
- }
172
- }
173
-
174
- /**
175
- * Sorts @data using a quicksort algorithm based on the hash element's timestamp.
176
- * @return [Hash] The sorted data
177
- */
178
- static VALUE statsrb_sort(VALUE self) {
179
- VALUE statsrb_data = rb_iv_get(self, "@data");
180
- int len = RARRAY_LEN(statsrb_data);
181
- if (len > 0) {
182
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
183
- time_sort(0, len - 1, statsrb_data, statsrb_key_ts);
184
- }
185
- return statsrb_data;
186
- }
187
431
 
188
432
  /**
189
- * Writes the @data in memory to a specified file.
433
+ * Writes the in memory data to a specified file.
190
434
  * @param filepath [String]
191
435
  * @param filemode [String]
192
436
  * @return [Statsrb] A reference to the object.
@@ -195,17 +439,9 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
195
439
  FILE * file;
196
440
  const char *filepath = RSTRING_PTR(logfile);
197
441
  const char *filemode = RSTRING_PTR(mode);
198
- VALUE statsrb_data = rb_iv_get(self, "@data");
199
- int data_length = RARRAY_LEN(statsrb_data);
200
- int i;
201
- int line_size = 256;
202
- int tmp_ts, tmp_v;
203
- const char *tmp_ns = (char *) malloc(line_size);
204
442
 
205
- // @data hash key symbols.
206
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
207
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
208
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
443
+ StatsrbInternal *internal = statsrb_get_internal(self);
444
+ int i;
209
445
 
210
446
  file = fopen(filepath, filemode);
211
447
  if (file==NULL) {
@@ -213,15 +449,15 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
213
449
  return self;
214
450
  }
215
451
 
216
- // Iterate through the data array, writing the data as we go.
217
- for (i = 0; i < data_length; i++) {
452
+ // Iterate through the internal data, writing as we go.
453
+ for (i = 0; i < internal->event_count; i++) {
218
454
  // @TODO make sure that these values are not empty before writing.
219
- //VALUE tmp_line = rb_str_tmp_new(line_size);
220
- tmp_ts = NUM2INT(rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ts));
221
- tmp_ns = RSTRING_PTR(rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns));
222
- tmp_v = NUM2INT(rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_v));
223
- fprintf(file, "%d\t%s\t%d\n", tmp_ts, tmp_ns, tmp_v);
224
- //rb_str_free(tmp_line);
455
+ fprintf(file,
456
+ "%d\t%s\t%d\n",
457
+ internal->event_list[i].timestamp,
458
+ internal->ns_list[internal->event_list[i].ns_index].namespace,
459
+ internal->event_list[i].value
460
+ );
225
461
  }
226
462
 
227
463
  fclose (file);
@@ -229,44 +465,28 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
229
465
  }
230
466
 
231
467
  /**
232
- * Locates data from a specified file and loads into @data.
468
+ * Writes the in-memory data to separate files based on namespace.
233
469
  * @param filepath [String]
234
- * @param namespace [String]
235
- * @param limit [Number]
236
- * @param start_time [Number]
237
- * @param end_time [Number]
470
+ * @param filemode [String]
238
471
  * @return [Statsrb] A reference to the object.
239
472
  */
240
473
  static VALUE statsrb_split_write(VALUE self, VALUE logdir, VALUE mode) {
241
- VALUE statsrb_data = rb_iv_get(self, "@data");
242
- int len = RARRAY_LEN(statsrb_data);
474
+ StatsrbInternal *internal = statsrb_get_internal(self);
243
475
  int i, ii, ns_len;
244
476
 
245
- // @data hash key symbols.
246
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
247
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
248
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
249
-
250
- VALUE ns_list = rb_ary_new();
251
-
252
- for (i = 0; i < len; i++) {
253
- if (!rb_ary_includes(ns_list, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns))) {
254
- rb_ary_push(ns_list, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns));
255
- }
256
- }
257
-
258
- ns_len = RARRAY_LEN(ns_list);
259
-
260
- for (i = 0; i < ns_len; i++) {
261
- VALUE tmp = rb_obj_dup(self);
262
- VALUE tmp_data = rb_ary_new();
263
- for (ii = 0; ii < len; ii++) {
264
- if (rb_str_cmp(rb_ary_entry(ns_list, i), rb_hash_aref(rb_ary_entry(statsrb_data, ii), statsrb_key_ns)) == 0) {
265
- rb_ary_push(tmp_data, rb_ary_entry(statsrb_data, ii));
477
+ VALUE filename;
478
+ VALUE klass = rb_obj_class(self);
479
+ VALUE tmp = rb_class_new_instance(0, NULL, klass);
480
+
481
+ for (i = 0; i < internal->ns_count; i++) {
482
+ for (ii = 0; ii < internal->event_count; ii++) {
483
+ if (strcmp(internal->ns_list[i].namespace, internal->ns_list[internal->event_list[ii].ns_index].namespace) == 0) {
484
+ statsrb_data_push_event(tmp,
485
+ internal->ns_list[internal->event_list[ii].ns_index].namespace,
486
+ internal->event_list[ii].timestamp,
487
+ internal->event_list[ii].value);
266
488
  }
267
489
  }
268
- //fputs (RSTRING_PTR(rb_obj_as_string(INT2NUM(RARRAY_LEN(tmp_data)))),stderr);
269
- rb_iv_set(tmp, "@data", tmp_data);
270
490
 
271
491
  // If there is no trailing slash on the log dir, add one.
272
492
  const char *filepath = RSTRING_PTR(logdir);
@@ -274,7 +494,9 @@ static VALUE statsrb_split_write(VALUE self, VALUE logdir, VALUE mode) {
274
494
  if (filepath[len - 1] != '/') {
275
495
  logdir = rb_str_plus(logdir, rb_str_new2("/"));
276
496
  }
277
- statsrb_write(tmp, rb_str_plus(logdir, rb_ary_entry(ns_list, i)), mode);
497
+ filename = rb_str_new2(internal->ns_list[i].namespace);
498
+ statsrb_write(tmp, rb_str_plus(logdir, filename), mode);
499
+ statsrb_data_clear_events(tmp);
278
500
  }
279
501
 
280
502
  return self;
@@ -322,13 +544,6 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
322
544
  VALUE response = rb_ary_new();
323
545
  VALUE headers = rb_hash_new();
324
546
  VALUE body = rb_ary_new();
325
- VALUE statsrb_data = rb_iv_get(self, "@data");
326
- VALUE statsrb_hash = rb_hash_new();
327
-
328
- // @data hash key symbols.
329
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
330
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
331
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
332
547
 
333
548
  char *path = RSTRING_PTR(rb_hash_aref(env, rb_str_new2("PATH_INFO")));
334
549
 
@@ -344,7 +559,7 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
344
559
  const char *method_getu = "GET";
345
560
  const char *method_put = "put";
346
561
  const char *method_putu = "PUT";
347
- // Remove the leading /
562
+ // Remove the leading slash.
348
563
  path++;
349
564
  const char *method = strtok(path, "/\0");
350
565
  if (method && (strcmp(method, method_put) == 0 || strcmp(method, method_putu) == 0)) {
@@ -373,18 +588,16 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
373
588
  statsrb_v = atoi(RSTRING_PTR(statsrb_v_qs));
374
589
  }
375
590
 
376
- rb_hash_aset(statsrb_hash, statsrb_key_ts, INT2NUM(statsrb_ts));
377
- rb_hash_aset(statsrb_hash, statsrb_key_ns, statsrb_ns);
378
- rb_hash_aset(statsrb_hash, statsrb_key_v, INT2NUM(statsrb_v));
379
- rb_ary_push(statsrb_data, statsrb_hash);
591
+ statsrb_data_push_event(self, RSTRING_PTR(statsrb_ns), statsrb_ts, statsrb_v);
592
+
593
+ int data_length = NUM2INT(statsrb_length(self));
380
594
 
381
- int data_length = RARRAY_LEN(statsrb_data);
382
- rb_ary_push(body, rb_obj_as_string(INT2NUM(RARRAY_LEN(statsrb_data))));
595
+ rb_ary_push(body, rb_obj_as_string(INT2NUM(data_length)));
383
596
 
384
- if (data_length > NUM2INT(rb_iv_get(self, "@flush_count"))) {
597
+ if (data_length >= NUM2INT(rb_iv_get(self, "@flush_count"))) {
385
598
  statsrb_sort(self);
386
599
  statsrb_split_write(self, rb_iv_get(self, "@split_file_dir"), rb_str_new2("a+"));
387
- rb_ary_resize(statsrb_data, 0);
600
+ statsrb_data_clear_events(self);
388
601
  }
389
602
 
390
603
  rb_ary_push(body, statsrb_ns);
@@ -400,6 +613,7 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
400
613
  if (jsoncallback != Qnil) {
401
614
  rb_ary_push(body, rb_str_plus(jsoncallback, rb_str_new("(", 1)));
402
615
  }
616
+ // @TODO move this to a to_json method.
403
617
  char json_start[256];
404
618
  sprintf(json_start, "{\"%s\":[", statsrb_str_ns);
405
619
  rb_ary_push(body, rb_str_new2(json_start));
@@ -440,27 +654,30 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
440
654
  }
441
655
 
442
656
  // Create a new Statsrb object to query from.
443
- // @todo we probably need to assign a new array to @data to avoid messing up the pointers.
444
- VALUE tmp = rb_obj_dup(self);
445
- VALUE tmp_data = rb_ary_new();
446
- rb_iv_set(tmp, "@data", tmp_data);
657
+ VALUE klass = rb_obj_class(self);
658
+ VALUE tmp = rb_class_new_instance(0, NULL, klass);
659
+
447
660
  statsrb_read(tmp, rb_str_plus(rb_iv_get(self, "@split_file_dir"), statsrb_ns), statsrb_ns, INT2NUM(query_limit), INT2NUM(query_start), INT2NUM(query_end));
448
661
  statsrb_sort(tmp);
449
662
 
450
- int i, data_length = RARRAY_LEN(tmp_data);
663
+ int i, data_length = NUM2INT(statsrb_length(tmp));
664
+ StatsrbInternal *internal = statsrb_get_internal(tmp);
451
665
 
452
666
  for (i = 0; i < data_length; i++) {
453
667
  rb_ary_push(body, rb_str_new("[", 1));
454
- rb_ary_push(body, rb_obj_as_string(rb_hash_aref(rb_ary_entry(tmp_data, i), statsrb_key_ts )));
455
- rb_ary_push(body, rb_str_new(",", 1));
456
- rb_ary_push(body, rb_obj_as_string(rb_hash_aref(rb_ary_entry(tmp_data, i), statsrb_key_v )));
668
+ rb_ary_push(body, rb_obj_as_string(INT2NUM(internal->event_list[i].timestamp)));
669
+ rb_ary_push(body, rb_str_new(",\"", 2));
670
+ rb_ary_push(body, rb_str_new2(internal->ns_list[internal->event_list[i].ns_index].namespace));
671
+ rb_ary_push(body, rb_str_new("\",", 2));
672
+ rb_ary_push(body, rb_obj_as_string(INT2NUM(internal->event_list[i].value)));
457
673
  rb_ary_push(body, rb_str_new("]", 1));
674
+
458
675
  if (i < data_length - 1) {
459
676
  rb_ary_push(body, rb_str_new(",", 1));
460
677
  }
461
678
  rb_ary_push(body, rb_str_new("\n", 1));
462
679
  }
463
- rb_ary_resize(tmp_data, 0);
680
+ statsrb_data_clear_events(tmp);
464
681
  }
465
682
  rb_ary_push(body, rb_str_new("]}", 2));
466
683
  if (jsoncallback != Qnil) {
@@ -482,36 +699,27 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
482
699
  }
483
700
 
484
701
  /**
485
- * Pushes a stat onto the statsrb object.
486
- * @param timestamp [Number]
702
+ * Populates the internal storage with test data.
703
+ *
487
704
  * @param namespace [String]
488
- * @param value [Number]
489
- * @return [Statsrb] A reference to the object.
705
+ * @param amount [Numeric]
490
706
  */
491
- static VALUE statsrb_push(VALUE self, VALUE timestamp, VALUE namespace, VALUE value) {
492
- VALUE statsrb_data = rb_iv_get(self, "@data");
493
- VALUE statsrb_event = rb_hash_new();
494
-
495
- // @data hash key symbols.
496
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
497
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
498
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
499
-
500
- rb_hash_aset(statsrb_event, statsrb_key_ts, timestamp);
501
- rb_hash_aset(statsrb_event, statsrb_key_ns, namespace);
502
- rb_hash_aset(statsrb_event, statsrb_key_v, value);
503
-
504
- rb_ary_push(statsrb_data, statsrb_event);
505
-
506
- return self;
707
+ static void statsrb_load_test(VALUE self, VALUE ns, VALUE amt) {
708
+ StatsrbInternal *internal = statsrb_get_internal(self);
709
+ int i, val;
710
+ srand(time(NULL));
711
+ for (i = 0; i < NUM2INT(amt); i++) {
712
+ val = rand();
713
+ statsrb_data_push_event(self, RSTRING_PTR(ns), val + 100, val + 1);
714
+ }
715
+ statsrb_debug_print_internal(self);
716
+ fprintf(stdout, "Debug: count: %d\n", internal->event_count);
507
717
  }
508
718
 
509
719
  /**
510
720
  * Class constructor, sets up an instance variable.
511
721
  */
512
722
  static VALUE statsrb_constructor(VALUE self) {
513
- VALUE statsrb_data = rb_ary_new();
514
- rb_iv_set(self, "@data", statsrb_data);
515
723
  VALUE statsrb_split_file_dir = rb_str_new("/tmp", 4);
516
724
  rb_iv_set(self, "@split_file_dir", statsrb_split_file_dir);
517
725
  rb_iv_set(self, "@flush_count", INT2NUM(9));
@@ -535,21 +743,21 @@ void Init_statsrb(void) {
535
743
  VALUE klass = rb_define_class("Statsrb", rb_cObject);
536
744
 
537
745
  // Instance methods and properties.
746
+ rb_define_alloc_func(klass, statsrb_alloc_internal);
538
747
  rb_define_method(klass, "initialize", statsrb_constructor, 0);
539
748
  rb_define_method(klass, "query", statsrb_read, 5);
540
749
  rb_define_method(klass, "read", statsrb_read, 5);
541
750
  rb_define_method(klass, "get", statsrb_get, 4);
751
+ rb_define_method(klass, "load_test", statsrb_load_test, 2);
752
+ rb_define_method(klass, "length", statsrb_length, 0);
542
753
  rb_define_method(klass, "sort", statsrb_sort, 0);
543
754
  rb_define_method(klass, "write", statsrb_write, 2);
544
755
  rb_define_method(klass, "split_write", statsrb_split_write, 2);
545
756
  rb_define_method(klass, "push", statsrb_push, 3);
757
+ rb_define_method(klass, "clear", statsrb_data_clear_events, 0);
546
758
  rb_define_method(klass, "call", statsrb_rack_call, 1);
547
- // Define :attr_accessor (read/write instance var)
548
- // Note that this must correspond with a call to rb_iv_self() and it's string name must be @data.
549
- // An array of hashes keyed with :ts(timestamp), :ns(namespace) and :v(value) e.g. [!{:ts => Time.now.to_i, :ns => "test", :v => 33}]
550
- rb_define_attr(klass, "data", 1, 1);
551
759
  // The file directory to write when splitting namespaces. @see #split_write
552
760
  rb_define_attr(klass, "split_file_dir", 1, 1);
553
- // When used with a rack server, the max count of @data before flushing and writing to file.
761
+ // When used with a rack server, the max count of internal events.
554
762
  rb_define_attr(klass, "flush_count", 1, 1);
555
763
  }
data/test/test_statsrb.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'minitest/autorun'
2
2
  require 'statsrb'
3
+ require 'json'
4
+ require 'pp'
3
5
 
4
6
  class TestStatsrb < MiniTest::Test
5
7
 
@@ -8,10 +10,14 @@ class TestStatsrb < MiniTest::Test
8
10
  def setup
9
11
  @s = Statsrb.new
10
12
  @tmpfile = "/tmp/test.statsrb"
13
+ @s.split_file_dir = "/tmp/"
14
+ @s.flush_count = 10
11
15
  end
12
16
 
13
17
  def teardown
14
18
  File.delete @tmpfile unless !File.exists? @tmpfile
19
+ rackfile = "/tmp/test"
20
+ File.delete rackfile unless !File.exists? rackfile
15
21
  end
16
22
 
17
23
  # Provides test data.
@@ -38,12 +44,14 @@ class TestStatsrb < MiniTest::Test
38
44
  # Tests that the data was indeed pushed.
39
45
  def test_push_data
40
46
  push_data
41
- assert_equal @s.data.length, get_data.length
47
+ assert_equal @s.length, get_data.length
42
48
  end
43
-
49
+
44
50
  # Tests that we can filter the in-memory data.
45
51
  def test_get_data
46
52
  push_data
53
+ t = @s.get "test1", 100, 0, 0
54
+ assert_equal(t.length, 3);
47
55
  t = @s.get "test2", 100, 0, 0
48
56
  assert_equal(t.length, 2);
49
57
  end
@@ -53,7 +61,8 @@ class TestStatsrb < MiniTest::Test
53
61
  current = 0
54
62
  push_data
55
63
  @s.sort
56
- @s.data.each do |value|
64
+ t = @s.get "test1", 100, 0, 0
65
+ t.each do |value|
57
66
  assert value[:ts] > current
58
67
  current = value[:ts]
59
68
  end
@@ -77,13 +86,86 @@ class TestStatsrb < MiniTest::Test
77
86
  end
78
87
  end
79
88
 
89
+ # Tests that we can clear data from memory.
90
+ def test_clear_data
91
+ push_data
92
+ assert_equal @s.length, get_data.length
93
+ @s.clear
94
+ assert_equal @s.length, 0
95
+ end
96
+
80
97
  # Tests that we can read data from a file.
81
98
  def test_read_data
82
99
  push_data
83
100
  write_data
101
+ @s.clear
84
102
  @s.read @tmpfile, "test1", 100, 0, 0
85
- assert_equal @s.data.length, 3
103
+ assert_equal @s.length, 3
104
+ @s.clear
86
105
  @s.read @tmpfile, "test2", 100, 0, 0
87
- assert_equal @s.data.length, 2
106
+ assert_equal @s.length, 2
107
+ end
108
+
109
+ # Tests that the rack interface works properly.
110
+ def test_rack_call
111
+ # Test putting data.
112
+ env = {
113
+ "PATH_INFO" => "/PUT",
114
+ "QUERY_STRING" => "name=test&value=13"
115
+ }
116
+
117
+ 5.times do |i|
118
+ @s.call(env);
119
+ end
120
+
121
+ assert_equal 5, @s.length
122
+
123
+ # Write enough data to flush.
124
+ 5.times do |i|
125
+ @s.call(env);
126
+ end
127
+
128
+ # Test getting data.
129
+ env = {
130
+ "PATH_INFO" => "/GET/test",
131
+ "QUERY_STRING" => ""
132
+ }
133
+
134
+ resp = @s.call(env)
135
+ data = JSON.parse(resp[2].join)
136
+ assert_equal data["test"].length, 10
137
+ end
138
+
139
+ def test_no_results
140
+ push_data
141
+ t = @s.get "noresults", 100, 0, 0
142
+ assert_equal(t.length, 0);
143
+ end
144
+
145
+ # Tests large data volumes.
146
+ def test_large_data
147
+ # Load a lot of data.
148
+ @s.load_test "kevin", 500000
149
+ @s.load_test "melissa", 500000
150
+ @s.load_test "benjamin", 500000
151
+ @s.sort
152
+ # Extract all of one namespace.
153
+ t = @s.get "melissa", 100000, 0, 0
154
+ assert_equal t.length, 100000
155
+ # Push them back to the object.
156
+ t.each do |i|
157
+ @s.push i[:ts], i[:ns], i[:v]
158
+ end
159
+ # Save it to file and clear it.
160
+ @s.write @tmpfile, "w+"
161
+ @s.clear
162
+ # Re-load the data.
163
+ @s.read @tmpfile, "melissa", 600000, 0, 0
164
+ # Try to get one that doesn't exist.
165
+ t = @s.get "kevin", 10000, 0, 0
166
+ assert_equal t.length, 0
167
+ # Try to get all of the data out.
168
+ t = @s.get "melissa", 600000, 0, 0
169
+ assert_equal t.length, 600000
88
170
  end
89
171
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,10 +9,10 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-08 00:00:00.000000000 Z
12
+ date: 2013-06-28 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: A Ruby time series stats repository using flat file storage, providing
15
- a Ruby API as well as a Rack compatible REST API.
14
+ description: A Ruby time series stats repository written in C, using flat file storage,
15
+ providing a Ruby API as well as a Rack compatible REST API.
16
16
  email: email@kevinhankens.com
17
17
  executables: []
18
18
  extensions: