statsrb 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/ext/statsrb/statsrb.c +408 -200
  2. data/test/test_statsrb.rb +87 -5
  3. metadata +4 -4
@@ -3,54 +3,358 @@
3
3
  #include <string.h>
4
4
  #include <stdlib.h>
5
5
 
6
+ /**
7
+ * Keeps track of a single event.
8
+ */
9
+ typedef struct {
10
+ int ns_index;
11
+ int timestamp;
12
+ int value;
13
+ } StatsrbEvent;
14
+
15
+ /**
16
+ * Keeps track of available namespaces.
17
+ */
18
+ typedef struct {
19
+ char *namespace[256];
20
+ } StatsrbNS;
21
+
22
+ /**
23
+ * Keeps track of internal storage.
24
+ */
25
+ typedef struct {
26
+ StatsrbEvent *event_list;
27
+ int event_count;
28
+ int event_memory;
29
+ StatsrbNS *ns_list;
30
+ int ns_count;
31
+ int ns_memory;
32
+ } StatsrbInternal;
33
+
34
+ /**
35
+ * Internal: retrieves the internal storage.
36
+ */
37
+ static StatsrbInternal* statsrb_get_internal(VALUE self) {
38
+ StatsrbInternal *internal;
39
+ Data_Get_Struct(self, StatsrbInternal, internal);
40
+
41
+ return internal;
42
+ }
43
+
44
+ /**
45
+ * Internal: allocates internal storage.
46
+ */
47
+ static VALUE statsrb_alloc_internal(VALUE self) {
48
+ // Allocate internal memory for the StatsrbEvent structs.
49
+ StatsrbEvent *eventlist = (StatsrbEvent *)calloc(1, sizeof(StatsrbEvent));
50
+
51
+ // Allocate memory for the list of namespaces.
52
+ StatsrbNS *nslist = (StatsrbNS *)calloc(1, sizeof(StatsrbNS));
53
+
54
+ // Allocate memory for the pointer storage;
55
+ StatsrbInternal *internalptr = (StatsrbInternal *)calloc(1, sizeof(StatsrbInternal));
56
+ internalptr->event_list = eventlist;
57
+ internalptr->event_count = 0;
58
+ internalptr->event_memory = 0;
59
+ internalptr->ns_list = nslist;
60
+ internalptr->ns_count = 0;
61
+ internalptr->ns_memory = 0;
62
+ return Data_Wrap_Struct(self, 0, free, internalptr);
63
+ }
64
+
65
+ /**
66
+ * Clears out the internal memory.
67
+ * @return [void]
68
+ */
69
+ static void statsrb_data_clear_events(VALUE self) {
70
+ StatsrbInternal *internal = statsrb_get_internal(self);
71
+
72
+ // Allocate internal memory for the StatsrbEvent structs.
73
+ StatsrbEvent *event_success = (StatsrbEvent *)realloc(internal->event_list, sizeof(StatsrbEvent));
74
+
75
+ // Allocate memory for the list of namespaces.
76
+ StatsrbNS *ns_success = (StatsrbNS *)realloc(internal->ns_list, sizeof(StatsrbNS));
77
+
78
+ // Allocate memory for the pointer storage;
79
+ if (event_success && ns_success) {
80
+ StatsrbEvent *event_list = event_success;
81
+ event_success = NULL;
82
+ StatsrbNS *ns_list = ns_success;
83
+ ns_success = NULL;
84
+ internal->event_list = event_list;
85
+ internal->event_count = 0;
86
+ internal->event_memory = 0;
87
+ internal->ns_list = ns_list;
88
+ internal->ns_count = 0;
89
+ internal->ns_memory = 0;
90
+ }
91
+ else {
92
+ fprintf(stderr, "Error deallocating memory");
93
+ return;
94
+ }
95
+ }
96
+
97
+
98
+ /**
99
+ * Returns the length of the internal storage.
100
+ * @return [Numeric] The count of items in the internal storage.
101
+ */
102
+ static VALUE statsrb_length(VALUE self) {
103
+ StatsrbInternal *internal = statsrb_get_internal(self);
104
+ if (!internal->event_count) {
105
+ internal->event_count = 0;
106
+ }
107
+
108
+ return INT2NUM(internal->event_count);
109
+ }
110
+
111
+ /**
112
+ * Debugging function.
113
+ */
114
+ static void statsrb_debug_print_internal(VALUE self) {
115
+ StatsrbInternal *internal = statsrb_get_internal(self);
116
+ int i;
117
+
118
+ //for (i = 0; i < internal->event_count; i++) {
119
+ //fprintf(stdout, "Debug: ns: %s; ts: %d; v: %d\n", internal->ns_list[internal->event_list[i].ns_index].namespace, internal->event_list[i].timestamp, internal->event_list[i].value);
120
+ //}
121
+ fprintf(stdout, "Debug: count: %d memory: %d\n", internal->event_count, internal->event_memory);
122
+ }
123
+
124
+
125
+ /**
126
+ * Implementation of quicksort algorithm.
127
+ */
128
+ void time_sort(int left, int right, StatsrbEvent * event_list) {
129
+ int i = left;
130
+ int j = right;
131
+ int p = (i + j) / 2;
132
+ int pv = event_list[p].timestamp;
133
+ StatsrbEvent * tmp = (StatsrbEvent *)malloc(sizeof(StatsrbEvent));
134
+
135
+ while (i <= j) {
136
+ while (event_list[i].timestamp < pv) {
137
+ i++;
138
+ }
139
+ while (event_list[j].timestamp > pv) {
140
+ j--;
141
+ }
142
+ if (i <= j) {
143
+ memcpy(tmp, &event_list[i], sizeof(StatsrbEvent));
144
+ memcpy(&event_list[i], &event_list[j], sizeof(StatsrbEvent));
145
+ memcpy(&event_list[j], tmp, sizeof(StatsrbEvent));
146
+ i++;
147
+ j--;
148
+ }
149
+ }
150
+
151
+ free(tmp);
152
+
153
+ if (left < j) {
154
+ time_sort(left, j, event_list);
155
+ }
156
+ if (i < right) {
157
+ time_sort(i, right, event_list);
158
+ }
159
+ }
160
+
161
+ /**
162
+ * Sorts internal data using a quicksort algorithm based on the hash element's timestamp.
163
+ * @return [Statsrb] A reference to the object.
164
+ */
165
+ static VALUE statsrb_sort(VALUE self) {
166
+ StatsrbInternal *internal = statsrb_get_internal(self);
167
+ if (internal->event_count > 0) {
168
+ time_sort(0, internal->event_count - 1, internal->event_list);
169
+ }
170
+
171
+ return self;
172
+ }
173
+
174
+ /**
175
+ * Internal: pushes a namespace onto the internal storage or retrieves a
176
+ * preexisting one.
177
+ *
178
+ * @param VALUE self
179
+ * @param const char *namespace
180
+ *
181
+ * @return Integer
182
+ * The pointer index of the namespace in @nslist.
183
+ */
184
+ static int statsrb_data_push_ns(VALUE self, const char *namespace) {
185
+ int i;
186
+ StatsrbInternal *internal = statsrb_get_internal(self);
187
+
188
+ for (i = 0; i < internal->ns_count; i++) {
189
+ if (strcmp(internal->ns_list[i].namespace, namespace) == 0) {
190
+ return i;
191
+ }
192
+ }
193
+
194
+ int memory = (internal->ns_count + 1) * sizeof(StatsrbNS);
195
+ StatsrbNS *success = (StatsrbNS *)realloc(internal->ns_list, memory);
196
+
197
+ if (success) {
198
+ internal->ns_list = success;
199
+ success = NULL;
200
+ strcpy(internal->ns_list[internal->ns_count].namespace, namespace);
201
+ internal->ns_count++;
202
+ return internal->ns_count - 1;
203
+ }
204
+ else {
205
+ fprintf(stderr, "Error allocating memory");
206
+ }
207
+
208
+ }
209
+
210
+ /**
211
+ * Internal: pushes a data event onto the internal storage.
212
+ *
213
+ * @param VALUE self
214
+ * @param const char *namespace
215
+ * @param int timestamp
216
+ * @param int value
217
+ */
218
+ static void statsrb_data_push_event(VALUE self, const char *namespace, int timestamp, int value) {
219
+ StatsrbInternal *internal = statsrb_get_internal(self);
220
+
221
+ // Get the index of the namespace pointer.
222
+ int ns_index = statsrb_data_push_ns(self, namespace);
223
+
224
+ // If it appears that we are approaching the end of the memory block, allocate
225
+ // some more.
226
+ // @TODO 2x memory is a little nuts, maybe throttle this back a bit?
227
+ if ((sizeof(StatsrbEvent) * internal->event_count) > (internal->event_memory * .9)) {
228
+ internal->event_memory = (2* internal->event_count) * sizeof(StatsrbEvent);
229
+ StatsrbEvent *success = (StatsrbEvent *)realloc(internal->event_list, internal->event_memory);
230
+ if (success) {
231
+ internal->event_list = success;
232
+ success = NULL;
233
+ }
234
+ else {
235
+ fprintf(stderr, "Error allocating memory");
236
+ return;
237
+ }
238
+ }
239
+
240
+ // Set the values;
241
+ internal->event_list[internal->event_count].timestamp = timestamp;
242
+ internal->event_list[internal->event_count].ns_index = ns_index;
243
+ internal->event_list[internal->event_count].value = value;
244
+
245
+ // Track the count by saving the new pointer.
246
+ internal->event_count++;
247
+ }
248
+
249
+ /**
250
+ * Creates a ruby hash from event VALUEs.
251
+ *
252
+ * @param VALUE self
253
+ * @param VALUE ts
254
+ * @param VALUE ns
255
+ * @param VALUE v
256
+ */
257
+ VALUE statsrb_create_rb_event_hash(VALUE self, VALUE ts, VALUE ns, VALUE v) {
258
+ VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
259
+ VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
260
+ VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
261
+
262
+ VALUE statsrb_event = rb_hash_new();
263
+ rb_hash_aset(statsrb_event, statsrb_key_ts, ts);
264
+ rb_hash_aset(statsrb_event, statsrb_key_ns, ns);
265
+ rb_hash_aset(statsrb_event, statsrb_key_v, v);
266
+
267
+ return statsrb_event;
268
+ }
269
+
270
+ /**
271
+ * Pushes a stat onto the statsrb object.
272
+ * @param timestamp [Number]
273
+ * @param namespace [String]
274
+ * @param value [Number]
275
+ * @return [Statsrb] A reference to the object.
276
+ */
277
+ static VALUE statsrb_push(VALUE self, VALUE timestamp, VALUE namespace, VALUE value) {
278
+ int ts = NUM2INT(timestamp);
279
+ int v = NUM2INT(value);
280
+ const char *ns = RSTRING_PTR(namespace);
281
+ statsrb_data_push_event(self, ns, ts, v);
282
+ return self;
283
+ }
284
+
6
285
  /**
7
286
  * Retrieves internal data based on specified filters.
8
287
  * @param namespace [String]
9
288
  * @param limit [Number]
10
289
  * @param start_time [Number]
11
290
  * @param end_time [Number]
12
- * @return [Array] An array of data hashes.
291
+ * @return [Array] An array of data event hashes.
13
292
  */
14
293
  static VALUE statsrb_get(VALUE self, VALUE query_ns, VALUE query_limit, VALUE query_start, VALUE query_end) {
15
- VALUE statsrb_data = rb_iv_get(self, "@data");
16
- VALUE statsrb_event = rb_hash_new();
17
- int data_length = RARRAY_LEN(statsrb_data);
18
- int i = 0;
19
- int count = 0;
20
- int tmp_ts;
294
+ // @TODO maybe it would be sane to make a new statsrb object and then just have
295
+ // methods to dump everything to ary, json, etc.
296
+ StatsrbInternal *internal = statsrb_get_internal(self);
297
+ int tmp_ts, tmp_v, tmp_i;
21
298
 
22
299
  VALUE filtered_data = rb_ary_new();
23
- VALUE tmp_ns;
300
+ VALUE rb_ns_list = rb_ary_new();
301
+ VALUE statsrb_event;
24
302
 
25
- // @data hash key symbols.
26
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
27
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
28
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
303
+ int i = 0;
304
+ int filtered_count = 0;
29
305
 
30
- // Convert into an int that ruby understands.
31
306
  int limit = NUM2INT(query_limit);
32
307
  int qstart = NUM2INT(query_start);
33
308
  int qend = NUM2INT(query_end);
34
309
 
35
- for (i = 0; i < data_length; i++) {
36
- tmp_ts = NUM2INT(rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ts));
37
- tmp_ns = rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns);
38
- if (rb_str_equal(query_ns, tmp_ns)
39
- && (qstart == 0 || tmp_ts >= qstart)
40
- && (qend == 0 || tmp_ts <= qend)) {
41
- rb_hash_aset(statsrb_event, statsrb_key_ts, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ts));
42
- rb_hash_aset(statsrb_event, statsrb_key_ns, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns));
43
- rb_hash_aset(statsrb_event, statsrb_key_v, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_v));
310
+ VALUE rb_ns;
311
+
312
+ // Create rb strings for the namespaces.
313
+ signed int found = -1;
314
+ for (i = 0; i < internal->ns_count; i++) {
315
+ rb_hash_aset(rb_ns_list, INT2NUM(i), rb_str_new2(internal->ns_list[i].namespace));
316
+ if (strcmp(RSTRING_PTR(query_ns), RSTRING_PTR(rb_hash_aref(rb_ns_list, INT2NUM(i)))) == 0) {
317
+ memcpy(&found, &i, sizeof(int));
318
+ }
319
+ }
320
+
321
+ // Return right away if the namespace doesn't exist.
322
+ if (found == -1) {
323
+ rb_ary_resize(filtered_data, (long) 0);
324
+ return filtered_data;
325
+ }
326
+
327
+ // Iterate through the in-memory data to find matches.
328
+ for (i = 0; i < internal->event_count; i++) {
329
+ if (found == internal->event_list[i].ns_index
330
+ && (qstart == 0 || internal->event_list[i].timestamp >= qstart)
331
+ && (qend == 0 || internal->event_list[i].timestamp <= qend)) {
332
+
333
+ memcpy(&tmp_ts, &internal->event_list[i].timestamp, sizeof(int));
334
+ memcpy(&tmp_v, &internal->event_list[i].value, sizeof(int));
335
+
336
+ statsrb_event = statsrb_create_rb_event_hash(
337
+ self,
338
+ INT2NUM(tmp_ts),
339
+ rb_hash_aref(rb_ns_list, INT2NUM(found)),
340
+ INT2NUM(tmp_v)
341
+ );
342
+
44
343
  rb_ary_push(filtered_data, statsrb_event);
45
- count++;
344
+ filtered_count++;
345
+ }
346
+
347
+ if (limit > 0 && filtered_count == limit) {
348
+ break;
46
349
  }
47
350
  }
48
351
 
352
+ rb_ary_resize(filtered_data, filtered_count);
49
353
  return filtered_data;
50
354
  }
51
355
 
52
356
  /**
53
- * Locates data from a specified file and loads into @data.
357
+ * Locates data from a specified file and loads into internal memory.
54
358
  * @param filepath [String]
55
359
  * @param namespace [String]
56
360
  * @param limit [Number]
@@ -60,28 +364,18 @@ static VALUE statsrb_get(VALUE self, VALUE query_ns, VALUE query_limit, VALUE qu
60
364
  */
61
365
  static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query_limit, VALUE query_start, VALUE query_end) {
62
366
  FILE * file;
63
- int line_size = 256;
367
+ int line_size = 512;
64
368
  char *line = (char *) malloc(line_size);
369
+ char *tmp_ns = (char *) malloc(256);
65
370
  const char *filepath = RSTRING_PTR(logfile);
66
371
  const char *query_ns_char = RSTRING_PTR(query_ns);
67
-
68
- // @data hash key symbols.
69
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
70
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
71
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
72
- // Create an empty string for comparison.
73
- VALUE statsrb_str_empty = rb_str_new2("");
372
+ int tmp_v, tmp_ts;
74
373
 
75
374
  // Convert into an int that ruby understands.
76
375
  int limit = NUM2INT(query_limit);
77
376
  int qstart = NUM2INT(query_start);
78
377
  int qend = NUM2INT(query_end);
79
378
 
80
- // Return array instantiation.
81
- VALUE statsrb_data = rb_iv_get(self, "@data");
82
- // @TODO does this garbage collect all of the old hash data?
83
- rb_ary_resize(statsrb_data, 0);
84
-
85
379
  file = fopen(filepath, "r");
86
380
  if (file == NULL) {
87
381
  fprintf(stderr, "File error: could not open file %s for reading.", filepath);
@@ -98,29 +392,28 @@ static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query
98
392
 
99
393
  // If the namespace is in the row, explode it.
100
394
  if (line[0] != '\0' && line[0] != '\n' && strchr(line, query_ns_char[0]) && strstr(line, query_ns_char)) {
101
- VALUE statsrb_event = rb_hash_new();
395
+ //VALUE statsrb_event = rb_hash_new();
102
396
 
103
397
  // I tried sscanf for convenience, but it was predictably slower.
104
398
  //int statsrb_ts, statsrb_v;
105
399
  //sscanf(line, "%d\t%*s\t%d", &statsrb_ts, &statsrb_v);
106
400
 
107
401
  // @TODO this should use something more robust than atoi.
108
- int statsrb_ts = atoi(strtok(line, "\t"));
402
+ tmp_ts = atoi(strtok(line, "\t"));
109
403
 
110
- if (statsrb_ts != NULL && (qstart == 0 || statsrb_ts >= qstart) && (qend == 0 || statsrb_ts <= qend)) {
404
+ if (tmp_ts != NULL && (qstart == 0 || tmp_ts >= qstart) && (qend == 0 || tmp_ts <= qend)) {
111
405
  // @TODO this should probably use the actual namespace if we do wildcard queries.
112
- VALUE statsrb_str_ns = rb_str_new2(strtok(NULL, "\t"));
406
+ strcpy(tmp_ns, strtok(NULL, "\t"));
113
407
  //strtok(NULL, "\t");
114
- int statsrb_v = atoi(strtok(NULL, "\0"));
408
+ tmp_v = atoi(strtok(NULL, "\0"));
115
409
 
116
410
  // @TODO this should really query the namespace exactly instead of just relying on strstr.
117
411
  //if (rb_str_cmp(query_ns, statsrb_str_empty) == 0 || rb_str_cmp(query_ns, statsrb_str_ns) == 0) {
118
- if (statsrb_ts && (statsrb_v || statsrb_v == 0)) {
119
- rb_hash_aset(statsrb_event, statsrb_key_ts, INT2NUM(statsrb_ts));
120
- rb_hash_aset(statsrb_event, statsrb_key_ns, statsrb_str_ns);
121
- //rb_hash_aset(statsrb_event, statsrb_key_ns, query_ns);
122
- rb_hash_aset(statsrb_event, statsrb_key_v, INT2NUM(statsrb_v));
123
- rb_ary_push(statsrb_data, statsrb_event);
412
+ if (tmp_ts && (tmp_v || tmp_v == 0)) {
413
+ statsrb_data_push_event(self,
414
+ tmp_ns,
415
+ tmp_ts,
416
+ tmp_v);
124
417
  count++;
125
418
  }
126
419
  }
@@ -130,63 +423,14 @@ static VALUE statsrb_read(VALUE self, VALUE logfile, VALUE query_ns, VALUE query
130
423
  // terminate
131
424
  fclose (file);
132
425
  free (line);
133
-
134
- //return statsrb_data;
135
- //rb_iv_set(self, "@data", statsrb_data);
426
+ free (tmp_ns);
136
427
 
137
428
  return self;
138
429
  }
139
430
 
140
- /**
141
- * Implementation of quicksort algorithm.
142
- */
143
- void time_sort(int left, int right, VALUE ary, VALUE statsrb_key_ts) {
144
- int i = left;
145
- int j = right;
146
- int p = (i + j) / 2;
147
- int pv = NUM2INT(rb_hash_aref(rb_ary_entry(ary, p), statsrb_key_ts));
148
- VALUE tmp;
149
-
150
- while (i <= j) {
151
- while (NUM2INT(rb_hash_aref(rb_ary_entry(ary, i), statsrb_key_ts)) < pv) {
152
- i++;
153
- }
154
- while (NUM2INT(rb_hash_aref(rb_ary_entry(ary, j), statsrb_key_ts)) > pv) {
155
- j--;
156
- }
157
- if (i <= j) {
158
- tmp = rb_ary_entry(ary, i);
159
- rb_ary_store(ary, i, rb_ary_entry(ary, j));
160
- rb_ary_store(ary, j, tmp);
161
- i++;
162
- j--;
163
- }
164
- }
165
-
166
- if (left < j) {
167
- time_sort(left, j, ary, statsrb_key_ts);
168
- }
169
- if (i < right) {
170
- time_sort(i, right, ary, statsrb_key_ts);
171
- }
172
- }
173
-
174
- /**
175
- * Sorts @data using a quicksort algorithm based on the hash element's timestamp.
176
- * @return [Hash] The sorted data
177
- */
178
- static VALUE statsrb_sort(VALUE self) {
179
- VALUE statsrb_data = rb_iv_get(self, "@data");
180
- int len = RARRAY_LEN(statsrb_data);
181
- if (len > 0) {
182
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
183
- time_sort(0, len - 1, statsrb_data, statsrb_key_ts);
184
- }
185
- return statsrb_data;
186
- }
187
431
 
188
432
  /**
189
- * Writes the @data in memory to a specified file.
433
+ * Writes the in memory data to a specified file.
190
434
  * @param filepath [String]
191
435
  * @param filemode [String]
192
436
  * @return [Statsrb] A reference to the object.
@@ -195,17 +439,9 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
195
439
  FILE * file;
196
440
  const char *filepath = RSTRING_PTR(logfile);
197
441
  const char *filemode = RSTRING_PTR(mode);
198
- VALUE statsrb_data = rb_iv_get(self, "@data");
199
- int data_length = RARRAY_LEN(statsrb_data);
200
- int i;
201
- int line_size = 256;
202
- int tmp_ts, tmp_v;
203
- const char *tmp_ns = (char *) malloc(line_size);
204
442
 
205
- // @data hash key symbols.
206
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
207
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
208
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
443
+ StatsrbInternal *internal = statsrb_get_internal(self);
444
+ int i;
209
445
 
210
446
  file = fopen(filepath, filemode);
211
447
  if (file==NULL) {
@@ -213,15 +449,15 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
213
449
  return self;
214
450
  }
215
451
 
216
- // Iterate through the data array, writing the data as we go.
217
- for (i = 0; i < data_length; i++) {
452
+ // Iterate through the internal data, writing as we go.
453
+ for (i = 0; i < internal->event_count; i++) {
218
454
  // @TODO make sure that these values are not empty before writing.
219
- //VALUE tmp_line = rb_str_tmp_new(line_size);
220
- tmp_ts = NUM2INT(rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ts));
221
- tmp_ns = RSTRING_PTR(rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns));
222
- tmp_v = NUM2INT(rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_v));
223
- fprintf(file, "%d\t%s\t%d\n", tmp_ts, tmp_ns, tmp_v);
224
- //rb_str_free(tmp_line);
455
+ fprintf(file,
456
+ "%d\t%s\t%d\n",
457
+ internal->event_list[i].timestamp,
458
+ internal->ns_list[internal->event_list[i].ns_index].namespace,
459
+ internal->event_list[i].value
460
+ );
225
461
  }
226
462
 
227
463
  fclose (file);
@@ -229,44 +465,28 @@ static VALUE statsrb_write(VALUE self, VALUE logfile, VALUE mode) {
229
465
  }
230
466
 
231
467
  /**
232
- * Locates data from a specified file and loads into @data.
468
+ * Writes the in memory data to separate files based on namespace.
233
469
  * @param filepath [String]
234
- * @param namespace [String]
235
- * @param limit [Number]
236
- * @param start_time [Number]
237
- * @param end_time [Number]
470
+ * @param filemode [String]
238
471
  * @return [Statsrb] A reference to the object.
239
472
  */
240
473
  static VALUE statsrb_split_write(VALUE self, VALUE logdir, VALUE mode) {
241
- VALUE statsrb_data = rb_iv_get(self, "@data");
242
- int len = RARRAY_LEN(statsrb_data);
474
+ StatsrbInternal *internal = statsrb_get_internal(self);
243
475
  int i, ii, ns_len;
244
476
 
245
- // @data hash key symbols.
246
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
247
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
248
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
249
-
250
- VALUE ns_list = rb_ary_new();
251
-
252
- for (i = 0; i < len; i++) {
253
- if (!rb_ary_includes(ns_list, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns))) {
254
- rb_ary_push(ns_list, rb_hash_aref(rb_ary_entry(statsrb_data, i), statsrb_key_ns));
255
- }
256
- }
257
-
258
- ns_len = RARRAY_LEN(ns_list);
259
-
260
- for (i = 0; i < ns_len; i++) {
261
- VALUE tmp = rb_obj_dup(self);
262
- VALUE tmp_data = rb_ary_new();
263
- for (ii = 0; ii < len; ii++) {
264
- if (rb_str_cmp(rb_ary_entry(ns_list, i), rb_hash_aref(rb_ary_entry(statsrb_data, ii), statsrb_key_ns)) == 0) {
265
- rb_ary_push(tmp_data, rb_ary_entry(statsrb_data, ii));
477
+ VALUE filename;
478
+ VALUE klass = rb_obj_class(self);
479
+ VALUE tmp = rb_class_new_instance(0, NULL, klass);
480
+
481
+ for (i = 0; i < internal->ns_count; i++) {
482
+ for (ii = 0; ii < internal->event_count; ii++) {
483
+ if (strcmp(internal->ns_list[i].namespace, internal->ns_list[internal->event_list[ii].ns_index].namespace) == 0) {
484
+ statsrb_data_push_event(tmp,
485
+ internal->ns_list[internal->event_list[ii].ns_index].namespace,
486
+ internal->event_list[ii].timestamp,
487
+ internal->event_list[ii].value);
266
488
  }
267
489
  }
268
- //fputs (RSTRING_PTR(rb_obj_as_string(INT2NUM(RARRAY_LEN(tmp_data)))),stderr);
269
- rb_iv_set(tmp, "@data", tmp_data);
270
490
 
271
491
  // If there is no trailing slash on the log dir, add one.
272
492
  const char *filepath = RSTRING_PTR(logdir);
@@ -274,7 +494,9 @@ static VALUE statsrb_split_write(VALUE self, VALUE logdir, VALUE mode) {
274
494
  if (filepath[len - 1] != '/') {
275
495
  logdir = rb_str_plus(logdir, rb_str_new2("/"));
276
496
  }
277
- statsrb_write(tmp, rb_str_plus(logdir, rb_ary_entry(ns_list, i)), mode);
497
+ filename = rb_str_new2(internal->ns_list[i].namespace);
498
+ statsrb_write(tmp, rb_str_plus(logdir, filename), mode);
499
+ statsrb_data_clear_events(tmp);
278
500
  }
279
501
 
280
502
  return self;
@@ -322,13 +544,6 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
322
544
  VALUE response = rb_ary_new();
323
545
  VALUE headers = rb_hash_new();
324
546
  VALUE body = rb_ary_new();
325
- VALUE statsrb_data = rb_iv_get(self, "@data");
326
- VALUE statsrb_hash = rb_hash_new();
327
-
328
- // @data hash key symbols.
329
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
330
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
331
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
332
547
 
333
548
  char *path = RSTRING_PTR(rb_hash_aref(env, rb_str_new2("PATH_INFO")));
334
549
 
@@ -344,7 +559,7 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
344
559
  const char *method_getu = "GET";
345
560
  const char *method_put = "put";
346
561
  const char *method_putu = "PUT";
347
- // Remove the leading /
562
+ // Remove the leading slash.
348
563
  path++;
349
564
  const char *method = strtok(path, "/\0");
350
565
  if (method && (strcmp(method, method_put) == 0 || strcmp(method, method_putu) == 0)) {
@@ -373,18 +588,16 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
373
588
  statsrb_v = atoi(RSTRING_PTR(statsrb_v_qs));
374
589
  }
375
590
 
376
- rb_hash_aset(statsrb_hash, statsrb_key_ts, INT2NUM(statsrb_ts));
377
- rb_hash_aset(statsrb_hash, statsrb_key_ns, statsrb_ns);
378
- rb_hash_aset(statsrb_hash, statsrb_key_v, INT2NUM(statsrb_v));
379
- rb_ary_push(statsrb_data, statsrb_hash);
591
+ statsrb_data_push_event(self, RSTRING_PTR(statsrb_ns), statsrb_ts, statsrb_v);
592
+
593
+ int data_length = NUM2INT(statsrb_length(self));
380
594
 
381
- int data_length = RARRAY_LEN(statsrb_data);
382
- rb_ary_push(body, rb_obj_as_string(INT2NUM(RARRAY_LEN(statsrb_data))));
595
+ rb_ary_push(body, rb_obj_as_string(INT2NUM(data_length)));
383
596
 
384
- if (data_length > NUM2INT(rb_iv_get(self, "@flush_count"))) {
597
+ if (data_length >= NUM2INT(rb_iv_get(self, "@flush_count"))) {
385
598
  statsrb_sort(self);
386
599
  statsrb_split_write(self, rb_iv_get(self, "@split_file_dir"), rb_str_new2("a+"));
387
- rb_ary_resize(statsrb_data, 0);
600
+ statsrb_data_clear_events(self);
388
601
  }
389
602
 
390
603
  rb_ary_push(body, statsrb_ns);
@@ -400,6 +613,7 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
400
613
  if (jsoncallback != Qnil) {
401
614
  rb_ary_push(body, rb_str_plus(jsoncallback, rb_str_new("(", 1)));
402
615
  }
616
+ // @TODO move this to a to_json method.
403
617
  char json_start[256];
404
618
  sprintf(json_start, "{\"%s\":[", statsrb_str_ns);
405
619
  rb_ary_push(body, rb_str_new2(json_start));
@@ -440,27 +654,30 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
440
654
  }
441
655
 
442
656
  // Create a new Statsrb object to query from.
443
- // @todo we probably need to assign a new array to @data to avoid messing up the pointers.
444
- VALUE tmp = rb_obj_dup(self);
445
- VALUE tmp_data = rb_ary_new();
446
- rb_iv_set(tmp, "@data", tmp_data);
657
+ VALUE klass = rb_obj_class(self);
658
+ VALUE tmp = rb_class_new_instance(0, NULL, klass);
659
+
447
660
  statsrb_read(tmp, rb_str_plus(rb_iv_get(self, "@split_file_dir"), statsrb_ns), statsrb_ns, INT2NUM(query_limit), INT2NUM(query_start), INT2NUM(query_end));
448
661
  statsrb_sort(tmp);
449
662
 
450
- int i, data_length = RARRAY_LEN(tmp_data);
663
+ int i, data_length = NUM2INT(statsrb_length(tmp));
664
+ StatsrbInternal *internal = statsrb_get_internal(tmp);
451
665
 
452
666
  for (i = 0; i < data_length; i++) {
453
667
  rb_ary_push(body, rb_str_new("[", 1));
454
- rb_ary_push(body, rb_obj_as_string(rb_hash_aref(rb_ary_entry(tmp_data, i), statsrb_key_ts )));
455
- rb_ary_push(body, rb_str_new(",", 1));
456
- rb_ary_push(body, rb_obj_as_string(rb_hash_aref(rb_ary_entry(tmp_data, i), statsrb_key_v )));
668
+ rb_ary_push(body, rb_obj_as_string(INT2NUM(internal->event_list[i].timestamp)));
669
+ rb_ary_push(body, rb_str_new(",\"", 2));
670
+ rb_ary_push(body, rb_str_new2(internal->ns_list[internal->event_list[i].ns_index].namespace));
671
+ rb_ary_push(body, rb_str_new("\",", 2));
672
+ rb_ary_push(body, rb_obj_as_string(INT2NUM(internal->event_list[i].value)));
457
673
  rb_ary_push(body, rb_str_new("]", 1));
674
+
458
675
  if (i < data_length - 1) {
459
676
  rb_ary_push(body, rb_str_new(",", 1));
460
677
  }
461
678
  rb_ary_push(body, rb_str_new("\n", 1));
462
679
  }
463
- rb_ary_resize(tmp_data, 0);
680
+ statsrb_data_clear_events(tmp);
464
681
  }
465
682
  rb_ary_push(body, rb_str_new("]}", 2));
466
683
  if (jsoncallback != Qnil) {
@@ -482,36 +699,27 @@ static VALUE statsrb_rack_call(VALUE self, VALUE env) {
482
699
  }
483
700
 
484
701
  /**
485
- * Pushes a stat onto the statsrb object.
486
- * @param timestamp [Number]
702
+ * Populates the internal storage with test data.
703
+ *
487
704
  * @param namespace [String]
488
- * @param value [Number]
489
- * @return [Statsrb] A reference to the object.
705
+ * @param amount [Numeric]
490
706
  */
491
- static VALUE statsrb_push(VALUE self, VALUE timestamp, VALUE namespace, VALUE value) {
492
- VALUE statsrb_data = rb_iv_get(self, "@data");
493
- VALUE statsrb_event = rb_hash_new();
494
-
495
- // @data hash key symbols.
496
- VALUE statsrb_key_ts = rb_iv_get(self, "@key_ts");
497
- VALUE statsrb_key_ns = rb_iv_get(self, "@key_ns");
498
- VALUE statsrb_key_v = rb_iv_get(self, "@key_v");
499
-
500
- rb_hash_aset(statsrb_event, statsrb_key_ts, timestamp);
501
- rb_hash_aset(statsrb_event, statsrb_key_ns, namespace);
502
- rb_hash_aset(statsrb_event, statsrb_key_v, value);
503
-
504
- rb_ary_push(statsrb_data, statsrb_event);
505
-
506
- return self;
707
+ static void statsrb_load_test(VALUE self, VALUE ns, VALUE amt) {
708
+ StatsrbInternal *internal = statsrb_get_internal(self);
709
+ int i, val;
710
+ srand(time(NULL));
711
+ for (i = 0; i < NUM2INT(amt); i++) {
712
+ val = rand();
713
+ statsrb_data_push_event(self, RSTRING_PTR(ns), val + 100, val + 1);
714
+ }
715
+ statsrb_debug_print_internal(self);
716
+ fprintf(stdout, "Debug: count: %d\n", internal->event_count);
507
717
  }
508
718
 
509
719
  /**
510
720
  * Class constructor, sets up an instance variable.
511
721
  */
512
722
  static VALUE statsrb_constructor(VALUE self) {
513
- VALUE statsrb_data = rb_ary_new();
514
- rb_iv_set(self, "@data", statsrb_data);
515
723
  VALUE statsrb_split_file_dir = rb_str_new("/tmp", 4);
516
724
  rb_iv_set(self, "@split_file_dir", statsrb_split_file_dir);
517
725
  rb_iv_set(self, "@flush_count", INT2NUM(9));
@@ -535,21 +743,21 @@ void Init_statsrb(void) {
535
743
  VALUE klass = rb_define_class("Statsrb", rb_cObject);
536
744
 
537
745
  // Instance methods and properties.
746
+ rb_define_alloc_func(klass, statsrb_alloc_internal);
538
747
  rb_define_method(klass, "initialize", statsrb_constructor, 0);
539
748
  rb_define_method(klass, "query", statsrb_read, 5);
540
749
  rb_define_method(klass, "read", statsrb_read, 5);
541
750
  rb_define_method(klass, "get", statsrb_get, 4);
751
+ rb_define_method(klass, "load_test", statsrb_load_test, 2);
752
+ rb_define_method(klass, "length", statsrb_length, 0);
542
753
  rb_define_method(klass, "sort", statsrb_sort, 0);
543
754
  rb_define_method(klass, "write", statsrb_write, 2);
544
755
  rb_define_method(klass, "split_write", statsrb_split_write, 2);
545
756
  rb_define_method(klass, "push", statsrb_push, 3);
757
+ rb_define_method(klass, "clear", statsrb_data_clear_events, 0);
546
758
  rb_define_method(klass, "call", statsrb_rack_call, 1);
547
- // Define :attr_accessor (read/write instance var)
548
- // Note that this must correspond with a call to rb_iv_self() and it's string name must be @data.
549
- // An array of hashes keyed with :ts(timestamp), :ns(namespace) and :v(value) e.g. [!{:ts => Time.now.to_i, :ns => "test", :v => 33}]
550
- rb_define_attr(klass, "data", 1, 1);
551
759
  // The file directory to write when splitting namespaces. @see #split_write
552
760
  rb_define_attr(klass, "split_file_dir", 1, 1);
553
- // When used with a rack server, the max count of @data before flushing and writing to file.
761
+ // When used with a rack server, the max count of internal events.
554
762
  rb_define_attr(klass, "flush_count", 1, 1);
555
763
  }
data/test/test_statsrb.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'minitest/autorun'
2
2
  require 'statsrb'
3
+ require 'json'
4
+ require 'pp'
3
5
 
4
6
  class TestStatsrb < MiniTest::Test
5
7
 
@@ -8,10 +10,14 @@ class TestStatsrb < MiniTest::Test
8
10
  def setup
9
11
  @s = Statsrb.new
10
12
  @tmpfile = "/tmp/test.statsrb"
13
+ @s.split_file_dir = "/tmp/"
14
+ @s.flush_count = 10
11
15
  end
12
16
 
13
17
  def teardown
14
18
  File.delete @tmpfile unless !File.exists? @tmpfile
19
+ rackfile = "/tmp/test"
20
+ File.delete rackfile unless !File.exists? rackfile
15
21
  end
16
22
 
17
23
  # Provides test data.
@@ -38,12 +44,14 @@ class TestStatsrb < MiniTest::Test
38
44
  # Tests that the data was indeed pushed.
39
45
  def test_push_data
40
46
  push_data
41
- assert_equal @s.data.length, get_data.length
47
+ assert_equal @s.length, get_data.length
42
48
  end
43
-
49
+
44
50
  # Tests that we can filter the in-memory data.
45
51
  def test_get_data
46
52
  push_data
53
+ t = @s.get "test1", 100, 0, 0
54
+ assert_equal(t.length, 3);
47
55
  t = @s.get "test2", 100, 0, 0
48
56
  assert_equal(t.length, 2);
49
57
  end
@@ -53,7 +61,8 @@ class TestStatsrb < MiniTest::Test
53
61
  current = 0
54
62
  push_data
55
63
  @s.sort
56
- @s.data.each do |value|
64
+ t = @s.get "test1", 100, 0, 0
65
+ t.each do |value|
57
66
  assert value[:ts] > current
58
67
  current = value[:ts]
59
68
  end
@@ -77,13 +86,86 @@ class TestStatsrb < MiniTest::Test
77
86
  end
78
87
  end
79
88
 
89
+ # Tests that we can clear data from memory.
90
+ def test_clear_data
91
+ push_data
92
+ assert_equal @s.length, get_data.length
93
+ @s.clear
94
+ assert_equal @s.length, 0
95
+ end
96
+
80
97
  # Tests that we can read data from a file.
81
98
  def test_read_data
82
99
  push_data
83
100
  write_data
101
+ @s.clear
84
102
  @s.read @tmpfile, "test1", 100, 0, 0
85
- assert_equal @s.data.length, 3
103
+ assert_equal @s.length, 3
104
+ @s.clear
86
105
  @s.read @tmpfile, "test2", 100, 0, 0
87
- assert_equal @s.data.length, 2
106
+ assert_equal @s.length, 2
107
+ end
108
+
109
+ # Tests that the rack interface works properly.
110
+ def test_rack_call
111
+ # Test putting data.
112
+ env = {
113
+ "PATH_INFO" => "/PUT",
114
+ "QUERY_STRING" => "name=test&value=13"
115
+ }
116
+
117
+ 5.times do |i|
118
+ @s.call(env);
119
+ end
120
+
121
+ assert_equal 5, @s.length
122
+
123
+ # Write enough data to flush.
124
+ 5.times do |i|
125
+ @s.call(env);
126
+ end
127
+
128
+ # Test getting data.
129
+ env = {
130
+ "PATH_INFO" => "/GET/test",
131
+ "QUERY_STRING" => ""
132
+ }
133
+
134
+ resp = @s.call(env)
135
+ data = JSON.parse(resp[2].join)
136
+ assert_equal data["test"].length, 10
137
+ end
138
+
139
+ def test_no_results
140
+ push_data
141
+ t = @s.get "noresults", 100, 0, 0
142
+ assert_equal(t.length, 0);
143
+ end
144
+
145
+ # Tests large data volumes.
146
+ def test_large_data
147
+ # Load a lot of data.
148
+ @s.load_test "kevin", 500000
149
+ @s.load_test "melissa", 500000
150
+ @s.load_test "benjamin", 500000
151
+ @s.sort
152
+ # Extract all of one namespace.
153
+ t = @s.get "melissa", 100000, 0, 0
154
+ assert_equal t.length, 100000
155
+ # Push them back to the object.
156
+ t.each do |i|
157
+ @s.push i[:ts], i[:ns], i[:v]
158
+ end
159
+ # Save it to file and clear it.
160
+ @s.write @tmpfile, "w+"
161
+ @s.clear
162
+ # Re-load the data.
163
+ @s.read @tmpfile, "melissa", 600000, 0, 0
164
+ # Try to get one that doesn't exist.
165
+ t = @s.get "kevin", 10000, 0, 0
166
+ assert_equal t.length, 0
167
+ # Try to get all of the data out.
168
+ t = @s.get "melissa", 600000, 0, 0
169
+ assert_equal t.length, 600000
88
170
  end
89
171
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,10 +9,10 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-08 00:00:00.000000000 Z
12
+ date: 2013-06-28 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: A Ruby time series stats repository using flat file storage, providing
15
- a Ruby API as well as a Rack compatible REST API.
14
+ description: A Ruby time series stats repository written in C, using flat file storage,
15
+ providing a Ruby API as well as a Rack compatible REST API.
16
16
  email: email@kevinhankens.com
17
17
  executables: []
18
18
  extensions: