cheap-stats 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0e5a30b7e09b21f08359019f9a4a33bb8bc8b4e1e103bea1eed739f12de818f0
4
+ data.tar.gz: dc3146cd65e34779373ffe61b5f32c90ac1cfb89218e5fefd81804bf6d40ac59
5
+ SHA512:
6
+ metadata.gz: 5860b61e54bc57a8ade4724d4f5e5ac62811079b75869177e774c0eb281341fa0a2c7ee7f41f59fbb409355b4a2e5757ccf14a7179d6912d0919d240928759fe
7
+ data.tar.gz: a9b091cfcb393b1236dd5eca02eaf2d5742c51ce0ea94da38827171c6752c0a6140856c50679b97d363eceabffbdd2876ffb5795aa99c0e365ffe81ad7f85ffd
data/.gitignore ADDED
@@ -0,0 +1,50 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Gems are resolved from the public RubyGems index.
source 'https://rubygems.org'

# All dependencies are declared in cheap-stats.gemspec; load them from there.
gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Hiroshi Kuwagata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # cheap-stats
2
+ A cheap statistics library for Ruby
3
+
4
+ ## Installation
5
+
6
+ ```ruby
7
+ gem 'cheap-stats'
8
+ ```
9
+
10
+ And then execute:
11
+
12
+ $ bundle
13
+
14
+ Or install it yourself as:
15
+
16
+ $ gem install cheap-stats
17
+
18
+ ## Example
19
+
20
+ ```ruby
21
+ require 'cheap_stats'
22
+
23
+ SAMPLES = [7.0, 4.0, 1.0, 5.0, 3.0, 10.0, 6.0, 2.0, 8.0, 9.0]
24
+
25
+ stats = CheapStats.new(SAMPLES)
26
+
27
+ p stats.mean
28
+ p stats.std
29
+ p stats.skewness
30
+
31
+ ```
32
+
33
+ ## License
34
+
35
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
require "bundler/gem_tasks"
require "rake/extensiontask"

# Build the native extension in ext/cheap_stats and place the shared object
# under lib/cheap_stats, which is where lib/cheap_stats.rb requires it from
# ("cheap_stats/cheap_stats").
Rake::ExtensionTask.new("cheap_stats") do |ext|
  ext.lib_dir = "lib/cheap_stats"
end

# The project defines no :spec task, so a default of :spec made a plain
# `rake` abort. Compiling the extension is the meaningful default here.
task :default => :compile
@@ -0,0 +1,40 @@
1
+
2
# Make lib/ loadable so the version constant can be read while building.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "cheap_stats/version"

Gem::Specification.new do |spec|
  spec.name          = "cheap-stats"
  spec.version       = CheapStats::VERSION
  spec.authors       = ["Hiroshi Kuwagata"]
  spec.email         = ["kgt9221@gmail.com"]

  spec.summary       = %q{A cheaper statistics library.}
  spec.description   = %q{A cheaper statistics library.}
  spec.homepage      = "https://github.com/kwgt/cheap-stats"
  spec.license       = "MIT"

  # Only a homepage link is stored in the metadata. The former `else` branch
  # raised a message about "public gem pushes", which had nothing to do with
  # what is set here, so it was dropped.
  spec.metadata["homepage_uri"] = spec.homepage if spec.respond_to?(:metadata)

  # Package every file tracked by git except test directories.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f|
      f.match(%r{^(test|spec|features)/})
    }
  end

  # Executables are collected from exe/, so bindir has to name the same
  # directory; with "bin" any file added under exe/ would not be found.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.extensions    = ["ext/cheap_stats/extconf.rb"]
  spec.require_paths = ["lib"]

  spec.required_ruby_version = ">= 2.4.0"

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rake-compiler"
end
@@ -0,0 +1,605 @@
1
+ /*
2
+ * Small statistics library
3
+ *
4
+ * Copyright (C) 2019 Hiroshi Kuwagata <kgt9221@gmail.com>
5
+ */
6
+
7
+ #include <stdio.h>
8
+ #include <stdlib.h>
9
+ #include <string.h>
10
+ #include <math.h>
11
+
12
+ #include "cheap_stats.h"
13
+
14
+ #define DEFAULT_ERROR __LINE__
15
+ #define MIN_SAMPLES 10
16
+
17
+ #define ALLOC(t) ((t*)malloc(sizeof(t)))
18
+ #define NALLOC(t,n) ((t*)malloc(sizeof(t) * (n)))
19
+ #define FREE(var) do {free(var);var = NULL;} while (0)
20
+ #define SHRINK(n) ((n * 10) / 13)
21
+ #define SWAP(a,b) do {double t; t = b; b = a; a = t;} while(0)
22
+
23
/*
 * Sort an array of doubles in place, in ascending order, with comb sort
 * (the "Combsort11" variant).
 *
 * a : array to sort (modified in place)
 * n : number of elements in a
 *
 * The gap starts at n and is divided by 1.3 before every pass; a gap of 9
 * or 10 is replaced by 11. Once the gap has reached 1, passes are repeated
 * until one of them finishes without a swap.
 */
static void
combsort11(double* a, size_t n)
{
  size_t gap;
  size_t i;
  int swapped;
  double tmp;

  gap     = n;
  swapped = 0;   /* must be defined before the first loop test (n <= 1) */

  while (gap > 1 || swapped) {
    swapped = 0;

    /* shrink factor 1.3 */
    gap = (gap * 10) / 13;

    if (gap == 9 || gap == 10) gap = 11;

    /*
     * Never let the gap reach 0: a gap of 0 compares every element with
     * itself, reports "no swap" and ends the sort after a single gap-1 pass.
     */
    if (gap < 1) gap = 1;

    for (i = 0; i + gap < n; i++) {
      if (a[i] > a[i + gap]) {
        tmp        = a[i];
        a[i]       = a[i + gap];
        a[i + gap] = tmp;
        swapped    = 1;
      }
    }
  }
}
50
+
51
/*
 * Binary search in an ascending array.
 *
 * a : sorted array
 * n : number of elements in a
 * v : value to look for
 *
 * Returns the index of an element equal to v when the search hits one;
 * otherwise the midpoint index at which the search interval collapsed.
 */
static int
binsearch(double* a, size_t n, double v)
{
  int lo  = 0;
  int hi  = (int)n - 1;
  int mid = (lo + hi) / 2;

  while (lo < hi) {
    if (a[mid] < v) {
      lo = mid + 1;        /* continue in the upper half */
    } else if (a[mid] > v) {
      hi = mid - 1;        /* continue in the lower half */
    } else {
      break;               /* exact hit */
    }

    mid = (lo + hi) / 2;
  }

  return mid;
}
81
+
82
/*
 * Add up the first n elements of a, front to back.
 *
 * a : array of values
 * n : number of elements to add
 *
 * Returns the sum (0 when n is 0).
 */
static double
calc_sum(double* a, size_t n)
{
  const double* p    = a;
  int           left = (int)n;
  double        acc  = 0;

  while (left-- > 0) {
    acc += *p++;
  }

  return acc;
}
96
+
97
/*
 * Population variance: the mean of the squared deviations from `mean`.
 *
 * a    : array of values
 * n    : number of elements in a (the divisor is n, not n - 1)
 * mean : mean of the values, supplied by the caller
 */
static double
calc_variance(double* a, size_t n, double mean)
{
  const double* p    = a;
  int           left = (int)n;
  double        acc  = 0;
  double        diff;

  while (left-- > 0) {
    diff = *p++ - mean;
    acc += (diff * diff);
  }

  return acc / n;
}
114
+
115
/*
 * Empirical CDF estimate for v over a sorted sample array.
 *
 * a : samples in ascending order
 * n : number of samples
 * v : value to evaluate
 *
 * A value above the largest sample maps to position n (result 1.0);
 * otherwise the position is the index returned by binsearch(). The
 * result is position / n.
 */
static double
calc_cdf(double* a, size_t n, double v)
{
  int pos;

  pos = (v > a[n - 1]) ? (int)n : binsearch(a, n, v);

  return (double)pos / n;
}
128
+
129
+ static double
130
+ calc_moment(double* a, size_t n, double k)
131
+ {
132
+ double s;
133
+ int i;
134
+
135
+ for (i = 0; i < (int)n; i++) {
136
+ s += pow(a[i], k);
137
+ }
138
+
139
+ return s / n;
140
+ }
141
+
142
+ static double
143
+ calc_central_moment(double* a, size_t n, double k, double mean)
144
+ {
145
+ double s;
146
+ int i;
147
+
148
+ for (i = 0; i < (int)n; i++) {
149
+ s += pow(a[i] - mean, k);
150
+ }
151
+
152
+ return s / n;
153
+ }
154
+
155
/*
 * k-th standardized moment: the k-th central moment divided by std^k.
 *
 * a    : array of values
 * n    : number of elements in a
 * k    : order of the moment
 * mean : mean of the values
 * std  : standard deviation of the values
 */
static double
calc_std_moment(double* a, size_t n, double k, double mean, double std)
{
  double central;
  double scale;

  central = calc_central_moment(a, n, k, mean);
  scale   = pow(std, k);

  return central / scale;
}
160
+
161
+ static double
162
+ calc_normal_pdf(double* a, size_t n,
163
+ double mean, double std, double total, double v)
164
+ {
165
+ double t;
166
+
167
+ t = (v - mean) / std;
168
+
169
+ // 2.50662827463 == sqrt(2.0 * M_PI)
170
+ return (exp(-0.5 * (t * t)) / (std * 2.50662827463)) / total;
171
+ }
172
+
173
+ static double
174
+ kernel_gaussian(double x)
175
+ {
176
+ // 2.50662827463 == sqrt(2.0 * M_PI)
177
+ return exp(-(x * x) / 2.0) / 2.50662827463;
178
+ }
179
+
180
/*
 * Kernel density estimate at v with a Gaussian kernel.
 *
 * a   : samples
 * n   : number of samples
 * sig : spread estimate used to derive the bandwidth
 * v   : point to evaluate
 *
 * The bandwidth is 0.9 * sig / n^(1/5); the result is the sum of the
 * kernel values of (v - a[i]) / bandwidth divided by (n * bandwidth).
 */
static double
calc_kde(double* a, size_t n, double sig, double v)
{
  double bandwidth;
  double acc;
  int k;

  bandwidth = (0.9 * sig) / pow(n, 1.0 / 5.0);
  acc       = 0.0;

  for (k = 0; k < (int)n; k++) {
    acc += kernel_gaussian((v - a[k]) / bandwidth);
  }

  return (acc / (n * bandwidth));
}
196
+
197
+ int
198
+ cheap_stats_new(double* src, size_t n, cheap_stats_t** dst)
199
+ {
200
+ int ret;
201
+ double* a0;
202
+ double* a1;
203
+ cheap_stats_t* ptr;
204
+
205
+ /*
206
+ * initialize
207
+ */
208
+ ret = 0;
209
+ ptr = NULL;
210
+ a0 = NULL;
211
+ a1 = NULL;
212
+
213
+ /*
214
+ * argument check
215
+ */
216
+ do {
217
+ if (src == NULL) {
218
+ ret = DEFAULT_ERROR;
219
+ break;
220
+ }
221
+
222
+ if (n < MIN_SAMPLES) {
223
+ ret = DEFAULT_ERROR;
224
+ break;
225
+ }
226
+
227
+ if (dst == NULL) {
228
+ ret = DEFAULT_ERROR;
229
+ break;
230
+ }
231
+ } while (0);
232
+
233
+ /*
234
+ * alloc memory
235
+ */
236
+ if (!ret) do {
237
+ a0 = NALLOC(double, n);
238
+ if (a0 == NULL) {
239
+ ret = DEFAULT_ERROR;
240
+ break;
241
+ }
242
+
243
+ a1 = NALLOC(double, n);
244
+ if (a1 == NULL) {
245
+ ret = DEFAULT_ERROR;
246
+ break;
247
+ }
248
+
249
+ ptr = ALLOC(cheap_stats_t);
250
+ if (ptr == NULL) {
251
+ ret = DEFAULT_ERROR;
252
+ break;
253
+ }
254
+ } while (0);
255
+
256
+ /*
257
+ * put return parameter
258
+ */
259
+ if (!ret) {
260
+ memcpy(a0, src, sizeof(double) * n);
261
+ memcpy(a1, src, sizeof(double) * n);
262
+ combsort11(a1, n);
263
+
264
+ ptr->a0 = a0;
265
+ ptr->a1 = a1;
266
+ ptr->n = n;
267
+ ptr->total = calc_sum(a0, n);
268
+ ptr->mean = ptr->total / n;
269
+ ptr->min = ptr->a1[0];
270
+ ptr->max = ptr->a1[n - 1];
271
+ ptr->q1 = ptr->a1[n / 4];
272
+ ptr->q3 = ptr->a1[(3 * n) / 4];
273
+ ptr->median = ptr->a1[n / 2];
274
+ ptr->variance = calc_variance(a0, n, ptr->mean);
275
+ ptr->std = sqrt(ptr->variance);
276
+
277
+ *dst = ptr;
278
+ }
279
+
280
+ /*
281
+ * post process
282
+ */
283
+ if (ret) {
284
+ if (ptr) free(ptr);
285
+ if (a0) free(a0);
286
+ if (a1) free(a1);
287
+ }
288
+
289
+ return ret;
290
+ }
291
+
292
+ int
293
+ cheap_stats_destroy(cheap_stats_t* ptr)
294
+ {
295
+ int ret;
296
+
297
+ /*
298
+ * initialize
299
+ */
300
+ ret = 0;
301
+
302
+ /*
303
+ * argument check
304
+ */
305
+ if (ptr == NULL) {
306
+ ret = DEFAULT_ERROR;
307
+ }
308
+
309
+ /*
310
+ * release memory
311
+ */
312
+ if (!ret) {
313
+ if (ptr->a0) free(ptr->a0);
314
+ if (ptr->a1) free(ptr->a1);
315
+ free(ptr);
316
+ }
317
+
318
+ return ret;
319
+ }
320
+
321
+ int
322
+ cheap_stats_cdf(cheap_stats_t* ptr, double v, double* dst)
323
+ {
324
+ int ret;
325
+
326
+ /*
327
+ * initialize
328
+ */
329
+ ret = 0;
330
+
331
+ /*
332
+ * argument check
333
+ */
334
+ do {
335
+ if (ptr == NULL) {
336
+ ret = DEFAULT_ERROR;
337
+ break;
338
+ }
339
+
340
+ if (dst == NULL) {
341
+ ret = DEFAULT_ERROR;
342
+ break;
343
+ }
344
+ } while (0);
345
+
346
+ /*
347
+ * calc CDF
348
+ */
349
+ if (!ret) {
350
+ *dst = calc_cdf(ptr->a1, ptr->n, v);
351
+ }
352
+
353
+ return ret;
354
+ }
355
+
356
+ int
357
+ cheap_stats_normal_pdf(cheap_stats_t* ptr, double v, double* dst)
358
+ {
359
+ int ret;
360
+
361
+ /*
362
+ * initialize
363
+ */
364
+ ret = 0;
365
+
366
+ /*
367
+ * argument check
368
+ */
369
+ do {
370
+ if (ptr == NULL) {
371
+ ret = DEFAULT_ERROR;
372
+ break;
373
+ }
374
+
375
+ if (dst == NULL) {
376
+ ret = DEFAULT_ERROR;
377
+ break;
378
+ }
379
+ } while (0);
380
+
381
+ /*
382
+ * calc CDF
383
+ */
384
+ if (!ret) {
385
+ *dst = calc_normal_pdf(ptr->a1, ptr->n, ptr->mean, ptr->std, ptr->total, v);
386
+ }
387
+
388
+ return ret;
389
+ }
390
+
391
+ int
392
+ cheap_stats_estimated_pdf(cheap_stats_t* ptr, double v, double* dst)
393
+ {
394
+ int ret;
395
+ double sig;
396
+
397
+ /*
398
+ * initialize
399
+ */
400
+ ret = 0;
401
+
402
+ /*
403
+ * argument check
404
+ */
405
+ do {
406
+ if (ptr == NULL) {
407
+ ret = DEFAULT_ERROR;
408
+ break;
409
+ }
410
+
411
+ if (dst == NULL) {
412
+ ret = DEFAULT_ERROR;
413
+ break;
414
+ }
415
+ } while (0);
416
+
417
+ /*
418
+ * calc CDF
419
+ */
420
+ if (!ret) {
421
+ sig = ptr->q3 - ptr->q1;
422
+ if (sig > ptr->std) sig = ptr->std;
423
+
424
+ *dst = calc_kde(ptr->a1, ptr->n, sig, v);
425
+ }
426
+
427
+ return ret;
428
+ }
429
+
430
+
431
+ int
432
+ cheap_stats_moment(cheap_stats_t* ptr, double k, double* dst)
433
+ {
434
+ int ret;
435
+
436
+ /*
437
+ * initialize
438
+ */
439
+ ret = 0;
440
+
441
+ /*
442
+ * argument check
443
+ */
444
+ do {
445
+ if (ptr == NULL) {
446
+ ret = DEFAULT_ERROR;
447
+ break;
448
+ }
449
+
450
+ if (dst == NULL) {
451
+ ret = DEFAULT_ERROR;
452
+ break;
453
+ }
454
+ } while (0);
455
+
456
+ /*
457
+ * calc raw moment
458
+ */
459
+ if (!ret) {
460
+ *dst = calc_moment(ptr->a1, ptr->n, k);
461
+ }
462
+
463
+ return ret;
464
+ }
465
+
466
+ int
467
+ cheap_stats_central_moment(cheap_stats_t* ptr, double k, double* dst)
468
+ {
469
+ int ret;
470
+
471
+ /*
472
+ * initialize
473
+ */
474
+ ret = 0;
475
+
476
+ /*
477
+ * argument check
478
+ */
479
+ do {
480
+ if (ptr == NULL) {
481
+ ret = DEFAULT_ERROR;
482
+ break;
483
+ }
484
+
485
+ if (dst == NULL) {
486
+ ret = DEFAULT_ERROR;
487
+ break;
488
+ }
489
+ } while (0);
490
+
491
+ /*
492
+ * calc raw moment
493
+ */
494
+ if (!ret) {
495
+ *dst = calc_central_moment(ptr->a1, ptr->n, k, ptr->mean);
496
+ }
497
+
498
+ return ret;
499
+ }
500
+
501
+ int
502
+ cheap_stats_std_moment(cheap_stats_t* ptr, double k, double* dst)
503
+ {
504
+ int ret;
505
+
506
+ /*
507
+ * initialize
508
+ */
509
+ ret = 0;
510
+
511
+ /*
512
+ * argument check
513
+ */
514
+ do {
515
+ if (ptr == NULL) {
516
+ ret = DEFAULT_ERROR;
517
+ break;
518
+ }
519
+
520
+ if (dst == NULL) {
521
+ ret = DEFAULT_ERROR;
522
+ break;
523
+ }
524
+ } while (0);
525
+
526
+ /*
527
+ * calc raw moment
528
+ */
529
+ if (!ret) {
530
+ *dst = calc_std_moment(ptr->a1, ptr->n, k, ptr->mean, ptr->std);
531
+ }
532
+
533
+ return ret;
534
+ }
535
+
536
+ int
537
+ cheap_stats_skewness(cheap_stats_t* ptr, double* dst)
538
+ {
539
+ int ret;
540
+
541
+ /*
542
+ * initialize
543
+ */
544
+ ret = 0;
545
+
546
+ /*
547
+ * argument check
548
+ */
549
+ do {
550
+ if (ptr == NULL) {
551
+ ret = DEFAULT_ERROR;
552
+ break;
553
+ }
554
+
555
+ if (dst == NULL) {
556
+ ret = DEFAULT_ERROR;
557
+ break;
558
+ }
559
+ } while (0);
560
+
561
+ /*
562
+ * calc raw moment
563
+ */
564
+ if (!ret) {
565
+ *dst = calc_std_moment(ptr->a1, ptr->n, 3.0, ptr->mean, ptr->std);
566
+ }
567
+
568
+ return ret;
569
+ }
570
+
571
+ int
572
+ cheap_stats_pearson_skewness(cheap_stats_t* ptr, double* dst)
573
+ {
574
+ int ret;
575
+
576
+ /*
577
+ * initialize
578
+ */
579
+ ret = 0;
580
+
581
+ /*
582
+ * argument check
583
+ */
584
+ do {
585
+ if (ptr == NULL) {
586
+ ret = DEFAULT_ERROR;
587
+ break;
588
+ }
589
+
590
+ if (dst == NULL) {
591
+ ret = DEFAULT_ERROR;
592
+ break;
593
+ }
594
+ } while (0);
595
+
596
+ /*
597
+ * calc raw moment
598
+ */
599
+ if (!ret) {
600
+ *dst = (3.0 * (ptr->mean - ptr->median)) / (ptr->std + 1e-15);
601
+ }
602
+
603
+ return ret;
604
+ }
605
+
@@ -0,0 +1,37 @@
1
+ /*
2
+ * Small statistics library
3
+ *
4
+ * Copyright (C) 2019 Hiroshi Kuwagata <kgt9221@gmail.com>
5
+ */
6
+
7
+ #ifndef __SMALL_STATS_H__
8
+ #define __SMALL_STATS_H__
9
+
10
+ #include <stdlib.h>
11
+
12
/*
 * Statistics context. Created by cheap_stats_new(), released by
 * cheap_stats_destroy(). All summary values are computed once at creation.
 */
typedef struct {
  double* a0;       // copy of the samples, original order
  double* a1;       // copy of the samples, sorted ascending
  size_t n;         // number of samples

  double total;     // sum of the samples
  double mean;      // total / n
  double min;       // a1[0]
  double max;       // a1[n - 1]
  double q1;        // a1[n / 4]
  double q3;        // a1[(3 * n) / 4]
  double median;    // a1[n / 2]
  double variance;  // population variance (divisor n)
  double std;       // sqrt(variance)
} cheap_stats_t;

/*
 * Every function returns 0 on success and a non-zero code on failure.
 * Results are written through the last pointer argument.
 */
int cheap_stats_new(double* samples, size_t size, cheap_stats_t** obj);
int cheap_stats_destroy(cheap_stats_t* obj);
int cheap_stats_cdf(cheap_stats_t* obj, double v, double* dst);
/* the next two are defined in cheap_stats.c but had no prototype */
int cheap_stats_normal_pdf(cheap_stats_t* obj, double v, double* dst);
int cheap_stats_estimated_pdf(cheap_stats_t* obj, double v, double* dst);
int cheap_stats_moment(cheap_stats_t* obj, double k, double* dst);
int cheap_stats_central_moment(cheap_stats_t* obj, double k, double* dst);
int cheap_stats_std_moment(cheap_stats_t* obj, double k, double* dst);
int cheap_stats_skewness(cheap_stats_t* obj, double* dst);
int cheap_stats_pearson_skewness(cheap_stats_t* obj, double* dst);
36
+
37
+ #endif /* !defined(__SMALL_STATS_H__) */
@@ -0,0 +1,4 @@
1
require 'mkmf'

# The C sources call pow(), exp() and sqrt(), so check for the math library.
have_library('m')

# The extension is required as "cheap_stats/cheap_stats".
create_makefile('cheap_stats/cheap_stats')
@@ -0,0 +1,513 @@
1
+ /*
2
+ * cheap statistics library for ruby
3
+ *
4
+ * Copyright (C) 2019 Hiroshi Kuwagata <kgt9221@gmail.com>
5
+ */
6
+
7
+ #include <stdio.h>
8
+ #include <stdlib.h>
9
+ #include <stdint.h>
10
+ #include <string.h>
11
+ #include "ruby.h"
12
+
13
+ #include "cheap_stats.h"
14
+
15
+ #define N(x) (sizeof(x)/sizeof(*x))
16
+
17
+ #define RUNTIME_ERROR(msg, ...) rb_raise(rb_eRuntimeError, (msg), __VA_ARGS__)
18
+ #define ARGUMENT_ERROR(msg, ...) rb_raise(rb_eArgError, (msg), __VA_ARGS__)
19
+ #define TYPE_ERROR(msg, ...) rb_raise(rb_eTypeError, (msg), __VA_ARGS__)
20
+ #define NOMEMORY_ERROR(msg, ...) rb_raise(rb_eNoMemError, (msg), __VA_ARGS__)
21
+
22
+ #define API_SIMPLIFIED 1
23
+ #define API_CLASSIC 2
24
+
25
+ #define EQ_STR(val,str) (rb_to_id(val) == rb_intern(str))
26
+ #define EQ_INT(val,n) (FIX2INT(val) == n)
27
+ #define IS_NUMERIC(t) \
28
+ ((t) == T_FLOAT || (t) == T_FIXNUM || (t) == T_BIGNUM)
29
+
30
+ typedef struct {
31
+ cheap_stats_t* stats;
32
+ } rb_cheap_stats_t;
33
+
34
+ VALUE klass;
35
+
36
+ static size_t
37
+ rb_cheap_stats_size(const void* _ptr)
38
+ {
39
+ rb_cheap_stats_t* ptr;
40
+
41
+ ptr = (rb_cheap_stats_t*)_ptr;
42
+
43
+ return sizeof(*ptr) + ((sizeof(double) * ptr->stats->n) * 2);
44
+ }
45
+
46
+ static void
47
+ rb_cheap_stats_free(void* _ptr)
48
+ {
49
+ rb_cheap_stats_t* ptr;
50
+
51
+ ptr = (rb_cheap_stats_t*)_ptr;
52
+
53
+ if (ptr->stats != NULL) {
54
+ cheap_stats_destroy(ptr->stats);
55
+ ptr->stats = NULL;
56
+ }
57
+
58
+ xfree(ptr);
59
+ }
60
+
61
+ static const struct rb_data_type_struct rb_cheap_stats_data_type = {
62
+ "A Cheap satatics library",
63
+ {
64
+ NULL,
65
+ rb_cheap_stats_free,
66
+ rb_cheap_stats_size,
67
+ {NULL, NULL}
68
+ },
69
+ NULL,
70
+ NULL,
71
+ };
72
+
73
+ static VALUE
74
+ rb_cheap_stats_alloc(VALUE self)
75
+ {
76
+ rb_cheap_stats_t* ptr;
77
+
78
+ ptr = ALLOC(rb_cheap_stats_t);
79
+ memset(ptr, 0, sizeof(*ptr));
80
+
81
+ return TypedData_Wrap_Struct(klass, &rb_cheap_stats_data_type, ptr);
82
+ }
83
+
84
+ /**
85
+ * initialize object
86
+ *
87
+ * @params [Array<Numeric>] samples sample vaules.
88
+ */
89
+ static VALUE
90
+ rb_cheap_stats_initialize(VALUE self, VALUE samples)
91
+ {
92
+ rb_cheap_stats_t* ptr;
93
+ VALUE exp;
94
+ const char* msg;
95
+ int err;
96
+ double *a;
97
+ int i;
98
+ int n;
99
+ VALUE v;
100
+ int t;
101
+ char str[64];
102
+
103
+ /*
104
+ * strip context data
105
+ */
106
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
107
+
108
+ /*
109
+ * check argument
110
+ */
111
+ Check_Type(samples, T_ARRAY);
112
+
113
+ do {
114
+ exp = Qnil;
115
+ a = NULL;
116
+
117
+ /*
118
+ * allocate work buffer
119
+ */
120
+ n = RARRAY_LEN(samples);
121
+ a = malloc(sizeof(double) * n);
122
+
123
+ if (a == NULL) {
124
+ exp = rb_eNoMemError;
125
+ msg = "Memory allocation failed";
126
+ break;
127
+ }
128
+
129
+ /*
130
+ * copy source value
131
+ */
132
+ for (i = 0; i < n; i++) {
133
+ v = RARRAY_AREF(samples, i);
134
+ t = TYPE(v);
135
+
136
+ if (!IS_NUMERIC(t)) break;
137
+
138
+ a[i] = NUM2DBL(v);
139
+ }
140
+
141
+ if (i != n) {
142
+ exp = rb_eTypeError;
143
+ msg = "the value that not numeric was included";
144
+ }
145
+
146
+ /*
147
+ * create statistic context
148
+ */
149
+ err = cheap_stats_new(a, n, &ptr->stats);
150
+ if (err) {
151
+ sprintf(str, "cheap_stats_new() failed [err=%d]", err);
152
+
153
+ exp = rb_eRuntimeError;
154
+ msg = str;
155
+ break;
156
+ }
157
+ } while (0);
158
+
159
+ /*
160
+ * post porcess
161
+ */
162
+ if (a != NULL) free(a);
163
+
164
+ if (exp != Qnil) {
165
+ if (ptr->stats != NULL) {
166
+ cheap_stats_destroy(ptr->stats);
167
+ ptr->stats = NULL;
168
+ }
169
+
170
+ rb_raise(exp, msg);
171
+ }
172
+
173
+ return self;
174
+ }
175
+
176
+ /**
177
+ * get total value of samples
178
+ *
179
+ * @return [Float] total value
180
+ */
181
+ static VALUE
182
+ rb_cheap_stats_total(VALUE self)
183
+ {
184
+ rb_cheap_stats_t* ptr;
185
+
186
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
187
+
188
+ return DBL2NUM(ptr->stats->total);
189
+ }
190
+
191
+ /**
192
+ * get mean of samples
193
+ *
194
+ * @return [Float] mean
195
+ */
196
+ static VALUE
197
+ rb_cheap_stats_mean(VALUE self)
198
+ {
199
+ rb_cheap_stats_t* ptr;
200
+
201
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
202
+
203
+ return DBL2NUM(ptr->stats->mean);
204
+ }
205
+
206
+ /**
207
+ * get min value of samples
208
+ *
209
+ * @return [Float] min value
210
+ */
211
+ static VALUE
212
+ rb_cheap_stats_min(VALUE self)
213
+ {
214
+ rb_cheap_stats_t* ptr;
215
+
216
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
217
+
218
+ return DBL2NUM(ptr->stats->min);
219
+ }
220
+
221
+ /**
222
+ * get max value of samples
223
+ *
224
+ * @return [Float] max value
225
+ */
226
+ static VALUE
227
+ rb_cheap_stats_max(VALUE self)
228
+ {
229
+ rb_cheap_stats_t* ptr;
230
+
231
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
232
+
233
+ return DBL2NUM(ptr->stats->max);
234
+ }
235
+
236
+ /**
237
+ * get 1/4 quartile value of samples
238
+ *
239
+ * @return [Float] 1/4 quartile value
240
+ */
241
+ static VALUE
242
+ rb_cheap_stats_q1(VALUE self)
243
+ {
244
+ rb_cheap_stats_t* ptr;
245
+
246
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
247
+
248
+ return DBL2NUM(ptr->stats->q1);
249
+ }
250
+
251
+ /**
252
+ * get 3/4 quartile value of samples
253
+ *
254
+ * @return [Float] 3/4 quartile value
255
+ */
256
+ static VALUE
257
+ rb_cheap_stats_q3(VALUE self)
258
+ {
259
+ rb_cheap_stats_t* ptr;
260
+
261
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
262
+
263
+ return DBL2NUM(ptr->stats->q3);
264
+ }
265
+
266
+ /**
267
+ * get median of samples
268
+ *
269
+ * @return [Float] median
270
+ */
271
+ static VALUE
272
+ rb_cheap_stats_median(VALUE self)
273
+ {
274
+ rb_cheap_stats_t* ptr;
275
+
276
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
277
+
278
+ return DBL2NUM(ptr->stats->median);
279
+ }
280
+
281
+ /**
282
+ * get standard division of samples
283
+ *
284
+ * @return [Float] standard division
285
+ */
286
+ static VALUE
287
+ rb_cheap_stats_std(VALUE self)
288
+ {
289
+ rb_cheap_stats_t* ptr;
290
+
291
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
292
+
293
+ return DBL2NUM(ptr->stats->std);
294
+ }
295
+
296
+ /**
297
+ * get variance of samples
298
+ *
299
+ * @return [Float] variance
300
+ */
301
+ static VALUE
302
+ rb_cheap_stats_variance(VALUE self)
303
+ {
304
+ rb_cheap_stats_t* ptr;
305
+
306
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
307
+
308
+ return DBL2NUM(ptr->stats->variance);
309
+ }
310
+
311
+ /**
312
+ * calc CDF
313
+ *
314
+ * @param [Numeric] x taregt value
315
+ *
316
+ * @return [Float] CDF value that coresspoinding to target.
317
+ */
318
+ static VALUE
319
+ rb_cheap_stats_cdf(VALUE self, VALUE x)
320
+ {
321
+ rb_cheap_stats_t* ptr;
322
+ int err;
323
+ double ret;
324
+
325
+ /*
326
+ * strip context data
327
+ */
328
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
329
+
330
+ /*
331
+ * check argument
332
+ */
333
+ err = cheap_stats_cdf(ptr->stats, rb_num2dbl(x), &ret);
334
+ if (err) {
335
+ RUNTIME_ERROR("cheap_stats_cdf() failed [err=%d]", err);
336
+ }
337
+
338
+ return DBL2NUM(ret);
339
+ }
340
+
341
+ /**
342
+ * calc moment
343
+ *
344
+ * @param [Numeric] k order number
345
+ *
346
+ * @return [Float] moment value
347
+ */
348
+ static VALUE
349
+ rb_cheap_stats_moment(VALUE self, VALUE k)
350
+ {
351
+ rb_cheap_stats_t* ptr;
352
+ int err;
353
+ double ret;
354
+
355
+ /*
356
+ * strip context data
357
+ */
358
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
359
+
360
+ /*
361
+ * check argument
362
+ */
363
+ err = cheap_stats_moment(ptr->stats, rb_num2dbl(k), &ret);
364
+ if (err) {
365
+ RUNTIME_ERROR("cheap_stats_moment() failed [err=%d]", err);
366
+ }
367
+
368
+ return DBL2NUM(ret);
369
+ }
370
+
371
+ /**
372
+ * calc central moment
373
+ *
374
+ * @param [Numeric] k order number
375
+ *
376
+ * @return [Float] moment value
377
+ */
378
+ static VALUE
379
+ rb_cheap_stats_central_moment(VALUE self, VALUE k)
380
+ {
381
+ rb_cheap_stats_t* ptr;
382
+ int err;
383
+ double ret;
384
+
385
+ /*
386
+ * strip context data
387
+ */
388
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
389
+
390
+ /*
391
+ * check argument
392
+ */
393
+ err = cheap_stats_central_moment(ptr->stats, rb_num2dbl(k), &ret);
394
+ if (err) {
395
+ RUNTIME_ERROR("cheap_stats_central_moment() failed [err=%d]", err);
396
+ }
397
+
398
+ return DBL2NUM(ret);
399
+ }
400
+
401
+ /**
402
+ * calc std (standard division) moment
403
+ *
404
+ * @param [Numeric] k order number
405
+ *
406
+ * @return [Float] moment value
407
+ */
408
+ static VALUE
409
+ rb_cheap_stats_std_moment(VALUE self, VALUE k)
410
+ {
411
+ rb_cheap_stats_t* ptr;
412
+ int err;
413
+ double ret;
414
+
415
+ /*
416
+ * strip context data
417
+ */
418
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
419
+
420
+ /*
421
+ * check argument
422
+ */
423
+ err = cheap_stats_std_moment(ptr->stats, rb_num2dbl(k), &ret);
424
+ if (err) {
425
+ RUNTIME_ERROR("cheap_stats_std_moment() failed [err=%d]", err);
426
+ }
427
+
428
+ return DBL2NUM(ret);
429
+ }
430
+
431
+ /**
432
+ * calc skewness value
433
+ *
434
+ * @return [Float] skewness value
435
+ */
436
+ static VALUE
437
+ rb_cheap_stats_skewness(VALUE self)
438
+ {
439
+ rb_cheap_stats_t* ptr;
440
+ int err;
441
+ double ret;
442
+
443
+ /*
444
+ * strip context data
445
+ */
446
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
447
+
448
+ /*
449
+ * check argument
450
+ */
451
+ err = cheap_stats_skewness(ptr->stats, &ret);
452
+ if (err) {
453
+ RUNTIME_ERROR("cheap_stats_skewness() failed [err=%d]", err);
454
+ }
455
+
456
+ return DBL2NUM(ret);
457
+ }
458
+
459
+ /**
460
+ * calc pearson median skewness value
461
+ *
462
+ * @return [Float] skewness value
463
+ */
464
+ static VALUE
465
+ rb_cheap_stats_pearson_skewness(VALUE self)
466
+ {
467
+ rb_cheap_stats_t* ptr;
468
+ int err;
469
+ double ret;
470
+
471
+ /*
472
+ * strip context data
473
+ */
474
+ TypedData_Get_Struct(self, rb_cheap_stats_t, &rb_cheap_stats_data_type, ptr);
475
+
476
+ /*
477
+ * check argument
478
+ */
479
+ err = cheap_stats_pearson_skewness(ptr->stats, &ret);
480
+ if (err) {
481
+ RUNTIME_ERROR("cheap_stats_pearson_skewness() failed [err=%d]", err);
482
+ }
483
+
484
+ return DBL2NUM(ret);
485
+ }
486
+
487
+ void
488
+ Init_cheap_stats()
489
+ {
490
+ klass = rb_define_class("CheapStats", rb_cObject);
491
+
492
+ rb_define_alloc_func(klass, rb_cheap_stats_alloc);
493
+
494
+ rb_define_method(klass, "initialize", rb_cheap_stats_initialize, 1);
495
+ rb_define_method(klass, "total", rb_cheap_stats_total, 0);
496
+ rb_define_method(klass, "min", rb_cheap_stats_min, 0);
497
+ rb_define_method(klass, "max", rb_cheap_stats_max, 0);
498
+ rb_define_method(klass, "q1", rb_cheap_stats_q1, 0);
499
+ rb_define_method(klass, "q3", rb_cheap_stats_q3, 0);
500
+ rb_define_method(klass, "mean", rb_cheap_stats_mean, 0);
501
+ rb_define_method(klass, "median", rb_cheap_stats_median, 0);
502
+ rb_define_method(klass, "variance", rb_cheap_stats_variance, 0);
503
+ rb_define_method(klass, "std", rb_cheap_stats_std, 0);
504
+ rb_define_method(klass, "cdf", rb_cheap_stats_cdf, 1);
505
+ rb_define_method(klass, "moment", rb_cheap_stats_moment, 1);
506
+ rb_define_method(klass, "central_moment", rb_cheap_stats_central_moment, 1);
507
+ rb_define_method(klass, "std_moment", rb_cheap_stats_std_moment, 1);
508
+ rb_define_method(klass, "skewness", rb_cheap_stats_skewness, 0);
509
+ rb_define_method(klass, "pearson_skewness",rb_cheap_stats_pearson_skewness,0);
510
+
511
+ rb_alias(klass, rb_intern("average"), rb_intern("mean"));
512
+ rb_alias(klass, rb_intern("sigma"), rb_intern("std"));
513
+ }
@@ -0,0 +1,11 @@
1
+ #
2
+ # Cheap statistics for Ruby
3
+ #
4
+ # Copyright (C) 2019 Hiroshi Kuwagata <kgt9221@gmail.com>
5
+ #
6
+
7
# Version constant first, then the compiled extension that defines the
# methods of CheapStats.
require 'cheap_stats/version'
require 'cheap_stats/cheap_stats'

# Statistics over an array of numeric samples.
class CheapStats; end
@@ -0,0 +1,3 @@
1
class CheapStats
  # Version of the gem. Frozen so the constant cannot be changed in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cheap-stats
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hiroshi Kuwagata
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-04-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: A cheaper statistics library.
56
+ email:
57
+ - kgt9221@gmail.com
58
+ executables: []
59
+ extensions:
60
+ - ext/cheap_stats/extconf.rb
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - Gemfile
65
+ - LICENSE
66
+ - README.md
67
+ - Rakefile
68
+ - cheap-stats.gemspec
69
+ - ext/cheap_stats/cheap_stats.c
70
+ - ext/cheap_stats/cheap_stats.h
71
+ - ext/cheap_stats/extconf.rb
72
+ - ext/cheap_stats/rb_cheap_stats.c
73
+ - lib/cheap_stats.rb
74
+ - lib/cheap_stats/version.rb
75
+ homepage: https://github.com/kwgt/cheap-stats
76
+ licenses:
77
+ - MIT
78
+ metadata:
79
+ homepage_uri: https://github.com/kwgt/cheap-stats
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 2.4.0
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubygems_version: 3.0.1
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: A cheaper statistics library.
99
+ test_files: []