mdarray 0.4.3.pre-java → 0.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +82 -14
- data/Rakefile +8 -2
- data/doc/BinaryOperator.html +227 -0
- data/doc/BitwiseOperators.html +135 -0
- data/doc/BooleanFunctions.html +135 -0
- data/doc/BooleanMDArray.html +193 -0
- data/doc/ByteMDArray.html +271 -0
- data/doc/Colt.html +269 -0
- data/doc/ComparisonOperators.html +135 -0
- data/doc/Const.html +490 -0
- data/doc/Csv.html +589 -0
- data/doc/DDescriptive.html +4373 -0
- data/doc/DoubleMDArray.html +555 -0
- data/doc/DoubleStatList.html +256 -0
- data/doc/FastBinaryOperator.html +743 -0
- data/doc/FastBoolean.html +512 -0
- data/doc/FastUnaryOperator.html +583 -0
- data/doc/FloatMDArray.html +237 -0
- data/doc/FunctionCreation.html +520 -0
- data/doc/FunctionMap.html +813 -0
- data/doc/GenericFunctions.html +135 -0
- data/doc/IntMDArray.html +259 -0
- data/doc/LongMDArray.html +257 -0
- data/doc/MDArray.html +9639 -0
- data/doc/MDArray/Counter.html +2767 -0
- data/doc/MDArray/IteratorFast.html +872 -0
- data/doc/MDArray/IteratorFastBoolean.html +384 -0
- data/doc/MDArray/IteratorFastByte.html +373 -0
- data/doc/MDArray/IteratorFastChar.html +384 -0
- data/doc/MDArray/IteratorFastDouble.html +384 -0
- data/doc/MDArray/IteratorFastFloat.html +384 -0
- data/doc/MDArray/IteratorFastInt.html +384 -0
- data/doc/MDArray/IteratorFastLong.html +384 -0
- data/doc/MDArray/IteratorFastShort.html +384 -0
- data/doc/MDArrayTest.html +125 -0
- data/doc/NonNumericalMDArray.html +177 -0
- data/doc/NumericFunctions.html +297 -0
- data/doc/NumericalMDArray.html +278 -0
- data/doc/Operator.html +826 -0
- data/doc/Proc.html +1097 -0
- data/doc/RubyBinaryOperator.html +526 -0
- data/doc/RubyFunctions.html +232 -0
- data/doc/RubyMath.html +135 -0
- data/doc/RubyStats.html +135 -0
- data/doc/RubyUnaryOperator.html +510 -0
- data/doc/ShortMDArray.html +267 -0
- data/doc/StatList.html +1176 -0
- data/doc/StringMDArray.html +181 -0
- data/doc/StructureMDArray.html +181 -0
- data/doc/UnaryOperator.html +227 -0
- data/doc/UserFunction.html +297 -0
- data/doc/_index.html +467 -0
- data/doc/class_list.html +53 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +57 -0
- data/doc/css/style.css +338 -0
- data/doc/file.README.html +173 -0
- data/doc/file_list.html +55 -0
- data/doc/frames.html +28 -0
- data/doc/index.html +173 -0
- data/doc/js/app.js +214 -0
- data/doc/js/full_list.js +173 -0
- data/doc/js/jquery.js +4 -0
- data/doc/method_list.html +2524 -0
- data/doc/top-level-namespace.html +114 -0
- data/lib/colt/colt.rb +56 -0
- data/lib/colt/colt_mdarray.rb +95 -0
- data/lib/colt/double_descriptive.rb +636 -0
- data/lib/colt/probability.rb +12 -0
- data/lib/colt/stat_list.rb +192 -0
- data/lib/env.rb +5 -1
- data/lib/mdarray.rb +5 -0
- data/lib/mdarray/access.rb +2 -2
- data/lib/mdarray/creation.rb +34 -19
- data/lib/mdarray/csv.rb +61 -0
- data/lib/mdarray/fast_operators.rb +241 -0
- data/lib/mdarray/operators.rb +18 -0
- data/lib/mdarray/ruby_operators.rb +2 -18
- data/lib/mdarray/ruby_stats.rb +1 -1
- data/lib/mdarray/views.rb +8 -8
- data/target/helper.jar +0 -0
- data/test/colt/VALE3.csv +3437 -0
- data/test/colt/VALE3.xlsx +0 -0
- data/test/colt/VALE3_short-err.csv +20 -0
- data/test/colt/VALE3_short.csv +20 -0
- data/test/colt/VALE3_short.xlsx +0 -0
- data/test/colt/test_complete.rb +25 -0
- data/test/colt/test_stat_list.rb +128 -0
- data/test/colt/test_statistics.rb +114 -0
- data/test/complete.rb +25 -0
- data/test/env.rb +54 -30
- data/test/{arithmetic_casting.rb → mdarray/arithmetic_casting.rb} +0 -0
- data/test/{test_access.rb → mdarray/test_access.rb} +0 -0
- data/test/{test_boolean.rb → mdarray/test_boolean.rb} +0 -2
- data/test/{test_comparison.rb → mdarray/test_comparison.rb} +0 -0
- data/test/{test_complete.rb → mdarray/test_complete.rb} +3 -14
- data/test/{test_counter.rb → mdarray/test_counter.rb} +0 -0
- data/test/{test_creation.rb → mdarray/test_creation.rb} +0 -0
- data/test/{test_error.rb → mdarray/test_error.rb} +8 -5
- data/test/{test_operator.rb → mdarray/test_operator.rb} +6 -2
- data/test/{test_speed.rb → mdarray/test_performance.rb} +74 -39
- data/test/{test_printing.rb → mdarray/test_printing.rb} +0 -1
- data/test/{test_shape.rb → mdarray/test_shape.rb} +0 -0
- data/test/mdarray/test_statistics.rb +80 -0
- data/test/{test_trigonometry.rb → mdarray/test_trigonometry.rb} +0 -0
- data/test/{test_views.rb → mdarray/test_views.rb} +0 -0
- data/vendor/parallelcolt-0.10.0.jar +0 -0
- data/version.rb +1 -1
- metadata +122 -39
- data/test/test_lazy.rb +0 -52
- data/test/test_statistics.rb +0 -38
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
4
|
+
<head>
|
|
5
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
|
6
|
+
<title>
|
|
7
|
+
Top Level Namespace
|
|
8
|
+
|
|
9
|
+
— Documentation by YARD 0.8.5.2
|
|
10
|
+
|
|
11
|
+
</title>
|
|
12
|
+
|
|
13
|
+
<link rel="stylesheet" href="css/style.css" type="text/css" media="screen" charset="utf-8" />
|
|
14
|
+
|
|
15
|
+
<link rel="stylesheet" href="css/common.css" type="text/css" media="screen" charset="utf-8" />
|
|
16
|
+
|
|
17
|
+
<script type="text/javascript" charset="utf-8">
|
|
18
|
+
hasFrames = window.top.frames.main ? true : false;
|
|
19
|
+
relpath = '';
|
|
20
|
+
framesUrl = "frames.html#!" + escape(window.location.href);
|
|
21
|
+
</script>
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
<script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
|
|
25
|
+
|
|
26
|
+
<script type="text/javascript" charset="utf-8" src="js/app.js"></script>
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
</head>
|
|
30
|
+
<body>
|
|
31
|
+
<div id="header">
|
|
32
|
+
<div id="menu">
|
|
33
|
+
|
|
34
|
+
<a href="_index.html">Index</a> »
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
<span class="title">Top Level Namespace</span>
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
<div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
|
|
41
|
+
</div>
|
|
42
|
+
|
|
43
|
+
<div id="search">
|
|
44
|
+
|
|
45
|
+
<a class="full_list_link" id="class_list_link"
|
|
46
|
+
href="class_list.html">
|
|
47
|
+
Class List
|
|
48
|
+
</a>
|
|
49
|
+
|
|
50
|
+
<a class="full_list_link" id="method_list_link"
|
|
51
|
+
href="method_list.html">
|
|
52
|
+
Method List
|
|
53
|
+
</a>
|
|
54
|
+
|
|
55
|
+
<a class="full_list_link" id="file_list_link"
|
|
56
|
+
href="file_list.html">
|
|
57
|
+
File List
|
|
58
|
+
</a>
|
|
59
|
+
|
|
60
|
+
</div>
|
|
61
|
+
<div class="clear"></div>
|
|
62
|
+
</div>
|
|
63
|
+
|
|
64
|
+
<iframe id="search_frame"></iframe>
|
|
65
|
+
|
|
66
|
+
<div id="content"><h1>Top Level Namespace
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
</h1>
|
|
71
|
+
|
|
72
|
+
<dl class="box">
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
</dl>
|
|
82
|
+
<div class="clear"></div>
|
|
83
|
+
|
|
84
|
+
<h2>Defined Under Namespace</h2>
|
|
85
|
+
<p class="children">
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
<strong class="modules">Modules:</strong> <span class='object_link'><a href="BitwiseOperators.html" title="BitwiseOperators (module)">BitwiseOperators</a></span>, <span class='object_link'><a href="BooleanFunctions.html" title="BooleanFunctions (module)">BooleanFunctions</a></span>, <span class='object_link'><a href="ComparisonOperators.html" title="ComparisonOperators (module)">ComparisonOperators</a></span>, <span class='object_link'><a href="DDescriptive.html" title="DDescriptive (module)">DDescriptive</a></span>, <span class='object_link'><a href="FunctionCreation.html" title="FunctionCreation (module)">FunctionCreation</a></span>, <span class='object_link'><a href="GenericFunctions.html" title="GenericFunctions (module)">GenericFunctions</a></span>, <span class='object_link'><a href="NumericFunctions.html" title="NumericFunctions (module)">NumericFunctions</a></span>, <span class='object_link'><a href="RubyFunctions.html" title="RubyFunctions (module)">RubyFunctions</a></span>, <span class='object_link'><a href="RubyMath.html" title="RubyMath (module)">RubyMath</a></span>, <span class='object_link'><a href="RubyStats.html" title="RubyStats (module)">RubyStats</a></span>, <span class='object_link'><a href="UserFunction.html" title="UserFunction (module)">UserFunction</a></span>
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
<strong class="classes">Classes:</strong> <span class='object_link'><a href="BinaryOperator.html" title="BinaryOperator (class)">BinaryOperator</a></span>, <span class='object_link'><a href="BooleanMDArray.html" title="BooleanMDArray (class)">BooleanMDArray</a></span>, <span class='object_link'><a href="ByteMDArray.html" title="ByteMDArray (class)">ByteMDArray</a></span>, <span class='object_link'><a href="Colt.html" title="Colt (class)">Colt</a></span>, <span class='object_link'><a href="Const.html" title="Const (class)">Const</a></span>, <span class='object_link'><a href="Csv.html" title="Csv (class)">Csv</a></span>, <span class='object_link'><a href="DoubleMDArray.html" title="DoubleMDArray (class)">DoubleMDArray</a></span>, <span class='object_link'><a href="DoubleStatList.html" title="DoubleStatList (class)">DoubleStatList</a></span>, <span class='object_link'><a href="FastBinaryOperator.html" title="FastBinaryOperator (class)">FastBinaryOperator</a></span>, <span class='object_link'><a href="FastUnaryOperator.html" title="FastUnaryOperator (class)">FastUnaryOperator</a></span>, <span class='object_link'><a href="FloatMDArray.html" title="FloatMDArray (class)">FloatMDArray</a></span>, <span class='object_link'><a href="FunctionMap.html" title="FunctionMap (class)">FunctionMap</a></span>, <span class='object_link'><a href="IntMDArray.html" title="IntMDArray (class)">IntMDArray</a></span>, <span class='object_link'><a href="LongMDArray.html" title="LongMDArray (class)">LongMDArray</a></span>, <span class='object_link'><a href="MDArray.html" title="MDArray (class)">MDArray</a></span>, <span class='object_link'><a href="NonNumericalMDArray.html" title="NonNumericalMDArray (class)">NonNumericalMDArray</a></span>, <span class='object_link'><a href="NumericalMDArray.html" title="NumericalMDArray (class)">NumericalMDArray</a></span>, <span class='object_link'><a href="Operator.html" title="Operator (class)">Operator</a></span>, <span class='object_link'><a 
href="Proc.html" title="Proc (class)">Proc</a></span>, <span class='object_link'><a href="RubyBinaryOperator.html" title="RubyBinaryOperator (class)">RubyBinaryOperator</a></span>, <span class='object_link'><a href="RubyUnaryOperator.html" title="RubyUnaryOperator (class)">RubyUnaryOperator</a></span>, <span class='object_link'><a href="ShortMDArray.html" title="ShortMDArray (class)">ShortMDArray</a></span>, <span class='object_link'><a href="StatList.html" title="StatList (class)">StatList</a></span>, <span class='object_link'><a href="StringMDArray.html" title="StringMDArray (class)">StringMDArray</a></span>, <span class='object_link'><a href="StructureMDArray.html" title="StructureMDArray (class)">StructureMDArray</a></span>, <span class='object_link'><a href="UnaryOperator.html" title="UnaryOperator (class)">UnaryOperator</a></span>
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
</p>
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
</div>
|
|
106
|
+
|
|
107
|
+
<div id="footer">
|
|
108
|
+
Generated on Thu May 16 12:48:34 2013 by
|
|
109
|
+
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
|
110
|
+
0.8.5.2 (ruby-1.9.3).
|
|
111
|
+
</div>
|
|
112
|
+
|
|
113
|
+
</body>
|
|
114
|
+
</html>
|
data/lib/colt/colt.rb
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
##########################################################################################
|
|
4
|
+
# Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
|
10
|
+
#
|
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
15
|
+
#
|
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
|
20
|
+
# OR MODIFICATIONS.
|
|
21
|
+
##########################################################################################
|
|
22
|
+
|
|
23
|
+
##########################################################################################
|
|
24
|
+
#
|
|
25
|
+
##########################################################################################
|
|
26
|
+
|
|
27
|
+
class Colt
|
|
28
|
+
|
|
29
|
+
#------------------------------------------------------------------------------------
|
|
30
|
+
#
|
|
31
|
+
#------------------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
def self.processors
|
|
34
|
+
Java::EduEmoryMathcsUtils::ConcurrencyUtils.get_number_of_processors
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
#------------------------------------------------------------------------------------
|
|
38
|
+
#
|
|
39
|
+
#------------------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
def self.threads
|
|
42
|
+
Java::EduEmoryMathcsUtils::ConcurrencyUtils.get_number_of_threads
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
#------------------------------------------------------------------------------------
|
|
46
|
+
#
|
|
47
|
+
#------------------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
end # Colt
|
|
51
|
+
|
|
52
|
+
require_relative 'stat_list'
|
|
53
|
+
require_relative 'colt_mdarray'
|
|
54
|
+
|
|
55
|
+
# MDArray.functions = "CernFunctions"
|
|
56
|
+
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
##########################################################################################
|
|
4
|
+
# Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
|
10
|
+
#
|
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
15
|
+
#
|
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
|
20
|
+
# OR MODIFICATIONS.
|
|
21
|
+
##########################################################################################
|
|
22
|
+
|
|
23
|
+
##########################################################################################
|
|
24
|
+
# Reopens class DoubleMDArray so that we can add helper methods to interface with Colt
|
|
25
|
+
##########################################################################################
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DoubleMDArray
|
|
29
|
+
|
|
30
|
+
attr_reader :stat_list
|
|
31
|
+
|
|
32
|
+
#------------------------------------------------------------------------------------
|
|
33
|
+
# Converts the mdarray to a DoubleArrayList usable by Parallel Colt
|
|
34
|
+
#------------------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
def reset_statistics
|
|
37
|
+
|
|
38
|
+
base_array = @nc_array.get1DJavaArray(Java::double.java_class)
|
|
39
|
+
double_array_list = Java::CernColtListTdouble::DoubleArrayList.new(base_array)
|
|
40
|
+
@stat_list = DoubleStatList.new(double_array_list)
|
|
41
|
+
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
#------------------------------------------------------------------------------------
|
|
45
|
+
#
|
|
46
|
+
#------------------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
#------------------------------------------------------------------------------------
|
|
51
|
+
#
|
|
52
|
+
#------------------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
def self.colt_stats
|
|
55
|
+
|
|
56
|
+
stats = [:array_list, :auto_correlation, :correlation, :covariance, :durbin_watson,
|
|
57
|
+
:frequencies,
|
|
58
|
+
:geometric_mean, :harmonic_mean, :kurtosis, :lag1, :list_size, :max, :mean,
|
|
59
|
+
:mean_deviation, :median, :min, :moment, :moment3, :moment4, :pooled_mean,
|
|
60
|
+
:pooled_variance, :product, :quantile, :quantile_inverse, :quantiles,
|
|
61
|
+
:rank_interpolated, :rms, :sample_covariance, :sample_kurtosis,
|
|
62
|
+
:sample_kurtosis_standard_error,
|
|
63
|
+
:sample_skew, :sample_skew_standard_error, :sample_standard_deviation,
|
|
64
|
+
:sample_variance, :sample_weighted_variance, :skew, :split,
|
|
65
|
+
:standard_deviation, :standard_error, :sum,
|
|
66
|
+
:sum_of_inversions, :sum_of_logarithms, :sum_of_powers,
|
|
67
|
+
:sum_of_power_deviations, :sum_of_squares, :sum_of_squared_deviations,
|
|
68
|
+
:trimmed_mean,
|
|
69
|
+
:variance, :weighted_mean, :weighted_rms, :weighted_sums, :winsorized_mean]
|
|
70
|
+
|
|
71
|
+
=begin
|
|
72
|
+
# undefine all methods. Needed for now; in future versions we should be able to
|
|
73
|
+
# have many ways of executing the same method, so there should be no need to
|
|
74
|
+
# undef a method.
|
|
75
|
+
stats.each do |method|
|
|
76
|
+
remove_method(:max)
|
|
77
|
+
end
|
|
78
|
+
=end
|
|
79
|
+
|
|
80
|
+
# define all statistics methods from colt
|
|
81
|
+
stats.each do |method|
|
|
82
|
+
define_method(method) { |*args| @stat_list.send(method, *args) }
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
define_method(:standardize) do |*args|
|
|
86
|
+
@stat_list.standardize!(*args)
|
|
87
|
+
@stat_list.elements
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
self.colt_stats
|
|
93
|
+
|
|
94
|
+
end # MDArray
|
|
95
|
+
|
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
##########################################################################################
|
|
4
|
+
# Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
|
10
|
+
#
|
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
15
|
+
#
|
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
|
20
|
+
# OR MODIFICATIONS.
|
|
21
|
+
##########################################################################################
|
|
22
|
+
|
|
23
|
+
##########################################################################################
|
|
24
|
+
#
|
|
25
|
+
##########################################################################################
|
|
26
|
+
|
|
27
|
+
require 'java'
|
|
28
|
+
|
|
29
|
+
module DDescriptive
|
|
30
|
+
include_package "cern.jet.stat.tdouble"
|
|
31
|
+
|
|
32
|
+
#------------------------------------------------------------------------------------
|
|
33
|
+
#
|
|
34
|
+
#------------------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
def reset_statistics
|
|
37
|
+
|
|
38
|
+
@distinct_values = nil
|
|
39
|
+
@durbin_watson = nil
|
|
40
|
+
@frequencies = nil
|
|
41
|
+
@geometric_mean = nil
|
|
42
|
+
@kurtosis = nil
|
|
43
|
+
@lag1 = nil
|
|
44
|
+
@max = nil
|
|
45
|
+
@mean = nil
|
|
46
|
+
@mean_deviation = nil
|
|
47
|
+
@median = nil
|
|
48
|
+
@min = nil
|
|
49
|
+
@moment3 = nil
|
|
50
|
+
@moment4 = nil
|
|
51
|
+
@product = nil
|
|
52
|
+
@sample_kurtosis = nil
|
|
53
|
+
@sample_kurtosis_standard_error = nil
|
|
54
|
+
@sample_skew = nil
|
|
55
|
+
@sample_skew_standard_error = nil
|
|
56
|
+
@sample_standard_deviation = nil
|
|
57
|
+
@sample_variance = nil
|
|
58
|
+
@sample_weighted_variance = nil
|
|
59
|
+
@list_size = nil
|
|
60
|
+
@skew = nil
|
|
61
|
+
@sorted_data = nil
|
|
62
|
+
@standard_deviation = nil
|
|
63
|
+
@standard_error = nil
|
|
64
|
+
@sum = nil
|
|
65
|
+
@sum_of_inversions = nil
|
|
66
|
+
@sum_of_logarithms = nil
|
|
67
|
+
@sum_of_squared_deviations = nil
|
|
68
|
+
@sum_of_squares = nil
|
|
69
|
+
@variance = nil
|
|
70
|
+
@weighted_rms = nil
|
|
71
|
+
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
#------------------------------------------------------------------------------------
|
|
75
|
+
# Returns the auto-correlation of a data sequence.
|
|
76
|
+
# @param lag lag between the two measures to auto correlate
|
|
77
|
+
#------------------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
def auto_correlation(lag)
|
|
80
|
+
DoubleDescriptive.autoCorrelation(@array_list, lag, mean, variance)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
#------------------------------------------------------------------------------------
|
|
84
|
+
# Returns the correlation of two data sequences.
|
|
85
|
+
# That is covariance(data1,data2)/(standardDev1*standardDev2).
|
|
86
|
+
#------------------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
def correlation(other_val)
|
|
89
|
+
covariance(other_val) / (standard_deviation * other_val.standard_deviation)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
#------------------------------------------------------------------------------------
|
|
93
|
+
# Returns the covariance of two data sequences.
|
|
94
|
+
# That is cov(x,y) = Sum((x[i]-mean(x)) * (y[i]-mean(y))) / size().
|
|
95
|
+
#------------------------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
def covariance(other_val)
|
|
98
|
+
sample_covariance(other_val) * (list_size - 1) / list_size
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
#------------------------------------------------------------------------------------
|
|
102
|
+
# Durbin-Watson computation.
|
|
103
|
+
#------------------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
def durbin_watson
|
|
106
|
+
@durbin_watson ||= DoubleDescriptive.durbinWatson(@array_list)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
#------------------------------------------------------------------------------------
|
|
110
|
+
# Computes the frequency (number of occurrences, count) of each distinct value in the
|
|
111
|
+
# given sorted data.
|
|
112
|
+
#------------------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
def frequencies
|
|
115
|
+
|
|
116
|
+
if (@frequencies == nil)
|
|
117
|
+
distinct_values = Java::CernColtListTdouble::DoubleArrayList.new
|
|
118
|
+
frequencies = Java::CernColtListTint::IntArrayList.new
|
|
119
|
+
DoubleDescriptive.frequencies(sorted_data, distinct_values, frequencies)
|
|
120
|
+
distinct_values.trimToSize()
|
|
121
|
+
frequencies.trimToSize()
|
|
122
|
+
@distinct_values = distinct_values.elements().to_a
|
|
123
|
+
@frequencies = frequencies.elements().to_a
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
{ :distinct_values => @distinct_values, :frequencies => @frequencies}
|
|
127
|
+
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
#------------------------------------------------------------------------------------
|
|
131
|
+
# Returns the geometric mean of a data sequence.
|
|
132
|
+
#------------------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
def geometric_mean
|
|
135
|
+
@geometric_mean ||= DoubleDescriptive.geometricMean(@array_list)
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
#------------------------------------------------------------------------------------
|
|
139
|
+
# Returns the harmonic mean of a data sequence.
|
|
140
|
+
#------------------------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
def harmonic_mean
|
|
143
|
+
@harmonic_mean ||= DoubleDescriptive.harmonicMean(list_size, sum_of_inversions)
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
#------------------------------------------------------------------------------------
|
|
147
|
+
# Returns the kurtosis (aka excess) of a data sequence, which is -3 +
|
|
148
|
+
# moment(data,4,mean) / standardDeviation^4.
|
|
149
|
+
#------------------------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
def kurtosis
|
|
152
|
+
@kurtosis ||=
|
|
153
|
+
DoubleDescriptive.kurtosis(moment4, standard_deviation)
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
#------------------------------------------------------------------------------------
|
|
157
|
+
# Returns the lag-1 autocorrelation of a dataset; Note that this method has semantics
|
|
158
|
+
# different from autoCorrelation(..., 1);
|
|
159
|
+
#------------------------------------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
def lag1
|
|
162
|
+
@lag1 ||= DoubleDescriptive.lag1(@array_list, mean)
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
#------------------------------------------------------------------------------------
|
|
166
|
+
#
|
|
167
|
+
#------------------------------------------------------------------------------------
|
|
168
|
+
|
|
169
|
+
def list_size
|
|
170
|
+
@list_size ||= @array_list.size
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
#------------------------------------------------------------------------------------
|
|
174
|
+
# Returns the largest member of a data sequence.
|
|
175
|
+
#------------------------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
def max
|
|
178
|
+
@max ||= DoubleDescriptive.max(@array_list)
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
#------------------------------------------------------------------------------------
|
|
182
|
+
# Returns the arithmetic mean of a data sequence; That is Sum( data[i] ) / data.size()
|
|
183
|
+
#------------------------------------------------------------------------------------
|
|
184
|
+
|
|
185
|
+
def mean
|
|
186
|
+
@mean ||= DoubleDescriptive.mean(@array_list)
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
#------------------------------------------------------------------------------------
|
|
190
|
+
# Returns the mean deviation of a dataset.
|
|
191
|
+
#------------------------------------------------------------------------------------
|
|
192
|
+
|
|
193
|
+
def mean_deviation
|
|
194
|
+
@mean_deviation ||= DoubleDescriptive.meanDeviation(@array_list, mean)
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
#------------------------------------------------------------------------------------
|
|
198
|
+
# Returns the median of a sorted data sequence.
|
|
199
|
+
#------------------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
def median
|
|
202
|
+
@median ||= DoubleDescriptive.median(sorted_data)
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
#------------------------------------------------------------------------------------
|
|
206
|
+
# Returns the smallest member of a data sequence.
|
|
207
|
+
#------------------------------------------------------------------------------------
|
|
208
|
+
|
|
209
|
+
def min
|
|
210
|
+
@min ||= DoubleDescriptive.min(@array_list)
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
#------------------------------------------------------------------------------------
|
|
214
|
+
# Returns the moment of k-th order with constant c of a data sequence, which is
|
|
215
|
+
# Sum( (data[i]-c)^k ) / data.size().
|
|
216
|
+
# @param k integer
|
|
217
|
+
# @param c double
|
|
218
|
+
#------------------------------------------------------------------------------------
|
|
219
|
+
|
|
220
|
+
def moment(k, c)
|
|
221
|
+
DoubleDescriptive.moment(@array_list, k, c)
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
#------------------------------------------------------------------------------------
|
|
225
|
+
# The third central moment. That is: moment(data,3,mean)
|
|
226
|
+
#------------------------------------------------------------------------------------
|
|
227
|
+
|
|
228
|
+
def moment3
|
|
229
|
+
@moment3 ||= moment(3, mean)
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
#------------------------------------------------------------------------------------
|
|
233
|
+
#
|
|
234
|
+
#------------------------------------------------------------------------------------
|
|
235
|
+
|
|
236
|
+
def moment4
|
|
237
|
+
@moment4 ||= moment(4, mean)
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
#------------------------------------------------------------------------------------
|
|
241
|
+
# Returns the pooled mean of two data sequences.
|
|
242
|
+
# That is (size1 * mean1 + size2 * mean2) / (size1 + size2).
|
|
243
|
+
#------------------------------------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
def pooled_mean(other_val)
  # The other operand's statistics are reset before its size and mean are read.
  other_val.reset_statistics

  own_size = list_size
  own_mean = mean
  other_size = other_val.list_size
  other_mean = other_val.mean
  DoubleDescriptive.pooledMean(own_size, own_mean, other_size, other_mean)
end
|
|
249
|
+
|
|
250
|
+
#------------------------------------------------------------------------------------
|
|
251
|
+
# Returns the pooled variance of two data sequences.
|
|
252
|
+
# That is: (size1 * variance1 + size2 * variance2) / (size1 + size2)
|
|
253
|
+
#------------------------------------------------------------------------------------
|
|
254
|
+
|
|
255
|
+
def pooled_variance(other_val)
  # The other operand's statistics are reset before its size and variance are read.
  other_val.reset_statistics

  own_size = list_size
  own_variance = variance
  other_size = other_val.list_size
  other_variance = other_val.variance
  DoubleDescriptive.pooledVariance(own_size, own_variance, other_size, other_variance)
end
|
|
260
|
+
|
|
261
|
+
#------------------------------------------------------------------------------------
|
|
262
|
+
# Returns the product of a data sequence, which is Prod( data[i] ) .
|
|
263
|
+
#------------------------------------------------------------------------------------
|
|
264
|
+
|
|
265
|
+
def product
  # Product of all elements of @array_list; cached in @product.
  @product ||= begin
    data = @array_list
    DoubleDescriptive.product(data)
  end
end
|
|
268
|
+
|
|
269
|
+
#------------------------------------------------------------------------------------
|
|
270
|
+
# Returns the phi-quantile; that is, an element elem for which holds that phi percent
|
|
271
|
+
# of data elements are less than elem.
|
|
272
|
+
# @param phi double
|
|
273
|
+
#------------------------------------------------------------------------------------
|
|
274
|
+
|
|
275
|
+
def quantile(phi)
  # The quantile is looked up on the sorted copy of the data.
  ordered = sorted_data
  DoubleDescriptive.quantile(ordered, phi)
end
|
|
278
|
+
|
|
279
|
+
#------------------------------------------------------------------------------------
|
|
280
|
+
# Returns how many percent of the elements contained in the receiver are <= element.
|
|
281
|
+
# @param elmt double
|
|
282
|
+
#------------------------------------------------------------------------------------
|
|
283
|
+
|
|
284
|
+
def quantile_inverse(elmt)
  # The lookup is done on the sorted copy of the data.
  ordered = sorted_data
  DoubleDescriptive.quantileInverse(ordered, elmt)
end
|
|
287
|
+
|
|
288
|
+
#------------------------------------------------------------------------------------
|
|
289
|
+
# @param percentages the percentages for which quantiles are to be computed. Each
|
|
290
|
+
# percentage must be in the interval [0.0,1.0].
|
|
291
|
+
#------------------------------------------------------------------------------------
|
|
292
|
+
|
|
293
|
+
def quantiles(percs)
  # The percentages are wrapped in a DoubleArrayList before being handed to
  # DoubleDescriptive.quantiles; the resulting list is returned as a Ruby array.
  phis = Java::CernColtListTdouble::DoubleArrayList.new(percs.to_java(Java::double))
  DoubleDescriptive.quantiles(sorted_data, phis).elements.to_a
end
|
|
300
|
+
|
|
301
|
+
#------------------------------------------------------------------------------------
|
|
302
|
+
# Returns the linearly interpolated number of elements in a list less or equal to a
|
|
303
|
+
# given element. The rank is the number of elements <= element. Ranks are of the form
|
|
304
|
+
# {0, 1, 2,..., sortedList.size()}. If no element is <= element, then the rank is
|
|
305
|
+
# zero. If the element lies in between two contained elements, then linear
|
|
306
|
+
# interpolation is used and a non integer value is returned.
|
|
307
|
+
# @param elmt double
|
|
308
|
+
#------------------------------------------------------------------------------------
|
|
309
|
+
|
|
310
|
+
def rank_interpolated(elmt)
  # The rank is computed against the sorted copy of the data.
  ordered = sorted_data
  DoubleDescriptive.rankInterpolated(ordered, elmt)
end
|
|
313
|
+
|
|
314
|
+
#------------------------------------------------------------------------------------
|
|
315
|
+
# Returns the RMS (Root-Mean-Square) of a data sequence.
|
|
316
|
+
#------------------------------------------------------------------------------------
|
|
317
|
+
|
|
318
|
+
def rms
  # Derived from the element count and the sum of squares; cached in @rms.
  @rms ||= begin
    count = list_size
    squares = sum_of_squares
    DoubleDescriptive.rms(count, squares)
  end
end
|
|
321
|
+
|
|
322
|
+
#------------------------------------------------------------------------------------
|
|
323
|
+
# Returns the sample covariance of two data sequences.
|
|
324
|
+
# That is cov(x,y) = (1/(size()-1)) * Sum((x[i]-mean(x)) * (y[i]-mean(y))) .
|
|
325
|
+
#------------------------------------------------------------------------------------
|
|
326
|
+
|
|
327
|
+
def sample_covariance(other_val)
  # The other operand's statistics are reset before its element list is read.
  other_val.reset_statistics

  own_data = @array_list
  other_data = other_val.array_list
  DoubleDescriptive.covariance(own_data, other_data)
end
|
|
331
|
+
|
|
332
|
+
#------------------------------------------------------------------------------------
|
|
333
|
+
# Returns the sample kurtosis (aka excess) of a data sequence.
|
|
334
|
+
#------------------------------------------------------------------------------------
|
|
335
|
+
|
|
336
|
+
def sample_kurtosis
  # Built from the element count, the fourth central moment and the sample
  # variance; cached in @sample_kurtosis.
  @sample_kurtosis ||= begin
    count = list_size
    fourth_moment = moment4
    spread = sample_variance
    DoubleDescriptive.sampleKurtosis(count, fourth_moment, spread)
  end
end
|
|
340
|
+
|
|
341
|
+
#------------------------------------------------------------------------------------
|
|
342
|
+
# Return the standard error of the sample kurtosis. Ref: R.R. Sokal, F.J. Rohlf,
|
|
343
|
+
# Biometry: the principles and practice of statistics in biological research (W.H.
|
|
344
|
+
# Freeman and Company, New York, 1998, 3rd edition) p. 138.
|
|
345
|
+
#------------------------------------------------------------------------------------
|
|
346
|
+
|
|
347
|
+
def sample_kurtosis_standard_error
  # Depends only on the element count; cached in @sample_kurtosis_standard_error.
  @sample_kurtosis_standard_error ||= begin
    count = list_size
    DoubleDescriptive.sampleKurtosisStandardError(count)
  end
end
|
|
351
|
+
|
|
352
|
+
#------------------------------------------------------------------------------------
|
|
353
|
+
# Returns the sample skew of a data sequence.
|
|
354
|
+
#------------------------------------------------------------------------------------
|
|
355
|
+
|
|
356
|
+
def sample_skew
  # Built from the element count, the third central moment and the sample
  # variance; cached in @sample_skew.
  @sample_skew ||= begin
    count = list_size
    third_moment = moment3
    spread = sample_variance
    DoubleDescriptive.sampleSkew(count, third_moment, spread)
  end
end
|
|
360
|
+
|
|
361
|
+
#------------------------------------------------------------------------------------
|
|
362
|
+
# Return the standard error of the sample skew. Ref: R.R. Sokal, F.J. Rohlf,
|
|
363
|
+
# Biometry: the principles and practice of statistics in biological research (W.H.
|
|
364
|
+
# Freeman and Company, New York, 1998, 3rd edition) p. 138.
|
|
365
|
+
#------------------------------------------------------------------------------------
|
|
366
|
+
|
|
367
|
+
def sample_skew_standard_error
  # Depends only on the element count; cached in @sample_skew_standard_error.
  @sample_skew_standard_error ||= begin
    count = list_size
    DoubleDescriptive.sampleSkewStandardError(count)
  end
end
|
|
371
|
+
|
|
372
|
+
#------------------------------------------------------------------------------------
|
|
373
|
+
# Returns the sample standard deviation. Ref: R.R. Sokal, F.J. Rohlf, Biometry: the
|
|
374
|
+
# principles and practice of statistics in biological research (W.H. Freeman and
|
|
375
|
+
# Company, New York, 1998, 3rd edition) p. 53. The standard deviation calculated as
|
|
376
|
+
# the sqrt of the variance underestimates the unbiased standard deviation. It needs
|
|
377
|
+
# to be multiplied by this correction factor:
|
|
378
|
+
# 1) if (n > 30): Cn = 1+1/(4*(n-1)), else
|
|
379
|
+
# 2) Cn = Math.sqrt((n - 1) * 0.5) * Gamma.gamma((n - 1) * 0.5) / Gamma.gamma(n * 0.5)
|
|
380
|
+
# The sample standard deviation is Cn * sqrt(sample variance)
|
|
381
|
+
#------------------------------------------------------------------------------------
|
|
382
|
+
|
|
383
|
+
def sample_standard_deviation
  # Derived from the element count and the sample variance; cached in
  # @sample_standard_deviation.
  @sample_standard_deviation ||= begin
    count = list_size
    spread = sample_variance
    DoubleDescriptive.sampleStandardDeviation(count, spread)
  end
end
|
|
387
|
+
|
|
388
|
+
#------------------------------------------------------------------------------------
|
|
389
|
+
# Returns the sample variance of a data sequence.
|
|
390
|
+
#------------------------------------------------------------------------------------
|
|
391
|
+
|
|
392
|
+
def sample_variance
  # Derived from the element count, the sum and the sum of squares; cached in
  # @sample_variance.
  @sample_variance ||= begin
    count = list_size
    total = sum
    squares = sum_of_squares
    DoubleDescriptive.sampleVariance(count, total, squares)
  end
end
|
|
396
|
+
|
|
397
|
+
#------------------------------------------------------------------------------------
|
|
398
|
+
# Returns the sample weighted variance of a data sequence.
|
|
399
|
+
# That is (sum_of_squared_products - sum_of_products * sum_of_products /
|
|
400
|
+
# sum_of_weights) / (sum_of_weights - 1)
|
|
401
|
+
# where:
|
|
402
|
+
# sum_of_weights = Sum ( weights[i] )
|
|
403
|
+
# sum_of_products = Sum ( data[i] * weights[i] )
|
|
404
|
+
# sum_of_squared_products = Sum( data[i] * data[i] * weights[i] )
|
|
405
|
+
#------------------------------------------------------------------------------------
|
|
406
|
+
|
|
407
|
+
def sample_weighted_variance(weights)
  # The weights are wrapped in a DoubleArrayList so they can be summed by
  # DoubleDescriptive.sum and reused by weighted_sums.
  weight_list = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
  total_weight = DoubleDescriptive.sum(weight_list)
  products, squared_products = weighted_sums(weight_list)

  DoubleDescriptive.sampleWeightedVariance(total_weight, products, squared_products)
end
|
|
415
|
+
|
|
416
|
+
#------------------------------------------------------------------------------------
|
|
417
|
+
# Returns the skew of a data sequence, which is moment(data,3,mean) /
|
|
418
|
+
# standardDeviation^3.
|
|
419
|
+
#------------------------------------------------------------------------------------
|
|
420
|
+
|
|
421
|
+
def skew
  # Derived from the third central moment and the standard deviation; cached in @skew.
  @skew ||= begin
    third_moment = moment3
    deviation = standard_deviation
    DoubleDescriptive.skew(third_moment, deviation)
  end
end
|
|
424
|
+
|
|
425
|
+
#------------------------------------------------------------------------------------
|
|
426
|
+
# Splits (partitions) a list into sublists such that each sublist contains the
|
|
427
|
+
# elements with a given range. splitters= (a,b,c,...,y,z) defines the ranges [-inf,a),
|
|
428
|
+
# [a,b), [b,c), ..., [y,z), [z,inf].
|
|
429
|
+
# Examples:
|
|
430
|
+
# data = (1,2,3,4,5,8,8,8,10,11).
|
|
431
|
+
# splitters=(2,8) yields 3 bins: (1), (2,3,4,5) (8,8,8,10,11).
|
|
432
|
+
# splitters=() yields 1 bin: (1,2,3,4,5,8,8,8,10,11).
|
|
433
|
+
# splitters=(-5) yields 2 bins: (), (1,2,3,4,5,8,8,8,10,11).
|
|
434
|
+
# splitters=(100) yields 2 bins: (1,2,3,4,5,8,8,8,10,11), ().
|
|
435
|
+
# @param splitters - the points at which the list shall be partitioned (must be sorted
|
|
436
|
+
# ascending).
|
|
437
|
+
# @return the sublists (an array with length == splitters.size() + 1). Each sublist is
|
|
438
|
+
# returned sorted ascending.
|
|
439
|
+
#------------------------------------------------------------------------------------
|
|
440
|
+
|
|
441
|
+
def split(splitters)
  # The split points are wrapped in a DoubleArrayList and applied to the sorted data.
  cut_points = Java::CernColtListTdouble::DoubleArrayList.new(splitters.to_java(Java::double))
  partitions = DoubleDescriptive.split(sorted_data, cut_points)

  # Each partition is trimmed to its size and then converted to a Ruby array.
  partitions.to_a.map do |partition|
    partition.trimToSize
    partition.elements.to_a
  end
end
|
|
456
|
+
|
|
457
|
+
#------------------------------------------------------------------------------------
|
|
458
|
+
# Returns a list with the sorted elements
|
|
459
|
+
#------------------------------------------------------------------------------------
|
|
460
|
+
|
|
461
|
+
def sort
  # sorted_data returns the cached @sorted_data list (building it on first use).
  ordered = sorted_data
  ordered.trimToSize
  ordered.elements.to_a
end
|
|
466
|
+
|
|
467
|
+
#------------------------------------------------------------------------------------
|
|
468
|
+
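# Returns the elements sorted with the <=> comparator as a DoubleArrayList. The sorted
# list is built on first use and cached in @sorted_data.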
|
|
469
|
+
#------------------------------------------------------------------------------------
|
|
470
|
+
|
|
471
|
+
def sorted_data
  # Hand back the cached list when it has already been built.
  return @sorted_data if @sorted_data

  # Sorting is done on the element array of a clone of @array_list.
  # NOTE(review): presumably so that @array_list keeps its original order --
  # confirm that clone copies the backing array.
  values = @array_list.clone.elements
  by_value = Proc.new { |a, b| a <=> b }
  Java::CernColt::Sorting.parallelQuickSort(values, 0, @array_list.size, by_value)
  @sorted_data = Java::CernColtListTdouble::DoubleArrayList.new(values)
end
|
|
483
|
+
|
|
484
|
+
#------------------------------------------------------------------------------------
|
|
485
|
+
# Returns the standard deviation from a variance.
|
|
486
|
+
#------------------------------------------------------------------------------------
|
|
487
|
+
|
|
488
|
+
def standard_deviation
  # Derived from the variance; cached in @standard_deviation.
  @standard_deviation ||= begin
    spread = variance
    DoubleDescriptive.standardDeviation(spread)
  end
end
|
|
491
|
+
|
|
492
|
+
#------------------------------------------------------------------------------------
|
|
493
|
+
# Returns the standard error of a data sequence.
|
|
494
|
+
#------------------------------------------------------------------------------------
|
|
495
|
+
|
|
496
|
+
def standard_error
  # Derived from the element count and the variance; cached in @standard_error.
  @standard_error ||= begin
    count = list_size
    spread = variance
    DoubleDescriptive.standardError(count, spread)
  end
end
|
|
499
|
+
|
|
500
|
+
#------------------------------------------------------------------------------------
|
|
501
|
+
# Modifies a data sequence to be standardized. Changes each element data[i] as
|
|
502
|
+
# follows: data[i] = (data[i]-mean)/standardDeviation.
|
|
503
|
+
#------------------------------------------------------------------------------------
|
|
504
|
+
|
|
505
|
+
def standardize!
  # The mean and standard deviation are read first, then passed with @array_list
  # to DoubleDescriptive.standardize.
  # NOTE(review): nothing here clears the memoized statistics (@sum, @variance,
  # @sorted_data, ...) afterwards -- confirm whether callers are expected to
  # invoke reset_statistics themselves.
  center = mean
  deviation = standard_deviation
  DoubleDescriptive.standardize(@array_list, center, deviation)
end
|
|
508
|
+
|
|
509
|
+
#------------------------------------------------------------------------------------
|
|
510
|
+
# Returns the sum of a data sequence.
|
|
511
|
+
#------------------------------------------------------------------------------------
|
|
512
|
+
|
|
513
|
+
def sum
  # Sum of all elements of @array_list; cached in @sum.
  @sum ||= begin
    data = @array_list
    DoubleDescriptive.sum(data)
  end
end
|
|
516
|
+
|
|
517
|
+
#------------------------------------------------------------------------------------
|
|
518
|
+
# Returns the sum of inversions of a data sequence, which is Sum( 1.0 / data[i]).
|
|
519
|
+
#------------------------------------------------------------------------------------
|
|
520
|
+
|
|
521
|
+
def sum_of_inversions(from = 0, to = list_size - 1)
  # Returns Sum( 1.0 / data[i] ) over the index range from..to, as computed by
  # DoubleDescriptive.sumOfInversions.
  # @param from index of the first element (defaults to 0)
  # @param to index of the last element (defaults to list_size - 1, which suggests
  #   the upper bound is inclusive)
  #
  # Only the whole-list result is memoized in @sum_of_inversions. Caching without
  # looking at the range would make a sub-range call return (or overwrite) the
  # value of an earlier call made with different bounds.
  whole_list = from == 0 && to == list_size - 1
  return DoubleDescriptive.sumOfInversions(@array_list, from, to) unless whole_list

  @sum_of_inversions ||= DoubleDescriptive.sumOfInversions(@array_list, from, to)
end
|
|
524
|
+
|
|
525
|
+
#------------------------------------------------------------------------------------
|
|
526
|
+
# Returns the sum of logarithms of a data sequence, which is Sum( Log(data[i]) ).
|
|
527
|
+
#------------------------------------------------------------------------------------
|
|
528
|
+
|
|
529
|
+
def sum_of_logarithms(from = 0, to = list_size - 1)
  # Returns Sum( Log(data[i]) ) over the index range from..to, as computed by
  # DoubleDescriptive.sumOfLogarithms.
  # @param from index of the first element (defaults to 0)
  # @param to index of the last element (defaults to list_size - 1, which suggests
  #   the upper bound is inclusive)
  #
  # Only the whole-list result is memoized in @sum_of_logarithms. Caching without
  # looking at the range would make a sub-range call return (or overwrite) the
  # value of an earlier call made with different bounds.
  whole_list = from == 0 && to == list_size - 1
  return DoubleDescriptive.sumOfLogarithms(@array_list, from, to) unless whole_list

  @sum_of_logarithms ||= DoubleDescriptive.sumOfLogarithms(@array_list, from, to)
end
|
|
532
|
+
|
|
533
|
+
#------------------------------------------------------------------------------------
|
|
534
|
+
# Returns Sum( (data[i]-c)^k ); optimized for common parameters like c == 0.0 and/or
|
|
535
|
+
# k == -2
|
|
536
|
+
#------------------------------------------------------------------------------------
|
|
537
|
+
|
|
538
|
+
def sum_of_power_deviations(k, c)
  # k is the exponent, c the constant subtracted from each element. The result
  # is not cached because it depends on both arguments.
  data = @array_list
  DoubleDescriptive.sumOfPowerDeviations(data, k, c)
end
|
|
541
|
+
|
|
542
|
+
#------------------------------------------------------------------------------------
|
|
543
|
+
# Returns the sum of powers of a data sequence, which is Sum ( data[i]^k ).
|
|
544
|
+
#------------------------------------------------------------------------------------
|
|
545
|
+
|
|
546
|
+
def sum_of_powers(k)
  # k is the exponent applied to every element; the result is not cached.
  data = @array_list
  DoubleDescriptive.sumOfPowers(data, k)
end
|
|
549
|
+
|
|
550
|
+
#------------------------------------------------------------------------------------
|
|
551
|
+
# Returns the sum of the product with another array.
|
|
552
|
+
# That is, Sum( data[i] * other_val[i] )
|
|
553
|
+
# @param other_val: ruby array or a CernColtListTdouble::DoubleArrayList (when called
|
|
554
|
+
# internally).
|
|
555
|
+
#------------------------------------------------------------------------------------
|
|
556
|
+
|
|
557
|
+
def weighted_sums(other_val, from = 0, to = list_size - 1)
  # Accept either a Ruby array (converted here) or an already-built DoubleArrayList;
  # anything else is rejected.
  weights =
    case other_val
    when Array
      Java::CernColtListTdouble::DoubleArrayList.new(other_val.to_java(Java::double))
    when Java::CernColtListTdouble::DoubleArrayList
      other_val
    else
      raise "#{other_val} is not a valid weight array"
    end

  # incrementalWeightedUpdate writes its two results into this two-slot Java array,
  # which is then returned as a plain Ruby pair.
  accumulator = [0.0, 0.0].to_java(Java::double)
  DoubleDescriptive.incrementalWeightedUpdate(@array_list, weights, from, to, accumulator)
  [accumulator[0], accumulator[1]]
end
|
|
572
|
+
|
|
573
|
+
#------------------------------------------------------------------------------------
|
|
574
|
+
# Returns the sum of squared mean deviation of a data sequence.
|
|
575
|
+
#------------------------------------------------------------------------------------
|
|
576
|
+
|
|
577
|
+
def sum_of_squared_deviations
  # Derived from the element count and the variance.
  # NOTE(review): the cache variable is spelled @sum_of_square_deviations (no "d"
  # after "square"), unlike the method name -- confirm that whatever clears the
  # cached statistics uses the same spelling.
  @sum_of_square_deviations ||= begin
    count = list_size
    spread = variance
    DoubleDescriptive.sumOfSquaredDeviations(count, spread)
  end
end
|
|
581
|
+
|
|
582
|
+
#------------------------------------------------------------------------------------
|
|
583
|
+
# Returns the sum of squares of a data sequence.
|
|
584
|
+
#------------------------------------------------------------------------------------
|
|
585
|
+
|
|
586
|
+
def sum_of_squares
  # Sum of the squared elements of @array_list; cached in @sum_of_squares.
  @sum_of_squares ||= begin
    data = @array_list
    DoubleDescriptive.sumOfSquares(data)
  end
end
|
|
589
|
+
|
|
590
|
+
#------------------------------------------------------------------------------------
|
|
591
|
+
# Returns the trimmed mean of a sorted data sequence.
|
|
592
|
+
#------------------------------------------------------------------------------------
|
|
593
|
+
|
|
594
|
+
def trimmed_mean(left = 0, right = 0)
  # Works on the sorted copy of the data; left and right are forwarded unchanged
  # together with the mean.
  ordered = sorted_data
  center = mean
  DoubleDescriptive.trimmedMean(ordered, center, left, right)
end
|
|
597
|
+
|
|
598
|
+
#------------------------------------------------------------------------------------
|
|
599
|
+
# Returns the variance of a data sequence, computed from its size, sum and sum of squares.
|
|
600
|
+
#------------------------------------------------------------------------------------
|
|
601
|
+
|
|
602
|
+
def variance
  # Derived from the element count, the sum and the sum of squares; cached in @variance.
  @variance ||= begin
    count = list_size
    total = sum
    squares = sum_of_squares
    DoubleDescriptive.variance(count, total, squares)
  end
end
|
|
606
|
+
|
|
607
|
+
#------------------------------------------------------------------------------------
|
|
608
|
+
# Returns the weighted mean of a data sequence.
|
|
609
|
+
#------------------------------------------------------------------------------------
|
|
610
|
+
|
|
611
|
+
def weighted_mean(weights)
  # The weights are wrapped in a DoubleArrayList before being handed to
  # DoubleDescriptive.weightedMean along with the data.
  weight_list = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
  DoubleDescriptive.weightedMean(@array_list, weight_list)
end
|
|
615
|
+
|
|
616
|
+
#------------------------------------------------------------------------------------
|
|
617
|
+
# Returns the weighted RMS (Root-Mean-Square) of a data sequence.
|
|
618
|
+
#------------------------------------------------------------------------------------
|
|
619
|
+
|
|
620
|
+
def weighted_rms(weights)
  # The weights are wrapped in a DoubleArrayList, the two weighted sums are taken
  # from weighted_sums, and both are passed on to DoubleDescriptive.weightedRMS.
  weight_list = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
  products, squared_products = weighted_sums(weight_list)
  DoubleDescriptive.weightedRMS(products, squared_products)
end
|
|
627
|
+
|
|
628
|
+
#------------------------------------------------------------------------------------
|
|
629
|
+
# Returns the winsorized mean of a sorted data sequence.
|
|
630
|
+
#------------------------------------------------------------------------------------
|
|
631
|
+
|
|
632
|
+
def winsorized_mean(left, right)
  # Works on the sorted copy of the data; left and right are forwarded unchanged
  # together with the mean.
  ordered = sorted_data
  center = mean
  DoubleDescriptive.winsorizedMean(ordered, center, left, right)
end
|
|
635
|
+
|
|
636
|
+
end # DoubleDescriptive
|