mdarray 0.4.3.pre-java → 0.5.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. data/README.md +82 -14
  2. data/Rakefile +8 -2
  3. data/doc/BinaryOperator.html +227 -0
  4. data/doc/BitwiseOperators.html +135 -0
  5. data/doc/BooleanFunctions.html +135 -0
  6. data/doc/BooleanMDArray.html +193 -0
  7. data/doc/ByteMDArray.html +271 -0
  8. data/doc/Colt.html +269 -0
  9. data/doc/ComparisonOperators.html +135 -0
  10. data/doc/Const.html +490 -0
  11. data/doc/Csv.html +589 -0
  12. data/doc/DDescriptive.html +4373 -0
  13. data/doc/DoubleMDArray.html +555 -0
  14. data/doc/DoubleStatList.html +256 -0
  15. data/doc/FastBinaryOperator.html +743 -0
  16. data/doc/FastBoolean.html +512 -0
  17. data/doc/FastUnaryOperator.html +583 -0
  18. data/doc/FloatMDArray.html +237 -0
  19. data/doc/FunctionCreation.html +520 -0
  20. data/doc/FunctionMap.html +813 -0
  21. data/doc/GenericFunctions.html +135 -0
  22. data/doc/IntMDArray.html +259 -0
  23. data/doc/LongMDArray.html +257 -0
  24. data/doc/MDArray.html +9639 -0
  25. data/doc/MDArray/Counter.html +2767 -0
  26. data/doc/MDArray/IteratorFast.html +872 -0
  27. data/doc/MDArray/IteratorFastBoolean.html +384 -0
  28. data/doc/MDArray/IteratorFastByte.html +373 -0
  29. data/doc/MDArray/IteratorFastChar.html +384 -0
  30. data/doc/MDArray/IteratorFastDouble.html +384 -0
  31. data/doc/MDArray/IteratorFastFloat.html +384 -0
  32. data/doc/MDArray/IteratorFastInt.html +384 -0
  33. data/doc/MDArray/IteratorFastLong.html +384 -0
  34. data/doc/MDArray/IteratorFastShort.html +384 -0
  35. data/doc/MDArrayTest.html +125 -0
  36. data/doc/NonNumericalMDArray.html +177 -0
  37. data/doc/NumericFunctions.html +297 -0
  38. data/doc/NumericalMDArray.html +278 -0
  39. data/doc/Operator.html +826 -0
  40. data/doc/Proc.html +1097 -0
  41. data/doc/RubyBinaryOperator.html +526 -0
  42. data/doc/RubyFunctions.html +232 -0
  43. data/doc/RubyMath.html +135 -0
  44. data/doc/RubyStats.html +135 -0
  45. data/doc/RubyUnaryOperator.html +510 -0
  46. data/doc/ShortMDArray.html +267 -0
  47. data/doc/StatList.html +1176 -0
  48. data/doc/StringMDArray.html +181 -0
  49. data/doc/StructureMDArray.html +181 -0
  50. data/doc/UnaryOperator.html +227 -0
  51. data/doc/UserFunction.html +297 -0
  52. data/doc/_index.html +467 -0
  53. data/doc/class_list.html +53 -0
  54. data/doc/css/common.css +1 -0
  55. data/doc/css/full_list.css +57 -0
  56. data/doc/css/style.css +338 -0
  57. data/doc/file.README.html +173 -0
  58. data/doc/file_list.html +55 -0
  59. data/doc/frames.html +28 -0
  60. data/doc/index.html +173 -0
  61. data/doc/js/app.js +214 -0
  62. data/doc/js/full_list.js +173 -0
  63. data/doc/js/jquery.js +4 -0
  64. data/doc/method_list.html +2524 -0
  65. data/doc/top-level-namespace.html +114 -0
  66. data/lib/colt/colt.rb +56 -0
  67. data/lib/colt/colt_mdarray.rb +95 -0
  68. data/lib/colt/double_descriptive.rb +636 -0
  69. data/lib/colt/probability.rb +12 -0
  70. data/lib/colt/stat_list.rb +192 -0
  71. data/lib/env.rb +5 -1
  72. data/lib/mdarray.rb +5 -0
  73. data/lib/mdarray/access.rb +2 -2
  74. data/lib/mdarray/creation.rb +34 -19
  75. data/lib/mdarray/csv.rb +61 -0
  76. data/lib/mdarray/fast_operators.rb +241 -0
  77. data/lib/mdarray/operators.rb +18 -0
  78. data/lib/mdarray/ruby_operators.rb +2 -18
  79. data/lib/mdarray/ruby_stats.rb +1 -1
  80. data/lib/mdarray/views.rb +8 -8
  81. data/target/helper.jar +0 -0
  82. data/test/colt/VALE3.csv +3437 -0
  83. data/test/colt/VALE3.xlsx +0 -0
  84. data/test/colt/VALE3_short-err.csv +20 -0
  85. data/test/colt/VALE3_short.csv +20 -0
  86. data/test/colt/VALE3_short.xlsx +0 -0
  87. data/test/colt/test_complete.rb +25 -0
  88. data/test/colt/test_stat_list.rb +128 -0
  89. data/test/colt/test_statistics.rb +114 -0
  90. data/test/complete.rb +25 -0
  91. data/test/env.rb +54 -30
  92. data/test/{arithmetic_casting.rb → mdarray/arithmetic_casting.rb} +0 -0
  93. data/test/{test_access.rb → mdarray/test_access.rb} +0 -0
  94. data/test/{test_boolean.rb → mdarray/test_boolean.rb} +0 -2
  95. data/test/{test_comparison.rb → mdarray/test_comparison.rb} +0 -0
  96. data/test/{test_complete.rb → mdarray/test_complete.rb} +3 -14
  97. data/test/{test_counter.rb → mdarray/test_counter.rb} +0 -0
  98. data/test/{test_creation.rb → mdarray/test_creation.rb} +0 -0
  99. data/test/{test_error.rb → mdarray/test_error.rb} +8 -5
  100. data/test/{test_operator.rb → mdarray/test_operator.rb} +6 -2
  101. data/test/{test_speed.rb → mdarray/test_performance.rb} +74 -39
  102. data/test/{test_printing.rb → mdarray/test_printing.rb} +0 -1
  103. data/test/{test_shape.rb → mdarray/test_shape.rb} +0 -0
  104. data/test/mdarray/test_statistics.rb +80 -0
  105. data/test/{test_trigonometry.rb → mdarray/test_trigonometry.rb} +0 -0
  106. data/test/{test_views.rb → mdarray/test_views.rb} +0 -0
  107. data/vendor/parallelcolt-0.10.0.jar +0 -0
  108. data/version.rb +1 -1
  109. metadata +122 -39
  110. data/test/test_lazy.rb +0 -52
  111. data/test/test_statistics.rb +0 -38
@@ -0,0 +1,114 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
6
+ <title>
7
+ Top Level Namespace
8
+
9
+ &mdash; Documentation by YARD 0.8.5.2
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="css/style.css" type="text/css" media="screen" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="css/common.css" type="text/css" media="screen" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ hasFrames = window.top.frames.main ? true : false;
19
+ relpath = '';
20
+ framesUrl = "frames.html#!" + escape(window.location.href);
21
+ </script>
22
+
23
+
24
+ <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
25
+
26
+ <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
27
+
28
+
29
+ </head>
30
+ <body>
31
+ <div id="header">
32
+ <div id="menu">
33
+
34
+ <a href="_index.html">Index</a> &raquo;
35
+
36
+
37
+ <span class="title">Top Level Namespace</span>
38
+
39
+
40
+ <div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
41
+ </div>
42
+
43
+ <div id="search">
44
+
45
+ <a class="full_list_link" id="class_list_link"
46
+ href="class_list.html">
47
+ Class List
48
+ </a>
49
+
50
+ <a class="full_list_link" id="method_list_link"
51
+ href="method_list.html">
52
+ Method List
53
+ </a>
54
+
55
+ <a class="full_list_link" id="file_list_link"
56
+ href="file_list.html">
57
+ File List
58
+ </a>
59
+
60
+ </div>
61
+ <div class="clear"></div>
62
+ </div>
63
+
64
+ <iframe id="search_frame"></iframe>
65
+
66
+ <div id="content"><h1>Top Level Namespace
67
+
68
+
69
+
70
+ </h1>
71
+
72
+ <dl class="box">
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+ </dl>
82
+ <div class="clear"></div>
83
+
84
+ <h2>Defined Under Namespace</h2>
85
+ <p class="children">
86
+
87
+
88
+ <strong class="modules">Modules:</strong> <span class='object_link'><a href="BitwiseOperators.html" title="BitwiseOperators (module)">BitwiseOperators</a></span>, <span class='object_link'><a href="BooleanFunctions.html" title="BooleanFunctions (module)">BooleanFunctions</a></span>, <span class='object_link'><a href="ComparisonOperators.html" title="ComparisonOperators (module)">ComparisonOperators</a></span>, <span class='object_link'><a href="DDescriptive.html" title="DDescriptive (module)">DDescriptive</a></span>, <span class='object_link'><a href="FunctionCreation.html" title="FunctionCreation (module)">FunctionCreation</a></span>, <span class='object_link'><a href="GenericFunctions.html" title="GenericFunctions (module)">GenericFunctions</a></span>, <span class='object_link'><a href="NumericFunctions.html" title="NumericFunctions (module)">NumericFunctions</a></span>, <span class='object_link'><a href="RubyFunctions.html" title="RubyFunctions (module)">RubyFunctions</a></span>, <span class='object_link'><a href="RubyMath.html" title="RubyMath (module)">RubyMath</a></span>, <span class='object_link'><a href="RubyStats.html" title="RubyStats (module)">RubyStats</a></span>, <span class='object_link'><a href="UserFunction.html" title="UserFunction (module)">UserFunction</a></span>
89
+
90
+
91
+
92
+ <strong class="classes">Classes:</strong> <span class='object_link'><a href="BinaryOperator.html" title="BinaryOperator (class)">BinaryOperator</a></span>, <span class='object_link'><a href="BooleanMDArray.html" title="BooleanMDArray (class)">BooleanMDArray</a></span>, <span class='object_link'><a href="ByteMDArray.html" title="ByteMDArray (class)">ByteMDArray</a></span>, <span class='object_link'><a href="Colt.html" title="Colt (class)">Colt</a></span>, <span class='object_link'><a href="Const.html" title="Const (class)">Const</a></span>, <span class='object_link'><a href="Csv.html" title="Csv (class)">Csv</a></span>, <span class='object_link'><a href="DoubleMDArray.html" title="DoubleMDArray (class)">DoubleMDArray</a></span>, <span class='object_link'><a href="DoubleStatList.html" title="DoubleStatList (class)">DoubleStatList</a></span>, <span class='object_link'><a href="FastBinaryOperator.html" title="FastBinaryOperator (class)">FastBinaryOperator</a></span>, <span class='object_link'><a href="FastUnaryOperator.html" title="FastUnaryOperator (class)">FastUnaryOperator</a></span>, <span class='object_link'><a href="FloatMDArray.html" title="FloatMDArray (class)">FloatMDArray</a></span>, <span class='object_link'><a href="FunctionMap.html" title="FunctionMap (class)">FunctionMap</a></span>, <span class='object_link'><a href="IntMDArray.html" title="IntMDArray (class)">IntMDArray</a></span>, <span class='object_link'><a href="LongMDArray.html" title="LongMDArray (class)">LongMDArray</a></span>, <span class='object_link'><a href="MDArray.html" title="MDArray (class)">MDArray</a></span>, <span class='object_link'><a href="NonNumericalMDArray.html" title="NonNumericalMDArray (class)">NonNumericalMDArray</a></span>, <span class='object_link'><a href="NumericalMDArray.html" title="NumericalMDArray (class)">NumericalMDArray</a></span>, <span class='object_link'><a href="Operator.html" title="Operator (class)">Operator</a></span>, <span class='object_link'><a 
href="Proc.html" title="Proc (class)">Proc</a></span>, <span class='object_link'><a href="RubyBinaryOperator.html" title="RubyBinaryOperator (class)">RubyBinaryOperator</a></span>, <span class='object_link'><a href="RubyUnaryOperator.html" title="RubyUnaryOperator (class)">RubyUnaryOperator</a></span>, <span class='object_link'><a href="ShortMDArray.html" title="ShortMDArray (class)">ShortMDArray</a></span>, <span class='object_link'><a href="StatList.html" title="StatList (class)">StatList</a></span>, <span class='object_link'><a href="StringMDArray.html" title="StringMDArray (class)">StringMDArray</a></span>, <span class='object_link'><a href="StructureMDArray.html" title="StructureMDArray (class)">StructureMDArray</a></span>, <span class='object_link'><a href="UnaryOperator.html" title="UnaryOperator (class)">UnaryOperator</a></span>
93
+
94
+
95
+ </p>
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+ </div>
106
+
107
+ <div id="footer">
108
+ Generated on Thu May 16 12:48:34 2013 by
109
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
110
+ 0.8.5.2 (ruby-1.9.3).
111
+ </div>
112
+
113
+ </body>
114
+ </html>
@@ -0,0 +1,56 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ ##########################################################################################
24
+ #
25
+ ##########################################################################################
26
+
27
+ class Colt
28
+
29
+ #------------------------------------------------------------------------------------
30
+ #
31
+ #------------------------------------------------------------------------------------
32
+
33
+ def self.processors
34
+ Java::EduEmoryMathcsUtils::ConcurrencyUtils.get_number_of_processors
35
+ end
36
+
37
+ #------------------------------------------------------------------------------------
38
+ #
39
+ #------------------------------------------------------------------------------------
40
+
41
+ def self.threads
42
+ Java::EduEmoryMathcsUtils::ConcurrencyUtils.get_number_of_threads
43
+ end
44
+
45
+ #------------------------------------------------------------------------------------
46
+ #
47
+ #------------------------------------------------------------------------------------
48
+
49
+
50
+ end # Colt
51
+
52
+ require_relative 'stat_list'
53
+ require_relative 'colt_mdarray'
54
+
55
+ # MDArray.functions = "CernFunctions"
56
+
@@ -0,0 +1,95 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ ##########################################################################################
24
+ # Reopens class DoubleMDArray so that we can add helper methods to interface with Colt
25
+ ##########################################################################################
26
+
27
+
28
+ class DoubleMDArray
29
+
30
+ attr_reader :stat_list
31
+
32
+ #------------------------------------------------------------------------------------
33
+ # Converts the mdarray to a DoubleArrayList usable by Parallel Colt
34
+ #------------------------------------------------------------------------------------
35
+
36
+ def reset_statistics
37
+
38
+ base_array = @nc_array.get1DJavaArray(Java::double.java_class)
39
+ double_array_list = Java::CernColtListTdouble::DoubleArrayList.new(base_array)
40
+ @stat_list = DoubleStatList.new(double_array_list)
41
+
42
+ end
43
+
44
+ #------------------------------------------------------------------------------------
45
+ #
46
+ #------------------------------------------------------------------------------------
47
+
48
+ private
49
+
50
+ #------------------------------------------------------------------------------------
51
+ #
52
+ #------------------------------------------------------------------------------------
53
+
54
+ def self.colt_stats
55
+
56
+ stats = [:array_list, :auto_correlation, :correlation, :covariance, :durbin_watson,
57
+ :frequencies,
58
+ :geometric_mean, :harmonic_mean, :kurtosis, :lag1, :list_size, :max, :mean,
59
+ :mean_deviation, :median, :min, :moment, :moment3, :moment4, :pooled_mean,
60
+ :pooled_variance, :product, :quantile, :quantile_inverse, :quantiles,
61
+ :rank_interpolated, :rms, :sample_covariance, :sample_kurtosis,
62
+ :sample_kurtosis_standard_error,
63
+ :sample_skew, :sample_skew_standard_error, :sample_standard_deviation,
64
+ :sample_variance, :sample_weighted_variance, :skew, :split,
65
+ :standard_deviation, :standard_error, :sum,
66
+ :sum_of_inversions, :sum_of_logarithms, :sum_of_powers,
67
+ :sum_of_power_deviations, :sum_of_squares, :sum_of_squared_deviations,
68
+ :trimmed_mean,
69
+ :variance, :weighted_mean, :weighted_rms, :weighted_sums, :winsorized_mean]
70
+
71
+ =begin
72
+ # undefine all methods. Needed for now; in future versions we should be able to
73
+ # have many ways of executing the same method, so there should be no need to
74
+ # undef a method.
75
+ stats.each do |method|
76
+ remove_method(:max)
77
+ end
78
+ =end
79
+
80
+ # define all statistics methods from colt
81
+ stats.each do |method|
82
+ define_method(method) { |*args| @stat_list.send(method, *args) }
83
+ end
84
+
85
+ define_method(:standardize) do |*args|
86
+ @stat_list.standardize!(*args)
87
+ @stat_list.elements
88
+ end
89
+
90
+ end
91
+
92
+ self.colt_stats
93
+
94
+ end # MDArray
95
+
@@ -0,0 +1,636 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ ##########################################################################################
24
+ #
25
+ ##########################################################################################
26
+
27
+ require 'java'
28
+
29
+ module DDescriptive
30
+ include_package "cern.jet.stat.tdouble"
31
+
32
+ #------------------------------------------------------------------------------------
33
+ #
34
+ #------------------------------------------------------------------------------------
35
+
36
+ def reset_statistics
37
+
38
+ @distinct_values = nil
39
+ @durbin_watson = nil
40
+ @frequencies = nil
41
+ @geometric_mean = nil
42
+ @kurtosis = nil
43
+ @lag1 = nil
44
+ @max = nil
45
+ @mean = nil
46
+ @mean_deviation = nil
47
+ @median = nil
48
+ @min = nil
49
+ @moment3 = nil
50
+ @moment4 = nil
51
+ @product = nil
52
+ @sample_kurtosis = nil
53
+ @sample_kurtosis_standard_error = nil
54
+ @sample_skew = nil
55
+ @sample_skew_standard_error = nil
56
+ @sample_standard_deviation = nil
57
+ @sample_variance = nil
58
+ @sample_weighted_variance = nil
59
+ @list_size = nil
60
+ @skew = nil
61
+ @sorted_data = nil
62
+ @standard_deviation = nil
63
+ @standard_error = nil
64
+ @sum = nil
65
+ @sum_of_inversions = nil
66
+ @sum_of_logarithms = nil
67
+ @sum_of_squared_deviations = nil
68
+ @sum_of_squares = nil
69
+ @variance = nil
70
+ @weighted_rms = nil
71
+
72
+ end
73
+
74
+ #------------------------------------------------------------------------------------
75
+ # Returns the auto-correlation of a data sequence.
76
+ # @param lag lag between the two measures to auto correlate
77
+ #------------------------------------------------------------------------------------
78
+
79
+ def auto_correlation(lag)
80
+ DoubleDescriptive.autoCorrelation(@array_list, lag, mean, variance)
81
+ end
82
+
83
+ #------------------------------------------------------------------------------------
84
+ # Returns the correlation of two data sequences.
85
+ # That is covariance(data1,data2)/(standardDev1*standardDev2).
86
+ #------------------------------------------------------------------------------------
87
+
88
+ def correlation(other_val)
89
+ covariance(other_val) / (standard_deviation * other_val.standard_deviation)
90
+ end
91
+
92
+ #------------------------------------------------------------------------------------
93
+ # Returns the covariance of two data sequences.
94
+ # That is cov(x,y) = Sum((x[i]-mean(x)) * (y[i]-mean(y))) / size().
95
+ #------------------------------------------------------------------------------------
96
+
97
+ def covariance(other_val)
98
+ sample_covariance(other_val) * (list_size - 1) / list_size
99
+ end
100
+
101
+ #------------------------------------------------------------------------------------
102
+ # Durbin-Watson computation.
103
+ #------------------------------------------------------------------------------------
104
+
105
+ def durbin_watson
106
+ @durbin_watson ||= DoubleDescriptive.durbinWatson(@array_list)
107
+ end
108
+
109
+ #------------------------------------------------------------------------------------
110
+ # Computes the frequency (number of occurrences, count) of each distinct value in the
111
+ # given sorted data.
112
+ #------------------------------------------------------------------------------------
113
+
114
+ def frequencies
115
+
116
+ if (@frequencies == nil)
117
+ distinct_values = Java::CernColtListTdouble::DoubleArrayList.new
118
+ frequencies = Java::CernColtListTint::IntArrayList.new
119
+ DoubleDescriptive.frequencies(sorted_data, distinct_values, frequencies)
120
+ distinct_values.trimToSize()
121
+ frequencies.trimToSize()
122
+ @distinct_values = distinct_values.elements().to_a
123
+ @frequencies = frequencies.elements().to_a
124
+ end
125
+
126
+ { :distinct_values => @distinct_values, :frequencies => @frequencies}
127
+
128
+ end
129
+
130
+ #------------------------------------------------------------------------------------
131
+ # Returns the geometric mean of a data sequence.
132
+ #------------------------------------------------------------------------------------
133
+
134
+ def geometric_mean
135
+ @geometric_mean ||= DoubleDescriptive.geometricMean(@array_list)
136
+ end
137
+
138
+ #------------------------------------------------------------------------------------
139
+ # Returns the harmonic mean of a data sequence.
140
+ #------------------------------------------------------------------------------------
141
+
142
+ def harmonic_mean
143
+ @harmonic_mean ||= DoubleDescriptive.harmonicMean(list_size, sum_of_inversions)
144
+ end
145
+
146
+ #------------------------------------------------------------------------------------
147
+ # Returns the kurtosis (aka excess) of a data sequence, which is -3 +
148
+ # moment(data,4,mean) / standardDeviation^4.
149
+ #------------------------------------------------------------------------------------
150
+
151
+ def kurtosis
152
+ @kurtosis ||=
153
+ DoubleDescriptive.kurtosis(moment4, standard_deviation)
154
+ end
155
+
156
+ #------------------------------------------------------------------------------------
157
+ # Returns the lag-1 autocorrelation of a dataset; Note that this method has semantics
158
+ # different from autoCorrelation(..., 1);
159
+ #------------------------------------------------------------------------------------
160
+
161
+ def lag1
162
+ @lag1 ||= DoubleDescriptive.lag1(@array_list, mean)
163
+ end
164
+
165
+ #------------------------------------------------------------------------------------
166
+ #
167
+ #------------------------------------------------------------------------------------
168
+
169
+ def list_size
170
+ @list_size ||= @array_list.size
171
+ end
172
+
173
+ #------------------------------------------------------------------------------------
174
+ # Returns the largest member of a data sequence.
175
+ #------------------------------------------------------------------------------------
176
+
177
+ def max
178
+ @max ||= DoubleDescriptive.max(@array_list)
179
+ end
180
+
181
+ #------------------------------------------------------------------------------------
182
+ # Returns the arithmetic mean of a data sequence; That is Sum( data[i] ) / data.size()
183
+ #------------------------------------------------------------------------------------
184
+
185
+ def mean
186
+ @mean ||= DoubleDescriptive.mean(@array_list)
187
+ end
188
+
189
+ #------------------------------------------------------------------------------------
190
+ # Returns the mean deviation of a dataset.
191
+ #------------------------------------------------------------------------------------
192
+
193
+ def mean_deviation
194
+ @mean_deviation ||= DoubleDescriptive.meanDeviation(@array_list, mean)
195
+ end
196
+
197
+ #------------------------------------------------------------------------------------
198
+ # Returns the median of a sorted data sequence.
199
+ #------------------------------------------------------------------------------------
200
+
201
+ def median
202
+ @median ||= DoubleDescriptive.median(sorted_data)
203
+ end
204
+
205
+ #------------------------------------------------------------------------------------
206
+ # Returns the smallest member of a data sequence.
207
+ #------------------------------------------------------------------------------------
208
+
209
+ def min
210
+ @min ||= DoubleDescriptive.min(@array_list)
211
+ end
212
+
213
+ #------------------------------------------------------------------------------------
214
+ # Returns the moment of k-th order with constant c of a data sequence, which is
215
+ # Sum( (data[i]-c)^k ) / data.size().
216
+ # @param k integer
217
+ # @param c double
218
+ #------------------------------------------------------------------------------------
219
+
220
+ def moment(k, c)
221
+ DoubleDescriptive.moment(@array_list, k, c)
222
+ end
223
+
224
+ #------------------------------------------------------------------------------------
225
+ # The third central moment. That is: moment(data,3,mean)
226
+ #------------------------------------------------------------------------------------
227
+
228
+ def moment3
229
+ @moment3 ||= moment(3, mean)
230
+ end
231
+
232
+ #------------------------------------------------------------------------------------
233
+ #
234
+ #------------------------------------------------------------------------------------
235
+
236
+ def moment4
237
+ @moment4 ||= moment(4, mean)
238
+ end
239
+
240
+ #------------------------------------------------------------------------------------
241
+ # Returns the pooled mean of two data sequences.
242
+ # That is (size1 * mean1 + size2 * mean2) / (size1 + size2).
243
+ #------------------------------------------------------------------------------------
244
+
245
+ def pooled_mean(other_val)
246
+ other_val.reset_statistics
247
+ DoubleDescriptive.pooledMean(list_size, mean, other_val.list_size, other_val.mean)
248
+ end
249
+
250
+ #------------------------------------------------------------------------------------
251
+ # Returns the pooled variance of two data sequences.
252
+ # That is: (size1 * variance1 + size2 * variance2) / (size1 + size2)
253
+ #------------------------------------------------------------------------------------
254
+
255
+ def pooled_variance(other_val)
256
+ other_val.reset_statistics
257
+ DoubleDescriptive.pooledVariance(list_size, variance, other_val.list_size,
258
+ other_val.variance)
259
+ end
260
+
261
+ #------------------------------------------------------------------------------------
262
+ # Returns the product of a data sequence, which is Prod( data[i] ) .
263
+ #------------------------------------------------------------------------------------
264
+
265
+ def product
266
+ @product ||= DoubleDescriptive.product(@array_list)
267
+ end
268
+
269
+ #------------------------------------------------------------------------------------
270
+ # Returns the phi-quantile; that is, an element elem for which holds that phi percent
271
+ # of data elements are less than elem.
272
+ # @param phi double
273
+ #------------------------------------------------------------------------------------
274
+
275
+ def quantile(phi)
276
+ DoubleDescriptive.quantile(sorted_data, phi)
277
+ end
278
+
279
+ #------------------------------------------------------------------------------------
280
+ # Returns how many percent of the elements contained in the receiver are <= element.
281
+ # @param elmt double
282
+ #------------------------------------------------------------------------------------
283
+
284
+ def quantile_inverse(elmt)
285
+ DoubleDescriptive.quantileInverse(sorted_data, elmt)
286
+ end
287
+
288
+ #------------------------------------------------------------------------------------
289
+ # @param percs the percentages for which quantiles are to be computed. Each
290
+ # percentage must be in the interval [0.0,1.0].
291
+ #------------------------------------------------------------------------------------
292
+
293
+ def quantiles(percs)
294
+
295
+ percs = Java::CernColtListTdouble::DoubleArrayList.new(percs.to_java(Java::double))
296
+ res = DoubleDescriptive.quantiles(sorted_data, percs)
297
+ res.elements().to_a
298
+
299
+ end
300
+
301
+ #------------------------------------------------------------------------------------
302
+ # Returns the linearly interpolated number of elements in a list less or equal to a
303
+ # given element. The rank is the number of elements <= element. Ranks are of the form
304
+ # {0, 1, 2,..., sortedList.size()}. If no element is <= element, then the rank is
305
+ # zero. If the element lies in between two contained elements, then linear
306
+ # interpolation is used and a non integer value is returned.
307
+ # @param elmt double
308
+ #------------------------------------------------------------------------------------
309
+
310
+ def rank_interpolated(elmt) # delegates to DoubleDescriptive.rankInterpolated over sorted_data; the result is not stored in an ivar
311
+ DoubleDescriptive.rankInterpolated(sorted_data, elmt)
312
+ end
313
+
314
+ #------------------------------------------------------------------------------------
315
+ # Returns the RMS (Root-Mean-Square) of a data sequence.
316
+ #------------------------------------------------------------------------------------
317
+
318
+ def rms
319
+ @rms ||= DoubleDescriptive.rms(list_size, sum_of_squares)
320
+ end
321
+
322
+ #------------------------------------------------------------------------------------
323
+ # Returns the sample covariance of two data sequences.
324
+ # That is cov(x,y) = (1/(size()-1)) * Sum((x[i]-mean(x)) * (y[i]-mean(y))) .
325
+ #------------------------------------------------------------------------------------
326
+
327
+ def sample_covariance(other_val)
328
+ other_val.reset_statistics
329
+ DoubleDescriptive.covariance(@array_list, other_val.array_list)
330
+ end
331
+
332
+ #------------------------------------------------------------------------------------
333
+ # Returns the sample kurtosis (aka excess) of a data sequence.
334
+ #------------------------------------------------------------------------------------
335
+
336
+ def sample_kurtosis
337
+ @sample_kurtosis ||=
338
+ DoubleDescriptive.sampleKurtosis(list_size, moment4, sample_variance)
339
+ end
340
+
341
+ #------------------------------------------------------------------------------------
342
+ # Return the standard error of the sample kurtosis. Ref: R.R. Sokal, F.J. Rohlf,
343
+ # Biometry: the principles and practice of statistics in biological research (W.H.
344
+ # Freeman and Company, New York, 1998, 3rd edition) p. 138.
345
+ #------------------------------------------------------------------------------------
346
+
347
+ def sample_kurtosis_standard_error
348
+ @sample_kurtosis_standard_error ||=
349
+ DoubleDescriptive.sampleKurtosisStandardError(list_size)
350
+ end
351
+
352
+ #------------------------------------------------------------------------------------
353
+ # Returns the sample skew of a data sequence.
354
+ #------------------------------------------------------------------------------------
355
+
356
+ def sample_skew
357
+ @sample_skew ||=
358
+ DoubleDescriptive.sampleSkew(list_size, moment3, sample_variance)
359
+ end
360
+
361
+ #------------------------------------------------------------------------------------
362
+ # Return the standard error of the sample skew. Ref: R.R. Sokal, F.J. Rohlf,
363
+ # Biometry: the principles and practice of statistics in biological research (W.H.
364
+ # Freeman and Company, New York, 1998, 3rd edition) p. 138.
365
+ #------------------------------------------------------------------------------------
366
+
367
+ def sample_skew_standard_error
368
+ @sample_skew_standard_error ||=
369
+ DoubleDescriptive.sampleSkewStandardError(list_size)
370
+ end
371
+
372
+ #------------------------------------------------------------------------------------
373
+ # Returns the sample standard deviation. Ref: R.R. Sokal, F.J. Rohlf, Biometry: the
374
+ # principles and practice of statistics in biological research (W.H. Freeman and
375
+ # Company, New York, 1998, 3rd edition) p. 53. The standard deviation calculated as
376
+ # the sqrt of the variance underestimates the unbiased standard deviation. It needs
377
+ # to be multiplied by this correction factor:
378
+ # 1) if (n > 30): Cn = 1+1/(4*(n-1)), else
379
+ # 2) Cn = Math.sqrt((n - 1) * 0.5) * Gamma.gamma((n - 1) * 0.5) / Gamma.gamma(n * 0.5)
380
+ # The sample standard deviation is Cn * sqrt(sample variance)
381
+ #------------------------------------------------------------------------------------
382
+
383
+ def sample_standard_deviation
384
+ @sample_standard_deviation ||=
385
+ DoubleDescriptive.sampleStandardDeviation(list_size, sample_variance)
386
+ end
387
+
388
+ #------------------------------------------------------------------------------------
389
+ # Returns the sample variance of a data sequence.
390
+ #------------------------------------------------------------------------------------
391
+
392
+ def sample_variance
393
+ @sample_variance ||=
394
+ DoubleDescriptive.sampleVariance(list_size, sum, sum_of_squares)
395
+ end
396
+
397
+ #------------------------------------------------------------------------------------
398
+ # Returns the sample weighted variance of a data sequence.
399
+ # That is (sum_of_squared_products - sum_of_products * sum_of_products /
400
+ # sum_of_weights) / (sum_of_weights - 1)
401
+ # where:
402
+ # sum_of_weights = Sum ( weights[i] )
403
+ # sum_of_products = Sum ( data[i] * weights[i] )
404
+ # sum_of_squared_products = Sum( data[i] * data[i] * weights[i] )
405
+ #------------------------------------------------------------------------------------
406
+
407
+ def sample_weighted_variance(weights)
408
+
409
+ weights = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
410
+ sum_of_weights = DoubleDescriptive.sum(weights)
411
+ sum_of_products, sum_of_squared_products = weighted_sums(weights)
412
+ DoubleDescriptive.sampleWeightedVariance(sum_of_weights, sum_of_products,
413
+ sum_of_squared_products)
414
+ end
415
+
416
+ #------------------------------------------------------------------------------------
417
+ # Returns the skew of a data sequence, which is moment(data,3,mean) /
418
+ # standardDeviation^3.
419
+ #------------------------------------------------------------------------------------
420
+
421
+ def skew
422
+ @skew ||= DoubleDescriptive.skew(moment3, standard_deviation)
423
+ end
424
+
425
+ #------------------------------------------------------------------------------------
426
+ # Splits (partitions) a list into sublists such that each sublist contains the
427
+ # elements with a given range. splitters= (a,b,c,...,y,z) defines the ranges [-inf,a),
428
+ # [a,b), [b,c), ..., [y,z), [z,inf].
429
+ # Examples:
430
+ # data = (1,2,3,4,5,8,8,8,10,11).
431
+ # splitters=(2,8) yields 3 bins: (1), (2,3,4,5) (8,8,8,10,11).
432
+ # splitters=() yields 1 bin: (1,2,3,4,5,8,8,8,10,11).
433
+ # splitters=(-5) yields 2 bins: (), (1,2,3,4,5,8,8,8,10,11).
434
+ # splitters=(100) yields 2 bins: (1,2,3,4,5,8,8,8,10,11), ().
435
+ # @param splitters - the points at which the list shall be partitioned (must be sorted
436
+ # ascending).
437
+ # @return the sublists (an array with length == splitters.size() + 1). Each sublist is
438
+ # returned sorted ascending.
439
+ #------------------------------------------------------------------------------------
440
+
441
+ def split(splitters)
442
+
443
+ split = Java::CernColtListTdouble::DoubleArrayList.new(splitters.to_java(Java::double))
444
+ res = DoubleDescriptive.split(sorted_data, split)
445
+ lists = res.to_a
446
+ bins = Array.new
447
+
448
+ lists.each do |list|
449
+ list.trimToSize()
450
+ bins << list.elements().to_a
451
+ end
452
+
453
+ bins
454
+
455
+ end
456
+
457
+ #------------------------------------------------------------------------------------
458
+ # Returns a list with the sorted elements
459
+ #------------------------------------------------------------------------------------
460
+
461
+ def sort
462
+ sorted_data
463
+ @sorted_data.trimToSize()
464
+ @sorted_data.elements.to_a
465
+ end
466
+
467
+ #------------------------------------------------------------------------------------
468
+ # Returns the data sorted ascending as a DoubleArrayList; the sorted list is cached.
469
+ #------------------------------------------------------------------------------------
470
+
471
+ def sorted_data
472
+
473
+ if (@sorted_data)
474
+ return @sorted_data
475
+ end
476
+
477
+ list = @array_list.clone().elements()
478
+ comp = Proc.new { |val1, val2| val1 <=> val2 }
479
+ Java::CernColt::Sorting.parallelQuickSort(list, 0, @array_list.size(), comp)
480
+ @sorted_data = Java::CernColtListTdouble::DoubleArrayList.new(list)
481
+
482
+ end
483
+
484
+ #------------------------------------------------------------------------------------
485
+ # Returns the standard deviation from a variance.
486
+ #------------------------------------------------------------------------------------
487
+
488
+ def standard_deviation
489
+ @standard_deviation ||= DoubleDescriptive.standardDeviation(variance)
490
+ end
491
+
492
+ #------------------------------------------------------------------------------------
493
+ # Returns the standard error of a data sequence.
494
+ #------------------------------------------------------------------------------------
495
+
496
+ def standard_error
497
+ @standard_error ||= DoubleDescriptive.standardError(list_size, variance)
498
+ end
499
+
500
+ #------------------------------------------------------------------------------------
501
+ # Modifies a data sequence to be standardized. Changes each element data[i] as
502
+ # follows: data[i] = (data[i]-mean)/standardDeviation.
503
+ #------------------------------------------------------------------------------------
504
+
505
+ def standardize! # NOTE(review): passes @array_list to be modified, but no memoized statistic (@sum, @variance, @sorted_data, ...) is cleared in this method - confirm callers reset them
506
+ DoubleDescriptive.standardize(@array_list, mean, standard_deviation)
507
+ end
508
+
509
+ #------------------------------------------------------------------------------------
510
+ # Returns the sum of a data sequence.
511
+ #------------------------------------------------------------------------------------
512
+
513
+ def sum
514
+ @sum ||= DoubleDescriptive.sum(@array_list)
515
+ end
516
+
517
+ #------------------------------------------------------------------------------------
518
+ # Returns the sum of inversions of a data sequence, which is Sum( 1.0 / data[i]).
519
+ #------------------------------------------------------------------------------------
520
+
521
+ def sum_of_inversions(from = 0, to = list_size - 1)
522
+ @sum_of_inversions ||= DoubleDescriptive.sumOfInversions(@array_list, from, to)
523
+ end
524
+
525
+ #------------------------------------------------------------------------------------
526
+ # Returns the sum of logarithms of a data sequence, which is Sum( Log(data[i]) ).
527
+ #------------------------------------------------------------------------------------
528
+
529
+ def sum_of_logarithms(from = 0, to = list_size - 1)
530
+ @sum_of_logarithms ||= DoubleDescriptive.sumOfLogarithms(@array_list, from, to)
531
+ end
532
+
533
+ #------------------------------------------------------------------------------------
534
+ # Returns Sum( (data[i]-c)^k ); optimized for common parameters like c == 0.0 and/or
535
+ # k == -2
536
+ #------------------------------------------------------------------------------------
537
+
538
+ def sum_of_power_deviations(k, c) # evaluated on @array_list on every call; nothing is memoized
539
+ DoubleDescriptive.sumOfPowerDeviations(@array_list, k, c)
540
+ end
541
+
542
+ #------------------------------------------------------------------------------------
543
+ # Returns the sum of powers of a data sequence, which is Sum ( data[i]^k ).
544
+ #------------------------------------------------------------------------------------
545
+
546
+ def sum_of_powers(k) # evaluated on @array_list on every call; nothing is memoized
547
+ DoubleDescriptive.sumOfPowers(@array_list, k)
548
+ end
549
+
550
+ #------------------------------------------------------------------------------------
551
+ # Returns the sum of products and the sum of squared products with another array.
551
+ # That is, [Sum( data[i] * other_val[i] ), Sum( data[i] * data[i] * other_val[i] )].
552
+ # @param other_val: ruby array or a CernColtListTdouble::DoubleArrayList (when called
553
+ # internally).
555
+ #------------------------------------------------------------------------------------
556
+
557
+ def weighted_sums(other_val, from = 0, to = list_size - 1)
558
+
559
+ if (other_val.is_a? Array)
560
+ weights = Java::CernColtListTdouble::DoubleArrayList.new(other_val.to_java(Java::double))
561
+ elsif (other_val.is_a? Java::CernColtListTdouble::DoubleArrayList)
562
+ weights = other_val
563
+ else
564
+ raise "#{other_val} is not a valid weight array"
565
+ end
566
+
567
+ in_out = [0.0, 0.0].to_java Java::double
568
+ DoubleDescriptive.incrementalWeightedUpdate(@array_list, weights, from, to, in_out)
569
+ [in_out[0], in_out[1]]
570
+
571
+ end
572
+
573
+ #------------------------------------------------------------------------------------
574
+ # Returns the sum of squared mean deviation of a data sequence.
575
+ #------------------------------------------------------------------------------------
576
+
577
+ def sum_of_squared_deviations
578
+ @sum_of_square_deviations ||=
579
+ DoubleDescriptive.sumOfSquaredDeviations(list_size, variance)
580
+ end
581
+
582
+ #------------------------------------------------------------------------------------
583
+ # Returns the sum of squares of a data sequence.
584
+ #------------------------------------------------------------------------------------
585
+
586
+ def sum_of_squares
587
+ @sum_of_squares ||= DoubleDescriptive.sumOfSquares(@array_list)
588
+ end
589
+
590
+ #------------------------------------------------------------------------------------
591
+ # Returns the trimmed mean of a sorted data sequence.
592
+ #------------------------------------------------------------------------------------
593
+
594
+ def trimmed_mean(left = 0, right = 0) # left/right default to 0 and are passed through together with sorted_data and mean
595
+ DoubleDescriptive.trimmedMean(sorted_data, mean, left, right)
596
+ end
597
+
598
+ #------------------------------------------------------------------------------------
599
+ # Returns the variance of a data sequence (from its size, sum and sum of squares).
600
+ #------------------------------------------------------------------------------------
601
+
602
+ def variance
603
+ @variance ||=
604
+ DoubleDescriptive.variance(list_size, sum, sum_of_squares)
605
+ end
606
+
607
+ #------------------------------------------------------------------------------------
608
+ # Returns the weighted mean of a data sequence.
609
+ #------------------------------------------------------------------------------------
610
+
611
+ def weighted_mean(weights)
612
+ weights = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
613
+ DoubleDescriptive.weightedMean(@array_list, weights)
614
+ end
615
+
616
+ #------------------------------------------------------------------------------------
617
+ # Returns the weighted RMS (Root-Mean-Square) of a data sequence.
618
+ #------------------------------------------------------------------------------------
619
+
620
+ def weighted_rms(weights)
621
+
622
+ weights = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
623
+ sum_of_products, sum_of_squared_products = weighted_sums(weights)
624
+ DoubleDescriptive.weightedRMS(sum_of_products, sum_of_squared_products)
625
+
626
+ end
627
+
628
+ #------------------------------------------------------------------------------------
629
+ # Returns the winsorized mean of a sorted data sequence.
630
+ #------------------------------------------------------------------------------------
631
+
632
+ def winsorized_mean(left, right) # unlike trimmed_mean, left and right have no default values here
633
+ DoubleDescriptive.winsorizedMean(sorted_data, mean, left, right)
634
+ end
635
+
636
+ end # DoubleDescriptive