scicom 0.2.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +674 -0
  3. data/README.md +66 -0
  4. data/README.md~ +290 -0
  5. data/Rakefile +51 -0
  6. data/config.rb +163 -0
  7. data/doc/PypeR.pdf +0 -0
  8. data/doc/Stat 133 Class Notes (Phil Spector).pdf +29905 -45
  9. data/doc/The R interface.docx +0 -0
  10. data/lib/JRubyR/as_mdarray.rb +60 -0
  11. data/lib/JRubyR/attributes.rb +74 -0
  12. data/lib/JRubyR/dataframe.rb +35 -0
  13. data/lib/JRubyR/environment.rb +60 -0
  14. data/lib/JRubyR/function.rb +61 -0
  15. data/lib/JRubyR/index.rb +278 -0
  16. data/lib/JRubyR/list.rb +56 -0
  17. data/lib/JRubyR/list_orig.rb +111 -0
  18. data/lib/JRubyR/logical_value.rb +56 -0
  19. data/lib/JRubyR/rbsexp.rb +386 -0
  20. data/lib/JRubyR/renjin.rb +431 -0
  21. data/lib/JRubyR/ruby_classes.rb +58 -0
  22. data/lib/JRubyR/sequence.rb +56 -0
  23. data/lib/JRubyR/vector.rb +493 -0
  24. data/lib/env.rb +12 -0
  25. data/lib/rinruby.rb +795 -0
  26. data/lib/scicom.rb +29 -0
  27. data/target/helper.jar +0 -0
  28. data/test/baseball.csv +1 -0
  29. data/test/env.rb +7 -0
  30. data/test/test_R_interface.rb +165 -0
  31. data/test/test_array.rb +191 -0
  32. data/test/test_attributes.rb +261 -0
  33. data/test/test_basic.rb +156 -0
  34. data/test/test_column-major.rb +114 -0
  35. data/test/test_complete.rb +49 -0
  36. data/test/test_creation.rb +299 -0
  37. data/test/test_dataframe.rb +248 -0
  38. data/test/test_distribution.rb +320 -0
  39. data/test/test_double_assign.rb +240 -0
  40. data/test/test_double_receive.rb +106 -0
  41. data/test/test_environment.rb +57 -0
  42. data/test/test_factor.rb +285 -0
  43. data/test/test_functions.rb +67 -0
  44. data/test/test_linear_model.rb +64 -0
  45. data/test/test_list.rb +220 -0
  46. data/test/test_matrix.rb +205 -0
  47. data/test/test_mdarray.rb +258 -0
  48. data/test/test_operators.rb +227 -0
  49. data/test/test_sequence.rb +63 -0
  50. data/test/test_subsetting.rb +67 -0
  51. data/test/test_tmp.rb +67 -0
  52. data/test/test_vector.rb +227 -0
  53. data/vendor/Renjin.pdf +0 -0
  54. data/vendor/renjin-script-engine-0.7.0-RC7-SNAPSHOT-jar-with-dependencies.jar +0 -0
  55. data/version.rb +2 -0
  56. metadata +196 -0
@@ -0,0 +1,240 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # -*- coding: utf-8 -*-
4
+
5
+ ##########################################################################################
6
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
7
+ # and distribute this software and its documentation, without fee and without a signed
8
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
9
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
10
+ # distributions.
11
+ #
12
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
13
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
14
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
15
+ # POSSIBILITY OF SUCH DAMAGE.
16
+ #
17
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
19
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
20
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
21
+ # OR MODIFICATIONS.
22
+ ##########################################################################################
23
+
24
+ require 'rubygems'
25
+ require "test/unit"
26
+ require 'shoulda'
27
+
28
+ require 'env'
29
+ require 'scicom'
30
+
31
+ class SciComTest < Test::Unit::TestCase
32
+
33
+ context "R environment" do
34
+
35
+ #--------------------------------------------------------------------------------------
36
+ #
37
+ #--------------------------------------------------------------------------------------
38
+
39
+ setup do
40
+
41
+ # creating new instance of R interpreter
42
+ @r1 = Renjin.new
43
+
44
+ end
45
+
46
+ =begin
47
+ #--------------------------------------------------------------------------------------
48
+ #
49
+ #--------------------------------------------------------------------------------------
50
+
51
+ should "create Renjin vectors" do
52
+
53
+ arr = MDArray.typed_arange(:double, 12)
54
+ arr.reshape!([4, 3])
55
+ # arr.print
56
+
57
+ vector = R.build_vector(arr)
58
+ (0...arr.size).each do |index|
59
+ vector.getElementAsDouble(index)
60
+ end
61
+
62
+ # typed_arange does the same as arange but for arrays of other type
63
+ arr = MDArray.typed_arange(:double, 60)
64
+ # MDArray is stored in row-major order
65
+ arr.reshape!([5, 3, 4])
66
+ # arr.print
67
+
68
+ vector = R.build_vector(arr)
69
+ (0...arr.size).each do |index|
70
+ vector.getElementAsDouble(index)
71
+ end
72
+
73
+ end
74
+ =end
75
+ #--------------------------------------------------------------------------------------
76
+ #
77
+ #--------------------------------------------------------------------------------------
78
+
79
+ should "send 2D arrays to Renjin" do
80
+
81
+ # typed_arange does the same as arange but for arrays of other type
82
+ arr = MDArray.typed_arange(:double, 12)
83
+ arr.reshape!([4, 3])
84
+ # arr.print
85
+
86
+ # assign MDArray to R vector. MDArray shape is converted to R shape: two dimensions
87
+ # are identical in MDArray and R.
88
+ R.vec = arr
89
+
90
+ # When accessing a vector with the wrong indexes, return nil
91
+ res = R.eval("vec[0]")
92
+ assert_equal(nil, res)
93
+
94
+ # R.eval("print(vec[1, 1])")
95
+ # R.eval("print(vec[1, 2])")
96
+
97
+
98
+ # First index in R is 1 and not 0.
99
+ # method R.ri converts an MDArray index to a R index (in string format) ready
100
+ # to evaluate
101
+ arr.each_with_counter do |val, ct|
102
+ assert_equal(val, R.eval("vec#{R.ri(ct)}"))
103
+ end
104
+
105
+ end
106
+ #=begin
107
+ #--------------------------------------------------------------------------------------
108
+ #
109
+ #--------------------------------------------------------------------------------------
110
+
111
+ should "send 3D arrays to Renjin" do
112
+
113
+ # typed_arange does the same as arange but for arrays of other type
114
+ arr = MDArray.typed_arange(:double, 60)
115
+ # MDArray is stored in row-major order
116
+ arr.reshape!([5, 3, 4])
117
+ # arr.print
118
+
119
+ # shape of R.vec is [3, 4, 5].
120
+ R.vec = arr
121
+ R.eval("print(dim(vec))")
122
+ # R.eval("print(vec)")
123
+
124
+ # The data in the array can be accessed both in MDArray and in the R vector.
125
+ # To access the same element, indexing has to be properly converted from MDArray
126
+ # indexing to R indexing. In general converting from MDArray index to R index
127
+ # is done as follows: Let [i1, i2, i3, ... in] be the MDArray index, the
128
+ # corresponding R index is [i(n-1)+1, in+1, ..., i3+1, i2+1, i1+1]. As an example
129
+ # arr[3, 0, 1] is the R vector vec[1, 2, 4]
130
+ assert_equal(arr[3, 0, 1], R.eval("vec[1, 2, 4]")[0])
131
+ # arr[3, 1, 2] is vec[2, 3, 4]
132
+ assert_equal(arr[3, 1, 2], R.eval("vec[2, 3, 4]")[0])
133
+
134
+ # method R.ri converts an MDArray index to a R index (in string format) ready
135
+ # to evaluate
136
+ arr.each_with_counter do |val, ct|
137
+ assert_equal(arr.get(ct), R.eval("vec#{R.ri(ct)}"))
138
+ end
139
+ end
140
+
141
+ #=begin
142
+
143
+ #--------------------------------------------------------------------------------------
144
+ #
145
+ #--------------------------------------------------------------------------------------
146
+
147
+ should "send 4D arrays to Renjin" do
148
+
149
+ # typed_arange does the same as arange but for arrays of other type
150
+ arr = MDArray.typed_arange(:double, 120)
151
+ arr.reshape!([2, 4, 3, 5])
152
+ R.vec = arr
153
+ arr.each_with_counter do |val, ct|
154
+ assert_equal(val, R.eval("vec#{R.ri(ct)}"))
155
+ end
156
+
157
+ end
158
+
159
+ #--------------------------------------------------------------------------------------
160
+ #
161
+ #--------------------------------------------------------------------------------------
162
+
163
+ should "send 5D arrays to Renjin" do
164
+
165
+ # typed_arange does the same as arange but for arrays of other type
166
+ arr = MDArray.typed_arange(:double, 360)
167
+ arr.reshape!([2, 4, 3, 5, 3])
168
+ R.vec = arr
169
+ arr.each_with_counter do |val, ct|
170
+ assert_equal(val, R.eval("vec#{R.ri(ct)}"))
171
+ end
172
+
173
+ end
174
+
175
+ #--------------------------------------------------------------------------------------
176
+ #
177
+ #--------------------------------------------------------------------------------------
178
+
179
+ should "send 6D arrays to Renjin" do
180
+
181
+ # typed_arange does the same as arange but for arrays of other type
182
+ arr = MDArray.typed_arange(:double, 720)
183
+ arr.reshape!([2, 4, 3, 5, 3, 2])
184
+ R.vec = arr
185
+ arr.each_with_counter do |val, ct|
186
+ assert_equal(val, R.eval("vec#{R.ri(ct)}"))
187
+ end
188
+
189
+ end
190
+
191
+ #--------------------------------------------------------------------------------------
192
+ #
193
+ #--------------------------------------------------------------------------------------
194
+
195
+ should "send 7D arrays to Renjin" do
196
+
197
+ # typed_arange does the same as arange but for arrays of other type
198
+ arr = MDArray.typed_arange(:double, 2160)
199
+ arr.reshape!([2, 4, 3, 5, 3, 2, 3])
200
+ R.vec = arr
201
+ arr.each_with_counter do |val, ct|
202
+ assert_equal(val, R.eval("vec#{R.ri(ct)}"))
203
+ end
204
+
205
+ end
206
+
207
+ #--------------------------------------------------------------------------------------
208
+ #
209
+ #--------------------------------------------------------------------------------------
210
+
211
+ should "send larger than 7D arrays to Renjin" do
212
+
213
+ # typed_arange does the same as arange but for arrays of other type
214
+ arr = MDArray.typed_arange(:double, 8640)
215
+ arr.reshape!([2, 4, 3, 5, 3, 2, 3, 4])
216
+ R.vec = arr
217
+ arr.each_with_counter do |val, ct|
218
+ assert_equal(val, R.eval("vec#{R.ri(ct)}"))
219
+ end
220
+
221
+ end
222
+ #=end
223
+ =begin
224
+
225
+ #--------------------------------------------------------------------------------------
226
+ #
227
+ #--------------------------------------------------------------------------------------
228
+
229
+ should "receive multidimensional arrays from Renjin" do
230
+
231
+ # returned value is column major but MDArray is interpreting as row major
232
+ mat = R.eval(" mat = matrix(rnorm(20), 4)")
233
+ mat.print
234
+ R.eval("print(mat)")
235
+ end
236
+ =end
237
+
238
+ end
239
+
240
+ end
@@ -0,0 +1,106 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation, without fee and without a signed
6
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
7
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
8
+ # distributions.
9
+ #
10
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
11
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
12
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
13
+ # POSSIBILITY OF SUCH DAMAGE.
14
+ #
15
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
17
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
18
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
19
+ # OR MODIFICATIONS.
20
+ ##########################################################################################
21
+
22
+ require 'rubygems'
23
+ require "test/unit"
24
+ require 'shoulda'
25
+
26
+ require 'env'
27
+ require 'scicom'
28
+
29
+ class SciComTest < Test::Unit::TestCase
30
+
31
+ context "R environment" do
32
+
33
+ #--------------------------------------------------------------------------------------
34
+ #
35
+ #--------------------------------------------------------------------------------------
36
+
37
+ setup do
38
+
39
+ # creating two distinct instances of SciCom
40
+ @r1 = R.new
41
+ @r2 = R.new
42
+
43
+ end
44
+
45
+ #--------------------------------------------------------------------------------------
46
+ #
47
+ #--------------------------------------------------------------------------------------
48
+
49
+ should "receive an original 2D MDArray send to Renjin back" do
50
+
51
+ # typed_arange does the same as arange but for arrays of other type
52
+ arr = MDArray.typed_arange(:double, 12)
53
+ arr.reshape!([4, 3])
54
+ # assign MDArray to R vector. MDArray shape is converted to R shape: two dimensions
55
+ # are identical in MDArray and R.
56
+ @r1.vec = arr
57
+
58
+ arr2 = @r1.vec
59
+
60
+ arr.each_with_counter do |elmt, ct|
61
+ assert_equal(elmt, arr2.get(ct))
62
+ end
63
+
64
+ end
65
+
66
+ #--------------------------------------------------------------------------------------
67
+ #
68
+ #--------------------------------------------------------------------------------------
69
+
70
+ should "receive an original 3D MDArray send to Renjin back" do
71
+
72
+ # typed_arange does the same as arange but for arrays of other type
73
+ arr = MDArray.typed_arange(:double, 60)
74
+ # MDArray is stored in row-major order
75
+ arr.reshape!([5, 3, 4])
76
+ # shape of @r1.vec is [3, 4, 5].
77
+ @r1.vec = arr
78
+
79
+ arr2 = @r1.vec
80
+
81
+ arr.each_with_counter do |elmt, ct|
82
+ assert_equal(elmt, arr2.get(ct))
83
+ end
84
+
85
+ end
86
+
87
+ #--------------------------------------------------------------------------------------
88
+ #
89
+ #--------------------------------------------------------------------------------------
90
+
91
+ should "receive a 2D Renjin vector as MDArray" do
92
+
93
+ @r1.eval("vec = seq(12)")
94
+ @r1.eval("as.double(vec)")
95
+
96
+ arr2 = @r1.vec
97
+
98
+ arr.each_with_counter do |elmt, ct|
99
+ assert_equal(elmt, arr2.get(ct))
100
+ end
101
+
102
+ end
103
+
104
+ end
105
+
106
+ end
@@ -0,0 +1,57 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation, without fee and without a signed
6
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
7
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
8
+ # distributions.
9
+ #
10
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
11
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
12
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
13
+ # POSSIBILITY OF SUCH DAMAGE.
14
+ #
15
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
17
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
18
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
19
+ # OR MODIFICATIONS.
20
+ ##########################################################################################
21
+
22
+ require 'rubygems'
23
+ require "test/unit"
24
+ require 'shoulda'
25
+
26
+ require 'env'
27
+ require 'scicom'
28
+
29
+ class SciComTest < Test::Unit::TestCase
30
+
31
+ context "R environment" do
32
+
33
+ #--------------------------------------------------------------------------------------
34
+ #
35
+ #--------------------------------------------------------------------------------------
36
+
37
+ setup do
38
+
39
+ end
40
+
41
+ #--------------------------------------------------------------------------------------
42
+ #
43
+ #--------------------------------------------------------------------------------------
44
+
45
+ should "work with environment" do
46
+
47
+ # creates a new environment
48
+ e1 = R.new__env
49
+ # This is wrong!!!
50
+ e1.a = 2
51
+ # p e1.a
52
+
53
+ end
54
+
55
+ end
56
+
57
+ end
@@ -0,0 +1,285 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation, without fee and without a signed
6
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
7
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
8
+ # distributions.
9
+ #
10
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
11
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
12
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
13
+ # POSSIBILITY OF SUCH DAMAGE.
14
+ #
15
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
17
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
18
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
19
+ # OR MODIFICATIONS.
20
+ ##########################################################################################
21
+
22
+ require 'rubygems'
23
+ require "test/unit"
24
+ require 'shoulda'
25
+
26
+ require 'env'
27
+ require 'scicom'
28
+
29
+ class SciComTest < Test::Unit::TestCase
30
+
31
+ context "R environment" do
32
+
33
+ #--------------------------------------------------------------------------------------
34
+ #
35
+ #--------------------------------------------------------------------------------------
36
+
37
+ setup do
38
+
39
+ end
40
+
41
+ #--------------------------------------------------------------------------------------
42
+ # Conceptually, factors are variables in R which take on a limited number of different
43
+ # values; such variables are often referred to as categorical variables. One of the
44
+ # most important uses of factors is in statistical modeling; since categorical
45
+ # variables enter into statistical models differently than continuous variables,
46
+ # storing data as factors ensures that the modeling functions will treat such data
47
+ # correctly.
48
+ #
49
+ # Factors in R are stored as a vector of integer values with a corresponding set of
50
+ # character values to use when the factor is displayed. The factor function is used to
51
+ # create a factor. The only required argument to factor is a vector of values which
52
+ # will be returned as a vector of factor values. Both numeric and character variables
53
+ # can be made into factors, but a factor's levels will always be character values.
54
+ # You can see the possible levels for a factor through the levels command.
55
+ #
56
+ # To change the order in which the levels will be displayed from their default sorted
57
+ # order, the levels= argument can be given a vector of all the possible values of the
58
+ # variable in the order you desire. If the ordering should also be used when
59
+ # performing comparisons, use the optional ordered=TRUE argument. In this case, the
60
+ # factor is known as an ordered factor.
61
+ #
62
+ # The levels of a factor are used when displaying the factor's values. You can change
63
+ # these levels at the time you create a factor by passing a vector with the new values
64
+ # through the labels= argument. Note that this actually changes the internal levels of
65
+ # the factor, and to change the labels of a factor after it has been created, the
66
+ # assignment form of the levels function is used. To illustrate this point, consider a
67
+ # factor taking on integer values which we want to display as roman numerals.
68
+ # (http://www.stat.berkeley.edu/~s133/factors.html)
69
+ #--------------------------------------------------------------------------------------
70
+
71
+ should "create factors" do
72
+
73
+ =begin
74
+ # Open bug report with Renjin
75
+ # R.substring("statistics", (1..10), (1..10)).pp
76
+ split = R.strsplit("statistics", "")
77
+ split.pp
78
+ R.eval("print(strsplit(\"statistics\", split = \"\"))")
79
+
80
+ ff = R.factor(split, labels: R.letters)
81
+ # R.eval("print(factor(strsplit(\"statistics\", split = \"\"), levels = letters))")
82
+ # ff.pp
83
+ =end
84
+
85
+
86
+ data = R.c(1,2,2,3,1,2,3,3,1,2,3,3,1)
87
+
88
+ # The same as above, but more like R, i.e. calling function 'factor' with data as
89
+ # argument
90
+ fdata = R.factor(data)
91
+ fdata.pp
92
+
93
+ # Calling "method" 'factor' on the data vector. Same result as above, but shorter
94
+ # without having to use 'R.factor'
95
+ data.factor.pp
96
+
97
+ # calling 'factor' method with arguments
98
+ data.factor(labels: R.c("I","II","III")).pp
99
+
100
+ # To convert the default factor fdata to roman numerals, we need to set its levels
101
+ # attribute
102
+ fdata.attr.levels = R.c('I','II','III')
103
+ fdata.pp
104
+
105
+ # Factors represent a very efficient way to store character values, because each
106
+ # unique character value is stored only once, and the data itself is stored as a
107
+ # vector of integers. Because of this, read.table will automatically convert
108
+ # character variables to factors unless the as.is= argument is specified. See
109
+ # Section for details.
110
+ #
111
+ # As an example of an ordered factor, consider data consisting of the names of
112
+ # months:
113
+
114
+ mons = R.c("March","April","January","November","January",\
115
+ "September","October","September","November","August",\
116
+ "January","November","November","February","May","August",\
117
+ "July","December","August","August","September","November",\
118
+ "February","April")
119
+ # mons.pp
120
+ mons = R.factor(mons)
121
+ R.table(mons).pp
122
+
123
+ # This does the same as above
124
+ mons.factor.table.pp
125
+
126
+ # Although the months clearly have an ordering, this is not reflected in the
127
+ # output of the table function. Additionally, comparison operators are not
128
+ # supported for unordered factors. Creating an ordered factor solves these
129
+ # problems:
130
+
131
+ mons = R.factor(mons, levels: R.c("January","February","March",\
132
+ "April","May","June","July","August","September",\
133
+ "October","November","December"), ordered: TRUE)
134
+ (mons[1] < mons[2]).pp
135
+ mons.table.pp
136
+
137
+ # While it may be necessary to convert a numeric variable to a factor for a
138
+ # particular application, it is often very useful to convert the factor back to
139
+ # its original numeric values, since even simple arithmetic operations will fail
140
+ # when using factors. Since the as.numeric function will simply return the
141
+ # internal integer values of the factor, the conversion must be done using the
142
+ # levels attribute of the factor.
143
+ #
144
+ # Suppose we are studying the effects of several levels of a fertilizer on the
145
+ # growth of a plant. For some analyses, it might be useful to convert the
146
+ # fertilizer levels to an ordered factor:
147
+ fert = R.c(10,20,20,50,10,20,10,50,20)
148
+ fert = R.factor(fert, levels: R.c(10, 20, 50), ordered: TRUE)
149
+ fert.pp
150
+
151
+ # now calling factor with arguments
152
+ fert.factor(levels: R.c(10, 20, 50), ordered: TRUE).pp
153
+
154
+ # If we wished to calculate the mean of the original numeric values of the fert
155
+ # variable, we would have to convert the values using the levels function:
156
+
157
+ # This prints NA
158
+ R.mean(fert).pp
159
+
160
+ # actually calculates the mean
161
+ R.mean(R.as__numeric(R.levels(fert)[fert])).pp
162
+
163
+ # the same, but more Ruby like
164
+ fert.levels[fert].as__numeric.mean.pp
165
+
166
+ # Indexing the return value from the levels function is the most reliable way
167
+ # to convert numeric factors to their original numeric values.
168
+ #
169
+ # When a factor is first created, all of its levels are stored along with the
170
+ # factor, and if subsets of the factor are extracted, they will retain all of the
171
+ # original levels. This can create problems when constructing model matrices and
172
+ # may or may not be useful when displaying the data using, say, the table function.
173
+ # As an example, consider a random sample from the letters vector, which is part
174
+ # of the base R distribution.
175
+
176
+ lets = R.sample(R.letters, size: 100,replace: TRUE)
177
+ lets = R.factor(lets)
178
+ R.table(lets[(1..5)]).pp
179
+
180
+ # Even though only five of the levels were actually represented, the table function
181
+ # shows the frequencies for all of the levels of the original factors. To change
182
+ # this, we can simply use another call to factor
183
+
184
+ R.table(R.factor(lets[(1..5)])).pp
185
+
186
+ # To exclude certain levels from appearing in a factor, the exclude= argument
187
+ # can be passed to factor. By default, the missing value (NA) is excluded from
188
+ # factor levels; to create a factor that includes missing values from a numeric
189
+ # variable, use exclude=NULL.
190
+ #
191
+ # Care must be taken when combining variables which are factors, because the c
192
+ # function will interpret the factors as integers. To combine factors, they should
193
+ # first be converted back to their original values (through the levels function),
194
+ # then catenated and converted to a new factor:
195
+
196
+ l1 = R.factor(R.sample(R.letters, size: 10, replace: TRUE))
197
+ l2 = R.factor(R.sample(R.letters, size: 10, replace: TRUE))
198
+ l1.pp
199
+ l2.pp
200
+ l12 = R.factor(R.c(R.levels(l1)[l1], R.levels(l2)[l2]))
201
+ l12.pp
202
+
203
+ # the same as l12, using method chaining
204
+ R.factor(R.c(l1.levels[l1], l2.levels[l2])).pp
205
+
206
+ # The cut function is used to convert a numeric variable into a factor. The
207
+ # breaks argument to cut is used to describe how ranges of numbers will be
208
+ # converted to factor values. If a number is provided through the breaks argument,
209
+ # the resulting factor will be created by dividing the range of the variable into
210
+ # that number of equal length intervals; if a vector of values is provided, the
211
+ # values in the vector are used to determine the breakpoint. Note that if a vector
212
+ # of values is provided, the number of levels of the resultant factor will be one
213
+ # less than the number of values in the vector.
214
+ #
215
+ # For example, consider the women data set, which contains height and weights for
216
+ # a sample of women. If we wanted to create a factor corresponding to weight, with
217
+ # three equally-spaced levels, we could use the following:
218
+
219
+ # Use R.d to access a built-in dataset: "women"
220
+ women = R.d("women")
221
+ wfact = R.cut(women.weight, 3)
222
+ wfact.table.pp
223
+
224
+ # To produce factors based on percentiles of your data (for example quartiles or deciles),
225
+ # the quantile function can be used to generate the breaks argument, ensuring nearly equal
226
+ # numbers of observations in each of the levels of the factor:
227
+
228
+ wfact = R.cut(women.weight, R.quantile(women.weight, R.c((0..4))/4))
229
+ wfact.table.pp
230
+
231
+ # As mentioned in Section , there are a number of ways to create factors from date/time
232
+ # objects. If you wish to create a factor based on one of the components of that date, you
233
+ # can extract it with strftime and convert it to a factor directly. For example, we can use
234
+ # the seq function to create a vector of dates representing each day of the year:
235
+
236
+ everyday = R.seq(from: R.as__Date('2005-1-1'), to: R.as__Date('2005-12-31'), by: 'day')
237
+ everyday.pp
238
+
239
+ # To create a factor based on the month of the year in which each date falls, we can extract
240
+ # the month name (full or abbreviated) using format:
241
+
242
+ cmonth = R.format(everyday, '%b')
243
+ months = R.factor(cmonth, levels: R.unique(cmonth), ordered: TRUE)
244
+ months.table.pp
245
+
246
+ p "this is it"
247
+ # simplifying the above -- Javascript like:
248
+ cmonth = everyday.format('%b')
249
+ cmonth
250
+ .factor(levels: cmonth.unique, ordered: TRUE)
251
+ .table
252
+ .pp
253
+
254
+ # Since R.unique returns unique values in the order they are encountered, the levels
255
+ # argument will provide the month abbreviations in the correct order to produce a properly
256
+ # ordered factor.
257
+ #
258
+ # Sometimes more flexibility can be achieved by using the cut function, which understands
259
+ # time units of months, days, weeks and years through the breaks argument. (For date/time
260
+ # values, units of hours, minutes, and seconds can also be used.) For example, to format the
261
+ # days of the year based on the week in which they fall, we could use cut as follows:
262
+
263
+ # NOT WORKING... check! Renjin is trying to cast a DoubleArrayVector into a IntVector
264
+ # wks = R.cut(everyday, breaks: 'week')
265
+ # R.head(wks).pp
266
+ # p "Renjin bug"
267
+ # R.eval("everyday = seq(from= as.Date('2005-1-1'), to= as.Date('2005-12-31'), by= 'day')")
268
+ # R.eval("cut(everyday, breaks= 'week')")
269
+
270
+ # Note that the first observation had a date earlier than any of the dates in the everyday
271
+ # vector, since the first date was in the middle of the week. By default, cut starts weeks on
272
+ # Mondays; to use Sundays instead, pass the start.on.monday=FALSE argument to cut.
273
+ # Multiples of units can also be specified through the breaks argument. For example, to create
274
+ # a factor based on the quarter of the year an observation is in, we could use cut as follows:
275
+
276
+ # NOT WORKING... check! Renjin is trying to cast a DoubleArrayVector into a IntVector
277
+ # qtrs = R.cut(everyday, "3 months", labels: R.paste('Q',(1..4), sep: ''))
278
+ # R.head(qtrs).pp
279
+
280
+ end
281
+
282
+ end
283
+
284
+ end
285
+