RubyGems - scicom - Versions diffs - 0.2.0-java - Mend

scicom 0.2.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

checksums.yaml +7 -0
data/LICENSE.txt +674 -0
data/README.md +66 -0
data/README.md~ +290 -0
data/Rakefile +51 -0
data/config.rb +163 -0
data/doc/PypeR.pdf +0 -0
data/doc/Stat 133 Class Notes (Phil Spector).pdf +29905 -45
data/doc/The R interface.docx +0 -0
data/lib/JRubyR/as_mdarray.rb +60 -0
data/lib/JRubyR/attributes.rb +74 -0
data/lib/JRubyR/dataframe.rb +35 -0
data/lib/JRubyR/environment.rb +60 -0
data/lib/JRubyR/function.rb +61 -0
data/lib/JRubyR/index.rb +278 -0
data/lib/JRubyR/list.rb +56 -0
data/lib/JRubyR/list_orig.rb +111 -0
data/lib/JRubyR/logical_value.rb +56 -0
data/lib/JRubyR/rbsexp.rb +386 -0
data/lib/JRubyR/renjin.rb +431 -0
data/lib/JRubyR/ruby_classes.rb +58 -0
data/lib/JRubyR/sequence.rb +56 -0
data/lib/JRubyR/vector.rb +493 -0
data/lib/env.rb +12 -0
data/lib/rinruby.rb +795 -0
data/lib/scicom.rb +29 -0
data/target/helper.jar +0 -0
data/test/baseball.csv +1 -0
data/test/env.rb +7 -0
data/test/test_R_interface.rb +165 -0
data/test/test_array.rb +191 -0
data/test/test_attributes.rb +261 -0
data/test/test_basic.rb +156 -0
data/test/test_column-major.rb +114 -0
data/test/test_complete.rb +49 -0
data/test/test_creation.rb +299 -0
data/test/test_dataframe.rb +248 -0
data/test/test_distribution.rb +320 -0
data/test/test_double_assign.rb +240 -0
data/test/test_double_receive.rb +106 -0
data/test/test_environment.rb +57 -0
data/test/test_factor.rb +285 -0
data/test/test_functions.rb +67 -0
data/test/test_linear_model.rb +64 -0
data/test/test_list.rb +220 -0
data/test/test_matrix.rb +205 -0
data/test/test_mdarray.rb +258 -0
data/test/test_operators.rb +227 -0
data/test/test_sequence.rb +63 -0
data/test/test_subsetting.rb +67 -0
data/test/test_tmp.rb +67 -0
data/test/test_vector.rb +227 -0
data/vendor/Renjin.pdf +0 -0
data/vendor/renjin-script-engine-0.7.0-RC7-SNAPSHOT-jar-with-dependencies.jar +0 -0
data/version.rb +2 -0
metadata +196 -0

data/test/test_dataframe.rb ADDED Viewed

@@ -0,0 +1,248 @@
+# -*- coding: utf-8 -*-
+##########################################################################################
+# Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+# and distribute this software and its documentation, without fee and without a signed
+# licensing agreement, is hereby granted, provided that the above copyright notice, this
+# paragraph and the following two paragraphs appear in all copies, modifications, and
+# distributions.
+#
+# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+# OR MODIFICATIONS.
+##########################################################################################
+require 'rubygems'
+require "test/unit"
+require 'shoulda'
+require 'env'
+require 'scicom'
+class SciComTest < Test::Unit::TestCase
+  context "R environment" do
+    #--------------------------------------------------------------------------------------
+    #
+    #--------------------------------------------------------------------------------------
+    setup do
+      # No shared fixtures are created here: the test visible in this context loads the
+      # data it needs itself (via R.d).
+    end
+    #--------------------------------------------------------------------------------------
+    #
+    #--------------------------------------------------------------------------------------
+=begin
+    should "create data-frame from a single vector" do
+      vec = R.seq(20)
+      vec.attr.dim = R.c(4, 5)
+      df = R.as__data__frame(vec)
+      df.pp
+      assert_equal(4, df.nrow.gz)
+      assert_equal(5, df.ncol.gz)
+      df[0].pp
+      df[1].pp
+      df["V2"].pp
+      df["V4"].pp
+    end
+    #--------------------------------------------------------------------------------------
+    #
+    #--------------------------------------------------------------------------------------
+    should "work with build-in data-frames" do
+      # We use built-in data frames in R for our tutorials. For example, here is a built-in
+      # data frame in R, called mtcars.
+      # To access a built-in data-frame, use method R.d with the data-frame's name.
+      mtcars = R.d("mtcars")
+      p "mtcars build-in data-frame"
+      mtcars.pp
+      # Here is the cell value from the first row, second column of mtcars.
+      assert_equal(6, mtcars[1, 2].gz)
+      # Moreover, we can use the row and column names instead of the numeric coordinates.
+      assert_equal(6, mtcars["Mazda RX4", "cyl"].gz)
+      # Lastly, the number of data rows in the data frame is given by the nrow function.
+      assert_equal(32, mtcars.nrow.gz)    # number of data rows
+      # And the number of columns of a data frame is given by the ncol function.
+      assert_equal(11, mtcars.ncol.gz)    # number of columns
+      p "mtcars head"
+      mtcars.head.pp
+    end
+    #--------------------------------------------------------------------------------------
+    #
+    #--------------------------------------------------------------------------------------
+    should "access data-frames by column vector" do
+      mtcars = R.d("mtcars")
+      # We reference a data frame column with the double square bracket "[[]]" operator.
+      # For example, to retrieve the ninth column vector of the built-in data set mtcars,
+      # we write mtcars[[9]].
+      mtcars[[9]].pp
+      # We can retrieve the same column vector by its name.
+      mtcars[["am"]].pp
+      # We can also retrieve with the "." operator in lieu of the double square
+      # bracket operator.
+      mtcars.am.pp
+      # Yet another way to retrieve the same column vector is to use the single square
+      # bracket "[]" operator. We prepend the column name with 'nil', which signals a
+      # wildcard match for the row position.
+      mtcars[nil, "am"].pp
+    end
+    #--------------------------------------------------------------------------------------
+    #
+    #--------------------------------------------------------------------------------------
+    should "access data-frames by column slice" do
+      mtcars = R.d("mtcars")
+      # We retrieve a data frame column slice with the single square bracket "[]" operator.
+      # Numeric Indexing
+      # The following is a slice containing the first column of the built-in data set
+      # mtcars.
+      mtcars[1].pp
+      # Name Indexing
+      # We can retrieve the same column slice by its name.
+      mtcars["mpg"].pp
+      # To retrieve a data frame slice with the two columns mpg and hp, we pack the
+      # column names in an index vector inside the single square bracket operator.
+      mtcars[R.c("mpg", "hp")].pp
+    end
+=end
+    #--------------------------------------------------------------------------------------
+    #
+    #--------------------------------------------------------------------------------------
+    should "access data-frames by row slice" do
+      mtcars = R.d("mtcars")
+      # We retrieve rows from a data frame with the single square bracket operator, just
+      # like what we did with columns. However, in additional to an index vector of row
+      # positions, we append an nil. This is important, as the nil signals a wildcard match
+      # for the second coordinate for column positions.
+      # Numeric Indexing
+      # For example, the following retrieves a row record of the built-in data set mtcars.
+      # Please notice the nil in the square bracket operator. It states that the 1974 Camaro
+      # Z28 has a gas mileage of 13.3 miles per gallon, and an eight cylinder 245 horse power
+      # engine, ..., etc.
+      mtcars[24, nil].pp
+      # To retrieve more than one rows, we use a numeric index vector.
+      mtcars[R.c(3, 24), nil].pp
+      # Name Indexing
+      # We can retrieve a row by its name.
+      mtcars["Camaro Z28", nil].pp
+      # And we can pack the row names in an index vector in order to retrieve multiple
+      # rows.
+      mtcars[R.c("Datsun 710", "Camaro Z28"), nil].pp
+      # Logical Indexing
+      # Lastly, we can retrieve rows with a logical index vector. In the following
+      # vector L, the member value is TRUE if the car has automatic transmission, and
+      # FALSE if otherwise.
+      auto = mtcars.am == 0
+      auto.pp
+      # Here is the list of vehicles with automatic transmission.
+      mtcars[auto, nil].pp
+      # And here is the gas mileage data for automatic transmission.
+      mtcars[auto, nil].mpg.pp
+    end
+    #--------------------------------------------------------------------------------------
+    #
+    #--------------------------------------------------------------------------------------
+    should "create data-frame from multiple vectors" do
+      # NOTE(review): the entire body below is disabled with =begin/=end, so this test
+      # currently executes no statements and makes no assertions.  The disabled code
+      # builds a data frame from several R.c vectors and prints its column names and
+      # summary.  The table in the comment lists a "year" column and "Af.Am" values,
+      # while the disabled code has no year vector and uses "Af. Am" -- reconcile these
+      # before re-enabling.
+=begin
+      # name     age  hgt  wgt  race year   SAT
+      # Bob       21   70  180  Cauc   Jr  1080
+      # Fred      18   67  156 Af.Am   Fr  1210
+      # Barb      18   64  128 Af.Am   Fr   840
+      # Sue       24   66  118  Cauc   Sr  1340
+      # Jeff      20   72  202 Asian   So   880
+      name = R.c("Bob", "Fred", "Barb", "Sue", "Jeff")
+      age = R.c(21, 18, 18, 24, 20)
+      hgt = R.c(70, 67, 64, 66, 72)
+      wgt = R.c(180, 156, 128, 118, 202)
+      race = R.c("Cauc", "Af. Am", "Af. Am", "Cauc", "Asian")
+      sat = R.c(1080, 1210, 840, 1340, 880)
+      df = R.data__frame(name, age, hgt, wgt, race, sat)
+      df.colnames.pp
+      df.colnames(prefix: "sc").pp
+      # Renjin allows changes to variable properties
+      R.eval("colnames(#{df.r}) = c('name', 'age', 'height', 'weigth', 'race', 'SAT')")
+      R.eval("print(colnames(#{df.r}))")
+      rbvec = R.eval("vec = c(1, 2, 3, 4, 5)")
+      # this is a new vector with the same name.  Assigning a new value to a large
+      # vector can then be very costly as every assignment does copy the old data.
+      R.eval("vec[1] = 10")
+      R.eval("print(vec)")
+      # this proves that vec is actually a new vec.  We have kept the old vector in
+      # variable rbvec.
+      rbvec.print
+      # R.colnames(df) = R.c("name", "age", "height", "weigth", "race", "SAT")
+      df.print
+      summ = R.summary(df.r)
+      p summ
+      summ.print
+      R.eval("print(colnames(#{df.r}))")
+      col = R.colnames(:df)
+      col.print
+=end
+    end
+  end
+end

data/test/test_distribution.rb ADDED Viewed

@@ -0,0 +1,320 @@
+# -*- coding: utf-8 -*-
+##########################################################################################
+# Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+# and distribute this software and its documentation, without fee and without a signed
+# licensing agreement, is hereby granted, provided that the above copyright notice, this
+# paragraph and the following two paragraphs appear in all copies, modifications, and
+# distributions.
+#
+# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+# OR MODIFICATIONS.
+##########################################################################################
+require 'rubygems'
+require "test/unit"
+require 'shoulda'
+require 'env'
+require 'scicom'
+class SciComTest < Test::Unit::TestCase
+  context "R environment" do
+    #======================================================================================
+    #
+    #======================================================================================
+    # Before each test, build a fresh R instance (a new Renjin instance, per the original
+    # note) and keep it in @r1.
+    setup { @r1 = R.new }
+    #======================================================================================
+    #
+    #======================================================================================
+    should "be able to call built-in R numeric functions" do
+      # All R numeric functions are available to be called directly from a Ruby script.
+      # Note that a numeric function in R always returns a vector (MDArray), in that case,
+      # of size 1, so we need to index the result with [0].
+      assert_equal(20.5, R.abs(-20.5))
+      assert_equal(Math.sqrt(84), R.sqrt(84)[0])
+      assert_equal(4, R.ceiling(3.475)[0])
+      assert_equal(3, R.floor(3.475)[0])
+      assert_equal(5, R.trunc(5.99)[0])
+      assert_equal(3.46, R.round(3.457, digits: 2)[0])
+      assert_equal(3.5, R.signif(3.475, digits: 2)[0])
+      assert_equal(Math.cos(10), R.cos(10)[0])
+      assert_equal(Math.sin(0.53), R.sin(0.53)[0])
+      assert_equal(Math.tan(0.53), R.tan(0.53)[0])
+      assert_equal(Math.acos(0.53), R.acos(0.53)[0])
+      assert_equal(Math.cosh(0.53), R.cosh(0.53)[0])
+      assert_equal(Math.acosh(1), R.acosh(1)[0])
+      assert_equal(Math.log(25.45), R.log(25.45)[0])
+      # Math.log10 = 1.4056877866727773
+      # R.log10    = 1.4056877866727775
+      # assert_equal(Math.log10(25.45), R.log10(25.45)[0])
+      assert_equal(Math.exp(2.43), R.exp(2.43)[0])
+    end
+    #======================================================================================
+    #
+    #======================================================================================
+    should "be able to call built-in R character functions" do
+      # Extract or replace substrings in a character vector.
+      x = "abcdef"
+      assert_equal("bcd", R.substr(x, 2, 4)[0])
+      # Returns a logical array vector
+      vec = R.c(TRUE, TRUE, FALSE)
+      vec = R.c(true, true, false)
+      # returns a DoubleMDArray.  NA in MDArray is NaN.  There is no difference
+      vec = R.c(NaN, NA, EPSILON)
+      vec.print
+      # grep(pattern, x, ignore.case=FALSE, fixed=FALSE).  Search for pattern in x.
+      # If fixed = FALSE then pattern is a regular expression. If fixed = TRUE then pattern
+      # is a text string. Returns matching indices.
+      res = R.grep("A", R.c("b","A","c"), fixed: TRUE)
+      assert_equal(2, res[0])
+      # Split the elements of character vector x at split.
+      # Returns a ListVector
+      split = R.strsplit(x, "")
+      # returns c("x1","x2" "x3")
+      # Method in R is called as paste("x", 1:3, sep = "")
+      str = R.paste("x", (1..3), sep: "")
+      str.print
+      # returns c("xM1","xM2" "xM3")
+      # Method in R is called as paste("x", 1:3, sep = "M")
+      str = R.paste("x", (1..3), sep: "M")
+      str.print
+      # date is a Closure
+      date = R.date()
+      # str = R.paste("Today is", R.date())
+      str.print
+      str = R.toupper("this is a string")
+      assert_equal("THIS IS A STRING", str[0])
+      str = R.tolower("THIS IS ALSO A STRING")
+      assert_equal("this is also a string", str[0])
+      # R.sub(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE)
+      # Find pattern in x and replace with replacement text. If fixed=FALSE then pattern is a
+      # regular expression.  If fixed = TRUE then pattern is a text string.
+      # returns "Hello.There"
+      str = R.sub("\\\\s",".","Hello There")
+      assert_equal("Hello.There", str[0])
+=begin
+x <- "abcdef"
+substr(x, 2, 4) <- "22222" is "a222ef"
+=end
+    end
+    #======================================================================================
+    #
+    #======================================================================================
+    should "be able to call built-in R statistical probability functions" do
+      # By prefixing a "d" to the function name in the table above, you can get probability
+      # density values (pdf). By prefixing a "p", you can get cumulative probabilities (cdf).
+      # By prefixing a "q", you can get quantile values. By prefixing an "r", you can get
+      # random numbers from the distribution. I will demonstrate using the normal distribution.
+      # cumulative normal probability for q (area under the normal curve to the right of q)
+      assert_equal(0.975, R.pnorm(1.96))
+      # The dnorm( ) function returns the height of the normal curve at some value along the
+      # x-axis.
+      assert_equal(0.24197072451914337, R.dnorm(1)[0])
+      # The pnorm( ) function is the cumulative density function or cdf. It returns the area
+      # below the given value of "x",
+      assert_equal(0.841344746068543, R.pnorm(1)[0])
+      # Once again, the defaults for mean and sd are 0 and 1 respectively. These can be set
+      # to other values as in the case of dnorm( ). To find the area above the cutoff x-value,
+      # either subtract from 1, or set the "lower.tail=" option to FALSE...
+      assert_equal(0.15865525393145696, 1 - R.pnorm(1)[0])
+      assert_equal(0.15865525393145696, R.pnorm(1, "lower.tail" => FALSE))
+      # To get quantiles or "critical values", you can use the qnorm( ) function as in the
+      # following examples...
+      # p = .05, one-tailed (upper)
+      assert_equal(1.644854, R.qnorm(0.95))
+      # p = .05, two-tailed
+      R.qnorm(R.c(0.025,0.975)).print
+      # deciles from the unit normal dist.
+      R.qnorm(R.seq(0.1,0.9,0.1)).print
+      # area below t = 2.101, df = 8
+      assert_equal(0.9655848143495498, R.pt(2.101, df: 8)[0])
+      # critical value of chi square, df = 1
+      assert_equal(3.8414588206939566, R.qchisq(0.95, df: 1)[0])
+      R.qf(R.c(0.025,0.975), df1: 3, df2: 12).print
+      # a discrete binomial probability
+      assert_equal(0.010843866711637968, R.dbinom(60, size: 100, prob: 0.5)[0])
+      # Random numbers are generated from a given distribution like this...
+      # 9 uniformly distributed random nos.
+      R.runif(9).print
+      # 9 normally distributed random nos.
+      R.rnorm(9).print
+      # 9 t-distributed random nos.
+      R.rt(9, df: 10).print
+      R.eval("print(quantile(rivers))")
+      quant = R.quantile(:rivers)
+      quant.print
+      summary = R.summary(:rivers)
+      summary.print
+      # quintiles
+      quint = R.quantile(:rivers, probs: R.seq(0.2,0.8,0.2))
+      quint.print
+      # deciles
+      dec = R.quantile(:rivers, probs: R.seq(0.1,0.9,0.1))
+      dec.print
+      # And then there is the "type=" option. It turns out there is some disagreement among
+      # different sources as to just how quantiles should be calculated from an empirical
+      # distribution. R doesn't take sides. It gives you nine different methods! Pick the
+      # one you like best by setting the "type=" option to a number between 1 and 9. Here
+      # are some details (and more are available on the help page): type=2 will give the results
+      # most people are taught to calculate in an intro stats course, type=3 is the SAS
+      # definition, type=6 is the Minitab and SPSS definition, type=7 is the default and the
+      # S definition and seems to work well when the variable is continuous.
+      # deciles - Don't see any difference, shoud there be?
+      dec = R.quantile(:rivers, probs: R.seq(0.1,0.9,0.1), type: 1)
+      dec.print
+      dec = R.quantile(:rivers, probs: R.seq(0.1,0.9,0.1), type: 2)
+      dec.print
+      dec = R.quantile(:rivers, probs: R.seq(0.1,0.9,0.1), type: 7o)
+      dec.print
+=begin
+dnorm(x)	normal density function (by default m=0 sd=1)
+# plot standard normal curve
+x <- pretty(c(-3,3), 30)
+y <- dnorm(x)
+plot(x, y, type='l', xlab="Normal Deviate", ylab="Density", yaxs="i")
+qnorm(p)	normal quantile.
+value at the p percentile of normal distribution
+qnorm(.9) is 1.28 # 90th percentile
+rnorm(n, m=0,sd=1)	n random normal deviates with mean m
+and standard deviation sd.
+#50 random normal variates with mean=50, sd=10
+x <- rnorm(50, m=50, sd=10)
+dbinom(x, size, prob)
+pbinom(q, size, prob)
+qbinom(p, size, prob)
+rbinom(n, size, prob)	binomial distribution where size is the sample size
+and prob is the probability of a heads (pi)
+# prob of 0 to 5 heads of fair coin out of 10 flips
+dbinom(0:5, 10, .5)
+# prob of 5 or less heads of fair coin out of 10 flips
+pbinom(5, 10, .5)
+dpois(x, lamda)
+ppois(q, lamda)
+qpois(p, lamda)
+rpois(n, lamda)	poisson distribution with m=std=lamda
+#probability of 0,1, or 2 events with lamda=4
+dpois(0:2, 4)
+# probability of at least 3 events with lamda=4
+1- ppois(2,4)
+dunif(x, min=0, max=1)
+punif(q, min=0, max=1)
+qunif(p, min=0, max=1)
+runif(n, min=0, max=1)	uniform distribution, follows the same pattern
+as the normal distribution above.
+#10 uniform random variates
+x <- runif(10)
+=end
+    end
+=begin
+    #======================================================================================
+    #
+    #======================================================================================
+    should "integrate Ruby sequence with R sequence" do
+      seq = R.seq(2, 10)
+      res = R.eval <<EOF
+      print(#{seq.r});
+      print(#{seq.r});
+print(ls());
+EOF
+      # remove the variable from R
+      seq.destroy
+      R.eval("print(ls())")
+    end
+    #======================================================================================
+    #
+    #======================================================================================
+    should "integrate MDArray with R vector" do
+      # typed_arange does the same as arange but for arrays of other type
+      arr = MDArray.typed_arange(:double, 60)
+      # MDArray is stored in row-major order
+      arr.reshape!([5, 3, 4])
+      # arr.print
+      R.eval <<EOF
+      print(#{arr.r});
+      vec = #{arr.r};
+print(vec);
+print(vec[1, 1, 1]);
+EOF
+    end
+=end
+  end
+end