RubyGems - sciruby - Versions diffs - 0.1.0 → 0.1.3 - Mend

sciruby 0.1.0 → 0.1.3

Files changed (121) hide show

data/.autotest +23 -0
data/.gemtest +0 -0
data/History.txt +6 -0
data/Manifest.txt +119 -0
data/Rakefile +178 -0
data/bin/sciruby-plotter +12 -0
data/data/r/man/AirPassengers.Rd +51 -0
data/data/r/man/BJsales.Rd +34 -0
data/data/r/man/BOD.Rd +53 -0
data/data/r/man/ChickWeight.Rd +68 -0
data/data/r/man/DNase.Rd +63 -0
data/data/r/man/EuStockMarkets.Rd +28 -0
data/data/r/man/Formaldehyde.Rd +44 -0
data/data/r/man/HairEyeColor.Rd +77 -0
data/data/r/man/Harman23.cor.Rd +25 -0
data/data/r/man/Harman74.cor.Rd +28 -0
data/data/r/man/Indometh.Rd +57 -0
data/data/r/man/InsectSprays.Rd +45 -0
data/data/r/man/JohnsonJohnson.Rd +37 -0
data/data/r/man/LakeHuron.Rd +27 -0
data/data/r/man/LifeCycleSavings.Rd +54 -0
data/data/r/man/Loblolly.Rd +56 -0
data/data/r/man/Nile.Rd +78 -0
data/data/r/man/Orange.Rd +57 -0
data/data/r/man/OrchardSprays.Rd +62 -0
data/data/r/man/PlantGrowth.Rd +39 -0
data/data/r/man/Puromycin.Rd +84 -0
data/data/r/man/Theoph.Rd +84 -0
data/data/r/man/Titanic.Rd +73 -0
data/data/r/man/ToothGrowth.Rd +40 -0
data/data/r/man/UCBAdmissions.Rd +68 -0
data/data/r/man/UKDriverDeaths.Rd +72 -0
data/data/r/man/UKLungDeaths.Rd +40 -0
data/data/r/man/UKgas.Rd +25 -0
data/data/r/man/USAccDeaths.Rd +23 -0
data/data/r/man/USArrests.Rd +45 -0
data/data/r/man/USJudgeRatings.Rd +38 -0
data/data/r/man/USPersonalExpenditure.Rd +33 -0
data/data/r/man/VADeaths.Rd +51 -0
data/data/r/man/WWWusage.Rd +41 -0
data/data/r/man/WorldPhones.Rd +40 -0
data/data/r/man/ability.cov.Rd +50 -0
data/data/r/man/airmiles.Rd +29 -0
data/data/r/man/airquality.Rd +56 -0
data/data/r/man/anscombe.Rd +62 -0
data/data/r/man/attenu.Rd +66 -0
data/data/r/man/attitude.Rd +48 -0
data/data/r/man/austres.Rd +22 -0
data/data/r/man/beavers.Rd +73 -0
data/data/r/man/cars.Rd +59 -0
data/data/r/man/chickwts.Rd +47 -0
data/data/r/man/co2.Rd +43 -0
data/data/r/man/crimtab.Rd +129 -0
data/data/r/man/datasets-package.Rd +24 -0
data/data/r/man/discoveries.Rd +30 -0
data/data/r/man/esoph.Rd +66 -0
data/data/r/man/euro.Rd +56 -0
data/data/r/man/eurodist.Rd +25 -0
data/data/r/man/faithful.Rd +63 -0
data/data/r/man/freeny.Rd +56 -0
data/data/r/man/infert.Rd +56 -0
data/data/r/man/iris.Rd +62 -0
data/data/r/man/islands.Rd +29 -0
data/data/r/man/lh.Rd +22 -0
data/data/r/man/longley.Rd +56 -0
data/data/r/man/lynx.Rd +33 -0
data/data/r/man/morley.Rd +50 -0
data/data/r/man/mtcars.Rd +44 -0
data/data/r/man/nhtemp.Rd +30 -0
data/data/r/man/nottem.Rd +30 -0
data/data/r/man/occupationalStatus.Rd +44 -0
data/data/r/man/precip.Rd +31 -0
data/data/r/man/presidents.Rd +36 -0
data/data/r/man/pressure.Rd +41 -0
data/data/r/man/quakes.Rd +40 -0
data/data/r/man/randu.Rd +46 -0
data/data/r/man/rivers.Rd +21 -0
data/data/r/man/rock.Rd +34 -0
data/data/r/man/sleep.Rd +51 -0
data/data/r/man/stackloss.Rd +77 -0
data/data/r/man/state.Rd +80 -0
data/data/r/man/sunspot.month.Rd +49 -0
data/data/r/man/sunspot.year.Rd +26 -0
data/data/r/man/sunspots.Rd +33 -0
data/data/r/man/swiss.Rd +79 -0
data/data/r/man/treering.Rd +38 -0
data/data/r/man/trees.Rd +48 -0
data/data/r/man/uspop.Rd +27 -0
data/data/r/man/volcano.Rd +31 -0
data/data/r/man/warpbreaks.Rd +56 -0
data/data/r/man/women.Rd +40 -0
data/data/r/man/zCO2.Rd +81 -0
data/lib/ext/csv.rb +22 -0
data/lib/ext/shoes.rb +131 -0
data/lib/ext/string.rb +39 -0
data/lib/sciruby.rb +50 -4
data/lib/sciruby/analysis.rb +98 -0
data/lib/sciruby/analysis/suite.rb +87 -0
data/lib/sciruby/analysis/suite_report_builder.rb +44 -0
data/lib/sciruby/config.rb +93 -0
data/lib/sciruby/data.rb +168 -0
data/lib/sciruby/data/guardian.rb +96 -0
data/lib/sciruby/data/r.rb +155 -0
data/lib/sciruby/data/r/base.rb +110 -0
data/lib/sciruby/data/r/data_frame.rb +24 -0
data/lib/sciruby/data/r/grouped_data.rb +7 -0
data/lib/sciruby/data/r/list.rb +20 -0
data/lib/sciruby/data/r/multi_time_series.rb +24 -0
data/lib/sciruby/data/r/r_matrix.rb +7 -0
data/lib/sciruby/data/r/time_series.rb +19 -0
data/lib/sciruby/data/r/time_series_base.rb +40 -0
data/lib/sciruby/data/r/vector.rb +125 -0
data/lib/sciruby/editor.rb +82 -0
data/lib/sciruby/plotter.rb +128 -0
data/lib/sciruby/recommend.rb +4 -0
data/lib/sciruby/validation.rb +368 -0
data/readme.md +75 -0
data/static/sciruby-icon.png +0 -0
data/test/helpers_tests.rb +58 -0
data/test/test_recommend.rb +16 -0
metadata +396 -20

data/data/r/man/discoveries.Rd ADDED Viewed

@@ -0,0 +1,30 @@
+% File src/library/datasets/man/discoveries.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{discoveries}
+\docType{data}
+\alias{discoveries}
+\title{Yearly Numbers of Important Discoveries}
+\description{
+  The numbers of \dQuote{great} inventions and scientific
+  discoveries in each year from 1860 to 1959.
+}
+\usage{discoveries}
+\format{A time series of 100 values.}
+\source{
+  The World Almanac and Book of Facts, 1975 Edition, pages 315--318.
+}
+\references{
+  McNeil, D. R. (1977)
+  \emph{Interactive Data Analysis}.
+  Wiley.
+}
+\examples{
+require(graphics)
+plot(discoveries, ylab = "Number of important discoveries",
+     las = 1)
+title(main = "discoveries data set")
+}
+\keyword{datasets}

data/data/r/man/esoph.Rd ADDED Viewed

@@ -0,0 +1,66 @@
+% File src/library/datasets/man/esoph.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{esoph}
+\docType{data}
+\alias{esoph}
+\title{Smoking, Alcohol and (O)esophageal Cancer}
+\description{
+  Data from a case-control study of (o)esophageal cancer in
+  Ile-et-Vilaine, France.
+}
+\usage{esoph}
+\format{
+  A data frame with records for 88 age/alcohol/tobacco combinations.
+  \tabular{rlll}{
+    [,1] \tab "agegp" \tab Age group \tab 1  25--34 years\cr
+    \tab \tab \tab 2  35--44\cr
+    \tab \tab \tab 3  45--54\cr
+    \tab \tab \tab 4  55--64\cr
+    \tab \tab \tab 5  65--74\cr
+    \tab \tab \tab 6  75+\cr
+    [,2] \tab "alcgp" \tab Alcohol consumption \tab 1   0--39 gm/day\cr
+    \tab \tab \tab 2  40--79\cr
+    \tab \tab \tab 3  80--119\cr
+    \tab \tab \tab 4  120+\cr
+    [,3] \tab "tobgp" \tab Tobacco consumption \tab 1   0-- 9 gm/day\cr
+    \tab \tab \tab 2  10--19\cr
+    \tab \tab \tab 3  20--29\cr
+    \tab \tab \tab 4  30+\cr
+    [,4] \tab "ncases" \tab Number of cases \tab \cr
+    [,5] \tab "ncontrols" \tab Number of controls \tab
+  }
+}
+\source{
+  Breslow, N. E. and Day, N. E. (1980)
+  \emph{Statistical Methods in Cancer Research. 1: The Analysis of
+    Case-Control Studies.}  IARC Lyon / Oxford University Press.
+}
+\author{Thomas Lumley}
+\examples{
+require(stats)
+require(graphics) # for mosaicplot
+summary(esoph)
+## effects of alcohol, tobacco and interaction, age-adjusted
+model1 <- glm(cbind(ncases, ncontrols) ~ agegp + tobgp * alcgp,
+              data = esoph, family = binomial())
+anova(model1)
+## Try a linear effect of alcohol and tobacco
+model2 <- glm(cbind(ncases, ncontrols) ~ agegp + unclass(tobgp)
+                                         + unclass(alcgp),
+              data = esoph, family = binomial())
+summary(model2)
+## Re-arrange data for a mosaic plot
+ttt <- table(esoph$agegp, esoph$alcgp, esoph$tobgp)
+o <- with(esoph, order(tobgp, alcgp, agegp))
+ttt[ttt == 1] <- esoph$ncases[o]
+tt1 <- table(esoph$agegp, esoph$alcgp, esoph$tobgp)
+tt1[tt1 == 1] <- esoph$ncontrols[o]
+tt <- array(c(ttt, tt1), c(dim(ttt),2),
+            c(dimnames(ttt), list(c("Cancer", "control"))))
+mosaicplot(tt, main = "esoph data set", color = TRUE)
+}
+\keyword{datasets}

data/data/r/man/euro.Rd ADDED Viewed

@@ -0,0 +1,56 @@
+% File src/library/datasets/man/euro.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{euro}
+\docType{data}
+\alias{euro}
+\alias{euro.cross}
+\title{Conversion Rates of Euro Currencies}
+\description{Conversion rates between the various Euro currencies.}
+\usage{
+euro
+euro.cross
+}
+\format{
+  \code{euro} is a named vector of length 11, \code{euro.cross} a
+  matrix of size 11 by 11, with dimnames.
+}
+\details{
+  The data set \code{euro} contains the value of 1 Euro in all
+  currencies participating in the European monetary union (Austrian
+  Schilling ATS, Belgian Franc BEF, German Mark DEM, Spanish Peseta ESP,
+  Finnish Markka FIM, French Franc FRF, Irish Punt IEP, Italian Lira
+  ITL, Luxembourg Franc LUF, Dutch Guilder NLG and Portuguese Escudo
+  PTE).  These conversion rates were fixed by the European Union on
+  December 31, 1998.  To convert old prices to Euro prices, divide by
+  the respective rate and round to 2 digits.
+  The data set \code{euro.cross} contains conversion rates between the
+  various Euro currencies, i.e., the result of
+  \code{outer(1 / euro, euro)}.
+}
+\examples{
+cbind(euro)
+## These relations hold:
+euro == signif(euro,6) # [6 digit precision in Euro's definition]
+all(euro.cross == outer(1/euro, euro))
+## Convert 20 Euro to Belgian Franc
+20 * euro["BEF"]
+## Convert 20 Austrian Schilling to Euro
+20 / euro["ATS"]
+## Convert 20 Spanish Pesetas to Italian Lira
+20 * euro.cross["ESP", "ITL"]
+require(graphics)
+dotchart(euro,
+         main = "euro data: 1 Euro in currency unit")
+dotchart(1/euro,
+         main = "euro data: 1 currency unit in Euros")
+dotchart(log(euro, 10),
+         main = "euro data: log10(1 Euro in currency unit)")
+}
+\keyword{datasets}

data/data/r/man/eurodist.Rd ADDED Viewed

@@ -0,0 +1,25 @@
+% File src/library/datasets/man/eurodist.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{eurodist}
+\docType{data}
+\alias{eurodist}
+\title{Distances Between European Cities}
+\description{
+  The data give the road distances (in km) between 21 cities in Europe.
+  The data are taken from a table in \emph{The Cambridge Encyclopaedia}.
+}
+\usage{eurodist}
+\format{
+  A \code{dist} object based on 21 objects.
+  (You must have the \pkg{stats} package loaded to have the methods for this
+  kind of object available.)
+}
+\source{
+  Crystal, D. Ed. (1990)
+  \emph{The Cambridge Encyclopaedia}.
+  Cambridge: Cambridge University Press.
+}
+\keyword{datasets}

data/data/r/man/faithful.Rd ADDED Viewed

@@ -0,0 +1,63 @@
+% File src/library/datasets/man/faithful.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{faithful}
+\docType{data}
+\alias{faithful}
+\encoding{UTF-8}
+\title{Old Faithful Geyser Data}
+\description{
+  Waiting time between eruptions and the duration of the eruption for
+  the Old Faithful geyser in Yellowstone National Park, Wyoming, USA.
+}
+\usage{faithful}
+\format{A data frame with 272 observations on 2 variables.
+  \tabular{rlll}{
+    [,1]  \tab eruptions  \tab numeric  \tab Eruption time in mins \cr
+    [,2]  \tab waiting    \tab numeric  \tab Waiting time to next
+    eruption (in mins)\cr
+  }
+}
+\source{W. \enc{Härdle}{Haerdle}.}
+\references{
+  \enc{Härdle}{Haerdle}, W. (1991)
+  \emph{Smoothing Techniques with Implementation in S}.
+  New York: Springer.
+  Azzalini, A. and Bowman, A. W. (1990).
+  A look at some data on the Old Faithful geyser.
+  \emph{Applied Statistics} \bold{39}, 357--365.
+}
+\details{
+  A closer look at \code{faithful$eruptions} reveals that these are
+  heavily rounded times originally in seconds, where multiples of 5 are
+  more frequent than expected under non-human measurement.  For a
+  better version of the eruption times, see the example below.
+  There are many versions of this dataset around: Azzalini and Bowman
+  (1990) use a more complete version.
+}
+\seealso{
+  \code{geyser} in package \pkg{MASS} for the Azzalini--Bowman version.
+}
+\examples{
+require(stats); require(graphics)
+f.tit <-  "faithful data: Eruptions of Old Faithful"
+ne60 <- round(e60 <- 60 * faithful$eruptions)
+all.equal(e60, ne60)             # relative diff. ~ 1/10000
+table(zapsmall(abs(e60 - ne60))) # 0, 0.02 or 0.04
+faithful$better.eruptions <- ne60 / 60
+te <- table(ne60)
+te[te >= 4]                      # (too) many multiples of 5 !
+plot(names(te), te, type="h", main = f.tit, xlab = "Eruption time (sec)")
+plot(faithful[, -3], main = f.tit,
+     xlab = "Eruption time (min)",
+     ylab = "Waiting time to next eruption (min)")
+lines(lowess(faithful$eruptions, faithful$waiting, f = 2/3, iter = 3),
+      col = "red")
+}
+\keyword{datasets}

data/data/r/man/freeny.Rd ADDED Viewed

@@ -0,0 +1,56 @@
+% File src/library/datasets/man/freeny.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{freeny}
+\docType{data}
+\alias{freeny}
+\alias{freeny.x}
+\alias{freeny.y}
+\title{Freeny's Revenue Data}
+\description{
+  Freeny's data on quarterly revenue and explanatory variables.
+}
+\usage{
+freeny
+freeny.x
+freeny.y
+}
+\format{
+  There are three \sQuote{freeny} data sets.
+  \code{freeny.y} is a time series with 39 observations on quarterly
+  revenue from (1962,2Q) to (1971,4Q).
+  \code{freeny.x} is a matrix of explanatory variables.  The columns
+  are \code{freeny.y} lagged 1 quarter, price index, income level, and
+  market potential.
+  Finally, \code{freeny} is a data frame with variables \code{y},
+  \code{lag.quarterly.revenue}, \code{price.index}, \code{income.level},
+  and \code{market.potential} obtained from the above two data objects.
+}
+\source{
+  A. E. Freeny (1977)
+  \emph{A Portable Linear Regression Package with Test Programs}.
+  Bell Laboratories memorandum.
+}
+\references{
+  Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
+  \emph{The New S Language}.
+  Wadsworth & Brooks/Cole.
+}
+\examples{
+require(stats); require(graphics)
+summary(freeny)
+pairs(freeny, main = "freeny data")
+# gives warning: freeny$y has class "ts"
+summary(fm1 <- lm(y ~ ., data = freeny))
+opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
+            mar = c(4.1, 4.1, 2.1, 1.1))
+plot(fm1)
+par(opar)
+}
+\keyword{datasets}

data/data/r/man/infert.Rd ADDED Viewed

@@ -0,0 +1,56 @@
+% File src/library/datasets/man/infert.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{infert}
+\docType{data}
+\alias{infert}
+\title{Infertility after Spontaneous and Induced Abortion}
+\description{
+  This is a matched case-control study dating from before the
+  availability of conditional logistic regression.
+}
+\usage{infert}
+\format{
+  \tabular{rll}{
+    1.  \tab Education  \tab 0 = 0--5  years \cr
+        \tab            \tab 1 = 6--11 years \cr
+        \tab            \tab 2 = 12+  years  \cr
+    2.  \tab age        \tab age in years of case \cr
+    3.  \tab parity     \tab count \cr
+    4.  \tab number of prior \tab 0 = 0 \cr
+        \tab induced abortions \tab 1 = 1 \cr
+        \tab            \tab 2 = 2 or more \cr
+    5.  \tab case status\tab 1 = case \cr
+        \tab            \tab 0 = control \cr
+    6.  \tab number of prior \tab 0 = 0 \cr
+        \tab spontaneous abortions \tab 1 = 1 \cr
+        \tab            \tab 2 = 2 or more \cr
+    7.  \tab matched set number \tab 1--83 \cr
+    8.  \tab stratum number \tab 1--63}
+}
+\source{
+  Trichopoulos et al. (1976)
+  \emph{Br. J. of Obst. and Gynaec.} \bold{83}, 645--650.
+}
+\note{
+  One case with two prior spontaneous abortions and two prior induced
+  abortions is omitted.
+}
+\examples{
+require(stats)
+model1 <- glm(case ~ spontaneous+induced, data=infert,family=binomial())
+summary(model1)
+## adjusted for other potential confounders:
+summary(model2 <- glm(case ~ age+parity+education+spontaneous+induced,
+                data=infert,family=binomial()))
+## Really should be analysed by conditional logistic regression
+## which is in the survival package
+if(require(survival)){
+  model3 <- clogit(case~spontaneous+induced+strata(stratum),data=infert)
+  print(summary(model3))
+  detach()# survival (conflicts)
+}
+}
+\keyword{datasets}

data/data/r/man/iris.Rd ADDED Viewed

@@ -0,0 +1,62 @@
+% File src/library/datasets/man/iris.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{iris}
+\docType{data}
+\alias{iris}
+\alias{iris3}
+\title{Edgar Anderson's Iris Data}
+\description{
+  This famous (Fisher's or Anderson's) iris data set gives the
+  measurements in centimeters of the variables sepal length and width
+  and petal length and width, respectively, for 50 flowers from each
+  of 3 species of iris.  The species are \emph{Iris setosa},
+  \emph{versicolor}, and \emph{virginica}.
+}
+\usage{
+iris
+iris3
+}
+\format{
+  \code{iris} is a data frame with 150 cases (rows) and 5 variables
+  (columns) named \code{Sepal.Length}, \code{Sepal.Width},
+  \code{Petal.Length}, \code{Petal.Width}, and \code{Species}.
+  \code{iris3} gives the same data arranged as a 3-dimensional array
+  of size 50 by 4 by 3, as represented by S-PLUS.  The first dimension
+  gives the case number within the species subsample, the second the
+  measurements with names \code{Sepal L.}, \code{Sepal W.},
+  \code{Petal L.}, and \code{Petal W.}, and the third the species.
+}
+\source{
+  Fisher, R. A. (1936)
+  The use of multiple measurements in taxonomic problems.
+  \emph{Annals of Eugenics},
+  \bold{7}, Part II, 179--188.
+  The data were collected by
+  Anderson, Edgar (1935).
+  The irises of the Gaspe Peninsula,
+  \emph{Bulletin of the American Iris Society},
+  \bold{59}, 2--5.
+}
+\references{
+  Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
+  \emph{The New S Language}.
+  Wadsworth & Brooks/Cole. (has \code{iris3} as \code{iris}.)
+}
+\seealso{
+  \code{\link{matplot}}, some examples of which use
+  \code{iris}.
+}
+\examples{
+dni3 <- dimnames(iris3)
+ii <- data.frame(matrix(aperm(iris3, c(1,3,2)), ncol=4,
+                        dimnames = list(NULL, sub(" L.",".Length",
+                                        sub(" W.",".Width", dni3[[2]])))),
+    Species = gl(3, 50, labels=sub("S", "s", sub("V", "v", dni3[[3]]))))
+all.equal(ii, iris) # TRUE
+}
+\keyword{datasets}

data/data/r/man/islands.Rd ADDED Viewed

@@ -0,0 +1,29 @@
+% File src/library/datasets/man/islands.Rd
+% Part of the R package, http://www.R-project.org
+% Copyright 1995-2007 R Core Development Team
+% Distributed under GPL 2 or later
+\name{islands}
+\docType{data}
+\alias{islands}
+\title{Areas of the World's Major Landmasses}
+\description{
+  The areas in thousands of square miles of the landmasses which exceed
+  10,000 square miles.
+}
+\usage{islands}
+\format{A named vector of length 48.}
+\source{The World Almanac and Book of Facts, 1975, page 406.}
+\references{
+  McNeil, D. R. (1977)
+  \emph{Interactive Data Analysis}.
+  Wiley.
+}
+\examples{
+require(graphics)
+dotchart(log(islands, 10),
+   main = "islands data: log10(area) (log10(sq. miles))")
+dotchart(log(islands[order(islands)], 10),
+   main = "islands data: log10(area) (log10(sq. miles))")
+}
+\keyword{datasets}