sciruby 0.1.3 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/CHANGES +3 -0
- data/CONTRIBUTING.md +46 -0
- data/Gemfile +34 -0
- data/LICENSE.txt +21 -0
- data/README.rdoc +28 -0
- data/lib/sciruby/gems.rb +27 -0
- data/lib/sciruby/version.rb +3 -0
- data/lib/sciruby.rb +2 -77
- data/sciruby.gemspec +35 -0
- metadata +51 -413
- data/.autotest +0 -23
- data/.gemtest +0 -0
- data/History.txt +0 -6
- data/Manifest.txt +0 -119
- data/Rakefile +0 -178
- data/bin/sciruby-plotter +0 -12
- data/data/r/man/AirPassengers.Rd +0 -51
- data/data/r/man/BJsales.Rd +0 -34
- data/data/r/man/BOD.Rd +0 -53
- data/data/r/man/ChickWeight.Rd +0 -68
- data/data/r/man/DNase.Rd +0 -63
- data/data/r/man/EuStockMarkets.Rd +0 -28
- data/data/r/man/Formaldehyde.Rd +0 -44
- data/data/r/man/HairEyeColor.Rd +0 -77
- data/data/r/man/Harman23.cor.Rd +0 -25
- data/data/r/man/Harman74.cor.Rd +0 -28
- data/data/r/man/Indometh.Rd +0 -57
- data/data/r/man/InsectSprays.Rd +0 -45
- data/data/r/man/JohnsonJohnson.Rd +0 -37
- data/data/r/man/LakeHuron.Rd +0 -27
- data/data/r/man/LifeCycleSavings.Rd +0 -54
- data/data/r/man/Loblolly.Rd +0 -56
- data/data/r/man/Nile.Rd +0 -78
- data/data/r/man/Orange.Rd +0 -57
- data/data/r/man/OrchardSprays.Rd +0 -62
- data/data/r/man/PlantGrowth.Rd +0 -39
- data/data/r/man/Puromycin.Rd +0 -84
- data/data/r/man/Theoph.Rd +0 -84
- data/data/r/man/Titanic.Rd +0 -73
- data/data/r/man/ToothGrowth.Rd +0 -40
- data/data/r/man/UCBAdmissions.Rd +0 -68
- data/data/r/man/UKDriverDeaths.Rd +0 -72
- data/data/r/man/UKLungDeaths.Rd +0 -40
- data/data/r/man/UKgas.Rd +0 -25
- data/data/r/man/USAccDeaths.Rd +0 -23
- data/data/r/man/USArrests.Rd +0 -45
- data/data/r/man/USJudgeRatings.Rd +0 -38
- data/data/r/man/USPersonalExpenditure.Rd +0 -33
- data/data/r/man/VADeaths.Rd +0 -51
- data/data/r/man/WWWusage.Rd +0 -41
- data/data/r/man/WorldPhones.Rd +0 -40
- data/data/r/man/ability.cov.Rd +0 -50
- data/data/r/man/airmiles.Rd +0 -29
- data/data/r/man/airquality.Rd +0 -56
- data/data/r/man/anscombe.Rd +0 -62
- data/data/r/man/attenu.Rd +0 -66
- data/data/r/man/attitude.Rd +0 -48
- data/data/r/man/austres.Rd +0 -22
- data/data/r/man/beavers.Rd +0 -73
- data/data/r/man/cars.Rd +0 -59
- data/data/r/man/chickwts.Rd +0 -47
- data/data/r/man/co2.Rd +0 -43
- data/data/r/man/crimtab.Rd +0 -129
- data/data/r/man/datasets-package.Rd +0 -24
- data/data/r/man/discoveries.Rd +0 -30
- data/data/r/man/esoph.Rd +0 -66
- data/data/r/man/euro.Rd +0 -56
- data/data/r/man/eurodist.Rd +0 -25
- data/data/r/man/faithful.Rd +0 -63
- data/data/r/man/freeny.Rd +0 -56
- data/data/r/man/infert.Rd +0 -56
- data/data/r/man/iris.Rd +0 -62
- data/data/r/man/islands.Rd +0 -29
- data/data/r/man/lh.Rd +0 -22
- data/data/r/man/longley.Rd +0 -56
- data/data/r/man/lynx.Rd +0 -33
- data/data/r/man/morley.Rd +0 -50
- data/data/r/man/mtcars.Rd +0 -44
- data/data/r/man/nhtemp.Rd +0 -30
- data/data/r/man/nottem.Rd +0 -30
- data/data/r/man/occupationalStatus.Rd +0 -44
- data/data/r/man/precip.Rd +0 -31
- data/data/r/man/presidents.Rd +0 -36
- data/data/r/man/pressure.Rd +0 -41
- data/data/r/man/quakes.Rd +0 -40
- data/data/r/man/randu.Rd +0 -46
- data/data/r/man/rivers.Rd +0 -21
- data/data/r/man/rock.Rd +0 -34
- data/data/r/man/sleep.Rd +0 -51
- data/data/r/man/stackloss.Rd +0 -77
- data/data/r/man/state.Rd +0 -80
- data/data/r/man/sunspot.month.Rd +0 -49
- data/data/r/man/sunspot.year.Rd +0 -26
- data/data/r/man/sunspots.Rd +0 -33
- data/data/r/man/swiss.Rd +0 -79
- data/data/r/man/treering.Rd +0 -38
- data/data/r/man/trees.Rd +0 -48
- data/data/r/man/uspop.Rd +0 -27
- data/data/r/man/volcano.Rd +0 -31
- data/data/r/man/warpbreaks.Rd +0 -56
- data/data/r/man/women.Rd +0 -40
- data/data/r/man/zCO2.Rd +0 -81
- data/lib/ext/csv.rb +0 -22
- data/lib/ext/shoes.rb +0 -131
- data/lib/ext/string.rb +0 -39
- data/lib/sciruby/analysis/suite.rb +0 -87
- data/lib/sciruby/analysis/suite_report_builder.rb +0 -44
- data/lib/sciruby/analysis.rb +0 -98
- data/lib/sciruby/config.rb +0 -93
- data/lib/sciruby/data/guardian.rb +0 -96
- data/lib/sciruby/data/r/base.rb +0 -110
- data/lib/sciruby/data/r/data_frame.rb +0 -24
- data/lib/sciruby/data/r/grouped_data.rb +0 -7
- data/lib/sciruby/data/r/list.rb +0 -20
- data/lib/sciruby/data/r/multi_time_series.rb +0 -24
- data/lib/sciruby/data/r/r_matrix.rb +0 -7
- data/lib/sciruby/data/r/time_series.rb +0 -19
- data/lib/sciruby/data/r/time_series_base.rb +0 -40
- data/lib/sciruby/data/r/vector.rb +0 -125
- data/lib/sciruby/data/r.rb +0 -155
- data/lib/sciruby/data.rb +0 -168
- data/lib/sciruby/editor.rb +0 -82
- data/lib/sciruby/plotter.rb +0 -128
- data/lib/sciruby/recommend.rb +0 -70
- data/lib/sciruby/validation.rb +0 -368
- data/readme.md +0 -75
- data/static/sciruby-icon.png +0 -0
- data/test/helpers_tests.rb +0 -58
- data/test/test_recommend.rb +0 -16
data/data/r/man/airquality.Rd
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/airquality.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{airquality}
|
7
|
-
\docType{data}
|
8
|
-
\alias{airquality}
|
9
|
-
\title{New York Air Quality Measurements}
|
10
|
-
\description{
|
11
|
-
Daily air quality measurements in New York, May to September 1973.
|
12
|
-
}
|
13
|
-
\usage{airquality}
|
14
|
-
\format{
|
15
|
-
A data frame with 153 observations on 6 variables.
|
16
|
-
|
17
|
-
\tabular{rlll}{
|
18
|
-
\code{[,1]} \tab \code{Ozone} \tab numeric \tab Ozone (ppb)\cr
|
19
|
-
\code{[,2]} \tab \code{Solar.R} \tab numeric \tab Solar R (lang)\cr
|
20
|
-
\code{[,3]} \tab \code{Wind} \tab numeric \tab Wind (mph)\cr
|
21
|
-
\code{[,4]} \tab \code{Temp} \tab numeric \tab Temperature (degrees F)\cr
|
22
|
-
\code{[,5]} \tab \code{Month} \tab numeric \tab Month (1--12)\cr
|
23
|
-
\code{[,6]} \tab \code{Day} \tab numeric \tab Day of month (1--31)
|
24
|
-
}
|
25
|
-
}
|
26
|
-
\details{
|
27
|
-
Daily readings of the following air quality values for May 1, 1973 (a
|
28
|
-
Tuesday) to September 30, 1973.
|
29
|
-
|
30
|
-
\itemize{
|
31
|
-
\item \code{Ozone}: Mean ozone in parts per
|
32
|
-
billion from 1300 to 1500 hours at Roosevelt Island
|
33
|
-
\item \code{Solar.R}: Solar radiation
|
34
|
-
in Langleys in the frequency band 4000--7700 Angstroms from
|
35
|
-
0800 to 1200 hours at Central Park
|
36
|
-
\item \code{Wind}: Average wind speed in miles
|
37
|
-
per hour at 0700 and 1000 hours at LaGuardia Airport
|
38
|
-
\item \code{Temp}: Maximum daily
|
39
|
-
temperature in degrees Fahrenheit at LaGuardia Airport.
|
40
|
-
}
|
41
|
-
}
|
42
|
-
\source{
|
43
|
-
The data were obtained from the New York State Department of
|
44
|
-
Conservation (ozone data) and the National Weather Service
|
45
|
-
(meteorological data).
|
46
|
-
}
|
47
|
-
\references{
|
48
|
-
Chambers, J. M., Cleveland, W. S., Kleiner, B. and Tukey, P. A. (1983)
|
49
|
-
\emph{Graphical Methods for Data Analysis}.
|
50
|
-
Belmont, CA: Wadsworth.
|
51
|
-
}
|
52
|
-
\examples{
|
53
|
-
require(graphics)
|
54
|
-
pairs(airquality, panel = panel.smooth, main = "airquality data")
|
55
|
-
}
|
56
|
-
\keyword{datasets}
|
data/data/r/man/anscombe.Rd
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/anscombe.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2011 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{anscombe}
|
7
|
-
\docType{data}
|
8
|
-
\alias{anscombe}
|
9
|
-
\title{Anscombe's Quartet of \sQuote{Identical} Simple Linear Regressions}
|
10
|
-
\description{
|
11
|
-
Four \eqn{x}-\eqn{y} datasets which have the same traditional
|
12
|
-
statistical properties (mean, variance, correlation, regression line,
|
13
|
-
etc.), yet are quite different.
|
14
|
-
}
|
15
|
-
\usage{anscombe}
|
16
|
-
\format{
|
17
|
-
A data frame with 11 observations on 8 variables.
|
18
|
-
\tabular{rl}{
|
19
|
-
x1 == x2 == x3 \tab the integers 4:14, specially arranged \cr
|
20
|
-
x4 \tab values 8 and 19 \cr
|
21
|
-
y1, y2, y3, y4 \tab numbers in (3, 12.5) with mean 7.5 and sdev 2.03}
|
22
|
-
}
|
23
|
-
\source{
|
24
|
-
Tufte, Edward R. (1989)
|
25
|
-
\emph{The Visual Display of Quantitative Information}, 13--14.
|
26
|
-
Graphics Press.
|
27
|
-
}
|
28
|
-
\references{
|
29
|
-
Anscombe, Francis J. (1973) Graphs in statistical analysis.
|
30
|
-
\emph{American Statistician}, \bold{27}, 17--21.
|
31
|
-
}
|
32
|
-
\examples{
|
33
|
-
require(stats); require(graphics)
|
34
|
-
summary(anscombe)
|
35
|
-
|
36
|
-
##-- now some "magic" to do the 4 regressions in a loop:
|
37
|
-
ff <- y ~ x
|
38
|
-
for(i in 1:4) {
|
39
|
-
ff[2:3] <- lapply(paste(c("y","x"), i, sep=""), as.name)
|
40
|
-
## or ff[[2]] <- as.name(paste("y", i, sep=""))
|
41
|
-
## ff[[3]] <- as.name(paste("x", i, sep=""))
|
42
|
-
assign(paste("lm.",i,sep=""), lmi <- lm(ff, data= anscombe))
|
43
|
-
print(anova(lmi))
|
44
|
-
}
|
45
|
-
|
46
|
-
## See how close they are (numerically!)
|
47
|
-
sapply(objects(pattern="lm\\\\.[1-4]$"), function(n) coef(get(n)))
|
48
|
-
lapply(objects(pattern="lm\\\\.[1-4]$"),
|
49
|
-
function(n) coef(summary(get(n))))
|
50
|
-
|
51
|
-
## Now, do what you should have done in the first place: PLOTS
|
52
|
-
op <- par(mfrow=c(2,2), mar=.1+c(4,4,1,1), oma= c(0,0,2,0))
|
53
|
-
for(i in 1:4) {
|
54
|
-
ff[2:3] <- lapply(paste(c("y","x"), i, sep=""), as.name)
|
55
|
-
plot(ff, data =anscombe, col="red", pch=21, bg = "orange", cex = 1.2,
|
56
|
-
xlim=c(3,19), ylim=c(3,13))
|
57
|
-
abline(get(paste("lm.",i,sep="")), col="blue")
|
58
|
-
}
|
59
|
-
mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex=1.5)
|
60
|
-
par(op)
|
61
|
-
}
|
62
|
-
\keyword{datasets}
|
data/data/r/man/attenu.Rd
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/attenu.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{attenu}
|
7
|
-
\docType{data}
|
8
|
-
\alias{attenu}
|
9
|
-
\title{The Joyner--Boore Attenuation Data}
|
10
|
-
\description{
|
11
|
-
This data gives peak accelerations measured at various observation
|
12
|
-
stations for 23 earthquakes in California. The data have been used
|
13
|
-
by various workers to estimate the attenuating effect of distance
|
14
|
-
on ground acceleration.
|
15
|
-
}
|
16
|
-
\usage{attenu}
|
17
|
-
\format{
|
18
|
-
A data frame with 182 observations on 5 variables.
|
19
|
-
\tabular{rlll}{
|
20
|
-
[,1] \tab event \tab numeric \tab Event Number\cr
|
21
|
-
[,2] \tab mag \tab numeric \tab Moment Magnitude\cr
|
22
|
-
[,3] \tab station \tab factor \tab Station Number\cr
|
23
|
-
[,4] \tab dist \tab numeric \tab Station-hypocenter distance (km)\cr
|
24
|
-
[,5] \tab accel \tab numeric \tab Peak acceleration (g)}
|
25
|
-
}
|
26
|
-
\source{
|
27
|
-
Joyner, W.B., D.M. Boore and R.D. Porcella (1981). Peak horizontal
|
28
|
-
acceleration and velocity from strong-motion records including
|
29
|
-
records from the 1979 Imperial Valley, California earthquake. USGS
|
30
|
-
Open File report 81-365. Menlo Park, Ca.
|
31
|
-
}
|
32
|
-
\references{
|
33
|
-
Boore, D. M. and Joyner, W. B. (1982)
|
34
|
-
The empirical prediction of ground motion,
|
35
|
-
\emph{Bull. Seism. Soc. Am.}, \bold{72}, S269--S268.
|
36
|
-
|
37
|
-
Bolt, B. A. and Abrahamson, N. A. (1982)
|
38
|
-
New attenuation relations for peak and expected accelerations of
|
39
|
-
strong ground motion,
|
40
|
-
\emph{Bull. Seism. Soc. Am.}, \bold{72}, 2307--2321.
|
41
|
-
|
42
|
-
Bolt B. A. and Abrahamson, N. A. (1983)
|
43
|
-
Reply to W. B. Joyner & D. M. Boore's \dQuote{Comments on: New
|
44
|
-
attenuation relations for peak and expected accelerations of strong
|
45
|
-
ground motion},
|
46
|
-
\emph{Bull. Seism. Soc. Am.}, \bold{73}, 1481--1483.
|
47
|
-
|
48
|
-
Brillinger, D. R. and Preisler, H. K. (1984)
|
49
|
-
An exploratory analysis of the Joyner-Boore attenuation data,
|
50
|
-
\emph{Bull. Seism. Soc. Am.}, \bold{74}, 1441--1449.
|
51
|
-
|
52
|
-
Brillinger, D. R. and Preisler, H. K. (1984)
|
53
|
-
\emph{Further analysis of the Joyner-Boore attenuation data}.
|
54
|
-
Manuscript.
|
55
|
-
}
|
56
|
-
\examples{
|
57
|
-
require(graphics)
|
58
|
-
## check the data class of the variables
|
59
|
-
sapply(attenu, data.class)
|
60
|
-
summary(attenu)
|
61
|
-
pairs(attenu, main = "attenu data")
|
62
|
-
coplot(accel ~ dist | as.factor(event), data = attenu, show.given = FALSE)
|
63
|
-
coplot(log(accel) ~ log(dist) | as.factor(event),
|
64
|
-
data = attenu, panel = panel.smooth, show.given = FALSE)
|
65
|
-
}
|
66
|
-
\keyword{datasets}
|
data/data/r/man/attitude.Rd
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/attitude.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{attitude}
|
7
|
-
\docType{data}
|
8
|
-
\alias{attitude}
|
9
|
-
\title{The Chatterjee--Price Attitude Data}
|
10
|
-
\description{
|
11
|
-
From a survey of the clerical employees of a large financial
|
12
|
-
organization, the data are aggregated from the questionnaires of the
|
13
|
-
approximately 35 employees for each of 30 (randomly selected)
|
14
|
-
departments. The numbers give the percent proportion of favourable
|
15
|
-
responses to seven questions in each department.}
|
16
|
-
\usage{attitude}
|
17
|
-
\format{
|
18
|
-
A data frame with 30 observations on 7 variables. The first column gives
|
19
|
-
the short names from the reference, the second one the variable names
|
20
|
-
in the data frame:
|
21
|
-
\tabular{rlll}{
|
22
|
-
Y \tab rating \tab numeric \tab Overall rating \cr
|
23
|
-
X[1] \tab complaints\tab numeric \tab Handling of employee complaints \cr
|
24
|
-
X[2] \tab privileges\tab numeric \tab Does not allow special privileges \cr
|
25
|
-
X[3] \tab learning \tab numeric \tab Opportunity to learn \cr
|
26
|
-
X[4] \tab raises \tab numeric \tab Raises based on performance \cr
|
27
|
-
X[5] \tab critical \tab numeric \tab Too critical \cr
|
28
|
-
X[6] \tab advance \tab numeric \tab Advancement}
|
29
|
-
}
|
30
|
-
\source{
|
31
|
-
Chatterjee, S. and Price, B. (1977)
|
32
|
-
\emph{Regression Analysis by Example}.
|
33
|
-
New York: Wiley.
|
34
|
-
(Section 3.7, p.68ff of 2nd ed.(1991).)
|
35
|
-
}
|
36
|
-
\examples{
|
37
|
-
require(stats); require(graphics)
|
38
|
-
pairs(attitude, main = "attitude data")
|
39
|
-
summary(attitude)
|
40
|
-
summary(fm1 <- lm(rating ~ ., data = attitude))
|
41
|
-
opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
|
42
|
-
mar = c(4.1, 4.1, 2.1, 1.1))
|
43
|
-
plot(fm1)
|
44
|
-
summary(fm2 <- lm(rating ~ complaints, data = attitude))
|
45
|
-
plot(fm2)
|
46
|
-
par(opar)
|
47
|
-
}
|
48
|
-
\keyword{datasets}
|
data/data/r/man/austres.Rd
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/austres.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1994-9 W. N. Venables and B. D. Ripley
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{austres}
|
7
|
-
\docType{data}
|
8
|
-
\alias{austres}
|
9
|
-
\title{
|
10
|
-
Quarterly Time Series of the Number of Australian Residents
|
11
|
-
}
|
12
|
-
\description{
|
13
|
-
Numbers (in thousands) of Australian residents measured quarterly from
|
14
|
-
March 1971 to March 1994. The object is of class \code{"ts"}.
|
15
|
-
}
|
16
|
-
\usage{austres}
|
17
|
-
\source{
|
18
|
-
P. J. Brockwell and R. A. Davis (1996)
|
19
|
-
\emph{Introduction to Time Series and Forecasting.}
|
20
|
-
Springer
|
21
|
-
}
|
22
|
-
\keyword{datasets}
|
data/data/r/man/beavers.Rd
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/beavers.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1994-9 W. N. Venables and B. D. Ripley
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{beavers}
|
7
|
-
\docType{data}
|
8
|
-
\alias{beavers}
|
9
|
-
\alias{beaver1}
|
10
|
-
\alias{beaver2}
|
11
|
-
\title{Body Temperature Series of Two Beavers}
|
12
|
-
\usage{
|
13
|
-
beaver1
|
14
|
-
beaver2
|
15
|
-
}
|
16
|
-
\description{
|
17
|
-
Reynolds (1994) describes a small part of a study of the long-term
|
18
|
-
temperature dynamics of beaver \emph{Castor canadensis} in
|
19
|
-
north-central Wisconsin. Body temperature was measured by telemetry
|
20
|
-
every 10 minutes for four females, but data from one period of
|
21
|
-
less than a day for each of two animals is used there.
|
22
|
-
}
|
23
|
-
\format{
|
24
|
-
The \code{beaver1} data frame has 114 rows and 4 columns on body
|
25
|
-
temperature measurements at 10 minute intervals.
|
26
|
-
|
27
|
-
The \code{beaver2} data frame has 100 rows and 4 columns on body
|
28
|
-
temperature measurements at 10 minute intervals.
|
29
|
-
|
30
|
-
The variables are as follows:
|
31
|
-
\describe{
|
32
|
-
\item{day}{Day of observation (in days since the beginning of
|
33
|
-
1990), December 12--13 (\code{beaver1}) and November 3--4
|
34
|
-
(\code{beaver2}).}
|
35
|
-
\item{time}{Time of observation, in the form \code{0330} for
|
36
|
-
3:30am}
|
37
|
-
\item{temp}{Measured body temperature in degrees Celsius.}
|
38
|
-
\item{activ}{Indicator of activity outside the retreat.}
|
39
|
-
}
|
40
|
-
}
|
41
|
-
\note{
|
42
|
-
The observation at 22:20 is missing in \code{beaver1}.
|
43
|
-
}
|
44
|
-
\source{
|
45
|
-
P. S. Reynolds (1994) Time-series analyses of beaver body
|
46
|
-
temperatures. Chapter 11 of Lange, N., Ryan, L., Billard, L.,
|
47
|
-
Brillinger, D., Conquest, L. and Greenhouse, J. eds (1994)
|
48
|
-
\emph{Case Studies in Biometry.}
|
49
|
-
New York: John Wiley and Sons.
|
50
|
-
}
|
51
|
-
%% consider converting times to POSIXct (using 'yday' ?)
|
52
|
-
\examples{
|
53
|
-
require(graphics)
|
54
|
-
(yl <- range(beaver1$temp, beaver2$temp))
|
55
|
-
|
56
|
-
beaver.plot <- function(bdat, ...) {
|
57
|
-
nam <- deparse(substitute(bdat))
|
58
|
-
with(bdat, {
|
59
|
-
# Hours since start of day:
|
60
|
-
hours <- time \%/\% 100 + 24*(day - day[1]) + (time \%\% 100)/60
|
61
|
-
plot (hours, temp, type = "l", ...,
|
62
|
-
main = paste(nam, "body temperature"))
|
63
|
-
abline(h = 37.5, col = "gray", lty = 2)
|
64
|
-
is.act <- activ == 1
|
65
|
-
points(hours[is.act], temp[is.act], col = 2, cex = .8)
|
66
|
-
})
|
67
|
-
}
|
68
|
-
op <- par(mfrow = c(2,1), mar = c(3,3,4,2), mgp = .9* 2:0)
|
69
|
-
beaver.plot(beaver1, ylim = yl)
|
70
|
-
beaver.plot(beaver2, ylim = yl)
|
71
|
-
par(op)
|
72
|
-
}
|
73
|
-
\keyword{datasets}
|
data/data/r/man/cars.Rd
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/cars.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{cars}
|
7
|
-
\docType{data}
|
8
|
-
\alias{cars}
|
9
|
-
\title{Speed and Stopping Distances of Cars}
|
10
|
-
\description{
|
11
|
-
The data give the speed of cars and the distances taken to stop.
|
12
|
-
Note that the data were recorded in the 1920s.
|
13
|
-
}
|
14
|
-
\usage{cars}
|
15
|
-
\format{
|
16
|
-
A data frame with 50 observations on 2 variables.
|
17
|
-
\tabular{rlll}{
|
18
|
-
[,1] \tab speed \tab numeric \tab Speed (mph)\cr
|
19
|
-
[,2] \tab dist \tab numeric \tab Stopping distance (ft)
|
20
|
-
}
|
21
|
-
}
|
22
|
-
\source{
|
23
|
-
Ezekiel, M. (1930)
|
24
|
-
\emph{Methods of Correlation Analysis}.
|
25
|
-
Wiley.
|
26
|
-
}
|
27
|
-
\references{
|
28
|
-
McNeil, D. R. (1977)
|
29
|
-
\emph{Interactive Data Analysis}.
|
30
|
-
Wiley.
|
31
|
-
}
|
32
|
-
\examples{
|
33
|
-
require(stats); require(graphics)
|
34
|
-
plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
|
35
|
-
las = 1)
|
36
|
-
lines(lowess(cars$speed, cars$dist, f = 2/3, iter = 3), col = "red")
|
37
|
-
title(main = "cars data")
|
38
|
-
plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
|
39
|
-
las = 1, log = "xy")
|
40
|
-
title(main = "cars data (logarithmic scales)")
|
41
|
-
lines(lowess(cars$speed, cars$dist, f = 2/3, iter = 3), col = "red")
|
42
|
-
summary(fm1 <- lm(log(dist) ~ log(speed), data = cars))
|
43
|
-
opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
|
44
|
-
mar = c(4.1, 4.1, 2.1, 1.1))
|
45
|
-
plot(fm1)
|
46
|
-
par(opar)
|
47
|
-
|
48
|
-
## An example of polynomial regression
|
49
|
-
plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
|
50
|
-
las = 1, xlim = c(0, 25))
|
51
|
-
d <- seq(0, 25, length.out = 200)
|
52
|
-
for(degree in 1:4) {
|
53
|
-
fm <- lm(dist ~ poly(speed, degree), data = cars)
|
54
|
-
assign(paste("cars", degree, sep="."), fm)
|
55
|
-
lines(d, predict(fm, data.frame(speed=d)), col = degree)
|
56
|
-
}
|
57
|
-
anova(cars.1, cars.2, cars.3, cars.4)
|
58
|
-
}
|
59
|
-
\keyword{datasets}
|
data/data/r/man/chickwts.Rd
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/chickwts.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{chickwts}
|
7
|
-
\docType{data}
|
8
|
-
\alias{chickwts}
|
9
|
-
\title{Chicken Weights by Feed Type}
|
10
|
-
\description{
|
11
|
-
An experiment was conducted to measure and compare the effectiveness
|
12
|
-
of various feed supplements on the growth rate of chickens.
|
13
|
-
}
|
14
|
-
\usage{chickwts}
|
15
|
-
\format{
|
16
|
-
A data frame with 71 observations on 2 variables.
|
17
|
-
\describe{
|
18
|
-
\item{weight}{a numeric variable giving the chick weight.}
|
19
|
-
\item{feed}{a factor giving the feed type.}
|
20
|
-
}
|
21
|
-
}
|
22
|
-
\source{
|
23
|
-
Anonymous (1948)
|
24
|
-
\emph{Biometrika}, \bold{35}, 214.
|
25
|
-
}
|
26
|
-
\details{
|
27
|
-
Newly hatched chicks were randomly allocated into six groups, and each
|
28
|
-
group was given a different feed supplement. Their weights in grams
|
29
|
-
after six weeks are given along with feed types.
|
30
|
-
}
|
31
|
-
\references{
|
32
|
-
McNeil, D. R. (1977)
|
33
|
-
\emph{Interactive Data Analysis}.
|
34
|
-
New York: Wiley.
|
35
|
-
}
|
36
|
-
\examples{
|
37
|
-
require(stats); require(graphics)
|
38
|
-
boxplot(weight ~ feed, data = chickwts, col = "lightgray",
|
39
|
-
varwidth = TRUE, notch = TRUE, main = "chickwt data",
|
40
|
-
ylab = "Weight at six weeks (gm)")
|
41
|
-
anova(fm1 <- lm(weight ~ feed, data = chickwts))
|
42
|
-
opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
|
43
|
-
mar = c(4.1, 4.1, 2.1, 1.1))
|
44
|
-
plot(fm1)
|
45
|
-
par(opar)
|
46
|
-
}
|
47
|
-
\keyword{datasets}
|
data/data/r/man/co2.Rd
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/co2.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{co2}
|
7
|
-
\docType{data}
|
8
|
-
\alias{co2}
|
9
|
-
\title{Mauna Loa Atmospheric CO2 Concentration}
|
10
|
-
\description{
|
11
|
-
Atmospheric concentrations of CO\eqn{_2}{2} are expressed in parts per
|
12
|
-
million (ppm) and reported in the preliminary 1997 SIO manometric mole
|
13
|
-
fraction scale.
|
14
|
-
}
|
15
|
-
\usage{co2}
|
16
|
-
\format{
|
17
|
-
A time series of 468 observations; monthly from 1959 to 1997.
|
18
|
-
}
|
19
|
-
\details{
|
20
|
-
The values for February, March and April of 1964 were missing and have
|
21
|
-
been obtained by interpolating linearly between the values for January
|
22
|
-
and May of 1964.
|
23
|
-
}
|
24
|
-
\source{
|
25
|
-
Keeling, C. D. and Whorf, T. P.,
|
26
|
-
Scripps Institution of Oceanography (SIO),
|
27
|
-
University of California,
|
28
|
-
La Jolla, California USA 92093-0220.
|
29
|
-
|
30
|
-
\url{ftp://cdiac.esd.ornl.gov/pub/maunaloa-co2/maunaloa.co2}.
|
31
|
-
}
|
32
|
-
\references{
|
33
|
-
Cleveland, W. S. (1993)
|
34
|
-
\emph{Visualizing Data}.
|
35
|
-
New Jersey: Summit Press.
|
36
|
-
}
|
37
|
-
\examples{
|
38
|
-
require(graphics)
|
39
|
-
plot(co2, ylab = expression("Atmospheric concentration of CO"[2]),
|
40
|
-
las = 1)
|
41
|
-
title(main = "co2 data set")
|
42
|
-
}
|
43
|
-
\keyword{datasets}
|
data/data/r/man/crimtab.Rd
DELETED
@@ -1,129 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/crimtab.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{crimtab}
|
7
|
-
\alias{crimtab}
|
8
|
-
\docType{data}
|
9
|
-
\encoding{UTF-8}
|
10
|
-
\title{Student's 3000 Criminals Data}
|
11
|
-
\description{
|
12
|
-
Data of 3000 male criminals over 20 years old undergoing their
|
13
|
-
sentences in the chief prisons of England and Wales.
|
14
|
-
}
|
15
|
-
\usage{crimtab}
|
16
|
-
\format{
|
17
|
-
A \code{\link{table}} object of \code{\link{integer}} counts, of dimension
|
18
|
-
\eqn{42 \times 22}{42 * 22} with a total count, \code{sum(crimtab)} of
|
19
|
-
3000.
|
20
|
-
|
21
|
-
The 42 \code{\link{rownames}} (\code{"9.4"}, \code{"9.5"}, \dots)
|
22
|
-
correspond to midpoints of intervals of finger lengths
|
23
|
-
whereas the 22 column names (\code{\link{colnames}})
|
24
|
-
(\code{"142.24"}, \code{"144.78"}, \dots) correspond to (body) heights
|
25
|
-
of 3000 criminals, see also below.
|
26
|
-
}
|
27
|
-
\details{
|
28
|
-
Student is the pseudonym of William Sealy Gosset.
|
29
|
-
In his 1908 paper he wrote (on page 13) at the beginning of section VI
|
30
|
-
entitled \emph{Practical Test of the foregoing Equations}:
|
31
|
-
|
32
|
-
\dQuote{Before I had succeeded in solving my problem analytically,
|
33
|
-
I had endeavoured to do so empirically. The material used was a
|
34
|
-
correlation table containing the height and left middle finger
|
35
|
-
measurements of 3000 criminals, from a paper by W. R. MacDonell
|
36
|
-
(\emph{Biometrika}, Vol. I., p. 219). The measurements were written
|
37
|
-
out on 3000 pieces of cardboard, which were then very thoroughly
|
38
|
-
shuffled and drawn at random. As each card was drawn its numbers
|
39
|
-
were written down in a book, which thus contains the measurements of
|
40
|
-
3000 criminals in a random order. Finally, each consecutive set of
|
41
|
-
4 was taken as a sample---750 in all---and the mean, standard
|
42
|
-
deviation, and correlation of each sample determined. The
|
43
|
-
difference between the mean of each sample and the mean of the
|
44
|
-
population was then divided by the standard deviation of the sample,
|
45
|
-
giving us the \emph{z} of Section III.}
|
46
|
-
|
47
|
-
The table is in fact page 216 and not page 219 in MacDonell (1902).
|
48
|
-
In the MacDonell table, the middle finger lengths were given in mm
|
49
|
-
and the heights in feet/inches intervals, they are both converted into
|
50
|
-
cm here. The midpoints of intervals were used, e.g., where MacDonell
|
51
|
-
has \eqn{4' 7''9/16 -- 8''9/16}, we have 142.24 which is 2.54*56 =
|
52
|
-
2.54*(\eqn{4' 8''}).
|
53
|
-
|
54
|
-
MacDonell credited the source of data (page 178) as follows:
|
55
|
-
\emph{The data on which the memoir is based were obtained, through the
|
56
|
-
kindness of Dr Garson, from the Central Metric Office, New Scotland Yard...}
|
57
|
-
He pointed out on page 179 that : \emph{The forms were drawn at random
|
58
|
-
from the mass on the office shelves; we are therefore dealing with a
|
59
|
-
random sampling.}
|
60
|
-
}
|
61
|
-
\source{
|
62
|
-
\url{http://pbil.univ-lyon1.fr/R/donnees/criminals1902.txt}
|
63
|
-
thanks to Jean R. Lobry and \enc{Anne-Béatrice}{Anne-Beatrice} Dufour.
|
64
|
-
}
|
65
|
-
\references{
|
66
|
-
Garson, J.G. (1900)
|
67
|
-
The metric system of identification of criminals, as used in Great
|
68
|
-
Britain and Ireland.
|
69
|
-
\emph{The Journal of the Anthropological Institute of Great Britain
|
70
|
-
and Ireland} \bold{30}, 161--198.
|
71
|
-
|
72
|
-
MacDonell, W.R. (1902)
|
73
|
-
On criminal anthropometry and the identification of criminals.
|
74
|
-
\emph{Biometrika} \bold{1}, 2, 177--227.
|
75
|
-
|
76
|
-
Student (1908) The probable error of a mean.
|
77
|
-
\emph{Biometrika} \bold{6}, 1--25.
|
78
|
-
}
|
79
|
-
\examples{
|
80
|
-
require(stats)
|
81
|
-
dim(crimtab)
|
82
|
-
utils::str(crimtab)
|
83
|
-
## for nicer printing:
|
84
|
-
local({cT <- crimtab
|
85
|
-
colnames(cT) <- substring(colnames(cT), 2,3)
|
86
|
-
print(cT, zero.print = " ")
|
87
|
-
})
|
88
|
-
|
89
|
-
## Repeat Student's experiment:
|
90
|
-
|
91
|
-
# 1) Reconstitute 3000 raw data for heights in inches and rounded to
|
92
|
-
# nearest integer as in Student's paper:
|
93
|
-
|
94
|
-
(heIn <- round(as.numeric(colnames(crimtab)) / 2.54))
|
95
|
-
d.hei <- data.frame(height = rep(heIn, colSums(crimtab)))
|
96
|
-
|
97
|
-
# 2) shuffle the data:
|
98
|
-
|
99
|
-
set.seed(1)
|
100
|
-
d.hei <- d.hei[sample(1:3000), , drop = FALSE]
|
101
|
-
|
102
|
-
# 3) Make 750 samples each of size 4:
|
103
|
-
|
104
|
-
d.hei$sample <- as.factor(rep(1:750, each = 4))
|
105
|
-
|
106
|
-
# 4) Compute the means and standard deviations (n) for the 750 samples:
|
107
|
-
|
108
|
-
h.mean <- with(d.hei, tapply(height, sample, FUN = mean))
|
109
|
-
h.sd <- with(d.hei, tapply(height, sample, FUN = sd)) * sqrt(3/4)
|
110
|
-
|
111
|
-
# 5) Compute the difference between the mean of each sample and
|
112
|
-
# the mean of the population and then divide by the
|
113
|
-
# standard deviation of the sample:
|
114
|
-
|
115
|
-
zobs <- (h.mean - mean(d.hei[,"height"]))/h.sd
|
116
|
-
|
117
|
-
# 6) Replace infinite values by +/- 6 as in Student's paper:
|
118
|
-
|
119
|
-
zobs[infZ <- is.infinite(zobs)] # 3 of them
|
120
|
-
zobs[infZ] <- 6 * sign(zobs[infZ])
|
121
|
-
|
122
|
-
# 7) Plot the distribution:
|
123
|
-
|
124
|
-
require(grDevices); require(graphics)
|
125
|
-
hist(x = zobs, probability = TRUE, xlab = "Student's z",
|
126
|
-
col = grey(0.8), border = grey(0.5),
|
127
|
-
main = "Distribution of Student's z score for 'crimtab' data")
|
128
|
-
}
|
129
|
-
\keyword{datasets}
|
data/data/r/man/datasets-package.Rd
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/datasets-package.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{datasets-package}
|
7
|
-
\alias{datasets-package}
|
8
|
-
\alias{datasets}
|
9
|
-
\docType{package}
|
10
|
-
\title{
|
11
|
-
The R Datasets Package
|
12
|
-
}
|
13
|
-
\description{
|
14
|
-
Base R datasets
|
15
|
-
}
|
16
|
-
\details{This package contains a variety of datasets. For a complete
|
17
|
-
list, use \code{library(help="datasets")}.
|
18
|
-
}
|
19
|
-
\author{
|
20
|
-
R Development Core Team and contributors worldwide
|
21
|
-
|
22
|
-
Maintainer: R Core Team \email{R-core@r-project.org}
|
23
|
-
}
|
24
|
-
\keyword{ package }
|
data/data/r/man/discoveries.Rd
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
% File src/library/datasets/man/discoveries.Rd
|
2
|
-
% Part of the R package, http://www.R-project.org
|
3
|
-
% Copyright 1995-2007 R Core Development Team
|
4
|
-
% Distributed under GPL 2 or later
|
5
|
-
|
6
|
-
\name{discoveries}
|
7
|
-
\docType{data}
|
8
|
-
\alias{discoveries}
|
9
|
-
\title{Yearly Numbers of Important Discoveries}
|
10
|
-
\description{
|
11
|
-
The numbers of \dQuote{great} inventions and scientific
|
12
|
-
discoveries in each year from 1860 to 1959.
|
13
|
-
}
|
14
|
-
\usage{discoveries}
|
15
|
-
\format{A time series of 100 values.}
|
16
|
-
\source{
|
17
|
-
The World Almanac and Book of Facts, 1975 Edition, pages 315--318.
|
18
|
-
}
|
19
|
-
\references{
|
20
|
-
McNeil, D. R. (1977)
|
21
|
-
\emph{Interactive Data Analysis}.
|
22
|
-
Wiley.
|
23
|
-
}
|
24
|
-
\examples{
|
25
|
-
require(graphics)
|
26
|
-
plot(discoveries, ylab = "Number of important discoveries",
|
27
|
-
las = 1)
|
28
|
-
title(main = "discoveries data set")
|
29
|
-
}
|
30
|
-
\keyword{datasets}
|