sciruby 0.1.3 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/CHANGES +3 -0
  4. data/CONTRIBUTING.md +46 -0
  5. data/Gemfile +34 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.rdoc +28 -0
  8. data/lib/sciruby/gems.rb +27 -0
  9. data/lib/sciruby/version.rb +3 -0
  10. data/lib/sciruby.rb +2 -77
  11. data/sciruby.gemspec +35 -0
  12. metadata +51 -413
  13. data/.autotest +0 -23
  14. data/.gemtest +0 -0
  15. data/History.txt +0 -6
  16. data/Manifest.txt +0 -119
  17. data/Rakefile +0 -178
  18. data/bin/sciruby-plotter +0 -12
  19. data/data/r/man/AirPassengers.Rd +0 -51
  20. data/data/r/man/BJsales.Rd +0 -34
  21. data/data/r/man/BOD.Rd +0 -53
  22. data/data/r/man/ChickWeight.Rd +0 -68
  23. data/data/r/man/DNase.Rd +0 -63
  24. data/data/r/man/EuStockMarkets.Rd +0 -28
  25. data/data/r/man/Formaldehyde.Rd +0 -44
  26. data/data/r/man/HairEyeColor.Rd +0 -77
  27. data/data/r/man/Harman23.cor.Rd +0 -25
  28. data/data/r/man/Harman74.cor.Rd +0 -28
  29. data/data/r/man/Indometh.Rd +0 -57
  30. data/data/r/man/InsectSprays.Rd +0 -45
  31. data/data/r/man/JohnsonJohnson.Rd +0 -37
  32. data/data/r/man/LakeHuron.Rd +0 -27
  33. data/data/r/man/LifeCycleSavings.Rd +0 -54
  34. data/data/r/man/Loblolly.Rd +0 -56
  35. data/data/r/man/Nile.Rd +0 -78
  36. data/data/r/man/Orange.Rd +0 -57
  37. data/data/r/man/OrchardSprays.Rd +0 -62
  38. data/data/r/man/PlantGrowth.Rd +0 -39
  39. data/data/r/man/Puromycin.Rd +0 -84
  40. data/data/r/man/Theoph.Rd +0 -84
  41. data/data/r/man/Titanic.Rd +0 -73
  42. data/data/r/man/ToothGrowth.Rd +0 -40
  43. data/data/r/man/UCBAdmissions.Rd +0 -68
  44. data/data/r/man/UKDriverDeaths.Rd +0 -72
  45. data/data/r/man/UKLungDeaths.Rd +0 -40
  46. data/data/r/man/UKgas.Rd +0 -25
  47. data/data/r/man/USAccDeaths.Rd +0 -23
  48. data/data/r/man/USArrests.Rd +0 -45
  49. data/data/r/man/USJudgeRatings.Rd +0 -38
  50. data/data/r/man/USPersonalExpenditure.Rd +0 -33
  51. data/data/r/man/VADeaths.Rd +0 -51
  52. data/data/r/man/WWWusage.Rd +0 -41
  53. data/data/r/man/WorldPhones.Rd +0 -40
  54. data/data/r/man/ability.cov.Rd +0 -50
  55. data/data/r/man/airmiles.Rd +0 -29
  56. data/data/r/man/airquality.Rd +0 -56
  57. data/data/r/man/anscombe.Rd +0 -62
  58. data/data/r/man/attenu.Rd +0 -66
  59. data/data/r/man/attitude.Rd +0 -48
  60. data/data/r/man/austres.Rd +0 -22
  61. data/data/r/man/beavers.Rd +0 -73
  62. data/data/r/man/cars.Rd +0 -59
  63. data/data/r/man/chickwts.Rd +0 -47
  64. data/data/r/man/co2.Rd +0 -43
  65. data/data/r/man/crimtab.Rd +0 -129
  66. data/data/r/man/datasets-package.Rd +0 -24
  67. data/data/r/man/discoveries.Rd +0 -30
  68. data/data/r/man/esoph.Rd +0 -66
  69. data/data/r/man/euro.Rd +0 -56
  70. data/data/r/man/eurodist.Rd +0 -25
  71. data/data/r/man/faithful.Rd +0 -63
  72. data/data/r/man/freeny.Rd +0 -56
  73. data/data/r/man/infert.Rd +0 -56
  74. data/data/r/man/iris.Rd +0 -62
  75. data/data/r/man/islands.Rd +0 -29
  76. data/data/r/man/lh.Rd +0 -22
  77. data/data/r/man/longley.Rd +0 -56
  78. data/data/r/man/lynx.Rd +0 -33
  79. data/data/r/man/morley.Rd +0 -50
  80. data/data/r/man/mtcars.Rd +0 -44
  81. data/data/r/man/nhtemp.Rd +0 -30
  82. data/data/r/man/nottem.Rd +0 -30
  83. data/data/r/man/occupationalStatus.Rd +0 -44
  84. data/data/r/man/precip.Rd +0 -31
  85. data/data/r/man/presidents.Rd +0 -36
  86. data/data/r/man/pressure.Rd +0 -41
  87. data/data/r/man/quakes.Rd +0 -40
  88. data/data/r/man/randu.Rd +0 -46
  89. data/data/r/man/rivers.Rd +0 -21
  90. data/data/r/man/rock.Rd +0 -34
  91. data/data/r/man/sleep.Rd +0 -51
  92. data/data/r/man/stackloss.Rd +0 -77
  93. data/data/r/man/state.Rd +0 -80
  94. data/data/r/man/sunspot.month.Rd +0 -49
  95. data/data/r/man/sunspot.year.Rd +0 -26
  96. data/data/r/man/sunspots.Rd +0 -33
  97. data/data/r/man/swiss.Rd +0 -79
  98. data/data/r/man/treering.Rd +0 -38
  99. data/data/r/man/trees.Rd +0 -48
  100. data/data/r/man/uspop.Rd +0 -27
  101. data/data/r/man/volcano.Rd +0 -31
  102. data/data/r/man/warpbreaks.Rd +0 -56
  103. data/data/r/man/women.Rd +0 -40
  104. data/data/r/man/zCO2.Rd +0 -81
  105. data/lib/ext/csv.rb +0 -22
  106. data/lib/ext/shoes.rb +0 -131
  107. data/lib/ext/string.rb +0 -39
  108. data/lib/sciruby/analysis/suite.rb +0 -87
  109. data/lib/sciruby/analysis/suite_report_builder.rb +0 -44
  110. data/lib/sciruby/analysis.rb +0 -98
  111. data/lib/sciruby/config.rb +0 -93
  112. data/lib/sciruby/data/guardian.rb +0 -96
  113. data/lib/sciruby/data/r/base.rb +0 -110
  114. data/lib/sciruby/data/r/data_frame.rb +0 -24
  115. data/lib/sciruby/data/r/grouped_data.rb +0 -7
  116. data/lib/sciruby/data/r/list.rb +0 -20
  117. data/lib/sciruby/data/r/multi_time_series.rb +0 -24
  118. data/lib/sciruby/data/r/r_matrix.rb +0 -7
  119. data/lib/sciruby/data/r/time_series.rb +0 -19
  120. data/lib/sciruby/data/r/time_series_base.rb +0 -40
  121. data/lib/sciruby/data/r/vector.rb +0 -125
  122. data/lib/sciruby/data/r.rb +0 -155
  123. data/lib/sciruby/data.rb +0 -168
  124. data/lib/sciruby/editor.rb +0 -82
  125. data/lib/sciruby/plotter.rb +0 -128
  126. data/lib/sciruby/recommend.rb +0 -70
  127. data/lib/sciruby/validation.rb +0 -368
  128. data/readme.md +0 -75
  129. data/static/sciruby-icon.png +0 -0
  130. data/test/helpers_tests.rb +0 -58
  131. data/test/test_recommend.rb +0 -16
@@ -1,56 +0,0 @@
1
- % File src/library/datasets/man/airquality.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{airquality}
7
- \docType{data}
8
- \alias{airquality}
9
- \title{New York Air Quality Measurements}
10
- \description{
11
- Daily air quality measurements in New York, May to September 1973.
12
- }
13
- \usage{airquality}
14
- \format{
15
- A data frame with 153 observations on 6 variables.
16
-
17
- \tabular{rlll}{
18
- \code{[,1]} \tab \code{Ozone} \tab numeric \tab Ozone (ppb)\cr
19
- \code{[,2]} \tab \code{Solar.R} \tab numeric \tab Solar R (lang)\cr
20
- \code{[,3]} \tab \code{Wind} \tab numeric \tab Wind (mph)\cr
21
- \code{[,4]} \tab \code{Temp} \tab numeric \tab Temperature (degrees F)\cr
22
- \code{[,5]} \tab \code{Month} \tab numeric \tab Month (1--12)\cr
23
- \code{[,6]} \tab \code{Day} \tab numeric \tab Day of month (1--31)
24
- }
25
- }
26
- \details{
27
- Daily readings of the following air quality values for May 1, 1973 (a
28
- Tuesday) to September 30, 1973.
29
-
30
- \itemize{
31
- \item \code{Ozone}: Mean ozone in parts per
32
- billion from 1300 to 1500 hours at Roosevelt Island
33
- \item \code{Solar.R}: Solar radiation
34
- in Langleys in the frequency band 4000--7700 Angstroms from
35
- 0800 to 1200 hours at Central Park
36
- \item \code{Wind}: Average wind speed in miles
37
- per hour at 0700 and 1000 hours at LaGuardia Airport
38
- \item \code{Temp}: Maximum daily
39
- temperature in degrees Fahrenheit at LaGuardia Airport.
40
- }
41
- }
42
- \source{
43
- The data were obtained from the New York State Department of
44
- Conservation (ozone data) and the National Weather Service
45
- (meteorological data).
46
- }
47
- \references{
48
- Chambers, J. M., Cleveland, W. S., Kleiner, B. and Tukey, P. A. (1983)
49
- \emph{Graphical Methods for Data Analysis}.
50
- Belmont, CA: Wadsworth.
51
- }
52
- \examples{
53
- require(graphics)
54
- pairs(airquality, panel = panel.smooth, main = "airquality data")
55
- }
56
- \keyword{datasets}
@@ -1,62 +0,0 @@
1
- % File src/library/datasets/man/anscombe.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2011 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{anscombe}
7
- \docType{data}
8
- \alias{anscombe}
9
- \title{Anscombe's Quartet of \sQuote{Identical} Simple Linear Regressions}
10
- \description{
11
- Four \eqn{x}-\eqn{y} datasets which have the same traditional
12
- statistical properties (mean, variance, correlation, regression line,
13
- etc.), yet are quite different.
14
- }
15
- \usage{anscombe}
16
- \format{
17
- A data frame with 11 observations on 8 variables.
18
- \tabular{rl}{
19
- x1 == x2 == x3 \tab the integers 4:14, specially arranged \cr
20
- x4 \tab values 8 and 19 \cr
21
- y1, y2, y3, y4 \tab numbers in (3, 12.5) with mean 7.5 and sdev 2.03}
22
- }
23
- \source{
24
- Tufte, Edward R. (1989)
25
- \emph{The Visual Display of Quantitative Information}, 13--14.
26
- Graphics Press.
27
- }
28
- \references{
29
- Anscombe, Francis J. (1973) Graphs in statistical analysis.
30
- \emph{American Statistician}, \bold{27}, 17--21.
31
- }
32
- \examples{
33
- require(stats); require(graphics)
34
- summary(anscombe)
35
-
36
- ##-- now some "magic" to do the 4 regressions in a loop:
37
- ff <- y ~ x
38
- for(i in 1:4) {
39
- ff[2:3] <- lapply(paste(c("y","x"), i, sep=""), as.name)
40
- ## or ff[[2]] <- as.name(paste("y", i, sep=""))
41
- ## ff[[3]] <- as.name(paste("x", i, sep=""))
42
- assign(paste("lm.",i,sep=""), lmi <- lm(ff, data= anscombe))
43
- print(anova(lmi))
44
- }
45
-
46
- ## See how close they are (numerically!)
47
- sapply(objects(pattern="lm\\\\.[1-4]$"), function(n) coef(get(n)))
48
- lapply(objects(pattern="lm\\\\.[1-4]$"),
49
- function(n) coef(summary(get(n))))
50
-
51
- ## Now, do what you should have done in the first place: PLOTS
52
- op <- par(mfrow=c(2,2), mar=.1+c(4,4,1,1), oma= c(0,0,2,0))
53
- for(i in 1:4) {
54
- ff[2:3] <- lapply(paste(c("y","x"), i, sep=""), as.name)
55
- plot(ff, data =anscombe, col="red", pch=21, bg = "orange", cex = 1.2,
56
- xlim=c(3,19), ylim=c(3,13))
57
- abline(get(paste("lm.",i,sep="")), col="blue")
58
- }
59
- mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex=1.5)
60
- par(op)
61
- }
62
- \keyword{datasets}
data/data/r/man/attenu.Rd DELETED
@@ -1,66 +0,0 @@
1
- % File src/library/datasets/man/attenu.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{attenu}
7
- \docType{data}
8
- \alias{attenu}
9
- \title{The Joyner--Boore Attenuation Data}
10
- \description{
11
- This data gives peak accelerations measured at various observation
12
- stations for 23 earthquakes in California. The data have been used
13
- by various workers to estimate the attenuating effect of distance
14
- on ground acceleration.
15
- }
16
- \usage{attenu}
17
- \format{
18
- A data frame with 182 observations on 5 variables.
19
- \tabular{rlll}{
20
- [,1] \tab event \tab numeric \tab Event Number\cr
21
- [,2] \tab mag \tab numeric \tab Moment Magnitude\cr
22
- [,3] \tab station \tab factor \tab Station Number\cr
23
- [,4] \tab dist \tab numeric \tab Station-hypocenter distance (km)\cr
24
- [,5] \tab accel \tab numeric \tab Peak acceleration (g)}
25
- }
26
- \source{
27
- Joyner, W.B., D.M. Boore and R.D. Porcella (1981). Peak horizontal
28
- acceleration and velocity from strong-motion records including
29
- records from the 1979 Imperial Valley, California earthquake. USGS
30
- Open File report 81-365. Menlo Park, Ca.
31
- }
32
- \references{
33
- Boore, D. M. and Joyner, W.B.(1982)
34
- The empirical prediction of ground motion,
35
- \emph{Bull. Seism. Soc. Am.}, \bold{72}, S269--S268.
36
-
37
- Bolt, B. A. and Abrahamson, N. A. (1982)
38
- New attenuation relations for peak and expected accelerations of
39
- strong ground motion,
40
- \emph{Bull. Seism. Soc. Am.}, \bold{72}, 2307--2321.
41
-
42
- Bolt B. A. and Abrahamson, N. A. (1983)
43
- Reply to W. B. Joyner & D. M. Boore's \dQuote{Comments on: New
44
- attenuation relations for peak and expected accelerations
45
- of strong ground motion},
46
- \emph{Bull. Seism. Soc. Am.}, \bold{73}, 1481--1483.
47
-
48
- Brillinger, D. R. and Preisler, H. K. (1984)
49
- An exploratory analysis of the Joyner-Boore attenuation data,
50
- \emph{Bull. Seism. Soc. Am.}, \bold{74}, 1441--1449.
51
-
52
- Brillinger, D. R. and Preisler, H. K. (1984)
53
- \emph{Further analysis of the Joyner-Boore attenuation data}.
54
- Manuscript.
55
- }
56
- \examples{
57
- require(graphics)
58
- ## check the data class of the variables
59
- sapply(attenu, data.class)
60
- summary(attenu)
61
- pairs(attenu, main = "attenu data")
62
- coplot(accel ~ dist | as.factor(event), data = attenu, show.given = FALSE)
63
- coplot(log(accel) ~ log(dist) | as.factor(event),
64
- data = attenu, panel = panel.smooth, show.given = FALSE)
65
- }
66
- \keyword{datasets}
@@ -1,48 +0,0 @@
1
- % File src/library/datasets/man/attitude.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{attitude}
7
- \docType{data}
8
- \alias{attitude}
9
- \title{The Chatterjee--Price Attitude Data}
10
- \description{
11
- From a survey of the clerical employees of a large financial
12
- organization, the data are aggregated from the questionnaires of the
13
- approximately 35 employees for each of 30 (randomly selected)
14
- departments. The numbers give the percent proportion of favourable
15
- responses to seven questions in each department.}
16
- \usage{attitude}
17
- \format{
18
- A data frame with 30 observations on 7 variables. The first column gives
19
- the short names from the reference, the second one the variable names
20
- in the data frame:
21
- \tabular{rlll}{
22
- Y \tab rating \tab numeric \tab Overall rating \cr
23
- X[1] \tab complaints\tab numeric \tab Handling of employee complaints \cr
24
- X[2] \tab privileges\tab numeric \tab Does not allow special privileges \cr
25
- X[3] \tab learning \tab numeric \tab Opportunity to learn \cr
26
- X[4] \tab raises \tab numeric \tab Raises based on performance \cr
27
- X[5] \tab critical \tab numeric \tab Too critical \cr
28
- X[6] \tab advance \tab numeric \tab Advancement}
29
- }
30
- \source{
31
- Chatterjee, S. and Price, B. (1977)
32
- \emph{Regression Analysis by Example}.
33
- New York: Wiley.
34
- (Section 3.7, p.68ff of 2nd ed.(1991).)
35
- }
36
- \examples{
37
- require(stats); require(graphics)
38
- pairs(attitude, main = "attitude data")
39
- summary(attitude)
40
- summary(fm1 <- lm(rating ~ ., data = attitude))
41
- opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
42
- mar = c(4.1, 4.1, 2.1, 1.1))
43
- plot(fm1)
44
- summary(fm2 <- lm(rating ~ complaints, data = attitude))
45
- plot(fm2)
46
- par(opar)
47
- }
48
- \keyword{datasets}
@@ -1,22 +0,0 @@
1
- % File src/library/datasets/man/austres.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1994-9 W. N. Venables and B. D. Ripley
4
- % Distributed under GPL 2 or later
5
-
6
- \name{austres}
7
- \docType{data}
8
- \alias{austres}
9
- \title{
10
- Quarterly Time Series of the Number of Australian Residents
11
- }
12
- \description{
13
- Numbers (in thousands) of Australian residents measured quarterly from
14
- March 1971 to March 1994. The object is of class \code{"ts"}.
15
- }
16
- \usage{austres}
17
- \source{
18
- P. J. Brockwell and R. A. Davis (1996)
19
- \emph{Introduction to Time Series and Forecasting.}
20
- Springer
21
- }
22
- \keyword{datasets}
@@ -1,73 +0,0 @@
1
- % File src/library/datasets/man/beavers.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1994-9 W. N. Venables and B. D. Ripley
4
- % Distributed under GPL 2 or later
5
-
6
- \name{beavers}
7
- \docType{data}
8
- \alias{beavers}
9
- \alias{beaver1}
10
- \alias{beaver2}
11
- \title{Body Temperature Series of Two Beavers}
12
- \usage{
13
- beaver1
14
- beaver2
15
- }
16
- \description{
17
- Reynolds (1994) describes a small part of a study of the long-term
18
- temperature dynamics of beaver \emph{Castor canadensis} in
19
- north-central Wisconsin. Body temperature was measured by telemetry
20
- every 10 minutes for four females, but data from one period of
21
- less than a day for each of two animals is used there.
22
- }
23
- \format{
24
- The \code{beaver1} data frame has 114 rows and 4 columns on body
25
- temperature measurements at 10 minute intervals.
26
-
27
- The \code{beaver2} data frame has 100 rows and 4 columns on body
28
- temperature measurements at 10 minute intervals.
29
-
30
- The variables are as follows:
31
- \describe{
32
- \item{day}{Day of observation (in days since the beginning of
33
- 1990), December 12--13 (\code{beaver1}) and November 3--4
34
- (\code{beaver2}).}
35
- \item{time}{Time of observation, in the form \code{0330} for
36
- 3:30am}
37
- \item{temp}{Measured body temperature in degrees Celsius.}
38
- \item{activ}{Indicator of activity outside the retreat.}
39
- }
40
- }
41
- \note{
42
- The observation at 22:20 is missing in \code{beaver1}.
43
- }
44
- \source{
45
- P. S. Reynolds (1994) Time-series analyses of beaver body
46
- temperatures. Chapter 11 of Lange, N., Ryan, L., Billard, L.,
47
- Brillinger, D., Conquest, L. and Greenhouse, J. eds (1994)
48
- \emph{Case Studies in Biometry.}
49
- New York: John Wiley and Sons.
50
- }
51
- %% consider converting times to POSIXct (using 'yday' ?)
52
- \examples{
53
- require(graphics)
54
- (yl <- range(beaver1$temp, beaver2$temp))
55
-
56
- beaver.plot <- function(bdat, ...) {
57
- nam <- deparse(substitute(bdat))
58
- with(bdat, {
59
- # Hours since start of day:
60
- hours <- time \%/\% 100 + 24*(day - day[1]) + (time \%\% 100)/60
61
- plot (hours, temp, type = "l", ...,
62
- main = paste(nam, "body temperature"))
63
- abline(h = 37.5, col = "gray", lty = 2)
64
- is.act <- activ == 1
65
- points(hours[is.act], temp[is.act], col = 2, cex = .8)
66
- })
67
- }
68
- op <- par(mfrow = c(2,1), mar = c(3,3,4,2), mgp = .9* 2:0)
69
- beaver.plot(beaver1, ylim = yl)
70
- beaver.plot(beaver2, ylim = yl)
71
- par(op)
72
- }
73
- \keyword{datasets}
data/data/r/man/cars.Rd DELETED
@@ -1,59 +0,0 @@
1
- % File src/library/datasets/man/cars.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{cars}
7
- \docType{data}
8
- \alias{cars}
9
- \title{Speed and Stopping Distances of Cars}
10
- \description{
11
- The data give the speed of cars and the distances taken to stop.
12
- Note that the data were recorded in the 1920s.
13
- }
14
- \usage{cars}
15
- \format{
16
- A data frame with 50 observations on 2 variables.
17
- \tabular{rlll}{
18
- [,1] \tab speed \tab numeric \tab Speed (mph)\cr
19
- [,2] \tab dist \tab numeric \tab Stopping distance (ft)
20
- }
21
- }
22
- \source{
23
- Ezekiel, M. (1930)
24
- \emph{Methods of Correlation Analysis}.
25
- Wiley.
26
- }
27
- \references{
28
- McNeil, D. R. (1977)
29
- \emph{Interactive Data Analysis}.
30
- Wiley.
31
- }
32
- \examples{
33
- require(stats); require(graphics)
34
- plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
35
- las = 1)
36
- lines(lowess(cars$speed, cars$dist, f = 2/3, iter = 3), col = "red")
37
- title(main = "cars data")
38
- plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
39
- las = 1, log = "xy")
40
- title(main = "cars data (logarithmic scales)")
41
- lines(lowess(cars$speed, cars$dist, f = 2/3, iter = 3), col = "red")
42
- summary(fm1 <- lm(log(dist) ~ log(speed), data = cars))
43
- opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
44
- mar = c(4.1, 4.1, 2.1, 1.1))
45
- plot(fm1)
46
- par(opar)
47
-
48
- ## An example of polynomial regression
49
- plot(cars, xlab = "Speed (mph)", ylab = "Stopping distance (ft)",
50
- las = 1, xlim = c(0, 25))
51
- d <- seq(0, 25, length.out = 200)
52
- for(degree in 1:4) {
53
- fm <- lm(dist ~ poly(speed, degree), data = cars)
54
- assign(paste("cars", degree, sep="."), fm)
55
- lines(d, predict(fm, data.frame(speed=d)), col = degree)
56
- }
57
- anova(cars.1, cars.2, cars.3, cars.4)
58
- }
59
- \keyword{datasets}
@@ -1,47 +0,0 @@
1
- % File src/library/datasets/man/chickwts.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{chickwts}
7
- \docType{data}
8
- \alias{chickwts}
9
- \title{Chicken Weights by Feed Type}
10
- \description{
11
- An experiment was conducted to measure and compare the effectiveness
12
- of various feed supplements on the growth rate of chickens.
13
- }
14
- \usage{chickwts}
15
- \format{
16
- A data frame with 71 observations on 2 variables.
17
- \describe{
18
- \item{weight}{a numeric variable giving the chick weight.}
19
- \item{feed}{a factor giving the feed type.}
20
- }
21
- }
22
- \source{
23
- Anonymous (1948)
24
- \emph{Biometrika}, \bold{35}, 214.
25
- }
26
- \details{
27
- Newly hatched chicks were randomly allocated into six groups, and each
28
- group was given a different feed supplement. Their weights in grams
29
- after six weeks are given along with feed types.
30
- }
31
- \references{
32
- McNeil, D. R. (1977)
33
- \emph{Interactive Data Analysis}.
34
- New York: Wiley.
35
- }
36
- \examples{
37
- require(stats); require(graphics)
38
- boxplot(weight ~ feed, data = chickwts, col = "lightgray",
39
- varwidth = TRUE, notch = TRUE, main = "chickwt data",
40
- ylab = "Weight at six weeks (gm)")
41
- anova(fm1 <- lm(weight ~ feed, data = chickwts))
42
- opar <- par(mfrow = c(2, 2), oma = c(0, 0, 1.1, 0),
43
- mar = c(4.1, 4.1, 2.1, 1.1))
44
- plot(fm1)
45
- par(opar)
46
- }
47
- \keyword{datasets}
data/data/r/man/co2.Rd DELETED
@@ -1,43 +0,0 @@
1
- % File src/library/datasets/man/co2.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{co2}
7
- \docType{data}
8
- \alias{co2}
9
- \title{Mauna Loa Atmospheric CO2 Concentration}
10
- \description{
11
- Atmospheric concentrations of CO\eqn{_2}{2} are expressed in parts per
12
- million (ppm) and reported in the preliminary 1997 SIO manometric mole
13
- fraction scale.
14
- }
15
- \usage{co2}
16
- \format{
17
- A time series of 468 observations; monthly from 1959 to 1997.
18
- }
19
- \details{
20
- The values for February, March and April of 1964 were missing and have
21
- been obtained by interpolating linearly between the values for January
22
- and May of 1964.
23
- }
24
- \source{
25
- Keeling, C. D. and Whorf, T. P.,
26
- Scripps Institution of Oceanography (SIO),
27
- University of California,
28
- La Jolla, California USA 92093-0220.
29
-
30
- \url{ftp://cdiac.esd.ornl.gov/pub/maunaloa-co2/maunaloa.co2}.
31
- }
32
- \references{
33
- Cleveland, W. S. (1993)
34
- \emph{Visualizing Data}.
35
- New Jersey: Summit Press.
36
- }
37
- \examples{
38
- require(graphics)
39
- plot(co2, ylab = expression("Atmospheric concentration of CO"[2]),
40
- las = 1)
41
- title(main = "co2 data set")
42
- }
43
- \keyword{datasets}
@@ -1,129 +0,0 @@
1
- % File src/library/datasets/man/crimtab.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{crimtab}
7
- \alias{crimtab}
8
- \docType{data}
9
- \encoding{UTF-8}
10
- \title{Student's 3000 Criminals Data}
11
- \description{
12
- Data of 3000 male criminals over 20 years old undergoing their
13
- sentences in the chief prisons of England and Wales.
14
- }
15
- \usage{crimtab}
16
- \format{
17
- A \code{\link{table}} object of \code{\link{integer}} counts, of dimension
18
- \eqn{42 \times 22}{42 * 22} with a total count, \code{sum(crimtab)} of
19
- 3000.
20
-
21
- The 42 \code{\link{rownames}} (\code{"9.4"}, \code{"9.5"}, \dots)
22
- correspond to midpoints of intervals of finger lengths
23
- whereas the 22 column names (\code{\link{colnames}})
24
- (\code{"142.24"}, \code{"144.78"}, \dots) correspond to (body) heights
25
- of 3000 criminals, see also below.
26
- }
27
- \details{
28
- Student is the pseudonym of William Sealy Gosset.
29
- In his 1908 paper he wrote (on page 13) at the beginning of section VI
30
- entitled \emph{Practical Test of the foregoing Equations}:
31
-
32
- \dQuote{Before I had succeeded in solving my problem analytically,
33
- I had endeavoured to do so empirically. The material used was a
34
- correlation table containing the height and left middle finger
35
- measurements of 3000 criminals, from a paper by W. R. MacDonell
36
- (\emph{Biometrika}, Vol. I., p. 219). The measurements were written
37
- out on 3000 pieces of cardboard, which were then very thoroughly
38
- shuffled and drawn at random. As each card was drawn its numbers
39
- were written down in a book, which thus contains the measurements of
40
- 3000 criminals in a random order. Finally, each consecutive set of
41
- 4 was taken as a sample---750 in all---and the mean, standard
42
- deviation, and correlation of each sample determined. The
43
- difference between the mean of each sample and the mean of the
44
- population was then divided by the standard deviation of the sample,
45
- giving us the \emph{z} of Section III.}
46
-
47
- The table is in fact page 216 and not page 219 in MacDonell(1902).
48
- In the MacDonell table, the middle finger lengths were given in mm
49
- and the heights in feet/inches intervals, they are both converted into
50
- cm here. The midpoints of intervals were used, e.g., where MacDonell
51
- has \eqn{4' 7''9/16 -- 8''9/16}, we have 142.24 which is 2.54*56 =
52
- 2.54*(\eqn{4' 8''}).
53
-
54
- MacDonell credited the source of data (page 178) as follows:
55
- \emph{The data on which the memoir is based were obtained, through the
56
- kindness of Dr Garson, from the Central Metric Office, New Scotland Yard...}
57
- He pointed out on page 179 that : \emph{The forms were drawn at random
58
- from the mass on the office shelves; we are therefore dealing with a
59
- random sampling.}
60
- }
61
- \source{
62
- \url{http://pbil.univ-lyon1.fr/R/donnees/criminals1902.txt}
63
- thanks to Jean R. Lobry and \enc{Anne-Béatrice}{Anne-Beatrice} Dufour.
64
- }
65
- \references{
66
- Garson, J.G. (1900)
67
- The metric system of identification of criminals, as used in Great
68
- Britain and Ireland.
69
- \emph{The Journal of the Anthropological Institute of Great Britain
70
- and Ireland} \bold{30}, 161--198.
71
-
72
- MacDonell, W.R. (1902)
73
- On criminal anthropometry and the identification of criminals.
74
- \emph{Biometrika} \bold{1}, 2, 177--227.
75
-
76
- Student (1908) The probable error of a mean.
77
- \emph{Biometrika} \bold{6}, 1--25.
78
- }
79
- \examples{
80
- require(stats)
81
- dim(crimtab)
82
- utils::str(crimtab)
83
- ## for nicer printing:
84
- local({cT <- crimtab
85
- colnames(cT) <- substring(colnames(cT), 2,3)
86
- print(cT, zero.print = " ")
87
- })
88
-
89
- ## Repeat Student's experiment:
90
-
91
- # 1) Reconstitute 3000 raw data for heights in inches and rounded to
92
- # nearest integer as in Student's paper:
93
-
94
- (heIn <- round(as.numeric(colnames(crimtab)) / 2.54))
95
- d.hei <- data.frame(height = rep(heIn, colSums(crimtab)))
96
-
97
- # 2) shuffle the data:
98
-
99
- set.seed(1)
100
- d.hei <- d.hei[sample(1:3000), , drop = FALSE]
101
-
102
- # 3) Make 750 samples each of size 4:
103
-
104
- d.hei$sample <- as.factor(rep(1:750, each = 4))
105
-
106
- # 4) Compute the means and standard deviations (n) for the 750 samples:
107
-
108
- h.mean <- with(d.hei, tapply(height, sample, FUN = mean))
109
- h.sd <- with(d.hei, tapply(height, sample, FUN = sd)) * sqrt(3/4)
110
-
111
- # 5) Compute the difference between the mean of each sample and
112
- # the mean of the population and then divide by the
113
- # standard deviation of the sample:
114
-
115
- zobs <- (h.mean - mean(d.hei[,"height"]))/h.sd
116
-
117
- # 6) Replace infinite values by +/- 6 as in Student's paper:
118
-
119
- zobs[infZ <- is.infinite(zobs)] # 3 of them
120
- zobs[infZ] <- 6 * sign(zobs[infZ])
121
-
122
- # 7) Plot the distribution:
123
-
124
- require(grDevices); require(graphics)
125
- hist(x = zobs, probability = TRUE, xlab = "Student's z",
126
- col = grey(0.8), border = grey(0.5),
127
- main = "Distribution of Student's z score for 'crimtab' data")
128
- }
129
- \keyword{datasets}
@@ -1,24 +0,0 @@
1
- % File src/library/datasets/man/datasets-package.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{datasets-package}
7
- \alias{datasets-package}
8
- \alias{datasets}
9
- \docType{package}
10
- \title{
11
- The R Datasets Package
12
- }
13
- \description{
14
- Base R datasets
15
- }
16
- \details{This package contains a variety of datasets. For a complete
17
- list, use \code{library(help="datasets")}.
18
- }
19
- \author{
20
- R Development Core Team and contributors worldwide
21
-
22
- Maintainer: R Core Team \email{R-core@r-project.org}
23
- }
24
- \keyword{ package }
@@ -1,30 +0,0 @@
1
- % File src/library/datasets/man/discoveries.Rd
2
- % Part of the R package, http://www.R-project.org
3
- % Copyright 1995-2007 R Core Development Team
4
- % Distributed under GPL 2 or later
5
-
6
- \name{discoveries}
7
- \docType{data}
8
- \alias{discoveries}
9
- \title{Yearly Numbers of Important Discoveries}
10
- \description{
11
- The numbers of \dQuote{great} inventions and scientific
12
- discoveries in each year from 1860 to 1959.
13
- }
14
- \usage{discoveries}
15
- \format{A time series of 100 values.}
16
- \source{
17
- The World Almanac and Book of Facts, 1975 Edition, pages 315--318.
18
- }
19
- \references{
20
- McNeil, D. R. (1977)
21
- \emph{Interactive Data Analysis}.
22
- Wiley.
23
- }
24
- \examples{
25
- require(graphics)
26
- plot(discoveries, ylab = "Number of important discoveries",
27
- las = 1)
28
- title(main = "discoveries data set")
29
- }
30
- \keyword{datasets}