sciruby 0.1.3 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/CHANGES +3 -0
- data/CONTRIBUTING.md +46 -0
- data/Gemfile +34 -0
- data/LICENSE.txt +21 -0
- data/README.rdoc +28 -0
- data/lib/sciruby/gems.rb +27 -0
- data/lib/sciruby/version.rb +3 -0
- data/lib/sciruby.rb +2 -77
- data/sciruby.gemspec +35 -0
- metadata +51 -413
- data/.autotest +0 -23
- data/.gemtest +0 -0
- data/History.txt +0 -6
- data/Manifest.txt +0 -119
- data/Rakefile +0 -178
- data/bin/sciruby-plotter +0 -12
- data/data/r/man/AirPassengers.Rd +0 -51
- data/data/r/man/BJsales.Rd +0 -34
- data/data/r/man/BOD.Rd +0 -53
- data/data/r/man/ChickWeight.Rd +0 -68
- data/data/r/man/DNase.Rd +0 -63
- data/data/r/man/EuStockMarkets.Rd +0 -28
- data/data/r/man/Formaldehyde.Rd +0 -44
- data/data/r/man/HairEyeColor.Rd +0 -77
- data/data/r/man/Harman23.cor.Rd +0 -25
- data/data/r/man/Harman74.cor.Rd +0 -28
- data/data/r/man/Indometh.Rd +0 -57
- data/data/r/man/InsectSprays.Rd +0 -45
- data/data/r/man/JohnsonJohnson.Rd +0 -37
- data/data/r/man/LakeHuron.Rd +0 -27
- data/data/r/man/LifeCycleSavings.Rd +0 -54
- data/data/r/man/Loblolly.Rd +0 -56
- data/data/r/man/Nile.Rd +0 -78
- data/data/r/man/Orange.Rd +0 -57
- data/data/r/man/OrchardSprays.Rd +0 -62
- data/data/r/man/PlantGrowth.Rd +0 -39
- data/data/r/man/Puromycin.Rd +0 -84
- data/data/r/man/Theoph.Rd +0 -84
- data/data/r/man/Titanic.Rd +0 -73
- data/data/r/man/ToothGrowth.Rd +0 -40
- data/data/r/man/UCBAdmissions.Rd +0 -68
- data/data/r/man/UKDriverDeaths.Rd +0 -72
- data/data/r/man/UKLungDeaths.Rd +0 -40
- data/data/r/man/UKgas.Rd +0 -25
- data/data/r/man/USAccDeaths.Rd +0 -23
- data/data/r/man/USArrests.Rd +0 -45
- data/data/r/man/USJudgeRatings.Rd +0 -38
- data/data/r/man/USPersonalExpenditure.Rd +0 -33
- data/data/r/man/VADeaths.Rd +0 -51
- data/data/r/man/WWWusage.Rd +0 -41
- data/data/r/man/WorldPhones.Rd +0 -40
- data/data/r/man/ability.cov.Rd +0 -50
- data/data/r/man/airmiles.Rd +0 -29
- data/data/r/man/airquality.Rd +0 -56
- data/data/r/man/anscombe.Rd +0 -62
- data/data/r/man/attenu.Rd +0 -66
- data/data/r/man/attitude.Rd +0 -48
- data/data/r/man/austres.Rd +0 -22
- data/data/r/man/beavers.Rd +0 -73
- data/data/r/man/cars.Rd +0 -59
- data/data/r/man/chickwts.Rd +0 -47
- data/data/r/man/co2.Rd +0 -43
- data/data/r/man/crimtab.Rd +0 -129
- data/data/r/man/datasets-package.Rd +0 -24
- data/data/r/man/discoveries.Rd +0 -30
- data/data/r/man/esoph.Rd +0 -66
- data/data/r/man/euro.Rd +0 -56
- data/data/r/man/eurodist.Rd +0 -25
- data/data/r/man/faithful.Rd +0 -63
- data/data/r/man/freeny.Rd +0 -56
- data/data/r/man/infert.Rd +0 -56
- data/data/r/man/iris.Rd +0 -62
- data/data/r/man/islands.Rd +0 -29
- data/data/r/man/lh.Rd +0 -22
- data/data/r/man/longley.Rd +0 -56
- data/data/r/man/lynx.Rd +0 -33
- data/data/r/man/morley.Rd +0 -50
- data/data/r/man/mtcars.Rd +0 -44
- data/data/r/man/nhtemp.Rd +0 -30
- data/data/r/man/nottem.Rd +0 -30
- data/data/r/man/occupationalStatus.Rd +0 -44
- data/data/r/man/precip.Rd +0 -31
- data/data/r/man/presidents.Rd +0 -36
- data/data/r/man/pressure.Rd +0 -41
- data/data/r/man/quakes.Rd +0 -40
- data/data/r/man/randu.Rd +0 -46
- data/data/r/man/rivers.Rd +0 -21
- data/data/r/man/rock.Rd +0 -34
- data/data/r/man/sleep.Rd +0 -51
- data/data/r/man/stackloss.Rd +0 -77
- data/data/r/man/state.Rd +0 -80
- data/data/r/man/sunspot.month.Rd +0 -49
- data/data/r/man/sunspot.year.Rd +0 -26
- data/data/r/man/sunspots.Rd +0 -33
- data/data/r/man/swiss.Rd +0 -79
- data/data/r/man/treering.Rd +0 -38
- data/data/r/man/trees.Rd +0 -48
- data/data/r/man/uspop.Rd +0 -27
- data/data/r/man/volcano.Rd +0 -31
- data/data/r/man/warpbreaks.Rd +0 -56
- data/data/r/man/women.Rd +0 -40
- data/data/r/man/zCO2.Rd +0 -81
- data/lib/ext/csv.rb +0 -22
- data/lib/ext/shoes.rb +0 -131
- data/lib/ext/string.rb +0 -39
- data/lib/sciruby/analysis/suite.rb +0 -87
- data/lib/sciruby/analysis/suite_report_builder.rb +0 -44
- data/lib/sciruby/analysis.rb +0 -98
- data/lib/sciruby/config.rb +0 -93
- data/lib/sciruby/data/guardian.rb +0 -96
- data/lib/sciruby/data/r/base.rb +0 -110
- data/lib/sciruby/data/r/data_frame.rb +0 -24
- data/lib/sciruby/data/r/grouped_data.rb +0 -7
- data/lib/sciruby/data/r/list.rb +0 -20
- data/lib/sciruby/data/r/multi_time_series.rb +0 -24
- data/lib/sciruby/data/r/r_matrix.rb +0 -7
- data/lib/sciruby/data/r/time_series.rb +0 -19
- data/lib/sciruby/data/r/time_series_base.rb +0 -40
- data/lib/sciruby/data/r/vector.rb +0 -125
- data/lib/sciruby/data/r.rb +0 -155
- data/lib/sciruby/data.rb +0 -168
- data/lib/sciruby/editor.rb +0 -82
- data/lib/sciruby/plotter.rb +0 -128
- data/lib/sciruby/recommend.rb +0 -70
- data/lib/sciruby/validation.rb +0 -368
- data/readme.md +0 -75
- data/static/sciruby-icon.png +0 -0
- data/test/helpers_tests.rb +0 -58
- data/test/test_recommend.rb +0 -16
@@ -1,44 +0,0 @@
|
|
1
|
-
module SciRuby
  module Analysis
    # Suite variant that collects its output in a ReportBuilder object
    # instead of emitting it directly.
    #
    # NOTE(review): +old_boxplot+ and +old_histogram+ are not defined in this
    # file. Presumably they are aliases of plotting helpers provided by Suite;
    # confirm they exist before relying on #boxplot / #histogram.
    class SuiteReportBuilder < Suite
      # The ReportBuilder that receives everything this suite adds.
      attr_accessor :rb

      # opts::  options hash; any non-Hash value is treated as the suite name.
      #         <tt>:rb</tt> may supply an existing ReportBuilder to write to,
      #         otherwise a new one named after the suite is created.
      # block:: forwarded unchanged to Suite#initialize.
      def initialize(opts = {}, &block)
        opts = { :name => opts } unless opts.is_a?(Hash)
        super(opts, &block)
        @rb = opts[:rb] || ReportBuilder.new(:name => name)
      end

      # Runs the suite (when <tt>@block</tt> is set) and saves the report to
      # +filename+ through the ReportBuilder.
      def generate(filename)
        run if @block
        @rb.save(filename)
      end

      # Runs the suite (when <tt>@block</tt> is set) and returns the
      # ReportBuilder's text rendering.
      def to_text
        run if @block
        @rb.to_text
      end

      # Adds +o+ to the report.
      def summary(o)
        @rb.add(o)
      end

      # Adds the description +d+ to the report.
      def desc(d)
        @rb.add(d)
      end

      # Adds every argument to the report, in order.
      def echo(*args)
        args.each { |a| @rb.add(a) }
      end

      # Adds the result of +old_boxplot+ to the report.
      # (The original file defined this method twice with identical bodies;
      # the redundant second definition has been dropped.)
      def boxplot(*args)
        @rb.add(old_boxplot(*args))
      end

      # Adds the result of +old_histogram+ to the report.
      def histogram(*args)
        @rb.add(old_histogram(*args))
      end
    end
  end
end
|
data/lib/sciruby/analysis.rb
DELETED
@@ -1,98 +0,0 @@
|
|
1
|
-
require 'sciruby/analysis/suite'
|
2
|
-
require 'sciruby/analysis/suite_report_builder'
|
3
|
-
|
4
|
-
module SciRuby
  # DSL to run a statistical analysis without hassle.
  # * Shortcut methods to avoid having to use complete namespaces, many based on R.
  # * Attach/detach vectors to workspace, as with R
  # == Example
  #  an1 = SciRuby::Analysis.store(:first) do
  #    # Load excel file with x,y,z vectors
  #    ds = excel('data.xls')
  #    # See variables on ds dataset
  #    names(ds)
  #    # Attach the vectors to workspace, like R
  #    attach(ds)
  #    # vector 'x' is attached to workspace like a method,
  #    # so you can use like any variable
  #    mean,sd = x.mean, x.sd
  #    # Shameless R robbery
  #    a = c( 1:10)
  #    b = c(21:30)
  #    summary(cor(ds)) # Call summary method on correlation matrix
  #  end
  #  # You can run the analysis by its name
  #  SciRuby::Analysis.run(:first)
  #  # or using the returned variables
  #  an1.run
  #  # You can also generate a report using ReportBuilder.
  #  # .summary() method call 'report_building' on the object,
  #  # instead of calling text summary
  #  an1.generate("report.html")
  module Analysis
    # Registry of stored analyses, keyed by the name passed to +store+.
    @@stored_analyses = {}
    # Name of the most recently stored analysis (nil until +store+ is called).
    @@last_analysis = nil

    # Removes every stored analysis from the registry.
    def self.clear_analysis
      @@stored_analyses.clear
    end

    # Returns the registry hash (name => suite).
    def self.stored_analyses
      @@stored_analyses
    end

    # Returns the suite registered under the most recently stored name,
    # or nil when that name is no longer (or was never) in the registry.
    def self.last
      @@stored_analyses[@@last_analysis]
    end

    # Creates a Suite from +block+ and registers it under +name+.
    # +opts+ is merged over <tt>{:name => name}</tt> and handed to Suite.new.
    # Raises a RuntimeError when no block is given. Returns the new suite.
    def self.store(name, opts = {}, &block)
      raise "You should provide a block" unless block
      @@last_analysis = name
      opts = { :name => name }.merge(opts)
      @@stored_analyses[name] = Suite.new(opts, &block)
    end

    # Run analysis +*args+
    # Without arguments, run all stored analyses
    # Only 'echo' will be printed to screen.
    # Raises a RuntimeError when any requested name is not stored.
    def self.run(*args)
      args = stored_analyses.keys if args.empty?
      raise "Analysis #{args} doesn't exists" unless (args - stored_analyses.keys).empty?
      args.each { |name| stored_analyses[name].run }
    end

    # Add analysis +*args+ to a ReportBuilder object.
    # Without arguments, add all stored analyses.
    # Each analysis is wrapped inside a ReportBuilder::Section object.
    # This is the method used by +save+ and +to_text+.
    # Raises a RuntimeError when any requested name is not stored.
    def self.add_to_reportbuilder(rb, *args)
      args = stored_analyses.keys if args.empty?
      # The original interpolated +name+ here, which at this point is
      # Module#name ("SciRuby::Analysis") rather than the requested analyses;
      # report the requested names exactly as +run+ does.
      raise "Analysis #{args} doesn't exists" unless (args - stored_analyses.keys).empty?
      args.each do |name|
        suite   = stored_analyses[name]
        section = ReportBuilder::Section.new(:name => suite.name)
        rb_an   = suite.add_to_reportbuilder(section)
        # The section is attached to the report before the analysis runs.
        rb.add(section)
        rb_an.run
      end
    end

    # Save the analysis to a file.
    # Without arguments, adds all stored analyses.
    def self.save(filename, *args)
      rb = ReportBuilder.new(:name => filename)
      add_to_reportbuilder(rb, *args)
      rb.save(filename)
    end

    # Run analysis and return as string.
    # Only 'echo' will be printed to screen.
    # Without arguments, add all stored analyses.
    def self.to_text(*args)
      rb = ReportBuilder.new(:name => "Analysis #{Time.now}")
      add_to_reportbuilder(rb, *args)
      rb.to_text
    end

    # Run analysis and print to screen all echo and summary callings
    def self.run_batch(*args)
      puts to_text(*args)
    end
  end
end
|
data/lib/sciruby/config.rb
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
module SciRuby
  # Helpers for working inside the per-user <tt>.sciruby</tt> directory (under
  # Dir.home) and for naming and writing cached dataset files there.
  #
  # Every directory helper works by Dir.chdir with a block, so the working
  # directory is changed only for the duration of the yielded block.
  module Config
    class << self

      # Create a .sciruby directory in the user's home directory if it doesn't
      # exist, chdir to it, and yield (when a block is given).
      # Returns the block's value.
      def dir
        Dir.chdir(Dir.home) do
          # Dir.exist? replaces Dir.exists?, which was removed in Ruby 3.2.
          Dir.mkdir('.sciruby') unless Dir.exist?('.sciruby')
          Dir.chdir('.sciruby') do
            yield if block_given?
          end
        end
      end

      # Create a data dir in the .sciruby directory if it doesn't exist (data/),
      # chdir to it, and yield (when a block is given).
      def data_dir
        dir do
          Dir.mkdir('data') unless Dir.exist?('data')
          Dir.chdir('data') do
            # Guarded like the sibling helpers, so a block is optional.
            yield if block_given?
          end
        end
      end

      # Create a data source directory within the .sciruby dir for a given
      # module, chdir to it, and yield (when a block is given).
      #
      # module_name:: a Symbol, used verbatim as the directory name, or a
      #               String holding a namespaced constant name, whose first
      #               two <tt>::</tt>-separated components are dropped before
      #               +underscore+ is applied to the remainder.
      # create::      when false the directory is not created, and the chdir
      #               fails if it does not already exist.
      #
      # NOTE(review): String#underscore is not core Ruby; presumably it comes
      # from the gem's lib/ext/string.rb -- confirm. Also note that a String
      # with fewer than three components yields an empty directory name.
      def data_source_dir(module_name, create = true)
        dir_name =
          if module_name.is_a?(Symbol)
            module_name.to_s
          else
            module_name.split('::').drop(2).join('::').underscore
          end

        data_dir do
          Dir.mkdir(dir_name) if create && !Dir.exist?(dir_name)
          Dir.chdir(dir_name) do
            yield if block_given?
          end
        end
      end

      # Add an extension to the basename for a dataset based on the format.
      # With a nil +format+ the bare basename is returned.
      def filename_for_dataset(id, format = nil)
        basename = basename_for_dataset(id)
        format.nil? ? basename : [basename, format.to_s].join('.')
      end

      # Generate a safe filename for a dataset: every character other than
      # ASCII letters, digits and underscore is replaced by an underscore.
      # +id+ is converted with +to_s+ first, so non-String ids are accepted.
      # Distinct ids can map to the same basename; this may need to be
      # improved to incorporate some kind of hash.
      def basename_for_dataset(id)
        id.to_s.gsub(/[^a-zA-Z0-9\_]/, '_')
      end

      # Determines whether the basename for a cached dataset exists in some
      # format or another, looking in the current working directory.
      # Returns the first matching filename, or nil when there is none.
      def basename_exists?(id)
        Dir.glob("#{basename_for_dataset(id)}.*").first
      end

      # Store a given dataset in the module's data source directory.
      # Nothing is written when the target file already exists or when the
      # dataset is already cached under the same basename in any format.
      def cache_dataset(module_name, dataset_id, file_contents, format)
        for_dataset_filename(module_name, dataset_id, format) do |dataset_filename|
          # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
          next if File.exist?(dataset_filename) || basename_exists?(dataset_id)

          File.open(dataset_filename, 'w') { |file| file.write(file_contents) }
        end
      end

      # In the data source directory, do something with the dataset cache file. e.g.,
      #   for_dataset_filename(:guardian, '963', :csv) do |dataset_filename|
      #     File.open(dataset_filename, 'w') do |f|
      #       f.write "Hello, world!"
      #     end
      #   end
      #
      # It computes the block arg (here, +dataset_filename+) for you using
      # Config::filename_for_dataset. It also puts you in the correct directory.
      #
      # This function is used by Config::cache_dataset.
      def for_dataset_filename(module_name, dataset_id, format, &block)
        data_source_dir(module_name) do
          yield filename_for_dataset(dataset_id, format)
        end
      end

      # Like +for_dataset_filename+, but yields the extension-less basename
      # computed by Config::basename_for_dataset.
      def for_dataset_basename(module_name, dataset_id, &block)
        data_source_dir(module_name) do
          yield basename_for_dataset(dataset_id)
        end
      end

    end
  end
end
|
@@ -1,96 +0,0 @@
|
|
1
|
-
module SciRuby
  module Data

    # World Government Data from the Guardian.
    class Guardian < PublicSearcher
      QUERY_DOMAIN = %q{www.guardian.co.uk}
      QUERY_PATH = %q{/world-government-data/search.json}
      FOUR_OH_FOUR_MESSAGE = '404 Page not found'
      # Download formats #dataset will attempt to parse.
      # NOTE(review): these are Symbols; if the search results carry format
      # names as Strings the include? check in #dataset never matches --
      # confirm what PublicSearcher hands back.
      ALLOWED_FORMATS = [:csv, :excel]

      # Meta-data for one search result. Each entry of +download_links+ is
      # wrapped in an OpenStruct so its fields can be read as methods.
      class DatasetInfo < ::OpenStruct
        # h:: hash describing one search result; it must provide
        #     +download_links+ as an array of hashes. The array is converted
        #     in place.
        def initialize h
          super h
          download_links.map! { |link| ::OpenStruct.new(link) }
        end
      end


      # Search the site or database using some set of parameters.
      #
      # This function is the one that you should redefine if you want to require certain parameters, or if there are
      # parameter co-dependencies. Ultimately, you call `search_internal(params)`.
      #
      # == Arguments
      # * q: keywords (default: '', if no other parameters are supplied)
      # * facet_country: country code abbreviation to search
      # * facet_source_title: e.g., data from Australian government would be data.nsw.org.au
      # * facet_format: e.g., csv, excel, xml, shapefile, kml
      def initialize args={}
        #args[:facet_format] ||= :csv
        #@require_format ||= args[:facet_format] # This should be removed when we can interpret other formats.

        @search_result = search(args)
      end

      # Return dataset meta-data found in the search, hashed by source_id. So, do datasets.keys if you want a list of
      # source_ids. The hash is built once and memoized.
      def datasets
        @datasets ||= search_result["results"].each_with_object({}) do |res, by_id|
          by_id[res['source_id']] = DatasetInfo.new(res)
        end
      end

      # Download a specific dataset by +source_id+ and cache it in the searcher.
      # Returns whatever +parse_dataset+ produced for the first download link
      # that parses (presumably a Statsample::Dataset -- confirm), or nil when
      # no link in an allowed format yields a dataset.
      #
      # Raises DatasetNotFoundError when handling the *last* download link
      # fails with a TypeError; TypeErrors on earlier links are skipped so the
      # next link can be tried.
      #
      # If this raises an exception, you can try this:
      #
      #   links = raw_dataset_links_cached(source_id)
      #
      # And then for each of +links+, do `raw_dataset(source_id, link)` to see what the actual downloaded data was.
      # This is good for debugging -- e.g., did the page move? or is there something wrong with Ruby's CSV interpreter?
      # Or is it in some other format altogether?
      #
      # Right now, this function only handles CSV. TODO: Add more format handlers!
      def dataset source_id
        @dataset ||= {}
        # The original used `return ds` inside this block, which left the
        # method before the ||= assignment (so nothing was ever memoized) and,
        # when no link parsed, cached the download_links array itself as the
        # "dataset". The block now evaluates to the parsed dataset or nil.
        @dataset[source_id] ||= begin # Datasets are stored by source ID
          info   = datasets[source_id]
          links  = info.download_links
          parsed = nil

          links.each_with_index do |link_info, index|
            next unless ALLOWED_FORMATS.include?(link_info.format) # Format is incorrect.

            # Format appears to be correct, prior to actually downloading. Proceed.

            # Attempt to read the cached one first, and if that fails, try downloading.
            raw = cached_dataset(source_id) || download_dataset(link_info.link)

            begin
              parsed = parse_dataset link_info.format, raw, info.title
              cache_dataset(source_id, raw, link_info.format)
            rescue TypeError => e
              # Only give up once the final link has failed.
              raise DatasetNotFoundError.new(e) if index == links.size - 1
            end

            break unless parsed.nil?
          end

          parsed
        end
      end

    end
  end
end
|
data/lib/sciruby/data/r/base.rb
DELETED
@@ -1,110 +0,0 @@
|
|
1
|
-
module SciRuby::Data
  class R
    # Parses datasets from R directly.
    #
    # An instance wraps the name of an R object (+rob+) and reads its
    # properties by evaluating R expressions through SciRuby::Data::R.r and
    # parsing the printed output. Subclasses override +assign_properties+ to
    # pull the properties they care about.
    class Base
      # Matches the characters that mark a numeric token as a float.
      FLOAT_RE = /([.eE])/

      require "simpler"

      # id:: name of the R object to wrap.
      def initialize id
        @rob = id # R object name

        assign_properties # Read as many properties as possible from R
      end

      # With an argument: asks R for the class of +obj+ and returns the first
      # token of the answer.
      #
      # Without an argument: behaves like the ordinary Object#class. The
      # original definition required an argument, which made a plain
      # <tt>Base.class</tt> (and <tt>.class</tt> on every subclass) raise
      # ArgumentError.
      def self.class(*args)
        return super() if args.empty?
        if args.size > 1
          raise ArgumentError, "wrong number of arguments (#{args.size} for 0..1)"
        end

        Base.new(args.first).send :read_class
      end

      # Name of the wrapped R object; also available as +rname+.
      attr_reader :rob
      alias_method :rname, :rob

      protected

      # Hook called from +initialize+; the base implementation reads nothing.
      def assign_properties; end

      # Shortcut for SciRuby::Data::R.r.
      def r obj=nil
        SciRuby::Data::R.r(obj)
      end

      # Accessor for FLOAT_RE.
      def float_re
        SciRuby::Data::R::Base::FLOAT_RE
      end

      # Evaluates <tt>fn(rob)</tt> in R, or just +rob+ when +fn+ is nil, and
      # returns what <tt>eval!</tt> returns (treated as a String by the
      # readers below).
      def call_function fn=nil
        if fn.nil?
          r.eval! { rob }
        else
          r.eval! { "#{fn.to_s}(#{rob})" }
        end
      end

      # Evaluates <tt>rob$'prop'</tt> in R.
      def call_property prop
        r.eval! { "#{rob}$'#{prop.to_s}'"}
      end

      # First token of the single-line result of <tt>fn(rob)</tt>.
      def read_class fn=:class
        read_single_line(fn).first
      end

      # Calls +fn+ and parses its output as one line: the first
      # whitespace-separated token is dropped and the remainder is parsed as
      # space-separated CSV. Returns an array of tokens.
      def read_single_line fn=nil
        line = call_function fn
        #STDERR.puts "rsl Got back: #{line}"
        remainder = line.split(' ', 2)[1]
        CSV::parse_line(remainder, :col_sep => ' ')
      end

      # Calls +fn+ and returns the second whitespace-separated token of its
      # output (nil when there is none).
      def read_single_token fn=nil
        line = call_function fn
        #STDERR.puts "rst Got back: #{line}"
        line.split[1]
      end

      # Read multiple lines from a function call. You can also pass in a block if you want to ask for a property instead
      # of a function call, e.g.,
      #   read_multiple_lines { call_property('height') }
      #
      # Returns nil when the output starts with NULL, a flat array of tokens
      # when the first line starts with "[" (optionally preceded by spaces),
      # and raises a RuntimeError for anything else.
      def read_multiple_lines fn=nil
        lines = block_given? ? yield : call_function(fn)
        #STDERR.puts "rml Got back:\n#{lines}"

        lines = lines.split("\n")

        return nil if lines.first =~ /^NULL/
        raise "Unrecognized R output" unless lines.first =~ /^ *\[/

        lines.map do |line|
          # Drop the leading token of each line, then parse the rest.
          tokens = CSV::parse_line(line.split(' ', 2)[1], :col_sep => ' ')
          # A nil last field is discarded (presumably produced by trailing
          # whitespace in R's output -- confirm).
          tokens.pop if tokens.last.nil?
          tokens
        end.flatten
      end

      # Tokens returned by <tt>fn(rob)</tt>, or [] when R answers NULL.
      def read_row_names fn='rownames'
        read_multiple_lines(fn) || [] # nil means no rownames found.
      end

      # Same as +read_row_names+, for R's +colnames+.
      def read_col_names fn='colnames'
        read_row_names fn
      end

      # Same as +read_row_names+, for R's +names+.
      def read_names fn='names'
        read_row_names fn
      end

      # Same as +read_row_names+, for R's +levels+.
      def read_levels fn='levels'
        read_row_names fn
      end

      # Reads <tt>rob[,'field']</tt> for every entry of +fields+ and returns a
      # hash of field => column. A column is kept as the object returned by R
      # when it is a Vector with levels or a TimeSeries; otherwise it is
      # converted with +to_a+.
      # Raises ArgumentError when a field is nil.
      def read_columns fields
        columns = {}
        fields.each do |field|
          raise(ArgumentError, "nil field") if field.nil?

          column = SciRuby::Data::R.r("#{rob}[,'#{field.to_s}']")
          keep_r_object = (column.is_a?(Vector) && column.has_levels?) || column.is_a?(TimeSeries)
          columns[field] = keep_r_object ? column : column.to_a
        end
        columns
      end

    end
  end
end
|
@@ -1,24 +0,0 @@
|
|
1
|
-
module SciRuby::Data
  class R
    # An R object read as a set of row names plus a hash of named columns.
    # NOTE(review): presumably the counterpart of R's data.frame -- confirm.
    class DataFrame < Base
      # row_names:: result of +read_row_names+ for the wrapped object.
      # columns::   hash of column name => column, built by +read_columns+.
      attr_reader :row_names, :columns

      # Column names, i.e. the keys of +columns+.
      def col_names
        columns.keys
      end

      # Delegates to +levels+ on the column stored under +col_name+.
      def levels(col_name)
        column = columns[col_name]
        column.levels
      end

      protected

      # Hook run by Base#initialize: reads the row names first, then the
      # column names, then one column per column name.
      def assign_properties
        @row_names = read_row_names
        @columns   = read_columns(read_col_names)
      end

    end
  end
end
|
data/lib/sciruby/data/r/list.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
module SciRuby::Data
  class R
    # An intermediate object that doesn't really get used -- immediately gets converted to a Ruby Hash of other R objects.
    class List < Base
      # Hash of list item name => object returned by R for that item.
      # Empty when the R list has no names.
      def to_h
        @data
      end

      protected

      # Reads the list's names and fetches <tt>rob[['name']]</tt> for each.
      #
      # <tt>@names</tt> is nil when R reports no names. The original then
      # called +each+ on that nil and raised NoMethodError; an unnamed list
      # now simply produces an empty hash.
      def assign_properties
        names  = read_names
        @names = (names.nil? || (names.is_a?(Array) && names.empty?)) ? nil : names

        @data = {}
        return if @names.nil?

        @names.each do |list_item|
          @data[list_item] = r("#{rob}[['#{list_item}']]")
        end
      end
    end
  end
end
|
@@ -1,24 +0,0 @@
|
|
1
|
-
module SciRuby::Data
  class R
    # class 'mts' in R
    #
    # A time series with several named columns: adds row names and a column
    # hash to what TimeSeriesBase reads.
    class MultiTimeSeries < TimeSeriesBase
      # row_names:: result of +read_row_names+ for the wrapped object.
      # columns::   hash of column name => column, built by +read_columns+.
      attr_reader :row_names, :columns

      # Column names, i.e. the keys of +columns+.
      def col_names
        columns.keys
      end

      # Delegates to +levels+ on the column stored under +col_name+.
      def levels(col_name)
        column = columns[col_name]
        column.levels
      end

      protected

      # Reads row names, column names and the columns themselves, and only
      # then lets the superclass read its own properties.
      def assign_properties
        @row_names = read_row_names
        @columns   = read_columns(read_col_names)
        super
      end
    end
  end
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
module SciRuby::Data
  class R
    # class 'ts' in R
    class TimeSeries < TimeSeriesBase
      # data::   the series values as a Ruby array.
      # levels:: reader for <tt>@levels</tt>, which nothing in this class
      #          assigns.
      attr_reader :data, :levels

      # id:: name of the R object to wrap.
      #
      # The superclass constructor triggers +assign_properties+, which leaves
      # the object returned by R in <tt>@data</tt>; it is turned into a plain
      # array afterwards.
      def initialize(id)
        super(id)
        # Convert from R Vector to array.
        @data = @data.to_a
      end

      protected

      # Reads the inherited properties first, then evaluates <tt>c(rob)</tt>
      # to fetch the data, which is probably of type Vector.
      def assign_properties
        super
        @data = r("c(#{rob})")
      end
    end
  end
end
|
@@ -1,40 +0,0 @@
|
|
1
|
-
module SciRuby::Data
  class R
    # classes 'mts' and 'ts' in R; not instantiated directly. Use TimeSeries or MultiTimeSeries.
    class TimeSeriesBase < Base
      # start, end:: two-element arrays as returned by +read_time+.
      # frequency::  integer read from R's +frequency+.
      # delta_t::    <tt>1.0 / frequency</tt> when frequency is greater than
      #              one, otherwise the value read from R's +deltat+.
      attr_reader :start, :end, :frequency, :delta_t

      protected

      # Reads start, end, frequency and delta t from R, in that order.
      def assign_properties
        @start = read_time(:start)
        @end = read_time(:end)
        @frequency = read_frequency

        # in R, the user supplies either frequency or delta t, but not both. frequency is always an integer (if I remember correctly)
        # Integer replaces Fixnum, which was removed in Ruby 3.2.
        @delta_t = @frequency.is_a?(Integer) && @frequency > 1 ? 1.0 / @frequency : read_delta_t
      end

      # Frequency reported by R, converted with +to_i+.
      def read_frequency fn=:frequency
        read_single_token(fn).to_i
      end

      # Delta t reported by R, as a Float when the token looks like a float
      # and as an Integer otherwise.
      def read_delta_t fn=:deltat
        parse_number(read_single_token(fn))
      end

      # Returns either two integers (time and sample number) or a number (time) and nil
      #
      # The original computed the numeric conversion for the single-number
      # case but discarded it, returning the raw String; the converted number
      # is returned now.
      def read_time fn=nil
        time = read_single_line(fn)
        if time.size == 2 # vector of two integers
          time.map { |t| t.to_i }
        else # single number
          [parse_number(time.first), nil]
        end
      end

      # Converts a numeric token to a Float when it matches +float_re+
      # (contains ".", "e" or "E"), otherwise to an Integer.
      def parse_number token
        token =~ float_re ? token.to_f : token.to_i
      end
    end
  end
end
|