sciruby 0.1.0 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (121) hide show
  1. data/.autotest +23 -0
  2. data/.gemtest +0 -0
  3. data/History.txt +6 -0
  4. data/Manifest.txt +119 -0
  5. data/Rakefile +178 -0
  6. data/bin/sciruby-plotter +12 -0
  7. data/data/r/man/AirPassengers.Rd +51 -0
  8. data/data/r/man/BJsales.Rd +34 -0
  9. data/data/r/man/BOD.Rd +53 -0
  10. data/data/r/man/ChickWeight.Rd +68 -0
  11. data/data/r/man/DNase.Rd +63 -0
  12. data/data/r/man/EuStockMarkets.Rd +28 -0
  13. data/data/r/man/Formaldehyde.Rd +44 -0
  14. data/data/r/man/HairEyeColor.Rd +77 -0
  15. data/data/r/man/Harman23.cor.Rd +25 -0
  16. data/data/r/man/Harman74.cor.Rd +28 -0
  17. data/data/r/man/Indometh.Rd +57 -0
  18. data/data/r/man/InsectSprays.Rd +45 -0
  19. data/data/r/man/JohnsonJohnson.Rd +37 -0
  20. data/data/r/man/LakeHuron.Rd +27 -0
  21. data/data/r/man/LifeCycleSavings.Rd +54 -0
  22. data/data/r/man/Loblolly.Rd +56 -0
  23. data/data/r/man/Nile.Rd +78 -0
  24. data/data/r/man/Orange.Rd +57 -0
  25. data/data/r/man/OrchardSprays.Rd +62 -0
  26. data/data/r/man/PlantGrowth.Rd +39 -0
  27. data/data/r/man/Puromycin.Rd +84 -0
  28. data/data/r/man/Theoph.Rd +84 -0
  29. data/data/r/man/Titanic.Rd +73 -0
  30. data/data/r/man/ToothGrowth.Rd +40 -0
  31. data/data/r/man/UCBAdmissions.Rd +68 -0
  32. data/data/r/man/UKDriverDeaths.Rd +72 -0
  33. data/data/r/man/UKLungDeaths.Rd +40 -0
  34. data/data/r/man/UKgas.Rd +25 -0
  35. data/data/r/man/USAccDeaths.Rd +23 -0
  36. data/data/r/man/USArrests.Rd +45 -0
  37. data/data/r/man/USJudgeRatings.Rd +38 -0
  38. data/data/r/man/USPersonalExpenditure.Rd +33 -0
  39. data/data/r/man/VADeaths.Rd +51 -0
  40. data/data/r/man/WWWusage.Rd +41 -0
  41. data/data/r/man/WorldPhones.Rd +40 -0
  42. data/data/r/man/ability.cov.Rd +50 -0
  43. data/data/r/man/airmiles.Rd +29 -0
  44. data/data/r/man/airquality.Rd +56 -0
  45. data/data/r/man/anscombe.Rd +62 -0
  46. data/data/r/man/attenu.Rd +66 -0
  47. data/data/r/man/attitude.Rd +48 -0
  48. data/data/r/man/austres.Rd +22 -0
  49. data/data/r/man/beavers.Rd +73 -0
  50. data/data/r/man/cars.Rd +59 -0
  51. data/data/r/man/chickwts.Rd +47 -0
  52. data/data/r/man/co2.Rd +43 -0
  53. data/data/r/man/crimtab.Rd +129 -0
  54. data/data/r/man/datasets-package.Rd +24 -0
  55. data/data/r/man/discoveries.Rd +30 -0
  56. data/data/r/man/esoph.Rd +66 -0
  57. data/data/r/man/euro.Rd +56 -0
  58. data/data/r/man/eurodist.Rd +25 -0
  59. data/data/r/man/faithful.Rd +63 -0
  60. data/data/r/man/freeny.Rd +56 -0
  61. data/data/r/man/infert.Rd +56 -0
  62. data/data/r/man/iris.Rd +62 -0
  63. data/data/r/man/islands.Rd +29 -0
  64. data/data/r/man/lh.Rd +22 -0
  65. data/data/r/man/longley.Rd +56 -0
  66. data/data/r/man/lynx.Rd +33 -0
  67. data/data/r/man/morley.Rd +50 -0
  68. data/data/r/man/mtcars.Rd +44 -0
  69. data/data/r/man/nhtemp.Rd +30 -0
  70. data/data/r/man/nottem.Rd +30 -0
  71. data/data/r/man/occupationalStatus.Rd +44 -0
  72. data/data/r/man/precip.Rd +31 -0
  73. data/data/r/man/presidents.Rd +36 -0
  74. data/data/r/man/pressure.Rd +41 -0
  75. data/data/r/man/quakes.Rd +40 -0
  76. data/data/r/man/randu.Rd +46 -0
  77. data/data/r/man/rivers.Rd +21 -0
  78. data/data/r/man/rock.Rd +34 -0
  79. data/data/r/man/sleep.Rd +51 -0
  80. data/data/r/man/stackloss.Rd +77 -0
  81. data/data/r/man/state.Rd +80 -0
  82. data/data/r/man/sunspot.month.Rd +49 -0
  83. data/data/r/man/sunspot.year.Rd +26 -0
  84. data/data/r/man/sunspots.Rd +33 -0
  85. data/data/r/man/swiss.Rd +79 -0
  86. data/data/r/man/treering.Rd +38 -0
  87. data/data/r/man/trees.Rd +48 -0
  88. data/data/r/man/uspop.Rd +27 -0
  89. data/data/r/man/volcano.Rd +31 -0
  90. data/data/r/man/warpbreaks.Rd +56 -0
  91. data/data/r/man/women.Rd +40 -0
  92. data/data/r/man/zCO2.Rd +81 -0
  93. data/lib/ext/csv.rb +22 -0
  94. data/lib/ext/shoes.rb +131 -0
  95. data/lib/ext/string.rb +39 -0
  96. data/lib/sciruby.rb +50 -4
  97. data/lib/sciruby/analysis.rb +98 -0
  98. data/lib/sciruby/analysis/suite.rb +87 -0
  99. data/lib/sciruby/analysis/suite_report_builder.rb +44 -0
  100. data/lib/sciruby/config.rb +93 -0
  101. data/lib/sciruby/data.rb +168 -0
  102. data/lib/sciruby/data/guardian.rb +96 -0
  103. data/lib/sciruby/data/r.rb +155 -0
  104. data/lib/sciruby/data/r/base.rb +110 -0
  105. data/lib/sciruby/data/r/data_frame.rb +24 -0
  106. data/lib/sciruby/data/r/grouped_data.rb +7 -0
  107. data/lib/sciruby/data/r/list.rb +20 -0
  108. data/lib/sciruby/data/r/multi_time_series.rb +24 -0
  109. data/lib/sciruby/data/r/r_matrix.rb +7 -0
  110. data/lib/sciruby/data/r/time_series.rb +19 -0
  111. data/lib/sciruby/data/r/time_series_base.rb +40 -0
  112. data/lib/sciruby/data/r/vector.rb +125 -0
  113. data/lib/sciruby/editor.rb +82 -0
  114. data/lib/sciruby/plotter.rb +128 -0
  115. data/lib/sciruby/recommend.rb +4 -0
  116. data/lib/sciruby/validation.rb +368 -0
  117. data/readme.md +75 -0
  118. data/static/sciruby-icon.png +0 -0
  119. data/test/helpers_tests.rb +58 -0
  120. data/test/test_recommend.rb +16 -0
  121. metadata +396 -20
data/lib/ext/string.rb ADDED
@@ -0,0 +1,39 @@
1
+ class String
2
+ unless method_defined?(:constantize)
3
+ # Based on constantize from ActiveSupport::Inflector
4
+ def constantize
5
+ names = self.split('::')
6
+ names.shift if names.empty? || names.first.empty?
7
+
8
+ constant = Object
9
+ names.each do |name|
10
+ constant = constant.const_defined?(name, false) ? constant.const_get(name) : constant.const_missing(name)
11
+ end
12
+ constant
13
+ end
14
+ end
15
+
16
+ unless method_defined?(:camelize)
17
+ # Adapted from camelize from ActiveSupport::Inflector
18
+ def camelize first_letter_in_uppercase = true
19
+ if first_letter_in_uppercase
20
+ self.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
21
+ else
22
+ self.to_s[0].chr.downcase + self[1..-1].camelize
23
+ end
24
+ end
25
+ end
26
+
27
+ unless method_defined?(:underscore)
28
+ # Adapted from underscore from ActiveSupport::Inflector
29
+ def underscore
30
+ word = self.dup
31
+ word.gsub!(/::/, '/')
32
+ word.gsub!(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
33
+ word.gsub!(/([a-z\d])([A-Z])/,'\1_\2')
34
+ word.tr!("-", "_")
35
+ word.downcase!
36
+ word
37
+ end
38
+ end
39
+ end
data/lib/sciruby.rb CHANGED
@@ -1,4 +1,4 @@
1
- # = sci_ruby.rb -
1
+ # = sciruby.rb -
2
2
  # SciRuby - Ruby scientific visualization and computation.
3
3
  #
4
4
  # Copyright (C) 2011 SciRuby Development Team
@@ -9,7 +9,7 @@
9
9
  #
10
10
  # This program is free software; you can redistribute it and/or
11
11
  # modify it under the terms of the GNU General Public License
12
- # as published by the Free Software Foundation; either version 2
12
+ # as published by the Free Software Foundation; either version 3
13
13
  # of the License, or (at your option) any later version.
14
14
  #
15
15
  # This program is distributed in the hope that it will be useful,
@@ -24,8 +24,54 @@
24
24
  # Specific notices will be placed where they are appropriate.
25
25
  #
26
26
 
27
+ require "rubygems"
28
+ require "bundler/setup"
29
+
27
30
  module SciRuby
28
- VERSION = '0.1.0'
31
+ VERSION = '0.1.3'
32
+ DIR = Pathname.new(__FILE__).realpath.dirname.to_s
33
+
34
+ require File.join(::SciRuby::DIR, 'ext', 'string.rb')
35
+ require File.join(::SciRuby::DIR, 'ext', 'csv.rb')
36
+
37
+ class << self
38
+ def plot script
39
+ SciRuby::Plotter.new script
40
+ end
41
+
42
+ def integrate *args, &block
43
+ require "integration"
44
+ ::Integration.integrate(*args, &block)
45
+ end
29
46
 
30
- autoload(:Recommend, 'sciruby/recommend')
47
+ # Produce a list of datasets that can be loaded using the +dataset+ method
48
+ def dataset_search database, args = {}
49
+ "SciRuby::Data::#{database.to_s.camelize}".constantize.new(args).datasets.keys
50
+ end
51
+
52
+ # Load a dataset from a specific database. For a list of datasets, use `dataset_search(:guardian)`, for example.
53
+ def dataset database, source_id
54
+ begin
55
+ "SciRuby::Data::#{database.to_s.camelize}".constantize.new.dataset(source_id)
56
+ rescue DatabaseUnavailableError => e
57
+ warn "Database appears to be unavailable. Attempting to use cached version."
58
+ SciRuby::Data::Cacher.new.dataset(source_id, database)
59
+ end
60
+ end
61
+
62
+ # Shorthand for SciRuby::Analysis.store(*args, &block)
63
+ def analyze *args, &block
64
+ SciRuby::Analysis.store(*args, &block)
65
+ end
66
+ end
67
+
68
+ autoload(:Analysis, File.join(DIR, 'sciruby', 'analysis'))
69
+ autoload(:Config, File.join(DIR, 'sciruby', 'config'))
70
+ autoload(:Editor, File.join(DIR, 'sciruby', 'editor'))
71
+ autoload(:Plotter, File.join(DIR, 'sciruby', 'plotter'))
72
+ autoload(:Recommend, File.join(DIR, 'sciruby', 'recommend'))
73
+ autoload(:Validation, File.join(DIR, 'sciruby', 'validation'))
74
+ autoload(:Data, File.join(DIR, 'sciruby', 'data'))
31
75
  end
76
+
77
+ autoload(:Shoes, File.join(SciRuby::DIR, 'ext', 'shoes'))
@@ -0,0 +1,98 @@
1
+ require 'sciruby/analysis/suite'
2
+ require 'sciruby/analysis/suite_report_builder'
3
+
4
+ module SciRuby
5
+ # DSL to run a statistical analysis without hassle.
6
+ # * Shortcut methods to avoid having to use complete namespaces, many based on R.
7
+ # * Attach/detach vectors to workspace, as with R
8
+ # == Example
9
+ # an1 = SciRuby::Analysis.store(:first) do
10
+ # # Load excel file with x,y,z vectors
11
+ # ds = excel('data.xls')
12
+ # # See variables on ds dataset
13
+ # names(ds)
14
+ # # Attach the vectors to workspace, like R
15
+ # attach(ds)
16
+ # # vector 'x' is attached to workspace like a method,
17
+ # # so you can use like any variable
18
+ # mean,sd = x.mean, x.sd
19
+ # # Shameless R robbery
20
+ # a = c( 1:10)
21
+ # b = c(21:30)
22
+ # summary(cor(ds)) # Call summary method on correlation matrix
23
+ # end
24
+ # # You can run the analysis by its name
25
+ # SciRuby::Analysis.run(:first)
26
+ # # or using the returned variables
27
+ # an1.run
28
+ # # You can also generate a report using ReportBuilder.
29
+ # # .summary() method calls 'report_building' on the object,
30
+ # # instead of calling text summary
31
+ # an1.generate("report.html")
32
+ module Analysis
33
+ @@stored_analyses={}
34
+ @@last_analysis=nil
35
+ def self.clear_analysis
36
+ @@stored_analyses.clear
37
+ end
38
+ def self.stored_analyses
39
+ @@stored_analyses
40
+ end
41
+ def self.last
42
+ @@stored_analyses[@@last_analysis]
43
+ end
44
+ def self.store(name, opts=Hash.new,&block)
45
+ raise "You should provide a block" if !block
46
+ @@last_analysis=name
47
+ opts={:name=>name}.merge(opts)
48
+ @@stored_analyses[name]=Suite.new(opts,&block)
49
+ end
50
+ # Run analysis +*args+
51
+ # Without arguments, run all stored analyses
52
+ # Only 'echo' will be printed to screen.
53
+ def self.run(*args)
54
+ args=stored_analyses.keys if args.size==0
55
+ raise "Analysis #{args} doesn't exists" if (args - stored_analyses.keys).size>0
56
+ args.each do |name|
57
+ stored_analyses[name].run
58
+ end
59
+ end
60
+
61
+ # Add analysis +*args+ to a ReportBuilder object.
62
+ # Without arguments, add all stored analyses.
63
+ # Each analysis is wrapped inside a ReportBuilder::Section object.
64
+ # This is the method used by +save+ and +to_text+.
65
+ def self.add_to_reportbuilder(rb, *args)
66
+ args=stored_analyses.keys if args.size==0
67
+ raise "Analysis #{name} doesn't exists" if (args - stored_analyses.keys).size>0
68
+ args.each do |name|
69
+ section=ReportBuilder::Section.new(:name=>stored_analyses[name].name)
70
+ rb_an=stored_analyses[name].add_to_reportbuilder(section)
71
+ rb.add(section)
72
+ rb_an.run
73
+ end
74
+ end
75
+
76
+ # Save the analysis to a file.
77
+ # Without arguments, adds all stored analyses.
78
+ def self.save(filename, *args)
79
+ rb=ReportBuilder.new(:name=>filename)
80
+ add_to_reportbuilder(rb, *args)
81
+ rb.save(filename)
82
+ end
83
+
84
+ # Run analysis and return as string.
85
+ # Only 'echo' will be printed to screen.
86
+ # Without arguments, add all stored analyses.
87
+ def self.to_text(*args)
88
+ rb=ReportBuilder.new(:name=>"Analysis #{Time.now}")
89
+ add_to_reportbuilder(rb, *args)
90
+ rb.to_text
91
+ end
92
+
93
+ # Run analysis and print to screen all echo and summary calls
94
+ def self.run_batch(*args)
95
+ puts to_text(*args)
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,87 @@
1
+ require "statsample"
2
+
3
+ module SciRuby
4
+ module Analysis
5
+ class Suite
6
+ include ::Statsample::Shorthand
7
+ attr_accessor :output
8
+ attr_accessor :name
9
+ attr_reader :block
10
+ def initialize(opts=Hash.new(), &block)
11
+ if !opts.is_a? Hash
12
+ opts={:name=>opts}
13
+ end
14
+
15
+ @block=block
16
+ @name=opts[:name] || "Analysis #{Time.now}"
17
+ @attached=[]
18
+ @output=opts[:output] || ::STDOUT
19
+ end
20
+ # Run the analysis by evaluating the stored block (in the suite's context, or with the suite passed as the block's argument).
21
+ def run
22
+ @block.arity<1 ? instance_eval(&@block) : @block.call(self)
23
+ end
24
+ # Provides a description of the procedure. Only appears as a commentary on
25
+ # SuiteReportBuilder outputs
26
+ def desc(d)
27
+ @output.puts("Description:")
28
+ @output.puts(" #{d}")
29
+ end
30
+ def echo(*args)
31
+ @output.puts(*args)
32
+ end
33
+ def summary(obj)
34
+ obj.summary
35
+ end
36
+ def add_to_reportbuilder(rb)
37
+ SuiteReportBuilder.new({:name=>name, :rb=>rb}, &block)
38
+ end
39
+
40
+ def generate(filename)
41
+ ar=SuiteReportBuilder.new({:name=>name}, &block)
42
+ ar.generate(filename)
43
+ end
44
+ def to_text
45
+ ar=SuiteReportBuilder.new({:name=>name}, &block)
46
+ ar.to_text
47
+ end
48
+
49
+ def attach(ds)
50
+ @attached.push(ds)
51
+ end
52
+ def detach(ds=nil)
53
+ if ds.nil?
54
+ @attached.pop
55
+ else
56
+ @attached.delete(ds)
57
+ end
58
+ end
59
+ alias :old_boxplot :boxplot
60
+ alias :old_histogram :histogram
61
+ alias :old_scatterplot :scatterplot
62
+
63
+ def show_svg(svg)
64
+ require 'tmpdir'
65
+ fn=Dir.tmpdir+"/image_#{Time.now.to_f}.svg"
66
+ File.open(fn,"w") {|fp| fp.write svg}
67
+ `xdg-open '#{fn}'`
68
+ end
69
+ def boxplot(*args)
70
+ show_svg(old_boxplot(*args).to_svg)
71
+ end
72
+ def histogram(*args)
73
+ show_svg(old_histogram(*args).to_svg)
74
+ end
75
+ def scatterplot(*args)
76
+ show_svg(old_scatterplot(*args).to_svg)
77
+ end
78
+
79
+ def method_missing(name, *args,&block)
80
+ @attached.reverse.each do |ds|
81
+ return ds[name.to_s] if ds.fields.include? (name.to_s)
82
+ end
83
+ raise "Method #{name} doesn't exists"
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,44 @@
1
+ module SciRuby
2
+ module Analysis
3
+ class SuiteReportBuilder < Suite
4
+ attr_accessor :rb
5
+ def initialize(opts=Hash.new,&block)
6
+ if !opts.is_a? Hash
7
+ opts={:name=>opts}
8
+ end
9
+ super(opts,&block)
10
+ @rb=opts[:rb] || ReportBuilder.new(:name=>name)
11
+ end
12
+ def generate(filename)
13
+ run if @block
14
+ @rb.save(filename)
15
+ end
16
+ def to_text
17
+ run if @block
18
+ @rb.to_text
19
+ end
20
+ def summary(o)
21
+ @rb.add(o)
22
+ end
23
+ def desc(d)
24
+ @rb.add(d)
25
+ end
26
+ def echo(*args)
27
+ args.each do |a|
28
+ @rb.add(a)
29
+ end
30
+ end
31
+
32
+ def boxplot(*args)
33
+ @rb.add(old_boxplot(*args))
34
+ end
35
+ def histogram(*args)
36
+ @rb.add(old_histogram(*args))
37
+ end
38
+ def boxplot(*args)
39
+ @rb.add(old_boxplot(*args))
40
+ end
41
+
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,93 @@
1
+ module SciRuby
2
+ module Config
3
+ class << self
4
+
5
+ # Create a .sciruby directory in the user's home directory if it doesn't exist, and chdir to it for the duration of the block.
6
+ def dir
7
+ Dir.chdir(Dir.home) do
8
+ FileUtils.mkdir('.sciruby') unless Dir.exists?('.sciruby')
9
+ Dir.chdir '.sciruby' do
10
+ yield if block_given?
11
+ end
12
+ end
13
+ end
14
+
15
+ # Create a data dir in the .sciruby directory if it doesn't exist (data/) and chdir to it.
16
+ def data_dir
17
+ dir do
18
+ FileUtils.mkdir('data') unless Dir.exists?('data')
19
+ Dir.chdir 'data' do
20
+ yield
21
+ end
22
+ end
23
+ end
24
+
25
+ # Create a data source directory within the .sciruby dir for a given module, e.g., ~/.sciruby/data/guardian for Guardian.
26
+ def data_source_dir module_name, create=true
27
+ dir_name = module_name.to_s if module_name.is_a?(Symbol)
28
+ dir_name ||= module_name.split('::').tap{ |m| 2.times { m.shift } }.join('::').underscore
29
+ data_dir do
30
+ FileUtils.mkdir(dir_name) if !Dir.exists?(dir_name) && create
31
+ Dir.chdir dir_name do
32
+ yield if block_given?
33
+ end
34
+ end
35
+ end
36
+
37
+
38
+ # Add an extension to the basename for a dataset based on the format.
39
+ def filename_for_dataset id, format=nil
40
+ basename = basename_for_dataset(id)
41
+ format.nil? ? basename : [basename, format.to_s].join('.')
42
+ end
43
+
44
+ # Generate a unique and safe filename for a dataset. This may need to be improved to incorporate some kind of hash.
45
+ # Hopefully there will be no collisions.
46
+ def basename_for_dataset id
47
+ return id.gsub(/[^a-zA-Z0-9\_]/, '_')
48
+ end
49
+
50
+ # Determines whether the basename for a cached dataset exists in some format or another.
51
+ def basename_exists? id
52
+ matches = Dir.glob("#{basename_for_dataset(id)}.*")
53
+ return matches.first if matches.size >= 1
54
+ return nil
55
+ end
56
+
57
+ # Store a given dataset in the .sciruby/data directory.
58
+ def cache_dataset module_name, dataset_id, file_contents, format
59
+ for_dataset_filename(module_name, dataset_id, format) do |dataset_filename|
60
+ unless File.exists?(dataset_filename) || basename_exists?(dataset_id)
61
+ File.open(dataset_filename, 'w') do |file|
62
+ file.write file_contents
63
+ end
64
+ end
65
+ end
66
+ end
67
+
68
+ # In the data source directory, do something with the dataset cache file. e.g.,
69
+ # for_dataset_filename(:guardian, '963', :csv) do |dataset_filename|
70
+ # File.open(dataset_filename, 'w') do |f|
71
+ # f.write "Hello, world!"
72
+ # end
73
+ # end
74
+ #
75
+ # It computes the block arg (here, +dataset_filename+) for you using Config::filename_for_dataset. It also puts
76
+ # you in the correct directory.
77
+ #
78
+ # This function is used by Config::cache_dataset.
79
+ def for_dataset_filename module_name, dataset_id, format, &block
80
+ data_source_dir module_name do
81
+ yield filename_for_dataset(dataset_id, format)
82
+ end
83
+ end
84
+
85
+ def for_dataset_basename module_name, dataset_id, &block
86
+ data_source_dir module_name do
87
+ yield basename_for_dataset(dataset_id)
88
+ end
89
+ end
90
+
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,168 @@
1
+ require "json"
2
+ require "net/http"
3
+ require "uri"
4
+ require "cgi"
5
+ require "ostruct"
6
+
7
+
8
+ module SciRuby
9
+ class DatabaseUnavailableError < IOError
10
+ def initialize domain, path, http_get_result=nil
11
+ @domain = domain
12
+ @path = path
13
+ @http_get_result = http_get_result
14
+ end
15
+ attr_reader :domain, :path, :http_get_result
16
+
17
+ def to_s
18
+ "Database at domain '#{@domain}', path '#{@path}' appears to be unavailable."
19
+ end
20
+ end
21
+
22
+ class DatasetNotFoundError < TypeError
23
+ def initialize e
24
+ @exp=e
25
+ end
26
+
27
+ def to_s
28
+ "Dataset does not exist. It may have moved, is not available in a format SciRuby can interpret." + @exp.message + "\n" + @exp.backtrace.join("\n")
29
+ end
30
+ end
31
+
32
+
33
+ module Data
34
+ DIR = File.join(SciRuby::DIR, 'sciruby', 'data')
35
+
36
+ def self.in_dir &block
37
+ Dir.chdir(File.join(SciRuby::DIR, '..', 'data')) do
38
+ yield
39
+ end
40
+ end
41
+
42
+ # Really just a placeholder.
43
+ class Base #:nodoc:
44
+ end
45
+
46
+ # Basic dataset type -- handles caching of datasets, that's about it.
47
+ class Cacher < Base
48
+
49
+ # Attempt to load a dataset. This is overridden for publicly-searchable datasets.
50
+ # Basically it works as a fallback if a publicly-searchable database is unavailable for some reason, but we
51
+ # may already have the data in the cache.
52
+ def dataset source_id, module_name=nil
53
+ module_name ||= self.class.to_s
54
+ raw = cached_dataset(source_id, module_name)
55
+ if raw.nil?
56
+ raise(ArgumentError, "Dataset is not cached.")
57
+ else
58
+ match = SciRuby::Config.data_source_dir(module_name, false) { SciRuby::Config.basename_exists?(source_id) }
59
+ format = match.split('.').last.to_sym
60
+ title = SciRuby::Config.basename_for_dataset(source_id)
61
+ parse_dataset(format, raw, title)
62
+ end
63
+ end
64
+
65
+ protected
66
+
67
+ # Attempt to get the dataset from the cache. This function is a little bit fragile for the following reason:
68
+ # The +dataset+ function [eventually] allows for different +download_links+ of a dataset, which may be in different
69
+ # formats. +cached_dataset+, however, guesses the format based on the format indicated for the first download link.
70
+ #
71
+ # TODO: Consider gzipping cached datasets.
72
+ def cached_dataset source_id, module_name=nil
73
+ module_name ||= self.class.to_s
74
+ SciRuby::Config.for_dataset_basename(module_name, source_id) do |basename|
75
+ filename = SciRuby::Config.basename_exists?(source_id)
76
+ return nil unless filename
77
+ File.read(filename)
78
+ end
79
+ end
80
+
81
+ # Store a dataset locally. Use cached_dataset to retrieve.
82
+ def cache_dataset source_id, raw_data, format
83
+ SciRuby::Config.cache_dataset self.class.to_s, source_id, raw_data, format
84
+ end
85
+
86
+ # Parse a dataset, using the interpreter appropriate for the given format.
87
+ def parse_dataset format, raw, name
88
+ begin
89
+ case format
90
+ when :csv
91
+ CSV.parse(raw, :headers => true, :converters => :all).to_dataset.tap { |da| da.name = name }
92
+ when :excel
93
+ Statsample::Excel.parse(raw, :name => name)
94
+ end
95
+ rescue NameError => e
96
+ STDERR.puts "Unable to load statsample"
97
+ raise e
98
+ rescue => e
99
+ STDERR.puts e.inspect
100
+ raise TypeError, "Format was not as expected; dataset may have moved"
101
+ end
102
+ end
103
+ end
104
+
105
+
106
+ # Base class for searching datasets. R dataset interpreter and PublicSearcher (and thus Guardian) are all derived
107
+ # from this type.
108
+ class Searcher < Cacher
109
+ def initialize args={}
110
+ @search_result = search(args)
111
+ end
112
+ end
113
+
114
+
115
+ # Handles searching public datasets. Doesn't actually do it itself, but you can derive searchers from this -- e.g.,
116
+ # Guardian.
117
+ class PublicSearcher < Searcher
118
+ FOUR_OH_FOUR_MESSAGE = '404'
119
+ attr_reader :search_result
120
+
121
+ # Search the site or database using some set of parameters.
122
+ #
123
+ # This function is the one that you should redefine if you want to require certain parameters, or if there are
124
+ # parameter co-dependencies. Ultimately, you call `search_internal(params)`.
125
+ #
126
+ # == Example Arguments
127
+ # * q: keywords
128
+ # * facet_country: country code abbreviation to search
129
+ # * facet_source_title: e.g., data from Australian government would be data.nsw.org.au
130
+ def search args={}
131
+ JSON.parse(search_internal(args))
132
+ end
133
+
134
+ # Download a dataset from a given link.
135
+ def download_dataset link
136
+ url = URI.parse link
137
+ http_get(url.host, url.path)
138
+ end
139
+
140
+ protected
141
+ # Like http_get, but gets the domain and path from the child searcher class.
142
+ def search_internal params={} #:nodoc:
143
+ domain = self.class.const_get(:QUERY_DOMAIN, true)
144
+ path = self.class.const_get(:QUERY_PATH, true)
145
+
146
+ result = http_get(domain, path, params)
147
+
148
+ if result.include?(self.class.const_get(:FOUR_OH_FOUR_MESSAGE, true))
149
+ raise(DatabaseUnavailableError.new(domain, path, result))
150
+ end
151
+
152
+ result
153
+ end
154
+
155
+ # Execute an HTTP get request with or without parameters.
156
+ #
157
+ # Adapted from: http://stackoverflow.com/questions/1252210/parametrized-get-request-in-ruby/1252305#1252305
158
+ def http_get domain, path, params = {} #:nodoc:
159
+ path_with_params = "#{path}?".concat(params.collect { |k,v| "#{k.to_s}=#{CGI::escape(v.to_s)}"}.join('&'))
160
+ return Net::HTTP.get(domain, path_with_params) unless params.empty?
161
+ Net::HTTP.get(domain, path)
162
+ end
163
+ end
164
+
165
+ autoload(:R, File.join(DIR, 'r'))
166
+ autoload(:Guardian, File.join(DIR, 'guardian'))
167
+ end
168
+ end