datafarming 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a228e8b48eaa2cd95669a37b0954e660e806df0e49bb9050a91ee46273f0cbbb
4
- data.tar.gz: 7303e31f70cad28645b92d2196a69770f22d02b7181c0e34ce502dd8c9e75115
3
+ metadata.gz: 9356e4448e1a6aa818a5361cc04a9149d8e3d78c74ca20aaf2bb1c353e0cbd87
4
+ data.tar.gz: 0e18063cfe4c92c66ed822debc6a20ef7c9a5b063e3c5769f0647d9109f98034
5
5
  SHA512:
6
- metadata.gz: 4d214779a72e0c9afdbc97abd54f6f8e027a069f5216189f3d49a1fe108f3e02f880095afd80e3a67c54636309a98b0dc13911fc4da5c2c66addcc90883ae762
7
- data.tar.gz: 5565f12ac35ed08841eecb2e775b2ec2021ac9d95c497b6f63a5505586305148672c06ed7a8633b363d6fbeda9d4e559493ebb7ac36969c0dcc9781bb4ebff7a
6
+ metadata.gz: 82b4d9c0ae4d331b5612bbaa930aabe57fd67b4fdb8b1c3e19f7d2df3904289c6d7adb209fcccef8186ec979d3a876dcca8b093229a69570d5b7f291abb26c61
7
+ data.tar.gz: b9972a61a075af31a8b59957fac9f308ddea8647ff2035d442c8285c609c01c3ef2a83c65857c11a95f568cbedf3fdee311cd6f8cc1dc7e712d94c103ba36931
data/README.md CHANGED
@@ -50,4 +50,8 @@ generates star points to augment a fractional factorial
50
50
  - `cross.rb` —
51
51
  creates a combinatorial design by crossing all combinations of any # of individual smaller designs
52
52
  - `mser.rb` —
53
- uses MSER truncation to remove initial transient effects for time-series output, reports truncated average and number of observations for each run to facilitate construction of a properly weighted confidence interval.
53
+ uses MSER truncation to remove initial transient effects for time-series output, reports truncated average and number of observations for each run to facilitate construction of a properly weighted confidence interval.
54
+ - `mser_nolbm.rb` —
55
+ uses MSER truncation to remove initial transient effects for time-series output, then calculates a 95% confidence interval on the remaining data using non-overlapping batch means.
56
+ - `mser_olbm.rb` —
57
+ uses MSER truncation to remove initial transient effects for time-series output, then calculates a 95% confidence interval on the remaining data using overlapping batch means.
@@ -1,10 +1,10 @@
1
1
  # -*- ruby -*-
2
- _VERSION = "1.2.0"
2
+ _VERSION = "1.3.0"
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "datafarming"
6
6
  s.version = _VERSION
7
- s.date = "2018-07-27"
7
+ s.date = "2018-08-17"
8
8
  s.summary = "Useful scripts for data farming."
9
9
  s.homepage = "https://gitlab.nps.edu/pjsanche/datafarmingrubyscripts.git"
10
10
  s.email = "pjs@alum.mit.edu"
@@ -36,5 +36,5 @@ end.parse!
36
36
 
37
37
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
38
38
 
39
- $-i = '.orig'
39
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
40
40
  ARGF.each { |line| puts line.strip.gsub(/\s+/, ',') }
@@ -34,5 +34,5 @@ end.parse!
34
34
 
35
35
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
36
36
 
37
- $-i = '.orig'
37
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
38
38
  ARGF.each { |line| puts line.split(/\r\n|\r/) }
@@ -35,5 +35,5 @@ end.parse!
35
35
 
36
36
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
37
37
 
38
- $-i = '.orig'
38
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
39
39
  ARGF.each { |line| puts line.strip.tr(',', ' ') }
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ # Efficiently calculate a moving average
4
+
5
+ require 'colorize'
6
+
7
+ String.disable_colorization false
8
+
9
+ require 'optparse'
10
+ require 'datafarming/error_handling'
11
+ require 'datafarming/moving_average'
12
+
13
# Banner text placed above the option summary in the help output.
help_msg = [
  'Calculate moving averages for one or more input files, or for ' +
    'stdin'.green + '.', 'Results are sent to ' + 'stdout'.green + '.', '',
  'Syntax:' + "\t#{ErrorHandling.prog_name} [OPTIONS] [FILENAMES...]".yellow, '',
  "Prefix the command with '" + 'ruby'.yellow +
    "' if it is not on your PATH.", '',
  'Options:', ''
]

opts_help = nil # holds the configured parser so it can be reported on bad usage
options = { column: 0 }
OptionParser.new do |opts|
  opts.banner = help_msg.join("\n")
  opts.on('-h', '-?', '--help', 'Print this help') do
    puts
    puts opts
    puts
    exit
  end
  opts.on('-w LEN',
          '--window LEN',
          'Window size to use for moving average',
          '[REQUIRED]'.red) { |win| options[:window] = win.to_i }
  # Columns are 1-based on the command line, 0-based internally.
  opts.on('-c COL',
          '--column COL',
          'Which column to average.',
          'Defaults to column 1.') { |col| options[:column] = col.to_i - 1 }
  opts_help = opts
end.parse!

# A missing, non-numeric, or non-positive window would make MovingAverage.new
# raise, and a column below 1 would silently index from the end of each row.
# Route both through the same abort path as a missing window.
bad_args = options[:window].nil? || options[:window] < 1 || options[:column] < 0
ErrorHandling.clean_abort [opts_help] if ARGV[0] == '?' || bad_args

column = options[:column]
ma = MovingAverage.new(options[:window])
ARGF.each do |line|
  value = line.strip.split(/[,:;]\s*|\s+/).map(&:to_f)
  next if value[column].nil? # blank line, or too few fields for this column

  # new_obs yields nil until a full window of observations has been seen.
  result = ma.new_obs(value[column])
  puts result unless result.nil?
end
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'rubygems' if RUBY_VERSION =~ /^1\.8/
4
+ require 'colorize'
5
+
6
+ String.disable_colorization false
7
+
8
+ require 'optparse'
9
+ require 'datafarming/error_handling'
10
+
11
+ begin
12
+ require 'quickstats'
13
+ rescue LoadError
14
+ ErrorHandling.clean_abort [
15
+ "\n\tALERT: quickstats gem is not installed!".red,
16
+ "\tIf you have network connectivity, type:",
17
+ "\n\t\tgem install quickstats\n".yellow,
18
+ "\t(Admin privileges may be required.)\n\n"
19
+ ]
20
+ end
21
+
22
# Help text, one array element per output line.
usage_line = "\n\t#{ErrorHandling.prog_name} [--help] [--column COL] [filenames...]"
help_msg = [
  'Calculate confidence intervals using batch means after MSER truncation.',
  "Results are written to #{'stdout'.blue} in CSV format, with headers.",
  '',
  'Syntax:',
  usage_line.yellow,
  '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  "the command with \"#{'ruby'.yellow}\" if it is not on your PATH.",
  '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' [--column COL | -c COL]'.green,
  "\tSpecify column to analyze. (Optional - default is column 1)",
  ' [filenames...]'.green,
  "\tThe names of one or more files containing data to be analyzed.",
  "\t(Optional - uses #{'stdin'.blue} if no files are specified.)"
]

# Run-time settings; :column is the zero-based index of the column to analyze.
OPTIONS = { column: 0 }

parser = OptionParser.new
parser.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
parser.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
parser.on('-c COL', '--column COL',
          'Which column to average.',
          'Defaults to column 1.') do |col|
  # Command-line columns are 1-based.
  OPTIONS[:column] = col.to_i - 1
end
parser.parse!

# A bare '?' as the first remaining argument is also a request for help.
ErrorHandling.clean_abort(help_msg) if ARGV.first == '?'
52
+
53
# Square a number.
def square(x)
  x * x
end

# Two-sided 95% Student's t critical values, indexed by degrees of freedom
# (0 through 30).  The entry for 0 degrees of freedom is infinite.
T_VALUE = [
  Float::INFINITY, 12.706, 4.303, 3.182, 2.776,
  2.571, 2.447, 2.365, 2.306, 2.262,
  2.228, 2.201, 2.179, 2.160, 2.145,
  2.131, 2.120, 2.110, 2.101, 2.093,
  2.086, 2.080, 2.074, 2.069, 2.064,
  2.060, 2.056, 2.052, 2.048, 2.045, 2.042
].freeze

# Apply MSER truncation to one series, then print a 95% confidence interval
# built from non-overlapping batch means of the retained observations.
#
# *Arguments*::
# - +data+ -> Array of input lines (Strings).  The array is modified in
#   place: an alphabetic header line and blank lines are removed and the
#   remaining lines are replaced by the Float parsed from column
#   OPTIONS[:column].
#
# *Output*::
# - One CSV row on stdout: sample mean, standard error, degrees of freedom,
#   lower bound, upper bound.  If there are no observations a warning is
#   written to stderr instead and no row is printed.
#
# *Returns*::
# - +nil+
#
def nolbm(data)
  # NOTE(review): a first data line in scientific notation (e.g. "1e5") also
  # matches this pattern and would be dropped as a header -- confirm inputs.
  data.shift if data[0] =~ /[A-Za-z]/ # strip header if one present
  data.reject! { |line| line.strip.empty? } # blank lines are not observations
  data.map! { |line| line.strip.split(/[,:;]\s*|\s+/)[OPTIONS[:column]].to_f }
  if data.empty?
    warn 'No observations found in input, skipping.'
    return
  end

  # MSER: accumulate statistics from the end of the series toward the front
  # and remember the starting index that gives the smallest standard error.
  m_stats = QuickStats.new
  warmup = [2 * data.length / 3, data.length - 10].min
  index = data.length - 1
  while index > (data.length - warmup) && index > 1
    m_stats.new_obs(data[index])
    index -= 1
  end
  # NOTE(review): at this point the statistics cover data[index + 1..-1] but
  # +index+ itself is recorded, unlike the loop below -- confirm intent.
  best = [m_stats.std_err, m_stats.avg, index]

  while index > -1
    m_stats.new_obs(data[index])
    best = [m_stats.std_err, m_stats.avg, index] if m_stats.std_err <= best[0]
    index -= 1
  end

  avg = best[1]
  index = best[2]
  length = data.length - index

  # Batch size is the larger of length/31 and the truncation point, clamped
  # to 1..length so that short series cannot produce a zero batch size or a
  # zero batch count (either of which used to raise ZeroDivisionError).
  m = [[length / 31, index, 1].max, length].min
  b = length / m
  m = length / b
  index += length - m * b # skip leftovers so the batches divide evenly

  mean_stats = QuickStats.new
  batch_means = Array.new(b) do
    mean_stats.reset
    m.times do
      mean_stats.new_obs(data[index])
      index += 1
    end
    mean_stats.avg
  end
  sum_squared_deviations = batch_means.
    map { |y| square(y - avg) }.
    inject(&:+)
  df = b - 1
  var_hat = sum_squared_deviations / df
  std_err = Math.sqrt(var_hat / b)
  # More than 31 batches can occur for short series; use the last tabulated
  # critical value there (slightly conservative) rather than indexing past
  # the end of the table.
  t_crit = T_VALUE[[df, T_VALUE.length - 1].min]
  half_width = t_crit * std_err
  lower = avg - half_width
  upper = avg + half_width
  printf "%f,%f,%d,%f,%f\n", avg, std_err, df, lower, upper
end
112
+
113
# Emit the CSV header, then one result row per input.
#
# The input source is chosen with ARGV.empty? rather than ARGF.filename:
# asking ARGF for its filename opens the first named file and shifts it off
# ARGV, so a following ARGV.each would skip that file.
puts "sample_mean,std_err,df,lower95_bound,upper95_bound"
if ARGV.empty?
  nolbm(STDIN.readlines)
else
  ARGV.each do |fname|
    # A lone '-' conventionally names standard input.
    data = fname == '-' ? STDIN.readlines : File.readlines(fname)
    nolbm(data)
  end
end
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'rubygems' if RUBY_VERSION =~ /^1\.8/
4
+ require 'colorize'
5
+
6
+ String.disable_colorization false
7
+
8
+ require 'optparse'
9
+ require 'datafarming/error_handling'
10
+ require 'datafarming/moving_average'
11
+
12
+ begin
13
+ require 'quickstats'
14
+ rescue LoadError
15
+ ErrorHandling.clean_abort [
16
+ "\n\tALERT: quickstats gem is not installed!".red,
17
+ "\tIf you have network connectivity, type:",
18
+ "\n\t\tgem install quickstats\n".yellow,
19
+ "\t(Admin privileges may be required.)\n\n"
20
+ ]
21
+ end
22
+
23
# Help text, one array element per output line.
usage_line = "\n\t#{ErrorHandling.prog_name} [--help] [--column COL] [filenames...]"
help_msg = [
  'Calculate confidence intervals using overlapping batch means after MSER',
  "truncation. Results are written to #{'stdout'.blue} in CSV format, with headers.",
  '',
  'Syntax:',
  usage_line.yellow,
  '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  "the command with \"#{'ruby'.yellow}\" if it is not on your PATH.",
  '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' [--column COL | -c COL]'.green,
  "\tSpecify column to analyze. (Optional - default is column 1)",
  ' [filenames...]'.green,
  "\tThe names of one or more files containing data to be analyzed.",
  "\t(Optional - uses #{'stdin'.blue} if no files are specified.)"
]

# Run-time settings; :column is the zero-based index of the column to analyze.
OPTIONS = { column: 0 }

parser = OptionParser.new
parser.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
parser.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
parser.on('-c COL', '--column COL',
          'Which column to average.',
          'Defaults to column 1.') do |col|
  # Command-line columns are 1-based.
  OPTIONS[:column] = col.to_i - 1
end
parser.parse!

# A bare '?' as the first remaining argument is also a request for help.
ErrorHandling.clean_abort(help_msg) if ARGV.first == '?'
53
+
54
# Square a number.
def square(x)
  x * x
end

# Two-sided 95% Student's t critical values, indexed by degrees of freedom
# (0 through 45).  The entry for 0 degrees of freedom is infinite.
T_VALUE = [
  Float::INFINITY, 12.706, 4.303, 3.182, 2.776,
  2.571, 2.447, 2.365, 2.306, 2.262,
  2.228, 2.201, 2.179, 2.160, 2.145,
  2.131, 2.120, 2.110, 2.101, 2.093,
  2.086, 2.080, 2.074, 2.069, 2.064,
  2.060, 2.056, 2.052, 2.048, 2.045,
  2.042, 2.040, 2.037, 2.035, 2.032,
  2.030, 2.028, 2.026, 2.024, 2.023,
  2.021, 2.020, 2.018, 2.017, 2.015, 2.014
].freeze

# Apply MSER truncation to one series, then print a 95% confidence interval
# built from overlapping batch means of the retained observations.
#
# *Arguments*::
# - +data+ -> Array of input lines (Strings).  The array is modified in
#   place: an alphabetic header line and blank lines are removed and the
#   remaining lines are replaced by the Float parsed from column
#   OPTIONS[:column].
#
# *Output*::
# - One CSV row on stdout: sample mean, standard error, degrees of freedom,
#   lower bound, upper bound.  If there are no observations a warning is
#   written to stderr instead and no row is printed.
#
# *Returns*::
# - +nil+
#
def olbm(data)
  # NOTE(review): a first data line in scientific notation (e.g. "1e5") also
  # matches this pattern and would be dropped as a header -- confirm inputs.
  data.shift if data[0] =~ /[A-Za-z]/ # strip header if one present
  data.reject! { |line| line.strip.empty? } # blank lines are not observations
  data.map! { |line| line.strip.split(/[,:;]\s*|\s+/)[OPTIONS[:column]].to_f }
  if data.empty?
    warn 'No observations found in input, skipping.'
    return
  end

  # MSER: accumulate statistics from the end of the series toward the front
  # and remember the starting index that gives the smallest standard error.
  mser = QuickStats.new
  warmup = [2 * data.length / 3, data.length - 10].min
  index = data.length - 1
  while index > (data.length - warmup) && index > 1
    mser.new_obs(data[index])
    index -= 1
  end
  # NOTE(review): at this point the statistics cover data[index + 1..-1] but
  # +index+ itself is recorded, unlike the loop below -- confirm intent.
  best = [mser.std_err, mser.avg, index]

  while index > -1
    mser.new_obs(data[index])
    best = [mser.std_err, mser.avg, index] if mser.std_err <= best[0]
    index -= 1
  end

  avg = best[1]
  start_index = best[2]
  length = data.length - start_index

  # Batch size is the larger of length/21 and the truncation point, capped at
  # length/3.  Short series can drive that to zero, which used to raise
  # ZeroDivisionError, so it is never allowed below 1.
  m = [[[length / 21, start_index].max, length / 3].min, 1].max
  b = length / m
  m = length / b
  n = m * b # observations actually used once the leftovers are skipped
  start_index += length - n
  ma = MovingAverage.new(m)

  # The moving average yields n - m + 1 overlapping batch means.
  sum_squared_deviations = data[start_index..-1].
    map { |y| ma.new_obs(y) }.compact.
    map { |y| square(y - avg) }.
    inject(&:+)

  # The divisor must be based on n, the number of observations that produced
  # the batch means above, not on the pre-trim length.
  se_sqr = (m.to_f / ((n - m) * (n - m + 1))) * sum_squared_deviations
  # With a single batch the expression below evaluates 0 * Infinity, and
  # NaN.to_i raises, so report zero degrees of freedom directly.
  df = b > 1 ? (3 * (b - 1) * (1 + (b - 1.0)**(-0.5 - 0.6 * b))).to_i / 2 : 0
  # df can exceed the table for short series; use the last tabulated critical
  # value there (slightly conservative) rather than indexing past the end.
  t_crit = T_VALUE[[df, T_VALUE.length - 1].min]
  half_width = t_crit * Math.sqrt(se_sqr)
  lower = avg - half_width
  upper = avg + half_width
  printf "%f,%f,%d,%f,%f\n", avg, Math.sqrt(se_sqr), df, lower, upper
end
109
+
110
# Emit the CSV header, then one result row per input.
#
# The input source is chosen with ARGV.empty? rather than ARGF.filename:
# asking ARGF for its filename opens the first named file and shifts it off
# ARGV, so a following ARGV.each would skip that file.
puts "sample_mean,std_err,df,lower95_bound,upper95_bound"
if ARGV.empty?
  olbm(STDIN.readlines)
else
  ARGV.each do |fname|
    # A lone '-' conventionally names standard input.
    data = fname == '-' ? STDIN.readlines : File.readlines(fname)
    olbm(data)
  end
end
@@ -37,7 +37,7 @@ end.parse!
37
37
 
38
38
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
39
39
 
40
- $-i = '.orig' # specify backup suffix
40
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
41
41
 
42
42
  oldfilename = ''
43
43
  header = ''
@@ -36,7 +36,7 @@ end.parse!
36
36
 
37
37
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
38
38
 
39
- $-i = '.orig' # specify backup suffix
39
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
40
40
 
41
41
  oldfilename = ''
42
42
 
@@ -13,8 +13,8 @@ require 'fwt'
13
13
  # corresponds to a factor and each row is a design point.
14
14
  #
15
15
  # Author:: Paul J Sanchez (mailto:pjs@alum.mit.edu)
16
- # Copyright:: Copyright (c) Paul J Sanchez
17
- # License:: LGPL
16
+ # Copyright:: Copyright (c) 2018 Paul J Sanchez
17
+ # License:: MIT
18
18
  #
19
19
  def make_design(number_of_factors)
20
20
  index = [1, 2, 4, 8, 15, 16, 32, 51, 64, 85, 106, 128, 150, 171,
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
# A class to efficiently perform Moving Average calculations.
# Calculating moving averages of length m on a set of data of length n
# requires Θ(m) storage and Θ(n) work.
#
# Author:: Paul J Sanchez (mailto:pjs at alum.mit.edu)
# Copyright:: Copyright (c) 2018 Paul J Sanchez
# License:: MIT
#
class MovingAverage
  # Number of elements in the moving average
  attr_reader :m

  # Initialize the MovingAverage object.
  #
  # *Arguments*::
  # - +m+ -> the number of elements to be averaged; an Integer, or a Float
  #   with an integral value
  #
  # *Raises*::
  # - RuntimeError if +m+ is not a whole number, or if +m+ < 1
  #
  def initialize(m)
    # A fractional window can never be filled exactly, so the running average
    # would be divided by the wrong count; reject it up front.
    whole = m.is_a?(Integer) || (m.is_a?(Float) && m.finite? && m == m.floor)
    fail 'Number of terms to avg (m) must be a whole number' unless whole
    fail 'Number of terms to avg (m) must be strictly positive' if m < 1
    @m = m.to_i
    @current_set = Array.new(@m)
    @current_avg = 0.0
    @current_count = 0
  end

  # Add a new observation, get the resulting moving average.
  #
  # *Arguments*::
  # - +x+ -> the new observation; a number, or a String holding one
  #
  # *Raises*::
  # - RuntimeError if +x+ is non-numeric
  #
  # *Returns*::
  # - Average of the last +m+ observations, or +nil+ if fewer than +m+ values have been processed.
  #
  def new_obs(x)
    begin
      # Float() rejects nil and malformed strings, where to_f would quietly
      # turn them into 0.0 and corrupt the average.
      x = Float(x)
    rescue ArgumentError, TypeError
      fail "Observation must be numeric, got #{x.inspect}"
    end
    if @current_count < @m
      # Still filling the first window: update the running mean incrementally.
      @current_set[@current_count] = x
      @current_count += 1
      @current_avg += (x - @current_avg) / @current_count
      @current_count == @m ? @current_avg : nil
    else
      # Window is full: swap the oldest observation for the newest one.
      @current_set << x
      @current_avg += (x - @current_set.shift) / @m
    end
  end
end
55
+
56
# Smoke test: when this file is run directly, print the Ruby version and the
# 3-term moving averages of the integers 0 through 19.
if $PROGRAM_NAME == __FILE__
  puts "Ruby v#{RUBY_VERSION}"
  averager = MovingAverage.new(3)
  # new_obs yields nil until the window is full; compact drops those.
  averages = (0...20).map { |i| averager.new_obs(i) }.compact
  puts averages.join(', ')
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datafarming
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul J Sanchez
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-27 00:00:00.000000000 Z
11
+ date: 2018-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fwt
@@ -63,7 +63,10 @@ executables:
63
63
  - convert_line_endings.rb
64
64
  - cross.rb
65
65
  - csv2blank.rb
66
+ - moving_average.rb
66
67
  - mser.rb
68
+ - mser_nolbm.rb
69
+ - mser_olbm.rb
67
70
  - pool_files.rb
68
71
  - rundesign_general.rb
69
72
  - scaled_fde.rb
@@ -84,7 +87,10 @@ files:
84
87
  - exe/convert_line_endings.rb
85
88
  - exe/cross.rb
86
89
  - exe/csv2blank.rb
90
+ - exe/moving_average.rb
87
91
  - exe/mser.rb
92
+ - exe/mser_nolbm.rb
93
+ - exe/mser_olbm.rb
88
94
  - exe/pool_files.rb
89
95
  - exe/rundesign_general.rb
90
96
  - exe/scaled_fde.rb
@@ -96,6 +102,7 @@ files:
96
102
  - lib/datafarming/error_handling.rb
97
103
  - lib/datafarming/factorial_generator.rb
98
104
  - lib/datafarming/freq_sets.rb
105
+ - lib/datafarming/moving_average.rb
99
106
  - lib/datafarming/nolh_designs.rb
100
107
  homepage: https://gitlab.nps.edu/pjsanche/datafarmingrubyscripts.git
101
108
  licenses: