datafarming 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a228e8b48eaa2cd95669a37b0954e660e806df0e49bb9050a91ee46273f0cbbb
4
- data.tar.gz: 7303e31f70cad28645b92d2196a69770f22d02b7181c0e34ce502dd8c9e75115
3
+ metadata.gz: 9356e4448e1a6aa818a5361cc04a9149d8e3d78c74ca20aaf2bb1c353e0cbd87
4
+ data.tar.gz: 0e18063cfe4c92c66ed822debc6a20ef7c9a5b063e3c5769f0647d9109f98034
5
5
  SHA512:
6
- metadata.gz: 4d214779a72e0c9afdbc97abd54f6f8e027a069f5216189f3d49a1fe108f3e02f880095afd80e3a67c54636309a98b0dc13911fc4da5c2c66addcc90883ae762
7
- data.tar.gz: 5565f12ac35ed08841eecb2e775b2ec2021ac9d95c497b6f63a5505586305148672c06ed7a8633b363d6fbeda9d4e559493ebb7ac36969c0dcc9781bb4ebff7a
6
+ metadata.gz: 82b4d9c0ae4d331b5612bbaa930aabe57fd67b4fdb8b1c3e19f7d2df3904289c6d7adb209fcccef8186ec979d3a876dcca8b093229a69570d5b7f291abb26c61
7
+ data.tar.gz: b9972a61a075af31a8b59957fac9f308ddea8647ff2035d442c8285c609c01c3ef2a83c65857c11a95f568cbedf3fdee311cd6f8cc1dc7e712d94c103ba36931
data/README.md CHANGED
@@ -50,4 +50,8 @@ generates star points to augment a fractional factorial
50
50
  - `cross.rb` —
51
51
  creates a combinatorial design by crossing all combinations of any # of individual smaller designs
52
52
  - `mser.rb` —
53
- uses MSER truncation to remove initial transient effects for time-series output, reports truncated average and number of observations for each run to facilitate construction of a properly weighted confidence interval.
53
+ uses MSER truncation to remove initial transient effects for time-series output, reports truncated average and number of observations for each run to facilitate construction of a properly weighted confidence interval.
54
+ - `mser_nolbm.rb` —
55
+ uses MSER truncation to remove initial transient effects for time-series output, then calculates a 95% confidence interval on the remaining data using non-overlapping batch means.
56
+ - `mser_olbm.rb` —
57
+ uses MSER truncation to remove initial transient effects for time-series output, then calculates a 95% confidence interval on the remaining data using overlapping batch means.
@@ -1,10 +1,10 @@
1
1
  # -*- ruby -*-
2
- _VERSION = "1.2.0"
2
+ _VERSION = "1.3.0"
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "datafarming"
6
6
  s.version = _VERSION
7
- s.date = "2018-07-27"
7
+ s.date = "2018-08-17"
8
8
  s.summary = "Useful scripts for data farming."
9
9
  s.homepage = "https://gitlab.nps.edu/pjsanche/datafarmingrubyscripts.git"
10
10
  s.email = "pjs@alum.mit.edu"
@@ -36,5 +36,5 @@ end.parse!
36
36
 
37
37
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
38
38
 
39
- $-i = '.orig'
39
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
40
40
  ARGF.each { |line| puts line.strip.gsub(/\s+/, ',') }
@@ -34,5 +34,5 @@ end.parse!
34
34
 
35
35
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
36
36
 
37
- $-i = '.orig'
37
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
38
38
  ARGF.each { |line| puts line.split(/\r\n|\r/) }
@@ -35,5 +35,5 @@ end.parse!
35
35
 
36
36
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
37
37
 
38
- $-i = '.orig'
38
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
39
39
  ARGF.each { |line| puts line.strip.tr(',', ' ') }
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ # Efficiently calculate a moving average
4
+
5
+ require 'colorize'
6
+
7
+ String.disable_colorization false
8
+
9
+ require 'optparse'
10
+ require 'datafarming/error_handling'
11
+ require 'datafarming/moving_average'
12
+
13
# Banner text printed above the option summary that OptionParser generates.
help_msg = [
  'Calculate moving averages for one or more input files, or for ' +
    'stdin'.green + '.', 'Results are sent to ' + 'stdout'.green + '.', '',
  'Syntax:' + "\t#{ErrorHandling.prog_name} [OPTIONS] [FILENAMES...]".yellow, '',
  "Prefix the command with '" + 'ruby'.yellow +
    "' if it is not on your PATH.", '',
  'Options:', ''
]

# opts_help keeps a handle on the parser so the full help text can be shown
# when a required option is missing or invalid.
opts_help = nil
options = { column: 0 }
OptionParser.new do |opts|
  opts.banner = help_msg.join("\n")
  opts.on('-h', '-?', '--help', 'Print this help') do
    puts
    puts opts
    puts
    exit
  end
  # The two description strings are printed on consecutive lines, so the
  # word "average" must appear only once across them.
  opts.on('-w LEN',
          '--window LEN',
          'Window size to use for moving',
          'average ' + '[REQUIRED]'.red) { |win| options[:window] = win.to_i }
  # Columns are 1-based on the command line, 0-based internally.
  opts.on('-c COL',
          '--column COL',
          'Which column to average.',
          'Defaults to column 1.') { |col| options[:column] = col.to_i - 1 }
  opts_help = opts
end.parse!

# A missing window, or one that is not a positive integer (non-numeric input
# becomes 0 via to_i), gets the help text instead of an uncaught exception
# from MovingAverage.new.
window = options[:window]
ErrorHandling.clean_abort [opts_help] if ARGV[0] == '?' || window.nil? || window < 1

column = options[:column]
ma = MovingAverage.new(window)
ARGF.each do |line|
  # Fields may be separated by commas, colons, semicolons, or whitespace.
  value = line.strip.split(/[,:;]\s*|\s+/).map(&:to_f)
  # Lines without the requested column (e.g. blank lines) are skipped.
  result = ma.new_obs(value[column]) unless value[column].nil?
  # new_obs yields nil until a full window of observations has been seen.
  puts result unless result.nil?
end
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'rubygems' if RUBY_VERSION =~ /^1\.8/
4
+ require 'colorize'
5
+
6
+ String.disable_colorization false
7
+
8
+ require 'optparse'
9
+ require 'datafarming/error_handling'
10
+
11
+ begin
12
+ require 'quickstats'
13
+ rescue LoadError
14
+ ErrorHandling.clean_abort [
15
+ "\n\tALERT: quickstats gem is not installed!".red,
16
+ "\tIf you have network connectivity, type:",
17
+ "\n\t\tgem install quickstats\n".yellow,
18
+ "\t(Admin privileges may be required.)\n\n"
19
+ ]
20
+ end
21
+
22
+ help_msg = [
23
+ 'Calculate confidence intervals using batch means after MSER truncation.',
24
+ 'Results are written to ' + 'stdout'.blue + ' in CSV format, with headers.',
25
+ '', 'Syntax:',
26
+ "\n\t#{ErrorHandling.prog_name} [--help] [--column COL] [filenames...]".yellow, '',
27
+ "Arguments in square brackets are optional. A vertical bar '|'",
28
+ 'indicates valid alternatives for invoking the option. Prefix',
29
+ 'the command with "' + 'ruby'.yellow +
30
+ '" if it is not on your PATH.', '',
31
+ ' --help | -h | -? | ?'.green,
32
+ "\tProduce this help message.",
33
+ ' [--column COL | -c COL]'.green,
34
+ "\tSpecify column to analyze. (Optional - default is column 1)",
35
+ ' [filenames...]'.green,
36
+ "\tThe names of one or more files containing data to be analyzed.",
37
+ "\t(Optional - uses " + "stdin".blue + " if no files are specified.)"
38
+ ]
39
+
40
+ OPTIONS = {:column => 0}
41
+
42
+ OptionParser.new do |opts|
43
+ opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
44
+ opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
45
+ opts.on('-c COL',
46
+ '--column COL',
47
+ 'Which column to average.',
48
+ 'Defaults to column 1.') { |col| OPTIONS[:column] = col.to_i - 1 }
49
+ end.parse!
50
+
51
+ ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
52
+
53
# Return +x+ multiplied by itself.
def square(x)
  x * x
end

# Two-sided 95% Student's t critical values indexed by degrees of freedom
# (index 0 is infinite: no interval can be formed with zero df).
# The table covers 0..30 df; larger df reuse the last entry (see nolbm).
T_VALUE = [
  Float::INFINITY, 12.706, 4.303, 3.182, 2.776,
  2.571, 2.447, 2.365, 2.306, 2.262,
  2.228, 2.201, 2.179, 2.160, 2.145,
  2.131, 2.120, 2.110, 2.101, 2.093,
  2.086, 2.080, 2.074, 2.069, 2.064,
  2.060, 2.056, 2.052, 2.048, 2.045, 2.042
].freeze

# Truncate the initial transient from one series of observations, then print
# a 95% confidence interval based on non-overlapping batch means.
#
# *Arguments*::
#   - +data+ -> array of input lines; an optional header line (any line
#     containing a letter) is dropped.  The array is modified in place:
#     each line is replaced by the numeric value of column OPTIONS[:column].
#
# *Output*::
#   - one CSV line on stdout: mean, std error, df, lower bound, upper bound.
#     A warning is written to stderr and nothing is printed if there are
#     no observations.
#
def nolbm(data)
  data.shift if data[0] =~ /[A-Za-z]/ # strip header if one present
  if data.empty?
    warn 'nolbm: no observations to analyze, skipping'
    return
  end
  data.map! { |line| line.strip.split(/[,:;]\s*|\s+/)[OPTIONS[:column]].to_f }

  # Scan backwards from the end of the series.  The first loop only
  # accumulates statistics over the tail of the data, so very short tails
  # are never considered as candidate truncation points.
  m_stats = QuickStats.new
  warmup = [2 * data.length / 3, data.length - 10].min
  index = data.length - 1
  while index > (data.length - warmup) && index > 1
    m_stats.new_obs(data[index])
    index -= 1
  end
  best = [m_stats.std_err, m_stats.avg, index]

  # Keep extending towards the start, remembering the truncation point with
  # the smallest standard error (ties favor keeping more data).
  while index > -1
    m_stats.new_obs(data[index])
    best = [m_stats.std_err, m_stats.avg, index] if m_stats.std_err <= best[0]
    index -= 1
  end

  avg = best[1]
  index = best[2]
  length = data.length - index

  # Batch size: at least length/31 and at least the truncation index, but
  # never below 1 (avoids dividing by zero on short series) and never above
  # the retained length (guarantees at least one batch).
  m = [[length / 31, index, 1].max, length].min
  b = length / m       # number of batches
  m = length / b       # enlarge batches to use as much of the data as possible
  index += length - m * b # skip leftover observations that do not fill a batch

  mean_stats = QuickStats.new
  batch_means = Array.new(b) do
    mean_stats.reset
    m.times do
      mean_stats.new_obs(data[index])
      index += 1
    end
    mean_stats.avg
  end
  sum_squared_deviations = batch_means.
    map { |y| square(y - avg) }.
    inject(&:+)
  var_hat = sum_squared_deviations / (b - 1)
  df = (b - 1)
  std_err = Math.sqrt(var_hat / b)
  # Beyond the end of the table use the last tabulated value; it is slightly
  # larger than the exact quantile, so the interval errs on the wide side.
  t_value = T_VALUE[[df, T_VALUE.length - 1].min]
  half_width = t_value * std_err
  lower = avg - half_width
  upper = avg + half_width
  printf "%f,%f,%d,%f,%f\n", avg, std_err, df, lower, upper
end
112
+
113
+ puts "sample_mean,std_err,df,lower95_bound,upper95_bound"
114
+ if ARGF.filename == "-"
115
+ data = STDIN.readlines
116
+ nolbm(data)
117
+ else
118
+ ARGV.each do |fname|
119
+ data = File.readlines(fname)
120
+ nolbm(data)
121
+ end
122
+ end
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'rubygems' if RUBY_VERSION =~ /^1\.8/
4
+ require 'colorize'
5
+
6
+ String.disable_colorization false
7
+
8
+ require 'optparse'
9
+ require 'datafarming/error_handling'
10
+ require 'datafarming/moving_average'
11
+
12
+ begin
13
+ require 'quickstats'
14
+ rescue LoadError
15
+ ErrorHandling.clean_abort [
16
+ "\n\tALERT: quickstats gem is not installed!".red,
17
+ "\tIf you have network connectivity, type:",
18
+ "\n\t\tgem install quickstats\n".yellow,
19
+ "\t(Admin privileges may be required.)\n\n"
20
+ ]
21
+ end
22
+
23
+ help_msg = [
24
+ 'Calculate confidence intervals using overlapping batch means after MSER',
25
+ 'truncation. Results are written to ' + 'stdout'.blue + ' in CSV format, with headers.',
26
+ '', 'Syntax:',
27
+ "\n\t#{ErrorHandling.prog_name} [--help] [--column COL] [filenames...]".yellow, '',
28
+ "Arguments in square brackets are optional. A vertical bar '|'",
29
+ 'indicates valid alternatives for invoking the option. Prefix',
30
+ 'the command with "' + 'ruby'.yellow +
31
+ '" if it is not on your PATH.', '',
32
+ ' --help | -h | -? | ?'.green,
33
+ "\tProduce this help message.",
34
+ ' [--column COL | -c COL]'.green,
35
+ "\tSpecify column to analyze. (Optional - default is column 1)",
36
+ ' [filenames...]'.green,
37
+ "\tThe names of one or more files containing data to be analyzed.",
38
+ "\t(Optional - uses " + "stdin".blue + " if no files are specified.)"
39
+ ]
40
+
41
+ OPTIONS = {:column => 0}
42
+
43
+ OptionParser.new do |opts|
44
+ opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
45
+ opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
46
+ opts.on('-c COL',
47
+ '--column COL',
48
+ 'Which column to average.',
49
+ 'Defaults to column 1.') { |col| OPTIONS[:column] = col.to_i - 1 }
50
+ end.parse!
51
+
52
+ ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
53
+
54
# Return +x+ multiplied by itself.
def square(x)
  x * x
end

# Two-sided 95% Student's t critical values indexed by degrees of freedom
# (index 0 is infinite: no interval can be formed with zero df).
# The table covers 0..45 df; larger df reuse the last entry (see olbm).
T_VALUE = [
  Float::INFINITY, 12.706, 4.303, 3.182, 2.776,
  2.571, 2.447, 2.365, 2.306, 2.262,
  2.228, 2.201, 2.179, 2.160, 2.145,
  2.131, 2.120, 2.110, 2.101, 2.093,
  2.086, 2.080, 2.074, 2.069, 2.064,
  2.060, 2.056, 2.052, 2.048, 2.045,
  2.042, 2.040, 2.037, 2.035, 2.032,
  2.030, 2.028, 2.026, 2.024, 2.023,
  2.021, 2.020, 2.018, 2.017, 2.015, 2.014
].freeze

# Truncate the initial transient from one series of observations, then print
# a 95% confidence interval based on overlapping batch means.
#
# *Arguments*::
#   - +data+ -> array of input lines; an optional header line (any line
#     containing a letter) is dropped.  The array is modified in place:
#     each line is replaced by the numeric value of column OPTIONS[:column].
#
# *Output*::
#   - one CSV line on stdout: mean, std error, df, lower bound, upper bound.
#     A warning is written to stderr and nothing is printed if there are
#     no observations.
#
def olbm(data)
  data.shift if data[0] =~ /[A-Za-z]/ # strip header if one present
  if data.empty?
    warn 'olbm: no observations to analyze, skipping'
    return
  end
  data.map! { |line| line.strip.split(/[,:;]\s*|\s+/)[OPTIONS[:column]].to_f }

  # Scan backwards from the end of the series.  The first loop only
  # accumulates statistics over the tail of the data, so very short tails
  # are never considered as candidate truncation points.
  mser = QuickStats.new
  warmup = [2 * data.length / 3, data.length - 10].min
  index = data.length - 1
  while index > (data.length - warmup) && index > 1
    mser.new_obs(data[index])
    index -= 1
  end
  best = [mser.std_err, mser.avg, index]

  # Keep extending towards the start, remembering the truncation point with
  # the smallest standard error (ties favor keeping more data).
  while index > -1
    mser.new_obs(data[index])
    best = [mser.std_err, mser.avg, index] if mser.std_err <= best[0]
    index -= 1
  end

  avg = best[1]
  start_index = best[2]
  length = data.length - start_index

  # Batch size: at least length/21 and the truncation index, at most a third
  # of the retained data, and never below 1 (avoids dividing by zero when
  # only a handful of observations remain).
  m = [[[length / 21, start_index].max, length / 3].min, 1].max
  b = length / m                # number of non-overlapping batches that fit
  m = length / b                # enlarge batches to use as much data as possible
  start_index += length - m * b # skip leftovers that do not fill a batch
  ma = MovingAverage.new(m)

  # Each complete window of m observations yields one overlapping batch mean.
  sum_squared_deviations = data[start_index..-1].
    map { |y| ma.new_obs(y) }.compact.
    map { |y| square(y - avg) }.
    inject(&:+)

  se_sqr = (m.to_f / ((length - m) * (length - m + 1))) * sum_squared_deviations
  # With a single batch the df expression evaluates to NaN, which cannot be
  # converted to an integer; zero df (an unbounded interval) is reported.
  df = b > 1 ? (3 * (b - 1) * (1 + (b - 1.0)**(-0.5 - 0.6 * b))).to_i / 2 : 0
  # Beyond the end of the table use the last tabulated value; it is slightly
  # larger than the exact quantile, so the interval errs on the wide side.
  t_value = T_VALUE[[df, T_VALUE.length - 1].min]
  half_width = t_value * Math.sqrt(se_sqr)
  lower = avg - half_width
  upper = avg + half_width
  printf "%f,%f,%d,%f,%f\n", avg, Math.sqrt(se_sqr), df, lower, upper
end
109
+
110
+ puts "sample_mean,std_err,df,lower95_bound,upper95_bound"
111
+ if ARGF.filename == "-"
112
+ data = STDIN.readlines
113
+ olbm(data)
114
+ else
115
+ ARGV.each do |fname|
116
+ data = File.readlines(fname)
117
+ olbm(data)
118
+ end
119
+ end
@@ -37,7 +37,7 @@ end.parse!
37
37
 
38
38
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
39
39
 
40
- $-i = '.orig' # specify backup suffix
40
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
41
41
 
42
42
  oldfilename = ''
43
43
  header = ''
@@ -36,7 +36,7 @@ end.parse!
36
36
 
37
37
  ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
38
38
 
39
- $-i = '.orig' # specify backup suffix
39
+ $-i = '.orig' unless ARGV.empty? # specify backup suffix unless STDIN
40
40
 
41
41
  oldfilename = ''
42
42
 
@@ -13,8 +13,8 @@ require 'fwt'
13
13
  # corresponds to a factor and each row is a design point.
14
14
  #
15
15
  # Author:: Paul J Sanchez (mailto:pjs@alum.mit.edu)
16
- # Copyright:: Copyright (c) Paul J Sanchez
17
- # License:: LGPL
16
+ # Copyright:: Copyright (c) 2018 Paul J Sanchez
17
+ # License:: MIT
18
18
  #
19
19
  def make_design(number_of_factors)
20
20
  index = [1, 2, 4, 8, 15, 16, 32, 51, 64, 85, 106, 128, 150, 171,
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ # A class to efficiently perform Moving Average calculations.
4
+ # Calculating moving averages of length m on a set of data of length n
5
+ # requires Θ(m) storage and Θ(n) work.
6
+ #
7
+ # Author:: Paul J Sanchez (mailto:pjs at alum.mit.edu)
8
+ # Copyright:: Copyright (c) 2018 Paul J Sanchez
9
+ # License:: MIT
10
+ #
11
class MovingAverage
  # Number of elements in the moving average
  attr_reader :m

  # Initialize the MovingAverage object.
  #
  # *Arguments*::
  #   - +m+ -> the number of elements to be averaged
  #
  # *Raises*::
  #   - RuntimeError if +m+ < 1
  #
  def initialize(m)
    fail 'Number of terms to avg (m) must be strictly positive' if m < 1
    @m = m
    @current_set = []   # the most recent observations, oldest first
    @current_avg = 0.0
    @current_count = 0
  end

  # Add a new observation, get the resulting moving average.
  #
  # *Arguments*::
  #   - +x+ -> the new observation; a number, or a string that parses
  #     completely as a number
  #
  # *Raises*::
  #   - RuntimeError if +x+ is non-numeric
  #
  # *Returns*::
  #   - Average of the last +m+ observations, or +nil+ if fewer than +m+
  #     values have been processed.
  #
  def new_obs(x)
    x = to_number(x)
    @current_set << x
    if @current_count < @m
      # Still filling the first window: running mean of what we have so far.
      @current_count += 1
      @current_avg += (x - @current_avg) / @current_count
      @current_count == @m ? @current_avg : nil
    else
      # Window is full: swap the oldest value for the new one in O(1).
      @current_avg += (x - @current_set.shift) / @m
    end
  end

  private

  # Convert +x+ to a Float, raising RuntimeError for anything non-numeric
  # instead of silently treating it as 0.0.
  def to_number(x)
    Float(x)
  rescue ArgumentError, TypeError, RangeError
    fail "Observation must be numeric, got #{x.inspect}"
  end
end
55
+
56
+ # Simple but effective test case
57
+ if __FILE__ == $PROGRAM_NAME
58
+ puts "Ruby v" + RUBY_VERSION
59
+ ma = MovingAverage.new(3)
60
+ results = []
61
+ 20.times { |i| avg = ma.new_obs(i); results << avg if avg }
62
+ puts results.join ', '
63
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datafarming
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul J Sanchez
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-27 00:00:00.000000000 Z
11
+ date: 2018-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fwt
@@ -63,7 +63,10 @@ executables:
63
63
  - convert_line_endings.rb
64
64
  - cross.rb
65
65
  - csv2blank.rb
66
+ - moving_average.rb
66
67
  - mser.rb
68
+ - mser_nolbm.rb
69
+ - mser_olbm.rb
67
70
  - pool_files.rb
68
71
  - rundesign_general.rb
69
72
  - scaled_fde.rb
@@ -84,7 +87,10 @@ files:
84
87
  - exe/convert_line_endings.rb
85
88
  - exe/cross.rb
86
89
  - exe/csv2blank.rb
90
+ - exe/moving_average.rb
87
91
  - exe/mser.rb
92
+ - exe/mser_nolbm.rb
93
+ - exe/mser_olbm.rb
88
94
  - exe/pool_files.rb
89
95
  - exe/rundesign_general.rb
90
96
  - exe/scaled_fde.rb
@@ -96,6 +102,7 @@ files:
96
102
  - lib/datafarming/error_handling.rb
97
103
  - lib/datafarming/factorial_generator.rb
98
104
  - lib/datafarming/freq_sets.rb
105
+ - lib/datafarming/moving_average.rb
99
106
  - lib/datafarming/nolh_designs.rb
100
107
  homepage: https://gitlab.nps.edu/pjsanche/datafarmingrubyscripts.git
101
108
  licenses: