RubyGems - compsci - Versions diffs - 0.0.1.1 - Mend

compsci 0.0.1.1

Files changed (23) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: dc7af3ec0bc93ea91b485b329e66033cca137597
+  data.tar.gz: f62c24c3171a6534e92fa8f53dba942e009442c3
+SHA512:
+  metadata.gz: fe64178808bef0a929882318ed08941de2179940bc332ee42ca9f48f0430e566ae6299f628aa4ad11573787c59fe9746cd8a23b5309fa1ecd92117fd1c0daf16
+  data.tar.gz: 0ce9187b7eb0d17add238c6344452e344685e00126927359f2d2d54f6e3fbc8ce5a389d153027c7f626b8039f86e8b7409d1950f2d14afbd5619ec8dd878ff4c

data/README.md ADDED Viewed

@@ -0,0 +1,48 @@
+[![Build Status](https://travis-ci.org/rickhull/compsci.svg?branch=master)](https://travis-ci.org/rickhull/compsci)
+# Introduction
+Provided are some toy implementations for some basic computer science problems.
+## [`Tree`](/lib/compsci/tree.rb) data structures
+* `Tree`       - enforces number of children per node
+* `Tree::Node` - references parent and children nodes
+* `BinaryTree` - subclass of `Tree`; child_slots == 2
+* `CompleteBinaryTree` - efficient Array implementation
+## [`Heap`](lib/compsci/heap.rb) data structure
+Implemented with a `CompleteBinaryTree` for storage using simple arithmetic to
+determine array indices for parent and children.  See the
+[heap example](https://github.com/rickhull/compsci/blob/master/examples/heap.rb)
+which can be executed (among other examples) via `rake examples`.
+Both minheaps and maxheaps are supported.  The primary operations are
+`Heap#push` and `Heap#pop`.  My basic Vagrant VM gets over 350k pushes per
+second, constant up past 1M pushes.
+## [`Fibonacci`](lib/compsci/fib.rb) functions
+* `Fibonacci.classic(n)`         - naive, recursive
+* `Fibonacci.cache_recursive(n)` - as above, caching already computed results
+* `Fibonacci.cache_iterative(n)` - as above but iterative
+* `Fibonacci.dynamic(n)`         - as above but without a cache structure
+## [`Timer`](/lib/compsci/timer.rb) functions
+* `Timer.now`          - uses `Process::CLOCK_MONOTONIC` if available
+* `Timer.elapsed`      - provides the elapsed time to run a block
+* `Timer.loop_average` - runs a block repeatedly and provides the mean elapsed
+                         time
+* `Timer.since`        - provides the elapsed time since a prior time
+## [`Fit`](lib/compsci/fit.rb) functions
+* `Fit.sigma` - sums the result of a block applied to array values
+* `Fit.error` - returns a generic r^2 value, the coefficient of determination
+* `Fit.constant` - fits `y = a + 0x`; returns the mean and variance
+* `Fit.logarithmic` - fits `y = a + b*ln(x)`; returns a, b, r^2
+* `Fit.linear` - fits `y = a + bx`; returns a, b, r^2
+* `Fit.exponential` - fits `y = ae^(bx)`; returns a, b, r^2
+* `Fit.power` - fits `y = ax^b`; returns a, b, r^2

data/Rakefile ADDED Viewed

@@ -0,0 +1,108 @@
+require 'rake/testtask'
+Rake::TestTask.new :test do |t|
+  t.pattern = "test/*.rb"
+  t.warning = true
+end
+Rake::TestTask.new bench: :test do |t|
+  t.pattern = "test/bench/*.rb"
+  t.warning = true
+  t.description = "Run benchmarks"
+end
+desc "Run example scripts"
+task examples: :test do
+  Dir['examples/**/*.rb'].each { |filepath|
+    puts
+    sh "ruby -Ilib #{filepath}"
+    puts
+  }
+end
+task default: :examples
+#
+# METRICS
+#
+metrics_tasks = []
+begin
+  require 'flog_task'
+  FlogTask.new do |t|
+    t.threshold = 420
+    t.dirs = ['lib']
+    t.verbose = true
+  end
+  metrics_tasks << :flog
+rescue LoadError
+  warn 'flog_task unavailable'
+end
+begin
+  require 'flay_task'
+  FlayTask.new do |t|
+    t.dirs = ['lib']
+    t.verbose = true
+  end
+  metrics_tasks << :flay
+rescue LoadError
+  warn 'flay_task unavailable'
+end
+begin
+  require 'roodi_task'
+  RoodiTask.new config: '.roodi.yml', patterns: ['lib/**/*.rb']
+  metrics_tasks << :roodi
+rescue LoadError
+  warn "roodi_task unavailable"
+end
+desc "Generate code metrics reports"
+task code_metrics: metrics_tasks
+#
+# PROFILING
+#
+desc "Show current system load"
+task "loadavg" do
+  puts File.read "/proc/loadavg"
+end
+def lib_sh(cmd)
+  sh "RUBYLIB=lib #{cmd}"
+end
+def rprof_sh(script, args = '', rprof_args = '')
+  lib_sh ['ruby-prof', rprof_args, script, '--', args].join(' ')
+end
+scripts = ["examples/binary_tree.rb", "examples/heap.rb"]
+desc "Run ruby-prof on examples/"
+task "ruby-prof" => "loadavg" do
+  scripts.each { |script| rprof_sh script }
+end
+desc "Run ruby-prof on examples/ with --exclude-common-cycles"
+task "ruby-prof-exclude" => "loadavg" do
+  scripts.each { |script| rprof_sh script, "", "--exclude-common-cycles" }
+end
+#
+# GEM BUILD / PUBLISH
+#
+begin
+  require 'buildar'
+  Buildar.new do |b|
+    b.gemspec_file = 'compsci.gemspec'
+    b.version_file = 'VERSION'
+    b.use_git = true
+  end
+rescue LoadError
+  warn "buildar tasks unavailable"
+end

data/VERSION ADDED Viewed

@@ -0,0 +1 @@
+0.0.1.1

data/compsci.gemspec ADDED Viewed

@@ -0,0 +1,38 @@
+Gem::Specification.new do |s|
+  s.name = 'compsci'
+  s.summary = "Toy implementations for some basic computer science problems"
+  s.description = "Trees, Heaps, Timers, Error fitting, etc"
+  s.authors = ["Rick Hull"]
+  s.homepage = "https://github.com/rickhull/compsci"
+  s.license = "LGPL-3.0"
+  s.required_ruby_version = "~> 2"
+  s.version = File.read(File.join(__dir__, 'VERSION')).chomp
+  s.files = %w[
+    compsci.gemspec
+    VERSION
+    README.md
+    Rakefile
+    lib/compsci.rb
+    lib/compsci/fib.rb
+    lib/compsci/fit.rb
+    lib/compsci/heap.rb
+    lib/compsci/timer.rb
+    lib/compsci/tree.rb
+    examples/binary_tree.rb
+    examples/heap.rb
+    examples/timer.rb
+    test/fib.rb
+    test/fit.rb
+    test/heap.rb
+    test/timer.rb
+    test/tree.rb
+    test/bench/fib.rb
+    test/bench/heap.rb
+    test/bench/tree.rb
+  ]
+  s.add_development_dependency "minitest", "~> 5.0"
+end

data/examples/binary_tree.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require 'compsci/tree'
+include CompSci
+vals = []
+30.times { vals << rand(99) }
+p vals
+root_node = Tree::Node.new vals.shift
+tree = BinaryTree.new(root_node)
+tree.push vals.shift until vals.empty?
+tree.bf_print
+tree.df_search { |n|
+  puts "visited #{n}"
+  false # or n.value > 90
+}
+puts
+p tree

data/examples/heap.rb ADDED Viewed

@@ -0,0 +1,65 @@
+require 'compsci/heap'
+require 'compsci/timer'
+include CompSci
+puts <<EOF
+#
+# 3 seconds worth of inserts
+#
+EOF
+count = 0
+start = Timer.now
+h = Heap.new
+elapsed = 0
+while elapsed < 3
+  _answer, push_elapsed = Timer.elapsed { h.push rand 99999 }
+  count += 1
+  puts "%ith push: %0.8f s" % [count, push_elapsed] if count % 10000 == 0
+  if count % 100000 == 0
+    start_100k ||= start
+    push_100k_elapsed = Timer.now - start_100k
+    puts "-------------"
+    puts "    100k push: %0.8f s (%ik push / s)" %
+         [push_100k_elapsed, 100.to_f / push_100k_elapsed]
+    puts
+    start_100k = Timer.now
+  end
+  elapsed = Timer.now - start
+end
+puts "pushed %i items in %0.1f s" % [count, elapsed]
+puts
+print "still a heap with #{h.size} items? "
+answer, elapsed = Timer.elapsed { h.heap? }
+puts "%s - %0.3f sec" % [answer ? 'YES' : 'NO', elapsed]
+puts
+puts <<EOF
+#
+# 99 inserts; display the internal array
+#
+EOF
+h = Heap.new
+puts "push: %s" % Array.new(99) { rand(99).tap { |i| h.push i } }.join(' ')
+puts "heap store: #{h.store.inspect}"
+puts "heap: #{h.heap?}"
+puts
+puts "pop: %i" % h.pop
+puts "heap store: #{h.store.inspect}"
+puts "heap: #{h.heap?}"
+puts
+puts "pop: %s" % Array.new(9) { h.pop }.join(' ')
+puts "heap store: #{h.store.inspect}"
+puts "heap: #{h.heap?}"
+puts

data/examples/timer.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require 'compsci/timer'
+include CompSci
+overall_start = Timer.now
+start = Timer.now
+print "running sleep 0.01 (50x): "
+_answer, each_et = Timer.loop_average(count: 50) {
+  print '.'
+  sleep 0.01
+}
+puts
+puts "each: %0.3f" % each_et
+puts "elapsed: %0.3f" % Timer.since(start)
+puts "cumulative: %0.3f" % Timer.since(overall_start)
+puts
+start = Timer.now
+print "running sleep 0.02 (0.5 s): "
+_answer, each_et = Timer.loop_average(seconds: 0.5) {
+  print '.'
+  sleep 0.02
+}
+puts
+puts "each: %0.3f" % each_et
+puts "elapsed: %0.3f" % Timer.since(start)
+puts "cumulative: %0.3f" % Timer.since(overall_start)
+puts
+start = Timer.now
+print "running sleep 2 (1 s): "
+_answer, each_et = Timer.loop_average(seconds: 1) {
+  print '.'
+  sleep 2
+}
+puts
+puts "each: %0.3f" % each_et
+puts "elapsed: %0.3f" % Timer.since(start)
+puts "cumulative: %0.3f" % Timer.since(overall_start)

data/lib/compsci/fib.rb ADDED Viewed

@@ -0,0 +1,24 @@
+require 'compsci'
+module CompSci::Fibonacci
+  def self.classic(n)
+    n < 2 ? n : classic(n-1) + classic(n-2)
+  end
+  def self.cache_recursive(n, cache = {})
+    return n if n < 2
+    cache[n] ||= cache_recursive(n-1, cache) + cache_recursive(n-2, cache)
+  end
+  def self.cache_iterative(n)
+    cache = [0, 1]
+    2.upto(n) { |i| cache[i] = cache[i-1] + cache[i-2] }
+    cache[n]
+  end
+  def self.dynamic(n)
+    a, b = 0, 1
+    (n-1).times { a, b = b, a+b }
+    b
+  end
+end

data/lib/compsci/fit.rb ADDED Viewed

@@ -0,0 +1,137 @@
+require 'compsci'
+# Least-squares fitting of x/y data to a few simple functional forms.
+module CompSci::Fit
+  #
+  # functions below originally copied from https://github.com/seattlerb/minitest
+  #
+  ##
+  # Enumerates over +enum+ mapping +block+ if given, returning the
+  # sum of the result. Eg:
+  #
+  #   sigma([1, 2, 3])                # => 1 + 2 + 3 => 6
+  #   sigma([1, 2, 3]) { |n| n ** 2 } # => 1 + 4 + 9 => 14
+  #
+  # Returns nil when +enum+ is empty.
+  def self.sigma enum, &block
+    enum = enum.map(&block) if block
+    enum.inject { |sum, n| sum + n }
+  end
+  ##
+  # Takes an array of x/y pairs and calculates the general R^2 value to
+  # measure fit against a predictive function,  which is the block supplied
+  # to error:
+  #
+  # e.g. error(xys) { |x| 5 + 2 * x }
+  #
+  # The result is 1 - (ss_res / ss_tot).  When every y is identical ss_tot
+  # is 0.0, so the result is not a finite number.
+  #
+  # See: http://en.wikipedia.org/wiki/Coefficient_of_determination
+  #
+  def self.error xys, &blk
+    y_bar  = sigma(xys) { |_, y| y                   } / xys.size.to_f
+    ss_tot = sigma(xys) { |_, y| (y - y_bar)    ** 2 }
+    ss_res = sigma(xys) { |x, y| (yield(x) - y) ** 2 }
+    1 - (ss_res / ss_tot)
+  end
+  ##
+  # Fits the functional form: a (+ 0x)
+  #
+  # Takes x and y values and returns [a, variance], where a is the mean of
+  # the y values.  Note that "variance" here is the sum of squared deviations
+  # from the mean; it is not divided by the number of points.  The x values
+  # are accepted for symmetry with the other fits but are not used.
+  #
+  def self.constant xs, ys
+    # written by Rick
+    y_bar = sigma(ys) / ys.size.to_f
+    variance = sigma(ys) { |y| (y - y_bar) ** 2 }
+    [y_bar, variance]
+  end
+  ##
+  # To fit a functional form: y = a + b*ln(x).
+  #
+  # Takes x and y values and returns [a, b, r^2].
+  #
+  # See: http://mathworld.wolfram.com/LeastSquaresFittingLogarithmic.html
+  def self.logarithmic xs, ys
+    n     = xs.size
+    xys   = xs.zip(ys)
+    slnx2 = sigma(xys) { |x, _| Math.log(x) ** 2 }
+    slnx  = sigma(xys) { |x, _| Math.log(x)      }
+    sylnx = sigma(xys) { |x, y| y * Math.log(x)  }
+    sy    = sigma(xys) { |_, y| y                }
+    c = n * slnx2 - slnx ** 2
+    b = ( n * sylnx - sy * slnx ) / c
+    a = (sy - b * slnx) / n
+    return a, b, self.error(xys) { |x| a + b * Math.log(x) }
+  end
+  ##
+  # Fits the functional form: a + bx.
+  #
+  # Takes x and y values and returns [a, b, r^2].  a and b are Floats even
+  # when every x and y is an Integer.
+  #
+  # See: http://mathworld.wolfram.com/LeastSquaresFitting.html
+  def self.linear xs, ys
+    n   = xs.size
+    xys = xs.zip(ys)
+    sx  = sigma xs
+    sy  = sigma ys
+    sx2 = sigma(xs)  { |x|   x ** 2 }
+    sxy = sigma(xys) { |x, y| x * y  }
+    # Every sum above is an Integer when the inputs are Integers; convert the
+    # denominator so the divisions below do not truncate the coefficients.
+    c = (n * sx2 - sx**2).to_f
+    a = (sy * sx2 - sx * sxy) / c
+    b = ( n * sxy - sx * sy ) / c
+    return a, b, self.error(xys) { |x| a + b * x }
+  end
+  ##
+  # To fit a functional form: y = ae^(bx).
+  #
+  # Takes x and y values and returns [a, b, r^2].
+  #
+  # See: http://mathworld.wolfram.com/LeastSquaresFittingExponential.html
+  def self.exponential xs, ys
+    n     = xs.size
+    xys   = xs.zip(ys)
+    sxlny = sigma(xys) { |x, y| x * Math.log(y) }
+    slny  = sigma(xys) { |_, y| Math.log(y)     }
+    sx2   = sigma(xys) { |x, _| x * x           }
+    sx    = sigma xs
+    c = n * sx2 - sx ** 2
+    # a and b are fitted in log space; a is exponentiated on the way out
+    a = (slny * sx2 - sx * sxlny) / c
+    b = ( n * sxlny - sx * slny ) / c
+    return Math.exp(a), b, self.error(xys) { |x| Math.exp(a + b * x) }
+  end
+  ##
+  # To fit a functional form: y = ax^b.
+  #
+  # Takes x and y values and returns [a, b, r^2].
+  #
+  # See: http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html
+  def self.power xs, ys
+    n       = xs.size
+    xys     = xs.zip(ys)
+    slnxlny = sigma(xys) { |x, y| Math.log(x) * Math.log(y) }
+    slnx    = sigma(xs)  { |x   | Math.log(x)               }
+    slny    = sigma(ys)  { |   y| Math.log(y)               }
+    slnx2   = sigma(xs)  { |x   | Math.log(x) ** 2          }
+    b = (n * slnxlny - slnx * slny) / (n * slnx2 - slnx ** 2)
+    # a is fitted in log space and exponentiated on the way out
+    a = (slny - b * slnx) / n
+    return Math.exp(a), b, self.error(xys) { |x| (Math.exp(a) * (x ** b)) }
+  end
+end

data/lib/compsci/heap.rb ADDED Viewed

@@ -0,0 +1,100 @@
+require 'compsci/tree'
+# A Heap is a partially sorted, complete binary tree with the property:
+# * Every node has a value at least as large (or as small) as its children.
+#
+# This class implements a heap using a simple array for storage.
+# Array index math is used to find:
+# * The root node (idx 0)
+# * The "bottom-most" leaf node (last idx)
+# * Parent idx ((idx-1) / 2, using integer division)
+# * Child idx (2*idx + 1, 2*idx + 2)
+#
+# Any Comparable may be used for node values.
+# Initialize a heap with a cmp_val, either 1 for a MaxHeap or -1 for a MinHeap.
+# The heap property is still satisfied when a parent value equals a child value.
+# Insertion (push) and removal (pop) are O(log n) where n is the heap size.
+# Nodes are inserted at the end of the array, and sift_up is called to
+#   reestablish the heap property.
+# Nodes are removed from the start of the array, and sift_down is called to
+#   reestablish the heap property.
+# Sift_up and sift_down are O(log n) because they only have to check and swap
+#   nodes at each layer of the tree, and there are log n layers to the tree.
+#
+class CompSci::Heap < CompSci::CompleteBinaryTree
+  # defaults to a MaxHeap, with the largest node at the root
+  # specify a minheap with minheap: true or cmp_val: -1
+  # minheap: true overrides cmp_val; raises ArgumentError unless cmp_val ends up 1 or -1
+  def initialize(cmp_val: 1, minheap: false)
+    super()
+    cmp_val = -1 if minheap
+    case cmp_val
+    when -1, 1
+      @cmp_val = cmp_val
+    else
+      raise(ArgumentError, "unknown comparison value: #{cmp_val}")
+    end
+  end
+  # append node to the array, then sift_up from the new last index; returns self
+  def push(node)
+    @store << node
+    self.sift_up(@store.size - 1)
+  end
+  # remove and return the front of the array; move last node to root; sift_down
+  def pop
+    node = @store.shift
+    replacement = @store.pop
+    @store.unshift replacement if replacement # skipped when nothing was left to pop
+    self.sift_down(0)
+    node
+  end
+  # return what pop would return (avoid sifting); does not modify the store
+  def peek
+    @store.first
+  end
+  # called recursively; swaps idx with its parent while the pair is not heapish; returns self
+  def sift_up(idx)
+    return self if idx <= 0 # the root has no parent
+    pidx = self.class.parent_idx(idx)
+    if !self.heapish?(pidx, idx)
+      @store[idx], @store[pidx] = @store[pidx], @store[idx] # swap
+      self.sift_up(pidx)
+    end
+    self
+  end
+  # called recursively; swaps idx with its preferred child while not heapish; returns self
+  def sift_down(idx)
+    return self if idx >= @store.size
+    lidx, ridx = self.class.children_idx(idx)
+    # take the child most likely to be a good parent; NOTE(review): child indices are not bounds-checked here, so this seems to rely on heapish? tolerating nil -- confirm
+    cidx = self.heapish?(lidx, ridx) ? lidx : ridx
+    if !self.heapish?(idx, cidx)
+      @store[idx], @store[cidx] = @store[cidx], @store[idx] # swap
+      self.sift_down(cidx)
+    end
+    self
+  end
+  # are parent and child (by index) in heap order? true unless <=> gives the opposite of cmp_val
+  def heapish?(pidx, cidx)
+    (@store[pidx] <=> @store[cidx]) != (@cmp_val * -1) # equal (0) or incomparable (nil) passes
+  end
+  # not used internally; checks that every node from idx downward satisfies the heap property
+  def heap?(idx: 0)
+    check_children = []
+    self.class.children_idx(idx).each { |cidx|
+      if cidx < @store.size # ignore child slots beyond the end of the store
+        return false unless self.heapish?(idx, cidx)
+        check_children << cidx
+      end
+    }
+    check_children.each { |cidx| return false unless self.heap?(idx: cidx) }
+    true
+  end
+end

data/lib/compsci/timer.rb ADDED Viewed

@@ -0,0 +1,36 @@
+require 'compsci'
+module CompSci::Timer
+  # lifted from seattlerb/minitest
+  if defined? Process::CLOCK_MONOTONIC
+    def self.now
+      Process.clock_gettime Process::CLOCK_MONOTONIC
+    end
+  else
+    def self.now
+      Time.now
+    end
+  end
+  def self.since(t)
+    self.now - t
+  end
+  def self.elapsed(&work)
+    t = self.now
+    return yield, self.since(t)
+  end
+  def self.loop_average(count: 999, seconds: 1, &work)
+    i = 0
+    start = self.now
+    val = nil
+    loop {
+      val = yield
+      i += 1
+      break if i >= count
+      break if self.since(start) > seconds
+    }
+    return val, self.since(start) / i.to_f
+  end
+end