RubyGems - bayesnet - Versions diffs - 0.1.0 → 0.6.0 - Mend

bayesnet 0.1.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9a746d994d25c279f3246613b9a918fb84720c7c9c78f85ce1ffdc5fbd6bcf9c
-  data.tar.gz: 3b8ee59eab90bf75172239601ddef479926f5a27db475688f1191c71298ca757
+  metadata.gz: 7591665046345784f55275c06d1129fd91ee3f098f3800b2c03b6f9bbfd8e172
+  data.tar.gz: ec9009ab90593d42fa2506a230e5900d5a39bebb1a7fbd874953d6c86022b2eb
 SHA512:
-  metadata.gz: 72985a24e9d529b04e8d275a46cc6eadbd55aa4380f2aed73a41d2e3c3c7e7528419aa99fa0004d4d3aab1692484021c3b577e1f9262ee9ef4d89664523d8335
-  data.tar.gz: 8be39618f74ccd85750569a74e18a0a384aa3a12c4c52a35315958d8d4ad4045abbe8bee83d510459e3fd6b29de68bab4120e3540cee0cadc9a024f1b2389ffb
+  metadata.gz: 5e668b431f55f9239ad3ae06cdc020098ff4a8b68f7934283d6f77a3969a014aa17f68df12b23013d1681e79fdecd8f4c8e4da105e9430a16d2ab8075bbcca7b
+  data.tar.gz: 75eceac300152cfa8d0ce736b16939e779f127dc844ea0c8ce8e1d0f363b04048107533f64c744420b2631fd3e8678d6812e15c6b770e2b37a598ae290af773a

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,8 @@
 ## [Unreleased]
+## [0.6.0] - 2022-06-26
+- Using variables elimination algorithm to build a distribution
 ## [0.5.0] - 2022-02-26
 - Constructing networks out of the `.BIF` ([Interchange Format for Bayesian Networks](https://www.cs.washington.edu/dm/vfml/appendixes/bif.htm)) files.
@@ -8,7 +11,7 @@
 ## [0.0.3] - 2021-12-29
-- Fixing terminoloty used in Factor class
+- Fixing terminology used in Factor class
 ## [0.0.2] - 2021-12-28

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    bayesnet (0.1.0)
+    bayesnet (0.6.0)
 GEM
   remote: https://rubygems.org/

data/lib/bayesnet/factor.rb CHANGED Viewed

@@ -1,29 +1,68 @@
 # frozen_string_literal: true
 module Bayesnet
-  # Factor if a function of sevaral variables (A, B, ...) each defined on values from finite set
+  # Factor is a function of several variables (A, B, ...), where
+  # every variable could take values from some finite set
   class Factor
+    # +++ Factor DSL +++
+    #
+    # Factor DSL entry point:
     def self.build(&block)
       factor = new
       factor.instance_eval(&block)
       factor
     end
-    # Specifies variable name together with its values
-    def scope(var_name_to_values)
-      @scope.merge!(var_name_to_values)
+    # Factor DSL
+    # Defining variable with list of its possible values looks like:
+    # ```
+    # Bayesnet::Factor.build do
+    #   scope weather: %i[sunny cloudy]
+    #   scope mood: %i[bad good]
+    #   ...
+    # ```
+    # ^ this code defines two variables `weather` and `mood`, where
+    # `weather` could be :sunny or :cloudy, and
+    # `mood` could be :bad or :good
+    def scope(var_name_to_values = nil)
+      if var_name_to_values
+        @scope.merge!(var_name_to_values)
+      else
+        @scope
+      end
     end
-    # Specifies value for a scope context. Value is the last element in `context_and_val`
+    # Factor DSL
+    # Specifies factor value for some set of variable values, i.e.
+    # ```
+    # Bayesnet::Factor.build do
+    #   scope weather: %i[sunny cloudy]
+    #   scope mood: %i[bad good]
+    #   val :sunny, :bad, 0.1
+    #   ...
+    # ```
+    # ^ this code says the value of factor for [weather == :sunny, mood == :bad] is 0.1
     def val(*context_and_val)
       context_and_val = context_and_val[0] if context_and_val.size == 1 && context_and_val[0].is_a?(Array)
       @vals[context_and_val[0..-2]] = context_and_val[-1]
     end
+    # --- Factor DSL ---
+    # List of variable names
     def var_names
       @scope.keys
     end
+    # Accessor for a factor value, e.g.
+    # ```
+    # factor = Bayesnet::Factor.build do
+    #   scope weather: %i[sunny cloudy]
+    #   scope mood: %i[bad good]
+    #   val :sunny, :bad, 0.1
+    #   ...
+    # end
+    # factor[:sunny, :bad] # 0.1
+    # ```
     def [](*context)
       key = if context.size == 1 && context[0].is_a?(Hash)
               context[0].slice(*var_names).values
@@ -33,20 +72,19 @@ module Bayesnet
       @vals[key]
     end
-    def self.from_distribution(var_distribution)
-      self.class.new(var_distribution.keys, var_distribution.values.map(&:to_a))
-    end
+    # returns all combinations of values of `var_names`
     def contextes(*var_names)
       return [] if var_names.empty?
       @scope[var_names[0]].product(*var_names[1..].map { |var_name| @scope[var_name] })
     end
+    # returns all possible values
     def values
       @vals.values
     end
+    # returns new normalized factor, i.e. where sum of all values is 1.0
     def normalize
       vals = @vals.clone
       norm_factor = vals.map(&:last).sum * 1.0
@@ -54,26 +92,29 @@ module Bayesnet
       self.class.new(@scope.clone, vals)
     end
+    # Returns factor built as follows:
+    # 1. Original factor gets filtered out by variables having values compatible with `context`
+    # 2. Returned factor does not have any variables from `context` (because they have
+    # same values, after step 1)
+    # The `context` argument is supposed to be evidence, for example
+    # `{weather: :sunny}`
     def reduce_to(context)
-      # TODO: use Hash#except when Ruby 2.6 support no longer needed
-      context_keys_set = context.keys.to_set
-      scope = @scope.reject { |k, _| context_keys_set.include?(k) }
+      limited_context = context.slice(*scope.keys)
+      return self.class.new(@scope, @vals) if limited_context.empty?
+      limited_scope = @scope.slice(*(@scope.keys - limited_context.keys))
-      context_vals = context.values
-      indices = context.keys.map { |k| index_by_var_name[k] }
+      context_vals = limited_context.values
+      indices = limited_context.keys.map { |k| index_by_var_name[k] }
       vals = @vals.select { |k, _v| indices.map { |i| k[i] } == context_vals }
       vals.transform_keys! { |k| delete_by_indices(k, indices) }
-      self.class.new(scope, vals)
-    end
-    def delete_by_indices(array, indices)
-      result = array.dup
-      indices.map { |i| result[i] = nil }
-      result.compact
+      self.class.new(limited_scope, vals)
     end
-    # groups by `var_names` having same context and sum out values.
+    # Returns a new factor defined over `var_names`; all other variables
+    # get eliminated. For every combination of `var_names`'s values
+    # the value of the new factor is the sum of the values in the original factor
+    # whose contexts are compatible with that combination
     def marginalize(var_names)
       scope = @scope.slice(*var_names)
@@ -84,8 +125,52 @@ module Bayesnet
       self.class.new(scope, vals)
     end
+    def eliminate(var_name)
+      keep_var_names = var_names
+      keep_var_names.delete(var_name)
+      marginalize(keep_var_names)
+    end
+    def select(subcontext)
+      @vals.select do |context, _|
+        var_names.zip(context).slice(subcontext.keys) == subcontext
+      end
+    end
+    def *(other)
+      common_scope = @scope.keys & other.scope.keys
+      new_scope = scope.merge(other.scope)
+      new_vals = {}
+      group1 = group_by_scope_values(common_scope)
+      group2 = other.group_by_scope_values(common_scope)
+      group1.each do |scope, vals1|
+        combo = vals1.product(group2[scope])
+        combo.each do |(val1, val2)|
+          # The values in each key must follow the variable order of new_scope, i.e.
+          # they must line up with `new_scope.keys`.
+          # The code below ensures this by merging two hashes in the same
+          # way as `new_scope` is constructed above
+          val_by_name1 = var_names.zip(val1.first).to_h
+          val_by_name2 = other.var_names.zip(val2.first).to_h
+          new_vals[val_by_name1.merge(val_by_name2).values] = val1.last*val2.last
+        end
+      end
+      Factor.new(new_scope, new_vals)
+    end
+    def group_by_scope_values(scope_keys)
+      indices = scope_keys.map { |k| index_by_var_name[k] }
+      @vals.group_by { |context, _val| indices.map { |i| context[i] } }
+    end
     private
+    def delete_by_indices(array, indices)
+      result = array.dup
+      indices.map { |i| result[i] = nil }
+      result.compact
+    end
     def initialize(scope = {}, vals = {})
       @scope = scope
       @vals = vals

data/lib/bayesnet/graph.rb CHANGED Viewed

@@ -5,16 +5,15 @@ require "bayesnet/node"
 module Bayesnet
   # Acyclic graph
   class Graph
+    include Bayesnet::Logging
     attr_reader :nodes
     def initialize
       @nodes = {}
     end
-    def var_names
-      nodes.keys
-    end
+    # +++ Graph DSL +++
     def node(name, parents: [], &block)
       raise Error, "DSL error, #node requires a &block" unless block
@@ -22,21 +21,100 @@ module Bayesnet
       node.instance_eval(&block)
       @nodes[name] = node
     end
+    # --- Graph DSL ---
-    def resolve_factors
-      @nodes.values.each do |node|
-        node.resolve_factor(@nodes.slice(*node.parent_nodes))
+    # returns names of all nodes
+    def var_names
+      nodes.keys
+    end
+    # returns normalized distribution reduced to `evidence`
+    # and marginalized over `over`
+    def distribution(over: [], evidence: {}, algorithm: :variables_elimination)
+      case algorithm
+      when :brute_force
+        joint_distribution
+          .reduce_to(evidence)
+          .marginalize(over)
+          .normalize
+      when :variables_elimination
+        reduced_factors = nodes.values.map(&:factor).map { |f| f.reduce_to(evidence) }
+        not_include_in_order = evidence.keys.to_set + over.to_set
+        variables_order = elimination_order.reject { |v| not_include_in_order.include?(v) }
+        distribution = eliminate_variables(variables_order, reduced_factors)
+        distribution.normalize
+      else
+        raise "Uknown algorithm #{algorithm}"
       end
     end
-    def distribution(over: [], evidence: {})
-      joint_distribution
-        .reduce_to(evidence)
-        .marginalize(over)
-        .normalize
+    def elimination_order
+      return @order if @order
+      @order = []
+      edges = Set.new
+      @nodes.each do |name, node|
+        parents = node.parent_nodes.keys
+        parents.each { |p| edges.add([name, p].to_set) }
+        parents.combination(2) { |p1, p2| edges.add([p1, p2].to_set) }
+      end
+      # edges now are moralized graph of `self`, just represented differently as
+      # set of edges
+      remaining_nodes = nodes.keys.to_set
+      until remaining_nodes.empty?
+        best_node = find_min_neighbor(remaining_nodes, edges)
+        remaining_nodes.delete(best_node)
+        @order.push(best_node)
+        clique = edges.select { |e| e.include?(best_node) }
+        edges -= clique
+        if edges.empty? #i.e. clique is the last edge
+          @order += remaining_nodes.to_a
+          remaining_nodes = Set.new
+        end
+        clique.
+          map { |e| e.delete(best_node) }.
+          map(&:first).
+          combination(2) { |p1, p2| edges.add([p1,p2].to_set) }
+      end
+      @order
+    end
+    def find_min_neighbor(remaining_nodes, edges)
+      result = nil
+      min_neighbors = nil
+      remaining_nodes.each do |name, _|
+        neighbors = edges.count { |e| e.include?(name) }
+        if min_neighbors.nil? || neighbors < min_neighbors
+          min_neighbors = neighbors
+          result = name
+        end
+      end
+      result
+    end
+    def eliminate_variables(variables_order, factors)
+      logger.debug "Eliminating variables #{variables_order} from #{factors.size} factors #{factors.map(&:var_names)}"
+      remaining_factors = factors.to_set
+      variables_order.each do |var_name|
+        logger.debug "Eliminating '#{var_name}'..."
+        grouped_factors = remaining_factors.select { |f| f.var_names.include?(var_name) }
+        remaining_factors -= grouped_factors
+        logger.debug "Building new factor out of #{grouped_factors.size} factors having '#{var_name}' - #{grouped_factors.map(&:var_names)}"
+        product_factor = grouped_factors.reduce(&:*)
+        logger.debug "Removing variable from new factor"
+        new_factor = product_factor.eliminate(var_name)
+        logger.debug "New factor variables are #{new_factor.var_names}"
+        remaining_factors.add(new_factor)
+        logger.debug "The variable '#{var_name}' is elminated"
+      end
+      logger.debug "Non-eliminated variables are #{remaining_factors.map(&:var_names).flatten.uniq}"
+      result = remaining_factors.reduce(&:*)
+      logger.debug "Eliminating is done"
+      result
     end
     # This is a MAP query, i.e. Maximum a Posteriori
+    # returns value of `var_name` having maximum likelihood, when `evidence` is observed
     def most_likely_value(var_name, evidence:)
       posterior_distribution = distribution(over: [var_name], evidence: evidence)
       mode = posterior_distribution.contextes(var_name).zip(posterior_distribution.values).max_by(&:last)
@@ -49,6 +127,7 @@ module Bayesnet
       posterior_distribution[*over_vars.values]
     end
+    # Essentially it builds the product of all nodes' factors
     def joint_distribution
       return @joint_distribution if @joint_distribution
@@ -75,5 +154,11 @@ module Bayesnet
     def parameters
       nodes.values.map(&:parameters).sum
     end
+    def resolve_factors
+      @nodes.values.each do |node|
+        node.resolve_factor(@nodes.slice(*node.parent_nodes))
+      end
+    end
   end
 end

data/lib/bayesnet/logging.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# lib/bayesnet/logging.rb
+module Bayesnet
+  def self.logger
+    @logger ||= Logger.new(STDOUT).tap { |l| l.level = :debug }
+  end
+  module Logging
+    def logger
+      Bayesnet.logger
+    end
+  end
+end

data/lib/bayesnet/node.rb CHANGED Viewed

@@ -6,8 +6,10 @@ module Bayesnet
       @name = name
       @parent_nodes = parent_nodes
       @values = []
+      @factor = Factor.new
     end
+    # +++ Node DSL +++
     def values(hash_or_array = nil, &block)
       case hash_or_array
       when NilClass
@@ -28,6 +30,21 @@ module Bayesnet
       end
     end
+    def distributions(&block)
+      instance_eval(&block)
+    end
+    # --- Node DSL ---
+    def parameters
+      (values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
+    end
+    def as(distribution, given:)
+      @values.zip(distribution).each do |value, probability|
+        @factor.val [value] + given + [probability]
+      end
+    end
     def resolve_factor(parent_nodes)
       @parent_nodes = parent_nodes
       if @factor.is_a?(Proc)
@@ -43,18 +60,5 @@ module Bayesnet
       end
     end
-    def distributions(&block)
-      instance_eval(&block)
-    end
-    def parameters
-      (values.size - 1) * parent_nodes.values.reduce(1) { |mul, n| mul * n.values.size }
-    end
-    def as(distribution, given:)
-      @values.zip(distribution).each do |value, probability|
-        @factor.val [value] + given + [probability]
-      end
-    end
   end
 end

data/lib/bayesnet/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Bayesnet
-  VERSION = "0.1.0"
+  VERSION = "0.6.0"
 end

data/lib/bayesnet.rb CHANGED Viewed

@@ -1,8 +1,10 @@
 # frozen_string_literal: true
 require "set"
+require "logger"
 # net
+require_relative "bayesnet/logging"
 require_relative "bayesnet/dsl"
 require_relative "bayesnet/error"
 require_relative "bayesnet/factor"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bayesnet
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.6.0
 platform: ruby
 authors:
 - Aleksandr Furmanov
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2022-02-26 00:00:00.000000000 Z
+date: 2022-06-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: m
@@ -77,6 +77,7 @@ files:
 - lib/bayesnet/error.rb
 - lib/bayesnet/factor.rb
 - lib/bayesnet/graph.rb
+- lib/bayesnet/logging.rb
 - lib/bayesnet/node.rb
 - lib/bayesnet/parsers/bif.rb
 - lib/bayesnet/parsers/bif.treetop