fathom 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.bundle/config +2 -0
- data/.document +5 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +30 -0
- data/LICENSE +20 -0
- data/README.md +176 -0
- data/Rakefile +50 -0
- data/VERSION +1 -0
- data/autotest/discover.rb +1 -0
- data/lib/fathom.rb +68 -0
- data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
- data/lib/fathom/archive/n2.rb +198 -0
- data/lib/fathom/archive/n3.rb +119 -0
- data/lib/fathom/archive/node.rb +74 -0
- data/lib/fathom/archive/noodle.rb +136 -0
- data/lib/fathom/archive/scratch.rb +45 -0
- data/lib/fathom/basic_node.rb +8 -0
- data/lib/fathom/causal_graph.rb +12 -0
- data/lib/fathom/combined_plausibilities.rb +12 -0
- data/lib/fathom/concept.rb +83 -0
- data/lib/fathom/data_node.rb +51 -0
- data/lib/fathom/import.rb +68 -0
- data/lib/fathom/import/csv_import.rb +60 -0
- data/lib/fathom/import/yaml_import.rb +53 -0
- data/lib/fathom/inverter.rb +21 -0
- data/lib/fathom/knowledge_base.rb +23 -0
- data/lib/fathom/monte_carlo_set.rb +76 -0
- data/lib/fathom/node_utilities.rb +8 -0
- data/lib/fathom/plausible_range.rb +82 -0
- data/lib/fathom/value_aggregator.rb +11 -0
- data/lib/fathom/value_description.rb +79 -0
- data/lib/fathom/value_multiplier.rb +18 -0
- data/lib/options_hash.rb +186 -0
- data/spec/fathom/data_node_spec.rb +61 -0
- data/spec/fathom/import/csv_import_spec.rb +36 -0
- data/spec/fathom/import/yaml_import_spec.rb +40 -0
- data/spec/fathom/import_spec.rb +22 -0
- data/spec/fathom/knowledge_base_spec.rb +16 -0
- data/spec/fathom/monte_carlo_set_spec.rb +58 -0
- data/spec/fathom/plausible_range_spec.rb +130 -0
- data/spec/fathom/value_description_spec.rb +70 -0
- data/spec/fathom_spec.rb +8 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/support/demo.yml +17 -0
- metadata +135 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'node'
|
2
|
+
require 'conditional_probability_matrix'
|
3
|
+
|
4
|
+
# Scratch class holding a small worked example: two memoized nodes (the
# killer's identity and the fingerprint evidence) and a conditional
# probability matrix built from them. Everything lives at the class level.
class A
  # Earlier draft, kept for reference:
  #
  # def will_pay
  #   @will_pay ||= Node.new :true, :false
  # end
  #
  # def has_money
  #   @has_money ||= Node.new :plenty => 0.1, :some => 0.8, :little => 0.1
  # end
  #
  # def ones
  #   Matrix.ones(will_pay.values.length, has_money.values.length)
  # end
  #
  # def parents
  #   will_pay.values.col * has_money.values
  #   # will_pay.each do |value|
  #   #
  #   # end
  # end
  #
  # def cpm
  #   ConditionalProbabilityMatrix.new(will_pay, has_money)
  # end

  # Node describing who the killer is; built once and cached on the class.
  def self.killer_identity
    @killer_identity ||= Node.new(:killer_identity, :jack => 0.8, :joe => 0.1, :jeff => 0.1)
  end

  # Node describing the fingerprint evidence; built once and cached on the class.
  def self.fingerprint_information
    @fingerprint_information ||= Node.new(:fingerprint_information, :jack => 2/3.0, :joe => 1/6.0, :jeff => 1/6.0)
  end

  # Matrix built from the two nodes above (killer identity first); cached.
  def self.cpm
    @cpm ||= ConditionalProbabilityMatrix.new(killer_identity, fingerprint_information)
  end

  class << self
    # Short names for the two nodes.
    alias_method :k, :killer_identity
    alias_method :x, :killer_identity
    alias_method :f, :fingerprint_information
    alias_method :y, :fingerprint_information
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
module Fathom
  # Empty placeholder: no behaviour is defined on this class yet.
  class CausalGraph; end
end
|
7
|
+
|
8
|
+
# Runs only when this file is executed directly rather than required.
# TODO: Is there anything you want to do to run this file on its own?
# CausalGraph.new
include Fathom if __FILE__ == $0
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
module Fathom
  # Empty placeholder: no behaviour is defined on this class yet.
  class CombinedPlausibilities; end
end
|
7
|
+
|
8
|
+
# Runs only when this file is executed directly rather than required.
# TODO: Is there anything you want to do to run this file on its own?
# CombinedPlausibilities.new
include Fathom if __FILE__ == $0
|
@@ -0,0 +1,83 @@
|
|
1
|
+
=begin
|
2
|
+
This is a first approach to an RDF back end for a broadly-defined data store.
|
3
|
+
|
4
|
+
I am borrowing from the SKOS ontology here to be able to define any sort of concept
|
5
|
+
that may assist me with my decision-making work.
|
6
|
+
|
7
|
+
TODO:
|
8
|
+
|
9
|
+
[x] Build a basic Spira model
|
10
|
+
[x] Make a SKOS commitment
|
11
|
+
[.] Create helper methods to find or create the concept easily (using hash syntax for field names)
|
12
|
+
[] Create association methods for associating the concept to other concepts (need to think about this one)
|
13
|
+
[] Create specific methods to define a plausible range (probably define a Spira model here too)
|
14
|
+
[] Create specific methods to define a ValueDescription
|
15
|
+
[] Create specific methods to define a MonteCarloSet
|
16
|
+
[] Create specific methods to define a CausalGraph
|
17
|
+
[] Create specific methods to define a DependencyGraph
|
18
|
+
[] Create specific methods to define the value of further measurement (another un-named class)
|
19
|
+
|
20
|
+
=end
|
21
|
+
|
22
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
23
|
+
require 'rdf'
|
24
|
+
require 'rdf/ntriples'
|
25
|
+
require 'data_objects'
|
26
|
+
require 'do_sqlite3'
|
27
|
+
require 'rdf/do'
|
28
|
+
require 'spira'
|
29
|
+
|
30
|
+
|
31
|
+
module Fathom

  # Go ahead and create a generic repo for Fathom. The repository is built
  # once and memoized; the sqlite3 location is hard-coded.
  #
  # NOTE(review): this is defined as an instance method but is called on the
  # module body just below. That only resolves if Fathom extends itself (or
  # otherwise exposes `repo` at module level) elsewhere -- confirm.
  def repo
    @repo ||= RDF::DataObjects::Repository.new('sqlite3:/tmp/test.db')
  end

  Spira.add_repository(:default, repo)

  # A named concept stored through Spira, described with SKOS predicates
  # (prefLabel, definition, scopeNote).
  class Concept

    include Spira::Resource
    include RDF

    class << self
      # Looks the concept up by its normalised name. Returns it untouched when
      # it already exists; otherwise fills in name and description and returns
      # the unsaved instance.
      def find_or_build(name, description=nil)
        fetch_or_populate(name, description)
      end

      # Same lookup as find_or_build, but a concept that did not exist yet is
      # saved (via save!) before it is returned.
      def find_or_create(name, description=nil)
        fetch_or_populate(name, description) { |concept| concept.save! }
      end

      protected

      # Lower-cases the name and collapses each run of whitespace into a
      # single underscore; the result is the identifier handed to Concept.for.
      def concept_name(name)
        name.downcase.gsub(/\s+/, '_')
      end

      # Shared body of find_or_build / find_or_create. Yields a freshly
      # populated concept to the optional block; existing concepts are
      # returned before the block is reached.
      def fetch_or_populate(name, description)
        concept = Concept.for(concept_name(name))
        return concept if concept.exist?
        concept.name = name
        concept.description = description
        yield concept if block_given?
        concept
      end
    end

    base_uri "http://example.org/example/concepts"

    property :name, :predicate => SKOS.prefLabel
    property :description, :predicate => SKOS.definition
    property :scope, :predicate => SKOS.scopeNote

  end
end
|
78
|
+
|
79
|
+
# Runs only when this file is executed directly rather than required.
# TODO: Is there anything you want to do to run this file on its own?
# Concept.new
include Fathom if __FILE__ == $0
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
|
3
|
+
=begin
|
4
|
+
A DataNode is a node generated from data itself. It stores the data and reveals some statistical
|
5
|
+
measurements for the data. It expects an array or vector of values and generates a vector on demand.
|
6
|
+
=end
|
7
|
+
# A node backed by raw observations. It stores the values it is given and
# lazily derives a GSL vector, its mean and its standard deviation from them.
class Fathom::DataNode

  include NodeUtilities

  attr_reader :values, :name, :distribution, :confidence_interval
  alias :ci :confidence_interval

  # opts:
  #   :values              - required; the observations (later handed to
  #                          GSL::Vector.ary_to_gv)
  #   :name                - optional, stored as given
  #   :distribution        - optional, stored as given
  #   :confidence_interval - optional, stored as given
  #
  # Raises ArgumentError when :values is missing (nil or false).
  def initialize(opts={})
    @values = opts[:values]
    raise ArgumentError, "Must provided values: DataNode.new(:values => [...])" unless values
    @name = opts[:name]
    @distribution = opts[:distribution]
    # Previously never assigned, so #confidence_interval / #ci always returned nil.
    @confidence_interval = opts[:confidence_interval]
  end

  # GSL vector built from the stored values; memoized.
  def vector
    @vector ||= GSL::Vector.ary_to_gv(values)
  end

  # Standard deviation of the vector (GSL's `sd`); memoized.
  def standard_deviation
    @standard_deviation ||= vector.sd
  end
  alias :sd :standard_deviation
  alias :std :standard_deviation

  # Mean of the vector; memoized.
  def mean
    @mean ||= vector.mean
  end

  # Draws one value: a gaussian sample with this node's standard deviation,
  # shifted by its mean. Note that this shadows Kernel#rand on instances.
  def rand
    rng.gaussian(std) + mean
  end

  protected

  # Random number generator, allocated on first use.
  def rng
    @rng ||= GSL::Rng.alloc
  end

end
|
46
|
+
|
47
|
+
# Runs only when this file is executed directly rather than required.
# TODO: Is there anything you want to do to run this file on its own?
# DataNode.new
include Fathom if __FILE__ == $0
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
|
3
|
+
=begin
|
4
|
+
|
5
|
+
The import scripts are meant to make it easier to build up a knowledge base from
|
6
|
+
real data. There are a lot of ways we could get our data: YAML files, spreadsheets,
|
7
|
+
web crawlers, relational databases, RDF data. The Import class creates a regular
|
8
|
+
way to create nodes in the knowledge base. The following example would be a
|
9
|
+
simple way to import some data about weekend trips:
|
10
|
+
|
11
|
+
class WeekendPlanningImport < Import
|
12
|
+
def import_plausible_data
|
13
|
+
values = [
|
14
|
+
{:name => 'Chance of Rain', :min => 0.2, :max => 0.3, :confidence_interval => 0.8},
|
15
|
+
{:name => 'Would Go to the Beach Despite the Rain', :min => 0, :max => 0.2},
|
16
|
+
{:name => 'Would Go to the Movies if Something Good Was Playing', :min => 0.8, :max => 1.0},
|
17
|
+
{:name => 'Would Go to the Movies Despite the Rain', :min => 0.9, :max => 1.0}
|
18
|
+
]
|
19
|
+
return [PlausibleRange, values]
|
20
|
+
end
|
21
|
+
|
22
|
+
def import_destination_data
|
23
|
+
value_hash = FasterCSV...
|
24
|
+
return [DataNode, value_hash]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
Each import method starts with 'import_' and returns an array of [NodeClass, values_hash].
|
29
|
+
The Import class will then create a series of nodes and insert them into the active knowledge
|
30
|
+
base.
|
31
|
+
|
32
|
+
There are a number of useful Import classes that make most data imports fairly straightforward.
|
33
|
+
This way data from spreadsheets or YAML files can easily be added to the knowledge base.
|
34
|
+
|
35
|
+
=end
|
36
|
+
# Base class for importers. A subclass defines one or more methods whose
# names start with `import_`; each returns [NodeClass, list_of_init_values].
# #import calls every such method, builds one node per entry and registers it
# in Fathom.knowledge_base under the node's name.
class Fathom::Import

  attr_reader :content, :options

  # opts is wrapped in an OptionsHash; opts[:content] is kept as the raw
  # material for subclasses to parse.
  def initialize(opts={})
    @options = OptionsHash.new(opts)
    @content = @options[:content]
  end

  # Runs every import_* method and returns the flat list of nodes created.
  def import
    results = []
    import_methods.each do |method|
      klass, initialization_data = send(method.to_sym)
      initialization_data.each do |values|
        results << extract_nodes(klass, values)
      end
    end
    results
  end

  protected

  # Builds a single node from `values` and stores it in the knowledge base,
  # keyed by the node's name. Returns the node.
  def extract_nodes(klass, values)
    node = klass.new(values)
    Fathom.knowledge_base[node.name] = node
    node
  end

  # Names of the methods #import should call: everything this object responds
  # to whose name *starts with* `import_`.
  #
  # The pattern is anchored so that names merely containing `import_` are not
  # picked up, and inherited import_* methods are kept so that a subclass of a
  # concrete importer still runs (or overrides) its parent's imports.
  def import_methods
    methods.select { |m| m.to_s =~ /\Aimport_\w+/ }
  end

end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
require 'import'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
# TODO: Work this out with Ruby 1.9. We don't need to load this for all Rubies.
|
6
|
+
require 'fastercsv'
|
7
|
+
|
8
|
+
module Fathom
  # Imports DataNodes from CSV. Each column becomes one node: the header is
  # the node name and the remaining cells are its values.
  class CSVImport < Import

    # @content could be a filename, URI, or actual file contents. We figure out which
    # it is and then parse the contents with FasterCSV. We assume that there are column
    # headers that are the names of each node, and that the values in the node are
    # values for a DataNode.
    def import_csv
      [DataNode, extract_columns(parse_contents)]
    end

    # These are the options we use to parse the file. They can be overridden
    # by calling CSVImport.new(:parse_options => {...}, ...)
    def parse_options
      @parse_options ||= @options[:parse_options] ||
        {:converters => [:all], :headers => true, :skip_blanks => true}
    end

    protected

    # Parses the raw CSV text with the configured options.
    def parse_contents
      FasterCSV.parse(get_contents, parse_options)
    end

    # Tries to read a file or URL. That failing, assumes the contents are CSV contents.
    # The block form of `open` closes the handle once it has been read.
    #
    # NOTE(review): Kernel#open runs a command when given a string starting
    # with "|"; @content should not come from an untrusted source.
    def get_contents
      open(@content) { |io| io.read }
    rescue StandardError
      @content
    end

    # Returns an array of hashes with :name => column header, :values => values.
    # Columns are sliced rather than shifted so the transposed rows are left intact.
    def extract_columns(parsed)
      parsed.to_a.transpose.map do |column|
        {:name => column.first, :values => column[1..-1]}
      end
    end

  end
end
|
55
|
+
|
56
|
+
# Runs only when this file is executed directly rather than required.
# TODO: Is there anything you want to do to run this file on its own?
# CSV.new
include Fathom if __FILE__ == $0
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
# Imports nodes from YAML. Top-level entries whose value is a hash with both
# :min and :max become PlausibleRange data; entries whose value is an array
# become DataNode data.
class Fathom::YAMLImport < Import

  # Returns [PlausibleRange, list of option hashes (each including :name)].
  def import_plausible_ranges
    assert_yaml_content
    [PlausibleRange, extract_plausible_ranges]
  end

  # Returns [DataNode, list of {:name => ..., :values => [...]} hashes].
  def import_data_nodes
    assert_yaml_content
    [DataNode, extract_data_nodes]
  end

  protected

  # Loads and memoizes the parsed YAML. `content` is first treated as something
  # `open` can read (the block form closes the handle); if that fails it is
  # used as the YAML text itself.
  #
  # Raises ArgumentError when there is nothing to parse. The original check sat
  # inside the begin/rescue and was swallowed by the bare rescue, so it could
  # never surface.
  #
  # NOTE(review): YAML.load can instantiate arbitrary objects on some Ruby
  # versions; content should come from a trusted source.
  def assert_yaml_content
    return @yaml_content if @yaml_content
    file_contents = begin
      open(self.content) { |io| io.read }
    rescue StandardError
      self.content
    end
    raise ArgumentError, "Unable to extract YAML data out of the contents." unless file_contents
    @yaml_content = YAML.load(file_contents)
  end

  # Collects every entry whose value is a hash carrying both :min and :max,
  # merged with the entry's key as :name.
  def extract_plausible_ranges
    @yaml_content.inject([]) do |list, array|
      name, value = array.first, array.last
      if value.is_a?(Hash)
        value = OptionsHash.new(value)
        list << value.merge(:name => name) if value[:min] && value[:max]
      end
      list
    end
  end

  # Collects every entry whose value is an array as {:name, :values}.
  def extract_data_nodes
    @yaml_content.inject([]) do |list, array|
      name, value = array.first, array.last
      list << {:name => name, :values => value} if value.is_a?(Array)
      list
    end
  end
end
|
48
|
+
|
49
|
+
# Runs only when this file is executed directly rather than required.
# TODO: Is there anything you want to do to run this file on its own?
# YAMLImport.new
include Fathom if __FILE__ == $0
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
module Fathom
  # A node whose value is always -1.
  #
  # NOTE(review): the class is spelled "Invertor" while its default name is
  # "Inverter"; the class name is left as is because callers may reference it.
  class Invertor
    include BasicNode

    # Passes opts up the chain unchanged, then falls back to the default name
    # when none has been set.
    def initialize(opts={})
      super
      @name = "Inverter" unless @name
    end

    # Constant value of this node.
    def value
      -1
    end
  end
end
|
16
|
+
|
17
|
+
# Runs only when this file is executed directly rather than required.
# TODO: Is there anything you want to do to run this file on its own?
# Invertor.new
include Fathom if __FILE__ == $0
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
# Minimal in-memory store: values are written and read back by key.
class Fathom::KnowledgeBase

  # opts is run through OptionsHash.new (the result is not used further) and
  # an empty store is created.
  def initialize(opts={})
    OptionsHash.new(opts)
    @data_store = Hash.new
  end

  # Stores value under key, replacing any previous entry.
  def []=(key, value)
    @data_store.store(key, value)
  end

  # Returns the value stored under key, or nil when there is none.
  def [](key)
    @data_store.fetch(key, nil)
  end

end
|
18
|
+
|
19
|
+
# Runs only when this file is executed directly rather than required.
# TODO: Is there anything you want to do to run this file on its own?
# KnowledgeBase.new
include Fathom if __FILE__ == $0
|