fathom 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.bundle/config +2 -0
- data/.document +5 -0
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +30 -0
- data/LICENSE +20 -0
- data/README.md +176 -0
- data/Rakefile +50 -0
- data/VERSION +1 -0
- data/autotest/discover.rb +1 -0
- data/lib/fathom.rb +68 -0
- data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
- data/lib/fathom/archive/n2.rb +198 -0
- data/lib/fathom/archive/n3.rb +119 -0
- data/lib/fathom/archive/node.rb +74 -0
- data/lib/fathom/archive/noodle.rb +136 -0
- data/lib/fathom/archive/scratch.rb +45 -0
- data/lib/fathom/basic_node.rb +8 -0
- data/lib/fathom/causal_graph.rb +12 -0
- data/lib/fathom/combined_plausibilities.rb +12 -0
- data/lib/fathom/concept.rb +83 -0
- data/lib/fathom/data_node.rb +51 -0
- data/lib/fathom/import.rb +68 -0
- data/lib/fathom/import/csv_import.rb +60 -0
- data/lib/fathom/import/yaml_import.rb +53 -0
- data/lib/fathom/inverter.rb +21 -0
- data/lib/fathom/knowledge_base.rb +23 -0
- data/lib/fathom/monte_carlo_set.rb +76 -0
- data/lib/fathom/node_utilities.rb +8 -0
- data/lib/fathom/plausible_range.rb +82 -0
- data/lib/fathom/value_aggregator.rb +11 -0
- data/lib/fathom/value_description.rb +79 -0
- data/lib/fathom/value_multiplier.rb +18 -0
- data/lib/options_hash.rb +186 -0
- data/spec/fathom/data_node_spec.rb +61 -0
- data/spec/fathom/import/csv_import_spec.rb +36 -0
- data/spec/fathom/import/yaml_import_spec.rb +40 -0
- data/spec/fathom/import_spec.rb +22 -0
- data/spec/fathom/knowledge_base_spec.rb +16 -0
- data/spec/fathom/monte_carlo_set_spec.rb +58 -0
- data/spec/fathom/plausible_range_spec.rb +130 -0
- data/spec/fathom/value_description_spec.rb +70 -0
- data/spec/fathom_spec.rb +8 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/support/demo.yml +17 -0
- metadata +135 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'node'
|
2
|
+
require 'conditional_probability_matrix'
|
3
|
+
|
4
|
+
# Scratch fixture: two memoized nodes (killer identity and fingerprint
# information) plus the conditional probability matrix built from them.
# Everything is defined on the singleton, so it is reached as A.x, A.y, A.cpm.
class A
  class << self
    # Earlier experiment, left disabled:
    #
    # def will_pay
    #   @will_pay ||= Node.new :true, :false
    # end
    #
    # def has_money
    #   @has_money ||= Node.new :plenty => 0.1, :some => 0.8, :little => 0.1
    # end
    #
    # def ones
    #   Matrix.ones(will_pay.values.length, has_money.values.length)
    # end
    #
    # def parents
    #   will_pay.values.col * has_money.values
    #   # will_pay.each do |value|
    #   #
    #   # end
    # end
    #
    # def cpm
    #   ConditionalProbabilityMatrix.new(will_pay, has_money)
    # end

    # Node named :killer_identity, built once and reused on later calls.
    # Also reachable as +k+ and +x+.
    def killer_identity
      return @killer_identity if @killer_identity
      @killer_identity = Node.new(:killer_identity, :jack => 0.8, :joe => 0.1, :jeff => 0.1)
    end
    alias_method :k, :killer_identity
    alias_method :x, :killer_identity

    # Node named :fingerprint_information, built once and reused on later
    # calls. Also reachable as +f+ and +y+.
    def fingerprint_information
      return @fingerprint_information if @fingerprint_information
      @fingerprint_information = Node.new(:fingerprint_information, :jack => 2/3.0, :joe => 1/6.0, :jeff => 1/6.0)
    end
    alias_method :f, :fingerprint_information
    alias_method :y, :fingerprint_information

    # ConditionalProbabilityMatrix over (x, y), built once and reused.
    def cpm
      return @cpm if @cpm
      @cpm = ConditionalProbabilityMatrix.new(x, y)
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
module Fathom
  # Placeholder class: no behaviour is defined on CausalGraph yet.
  class CausalGraph; end
end

# Runs only when this file is executed directly rather than required.
if $0 == __FILE__
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # CausalGraph.new
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
module Fathom
  # Placeholder class: no behaviour is defined on CombinedPlausibilities yet.
  class CombinedPlausibilities; end
end

# Runs only when this file is executed directly rather than required.
if $0 == __FILE__
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # CombinedPlausibilities.new
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
=begin
|
2
|
+
This is a first approach to an RDF back end for a broadly-defined data store.
|
3
|
+
|
4
|
+
I am borrowing from the SKOS ontology here to be able to define any sort of concept
|
5
|
+
that may assist me with my decision-making work.
|
6
|
+
|
7
|
+
TODO:
|
8
|
+
|
9
|
+
[x] Build a basic Spira model
|
10
|
+
[x] Make a SKOS commitment
|
11
|
+
[.] Create helper methods to find or create the concept easily (using hash syntax for field names)
|
12
|
+
[] Create association methods for associating the concept to other concepts (need to think about this one)
|
13
|
+
[] Create specific methods to define a plausible range (probably define a Spira model here too)
|
14
|
+
[] Create specific methods to define a ValueDescription
|
15
|
+
[] Create specific methods to define a MonteCarloSet
|
16
|
+
[] Create specific methods to define a CausalGraph
|
17
|
+
[] Create specific methods to define a DependencyGraph
|
18
|
+
[] Create specific methods to define the value of further measurement (another un-named class)
|
19
|
+
|
20
|
+
=end
|
21
|
+
|
22
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
23
|
+
require 'rdf'
|
24
|
+
require 'rdf/ntriples'
|
25
|
+
require 'data_objects'
|
26
|
+
require 'do_sqlite3'
|
27
|
+
require 'rdf/do'
|
28
|
+
require 'spira'
|
29
|
+
|
30
|
+
|
31
|
+
module Fathom

  # Go ahead and create a generic repo for Fathom.
  # The repository is built on first use and reused afterwards.
  # NOTE(review): the SQLite location is hard-coded to /tmp/test.db.
  def repo
    @repo ||= RDF::DataObjects::Repository.new('sqlite3:/tmp/test.db')
  end

  # NOTE(review): `repo` is defined as an instance method of the module, so
  # this call presumably relies on Fathom responding to it (e.g. through
  # `extend self` in fathom.rb) -- confirm.
  Spira.add_repository(:default, repo)

  # A Spira resource described with SKOS predicates (prefLabel, definition,
  # scopeNote).
  class Concept

    include Spira::Resource
    include RDF

    class << self
      # Returns the stored concept for +name+ when it exists; otherwise
      # returns a new, unsaved concept with +name+ and +description+ set.
      def find_or_build(name, description=nil)
        locate_or_initialize(name, description)
      end

      # Same lookup as find_or_build, but a concept that did not exist yet
      # is saved (save!) before it is returned. An existing concept is
      # returned untouched.
      def find_or_create(name, description=nil)
        locate_or_initialize(name, description) { |fresh| fresh.save! }
      end

      protected

      # Identifier handed to Concept.for: the name lower-cased, with each run
      # of whitespace replaced by a single underscore.
      def concept_name(name)
        name.downcase.gsub(/\s+/, '_')
      end

      # Shared body of find_or_build / find_or_create. Yields the concept
      # only when it had to be initialised, so callers can persist it.
      def locate_or_initialize(name, description)
        concept = Concept.for(concept_name(name))
        return concept if concept.exist?
        concept.name = name
        concept.description = description
        yield concept if block_given?
        concept
      end
    end

    base_uri "http://example.org/example/concepts"

    property :name, :predicate => SKOS.prefLabel
    property :description, :predicate => SKOS.definition
    property :scope, :predicate => SKOS.scopeNote

  end
end

# Runs only when this file is executed directly rather than required.
if __FILE__ == $0
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # Concept.new
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
|
3
|
+
=begin
|
4
|
+
A DataNode is a node generated from data itself. It stores the data and reveals some statistical
|
5
|
+
measurements for the data. It expects an array or vector of values and generates a vector on demand.
|
6
|
+
=end
|
7
|
+
# Node backed by raw observations. It keeps the values it was given and
# derives a GSL vector, mean and standard deviation from them on demand.
class Fathom::DataNode

  include NodeUtilities

  attr_reader :values, :name, :distribution, :confidence_interval
  alias_method :ci, :confidence_interval

  # opts[:values] is required; opts[:name] and opts[:distribution] are
  # stored as given.
  #
  # Raises ArgumentError when no values are supplied.
  def initialize(opts={})
    @values = opts[:values]
    raise ArgumentError, "Must provided values: DataNode.new(:values => [...])" unless values
    @name, @distribution = opts[:name], opts[:distribution]
  end

  # The values as a GSL::Vector, converted once and cached.
  def vector
    return @vector if @vector
    @vector = GSL::Vector.ary_to_gv(values)
  end

  # Standard deviation of the vector, cached after the first call.
  # Also available as +sd+ and +std+.
  def standard_deviation
    return @standard_deviation if @standard_deviation
    @standard_deviation = vector.sd
  end
  alias_method :sd, :standard_deviation
  alias_method :std, :standard_deviation

  # Mean of the vector, cached after the first call.
  def mean
    return @mean if @mean
    @mean = vector.mean
  end

  # A gaussian draw from the node's generator, scaled by the standard
  # deviation and shifted by the mean.
  def rand
    noise = rng.gaussian(std)
    noise + mean
  end

  protected

  # Random number generator, allocated on first use and reused afterwards.
  def rng
    return @rng if @rng
    @rng = GSL::Rng.alloc
  end

end

# Runs only when this file is executed directly rather than required.
if $0 == __FILE__
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # DataNode.new
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
|
3
|
+
=begin
|
4
|
+
|
5
|
+
The import scripts are meant to make it easier to build up a knowledge base from
|
6
|
+
real data. There are a lot of ways we could get our data: YAML files, spreadsheets,
|
7
|
+
web crawlers, relational databases, RDF data. The Import class creates a regular
|
8
|
+
way to create nodes in the knowledge base. The following example would be a
|
9
|
+
simple way to import some data about weekend trips:
|
10
|
+
|
11
|
+
class WeekendPlanningImport < Import
|
12
|
+
def import_plausible_data
|
13
|
+
values = [
|
14
|
+
{:name => 'Chance of Rain', :min => 0.2, :max => 0.3, :confidence_interval => 0.8},
|
15
|
+
{:name => 'Would Go to the Beach Despite the Rain', :min => 0, :max => 0.2},
|
16
|
+
{:name => 'Would Go to the Movies if Something Good Was Playing', :min => 0.8, :max => 1.0},
|
17
|
+
{:name => 'Would Go to the Movies Despite the Rain', :min => 0.9, :max => 1.0}
|
18
|
+
]
|
19
|
+
return [PlausibleRange, values]
|
20
|
+
end
|
21
|
+
|
22
|
+
def import_destination_data
|
23
|
+
value_hash = FasterCSV...
|
24
|
+
return [DataNode, value_hash]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
Each import method starts with 'import_' and returns an array of [NodeClass, values_hash].
|
29
|
+
The Import class will then create a series of nodes and insert them into the active knowledge
|
30
|
+
base.
|
31
|
+
|
32
|
+
There are a number of useful Import classes that make most data imports fairly straight forward.
|
33
|
+
This way data from spreadsheets or YAML files can easily be added to the knowledge base.
|
34
|
+
|
35
|
+
=end
|
36
|
+
module Fathom
  # Base class for importers. A subclass defines one or more methods whose
  # names start with 'import_'; each returns [NodeClass, list_of_values].
  # #import calls every such method, builds one node per values entry and
  # registers it in Fathom.knowledge_base under the node's name.
  class Import

    attr_reader :content, :options

    # opts is wrapped in an OptionsHash; opts[:content] is exposed as #content.
    def initialize(opts={})
      @options = OptionsHash.new(opts)
      @content = @options[:content]
    end

    # Runs every import_* method and returns the created nodes as a flat
    # array, in the order they were created.
    def import
      results = []
      import_methods.each do |method|
        klass, initialization_data = send(method.to_sym)
        initialization_data.each do |values|
          results << extract_nodes(klass, values)
        end
      end
      results
    end

    protected

    # Instantiates klass with values, stores the node in the knowledge base
    # keyed by its name, and returns the node.
    def extract_nodes(klass, values)
      node = klass.new(values)
      Fathom.knowledge_base[node.name] = node
      node
    end

    # Names of the import methods this object contributes beyond its
    # superclass. The pattern is anchored so that only names that *start*
    # with 'import_' qualify (a helper such as reimport_x is ignored), and
    # this helper excludes itself -- otherwise calling #import on a plain
    # Import instance would dispatch to import_methods and fail.
    def import_methods
      candidates = methods - self.class.superclass.instance_methods
      candidates.select do |candidate|
        method_name = candidate.to_s
        method_name =~ /\Aimport_\w+/ && method_name != 'import_methods'
      end
    end

  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
require 'import'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
# TODO: Work this out with Ruby 1.9. We don't need to load this for all Rubies.
|
6
|
+
require 'fastercsv'
|
7
|
+
|
8
|
+
module Fathom
  class CSVImport < Import

    # @content could be a filename, URI, or actual file contents. We figure out which
    # it is and then parse the contents with FasterCSV. We assume that there are column
    # headers that are the names of each node, and that the values in the node are
    # values for a DataNode.
    #
    # Returns [DataNode, array_of_column_hashes].
    def import_csv
      [DataNode, extract_columns(parse_contents)]
    end

    # These are the options we use to parse the file. They can be overridden
    # by calling CSVImport.new(:parse_options => {...}, ...)
    def parse_options
      @parse_options ||= (@options[:parse_options] ||
        {:converters => [:all], :headers => true, :skip_blanks => true})
    end

    protected

    # Parses whatever get_contents returns using parse_options.
    def parse_contents
      FasterCSV.parse(get_contents, parse_options)
    end

    # Tries to read a file or URL. That failing, assumes the contents are CSV contents.
    # The block form of open closes the handle once it has been read.
    #
    # NOTE(review): Kernel#open treats a leading "|" as a command to run;
    # do not pass untrusted input as :content.
    def get_contents
      open(@content) { |io| io.read }
    rescue
      @content
    end

    # Returns an array of hashes with :name => column header, :values => values.
    # The transposed columns are destructured rather than mutated.
    def extract_columns(parsed)
      parsed.to_a.transpose.map do |column|
        header, *cells = column
        {:name => header, :values => cells}
      end
    end

  end
end

# Runs only when this file is executed directly rather than required.
if __FILE__ == $0
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # CSVImport.new
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
module Fathom
  # Imports nodes from YAML. #content may be a file name, a URL, or the YAML
  # text itself. Entries whose value is a Hash with :min and :max become
  # PlausibleRange nodes; entries whose value is an Array become DataNodes.
  #
  # The class is opened inside `module Fathom` so that Import, PlausibleRange
  # and DataNode are looked up in the Fathom namespace.
  class YAMLImport < Import

    # Returns [PlausibleRange, array_of_option_hashes].
    def import_plausible_ranges
      assert_yaml_content
      [PlausibleRange, extract_plausible_ranges]
    end

    # Returns [DataNode, array_of_{:name, :values}_hashes].
    def import_data_nodes
      assert_yaml_content
      [DataNode, extract_data_nodes]
    end

    protected

    # Loads and caches the parsed YAML.
    #
    # Raises ArgumentError when there is nothing to parse. The check sits
    # outside the rescue that handles a failed open, so the error actually
    # reaches the caller.
    #
    # NOTE(review): YAML.load can instantiate arbitrary objects; only feed it
    # trusted content.
    def assert_yaml_content
      return @yaml_content if @yaml_content
      raw = read_source
      if raw.nil? || raw.to_s.strip.empty?
        raise ArgumentError, "Unable to extract YAML data out of the contents."
      end
      @yaml_content = YAML.load(raw)
    end

    # Tries to read #content as a file or URL; if that fails, returns
    # #content itself so it can be treated as literal YAML. The block form of
    # open closes the handle once it has been read.
    #
    # NOTE(review): Kernel#open treats a leading "|" as a command to run;
    # do not pass untrusted input as :content.
    def read_source
      open(content) { |io| io.read }
    rescue
      content
    end

    # Collects the entries whose value is a Hash carrying both :min and :max,
    # each wrapped in an OptionsHash with :name merged in.
    def extract_plausible_ranges
      @yaml_content.inject([]) do |list, entry|
        name, value = entry.first, entry.last
        if value.is_a?(Hash)
          value = OptionsHash.new(value)
          list << value.merge(:name => name) if value[:min] && value[:max]
        end
        list
      end
    end

    # Collects the entries whose value is an Array as {:name, :values} hashes.
    def extract_data_nodes
      @yaml_content.inject([]) do |list, entry|
        name, value = entry.first, entry.last
        list << {:name => name, :values => value} if value.is_a?(Array)
        list
      end
    end
  end
end

# Runs only when this file is executed directly rather than required.
if __FILE__ == $0
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # YAMLImport.new
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
module Fathom
  # Node whose value is always -1.
  # NOTE(review): the class is spelled "Invertor" while the default name
  # below is "Inverter" -- confirm which spelling is intended.
  class Invertor
    include BasicNode

    # Passes opts up the initialize chain, then falls back to the default
    # name when none has been set.
    def initialize(opts={})
      super
      @name = "Inverter" unless @name
    end

    # Constant value of this node.
    def value
      -1
    end
  end
end

# Runs only when this file is executed directly rather than required.
if $0 == __FILE__
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # Invertor.new
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
|
2
|
+
# Hash-backed store of nodes, read and written with [] and []=.
class Fathom::KnowledgeBase

  # opts is passed through OptionsHash.new, but nothing is read from the
  # result yet. The store starts out empty.
  def initialize(opts={})
    OptionsHash.new(opts)
    @data_store = Hash.new
  end

  # Stores value under key.
  def []=(key, value)
    @data_store.store(key, value)
  end

  # Returns the value stored under key, or nil when there is none.
  def [](key)
    @data_store[key]
  end

end

# Runs only when this file is executed directly rather than required.
if $0 == __FILE__
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # KnowledgeBase.new
end
|