fathom 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/.bundle/config +2 -0
  2. data/.document +5 -0
  3. data/.gitignore +5 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +5 -0
  6. data/Gemfile.lock +30 -0
  7. data/LICENSE +20 -0
  8. data/README.md +176 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/autotest/discover.rb +1 -0
  12. data/lib/fathom.rb +68 -0
  13. data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
  14. data/lib/fathom/archive/n2.rb +198 -0
  15. data/lib/fathom/archive/n3.rb +119 -0
  16. data/lib/fathom/archive/node.rb +74 -0
  17. data/lib/fathom/archive/noodle.rb +136 -0
  18. data/lib/fathom/archive/scratch.rb +45 -0
  19. data/lib/fathom/basic_node.rb +8 -0
  20. data/lib/fathom/causal_graph.rb +12 -0
  21. data/lib/fathom/combined_plausibilities.rb +12 -0
  22. data/lib/fathom/concept.rb +83 -0
  23. data/lib/fathom/data_node.rb +51 -0
  24. data/lib/fathom/import.rb +68 -0
  25. data/lib/fathom/import/csv_import.rb +60 -0
  26. data/lib/fathom/import/yaml_import.rb +53 -0
  27. data/lib/fathom/inverter.rb +21 -0
  28. data/lib/fathom/knowledge_base.rb +23 -0
  29. data/lib/fathom/monte_carlo_set.rb +76 -0
  30. data/lib/fathom/node_utilities.rb +8 -0
  31. data/lib/fathom/plausible_range.rb +82 -0
  32. data/lib/fathom/value_aggregator.rb +11 -0
  33. data/lib/fathom/value_description.rb +79 -0
  34. data/lib/fathom/value_multiplier.rb +18 -0
  35. data/lib/options_hash.rb +186 -0
  36. data/spec/fathom/data_node_spec.rb +61 -0
  37. data/spec/fathom/import/csv_import_spec.rb +36 -0
  38. data/spec/fathom/import/yaml_import_spec.rb +40 -0
  39. data/spec/fathom/import_spec.rb +22 -0
  40. data/spec/fathom/knowledge_base_spec.rb +16 -0
  41. data/spec/fathom/monte_carlo_set_spec.rb +58 -0
  42. data/spec/fathom/plausible_range_spec.rb +130 -0
  43. data/spec/fathom/value_description_spec.rb +70 -0
  44. data/spec/fathom_spec.rb +8 -0
  45. data/spec/spec_helper.rb +13 -0
  46. data/spec/support/demo.yml +17 -0
  47. metadata +135 -0
@@ -0,0 +1,45 @@
1
+ require 'node'
2
+ require 'conditional_probability_matrix'
3
+
4
# Scratch namespace holding a small two-node example (killer identity vs.
# fingerprint evidence) and the conditional probability matrix built from them.
# Everything lives on the class itself and is created lazily, once.
class A
  class << self
    # Earlier experiment, kept for reference:
    #
    # def will_pay
    #   @will_pay ||= Node.new :true, :false
    # end
    #
    # def has_money
    #   @has_money ||= Node.new :plenty => 0.1, :some => 0.8, :little => 0.1
    # end
    #
    # def ones
    #   Matrix.ones(will_pay.values.length, has_money.values.length)
    # end
    #
    # def parents
    #   will_pay.values.col * has_money.values
    #   # will_pay.each do |value|
    #   #
    #   # end
    # end
    #
    # def cpm
    #   ConditionalProbabilityMatrix.new(will_pay, has_money)
    # end

    # Node named :killer_identity, built on first call and reused afterwards.
    # Also reachable as +k+ and +x+.
    def killer_identity
      return @killer_identity if @killer_identity

      @killer_identity = Node.new(
        :killer_identity,
        :jack => 0.8, :joe => 0.1, :jeff => 0.1
      )
    end
    alias_method :k, :killer_identity
    alias_method :x, :killer_identity

    # Node named :fingerprint_information, built on first call and reused
    # afterwards. Also reachable as +f+ and +y+.
    def fingerprint_information
      return @fingerprint_information if @fingerprint_information

      @fingerprint_information = Node.new(
        :fingerprint_information,
        :jack => 2/3.0, :joe => 1/6.0, :jeff => 1/6.0
      )
    end
    alias_method :f, :fingerprint_information
    alias_method :y, :fingerprint_information

    # ConditionalProbabilityMatrix over (x, y), i.e. the two nodes above.
    # Memoized like the nodes.
    def cpm
      return @cpm if @cpm

      @cpm = ConditionalProbabilityMatrix.new(x, y)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
module Fathom
  # Mixin that supplies a baseline constructor for node classes.
  module BasicNode
    # Accepts an optional options hash. The hash is currently ignored and no
    # state is set up here; including classes can call +super(opts)+ safely.
    def initialize(opts = {}); end
  end
end
8
+
@@ -0,0 +1,12 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
module Fathom
  # Placeholder class for causal graphs; it defines no behaviour yet.
  class CausalGraph; end
end

# When this file is executed directly (rather than required), mix Fathom into
# the top level so its constants can be used without the namespace.
include Fathom if $0 == __FILE__
# TODO: Is there anything you want to do to run this file on its own?
# CausalGraph.new
@@ -0,0 +1,12 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
module Fathom
  # Placeholder class for combined plausibilities; it defines no behaviour yet.
  class CombinedPlausibilities; end
end

# When this file is executed directly (rather than required), mix Fathom into
# the top level so its constants can be used without the namespace.
include Fathom if $0 == __FILE__
# TODO: Is there anything you want to do to run this file on its own?
# CombinedPlausibilities.new
@@ -0,0 +1,83 @@
1
+ =begin
2
+ This is a first approach to an RDF back end for a broadly-defined data store.
3
+
4
+ I am borrowing from the SKOS ontology here to be able to define any sort of concept
5
+ that may assist me with my decision-making work.
6
+
7
+ TODO:
8
+
9
+ [x] Build a basic Spira model
10
+ [x] Make a SKOS commitment
11
+ [.] Create helper methods to find or create the concept easily (using hash syntax for field names)
12
+ [] Create association methods for associating the concept to other concepts (need to think about this one)
13
+ [] Create specific methods to define a plausible range (probably define a Spira model here too)
14
+ [] Create specific methods to define a ValueDescription
15
+ [] Create specific methods to define a MonteCarloSet
16
+ [] Create specific methods to define a CausalGraph
17
+ [] Create specific methods to define a DependencyGraph
18
+ [] Create specific methods to define the value of further measurement (another un-named class)
19
+
20
+ =end
21
+
22
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
23
+ require 'rdf'
24
+ require 'rdf/ntriples'
25
+ require 'data_objects'
26
+ require 'do_sqlite3'
27
+ require 'rdf/do'
28
+ require 'spira'
29
+
30
+
31
module Fathom

  # Returns the generic RDF repository for Fathom, creating it on first use.
  #
  # This is defined on the module itself because it is called while this file
  # is being loaded (see the Spira registration below). A plain instance
  # method would only be reachable there if Fathom had already been mixed
  # into Object.
  def self.repo
    @repo ||= RDF::DataObjects::Repository.new('sqlite3:/tmp/test.db')
  end

  # Instance-level accessor for code that mixes Fathom in and calls +repo+
  # directly. It hands back the same shared repository as Fathom.repo.
  def repo
    Fathom.repo
  end

  # Register the repository as Spira's :default repository.
  Spira.add_repository(:default, Fathom.repo)

  # A concept stored through Spira, described with SKOS predicates:
  # name (prefLabel), description (definition) and scope (scopeNote).
  class Concept

    include Spira::Resource
    include RDF

    class << self
      # Looks up the concept whose identifier is derived from +name+.
      # Returns it untouched when it already exists; otherwise assigns the
      # name and description and returns it WITHOUT saving.
      def find_or_build(name, description = nil)
        concept = Concept.for(concept_name(name))
        return concept if concept.exist?
        set_name_and_description(concept, name, description)
      end

      # Same lookup as find_or_build, but a concept that does not exist yet
      # is also saved (with save!) before it is returned.
      def find_or_create(name, description = nil)
        concept = Concept.for(concept_name(name))
        return concept if concept.exist?
        set_name_and_description(concept, name, description)
        concept.save!
        concept
      end

      protected

      # Identifier for a concept: the name lower-cased, with every run of
      # whitespace replaced by a single underscore.
      def concept_name(name)
        name.downcase.gsub(/\s+/, '_')
      end

      # Assigns the two descriptive fields and returns the concept.
      def set_name_and_description(concept, name, description)
        concept.name = name
        concept.description = description
        concept
      end
    end

    base_uri "http://example.org/example/concepts"

    property :name, :predicate => SKOS.prefLabel
    property :description, :predicate => SKOS.definition
    property :scope, :predicate => SKOS.scopeNote

  end
end

# When this file is executed directly (rather than required), mix Fathom into
# the top level so its constants can be used without the namespace.
if __FILE__ == $0
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # Concept.new
end
@@ -0,0 +1,51 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+
3
+ =begin
4
+ A DataNode is a node generated from data itself. It stores the data and reveals some statistical
5
+ measurements for the data. It expects an array or vector of values and generates a vector on demand.
6
+ =end
7
# Declared inside `module Fathom` (like the sibling files) so that
# NodeUtilities resolves lexically within the Fathom namespace instead of
# only when Fathom has been included at the top level.
module Fathom
  # A node built from observed data. It keeps the raw values and exposes a
  # few statistics computed from a GSL vector of those values.
  class DataNode

    include NodeUtilities

    attr_reader :values, :name, :distribution, :confidence_interval

    # Options read from +opts+:
    #   :values              - required; the data for this node
    #   :name                - optional
    #   :distribution        - optional
    #   :confidence_interval - optional; exposed through #confidence_interval / #ci
    #
    # Raises ArgumentError when :values is missing (nil or false).
    def initialize(opts = {})
      @values = opts[:values]
      raise ArgumentError, "Must provided values: DataNode.new(:values => [...])" unless values
      @name = opts[:name]
      @distribution = opts[:distribution]
      # The reader and its alias existed already, but nothing ever assigned
      # the value, so it was always nil.
      @confidence_interval = opts[:confidence_interval]
    end

    alias :ci :confidence_interval

    # The values converted with GSL::Vector.ary_to_gv; built once and cached.
    def vector
      @vector ||= GSL::Vector.ary_to_gv(values)
    end

    # Standard deviation as reported by the vector's +sd+; cached.
    def standard_deviation
      @standard_deviation ||= vector.sd
    end
    alias :sd :standard_deviation
    alias :std :standard_deviation

    # Mean as reported by the vector's +mean+; cached.
    def mean
      @mean ||= vector.mean
    end

    # A random draw: rng.gaussian(std) shifted by the mean.
    # NOTE(review): presumably GSL's gaussian is zero-mean with the given
    # sigma, making this a draw from N(mean, std) - confirm against GSL docs.
    def rand
      rng.gaussian(std) + mean
    end

    protected

    # Lazily allocated GSL random number generator.
    def rng
      @rng ||= GSL::Rng.alloc
    end

  end
end

# When this file is executed directly (rather than required), mix Fathom into
# the top level so its constants can be used without the namespace.
if __FILE__ == $0
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # DataNode.new
end
@@ -0,0 +1,68 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+
3
+ =begin
4
+
5
+ The import scripts are meant to make it easier to build up a knowledge base from
6
+ real data. There are a lot of ways we could get our data: YAML files, spreadsheets,
7
+ web crawlers, relational databases, RDF data. The Import class creates a regular
8
+ way to create nodes in the knowledge base. The following example would be a
9
+ simple way to import some data about weekend trips:
10
+
11
+ class WeekendPlanningImport < Import
12
+ def import_plausible_data
13
+ values = [
14
+ {:name => 'Chance of Rain', :min => 0.2, :max => 0.3, :confidence_interval => 0.8},
15
+ {:name => 'Would Go to the Beach Despite the Rain', :min => 0, :max => 0.2},
16
+ {:name => 'Would Go to the Movies if Something Good Was Playing', :min => 0.8, :max => 1.0},
17
+ {:name => 'Would Go to the Movies Despite the Rain', :min => 0.9, :max => 1.0}
18
+ ]
19
+ return [PlausibleRange, values]
20
+ end
21
+
22
+ def import_destination_data
23
+ value_hash = FasterCSV...
24
+ return [DataNode, value_hash]
25
+ end
26
+ end
27
+
28
+ Each import method starts with 'import_' and returns an array of [NodeClass, values_hash].
29
+ The Import class will then create a series of nodes and insert them into the active knowledge
30
+ base.
31
+
32
+ There are a number of useful Import classes that make most data imports fairly straightforward.
33
+ This way data from spreadsheets or YAML files can easily be added to the knowledge base.
34
+
35
+ =end
36
# Declared inside `module Fathom` (like the sibling files) so constants such
# as OptionsHash and node classes resolve within the Fathom namespace as well
# as at the top level.
module Fathom
  # Base class for imports. Subclasses define methods whose names start with
  # 'import_'; each returns [NodeClass, list_of_initialization_values].
  # #import runs all of them and stores the resulting nodes in the knowledge base.
  class Import

    attr_reader :content, :options

    # Wraps +opts+ in an OptionsHash and keeps its :content entry as #content.
    def initialize(opts = {})
      @options = OptionsHash.new(opts)
      @content = @options[:content]
    end

    # Calls every import method, builds one node per entry of the returned
    # initialization data, registers each node in Fathom.knowledge_base under
    # its name, and returns all created nodes in one flat array.
    def import
      results = []
      import_methods.each do |method|
        klass, initialization_data = send(method)
        initialization_data.each do |values|
          results << extract_nodes(klass, values)
        end
      end
      results
    end

    protected

    # Builds a single node and stores it in the knowledge base by node name.
    def extract_nodes(klass, values)
      node = klass.new(values)
      Fathom.knowledge_base[node.name] = node
      node
    end

    # Names (as symbols) of the methods to run: everything this object
    # responds to whose name STARTS with 'import_', except this helper itself.
    #
    # The match is anchored so names that merely contain 'import_' are not
    # picked up, and this helper is excluded so a bare Import imports nothing
    # instead of calling itself. Inherited import methods are included, so a
    # subclass of a concrete importer keeps its parent's imports.
    def import_methods
      methods.map { |m| m.to_s }.
        select { |name| name =~ /\Aimport_\w/ && name != 'import_methods' }.
        map { |name| name.to_sym }
    end

  end
end
@@ -0,0 +1,60 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+ require 'import'
3
+ require 'open-uri'
4
+
5
+ # TODO: Work this out with Ruby 1.9. We don't need to load this for all Rubies.
6
+ require 'fastercsv'
7
+
8
module Fathom
  # Imports the columns of a CSV document as DataNode definitions.
  class CSVImport < Import

    # Sources matching this are fetched over the network through open-uri.
    REMOTE_SOURCE = /\A(?:https?|ftp):\/\//i

    # @content could be a filename, URI, or actual file contents. We figure out which
    # it is and then parse the contents with FasterCSV. We assume that there are column
    # headers that are the names of each node, and that the values in the node are
    # values for a DataNode.
    #
    # Returns [DataNode, array of {:name => ..., :values => ...} hashes].
    def import_csv
      [DataNode, extract_columns(parse_contents)]
    end

    # These are the options we use to parse the file. They can be overridden
    # by calling CSVImport.new(:parse_options => {...}, ...)
    def parse_options
      @parse_options ||= @options[:parse_options] ||
        {:converters => [:all], :headers => true, :skip_blanks => true}
    end

    protected

    # Parses the resolved contents with FasterCSV using #parse_options.
    def parse_contents
      FasterCSV.parse(get_contents, parse_options)
    end

    # Tries to read a file or URL. That failing, assumes the contents are CSV contents.
    def get_contents
      fetched = read_source(@content)
      fetched.nil? ? @content : fetched
    end

    # Reads +source+ when it names an existing file or an http/https/ftp URL.
    # Returns nil when it is neither, or when reading fails, so the caller can
    # fall back to treating the value as literal CSV.
    #
    # Kernel#open is deliberately not used: it would run a command for content
    # starting with '|', it leaves the handle open, and on newer Rubies it no
    # longer fetches URLs.
    def read_source(source)
      location = source.respond_to?(:to_path) ? source.to_path : source
      return nil unless location.is_a?(String)
      return File.read(location) if File.file?(location)
      return URI.parse(location).open { |io| io.read } if location =~ REMOTE_SOURCE
      nil
    rescue StandardError
      nil
    end

    # Returns an array of hashes with :name => column header, :values => values.
    # The parsed rows are transposed into columns; the first entry of each
    # column is the header and the rest are its values.
    def extract_columns(parsed)
      parsed.to_a.transpose.map do |column|
        {:name => column.first, :values => column.drop(1)}
      end
    end

  end
end

# When this file is executed directly (rather than required), mix Fathom into
# the top level so its constants can be used without the namespace.
if __FILE__ == $0
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # CSVImport.new
end
@@ -0,0 +1,53 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+ require 'open-uri'
3
+
4
# Declared inside `module Fathom` (like csv_import.rb) so that Import,
# PlausibleRange and DataNode resolve within the Fathom namespace instead of
# only when Fathom has been included at the top level.
module Fathom
  # Imports PlausibleRange and DataNode definitions from YAML. Entries whose
  # value is a hash with :min and :max become plausible ranges; entries whose
  # value is an array become data nodes.
  class YAMLImport < Import

    # Sources matching this are fetched over the network through open-uri.
    REMOTE_SOURCE = /\A(?:https?|ftp):\/\//i

    # Returns [PlausibleRange, array of option hashes (each including :name)].
    def import_plausible_ranges
      assert_yaml_content
      [PlausibleRange, extract_plausible_ranges]
    end

    # Returns [DataNode, array of {:name => ..., :values => ...} hashes].
    def import_data_nodes
      assert_yaml_content
      [DataNode, extract_data_nodes]
    end

    protected

    # Loads and caches the YAML document. #content may be a filename, a URL or
    # the YAML text itself; when it cannot be read as a file or URL it is
    # parsed as-is.
    #
    # Raises ArgumentError when there is no content at all. This check used to
    # sit inside the begin/rescue that wrapped the read, so the error was
    # swallowed by its own rescue and never reached the caller.
    def assert_yaml_content
      return @yaml_content if @yaml_content
      file_contents = read_source(content)
      file_contents = content if file_contents.nil?
      raise ArgumentError, "Unable to extract YAML data out of the contents." unless file_contents
      # NOTE(review): YAML.load can instantiate arbitrary objects. If imports
      # may come from untrusted sources, consider a safe loader.
      @yaml_content = YAML.load(file_contents)
    end

    # Reads +source+ when it names an existing file or an http/https/ftp URL.
    # Returns nil when it is neither, or when reading fails.
    #
    # Kernel#open is deliberately not used: it would run a command for content
    # starting with '|', it leaves the handle open, and on newer Rubies it no
    # longer fetches URLs.
    def read_source(source)
      location = source.respond_to?(:to_path) ? source.to_path : source
      return nil unless location.is_a?(String)
      return File.read(location) if File.file?(location)
      return URI.parse(location).open { |io| io.read } if location =~ REMOTE_SOURCE
      nil
    rescue StandardError
      nil
    end

    # Collects every entry whose value is a Hash with both :min and :max set,
    # wrapped in an OptionsHash and merged with :name => entry key.
    def extract_plausible_ranges
      ranges = []
      @yaml_content.each do |entry|
        name, value = entry.first, entry.last
        next unless value.is_a?(Hash)
        range_options = OptionsHash.new(value)
        ranges << range_options.merge(:name => name) if range_options[:min] && range_options[:max]
      end
      ranges
    end

    # Collects every entry whose value is an Array as {:name, :values}.
    def extract_data_nodes
      nodes = []
      @yaml_content.each do |entry|
        name, value = entry.first, entry.last
        nodes << {:name => name, :values => value} if value.is_a?(Array)
      end
      nodes
    end
  end
end

# When this file is executed directly (rather than required), mix Fathom into
# the top level so its constants can be used without the namespace.
if __FILE__ == $0
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # YAMLImport.new
end
@@ -0,0 +1,21 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
module Fathom
  # A node whose value is always -1.
  # NOTE(review): the class is spelled "Invertor" while the default name
  # below is "Inverter" - confirm which spelling is intended.
  class Invertor
    include BasicNode

    # Passes +opts+ to BasicNode#initialize, then falls back to the default
    # name when no name has been set.
    def initialize(opts = {})
      super(opts)
      @name = "Inverter" unless @name
    end

    # The constant this node contributes.
    def value
      -1
    end
  end
end

# When this file is executed directly (rather than required), mix Fathom into
# the top level so its constants can be used without the namespace.
include Fathom if $0 == __FILE__
# TODO: Is there anything you want to do to run this file on its own?
# Invertor.new
@@ -0,0 +1,23 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
# Declared inside `module Fathom`, matching the sibling files that use the
# nested namespace form.
module Fathom
  # A minimal key/value store for nodes, backed by an in-memory Hash.
  class KnowledgeBase

    # +opts+ is accepted for interface compatibility but is not used yet.
    # It was previously wrapped in an OptionsHash whose result was never read.
    def initialize(opts = {})
      @data_store = {}
    end

    # Stores +value+ under +key+, replacing any previous entry.
    def []=(key, value)
      @data_store[key] = value
    end

    # Returns the value stored under +key+, or nil when there is none.
    def [](key)
      @data_store[key]
    end

  end
end

# When this file is executed directly (rather than required), mix Fathom into
# the top level so its constants can be used without the namespace.
if __FILE__ == $0
  include Fathom
  # TODO: Is there anything you want to do to run this file on its own?
  # KnowledgeBase.new
end