fathom 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/.bundle/config +2 -0
  2. data/.document +5 -0
  3. data/.gitignore +5 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +5 -0
  6. data/Gemfile.lock +30 -0
  7. data/LICENSE +20 -0
  8. data/README.md +176 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/autotest/discover.rb +1 -0
  12. data/lib/fathom.rb +68 -0
  13. data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
  14. data/lib/fathom/archive/n2.rb +198 -0
  15. data/lib/fathom/archive/n3.rb +119 -0
  16. data/lib/fathom/archive/node.rb +74 -0
  17. data/lib/fathom/archive/noodle.rb +136 -0
  18. data/lib/fathom/archive/scratch.rb +45 -0
  19. data/lib/fathom/basic_node.rb +8 -0
  20. data/lib/fathom/causal_graph.rb +12 -0
  21. data/lib/fathom/combined_plausibilities.rb +12 -0
  22. data/lib/fathom/concept.rb +83 -0
  23. data/lib/fathom/data_node.rb +51 -0
  24. data/lib/fathom/import.rb +68 -0
  25. data/lib/fathom/import/csv_import.rb +60 -0
  26. data/lib/fathom/import/yaml_import.rb +53 -0
  27. data/lib/fathom/inverter.rb +21 -0
  28. data/lib/fathom/knowledge_base.rb +23 -0
  29. data/lib/fathom/monte_carlo_set.rb +76 -0
  30. data/lib/fathom/node_utilities.rb +8 -0
  31. data/lib/fathom/plausible_range.rb +82 -0
  32. data/lib/fathom/value_aggregator.rb +11 -0
  33. data/lib/fathom/value_description.rb +79 -0
  34. data/lib/fathom/value_multiplier.rb +18 -0
  35. data/lib/options_hash.rb +186 -0
  36. data/spec/fathom/data_node_spec.rb +61 -0
  37. data/spec/fathom/import/csv_import_spec.rb +36 -0
  38. data/spec/fathom/import/yaml_import_spec.rb +40 -0
  39. data/spec/fathom/import_spec.rb +22 -0
  40. data/spec/fathom/knowledge_base_spec.rb +16 -0
  41. data/spec/fathom/monte_carlo_set_spec.rb +58 -0
  42. data/spec/fathom/plausible_range_spec.rb +130 -0
  43. data/spec/fathom/value_description_spec.rb +70 -0
  44. data/spec/fathom_spec.rb +8 -0
  45. data/spec/spec_helper.rb +13 -0
  46. data/spec/support/demo.yml +17 -0
  47. metadata +135 -0
@@ -0,0 +1,45 @@
1
+ require 'node'
2
+ require 'conditional_probability_matrix'
3
+
4
+ class A
5
+ class << self
6
+ # def will_pay
7
+ # @will_pay ||= Node.new :true, :false
8
+ # end
9
+ #
10
+ # def has_money
11
+ # @has_money ||= Node.new :plenty => 0.1, :some => 0.8, :little => 0.1
12
+ # end
13
+ #
14
+ # def ones
15
+ # Matrix.ones(will_pay.values.length, has_money.values.length)
16
+ # end
17
+ #
18
+ # def parents
19
+ # will_pay.values.col * has_money.values
20
+ # # will_pay.each do |value|
21
+ # #
22
+ # # end
23
+ # end
24
+ #
25
+ # def cpm
26
+ # ConditionalProbabilityMatrix.new(will_pay, has_money)
27
+ # end
28
+
29
+ def killer_identity
30
+ @killer_identity ||= Node.new(:killer_identity, :jack => 0.8, :joe => 0.1, :jeff => 0.1)
31
+ end
32
+ alias :k :killer_identity
33
+ alias :x :killer_identity
34
+
35
+ def fingerprint_information
36
+ @fingerprint_information ||= Node.new(:fingerprint_information, :jack => 2/3.0, :joe => 1/6.0, :jeff => 1/6.0)
37
+ end
38
+ alias :f :fingerprint_information
39
+ alias :y :fingerprint_information
40
+
41
+ def cpm
42
+ @cpm ||= ConditionalProbabilityMatrix.new(x, y)
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,8 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+ module Fathom
3
+ module BasicNode
4
+ def initialize(opts={})
5
+ end
6
+ end
7
+ end
8
+
@@ -0,0 +1,12 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+ module Fathom
3
+ class CausalGraph
4
+
5
+ end
6
+ end
7
+
8
+ if __FILE__ == $0
9
+ include Fathom
10
+ # TODO: Is there anything you want to do to run this file on its own?
11
+ # CausalGraph.new
12
+ end
@@ -0,0 +1,12 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+ module Fathom
3
+ class CombinedPlausibilities
4
+
5
+ end
6
+ end
7
+
8
+ if __FILE__ == $0
9
+ include Fathom
10
+ # TODO: Is there anything you want to do to run this file on its own?
11
+ # CombinedPlausibilities.new
12
+ end
@@ -0,0 +1,83 @@
1
+ =begin
2
+ This is a first approach to an RDF back end for a broadly-defined data store.
3
+
4
+ I am borrowing from the SKOS ontology here to be able to define any sort of concept
5
+ that may assist me with my decision-making work.
6
+
7
+ TODO:
8
+
9
+ [x] Build a basic Spira model
10
+ [x] Make a SKOS commitment
11
+ [.] Create helper methods to find or create the concept easily (using hash syntax for field names)
12
+ [] Create association methods for associating the concept to other concepts (need to think about this one)
13
+ [] Create specific methods to define a plausible range (probably define a Spira model here too)
14
+ [] Create specific methods to define a ValueDescription
15
+ [] Create specific methods to define a MonteCarloSet
16
+ [] Create specific methods to define a CausalGraph
17
+ [] Create specific methods to define a DependencyGraph
18
+ [] Create specific methods to define the value of further measurement (another un-named class)
19
+
20
+ =end
21
+
22
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
23
+ require 'rdf'
24
+ require 'rdf/ntriples'
25
+ require 'data_objects'
26
+ require 'do_sqlite3'
27
+ require 'rdf/do'
28
+ require 'spira'
29
+
30
+
31
+ module Fathom
32
+
33
+ # Go ahead and create a generic repo for Fathom
34
+ def repo
35
+ @repo ||= RDF::DataObjects::Repository.new('sqlite3:/tmp/test.db')
36
+ end
37
+
38
+ Spira.add_repository(:default, repo)
39
+
40
+ class Concept
41
+
42
+ include Spira::Resource
43
+ include RDF
44
+
45
+ class << self
46
+ def find_or_build(name, description=nil)
47
+ concept = Concept.for(concept_name(name))
48
+ return concept if concept.exist?
49
+ concept.name = name
50
+ concept.description = description
51
+ concept
52
+ end
53
+
54
+ def find_or_create(name, description=nil)
55
+ concept = Concept.for(concept_name(name))
56
+ return concept if concept.exist?
57
+ concept.name = name
58
+ concept.description = description
59
+ concept.save!
60
+ concept
61
+ end
62
+
63
+ protected
64
+ def concept_name(name)
65
+ concept_name = name.downcase.gsub(/\s+/, '_')
66
+ end
67
+ end
68
+
69
+ base_uri "http://example.org/example/concepts"
70
+
71
+ property :name, :predicate => SKOS.prefLabel
72
+ property :description, :predicate => SKOS.definition
73
+ property :scope, :predicate => SKOS.scopeNote
74
+
75
+
76
+ end
77
+ end
78
+
79
+ if __FILE__ == $0
80
+ include Fathom
81
+ # TODO: Is there anything you want to do to run this file on its own?
82
+ # Concept.new
83
+ end
@@ -0,0 +1,51 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+
3
+ =begin
4
+ A DataNode is a node generated from data itself. It stores the data and reveals some statistical
5
+ measurements for the data. It expects an array or vector of values and generates a vector on demand.
6
+ =end
7
+ class Fathom::DataNode
8
+
9
+ include NodeUtilities
10
+
11
+ attr_reader :values, :name, :distribution, :confidence_interval
12
+
13
+ def initialize(opts={})
14
+ @values = opts[:values]
15
+ raise ArgumentError, "Must provided values: DataNode.new(:values => [...])" unless self.values
16
+ @name = opts[:name]
17
+ @distribution = opts[:distribution]
18
+ end
19
+
20
+ alias :ci :confidence_interval
21
+
22
+ def vector
23
+ @vector ||= GSL::Vector.ary_to_gv(self.values)
24
+ end
25
+
26
+ def standard_deviation
27
+ @standard_deviation ||= vector.sd
28
+ end
29
+ alias :sd :standard_deviation
30
+ alias :std :standard_deviation
31
+
32
+ def mean
33
+ @mean ||= vector.mean
34
+ end
35
+
36
+ def rand
37
+ rng.gaussian(std) + mean
38
+ end
39
+
40
+ protected
41
+ def rng
42
+ @rng ||= GSL::Rng.alloc
43
+ end
44
+
45
+ end
46
+
47
+ if __FILE__ == $0
48
+ include Fathom
49
+ # TODO: Is there anything you want to do to run this file on its own?
50
+ # DataNode.new
51
+ end
@@ -0,0 +1,68 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+
3
+ =begin
4
+
5
+ The import scripts are meant to make it easier to build up a knowledge base from
6
+ real data. There are a lot of ways we could get our data: YAML files, spreadsheets,
7
+ web crawlers, relational databases, RDF data. The Import class creates a regular
8
+ way to create nodes in the knowledge base. The following example would be a
9
+ simple way to import some data about weekend trips:
10
+
11
+ class WeekendPlanningImport < Import
12
+ def import_plausible_data
13
+ values = [
14
+ {:name => 'Chance of Rain', :min => 0.2, :max => 0.3, :confidence_interval => 0.8},
15
+ {:name => 'Would Go to the Beach Despite the Rain', :min => 0, :max => 0.2},
16
+ {:name => 'Would Go to the Movies if Something Good Was Playing', :min => 0.8, :max => 1.0},
17
+ {:name => 'Would Go to the Movies Despite the Rain', :min => 0.9, :max => 1.0}
18
+ ]
19
+ return [PlausibleRange, values]
20
+ end
21
+
22
+ def import_destination_data
23
+ value_hash = FasterCSV...
24
+ return [DataNode, value_hash]
25
+ end
26
+ end
27
+
28
+ Each import method starts with 'import_' and returns an array of [NodeClass, values_hash].
29
+ The Import class will then create a series of nodes and insert them into the active knowledge
30
+ base.
31
+
32
+ There are a number of useful Import classes that make most data imports fairly straightforward.
33
+ This way data from spreadsheets or YAML files can easily be added to the knowledge base.
34
+
35
+ =end
36
+ class Fathom::Import
37
+
38
+ attr_reader :content, :options
39
+
40
+ def initialize(opts={})
41
+ @options = OptionsHash.new(opts)
42
+ @content = @options[:content]
43
+ end
44
+
45
+ def import
46
+ results = []
47
+ import_methods.each do |method|
48
+ klass, initialization_data = self.send(method.to_sym)
49
+ initialization_data.each do |values|
50
+ results << extract_nodes(klass, values)
51
+ end
52
+ end
53
+ results
54
+ end
55
+
56
+ protected
57
+
58
+ def extract_nodes(klass, values)
59
+ node = klass.new(values)
60
+ Fathom.knowledge_base[node.name] = node
61
+ node
62
+ end
63
+
64
+ def import_methods
65
+ (self.methods - self.class.superclass.instance_methods).map {|m| m if m =~ /import_\w+/}.compact
66
+ end
67
+
68
+ end
@@ -0,0 +1,60 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+ require 'import'
3
+ require 'open-uri'
4
+
5
+ # TODO: Work this out with Ruby 1.9. We don't need to load this for all Rubies.
6
+ require 'fastercsv'
7
+
8
+ module Fathom
9
+ class CSVImport < Import
10
+
11
+ # @content could be a filename, URI, or actual file contents. We figure out which
12
+ # it is and then parse the contents with FasterCSV. We assume that there are column
13
+ # headers that are the names of each node, and that the values in the node are
14
+ # values for a DataNode.
15
+ def import_csv
16
+ parsed = parse_contents
17
+ extracted = extract_columns(parsed)
18
+ [DataNode, extracted]
19
+ end
20
+
21
+ # These are the options we use to parse the file. They can be overridden
22
+ # by calling CSVImport.new(:parse_options => {...}, ...)
23
+ def parse_options
24
+ return @parse_options if @parse_options
25
+ @parse_options = @options[:parse_options]
26
+ @parse_options ||= {:converters => [:all], :headers => true, :skip_blanks => true}
27
+ @parse_options
28
+ end
29
+
30
+ protected
31
+ def parse_contents
32
+ arr_of_arrs = FasterCSV.parse(get_contents, parse_options)
33
+ end
34
+
35
+ # Tries to read a file or URL. That failing, assumes the contents are CSV contents.
36
+ def get_contents
37
+ begin
38
+ content = open(@content).read
39
+ rescue
40
+ content = @content
41
+ end
42
+ return content
43
+ end
44
+
45
+ # Returns an array of hashes with :name => column header, :values => column values
46
+ def extract_columns(parsed)
47
+ transposed = parsed.to_a.transpose
48
+ transposed.inject([]) do |list, column|
49
+ list << {:name => column.shift, :values => column}
50
+ end
51
+ end
52
+
53
+ end
54
+ end
55
+
56
+ if __FILE__ == $0
57
+ include Fathom
58
+ # TODO: Is there anything you want to do to run this file on its own?
59
+ # CSVImport.new
60
+ end
@@ -0,0 +1,53 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'fathom'))
2
+ require 'open-uri'
3
+
4
+ class Fathom::YAMLImport < Import
5
+ def import_plausible_ranges
6
+ assert_yaml_content
7
+ plausible_ranges = extract_plausible_ranges
8
+ [PlausibleRange, plausible_ranges]
9
+ end
10
+
11
+ def import_data_nodes
12
+ assert_yaml_content
13
+ data_nodes = extract_data_nodes
14
+ [DataNode, data_nodes]
15
+ end
16
+
17
+ protected
18
+ def assert_yaml_content
19
+ return @yaml_content if @yaml_content
20
+ begin
21
+ file_contents = open(self.content).read
22
+ raise ArgumentError, "Unable to extract YAML data out of the contents." unless file_contents
23
+ rescue
24
+ file_contents = self.content
25
+ end
26
+ @yaml_content = YAML.load(file_contents)
27
+ end
28
+
29
+ def extract_plausible_ranges
30
+ @yaml_content.inject([]) do |list, array|
31
+ name, value = array.first, array.last
32
+ if value.is_a?(Hash)
33
+ value = OptionsHash.new(value)
34
+ list << value.merge(:name => name) if value[:min] and value[:max]
35
+ end
36
+ list
37
+ end
38
+ end
39
+
40
+ def extract_data_nodes
41
+ @yaml_content.inject([]) do |list, array|
42
+ name, value = array.first, array.last
43
+ list << {:name => name, :values => value} if value.is_a?(Array)
44
+ list
45
+ end
46
+ end
47
+ end
48
+
49
+ if __FILE__ == $0
50
+ include Fathom
51
+ # TODO: Is there anything you want to do to run this file on its own?
52
+ # YAMLImport.new
53
+ end
@@ -0,0 +1,21 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+ module Fathom
3
+ class Invertor # node whose value is the constant -1. NOTE(review): spelled "Invertor" here but "Inverter" in the default name below; confirm the intended spelling
4
+ include BasicNode
5
+
6
+ def initialize(opts={})
7
+ super(opts)
8
+ @name ||= "Inverter" # default name, applied only when @name is not already set
9
+ end
10
+
11
+ def value # always returns -1
12
+ -1
13
+ end
14
+ end
15
+ end
16
+
17
+ if __FILE__ == $0
18
+ include Fathom
19
+ # TODO: Is there anything you want to do to run this file on its own?
20
+ # Invertor.new
21
+ end
@@ -0,0 +1,23 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'fathom'))
2
+ class Fathom::KnowledgeBase
3
+
4
+ def initialize(opts={}) # builds an empty in-memory store, accessed through []= and []
5
+ opts = OptionsHash.new(opts) # NOTE(review): the wrapped options are not read again in this method
6
+ @data_store = {}
7
+ end
8
+
9
+ def []=(key, value)
10
+ @data_store[key] = value
11
+ end
12
+
13
+ def [](key)
14
+ @data_store[key]
15
+ end
16
+
17
+ end
18
+
19
+ if __FILE__ == $0
20
+ include Fathom
21
+ # TODO: Is there anything you want to do to run this file on its own?
22
+ # KnowledgeBase.new
23
+ end