fact_db 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/.envrc +1 -0
  3. data/CHANGELOG.md +48 -0
  4. data/COMMITS.md +196 -0
  5. data/README.md +102 -0
  6. data/Rakefile +41 -0
  7. data/db/migrate/001_enable_extensions.rb +7 -0
  8. data/db/migrate/002_create_contents.rb +44 -0
  9. data/db/migrate/003_create_entities.rb +36 -0
  10. data/db/migrate/004_create_entity_aliases.rb +18 -0
  11. data/db/migrate/005_create_facts.rb +65 -0
  12. data/db/migrate/006_create_entity_mentions.rb +18 -0
  13. data/db/migrate/007_create_fact_sources.rb +18 -0
  14. data/docs/api/extractors/index.md +71 -0
  15. data/docs/api/extractors/llm.md +162 -0
  16. data/docs/api/extractors/manual.md +92 -0
  17. data/docs/api/extractors/rule-based.md +165 -0
  18. data/docs/api/facts.md +300 -0
  19. data/docs/api/index.md +66 -0
  20. data/docs/api/models/content.md +165 -0
  21. data/docs/api/models/entity.md +202 -0
  22. data/docs/api/models/fact.md +270 -0
  23. data/docs/api/models/index.md +77 -0
  24. data/docs/api/pipeline/extraction.md +175 -0
  25. data/docs/api/pipeline/index.md +72 -0
  26. data/docs/api/pipeline/resolution.md +209 -0
  27. data/docs/api/services/content-service.md +166 -0
  28. data/docs/api/services/entity-service.md +202 -0
  29. data/docs/api/services/fact-service.md +223 -0
  30. data/docs/api/services/index.md +55 -0
  31. data/docs/architecture/database-schema.md +293 -0
  32. data/docs/architecture/entity-resolution.md +293 -0
  33. data/docs/architecture/index.md +149 -0
  34. data/docs/architecture/temporal-facts.md +268 -0
  35. data/docs/architecture/three-layer-model.md +242 -0
  36. data/docs/assets/css/custom.css +137 -0
  37. data/docs/assets/fact_db.jpg +0 -0
  38. data/docs/assets/images/fact_db.jpg +0 -0
  39. data/docs/concepts.md +183 -0
  40. data/docs/examples/basic-usage.md +235 -0
  41. data/docs/examples/hr-onboarding.md +312 -0
  42. data/docs/examples/index.md +64 -0
  43. data/docs/examples/news-analysis.md +288 -0
  44. data/docs/getting-started/database-setup.md +170 -0
  45. data/docs/getting-started/index.md +71 -0
  46. data/docs/getting-started/installation.md +98 -0
  47. data/docs/getting-started/quick-start.md +191 -0
  48. data/docs/guides/batch-processing.md +325 -0
  49. data/docs/guides/configuration.md +243 -0
  50. data/docs/guides/entity-management.md +364 -0
  51. data/docs/guides/extracting-facts.md +299 -0
  52. data/docs/guides/index.md +22 -0
  53. data/docs/guides/ingesting-content.md +252 -0
  54. data/docs/guides/llm-integration.md +299 -0
  55. data/docs/guides/temporal-queries.md +315 -0
  56. data/docs/index.md +121 -0
  57. data/examples/README.md +130 -0
  58. data/examples/basic_usage.rb +164 -0
  59. data/examples/entity_management.rb +216 -0
  60. data/examples/hr_system.rb +428 -0
  61. data/examples/rule_based_extraction.rb +258 -0
  62. data/examples/temporal_queries.rb +245 -0
  63. data/lib/fact_db/config.rb +71 -0
  64. data/lib/fact_db/database.rb +45 -0
  65. data/lib/fact_db/errors.rb +10 -0
  66. data/lib/fact_db/extractors/base.rb +117 -0
  67. data/lib/fact_db/extractors/llm_extractor.rb +179 -0
  68. data/lib/fact_db/extractors/manual_extractor.rb +53 -0
  69. data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
  70. data/lib/fact_db/llm/adapter.rb +109 -0
  71. data/lib/fact_db/models/content.rb +62 -0
  72. data/lib/fact_db/models/entity.rb +84 -0
  73. data/lib/fact_db/models/entity_alias.rb +26 -0
  74. data/lib/fact_db/models/entity_mention.rb +33 -0
  75. data/lib/fact_db/models/fact.rb +192 -0
  76. data/lib/fact_db/models/fact_source.rb +35 -0
  77. data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
  78. data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
  79. data/lib/fact_db/resolution/entity_resolver.rb +261 -0
  80. data/lib/fact_db/resolution/fact_resolver.rb +259 -0
  81. data/lib/fact_db/services/content_service.rb +93 -0
  82. data/lib/fact_db/services/entity_service.rb +150 -0
  83. data/lib/fact_db/services/fact_service.rb +193 -0
  84. data/lib/fact_db/temporal/query.rb +125 -0
  85. data/lib/fact_db/temporal/timeline.rb +134 -0
  86. data/lib/fact_db/version.rb +5 -0
  87. data/lib/fact_db.rb +141 -0
  88. data/mkdocs.yml +198 -0
  89. metadata +288 -0
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Basic Usage Example for FactDb
5
+ #
6
+ # This example demonstrates:
7
+ # - Configuring FactDb
8
+ # - Ingesting content
9
+ # - Creating entities
10
+ # - Creating facts manually
11
+ # - Querying facts
12
+
13
+ require "bundler/setup"
14
+ require "fact_db"
15
+
16
+ # Configure FactDb
17
+ FactDb.configure do |config|
18
+ config.database_url = ENV.fetch("DATABASE_URL", "postgres://#{ENV['USER']}@localhost/fact_db_demo")
19
+ config.default_extractor = :manual
20
+ config.fuzzy_match_threshold = 0.85
21
+ end
22
+
23
+ # Ensure database tables exist
24
+ FactDb::Database.migrate!
25
+
26
+ # Create a new FactDb instance (the "clock")
27
+ clock = FactDb.new
28
+
29
+ puts "=" * 60
30
+ puts "FactDb Basic Usage Demo"
31
+ puts "=" * 60
32
+
33
+ # Step 1: Ingest some content
34
+ puts "\n--- Step 1: Ingesting Content ---\n"
35
+
36
+ email_content = <<~EMAIL
37
+ From: hr@acme.com
38
+ To: all@acme.com
39
+ Subject: New Hire Announcement
40
+ Date: January 8, 2026
41
+
42
+ We are pleased to announce that Jane Smith has joined Acme Corp
43
+ as our new Director of Engineering. Jane comes to us from
44
+ TechStartup Inc where she served as VP of Engineering for 3 years.
45
+
46
+ Please join me in welcoming Jane to the team!
47
+
48
+ Best regards,
49
+ HR Department
50
+ EMAIL
51
+
52
+ content = clock.ingest(
53
+ email_content,
54
+ type: :email,
55
+ title: "New Hire Announcement - Jane Smith",
56
+ captured_at: Time.new(2026, 1, 8)
57
+ )
58
+
59
+ puts "Ingested content ID: #{content.id}"
60
+ puts "Content hash: #{content.content_hash}"
61
+ puts "Word count: #{content.word_count}"
62
+
63
+ # Step 2: Create entities
64
+ puts "\n--- Step 2: Creating Entities ---\n"
65
+
66
+ entity_service = clock.entity_service
67
+
68
+ jane = entity_service.create(
69
+ "Jane Smith",
70
+ type: :person,
71
+ aliases: ["J. Smith"],
72
+ description: "Director of Engineering at Acme Corp"
73
+ )
74
+ puts "Created entity: #{jane.canonical_name} (ID: #{jane.id})"
75
+
76
+ acme = entity_service.create(
77
+ "Acme Corp",
78
+ type: :organization,
79
+ aliases: ["Acme", "Acme Corporation"],
80
+ description: "Technology company"
81
+ )
82
+ puts "Created entity: #{acme.canonical_name} (ID: #{acme.id})"
83
+
84
+ techstartup = entity_service.create(
85
+ "TechStartup Inc",
86
+ type: :organization,
87
+ aliases: ["TechStartup"],
88
+ description: "Technology startup company"
89
+ )
90
+ puts "Created entity: #{techstartup.canonical_name} (ID: #{techstartup.id})"
91
+
92
+ # Step 3: Create facts
93
+ puts "\n--- Step 3: Creating Facts ---\n"
94
+
95
+ fact_service = clock.fact_service
96
+
97
+ # Fact 1: Jane works at Acme
98
+ fact1 = fact_service.create(
99
+ "Jane Smith is Director of Engineering at Acme Corp",
100
+ valid_at: Date.new(2026, 1, 8),
101
+ extraction_method: :manual,
102
+ confidence: 1.0,
103
+ mentions: [
104
+ { entity_id: jane.id, role: :subject, text: "Jane Smith" },
105
+ { entity_id: acme.id, role: :object, text: "Acme Corp" }
106
+ ]
107
+ )
108
+ puts "Created fact: #{fact1.fact_text}"
109
+ puts " Valid from: #{fact1.valid_at}"
110
+
111
+ # Fact 2: Jane previously worked at TechStartup (historical; invalid_at is set)
112
+ fact2 = fact_service.create(
113
+ "Jane Smith was VP of Engineering at TechStartup Inc",
114
+ valid_at: Date.new(2023, 1, 1),
115
+ invalid_at: Date.new(2026, 1, 7),
116
+ extraction_method: :manual,
117
+ confidence: 0.9,
118
+ mentions: [
119
+ { entity_id: jane.id, role: :subject, text: "Jane Smith" },
120
+ { entity_id: techstartup.id, role: :object, text: "TechStartup Inc" }
121
+ ]
122
+ )
123
+ puts "Created fact: #{fact2.fact_text}"
124
+ puts " Valid from: #{fact2.valid_at} to #{fact2.invalid_at}"
125
+
126
+ # Link facts to source content
127
+ fact1.add_source(content: content, type: :primary, confidence: 1.0)
128
+ fact2.add_source(content: content, type: :supporting, confidence: 0.8)
129
+
130
+ # Step 4: Query facts
131
+ puts "\n--- Step 4: Querying Facts ---\n"
132
+
133
+ # Get current facts about Jane
134
+ puts "\nCurrent facts about Jane Smith:"
135
+ current_facts = fact_service.current_facts(entity: jane.id)
136
+ current_facts.each do |fact|
137
+ puts " - #{fact.fact_text}"
138
+ end
139
+
140
+ # Get facts valid at a specific date (when Jane was at TechStartup)
141
+ puts "\nFacts about Jane on January 1, 2024:"
142
+ past_facts = fact_service.facts_at(Date.new(2024, 1, 1), entity: jane.id)
143
+ past_facts.each do |fact|
144
+ puts " - #{fact.fact_text}"
145
+ end
146
+
147
+ # Get all facts (including historical)
148
+ puts "\nAll facts in the system:"
149
+ all_facts = clock.query_facts
150
+ all_facts.each do |fact|
151
+ status = fact.invalid_at ? "(historical)" : "(current)"
152
+ puts " - #{fact.fact_text} #{status}"
153
+ end
154
+
155
+ # Step 5: Get statistics
156
+ puts "\n--- Step 5: Statistics ---\n"
157
+
158
+ puts "Content stats: #{clock.content_service.stats}"
159
+ puts "Entity stats: #{entity_service.stats}"
160
+ puts "Fact stats: #{fact_service.stats}"
161
+
162
+ puts "\n" + "=" * 60
163
+ puts "Demo complete!"
164
+ puts "=" * 60
@@ -0,0 +1,216 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Entity Management Example for FactDb
5
+ #
6
+ # This example demonstrates:
7
+ # - Creating entities with various types
8
+ # - Managing aliases
9
+ # - Entity resolution (fuzzy matching)
10
+ # - Merging duplicate entities
11
+ # - Searching entities
12
+ # - Building entity timelines
13
+
14
+ require "bundler/setup"
15
+ require "fact_db"
16
+
17
+ FactDb.configure do |config|
18
+ config.database_url = ENV.fetch("DATABASE_URL", "postgres://#{ENV['USER']}@localhost/fact_db_demo")
19
+ config.fuzzy_match_threshold = 0.85
20
+ config.auto_merge_threshold = 0.95
21
+ end
22
+
23
+ # Ensure database tables exist
24
+ FactDb::Database.migrate!
25
+
26
+ clock = FactDb.new
27
+ entity_service = clock.entity_service
28
+ fact_service = clock.fact_service
29
+
30
+ puts "=" * 60
31
+ puts "FactDb Entity Management Demo"
32
+ puts "=" * 60
33
+
34
+ # Section 1: Creating Entities
35
+ puts "\n--- Section 1: Creating Entities ---\n"
36
+
37
+ # Create a person entity with aliases
38
+ person = entity_service.create(
39
+ "Robert Johnson",
40
+ type: :person,
41
+ aliases: ["Bob Johnson", "R. Johnson", "Bobby"],
42
+ attributes: { email: "rjohnson@example.com", department: "Sales" },
43
+ description: "Senior Sales Representative"
44
+ )
45
+ puts "Created person: #{person.canonical_name}"
46
+ puts " Aliases: #{person.aliases.map(&:alias_text).join(', ')}"
47
+ puts " Type: #{person.entity_type}"
48
+
49
+ # Create an organization entity
50
+ org1 = entity_service.create(
51
+ "Global Industries Inc",
52
+ type: :organization,
53
+ aliases: ["Global Industries", "GII"],
54
+ description: "Fortune 500 manufacturing company"
55
+ )
56
+ puts "\nCreated organization: #{org1.canonical_name}"
57
+
58
+ # Create a place entity
59
+ location = entity_service.create(
60
+ "San Francisco",
61
+ type: :place,
62
+ aliases: ["SF", "San Fran"],
63
+ attributes: { country: "USA", state: "California" }
64
+ )
65
+ puts "Created place: #{location.canonical_name}"
66
+
67
+ # Section 2: Entity Resolution
68
+ puts "\n--- Section 2: Entity Resolution ---\n"
69
+
70
+ # Try to resolve a mix of exact aliases and inexact name variants
71
+ test_names = ["Bob Johnson", "R Johnson", "Robert J", "Global Ind", "GII"]
72
+
73
+ test_names.each do |name|
74
+ resolved = entity_service.resolve(name, type: nil)
75
+ if resolved
76
+ puts "Resolved '#{name}' -> #{resolved.canonical_name} (#{resolved.entity_type})"
77
+ else
78
+ puts "Could not resolve '#{name}'"
79
+ end
80
+ end
81
+
82
+ # Resolve or create - creates if not found
83
+ puts "\nUsing resolve_or_create:"
84
+ new_person = entity_service.resolve_or_create(
85
+ "Maria Garcia",
86
+ type: :person,
87
+ description: "New employee"
88
+ )
89
+ puts "Result: #{new_person.canonical_name} (new: #{new_person.created_at == new_person.updated_at})"
90
+
91
+ # Section 3: Managing Aliases
92
+ puts "\n--- Section 3: Managing Aliases ---\n"
93
+
94
+ # Add more aliases to an existing entity
95
+ entity_service.add_alias(person.id, "Robert J.", alias_type: :name, confidence: 0.9)
96
+ entity_service.add_alias(person.id, "rjohnson@example.com", alias_type: :email, confidence: 1.0)
97
+
98
+ person.reload
99
+ puts "Updated aliases for #{person.canonical_name}:"
100
+ person.aliases.each do |a|
101
+ puts " - #{a.alias_text} (#{a.alias_type}, confidence: #{a.confidence})"
102
+ end
103
+
104
+ # Section 4: Merging Duplicate Entities
105
+ puts "\n--- Section 4: Merging Entities ---\n"
106
+
107
+ # Create a duplicate entity (simulating data entry error)
108
+ duplicate = entity_service.create(
109
+ "Bob Johnson",
110
+ type: :person,
111
+ description: "Possible duplicate of Robert Johnson"
112
+ )
113
+ puts "Created potential duplicate: #{duplicate.canonical_name} (ID: #{duplicate.id})"
114
+
115
+ # Find potential duplicates
116
+ puts "\nSearching for duplicates:"
117
+ duplicates = entity_service.find_duplicates(threshold: 0.8)
118
+ duplicates.each do |dup_pair|
119
+ puts " Potential duplicate: #{dup_pair[:entity1].canonical_name} <-> #{dup_pair[:entity2].canonical_name}"
120
+ puts " Similarity: #{dup_pair[:similarity]}"
121
+ end
122
+
123
+ # Merge the duplicate into the canonical entity
124
+ puts "\nMerging entities..."
125
+ entity_service.merge(person.id, duplicate.id)
126
+ puts "Merged '#{duplicate.canonical_name}' into '#{person.canonical_name}'"
127
+
128
+ # Verify the duplicate is marked as merged
129
+ duplicate.reload
130
+ puts "Duplicate status: #{duplicate.resolution_status}"
131
+ puts "Merged into: #{duplicate.merged_into_id}"
132
+
133
+ # Section 5: Searching Entities
134
+ puts "\n--- Section 5: Searching Entities ---\n"
135
+
136
+ # Create more entities for search demo
137
+ entity_service.create("Jennifer Wilson", type: :person, description: "Marketing Manager")
138
+ entity_service.create("John Williams", type: :person, description: "Software Engineer")
139
+ entity_service.create("Wilson & Associates", type: :organization, description: "Law firm")
140
+
141
+ # Text search
142
+ puts "Search results for 'Wilson':"
143
+ results = entity_service.search("Wilson")
144
+ results.each do |entity|
145
+ puts " - #{entity.canonical_name} (#{entity.entity_type})"
146
+ end
147
+
148
+ # Filter by type
149
+ puts "\nPeople only:"
150
+ entity_service.people.each do |entity|
151
+ puts " - #{entity.canonical_name}"
152
+ end
153
+
154
+ puts "\nOrganizations only:"
155
+ entity_service.organizations.each do |entity|
156
+ puts " - #{entity.canonical_name}"
157
+ end
158
+
159
+ # Section 6: Entity Timeline
160
+ puts "\n--- Section 6: Entity Timeline ---\n"
161
+
162
+ # Create some facts about Robert Johnson to build a timeline
163
+ fact_service.create(
164
+ "Robert Johnson joined Global Industries as Sales Associate",
165
+ valid_at: Date.new(2018, 3, 1),
166
+ invalid_at: Date.new(2020, 6, 30),
167
+ mentions: [
168
+ { entity_id: person.id, role: :subject, text: "Robert Johnson" },
169
+ { entity_id: org1.id, role: :object, text: "Global Industries" }
170
+ ]
171
+ )
172
+
173
+ fact_service.create(
174
+ "Robert Johnson promoted to Senior Sales Representative at Global Industries",
175
+ valid_at: Date.new(2020, 7, 1),
176
+ mentions: [
177
+ { entity_id: person.id, role: :subject, text: "Robert Johnson" },
178
+ { entity_id: org1.id, role: :object, text: "Global Industries" }
179
+ ]
180
+ )
181
+
182
+ fact_service.create(
183
+ "Robert Johnson relocated to San Francisco office",
184
+ valid_at: Date.new(2022, 1, 15),
185
+ mentions: [
186
+ { entity_id: person.id, role: :subject, text: "Robert Johnson" },
187
+ { entity_id: location.id, role: :location, text: "San Francisco" }
188
+ ]
189
+ )
190
+
191
+ # Build timeline for the person
192
+ puts "Timeline for #{person.canonical_name}:"
193
+ timeline = entity_service.timeline_for(person.id, from: Date.new(2017, 1, 1), to: Date.today)
194
+ timeline.each do |entry|
195
+ date_range = entry[:invalid_at] ? "#{entry[:valid_at]} - #{entry[:invalid_at]}" : "#{entry[:valid_at]} - present"
196
+ puts " [#{date_range}]"
197
+ puts " #{entry[:fact_text]}"
198
+ end
199
+
200
+ # Section 7: Statistics
201
+ puts "\n--- Section 7: Entity Statistics ---\n"
202
+
203
+ stats = entity_service.stats
204
+ puts "Total entities: #{stats[:total]}"
205
+ puts "By type:"
206
+ stats[:by_type].each do |type, count|
207
+ puts " #{type}: #{count}"
208
+ end
209
+ puts "By resolution status:"
210
+ stats[:by_status].each do |status, count|
211
+ puts " #{status}: #{count}"
212
+ end
213
+
214
+ puts "\n" + "=" * 60
215
+ puts "Entity Management Demo Complete!"
216
+ puts "=" * 60