fact_db 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/CHANGELOG.md +48 -0
- data/COMMITS.md +196 -0
- data/README.md +102 -0
- data/Rakefile +41 -0
- data/db/migrate/001_enable_extensions.rb +7 -0
- data/db/migrate/002_create_contents.rb +44 -0
- data/db/migrate/003_create_entities.rb +36 -0
- data/db/migrate/004_create_entity_aliases.rb +18 -0
- data/db/migrate/005_create_facts.rb +65 -0
- data/db/migrate/006_create_entity_mentions.rb +18 -0
- data/db/migrate/007_create_fact_sources.rb +18 -0
- data/docs/api/extractors/index.md +71 -0
- data/docs/api/extractors/llm.md +162 -0
- data/docs/api/extractors/manual.md +92 -0
- data/docs/api/extractors/rule-based.md +165 -0
- data/docs/api/facts.md +300 -0
- data/docs/api/index.md +66 -0
- data/docs/api/models/content.md +165 -0
- data/docs/api/models/entity.md +202 -0
- data/docs/api/models/fact.md +270 -0
- data/docs/api/models/index.md +77 -0
- data/docs/api/pipeline/extraction.md +175 -0
- data/docs/api/pipeline/index.md +72 -0
- data/docs/api/pipeline/resolution.md +209 -0
- data/docs/api/services/content-service.md +166 -0
- data/docs/api/services/entity-service.md +202 -0
- data/docs/api/services/fact-service.md +223 -0
- data/docs/api/services/index.md +55 -0
- data/docs/architecture/database-schema.md +293 -0
- data/docs/architecture/entity-resolution.md +293 -0
- data/docs/architecture/index.md +149 -0
- data/docs/architecture/temporal-facts.md +268 -0
- data/docs/architecture/three-layer-model.md +242 -0
- data/docs/assets/css/custom.css +137 -0
- data/docs/assets/fact_db.jpg +0 -0
- data/docs/assets/images/fact_db.jpg +0 -0
- data/docs/concepts.md +183 -0
- data/docs/examples/basic-usage.md +235 -0
- data/docs/examples/hr-onboarding.md +312 -0
- data/docs/examples/index.md +64 -0
- data/docs/examples/news-analysis.md +288 -0
- data/docs/getting-started/database-setup.md +170 -0
- data/docs/getting-started/index.md +71 -0
- data/docs/getting-started/installation.md +98 -0
- data/docs/getting-started/quick-start.md +191 -0
- data/docs/guides/batch-processing.md +325 -0
- data/docs/guides/configuration.md +243 -0
- data/docs/guides/entity-management.md +364 -0
- data/docs/guides/extracting-facts.md +299 -0
- data/docs/guides/index.md +22 -0
- data/docs/guides/ingesting-content.md +252 -0
- data/docs/guides/llm-integration.md +299 -0
- data/docs/guides/temporal-queries.md +315 -0
- data/docs/index.md +121 -0
- data/examples/README.md +130 -0
- data/examples/basic_usage.rb +164 -0
- data/examples/entity_management.rb +216 -0
- data/examples/hr_system.rb +428 -0
- data/examples/rule_based_extraction.rb +258 -0
- data/examples/temporal_queries.rb +245 -0
- data/lib/fact_db/config.rb +71 -0
- data/lib/fact_db/database.rb +45 -0
- data/lib/fact_db/errors.rb +10 -0
- data/lib/fact_db/extractors/base.rb +117 -0
- data/lib/fact_db/extractors/llm_extractor.rb +179 -0
- data/lib/fact_db/extractors/manual_extractor.rb +53 -0
- data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
- data/lib/fact_db/llm/adapter.rb +109 -0
- data/lib/fact_db/models/content.rb +62 -0
- data/lib/fact_db/models/entity.rb +84 -0
- data/lib/fact_db/models/entity_alias.rb +26 -0
- data/lib/fact_db/models/entity_mention.rb +33 -0
- data/lib/fact_db/models/fact.rb +192 -0
- data/lib/fact_db/models/fact_source.rb +35 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
- data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
- data/lib/fact_db/resolution/entity_resolver.rb +261 -0
- data/lib/fact_db/resolution/fact_resolver.rb +259 -0
- data/lib/fact_db/services/content_service.rb +93 -0
- data/lib/fact_db/services/entity_service.rb +150 -0
- data/lib/fact_db/services/fact_service.rb +193 -0
- data/lib/fact_db/temporal/query.rb +125 -0
- data/lib/fact_db/temporal/timeline.rb +134 -0
- data/lib/fact_db/version.rb +5 -0
- data/lib/fact_db.rb +141 -0
- data/mkdocs.yml +198 -0
- metadata +288 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/* FactDb Documentation Custom Styles */
|
|
2
|
+
|
|
3
|
+
/* Code blocks */
|
|
4
|
+
.highlight pre {
|
|
5
|
+
border-radius: 6px;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
/* Tables */
|
|
9
|
+
.md-typeset table:not([class]) {
|
|
10
|
+
font-size: 0.85rem;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
.md-typeset table:not([class]) th {
|
|
14
|
+
background-color: var(--md-primary-fg-color);
|
|
15
|
+
color: var(--md-primary-bg-color);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/* Admonitions */
|
|
19
|
+
.md-typeset .admonition {
|
|
20
|
+
border-radius: 6px;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/* Method signatures */
|
|
24
|
+
.md-typeset code {
|
|
25
|
+
border-radius: 4px;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/* Navigation */
|
|
29
|
+
.md-nav__link {
|
|
30
|
+
font-size: 0.85rem;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/* Mermaid diagrams */
|
|
34
|
+
.mermaid {
|
|
35
|
+
text-align: center;
|
|
36
|
+
margin: 1.5rem 0;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/* API reference styling */
|
|
40
|
+
.md-typeset h3 code {
|
|
41
|
+
background: transparent;
|
|
42
|
+
padding: 0;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/* Parameters table */
|
|
46
|
+
.md-typeset table:not([class]) td:first-child code {
|
|
47
|
+
white-space: nowrap;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/* Example blocks */
|
|
51
|
+
.md-typeset .example {
|
|
52
|
+
background-color: var(--md-code-bg-color);
|
|
53
|
+
border-left: 4px solid var(--md-accent-fg-color);
|
|
54
|
+
padding: 1rem;
|
|
55
|
+
margin: 1rem 0;
|
|
56
|
+
border-radius: 0 6px 6px 0;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/* Cards grid (for index pages) */
|
|
60
|
+
.grid.cards {
|
|
61
|
+
display: grid;
|
|
62
|
+
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
|
63
|
+
gap: 1rem;
|
|
64
|
+
margin: 1rem 0;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/* Method dividers */
|
|
68
|
+
.md-typeset hr {
|
|
69
|
+
margin: 2rem 0;
|
|
70
|
+
border-top: 1px solid var(--md-default-fg-color--lightest);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/* Smaller font for long code blocks */
|
|
74
|
+
.md-typeset pre > code {
|
|
75
|
+
font-size: 0.8rem;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/* Timeline styling */
|
|
79
|
+
.timeline {
|
|
80
|
+
border-left: 2px solid var(--md-accent-fg-color);
|
|
81
|
+
padding-left: 1rem;
|
|
82
|
+
margin-left: 0.5rem;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
.timeline-item {
|
|
86
|
+
position: relative;
|
|
87
|
+
padding-bottom: 1rem;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
.timeline-item::before {
|
|
91
|
+
content: "";
|
|
92
|
+
position: absolute;
|
|
93
|
+
left: -1.4rem;
|
|
94
|
+
top: 0.5rem;
|
|
95
|
+
width: 10px;
|
|
96
|
+
height: 10px;
|
|
97
|
+
background: var(--md-accent-fg-color);
|
|
98
|
+
border-radius: 50%;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
/* Status badges */
|
|
102
|
+
.status-canonical {
|
|
103
|
+
background: #4caf50;
|
|
104
|
+
color: white;
|
|
105
|
+
padding: 2px 8px;
|
|
106
|
+
border-radius: 4px;
|
|
107
|
+
font-size: 0.75rem;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
.status-superseded {
|
|
111
|
+
background: #ff9800;
|
|
112
|
+
color: white;
|
|
113
|
+
padding: 2px 8px;
|
|
114
|
+
border-radius: 4px;
|
|
115
|
+
font-size: 0.75rem;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
.status-corroborated {
|
|
119
|
+
background: #2196f3;
|
|
120
|
+
color: white;
|
|
121
|
+
padding: 2px 8px;
|
|
122
|
+
border-radius: 4px;
|
|
123
|
+
font-size: 0.75rem;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
.status-synthesized {
|
|
127
|
+
background: #9c27b0;
|
|
128
|
+
color: white;
|
|
129
|
+
padding: 2px 8px;
|
|
130
|
+
border-radius: 4px;
|
|
131
|
+
font-size: 0.75rem;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/* Dark mode adjustments */
|
|
135
|
+
[data-md-color-scheme="slate"] .md-typeset table:not([class]) th {
|
|
136
|
+
background-color: var(--md-primary-fg-color--dark);
|
|
137
|
+
}
|
|
Binary file
|
|
Binary file
|
data/docs/concepts.md
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# Core Concepts
|
|
2
|
+
|
|
3
|
+
FactDb is built around several key concepts that work together to provide temporal fact tracking with full provenance.
|
|
4
|
+
|
|
5
|
+
## The Three-Layer Model
|
|
6
|
+
|
|
7
|
+
### Content Layer
|
|
8
|
+
|
|
9
|
+
The content layer stores immutable source documents - emails, articles, reports, transcripts, or any text that contains facts. Content is:
|
|
10
|
+
|
|
11
|
+
- **Immutable** - Once ingested, content never changes
|
|
12
|
+
- **Deduplicated** - Identified by content hash to prevent duplicates
|
|
13
|
+
- **Timestamped** - Records when the content was captured
|
|
14
|
+
- **Searchable** - Supports full-text and semantic search via embeddings
|
|
15
|
+
|
|
16
|
+
### Entity Layer
|
|
17
|
+
|
|
18
|
+
Entities represent real-world things mentioned in content:
|
|
19
|
+
|
|
20
|
+
| Type | Examples |
|
|
21
|
+
|------|----------|
|
|
22
|
+
| `person` | Paula Chen, John Smith |
|
|
23
|
+
| `organization` | Microsoft, Acme Corp |
|
|
24
|
+
| `place` | San Francisco, Building A |
|
|
25
|
+
| `product` | Windows 11, iPhone |
|
|
26
|
+
| `event` | Q4 2024 Earnings Call |
|
|
27
|
+
|
|
28
|
+
Entities support:
|
|
29
|
+
|
|
30
|
+
- **Canonical Names** - The authoritative name for the entity
|
|
31
|
+
- **Aliases** - Alternative names and spellings
|
|
32
|
+
- **Resolution** - Matching mentions to entities via exact match, aliases, or fuzzy matching
|
|
33
|
+
- **Merging** - Combining duplicate entities when discovered
|
|
34
|
+
|
|
35
|
+
### Fact Layer
|
|
36
|
+
|
|
37
|
+
Facts are temporal assertions about entities:
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
# A fact has:
|
|
41
|
+
# - fact_text: The assertion itself
|
|
42
|
+
# - valid_at: When the fact became true
|
|
43
|
+
# - invalid_at: When the fact stopped being true (nil if still valid)
|
|
44
|
+
# - status: canonical, superseded, corroborated, synthesized
|
|
45
|
+
# - entity_mentions: Links to entities mentioned in the fact
|
|
46
|
+
# - fact_sources: Links to source content
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Fact Lifecycle
|
|
50
|
+
|
|
51
|
+
Facts move through different statuses as information evolves:
|
|
52
|
+
|
|
53
|
+
```mermaid
|
|
54
|
+
stateDiagram-v2
|
|
55
|
+
[*] --> canonical: New fact extracted
|
|
56
|
+
canonical --> corroborated: Multiple sources confirm
|
|
57
|
+
canonical --> superseded: New information replaces
|
|
58
|
+
canonical --> synthesized: Combined with other facts
|
|
59
|
+
superseded --> [*]: Archived
|
|
60
|
+
corroborated --> superseded: Later replaced
|
|
61
|
+
|
|
62
|
+
classDef blue fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
|
|
63
|
+
classDef green fill:#047857,stroke:#065F46,color:#FFFFFF
|
|
64
|
+
classDef red fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
|
|
65
|
+
classDef yellow fill:#B45309,stroke:#92400E,color:#FFFFFF
|
|
66
|
+
|
|
67
|
+
class canonical blue
|
|
68
|
+
class corroborated green
|
|
69
|
+
class superseded red
|
|
70
|
+
class synthesized yellow
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Canonical
|
|
74
|
+
|
|
75
|
+
A canonical fact is the current authoritative version:
|
|
76
|
+
|
|
77
|
+
```ruby
|
|
78
|
+
fact = facts.fact_service.create(
|
|
79
|
+
"Paula is a Principal Engineer",
|
|
80
|
+
valid_at: Date.parse("2024-01-10"),
|
|
81
|
+
mentions: [{ entity: paula, role: "subject" }]
|
|
82
|
+
)
|
|
83
|
+
# fact.status => "canonical"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Superseded
|
|
87
|
+
|
|
88
|
+
When information changes, the old fact is superseded:
|
|
89
|
+
|
|
90
|
+
```ruby
|
|
91
|
+
# Paula gets promoted
|
|
92
|
+
new_fact = facts.fact_service.resolver.supersede(
|
|
93
|
+
fact.id,
|
|
94
|
+
"Paula is a Senior Principal Engineer",
|
|
95
|
+
valid_at: Date.parse("2024-06-01")
|
|
96
|
+
)
|
|
97
|
+
# old fact.status => "superseded"
|
|
98
|
+
# old fact.invalid_at => "2024-06-01"
|
|
99
|
+
# new_fact.status => "canonical"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Corroborated
|
|
103
|
+
|
|
104
|
+
Facts confirmed by multiple sources gain higher confidence:
|
|
105
|
+
|
|
106
|
+
```ruby
|
|
107
|
+
facts.fact_service.resolver.corroborate(fact.id, other_fact.id)
|
|
108
|
+
# After 2+ corroborations: fact.status => "corroborated"
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Synthesized
|
|
112
|
+
|
|
113
|
+
Derived facts combine information from multiple sources:
|
|
114
|
+
|
|
115
|
+
```ruby
|
|
116
|
+
synthesized = facts.fact_service.resolver.synthesize(
|
|
117
|
+
[fact1.id, fact2.id, fact3.id],
|
|
118
|
+
"Paula worked at Microsoft from Jan 2024, promoted to Senior in June 2024",
|
|
119
|
+
valid_at: Date.parse("2024-01-10")
|
|
120
|
+
)
|
|
121
|
+
# synthesized.status => "synthesized"
|
|
122
|
+
# synthesized.derived_from_ids => [fact1.id, fact2.id, fact3.id]
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Temporal Queries
|
|
126
|
+
|
|
127
|
+
The power of FactDb lies in querying facts across time:
|
|
128
|
+
|
|
129
|
+
```ruby
|
|
130
|
+
# What do we know now?
|
|
131
|
+
current_facts = facts.query_facts(entity: paula.id)
|
|
132
|
+
|
|
133
|
+
# What did we know on March 1st?
|
|
134
|
+
march_facts = facts.facts_at(Date.parse("2024-03-01"), entity: paula.id)
|
|
135
|
+
|
|
136
|
+
# What's the full timeline?
|
|
137
|
+
timeline = facts.timeline_for(paula.id, from: "2024-01-01", to: "2024-12-31")
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Entity Resolution
|
|
141
|
+
|
|
142
|
+
When extracting facts, mentions must be resolved to entities:
|
|
143
|
+
|
|
144
|
+
```mermaid
|
|
145
|
+
graph LR
|
|
146
|
+
M1["'Paula'"] --> R{Resolver}
|
|
147
|
+
M2["'P. Chen'"] --> R
|
|
148
|
+
M3["'Paula Chen'"] --> R
|
|
149
|
+
R --> E[Paula Chen Entity]
|
|
150
|
+
|
|
151
|
+
style M1 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
|
|
152
|
+
style M2 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
|
|
153
|
+
style M3 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
|
|
154
|
+
style R fill:#B45309,stroke:#92400E,color:#FFFFFF
|
|
155
|
+
style E fill:#047857,stroke:#065F46,color:#FFFFFF
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Resolution strategies (in order):
|
|
159
|
+
|
|
160
|
+
1. **Exact Match** - Direct canonical name match
|
|
161
|
+
2. **Alias Match** - Match against registered aliases
|
|
162
|
+
3. **Fuzzy Match** - Levenshtein distance for typos/variations
|
|
163
|
+
4. **Create New** - If no match is found, optionally create a new entity
|
|
164
|
+
|
|
165
|
+
## Audit Trails
|
|
166
|
+
|
|
167
|
+
Every fact maintains links to its source content:
|
|
168
|
+
|
|
169
|
+
```ruby
|
|
170
|
+
fact.fact_sources.each do |source|
|
|
171
|
+
puts "Source: #{source.content.title}"
|
|
172
|
+
puts "Type: #{source.source_type}" # primary, supporting, contradicting
|
|
173
|
+
puts "Excerpt: #{source.excerpt}"
|
|
174
|
+
puts "Confidence: #{source.confidence}"
|
|
175
|
+
end
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
This enables:
|
|
179
|
+
|
|
180
|
+
- Tracing facts back to original documents
|
|
181
|
+
- Verifying information
|
|
182
|
+
- Understanding confidence levels
|
|
183
|
+
- Investigating contradictions
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# Basic Usage
|
|
2
|
+
|
|
3
|
+
A simple introduction to FactDb's core functionality.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
require 'fact_db'
|
|
9
|
+
|
|
10
|
+
# Configure
|
|
11
|
+
FactDb.configure do |config|
|
|
12
|
+
config.database_url = ENV['DATABASE_URL']
|
|
13
|
+
config.llm_provider = :openai
|
|
14
|
+
config.llm_api_key = ENV['OPENAI_API_KEY']
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Create facts instance
|
|
18
|
+
facts = FactDb.new
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Ingest Content
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
# Ingest an email
|
|
25
|
+
email = facts.ingest(
|
|
26
|
+
<<~TEXT,
|
|
27
|
+
Hi team,
|
|
28
|
+
|
|
29
|
+
I'm excited to announce that Paula Chen has accepted our offer
|
|
30
|
+
to join Microsoft as Principal Engineer starting January 10, 2024.
|
|
31
|
+
|
|
32
|
+
She'll be part of the Platform team reporting to Sarah Johnson.
|
|
33
|
+
|
|
34
|
+
Best,
|
|
35
|
+
HR
|
|
36
|
+
TEXT
|
|
37
|
+
type: :email,
|
|
38
|
+
title: "New Hire Announcement - Paula Chen",
|
|
39
|
+
captured_at: Time.current
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
puts "Ingested content ID: #{email.id}"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Create Entities
|
|
46
|
+
|
|
47
|
+
```ruby
|
|
48
|
+
# Create people
|
|
49
|
+
paula = facts.entity_service.create(
|
|
50
|
+
"Paula Chen",
|
|
51
|
+
type: :person,
|
|
52
|
+
aliases: ["Paula"]
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
sarah = facts.entity_service.create(
|
|
56
|
+
"Sarah Johnson",
|
|
57
|
+
type: :person,
|
|
58
|
+
aliases: ["Sarah"]
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# Create organization
|
|
62
|
+
microsoft = facts.entity_service.create(
|
|
63
|
+
"Microsoft",
|
|
64
|
+
type: :organization,
|
|
65
|
+
aliases: ["MS", "MSFT"]
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
platform_team = facts.entity_service.create(
|
|
69
|
+
"Platform Team",
|
|
70
|
+
type: :organization
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
puts "Created entities: Paula, Sarah, Microsoft, Platform Team"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Extract Facts Manually
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
# Create facts with explicit links
|
|
80
|
+
fact1 = facts.fact_service.create(
|
|
81
|
+
"Paula Chen joined Microsoft as Principal Engineer",
|
|
82
|
+
valid_at: Date.parse("2024-01-10"),
|
|
83
|
+
mentions: [
|
|
84
|
+
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
85
|
+
{ entity: microsoft, role: "organization", text: "Microsoft" }
|
|
86
|
+
],
|
|
87
|
+
sources: [
|
|
88
|
+
{ content: email, type: "primary" }
|
|
89
|
+
]
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
fact2 = facts.fact_service.create(
|
|
93
|
+
"Paula Chen reports to Sarah Johnson",
|
|
94
|
+
valid_at: Date.parse("2024-01-10"),
|
|
95
|
+
mentions: [
|
|
96
|
+
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
97
|
+
{ entity: sarah, role: "object", text: "Sarah Johnson" }
|
|
98
|
+
],
|
|
99
|
+
sources: [
|
|
100
|
+
{ content: email, type: "primary" }
|
|
101
|
+
]
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
fact3 = facts.fact_service.create(
|
|
105
|
+
"Paula Chen is on the Platform Team",
|
|
106
|
+
valid_at: Date.parse("2024-01-10"),
|
|
107
|
+
mentions: [
|
|
108
|
+
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
109
|
+
{ entity: platform_team, role: "organization", text: "Platform Team" }
|
|
110
|
+
],
|
|
111
|
+
sources: [
|
|
112
|
+
{ content: email, type: "primary" }
|
|
113
|
+
]
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
puts "Created #{3} facts"
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Extract Facts with LLM
|
|
120
|
+
|
|
121
|
+
```ruby
|
|
122
|
+
# Alternative: let LLM extract facts
|
|
123
|
+
extracted = facts.extract_facts(email.id, extractor: :llm)
|
|
124
|
+
|
|
125
|
+
puts "LLM extracted #{extracted.count} facts:"
|
|
126
|
+
extracted.each do |fact|
|
|
127
|
+
puts " - #{fact.fact_text}"
|
|
128
|
+
end
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Query Facts
|
|
132
|
+
|
|
133
|
+
```ruby
|
|
134
|
+
# Current facts about Paula
|
|
135
|
+
puts "\nCurrent facts about Paula:"
|
|
136
|
+
facts.current_facts_for(paula.id).each do |fact|
|
|
137
|
+
puts " - #{fact.fact_text}"
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Facts about Microsoft
|
|
141
|
+
puts "\nFacts about Microsoft:"
|
|
142
|
+
facts.query_facts(entity: microsoft.id).each do |fact|
|
|
143
|
+
puts " - #{fact.fact_text}"
|
|
144
|
+
end
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Resolve Entity
|
|
148
|
+
|
|
149
|
+
```ruby
|
|
150
|
+
# Resolve a name
|
|
151
|
+
resolved = facts.resolve_entity("Paula")
|
|
152
|
+
puts "\n'Paula' resolves to: #{resolved&.canonical_name}"
|
|
153
|
+
|
|
154
|
+
# Type-constrained resolution
|
|
155
|
+
person = facts.resolve_entity("Paula", type: :person)
|
|
156
|
+
puts "'Paula' as person: #{person&.canonical_name}"
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Update Facts (Supersession)
|
|
160
|
+
|
|
161
|
+
```ruby
|
|
162
|
+
# Paula gets promoted
|
|
163
|
+
new_fact = facts.fact_service.resolver.supersede(
|
|
164
|
+
fact1.id,
|
|
165
|
+
"Paula Chen is Senior Principal Engineer at Microsoft",
|
|
166
|
+
valid_at: Date.parse("2024-06-01")
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
puts "\nSuperseded fact:"
|
|
170
|
+
puts " Old: #{fact1.reload.fact_text} (#{fact1.status})"
|
|
171
|
+
puts " New: #{new_fact.fact_text} (#{new_fact.status})"
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Timeline
|
|
175
|
+
|
|
176
|
+
```ruby
|
|
177
|
+
# Build timeline
|
|
178
|
+
puts "\nPaula's timeline:"
|
|
179
|
+
facts.timeline_for(paula.id).each do |fact|
|
|
180
|
+
valid = fact.invalid_at ? "#{fact.valid_at} - #{fact.invalid_at}" : "#{fact.valid_at} - present"
|
|
181
|
+
puts " #{valid}: #{fact.fact_text}"
|
|
182
|
+
end
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Historical Query
|
|
186
|
+
|
|
187
|
+
```ruby
|
|
188
|
+
# What did we know before promotion?
|
|
189
|
+
puts "\nFacts about Paula on March 1, 2024:"
|
|
190
|
+
facts.facts_at(Date.parse("2024-03-01"), entity: paula.id).each do |fact|
|
|
191
|
+
puts " - #{fact.fact_text}"
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# What do we know after promotion?
|
|
195
|
+
puts "\nFacts about Paula on July 1, 2024:"
|
|
196
|
+
facts.facts_at(Date.parse("2024-07-01"), entity: paula.id).each do |fact|
|
|
197
|
+
puts " - #{fact.fact_text}"
|
|
198
|
+
end
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Complete Script
|
|
202
|
+
|
|
203
|
+
```ruby
|
|
204
|
+
#!/usr/bin/env ruby
|
|
205
|
+
require 'fact_db'
|
|
206
|
+
|
|
207
|
+
# Setup
|
|
208
|
+
FactDb.configure do |config|
|
|
209
|
+
config.database_url = ENV['DATABASE_URL'] || 'postgresql://localhost/fact_db'
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
facts = FactDb.new
|
|
213
|
+
|
|
214
|
+
# Ingest
|
|
215
|
+
content = facts.ingest("Paula joined Microsoft on Jan 10, 2024", type: :note)
|
|
216
|
+
|
|
217
|
+
# Create entities
|
|
218
|
+
paula = facts.entity_service.create("Paula", type: :person)
|
|
219
|
+
microsoft = facts.entity_service.create("Microsoft", type: :organization)
|
|
220
|
+
|
|
221
|
+
# Create fact
|
|
222
|
+
fact = facts.fact_service.create(
|
|
223
|
+
"Paula joined Microsoft",
|
|
224
|
+
valid_at: Date.parse("2024-01-10"),
|
|
225
|
+
mentions: [
|
|
226
|
+
{ entity: paula, role: "subject", text: "Paula" },
|
|
227
|
+
{ entity: microsoft, role: "organization", text: "Microsoft" }
|
|
228
|
+
],
|
|
229
|
+
sources: [{ content: content, type: "primary" }]
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
# Query
|
|
233
|
+
puts "Current facts about Paula:"
|
|
234
|
+
facts.current_facts_for(paula.id).each { |f| puts " - #{f.fact_text}" }
|
|
235
|
+
```
|