fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
data/docs/architecture/index.md
CHANGED
|
@@ -48,7 +48,7 @@ graph TB
|
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
subgraph FactDb["FactDb Core"]
|
|
51
|
-
|
|
51
|
+
SS[SourceService]
|
|
52
52
|
ES[EntityService]
|
|
53
53
|
FS[FactService]
|
|
54
54
|
|
|
@@ -70,17 +70,17 @@ graph TB
|
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
subgraph Storage["PostgreSQL + pgvector"]
|
|
73
|
-
|
|
73
|
+
Sources[(Sources)]
|
|
74
74
|
Entities[(Entities)]
|
|
75
75
|
Facts[(Facts)]
|
|
76
76
|
end
|
|
77
77
|
|
|
78
|
-
Email -->
|
|
79
|
-
Doc -->
|
|
80
|
-
News -->
|
|
81
|
-
API -->
|
|
78
|
+
Email --> SS
|
|
79
|
+
Doc --> SS
|
|
80
|
+
News --> SS
|
|
81
|
+
API --> SS
|
|
82
82
|
|
|
83
|
-
|
|
83
|
+
SS --> Sources
|
|
84
84
|
ES --> Entities
|
|
85
85
|
FS --> Facts
|
|
86
86
|
|
|
@@ -93,7 +93,7 @@ graph TB
|
|
|
93
93
|
style Doc fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
|
|
94
94
|
style News fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
|
|
95
95
|
style API fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
|
|
96
|
-
style
|
|
96
|
+
style SS fill:#B45309,stroke:#92400E,color:#FFFFFF
|
|
97
97
|
style ES fill:#B45309,stroke:#92400E,color:#FFFFFF
|
|
98
98
|
style FS fill:#B45309,stroke:#92400E,color:#FFFFFF
|
|
99
99
|
style ME fill:#047857,stroke:#065F46,color:#FFFFFF
|
|
@@ -103,7 +103,7 @@ graph TB
|
|
|
103
103
|
style FR fill:#C2410C,stroke:#9A3412,color:#FFFFFF
|
|
104
104
|
style EP fill:#7C3AED,stroke:#6D28D9,color:#FFFFFF
|
|
105
105
|
style RP fill:#7C3AED,stroke:#6D28D9,color:#FFFFFF
|
|
106
|
-
style
|
|
106
|
+
style Sources fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
|
|
107
107
|
style Entities fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
|
|
108
108
|
style Facts fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
|
|
109
109
|
```
|
|
@@ -112,7 +112,7 @@ graph TB
|
|
|
112
112
|
|
|
113
113
|
### Services
|
|
114
114
|
|
|
115
|
-
- **
|
|
115
|
+
- **SourceService** - Ingests and manages source content
|
|
116
116
|
- **EntityService** - Creates and resolves entities
|
|
117
117
|
- **FactService** - Extracts, creates, and queries facts
|
|
118
118
|
|
|
@@ -20,7 +20,7 @@ Facts with `invalid_at: nil` are currently valid:
|
|
|
20
20
|
```ruby
|
|
21
21
|
# Paula is currently a Principal Engineer
|
|
22
22
|
{
|
|
23
|
-
|
|
23
|
+
text: "Paula Chen is Principal Engineer",
|
|
24
24
|
valid_at: "2024-01-10",
|
|
25
25
|
invalid_at: nil
|
|
26
26
|
}
|
|
@@ -33,7 +33,7 @@ Facts with both dates represent historical periods:
|
|
|
33
33
|
```ruby
|
|
34
34
|
# Paula was Senior Engineer before promotion
|
|
35
35
|
{
|
|
36
|
-
|
|
36
|
+
text: "Paula Chen is Senior Engineer",
|
|
37
37
|
valid_at: "2022-03-15",
|
|
38
38
|
invalid_at: "2024-01-10"
|
|
39
39
|
}
|
|
@@ -168,7 +168,7 @@ timeline = facts.timeline_for(paula.id)
|
|
|
168
168
|
|
|
169
169
|
# Returns chronological list of facts
|
|
170
170
|
timeline.each do |entry|
|
|
171
|
-
puts "#{entry.valid_at}: #{entry.
|
|
171
|
+
puts "#{entry.valid_at}: #{entry.text}"
|
|
172
172
|
puts " Until: #{entry.invalid_at || 'present'}"
|
|
173
173
|
end
|
|
174
174
|
```
|
|
@@ -198,8 +198,8 @@ conflicts = facts.fact_service.resolver.find_conflicts(
|
|
|
198
198
|
|
|
199
199
|
conflicts.each do |conflict|
|
|
200
200
|
puts "Potential conflict:"
|
|
201
|
-
puts " Fact 1: #{conflict[:fact1].
|
|
202
|
-
puts " Fact 2: #{conflict[:fact2].
|
|
201
|
+
puts " Fact 1: #{conflict[:fact1].text}"
|
|
202
|
+
puts " Fact 2: #{conflict[:fact2].text}"
|
|
203
203
|
puts " Similarity: #{conflict[:similarity]}"
|
|
204
204
|
end
|
|
205
205
|
```
|
|
@@ -68,7 +68,7 @@ The content layer stores raw source material that serves as evidence for facts.
|
|
|
68
68
|
### Example
|
|
69
69
|
|
|
70
70
|
```ruby
|
|
71
|
-
|
|
71
|
+
source = facts.ingest(
|
|
72
72
|
"Paula Chen accepted the offer for Principal Engineer...",
|
|
73
73
|
type: :email,
|
|
74
74
|
title: "RE: Offer Letter - Paula Chen",
|
|
@@ -142,7 +142,7 @@ Facts are temporal assertions about entities, extracted from content.
|
|
|
142
142
|
|
|
143
143
|
```ruby
|
|
144
144
|
fact = Models::Fact.new(
|
|
145
|
-
|
|
145
|
+
text: "Paula Chen is Principal Engineer at Microsoft",
|
|
146
146
|
valid_at: Date.parse("2024-01-10"),
|
|
147
147
|
invalid_at: nil, # Still valid
|
|
148
148
|
status: "canonical",
|
|
@@ -185,11 +185,11 @@ Facts connect to both content and entities:
|
|
|
185
185
|
|
|
186
186
|
```mermaid
|
|
187
187
|
graph LR
|
|
188
|
-
|
|
188
|
+
S[Source] -->|fact_sources| F[Fact]
|
|
189
189
|
F -->|entity_mentions| E1[Entity 1]
|
|
190
190
|
F -->|entity_mentions| E2[Entity 2]
|
|
191
191
|
|
|
192
|
-
style
|
|
192
|
+
style S fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
|
|
193
193
|
style F fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
|
|
194
194
|
style E1 fill:#047857,stroke:#065F46,color:#FFFFFF
|
|
195
195
|
style E2 fill:#047857,stroke:#065F46,color:#FFFFFF
|
|
@@ -197,18 +197,18 @@ graph LR
|
|
|
197
197
|
|
|
198
198
|
## Layer Interactions
|
|
199
199
|
|
|
200
|
-
###
|
|
200
|
+
### Source to Facts
|
|
201
201
|
|
|
202
|
-
Facts are extracted from
|
|
202
|
+
Facts are extracted from sources and maintain source links:
|
|
203
203
|
|
|
204
204
|
```ruby
|
|
205
|
-
# Extract facts from
|
|
206
|
-
extracted = facts.extract_facts(
|
|
205
|
+
# Extract facts from source
|
|
206
|
+
extracted = facts.extract_facts(source.id, extractor: :llm)
|
|
207
207
|
|
|
208
208
|
# Each fact links back to source
|
|
209
|
-
extracted.first.fact_sources.each do |
|
|
210
|
-
puts source.
|
|
211
|
-
puts
|
|
209
|
+
extracted.first.fact_sources.each do |fs|
|
|
210
|
+
puts fs.source.title
|
|
211
|
+
puts fs.excerpt
|
|
212
212
|
end
|
|
213
213
|
```
|
|
214
214
|
|
|
@@ -218,7 +218,7 @@ Facts mention entities with specific roles:
|
|
|
218
218
|
|
|
219
219
|
```ruby
|
|
220
220
|
fact.entity_mentions.each do |mention|
|
|
221
|
-
puts "#{mention.entity.
|
|
221
|
+
puts "#{mention.entity.name}: #{mention.mention_role}"
|
|
222
222
|
end
|
|
223
223
|
# Output:
|
|
224
224
|
# Paula Chen: subject
|
|
@@ -231,12 +231,12 @@ end
|
|
|
231
231
|
Query across all layers:
|
|
232
232
|
|
|
233
233
|
```ruby
|
|
234
|
-
# Find all
|
|
235
|
-
|
|
234
|
+
# Find all sources about an entity
|
|
235
|
+
sources = facts.source_service.mentioning_entity(paula.id)
|
|
236
236
|
|
|
237
|
-
# Find all entities mentioned in
|
|
238
|
-
entities = facts.entity_service.
|
|
237
|
+
# Find all entities mentioned in source
|
|
238
|
+
entities = facts.entity_service.in_source(source.id)
|
|
239
239
|
|
|
240
240
|
# Find all facts from a specific source
|
|
241
|
-
source_facts = facts.fact_service.
|
|
241
|
+
source_facts = facts.fact_service.from_source(source.id)
|
|
242
242
|
```
|
data/docs/concepts.md
CHANGED
|
@@ -38,7 +38,7 @@ Facts are temporal assertions about entities:
|
|
|
38
38
|
|
|
39
39
|
```ruby
|
|
40
40
|
# A fact has:
|
|
41
|
-
# -
|
|
41
|
+
# - text: The assertion itself
|
|
42
42
|
# - valid_at: When the fact became true
|
|
43
43
|
# - invalid_at: When the fact stopped being true (nil if still valid)
|
|
44
44
|
# - status: canonical, superseded, corroborated, synthesized
|
|
@@ -167,11 +167,11 @@ Resolution strategies (in order):
|
|
|
167
167
|
Every fact maintains links to its source:
|
|
168
168
|
|
|
169
169
|
```ruby
|
|
170
|
-
fact.fact_sources.each do |
|
|
171
|
-
puts "Source: #{source.
|
|
172
|
-
puts "
|
|
173
|
-
puts "Excerpt: #{
|
|
174
|
-
puts "Confidence: #{
|
|
170
|
+
fact.fact_sources.each do |fact_source|
|
|
171
|
+
puts "Source: #{fact_source.source.title}"
|
|
172
|
+
puts "Kind: #{fact_source.kind}" # primary, supporting, corroborating
|
|
173
|
+
puts "Excerpt: #{fact_source.excerpt}"
|
|
174
|
+
puts "Confidence: #{fact_source.confidence}"
|
|
175
175
|
end
|
|
176
176
|
```
|
|
177
177
|
|
|
@@ -9,9 +9,9 @@ require 'fact_db'
|
|
|
9
9
|
|
|
10
10
|
# Configure
|
|
11
11
|
FactDb.configure do |config|
|
|
12
|
-
config.
|
|
13
|
-
config.
|
|
14
|
-
config.
|
|
12
|
+
config.database.url = ENV['DATABASE_URL']
|
|
13
|
+
config.llm.provider = :openai
|
|
14
|
+
config.llm.api_key = ENV['OPENAI_API_KEY']
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
# Create facts instance
|
|
@@ -85,7 +85,7 @@ fact1 = facts.fact_service.create(
|
|
|
85
85
|
{ entity: microsoft, role: "organization", text: "Microsoft" }
|
|
86
86
|
],
|
|
87
87
|
sources: [
|
|
88
|
-
{
|
|
88
|
+
{ source: email, type: "primary" }
|
|
89
89
|
]
|
|
90
90
|
)
|
|
91
91
|
|
|
@@ -97,7 +97,7 @@ fact2 = facts.fact_service.create(
|
|
|
97
97
|
{ entity: sarah, role: "object", text: "Sarah Johnson" }
|
|
98
98
|
],
|
|
99
99
|
sources: [
|
|
100
|
-
{
|
|
100
|
+
{ source: email, type: "primary" }
|
|
101
101
|
]
|
|
102
102
|
)
|
|
103
103
|
|
|
@@ -109,7 +109,7 @@ fact3 = facts.fact_service.create(
|
|
|
109
109
|
{ entity: platform_team, role: "organization", text: "Platform Team" }
|
|
110
110
|
],
|
|
111
111
|
sources: [
|
|
112
|
-
{
|
|
112
|
+
{ source: email, type: "primary" }
|
|
113
113
|
]
|
|
114
114
|
)
|
|
115
115
|
|
|
@@ -124,7 +124,7 @@ extracted = facts.extract_facts(email.id, extractor: :llm)
|
|
|
124
124
|
|
|
125
125
|
puts "LLM extracted #{extracted.count} facts:"
|
|
126
126
|
extracted.each do |fact|
|
|
127
|
-
puts " - #{fact.
|
|
127
|
+
puts " - #{fact.text}"
|
|
128
128
|
end
|
|
129
129
|
```
|
|
130
130
|
|
|
@@ -134,13 +134,13 @@ end
|
|
|
134
134
|
# Current facts about Paula
|
|
135
135
|
puts "\nCurrent facts about Paula:"
|
|
136
136
|
facts.current_facts_for(paula.id).each do |fact|
|
|
137
|
-
puts " - #{fact.
|
|
137
|
+
puts " - #{fact.text}"
|
|
138
138
|
end
|
|
139
139
|
|
|
140
140
|
# Facts about Microsoft
|
|
141
141
|
puts "\nFacts about Microsoft:"
|
|
142
142
|
facts.query_facts(entity: microsoft.id).each do |fact|
|
|
143
|
-
puts " - #{fact.
|
|
143
|
+
puts " - #{fact.text}"
|
|
144
144
|
end
|
|
145
145
|
```
|
|
146
146
|
|
|
@@ -149,11 +149,11 @@ end
|
|
|
149
149
|
```ruby
|
|
150
150
|
# Resolve a name
|
|
151
151
|
resolved = facts.resolve_entity("Paula")
|
|
152
|
-
puts "\n'Paula' resolves to: #{resolved&.
|
|
152
|
+
puts "\n'Paula' resolves to: #{resolved&.name}"
|
|
153
153
|
|
|
154
154
|
# Type-constrained resolution
|
|
155
155
|
person = facts.resolve_entity("Paula", type: :person)
|
|
156
|
-
puts "'Paula' as person: #{person&.
|
|
156
|
+
puts "'Paula' as person: #{person&.name}"
|
|
157
157
|
```
|
|
158
158
|
|
|
159
159
|
## Update Facts (Supersession)
|
|
@@ -167,8 +167,8 @@ new_fact = facts.fact_service.resolver.supersede(
|
|
|
167
167
|
)
|
|
168
168
|
|
|
169
169
|
puts "\nSuperseded fact:"
|
|
170
|
-
puts " Old: #{fact1.reload.
|
|
171
|
-
puts " New: #{new_fact.
|
|
170
|
+
puts " Old: #{fact1.reload.text} (#{fact1.status})"
|
|
171
|
+
puts " New: #{new_fact.text} (#{new_fact.status})"
|
|
172
172
|
```
|
|
173
173
|
|
|
174
174
|
## Timeline
|
|
@@ -178,7 +178,7 @@ puts " New: #{new_fact.fact_text} (#{new_fact.status})"
|
|
|
178
178
|
puts "\nPaula's timeline:"
|
|
179
179
|
facts.timeline_for(paula.id).each do |fact|
|
|
180
180
|
valid = fact.invalid_at ? "#{fact.valid_at} - #{fact.invalid_at}" : "#{fact.valid_at} - present"
|
|
181
|
-
puts " #{valid}: #{fact.
|
|
181
|
+
puts " #{valid}: #{fact.text}"
|
|
182
182
|
end
|
|
183
183
|
```
|
|
184
184
|
|
|
@@ -188,13 +188,13 @@ end
|
|
|
188
188
|
# What did we know before promotion?
|
|
189
189
|
puts "\nFacts about Paula on March 1, 2024:"
|
|
190
190
|
facts.facts_at(Date.parse("2024-03-01"), entity: paula.id).each do |fact|
|
|
191
|
-
puts " - #{fact.
|
|
191
|
+
puts " - #{fact.text}"
|
|
192
192
|
end
|
|
193
193
|
|
|
194
194
|
# What do we know after promotion?
|
|
195
195
|
puts "\nFacts about Paula on July 1, 2024:"
|
|
196
196
|
facts.facts_at(Date.parse("2024-07-01"), entity: paula.id).each do |fact|
|
|
197
|
-
puts " - #{fact.
|
|
197
|
+
puts " - #{fact.text}"
|
|
198
198
|
end
|
|
199
199
|
```
|
|
200
200
|
|
|
@@ -206,13 +206,13 @@ require 'fact_db'
|
|
|
206
206
|
|
|
207
207
|
# Setup
|
|
208
208
|
FactDb.configure do |config|
|
|
209
|
-
config.
|
|
209
|
+
config.database.url = ENV['DATABASE_URL'] || 'postgresql://localhost/fact_db'
|
|
210
210
|
end
|
|
211
211
|
|
|
212
212
|
facts = FactDb.new
|
|
213
213
|
|
|
214
214
|
# Ingest
|
|
215
|
-
|
|
215
|
+
source = facts.ingest("Paula joined Microsoft on Jan 10, 2024", type: :note)
|
|
216
216
|
|
|
217
217
|
# Create entities
|
|
218
218
|
paula = facts.entity_service.create("Paula", type: :person)
|
|
@@ -226,10 +226,10 @@ fact = facts.fact_service.create(
|
|
|
226
226
|
{ entity: paula, role: "subject", text: "Paula" },
|
|
227
227
|
{ entity: microsoft, role: "organization", text: "Microsoft" }
|
|
228
228
|
],
|
|
229
|
-
sources: [{
|
|
229
|
+
sources: [{ source: source, type: "primary" }]
|
|
230
230
|
)
|
|
231
231
|
|
|
232
232
|
# Query
|
|
233
233
|
puts "Current facts about Paula:"
|
|
234
|
-
facts.current_facts_for(paula.id).each { |f| puts " - #{f.
|
|
234
|
+
facts.current_facts_for(paula.id).each { |f| puts " - #{f.text}" }
|
|
235
235
|
```
|
|
@@ -12,9 +12,9 @@ An HR system that tracks employee facts over time, maintaining a complete audit
|
|
|
12
12
|
require 'fact_db'
|
|
13
13
|
|
|
14
14
|
FactDb.configure do |config|
|
|
15
|
-
config.
|
|
16
|
-
config.
|
|
17
|
-
config.
|
|
15
|
+
config.database.url = ENV['DATABASE_URL']
|
|
16
|
+
config.llm.provider = :openai
|
|
17
|
+
config.llm.api_key = ENV['OPENAI_API_KEY']
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
facts = FactDb.new
|
|
@@ -96,14 +96,14 @@ facts.fact_service.create(
|
|
|
96
96
|
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
97
97
|
{ entity: acme, role: "organization", text: "Acme Corporation" }
|
|
98
98
|
],
|
|
99
|
-
sources: [{
|
|
99
|
+
sources: [{ source: offer_letter, type: "primary" }]
|
|
100
100
|
)
|
|
101
101
|
|
|
102
102
|
facts.fact_service.create(
|
|
103
103
|
"Paula Chen's title is Software Engineer",
|
|
104
104
|
valid_at: Date.parse("2022-03-01"),
|
|
105
105
|
mentions: [{ entity: paula, role: "subject", text: "Paula Chen" }],
|
|
106
|
-
sources: [{
|
|
106
|
+
sources: [{ source: offer_letter, type: "primary" }]
|
|
107
107
|
)
|
|
108
108
|
|
|
109
109
|
facts.fact_service.create(
|
|
@@ -113,7 +113,7 @@ facts.fact_service.create(
|
|
|
113
113
|
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
114
114
|
{ entity: john, role: "object", text: "John Smith" }
|
|
115
115
|
],
|
|
116
|
-
sources: [{
|
|
116
|
+
sources: [{ source: offer_letter, type: "primary" }]
|
|
117
117
|
)
|
|
118
118
|
|
|
119
119
|
facts.fact_service.create(
|
|
@@ -123,7 +123,7 @@ facts.fact_service.create(
|
|
|
123
123
|
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
124
124
|
{ entity: engineering, role: "organization", text: "Engineering" }
|
|
125
125
|
],
|
|
126
|
-
sources: [{
|
|
126
|
+
sources: [{ source: offer_letter, type: "primary" }]
|
|
127
127
|
)
|
|
128
128
|
```
|
|
129
129
|
|
|
@@ -233,7 +233,7 @@ facts.timeline_for(paula.id).each do |fact|
|
|
|
233
233
|
"#{fact.valid_at.to_date} - present"
|
|
234
234
|
|
|
235
235
|
status = fact.superseded? ? " [superseded]" : ""
|
|
236
|
-
puts "#{valid}: #{fact.
|
|
236
|
+
puts "#{valid}: #{fact.text}#{status}"
|
|
237
237
|
end
|
|
238
238
|
```
|
|
239
239
|
|
|
@@ -265,7 +265,7 @@ dates = [
|
|
|
265
265
|
dates.each do |date|
|
|
266
266
|
puts "\nPaula's status on #{date}:"
|
|
267
267
|
facts.facts_at(date, entity: paula.id).each do |fact|
|
|
268
|
-
puts " - #{fact.
|
|
268
|
+
puts " - #{fact.text}"
|
|
269
269
|
end
|
|
270
270
|
end
|
|
271
271
|
```
|
|
@@ -278,26 +278,26 @@ def employment_report(facts, employee_id)
|
|
|
278
278
|
current = facts.current_facts_for(employee_id)
|
|
279
279
|
|
|
280
280
|
report = {
|
|
281
|
-
name: employee.
|
|
281
|
+
name: employee.name,
|
|
282
282
|
current_status: {},
|
|
283
283
|
history: []
|
|
284
284
|
}
|
|
285
285
|
|
|
286
286
|
# Current status
|
|
287
287
|
current.each do |fact|
|
|
288
|
-
if fact.
|
|
289
|
-
report[:current_status][:title] = fact.
|
|
290
|
-
elsif fact.
|
|
291
|
-
report[:current_status][:department] = fact.
|
|
292
|
-
elsif fact.
|
|
293
|
-
report[:current_status][:manager] = fact.
|
|
288
|
+
if fact.text.include?("title is")
|
|
289
|
+
report[:current_status][:title] = fact.text.split("title is ").last
|
|
290
|
+
elsif fact.text.include?("works in")
|
|
291
|
+
report[:current_status][:department] = fact.text.split("works in ").last
|
|
292
|
+
elsif fact.text.include?("reports to")
|
|
293
|
+
report[:current_status][:manager] = fact.text.split("reports to ").last
|
|
294
294
|
end
|
|
295
295
|
end
|
|
296
296
|
|
|
297
297
|
# Employment history
|
|
298
298
|
report[:history] = facts.timeline_for(employee_id).map do |fact|
|
|
299
299
|
{
|
|
300
|
-
fact: fact.
|
|
300
|
+
fact: fact.text,
|
|
301
301
|
from: fact.valid_at,
|
|
302
302
|
to: fact.invalid_at,
|
|
303
303
|
status: fact.status
|
data/docs/examples/index.md
CHANGED
|
@@ -19,10 +19,10 @@ Practical examples demonstrating FactDb usage patterns.
|
|
|
19
19
|
facts = FactDb.new
|
|
20
20
|
|
|
21
21
|
# Ingest content
|
|
22
|
-
|
|
22
|
+
source = facts.ingest(document_text, type: :document)
|
|
23
23
|
|
|
24
24
|
# Extract facts
|
|
25
|
-
extracted = facts.extract_facts(
|
|
25
|
+
extracted = facts.extract_facts(source.id, extractor: :llm)
|
|
26
26
|
```
|
|
27
27
|
|
|
28
28
|
### Query Current State
|
|
@@ -45,7 +45,7 @@ historical = facts.facts_at(Date.parse("2023-06-15"), entity: paula.id)
|
|
|
45
45
|
# Build complete timeline
|
|
46
46
|
timeline = facts.timeline_for(paula.id)
|
|
47
47
|
timeline.each do |fact|
|
|
48
|
-
puts "#{fact.valid_at}: #{fact.
|
|
48
|
+
puts "#{fact.valid_at}: #{fact.text}"
|
|
49
49
|
end
|
|
50
50
|
```
|
|
51
51
|
|
|
@@ -60,5 +60,5 @@ entity = facts.resolve_entity("Paula Chen", type: :person)
|
|
|
60
60
|
|
|
61
61
|
```ruby
|
|
62
62
|
# Process multiple documents
|
|
63
|
-
results = facts.batch_extract(
|
|
63
|
+
results = facts.batch_extract(source_ids, parallel: true)
|
|
64
64
|
```
|
|
@@ -12,9 +12,9 @@ A news monitoring system that extracts facts from articles and tracks how inform
|
|
|
12
12
|
require 'fact_db'
|
|
13
13
|
|
|
14
14
|
FactDb.configure do |config|
|
|
15
|
-
config.
|
|
16
|
-
config.
|
|
17
|
-
config.
|
|
15
|
+
config.database.url = ENV['DATABASE_URL']
|
|
16
|
+
config.llm.provider = :openai
|
|
17
|
+
config.llm.api_key = ENV['OPENAI_API_KEY']
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
facts = FactDb.new
|
|
@@ -109,7 +109,7 @@ end
|
|
|
109
109
|
# List all extracted entities
|
|
110
110
|
puts "\nExtracted Entities:"
|
|
111
111
|
FactDb::Models::Entity.all.each do |entity|
|
|
112
|
-
puts " #{entity.
|
|
112
|
+
puts " #{entity.name} (#{entity.type})"
|
|
113
113
|
end
|
|
114
114
|
```
|
|
115
115
|
|
|
@@ -119,13 +119,13 @@ end
|
|
|
119
119
|
# CEO-related facts
|
|
120
120
|
puts "\nCEO Facts:"
|
|
121
121
|
facts.query_facts(topic: "CEO").each do |fact|
|
|
122
|
-
puts " #{fact.valid_at.to_date}: #{fact.
|
|
122
|
+
puts " #{fact.valid_at.to_date}: #{fact.text}"
|
|
123
123
|
end
|
|
124
124
|
|
|
125
125
|
# Acquisition facts
|
|
126
126
|
puts "\nAcquisition Facts:"
|
|
127
127
|
facts.query_facts(topic: "acquisition").each do |fact|
|
|
128
|
-
puts " #{fact.valid_at.to_date}: #{fact.
|
|
128
|
+
puts " #{fact.valid_at.to_date}: #{fact.text}"
|
|
129
129
|
end
|
|
130
130
|
```
|
|
131
131
|
|
|
@@ -138,8 +138,8 @@ techcorp = facts.resolve_entity("TechCorp", type: :organization)
|
|
|
138
138
|
# Timeline of TechCorp facts
|
|
139
139
|
puts "\nTechCorp Timeline:"
|
|
140
140
|
facts.timeline_for(techcorp.id).each do |fact|
|
|
141
|
-
source = fact.fact_sources.first&.
|
|
142
|
-
puts " #{fact.valid_at.to_date}: #{fact.
|
|
141
|
+
source = fact.fact_sources.first&.source&.title || "Unknown"
|
|
142
|
+
puts " #{fact.valid_at.to_date}: #{fact.text}"
|
|
143
143
|
puts " Source: #{source}"
|
|
144
144
|
end
|
|
145
145
|
```
|
|
@@ -164,8 +164,8 @@ if acquisition_fact
|
|
|
164
164
|
)
|
|
165
165
|
|
|
166
166
|
puts "\nAcquisition status updated:"
|
|
167
|
-
puts " Original: #{acquisition_fact.reload.
|
|
168
|
-
puts " Updated: #{acquisition_fact.superseded_by.
|
|
167
|
+
puts " Original: #{acquisition_fact.reload.text} (#{acquisition_fact.status})"
|
|
168
|
+
puts " Updated: #{acquisition_fact.superseded_by.text}"
|
|
169
169
|
end
|
|
170
170
|
```
|
|
171
171
|
|
|
@@ -197,14 +197,14 @@ def company_report(facts, company_name)
|
|
|
197
197
|
current_facts = facts.current_facts_for(company.id)
|
|
198
198
|
|
|
199
199
|
{
|
|
200
|
-
company: company.
|
|
201
|
-
current_facts: current_facts.map(&:
|
|
200
|
+
company: company.name,
|
|
201
|
+
current_facts: current_facts.map(&:text),
|
|
202
202
|
leadership: extract_leadership(current_facts),
|
|
203
203
|
timeline: facts.timeline_for(company.id).map { |f|
|
|
204
204
|
{
|
|
205
205
|
date: f.valid_at,
|
|
206
|
-
fact: f.
|
|
207
|
-
source: f.fact_sources.first&.
|
|
206
|
+
fact: f.text,
|
|
207
|
+
source: f.fact_sources.first&.source&.title
|
|
208
208
|
}
|
|
209
209
|
}
|
|
210
210
|
}
|
|
@@ -213,8 +213,8 @@ end
|
|
|
213
213
|
def extract_leadership(facts)
|
|
214
214
|
leadership = {}
|
|
215
215
|
facts.each do |fact|
|
|
216
|
-
if fact.
|
|
217
|
-
leadership[:ceo] = fact.entity_mentions.find { |m| m.mention_role == "subject" }&.entity&.
|
|
216
|
+
if fact.text =~ /CEO/
|
|
217
|
+
leadership[:ceo] = fact.entity_mentions.find { |m| m.mention_role == "subject" }&.entity&.name
|
|
218
218
|
end
|
|
219
219
|
end
|
|
220
220
|
leadership
|
|
@@ -228,19 +228,19 @@ puts JSON.pretty_generate(report)
|
|
|
228
228
|
|
|
229
229
|
```ruby
|
|
230
230
|
def process_news_feed(facts, articles)
|
|
231
|
-
|
|
232
|
-
|
|
231
|
+
source_ids = articles.map do |article|
|
|
232
|
+
source = facts.ingest(
|
|
233
233
|
article[:text],
|
|
234
234
|
type: :article,
|
|
235
235
|
title: article[:title],
|
|
236
236
|
source_uri: article[:url],
|
|
237
237
|
captured_at: article[:published_at]
|
|
238
238
|
)
|
|
239
|
-
|
|
239
|
+
source.id
|
|
240
240
|
end
|
|
241
241
|
|
|
242
242
|
# Parallel extraction
|
|
243
|
-
results = facts.batch_extract(
|
|
243
|
+
results = facts.batch_extract(source_ids, extractor: :llm)
|
|
244
244
|
|
|
245
245
|
{
|
|
246
246
|
processed: results.count,
|
|
@@ -273,10 +273,10 @@ def monitor_topic(facts, topic, since: 1.week.ago)
|
|
|
273
273
|
new_facts: matching.count,
|
|
274
274
|
facts: matching.map { |f|
|
|
275
275
|
{
|
|
276
|
-
text: f.
|
|
276
|
+
text: f.text,
|
|
277
277
|
date: f.valid_at,
|
|
278
|
-
source: f.fact_sources.first&.
|
|
279
|
-
entities: f.entity_mentions.map { |m| m.entity.
|
|
278
|
+
source: f.fact_sources.first&.source&.title,
|
|
279
|
+
entities: f.entity_mentions.map { |m| m.entity.name }
|
|
280
280
|
}
|
|
281
281
|
}
|
|
282
282
|
}
|