fact_db 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/CHANGELOG.md +48 -0
- data/COMMITS.md +196 -0
- data/README.md +102 -0
- data/Rakefile +41 -0
- data/db/migrate/001_enable_extensions.rb +7 -0
- data/db/migrate/002_create_contents.rb +44 -0
- data/db/migrate/003_create_entities.rb +36 -0
- data/db/migrate/004_create_entity_aliases.rb +18 -0
- data/db/migrate/005_create_facts.rb +65 -0
- data/db/migrate/006_create_entity_mentions.rb +18 -0
- data/db/migrate/007_create_fact_sources.rb +18 -0
- data/docs/api/extractors/index.md +71 -0
- data/docs/api/extractors/llm.md +162 -0
- data/docs/api/extractors/manual.md +92 -0
- data/docs/api/extractors/rule-based.md +165 -0
- data/docs/api/facts.md +300 -0
- data/docs/api/index.md +66 -0
- data/docs/api/models/content.md +165 -0
- data/docs/api/models/entity.md +202 -0
- data/docs/api/models/fact.md +270 -0
- data/docs/api/models/index.md +77 -0
- data/docs/api/pipeline/extraction.md +175 -0
- data/docs/api/pipeline/index.md +72 -0
- data/docs/api/pipeline/resolution.md +209 -0
- data/docs/api/services/content-service.md +166 -0
- data/docs/api/services/entity-service.md +202 -0
- data/docs/api/services/fact-service.md +223 -0
- data/docs/api/services/index.md +55 -0
- data/docs/architecture/database-schema.md +293 -0
- data/docs/architecture/entity-resolution.md +293 -0
- data/docs/architecture/index.md +149 -0
- data/docs/architecture/temporal-facts.md +268 -0
- data/docs/architecture/three-layer-model.md +242 -0
- data/docs/assets/css/custom.css +137 -0
- data/docs/assets/fact_db.jpg +0 -0
- data/docs/assets/images/fact_db.jpg +0 -0
- data/docs/concepts.md +183 -0
- data/docs/examples/basic-usage.md +235 -0
- data/docs/examples/hr-onboarding.md +312 -0
- data/docs/examples/index.md +64 -0
- data/docs/examples/news-analysis.md +288 -0
- data/docs/getting-started/database-setup.md +170 -0
- data/docs/getting-started/index.md +71 -0
- data/docs/getting-started/installation.md +98 -0
- data/docs/getting-started/quick-start.md +191 -0
- data/docs/guides/batch-processing.md +325 -0
- data/docs/guides/configuration.md +243 -0
- data/docs/guides/entity-management.md +364 -0
- data/docs/guides/extracting-facts.md +299 -0
- data/docs/guides/index.md +22 -0
- data/docs/guides/ingesting-content.md +252 -0
- data/docs/guides/llm-integration.md +299 -0
- data/docs/guides/temporal-queries.md +315 -0
- data/docs/index.md +121 -0
- data/examples/README.md +130 -0
- data/examples/basic_usage.rb +164 -0
- data/examples/entity_management.rb +216 -0
- data/examples/hr_system.rb +428 -0
- data/examples/rule_based_extraction.rb +258 -0
- data/examples/temporal_queries.rb +245 -0
- data/lib/fact_db/config.rb +71 -0
- data/lib/fact_db/database.rb +45 -0
- data/lib/fact_db/errors.rb +10 -0
- data/lib/fact_db/extractors/base.rb +117 -0
- data/lib/fact_db/extractors/llm_extractor.rb +179 -0
- data/lib/fact_db/extractors/manual_extractor.rb +53 -0
- data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
- data/lib/fact_db/llm/adapter.rb +109 -0
- data/lib/fact_db/models/content.rb +62 -0
- data/lib/fact_db/models/entity.rb +84 -0
- data/lib/fact_db/models/entity_alias.rb +26 -0
- data/lib/fact_db/models/entity_mention.rb +33 -0
- data/lib/fact_db/models/fact.rb +192 -0
- data/lib/fact_db/models/fact_source.rb +35 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
- data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
- data/lib/fact_db/resolution/entity_resolver.rb +261 -0
- data/lib/fact_db/resolution/fact_resolver.rb +259 -0
- data/lib/fact_db/services/content_service.rb +93 -0
- data/lib/fact_db/services/entity_service.rb +150 -0
- data/lib/fact_db/services/fact_service.rb +193 -0
- data/lib/fact_db/temporal/query.rb +125 -0
- data/lib/fact_db/temporal/timeline.rb +134 -0
- data/lib/fact_db/version.rb +5 -0
- data/lib/fact_db.rb +141 -0
- data/mkdocs.yml +198 -0
- metadata +288 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
# HR Onboarding Example
|
|
2
|
+
|
|
3
|
+
Track employee lifecycle events - hiring, promotions, transfers, and departures.
|
|
4
|
+
|
|
5
|
+
## Scenario
|
|
6
|
+
|
|
7
|
+
An HR system that tracks employee facts over time, maintaining a complete audit trail of employment events.
|
|
8
|
+
|
|
9
|
+
## Setup
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
require 'fact_db'
|
|
13
|
+
|
|
14
|
+
FactDb.configure do |config|
|
|
15
|
+
config.database_url = ENV['DATABASE_URL']
|
|
16
|
+
config.llm_provider = :openai
|
|
17
|
+
config.llm_api_key = ENV['OPENAI_API_KEY']
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
facts = FactDb.new
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Create Organization Structure
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
# Company
|
|
27
|
+
acme = facts.entity_service.create(
|
|
28
|
+
"Acme Corporation",
|
|
29
|
+
type: :organization,
|
|
30
|
+
aliases: ["Acme", "Acme Corp"]
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Departments
|
|
34
|
+
engineering = facts.entity_service.create(
|
|
35
|
+
"Engineering Department",
|
|
36
|
+
type: :organization,
|
|
37
|
+
aliases: ["Engineering", "Eng"]
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
sales = facts.entity_service.create(
|
|
41
|
+
"Sales Department",
|
|
42
|
+
type: :organization,
|
|
43
|
+
aliases: ["Sales"]
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Locations
|
|
47
|
+
hq = facts.entity_service.create(
|
|
48
|
+
"Headquarters",
|
|
49
|
+
type: :place,
|
|
50
|
+
aliases: ["HQ", "Main Office"],
|
|
51
|
+
metadata: { address: "123 Main St, San Francisco, CA" }
|
|
52
|
+
)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Track Hiring Event
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
# Ingest offer letter
|
|
59
|
+
offer_letter = facts.ingest(
|
|
60
|
+
<<~TEXT,
|
|
61
|
+
Dear Paula Chen,
|
|
62
|
+
|
|
63
|
+
We are pleased to offer you the position of Software Engineer
|
|
64
|
+
at Acme Corporation, starting March 1, 2022.
|
|
65
|
+
|
|
66
|
+
Your starting salary will be $120,000 per year.
|
|
67
|
+
You will report to John Smith, Engineering Manager.
|
|
68
|
+
|
|
69
|
+
Location: Headquarters, San Francisco
|
|
70
|
+
TEXT
|
|
71
|
+
type: :document,
|
|
72
|
+
title: "Offer Letter - Paula Chen",
|
|
73
|
+
captured_at: Date.parse("2022-02-15")
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Create employee
|
|
77
|
+
paula = facts.entity_service.create(
|
|
78
|
+
"Paula Chen",
|
|
79
|
+
type: :person,
|
|
80
|
+
aliases: ["Paula"],
|
|
81
|
+
metadata: { employee_id: "E001" }
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
john = facts.entity_service.create(
|
|
85
|
+
"John Smith",
|
|
86
|
+
type: :person,
|
|
87
|
+
aliases: ["John"],
|
|
88
|
+
metadata: { employee_id: "M001" }
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Create employment facts
|
|
92
|
+
facts.fact_service.create(
|
|
93
|
+
"Paula Chen is employed at Acme Corporation",
|
|
94
|
+
valid_at: Date.parse("2022-03-01"),
|
|
95
|
+
mentions: [
|
|
96
|
+
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
97
|
+
{ entity: acme, role: "organization", text: "Acme Corporation" }
|
|
98
|
+
],
|
|
99
|
+
sources: [{ content: offer_letter, type: "primary" }]
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
facts.fact_service.create(
|
|
103
|
+
"Paula Chen's title is Software Engineer",
|
|
104
|
+
valid_at: Date.parse("2022-03-01"),
|
|
105
|
+
mentions: [{ entity: paula, role: "subject", text: "Paula Chen" }],
|
|
106
|
+
sources: [{ content: offer_letter, type: "primary" }]
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
facts.fact_service.create(
|
|
110
|
+
"Paula Chen reports to John Smith",
|
|
111
|
+
valid_at: Date.parse("2022-03-01"),
|
|
112
|
+
mentions: [
|
|
113
|
+
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
114
|
+
{ entity: john, role: "object", text: "John Smith" }
|
|
115
|
+
],
|
|
116
|
+
sources: [{ content: offer_letter, type: "primary" }]
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
facts.fact_service.create(
|
|
120
|
+
"Paula Chen works in Engineering Department",
|
|
121
|
+
valid_at: Date.parse("2022-03-01"),
|
|
122
|
+
mentions: [
|
|
123
|
+
{ entity: paula, role: "subject", text: "Paula Chen" },
|
|
124
|
+
{ entity: engineering, role: "organization", text: "Engineering" }
|
|
125
|
+
],
|
|
126
|
+
sources: [{ content: offer_letter, type: "primary" }]
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Track Promotion
|
|
131
|
+
|
|
132
|
+
```ruby
|
|
133
|
+
# Ingest promotion letter
|
|
134
|
+
promotion = facts.ingest(
|
|
135
|
+
<<~TEXT,
|
|
136
|
+
Dear Paula,
|
|
137
|
+
|
|
138
|
+
Congratulations! Effective January 15, 2023, you have been
|
|
139
|
+
promoted to Senior Software Engineer.
|
|
140
|
+
|
|
141
|
+
Your new salary will be $145,000 per year.
|
|
142
|
+
TEXT
|
|
143
|
+
type: :document,
|
|
144
|
+
title: "Promotion Letter - Paula Chen",
|
|
145
|
+
captured_at: Date.parse("2023-01-10")
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
# Supersede title fact
|
|
149
|
+
title_fact = FactDb::Models::Fact
|
|
150
|
+
.mentioning_entity(paula.id)
|
|
151
|
+
.search_text("title")
|
|
152
|
+
.canonical
|
|
153
|
+
.first
|
|
154
|
+
|
|
155
|
+
facts.fact_service.resolver.supersede(
|
|
156
|
+
title_fact.id,
|
|
157
|
+
"Paula Chen's title is Senior Software Engineer",
|
|
158
|
+
valid_at: Date.parse("2023-01-15")
|
|
159
|
+
)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Track Transfer
|
|
163
|
+
|
|
164
|
+
```ruby
|
|
165
|
+
# Ingest transfer notice
|
|
166
|
+
transfer = facts.ingest(
|
|
167
|
+
<<~TEXT,
|
|
168
|
+
Effective July 1, 2023, Paula Chen will transfer from
|
|
169
|
+
Engineering to Sales as Sales Engineer, reporting to
|
|
170
|
+
Maria Garcia.
|
|
171
|
+
TEXT
|
|
172
|
+
type: :document,
|
|
173
|
+
title: "Transfer Notice - Paula Chen",
|
|
174
|
+
captured_at: Date.parse("2023-06-15")
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
maria = facts.entity_service.create(
|
|
178
|
+
"Maria Garcia",
|
|
179
|
+
type: :person,
|
|
180
|
+
metadata: { employee_id: "M002" }
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Supersede department fact
|
|
184
|
+
dept_fact = FactDb::Models::Fact
|
|
185
|
+
.mentioning_entity(paula.id)
|
|
186
|
+
.search_text("Department")
|
|
187
|
+
.canonical
|
|
188
|
+
.first
|
|
189
|
+
|
|
190
|
+
facts.fact_service.resolver.supersede(
|
|
191
|
+
dept_fact.id,
|
|
192
|
+
"Paula Chen works in Sales Department",
|
|
193
|
+
valid_at: Date.parse("2023-07-01")
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
# Supersede manager fact
|
|
197
|
+
manager_fact = FactDb::Models::Fact
|
|
198
|
+
.mentioning_entity(paula.id)
|
|
199
|
+
.search_text("reports to")
|
|
200
|
+
.canonical
|
|
201
|
+
.first
|
|
202
|
+
|
|
203
|
+
facts.fact_service.resolver.supersede(
|
|
204
|
+
manager_fact.id,
|
|
205
|
+
"Paula Chen reports to Maria Garcia",
|
|
206
|
+
valid_at: Date.parse("2023-07-01")
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# Supersede title
|
|
210
|
+
title_fact = FactDb::Models::Fact
|
|
211
|
+
.mentioning_entity(paula.id)
|
|
212
|
+
.search_text("title")
|
|
213
|
+
.canonical
|
|
214
|
+
.first
|
|
215
|
+
|
|
216
|
+
facts.fact_service.resolver.supersede(
|
|
217
|
+
title_fact.id,
|
|
218
|
+
"Paula Chen's title is Sales Engineer",
|
|
219
|
+
valid_at: Date.parse("2023-07-01")
|
|
220
|
+
)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## Query Employment History
|
|
224
|
+
|
|
225
|
+
```ruby
|
|
226
|
+
# Complete timeline
|
|
227
|
+
puts "Paula Chen's Employment Timeline:"
|
|
228
|
+
puts "=" * 50
|
|
229
|
+
|
|
230
|
+
facts.timeline_for(paula.id).each do |fact|
|
|
231
|
+
valid = fact.invalid_at ?
|
|
232
|
+
"#{fact.valid_at.to_date} - #{fact.invalid_at.to_date}" :
|
|
233
|
+
"#{fact.valid_at.to_date} - present"
|
|
234
|
+
|
|
235
|
+
status = fact.superseded? ? " [superseded]" : ""
|
|
236
|
+
puts "#{valid}: #{fact.fact_text}#{status}"
|
|
237
|
+
end
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
Output:
|
|
241
|
+
```
|
|
242
|
+
Paula Chen's Employment Timeline:
|
|
243
|
+
==================================================
|
|
244
|
+
2022-03-01 - present: Paula Chen is employed at Acme Corporation
|
|
245
|
+
2022-03-01 - 2023-01-15: Paula Chen's title is Software Engineer [superseded]
|
|
246
|
+
2023-01-15 - 2023-07-01: Paula Chen's title is Senior Software Engineer [superseded]
|
|
247
|
+
2023-07-01 - present: Paula Chen's title is Sales Engineer
|
|
248
|
+
2022-03-01 - 2023-07-01: Paula Chen works in Engineering Department [superseded]
|
|
249
|
+
2023-07-01 - present: Paula Chen works in Sales Department
|
|
250
|
+
2022-03-01 - 2023-07-01: Paula Chen reports to John Smith [superseded]
|
|
251
|
+
2023-07-01 - present: Paula Chen reports to Maria Garcia
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## Point-in-Time Queries
|
|
255
|
+
|
|
256
|
+
```ruby
|
|
257
|
+
# What was Paula's status on different dates?
|
|
258
|
+
|
|
259
|
+
dates = [
|
|
260
|
+
Date.parse("2022-06-01"),
|
|
261
|
+
Date.parse("2023-03-01"),
|
|
262
|
+
Date.parse("2023-10-01")
|
|
263
|
+
]
|
|
264
|
+
|
|
265
|
+
dates.each do |date|
|
|
266
|
+
puts "\nPaula's status on #{date}:"
|
|
267
|
+
facts.facts_at(date, entity: paula.id).each do |fact|
|
|
268
|
+
puts " - #{fact.fact_text}"
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## Generate Employment Report
|
|
274
|
+
|
|
275
|
+
```ruby
|
|
276
|
+
def employment_report(facts, employee_id)
|
|
277
|
+
employee = FactDb::Models::Entity.find(employee_id)
|
|
278
|
+
current = facts.current_facts_for(employee_id)
|
|
279
|
+
|
|
280
|
+
report = {
|
|
281
|
+
name: employee.canonical_name,
|
|
282
|
+
current_status: {},
|
|
283
|
+
history: []
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
# Current status
|
|
287
|
+
current.each do |fact|
|
|
288
|
+
if fact.fact_text.include?("title is")
|
|
289
|
+
report[:current_status][:title] = fact.fact_text.split("title is ").last
|
|
290
|
+
elsif fact.fact_text.include?("works in")
|
|
291
|
+
report[:current_status][:department] = fact.fact_text.split("works in ").last
|
|
292
|
+
elsif fact.fact_text.include?("reports to")
|
|
293
|
+
report[:current_status][:manager] = fact.fact_text.split("reports to ").last
|
|
294
|
+
end
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
# Employment history
|
|
298
|
+
report[:history] = facts.timeline_for(employee_id).map do |fact|
|
|
299
|
+
{
|
|
300
|
+
fact: fact.fact_text,
|
|
301
|
+
from: fact.valid_at,
|
|
302
|
+
to: fact.invalid_at,
|
|
303
|
+
status: fact.status
|
|
304
|
+
}
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
report
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
report = employment_report(facts, paula.id)
|
|
311
|
+
puts JSON.pretty_generate(report)
|
|
312
|
+
```
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Examples
|
|
2
|
+
|
|
3
|
+
Practical examples demonstrating FactDb usage patterns.
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
- [Basic Usage](basic-usage.md) - Simple introduction to core functionality
|
|
8
|
+
|
|
9
|
+
## Use Cases
|
|
10
|
+
|
|
11
|
+
- [HR Onboarding](hr-onboarding.md) - Track employee facts over time
|
|
12
|
+
- [News Analysis](news-analysis.md) - Extract facts from news articles
|
|
13
|
+
|
|
14
|
+
## Common Patterns
|
|
15
|
+
|
|
16
|
+
### Ingest and Extract
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
facts = FactDb.new
|
|
20
|
+
|
|
21
|
+
# Ingest content
|
|
22
|
+
content = facts.ingest(document_text, type: :document)
|
|
23
|
+
|
|
24
|
+
# Extract facts
|
|
25
|
+
extracted = facts.extract_facts(content.id, extractor: :llm)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Query Current State
|
|
29
|
+
|
|
30
|
+
```ruby
|
|
31
|
+
# What do we know about Paula now?
|
|
32
|
+
current = facts.current_facts_for(paula.id)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Historical Query
|
|
36
|
+
|
|
37
|
+
```ruby
|
|
38
|
+
# What was true on a specific date? (facts valid at that point in time)
|
|
39
|
+
historical = facts.facts_at(Date.parse("2023-06-15"), entity: paula.id)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Timeline
|
|
43
|
+
|
|
44
|
+
```ruby
|
|
45
|
+
# Build complete timeline
|
|
46
|
+
timeline = facts.timeline_for(paula.id)
|
|
47
|
+
timeline.each do |fact|
|
|
48
|
+
puts "#{fact.valid_at}: #{fact.fact_text}"
|
|
49
|
+
end
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Entity Resolution
|
|
53
|
+
|
|
54
|
+
```ruby
|
|
55
|
+
# Resolve names to entities
|
|
56
|
+
entity = facts.resolve_entity("Paula Chen", type: :person)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Batch Processing
|
|
60
|
+
|
|
61
|
+
```ruby
|
|
62
|
+
# Process multiple documents
|
|
63
|
+
results = facts.batch_extract(content_ids, parallel: true)
|
|
64
|
+
```
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# News Analysis Example
|
|
2
|
+
|
|
3
|
+
Extract and track facts from news articles over time.
|
|
4
|
+
|
|
5
|
+
## Scenario
|
|
6
|
+
|
|
7
|
+
A news monitoring system that extracts facts from articles and tracks how information about companies and people changes over time.
|
|
8
|
+
|
|
9
|
+
## Setup
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
require 'fact_db'
|
|
13
|
+
|
|
14
|
+
FactDb.configure do |config|
|
|
15
|
+
config.database_url = ENV['DATABASE_URL']
|
|
16
|
+
config.llm_provider = :openai
|
|
17
|
+
config.llm_api_key = ENV['OPENAI_API_KEY']
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
facts = FactDb.new
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Ingest News Articles
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
# Article 1: CEO Announcement
|
|
27
|
+
article1 = facts.ingest(
|
|
28
|
+
<<~TEXT,
|
|
29
|
+
TechCorp Appoints New CEO
|
|
30
|
+
|
|
31
|
+
San Francisco, Jan 15, 2024 - TechCorp announced today that
|
|
32
|
+
Jane Williams has been appointed as Chief Executive Officer,
|
|
33
|
+
effective immediately. Williams previously served as COO at
|
|
34
|
+
InnovateTech for 8 years.
|
|
35
|
+
|
|
36
|
+
"We are thrilled to welcome Jane to lead TechCorp into its
|
|
37
|
+
next chapter," said Board Chairman Robert Chen.
|
|
38
|
+
|
|
39
|
+
Williams succeeds Michael Johnson, who is retiring after
|
|
40
|
+
15 years at the helm.
|
|
41
|
+
TEXT
|
|
42
|
+
type: :article,
|
|
43
|
+
title: "TechCorp Appoints New CEO",
|
|
44
|
+
source_uri: "https://news.example.com/techcorp-new-ceo",
|
|
45
|
+
captured_at: Date.parse("2024-01-15"),
|
|
46
|
+
metadata: {
|
|
47
|
+
source: "Tech News Daily",
|
|
48
|
+
author: "Sarah Reporter",
|
|
49
|
+
category: "Business"
|
|
50
|
+
}
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
# Article 2: Earnings Report
|
|
54
|
+
article2 = facts.ingest(
|
|
55
|
+
<<~TEXT,
|
|
56
|
+
TechCorp Reports Record Q4 Earnings
|
|
57
|
+
|
|
58
|
+
San Francisco, Feb 1, 2024 - TechCorp reported quarterly
|
|
59
|
+
revenue of $5.2 billion, up 23% year-over-year. Net income
|
|
60
|
+
reached $800 million.
|
|
61
|
+
|
|
62
|
+
"Our cloud division continues to drive growth," said CEO
|
|
63
|
+
Jane Williams in her first earnings call since taking over.
|
|
64
|
+
|
|
65
|
+
The company also announced plans to acquire DataFlow Inc
|
|
66
|
+
for $1.2 billion, expected to close in Q2 2024.
|
|
67
|
+
TEXT
|
|
68
|
+
type: :article,
|
|
69
|
+
title: "TechCorp Reports Record Q4 Earnings",
|
|
70
|
+
source_uri: "https://news.example.com/techcorp-q4-earnings",
|
|
71
|
+
captured_at: Date.parse("2024-02-01"),
|
|
72
|
+
metadata: { source: "Financial Times", category: "Earnings" }
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
# Article 3: Acquisition Update
|
|
76
|
+
article3 = facts.ingest(
|
|
77
|
+
<<~TEXT,
|
|
78
|
+
TechCorp-DataFlow Deal Falls Through
|
|
79
|
+
|
|
80
|
+
San Francisco, Apr 15, 2024 - TechCorp announced it has
|
|
81
|
+
terminated its planned acquisition of DataFlow Inc, citing
|
|
82
|
+
regulatory concerns.
|
|
83
|
+
|
|
84
|
+
"After careful consideration, we have decided not to proceed
|
|
85
|
+
with the acquisition," said TechCorp CEO Jane Williams.
|
|
86
|
+
TEXT
|
|
87
|
+
type: :article,
|
|
88
|
+
title: "TechCorp-DataFlow Deal Falls Through",
|
|
89
|
+
source_uri: "https://news.example.com/techcorp-dataflow-cancelled",
|
|
90
|
+
captured_at: Date.parse("2024-04-15"),
|
|
91
|
+
metadata: { source: "Business Wire", category: "M&A" }
|
|
92
|
+
)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Extract Facts with LLM
|
|
96
|
+
|
|
97
|
+
```ruby
|
|
98
|
+
# Process all articles
|
|
99
|
+
[article1, article2, article3].each do |article|
|
|
100
|
+
puts "Processing: #{article.title}"
|
|
101
|
+
extracted = facts.extract_facts(article.id, extractor: :llm)
|
|
102
|
+
puts " Extracted #{extracted.count} facts"
|
|
103
|
+
end
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Review Extracted Entities
|
|
107
|
+
|
|
108
|
+
```ruby
|
|
109
|
+
# List all extracted entities
|
|
110
|
+
puts "\nExtracted Entities:"
|
|
111
|
+
FactDb::Models::Entity.all.each do |entity|
|
|
112
|
+
puts " #{entity.canonical_name} (#{entity.entity_type})"
|
|
113
|
+
end
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Query Facts by Topic
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
# CEO-related facts
|
|
120
|
+
puts "\nCEO Facts:"
|
|
121
|
+
facts.query_facts(topic: "CEO").each do |fact|
|
|
122
|
+
puts " #{fact.valid_at.to_date}: #{fact.fact_text}"
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Acquisition facts
|
|
126
|
+
puts "\nAcquisition Facts:"
|
|
127
|
+
facts.query_facts(topic: "acquisition").each do |fact|
|
|
128
|
+
puts " #{fact.valid_at.to_date}: #{fact.fact_text}"
|
|
129
|
+
end
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Track Entity Over Time
|
|
133
|
+
|
|
134
|
+
```ruby
|
|
135
|
+
# Find TechCorp entity
|
|
136
|
+
techcorp = facts.resolve_entity("TechCorp", type: :organization)
|
|
137
|
+
|
|
138
|
+
# Timeline of TechCorp facts
|
|
139
|
+
puts "\nTechCorp Timeline:"
|
|
140
|
+
facts.timeline_for(techcorp.id).each do |fact|
|
|
141
|
+
source = fact.fact_sources.first&.content&.title || "Unknown"
|
|
142
|
+
puts " #{fact.valid_at.to_date}: #{fact.fact_text}"
|
|
143
|
+
puts " Source: #{source}"
|
|
144
|
+
end
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Handle Superseded Information
|
|
148
|
+
|
|
149
|
+
```ruby
|
|
150
|
+
# The acquisition fact extracted from article2 should be superseded by the cancellation reported in article3
|
|
151
|
+
|
|
152
|
+
# Find the original acquisition fact
|
|
153
|
+
acquisition_fact = FactDb::Models::Fact
|
|
154
|
+
.search_text("acquire DataFlow")
|
|
155
|
+
.canonical
|
|
156
|
+
.first
|
|
157
|
+
|
|
158
|
+
if acquisition_fact
|
|
159
|
+
# Supersede with cancelled status
|
|
160
|
+
facts.fact_service.resolver.supersede(
|
|
161
|
+
acquisition_fact.id,
|
|
162
|
+
"TechCorp cancelled its planned acquisition of DataFlow Inc",
|
|
163
|
+
valid_at: Date.parse("2024-04-15")
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
puts "\nAcquisition status updated:"
|
|
167
|
+
puts " Original: #{acquisition_fact.reload.fact_text} (#{acquisition_fact.status})"
|
|
168
|
+
puts " Updated: #{acquisition_fact.superseded_by.fact_text}"
|
|
169
|
+
end
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Corroborate Facts
|
|
173
|
+
|
|
174
|
+
```ruby
|
|
175
|
+
# When several articles state the same fact, record the additional ones as corroboration of the first
|
|
176
|
+
ceo_facts = FactDb::Models::Fact
|
|
177
|
+
.search_text("Jane Williams CEO")
|
|
178
|
+
.canonical
|
|
179
|
+
.to_a
|
|
180
|
+
|
|
181
|
+
if ceo_facts.count > 1
|
|
182
|
+
primary = ceo_facts.first
|
|
183
|
+
ceo_facts[1..].each do |corroborating|
|
|
184
|
+
facts.fact_service.resolver.corroborate(primary.id, corroborating.id)
|
|
185
|
+
end
|
|
186
|
+
puts "\nCEO fact corroborated by #{ceo_facts.count} sources"
|
|
187
|
+
end
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Generate Company Report
|
|
191
|
+
|
|
192
|
+
```ruby
|
|
193
|
+
def company_report(facts, company_name)
|
|
194
|
+
company = facts.resolve_entity(company_name, type: :organization)
|
|
195
|
+
return nil unless company
|
|
196
|
+
|
|
197
|
+
current_facts = facts.current_facts_for(company.id)
|
|
198
|
+
|
|
199
|
+
{
|
|
200
|
+
company: company.canonical_name,
|
|
201
|
+
current_facts: current_facts.map(&:fact_text),
|
|
202
|
+
leadership: extract_leadership(current_facts),
|
|
203
|
+
timeline: facts.timeline_for(company.id).map { |f|
|
|
204
|
+
{
|
|
205
|
+
date: f.valid_at,
|
|
206
|
+
fact: f.fact_text,
|
|
207
|
+
source: f.fact_sources.first&.content&.title
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
def extract_leadership(facts)
|
|
214
|
+
leadership = {}
|
|
215
|
+
facts.each do |fact|
|
|
216
|
+
if fact.fact_text =~ /CEO/
|
|
217
|
+
leadership[:ceo] = fact.entity_mentions.find { |m| m.mention_role == "subject" }&.entity&.canonical_name
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
leadership
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
report = company_report(facts, "TechCorp")
|
|
224
|
+
puts JSON.pretty_generate(report)
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Batch Process News Feed
|
|
228
|
+
|
|
229
|
+
```ruby
|
|
230
|
+
def process_news_feed(facts, articles)
|
|
231
|
+
content_ids = articles.map do |article|
|
|
232
|
+
content = facts.ingest(
|
|
233
|
+
article[:text],
|
|
234
|
+
type: :article,
|
|
235
|
+
title: article[:title],
|
|
236
|
+
source_uri: article[:url],
|
|
237
|
+
captured_at: article[:published_at]
|
|
238
|
+
)
|
|
239
|
+
content.id
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
# Parallel extraction
|
|
243
|
+
results = facts.batch_extract(content_ids, extractor: :llm)
|
|
244
|
+
|
|
245
|
+
{
|
|
246
|
+
processed: results.count,
|
|
247
|
+
successful: results.count { |r| r[:error].nil? },
|
|
248
|
+
total_facts: results.sum { |r| r[:facts].count }
|
|
249
|
+
}
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
# Example usage
|
|
253
|
+
news_feed = [
|
|
254
|
+
{ title: "Article 1", text: "...", url: "...", published_at: Time.now },
|
|
255
|
+
{ title: "Article 2", text: "...", url: "...", published_at: Time.now }
|
|
256
|
+
]
|
|
257
|
+
|
|
258
|
+
stats = process_news_feed(facts, news_feed)
|
|
259
|
+
puts "Processed #{stats[:processed]} articles, extracted #{stats[:total_facts]} facts"
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## Monitor Specific Topics
|
|
263
|
+
|
|
264
|
+
```ruby
|
|
265
|
+
def monitor_topic(facts, topic, since: 1.week.ago)
|
|
266
|
+
matching = FactDb::Models::Fact
|
|
267
|
+
.search_text(topic)
|
|
268
|
+
.where("created_at > ?", since)
|
|
269
|
+
.order(created_at: :desc)
|
|
270
|
+
|
|
271
|
+
{
|
|
272
|
+
topic: topic,
|
|
273
|
+
new_facts: matching.count,
|
|
274
|
+
facts: matching.map { |f|
|
|
275
|
+
{
|
|
276
|
+
text: f.fact_text,
|
|
277
|
+
date: f.valid_at,
|
|
278
|
+
source: f.fact_sources.first&.content&.title,
|
|
279
|
+
entities: f.entity_mentions.map { |m| m.entity.canonical_name }
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
# Monitor acquisitions
|
|
286
|
+
acquisition_updates = monitor_topic(facts, "acquisition")
|
|
287
|
+
puts "Recent acquisition news: #{acquisition_updates[:new_facts]} facts"
|
|
288
|
+
```
|