fact_db 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/CHANGELOG.md +48 -0
- data/COMMITS.md +196 -0
- data/README.md +102 -0
- data/Rakefile +41 -0
- data/db/migrate/001_enable_extensions.rb +7 -0
- data/db/migrate/002_create_contents.rb +44 -0
- data/db/migrate/003_create_entities.rb +36 -0
- data/db/migrate/004_create_entity_aliases.rb +18 -0
- data/db/migrate/005_create_facts.rb +65 -0
- data/db/migrate/006_create_entity_mentions.rb +18 -0
- data/db/migrate/007_create_fact_sources.rb +18 -0
- data/docs/api/extractors/index.md +71 -0
- data/docs/api/extractors/llm.md +162 -0
- data/docs/api/extractors/manual.md +92 -0
- data/docs/api/extractors/rule-based.md +165 -0
- data/docs/api/facts.md +300 -0
- data/docs/api/index.md +66 -0
- data/docs/api/models/content.md +165 -0
- data/docs/api/models/entity.md +202 -0
- data/docs/api/models/fact.md +270 -0
- data/docs/api/models/index.md +77 -0
- data/docs/api/pipeline/extraction.md +175 -0
- data/docs/api/pipeline/index.md +72 -0
- data/docs/api/pipeline/resolution.md +209 -0
- data/docs/api/services/content-service.md +166 -0
- data/docs/api/services/entity-service.md +202 -0
- data/docs/api/services/fact-service.md +223 -0
- data/docs/api/services/index.md +55 -0
- data/docs/architecture/database-schema.md +293 -0
- data/docs/architecture/entity-resolution.md +293 -0
- data/docs/architecture/index.md +149 -0
- data/docs/architecture/temporal-facts.md +268 -0
- data/docs/architecture/three-layer-model.md +242 -0
- data/docs/assets/css/custom.css +137 -0
- data/docs/assets/fact_db.jpg +0 -0
- data/docs/assets/images/fact_db.jpg +0 -0
- data/docs/concepts.md +183 -0
- data/docs/examples/basic-usage.md +235 -0
- data/docs/examples/hr-onboarding.md +312 -0
- data/docs/examples/index.md +64 -0
- data/docs/examples/news-analysis.md +288 -0
- data/docs/getting-started/database-setup.md +170 -0
- data/docs/getting-started/index.md +71 -0
- data/docs/getting-started/installation.md +98 -0
- data/docs/getting-started/quick-start.md +191 -0
- data/docs/guides/batch-processing.md +325 -0
- data/docs/guides/configuration.md +243 -0
- data/docs/guides/entity-management.md +364 -0
- data/docs/guides/extracting-facts.md +299 -0
- data/docs/guides/index.md +22 -0
- data/docs/guides/ingesting-content.md +252 -0
- data/docs/guides/llm-integration.md +299 -0
- data/docs/guides/temporal-queries.md +315 -0
- data/docs/index.md +121 -0
- data/examples/README.md +130 -0
- data/examples/basic_usage.rb +164 -0
- data/examples/entity_management.rb +216 -0
- data/examples/hr_system.rb +428 -0
- data/examples/rule_based_extraction.rb +258 -0
- data/examples/temporal_queries.rb +245 -0
- data/lib/fact_db/config.rb +71 -0
- data/lib/fact_db/database.rb +45 -0
- data/lib/fact_db/errors.rb +10 -0
- data/lib/fact_db/extractors/base.rb +117 -0
- data/lib/fact_db/extractors/llm_extractor.rb +179 -0
- data/lib/fact_db/extractors/manual_extractor.rb +53 -0
- data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
- data/lib/fact_db/llm/adapter.rb +109 -0
- data/lib/fact_db/models/content.rb +62 -0
- data/lib/fact_db/models/entity.rb +84 -0
- data/lib/fact_db/models/entity_alias.rb +26 -0
- data/lib/fact_db/models/entity_mention.rb +33 -0
- data/lib/fact_db/models/fact.rb +192 -0
- data/lib/fact_db/models/fact_source.rb +35 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
- data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
- data/lib/fact_db/resolution/entity_resolver.rb +261 -0
- data/lib/fact_db/resolution/fact_resolver.rb +259 -0
- data/lib/fact_db/services/content_service.rb +93 -0
- data/lib/fact_db/services/entity_service.rb +150 -0
- data/lib/fact_db/services/fact_service.rb +193 -0
- data/lib/fact_db/temporal/query.rb +125 -0
- data/lib/fact_db/temporal/timeline.rb +134 -0
- data/lib/fact_db/version.rb +5 -0
- data/lib/fact_db.rb +141 -0
- data/mkdocs.yml +198 -0
- metadata +288 -0
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# HR System Example for FactDb
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates a practical HR knowledge management system:
|
|
7
|
+
# - Tracking employee information over time
|
|
8
|
+
# - Managing organizational hierarchy
|
|
9
|
+
# - Recording promotions, transfers, and departures
|
|
10
|
+
# - Auditing changes with temporal queries
|
|
11
|
+
# - Detecting conflicts in employee data
|
|
12
|
+
|
|
13
|
+
require "bundler/setup"
|
|
14
|
+
require "fact_db"
|
|
15
|
+
|
|
16
|
+
FactDb.configure do |config|
|
|
17
|
+
config.database_url = ENV.fetch("DATABASE_URL", "postgres://#{ENV['USER']}@localhost/fact_db_demo")
|
|
18
|
+
config.default_extractor = :manual
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Ensure database tables exist
|
|
22
|
+
FactDb::Database.migrate!
|
|
23
|
+
|
|
24
|
+
clock = FactDb.new
|
|
25
|
+
entity_service = clock.entity_service
|
|
26
|
+
fact_service = clock.fact_service
|
|
27
|
+
content_service = clock.content_service
|
|
28
|
+
|
|
29
|
+
puts "=" * 60
|
|
30
|
+
puts "HR Knowledge Management System Demo"
|
|
31
|
+
puts "=" * 60
|
|
32
|
+
|
|
33
|
+
# Section 1: Setup Company Structure
|
|
34
|
+
puts "\n--- Section 1: Setting Up Organization ---\n"
|
|
35
|
+
|
|
36
|
+
# Create the company
|
|
37
|
+
company = entity_service.create(
|
|
38
|
+
"Innovate Corp",
|
|
39
|
+
type: :organization,
|
|
40
|
+
description: "Technology company specializing in AI solutions",
|
|
41
|
+
attributes: { industry: "Technology", founded: "2010" }
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# Create departments
|
|
45
|
+
engineering = entity_service.create(
|
|
46
|
+
"Engineering Department",
|
|
47
|
+
type: :organization,
|
|
48
|
+
description: "Software engineering team",
|
|
49
|
+
attributes: { parent: company.id }
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
product = entity_service.create(
|
|
53
|
+
"Product Department",
|
|
54
|
+
type: :organization,
|
|
55
|
+
description: "Product management team",
|
|
56
|
+
attributes: { parent: company.id }
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
hr_dept = entity_service.create(
|
|
60
|
+
"Human Resources",
|
|
61
|
+
type: :organization,
|
|
62
|
+
description: "HR team",
|
|
63
|
+
attributes: { parent: company.id }
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
puts "Created company: #{company.canonical_name}"
|
|
67
|
+
puts "Created departments: #{engineering.canonical_name}, #{product.canonical_name}, #{hr_dept.canonical_name}"
|
|
68
|
+
|
|
69
|
+
# Create locations
|
|
70
|
+
hq = entity_service.create(
|
|
71
|
+
"San Francisco HQ",
|
|
72
|
+
type: :place,
|
|
73
|
+
aliases: ["SF Office", "Headquarters"],
|
|
74
|
+
attributes: { city: "San Francisco", state: "CA" }
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
remote_office = entity_service.create(
|
|
78
|
+
"Austin Office",
|
|
79
|
+
type: :place,
|
|
80
|
+
aliases: ["Austin TX Office"],
|
|
81
|
+
attributes: { city: "Austin", state: "TX" }
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
puts "Created locations: #{hq.canonical_name}, #{remote_office.canonical_name}"
|
|
85
|
+
|
|
86
|
+
# Section 2: Create Employee Profiles
|
|
87
|
+
puts "\n--- Section 2: Creating Employee Profiles ---\n"
|
|
88
|
+
|
|
89
|
+
employees = {}
|
|
90
|
+
|
|
91
|
+
# CEO
|
|
92
|
+
employees[:ceo] = entity_service.create(
|
|
93
|
+
"Katherine Rodriguez",
|
|
94
|
+
type: :person,
|
|
95
|
+
aliases: ["Kate Rodriguez", "K. Rodriguez"],
|
|
96
|
+
attributes: { employee_id: "EMP001", email: "krodriguez@innovatecorp.com" },
|
|
97
|
+
description: "Chief Executive Officer"
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# VP Engineering
|
|
101
|
+
employees[:vp_eng] = entity_service.create(
|
|
102
|
+
"Marcus Chen",
|
|
103
|
+
type: :person,
|
|
104
|
+
aliases: ["Marc Chen"],
|
|
105
|
+
attributes: { employee_id: "EMP002", email: "mchen@innovatecorp.com" },
|
|
106
|
+
description: "VP of Engineering"
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Senior Engineer
|
|
110
|
+
employees[:senior_eng] = entity_service.create(
|
|
111
|
+
"Priya Sharma",
|
|
112
|
+
type: :person,
|
|
113
|
+
attributes: { employee_id: "EMP003", email: "psharma@innovatecorp.com" },
|
|
114
|
+
description: "Senior Software Engineer"
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# Junior Engineer (will be promoted)
|
|
118
|
+
employees[:junior_eng] = entity_service.create(
|
|
119
|
+
"Alex Kim",
|
|
120
|
+
type: :person,
|
|
121
|
+
attributes: { employee_id: "EMP004", email: "akim@innovatecorp.com" },
|
|
122
|
+
description: "Software Engineer"
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
# Product Manager
|
|
126
|
+
employees[:pm] = entity_service.create(
|
|
127
|
+
"Jordan Taylor",
|
|
128
|
+
type: :person,
|
|
129
|
+
attributes: { employee_id: "EMP005", email: "jtaylor@innovatecorp.com" },
|
|
130
|
+
description: "Product Manager"
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
# HR Manager
|
|
134
|
+
employees[:hr_mgr] = entity_service.create(
|
|
135
|
+
"Michelle Brown",
|
|
136
|
+
type: :person,
|
|
137
|
+
attributes: { employee_id: "EMP006", email: "mbrown@innovatecorp.com" },
|
|
138
|
+
description: "HR Manager"
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
puts "Created #{employees.length} employee profiles"
|
|
142
|
+
|
|
143
|
+
# Section 3: Record Initial Employment Facts
|
|
144
|
+
puts "\n--- Section 3: Recording Employment History ---\n"
|
|
145
|
+
|
|
146
|
+
# Ingest an onboarding document
|
|
147
|
+
onboarding_doc = content_service.create(
|
|
148
|
+
<<~DOC,
|
|
149
|
+
EMPLOYEE ONBOARDING RECORDS - 2020-2024
|
|
150
|
+
|
|
151
|
+
Katherine Rodriguez - Hired as CEO on January 15, 2020
|
|
152
|
+
Marcus Chen - Hired as Engineering Manager on March 1, 2020
|
|
153
|
+
Priya Sharma - Hired as Software Engineer on June 15, 2021
|
|
154
|
+
Alex Kim - Hired as Junior Developer on September 1, 2023
|
|
155
|
+
Jordan Taylor - Hired as Associate PM on February 1, 2022
|
|
156
|
+
Michelle Brown - Hired as HR Coordinator on April 1, 2021
|
|
157
|
+
DOC
|
|
158
|
+
type: :document,
|
|
159
|
+
title: "Historical Onboarding Records"
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
# CEO facts
|
|
163
|
+
ceo_employment = fact_service.create(
|
|
164
|
+
"Katherine Rodriguez is CEO of Innovate Corp",
|
|
165
|
+
valid_at: Date.new(2020, 1, 15),
|
|
166
|
+
mentions: [
|
|
167
|
+
{ entity_id: employees[:ceo].id, role: :subject, text: "Katherine Rodriguez" },
|
|
168
|
+
{ entity_id: company.id, role: :object, text: "Innovate Corp" }
|
|
169
|
+
]
|
|
170
|
+
)
|
|
171
|
+
ceo_employment.add_source(content: onboarding_doc, type: :primary)
|
|
172
|
+
|
|
173
|
+
ceo_location = fact_service.create(
|
|
174
|
+
"Katherine Rodriguez works at San Francisco HQ",
|
|
175
|
+
valid_at: Date.new(2020, 1, 15),
|
|
176
|
+
mentions: [
|
|
177
|
+
{ entity_id: employees[:ceo].id, role: :subject, text: "Katherine Rodriguez" },
|
|
178
|
+
{ entity_id: hq.id, role: :location, text: "San Francisco HQ" }
|
|
179
|
+
]
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
# VP Engineering - with promotion history
|
|
183
|
+
vp_eng_original = fact_service.create(
|
|
184
|
+
"Marcus Chen is Engineering Manager at Innovate Corp",
|
|
185
|
+
valid_at: Date.new(2020, 3, 1),
|
|
186
|
+
invalid_at: Date.new(2023, 1, 1),
|
|
187
|
+
status: :superseded,
|
|
188
|
+
mentions: [
|
|
189
|
+
{ entity_id: employees[:vp_eng].id, role: :subject, text: "Marcus Chen" },
|
|
190
|
+
{ entity_id: company.id, role: :object, text: "Innovate Corp" }
|
|
191
|
+
]
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
vp_eng_current = fact_service.create(
|
|
195
|
+
"Marcus Chen is VP of Engineering at Innovate Corp",
|
|
196
|
+
valid_at: Date.new(2023, 1, 1),
|
|
197
|
+
mentions: [
|
|
198
|
+
{ entity_id: employees[:vp_eng].id, role: :subject, text: "Marcus Chen" },
|
|
199
|
+
{ entity_id: company.id, role: :object, text: "Innovate Corp" }
|
|
200
|
+
]
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
# Other employees
|
|
204
|
+
fact_service.create(
|
|
205
|
+
"Priya Sharma is Senior Software Engineer at Innovate Corp",
|
|
206
|
+
valid_at: Date.new(2021, 6, 15),
|
|
207
|
+
mentions: [
|
|
208
|
+
{ entity_id: employees[:senior_eng].id, role: :subject, text: "Priya Sharma" },
|
|
209
|
+
{ entity_id: engineering.id, role: :object, text: "Engineering Department" }
|
|
210
|
+
]
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
fact_service.create(
|
|
214
|
+
"Priya Sharma reports to Marcus Chen",
|
|
215
|
+
valid_at: Date.new(2021, 6, 15),
|
|
216
|
+
mentions: [
|
|
217
|
+
{ entity_id: employees[:senior_eng].id, role: :subject, text: "Priya Sharma" },
|
|
218
|
+
{ entity_id: employees[:vp_eng].id, role: :object, text: "Marcus Chen" }
|
|
219
|
+
]
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
junior_original = fact_service.create(
|
|
223
|
+
"Alex Kim is Junior Developer at Innovate Corp",
|
|
224
|
+
valid_at: Date.new(2023, 9, 1),
|
|
225
|
+
mentions: [
|
|
226
|
+
{ entity_id: employees[:junior_eng].id, role: :subject, text: "Alex Kim" },
|
|
227
|
+
{ entity_id: engineering.id, role: :object, text: "Engineering Department" }
|
|
228
|
+
]
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
fact_service.create(
|
|
232
|
+
"Jordan Taylor is Product Manager at Innovate Corp",
|
|
233
|
+
valid_at: Date.new(2022, 2, 1),
|
|
234
|
+
mentions: [
|
|
235
|
+
{ entity_id: employees[:pm].id, role: :subject, text: "Jordan Taylor" },
|
|
236
|
+
{ entity_id: product.id, role: :object, text: "Product Department" }
|
|
237
|
+
]
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
hr_original = fact_service.create(
|
|
241
|
+
"Michelle Brown is HR Coordinator at Innovate Corp",
|
|
242
|
+
valid_at: Date.new(2021, 4, 1),
|
|
243
|
+
invalid_at: Date.new(2024, 7, 1),
|
|
244
|
+
status: :superseded,
|
|
245
|
+
mentions: [
|
|
246
|
+
{ entity_id: employees[:hr_mgr].id, role: :subject, text: "Michelle Brown" },
|
|
247
|
+
{ entity_id: hr_dept.id, role: :object, text: "Human Resources" }
|
|
248
|
+
]
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
hr_current = fact_service.create(
|
|
252
|
+
"Michelle Brown is HR Manager at Innovate Corp",
|
|
253
|
+
valid_at: Date.new(2024, 7, 1),
|
|
254
|
+
mentions: [
|
|
255
|
+
{ entity_id: employees[:hr_mgr].id, role: :subject, text: "Michelle Brown" },
|
|
256
|
+
{ entity_id: hr_dept.id, role: :object, text: "Human Resources" }
|
|
257
|
+
]
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
puts "Recorded employment history facts"
|
|
261
|
+
|
|
262
|
+
# Section 4: Process a Promotion
|
|
263
|
+
puts "\n--- Section 4: Processing a Promotion ---\n"
|
|
264
|
+
|
|
265
|
+
# Ingest the promotion memo
|
|
266
|
+
promotion_memo = content_service.create(
|
|
267
|
+
<<~MEMO,
|
|
268
|
+
INTERNAL MEMO
|
|
269
|
+
Date: January 8, 2026
|
|
270
|
+
From: Marcus Chen, VP Engineering
|
|
271
|
+
Subject: Promotion Announcement
|
|
272
|
+
|
|
273
|
+
I am pleased to announce that Alex Kim has been promoted to
|
|
274
|
+
Software Engineer, effective January 15, 2026. Alex has demonstrated
|
|
275
|
+
exceptional growth and technical skills during their time as
|
|
276
|
+
Junior Developer.
|
|
277
|
+
|
|
278
|
+
Congratulations Alex!
|
|
279
|
+
MEMO
|
|
280
|
+
type: :document,
|
|
281
|
+
title: "Promotion Memo - Alex Kim"
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
# Supersede the old fact
|
|
285
|
+
promoted_fact = fact_service.supersede(
|
|
286
|
+
junior_original.id,
|
|
287
|
+
"Alex Kim is Software Engineer at Innovate Corp",
|
|
288
|
+
valid_at: Date.new(2026, 1, 15),
|
|
289
|
+
mentions: [
|
|
290
|
+
{ entity_id: employees[:junior_eng].id, role: :subject, text: "Alex Kim" },
|
|
291
|
+
{ entity_id: engineering.id, role: :object, text: "Engineering Department" }
|
|
292
|
+
]
|
|
293
|
+
)
|
|
294
|
+
promoted_fact.add_source(content: promotion_memo, type: :primary)
|
|
295
|
+
|
|
296
|
+
puts "Promoted Alex Kim from Junior Developer to Software Engineer"
|
|
297
|
+
puts "Previous fact (#{junior_original.id}) now superseded"
|
|
298
|
+
puts "New fact ID: #{promoted_fact.id}"
|
|
299
|
+
|
|
300
|
+
# Section 5: Record a Transfer
|
|
301
|
+
puts "\n--- Section 5: Recording a Transfer ---\n"
|
|
302
|
+
|
|
303
|
+
# Jordan is transferring to Austin
|
|
304
|
+
transfer_memo = content_service.create(
|
|
305
|
+
<<~MEMO,
|
|
306
|
+
INTERNAL MEMO
|
|
307
|
+
Date: January 10, 2026
|
|
308
|
+
Subject: Transfer Notice - Jordan Taylor
|
|
309
|
+
|
|
310
|
+
Jordan Taylor will be transferring to our Austin office
|
|
311
|
+
effective February 1, 2026. Jordan will continue in the
|
|
312
|
+
Product Manager role but will lead our Texas expansion efforts.
|
|
313
|
+
MEMO
|
|
314
|
+
type: :document,
|
|
315
|
+
title: "Transfer Notice - Jordan Taylor"
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
# Old location fact
|
|
319
|
+
jordan_sf_location = fact_service.create(
|
|
320
|
+
"Jordan Taylor works at San Francisco HQ",
|
|
321
|
+
valid_at: Date.new(2022, 2, 1),
|
|
322
|
+
invalid_at: Date.new(2026, 2, 1),
|
|
323
|
+
status: :superseded,
|
|
324
|
+
mentions: [
|
|
325
|
+
{ entity_id: employees[:pm].id, role: :subject, text: "Jordan Taylor" },
|
|
326
|
+
{ entity_id: hq.id, role: :location, text: "San Francisco HQ" }
|
|
327
|
+
]
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
# New location fact
|
|
331
|
+
jordan_austin_location = fact_service.create(
|
|
332
|
+
"Jordan Taylor works at Austin Office",
|
|
333
|
+
valid_at: Date.new(2026, 2, 1),
|
|
334
|
+
mentions: [
|
|
335
|
+
{ entity_id: employees[:pm].id, role: :subject, text: "Jordan Taylor" },
|
|
336
|
+
{ entity_id: remote_office.id, role: :location, text: "Austin Office" }
|
|
337
|
+
]
|
|
338
|
+
)
|
|
339
|
+
jordan_austin_location.add_source(content: transfer_memo, type: :primary)
|
|
340
|
+
|
|
341
|
+
puts "Recorded Jordan Taylor's transfer to Austin Office"
|
|
342
|
+
|
|
343
|
+
# Section 6: Query Employee Information
|
|
344
|
+
puts "\n--- Section 6: HR Queries ---\n"
|
|
345
|
+
|
|
346
|
+
# Current state of all employees
|
|
347
|
+
puts "\nCurrent Employee Status:"
|
|
348
|
+
puts "-" * 50
|
|
349
|
+
|
|
350
|
+
# Print, for every employee, a heading with their canonical name followed by
# the facts that fact_service.current_facts returns for that entity.
# Only the entity values are needed here, so the hash keys are not iterated.
employees.each_value do |employee|
  puts "\n#{employee.canonical_name}:"
  fact_service.current_facts(entity: employee.id).each do |fact|
    puts " - #{fact.fact_text}"
  end
end
|
|
357
|
+
|
|
358
|
+
# Section 7: Historical Query
|
|
359
|
+
puts "\n--- Section 7: Historical Employee Query ---\n"
|
|
360
|
+
|
|
361
|
+
# What was Alex Kim's role in December 2024?
|
|
362
|
+
puts "\nAlex Kim's facts as of December 2024:"
|
|
363
|
+
past_facts = fact_service.facts_at(Date.new(2024, 12, 1), entity: employees[:junior_eng].id)
|
|
364
|
+
past_facts.each { |f| puts " - #{f.fact_text}" }
|
|
365
|
+
|
|
366
|
+
# What is Alex Kim's role now?
|
|
367
|
+
puts "\nAlex Kim's facts as of today:"
|
|
368
|
+
current_facts = fact_service.facts_at(Date.today, entity: employees[:junior_eng].id)
|
|
369
|
+
current_facts.each { |f| puts " - #{f.fact_text}" }
|
|
370
|
+
|
|
371
|
+
# Section 8: Organization Chart Query
|
|
372
|
+
puts "\n--- Section 8: Organization Chart ---\n"
|
|
373
|
+
|
|
374
|
+
puts "\nReporting relationships:"
|
|
375
|
+
# Find all "reports to" facts
|
|
376
|
+
reporting_facts = fact_service.search("reports to")
|
|
377
|
+
reporting_facts.each { |f| puts " #{f.fact_text}" }
|
|
378
|
+
|
|
379
|
+
puts "\nEngineering Department members:"
|
|
380
|
+
engineering_facts = fact_service.current_facts(entity: engineering.id)
|
|
381
|
+
engineering_facts.each { |f| puts " #{f.fact_text}" }
|
|
382
|
+
|
|
383
|
+
# Section 9: Employee Timeline
|
|
384
|
+
puts "\n--- Section 9: Marcus Chen Career Timeline ---\n"
|
|
385
|
+
|
|
386
|
+
timeline = fact_service.timeline(
|
|
387
|
+
entity_id: employees[:vp_eng].id,
|
|
388
|
+
from: Date.new(2020, 1, 1),
|
|
389
|
+
to: Date.today
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# Print one line per timeline entry in the form
# "<start> - <end or present>: <fact text>", appending the status in
# brackets whenever it is anything other than "canonical".
timeline.each do |entry|
  finish = entry[:invalid_at]&.strftime("%Y-%m-%d") || "present"
  suffix = entry[:status] == "canonical" ? "" : " [#{entry[:status]}]"
  start = entry[:valid_at].strftime('%Y-%m-%d')
  puts " #{start} - #{finish}: #{entry[:fact_text]}#{suffix}"
end
|
|
397
|
+
|
|
398
|
+
# Section 10: Audit Trail
|
|
399
|
+
puts "\n--- Section 10: Audit Trail for Alex Kim ---\n"
|
|
400
|
+
|
|
401
|
+
alex_facts = FactDb::Models::Fact.joins(:entity_mentions)
|
|
402
|
+
.where(entity_mentions: { entity_id: employees[:junior_eng].id })
|
|
403
|
+
.order(:created_at)
|
|
404
|
+
|
|
405
|
+
puts "Complete fact history:"
|
|
406
|
+
# Print every fact in the audit query with its validity window, its status
# (shown only when not "canonical"), and the title and type of each source.
alex_facts.each do |fact|
  window = "#{fact.valid_at} - #{fact.invalid_at || 'present'}"
  status_tag = fact.status == "canonical" ? "" : " [#{fact.status}]"
  puts " [#{window}] #{fact.fact_text}#{status_tag}"

  fact.fact_sources.each do |source|
    puts " Source: #{source.content.title} (#{source.source_type})"
  end
end
|
|
415
|
+
|
|
416
|
+
# Section 11: Statistics
|
|
417
|
+
puts "\n--- Section 11: HR System Statistics ---\n"
|
|
418
|
+
|
|
419
|
+
puts "Total employees tracked: #{entity_service.people.count}"
|
|
420
|
+
puts "Total departments: #{entity_service.organizations.where("description LIKE ?", "%team%").count}"
|
|
421
|
+
puts "Total employment facts: #{fact_service.stats[:total]}"
|
|
422
|
+
puts "Current facts: #{FactDb::Models::Fact.currently_valid.count}"
|
|
423
|
+
puts "Historical facts: #{FactDb::Models::Fact.historical.count}"
|
|
424
|
+
puts "Documents processed: #{content_service.stats[:total]}"
|
|
425
|
+
|
|
426
|
+
puts "\n" + "=" * 60
|
|
427
|
+
puts "HR System Demo Complete!"
|
|
428
|
+
puts "=" * 60
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Rule-Based Extraction Example for FactDb
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates:
|
|
7
|
+
# - Using the rule-based extractor
|
|
8
|
+
# - Automatic pattern detection for employment, relationships, and locations
|
|
9
|
+
# - Processing extracted facts into the database
|
|
10
|
+
# - Handling extraction results
|
|
11
|
+
|
|
12
|
+
require "bundler/setup"
|
|
13
|
+
require "fact_db"
|
|
14
|
+
|
|
15
|
+
FactDb.configure do |config|
|
|
16
|
+
config.database_url = ENV.fetch("DATABASE_URL", "postgres://#{ENV['USER']}@localhost/fact_db_demo")
|
|
17
|
+
config.default_extractor = :rule_based
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Ensure database tables exist
|
|
21
|
+
FactDb::Database.migrate!
|
|
22
|
+
|
|
23
|
+
clock = FactDb.new
|
|
24
|
+
|
|
25
|
+
puts "=" * 60
|
|
26
|
+
puts "FactDb Rule-Based Extraction Demo"
|
|
27
|
+
puts "=" * 60
|
|
28
|
+
|
|
29
|
+
# Sample documents to process
|
|
30
|
+
documents = [
|
|
31
|
+
{
|
|
32
|
+
title: "Company Announcement",
|
|
33
|
+
type: :document,
|
|
34
|
+
text: <<~TEXT
|
|
35
|
+
FOR IMMEDIATE RELEASE - January 15, 2026
|
|
36
|
+
|
|
37
|
+
Global Tech Solutions Appoints New Leadership Team
|
|
38
|
+
|
|
39
|
+
Global Tech Solutions announced today that Jennifer Martinez has joined
|
|
40
|
+
the company as Chief Technology Officer. Martinez, who previously served
|
|
41
|
+
as VP of Engineering at DataFlow Inc from 2020 to 2025, will lead the
|
|
42
|
+
company's technical strategy.
|
|
43
|
+
|
|
44
|
+
Additionally, Michael Chen has been promoted to Chief Operating Officer,
|
|
45
|
+
effective February 1, 2026. Chen has been with Global Tech Solutions
|
|
46
|
+
since 2018.
|
|
47
|
+
|
|
48
|
+
The company, headquartered in Seattle, Washington, continues to expand
|
|
49
|
+
its presence in the cloud computing market.
|
|
50
|
+
TEXT
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
title: "HR Update Email",
|
|
54
|
+
type: :email,
|
|
55
|
+
text: <<~TEXT
|
|
56
|
+
From: hr@example.com
|
|
57
|
+
Subject: Team Updates - Q1 2026
|
|
58
|
+
Date: January 20, 2026
|
|
59
|
+
|
|
60
|
+
Hi team,
|
|
61
|
+
|
|
62
|
+
Please welcome our new hires:
|
|
63
|
+
- Sarah Williams joined our Marketing team on January 10, 2026
|
|
64
|
+
- David Lee started as Senior Developer on January 15, 2026
|
|
65
|
+
- Emma Thompson works at our London office as Regional Manager
|
|
66
|
+
|
|
67
|
+
Also note that James Wilson left the company on December 31, 2025
|
|
68
|
+
to pursue other opportunities.
|
|
69
|
+
|
|
70
|
+
Recent relocations:
|
|
71
|
+
- Lisa Anderson moved to the Austin office
|
|
72
|
+
- Robert Kim now lives in San Francisco
|
|
73
|
+
|
|
74
|
+
Best regards,
|
|
75
|
+
Human Resources
|
|
76
|
+
TEXT
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
title: "Meeting Notes",
|
|
80
|
+
type: :meeting_notes,
|
|
81
|
+
text: <<~TEXT
|
|
82
|
+
Project Status Meeting - January 22, 2026
|
|
83
|
+
|
|
84
|
+
Attendees: Tom Baker (Project Manager), Anna Kowalski (Lead Developer)
|
|
85
|
+
|
|
86
|
+
Updates:
|
|
87
|
+
- Tom Baker is leading the Alpha project launch scheduled for Q2
|
|
88
|
+
- Anna Kowalski is responsible for the backend infrastructure
|
|
89
|
+
- Partnership discussion: Maria Santos is CEO of TechPartner Corp
|
|
90
|
+
- TechPartner Corp is headquartered in Miami, Florida
|
|
91
|
+
|
|
92
|
+
Action Items:
|
|
93
|
+
- Tom to schedule follow-up with Maria Santos
|
|
94
|
+
- Anna reports to Tom Baker for the Alpha project
|
|
95
|
+
TEXT
|
|
96
|
+
}
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
# Create the rule-based extractor
|
|
100
|
+
extractor = FactDb::Extractors::Base.for(:rule_based)
|
|
101
|
+
|
|
102
|
+
# Section 1: Process Each Document
|
|
103
|
+
puts "\n--- Section 1: Processing Documents ---\n"
|
|
104
|
+
|
|
105
|
+
documents.each_with_index do |doc, index|
|
|
106
|
+
puts "\n#{'=' * 40}"
|
|
107
|
+
puts "Document #{index + 1}: #{doc[:title]}"
|
|
108
|
+
puts "=" * 40
|
|
109
|
+
|
|
110
|
+
# Ingest the content
|
|
111
|
+
content = clock.ingest(
|
|
112
|
+
doc[:text],
|
|
113
|
+
type: doc[:type],
|
|
114
|
+
title: doc[:title],
|
|
115
|
+
captured_at: Time.now
|
|
116
|
+
)
|
|
117
|
+
puts "Ingested content ID: #{content.id}"
|
|
118
|
+
|
|
119
|
+
# Extract facts from the document text
|
|
120
|
+
context = { source_id: content.id, captured_at: content.captured_at }
|
|
121
|
+
extracted_facts = extractor.extract(doc[:text], context)
|
|
122
|
+
|
|
123
|
+
puts "\nExtracted #{extracted_facts.length} facts:"
|
|
124
|
+
# Print a numbered (1-based) summary of each extracted fact: its text and
# confidence, the validity bounds when present, and any mentions as
# "name (role)" pairs.
extracted_facts.each.with_index(1) do |fact, position|
  puts " #{position}. #{fact[:text]}"
  puts " Confidence: #{fact[:confidence]}"
  puts " Valid from: #{fact[:valid_at]}" if fact[:valid_at]
  puts " Valid until: #{fact[:invalid_at]}" if fact[:invalid_at]

  mentions = fact[:mentions]
  next unless mentions&.any?

  labels = mentions.map { |m| "#{m[:name]} (#{m[:role]})" }
  puts " Mentions: #{labels.join(', ')}"
end
|
|
133
|
+
|
|
134
|
+
# Also extract entities
|
|
135
|
+
entities = extractor.extract_entities(doc[:text])
|
|
136
|
+
if entities.any?
|
|
137
|
+
puts "\nExtracted #{entities.length} entities:"
|
|
138
|
+
entities.each do |entity|
|
|
139
|
+
puts " - #{entity[:name]} (#{entity[:type]})"
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# Section 2: Save Extracted Facts to Database
|
|
145
|
+
puts "\n\n--- Section 2: Saving Extracted Facts ---\n"
|
|
146
|
+
|
|
147
|
+
entity_service = clock.entity_service
|
|
148
|
+
fact_service = clock.fact_service
|
|
149
|
+
|
|
150
|
+
# Process first document in detail
|
|
151
|
+
sample_doc = documents.first
|
|
152
|
+
content = clock.content_service.search(sample_doc[:title]).first
|
|
153
|
+
context = { source_id: content.id, captured_at: content.captured_at }
|
|
154
|
+
result = extractor.extract(sample_doc[:text], context)
|
|
155
|
+
|
|
156
|
+
# Persist each extracted fact: build its mention records, create the fact,
# link it to the source content, and print a confirmation.
result.each do |fact_data|
  # Build one mention record per extracted mention (none when :mentions is nil).
  mention_records = fact_data[:mentions].to_a.map do |mention|
    # Look the entity up via resolve_or_create; the type falls back to :unknown.
    entity = entity_service.resolve_or_create(
      mention[:name],
      type: mention[:type] || :unknown,
      description: "Auto-extracted entity"
    )

    {
      entity_id: entity.id,
      role: mention[:role],
      text: mention[:name],
      # A mention without its own confidence inherits the fact's confidence.
      confidence: mention[:confidence] || fact_data[:confidence]
    }
  end

  # Create the fact; valid_at falls back to today when the extractor gave none.
  fact = fact_service.create(
    fact_data[:text],
    valid_at: fact_data[:valid_at] || Date.today,
    invalid_at: fact_data[:invalid_at],
    extraction_method: :rule_based,
    confidence: fact_data[:confidence],
    mentions: mention_records
  )

  # Link the fact to the content it was extracted from.
  fact.add_source(content: content, type: :primary, confidence: fact_data[:confidence])

  puts "Saved fact: #{fact.fact_text}"
  puts " ID: #{fact.id}, Mentions: #{fact.entity_mentions.count}"
end
|
|
192
|
+
|
|
193
|
+
# Section 3: Query the Extracted Data
|
|
194
|
+
puts "\n--- Section 3: Querying Extracted Data ---\n"
|
|
195
|
+
|
|
196
|
+
# List entities whose resolution status is resolved, with their fact counts
|
|
197
|
+
puts "\nAll extracted entities:"
|
|
198
|
+
FactDb::Models::Entity.where(resolution_status: :resolved).order(:canonical_name).each do |entity|
|
|
199
|
+
fact_count = entity.facts.count
|
|
200
|
+
puts " #{entity.canonical_name} (#{entity.entity_type}) - #{fact_count} facts"
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# Find facts by extraction method
|
|
204
|
+
puts "\nFacts extracted by rule-based extractor:"
|
|
205
|
+
FactDb::Models::Fact.by_extraction_method(:rule_based).limit(10).each do |fact|
|
|
206
|
+
puts " [#{fact.confidence}] #{fact.fact_text}"
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
# Section 4: Pattern Examples
|
|
210
|
+
puts "\n--- Section 4: Extraction Pattern Examples ---\n"
|
|
211
|
+
|
|
212
|
+
test_patterns = [
|
|
213
|
+
"John Smith works at Acme Corp as a Senior Engineer.",
|
|
214
|
+
"Mary Johnson joined Microsoft on March 15, 2024.",
|
|
215
|
+
"The CEO of Apple is Tim Cook.",
|
|
216
|
+
"Sarah left Google on December 1, 2025.",
|
|
217
|
+
"Amazon is headquartered in Seattle.",
|
|
218
|
+
"Bob lives in New York City.",
|
|
219
|
+
"Dr. Lisa Chen married James Wong in 2023.",
|
|
220
|
+
]
|
|
221
|
+
|
|
222
|
+
puts "Testing individual patterns:\n"
|
|
223
|
+
# Run the extractor on each sample sentence and print the input followed by
# either the extracted facts or a "No facts extracted" line, then a blank line.
# The extraction result is held in a block-local name so the top-level
# `result` used earlier in the script is not reassigned on every iteration.
test_patterns.each do |pattern|
  facts = extractor.extract(pattern, {})

  puts "Input: \"#{pattern}\""
  if facts.any?
    facts.each do |fact|
      puts " -> #{fact[:text]} (confidence: #{fact[:confidence]})"
    end
  else
    puts " -> No facts extracted"
  end
  puts
end
|
|
237
|
+
|
|
238
|
+
# Section 5: Statistics
|
|
239
|
+
puts "\n--- Section 5: Extraction Statistics ---\n"
|
|
240
|
+
|
|
241
|
+
content_stats = clock.content_service.stats
|
|
242
|
+
fact_stats = clock.fact_service.stats
|
|
243
|
+
entity_stats = entity_service.stats
|
|
244
|
+
|
|
245
|
+
puts "Content ingested: #{content_stats[:total]}"
|
|
246
|
+
puts "Entities created: #{entity_stats[:total]}"
|
|
247
|
+
puts "Facts extracted: #{fact_stats[:total]}"
|
|
248
|
+
|
|
249
|
+
# Print the per-extraction-method fact counts when the stats include them.
by_method = fact_stats[:by_extraction_method]
if by_method
  puts "\nFacts by extraction method:"
  by_method.each { |extraction_method, total| puts " #{extraction_method}: #{total}" }
end
|
|
255
|
+
|
|
256
|
+
puts "\n" + "=" * 60
|
|
257
|
+
puts "Rule-Based Extraction Demo Complete!"
|
|
258
|
+
puts "=" * 60
|