fact_db 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.envrc +1 -0
  3. data/CHANGELOG.md +48 -0
  4. data/COMMITS.md +196 -0
  5. data/README.md +102 -0
  6. data/Rakefile +41 -0
  7. data/db/migrate/001_enable_extensions.rb +7 -0
  8. data/db/migrate/002_create_contents.rb +44 -0
  9. data/db/migrate/003_create_entities.rb +36 -0
  10. data/db/migrate/004_create_entity_aliases.rb +18 -0
  11. data/db/migrate/005_create_facts.rb +65 -0
  12. data/db/migrate/006_create_entity_mentions.rb +18 -0
  13. data/db/migrate/007_create_fact_sources.rb +18 -0
  14. data/docs/api/extractors/index.md +71 -0
  15. data/docs/api/extractors/llm.md +162 -0
  16. data/docs/api/extractors/manual.md +92 -0
  17. data/docs/api/extractors/rule-based.md +165 -0
  18. data/docs/api/facts.md +300 -0
  19. data/docs/api/index.md +66 -0
  20. data/docs/api/models/content.md +165 -0
  21. data/docs/api/models/entity.md +202 -0
  22. data/docs/api/models/fact.md +270 -0
  23. data/docs/api/models/index.md +77 -0
  24. data/docs/api/pipeline/extraction.md +175 -0
  25. data/docs/api/pipeline/index.md +72 -0
  26. data/docs/api/pipeline/resolution.md +209 -0
  27. data/docs/api/services/content-service.md +166 -0
  28. data/docs/api/services/entity-service.md +202 -0
  29. data/docs/api/services/fact-service.md +223 -0
  30. data/docs/api/services/index.md +55 -0
  31. data/docs/architecture/database-schema.md +293 -0
  32. data/docs/architecture/entity-resolution.md +293 -0
  33. data/docs/architecture/index.md +149 -0
  34. data/docs/architecture/temporal-facts.md +268 -0
  35. data/docs/architecture/three-layer-model.md +242 -0
  36. data/docs/assets/css/custom.css +137 -0
  37. data/docs/assets/fact_db.jpg +0 -0
  38. data/docs/assets/images/fact_db.jpg +0 -0
  39. data/docs/concepts.md +183 -0
  40. data/docs/examples/basic-usage.md +235 -0
  41. data/docs/examples/hr-onboarding.md +312 -0
  42. data/docs/examples/index.md +64 -0
  43. data/docs/examples/news-analysis.md +288 -0
  44. data/docs/getting-started/database-setup.md +170 -0
  45. data/docs/getting-started/index.md +71 -0
  46. data/docs/getting-started/installation.md +98 -0
  47. data/docs/getting-started/quick-start.md +191 -0
  48. data/docs/guides/batch-processing.md +325 -0
  49. data/docs/guides/configuration.md +243 -0
  50. data/docs/guides/entity-management.md +364 -0
  51. data/docs/guides/extracting-facts.md +299 -0
  52. data/docs/guides/index.md +22 -0
  53. data/docs/guides/ingesting-content.md +252 -0
  54. data/docs/guides/llm-integration.md +299 -0
  55. data/docs/guides/temporal-queries.md +315 -0
  56. data/docs/index.md +121 -0
  57. data/examples/README.md +130 -0
  58. data/examples/basic_usage.rb +164 -0
  59. data/examples/entity_management.rb +216 -0
  60. data/examples/hr_system.rb +428 -0
  61. data/examples/rule_based_extraction.rb +258 -0
  62. data/examples/temporal_queries.rb +245 -0
  63. data/lib/fact_db/config.rb +71 -0
  64. data/lib/fact_db/database.rb +45 -0
  65. data/lib/fact_db/errors.rb +10 -0
  66. data/lib/fact_db/extractors/base.rb +117 -0
  67. data/lib/fact_db/extractors/llm_extractor.rb +179 -0
  68. data/lib/fact_db/extractors/manual_extractor.rb +53 -0
  69. data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
  70. data/lib/fact_db/llm/adapter.rb +109 -0
  71. data/lib/fact_db/models/content.rb +62 -0
  72. data/lib/fact_db/models/entity.rb +84 -0
  73. data/lib/fact_db/models/entity_alias.rb +26 -0
  74. data/lib/fact_db/models/entity_mention.rb +33 -0
  75. data/lib/fact_db/models/fact.rb +192 -0
  76. data/lib/fact_db/models/fact_source.rb +35 -0
  77. data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
  78. data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
  79. data/lib/fact_db/resolution/entity_resolver.rb +261 -0
  80. data/lib/fact_db/resolution/fact_resolver.rb +259 -0
  81. data/lib/fact_db/services/content_service.rb +93 -0
  82. data/lib/fact_db/services/entity_service.rb +150 -0
  83. data/lib/fact_db/services/fact_service.rb +193 -0
  84. data/lib/fact_db/temporal/query.rb +125 -0
  85. data/lib/fact_db/temporal/timeline.rb +134 -0
  86. data/lib/fact_db/version.rb +5 -0
  87. data/lib/fact_db.rb +141 -0
  88. data/mkdocs.yml +198 -0
  89. metadata +288 -0
@@ -0,0 +1,428 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # HR System Example for FactDb
5
+ #
6
+ # This example demonstrates a practical HR knowledge management system:
7
+ # - Tracking employee information over time
8
+ # - Managing organizational hierarchy
9
+ # - Recording promotions, transfers, and departures
10
+ # - Auditing changes with temporal queries
11
+ # - Detecting conflicts in employee data
12
+
13
+ require "bundler/setup"
14
+ require "fact_db"
15
+
# Configure FactDb for the demo: DATABASE_URL takes precedence, otherwise a
# local per-user Postgres database is used; facts are entered manually.
FactDb.configure do |settings|
  settings.database_url = ENV.fetch("DATABASE_URL") { "postgres://#{ENV['USER']}@localhost/fact_db_demo" }
  settings.default_extractor = :manual
end

# Ensure database tables exist
FactDb::Database.migrate!

# A single FactDb instance hands out the three services used below.
clock = FactDb.new
entity_service, fact_service, content_service =
  clock.entity_service, clock.fact_service, clock.content_service
28
+
banner = "=" * 60
puts banner
puts "HR Knowledge Management System Demo"
puts banner

# Section 1: Setup Company Structure
puts "\n--- Section 1: Setting Up Organization ---\n"

# The company itself; every department below stores its id as `parent`.
company = entity_service.create(
  "Innovate Corp",
  type: :organization,
  description: "Technology company specializing in AI solutions",
  attributes: { industry: "Technology", founded: "2010" }
)

# Departments only differ in name and description, so build them through
# one small helper.
create_department = lambda do |name, description|
  entity_service.create(
    name,
    type: :organization,
    description: description,
    attributes: { parent: company.id }
  )
end

engineering = create_department.call("Engineering Department", "Software engineering team")
product     = create_department.call("Product Department", "Product management team")
hr_dept     = create_department.call("Human Resources", "HR team")

puts "Created company: #{company.canonical_name}"
department_names = [engineering, product, hr_dept].map(&:canonical_name)
puts "Created departments: #{department_names.join(', ')}"

# Office locations, each registered together with its alternative names.
hq = entity_service.create(
  "San Francisco HQ",
  type: :place,
  aliases: ["SF Office", "Headquarters"],
  attributes: { city: "San Francisco", state: "CA" }
)

remote_office = entity_service.create(
  "Austin Office",
  type: :place,
  aliases: ["Austin TX Office"],
  attributes: { city: "Austin", state: "TX" }
)

location_names = [hq, remote_office].map(&:canonical_name)
puts "Created locations: #{location_names.join(', ')}"
85
+
# Section 2: Create Employee Profiles
puts "\n--- Section 2: Creating Employee Profiles ---\n"

# One entry per employee, keyed by the handle the rest of the script uses.
# `aliases` is only present for people who have alternative names.
employee_profiles = {
  # CEO
  ceo: {
    name: "Katherine Rodriguez",
    aliases: ["Kate Rodriguez", "K. Rodriguez"],
    employee_id: "EMP001",
    email: "krodriguez@innovatecorp.com",
    title: "Chief Executive Officer"
  },
  # VP Engineering
  vp_eng: {
    name: "Marcus Chen",
    aliases: ["Marc Chen"],
    employee_id: "EMP002",
    email: "mchen@innovatecorp.com",
    title: "VP of Engineering"
  },
  # Senior Engineer
  senior_eng: {
    name: "Priya Sharma",
    employee_id: "EMP003",
    email: "psharma@innovatecorp.com",
    title: "Senior Software Engineer"
  },
  # Junior Engineer (will be promoted)
  junior_eng: {
    name: "Alex Kim",
    employee_id: "EMP004",
    email: "akim@innovatecorp.com",
    title: "Software Engineer"
  },
  # Product Manager
  pm: {
    name: "Jordan Taylor",
    employee_id: "EMP005",
    email: "jtaylor@innovatecorp.com",
    title: "Product Manager"
  },
  # HR Manager
  hr_mgr: {
    name: "Michelle Brown",
    employee_id: "EMP006",
    email: "mbrown@innovatecorp.com",
    title: "HR Manager"
  }
}

# Create the person entities in declaration order and index them by handle.
employees = employee_profiles.each_with_object({}) do |(handle, profile), created|
  options = { type: :person }
  # Pass `aliases:` only when the profile defines some.
  options[:aliases] = profile[:aliases] if profile.key?(:aliases)
  options[:attributes] = { employee_id: profile[:employee_id], email: profile[:email] }
  options[:description] = profile[:title]

  created[handle] = entity_service.create(profile[:name], **options)
end

puts "Created #{employees.length} employee profiles"
142
+
# Section 3: Record Initial Employment Facts
puts "\n--- Section 3: Recording Employment History ---\n"

# Ingest an onboarding document
onboarding_doc = content_service.create(
  <<~DOC,
    EMPLOYEE ONBOARDING RECORDS - 2020-2024

    Katherine Rodriguez - Hired as CEO on January 15, 2020
    Marcus Chen - Hired as Engineering Manager on March 1, 2020
    Priya Sharma - Hired as Software Engineer on June 15, 2021
    Alex Kim - Hired as Junior Developer on September 1, 2023
    Jordan Taylor - Hired as Associate PM on February 1, 2022
    Michelle Brown - Hired as HR Coordinator on April 1, 2021
  DOC
  type: :document,
  title: "Historical Onboarding Records"
)

# CEO facts. The role fact is kept in a local because it is linked to the
# onboarding document as its primary source right after creation.
ceo_employment = fact_service.create(
  "Katherine Rodriguez is CEO of Innovate Corp",
  valid_at: Date.new(2020, 1, 15),
  mentions: [
    { entity_id: employees[:ceo].id, role: :subject, text: "Katherine Rodriguez" },
    { entity_id: company.id, role: :object, text: "Innovate Corp" }
  ]
)
ceo_employment.add_source(content: onboarding_doc, type: :primary)

fact_service.create(
  "Katherine Rodriguez works at San Francisco HQ",
  valid_at: Date.new(2020, 1, 15),
  mentions: [
    { entity_id: employees[:ceo].id, role: :subject, text: "Katherine Rodriguez" },
    { entity_id: hq.id, role: :location, text: "San Francisco HQ" }
  ]
)

# VP Engineering - with promotion history.
# The original role is recorded as already closed (invalid_at set) and
# superseded; the current role starts on the same date the old one ends.
fact_service.create(
  "Marcus Chen is Engineering Manager at Innovate Corp",
  valid_at: Date.new(2020, 3, 1),
  invalid_at: Date.new(2023, 1, 1),
  status: :superseded,
  mentions: [
    { entity_id: employees[:vp_eng].id, role: :subject, text: "Marcus Chen" },
    { entity_id: company.id, role: :object, text: "Innovate Corp" }
  ]
)

fact_service.create(
  "Marcus Chen is VP of Engineering at Innovate Corp",
  valid_at: Date.new(2023, 1, 1),
  mentions: [
    { entity_id: employees[:vp_eng].id, role: :subject, text: "Marcus Chen" },
    { entity_id: company.id, role: :object, text: "Innovate Corp" }
  ]
)

# Other employees
# NOTE(review): the onboarding record above says Priya was hired as
# "Software Engineer", while this fact records "Senior Software Engineer"
# from the hire date - confirm which one is intended.
fact_service.create(
  "Priya Sharma is Senior Software Engineer at Innovate Corp",
  valid_at: Date.new(2021, 6, 15),
  mentions: [
    { entity_id: employees[:senior_eng].id, role: :subject, text: "Priya Sharma" },
    { entity_id: engineering.id, role: :object, text: "Engineering Department" }
  ]
)

fact_service.create(
  "Priya Sharma reports to Marcus Chen",
  valid_at: Date.new(2021, 6, 15),
  mentions: [
    { entity_id: employees[:senior_eng].id, role: :subject, text: "Priya Sharma" },
    { entity_id: employees[:vp_eng].id, role: :object, text: "Marcus Chen" }
  ]
)

# Kept in a local: this fact is superseded later when Alex is promoted.
junior_original = fact_service.create(
  "Alex Kim is Junior Developer at Innovate Corp",
  valid_at: Date.new(2023, 9, 1),
  mentions: [
    { entity_id: employees[:junior_eng].id, role: :subject, text: "Alex Kim" },
    { entity_id: engineering.id, role: :object, text: "Engineering Department" }
  ]
)

# NOTE(review): the onboarding record above says Jordan was hired as
# "Associate PM", while this fact records "Product Manager" from the hire
# date - confirm which one is intended.
fact_service.create(
  "Jordan Taylor is Product Manager at Innovate Corp",
  valid_at: Date.new(2022, 2, 1),
  mentions: [
    { entity_id: employees[:pm].id, role: :subject, text: "Jordan Taylor" },
    { entity_id: product.id, role: :object, text: "Product Department" }
  ]
)

# HR: the coordinator role is recorded as closed and superseded, followed by
# the manager role that starts on the same date.
fact_service.create(
  "Michelle Brown is HR Coordinator at Innovate Corp",
  valid_at: Date.new(2021, 4, 1),
  invalid_at: Date.new(2024, 7, 1),
  status: :superseded,
  mentions: [
    { entity_id: employees[:hr_mgr].id, role: :subject, text: "Michelle Brown" },
    { entity_id: hr_dept.id, role: :object, text: "Human Resources" }
  ]
)

fact_service.create(
  "Michelle Brown is HR Manager at Innovate Corp",
  valid_at: Date.new(2024, 7, 1),
  mentions: [
    { entity_id: employees[:hr_mgr].id, role: :subject, text: "Michelle Brown" },
    { entity_id: hr_dept.id, role: :object, text: "Human Resources" }
  ]
)

puts "Recorded employment history facts"
261
+
# Section 4: Process a Promotion
puts "\n--- Section 4: Processing a Promotion ---\n"

# Ingest the promotion memo first so the new fact can cite it as its source.
promotion_memo_text = <<~MEMO
  INTERNAL MEMO
  Date: January 8, 2026
  From: Marcus Chen, VP Engineering
  Subject: Promotion Announcement

  I am pleased to announce that Alex Kim has been promoted to
  Software Engineer, effective January 15, 2026. Alex has demonstrated
  exceptional growth and technical skills during their time as
  Junior Developer.

  Congratulations Alex!
MEMO
promotion_memo = content_service.create(
  promotion_memo_text,
  type: :document,
  title: "Promotion Memo - Alex Kim"
)

# Supersede the old fact: the Junior Developer fact is replaced by the new
# role, which becomes valid on the memo's effective date.
new_role_mentions = [
  { entity_id: employees[:junior_eng].id, role: :subject, text: "Alex Kim" },
  { entity_id: engineering.id, role: :object, text: "Engineering Department" }
]
promoted_fact = fact_service.supersede(
  junior_original.id,
  "Alex Kim is Software Engineer at Innovate Corp",
  valid_at: Date.new(2026, 1, 15),
  mentions: new_role_mentions
)
promoted_fact.add_source(content: promotion_memo, type: :primary)

puts "Promoted Alex Kim from Junior Developer to Software Engineer"
puts "Previous fact (#{junior_original.id}) now superseded"
puts "New fact ID: #{promoted_fact.id}"
299
+
# Section 5: Record a Transfer
puts "\n--- Section 5: Recording a Transfer ---\n"

# Jordan is transferring to Austin
transfer_memo = content_service.create(
  <<~MEMO,
    INTERNAL MEMO
    Date: January 10, 2026
    Subject: Transfer Notice - Jordan Taylor

    Jordan Taylor will be transferring to our Austin office
    effective February 1, 2026. Jordan will continue in the
    Product Manager role but will lead our Texas expansion efforts.
  MEMO
  type: :document,
  title: "Transfer Notice - Jordan Taylor"
)

# Old location fact: recorded as already closed on the transfer date and
# marked superseded. The return value is not needed afterwards.
fact_service.create(
  "Jordan Taylor works at San Francisco HQ",
  valid_at: Date.new(2022, 2, 1),
  invalid_at: Date.new(2026, 2, 1),
  status: :superseded,
  mentions: [
    { entity_id: employees[:pm].id, role: :subject, text: "Jordan Taylor" },
    { entity_id: hq.id, role: :location, text: "San Francisco HQ" }
  ]
)

# New location fact, valid from the transfer date and backed by the memo.
jordan_austin_location = fact_service.create(
  "Jordan Taylor works at Austin Office",
  valid_at: Date.new(2026, 2, 1),
  mentions: [
    { entity_id: employees[:pm].id, role: :subject, text: "Jordan Taylor" },
    { entity_id: remote_office.id, role: :location, text: "Austin Office" }
  ]
)
jordan_austin_location.add_source(content: transfer_memo, type: :primary)

puts "Recorded Jordan Taylor's transfer to Austin Office"
342
+
# Section 6: Query Employee Information
puts "\n--- Section 6: HR Queries ---\n"

# Current state of all employees
puts "\nCurrent Employee Status:"
puts "-" * 50

# Only the entity records are needed here, so iterate the hash values and
# leave the lookup keys out.
employees.each_value do |employee|
  puts "\n#{employee.canonical_name}:"
  fact_service.current_facts(entity: employee.id).each do |fact|
    puts " - #{fact.fact_text}"
  end
end

# Section 7: Historical Query
puts "\n--- Section 7: Historical Employee Query ---\n"

# What was Alex Kim's role in December 2024?
puts "\nAlex Kim's facts as of December 2024:"
past_facts = fact_service.facts_at(Date.new(2024, 12, 1), entity: employees[:junior_eng].id)
past_facts.each { |f| puts " - #{f.fact_text}" }

# What is Alex Kim's role now? Same point-in-time query, asked for today.
puts "\nAlex Kim's facts as of today:"
todays_facts = fact_service.facts_at(Date.today, entity: employees[:junior_eng].id)
todays_facts.each { |f| puts " - #{f.fact_text}" }

# Section 8: Organization Chart Query
puts "\n--- Section 8: Organization Chart ---\n"

puts "\nReporting relationships:"
# Find all "reports to" facts via a text search on the fact wording.
reporting_facts = fact_service.search("reports to")
reporting_facts.each { |f| puts " #{f.fact_text}" }

# Facts that currently mention the Engineering Department entity.
puts "\nEngineering Department members:"
engineering_facts = fact_service.current_facts(entity: engineering.id)
engineering_facts.each { |f| puts " #{f.fact_text}" }
382
+
# Section 9: Employee Timeline
puts "\n--- Section 9: Marcus Chen Career Timeline ---\n"

timeline = fact_service.timeline(
  entity_id: employees[:vp_eng].id,
  from: Date.new(2020, 1, 1),
  to: Date.today
)

# One line per entry: "<start> - <end or present>: <text>", with the status
# appended in brackets whenever it is not "canonical".
timeline.each do |entry|
  end_date = entry[:invalid_at]&.strftime("%Y-%m-%d") || "present"
  status_marker = entry[:status] == "canonical" ? "" : " [#{entry[:status]}]"
  start_date = entry[:valid_at].strftime('%Y-%m-%d')
  puts " #{start_date} - #{end_date}: #{entry[:fact_text]}#{status_marker}"
end

# Section 10: Audit Trail
puts "\n--- Section 10: Audit Trail for Alex Kim ---\n"

# Every fact that mentions Alex Kim, oldest record first.
alex_facts = FactDb::Models::Fact
  .joins(:entity_mentions)
  .where(entity_mentions: { entity_id: employees[:junior_eng].id })
  .order(:created_at)

puts "Complete fact history:"
alex_facts.each do |fact|
  status_info = fact.status == "canonical" ? "" : " [#{fact.status}]"
  validity = "#{fact.valid_at} - #{fact.invalid_at || 'present'}"
  puts " [#{validity}] #{fact.fact_text}#{status_info}"

  # List the documents each fact is sourced from.
  fact.fact_sources.each do |source|
    puts " Source: #{source.content.title} (#{source.source_type})"
  end
end
415
+
# Section 11: Statistics
puts "\n--- Section 11: HR System Statistics ---\n"

employee_count = entity_service.people.count
puts "Total employees tracked: #{employee_count}"

# Departments are picked out of all organizations by the word "team" in
# their description (the company's own description does not contain it).
department_count = entity_service.organizations.where("description LIKE ?", "%team%").count
puts "Total departments: #{department_count}"

puts "Total employment facts: #{fact_service.stats[:total]}"

fact_model = FactDb::Models::Fact
puts "Current facts: #{fact_model.currently_valid.count}"
puts "Historical facts: #{fact_model.historical.count}"
puts "Documents processed: #{content_service.stats[:total]}"

closing_rule = "=" * 60
puts "\n#{closing_rule}"
puts "HR System Demo Complete!"
puts closing_rule
@@ -0,0 +1,258 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Rule-Based Extraction Example for FactDb
5
+ #
6
+ # This example demonstrates:
7
+ # - Using the rule-based extractor
8
+ # - Automatic pattern detection for employment, relationships, and locations
9
+ # - Processing extracted facts into the database
10
+ # - Handling extraction results
11
+
12
+ require "bundler/setup"
13
+ require "fact_db"
14
+
# Configure FactDb for the demo: DATABASE_URL takes precedence, otherwise a
# local per-user Postgres database is used; the rule-based extractor is the
# default.
FactDb.configure do |settings|
  settings.database_url = ENV.fetch("DATABASE_URL") { "postgres://#{ENV['USER']}@localhost/fact_db_demo" }
  settings.default_extractor = :rule_based
end

# Ensure database tables exist
FactDb::Database.migrate!

clock = FactDb.new

banner = "=" * 60
puts banner
puts "FactDb Rule-Based Extraction Demo"
puts banner
28
+
# Sample documents to process. Each body is kept in its own heredoc so the
# list below stays a compact index of title, content type and text.
announcement_text = <<~TEXT
  FOR IMMEDIATE RELEASE - January 15, 2026

  Global Tech Solutions Appoints New Leadership Team

  Global Tech Solutions announced today that Jennifer Martinez has joined
  the company as Chief Technology Officer. Martinez, who previously served
  as VP of Engineering at DataFlow Inc from 2020 to 2025, will lead the
  company's technical strategy.

  Additionally, Michael Chen has been promoted to Chief Operating Officer,
  effective February 1, 2026. Chen has been with Global Tech Solutions
  since 2018.

  The company, headquartered in Seattle, Washington, continues to expand
  its presence in the cloud computing market.
TEXT

hr_email_text = <<~TEXT
  From: hr@example.com
  Subject: Team Updates - Q1 2026
  Date: January 20, 2026

  Hi team,

  Please welcome our new hires:
  - Sarah Williams joined our Marketing team on January 10, 2026
  - David Lee started as Senior Developer on January 15, 2026
  - Emma Thompson works at our London office as Regional Manager

  Also note that James Wilson left the company on December 31, 2025
  to pursue other opportunities.

  Recent relocations:
  - Lisa Anderson moved to the Austin office
  - Robert Kim now lives in San Francisco

  Best regards,
  Human Resources
TEXT

meeting_notes_text = <<~TEXT
  Project Status Meeting - January 22, 2026

  Attendees: Tom Baker (Project Manager), Anna Kowalski (Lead Developer)

  Updates:
  - Tom Baker is leading the Alpha project launch scheduled for Q2
  - Anna Kowalski is responsible for the backend infrastructure
  - Partnership discussion: Maria Santos is CEO of TechPartner Corp
  - TechPartner Corp is headquartered in Miami, Florida

  Action Items:
  - Tom to schedule follow-up with Maria Santos
  - Anna reports to Tom Baker for the Alpha project
TEXT

documents = [
  { title: "Company Announcement", type: :document, text: announcement_text },
  { title: "HR Update Email", type: :email, text: hr_email_text },
  { title: "Meeting Notes", type: :meeting_notes, text: meeting_notes_text }
]
98
+
# Create the rule-based extractor
extractor = FactDb::Extractors::Base.for(:rule_based)

# Section 1: Process Each Document
puts "\n--- Section 1: Processing Documents ---\n"

documents.each.with_index(1) do |doc, doc_number|
  divider = "=" * 40
  puts "\n#{divider}"
  puts "Document #{doc_number}: #{doc[:title]}"
  puts divider

  # Ingest the content so the extraction can refer back to a stored record.
  content = clock.ingest(
    doc[:text],
    type: doc[:type],
    title: doc[:title],
    captured_at: Time.now
  )
  puts "Ingested content ID: #{content.id}"

  # Extract facts, passing the stored record's id and capture time as context.
  extraction_context = { source_id: content.id, captured_at: content.captured_at }
  extracted_facts = extractor.extract(doc[:text], extraction_context)

  puts "\nExtracted #{extracted_facts.length} facts:"
  extracted_facts.each.with_index(1) do |fact, fact_number|
    puts " #{fact_number}. #{fact[:text]}"
    puts " Confidence: #{fact[:confidence]}"
    puts " Valid from: #{fact[:valid_at]}" if fact[:valid_at]
    puts " Valid until: #{fact[:invalid_at]}" if fact[:invalid_at]
    # Mentions are optional; print them only when at least one is present.
    next unless fact[:mentions]&.any?

    mention_labels = fact[:mentions].map { |m| "#{m[:name]} (#{m[:role]})" }
    puts " Mentions: #{mention_labels.join(', ')}"
  end

  # Also extract entities
  entities = extractor.extract_entities(doc[:text])
  next unless entities.any?

  puts "\nExtracted #{entities.length} entities:"
  entities.each { |entity| puts " - #{entity[:name]} (#{entity[:type]})" }
end
143
+
# Section 2: Save Extracted Facts to Database
puts "\n\n--- Section 2: Saving Extracted Facts ---\n"

entity_service = clock.entity_service
fact_service = clock.fact_service

# Process first document in detail
sample_doc = documents.first

# Look the ingested record up again by title.
# NOTE(review): the search may also match content ingested by earlier runs of
# this demo, in which case `.first` is not necessarily this run's record -
# confirm the ordering `search` guarantees.
content = clock.content_service.search(sample_doc[:title]).first

# Stop with a clear message when nothing is found; otherwise the next line
# would fail with a NoMethodError on nil.
abort "No ingested content found for #{sample_doc[:title].inspect}" unless content

context = { source_id: content.id, captured_at: content.captured_at }
result = extractor.extract(sample_doc[:text], context)

result.each do |fact_data|
  # Resolve or create mentioned entities and describe each mention for the
  # fact. A fact without mentions yields an empty list.
  mention_records = Array(fact_data[:mentions]).map do |mention|
    # Try to resolve the entity, create if not found
    entity = entity_service.resolve_or_create(
      mention[:name],
      type: mention[:type] || :unknown,
      description: "Auto-extracted entity"
    )

    {
      entity_id: entity.id,
      role: mention[:role],
      text: mention[:name],
      # A mention without its own confidence inherits the fact's confidence.
      confidence: mention[:confidence] || fact_data[:confidence]
    }
  end

  # Create the fact; facts without an extracted start date start today.
  fact = fact_service.create(
    fact_data[:text],
    valid_at: fact_data[:valid_at] || Date.today,
    invalid_at: fact_data[:invalid_at],
    extraction_method: :rule_based,
    confidence: fact_data[:confidence],
    mentions: mention_records
  )

  # Link to source content
  fact.add_source(content: content, type: :primary, confidence: fact_data[:confidence])

  puts "Saved fact: #{fact.fact_text}"
  puts " ID: #{fact.id}, Mentions: #{fact.entity_mentions.count}"
end
192
+
# Section 3: Query the Extracted Data
puts "\n--- Section 3: Querying Extracted Data ---\n"

# Find all extracted entities: resolved ones, in name order, each with the
# number of facts attached to it.
puts "\nAll extracted entities:"
resolved_entities = FactDb::Models::Entity.where(resolution_status: :resolved).order(:canonical_name)
resolved_entities.each do |entity|
  fact_count = entity.facts.count
  puts " #{entity.canonical_name} (#{entity.entity_type}) - #{fact_count} facts"
end

# Find facts by extraction method (first ten only).
puts "\nFacts extracted by rule-based extractor:"
rule_based_facts = FactDb::Models::Fact.by_extraction_method(:rule_based).limit(10)
rule_based_facts.each { |fact| puts " [#{fact.confidence}] #{fact.fact_text}" }
208
+
# Section 4: Pattern Examples
puts "\n--- Section 4: Extraction Pattern Examples ---\n"

# Stand-alone sentences, each fed to the extractor on its own.
test_patterns = [
  "John Smith works at Acme Corp as a Senior Engineer.",
  "Mary Johnson joined Microsoft on March 15, 2024.",
  "The CEO of Apple is Tim Cook.",
  "Sarah left Google on December 1, 2025.",
  "Amazon is headquartered in Seattle.",
  "Bob lives in New York City.",
  "Dr. Lisa Chen married James Wong in 2023."
]

puts "Testing individual patterns:\n"
test_patterns.each do |sentence|
  # No context is supplied for these ad-hoc inputs.
  extracted = extractor.extract(sentence, {})
  found_any = extracted.any?

  # The input line and the trailing blank line are printed in both cases.
  puts "Input: \"#{sentence}\""
  if found_any
    extracted.each do |fact|
      puts " -> #{fact[:text]} (confidence: #{fact[:confidence]})"
    end
  else
    puts " -> No facts extracted"
  end
  puts
end
237
+
# Section 5: Statistics
puts "\n--- Section 5: Extraction Statistics ---\n"

content_stats = clock.content_service.stats
fact_stats = clock.fact_service.stats
entity_stats = entity_service.stats

# Totals, printed in the order content -> entities -> facts.
[
  ["Content ingested", content_stats],
  ["Entities created", entity_stats],
  ["Facts extracted", fact_stats]
].each do |label, stats|
  puts "#{label}: #{stats[:total]}"
end

# The per-method breakdown is optional; print it only when the stats carry it.
by_method = fact_stats[:by_extraction_method]
if by_method
  puts "\nFacts by extraction method:"
  by_method.each { |extraction_method, count| puts " #{extraction_method}: #{count}" }
end

closing_rule = "=" * 60
puts "\n#{closing_rule}"
puts "Rule-Based Extraction Demo Complete!"
puts closing_rule