fact_db 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.envrc +1 -0
  3. data/CHANGELOG.md +48 -0
  4. data/COMMITS.md +196 -0
  5. data/README.md +102 -0
  6. data/Rakefile +41 -0
  7. data/db/migrate/001_enable_extensions.rb +7 -0
  8. data/db/migrate/002_create_contents.rb +44 -0
  9. data/db/migrate/003_create_entities.rb +36 -0
  10. data/db/migrate/004_create_entity_aliases.rb +18 -0
  11. data/db/migrate/005_create_facts.rb +65 -0
  12. data/db/migrate/006_create_entity_mentions.rb +18 -0
  13. data/db/migrate/007_create_fact_sources.rb +18 -0
  14. data/docs/api/extractors/index.md +71 -0
  15. data/docs/api/extractors/llm.md +162 -0
  16. data/docs/api/extractors/manual.md +92 -0
  17. data/docs/api/extractors/rule-based.md +165 -0
  18. data/docs/api/facts.md +300 -0
  19. data/docs/api/index.md +66 -0
  20. data/docs/api/models/content.md +165 -0
  21. data/docs/api/models/entity.md +202 -0
  22. data/docs/api/models/fact.md +270 -0
  23. data/docs/api/models/index.md +77 -0
  24. data/docs/api/pipeline/extraction.md +175 -0
  25. data/docs/api/pipeline/index.md +72 -0
  26. data/docs/api/pipeline/resolution.md +209 -0
  27. data/docs/api/services/content-service.md +166 -0
  28. data/docs/api/services/entity-service.md +202 -0
  29. data/docs/api/services/fact-service.md +223 -0
  30. data/docs/api/services/index.md +55 -0
  31. data/docs/architecture/database-schema.md +293 -0
  32. data/docs/architecture/entity-resolution.md +293 -0
  33. data/docs/architecture/index.md +149 -0
  34. data/docs/architecture/temporal-facts.md +268 -0
  35. data/docs/architecture/three-layer-model.md +242 -0
  36. data/docs/assets/css/custom.css +137 -0
  37. data/docs/assets/fact_db.jpg +0 -0
  38. data/docs/assets/images/fact_db.jpg +0 -0
  39. data/docs/concepts.md +183 -0
  40. data/docs/examples/basic-usage.md +235 -0
  41. data/docs/examples/hr-onboarding.md +312 -0
  42. data/docs/examples/index.md +64 -0
  43. data/docs/examples/news-analysis.md +288 -0
  44. data/docs/getting-started/database-setup.md +170 -0
  45. data/docs/getting-started/index.md +71 -0
  46. data/docs/getting-started/installation.md +98 -0
  47. data/docs/getting-started/quick-start.md +191 -0
  48. data/docs/guides/batch-processing.md +325 -0
  49. data/docs/guides/configuration.md +243 -0
  50. data/docs/guides/entity-management.md +364 -0
  51. data/docs/guides/extracting-facts.md +299 -0
  52. data/docs/guides/index.md +22 -0
  53. data/docs/guides/ingesting-content.md +252 -0
  54. data/docs/guides/llm-integration.md +299 -0
  55. data/docs/guides/temporal-queries.md +315 -0
  56. data/docs/index.md +121 -0
  57. data/examples/README.md +130 -0
  58. data/examples/basic_usage.rb +164 -0
  59. data/examples/entity_management.rb +216 -0
  60. data/examples/hr_system.rb +428 -0
  61. data/examples/rule_based_extraction.rb +258 -0
  62. data/examples/temporal_queries.rb +245 -0
  63. data/lib/fact_db/config.rb +71 -0
  64. data/lib/fact_db/database.rb +45 -0
  65. data/lib/fact_db/errors.rb +10 -0
  66. data/lib/fact_db/extractors/base.rb +117 -0
  67. data/lib/fact_db/extractors/llm_extractor.rb +179 -0
  68. data/lib/fact_db/extractors/manual_extractor.rb +53 -0
  69. data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
  70. data/lib/fact_db/llm/adapter.rb +109 -0
  71. data/lib/fact_db/models/content.rb +62 -0
  72. data/lib/fact_db/models/entity.rb +84 -0
  73. data/lib/fact_db/models/entity_alias.rb +26 -0
  74. data/lib/fact_db/models/entity_mention.rb +33 -0
  75. data/lib/fact_db/models/fact.rb +192 -0
  76. data/lib/fact_db/models/fact_source.rb +35 -0
  77. data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
  78. data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
  79. data/lib/fact_db/resolution/entity_resolver.rb +261 -0
  80. data/lib/fact_db/resolution/fact_resolver.rb +259 -0
  81. data/lib/fact_db/services/content_service.rb +93 -0
  82. data/lib/fact_db/services/entity_service.rb +150 -0
  83. data/lib/fact_db/services/fact_service.rb +193 -0
  84. data/lib/fact_db/temporal/query.rb +125 -0
  85. data/lib/fact_db/temporal/timeline.rb +134 -0
  86. data/lib/fact_db/version.rb +5 -0
  87. data/lib/fact_db.rb +141 -0
  88. data/mkdocs.yml +198 -0
  89. metadata +288 -0
@@ -0,0 +1,293 @@
1
+ # Database Schema
2
+
3
+ FactDb uses PostgreSQL with the pgvector extension for semantic search capabilities.
4
+
5
+ ## Entity Relationship Diagram
6
+
7
+ ```mermaid
8
+ erDiagram
9
+ contents ||--o{ fact_sources : "sourced by"
10
+ entities ||--o{ entity_aliases : "has"
11
+ entities ||--o{ entity_mentions : "mentioned in"
12
+ facts ||--o{ entity_mentions : "mentions"
13
+ facts ||--o{ fact_sources : "sourced from"
14
+ facts ||--o| facts : "superseded by"
15
+
16
+ contents {
17
+ bigint id PK
18
+ string content_hash UK
19
+ string content_type
20
+ text raw_text
21
+ string title
22
+ string source_uri
23
+ jsonb source_metadata
24
+ vector embedding
25
+ timestamptz captured_at
26
+ timestamptz created_at
27
+ }
28
+
29
+ entities {
30
+ bigint id PK
31
+ string canonical_name
32
+ string entity_type
33
+ string resolution_status
34
+ bigint merged_into_id FK
35
+ jsonb metadata
36
+ vector embedding
37
+ timestamptz created_at
38
+ }
39
+
40
+ entity_aliases {
41
+ bigint id PK
42
+ bigint entity_id FK
43
+ string alias_text
44
+ string alias_type
45
+ float confidence
46
+ }
47
+
48
+ facts {
49
+ bigint id PK
50
+ text fact_text
51
+ string fact_hash
52
+ timestamptz valid_at
53
+ timestamptz invalid_at
54
+ string status
55
+ bigint superseded_by_id FK
56
+ bigint[] derived_from_ids
57
+ bigint[] corroborated_by_ids
58
+ float confidence
59
+ string extraction_method
60
+ jsonb metadata
61
+ vector embedding
62
+ timestamptz created_at
63
+ }
64
+
65
+ entity_mentions {
66
+ bigint id PK
67
+ bigint fact_id FK
68
+ bigint entity_id FK
69
+ string mention_text
70
+ string mention_role
71
+ float confidence
72
+ }
73
+
74
+ fact_sources {
75
+ bigint id PK
76
+ bigint fact_id FK
77
+ bigint content_id FK
78
+ string source_type
79
+ text excerpt
80
+ float confidence
81
+ }
82
+ ```
83
+
84
+ ## Tables
85
+
86
+ ### contents
87
+
88
+ Stores immutable source documents.
89
+
90
+ ```sql
91
+ CREATE TABLE contents (
92
+ id BIGSERIAL PRIMARY KEY,
93
+ content_hash VARCHAR(64) NOT NULL UNIQUE,
94
+ content_type VARCHAR(50) NOT NULL,
95
+ raw_text TEXT NOT NULL,
96
+ title VARCHAR(255),
97
+ source_uri TEXT,
98
+ source_metadata JSONB NOT NULL DEFAULT '{}',
99
+ embedding VECTOR(1536),
100
+ captured_at TIMESTAMPTZ NOT NULL,
101
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
102
+ );
103
+
104
+ CREATE INDEX idx_contents_type ON contents(content_type);
105
+ CREATE INDEX idx_contents_captured ON contents(captured_at);
106
+ CREATE INDEX idx_contents_text ON contents USING gin(to_tsvector('english', raw_text));
107
+ CREATE INDEX idx_contents_embedding ON contents USING hnsw(embedding vector_cosine_ops);
108
+ ```
109
+
110
+ ### entities
111
+
112
+ Stores resolved identities.
113
+
114
+ ```sql
115
+ CREATE TABLE entities (
116
+ id BIGSERIAL PRIMARY KEY,
117
+ canonical_name VARCHAR(255) NOT NULL,
118
+ entity_type VARCHAR(50) NOT NULL,
119
+ resolution_status VARCHAR(20) NOT NULL DEFAULT 'unresolved',
120
+ merged_into_id BIGINT REFERENCES entities(id),
121
+ metadata JSONB NOT NULL DEFAULT '{}',
122
+ embedding VECTOR(1536),
123
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
124
+ );
125
+
126
+ CREATE INDEX idx_entities_name ON entities(canonical_name);
127
+ CREATE INDEX idx_entities_type ON entities(entity_type);
128
+ CREATE INDEX idx_entities_status ON entities(resolution_status);
129
+ CREATE INDEX idx_entities_embedding ON entities USING hnsw(embedding vector_cosine_ops);
130
+ ```
131
+
132
+ ### entity_aliases
133
+
134
+ Stores alternative names for entities.
135
+
136
+ ```sql
137
+ CREATE TABLE entity_aliases (
138
+ id BIGSERIAL PRIMARY KEY,
139
+ entity_id BIGINT NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
140
+ alias_text VARCHAR(255) NOT NULL,
141
+ alias_type VARCHAR(50),
142
+ confidence FLOAT DEFAULT 1.0
143
+ );
144
+
145
+ CREATE INDEX idx_aliases_entity ON entity_aliases(entity_id);
146
+ CREATE INDEX idx_aliases_text ON entity_aliases(alias_text);
147
+ CREATE UNIQUE INDEX idx_aliases_unique ON entity_aliases(entity_id, alias_text);
148
+ ```
149
+
150
+ ### facts
151
+
152
+ Stores temporal assertions.
153
+
154
+ ```sql
155
+ CREATE TABLE facts (
156
+ id BIGSERIAL PRIMARY KEY,
157
+ fact_text TEXT NOT NULL,
158
+ fact_hash VARCHAR(64) NOT NULL,
159
+ valid_at TIMESTAMPTZ NOT NULL,
160
+ invalid_at TIMESTAMPTZ,
161
+ status VARCHAR(20) NOT NULL DEFAULT 'canonical',
162
+ superseded_by_id BIGINT REFERENCES facts(id),
163
+ derived_from_ids BIGINT[],
164
+ corroborated_by_ids BIGINT[],
165
+ confidence FLOAT DEFAULT 1.0,
166
+ extraction_method VARCHAR(50) NOT NULL,
167
+ metadata JSONB NOT NULL DEFAULT '{}',
168
+ embedding VECTOR(1536),
169
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
170
+ );
171
+
172
+ CREATE INDEX idx_facts_status ON facts(status);
173
+ CREATE INDEX idx_facts_valid ON facts(valid_at);
174
+ CREATE INDEX idx_facts_invalid ON facts(invalid_at);
175
+ CREATE INDEX idx_facts_temporal ON facts(valid_at, invalid_at);
176
+ CREATE INDEX idx_facts_method ON facts(extraction_method);
177
+ CREATE INDEX idx_facts_text ON facts USING gin(to_tsvector('english', fact_text));
178
+ CREATE INDEX idx_facts_embedding ON facts USING hnsw(embedding vector_cosine_ops);
179
+ ```
180
+
181
+ ### entity_mentions
182
+
183
+ Links facts to mentioned entities.
184
+
185
+ ```sql
186
+ CREATE TABLE entity_mentions (
187
+ id BIGSERIAL PRIMARY KEY,
188
+ fact_id BIGINT NOT NULL REFERENCES facts(id) ON DELETE CASCADE,
189
+ entity_id BIGINT NOT NULL REFERENCES entities(id),
190
+ mention_text VARCHAR(255) NOT NULL,
191
+ mention_role VARCHAR(50) NOT NULL,
192
+ confidence FLOAT DEFAULT 1.0
193
+ );
194
+
195
+ CREATE INDEX idx_mentions_fact ON entity_mentions(fact_id);
196
+ CREATE INDEX idx_mentions_entity ON entity_mentions(entity_id);
197
+ CREATE INDEX idx_mentions_role ON entity_mentions(mention_role);
198
+ ```
199
+
200
+ ### fact_sources
201
+
202
+ Links facts to source content.
203
+
204
+ ```sql
205
+ CREATE TABLE fact_sources (
206
+ id BIGSERIAL PRIMARY KEY,
207
+ fact_id BIGINT NOT NULL REFERENCES facts(id) ON DELETE CASCADE,
208
+ content_id BIGINT NOT NULL REFERENCES contents(id),
209
+ source_type VARCHAR(50) NOT NULL DEFAULT 'primary',
210
+ excerpt TEXT,
211
+ confidence FLOAT DEFAULT 1.0
212
+ );
213
+
214
+ CREATE INDEX idx_sources_fact ON fact_sources(fact_id);
215
+ CREATE INDEX idx_sources_content ON fact_sources(content_id);
216
+ CREATE INDEX idx_sources_type ON fact_sources(source_type);
217
+ ```
218
+
219
+ ## Vector Indexes
220
+
221
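+ FactDb uses HNSW indexes for fast approximate nearest neighbor search. These are the same embedding indexes shown in the table definitions above, repeated here with explicit build parameters; create each index only once: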
222
+
223
+ ```sql
224
+ -- Contents semantic search
225
+ CREATE INDEX idx_contents_embedding ON contents
226
+ USING hnsw(embedding vector_cosine_ops)
227
+ WITH (m = 16, ef_construction = 64);
228
+
229
+ -- Entities semantic search
230
+ CREATE INDEX idx_entities_embedding ON entities
231
+ USING hnsw(embedding vector_cosine_ops)
232
+ WITH (m = 16, ef_construction = 64);
233
+
234
+ -- Facts semantic search
235
+ CREATE INDEX idx_facts_embedding ON facts
236
+ USING hnsw(embedding vector_cosine_ops)
237
+ WITH (m = 16, ef_construction = 64);
238
+ ```
239
+
240
+ ## Temporal Query Patterns
241
+
242
+ ### Currently Valid Facts
243
+
244
+ ```sql
245
+ SELECT * FROM facts
246
+ WHERE status = 'canonical'
247
+ AND invalid_at IS NULL;
248
+ ```
249
+
250
+ ### Facts Valid at Point in Time
251
+
252
+ ```sql
253
+ SELECT * FROM facts
254
+ WHERE status IN ('canonical', 'corroborated')
255
+ AND valid_at <= '2024-03-15'
256
+ AND (invalid_at IS NULL OR invalid_at > '2024-03-15');
257
+ ```
258
+
259
+ ### Entity Timeline
260
+
261
+ ```sql
262
+ SELECT f.* FROM facts f
263
+ JOIN entity_mentions em ON em.fact_id = f.id
264
+ WHERE em.entity_id = 123
265
+ ORDER BY f.valid_at ASC;
266
+ ```
267
+
268
+ ### Semantic Search
269
+
270
+ ```sql
271
+ SELECT *, embedding <=> '[...]' AS distance
272
+ FROM contents
273
+ ORDER BY embedding <=> '[...]'
274
+ LIMIT 10;
275
+ ```
276
+
277
+ ## Maintenance
278
+
279
+ ### Vacuum and Analyze
280
+
281
+ ```sql
282
+ VACUUM ANALYZE contents;
283
+ VACUUM ANALYZE entities;
284
+ VACUUM ANALYZE facts;
285
+ ```
286
+
287
+ ### Reindex Vectors
288
+
289
+ ```sql
290
+ REINDEX INDEX idx_contents_embedding;
291
+ REINDEX INDEX idx_entities_embedding;
292
+ REINDEX INDEX idx_facts_embedding;
293
+ ```
@@ -0,0 +1,293 @@
1
+ # Entity Resolution
2
+
3
+ Entity resolution is the process of matching text mentions to canonical entities in the system.
4
+
5
+ ## Overview
6
+
7
+ When extracting facts from content, mentions like "Paula", "P. Chen", or "Paula Chen" need to be resolved to a single canonical entity.
8
+
9
+ ```mermaid
10
+ graph LR
11
+ M1["'Paula'"] --> R{EntityResolver}
12
+ M2["'P. Chen'"] --> R
13
+ M3["'Paula Chen'"] --> R
14
+ M4["'Chen, Paula'"] --> R
15
+ R --> E["Entity: Paula Chen"]
16
+
17
+ style M1 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
18
+ style M2 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
19
+ style M3 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
20
+ style M4 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
21
+ style R fill:#B45309,stroke:#92400E,color:#FFFFFF
22
+ style E fill:#047857,stroke:#065F46,color:#FFFFFF
23
+ ```
24
+
25
+ ## Resolution Strategies
26
+
27
+ The resolver tries multiple strategies in order:
28
+
29
+ ### 1. Exact Match
30
+
31
+ Direct match against canonical names:
32
+
33
+ ```ruby
34
+ # Looking for "Microsoft"
35
+ entity = facts.resolve_entity("Microsoft")
36
+ # Matches: Entity(canonical_name: "Microsoft")
37
+ ```
38
+
39
+ ### 2. Alias Match
40
+
41
+ Match against registered aliases:
42
+
43
+ ```ruby
44
+ # Entity has aliases: ["MS", "MSFT", "Microsoft Corp"]
45
+ entity = facts.resolve_entity("MSFT")
46
+ # Matches via alias
47
+ ```
48
+
49
+ ### 3. Fuzzy Match
50
+
51
+ Uses Levenshtein distance to match names despite typos and minor spelling variations:
52
+
53
+ ```ruby
54
+ # Looking for "Microsft" (typo)
55
+ entity = facts.resolve_entity("Microsft")
56
+ # Fuzzy matches "Microsoft" if similarity > threshold
57
+ ```
58
+
59
+ Configuration:
60
+
61
+ ```ruby
62
+ FactDb.configure do |config|
63
+ config.fuzzy_match_threshold = 0.85 # 85% similarity required
64
+ config.auto_merge_threshold = 0.95 # Auto-merge at 95%
65
+ end
66
+ ```
67
+
68
+ ### 4. Type-Constrained
69
+
70
+ Pass the `type:` option to limit matches to a specific entity type:
71
+
72
+ ```ruby
73
+ # Only match person entities
74
+ person = facts.resolve_entity("Paula", type: :person)
75
+
76
+ # Only match organizations
77
+ org = facts.resolve_entity("Platform", type: :organization)
78
+ ```
79
+
80
+ ## Creating Entities
81
+
82
+ ### Basic Creation
83
+
84
+ ```ruby
85
+ entity = facts.entity_service.create(
86
+ "Paula Chen",
87
+ type: :person
88
+ )
89
+ ```
90
+
91
+ ### With Aliases
92
+
93
+ ```ruby
94
+ entity = facts.entity_service.create(
95
+ "Paula Chen",
96
+ type: :person,
97
+ aliases: ["Paula", "P. Chen", "Chen, Paula"]
98
+ )
99
+ ```
100
+
101
+ ### With Metadata
102
+
103
+ ```ruby
104
+ entity = facts.entity_service.create(
105
+ "Paula Chen",
106
+ type: :person,
107
+ aliases: ["Paula"],
108
+ metadata: {
109
+ department: "Engineering",
110
+ start_date: "2024-01-10",
111
+ employee_id: "E12345"
112
+ }
113
+ )
114
+ ```
115
+
116
+ ## Managing Aliases
117
+
118
+ ### Add Alias
119
+
120
+ ```ruby
121
+ facts.entity_service.add_alias(
122
+ entity.id,
123
+ "P. Chen",
124
+ type: :abbreviation,
125
+ confidence: 0.9
126
+ )
127
+ ```
128
+
129
+ ### List Aliases
130
+
131
+ ```ruby
132
+ entity.entity_aliases.each do |alias_record|
133
+ puts "#{alias_record.alias_text} (#{alias_record.alias_type})"
134
+ end
135
+ ```
136
+
137
+ ### Remove Alias
138
+
139
+ ```ruby
140
+ facts.entity_service.remove_alias(entity.id, "Old Name")
141
+ ```
142
+
143
+ ## Merging Entities
144
+
145
+ When duplicate entities are discovered, merge them so that only one canonical entity remains:
146
+
147
+ ```ruby
148
+ # Merge entity2 into entity1
149
+ facts.entity_service.merge(
150
+ entity1.id, # Keep this one
151
+ entity2.id # Merge into entity1
152
+ )
153
+
154
+ # After merge:
155
+ # - entity2.resolution_status => "merged"
156
+ # - entity2.merged_into_id => entity1.id
157
+ # - All facts mentioning entity2 now also reference entity1
158
+ ```
159
+
160
+ ### Automatic Merging
161
+
162
+ High-confidence matches can be auto-merged:
163
+
164
+ ```ruby
165
+ FactDb.configure do |config|
166
+ config.auto_merge_threshold = 0.95
167
+ end
168
+
169
+ # When resolving, if similarity > 0.95, entities auto-merge
170
+ ```
171
+
172
+ ## Resolution in Extraction
173
+
174
+ ### Manual Resolution
175
+
176
+ ```ruby
177
+ fact = facts.fact_service.create(
178
+ "Paula joined the team",
179
+ valid_at: Date.today,
180
+ mentions: [
181
+ {
182
+ entity: paula_entity,
183
+ text: "Paula",
184
+ role: "subject",
185
+ confidence: 1.0
186
+ }
187
+ ]
188
+ )
189
+ ```
190
+
191
+ ### Automatic Resolution
192
+
193
+ The LLM extractor resolves mentions automatically:
194
+
195
+ ```ruby
196
+ extracted = facts.extract_facts(content.id, extractor: :llm)
197
+
198
+ extracted.each do |fact|
199
+ fact.entity_mentions.each do |mention|
200
+ puts "Resolved '#{mention.mention_text}' to #{mention.entity.canonical_name}"
201
+ puts " Role: #{mention.mention_role}"
202
+ puts " Confidence: #{mention.confidence}"
203
+ end
204
+ end
205
+ ```
206
+
207
+ ## Mention Roles
208
+
209
+ When linking entities to facts, specify the role:
210
+
211
+ | Role | Description | Example |
212
+ |------|-------------|---------|
213
+ | `subject` | Primary actor | "Paula joined..." |
214
+ | `object` | Target of action | "...hired Paula" |
215
+ | `organization` | Company/team | "...at Microsoft" |
216
+ | `location` | Place | "...in Seattle" |
217
+ | `role` | Job title/position | "...as Engineer" |
218
+ | `temporal` | Time reference | "...in Q4 2024" |
219
+
220
+ ```ruby
221
+ fact = facts.fact_service.create(
222
+ "Paula Chen joined Microsoft as Principal Engineer in Seattle",
223
+ valid_at: Date.parse("2024-01-10"),
224
+ mentions: [
225
+ { entity: paula, role: "subject", text: "Paula Chen" },
226
+ { entity: microsoft, role: "organization", text: "Microsoft" },
227
+ { entity: seattle, role: "location", text: "Seattle" }
228
+ ]
229
+ )
230
+ ```
231
+
232
+ ## Batch Resolution
233
+
234
+ For processing multiple entities efficiently:
235
+
236
+ ```ruby
237
+ names = ["Paula Chen", "John Smith", "Acme Corp", "Seattle"]
238
+
239
+ results = facts.batch_resolve_entities(names)
240
+
241
+ results.each do |result|
242
+ puts "#{result[:name]}: #{result[:status]}"
243
+ puts " Entity: #{result[:entity]&.canonical_name}"
244
+ end
245
+ ```
246
+
247
+ ## Best Practices
248
+
249
+ ### 1. Create Comprehensive Aliases
250
+
251
+ ```ruby
252
+ # Include common variations
253
+ entity = facts.entity_service.create(
254
+ "International Business Machines Corporation",
255
+ type: :organization,
256
+ aliases: [
257
+ "IBM",
258
+ "Big Blue",
259
+ "International Business Machines"
260
+ ]
261
+ )
262
+ ```
263
+
264
+ ### 2. Use Type Constraints
265
+
266
+ ```ruby
267
+ # Avoid ambiguous matches
268
+ entity = facts.resolve_entity("Apple", type: :organization)
269
+ # Won't match "Apple" as a fruit/food entity
270
+ ```
271
+
272
+ ### 3. Review Fuzzy Matches
273
+
274
+ ```ruby
275
+ # Log low-confidence resolutions for review
276
+ if resolution.confidence < 0.9
277
+ logger.warn "Low confidence resolution: #{resolution}"
278
+ end
279
+ ```
280
+
281
+ ### 4. Handle Unresolved Mentions
282
+
283
+ ```ruby
284
+ entity = facts.resolve_entity("Unknown Person")
285
+ if entity.nil?
286
+ # Create new entity or flag for review
287
+ entity = facts.entity_service.create(
288
+ "Unknown Person",
289
+ type: :person,
290
+ metadata: { needs_review: true }
291
+ )
292
+ end
293
+ ```