fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
@@ -1,48 +1,89 @@
1
1
  # Configuration
2
2
 
3
- FactDb uses the `anyway_config` gem for flexible configuration via environment variables, YAML files, or Ruby code.
3
+ FactDb uses the `anyway_config` gem for flexible configuration via environment variables, YAML files, or Ruby code. Configuration uses **nested sections** for better organization.
4
+
5
+ ## Configuration Sources
6
+
7
+ Configuration is loaded from multiple sources (lowest to highest priority):
8
+
9
+ 1. **Bundled defaults** - `lib/fact_db/config/defaults.yml` (ships with gem)
10
+ 2. **XDG user config** - `~/.config/fact_db/fact_db.yml`
11
+ 3. **Project config** - `./config/fact_db.yml`
12
+ 4. **Local overrides** - `./config/fact_db.local.yml` (gitignored)
13
+ 5. **Environment variables** - `FDB_*`
14
+ 6. **Ruby configure block** - `FactDb.configure { |c| ... }`
15
+
16
+ ## Configuration Access Pattern
17
+
18
+ FactDb uses nested configuration sections:
19
+
20
+ ```ruby
21
+ # Nested access
22
+ FactDb.config.database.url
23
+ FactDb.config.database.pool_size
24
+ FactDb.config.llm.provider
25
+ FactDb.config.llm.model
26
+ FactDb.config.ranking.ts_rank_weight
27
+ ```
4
28
 
5
29
  ## Configuration Methods
6
30
 
7
31
  ### Environment Variables
8
32
 
9
- All settings can be configured via environment variables with the `EVENT_CLOCK_` prefix:
33
+ All settings use the `FDB_` prefix with double underscores for nested values:
10
34
 
11
35
  ```bash
12
- export EVENT_CLOCK_DATABASE_URL="postgresql://localhost/fact_db"
13
- export EVENT_CLOCK_DATABASE_POOL_SIZE=10
14
- export EVENT_CLOCK_LLM_PROVIDER="openai"
15
- export EVENT_CLOCK_LLM_MODEL="gpt-4o-mini"
16
- export EVENT_CLOCK_LLM_API_KEY="sk-..."
17
- export EVENT_CLOCK_FUZZY_MATCH_THRESHOLD=0.85
36
+ # Database settings
37
+ export FDB_DATABASE__URL="postgresql://localhost/fact_db"
38
+ export FDB_DATABASE__POOL_SIZE=10
39
+ export FDB_DATABASE__TIMEOUT=30000
40
+
41
+ # LLM settings
42
+ export FDB_LLM__PROVIDER="openai"
43
+ export FDB_LLM__MODEL="gpt-4o-mini"
44
+ export FDB_LLM__API_KEY="sk-..."
45
+
46
+ # Top-level settings
47
+ export FDB_FUZZY_MATCH_THRESHOLD=0.85
48
+ export FDB_DEFAULT_EXTRACTOR="llm"
49
+ export FDB_LOG_LEVEL="debug"
18
50
  ```
19
51
 
20
52
  ### YAML Configuration
21
53
 
22
- Create `config/fact_db.yml`:
54
+ Create `config/fact_db.yml` with nested sections:
23
55
 
24
56
  ```yaml
25
57
  # Database
26
- database_url: postgresql://localhost/fact_db
27
- database_pool_size: 10
28
- database_timeout: 30000
58
+ database:
59
+ url: postgresql://localhost/fact_db
60
+ pool_size: 10
61
+ timeout: 30000
29
62
 
30
63
  # Embeddings
31
- embedding_dimensions: 1536
64
+ embedding:
65
+ dimensions: 1536
32
66
 
33
67
  # LLM
34
- llm_provider: openai
35
- llm_model: gpt-4o-mini
36
- llm_api_key: <%= ENV['OPENAI_API_KEY'] %>
37
-
38
- # Extraction
68
+ llm:
69
+ provider: openai
70
+ model: gpt-4o-mini
71
+ api_key: <%= ENV['OPENAI_API_KEY'] %>
72
+
73
+ # Ranking weights (should sum to 1.0)
74
+ ranking:
75
+ ts_rank_weight: 0.25
76
+ vector_similarity_weight: 0.25
77
+ entity_mention_weight: 0.15
78
+ direct_answer_weight: 0.15
79
+ term_overlap_weight: 0.10
80
+ relationship_match_weight: 0.05
81
+ confidence_weight: 0.05
82
+
83
+ # Top-level settings
39
84
  default_extractor: manual
40
-
41
- # Entity Resolution
42
85
  fuzzy_match_threshold: 0.85
43
86
  auto_merge_threshold: 0.95
44
-
45
- # Logging
46
87
  log_level: info
47
88
  ```
48
89
 
@@ -51,21 +92,21 @@ log_level: info
51
92
  ```ruby
52
93
  FactDb.configure do |config|
53
94
  # Database
54
- config.database_url = "postgresql://localhost/fact_db"
55
- config.database_pool_size = 10
56
- config.database_timeout = 30_000
95
+ config.database.url = "postgresql://localhost/fact_db"
96
+ config.database.pool_size = 10
97
+ config.database.timeout = 30_000
57
98
 
58
99
  # Embeddings
59
- config.embedding_dimensions = 1536
100
+ config.embedding.dimensions = 1536
60
101
  config.embedding_generator = ->(text) {
61
102
  # Your embedding generation logic
62
103
  OpenAI::Client.new.embeddings(input: text)
63
104
  }
64
105
 
65
- # LLM
66
- config.llm_provider = :openai
67
- config.llm_model = "gpt-4o-mini"
68
- config.llm_api_key = ENV['OPENAI_API_KEY']
106
+ # LLM (nested access)
107
+ config.llm.provider = :openai
108
+ config.llm.model = "gpt-4o-mini"
109
+ config.llm.api_key = ENV['OPENAI_API_KEY']
69
110
 
70
111
  # Or provide a pre-configured client
71
112
  config.llm_client = FactDb::LLM::Adapter.new(
@@ -73,10 +114,12 @@ FactDb.configure do |config|
73
114
  model: "claude-sonnet-4-20250514"
74
115
  )
75
116
 
76
- # Extraction
77
- config.default_extractor = :llm
117
+ # Ranking weights
118
+ config.ranking.ts_rank_weight = 0.30
119
+ config.ranking.vector_similarity_weight = 0.25
78
120
 
79
- # Entity Resolution
121
+ # Top-level settings
122
+ config.default_extractor = :llm
80
123
  config.fuzzy_match_threshold = 0.85
81
124
  config.auto_merge_threshold = 0.95
82
125
 
@@ -90,46 +133,62 @@ end
90
133
 
91
134
  ### Database Settings
92
135
 
136
+ Access: `FactDb.config.database.*`
137
+
93
138
  | Option | Type | Default | Description |
94
139
  |--------|------|---------|-------------|
95
- | `database_url` | String | nil | PostgreSQL connection URL (required) |
96
- | `database_pool_size` | Integer | 5 | Connection pool size |
97
- | `database_timeout` | Integer | 30000 | Query timeout in milliseconds |
140
+ | `url` | String | nil | PostgreSQL connection URL |
141
+ | `host` | String | localhost | Database host |
142
+ | `port` | Integer | 5432 | Database port |
143
+ | `name` | String | nil | Database name |
144
+ | `user` | String | nil | Database user |
145
+ | `password` | String | nil | Database password |
146
+ | `pool_size` | Integer | 5 | Connection pool size |
147
+ | `timeout` | Integer | 30000 | Query timeout in milliseconds |
98
148
 
99
149
  ### Embedding Settings
100
150
 
151
+ Access: `FactDb.config.embedding.*`
152
+
101
153
  | Option | Type | Default | Description |
102
154
  |--------|------|---------|-------------|
103
- | `embedding_dimensions` | Integer | 1536 | Vector dimensions (match your model) |
104
- | `embedding_generator` | Proc | nil | Custom embedding generation function |
155
+ | `dimensions` | Integer | 1536 | Vector dimensions (match your model) |
156
+ | `generator` | Proc | nil | Custom embedding generation function |
105
157
 
106
158
  ### LLM Settings
107
159
 
160
+ Access: `FactDb.config.llm.*`
161
+
108
162
  | Option | Type | Default | Description |
109
163
  |--------|------|---------|-------------|
110
- | `llm_client` | Object | nil | Pre-configured LLM client |
111
- | `llm_provider` | Symbol | nil | Provider name (:openai, :anthropic, etc.) |
112
- | `llm_model` | String | varies | Model name |
113
- | `llm_api_key` | String | nil | API key |
164
+ | `client` | Object | nil | Pre-configured LLM client |
165
+ | `provider` | Symbol | nil | Provider name (:openai, :anthropic, etc.) |
166
+ | `model` | String | varies | Model name |
167
+ | `api_key` | String | nil | API key |
168
+
169
+ ### Ranking Settings
114
170
 
115
- ### Extraction Settings
171
+ Access: `FactDb.config.ranking.*`
116
172
 
117
173
  | Option | Type | Default | Description |
118
174
  |--------|------|---------|-------------|
119
- | `default_extractor` | Symbol | :manual | Default extraction method |
175
+ | `ts_rank_weight` | Float | 0.25 | PostgreSQL full-text search weight |
176
+ | `vector_similarity_weight` | Float | 0.25 | Semantic similarity weight |
177
+ | `entity_mention_weight` | Float | 0.15 | Entity mentions weight |
178
+ | `direct_answer_weight` | Float | 0.15 | Direct answer pattern weight |
179
+ | `term_overlap_weight` | Float | 0.10 | Query word matches weight |
180
+ | `relationship_match_weight` | Float | 0.05 | Relationship words weight |
181
+ | `confidence_weight` | Float | 0.05 | Stored confidence score weight |
182
+
183
+ **Note:** Weights should sum to approximately 1.0.
120
184
 
121
- ### Resolution Settings
185
+ ### Top-Level Settings
122
186
 
123
187
  | Option | Type | Default | Description |
124
188
  |--------|------|---------|-------------|
189
+ | `default_extractor` | Symbol | :manual | Default extraction method |
125
190
  | `fuzzy_match_threshold` | Float | 0.85 | Minimum similarity for fuzzy matching |
126
191
  | `auto_merge_threshold` | Float | 0.95 | Similarity threshold for auto-merge |
127
-
128
- ### Logging Settings
129
-
130
- | Option | Type | Default | Description |
131
- |--------|------|---------|-------------|
132
- | `logger` | Logger | STDOUT | Logger instance |
133
192
  | `log_level` | Symbol | :info | Log level |
134
193
 
135
194
  ## LLM Provider Configuration
@@ -138,9 +197,9 @@ end
138
197
 
139
198
  ```ruby
140
199
  FactDb.configure do |config|
141
- config.llm_provider = :openai
142
- config.llm_model = "gpt-4o-mini" # or "gpt-4o", "gpt-4-turbo"
143
- config.llm_api_key = ENV['OPENAI_API_KEY']
200
+ config.llm.provider = :openai
201
+ config.llm.model = "gpt-4o-mini" # or "gpt-4o", "gpt-4-turbo"
202
+ config.llm.api_key = ENV['OPENAI_API_KEY']
144
203
  end
145
204
  ```
146
205
 
@@ -148,9 +207,9 @@ end
148
207
 
149
208
  ```ruby
150
209
  FactDb.configure do |config|
151
- config.llm_provider = :anthropic
152
- config.llm_model = "claude-sonnet-4-20250514"
153
- config.llm_api_key = ENV['ANTHROPIC_API_KEY']
210
+ config.llm.provider = :anthropic
211
+ config.llm.model = "claude-sonnet-4-20250514"
212
+ config.llm.api_key = ENV['ANTHROPIC_API_KEY']
154
213
  end
155
214
  ```
156
215
 
@@ -158,9 +217,9 @@ end
158
217
 
159
218
  ```ruby
160
219
  FactDb.configure do |config|
161
- config.llm_provider = :gemini
162
- config.llm_model = "gemini-2.0-flash"
163
- config.llm_api_key = ENV['GEMINI_API_KEY']
220
+ config.llm.provider = :gemini
221
+ config.llm.model = "gemini-2.0-flash"
222
+ config.llm.api_key = ENV['GEMINI_API_KEY']
164
223
  end
165
224
  ```
166
225
 
@@ -168,8 +227,8 @@ end
168
227
 
169
228
  ```ruby
170
229
  FactDb.configure do |config|
171
- config.llm_provider = :ollama
172
- config.llm_model = "llama3.2"
230
+ config.llm.provider = :ollama
231
+ config.llm.model = "llama3.2"
173
232
  # No API key needed for local Ollama
174
233
  end
175
234
  ```
@@ -178,8 +237,8 @@ end
178
237
 
179
238
  ```ruby
180
239
  FactDb.configure do |config|
181
- config.llm_provider = :bedrock
182
- config.llm_model = "claude-sonnet-4"
240
+ config.llm.provider = :bedrock
241
+ config.llm.model = "claude-sonnet-4"
183
242
  # Uses AWS credentials from environment
184
243
  end
185
244
  ```
@@ -188,45 +247,58 @@ end
188
247
 
189
248
  ```ruby
190
249
  FactDb.configure do |config|
191
- config.llm_provider = :openrouter
192
- config.llm_model = "anthropic/claude-sonnet-4"
193
- config.llm_api_key = ENV['OPENROUTER_API_KEY']
250
+ config.llm.provider = :openrouter
251
+ config.llm.model = "anthropic/claude-sonnet-4"
252
+ config.llm.api_key = ENV['OPENROUTER_API_KEY']
194
253
  end
195
254
  ```
196
255
 
256
+ ## XDG User Configuration
257
+
258
+ FactDb supports the XDG Base Directory Specification for user-level configuration:
259
+
260
+ - `~/.config/fact_db/fact_db.yml` (Linux/macOS)
261
+ - `~/Library/Application Support/fact_db/fact_db.yml` (macOS)
262
+ - `$XDG_CONFIG_HOME/fact_db/fact_db.yml` (if XDG_CONFIG_HOME is set)
263
+
264
+ This allows you to set personal defaults that apply across all projects.
265
+
197
266
  ## Environment-Specific Configuration
198
267
 
199
- Use YAML anchors for shared settings:
268
+ Configuration files (including the bundled defaults) support environment-specific overrides on top of a shared `defaults` section:
200
269
 
201
270
  ```yaml
202
271
  # config/fact_db.yml
203
- defaults: &defaults
204
- embedding_dimensions: 1536
272
+ defaults:
273
+ embedding:
274
+ dimensions: 1536
205
275
  fuzzy_match_threshold: 0.85
206
276
 
207
277
  development:
208
- <<: *defaults
209
- database_url: postgresql://localhost/fact_db_dev
278
+ database:
279
+ name: fact_db_development
210
280
  log_level: debug
211
281
 
212
282
  test:
213
- <<: *defaults
214
- database_url: postgresql://localhost/fact_db_test
283
+ database:
284
+ name: fact_db_test
215
285
  log_level: warn
216
286
 
217
287
  production:
218
- <<: *defaults
219
- database_url: <%= ENV['DATABASE_URL'] %>
288
+ database:
289
+ pool_size: 25
220
290
  log_level: info
221
291
  ```
222
292
 
293
+ Environment is detected from: `FDB_ENV` > `RAILS_ENV` > `RACK_ENV` > `'development'`
294
+
223
295
  ## Validation
224
296
 
225
297
  Validate configuration at startup:
226
298
 
227
299
  ```ruby
228
300
  FactDb.configure do |config|
229
- config.database_url = ENV['DATABASE_URL']
301
+ config.database.url = ENV['DATABASE_URL']
230
302
  end
231
303
 
232
304
  # Raises ConfigurationError if invalid
@@ -241,3 +313,12 @@ For testing, reset configuration between tests:
241
313
  # In test setup
242
314
  FactDb.reset_configuration!
243
315
  ```
316
+
317
+ ## Environment Helpers
318
+
319
+ ```ruby
320
+ FactDb.config.test? # true if the current environment is 'test'
321
+ FactDb.config.development? # true if the current environment is 'development'
322
+ FactDb.config.production? # true if the current environment is 'production'
323
+ FactDb.config.environment # returns current environment string
324
+ ```
@@ -85,7 +85,7 @@ facts.entity_service.add_alias(
85
85
 
86
86
  ```ruby
87
87
  entity.entity_aliases.each do |alias_record|
88
- puts "#{alias_record.alias_text} (#{alias_record.alias_type})"
88
+ puts "#{alias_record.name} (#{alias_record.type})"
89
89
  puts " Confidence: #{alias_record.confidence}"
90
90
  end
91
91
  ```
@@ -142,7 +142,7 @@ results = facts.batch_resolve_entities(names)
142
142
  results.each do |result|
143
143
  status = result[:status] # :resolved, :not_found, :error
144
144
  entity = result[:entity]
145
- puts "#{result[:name]}: #{status} -> #{entity&.canonical_name}"
145
+ puts "#{result[:name]}: #{status} -> #{entity&.name}"
146
146
  end
147
147
  ```
148
148
 
@@ -157,13 +157,13 @@ facts.entity_service.merge(entity1.id, entity2.id)
157
157
  # After merge:
158
158
  entity2.reload
159
159
  entity2.resolution_status # => "merged"
160
- entity2.merged_into_id # => entity1.id
160
+ entity2.canonical_id # => entity1.id
161
161
  ```
162
162
 
163
163
  ### What Happens on Merge
164
164
 
165
165
  1. Entity2's status changes to "merged"
166
- 2. Entity2 points to entity1 via `merged_into_id`
166
+ 2. Entity2 points to entity1 via `canonical_id`
167
167
  3. Entity2's aliases are copied to entity1
168
168
  4. All facts mentioning entity2 now also reference entity1
169
169
 
@@ -184,7 +184,7 @@ end
184
184
  ```ruby
185
185
  facts.entity_service.update(
186
186
  entity.id,
187
- canonical_name: "Paula M. Chen"
187
+ name: "Paula M. Chen"
188
188
  )
189
189
  ```
190
190
 
@@ -203,7 +203,7 @@ facts.entity_service.update(
203
203
  # Reclassify entity type
204
204
  facts.entity_service.update(
205
205
  entity.id,
206
- entity_type: :organization
206
+ type: :organization
207
207
  )
208
208
  ```
209
209
 
@@ -250,15 +250,15 @@ entities = facts.entity_service.search("Paula")
250
250
 
251
251
  ```ruby
252
252
  people = FactDb::Models::Entity
253
- .where(entity_type: 'person')
253
+ .where(type: 'person')
254
254
  .where.not(resolution_status: 'merged')
255
255
  ```
256
256
 
257
- ### Find Entities in Content
257
+ ### Find Entities in Source
258
258
 
259
259
  ```ruby
260
- # Find all entities mentioned in a content
261
- entities = facts.entity_service.in_content(content.id)
260
+ # Find all entities mentioned in a source
261
+ entities = facts.entity_service.in_source(source.id)
262
262
  ```
263
263
 
264
264
  ### Find Related Entities
@@ -327,9 +327,9 @@ unresolved = FactDb::Models::Entity
327
327
 
328
328
  unresolved.each do |entity|
329
329
  # Try to find duplicates
330
- similar = facts.entity_service.search(entity.canonical_name)
330
+ similar = facts.entity_service.search(entity.name)
331
331
  if similar.count > 1
332
- puts "Potential duplicate: #{entity.canonical_name}"
332
+ puts "Potential duplicate: #{entity.name}"
333
333
  end
334
334
  end
335
335
  ```
@@ -343,8 +343,8 @@ active_entities = FactDb::Models::Entity
343
343
 
344
344
  # Or follow the merge chain
345
345
  def canonical_entity(entity)
346
- while entity.merged_into_id
347
- entity = FactDb::Models::Entity.find(entity.merged_into_id)
346
+ while entity.canonical_id
347
+ entity = FactDb::Models::Entity.find(entity.canonical_id)
348
348
  end
349
349
  entity
350
350
  end
@@ -24,7 +24,7 @@ fact = facts.fact_service.create(
24
24
  { entity: microsoft, role: "organization", text: "Microsoft" }
25
25
  ],
26
26
  sources: [
27
- { content: content, type: "primary", excerpt: "...accepted the offer..." }
27
+ { source: source, type: "primary", excerpt: "...accepted the offer..." }
28
28
  ]
29
29
  )
30
30
  ```
@@ -36,19 +36,19 @@ Use AI to automatically extract facts:
36
36
  ```ruby
37
37
  # Configure LLM
38
38
  FactDb.configure do |config|
39
- config.llm_provider = :openai
40
- config.llm_api_key = ENV['OPENAI_API_KEY']
39
+ config.llm.provider = :openai
40
+ config.llm.api_key = ENV['OPENAI_API_KEY']
41
41
  end
42
42
 
43
43
  facts = FactDb.new
44
44
 
45
- # Extract facts from content
46
- extracted = facts.extract_facts(content.id, extractor: :llm)
45
+ # Extract facts from source
46
+ extracted = facts.extract_facts(source.id, extractor: :llm)
47
47
 
48
48
  extracted.each do |fact|
49
- puts fact.fact_text
49
+ puts fact.text
50
50
  puts " Valid from: #{fact.valid_at}"
51
- puts " Entities: #{fact.entity_mentions.map(&:entity).map(&:canonical_name)}"
51
+ puts " Entities: #{fact.entity_mentions.map(&:entity).map(&:name)}"
52
52
  end
53
53
  ```
54
54
 
@@ -57,7 +57,7 @@ end
57
57
  Use regex patterns for structured content:
58
58
 
59
59
  ```ruby
60
- extracted = facts.extract_facts(content.id, extractor: :rule_based)
60
+ extracted = facts.extract_facts(source.id, extractor: :rule_based)
61
61
  ```
62
62
 
63
63
  The rule-based extractor includes patterns for:
@@ -76,7 +76,7 @@ FactDb.configure do |config|
76
76
  end
77
77
 
78
78
  # Uses configured default
79
- extracted = facts.extract_facts(content.id)
79
+ extracted = facts.extract_facts(source.id)
80
80
  ```
81
81
 
82
82
  ## Fact Structure
@@ -85,8 +85,8 @@ Every extracted fact includes:
85
85
 
86
86
  ```ruby
87
87
  fact = Models::Fact.new(
88
- fact_text: "Paula Chen is Principal Engineer at Microsoft",
89
- fact_hash: "sha256...", # For deduplication
88
+ text: "Paula Chen is Principal Engineer at Microsoft",
89
+ digest: "sha256...", # For deduplication
90
90
  valid_at: Time.parse("2024-01-10"),
91
91
  invalid_at: nil, # nil = currently valid
92
92
  status: "canonical", # canonical, superseded, corroborated, synthesized
@@ -127,7 +127,7 @@ Facts link to source content:
127
127
 
128
128
  ```ruby
129
129
  fact.add_source(
130
- content: email_content,
130
+ source: email_source,
131
131
  type: "primary",
132
132
  excerpt: "Paula has accepted our offer to join as Principal Engineer...",
133
133
  confidence: 0.95
@@ -147,16 +147,16 @@ fact.add_source(
147
147
  Process multiple sources:
148
148
 
149
149
  ```ruby
150
- content_ids = [content1.id, content2.id, content3.id]
150
+ source_ids = [source1.id, source2.id, source3.id]
151
151
 
152
152
  # Sequential processing
153
- results = facts.batch_extract(content_ids, parallel: false)
153
+ results = facts.batch_extract(source_ids, parallel: false)
154
154
 
155
155
  # Parallel processing (default)
156
- results = facts.batch_extract(content_ids, parallel: true)
156
+ results = facts.batch_extract(source_ids, parallel: true)
157
157
 
158
158
  results.each do |result|
159
- puts "Content #{result[:content_id]}:"
159
+ puts "Source #{result[:source_id]}:"
160
160
  puts " Facts: #{result[:facts].count}"
161
161
  puts " Error: #{result[:error]}" if result[:error]
162
162
  end
@@ -168,11 +168,11 @@ Create custom extractors by extending the base class:
168
168
 
169
169
  ```ruby
170
170
  class MyExtractor < FactDb::Extractors::Base
171
- def extract(content)
171
+ def extract(source)
172
172
  extracted = []
173
173
 
174
174
  # Your extraction logic here
175
- # Parse content.raw_text
175
+ # Parse source.content
176
176
  # Create fact records
177
177
 
178
178
  extracted
@@ -180,8 +180,8 @@ class MyExtractor < FactDb::Extractors::Base
180
180
  end
181
181
 
182
182
  # Register and use
183
- facts.fact_service.extract_from_content(
184
- content.id,
183
+ facts.fact_service.extract_from_source(
184
+ source.id,
185
185
  extractor: MyExtractor.new(config)
186
186
  )
187
187
  ```
@@ -217,7 +217,7 @@ After extraction, you may want to:
217
217
  ### Resolve Entities
218
218
 
219
219
  ```ruby
220
- extracted = facts.extract_facts(content.id, extractor: :llm)
220
+ extracted = facts.extract_facts(source.id, extractor: :llm)
221
221
 
222
222
  extracted.each do |fact|
223
223
  fact.entity_mentions.each do |mention|
@@ -239,8 +239,8 @@ conflicts = facts.fact_service.resolver.find_conflicts(
239
239
 
240
240
  conflicts.each do |conflict|
241
241
  puts "Conflict between:"
242
- puts " #{conflict[:fact1].fact_text}"
243
- puts " #{conflict[:fact2].fact_text}"
242
+ puts " #{conflict[:fact1].text}"
243
+ puts " #{conflict[:fact2].text}"
244
244
  end
245
245
  ```
246
246
 
@@ -248,7 +248,7 @@ end
248
248
 
249
249
  ```ruby
250
250
  # If multiple sources say the same thing
251
- if fact1.fact_text.similar_to?(fact2.fact_text)
251
+ if fact1.text.similar_to?(fact2.text)
252
252
  facts.fact_service.resolver.corroborate(fact1.id, fact2.id)
253
253
  end
254
254
  ```
@@ -258,7 +258,7 @@ end
258
258
  ### 1. Review LLM Extractions
259
259
 
260
260
  ```ruby
261
- extracted = facts.extract_facts(content.id, extractor: :llm)
261
+ extracted = facts.extract_facts(source.id, extractor: :llm)
262
262
 
263
263
  extracted.select { |f| f.confidence < 0.8 }.each do |fact|
264
264
  # Flag for human review
@@ -282,7 +282,7 @@ end
282
282
  fact = facts.fact_service.create(
283
283
  "Important fact",
284
284
  valid_at: Date.today,
285
- sources: [{ content: source_content, type: "primary" }]
285
+ sources: [{ source: source_record, type: "primary" }]
286
286
  )
287
287
  ```
288
288
 
@@ -290,10 +290,10 @@ fact = facts.fact_service.create(
290
290
 
291
291
  ```ruby
292
292
  begin
293
- extracted = facts.extract_facts(content.id, extractor: :llm)
293
+ extracted = facts.extract_facts(source.id, extractor: :llm)
294
294
  rescue FactDb::ExtractionError => e
295
295
  logger.error "Extraction failed: #{e.message}"
296
296
  # Fall back to manual or rule-based
297
- extracted = facts.extract_facts(content.id, extractor: :rule_based)
297
+ extracted = facts.extract_facts(source.id, extractor: :rule_based)
298
298
  end
299
299
  ```