fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
data/docs/guides/ingesting-content.md CHANGED
@@ -16,8 +16,8 @@ content = facts.ingest(
16
16
  ## Full Options
17
17
 
18
18
  ```ruby
19
- content = facts.ingest(
20
- raw_text,
19
+ source = facts.ingest(
20
+ text_content,
21
21
  type: :email,
22
22
  title: "RE: Offer Letter - Paula Chen",
23
23
  source_uri: "mailto:hr@company.com/msg/12345",
@@ -89,12 +89,12 @@ Content is automatically deduplicated by SHA256 hash:
89
89
 
90
90
  ```ruby
91
91
  # First ingestion - creates new record
92
- content1 = facts.ingest("Hello world", type: :note)
92
+ source1 = facts.ingest("Hello world", type: :note)
93
93
 
94
94
  # Second ingestion - returns existing record
95
- content2 = facts.ingest("Hello world", type: :note)
95
+ source2 = facts.ingest("Hello world", type: :note)
96
96
 
97
- content1.id == content2.id # => true
97
+ source1.id == source2.id # => true
98
98
  ```
99
99
 
100
100
  ## Timestamps
@@ -132,29 +132,29 @@ contents = documents.map do |doc|
132
132
  end
133
133
  ```
134
134
 
135
- ## Content Service
135
+ ## Source Service
136
136
 
137
- For advanced operations, use the content service directly:
137
+ For advanced operations, use the source service directly:
138
138
 
139
139
  ```ruby
140
- # Create content
141
- content = facts.content_service.create(
142
- raw_text,
140
+ # Create source
141
+ source = facts.source_service.create(
142
+ text_content,
143
143
  type: :document,
144
144
  title: "Annual Report"
145
145
  )
146
146
 
147
147
  # Find by ID
148
- content = facts.content_service.find(content_id)
148
+ source = facts.source_service.find(source_id)
149
149
 
150
150
  # Find by hash
151
- content = facts.content_service.find_by_hash(sha256_hash)
151
+ source = facts.source_service.find_by_hash(sha256_hash)
152
152
 
153
153
  # Search by text
154
- results = facts.content_service.search("quarterly earnings")
154
+ results = facts.source_service.search("quarterly earnings")
155
155
 
156
156
  # Semantic search (requires embedding)
157
- results = facts.content_service.semantic_search(
157
+ results = facts.source_service.semantic_search(
158
158
  "financial performance",
159
159
  limit: 10
160
160
  )
data/docs/guides/llm-integration.md CHANGED
@@ -18,9 +18,9 @@ gem 'ruby_llm'
18
18
 
19
19
  ```ruby
20
20
  FactDb.configure do |config|
21
- config.llm_provider = :openai
22
- config.llm_model = "gpt-4o-mini"
23
- config.llm_api_key = ENV['OPENAI_API_KEY']
21
+ config.llm.provider = :openai
22
+ config.llm.model = "gpt-4o-mini"
23
+ config.llm.api_key = ENV['OPENAI_API_KEY']
24
24
  end
25
25
  ```
26
26
 
@@ -28,9 +28,9 @@ gem 'ruby_llm'
28
28
 
29
29
  ```ruby
30
30
  FactDb.configure do |config|
31
- config.llm_provider = :anthropic
32
- config.llm_model = "claude-sonnet-4-20250514"
33
- config.llm_api_key = ENV['ANTHROPIC_API_KEY']
31
+ config.llm.provider = :anthropic
32
+ config.llm.model = "claude-sonnet-4-20250514"
33
+ config.llm.api_key = ENV['ANTHROPIC_API_KEY']
34
34
  end
35
35
  ```
36
36
 
@@ -38,9 +38,9 @@ gem 'ruby_llm'
38
38
 
39
39
  ```ruby
40
40
  FactDb.configure do |config|
41
- config.llm_provider = :gemini
42
- config.llm_model = "gemini-2.0-flash"
43
- config.llm_api_key = ENV['GEMINI_API_KEY']
41
+ config.llm.provider = :gemini
42
+ config.llm.model = "gemini-2.0-flash"
43
+ config.llm.api_key = ENV['GEMINI_API_KEY']
44
44
  end
45
45
  ```
46
46
 
@@ -48,11 +48,19 @@ gem 'ruby_llm'
48
48
 
49
49
  ```ruby
50
50
  FactDb.configure do |config|
51
- config.llm_provider = :ollama
52
- config.llm_model = "llama3.2"
51
+ config.llm.provider = :ollama
52
+ config.llm.model = "llama3.2"
53
53
  end
54
54
  ```
55
55
 
56
+ === "Environment Variables"
57
+
58
+ ```bash
59
+ export FDB_LLM__PROVIDER=openai
60
+ export FDB_LLM__MODEL=gpt-4o-mini
61
+ export FDB_LLM__API_KEY=sk-...
62
+ ```
63
+
56
64
  ## Supported Providers
57
65
 
58
66
  | Provider | Models | Config Key |
@@ -85,20 +93,20 @@ PROVIDER_DEFAULTS = {
85
93
  facts = FactDb.new
86
94
 
87
95
  # Ingest content
88
- content = facts.ingest(
96
+ source = facts.ingest(
89
97
  "Paula Chen joined Microsoft as Principal Engineer on January 10, 2024. She previously worked at Google for 5 years.",
90
98
  type: :announcement
91
99
  )
92
100
 
93
101
  # Extract facts using LLM
94
- extracted = facts.extract_facts(content.id, extractor: :llm)
102
+ extracted = facts.extract_facts(source.id, extractor: :llm)
95
103
 
96
104
  extracted.each do |fact|
97
- puts "Fact: #{fact.fact_text}"
105
+ puts "Fact: #{fact.text}"
98
106
  puts " Valid: #{fact.valid_at}"
99
107
  puts " Confidence: #{fact.confidence}"
100
108
  fact.entity_mentions.each do |m|
101
- puts " Entity: #{m.entity.canonical_name} (#{m.mention_role})"
109
+ puts " Entity: #{m.entity.name} (#{m.mention_role})"
102
110
  end
103
111
  end
104
112
  ```
@@ -122,7 +130,7 @@ Extract temporal facts from this content. For each fact:
122
130
  4. Assess confidence level
123
131
 
124
132
  Content:
125
- {content.raw_text}
133
+ {source.content}
126
134
 
127
135
  Return JSON:
128
136
  {
@@ -174,12 +182,12 @@ puts response
174
182
 
175
183
  ```ruby
176
184
  begin
177
- extracted = facts.extract_facts(content.id, extractor: :llm)
185
+ extracted = facts.extract_facts(source.id, extractor: :llm)
178
186
  rescue FactDb::ConfigurationError => e
179
187
  # LLM not configured or ruby_llm missing
180
188
  puts "LLM Error: #{e.message}"
181
189
  # Fall back to rule-based
182
- extracted = facts.extract_facts(content.id, extractor: :rule_based)
190
+ extracted = facts.extract_facts(source.id, extractor: :rule_based)
183
191
  rescue StandardError => e
184
192
  # API error, rate limit, etc.
185
193
  puts "Extraction failed: #{e.message}"
@@ -191,16 +199,16 @@ end
191
199
  Process multiple documents efficiently:
192
200
 
193
201
  ```ruby
194
- content_ids = [content1.id, content2.id, content3.id]
202
+ source_ids = [source1.id, source2.id, source3.id]
195
203
 
196
204
  # Parallel processing (uses simple_flow pipeline)
197
- results = facts.batch_extract(content_ids, extractor: :llm, parallel: true)
205
+ results = facts.batch_extract(source_ids, extractor: :llm, parallel: true)
198
206
 
199
207
  results.each do |result|
200
208
  if result[:error]
201
- puts "Error for #{result[:content_id]}: #{result[:error]}"
209
+ puts "Error for #{result[:source_id]}: #{result[:error]}"
202
210
  else
203
- puts "Extracted #{result[:facts].count} facts from #{result[:content_id]}"
211
+ puts "Extracted #{result[:facts].count} facts from #{result[:source_id]}"
204
212
  end
205
213
  end
206
214
  ```
@@ -211,17 +219,17 @@ end
211
219
 
212
220
  ```ruby
213
221
  # For simple extractions, use smaller models
214
- config.llm_model = "gpt-4o-mini" # Cheaper than gpt-4o
222
+ config.llm.model = "gpt-4o-mini" # Cheaper than gpt-4o
215
223
 
216
224
  # For complex documents, use larger models
217
- config.llm_model = "gpt-4o"
225
+ config.llm.model = "gpt-4o"
218
226
  ```
219
227
 
220
228
  ### Batch Processing
221
229
 
222
230
  ```ruby
223
231
  # Process in batches to reduce API calls
224
- content_ids.each_slice(10) do |batch|
232
+ source_ids.each_slice(10) do |batch|
225
233
  facts.batch_extract(batch, extractor: :llm)
226
234
  sleep(1) # Rate limiting
227
235
  end
@@ -232,8 +240,8 @@ end
232
240
  ```ruby
233
241
  # Use Ollama for development/testing
234
242
  FactDb.configure do |config|
235
- config.llm_provider = :ollama
236
- config.llm_model = "llama3.2"
243
+ config.llm.provider = :ollama
244
+ config.llm.model = "llama3.2"
237
245
  end
238
246
  ```
239
247
 
@@ -259,7 +267,7 @@ end
259
267
  ### 1. Validate Extractions
260
268
 
261
269
  ```ruby
262
- extracted = facts.extract_facts(content.id, extractor: :llm)
270
+ extracted = facts.extract_facts(source.id, extractor: :llm)
263
271
 
264
272
  extracted.each do |fact|
265
273
  # Flag low-confidence extractions
@@ -273,9 +281,9 @@ end
273
281
 
274
282
  ```ruby
275
283
  # Cache LLM responses for repeated content
276
- cache_key = "llm_extraction:#{content.content_hash}"
284
+ cache_key = "llm_extraction:#{source.content_hash}"
277
285
  extracted = Rails.cache.fetch(cache_key) do
278
- facts.extract_facts(content.id, extractor: :llm)
286
+ facts.extract_facts(source.id, extractor: :llm)
279
287
  end
280
288
  ```
281
289
 
@@ -285,7 +293,7 @@ end
285
293
  require 'retryable'
286
294
 
287
295
  Retryable.retryable(tries: 3, sleep: 5) do
288
- facts.extract_facts(content.id, extractor: :llm)
296
+ facts.extract_facts(source.id, extractor: :llm)
289
297
  end
290
298
  ```
291
299
 
@@ -293,7 +301,7 @@ end
293
301
 
294
302
  ```ruby
295
303
  # Track extraction statistics
296
- extracted = facts.extract_facts(content.id, extractor: :llm)
304
+ extracted = facts.extract_facts(source.id, extractor: :llm)
297
305
  StatsD.increment('fact_db.llm_extractions')
298
306
  StatsD.histogram('fact_db.facts_per_content', extracted.count)
299
307
  ```
data/docs/guides/temporal-queries.md CHANGED
@@ -66,7 +66,7 @@ timeline = facts.timeline_for(paula.id)
66
66
 
67
67
  timeline.each do |fact|
68
68
  range = fact.invalid_at ? "#{fact.valid_at} - #{fact.invalid_at}" : "#{fact.valid_at} - present"
69
- puts "#{range}: #{fact.fact_text}"
69
+ puts "#{range}: #{fact.text}"
70
70
  end
71
71
 
72
72
  # Timeline for specific period
@@ -196,7 +196,7 @@ similar_about_paula = facts.fact_service.semantic_search(
196
196
  ```ruby
197
197
  fact = facts.query_facts(entity: paula.id).first
198
198
 
199
- fact.fact_text # The assertion text
199
+ fact.text # The assertion text
200
200
  fact.valid_at # When it became true
201
201
  fact.invalid_at # When it stopped (nil if current)
202
202
  fact.status # canonical, superseded, etc.
@@ -210,26 +210,26 @@ fact.metadata # Additional data
210
210
  ```ruby
211
211
  # Entity mentions
212
212
  fact.entity_mentions.each do |mention|
213
- puts "#{mention.entity.canonical_name} (#{mention.mention_role})"
213
+ puts "#{mention.entity.name} (#{mention.mention_role})"
214
214
  end
215
215
 
216
216
  # Source content
217
- fact.fact_sources.each do |source|
218
- puts "Source: #{source.content.title}"
219
- puts "Excerpt: #{source.excerpt}"
217
+ fact.fact_sources.each do |fact_source|
218
+ puts "Source: #{fact_source.source.title}"
219
+ puts "Excerpt: #{fact_source.excerpt}"
220
220
  end
221
221
 
222
222
  # Superseding fact
223
223
  if fact.superseded?
224
224
  new_fact = fact.superseded_by
225
- puts "Superseded by: #{new_fact.fact_text}"
225
+ puts "Superseded by: #{new_fact.text}"
226
226
  end
227
227
 
228
228
  # Source facts (for synthesized)
229
229
  if fact.synthesized?
230
230
  fact.derived_from_ids.each do |id|
231
231
  source = FactDb::Models::Fact.find(id)
232
- puts "Derived from: #{source.fact_text}"
232
+ puts "Derived from: #{source.text}"
233
233
  end
234
234
  end
235
235
  ```
@@ -295,7 +295,7 @@ all_facts = FactDb::Models::Fact
295
295
 
296
296
  all_facts.each do |fact|
297
297
  status_info = fact.superseded? ? "(superseded)" : "(current)"
298
- puts "#{fact.valid_at}: #{fact.fact_text} #{status_info}"
298
+ puts "#{fact.valid_at}: #{fact.text} #{status_info}"
299
299
  end
300
300
  ```
301
301
 
@@ -309,7 +309,7 @@ recently_superseded = FactDb::Models::Fact
309
309
  .includes(:superseded_by)
310
310
 
311
311
  recently_superseded.each do |old_fact|
312
- puts "Changed: #{old_fact.fact_text}"
313
- puts "To: #{old_fact.superseded_by.fact_text}"
312
+ puts "Changed: #{old_fact.text}"
313
+ puts "To: #{old_fact.superseded_by.text}"
314
314
  end
315
315
  ```
data/docs/index.md CHANGED
@@ -34,14 +34,14 @@ require 'fact_db'
34
34
  facts = FactDb.new
35
35
 
36
36
  # Ingest content
37
- content = facts.ingest(
37
+ source = facts.ingest(
38
38
  "Paula Chen joined Microsoft as Principal Engineer on January 10, 2024.",
39
39
  type: :email,
40
40
  captured_at: Time.current
41
41
  )
42
42
 
43
43
  # Extract facts using LLM
44
- extracted = facts.extract_facts(content.id, extractor: :llm)
44
+ extracted = facts.extract_facts(source.id, extractor: :llm)
45
45
 
46
46
  # Query current facts about Paula
47
47
  current_facts = facts.query_facts(entity: paula.id)
@@ -110,6 +110,10 @@ bundle install
110
110
 
111
111
  See the [Installation Guide](getting-started/installation.md) for detailed setup instructions.
112
112
 
113
+ ## API Documentation
114
+
115
+ API documentation (YARD) is available at **[https://madbomber.github.io/fact_db/yard](https://madbomber.github.io/fact_db/yard)**
116
+
113
117
  ## Requirements
114
118
 
115
119
  - Ruby >= 3.0
data/examples/.envrc ADDED
@@ -0,0 +1,4 @@
1
+
2
+ source_up
3
+
4
+ export FDB_ENV=demo
data/examples/.gitignore ADDED
@@ -0,0 +1 @@
1
+ *.log
data/examples/001_configuration.rb ADDED
@@ -0,0 +1,312 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Conventions? bah-humbug; Configure everything!!
5
+ #
6
+ # Configuration Example for FactDb
7
+ #
8
+ # This example demonstrates all the ways to configure FactDb:
9
+ # - Bundled defaults (shipped with the gem)
10
+ # - Environment variables (FDB_*)
11
+ # - XDG user config files (~/.config/fact_db/fact_db.yml)
12
+ # - Project config files (./config/fact_db.yml)
13
+ # - Local overrides (./config/fact_db.local.yml)
14
+ # - Programmatic configuration blocks
15
+ #
16
+ # Configuration priority (lowest to highest):
17
+ # 1. Bundled defaults
18
+ # 2. XDG user config
19
+ # 3. Project config
20
+ # 4. Local overrides
21
+ # 5. Environment variables
22
+ # 6. Programmatic (FactDb.configure block)
23
+
24
+ require_relative "utilities"
25
+
26
+ demo_setup!("FactDb Configuration Demo")
27
+
28
+ demo_section("Section 1: Current Environment")
29
+
30
+ puts <<~ENV_DETECTION
31
+ Environment detection priority: FDB_ENV > RAILS_ENV > RACK_ENV > 'development'
32
+ FDB_ENV: #{ENV['FDB_ENV'].inspect}
33
+ RAILS_ENV: #{ENV['RAILS_ENV'].inspect}
34
+ RACK_ENV: #{ENV['RACK_ENV'].inspect}
35
+ Detected: #{FactDb.env}
36
+ ENV_DETECTION
37
+
38
+ # Environment helper methods
39
+ config = FactDb.config
40
+
41
+ puts <<~ENV_HELPERS
42
+
43
+ Environment helpers:
44
+ config.environment: #{config.environment}
45
+ config.production?: #{config.production?}
46
+ config.development?: #{config.development?}
47
+ config.test?: #{config.test?}
48
+ config.demo?: #{config.demo?}
49
+ ENV_HELPERS
50
+
51
+ demo_section("Section 2: Configuration File Locations")
52
+
53
+ defaults_path = File.expand_path("../lib/fact_db/config/defaults.yml", __dir__)
54
+
55
+ puts <<~CONFIG_SOURCES
56
+ Configuration sources (lowest to highest priority):
57
+
58
+ 1. Bundled defaults (ships with gem):
59
+ #{defaults_path}
60
+
61
+ 2. XDG user config paths (checked in order):
62
+ CONFIG_SOURCES
63
+
64
+ FactDb::Config.xdg_config_paths.each_with_index do |path, i|
65
+ file_path = File.join(path, "fact_db.yml")
66
+ exists = File.exist?(file_path)
67
+ status = exists ? "(exists)" : "(not found)"
68
+ puts " #{i + 1}. #{file_path} #{status}"
69
+ end
70
+
71
+ active_xdg = FactDb::Config.active_xdg_config_file
72
+
73
+ puts <<~MORE_SOURCES
74
+
75
+ Active XDG config: #{active_xdg || '(none)'}
76
+
77
+ 3. Project config: ./config/fact_db.yml
78
+ Exists: #{File.exist?('./config/fact_db.yml')}
79
+
80
+ 4. Local overrides: ./config/fact_db.local.yml (typically gitignored)
81
+ Exists: #{File.exist?('./config/fact_db.local.yml')}
82
+
83
+ 5. Environment variables: FDB_* (see Section 4)
84
+
85
+ 6. Programmatic: FactDb.configure { |c| ... } (see Section 5)
86
+ MORE_SOURCES
87
+
88
+ demo_section("Section 3: Accessing Configuration Values")
89
+
90
+ puts <<~DATABASE_CONFIG
91
+ Database configuration (ConfigSection with dot notation):
92
+ config.database.adapter: #{config.database.adapter.inspect}
93
+ config.database.url: #{config.database.url.inspect}
94
+ config.database.host: #{config.database.host.inspect}
95
+ config.database.port: #{config.database.port.inspect}
96
+ config.database.name: #{config.database.name.inspect}
97
+ config.database.username: #{config.database.username.inspect}
98
+ config.database.pool: #{config.database.pool.inspect}
99
+ config.database.timeout: #{config.database.timeout.inspect}
100
+
101
+ LLM configuration:
102
+ config.llm.provider: #{config.llm.provider.inspect}
103
+ config.llm.model: #{config.llm.model.inspect}
104
+ config.llm.api_key: #{config.llm.api_key ? '[REDACTED]' : 'nil'}
105
+
106
+ Embedding configuration:
107
+ config.embedding.generator: #{config.embedding.generator.inspect}
108
+ config.embedding.dimensions: #{config.embedding.dimensions.inspect}
109
+
110
+ Ranking weights (for relevance scoring):
111
+ config.ranking.ts_rank_weight: #{config.ranking.ts_rank_weight}
112
+ config.ranking.vector_similarity_weight: #{config.ranking.vector_similarity_weight}
113
+ config.ranking.entity_mention_weight: #{config.ranking.entity_mention_weight}
114
+ config.ranking.direct_answer_weight: #{config.ranking.direct_answer_weight}
115
+ config.ranking.term_overlap_weight: #{config.ranking.term_overlap_weight}
116
+ config.ranking.relationship_match_weight: #{config.ranking.relationship_match_weight}
117
+ config.ranking.confidence_weight: #{config.ranking.confidence_weight}
118
+
119
+ General settings:
120
+ config.default_extractor: #{config.default_extractor.inspect}
121
+ config.fuzzy_match_threshold: #{config.fuzzy_match_threshold}
122
+ config.auto_merge_threshold: #{config.auto_merge_threshold}
123
+ config.log_level: #{config.log_level.inspect}
124
+ DATABASE_CONFIG
125
+
126
+ demo_section("Section 4: Environment Variables")
127
+
128
+ puts <<~ENV_VARS
129
+ Environment variables use the FDB_ prefix with double underscores for nesting.
130
+ They have the HIGHEST priority (except for programmatic config).
131
+
132
+ Examples:
133
+ # Environment selection
134
+ export FDB_ENV=production
135
+
136
+ # Database configuration
137
+ export FDB_DATABASE__URL=postgresql://user:pass@localhost:5432/fact_db
138
+ export FDB_DATABASE__NAME=my_fact_db
139
+ export FDB_DATABASE__HOST=db.example.com
140
+ export FDB_DATABASE__PORT=5432
141
+ export FDB_DATABASE__USERNAME=dbuser
142
+ export FDB_DATABASE__PASSWORD=secret
143
+ export FDB_DATABASE__POOL=10
144
+
145
+ # LLM configuration
146
+ export FDB_LLM__PROVIDER=anthropic
147
+ export FDB_LLM__MODEL=claude-sonnet-4-20250514
148
+ export FDB_LLM__API_KEY=sk-xxx
149
+
150
+ # Embedding configuration
151
+ export FDB_EMBEDDING__DIMENSIONS=1536
152
+
153
+ # Ranking weights
154
+ export FDB_RANKING__TS_RANK_WEIGHT=0.30
155
+ export FDB_RANKING__VECTOR_SIMILARITY_WEIGHT=0.25
156
+
157
+ # General settings
158
+ export FDB_DEFAULT_EXTRACTOR=llm
159
+ export FDB_FUZZY_MATCH_THRESHOLD=0.80
160
+ export FDB_LOG_LEVEL=debug
161
+
162
+ Currently set FDB_* environment variables:
163
+ ENV_VARS
164
+
165
+ fdb_vars = ENV.select { |k, _| k.start_with?("FDB_") }
166
+ if fdb_vars.empty?
167
+ puts " (none)"
168
+ else
169
+ fdb_vars.each do |key, value|
170
+ display_value = key.include?("KEY") || key.include?("PASSWORD") ? "[REDACTED]" : value
171
+ puts " #{key}=#{display_value}"
172
+ end
173
+ end
174
+
175
+ demo_section("Section 5: Programmatic Configuration")
176
+
177
+ # Reset configuration to show before/after
178
+ FactDb.reset_configuration!
179
+
180
+ puts <<~PROGRAMMATIC_INTRO
181
+ Use FactDb.configure to set values programmatically.
182
+ This has the HIGHEST priority and overrides all other sources.
183
+
184
+ Before programmatic configuration:
185
+ log_level: #{FactDb.config.log_level.inspect}
186
+ fuzzy_match_threshold: #{FactDb.config.fuzzy_match_threshold}
187
+ PROGRAMMATIC_INTRO
188
+
189
+ # Apply programmatic configuration
190
+ FactDb.configure do |c|
191
+ # Scalar values
192
+ c.log_level = :debug
193
+ c.fuzzy_match_threshold = 0.75
194
+ c.default_extractor = :llm
195
+
196
+ # Callable objects (not loaded from config files)
197
+ c.logger = Logger.new($stdout, level: Logger::WARN)
198
+
199
+ # Custom embedding generator (lambda or object responding to #call)
200
+ # c.embedding_generator = ->(text) { OpenAI.embed(text) }
201
+
202
+ # Custom LLM client
203
+ # c.llm_client = MyCustomLLMClient.new
204
+ end
205
+
206
+ puts <<~AFTER_PROGRAMMATIC
207
+
208
+ After programmatic configuration:
209
+ log_level: #{FactDb.config.log_level.inspect}
210
+ fuzzy_match_threshold: #{FactDb.config.fuzzy_match_threshold}
211
+ default_extractor: #{FactDb.config.default_extractor.inspect}
212
+ logger: #{FactDb.config.logger.class}
213
+ AFTER_PROGRAMMATIC
214
+
215
+ demo_section("Section 6: Config File Examples")
216
+
217
+ puts <<~CONFIG_FILES
218
+ XDG User Config (~/.config/fact_db/fact_db.yml):
219
+ ---
220
+ # User-wide defaults (applies to all projects)
221
+ database:
222
+ host: localhost
223
+ username: myuser
224
+
225
+ llm:
226
+ provider: anthropic
227
+ api_key: sk-my-api-key # Or use FDB_LLM__API_KEY env var
228
+
229
+ embedding:
230
+ dimensions: 1536
231
+
232
+ Project Config (./config/fact_db.yml):
233
+ ---
234
+ # Environment-specific configuration
235
+ development:
236
+ database:
237
+ name: myapp_development
238
+ log_level: debug
239
+
240
+ test:
241
+ database:
242
+ name: myapp_test
243
+ pool: 2
244
+ log_level: warn
245
+
246
+ production:
247
+ database:
248
+ name: myapp_production
249
+ pool: 25
250
+ log_level: info
251
+ fuzzy_match_threshold: 0.90
252
+
253
+ Local Overrides (./config/fact_db.local.yml):
254
+ ---
255
+ # Personal overrides (gitignored)
256
+ # Great for API keys and local database credentials
257
+ llm:
258
+ api_key: sk-my-personal-key
259
+
260
+ database:
261
+ password: my_local_password
262
+ CONFIG_FILES
263
+
264
+ demo_section("Section 7: Database Configuration")
265
+
266
+ puts <<~DATABASE_INFO
267
+ Database configuration uses 'name' for the database name.
268
+ When passed to ActiveRecord, it's automatically mapped to 'database'.
269
+
270
+ Current database config:
271
+ Name: #{FactDb.config.database.name}
272
+ Host: #{FactDb.config.database.host}
273
+ Port: #{FactDb.config.database.port}
274
+ Pool: #{FactDb.config.database.pool}
275
+
276
+ Full config hash (for ActiveRecord):
277
+ #{FactDb.config.database.to_h.inspect}
278
+ DATABASE_INFO
279
+
280
+ demo_section("Section 8: Quick Reference")
281
+
282
+ puts <<~REFERENCE
283
+ Environment Variables:
284
+ FDB_ENV - Set environment (development/test/production)
285
+ FDB_DATABASE__URL - Full database connection URL
286
+ FDB_DATABASE__NAME - Database name
287
+ FDB_LLM__PROVIDER - LLM provider (anthropic/openai)
288
+ FDB_LLM__API_KEY - API key for LLM
289
+
290
+ Config Files (in priority order):
291
+ lib/fact_db/config/defaults.yml - Bundled defaults (lowest)
292
+ ~/.config/fact_db/fact_db.yml - XDG user config
293
+ ./config/fact_db.yml - Project config
294
+ ./config/fact_db.local.yml - Local overrides (gitignored)
295
+
296
+ Ruby API:
297
+ FactDb.env - Current environment name
298
+ FactDb.config - Configuration object
299
+ FactDb.configure { |c| ... } - Programmatic configuration
300
+ FactDb.reset_configuration! - Reload from all sources
301
+
302
+ Config Sections (ConfigSection with dot notation):
303
+ config.database - Database configuration
304
+ config.database.host - Dot notation access
305
+ config.database[:host] - Hash bracket access
306
+ config.database.to_h - Convert to plain Hash
307
+ config.llm - LLM configuration
308
+ config.embedding - Embedding configuration
309
+ config.ranking - Ranking weights
310
+ REFERENCE
311
+
312
+ demo_footer("Configuration Demo Complete!")