fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
@@ -0,0 +1,515 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Ingest Demo for FactDb
5
+ #
6
+ # This example demonstrates building a fact database from a directory of markdown files
7
+ # using automatic entity and fact extraction:
8
+ # - Parsing markdown files with optional YAML frontmatter
9
+ # - Using LLM-based extraction to identify entities and facts
10
+ # - Automatic entity resolution and deduplication
11
+ # - Progressive entity discovery from text
12
+ #
13
+ # Usage:
14
+ # ruby 090_ingest_demo.rb <directory> # Build/update database from directory
15
+ # ruby 090_ingest_demo.rb <file.md> # Process a single markdown file
16
+ # ruby 090_ingest_demo.rb <path> --rebuild # Drop and rebuild from scratch
17
+ # ruby 090_ingest_demo.rb --stats # Show statistics only
18
+
19
+ require_relative "utilities"
20
+ require_relative "ingest_reporter"
21
+ require "yaml"
22
+ require "debug_me"
23
+ include DebugMe
24
+ require "amazing_print"
25
+
26
+ # Note: CLI tool - uses cli_setup! which does NOT reset database
27
+ # Use --rebuild flag to explicitly reset
28
+
29
+ class IngestDemo
30
# Build a demo runner for a single markdown file or a directory of them.
#
# path     - file or directory to ingest
# rebuild  - when true, #run clears all stored data before ingesting
# count    - optional cap on how many unprocessed files are ingested
# reporter - progress reporter; an IngestReporter is created when none is given
#
# FactDb is configured as part of construction (see setup_factdb).
def initialize(path:, rebuild: false, count: nil, reporter: nil)
  @path = path
  @rebuild = rebuild
  @count = count
  @is_file = File.file?(@path)
  # For a single file the "directory" is the folder that contains it.
  @directory = if @is_file
                 File.dirname(@path)
               else
                 @path
               end
  @reporter = reporter || IngestReporter.new
  setup_factdb
end
39
+
40
# Validate the target path, optionally wipe the database, then ingest the
# markdown and print statistics plus a few sample queries.
# Exits the process with status 1 when the path is missing or is a non-.md file.
def run
  if !File.exist?(@path)
    puts "Error: Path not found: #{@path}"
    exit 1
  elsif @is_file && !@path.end_with?(".md")
    puts "Error: File must be a markdown (.md) file: #{@path}"
    exit 1
  end

  if @rebuild
    puts "Rebuilding database from scratch..."
    clear_all_data
  end

  rule = "=" * 60
  source_line = @is_file ? "Source file: #{@path}" : "Source directory: #{@directory}"

  puts rule, "Document Ingest Demo - FactDb (Automatic Extraction)", rule
  puts source_line
  puts "Extractor: #{@extractor.class.name.split('::').last}"
  puts

  process_markdown_files
  show_statistics
  demonstrate_queries

  puts "\n#{rule}"
  puts "Document Ingest Complete!"
  puts rule
end
71
+
72
# Print a banner followed by the database statistics, without ingesting anything.
def show_statistics_only
  divider = "=" * 60
  puts divider, "Database Statistics", divider

  show_statistics
end
79
+
80
+ private
81
+
82
# Configure FactDb for LLM extraction, run migrations and cache the services
# this demo uses in instance variables.
def setup_factdb
  # Demo helpers come from utilities.rb; called before FactDb is touched.
  DemoUtilities.ensure_demo_environment!
  DemoUtilities.require_fact_db!

  # The log file sits next to this script and is truncated on every run.
  script_name = File.basename(__FILE__, '.rb')
  log_path = File.join(__dir__, "#{script_name}.log")

  FactDb.configure do |config|
    config.default_extractor = :llm
    config.logger = Logger.new(File.open(log_path, 'w'))

    # Provider is chosen via FACT_DB_LLM_PROVIDER and defaults to anthropic.
    # Supports: ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, etc.
    llm_provider = ENV.fetch("FACT_DB_LLM_PROVIDER", "anthropic").to_sym
    config.llm_client = FactDb::LLM::Adapter.new(provider: llm_provider)
  end

  FactDb::Database.migrate!

  @facts = FactDb.new
  @entity_service, @fact_service, @source_service =
    @facts.entity_service, @facts.fact_service, @facts.source_service
  @extractor = FactDb::Extractors::Base.for(:llm)
end
107
+
108
# Delete every row from all FactDb tables used by this demo.
def clear_all_data
  puts "Clearing all data from database..."

  # Order matters: referencing tables are emptied before the tables they
  # point at, so foreign key constraints are respected.
  deletion_order = [
    FactDb::Models::FactSource,
    FactDb::Models::EntityMention,
    FactDb::Models::Fact,
    FactDb::Models::EntityAlias,
    FactDb::Models::Entity,
    FactDb::Models::Source
  ]
  deletion_order.each(&:delete_all)

  puts " All data cleared"
end
121
+
122
# Remove the sources recorded for the current directory, along with the
# fact-source links that point at them. Facts themselves are not deleted here.
def clear_directory_data
  puts "Clearing existing data from this directory..."

  # Sources are tagged with the directory's base name in their metadata.
  directory_label = File.basename(@directory)
  matching_sources = FactDb::Models::Source.where("metadata->>'source_directory' = ?", directory_label)
  ids = matching_sources.pluck(:id)

  if ids.any?
    # Links first, then the sources they reference.
    FactDb::Models::FactSource.where(source_id: ids).delete_all
    matching_sources.delete_all
    puts " Removed #{ids.count} source records"
  end

  puts " Data cleared"
end
138
+
139
# Report whether a source record already exists for the given markdown file.
#
# Sources are created with the frontmatter title when one is present, so the
# title alone cannot identify the file. The file's base name is stored in the
# source metadata as `source_file`, which is matched first; the title check is
# kept as a fallback for sources that carry no such metadata.
#
# file_path - path to a .md file
# Returns true or false.
def file_already_processed?(file_path)
  filename = File.basename(file_path, ".md")
  sources = FactDb::Models::Source

  sources.where("metadata->>'source_file' = ?", filename).exists? ||
    sources.exists?(title: filename)
end
143
+
144
# Work out which markdown files still need ingesting and process them in order,
# reporting progress through the reporter.
def process_markdown_files
  candidates = @is_file ? [@path] : Dir.glob(File.join(@directory, "*.md")).sort

  # Split into files still to do and files that already have a source record.
  pending, done = candidates.partition { |candidate| !file_already_processed?(candidate) }

  # --count limits how many of the pending files are handled in this run.
  batch = @count ? pending.first(@count) : pending

  @reporter.start_ingestion(
    total_files: batch.count,
    source_path: @is_file ? @path : @directory
  )
  @reporter.report_already_processed(done.count)

  if batch.empty?
    @reporter.no_files_to_process
    return
  end

  batch.each.with_index(1) do |file, position|
    process_markdown_file(file, position, batch.count)
  end

  @reporter.finish_ingestion
end
174
+
175
# Ingest one markdown file: record it as a source, split it into sections and
# run fact extraction over each section.
#
# file_path   - path of the markdown file
# file_index  - 1-based position of this file in the current batch
# total_files - size of the current batch
def process_markdown_file(file_path, file_index, total_files)
  filename = File.basename(file_path, ".md")
  raw = File.read(file_path)

  @reporter.file_started(filename, file_index, total_files)

  frontmatter, body = parse_frontmatter(raw)

  # The source keeps the full file text, frontmatter included.
  source = find_or_create_source(filename, raw, frontmatter)

  # Only the body (frontmatter stripped) is split into sections.
  stats = process_sections_with_extraction(filename, parse_sections(body), source)

  @reporter.file_completed(**stats)
end
195
+
196
# Split optional YAML frontmatter from the start of a markdown document.
#
# content - full text of the markdown file
#
# Returns [frontmatter, body]. frontmatter is always a Hash: it is empty when
# the document has no frontmatter, when the closing `---` is missing, when the
# YAML cannot be parsed safely, or when it parses to something other than a
# mapping (e.g. an empty block or a bare scalar). body is the text after the
# closing delimiter, or the whole content when no frontmatter was split off.
def parse_frontmatter(content)
  return [{}, content] unless content.start_with?("---")

  # Leading "---" yields an empty first field; the limit keeps any later
  # "---" (e.g. horizontal rules) inside the body.
  _, raw_yaml, body = content.split("---", 3)
  return [{}, content] if body.nil?

  frontmatter =
    begin
      YAML.safe_load(raw_yaml)
    rescue Psych::Exception
      # Malformed or unsafe frontmatter is treated as absent (best effort).
      nil
    end

  # Callers index and merge the result, so never hand back nil or a scalar.
  frontmatter = {} unless frontmatter.is_a?(Hash)

  [frontmatter, body]
end
207
+
208
# Return the source whose title matches this document, creating it when absent.
#
# filename     - base name of the markdown file (without extension)
# content_text - full text stored with a newly created source
# frontmatter  - parsed frontmatter hash; its "title" overrides the filename
def find_or_create_source(filename, content_text, frontmatter)
  title = frontmatter["title"] || filename

  FactDb::Models::Source.find_by(title: title) || begin
    # Record where the document came from alongside its own frontmatter.
    provenance = { source_directory: File.basename(@directory), source_file: filename }
    @source_service.create(
      content_text,
      kind: :document,
      title: title,
      metadata: frontmatter.merge(provenance)
    )
  end
end
224
+
225
# Split a markdown body into sections delimited by `#` headings.
#
# Each section is a hash with :heading (nil for text before the first heading),
# :text (non-blank lines, stripped and joined by single spaces; lines that are
# just "---" are ignored), and :start_line/:end_line counted from the first
# line of +body+. Sections without any text are dropped.
def parse_sections(body)
  finished = []
  heading = nil
  fragments = []
  first_line = 1
  lines_seen = 0

  # Emit the section being accumulated, if it gathered any text.
  flush = lambda do |end_line|
    next if fragments.empty?

    finished << {
      heading: heading,
      text: fragments.join(" "),
      start_line: first_line,
      end_line: end_line
    }
  end

  body.each_line.with_index(1) do |raw_line, number|
    lines_seen = number
    stripped = raw_line.strip
    header = stripped.match(/^(#+)\s+(.+)$/)

    if header
      # A heading closes the previous section on the line before it.
      flush.call(number - 1)
      heading = header[2].strip
      fragments = []
      first_line = number
    elsif !stripped.empty? && stripped != "---"
      fragments << stripped
    end
  end

  # The last section runs to the final line of the body.
  flush.call(lines_seen)

  finished
end
257
+
258
# Run fact extraction over each section of a document and persist the results.
#
# filename - base name of the source file (used in metadata and descriptions)
# sections - array of section hashes as produced by parse_sections
# source   - source record each created fact is linked to
#
# Sections whose cleaned text is shorter than 10 characters are ignored.
# Sections that already have a fact with the same section_ref are skipped.
# An error while handling a section is reported and counted; processing then
# continues with the next section.
#
# Returns a hash with :facts, :entities (mention count), :skipped and :errors.
def process_sections_with_extraction(filename, sections, source)
  stats = { facts: 0, entities: 0, skipped: 0, errors: 0 }
  total_sections = sections.count

  sections.each_with_index do |section, index|
    section_text = clean_text(section[:text])
    next if section_text.length < 10

    section_ref = section[:heading] || "Section #{index + 1}"
    @reporter.section_started(section_ref, index + 1, total_sections)

    # Facts carry this identifier in their metadata, which makes re-runs resumable.
    fact_identifier = "#{filename}: #{section_ref}"
    if FactDb::Models::Fact.where("metadata->>'section_ref' = ?", fact_identifier).exists?
      stats[:skipped] += 1
      @reporter.section_skipped(section_ref)
      @reporter.section_completed
      next
    end

    begin
      section_facts = 0
      section_entities = 0

      extract_with_progress(section_text).each do |fact_data|
        mentions = build_mentions(fact_data[:mentions] || [], filename)
        section_entities += mentions.size

        fact_metadata = {
          source_file: filename,
          section_heading: section[:heading],
          section_ref: fact_identifier,
          line_start: section[:start_line],
          line_end: section[:end_line]
        }.compact

        fact = @fact_service.create(
          fact_data[:text],
          valid_at: fact_data[:valid_at] || Date.today,
          invalid_at: fact_data[:invalid_at],
          extraction_method: :llm,
          confidence: fact_data[:confidence] || 0.8,
          # One mention per entity, even if the text names it several times.
          mentions: mentions.uniq { |mention| mention[:entity_id] },
          metadata: fact_metadata
        )

        fact.add_source(source: source, kind: :primary, confidence: 1.0)
        section_facts += 1
      end

      @reporter.extraction_completed(facts_count: section_facts, entities_count: section_entities)
      stats[:facts] += section_facts
      stats[:entities] += section_entities
    rescue StandardError => e
      debug_me { [:section_ref, :e] }
      @reporter.error_occurred(e, context: section_ref)
      stats[:errors] += 1
    end

    @reporter.section_completed
  end

  stats
end

# Resolve each extracted mention to an entity and build the mention reference
# (entity id, role, surface text) attached to the fact.
def build_mentions(mention_list, filename)
  mention_list.map do |mention_data|
    aliases = mention_data[:aliases] || []
    entity = @entity_service.resolve_or_create(
      mention_data[:name],
      kind: normalize_kind(mention_data[:kind]),
      aliases: aliases,
      description: "Extracted from #{filename}"
    )
    add_missing_aliases(entity, aliases)

    {
      entity_id: entity.id,
      # The entity kind is read from :kind above; :type is still honoured
      # first so older extractor output keeps the role it had before.
      role: mention_data[:role] || determine_role(mention_data[:type] || mention_data[:kind]),
      text: mention_data[:name]
    }
  end
end

# Add aliases the entity does not already have, comparing case-insensitively
# against its name and its existing aliases.
def add_missing_aliases(entity, aliases)
  aliases.each do |alias_text|
    candidate = alias_text.to_s.strip
    next if candidate.empty?

    key = candidate.downcase
    next if entity.name.downcase == key
    next if entity.all_aliases.map(&:downcase).include?(key)

    entity.add_alias(candidate)
  end
end
352
+
353
# Extract facts with periodic progress updates.
#
# The extractor runs in a background thread while this method notifies the
# reporter roughly every 0.15 seconds, returning as soon as extraction ends.
#
# text - section text handed to the extractor
#
# Returns whatever the extractor's #extract returns.
# Any exception raised by the extractor is re-raised here, in the caller's thread.
def extract_with_progress(text)
  @reporter.extraction_started

  worker = Thread.new do
    # The failure is re-raised by join/value below and handled by the caller,
    # so the default "terminated with exception" report would only duplicate it.
    Thread.current.report_on_exception = false
    @extractor.extract(text)
  end

  # join(timeout) returns nil while the thread is still running, so this
  # ticks the reporter between waits without sleeping past completion.
  loop do
    @reporter.extraction_progress
    break if worker.join(0.15)
  end

  worker.value
end
372
+
373
# Strip common markdown markup from a piece of text before extraction.
# Asterisks and `#` runs (with trailing whitespace) are removed wherever they
# occur, inline code spans are dropped entirely, and links keep their label.
def clean_text(text)
  without_emphasis = text.delete("*") # bold and italic markers
  without_code = without_emphasis.gsub(/`[^`]+`/, "") # inline code
  with_link_labels = without_code.gsub(/\[([^\]]+)\]\([^)]+\)/) { Regexp.last_match(1) } # links to text
  with_link_labels.gsub(/#+\s*/, "").strip # header markers
end
382
+
383
# Map an entity type to the role it plays in a fact.
# Valid roles: subject, object, location, temporal, instrument, beneficiary.
# Unknown or missing types default to :subject.
def determine_role(entity_type)
  role_by_type = {
    "person" => :subject,
    "place" => :location,
    "organization" => :object,
    "event" => :temporal
  }
  role_by_type.fetch(entity_type.to_s, :subject)
end
393
+
394
# Coerce an extracted entity kind into a symbol.
# nil becomes :concept; a value that, once lower-cased, is not listed in
# FactDb::Models::Entity::ENTITY_KINDS becomes :other.
def normalize_kind(kind)
  return :concept if kind.nil?

  candidate = kind.to_s.downcase.to_sym
  recognised = FactDb::Models::Entity::ENTITY_KINDS.any? { |known| known.to_sym == candidate }

  recognised ? candidate : :other
end
402
+
403
# Print overall source, entity and fact statistics, followed by figures for
# the current directory and a count of entities per kind.
#
# The directory figures rely on metadata written during ingest: sources carry
# `source_directory` and `source_file`, and facts carry `source_file`. Facts are
# attributed to the directory through the source files recorded for it, so the
# count no longer includes facts ingested from other directories.
def show_statistics
  puts "\n--- Database Statistics ---\n"

  puts "Sources:"
  ap @source_service.stats

  puts "\nEntities:"
  ap @entity_service.stats

  puts "\nFacts:"
  ap @fact_service.stats

  # Directory-specific stats if available
  if @directory
    dir_name = File.basename(@directory)
    dir_sources = FactDb::Models::Source.where("metadata->>'source_directory' = ?", dir_name).count

    # Match facts to this directory via the source_file values of its sources.
    directory_files_sql = <<~SQL.strip
      metadata->>'source_file' IN (
        SELECT metadata->>'source_file'
        FROM #{FactDb::Models::Source.table_name}
        WHERE metadata->>'source_directory' = ?
      )
    SQL
    dir_facts = FactDb::Models::Fact.where(directory_files_sql, dir_name).count

    puts "\nDirectory '#{dir_name}':"
    puts " Documents loaded: #{dir_sources}"
    puts " Facts extracted: #{dir_facts}"
  end

  # Show discovered entities by kind
  puts "\nDiscovered entities by kind:"
  ap FactDb::Models::Entity.group(:kind).count
end
430
+
431
# Print a short sample of what was ingested: the ten most recently created
# entities with their fact counts, and previews of the five most recent facts.
def demonstrate_queries
  puts "\n--- Sample Queries ---\n"

  # Show some discovered entities
  puts "\nRecently discovered entities (last 10):"
  FactDb::Models::Entity.order(created_at: :desc).limit(10).each do |entity|
    puts " #{entity.name} (#{entity.kind}) - #{entity.facts.count} mentions"
  end

  # Show recent facts
  puts "\nRecent facts (last 5):"
  FactDb::Models::Fact.order(created_at: :desc).limit(5).each do |fact|
    text = fact.text.to_s
    # Only mark the preview as truncated when text was actually cut off.
    preview = text.length > 81 ? "#{text[0..80]}..." : text
    puts " #{preview}"
  end
end
449
+ end
450
+
451
+ # Main execution
452
# Command-line entry point: parse arguments, then run the ingest demo.
# --stats and --help act immediately and exit; options after them are ignored.
if __FILE__ == $PROGRAM_NAME
  # Use the real script name in messages so they survive file renames.
  script = File.basename($PROGRAM_NAME)
  options = { rebuild: false, path: nil, count: nil, reporter: nil }

  args = ARGV.dup
  while (arg = args.shift)
    case arg
    when "--rebuild"
      options[:rebuild] = true
    when "--count"
      raw_count = args.shift.to_s
      # A missing or non-numeric value used to become 0 and silently process nothing.
      unless /\A[1-9]\d*\z/.match?(raw_count)
        puts "Error: --count requires a positive integer"
        exit 1
      end
      options[:count] = raw_count.to_i
    when "--quiet", "-q"
      options[:reporter] = QuietReporter.new
    when "--verbose", "-v"
      options[:reporter] = VerboseReporter.new
    when "--stats"
      IngestDemo.new(path: ".").show_statistics_only
      exit 0
    when "--help", "-h"
      puts <<~HELP
        Document Ingest Demo for FactDb (Automatic Extraction)

        Usage:
        ruby #{script} <directory> # Build/update database from directory
        ruby #{script} <file.md> # Process a single markdown file
        ruby #{script} <path> --rebuild # Drop and rebuild from scratch
        ruby #{script} <directory> --count 3 # Process only first 3 files
        ruby #{script} --stats # Show statistics only

        Options:
        --rebuild Clear existing data and rebuild from scratch
        --count <n> Process only the first n files (for testing, directory only)
        --quiet, -q Minimal output (good for scripts/CI)
        --verbose, -v Detailed output with section-level progress
        --stats Show database statistics only
        --help, -h Show this help message

        Environment variables:
        FACT_DB_LLM_PROVIDER # LLM provider (anthropic, openai, gemini, ollama)
        ANTHROPIC_API_KEY # API key for Anthropic
        OPENAI_API_KEY # API key for OpenAI
        DATABASE_URL # PostgreSQL connection URL

        Accepts either a directory containing markdown (.md) files or a single
        markdown file. Files may optionally include YAML frontmatter between
        --- delimiters at the start of the file.

        Reporter classes (IngestReporter, QuietReporter, VerboseReporter) can be
        used directly in your own applications for custom progress handling.
      HELP
      exit 0
    else
      if arg.start_with?("-")
        # Still ignored, as before, but no longer silently.
        warn "Warning: ignoring unknown option #{arg}"
      else
        options[:path] = arg
      end
    end
  end

  unless options[:path]
    puts "Error: Please specify a directory or markdown file"
    puts "Usage: ruby #{script} <directory|file.md>"
    puts "Run 'ruby #{script} --help' for more information"
    exit 1
  end

  IngestDemo.new(**options).run
end