fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Mary Todd Lincoln's Family (The Todd Family)
|
|
2
|
+
|
|
3
|
+
## Mary's Father
|
|
4
|
+
|
|
5
|
+
### Robert Smith Todd
|
|
6
|
+
|
|
7
|
+
| Field | Value |
|
|
8
|
+
|-------|-------|
|
|
9
|
+
| Full Name | Robert Smith Todd |
|
|
10
|
+
| Birth Year | 1791 |
|
|
11
|
+
| Education | Studied law at Transylvania College |
|
|
12
|
+
| Career | Businessman (chose business over law) |
|
|
13
|
+
| Position | President of the Lexington branch of the Bank of Kentucky |
|
|
14
|
+
| Political Offices | Justice of the Peace, Sheriff, Clerk of State House of Representatives |
|
|
15
|
+
| First Marriage | Elizabeth "Eliza" Parker (1812) |
|
|
16
|
+
| Second Marriage | Elizabeth "Betsy" Humphreys (1826) |
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Mary's Mother
|
|
21
|
+
|
|
22
|
+
### Elizabeth "Eliza" Parker Todd
|
|
23
|
+
|
|
24
|
+
| Field | Value |
|
|
25
|
+
|-------|-------|
|
|
26
|
+
| Full Name | Elizabeth "Eliza" Parker Todd |
|
|
27
|
+
| Birth Year | 1794 |
|
|
28
|
+
| Family Background | Wealthy merchant family |
|
|
29
|
+
| Education | Likely attended local female academy |
|
|
30
|
+
| Marriage | Robert Smith Todd (1812) |
|
|
31
|
+
| Death Year | 1825 |
|
|
32
|
+
| Cause of Death | Complications following childbirth |
|
|
33
|
+
| Children | 7 children |
|
|
34
|
+
|
|
35
|
+
Mary was only 6 years old when her mother died.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Mary's Stepmother
|
|
40
|
+
|
|
41
|
+
### Elizabeth "Betsy" Humphreys Todd
|
|
42
|
+
|
|
43
|
+
| Field | Value |
|
|
44
|
+
|-------|-------|
|
|
45
|
+
| Full Name | Elizabeth "Betsy" Humphreys Todd |
|
|
46
|
+
| Marriage to Robert | 1826 |
|
|
47
|
+
| Children with Robert | 9 children |
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Children from First Marriage (Eliza Parker Todd)
|
|
52
|
+
|
|
53
|
+
| Name | Notes |
|
|
54
|
+
|------|-------|
|
|
55
|
+
| Elizabeth Todd Edwards | Mary's older sister; Mary lived with her in Springfield |
|
|
56
|
+
| Frances Todd | Sister |
|
|
57
|
+
| Levi Todd | Brother |
|
|
58
|
+
| **Mary Todd Lincoln** | Fourth child; married Abraham Lincoln |
|
|
59
|
+
| Robert Todd | Died in infancy |
|
|
60
|
+
| Ann Todd | Sister |
|
|
61
|
+
| George Todd | Brother |
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Children from Second Marriage (Betsy Humphreys Todd)
|
|
66
|
+
|
|
67
|
+
| Name | Notes |
|
|
68
|
+
|------|-------|
|
|
69
|
+
| Margaret Todd | Half-sister |
|
|
70
|
+
| Samuel Todd | Half-brother |
|
|
71
|
+
| David Todd | Half-brother |
|
|
72
|
+
| Martha Todd | Half-sister |
|
|
73
|
+
| Emilie Todd Helm | Half-sister; particularly close to Mary |
|
|
74
|
+
| Alexander Todd | Half-brother |
|
|
75
|
+
| Elodie Todd | Half-sister |
|
|
76
|
+
| Katherine "Kittie" Todd | Half-sister |
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Mary's Sister: Elizabeth Todd Edwards
|
|
81
|
+
|
|
82
|
+
Elizabeth was Mary's older sister who lived in Springfield, Illinois with her husband Ninian W. Edwards. Mary moved to Springfield in 1839 to live with Elizabeth, and it was at Elizabeth's home that Mary met Abraham Lincoln.
|
|
83
|
+
|
|
84
|
+
### Ninian W. Edwards (Brother-in-law)
|
|
85
|
+
|
|
86
|
+
| Field | Value |
|
|
87
|
+
|-------|-------|
|
|
88
|
+
| Full Name | Ninian Wirt Edwards |
|
|
89
|
+
| Relationship | Husband of Elizabeth Todd |
|
|
90
|
+
| Occupation | Politician, Lawyer |
|
|
91
|
+
| Political Role | Member of "the Junto" political group with Lincoln |
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Civil War Divided Loyalties
|
|
96
|
+
|
|
97
|
+
The Todd family was deeply divided during the Civil War, reflecting the broader national conflict:
|
|
98
|
+
|
|
99
|
+
### Union Supporters (5 siblings)
|
|
100
|
+
The Todds who lived in Northern states, including Mary, supported the Union cause.
|
|
101
|
+
|
|
102
|
+
### Confederate Supporters (8 siblings)
|
|
103
|
+
The majority of Mary's siblings sided with the Confederacy, creating painful divisions within the family.
|
|
104
|
+
|
|
105
|
+
### Geographic Distribution
|
|
106
|
+
When Abraham and Mary Lincoln entered the White House, Todd family members were scattered across:
|
|
107
|
+
- Kentucky
|
|
108
|
+
- Illinois
|
|
109
|
+
- Ohio
|
|
110
|
+
- Virginia
|
|
111
|
+
- Alabama
|
|
112
|
+
- Louisiana
|
|
113
|
+
|
|
114
|
+
This geographic dispersal mirrored the political divide that split the nation.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Emilie Todd Helm
|
|
119
|
+
|
|
120
|
+
Mary's half-sister Emilie was particularly close to her. Emilie married Benjamin Hardin Helm, who became a Confederate general and was killed at the Battle of Chickamauga in 1863. After his death, Emilie visited Mary at the White House, causing political controversy.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Total Todd Family Size
|
|
125
|
+
|
|
126
|
+
| Category | Count |
|
|
127
|
+
|----------|-------|
|
|
128
|
+
| Children from first marriage | 7 |
|
|
129
|
+
| Children from second marriage | 9 |
|
|
130
|
+
| **Total children of Robert S. Todd** | **16** |
|
|
131
|
+
|
|
132
|
+
## Sources
|
|
133
|
+
|
|
134
|
+
- Mary Todd Lincoln House (mtlhouse.org)
|
|
135
|
+
- White House Historical Association
|
|
136
|
+
- Lincoln Collection
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# IngestReporter - Progress reporting for document ingestion
|
|
4
|
+
#
|
|
5
|
+
# This class provides clear, non-blocking feedback during long ingestion
|
|
6
|
+
# processes. It uses a single-line status approach that updates in place,
|
|
7
|
+
# showing users that work is happening without cluttering the terminal.
|
|
8
|
+
#
|
|
9
|
+
# Usage:
|
|
10
|
+
# reporter = IngestReporter.new
|
|
11
|
+
# reporter.start_ingestion(total_files: 10, source_path: "docs/")
|
|
12
|
+
# reporter.file_started("document.md", 1, 10)
|
|
13
|
+
# reporter.section_started("Introduction", 1, 5)
|
|
14
|
+
# reporter.extraction_started
|
|
15
|
+
# reporter.extraction_progress # call periodically during LLM calls
|
|
16
|
+
# reporter.extraction_completed(facts_count: 3, entities_count: 5)
|
|
17
|
+
# reporter.section_completed
|
|
18
|
+
# reporter.file_completed(facts: 8, entities: 12, errors: 0, skipped: 1)
|
|
19
|
+
# reporter.finish_ingestion
|
|
20
|
+
#
|
|
21
|
+
# Customization:
|
|
22
|
+
# Subclass and override methods to customize output format, or set
|
|
23
|
+
# output: to a different IO object for logging.
|
|
24
|
+
|
|
25
|
+
class IngestReporter
  SPINNER_CHARS = %w[⠋ ⠙ ⠹ ⠸ ⠼ ⠴ ⠦ ⠧ ⠇ ⠏].freeze

  # ANSI escape sequences used by #colorize; built once rather than on every call.
  COLOR_CODES = {
    red: "\e[31m",
    green: "\e[32m",
    yellow: "\e[33m",
    dim: "\e[2m",
    reset: "\e[0m"
  }.freeze

  attr_reader :output, :total_files, :total_facts, :total_entities, :total_errors

  # @param output [IO] object receiving all output (needs print/puts/flush)
  # @param color [Boolean] request ANSI colors; only honored when +output+
  #   responds to +tty?+ and reports true
  def initialize(output: $stdout, color: true)
    @output = output
    @color = color && output.respond_to?(:tty?) && output.tty?
    @spinner_index = 0
    @extraction_start_time = nil
    @file_start_time = nil
    @ingestion_start_time = nil
    @current_file = nil
    @current_file_index = 0
    @current_section = nil
    @current_section_index = 0
    @total_sections = 0
    @total_files = 0
    @total_facts = 0
    @total_entities = 0
    @total_errors = 0
    @line_length = 0
    # Per-file counters start at zero so callbacks such as #error_occurred
    # are safe to call before the first #file_started.
    reset_file_counters
  end

  # Called once at the start of the ingestion process.
  # Resets the run totals and prints the opening banner.
  def start_ingestion(total_files:, source_path:)
    @total_files = total_files
    @ingestion_start_time = Time.now
    @total_facts = 0
    @total_entities = 0
    @total_errors = 0

    write_line ""
    write_line "Starting ingestion: #{total_files} file(s) from #{source_path}"
    write_line ""
  end

  # Called when a new file begins processing.
  # Records the file position, restarts the per-file counters and timer,
  # then redraws the status line.
  def file_started(filename, index, total)
    @current_file = filename
    @current_file_index = index
    @total_files = total
    @file_start_time = Time.now
    reset_file_counters

    update_status
  end

  # Called when a new section within a file begins.
  def section_started(section_ref, index, total)
    @current_section = section_ref
    @current_section_index = index
    @total_sections = total

    update_status
  end

  # Called when a section is skipped (already processed).
  def section_skipped(section_ref)
    @file_skipped += 1
  end

  # Called when LLM extraction begins; starts the extraction timer.
  def extraction_started
    @extraction_start_time = Time.now
    update_status
  end

  # Called periodically during LLM extraction to show activity.
  # Advances the spinner one frame and redraws; returns immediately.
  def extraction_progress
    @spinner_index = (@spinner_index + 1) % SPINNER_CHARS.length
    update_status(extracting: true)
  end

  # Called when extraction completes for a section.
  # Adds the counts to both the per-file and the run totals and stops the
  # extraction timer.
  def extraction_completed(facts_count:, entities_count:)
    @file_facts += facts_count
    @file_entities += entities_count
    @total_facts += facts_count
    @total_entities += entities_count
    @extraction_start_time = nil

    update_status
  end

  # Called when a section finishes processing.
  def section_completed
    @current_section = nil
  end

  # Called when an error occurs.
  # Counts the error and prints it on its own line, prefixed with +context+
  # when given; the message is truncated to 70 characters.
  def error_occurred(error, context: nil)
    @file_errors += 1
    @total_errors += 1

    clear_line
    error_msg = context ? "#{context}: #{error.message}" : error.message
    write_line colorize(" ✗ Error: #{truncate(error_msg, 70)}", :red)
  end

  # Called when a file finishes processing.
  # Prints a one-line summary built from the counts passed in by the caller.
  def file_completed(facts:, entities:, errors:, skipped:)
    elapsed = elapsed_since(@file_start_time)
    clear_line

    status_parts = ["#{facts} facts", "#{entities} entities"]
    status_parts << colorize("#{errors} errors", :red) if errors > 0
    status_parts << "#{skipped} skipped" if skipped > 0
    status_parts << format_duration(elapsed)

    symbol = errors > 0 ? colorize("✗", :red) : colorize("✓", :green)
    write_line "#{symbol} #{@current_file}: #{status_parts.join(", ")}"

    @current_file = nil
    @current_section = nil
  end

  # Called when all files are processed; prints the run summary.
  def finish_ingestion
    elapsed = elapsed_since(@ingestion_start_time)
    clear_line

    write_line ""
    write_line "─" * 50
    write_line "Ingestion complete in #{format_duration(elapsed)}"
    write_line " Files processed: #{@current_file_index}"
    write_line " Facts extracted: #{@total_facts}"
    write_line " Entities found: #{@total_entities}"
    write_line colorize(" Errors: #{@total_errors}", :red) if @total_errors > 0
    write_line ""
  end

  # Called to report files that were already processed.
  def report_already_processed(count)
    return if count == 0

    write_line colorize(" (#{count} file(s) already processed, skipping)", :dim)
  end

  # Called when no files need processing.
  def no_files_to_process
    write_line colorize(" All files already processed. Use --rebuild to reprocess.", :dim)
  end

  private

  # Zeroes the counters that track the file currently being processed.
  def reset_file_counters
    @file_facts = 0
    @file_entities = 0
    @file_errors = 0
    @file_skipped = 0
  end

  # Seconds elapsed since +start_time+, or 0 when the timer was never started
  # (e.g. a completion callback fired without its matching start callback).
  def elapsed_since(start_time)
    start_time ? Time.now - start_time : 0
  end

  # Rebuilds and redraws the in-place status line:
  #   [file i/n] │ file name │ section i/n │ spinner + elapsed │ running totals
  def update_status(extracting: false)
    clear_line

    parts = []

    # File progress
    parts << "[#{@current_file_index}/#{@total_files}]"

    # Current file (truncated)
    parts << truncate(@current_file, 25) if @current_file

    # Section progress
    if @current_section && @total_sections > 0
      parts << "#{@current_section_index}/#{@total_sections} sections"
    end

    # Extraction indicator with spinner and elapsed time
    if extracting && @extraction_start_time
      elapsed = Time.now - @extraction_start_time
      spinner = SPINNER_CHARS[@spinner_index]
      parts << "#{spinner} extracting (#{format_duration(elapsed)})"
    end

    # Running totals
    if @total_facts > 0 || @file_facts > 0
      parts << "#{@total_facts} facts"
      parts << "#{@total_entities} entities"
    end

    write_status(parts.join(" │ "))
  end

  # Writes +text+ at the start of the current line without a newline and
  # remembers its length so #clear_line can blank it later.
  def write_status(text)
    @line_length = text.length
    @output.print "\r#{text}"
    @output.flush
  end

  # Overwrites the last status line with spaces; no-op when nothing is shown.
  def clear_line
    return if @line_length == 0

    @output.print "\r#{" " * @line_length}\r"
    @output.flush
    @line_length = 0
  end

  # Prints a permanent line, first removing any in-place status line.
  def write_line(text)
    clear_line
    @output.puts text
  end

  # Formats a duration in seconds as "12.3s", "4m 5s" or "1h 2m".
  def format_duration(seconds)
    if seconds < 60
      format("%.1fs", seconds)
    elsif seconds < 3600
      minutes = (seconds / 60).to_i
      secs = (seconds % 60).to_i
      "#{minutes}m #{secs}s"
    else
      hours = (seconds / 3600).to_i
      minutes = ((seconds % 3600) / 60).to_i
      "#{hours}h #{minutes}m"
    end
  end

  # Shortens +text+ to at most +length+ characters, ending with "…" when cut.
  def truncate(text, length)
    return text if text.length <= length

    text[0, length - 1] + "…"
  end

  # Wraps +text+ in the ANSI code for +color+ when colors are enabled;
  # returns +text+ unchanged otherwise.
  def colorize(text, color)
    return text unless @color

    "#{COLOR_CODES[color]}#{text}#{COLOR_CODES[:reset]}"
  end
end
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# QuietReporter - Minimal output for scripting/automation
|
|
266
|
+
#
|
|
267
|
+
# Use this when you want minimal output, e.g., in CI pipelines
|
|
268
|
+
# or when redirecting output to a file.
|
|
269
|
+
class QuietReporter < IngestReporter
  # Records the run parameters and resets the totals without printing a banner.
  def start_ingestion(total_files:, source_path:)
    @total_files = total_files
    @ingestion_start_time = Time.now
    @total_facts = 0
    @total_entities = 0
    @total_errors = 0
  end

  # Tracks the current file silently. +total+ is stored as well so the
  # +total_files+ reader reports the same value the base class would.
  def file_started(filename, index, total)
    @current_file = filename
    @current_file_index = index
    @total_files = total
    @file_start_time = Time.now
  end

  # Section- and extraction-level progress is intentionally not reported.
  def section_started(section_ref, index, total); end
  def section_skipped(section_ref); end
  def extraction_started; end
  def extraction_progress; end

  # Accumulates the run totals; prints nothing.
  def extraction_completed(facts_count:, entities_count:)
    @total_facts += facts_count
    @total_entities += entities_count
  end

  def section_completed; end

  # Counts the error and prints it as a plain, uncolored line.
  def error_occurred(error, context: nil)
    @total_errors += 1
    error_msg = context ? "#{context}: #{error.message}" : error.message
    @output.puts "ERROR: #{error_msg}"
  end

  # Prints one summary line per file; +errors+ and +skipped+ are not shown.
  def file_completed(facts:, entities:, errors:, skipped:)
    @output.puts "#{@current_file}: #{facts} facts, #{entities} entities"
  end

  # Prints the final one-line summary.
  def finish_ingestion
    # Treat a missing start time as zero elapsed instead of raising on nil.
    elapsed = @ingestion_start_time ? Time.now - @ingestion_start_time : 0
    @output.puts "Completed: #{@total_facts} facts, #{@total_entities} entities in #{format_duration(elapsed)}"
  end

  def report_already_processed(count); end

  def no_files_to_process
    @output.puts "All files already processed."
  end
end
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# VerboseReporter - Detailed output for debugging
|
|
317
|
+
#
|
|
318
|
+
# Shows all details including section names and extraction timing.
|
|
319
|
+
class VerboseReporter < IngestReporter
  # Redraws the status line, then also prints a permanent line naming the section.
  def section_started(section_ref, index, total)
    super
    write_line " Section #{index}/#{total}: #{section_ref}"
  end

  # Prints the extraction result together with how long the extraction took.
  def extraction_completed(facts_count:, entities_count:)
    # Capture the elapsed time first: super clears @extraction_start_time.
    elapsed = @extraction_start_time ? Time.now - @extraction_start_time : 0
    super
    write_line " Extracted #{facts_count} facts, #{entities_count} entities (#{format_duration(elapsed)})"
  end

  # Reports the error like the base class and adds the first backtrace frame.
  def error_occurred(error, context: nil)
    super
    # backtrace is nil for exceptions that were never raised and may be empty;
    # print the extra line only when there is a frame to show.
    first_frame = error.backtrace&.first
    write_line " #{first_frame}" if first_frame
  end
end
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Common utilities for FactDb demo programs
|
|
4
|
+
#
|
|
5
|
+
# Usage: require_relative "utilities"
|
|
6
|
+
#
|
|
7
|
+
# This file provides:
|
|
8
|
+
# - Environment setup (ensures FDB_ENV=demo)
|
|
9
|
+
# - Database reset before each demo
|
|
10
|
+
# - Common output formatting methods
|
|
11
|
+
|
|
12
|
+
module DemoUtilities
  SEPARATOR_WIDTH = 60

  class << self
    # Setup the demo environment and reset the database.
    # Call this at the start of each demo.
    #
    # @param demo_name [String, nil] title printed as a header when given
    def setup!(demo_name = nil)
      ensure_demo_environment!
      reset_demo_database!
      require_fact_db!

      header(demo_name) if demo_name
    end

    # Setup for CLI tools - sets environment but does NOT reset database.
    # Use this for utility scripts that inspect/query existing data.
    #
    # @param tool_name [String, nil] title printed as a header when given
    def cli_setup!(tool_name = nil)
      ensure_demo_environment!
      require_fact_db!
      FactDb::Database.establish_connection!

      header(tool_name) if tool_name
    end

    # Ensure FDB_ENV is set to "demo" (overwrites any existing value).
    def ensure_demo_environment!
      ENV["FDB_ENV"] = "demo"
    end

    # Reset the demo database by running the rake task from the project root
    # (the parent of this file's directory), with the task's output discarded.
    #
    # @return [Boolean, nil] result of Kernel#system: true on success, false on
    #   a non-zero exit, nil when the command could not be run
    def reset_demo_database!
      project_root = File.expand_path("..", __dir__)

      reset_ok = Dir.chdir(project_root) do
        # Redirect via spawn options and File::NULL instead of a shell
        # "> /dev/null 2>&1" so the call does not depend on a POSIX shell.
        system("bundle exec rake db:reset:demo", out: File::NULL, err: File::NULL)
      end

      # Output is discarded above, so surface a failed reset explicitly rather
      # than letting the demo continue unnoticed on an unreset database.
      warn "Warning: `rake db:reset:demo` did not succeed; the demo database may not be reset." unless reset_ok
      reset_ok
    end

    # Require fact_db after environment is set.
    def require_fact_db!
      require "bundler/setup"
      require "fact_db"
    end

    # Print a major header (demo title) framed by separator lines.
    def header(title)
      puts separator
      puts title
      puts separator
    end

    # Print a section header framed above and below by dashes of equal width.
    def section(title)
      str = "--- #{title} ---"
      wrapper = str[0] * str.length # str[0] is "-"
      puts "\n#{wrapper}\n#{str}\n#{wrapper}"
    end

    # Print a subsection header.
    def subsection(title)
      puts "\n#{title}:"
    end

    # Print the demo completion footer.
    def footer(title = "Demo Complete!")
      puts "\n" + separator
      puts title
      puts separator
    end

    # Build (not print) a separator line of SEPARATOR_WIDTH "=" characters.
    #
    # @return [String]
    def separator
      "=" * SEPARATOR_WIDTH
    end

    # Print a list item.
    def list_item(text, indent: 2)
      puts "#{" " * indent}- #{text}"
    end

    # Print an indented line.
    def indent(text, level: 1)
      puts "#{" " * level}#{text}"
    end

    # Print a key-value pair.
    def kv(key, value, indent_level: 1)
      puts "#{" " * indent_level}#{key}: #{value}"
    end

    # Print multiple lines with consistent formatting.
    def block(lines)
      puts lines.map { |line| " #{line}" }.join("\n")
    end

    # Configure logging to a file based on demo filename:
    # "<dir>/<name>.rb" logs to "<dir>/<name>.log".
    # Overwrites the log file on each run (does not append).
    def configure_logging(demo_file)
      log_path = File.join(File.dirname(demo_file), "#{File.basename(demo_file, '.rb')}.log")

      FactDb.configure do |config|
        log_file = File.open(log_path, 'w')
        # Write entries through immediately instead of leaving them in the
        # File buffer, so the log is current while the demo is still running.
        log_file.sync = true
        config.logger = Logger.new(log_file)
      end
    end

    # Create a new FactDb instance with optional logging setup.
    #
    # @param demo_file [String, nil] demo script path; enables file logging when given
    def create_fact_db(demo_file = nil)
      configure_logging(demo_file) if demo_file
      FactDb.new
    end
  end
end
|
|
129
|
+
|
|
130
|
+
##########################################################
|
|
131
|
+
# Convenience methods at top level for cleaner demo code
|
|
132
|
+
# Top-level shortcuts: each method forwards its arguments unchanged to the
# DemoUtilities method of the corresponding name.

# Prepare the demo environment, reset the database and optionally print a title.
def demo_setup!(demo_name = nil)
  DemoUtilities.setup!(demo_name)
end

# Print a major header.
def demo_header(title)
  DemoUtilities.header(title)
end

# Print a section header.
def demo_section(title)
  DemoUtilities.section(title)
end

# Print a subsection header.
def demo_subsection(title)
  DemoUtilities.subsection(title)
end

# Print the closing footer.
def demo_footer(title = "Demo Complete!")
  DemoUtilities.footer(title)
end

# Return the separator line (does not print it).
def demo_separator
  DemoUtilities.separator
end

# Print a list item.
def demo_list_item(text, indent: 2)
  DemoUtilities.list_item(text, indent: indent)
end

# Print an indented line.
def demo_indent(text, level: 1)
  DemoUtilities.indent(text, level: level)
end

# Print a key-value pair.
def demo_kv(key, value, indent_level: 1)
  DemoUtilities.kv(key, value, indent_level: indent_level)
end

# Print several lines as one block.
def demo_block(lines)
  DemoUtilities.block(lines)
end

# Send FactDb logging to a log file named after the demo script.
def demo_configure_logging(demo_file)
  DemoUtilities.configure_logging(demo_file)
end

# Build a FactDb instance, enabling file logging when a demo file is given.
def demo_create_fact_db(demo_file = nil)
  DemoUtilities.create_fact_db(demo_file)
end

# Environment setup for CLI tools; does not reset the database.
def cli_setup!(tool_name = nil)
  DemoUtilities.cli_setup!(tool_name)
end
|