fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Prove It - Source Evidence Viewer for FactDb
|
|
5
|
+
#
|
|
6
|
+
# Displays fact records along with their original source text evidence.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# ruby prove_it.rb <fact_id> [fact_id...] # Show facts with source evidence
|
|
10
|
+
# ruby prove_it.rb --last <n> # Show last n facts
|
|
11
|
+
# ruby prove_it.rb --search <term> # Search facts and show evidence
|
|
12
|
+
|
|
13
|
+
require_relative "utilities"
|
|
14
|
+
|
|
15
|
+
# Note: CLI tool - setup goes through DemoUtilities (see setup_factdb below) and is intended NOT to reset the database (TODO confirm against utilities.rb)
|
|
16
|
+
|
|
17
|
+
# Command-line viewer that prints FactDb fact records together with the
# source-text evidence reported by each fact's +prove_it+ method.
#
# Creating an instance configures FactDb and opens the database connection;
# the public +run*+ methods then print a report to standard output.
class ProveItDemo
  # Width of the "=" / "-" rules that frame report sections.
  RULE_WIDTH = 70
  # Width of the rules that frame an evidence excerpt.
  EVIDENCE_RULE_WIDTH = 40
  # Maximum number of facts printed by #run_search.
  SEARCH_LIMIT = 20
  # Maximum number of matched key terms listed under focused evidence.
  KEY_TERM_LIMIT = 10

  def initialize
    setup_factdb
  end

  # Prints every requested fact, or a usage hint when no IDs were given.
  #
  # @param fact_ids [Array<Integer>] IDs of the facts to display
  # @return [void]
  def run(fact_ids)
    print_banner("Prove It - Source Evidence Viewer")

    if fact_ids.empty?
      puts "No fact IDs provided."
      puts "Usage: ruby prove_it.rb <fact_id> [fact_id...]"
      puts " ruby prove_it.rb --last <n>"
      puts " ruby prove_it.rb --search <term>"
      return
    end

    fact_ids.each { |id| display_fact(id) }
  end

  # Prints the most recently created facts, newest first.
  #
  # @param count [Integer] maximum number of facts to show
  # @return [void]
  def run_last(count)
    print_banner("Prove It - Last #{count} Facts")

    # Load once so the emptiness check and the loop share a single query.
    facts = FactDb::Models::Fact.order(created_at: :desc).limit(count).to_a
    if facts.empty?
      puts "No facts found in database."
      return
    end

    facts.each { |fact| display_fact_record(fact) }
  end

  # Prints up to SEARCH_LIMIT facts whose text contains +term+
  # (case-insensitive).
  #
  # @param term [String] literal text to look for; "%" and "_" are matched
  #   literally rather than acting as LIKE wildcards
  # @return [void]
  def run_search(term)
    print_banner("Prove It - Search: \"#{term}\"")

    # Escape LIKE metacharacters so a term such as "100%" matches that exact
    # text instead of "100" followed by anything.
    pattern = "%#{FactDb::Models::Fact.sanitize_sql_like(term.to_s)}%"
    facts = FactDb::Models::Fact.where("text ILIKE ?", pattern).limit(SEARCH_LIMIT).to_a
    if facts.empty?
      puts "No facts found matching \"#{term}\"."
      return
    end

    puts "Found #{facts.size} matching facts:\n\n"
    facts.each { |fact| display_fact_record(fact) }
  end

  private

  # Prepares the demo environment, silences FactDb logging and connects to
  # the database.
  def setup_factdb
    DemoUtilities.ensure_demo_environment!
    DemoUtilities.require_fact_db!

    # File::NULL names the platform's null device, so logging is discarded on
    # systems without /dev/null as well.
    FactDb.configure { |config| config.logger = Logger.new(File::NULL) }

    FactDb::Database.establish_connection!
  end

  # Prints the title framed by "=" rules and followed by a blank line.
  def print_banner(title)
    rule = "=" * RULE_WIDTH
    puts rule
    puts title
    puts rule
    puts
  end

  # Looks a fact up by ID and prints it, or reports that it does not exist.
  def display_fact(id)
    fact = FactDb::Models::Fact.find_by(id: id)
    return display_fact_record(fact) if fact

    puts "Fact ID #{id} not found."
    puts "-" * RULE_WIDTH
    puts
  end

  # Prints one fact: core attributes, metadata, mentioned entities and the
  # source evidence, followed by two blank lines.
  def display_fact_record(fact)
    print_fact_summary(fact)
    print_fact_metadata(fact)
    print_fact_entities(fact)
    print_fact_evidence(fact)
    puts
    puts
  end

  # Prints the fact's ID header and its core attributes.
  def print_fact_summary(fact)
    rule = "-" * RULE_WIDTH
    puts rule
    puts "FACT ID: #{fact.id}"
    puts rule
    puts
    puts "Text: #{fact.text}"
    puts
    puts "Valid: #{fact.valid_at}#{" to #{fact.invalid_at}" if fact.invalid_at}"
    puts "Status: #{fact.status}"
    puts "Extraction: #{fact.extraction_method}"
    puts "Confidence: #{fact.confidence}"
    puts
  end

  # Prints the fact's metadata as "key: value" lines; prints nothing when the
  # metadata is blank.
  def print_fact_metadata(fact)
    return unless fact.metadata.present?

    puts "Metadata:"
    fact.metadata.each { |key, value| puts " #{key}: #{value}" }
    puts
  end

  # Prints the entities mentioned by the fact, with their mention role.
  def print_fact_entities(fact)
    # One eager-loading query serves both the emptiness check and the loop.
    mentions = fact.entity_mentions.includes(:entity).to_a
    return if mentions.empty?

    puts "Entities:"
    mentions.each do |mention|
      entity_name = mention.entity&.name || "(unknown)"
      puts " - #{entity_name} (#{mention.mention_role})"
    end
    puts
  end

  # Prints the evidence hash returned by +fact.prove_it+: the focused lines
  # when present, otherwise the full section, or a notice when there is no
  # evidence at all.
  def print_fact_evidence(fact)
    evidence = fact.prove_it
    unless evidence
      puts "SOURCE EVIDENCE: Not available"
      puts " (Missing line numbers or source content)"
      return
    end

    rule = "-" * EVIDENCE_RULE_WIDTH
    if evidence[:focused_lines].present?
      # Show focused lines (most relevant). Array() keeps the report printable
      # if the evidence hash omits the line numbers or key terms.
      line_nums = Array(evidence[:focused_line_numbers]).join(", ")
      key_terms = Array(evidence[:key_terms]).first(KEY_TERM_LIMIT)
      puts "FOCUSED EVIDENCE (lines #{line_nums}):"
      puts rule
      puts evidence[:focused_lines]
      puts rule
      puts
      puts "Key terms matched: #{key_terms.join(", ")}"
    else
      # Show full section context.
      metadata = fact.metadata || {}
      puts
      puts "FULL SECTION (lines #{metadata["line_start"]}-#{metadata["line_end"]}):"
      puts rule
      puts evidence[:full_section]
      puts rule
    end
  end
end
|
|
163
|
+
|
|
164
|
+
# Main execution
|
|
165
|
+
# Command-line entry point: prints help, or validates the arguments and then
# dispatches to ProveItDemo. Arguments are checked before ProveItDemo.new so
# that invalid input is reported without opening a database connection.
if __FILE__ == $PROGRAM_NAME
  if ARGV.empty? || ARGV.include?("--help") || ARGV.include?("-h")
    puts <<~HELP
      Prove It - Source Evidence Viewer for FactDb

      Displays fact records along with their original source text evidence.

      Usage:
      ruby prove_it.rb <fact_id> [fact_id...] # Show specific facts
      ruby prove_it.rb --last <n> # Show last n facts
      ruby prove_it.rb --search <term> # Search facts by text

      Examples:
      ruby prove_it.rb 123 456 789
      ruby prove_it.rb --last 5
      ruby prove_it.rb --search "Stephen"

      Environment:
      DATABASE_URL # PostgreSQL connection URL
    HELP
    exit 0
  end

  case ARGV.first
  when "--last"
    # String#to_i turns "abc" into 0, which would query with LIMIT 0 and
    # misreport an empty database; reject anything that is not a positive
    # integer instead. The count defaults to 10 when omitted.
    raw_count = ARGV.fetch(1, "10")
    count = raw_count.match?(/\A\d+\z/) ? raw_count.to_i : 0
    unless count.positive?
      warn "Error: --last expects a positive integer, got #{raw_count.inspect}"
      exit 1
    end

    ProveItDemo.new.run_last(count)
  when "--search"
    # Everything after --search is the term, so unquoted multi-word searches work.
    term = ARGV.drop(1).join(" ")
    if term.strip.empty?
      warn "Error: Please provide a search term"
      exit 1
    end

    ProveItDemo.new.run_search(term)
  else
    # Report arguments that are not positive integers rather than dropping
    # them silently.
    id_args, ignored = ARGV.partition { |arg| arg.match?(/\A\d+\z/) && arg.to_i.positive? }
    warn "Warning: ignoring invalid fact ID(s): #{ignored.join(", ")}" unless ignored.empty?

    ProveItDemo.new.run(id_args.map(&:to_i))
  end
end
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Database Dump Utility for FactDb
|
|
5
|
+
#
|
|
6
|
+
# Dumps the contents of the fact_db_demo database in a structured format
|
|
7
|
+
# for verification and inspection.
|
|
8
|
+
#
|
|
9
|
+
# Usage:
|
|
10
|
+
# ruby dump_database.rb # Full dump
|
|
11
|
+
# ruby dump_database.rb --summary # Summary only
|
|
12
|
+
# ruby dump_database.rb --entities # Entities only
|
|
13
|
+
# ruby dump_database.rb --facts # Facts only
|
|
14
|
+
# ruby dump_database.rb --sources # Sources only
|
|
15
|
+
# ruby dump_database.rb --search TERM # Search facts/entities
|
|
16
|
+
|
|
17
|
+
require_relative "utilities"
|
|
18
|
+
|
|
19
|
+
# Note: CLI tool - setup goes through DemoUtilities (see setup_factdb below) and is intended NOT to reset the database (TODO confirm against utilities.rb)
|
|
20
|
+
|
|
21
|
+
# Command-line utility that prints the contents of a FactDb database
# (sources, entities, facts and their relationships) to standard output for
# verification and inspection.
#
# Creating an instance configures FactDb and opens the database connection;
# #run then prints the sections selected by its options.
class DatabaseDumper
  # Width of the "=" rules that frame section headers.
  RULE_WIDTH = 70
  # Width of the "-" rule printed before each source / fact record.
  RECORD_RULE_WIDTH = 60
  # Maximum number of characters of source content shown as a preview.
  PREVIEW_LENGTH = 201
  # Maximum number of facts listed by a search.
  FACT_SEARCH_LIMIT = 50
  # Maximum number of sources listed by a search.
  SOURCE_SEARCH_LIMIT = 20

  def initialize
    setup_factdb
  end

  # Prints the dump banner and then the requested section(s).
  #
  # @param options [Hash] selects what to print; the first key present wins,
  #   in this order: +:search+ (String term), +:summary+, +:entities+,
  #   +:facts+, +:sources+ (truthy flags). With none set, a full dump is
  #   printed.
  # @return [void]
  def run(options = {})
    rule = "=" * RULE_WIDTH
    puts rule
    puts "FactDb Database Dump"
    puts rule
    # The dump is meant to be read and shared, so never echo the password.
    puts "Database: #{redact_credentials(FactDb.config.database.url)}"
    puts "Timestamp: #{Time.now}"
    puts

    if options[:search]
      search(options[:search])
    elsif options[:summary]
      dump_summary
    elsif options[:entities]
      dump_entities
    elsif options[:facts]
      dump_facts
    elsif options[:sources]
      dump_sources
    else
      dump_summary
      dump_sources
      dump_entities
      dump_facts
      dump_relationships
    end
  end

  private

  # Prepares the demo environment, silences FactDb logging and connects to
  # the database.
  def setup_factdb
    DemoUtilities.ensure_demo_environment!
    DemoUtilities.require_fact_db!

    # File::NULL names the platform's null device, so logging is discarded on
    # systems without /dev/null as well.
    FactDb.configure { |config| config.logger = Logger.new(File::NULL) }

    FactDb::Database.establish_connection!
  end

  # Returns +url+ as a string with any "user:password@" password replaced by
  # a placeholder; URLs without a password are returned unchanged.
  def redact_credentials(url)
    url.to_s.sub(%r{\A([^:/?#]+://[^:/@]+):[^@/]*@}, '\1:[REDACTED]@')
  end

  # Prints a section title framed by "=" rules, preceded by a blank line.
  def print_section_header(title)
    rule = "=" * RULE_WIDTH
    puts "\n#{rule}"
    puts title
    puts rule
  end

  # Prints a heading followed by one "label count" line per entry.
  def print_breakdown(heading, counts)
    puts "\n#{heading}"
    counts.each do |label, count|
      puts " #{label.to_s.ljust(20)} #{count.to_s.rjust(6)}"
    end
  end

  # Builds a case-insensitive "contains" pattern in which the LIKE
  # metacharacters of +term+ are escaped, so "%" and "_" match literally.
  def like_pattern(term)
    "%#{FactDb::Models::Fact.sanitize_sql_like(term.to_s)}%"
  end

  # Collapses whitespace in +content+ and returns at most PREVIEW_LENGTH
  # characters, adding "..." only when text was actually cut off. Returns nil
  # when there is nothing to show.
  def content_preview(content)
    flattened = content.to_s.gsub(/\s+/, " ").strip
    return nil if flattened.empty?
    return flattened if flattened.length <= PREVIEW_LENGTH

    "#{flattened[0, PREVIEW_LENGTH]}..."
  end

  # Prints record counts per table plus breakdowns by kind, extraction method
  # and status.
  def dump_summary
    print_section_header("SUMMARY")

    source_count = FactDb::Models::Source.count
    entity_count = FactDb::Models::Entity.count
    fact_count = FactDb::Models::Fact.count
    mention_count = FactDb::Models::EntityMention.count
    fact_source_count = FactDb::Models::FactSource.count

    puts <<~SUMMARY
      Source records: #{source_count.to_s.rjust(6)}
      Entity records: #{entity_count.to_s.rjust(6)}
      Fact records: #{fact_count.to_s.rjust(6)}
      Entity mentions: #{mention_count.to_s.rjust(6)}
      Fact sources: #{fact_source_count.to_s.rjust(6)}
    SUMMARY

    if entity_count > 0
      # Most common kinds first.
      by_kind = FactDb::Models::Entity.group(:kind).count.sort_by { |_, total| -total }
      print_breakdown("Entities by kind:", by_kind)
    end

    if fact_count > 0
      print_breakdown("Facts by extraction method:", FactDb::Models::Fact.group(:extraction_method).count)
      print_breakdown("Facts by status:", FactDb::Models::Fact.group(:status).count)
    end

    return unless source_count > 0

    print_breakdown("Sources by kind:", FactDb::Models::Source.group(:kind).count)
  end

  # Prints every source in creation order with its attributes, linked fact
  # count and a short content preview.
  def dump_sources
    print_section_header("SOURCES")

    sources = FactDb::Models::Source.order(:created_at).to_a
    if sources.empty?
      puts " (no source records)"
      return
    end

    sources.each do |source|
      puts "\n#{'-' * RECORD_RULE_WIDTH}"
      puts "ID: #{source.id}"
      puts "Title: #{source.title || '(untitled)'}"
      puts "Kind: #{source.kind}"
      # to_s keeps the dump going if a record has no hash.
      puts "Hash: #{source.content_hash.to_s[0..16]}..."
      puts "Captured: #{source.captured_at}"
      puts "Created: #{source.created_at}"
      puts "Metadata: #{source.metadata.to_json}" if source.metadata.present?

      # Show linked facts count
      puts "Linked facts: #{source.facts.count}"

      # Preview of content
      preview = content_preview(source.content)
      puts "Preview: #{preview}" if preview
    end
  end

  # Prints every entity, grouped under a header per kind and ordered by name
  # within each kind.
  def dump_entities
    print_section_header("ENTITIES")

    entities = FactDb::Models::Entity.order(:kind, :name).to_a
    if entities.empty?
      puts " (no entity records)"
      return
    end

    # Records arrive sorted by kind, so each run of equal kinds is one group.
    entities.slice_when { |previous, current| previous.kind != current.kind }.each do |group|
      puts "\n--- #{group.first.kind.to_s.upcase} ---"
      group.each { |entity| print_entity(entity) }
    end
  end

  # Prints one entity's details for #dump_entities.
  def print_entity(entity)
    mention_count = entity.entity_mentions.count
    fact_count = entity.facts.count
    aliases = entity.all_aliases

    puts "\n #{entity.name}"
    puts " ID: #{entity.id}"
    puts " Aliases: #{aliases.join(', ')}" if aliases.any?
    puts " Description: #{entity.description}" if entity.description.present?
    puts " Resolution: #{entity.resolution_status}"
    puts " Mentions: #{mention_count}, Facts: #{fact_count}"
    puts " Metadata: #{entity.metadata.to_json}" if entity.metadata.present?
  end

  # Prints every fact in creation order with its attributes, entity mentions
  # and linked sources.
  def dump_facts
    print_section_header("FACTS")

    # Eager-load the mentioned entities too, so printing a mention does not
    # issue one query per row.
    # NOTE(review): `fact_sources: :source` could be eager-loaded the same way
    # if FactSource#source is an association - confirm in the model.
    facts = FactDb::Models::Fact
            .includes(:fact_sources, entity_mentions: :entity)
            .order(:created_at)
            .to_a
    if facts.empty?
      puts " (no fact records)"
      return
    end

    facts.each { |fact| print_fact(fact) }
  end

  # Prints one fact's details for #dump_facts.
  def print_fact(fact)
    puts "\n#{'-' * RECORD_RULE_WIDTH}"
    puts "ID: #{fact.id}"
    puts "Text: #{fact.text}"
    puts "Valid: #{fact.valid_at}#{" to #{fact.invalid_at}" if fact.invalid_at}"
    puts "Status: #{fact.status}"
    puts "Method: #{fact.extraction_method}"
    puts "Confidence: #{fact.confidence}"
    puts "Metadata: #{fact.metadata.to_json}" if fact.metadata.present?

    # Entity mentions
    if fact.entity_mentions.any?
      puts "Mentions:"
      fact.entity_mentions.each do |mention|
        entity_name = mention.entity&.name || "(unknown)"
        puts " - #{entity_name} (#{mention.mention_role}): \"#{mention.mention_text}\""
      end
    end

    # Sources
    return unless fact.fact_sources.any?

    puts "Sources:"
    fact.fact_sources.each do |fact_source|
      source_title = fact_source.source&.title || "(unknown)"
      puts " - #{source_title} (#{fact_source.kind}, confidence: #{fact_source.confidence})"
    end
  end

  # Prints the 20 most-mentioned entities and the 10 sources with the most
  # linked facts.
  def dump_relationships
    print_section_header("RELATIONSHIPS")

    # Entity mention statistics
    puts "\nTop entities by mention count:"
    mention_counts = FactDb::Models::Entity
                     .joins(:entity_mentions)
                     .group("fact_db_entities.id")
                     .order(Arel.sql("count(*) DESC"))
                     .limit(20)
                     .count

    # Fetch the counted entities in one query and look them up by ID.
    entities_by_id = FactDb::Models::Entity.where(id: mention_counts.keys).index_by(&:id)

    mention_counts.each do |id, count|
      entity = entities_by_id[id]
      next unless entity

      puts " #{entity.name.to_s.ljust(30)} (#{entity.kind.to_s.ljust(12)}) #{count.to_s.rjust(4)} mentions"
      aliases = entity.all_aliases
      puts " Aliases: #{aliases.join(', ')}" if aliases.any?
    end

    # Sources with most facts
    puts "\nSources by linked fact count:"
    source_facts = FactDb::Models::Source
                   .joins(:fact_sources)
                   .group("fact_db_sources.id", "fact_db_sources.title")
                   .order(Arel.sql("count(*) DESC"))
                   .limit(10)
                   .count

    source_facts.each do |(_id, title), count|
      puts " #{(title || 'untitled').to_s.ljust(40)} #{count.to_s.rjust(4)} facts"
    end
  end

  # Prints the entities, facts and sources whose text fields contain +term+
  # (case-insensitive, matched literally).
  def search(term)
    print_section_header("SEARCH RESULTS: \"#{term}\"")
    pattern = like_pattern(term)

    search_entities(pattern)
    search_facts(pattern)
    search_sources(pattern)
  end

  # Lists entities whose name or description matches +pattern+.
  def search_entities(pattern)
    puts "\nMatching Entities:"
    entities = FactDb::Models::Entity
               .where("name ILIKE ? OR description ILIKE ?", pattern, pattern)
               .order(:name)
               .to_a

    if entities.empty?
      puts " (no matching entities)"
      return
    end

    entities.each do |entity|
      puts " #{entity.name} (#{entity.kind})"
      puts " #{entity.description}" if entity.description.present?
    end
  end

  # Lists up to FACT_SEARCH_LIMIT facts whose text matches +pattern+, noting
  # the total when more exist than were shown.
  def search_facts(pattern)
    puts "\nMatching Facts:"
    matching = FactDb::Models::Fact.where("text ILIKE ?", pattern)
    shown = matching.order(:created_at).limit(FACT_SEARCH_LIMIT).to_a

    if shown.empty?
      puts " (no matching facts)"
      return
    end

    shown.each { |fact| puts " [#{fact.id}] #{fact.text}" }

    # Only a full page can be hiding more rows, so count only in that case.
    return unless shown.size == FACT_SEARCH_LIMIT

    total = matching.count
    puts " (showing first #{FACT_SEARCH_LIMIT} of #{total})" if total > FACT_SEARCH_LIMIT
  end

  # Lists up to SOURCE_SEARCH_LIMIT sources whose title or content matches
  # +pattern+.
  def search_sources(pattern)
    puts "\nMatching Sources:"
    sources = FactDb::Models::Source
              .where("title ILIKE ? OR content ILIKE ?", pattern, pattern)
              .order(:title)
              .limit(SOURCE_SEARCH_LIMIT)
              .to_a

    if sources.empty?
      puts " (no matching sources)"
      return
    end

    sources.each do |source|
      puts " [#{source.id}] #{source.title || '(untitled)'} (#{source.kind})"
    end
  end
end
|
|
321
|
+
|
|
322
|
+
# Main execution
|
|
323
|
+
# Command-line entry point: translates the arguments into the options hash
# understood by DatabaseDumper#run. Arguments that are not recognized flags
# (including the search term itself) are ignored.
if __FILE__ == $PROGRAM_NAME
  options = {}

  # Flags that simply switch one section on.
  section_flags = {
    "--summary" => :summary,
    "--entities" => :entities,
    "--facts" => :facts,
    "--sources" => :sources
  }.freeze

  ARGV.each_with_index do |arg, index|
    if section_flags.key?(arg)
      options[section_flags[arg]] = true
      next
    end

    case arg
    when "--search"
      # Without this check a missing term left options[:search] nil and the
      # script silently printed a full dump instead of searching.
      term = ARGV[index + 1]
      if term.nil? || term.strip.empty?
        warn "Error: --search requires a search term"
        exit 1
      end
      options[:search] = term
    when "--help", "-h"
      puts <<~HELP
        FactDb Database Dump Utility

        Usage:
        ruby dump_database.rb # Full dump
        ruby dump_database.rb --summary # Summary statistics only
        ruby dump_database.rb --entities # Entities only
        ruby dump_database.rb --facts # Facts only
        ruby dump_database.rb --sources # Sources only
        ruby dump_database.rb --search TERM # Search facts/entities/sources

        Environment:
        DATABASE_URL # PostgreSQL connection URL (default: postgres://USER@localhost/fact_db_demo)
      HELP
      exit 0
    end
  end

  DatabaseDumper.new.run(options)
end
|