fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# FactDb Bundled Defaults
|
|
2
|
+
#
|
|
3
|
+
# This file is the SINGLE SOURCE OF TRUTH for FactDb configuration schema.
|
|
4
|
+
# All attributes must be declared here (even if nil) to be recognized.
|
|
5
|
+
# It is bundled with the gem and loaded automatically at lowest priority.
|
|
6
|
+
#
|
|
7
|
+
# Loading priority (lowest to highest):
|
|
8
|
+
# 1. This file (bundled defaults)
|
|
9
|
+
# 2. XDG user config (~/.config/fact_db/fact_db.yml)
|
|
10
|
+
# 3. Project config (./config/fact_db.yml)
|
|
11
|
+
# 4. Local overrides (./config/fact_db.local.yml)
|
|
12
|
+
# 5. Environment variables (FDB_*)
|
|
13
|
+
# 6. Programmatic (FactDb.configure block)
|
|
14
|
+
#
|
|
15
|
+
# Structure:
|
|
16
|
+
# - defaults: Base values for all environments (with nested sections)
|
|
17
|
+
# - development: Overrides for development environment
|
|
18
|
+
# - test: Overrides for test environment
|
|
19
|
+
# - production: Overrides for production environment
|
|
20
|
+
# - demo: Overrides for demo/staging environment
|
|
21
|
+
#
|
|
22
|
+
# Custom Environments:
|
|
23
|
+
# Your application's config file (./config/fact_db.yml) can define additional
|
|
24
|
+
# environments to suit your infrastructure. For example:
|
|
25
|
+
# staging:
|
|
26
|
+
# database:
|
|
27
|
+
# name: myapp_staging
|
|
28
|
+
# qa:
|
|
29
|
+
# database:
|
|
30
|
+
# name: myapp_qa
|
|
31
|
+
#
|
|
32
|
+
# Environment helper methods (e.g., config.staging?, config.qa?) are
|
|
33
|
+
# automatically generated for any environment defined in configuration files.
|
|
34
|
+
#
|
|
35
|
+
# Access pattern:
|
|
36
|
+
# FactDb.config.database.url
|
|
37
|
+
# FactDb.config.database.pool
|
|
38
|
+
# FactDb.config.llm.provider
|
|
39
|
+
# FactDb.config.ranking.ts_rank_weight
|
|
40
|
+
|
|
41
|
+
# =============================================================================
|
|
42
|
+
# Shared Defaults (base for all environments)
|
|
43
|
+
# =============================================================================
|
|
44
|
+
defaults:
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
# Database Configuration (ActiveRecord-compatible keys)
|
|
47
|
+
# Access: FactDb.config.database.url, FactDb.config.database.pool, etc.
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
database:
|
|
50
|
+
adapter: postgresql
|
|
51
|
+
url: ~
|
|
52
|
+
host: localhost
|
|
53
|
+
port: 5432
|
|
54
|
+
name: fact_db_demo
|
|
55
|
+
username: ~
|
|
56
|
+
password: ~
|
|
57
|
+
pool: 5
|
|
58
|
+
timeout: 30000
|
|
59
|
+
encoding: "unicode"
|
|
60
|
+
prepared_statements: false
|
|
61
|
+
advisory_locks: false
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Embedding Configuration
|
|
65
|
+
# Access: FactDb.config.embedding.generator, FactDb.config.embedding.dimensions
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
embedding:
|
|
68
|
+
generator: ~
|
|
69
|
+
dimensions: 1536
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# LLM Configuration
|
|
73
|
+
# Access: FactDb.config.llm.provider, FactDb.config.llm.model, etc.
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
llm:
|
|
76
|
+
client: ~
|
|
77
|
+
provider: ~
|
|
78
|
+
model: ~
|
|
79
|
+
api_key: ~
|
|
80
|
+
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
# Ranking Signal Weights
|
|
83
|
+
# Access: FactDb.config.ranking.ts_rank_weight, etc.
|
|
84
|
+
# Weights should sum to approximately 1.0 for normalized scores
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
ranking:
|
|
87
|
+
ts_rank_weight: 0.25
|
|
88
|
+
vector_similarity_weight: 0.25
|
|
89
|
+
entity_mention_weight: 0.15
|
|
90
|
+
direct_answer_weight: 0.15
|
|
91
|
+
term_overlap_weight: 0.10
|
|
92
|
+
relationship_match_weight: 0.05
|
|
93
|
+
confidence_weight: 0.05
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# General Settings
|
|
97
|
+
# Access: FactDb.config.default_extractor, FactDb.config.log_level, etc.
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
default_extractor: manual
|
|
100
|
+
fuzzy_match_threshold: 0.85
|
|
101
|
+
auto_merge_threshold: 0.95
|
|
102
|
+
log_level: info
|
|
103
|
+
|
|
104
|
+
# ---------------------------------------------------------------------------
|
|
105
|
+
# LLM Prompts
|
|
106
|
+
# Access: FactDb.config.prompts.fact_extraction, FactDb.config.prompts.entity_extraction, FactDb.config.prompts.rag_system
|
|
107
|
+
# The fact_extraction and entity_extraction prompts use %<text>s as a placeholder for the input text; rag_system has no placeholder.
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
prompts:
|
|
110
|
+
fact_extraction: |
|
|
111
|
+
Extract ATOMIC factual assertions from the following text. Break compound
|
|
112
|
+
statements into individual, indivisible facts - one assertion per fact.
|
|
113
|
+
|
|
114
|
+
For each atomic fact:
|
|
115
|
+
1. State a single, indivisible assertion (not multiple facts combined)
|
|
116
|
+
2. Identify when it became true (valid_at) if mentioned or inferable
|
|
117
|
+
3. Identify when it stopped being true (invalid_at) if mentioned
|
|
118
|
+
4. Identify entities mentioned (people, organizations, places, products)
|
|
119
|
+
5. Assign a confidence score (0.0 to 1.0) based on how explicitly stated the fact is
|
|
120
|
+
6. For each entity, include any aliases or alternative names used in the text
|
|
121
|
+
|
|
122
|
+
Text:
|
|
123
|
+
%<text>s
|
|
124
|
+
|
|
125
|
+
Return as a JSON array with this structure:
|
|
126
|
+
[
|
|
127
|
+
{
|
|
128
|
+
"text": "Paula works at Microsoft",
|
|
129
|
+
"valid_at": "2024-01-10",
|
|
130
|
+
"invalid_at": null,
|
|
131
|
+
"confidence": 0.95,
|
|
132
|
+
"mentions": [
|
|
133
|
+
{"name": "Paula Chen", "type": "person", "role": "subject", "aliases": ["Paula", "P. Chen"]},
|
|
134
|
+
{"name": "Microsoft", "type": "organization", "role": "object", "aliases": ["MS", "Microsoft Corporation"]}
|
|
135
|
+
]
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
"text": "Paula holds the title of Principal Engineer",
|
|
139
|
+
"valid_at": "2024-01-10",
|
|
140
|
+
"invalid_at": null,
|
|
141
|
+
"confidence": 0.95,
|
|
142
|
+
"mentions": [
|
|
143
|
+
{"name": "Paula Chen", "type": "person", "role": "subject", "aliases": ["Paula", "P. Chen"]}
|
|
144
|
+
]
|
|
145
|
+
}
|
|
146
|
+
]
|
|
147
|
+
|
|
148
|
+
Rules:
|
|
149
|
+
- ATOMIC FACTS: Break compound statements into smallest meaningful assertions
|
|
150
|
+
- "John and Mary married in Paris" becomes TWO facts: "John married Mary" AND "The marriage took place in Paris"
|
|
151
|
+
- "She is a doctor at City Hospital" becomes TWO facts: "She is a doctor" AND "She works at City Hospital"
|
|
152
|
+
- Extract only factual assertions, not opinions or speculation
|
|
153
|
+
- Use ISO 8601 date format (YYYY-MM-DD) when possible
|
|
154
|
+
- Set invalid_at to null if the fact is still true or unknown
|
|
155
|
+
- Set valid_at to null if the timing is not mentioned
|
|
156
|
+
- Entity types (use ONLY these exact values):
|
|
157
|
+
- person: individual humans
|
|
158
|
+
- organization: companies, governments, institutions, teams, groups
|
|
159
|
+
- place: cities, countries, buildings, geographic locations
|
|
160
|
+
- product: goods, services, software, publications
|
|
161
|
+
- event: wars, battles, elections, conferences, treaties, historical events
|
|
162
|
+
- concept: ideas, theories, abstract notions, fields of study
|
|
163
|
+
- Roles: subject, object, location, temporal, instrument, beneficiary
|
|
164
|
+
- For person entities, use the most complete/formal name as "name" and shorter/alternative forms as "aliases"
|
|
165
|
+
- Common aliases include: nicknames, titles with name, name variations, abbreviations
|
|
166
|
+
- NEVER include pronouns as aliases (he, she, him, her, they, them, his, her, their, it, we, you, I, me, my, etc.)
|
|
167
|
+
- NEVER include generic terms as aliases (man, woman, person, husband, wife, the man, this person, etc.)
|
|
168
|
+
- Only include proper names, nicknames, and formal variations as aliases
|
|
169
|
+
|
|
170
|
+
Return only valid JSON, no additional text.
|
|
171
|
+
|
|
172
|
+
entity_extraction: |
|
|
173
|
+
Extract all named entities from the following text.
|
|
174
|
+
For each entity:
|
|
175
|
+
1. Identify the canonical name
|
|
176
|
+
2. Classify the type using ONLY these exact values:
|
|
177
|
+
- person: individual humans
|
|
178
|
+
- organization: companies, governments, institutions, teams, groups
|
|
179
|
+
- place: cities, countries, buildings, geographic locations
|
|
180
|
+
- product: goods, services, software, publications
|
|
181
|
+
- event: wars, battles, elections, conferences, treaties, historical events
|
|
182
|
+
- concept: ideas, theories, abstract notions, fields of study
|
|
183
|
+
3. List any aliases or alternative names mentioned
|
|
184
|
+
|
|
185
|
+
Text:
|
|
186
|
+
%<text>s
|
|
187
|
+
|
|
188
|
+
Return as a JSON array:
|
|
189
|
+
[
|
|
190
|
+
{
|
|
191
|
+
"name": "Paula Chen",
|
|
192
|
+
"type": "person",
|
|
193
|
+
"aliases": ["Paula", "P. Chen"]
|
|
194
|
+
}
|
|
195
|
+
]
|
|
196
|
+
|
|
197
|
+
Important rules for aliases:
|
|
198
|
+
- NEVER include pronouns (he, she, him, her, they, them, his, her, their, it, we, you, I, me, my, etc.)
|
|
199
|
+
- NEVER include generic terms (man, woman, person, husband, wife, the man, this person, believers, disciples, etc.)
|
|
200
|
+
- Only include proper names, nicknames, titles, and formal name variations
|
|
201
|
+
|
|
202
|
+
Return only valid JSON, no additional text.
|
|
203
|
+
|
|
204
|
+
rag_system: |
|
|
205
|
+
You are a knowledgeable assistant with access to a fact database. Use the provided context to inform your response, but also feel free to synthesize and expand upon the information with related knowledge.
|
|
206
|
+
|
|
207
|
+
Your response should:
|
|
208
|
+
1. Directly address the user's question
|
|
209
|
+
2. Include specific facts, names, dates, and details where relevant
|
|
210
|
+
3. Make connections between related pieces of information
|
|
211
|
+
4. Present information in clear, atomic statements that can be extracted as individual facts
|
|
212
|
+
|
|
213
|
+
# =============================================================================
|
|
214
|
+
# Development Environment Overrides
|
|
215
|
+
# =============================================================================
|
|
216
|
+
development:
|
|
217
|
+
database:
|
|
218
|
+
name: fact_db_development
|
|
219
|
+
log_level: debug
|
|
220
|
+
|
|
221
|
+
# =============================================================================
|
|
222
|
+
# Test Environment Overrides
|
|
223
|
+
# =============================================================================
|
|
224
|
+
test:
|
|
225
|
+
database:
|
|
226
|
+
name: fact_db_test
|
|
227
|
+
pool: 2
|
|
228
|
+
timeout: 5000
|
|
229
|
+
log_level: warn
|
|
230
|
+
|
|
231
|
+
# =============================================================================
|
|
232
|
+
# Production Environment Overrides
|
|
233
|
+
# =============================================================================
|
|
234
|
+
production:
|
|
235
|
+
database:
|
|
236
|
+
name: fact_db_production
|
|
237
|
+
pool: 10
|
|
238
|
+
timeout: 60000
|
|
239
|
+
log_level: warn
|
|
240
|
+
|
|
241
|
+
# =============================================================================
|
|
242
|
+
# Demo Environment Overrides
|
|
243
|
+
# =============================================================================
|
|
244
|
+
demo:
|
|
245
|
+
database:
|
|
246
|
+
name: fact_db_demo
|
|
247
|
+
pool: 3
|
|
248
|
+
log_level: info
|
|
249
|
+
|
|
250
|
+
# -----------------------------------------------------------------------------
|
|
251
|
+
# Add your own environments for your project's infrastructure
|
|
252
|
+
# -----------------------------------------------------------------------------
|
|
253
|
+
# qa:
|
|
254
|
+
# staging:
|
data/lib/fact_db/config.rb
CHANGED
|
@@ -1,60 +1,119 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "
|
|
3
|
+
require "myway_config"
|
|
4
4
|
require "logger"
|
|
5
5
|
|
|
6
|
+
# Configure MywayConfig's environment detection: FDB_ENV, then RAILS_ENV, then RACK_ENV, defaulting to "development"
|
|
7
|
+
Anyway::Settings.current_environment = ENV["FDB_ENV"] || ENV["RAILS_ENV"] || ENV["RACK_ENV"] || "development"
|
|
8
|
+
|
|
6
9
|
module FactDb
|
|
7
|
-
|
|
10
|
+
# FactDb Configuration using MywayConfig
|
|
11
|
+
#
|
|
12
|
+
# Schema is defined in lib/fact_db/config/defaults.yml (single source of truth)
|
|
13
|
+
#
|
|
14
|
+
# All config sections return ConfigSections with symbol keys:
|
|
15
|
+
# FactDb.config.database # => ConfigSection with {adapter: "postgresql", host: "localhost", ...}
|
|
16
|
+
# FactDb.config.llm # => ConfigSection with {provider: :anthropic, model: "claude-...", ...}
|
|
17
|
+
# FactDb.config.embedding # => ConfigSection with {generator: nil, dimensions: 1536}
|
|
18
|
+
# FactDb.config.ranking # => ConfigSection with {ts_rank_weight: 0.25, ...}
|
|
19
|
+
#
|
|
20
|
+
# Access values via hash keys or dot notation:
|
|
21
|
+
# FactDb.config.database[:host]
|
|
22
|
+
# FactDb.config.database.host
|
|
23
|
+
# FactDb.config.llm[:provider]
|
|
24
|
+
# FactDb.config.llm.provider
|
|
25
|
+
#
|
|
26
|
+
# Configuration sources (lowest to highest priority):
|
|
27
|
+
# 1. Bundled defaults: lib/fact_db/config/defaults.yml (ships with gem)
|
|
28
|
+
# 2. XDG user config:
|
|
29
|
+
# - ~/Library/Application Support/fact_db/fact_db.yml (macOS only)
|
|
30
|
+
# - ~/.config/fact_db/fact_db.yml (XDG default)
|
|
31
|
+
# - $XDG_CONFIG_HOME/fact_db/fact_db.yml (if XDG_CONFIG_HOME is set)
|
|
32
|
+
# 3. Project config: ./config/fact_db.yml (environment-specific)
|
|
33
|
+
# 4. Local overrides: ./config/fact_db.local.yml (gitignored)
|
|
34
|
+
# 5. Environment variables (FDB_*)
|
|
35
|
+
# 6. Explicit values passed to configure block
|
|
36
|
+
#
|
|
37
|
+
# @example Configure with environment variables
|
|
38
|
+
# export FDB_DATABASE__URL=postgresql://localhost/fact_db_development
|
|
39
|
+
# export FDB_LLM__PROVIDER=openai
|
|
40
|
+
# export FDB_LLM__API_KEY=sk-xxx
|
|
41
|
+
#
|
|
42
|
+
# @example Configure with Ruby block
|
|
43
|
+
# FactDb.configure do |config|
|
|
44
|
+
# config.llm[:provider] = :openai
|
|
45
|
+
# config.llm[:model] = "gpt-4o-mini"
|
|
46
|
+
# end
|
|
47
|
+
#
|
|
48
|
+
class Config < MywayConfig::Base
|
|
8
49
|
config_name :fact_db
|
|
50
|
+
env_prefix :fdb
|
|
51
|
+
defaults_path File.expand_path("config/defaults.yml", __dir__)
|
|
52
|
+
auto_configure!
|
|
9
53
|
|
|
10
|
-
#
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
database_timeout: 30_000
|
|
14
|
-
|
|
15
|
-
# Embedding configuration
|
|
16
|
-
attr_config :embedding_generator
|
|
17
|
-
attr_config embedding_dimensions: 1536
|
|
18
|
-
|
|
19
|
-
# LLM configuration
|
|
20
|
-
attr_config :llm_client, :llm_provider, :llm_model, :llm_api_key
|
|
54
|
+
# ==========================================================================
|
|
55
|
+
# Callable Accessors (not loaded from config sources)
|
|
56
|
+
# ==========================================================================
|
|
21
57
|
|
|
22
|
-
|
|
23
|
-
attr_config default_extractor: :manual
|
|
58
|
+
attr_accessor :embedding_generator, :llm_client, :logger
|
|
24
59
|
|
|
25
|
-
#
|
|
26
|
-
|
|
27
|
-
|
|
60
|
+
# ==========================================================================
|
|
61
|
+
# Callbacks
|
|
62
|
+
# ==========================================================================
|
|
28
63
|
|
|
29
|
-
|
|
30
|
-
attr_config :logger
|
|
31
|
-
attr_config log_level: :info
|
|
64
|
+
on_load :setup_defaults
|
|
32
65
|
|
|
33
|
-
#
|
|
34
|
-
|
|
35
|
-
|
|
66
|
+
# ==========================================================================
|
|
67
|
+
# XDG Config Path Helpers
|
|
68
|
+
# ==========================================================================
|
|
36
69
|
|
|
37
|
-
|
|
70
|
+
def self.xdg_config_paths
|
|
71
|
+
MywayConfig::Loaders::XdgConfigLoader.config_paths(:fact_db)
|
|
72
|
+
end
|
|
38
73
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
74
|
+
def self.xdg_config_file
|
|
75
|
+
xdg_home = ENV["XDG_CONFIG_HOME"]
|
|
76
|
+
base = if xdg_home && !xdg_home.empty?
|
|
77
|
+
xdg_home
|
|
78
|
+
else
|
|
79
|
+
File.expand_path("~/.config")
|
|
80
|
+
end
|
|
81
|
+
File.join(base, "fact_db", "fact_db.yml")
|
|
44
82
|
end
|
|
45
83
|
|
|
46
|
-
def
|
|
47
|
-
|
|
84
|
+
def self.active_xdg_config_file
|
|
85
|
+
MywayConfig::Loaders::XdgConfigLoader.find_config_file(:fact_db)
|
|
48
86
|
end
|
|
49
87
|
|
|
50
|
-
|
|
51
|
-
|
|
88
|
+
private
|
|
89
|
+
|
|
90
|
+
def setup_defaults
|
|
91
|
+
@logger ||= build_default_logger
|
|
92
|
+
end
|
|
52
93
|
|
|
53
|
-
|
|
94
|
+
def build_default_logger
|
|
95
|
+
logger = Logger.new($stdout)
|
|
96
|
+
logger.level = log_level || :info
|
|
97
|
+
logger.formatter = proc do |severity, datetime, _progname, msg|
|
|
98
|
+
"[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] #{severity} -- FactDb: #{msg}\n"
|
|
99
|
+
end
|
|
100
|
+
logger
|
|
54
101
|
end
|
|
55
102
|
end
|
|
56
103
|
|
|
104
|
+
# ==========================================================================
|
|
105
|
+
# Module-level Configuration API
|
|
106
|
+
# ==========================================================================
|
|
107
|
+
|
|
57
108
|
class << self
|
|
109
|
+
def env
|
|
110
|
+
@env ||= ENV.fetch("FDB_ENV") { ENV.fetch("RAILS_ENV") { ENV.fetch("RACK_ENV", "development") } }
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def env=(value)
|
|
114
|
+
@env = value.to_s
|
|
115
|
+
end
|
|
116
|
+
|
|
58
117
|
def config
|
|
59
118
|
@config ||= Config.new
|
|
60
119
|
end
|
data/lib/fact_db/database.rb
CHANGED
|
@@ -4,43 +4,133 @@ require "active_record"
|
|
|
4
4
|
require "neighbor"
|
|
5
5
|
|
|
6
6
|
module FactDb
|
|
7
|
+
# Database management module for FactDb
|
|
8
|
+
#
|
|
9
|
+
# Provides class methods for establishing database connections, running
|
|
10
|
+
# migrations, and managing database lifecycle (create, drop, reset).
|
|
11
|
+
#
|
|
12
|
+
# @example Establish connection and run migrations
|
|
13
|
+
# FactDb::Database.establish_connection!
|
|
14
|
+
# FactDb::Database.migrate!
|
|
15
|
+
#
|
|
16
|
+
# @example Reset database for testing
|
|
17
|
+
# FactDb::Database.reset!
|
|
18
|
+
#
|
|
7
19
|
module Database
|
|
8
20
|
class << self
|
|
21
|
+
# Establishes an ActiveRecord database connection
|
|
22
|
+
#
|
|
23
|
+
# Uses configuration from FactDb.config by default. Sets up the logger
|
|
24
|
+
# if configured.
|
|
25
|
+
#
|
|
26
|
+
# @param config [FactDb::Config] configuration object (defaults to FactDb.config)
|
|
27
|
+
# @return [void]
|
|
9
28
|
def establish_connection!(config = FactDb.config)
|
|
10
|
-
config.
|
|
11
|
-
ActiveRecord::Base.establish_connection(config.
|
|
29
|
+
# config.database is a ConfigSection - convert to AR-compatible hash
|
|
30
|
+
ActiveRecord::Base.establish_connection(ar_connection_hash(config.database))
|
|
12
31
|
ActiveRecord::Base.logger = config.logger if config.logger
|
|
13
32
|
end
|
|
14
33
|
|
|
34
|
+
# Checks if a database connection is established
|
|
35
|
+
#
|
|
36
|
+
# @return [Boolean] true if connected to database
|
|
15
37
|
def connected?
|
|
16
38
|
ActiveRecord::Base.connected?
|
|
17
39
|
end
|
|
18
40
|
|
|
41
|
+
# Drops the database
|
|
42
|
+
#
|
|
43
|
+
# Disconnects from the current database, connects to postgres maintenance
|
|
44
|
+
# database, and drops the configured database.
|
|
45
|
+
#
|
|
46
|
+
# @return [void]
|
|
47
|
+
def drop!
|
|
48
|
+
db_name = FactDb.config.database.name
|
|
49
|
+
ActiveRecord::Base.connection.disconnect! if connected?
|
|
50
|
+
ActiveRecord::Base.establish_connection(maintenance_database_url)
|
|
51
|
+
ActiveRecord::Base.connection.drop_database(db_name)
|
|
52
|
+
puts "Dropped database '#{db_name}'"
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Creates the database
|
|
56
|
+
#
|
|
57
|
+
# Connects to postgres maintenance database and creates the configured database.
|
|
58
|
+
#
|
|
59
|
+
# @return [void]
|
|
60
|
+
def create!
|
|
61
|
+
db_name = FactDb.config.database.name
|
|
62
|
+
ActiveRecord::Base.establish_connection(maintenance_database_url)
|
|
63
|
+
ActiveRecord::Base.connection.create_database(db_name)
|
|
64
|
+
puts "Created database '#{db_name}'"
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Runs all pending migrations
|
|
68
|
+
#
|
|
69
|
+
# Always (re-)establishes the connection, then runs migrations from db/migrate.
|
|
70
|
+
#
|
|
71
|
+
# @return [void]
|
|
19
72
|
def migrate!
|
|
20
|
-
establish_connection!
|
|
73
|
+
establish_connection!
|
|
21
74
|
migrations_path = File.expand_path("../../db/migrate", __dir__)
|
|
22
75
|
ActiveRecord::MigrationContext.new(migrations_path).migrate
|
|
23
76
|
end
|
|
24
77
|
|
|
78
|
+
# Rolls back migrations
|
|
79
|
+
#
|
|
80
|
+
# @param steps [Integer] number of migrations to rollback (default: 1)
|
|
81
|
+
# @return [void]
|
|
25
82
|
def rollback!(steps = 1)
|
|
26
83
|
establish_connection! unless connected?
|
|
27
84
|
migrations_path = File.expand_path("../../db/migrate", __dir__)
|
|
28
85
|
ActiveRecord::MigrationContext.new(migrations_path).rollback(steps)
|
|
29
86
|
end
|
|
30
87
|
|
|
88
|
+
# Drops, creates, and migrates the database
|
|
89
|
+
#
|
|
90
|
+
# Convenience method to completely reset the database to a clean state.
|
|
91
|
+
# Ignores errors when dropping (database may not exist).
|
|
92
|
+
#
|
|
93
|
+
# @return [void]
|
|
31
94
|
def reset!
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
next if table == "schema_migrations"
|
|
35
|
-
ActiveRecord::Base.connection.drop_table(table, if_exists: true, force: :cascade)
|
|
36
|
-
end
|
|
95
|
+
drop! rescue nil
|
|
96
|
+
create!
|
|
37
97
|
migrate!
|
|
38
98
|
end
|
|
39
99
|
|
|
100
|
+
# Returns the current schema version
|
|
101
|
+
#
|
|
102
|
+
# @return [Integer] the latest migration version number, or 0 if no migrations
|
|
40
103
|
def schema_version
|
|
41
104
|
establish_connection! unless connected?
|
|
42
105
|
ActiveRecord::SchemaMigration.all.map(&:version).max || 0
|
|
43
106
|
end
|
|
107
|
+
|
|
108
|
+
private
|
|
109
|
+
|
|
110
|
+
def maintenance_database_url
|
|
111
|
+
db = FactDb.config.database
|
|
112
|
+
url = db.url || build_database_url(db, "postgres")
|
|
113
|
+
uri = URI.parse(url)
|
|
114
|
+
uri.path = "/postgres"
|
|
115
|
+
uri.to_s
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def build_database_url(db, database_name = nil)
|
|
119
|
+
host = db.host || "localhost"
|
|
120
|
+
port = db.port || 5432
|
|
121
|
+
name = database_name || db.name
|
|
122
|
+
user = db.username || ENV["USER"]
|
|
123
|
+
|
|
124
|
+
auth = user ? "#{user}@" : ""
|
|
125
|
+
"postgresql://#{auth}#{host}:#{port}/#{name}"
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Convert config to AR-compatible hash (name -> database)
|
|
129
|
+
def ar_connection_hash(db)
|
|
130
|
+
h = db.to_h
|
|
131
|
+
h[:database] = h.delete(:name) if h[:name] && !h[:database]
|
|
132
|
+
h
|
|
133
|
+
end
|
|
44
134
|
end
|
|
45
135
|
end
|
|
46
136
|
end
|