haiku.rag 0.12.1__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

haiku/rag/a2a/__init__.py CHANGED
@@ -57,12 +57,12 @@ def create_a2a_app(
57
57
  """
58
58
  base_storage = InMemoryStorage()
59
59
  storage = LRUMemoryStorage(
60
- storage=base_storage, max_contexts=Config.A2A_MAX_CONTEXTS
60
+ storage=base_storage, max_contexts=Config.a2a.max_contexts
61
61
  )
62
62
  broker = InMemoryBroker()
63
63
 
64
64
  # Create the agent with native search tool
65
- model = get_model(Config.QA_PROVIDER, Config.QA_MODEL)
65
+ model = get_model(Config.qa.provider, Config.qa.model)
66
66
  agent = Agent(
67
67
  model=model,
68
68
  deps_type=AgentDependencies,
@@ -120,7 +120,7 @@ def create_a2a_app(
120
120
  # Create FastA2A app with custom worker lifecycle
121
121
  @asynccontextmanager
122
122
  async def lifespan(app):
123
- logger.info(f"Started A2A server (max contexts: {Config.A2A_MAX_CONTEXTS})")
123
+ logger.info(f"Started A2A server (max contexts: {Config.a2a.max_contexts})")
124
124
  async with app.task_manager:
125
125
  async with worker.run():
126
126
  yield
haiku/rag/app.py CHANGED
@@ -231,8 +231,8 @@ class HaikuRAGApp:
231
231
  )
232
232
 
233
233
  start_node = DeepQAPlanNode(
234
- provider=Config.QA_PROVIDER,
235
- model=Config.QA_MODEL,
234
+ provider=Config.qa.provider,
235
+ model=Config.qa.model,
236
236
  )
237
237
 
238
238
  result = await graph.run(
@@ -278,8 +278,8 @@ class HaikuRAGApp:
278
278
  )
279
279
 
280
280
  start = PlanNode(
281
- provider=Config.RESEARCH_PROVIDER or Config.QA_PROVIDER,
282
- model=Config.RESEARCH_MODEL or Config.QA_MODEL,
281
+ provider=Config.research.provider or Config.qa.provider,
282
+ model=Config.research.model or Config.qa.model,
283
283
  )
284
284
  report = None
285
285
  async for event in stream_research_graph(graph, start, state, deps):
@@ -474,7 +474,9 @@ class HaikuRAGApp:
474
474
 
475
475
  # Start file monitor if enabled
476
476
  if enable_monitor:
477
- monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
477
+ monitor = FileWatcher(
478
+ paths=Config.storage.monitor_directories, client=client
479
+ )
478
480
  monitor_task = asyncio.create_task(monitor.observe())
479
481
  tasks.append(monitor_task)
480
482
 
haiku/rag/chunker.py CHANGED
@@ -22,7 +22,7 @@ class Chunker:
22
22
 
23
23
  def __init__(
24
24
  self,
25
- chunk_size: int = Config.CHUNK_SIZE,
25
+ chunk_size: int = Config.processing.chunk_size,
26
26
  ):
27
27
  self.chunk_size = chunk_size
28
28
  tokenizer = OpenAITokenizer(
haiku/rag/cli.py CHANGED
@@ -42,10 +42,21 @@ def main(
42
42
  callback=version_callback,
43
43
  help="Show version and exit",
44
44
  ),
45
+ config: Path | None = typer.Option(
46
+ None,
47
+ "--config",
48
+ help="Path to YAML configuration file",
49
+ ),
45
50
  ):
46
51
  """haiku.rag CLI - Vector database RAG system"""
52
+ # Store config path in environment for config loader to use
53
+ if config:
54
+ import os
55
+
56
+ os.environ["HAIKU_RAG_CONFIG_PATH"] = str(config.absolute())
57
+
47
58
  # Configure logging minimally for CLI context
48
- if Config.ENV == "development":
59
+ if Config.environment == "development":
49
60
  # Lazy import logfire only in development
50
61
  try:
51
62
  import logfire # type: ignore
@@ -69,7 +80,7 @@ def main(
69
80
  @cli.command("list", help="List all stored documents")
70
81
  def list_documents(
71
82
  db: Path = typer.Option(
72
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
83
+ Config.storage.data_dir / "haiku.rag.lancedb",
73
84
  "--db",
74
85
  help="Path to the LanceDB database file",
75
86
  ),
@@ -116,7 +127,7 @@ def add_document_text(
116
127
  metavar="KEY=VALUE",
117
128
  ),
118
129
  db: Path = typer.Option(
119
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
130
+ Config.storage.data_dir / "haiku.rag.lancedb",
120
131
  "--db",
121
132
  help="Path to the LanceDB database file",
122
133
  ),
@@ -145,7 +156,7 @@ def add_document_src(
145
156
  metavar="KEY=VALUE",
146
157
  ),
147
158
  db: Path = typer.Option(
148
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
159
+ Config.storage.data_dir / "haiku.rag.lancedb",
149
160
  "--db",
150
161
  help="Path to the LanceDB database file",
151
162
  ),
@@ -167,7 +178,7 @@ def get_document(
167
178
  help="The ID of the document to get",
168
179
  ),
169
180
  db: Path = typer.Option(
170
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
181
+ Config.storage.data_dir / "haiku.rag.lancedb",
171
182
  "--db",
172
183
  help="Path to the LanceDB database file",
173
184
  ),
@@ -184,7 +195,7 @@ def delete_document(
184
195
  help="The ID of the document to delete",
185
196
  ),
186
197
  db: Path = typer.Option(
187
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
198
+ Config.storage.data_dir / "haiku.rag.lancedb",
188
199
  "--db",
189
200
  help="Path to the LanceDB database file",
190
201
  ),
@@ -211,7 +222,7 @@ def search(
211
222
  help="Maximum number of results to return",
212
223
  ),
213
224
  db: Path = typer.Option(
214
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
225
+ Config.storage.data_dir / "haiku.rag.lancedb",
215
226
  "--db",
216
227
  help="Path to the LanceDB database file",
217
228
  ),
@@ -228,7 +239,7 @@ def ask(
228
239
  help="The question to ask",
229
240
  ),
230
241
  db: Path = typer.Option(
231
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
242
+ Config.storage.data_dir / "haiku.rag.lancedb",
232
243
  "--db",
233
244
  help="Path to the LanceDB database file",
234
245
  ),
@@ -276,7 +287,7 @@ def research(
276
287
  help="Max concurrent searches per iteration (planned)",
277
288
  ),
278
289
  db: Path = typer.Option(
279
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
290
+ Config.storage.data_dir / "haiku.rag.lancedb",
280
291
  "--db",
281
292
  help="Path to the LanceDB database file",
282
293
  ),
@@ -308,13 +319,61 @@ def settings():
308
319
  app.show_settings()
309
320
 
310
321
 
322
@cli.command("init-config", help="Generate a YAML configuration file")
def init_config(
    output: Path = typer.Argument(
        Path("haiku.rag.yaml"),
        help="Output path for the config file",
    ),
    from_env: bool = typer.Option(
        False,
        "--from-env",
        help="Migrate settings from .env file",
    ),
):
    """Write a starter YAML configuration file to ``output``.

    Args:
        output: Destination path. The command refuses to overwrite an
            existing path and exits with status 1 instead.
        from_env: When set, the settings are collected from environment
            variables (after loading a ``.env`` file via python-dotenv)
            rather than taken from the built-in defaults. If nothing is
            collected, a warning is printed and the defaults are used.

    Raises:
        typer.Exit: With code 1 when ``output`` already exists.
    """
    # Imported lazily so other CLI commands do not pay for these modules.
    import yaml

    from haiku.rag.config.loader import generate_default_config, load_config_from_env

    if output.exists():
        typer.echo(
            f"Error: {output} already exists. Remove it first or choose a different path."
        )
        raise typer.Exit(1)

    settings = None
    if from_env:
        # Load from environment variables (including .env if present)
        from dotenv import load_dotenv

        load_dotenv()
        settings = load_config_from_env()
        if not settings:
            typer.echo("Warning: No environment variables found to migrate.")
            typer.echo("Generating default configuration instead.")

    # Either migration was not requested or it produced nothing.
    if not settings:
        settings = generate_default_config()

    header = (
        "# haiku.rag configuration file\n"
        "# See https://ggozad.github.io/haiku.rag/configuration/ for details\n\n"
    )
    with open(output, "w") as handle:
        handle.write(header)
        # sort_keys=False keeps the sections in the order they were built.
        yaml.dump(settings, handle, default_flow_style=False, sort_keys=False)

    typer.echo(f"Configuration file created: {output}")
    typer.echo("Edit the file to customize your settings.")
368
+
369
+
311
370
  @cli.command(
312
371
  "rebuild",
313
372
  help="Rebuild the database by deleting all chunks and re-indexing all documents",
314
373
  )
315
374
  def rebuild(
316
375
  db: Path = typer.Option(
317
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
376
+ Config.storage.data_dir / "haiku.rag.lancedb",
318
377
  "--db",
319
378
  help="Path to the LanceDB database file",
320
379
  ),
@@ -328,7 +387,7 @@ def rebuild(
328
387
  @cli.command("vacuum", help="Optimize and clean up all tables to reduce disk usage")
329
388
  def vacuum(
330
389
  db: Path = typer.Option(
331
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
390
+ Config.storage.data_dir / "haiku.rag.lancedb",
332
391
  "--db",
333
392
  help="Path to the LanceDB database file",
334
393
  ),
@@ -342,7 +401,7 @@ def vacuum(
342
401
  @cli.command("info", help="Show read-only database info (no upgrades or writes)")
343
402
  def info(
344
403
  db: Path = typer.Option(
345
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
404
+ Config.storage.data_dir / "haiku.rag.lancedb",
346
405
  "--db",
347
406
  help="Path to the LanceDB database file",
348
407
  ),
@@ -371,7 +430,7 @@ def download_models_cmd():
371
430
  )
372
431
  def serve(
373
432
  db: Path = typer.Option(
374
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
433
+ Config.storage.data_dir / "haiku.rag.lancedb",
375
434
  "--db",
376
435
  help="Path to the LanceDB database file",
377
436
  ),
@@ -442,24 +501,6 @@ def serve(
442
501
  )
443
502
 
444
503
 
445
- @cli.command("migrate", help="Migrate an SQLite database to LanceDB")
446
- def migrate(
447
- sqlite_path: Path = typer.Argument(
448
- help="Path to the SQLite database file to migrate",
449
- ),
450
- ):
451
- # Generate LanceDB path in same parent directory
452
- lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
453
-
454
- # Lazy import to avoid heavy deps on simple invocations
455
- from haiku.rag.migration import migrate_sqlite_to_lancedb
456
-
457
- success = asyncio.run(migrate_sqlite_to_lancedb(sqlite_path, lancedb_path))
458
-
459
- if not success:
460
- raise typer.Exit(1)
461
-
462
-
463
504
  @cli.command(
464
505
  "a2aclient", help="Run interactive client to chat with haiku.rag's A2A server"
465
506
  )
haiku/rag/client.py CHANGED
@@ -8,8 +8,7 @@ from urllib.parse import urlparse
8
8
 
9
9
  import httpx
10
10
 
11
- from haiku.rag.config import Config
12
- from haiku.rag.reader import FileReader
11
+ from haiku.rag.config import AppConfig, Config
13
12
  from haiku.rag.reranking import get_reranker
14
13
  from haiku.rag.store.engine import Store
15
14
  from haiku.rag.store.models.chunk import Chunk
@@ -17,7 +16,6 @@ from haiku.rag.store.models.document import Document
17
16
  from haiku.rag.store.repositories.chunk import ChunkRepository
18
17
  from haiku.rag.store.repositories.document import DocumentRepository
19
18
  from haiku.rag.store.repositories.settings import SettingsRepository
20
- from haiku.rag.utils import text_to_docling_document
21
19
 
22
20
  logger = logging.getLogger(__name__)
23
21
 
@@ -27,16 +25,23 @@ class HaikuRAG:
27
25
 
28
26
  def __init__(
29
27
  self,
30
- db_path: Path = Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
28
+ db_path: Path | None = None,
29
+ config: AppConfig = Config,
31
30
  skip_validation: bool = False,
32
31
  ):
33
32
  """Initialize the RAG client with a database path.
34
33
 
35
34
  Args:
36
- db_path: Path to the database file.
35
+ db_path: Path to the database file. If None, uses config.storage.data_dir.
36
+ config: Configuration to use. Defaults to global Config.
37
37
  skip_validation: Whether to skip configuration validation on database load.
38
38
  """
39
- self.store = Store(db_path, skip_validation=skip_validation)
39
+ self._config = config
40
+ if db_path is None:
41
+ db_path = self._config.storage.data_dir / "haiku.rag.lancedb"
42
+ self.store = Store(
43
+ db_path, config=self._config, skip_validation=skip_validation
44
+ )
40
45
  self.document_repository = DocumentRepository(self.store)
41
46
  self.chunk_repository = ChunkRepository(self.store)
42
47
 
@@ -91,6 +96,9 @@ class HaikuRAG:
91
96
  Returns:
92
97
  The created Document instance.
93
98
  """
99
+ # Lazy import to avoid loading docling
100
+ from haiku.rag.utils import text_to_docling_document
101
+
94
102
  # Convert content to DoclingDocument for processing
95
103
  docling_document = text_to_docling_document(content)
96
104
 
@@ -127,6 +135,8 @@ class HaikuRAG:
127
135
  ValueError: If the file/URL cannot be parsed or doesn't exist
128
136
  httpx.RequestError: If URL request fails
129
137
  """
138
+ # Lazy import to avoid loading docling
139
+ from haiku.rag.reader import FileReader
130
140
 
131
141
  # Normalize metadata
132
142
  metadata = metadata or {}
@@ -181,6 +191,9 @@ class HaikuRAG:
181
191
  Raises:
182
192
  ValueError: If the file cannot be parsed or doesn't exist
183
193
  """
194
+ # Lazy import to avoid loading docling
195
+ from haiku.rag.reader import FileReader
196
+
184
197
  metadata = metadata or {}
185
198
 
186
199
  if source_path.suffix.lower() not in FileReader.extensions:
@@ -256,6 +269,9 @@ class HaikuRAG:
256
269
  ValueError: If the content cannot be parsed
257
270
  httpx.RequestError: If URL request fails
258
271
  """
272
+ # Lazy import to avoid loading docling
273
+ from haiku.rag.reader import FileReader
274
+
259
275
  metadata = metadata or {}
260
276
 
261
277
  async with httpx.AsyncClient() as client:
@@ -379,6 +395,9 @@ class HaikuRAG:
379
395
 
380
396
  async def update_document(self, document: Document) -> Document:
381
397
  """Update an existing document."""
398
+ # Lazy import to avoid loading docling
399
+ from haiku.rag.utils import text_to_docling_document
400
+
382
401
  # Convert content to DoclingDocument
383
402
  docling_document = text_to_docling_document(document.content)
384
403
 
@@ -418,7 +437,7 @@ class HaikuRAG:
418
437
  List of (chunk, score) tuples ordered by relevance.
419
438
  """
420
439
  # Get reranker if available
421
- reranker = get_reranker()
440
+ reranker = get_reranker(config=self._config)
422
441
 
423
442
  if reranker is None:
424
443
  # No reranking - return direct search results
@@ -440,18 +459,20 @@ class HaikuRAG:
440
459
  async def expand_context(
441
460
  self,
442
461
  search_results: list[tuple[Chunk, float]],
443
- radius: int = Config.CONTEXT_CHUNK_RADIUS,
462
+ radius: int | None = None,
444
463
  ) -> list[tuple[Chunk, float]]:
445
464
  """Expand search results with adjacent chunks, merging overlapping chunks.
446
465
 
447
466
  Args:
448
467
  search_results: List of (chunk, score) tuples from search.
449
468
  radius: Number of adjacent chunks to include before/after each chunk.
450
- Defaults to CONTEXT_CHUNK_RADIUS config setting.
469
+ If None, uses config.processing.context_chunk_radius.
451
470
 
452
471
  Returns:
453
472
  List of (chunk, score) tuples with expanded and merged context chunks.
454
473
  """
474
+ if radius is None:
475
+ radius = self._config.processing.context_chunk_radius
455
476
  if radius == 0:
456
477
  return search_results
457
478
 
@@ -581,7 +602,9 @@ class HaikuRAG:
581
602
  """
582
603
  from haiku.rag.qa import get_qa_agent
583
604
 
584
- qa_agent = get_qa_agent(self, use_citations=cite, system_prompt=system_prompt)
605
+ qa_agent = get_qa_agent(
606
+ self, config=self._config, use_citations=cite, system_prompt=system_prompt
607
+ )
585
608
  return await qa_agent.answer(question)
586
609
 
587
610
  async def rebuild_database(self) -> AsyncGenerator[str, None]:
@@ -597,6 +620,9 @@ class HaikuRAG:
597
620
  Yields:
598
621
  int: The ID of the document currently being processed
599
622
  """
623
+ # Lazy import to avoid loading docling
624
+ from haiku.rag.utils import text_to_docling_document
625
+
600
626
  await self.chunk_repository.delete_all()
601
627
  self.store.recreate_embeddings_table()
602
628
 
@@ -0,0 +1,54 @@
1
+ import os
2
+
3
+ from haiku.rag.config.loader import (
4
+ check_for_deprecated_env,
5
+ find_config_file,
6
+ generate_default_config,
7
+ load_config_from_env,
8
+ load_yaml_config,
9
+ )
10
+ from haiku.rag.config.models import (
11
+ A2AConfig,
12
+ AppConfig,
13
+ EmbeddingsConfig,
14
+ LanceDBConfig,
15
+ OllamaConfig,
16
+ ProcessingConfig,
17
+ ProvidersConfig,
18
+ QAConfig,
19
+ RerankingConfig,
20
+ ResearchConfig,
21
+ StorageConfig,
22
+ VLLMConfig,
23
+ )
24
+
25
+ __all__ = [
26
+ "Config",
27
+ "AppConfig",
28
+ "StorageConfig",
29
+ "LanceDBConfig",
30
+ "EmbeddingsConfig",
31
+ "RerankingConfig",
32
+ "QAConfig",
33
+ "ResearchConfig",
34
+ "ProcessingConfig",
35
+ "OllamaConfig",
36
+ "VLLMConfig",
37
+ "ProvidersConfig",
38
+ "A2AConfig",
39
+ "find_config_file",
40
+ "load_yaml_config",
41
+ "generate_default_config",
42
+ "load_config_from_env",
43
+ ]
44
+
45
# Build the module-level ``Config`` once, at import time: validate the YAML
# file located by find_config_file() when there is one, otherwise construct
# AppConfig with no arguments.
config_path = find_config_file(None)
if config_path is None:
    Config = AppConfig()
else:
    yaml_data = load_yaml_config(config_path)
    Config = AppConfig.model_validate(yaml_data)

# Emit a deprecation warning if a .env file is present in the working directory.
check_for_deprecated_env()
@@ -0,0 +1,151 @@
1
+ import os
2
+ import warnings
3
+ from pathlib import Path
4
+
5
+ import yaml
6
+
7
+
8
+ def find_config_file(cli_path: Path | None = None) -> Path | None:
9
+ """Find the YAML config file using the search path.
10
+
11
+ Search order:
12
+ 1. CLI-provided path (via HAIKU_RAG_CONFIG_PATH env var or parameter)
13
+ 2. ./haiku.rag.yaml (current directory)
14
+ 3. ~/.config/haiku.rag/config.yaml (user config)
15
+
16
+ Returns None if no config file is found.
17
+ """
18
+ # Check environment variable first (set by CLI --config flag)
19
+ if not cli_path:
20
+ env_path = os.getenv("HAIKU_RAG_CONFIG_PATH")
21
+ if env_path:
22
+ cli_path = Path(env_path)
23
+
24
+ if cli_path:
25
+ if cli_path.exists():
26
+ return cli_path
27
+ raise FileNotFoundError(f"Config file not found: {cli_path}")
28
+
29
+ cwd_config = Path.cwd() / "haiku.rag.yaml"
30
+ if cwd_config.exists():
31
+ return cwd_config
32
+
33
+ user_config_dir = Path.home() / ".config" / "haiku.rag"
34
+ user_config = user_config_dir / "config.yaml"
35
+ if user_config.exists():
36
+ return user_config
37
+
38
+ return None
39
+
40
+
41
def load_yaml_config(path: Path) -> dict:
    """Load and parse a YAML config file.

    Args:
        path: Location of the YAML file to read.

    Returns:
        The parsed document, or an empty dict when the file is empty
        (``yaml.safe_load`` yields None) or parses to another falsy value.
    """
    # Decode explicitly as UTF-8 rather than with the platform locale
    # encoding, so non-ASCII values (e.g. accented directory names) load the
    # same way on every operating system.
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    return data or {}
46
+
47
+
48
def check_for_deprecated_env() -> None:
    """Warn when a ``.env`` file exists in the current working directory.

    Emits a single ``DeprecationWarning`` pointing users at the
    ``haiku-rag init-config`` command; does nothing when no ``.env`` file is
    present.
    """
    env_file = Path.cwd() / ".env"
    if env_file.exists():
        warnings.warn(
            # Adjacent literals are concatenated verbatim, so every sentence
            # except the last must carry its own trailing space.
            ".env file detected but YAML configuration is now preferred. "
            "Environment variable configuration is deprecated and will be removed in future versions. "
            "Run 'haiku-rag init-config' to generate a YAML config file.",
            DeprecationWarning,
            # Attribute the warning to the caller rather than to this helper.
            stacklevel=2,
        )
59
+
60
+
61
def generate_default_config() -> dict:
    """Return a freshly built nested dict of default settings.

    The top-level keys are inserted in a fixed order (environment, storage,
    lancedb, embeddings, reranking, qa, research, processing, providers, a2a)
    so that an order-preserving dump lists the sections in that order. Every
    call builds new dict and list objects.
    """
    storage = {
        "data_dir": "",
        "monitor_directories": [],
        "disable_autocreate": False,
        "vacuum_retention_seconds": 60,
    }
    lancedb = {"uri": "", "api_key": "", "region": ""}
    embeddings = {
        "provider": "ollama",
        "model": "qwen3-embedding",
        "vector_dim": 4096,
    }
    processing = {
        "chunk_size": 256,
        "context_chunk_radius": 0,
        "markdown_preprocessor": "",
    }
    vllm = {
        "embeddings_base_url": "",
        "rerank_base_url": "",
        "qa_base_url": "",
        "research_base_url": "",
    }
    providers = {
        "ollama": {"base_url": "http://localhost:11434"},
        "vllm": vllm,
    }

    return {
        "environment": "production",
        "storage": storage,
        "lancedb": lancedb,
        "embeddings": embeddings,
        "reranking": {"provider": "", "model": ""},
        "qa": {"provider": "ollama", "model": "gpt-oss"},
        "research": {"provider": "", "model": ""},
        "processing": processing,
        "providers": providers,
        "a2a": {"max_contexts": 1000},
    }
96
+
97
+
98
def load_config_from_env() -> dict:
    """Collect legacy environment-variable settings into a nested dict.

    Each recognised variable that is set (an empty string counts as set) is
    stored under its key path. Values are kept as the raw strings read from
    the environment, except ``MONITOR_DIRECTORIES``, which is split on commas
    into a list of non-empty, whitespace-stripped entries.

    Returns:
        A dict containing only the variables that were set; empty when none
        were.
    """
    # Environment variable -> key path inside the nested result.
    key_paths = {
        "ENV": ("environment",),
        "DEFAULT_DATA_DIR": ("storage", "data_dir"),
        "MONITOR_DIRECTORIES": ("storage", "monitor_directories"),
        "DISABLE_DB_AUTOCREATE": ("storage", "disable_autocreate"),
        "VACUUM_RETENTION_SECONDS": ("storage", "vacuum_retention_seconds"),
        "LANCEDB_URI": ("lancedb", "uri"),
        "LANCEDB_API_KEY": ("lancedb", "api_key"),
        "LANCEDB_REGION": ("lancedb", "region"),
        "EMBEDDINGS_PROVIDER": ("embeddings", "provider"),
        "EMBEDDINGS_MODEL": ("embeddings", "model"),
        "EMBEDDINGS_VECTOR_DIM": ("embeddings", "vector_dim"),
        "RERANK_PROVIDER": ("reranking", "provider"),
        "RERANK_MODEL": ("reranking", "model"),
        "QA_PROVIDER": ("qa", "provider"),
        "QA_MODEL": ("qa", "model"),
        "RESEARCH_PROVIDER": ("research", "provider"),
        "RESEARCH_MODEL": ("research", "model"),
        "CHUNK_SIZE": ("processing", "chunk_size"),
        "CONTEXT_CHUNK_RADIUS": ("processing", "context_chunk_radius"),
        "MARKDOWN_PREPROCESSOR": ("processing", "markdown_preprocessor"),
        "OLLAMA_BASE_URL": ("providers", "ollama", "base_url"),
        "VLLM_EMBEDDINGS_BASE_URL": ("providers", "vllm", "embeddings_base_url"),
        "VLLM_RERANK_BASE_URL": ("providers", "vllm", "rerank_base_url"),
        "VLLM_QA_BASE_URL": ("providers", "vllm", "qa_base_url"),
        "VLLM_RESEARCH_BASE_URL": ("providers", "vllm", "research_base_url"),
        "A2A_MAX_CONTEXTS": ("a2a", "max_contexts"),
    }

    migrated: dict = {}
    for name, (*parents, leaf) in key_paths.items():
        raw = os.getenv(name)
        if raw is None:
            continue

        if name == "MONITOR_DIRECTORIES":
            # Comma-separated list; a blank value naturally yields [].
            raw = [part.strip() for part in raw.split(",") if part.strip()]

        # Walk down to the parent mapping, creating levels as needed.
        node = migrated
        for section in parents:
            node = node.setdefault(section, {})
        node[leaf] = raw

    return migrated