haiku.rag 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

haiku/rag/a2a/__init__.py CHANGED
@@ -57,12 +57,12 @@ def create_a2a_app(
     """
     base_storage = InMemoryStorage()
     storage = LRUMemoryStorage(
-        storage=base_storage, max_contexts=Config.A2A_MAX_CONTEXTS
+        storage=base_storage, max_contexts=Config.a2a.max_contexts
     )
     broker = InMemoryBroker()
 
     # Create the agent with native search tool
-    model = get_model(Config.QA_PROVIDER, Config.QA_MODEL)
+    model = get_model(Config.qa.provider, Config.qa.model)
     agent = Agent(
         model=model,
         deps_type=AgentDependencies,
@@ -120,7 +120,7 @@ def create_a2a_app(
     # Create FastA2A app with custom worker lifecycle
     @asynccontextmanager
     async def lifespan(app):
-        logger.info(f"Started A2A server (max contexts: {Config.A2A_MAX_CONTEXTS})")
+        logger.info(f"Started A2A server (max contexts: {Config.a2a.max_contexts})")
         async with app.task_manager:
             async with worker.run():
                 yield
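
Every hunk in this release swaps the flat upper-case settings (Config.A2A_MAX_CONTEXTS, Config.QA_PROVIDER, Config.QA_MODEL) for nested lower-case groups (Config.a2a.max_contexts, Config.qa.provider, Config.qa.model). The config loader itself is not part of this diff, so the following is only a minimal sketch of how such a nested settings object could be shaped; the group and field names are taken from the hunks, while the types and defaults are assumptions.

# Minimal sketch of a nested settings object matching the attribute paths used
# in the hunks above. The real haiku.rag config loader is not shown in this
# diff; only the group/field names come from it, defaults are illustrative.
from dataclasses import dataclass, field


@dataclass
class A2ASettings:
    max_contexts: int = 100  # illustrative default


@dataclass
class QASettings:
    provider: str = "openai"     # illustrative default
    model: str = "gpt-4o-mini"   # illustrative default


@dataclass
class Settings:
    a2a: A2ASettings = field(default_factory=A2ASettings)
    qa: QASettings = field(default_factory=QASettings)


Config = Settings()
print(Config.a2a.max_contexts)  # accessed exactly as in the hunks above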
haiku/rag/a2a/client.py CHANGED
@@ -7,9 +7,18 @@ from rich.console import Console
 from rich.markdown import Markdown
 from rich.prompt import Prompt
 
+try:
+    from fasta2a.client import A2AClient as FastA2AClient
+    from fasta2a.schema import Message, TextPart
+except ImportError as e:
+    raise ImportError(
+        "A2A support requires the 'a2a' extra. "
+        "Install with: uv pip install 'haiku.rag[a2a]'"
+    ) from e
+
 
 class A2AClient:
-    """Simple A2A protocol client."""
+    """Interactive A2A protocol client."""
 
     def __init__(self, base_url: str = "http://localhost:8000"):
         """Initialize A2A client.
@@ -18,11 +27,12 @@ class A2AClient:
             base_url: Base URL of the A2A server
         """
         self.base_url = base_url.rstrip("/")
-        self.client = httpx.AsyncClient(timeout=60.0)
+        http_client = httpx.AsyncClient(timeout=60.0)
+        self._client = FastA2AClient(base_url=base_url, http_client=http_client)
 
     async def close(self):
         """Close the HTTP client."""
-        await self.client.aclose()
+        await self._client.http_client.aclose()
 
     async def get_agent_card(self) -> dict[str, Any]:
         """Fetch the agent card from the A2A server.
@@ -30,7 +40,9 @@ class A2AClient:
         Returns:
             Agent card dictionary with agent capabilities and metadata
         """
-        response = await self.client.get(f"{self.base_url}/.well-known/agent-card.json")
+        response = await self._client.http_client.get(
+            f"{self.base_url}/.well-known/agent-card.json"
+        )
         response.raise_for_status()
         return response.json()
 
@@ -53,46 +65,38 @@ class A2AClient:
         if context_id is None:
             context_id = str(uuid.uuid4())
 
-        message_id = str(uuid.uuid4())
-
-        payload: dict[str, Any] = {
-            "jsonrpc": "2.0",
-            "method": "message/send",
-            "params": {
-                "contextId": context_id,
-                "message": {
-                    "kind": "message",
-                    "role": "user",
-                    "messageId": message_id,
-                    "parts": [{"kind": "text", "text": text}],
-                },
-            },
-            "id": 1,
-        }
+        message = Message(
+            kind="message",
+            role="user",
+            message_id=str(uuid.uuid4()),
+            parts=[TextPart(kind="text", text=text)],
+        )
 
+        metadata: dict[str, Any] = {"contextId": context_id}
         if skill_id:
-            payload["params"]["skillId"] = skill_id
+            metadata["skillId"] = skill_id
 
-        response = await self.client.post(
-            self.base_url,
-            json=payload,
-            headers={"Content-Type": "application/json"},
-        )
-        response.raise_for_status()
-        initial_response = response.json()
+        response = await self._client.send_message(message, metadata=metadata)
 
-        # Extract task ID from response
-        result = initial_response.get("result", {})
-        task_id = result.get("id")
+        if "error" in response:
+            return {"error": response["error"]}
 
-        if not task_id:
-            return initial_response
+        result = response.get("result")
+        if not result:
+            return {"result": result}
 
-        # Poll for task completion
-        return await self.wait_for_task(task_id)
+        # Result can be either Task or Message - check if it's a Task with an id
+        if result.get("kind") == "task":
+            task_id = result.get("id")
+            if task_id:
+                # Poll for task completion
+                return await self.wait_for_task(task_id)
+
+        # Return the message directly
+        return {"result": result}
 
     async def wait_for_task(
-        self, task_id: str, max_wait: int = 60, poll_interval: float = 0.5
+        self, task_id: str, max_wait: int = 120, poll_interval: float = 0.5
     ) -> dict[str, Any]:
         """Poll for task completion.
 
@@ -109,27 +113,19 @@ class A2AClient:
         start_time = time.time()
 
         while time.time() - start_time < max_wait:
-            payload = {
-                "jsonrpc": "2.0",
-                "method": "tasks/get",
-                "params": {"id": task_id},
-                "id": 2,
-            }
-
-            response = await self.client.post(
-                self.base_url,
-                json=payload,
-                headers={"Content-Type": "application/json"},
-            )
-            response.raise_for_status()
-            task = response.json()
-
-            result = task.get("result", {})
-            status = result.get("status", {})
-            state = status.get("state")
+            task_response = await self._client.get_task(task_id)
+
+            if "error" in task_response:
+                return {"error": task_response["error"]}
+
+            task = task_response.get("result")
+            if not task:
+                raise Exception("No task in response")
+
+            state = task.get("status", {}).get("state")
 
             if state == "completed":
-                return task
+                return {"result": task}
             elif state == "failed":
                 raise Exception(f"Task failed: {task}")
 
@@ -191,6 +187,7 @@ def print_response(response: dict[str, Any], console: Console):
 
     # Print artifacts summary with details
    if artifacts:
+        console.rule("[dim]Artifacts generated[/dim]")
         summary_lines = []
 
         for artifact in artifacts:
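
The client above now delegates JSON-RPC framing to fasta2a and normalises every reply to either {"result": ...} or {"error": ...}. A minimal usage sketch follows; the server URL and the question are placeholders, and because the name of the outer send helper is not visible in these hunks, the send_message call on A2AClient is an assumption.

# Hypothetical driver for the rewritten client. Only get_agent_card, close
# and wait_for_task are named in the hunks; A2AClient.send_message(text) is
# an assumed name for the method whose body is shown above.
import asyncio

from haiku.rag.a2a.client import A2AClient


async def main() -> None:
    client = A2AClient(base_url="http://localhost:8000")
    try:
        card = await client.get_agent_card()
        print(f"Connected to agent: {card.get('name', 'unknown')}")

        response = await client.send_message("What documents mention LanceDB?")
        if "error" in response:
            print("A2A error:", response["error"])
        else:
            print(response["result"])  # completed task or plain message dict
    finally:
        await client.close()


asyncio.run(main())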
haiku/rag/app.py CHANGED
@@ -160,13 +160,20 @@ class HaikuRAGApp:
         self, source: str, title: str | None = None, metadata: dict | None = None
     ):
         async with HaikuRAG(db_path=self.db_path) as self.client:
-            doc = await self.client.create_document_from_source(
+            result = await self.client.create_document_from_source(
                 source, title=title, metadata=metadata
             )
-            self._rich_print_document(doc, truncate=True)
-            self.console.print(
-                f"[bold green]Document {doc.id} added successfully.[/bold green]"
-            )
+            if isinstance(result, list):
+                for doc in result:
+                    self._rich_print_document(doc, truncate=True)
+                self.console.print(
+                    f"[bold green]{len(result)} documents added successfully.[/bold green]"
+                )
+            else:
+                self._rich_print_document(result, truncate=True)
+                self.console.print(
+                    f"[bold green]Document {result.id} added successfully.[/bold green]"
+                )
 
     async def get_document(self, doc_id: str):
         async with HaikuRAG(db_path=self.db_path) as self.client:
@@ -224,8 +231,8 @@ class HaikuRAGApp:
         )
 
         start_node = DeepQAPlanNode(
-            provider=Config.QA_PROVIDER,
-            model=Config.QA_MODEL,
+            provider=Config.qa.provider,
+            model=Config.qa.model,
         )
 
         result = await graph.run(
@@ -271,8 +278,8 @@ class HaikuRAGApp:
         )
 
         start = PlanNode(
-            provider=Config.RESEARCH_PROVIDER or Config.QA_PROVIDER,
-            model=Config.RESEARCH_MODEL or Config.QA_MODEL,
+            provider=Config.research.provider or Config.qa.provider,
+            model=Config.research.model or Config.qa.model,
         )
         report = None
         async for event in stream_research_graph(graph, start, state, deps):
@@ -467,7 +474,9 @@ class HaikuRAGApp:
 
         # Start file monitor if enabled
         if enable_monitor:
-            monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
+            monitor = FileWatcher(
+                paths=Config.storage.monitor_directories, client=client
+            )
             monitor_task = asyncio.create_task(monitor.observe())
             tasks.append(monitor_task)
 
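
With directory ingestion, create_document_from_source can now return either a single document or a list, and the first hunk above branches on that. A short sketch of the same handling when calling the library directly; the HaikuRAG import path and the ./docs path are assumptions, while the names used (HaikuRAG, db_path, create_document_from_source, doc.id) come from the hunks.

# Sketch of handling the new single-vs-list return of create_document_from_source.
# The import path is assumed from the package layout; only the names visible in
# the diff are relied on.
import asyncio

from haiku.rag.client import HaikuRAG


async def ingest(path: str) -> None:
    async with HaikuRAG(db_path="haiku.rag.lancedb") as client:
        result = await client.create_document_from_source(path)
        docs = result if isinstance(result, list) else [result]
        for doc in docs:
            print(f"added document {doc.id}")


asyncio.run(ingest("./docs"))  # a directory now yields a list of documents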
haiku/rag/chunker.py CHANGED
@@ -22,7 +22,7 @@ class Chunker:
 
     def __init__(
         self,
-        chunk_size: int = Config.CHUNK_SIZE,
+        chunk_size: int = Config.processing.chunk_size,
    ):
         self.chunk_size = chunk_size
         tokenizer = OpenAITokenizer(
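
The chunker default now follows Config.processing.chunk_size, and since it is an ordinary Python default argument it is bound when chunker.py is imported; an explicit value still overrides it per instance. A tiny sketch, with the chunk size below being an illustrative number:

# Default comes from Config.processing.chunk_size at import time;
# an explicit argument overrides it as before.
from haiku.rag.chunker import Chunker

default_chunker = Chunker()               # picks up Config.processing.chunk_size
small_chunker = Chunker(chunk_size=256)   # explicit per-instance override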
haiku/rag/cli.py CHANGED
@@ -42,10 +42,21 @@ def main(
         callback=version_callback,
         help="Show version and exit",
     ),
+    config: Path | None = typer.Option(
+        None,
+        "--config",
+        help="Path to YAML configuration file",
+    ),
 ):
     """haiku.rag CLI - Vector database RAG system"""
+    # Store config path in environment for config loader to use
+    if config:
+        import os
+
+        os.environ["HAIKU_RAG_CONFIG_PATH"] = str(config.absolute())
+
     # Configure logging minimally for CLI context
-    if Config.ENV == "development":
+    if Config.environment == "development":
         # Lazy import logfire only in development
         try:
             import logfire  # type: ignore
@@ -69,7 +80,7 @@ def main(
 @cli.command("list", help="List all stored documents")
 def list_documents(
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -116,7 +127,7 @@ def add_document_text(
         metavar="KEY=VALUE",
     ),
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -128,10 +139,10 @@ def add_document_text(
     asyncio.run(app.add_document_from_text(text=text, metadata=metadata or None))
 
 
-@cli.command("add-src", help="Add a document from a file path or URL")
+@cli.command("add-src", help="Add a document from a file path, directory, or URL")
 def add_document_src(
     source: str = typer.Argument(
-        help="The file path or URL of the document to add",
+        help="The file path, directory, or URL of the document(s) to add",
     ),
     title: str | None = typer.Option(
         None,
@@ -145,7 +156,7 @@ def add_document_src(
         metavar="KEY=VALUE",
     ),
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -167,7 +178,7 @@ def get_document(
         help="The ID of the document to get",
     ),
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -184,7 +195,7 @@ def delete_document(
         help="The ID of the document to delete",
     ),
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -211,7 +222,7 @@ def search(
         help="Maximum number of results to return",
     ),
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -228,7 +239,7 @@ def ask(
         help="The question to ask",
     ),
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -276,7 +287,7 @@ def research(
         help="Max concurrent searches per iteration (planned)",
     ),
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -308,13 +319,61 @@ def settings():
     app.show_settings()
 
 
+@cli.command("init-config", help="Generate a YAML configuration file")
+def init_config(
+    output: Path = typer.Argument(
+        Path("haiku.rag.yaml"),
+        help="Output path for the config file",
+    ),
+    from_env: bool = typer.Option(
+        False,
+        "--from-env",
+        help="Migrate settings from .env file",
+    ),
+):
+    """Generate a YAML configuration file with defaults or from .env."""
+    import yaml
+
+    from haiku.rag.config.loader import generate_default_config, load_config_from_env
+
+    if output.exists():
+        typer.echo(
+            f"Error: {output} already exists. Remove it first or choose a different path."
+        )
+        raise typer.Exit(1)
+
+    if from_env:
+        # Load from environment variables (including .env if present)
+        from dotenv import load_dotenv
+
+        load_dotenv()
+        config_data = load_config_from_env()
+        if not config_data:
+            typer.echo("Warning: No environment variables found to migrate.")
+            typer.echo("Generating default configuration instead.")
+            config_data = generate_default_config()
+    else:
+        config_data = generate_default_config()
+
+    # Write YAML with comments
+    with open(output, "w") as f:
+        f.write("# haiku.rag configuration file\n")
+        f.write(
+            "# See https://ggozad.github.io/haiku.rag/configuration/ for details\n\n"
+        )
+        yaml.dump(config_data, f, default_flow_style=False, sort_keys=False)
+
+    typer.echo(f"Configuration file created: {output}")
+    typer.echo("Edit the file to customize your settings.")
+
+
 @cli.command(
     "rebuild",
     help="Rebuild the database by deleting all chunks and re-indexing all documents",
 )
 def rebuild(
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -328,7 +387,7 @@ def rebuild(
 @cli.command("vacuum", help="Optimize and clean up all tables to reduce disk usage")
 def vacuum(
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -342,7 +401,7 @@ def vacuum(
 @cli.command("info", help="Show read-only database info (no upgrades or writes)")
 def info(
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -371,7 +430,7 @@ def download_models_cmd():
 )
 def serve(
     db: Path = typer.Option(
-        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
         help="Path to the LanceDB database file",
     ),
@@ -442,24 +501,6 @@ def serve(
     )
 
 
-@cli.command("migrate", help="Migrate an SQLite database to LanceDB")
-def migrate(
-    sqlite_path: Path = typer.Argument(
-        help="Path to the SQLite database file to migrate",
-    ),
-):
-    # Generate LanceDB path in same parent directory
-    lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
-
-    # Lazy import to avoid heavy deps on simple invocations
-    from haiku.rag.migration import migrate_sqlite_to_lancedb
-
-    success = asyncio.run(migrate_sqlite_to_lancedb(sqlite_path, lancedb_path))
-
-    if not success:
-        raise typer.Exit(1)
-
-
 @cli.command(
     "a2aclient", help="Run interactive client to chat with haiku.rag's A2A server"
 )
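
The new --config option does not parse the file itself; it only records the path in HAIKU_RAG_CONFIG_PATH for the config loader (haiku.rag.config.loader, referenced but not included in this diff) to pick up. A minimal sketch of what such a loader might do with that variable; the fallback filename and the return shape are assumptions, only the environment variable name comes from the hunk above.

# Illustrative loader honouring the HAIKU_RAG_CONFIG_PATH variable set by --config.
# The real haiku.rag.config.loader is not part of this diff.
import os
from pathlib import Path

import yaml


def load_yaml_config() -> dict:
    """Read the YAML config pointed at by --config, if any."""
    path = os.environ.get("HAIKU_RAG_CONFIG_PATH")
    candidate = Path(path) if path else Path("haiku.rag.yaml")  # assumed fallback name
    if not candidate.exists():
        return {}  # fall back to built-in defaults / environment variables
    with open(candidate) as f:
        return yaml.safe_load(f) or {}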