haiku.rag 0.10.2__py3-none-any.whl → 0.19.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. README.md +172 -0
  2. {haiku_rag-0.10.2.dist-info → haiku_rag-0.19.3.dist-info}/METADATA +79 -51
  3. haiku_rag-0.19.3.dist-info/RECORD +6 -0
  4. {haiku_rag-0.10.2.dist-info → haiku_rag-0.19.3.dist-info}/WHEEL +1 -1
  5. haiku/rag/__init__.py +0 -0
  6. haiku/rag/app.py +0 -437
  7. haiku/rag/chunker.py +0 -51
  8. haiku/rag/cli.py +0 -466
  9. haiku/rag/client.py +0 -605
  10. haiku/rag/config.py +0 -81
  11. haiku/rag/embeddings/__init__.py +0 -35
  12. haiku/rag/embeddings/base.py +0 -15
  13. haiku/rag/embeddings/ollama.py +0 -17
  14. haiku/rag/embeddings/openai.py +0 -16
  15. haiku/rag/embeddings/vllm.py +0 -19
  16. haiku/rag/embeddings/voyageai.py +0 -17
  17. haiku/rag/logging.py +0 -56
  18. haiku/rag/mcp.py +0 -156
  19. haiku/rag/migration.py +0 -316
  20. haiku/rag/monitor.py +0 -73
  21. haiku/rag/qa/__init__.py +0 -15
  22. haiku/rag/qa/agent.py +0 -91
  23. haiku/rag/qa/prompts.py +0 -60
  24. haiku/rag/reader.py +0 -115
  25. haiku/rag/reranking/__init__.py +0 -34
  26. haiku/rag/reranking/base.py +0 -13
  27. haiku/rag/reranking/cohere.py +0 -34
  28. haiku/rag/reranking/mxbai.py +0 -28
  29. haiku/rag/reranking/vllm.py +0 -44
  30. haiku/rag/research/__init__.py +0 -20
  31. haiku/rag/research/common.py +0 -53
  32. haiku/rag/research/dependencies.py +0 -47
  33. haiku/rag/research/graph.py +0 -29
  34. haiku/rag/research/models.py +0 -70
  35. haiku/rag/research/nodes/evaluate.py +0 -80
  36. haiku/rag/research/nodes/plan.py +0 -63
  37. haiku/rag/research/nodes/search.py +0 -93
  38. haiku/rag/research/nodes/synthesize.py +0 -51
  39. haiku/rag/research/prompts.py +0 -114
  40. haiku/rag/research/state.py +0 -25
  41. haiku/rag/store/__init__.py +0 -4
  42. haiku/rag/store/engine.py +0 -269
  43. haiku/rag/store/models/__init__.py +0 -4
  44. haiku/rag/store/models/chunk.py +0 -17
  45. haiku/rag/store/models/document.py +0 -17
  46. haiku/rag/store/repositories/__init__.py +0 -9
  47. haiku/rag/store/repositories/chunk.py +0 -424
  48. haiku/rag/store/repositories/document.py +0 -237
  49. haiku/rag/store/repositories/settings.py +0 -155
  50. haiku/rag/store/upgrades/__init__.py +0 -62
  51. haiku/rag/store/upgrades/v0_10_1.py +0 -64
  52. haiku/rag/store/upgrades/v0_9_3.py +0 -112
  53. haiku/rag/utils.py +0 -199
  54. haiku_rag-0.10.2.dist-info/RECORD +0 -54
  55. {haiku_rag-0.10.2.dist-info → haiku_rag-0.19.3.dist-info}/entry_points.txt +0 -0
  56. {haiku_rag-0.10.2.dist-info → haiku_rag-0.19.3.dist-info}/licenses/LICENSE +0 -0
haiku/rag/cli.py DELETED
@@ -1,466 +0,0 @@
1
- import asyncio
2
- import json
3
- import warnings
4
- from importlib.metadata import version
5
- from pathlib import Path
6
- from typing import Any
7
-
8
- import typer
9
-
10
- from haiku.rag.config import Config
11
- from haiku.rag.logging import configure_cli_logging
12
- from haiku.rag.utils import is_up_to_date
13
-
14
- cli = typer.Typer(
15
- context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
16
- )
17
-
18
-
19
def complete_document_ids(ctx: typer.Context, incomplete: str):
    """Return stored document IDs that start with ``incomplete``.

    The database is the ``db`` parameter already parsed into ``ctx.params``
    or, when that is missing or falsy, ``haiku.rag.lancedb`` under
    ``Config.DEFAULT_DATA_DIR``. Any exception raised while importing the
    client or listing documents results in an empty list.
    """
    selected_db = ctx.params.get("db")
    db_path = selected_db or (Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb")

    try:
        # Imported inside the try so an import failure is handled like any
        # other error below.
        from haiku.rag.client import HaikuRAG

        async def _collect_ids():
            async with HaikuRAG(db_path) as rag:
                documents = await rag.list_documents()
                return [doc.id for doc in documents if doc.id]

        known_ids = asyncio.run(_collect_ids())
    except Exception:
        return []

    matches = []
    for doc_id in known_ids:
        if doc_id and doc_id.startswith(incomplete):
            matches.append(doc_id)
    return matches
36
-
37
-
38
def complete_local_paths(ctx: typer.Context, incomplete: str) -> list[str]:
    """Autocomplete local filesystem paths.

    Lists the directory implied by ``incomplete``: the path itself when the
    input is empty or ends with a path separator, otherwise its parent
    filtered by the trailing name fragment. No extension filtering is done,
    so any file can be suggested (URLs can still be typed manually).

    Args:
        ctx: Context passed in by the completion machinery; not used here.
        incomplete: Text typed so far. A leading ``~`` is expanded.

    Returns:
        Matching paths in sorted order, with ``/`` appended to directories.
        An empty list when the target is not a directory or cannot be read.
    """
    from os.path import expanduser

    text = incomplete or ""
    typed = Path(expanduser(text))

    # Decide which directory to list and which name prefix to filter on.
    if text == "" or text.endswith(("/", "\\")):
        directory, prefix = typed, ""
    else:
        directory, prefix = typed.parent, typed.name

    try:
        # is_dir() rather than exists(): a regular file must never reach
        # iterdir(), which would raise NotADirectoryError.
        if not directory.is_dir():
            return []

        suggestions: list[str] = []
        for entry in directory.iterdir():
            # An empty prefix matches every entry.
            if not entry.name.startswith(prefix):
                continue
            suggestion = str(directory / entry.name)
            if entry.is_dir():
                suggestion += "/"
            suggestions.append(suggestion)
    except (OSError, ValueError):
        # Unreadable directory, entries disappearing mid-scan, or a path the
        # OS rejects outright (e.g. an embedded NUL). Completion is
        # best-effort, so offer nothing instead of failing.
        return []

    # Directory iteration order is arbitrary; sort for stable suggestions.
    return sorted(suggestions)
76
-
77
-
78
async def check_version():
    """Warn on the console when ``is_up_to_date()`` reports a stale install."""
    up_to_date, current_version, latest_version = await is_up_to_date()
    if up_to_date:
        return

    typer.echo(
        f"Warning: haiku.rag is outdated. Current: {current_version}, Latest: {latest_version}",
    )
    typer.echo("Please update.")
86
-
87
-
88
def version_callback(value: bool):
    """Print the installed ``haiku.rag`` version and exit if ``value`` is set.

    Does nothing when ``value`` is falsy.

    Raises:
        typer.Exit: After the version has been printed.
    """
    if not value:
        return

    v = version("haiku.rag")
    typer.echo(f"haiku.rag version {v}")
    raise typer.Exit()
93
-
94
-
95
@cli.callback()
def main(
    _version: bool = typer.Option(
        False,
        "-v",
        "--version",
        callback=version_callback,
        help="Show version and exit",
    ),
):
    """haiku.rag CLI - Vector database RAG system"""
    # The docstring above is left untouched: Typer uses a callback's
    # docstring as help text when no explicit help= is given.
    in_development = Config.ENV == "development"

    if not in_development:
        # Outside development: CLI logging setup, Python warnings silenced.
        configure_cli_logging()
        warnings.filterwarnings("ignore")
    else:
        # logfire is imported lazily and is optional; any failure to import
        # or configure it is deliberately ignored.
        try:
            import logfire  # type: ignore

            logfire.configure(send_to_logfire="if-token-present")
            logfire.instrument_pydantic_ai()
        except Exception:
            pass

    # The version check runs before every command, but a failure here must
    # never prevent the command itself from running.
    try:
        asyncio.run(check_version())
    except Exception:
        pass
126
-
127
-
128
@cli.command("list", help="List all stored documents")
def list_documents(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Run ``HaikuRAGApp.list_documents`` against the database at ``db``."""
    # Imported at call time, not at module import.
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    listing = rag_app.list_documents()
    asyncio.run(listing)
140
-
141
-
142
- def _parse_meta_options(meta: list[str] | None) -> dict[str, Any]:
143
- """Parse repeated --meta KEY=VALUE options into a dictionary.
144
-
145
- Raises a Typer error if any entry is malformed.
146
- """
147
- result: dict[str, Any] = {}
148
- if not meta:
149
- return result
150
- for item in meta:
151
- if "=" not in item:
152
- raise typer.BadParameter("--meta must be in KEY=VALUE format")
153
- key, value = item.split("=", 1)
154
- if not key:
155
- raise typer.BadParameter("--meta key cannot be empty")
156
- # Best-effort JSON coercion: numbers, booleans, null, arrays/objects
157
- try:
158
- parsed = json.loads(value)
159
- result[key] = parsed
160
- except Exception:
161
- # Leave as string if not valid JSON literal
162
- result[key] = value
163
- return result
164
-
165
-
166
@cli.command("add", help="Add a document from text input")
def add_document_text(
    text: str = typer.Argument(
        help="The text content of the document to add",
    ),
    meta: list[str] | None = typer.Option(
        None,
        "--meta",
        help="Metadata entries as KEY=VALUE (repeatable)",
        metavar="KEY=VALUE",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Pass ``text`` and the parsed ``--meta`` pairs to the app's add-from-text call."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    parsed_meta = _parse_meta_options(meta)
    # An empty mapping is forwarded as None.
    document_metadata = parsed_meta if parsed_meta else None
    asyncio.run(
        rag_app.add_document_from_text(text=text, metadata=document_metadata)
    )
188
-
189
-
190
@cli.command("add-src", help="Add a document from a file path or URL")
def add_document_src(
    source: str = typer.Argument(
        help="The file path or URL of the document to add",
        autocompletion=complete_local_paths,
    ),
    title: str | None = typer.Option(
        None,
        "--title",
        help="Optional human-readable title to store with the document",
    ),
    meta: list[str] | None = typer.Option(
        None,
        "--meta",
        help="Metadata entries as KEY=VALUE (repeatable)",
        metavar="KEY=VALUE",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Pass ``source``, ``title`` and the parsed ``--meta`` pairs to the app."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    parsed_meta = _parse_meta_options(meta)
    # An empty mapping is forwarded as None.
    document_metadata = parsed_meta if parsed_meta else None
    ingestion = rag_app.add_document_from_source(
        source=source, title=title, metadata=document_metadata
    )
    asyncio.run(ingestion)
222
-
223
-
224
@cli.command("get", help="Get and display a document by its ID")
def get_document(
    doc_id: str = typer.Argument(
        help="The ID of the document to get",
        autocompletion=complete_document_ids,
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Run ``HaikuRAGApp.get_document`` for ``doc_id`` on the database at ``db``."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    lookup = rag_app.get_document(doc_id=doc_id)
    asyncio.run(lookup)
240
-
241
-
242
@cli.command("delete", help="Delete a document by its ID")
def delete_document(
    doc_id: str = typer.Argument(
        help="The ID of the document to delete",
        autocompletion=complete_document_ids,
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Run ``HaikuRAGApp.delete_document`` for ``doc_id`` on the database at ``db``."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    removal = rag_app.delete_document(doc_id=doc_id)
    asyncio.run(removal)


# Register the same function a second time under the name `rm`.
cli.command("rm", help="Alias for delete: remove a document by its ID")(delete_document)
262
-
263
-
264
@cli.command("search", help="Search for documents by a query")
def search(
    query: str = typer.Argument(
        help="The search query to use",
    ),
    limit: int = typer.Option(
        5,
        "--limit",
        "-l",
        help="Maximum number of results to return",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Run ``HaikuRAGApp.search`` with ``query`` and ``limit`` on the database at ``db``."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    lookup = rag_app.search(query=query, limit=limit)
    asyncio.run(lookup)
285
-
286
-
287
@cli.command("ask", help="Ask a question using the QA agent")
def ask(
    question: str = typer.Argument(
        help="The question to ask",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
    cite: bool = typer.Option(
        False,
        "--cite",
        help="Include citations in the response",
    ),
):
    """Run ``HaikuRAGApp.ask`` with ``question`` and the ``cite`` flag."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    answer_task = rag_app.ask(question=question, cite=cite)
    asyncio.run(answer_task)
307
-
308
-
309
@cli.command("research", help="Run multi-agent research and output a concise report")
def research(
    question: str = typer.Argument(
        help="The research question to investigate",
    ),
    max_iterations: int = typer.Option(
        3,
        "--max-iterations",
        "-n",
        help="Maximum search/analyze iterations",
    ),
    confidence_threshold: float = typer.Option(
        0.8,
        "--confidence-threshold",
        help="Minimum confidence (0-1) to stop",
    ),
    max_concurrency: int = typer.Option(
        1,
        "--max-concurrency",
        help="Max concurrent searches per iteration (planned)",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        help="Show verbose progress output",
    ),
):
    """Forward the research question and tuning options to ``HaikuRAGApp.research``."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    # Every CLI option except `db` is forwarded unchanged, by keyword.
    research_options = {
        "question": question,
        "max_iterations": max_iterations,
        "confidence_threshold": confidence_threshold,
        "max_concurrency": max_concurrency,
        "verbose": verbose,
    }
    asyncio.run(rag_app.research(**research_options))
353
-
354
-
355
@cli.command("settings", help="Display current configuration settings")
def settings():
    """Call ``HaikuRAGApp.show_settings`` on an app built with an empty path."""
    from haiku.rag.app import HaikuRAGApp

    # Showing settings does not need a real database, hence the empty Path().
    placeholder_db = Path()
    HaikuRAGApp(db_path=placeholder_db).show_settings()
361
-
362
-
363
@cli.command(
    "rebuild",
    help="Rebuild the database by deleting all chunks and re-indexing all documents",
)
def rebuild(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Run ``HaikuRAGApp.rebuild`` on the database at ``db``."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    rebuild_task = rag_app.rebuild()
    asyncio.run(rebuild_task)
378
-
379
-
380
@cli.command("vacuum", help="Optimize and clean up all tables to reduce disk usage")
def vacuum(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Run ``HaikuRAGApp.vacuum`` on the database at ``db``."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    vacuum_task = rag_app.vacuum()
    asyncio.run(vacuum_task)
392
-
393
-
394
@cli.command("info", help="Show read-only database info (no upgrades or writes)")
def info(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    """Run ``HaikuRAGApp.info`` on the database at ``db``."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)
    info_task = rag_app.info()
    asyncio.run(info_task)
406
-
407
-
408
@cli.command("download-models", help="Download Docling and Ollama models per config")
def download_models_cmd():
    """Call ``prefetch_models`` and report the outcome.

    Raises:
        typer.Exit: With code 1 when prefetching (or the success message)
            raises any exception.
    """
    from haiku.rag.utils import prefetch_models

    try:
        prefetch_models()
        typer.echo("Models downloaded successfully.")
    except Exception as e:
        failure_message = f"Error downloading models: {e}"
        typer.echo(failure_message)
        raise typer.Exit(1)
418
-
419
-
420
@cli.command(
    "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
)
def serve(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
    stdio: bool = typer.Option(
        False,
        "--stdio",
        help="Run MCP server on stdio Transport",
    ),
) -> None:
    """Start the MCP server via ``HaikuRAGApp.serve``."""
    from haiku.rag.app import HaikuRAGApp

    rag_app = HaikuRAGApp(db_path=db)

    # --stdio selects the "stdio" transport; otherwise None is passed through.
    transport = "stdio" if stdio else None

    asyncio.run(rag_app.serve(transport=transport))
445
-
446
-
447
@cli.command("migrate", help="Migrate an SQLite database to LanceDB")
def migrate(
    sqlite_path: Path = typer.Argument(
        help="Path to the SQLite database file to migrate",
    ),
):
    """Migrate ``sqlite_path`` into a sibling ``<stem>.lancedb`` path.

    Raises:
        typer.Exit: With code 1 when the migration returns a falsy result.
    """
    # The target sits next to the source file, with the suffix swapped.
    target_name = sqlite_path.stem + ".lancedb"
    lancedb_path = sqlite_path.parent / target_name

    # Imported here so other commands do not pull in the migration module.
    from haiku.rag.migration import migrate_sqlite_to_lancedb

    migrated = asyncio.run(migrate_sqlite_to_lancedb(sqlite_path, lancedb_path))

    if migrated:
        return
    raise typer.Exit(1)
463
-
464
-
465
- if __name__ == "__main__":
466
- cli()