haiku.rag 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- haiku/rag/cli.py +17 -1
- haiku/rag/reader.py +71 -14
- haiku/rag/store/engine.py +5 -0
- {haiku_rag-0.4.2.dist-info → haiku_rag-0.4.3.dist-info}/METADATA +3 -3
- {haiku_rag-0.4.2.dist-info → haiku_rag-0.4.3.dist-info}/RECORD +8 -8
- {haiku_rag-0.4.2.dist-info → haiku_rag-0.4.3.dist-info}/WHEEL +0 -0
- {haiku_rag-0.4.2.dist-info → haiku_rag-0.4.3.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.4.2.dist-info → haiku_rag-0.4.3.dist-info}/licenses/LICENSE +0 -0
haiku/rag/cli.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
from importlib.metadata import version
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
|
|
4
5
|
import typer
|
|
@@ -26,8 +27,23 @@ async def check_version():
|
|
|
26
27
|
console.print("[yellow]Please update.[/yellow]")
|
|
27
28
|
|
|
28
29
|
|
|
30
|
+
def version_callback(value: bool):
|
|
31
|
+
if value:
|
|
32
|
+
v = version("haiku.rag")
|
|
33
|
+
console.print(f"haiku.rag version {v}")
|
|
34
|
+
raise typer.Exit()
|
|
35
|
+
|
|
36
|
+
|
|
29
37
|
@cli.callback()
|
|
30
|
-
def main(
|
|
38
|
+
def main(
|
|
39
|
+
_version: bool = typer.Option(
|
|
40
|
+
False,
|
|
41
|
+
"-v",
|
|
42
|
+
"--version",
|
|
43
|
+
callback=version_callback,
|
|
44
|
+
help="Show version and exit",
|
|
45
|
+
),
|
|
46
|
+
):
|
|
31
47
|
"""haiku.rag CLI - SQLite-based RAG system"""
|
|
32
48
|
# Run version check before any command
|
|
33
49
|
event_loop.run_until_complete(check_version())
|
haiku/rag/reader.py
CHANGED
|
@@ -1,32 +1,45 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from typing import ClassVar
|
|
3
3
|
|
|
4
|
-
from markitdown import MarkItDown
|
|
4
|
+
from docling.document_converter import DocumentConverter
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class FileReader:
|
|
8
|
-
|
|
8
|
+
# Extensions supported by docling
|
|
9
|
+
docling_extensions: ClassVar[list[str]] = [
|
|
10
|
+
".asciidoc",
|
|
11
|
+
".bmp",
|
|
12
|
+
".csv",
|
|
13
|
+
".docx",
|
|
14
|
+
".html",
|
|
15
|
+
".xhtml",
|
|
16
|
+
".jpeg",
|
|
17
|
+
".jpg",
|
|
18
|
+
".md",
|
|
19
|
+
".pdf.png",
|
|
20
|
+
".pptx",
|
|
21
|
+
".tiff",
|
|
22
|
+
".xlsx",
|
|
23
|
+
".xml",
|
|
24
|
+
".webp",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
# Plain text extensions that we'll read directly
|
|
28
|
+
text_extensions: ClassVar[list[str]] = [
|
|
9
29
|
".astro",
|
|
10
30
|
".c",
|
|
11
31
|
".cpp",
|
|
12
32
|
".css",
|
|
13
|
-
".csv",
|
|
14
|
-
".docx",
|
|
15
33
|
".go",
|
|
16
34
|
".h",
|
|
17
35
|
".hpp",
|
|
18
|
-
".html",
|
|
19
36
|
".java",
|
|
20
37
|
".js",
|
|
21
38
|
".json",
|
|
22
39
|
".kt",
|
|
23
|
-
".md",
|
|
24
40
|
".mdx",
|
|
25
41
|
".mjs",
|
|
26
|
-
".mp3",
|
|
27
|
-
".pdf",
|
|
28
42
|
".php",
|
|
29
|
-
".pptx",
|
|
30
43
|
".py",
|
|
31
44
|
".rb",
|
|
32
45
|
".rs",
|
|
@@ -36,17 +49,61 @@ class FileReader:
|
|
|
36
49
|
".tsx",
|
|
37
50
|
".txt",
|
|
38
51
|
".vue",
|
|
39
|
-
".wav",
|
|
40
|
-
".xml",
|
|
41
|
-
".xlsx",
|
|
42
52
|
".yaml",
|
|
43
53
|
".yml",
|
|
44
54
|
]
|
|
45
55
|
|
|
56
|
+
# Code file extensions with their markdown language identifiers for syntax highlighting
|
|
57
|
+
code_markdown_identifier: ClassVar[dict[str, str]] = {
|
|
58
|
+
".astro": "astro",
|
|
59
|
+
".c": "c",
|
|
60
|
+
".cpp": "cpp",
|
|
61
|
+
".css": "css",
|
|
62
|
+
".go": "go",
|
|
63
|
+
".h": "c",
|
|
64
|
+
".hpp": "cpp",
|
|
65
|
+
".java": "java",
|
|
66
|
+
".js": "javascript",
|
|
67
|
+
".json": "json",
|
|
68
|
+
".kt": "kotlin",
|
|
69
|
+
".mjs": "javascript",
|
|
70
|
+
".php": "php",
|
|
71
|
+
".py": "python",
|
|
72
|
+
".rb": "ruby",
|
|
73
|
+
".rs": "rust",
|
|
74
|
+
".svelte": "svelte",
|
|
75
|
+
".swift": "swift",
|
|
76
|
+
".ts": "typescript",
|
|
77
|
+
".tsx": "tsx",
|
|
78
|
+
".vue": "vue",
|
|
79
|
+
".yaml": "yaml",
|
|
80
|
+
".yml": "yaml",
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
extensions: ClassVar[list[str]] = docling_extensions + text_extensions
|
|
84
|
+
|
|
46
85
|
@staticmethod
|
|
47
86
|
def parse_file(path: Path) -> str:
|
|
48
87
|
try:
|
|
49
|
-
|
|
50
|
-
|
|
88
|
+
file_extension = path.suffix.lower()
|
|
89
|
+
|
|
90
|
+
if file_extension in FileReader.docling_extensions:
|
|
91
|
+
# Use docling for complex document formats
|
|
92
|
+
converter = DocumentConverter()
|
|
93
|
+
result = converter.convert(path)
|
|
94
|
+
return result.document.export_to_markdown()
|
|
95
|
+
elif file_extension in FileReader.text_extensions:
|
|
96
|
+
# Read plain text files directly
|
|
97
|
+
content = path.read_text(encoding="utf-8")
|
|
98
|
+
|
|
99
|
+
# Wrap code files (but not plain txt) in markdown code blocks for better presentation
|
|
100
|
+
if file_extension in FileReader.code_markdown_identifier:
|
|
101
|
+
language = FileReader.code_markdown_identifier[file_extension]
|
|
102
|
+
return f"```{language}\n{content}\n```"
|
|
103
|
+
|
|
104
|
+
return content
|
|
105
|
+
else:
|
|
106
|
+
# Fallback: try to read as text
|
|
107
|
+
return path.read_text(encoding="utf-8")
|
|
51
108
|
except Exception:
|
|
52
109
|
raise ValueError(f"Failed to parse file: {path}")
|
haiku/rag/store/engine.py
CHANGED
|
@@ -37,6 +37,11 @@ class Store:
|
|
|
37
37
|
db = sqlite3.connect(self.db_path)
|
|
38
38
|
db.enable_load_extension(True)
|
|
39
39
|
sqlite_vec.load(db)
|
|
40
|
+
|
|
41
|
+
# Enable WAL mode for better concurrency (skip for in-memory databases)
|
|
42
|
+
if self.db_path != ":memory:":
|
|
43
|
+
db.execute("PRAGMA journal_mode=WAL")
|
|
44
|
+
|
|
40
45
|
self._connection = db
|
|
41
46
|
existing_tables = [
|
|
42
47
|
row[0]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: haiku.rag
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: Retrieval Augmented Generation (RAG) with SQLite
|
|
5
5
|
Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -18,9 +18,9 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Typing :: Typed
|
|
20
20
|
Requires-Python: >=3.10
|
|
21
|
+
Requires-Dist: docling>=2.15.0
|
|
21
22
|
Requires-Dist: fastmcp>=2.8.1
|
|
22
23
|
Requires-Dist: httpx>=0.28.1
|
|
23
|
-
Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
|
|
24
24
|
Requires-Dist: mxbai-rerank>=0.1.6
|
|
25
25
|
Requires-Dist: ollama>=0.5.1
|
|
26
26
|
Requires-Dist: pydantic>=2.11.7
|
|
@@ -55,7 +55,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.
|
|
|
55
55
|
- **Reranking**: Default search result reranking with MixedBread AI or Cohere
|
|
56
56
|
- **Question answering**: Built-in QA agents on your documents
|
|
57
57
|
- **File monitoring**: Auto-index files when run as server
|
|
58
|
-
- **40+ file formats**: PDF, DOCX, HTML, Markdown,
|
|
58
|
+
- **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
|
|
59
59
|
- **MCP server**: Expose as tools for AI assistants
|
|
60
60
|
- **CLI & Python API**: Use from command line or Python
|
|
61
61
|
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
haiku/rag/app.py,sha256=FpLVyP1-zAq_XPmU8CPVLkuIAeuhBOGvMqhYS8RbN40,7649
|
|
3
3
|
haiku/rag/chunker.py,sha256=MbCtP66OfTFoIBvqmVT9T9c87fozsYYzAQzJJEfPBVI,1812
|
|
4
|
-
haiku/rag/cli.py,sha256=
|
|
4
|
+
haiku/rag/cli.py,sha256=k7EhLkvTncxsdh5TYrg8BHLYh_lfyzupsWGj1dEEdqY,5992
|
|
5
5
|
haiku/rag/client.py,sha256=MZNIpMm6MS3P6vjLqiCztT2dBOM7-bZOosX5IpbHJbI,12724
|
|
6
6
|
haiku/rag/config.py,sha256=_Ss54kmfxVAJupExLKaYjYUlFxJgb7hEEdbG4-isapY,1662
|
|
7
7
|
haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
|
|
8
8
|
haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
|
|
9
9
|
haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
|
|
10
|
-
haiku/rag/reader.py,sha256=
|
|
10
|
+
haiku/rag/reader.py,sha256=dLz3yyc5r8dzdqCc2VViC3fADpScw4lxXueKiu-cI7c,2915
|
|
11
11
|
haiku/rag/utils.py,sha256=Ez_tvNlRO_D8c2CBZ83Hs9Gmzcqdq4cmw_V5GBdKy_8,2214
|
|
12
12
|
haiku/rag/embeddings/__init__.py,sha256=yFBlxS0jBiVHl_rWz5kb43t6Ha132U1ZGdlIPfhzPdg,1491
|
|
13
13
|
haiku/rag/embeddings/base.py,sha256=NTQvuzbZPu0LBo5wAu3qGyJ4xXUaRAt1fjBO0ygWn_Y,465
|
|
@@ -25,7 +25,7 @@ haiku/rag/reranking/base.py,sha256=LM9yUSSJ414UgBZhFTgxGprlRqzfTe4I1vgjricz2JY,4
|
|
|
25
25
|
haiku/rag/reranking/cohere.py,sha256=1iTdiaa8vvb6oHVB2qpWzUOVkyfUcimVSZp6Qr4aq4c,1049
|
|
26
26
|
haiku/rag/reranking/mxbai.py,sha256=46sVTsTIkzIX9THgM3u8HaEmgY7evvEyB-N54JTHvK8,867
|
|
27
27
|
haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
|
|
28
|
-
haiku/rag/store/engine.py,sha256=
|
|
28
|
+
haiku/rag/store/engine.py,sha256=cOMBToLilI1Di1qQrFzGLqtRMsuvtiX0Q5RNIEzQy9w,6232
|
|
29
29
|
haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
|
|
30
30
|
haiku/rag/store/models/chunk.py,sha256=9-vIxW75-kMTelIhgVIMd_WhP-Drc1q65vjaWMP8w1E,364
|
|
31
31
|
haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
|
|
@@ -36,8 +36,8 @@ haiku/rag/store/repositories/document.py,sha256=fXIWevJaOe6x2cK4u9cQxiEGD0ntKQb9
|
|
|
36
36
|
haiku/rag/store/repositories/settings.py,sha256=dme3_ulQdQvyF9daavSjAd-SjZ5hh0MJoxP7iXgap-A,2492
|
|
37
37
|
haiku/rag/store/upgrades/__init__.py,sha256=kKS1YWT_P-CYKhKtokOLTIFNKf9jlfjFFr8lyIMeogM,100
|
|
38
38
|
haiku/rag/store/upgrades/v0_3_4.py,sha256=GLogKZdZ40NX1vBHKdOJju7fFzNUCHoEnjSZg17Hm2U,663
|
|
39
|
-
haiku_rag-0.4.
|
|
40
|
-
haiku_rag-0.4.
|
|
41
|
-
haiku_rag-0.4.
|
|
42
|
-
haiku_rag-0.4.
|
|
43
|
-
haiku_rag-0.4.2.dist-info/RECORD,,
|
|
39
|
+
haiku_rag-0.4.3.dist-info/METADATA,sha256=T2ZHdGL_zd1eSfEjFolh3R_zJpuWmUhKsnNkYLKtT7E,4198
|
|
40
|
+
haiku_rag-0.4.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
41
|
+
haiku_rag-0.4.3.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
|
|
42
|
+
haiku_rag-0.4.3.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
|
|
43
|
+
haiku_rag-0.4.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|