raghilda 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raghilda-0.1.0/PKG-INFO +95 -0
- raghilda-0.1.0/README.md +63 -0
- raghilda-0.1.0/pyproject.toml +58 -0
- raghilda-0.1.0/src/raghilda/__init__.py +12 -0
- raghilda-0.1.0/src/raghilda/_attribute_filters.py +518 -0
- raghilda-0.1.0/src/raghilda/_attribute_schema.py +757 -0
- raghilda-0.1.0/src/raghilda/_attributes.py +65 -0
- raghilda-0.1.0/src/raghilda/_chroma_store.py +981 -0
- raghilda-0.1.0/src/raghilda/_chunker.py +377 -0
- raghilda-0.1.0/src/raghilda/_deoverlap.py +472 -0
- raghilda-0.1.0/src/raghilda/_duckdb_store.py +1373 -0
- raghilda-0.1.0/src/raghilda/_embedding.py +543 -0
- raghilda-0.1.0/src/raghilda/_embedding_sentence_transformers.py +141 -0
- raghilda-0.1.0/src/raghilda/_openai_store.py +613 -0
- raghilda-0.1.0/src/raghilda/_store.py +109 -0
- raghilda-0.1.0/src/raghilda/_store_metadata.py +71 -0
- raghilda-0.1.0/src/raghilda/_types.py +54 -0
- raghilda-0.1.0/src/raghilda/chunk.py +181 -0
- raghilda-0.1.0/src/raghilda/chunker.py +4 -0
- raghilda-0.1.0/src/raghilda/document.py +237 -0
- raghilda-0.1.0/src/raghilda/embedding.py +19 -0
- raghilda-0.1.0/src/raghilda/read.py +246 -0
- raghilda-0.1.0/src/raghilda/scrape.py +227 -0
- raghilda-0.1.0/src/raghilda/store.py +6 -0
- raghilda-0.1.0/src/raghilda/types.py +23 -0
raghilda-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: raghilda
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: RAG made simple
|
|
5
|
+
Author: Daniel Falbel, Tomasz Kalinowski
|
|
6
|
+
Author-email: Daniel Falbel <daniel@posit.co>, Tomasz Kalinowski <tomasz@posit.co>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Requires-Dist: duckdb>=1.3.2
|
|
9
|
+
Requires-Dist: openai>=1.104.2
|
|
10
|
+
Requires-Dist: requests>=2.32.5
|
|
11
|
+
Requires-Dist: commonmark>=0.9.1
|
|
12
|
+
Requires-Dist: markitdown>=0.1.3
|
|
13
|
+
Requires-Dist: tqdm>=4.67.1
|
|
14
|
+
Requires-Dist: chromadb>=1.0.0 ; extra == 'chromadb'
|
|
15
|
+
Requires-Dist: chatlas>=0.2.0 ; extra == 'examples'
|
|
16
|
+
Requires-Dist: python-dotenv>=1.0.0 ; extra == 'examples'
|
|
17
|
+
Requires-Dist: sentence-transformers>=3.0.0 ; extra == 'sentence-transformers'
|
|
18
|
+
Requires-Dist: pyright>=1.1.405 ; extra == 'test'
|
|
19
|
+
Requires-Dist: pytest>=8.4.1 ; extra == 'test'
|
|
20
|
+
Requires-Dist: ruff>=0.12.11 ; extra == 'test'
|
|
21
|
+
Requires-Dist: chonkie>=1.0.0 ; extra == 'test'
|
|
22
|
+
Requires-Dist: cohere>=5.0.0 ; extra == 'test'
|
|
23
|
+
Requires-Dist: chromadb>=1.0.0 ; extra == 'test'
|
|
24
|
+
Requires-Dist: sentence-transformers>=3.0.0 ; extra == 'test'
|
|
25
|
+
Requires-Python: >=3.11, <3.14
|
|
26
|
+
Project-URL: Repository, https://github.com/dfalbel/raghilda
|
|
27
|
+
Provides-Extra: chromadb
|
|
28
|
+
Provides-Extra: examples
|
|
29
|
+
Provides-Extra: sentence-transformers
|
|
30
|
+
Provides-Extra: test
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# raghilda <img src="assets/raghilda-logo.png" align="right" width="140" alt="raghilda hex logo" />
|
|
34
|
+
|
|
35
|
+
RAG made simple.
|
|
36
|
+
|
|
37
|
+
raghilda is a Python package for implementing Retrieval-Augmented Generation (RAG) workflows. It provides a complete solution with sensible defaults while remaining transparent—not a black box.
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install raghilda
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Or install from GitHub:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install git+https://github.com/dfalbel/raghilda.git
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Key Steps
|
|
52
|
+
|
|
53
|
+
raghilda handles the complete RAG pipeline:
|
|
54
|
+
|
|
55
|
+
1. **Document Processing** — Convert documents to Markdown using MarkItDown
|
|
56
|
+
2. **Text Chunking** — Split text at semantic boundaries (headings, paragraphs, sentences)
|
|
57
|
+
3. **Embedding** — Generate vector representations via OpenAI or other providers
|
|
58
|
+
4. **Storage** — Store chunks and embeddings in DuckDB, ChromaDB, or OpenAI Vector Stores
|
|
59
|
+
5. **Retrieval** — Find relevant chunks using similarity search or BM25
|
|
60
|
+
|
|
61
|
+
## Usage
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from raghilda.store import DuckDBStore
|
|
65
|
+
from raghilda.embedding import EmbeddingOpenAI
|
|
66
|
+
from raghilda.scrape import find_links
|
|
67
|
+
from raghilda.read import read_as_markdown
|
|
68
|
+
from raghilda.chunker import MarkdownChunker
|
|
69
|
+
|
|
70
|
+
# Create a store with embeddings
|
|
71
|
+
store = DuckDBStore.create(
|
|
72
|
+
location="chatlas.db",
|
|
73
|
+
embed=EmbeddingOpenAI(),
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Find and index pages from the chatlas documentation
|
|
77
|
+
links = find_links("https://posit-dev.github.io/chatlas/")
|
|
78
|
+
chunker = MarkdownChunker()
|
|
79
|
+
|
|
80
|
+
for link in links:
|
|
81
|
+
    document = read_as_markdown(link)
|
|
82
|
+
    chunked_document = chunker.chunk(document)
|
|
83
|
+
    store.upsert(chunked_document)
|
|
84
|
+
|
|
85
|
+
# Retrieve relevant chunks
|
|
86
|
+
chunks = store.retrieve("How do I stream a response?", top_k=5)
|
|
87
|
+
for chunk in chunks:
|
|
88
|
+
    print(chunk.text)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Links
|
|
92
|
+
|
|
93
|
+
- [Documentation](https://dfalbel.github.io/raghilda/)
|
|
94
|
+
- [Source Code](https://github.com/dfalbel/raghilda)
|
|
95
|
+
- [Report Issues](https://github.com/dfalbel/raghilda/issues)
|
raghilda-0.1.0/README.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# raghilda <img src="assets/raghilda-logo.png" align="right" width="140" alt="raghilda hex logo" />
|
|
2
|
+
|
|
3
|
+
RAG made simple.
|
|
4
|
+
|
|
5
|
+
raghilda is a Python package for implementing Retrieval-Augmented Generation (RAG) workflows. It provides a complete solution with sensible defaults while remaining transparent—not a black box.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install raghilda
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or install from GitHub:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install git+https://github.com/dfalbel/raghilda.git
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Key Steps
|
|
20
|
+
|
|
21
|
+
raghilda handles the complete RAG pipeline:
|
|
22
|
+
|
|
23
|
+
1. **Document Processing** — Convert documents to Markdown using MarkItDown
|
|
24
|
+
2. **Text Chunking** — Split text at semantic boundaries (headings, paragraphs, sentences)
|
|
25
|
+
3. **Embedding** — Generate vector representations via OpenAI or other providers
|
|
26
|
+
4. **Storage** — Store chunks and embeddings in DuckDB, ChromaDB, or OpenAI Vector Stores
|
|
27
|
+
5. **Retrieval** — Find relevant chunks using similarity search or BM25
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from raghilda.store import DuckDBStore
|
|
33
|
+
from raghilda.embedding import EmbeddingOpenAI
|
|
34
|
+
from raghilda.scrape import find_links
|
|
35
|
+
from raghilda.read import read_as_markdown
|
|
36
|
+
from raghilda.chunker import MarkdownChunker
|
|
37
|
+
|
|
38
|
+
# Create a store with embeddings
|
|
39
|
+
store = DuckDBStore.create(
|
|
40
|
+
location="chatlas.db",
|
|
41
|
+
embed=EmbeddingOpenAI(),
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# Find and index pages from the chatlas documentation
|
|
45
|
+
links = find_links("https://posit-dev.github.io/chatlas/")
|
|
46
|
+
chunker = MarkdownChunker()
|
|
47
|
+
|
|
48
|
+
for link in links:
|
|
49
|
+
    document = read_as_markdown(link)
|
|
50
|
+
    chunked_document = chunker.chunk(document)
|
|
51
|
+
    store.upsert(chunked_document)
|
|
52
|
+
|
|
53
|
+
# Retrieve relevant chunks
|
|
54
|
+
chunks = store.retrieve("How do I stream a response?", top_k=5)
|
|
55
|
+
for chunk in chunks:
|
|
56
|
+
    print(chunk.text)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Links
|
|
60
|
+
|
|
61
|
+
- [Documentation](https://dfalbel.github.io/raghilda/)
|
|
62
|
+
- [Source Code](https://github.com/dfalbel/raghilda)
|
|
63
|
+
- [Report Issues](https://github.com/dfalbel/raghilda/issues)
|
raghilda-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "raghilda"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "RAG made simple"
|
|
5
|
+
license = "MIT"
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Daniel Falbel", email = "daniel@posit.co" },
|
|
9
|
+
{ name = "Tomasz Kalinowski", email = "tomasz@posit.co" },
|
|
10
|
+
]
|
|
11
|
+
requires-python = ">=3.11, <3.14"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"duckdb>=1.3.2",
|
|
14
|
+
"openai>=1.104.2",
|
|
15
|
+
"requests>=2.32.5",
|
|
16
|
+
"commonmark>=0.9.1",
|
|
17
|
+
"markitdown>=0.1.3",
|
|
18
|
+
"tqdm>=4.67.1",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.urls]
|
|
22
|
+
Repository = "https://github.com/dfalbel/raghilda"
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["uv_build>=0.8.0,<0.9"]
|
|
26
|
+
build-backend = "uv_build"
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
test = ["pyright>=1.1.405", "pytest>=8.4.1", "ruff>=0.12.11", "chonkie>=1.0.0", "cohere>=5.0.0", "chromadb>=1.0.0", "sentence-transformers>=3.0.0"]
|
|
30
|
+
examples = ["chatlas>=0.2.0", "python-dotenv>=1.0.0"]
|
|
31
|
+
chromadb = ["chromadb>=1.0.0"]
|
|
32
|
+
sentence-transformers = ["sentence-transformers>=3.0.0"]
|
|
33
|
+
|
|
34
|
+
[dependency-groups]
|
|
35
|
+
dev = [
|
|
36
|
+
"dotenv>=0.9.9",
|
|
37
|
+
"great-docs",
|
|
38
|
+
"griffe>=1.5.0,<2.0",
|
|
39
|
+
"taskipy>=1.14.1",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[tool.taskipy.tasks]
|
|
43
|
+
docs_build = { cmd = "./.venv/bin/great-docs build", help = "build docs" }
|
|
44
|
+
docs_preview = { cmd = "./.venv/bin/great-docs preview", help = "build docs and launch preview" }
|
|
45
|
+
docs = { cmd = "./.venv/bin/great-docs preview", help = "build docs and launch preview" }
|
|
46
|
+
tests = { cmd = "./.venv/bin/pytest tests src", help = "run pytest suite" }
|
|
47
|
+
types_check = { cmd = "./.venv/bin/pyright --pythonpath ./.venv/bin/python", help = "run pyright" }
|
|
48
|
+
format = { cmd = "./.venv/bin/ruff format src tests", help = "format code" }
|
|
49
|
+
format_check = { cmd = "./.venv/bin/ruff format --check src tests", help = "format check" }
|
|
50
|
+
lint_check = { cmd = "./.venv/bin/ruff check", help = "ruff lint" }
|
|
51
|
+
lint = { cmd = "./.venv/bin/ruff check --fix", help = "ruff lint --fix" }
|
|
52
|
+
check = { cmd = "./.venv/bin/ruff format --check src tests && ./.venv/bin/ruff check && ./.venv/bin/pyright --pythonpath ./.venv/bin/python && ./.venv/bin/pytest tests src", help = "format+lint+types+tests" }
|
|
53
|
+
|
|
54
|
+
[tool.pyright]
|
|
55
|
+
exclude = [".venv", ".pytest_cache", ".ruff_cache", "great-docs"]
|
|
56
|
+
|
|
57
|
+
[tool.uv.sources]
|
|
58
|
+
great-docs = { git = "https://github.com/rich-iannone/great-docs.git" }
|