raghilda 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,95 @@
1
+ Metadata-Version: 2.4
2
+ Name: raghilda
3
+ Version: 0.1.0
4
+ Summary: RAG made simple
5
+ Author: Daniel Falbel, Tomasz Kalinowski
6
+ Author-email: Daniel Falbel <daniel@posit.co>, Tomasz Kalinowski <tomasz@posit.co>
7
+ License-Expression: MIT
8
+ Requires-Dist: duckdb>=1.3.2
9
+ Requires-Dist: openai>=1.104.2
10
+ Requires-Dist: requests>=2.32.5
11
+ Requires-Dist: commonmark>=0.9.1
12
+ Requires-Dist: markitdown>=0.1.3
13
+ Requires-Dist: tqdm>=4.67.1
14
+ Requires-Dist: chromadb>=1.0.0 ; extra == 'chromadb'
15
+ Requires-Dist: chatlas>=0.2.0 ; extra == 'examples'
16
+ Requires-Dist: python-dotenv>=1.0.0 ; extra == 'examples'
17
+ Requires-Dist: sentence-transformers>=3.0.0 ; extra == 'sentence-transformers'
18
+ Requires-Dist: pyright>=1.1.405 ; extra == 'test'
19
+ Requires-Dist: pytest>=8.4.1 ; extra == 'test'
20
+ Requires-Dist: ruff>=0.12.11 ; extra == 'test'
21
+ Requires-Dist: chonkie>=1.0.0 ; extra == 'test'
22
+ Requires-Dist: cohere>=5.0.0 ; extra == 'test'
23
+ Requires-Dist: chromadb>=1.0.0 ; extra == 'test'
24
+ Requires-Dist: sentence-transformers>=3.0.0 ; extra == 'test'
25
+ Requires-Python: >=3.11, <3.14
26
+ Project-URL: Repository, https://github.com/dfalbel/raghilda
27
+ Provides-Extra: chromadb
28
+ Provides-Extra: examples
29
+ Provides-Extra: sentence-transformers
30
+ Provides-Extra: test
31
+ Description-Content-Type: text/markdown
32
+
33
+ # raghilda <img src="assets/raghilda-logo.png" align="right" width="140" alt="raghilda hex logo" />
34
+
35
+ RAG made simple.
36
+
37
+ raghilda is a Python package for implementing Retrieval-Augmented Generation (RAG) workflows. It provides a complete solution with sensible defaults while remaining transparent—not a black box.
38
+
39
+ ## Installation
40
+
41
+ ```bash
42
+ pip install raghilda
43
+ ```
44
+
45
+ Or install from GitHub:
46
+
47
+ ```bash
48
+ pip install git+https://github.com/dfalbel/raghilda.git
49
+ ```
50
+
51
+ ## Key Steps
52
+
53
+ raghilda handles the complete RAG pipeline:
54
+
55
+ 1. **Document Processing** — Convert documents to Markdown using MarkItDown
56
+ 2. **Text Chunking** — Split text at semantic boundaries (headings, paragraphs, sentences)
57
+ 3. **Embedding** — Generate vector representations via OpenAI or other providers
58
+ 4. **Storage** — Store chunks and embeddings in DuckDB, ChromaDB, or OpenAI Vector Stores
59
+ 5. **Retrieval** — Find relevant chunks using similarity search or BM25
60
+
61
+ ## Usage
62
+
63
+ ```python
64
+ from raghilda.store import DuckDBStore
65
+ from raghilda.embedding import EmbeddingOpenAI
66
+ from raghilda.scrape import find_links
67
+ from raghilda.read import read_as_markdown
68
+ from raghilda.chunker import MarkdownChunker
69
+
70
+ # Create a store with embeddings
71
+ store = DuckDBStore.create(
72
+     location="chatlas.db",
73
+     embed=EmbeddingOpenAI(),
74
+ )
75
+
76
+ # Find and index pages from the chatlas documentation
77
+ links = find_links("https://posit-dev.github.io/chatlas/")
78
+ chunker = MarkdownChunker()
79
+
80
+ for link in links:
81
+     document = read_as_markdown(link)
82
+     chunked_document = chunker.chunk(document)
83
+     store.upsert(chunked_document)
84
+
85
+ # Retrieve relevant chunks
86
+ chunks = store.retrieve("How do I stream a response?", top_k=5)
87
+ for chunk in chunks:
88
+     print(chunk.text)
89
+ ```
90
+
91
+ ## Links
92
+
93
+ - [Documentation](https://dfalbel.github.io/raghilda/)
94
+ - [Source Code](https://github.com/dfalbel/raghilda)
95
+ - [Report Issues](https://github.com/dfalbel/raghilda/issues)
@@ -0,0 +1,63 @@
1
+ # raghilda <img src="assets/raghilda-logo.png" align="right" width="140" alt="raghilda hex logo" />
2
+
3
+ RAG made simple.
4
+
5
+ raghilda is a Python package for implementing Retrieval-Augmented Generation (RAG) workflows. It provides a complete solution with sensible defaults while remaining transparent—not a black box.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install raghilda
11
+ ```
12
+
13
+ Or install from GitHub:
14
+
15
+ ```bash
16
+ pip install git+https://github.com/dfalbel/raghilda.git
17
+ ```
18
+
19
+ ## Key Steps
20
+
21
+ raghilda handles the complete RAG pipeline:
22
+
23
+ 1. **Document Processing** — Convert documents to Markdown using MarkItDown
24
+ 2. **Text Chunking** — Split text at semantic boundaries (headings, paragraphs, sentences)
25
+ 3. **Embedding** — Generate vector representations via OpenAI or other providers
26
+ 4. **Storage** — Store chunks and embeddings in DuckDB, ChromaDB, or OpenAI Vector Stores
27
+ 5. **Retrieval** — Find relevant chunks using similarity search or BM25
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from raghilda.store import DuckDBStore
33
+ from raghilda.embedding import EmbeddingOpenAI
34
+ from raghilda.scrape import find_links
35
+ from raghilda.read import read_as_markdown
36
+ from raghilda.chunker import MarkdownChunker
37
+
38
+ # Create a store with embeddings
39
+ store = DuckDBStore.create(
40
+     location="chatlas.db",
41
+     embed=EmbeddingOpenAI(),
42
+ )
43
+
44
+ # Find and index pages from the chatlas documentation
45
+ links = find_links("https://posit-dev.github.io/chatlas/")
46
+ chunker = MarkdownChunker()
47
+
48
+ for link in links:
49
+     document = read_as_markdown(link)
50
+     chunked_document = chunker.chunk(document)
51
+     store.upsert(chunked_document)
52
+
53
+ # Retrieve relevant chunks
54
+ chunks = store.retrieve("How do I stream a response?", top_k=5)
55
+ for chunk in chunks:
56
+     print(chunk.text)
57
+ ```
58
+
59
+ ## Links
60
+
61
+ - [Documentation](https://dfalbel.github.io/raghilda/)
62
+ - [Source Code](https://github.com/dfalbel/raghilda)
63
+ - [Report Issues](https://github.com/dfalbel/raghilda/issues)
@@ -0,0 +1,58 @@
1
+ [project]
2
+ name = "raghilda"
3
+ version = "0.1.0"
4
+ description = "RAG made simple"
5
+ license = "MIT"
6
+ readme = "README.md"
7
+ authors = [
8
+ { name = "Daniel Falbel", email = "daniel@posit.co" },
9
+ { name = "Tomasz Kalinowski", email = "tomasz@posit.co" },
10
+ ]
11
+ requires-python = ">=3.11, <3.14"
12
+ dependencies = [
13
+ "duckdb>=1.3.2",
14
+ "openai>=1.104.2",
15
+ "requests>=2.32.5",
16
+ "commonmark>=0.9.1",
17
+ "markitdown>=0.1.3",
18
+ "tqdm>=4.67.1",
19
+ ]
20
+
21
+ [project.urls]
22
+ Repository = "https://github.com/dfalbel/raghilda"
23
+
24
+ [build-system]
25
+ requires = ["uv_build>=0.8.0,<0.9"]
26
+ build-backend = "uv_build"
27
+
28
+ [project.optional-dependencies]
29
+ test = ["pyright>=1.1.405", "pytest>=8.4.1", "ruff>=0.12.11", "chonkie>=1.0.0", "cohere>=5.0.0", "chromadb>=1.0.0", "sentence-transformers>=3.0.0"]
30
+ examples = ["chatlas>=0.2.0", "python-dotenv>=1.0.0"]
31
+ chromadb = ["chromadb>=1.0.0"]
32
+ sentence-transformers = ["sentence-transformers>=3.0.0"]
33
+
34
+ [dependency-groups]
35
+ dev = [
36
+ "dotenv>=0.9.9",
37
+ "great-docs",
38
+ "griffe>=1.5.0,<2.0",
39
+ "taskipy>=1.14.1",
40
+ ]
41
+
42
+ [tool.taskipy.tasks]
43
+ docs_build = { cmd = "./.venv/bin/great-docs build", help = "build docs" }
44
+ docs_preview = { cmd = "./.venv/bin/great-docs preview", help = "build docs and launch preview" }
45
+ docs = { cmd = "./.venv/bin/great-docs preview", help = "build docs and launch preview" }
46
+ tests = { cmd = "./.venv/bin/pytest tests src", help = "run pytest suite" }
47
+ types_check = { cmd = "./.venv/bin/pyright --pythonpath ./.venv/bin/python", help = "run pyright" }
48
+ format = { cmd = "./.venv/bin/ruff format src tests", help = "format code" }
49
+ format_check = { cmd = "./.venv/bin/ruff format --check src tests", help = "format check" }
50
+ lint_check = { cmd = "./.venv/bin/ruff check", help = "ruff lint" }
51
+ lint = { cmd = "./.venv/bin/ruff check --fix", help = "ruff lint --fix" }
52
+ check = { cmd = "./.venv/bin/ruff format --check src tests && ./.venv/bin/ruff check && ./.venv/bin/pyright --pythonpath ./.venv/bin/python && ./.venv/bin/pytest tests src", help = "format+lint+types+tests" }
53
+
54
+ [tool.pyright]
55
+ exclude = [".venv", ".pytest_cache", ".ruff_cache", "great-docs"]
56
+
57
+ [tool.uv.sources]
58
+ great-docs = { git = "https://github.com/rich-iannone/great-docs.git" }
@@ -0,0 +1,12 @@
1
+ from . import embedding, store, types, chunk, chunker, document, read, scrape
2
+
3
+ __all__ = [
4
+ "embedding",
5
+ "store",
6
+ "types",
7
+ "chunk",
8
+ "chunker",
9
+ "document",
10
+ "read",
11
+ "scrape",
12
+ ]