rag-python 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rag_python-0.1.0/src/rag_python.egg-info → rag_python-0.2.0}/PKG-INFO +7 -3
- {rag_python-0.1.0 → rag_python-0.2.0}/README.md +3 -1
- {rag_python-0.1.0 → rag_python-0.2.0}/pyproject.toml +3 -2
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/__init__.py +1 -1
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/cli.py +18 -3
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/providers/factory.py +4 -1
- rag_python-0.2.0/src/rag_python/providers/local_provider.py +34 -0
- {rag_python-0.1.0 → rag_python-0.2.0/src/rag_python.egg-info}/PKG-INFO +7 -3
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python.egg-info/SOURCES.txt +6 -1
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python.egg-info/requires.txt +4 -1
- rag_python-0.2.0/tests/test_chunking.py +25 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/tests/test_import.py +1 -1
- rag_python-0.2.0/tests/test_loaders.py +28 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/tests/test_package.py +1 -1
- rag_python-0.2.0/tests/test_pipeline.py +55 -0
- rag_python-0.2.0/tests/test_providers.py +6 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/LICENSE +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/setup.cfg +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/chunking.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/cleaning.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/client.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/config.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/document_loaders.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/evaluation.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/generation.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/guardrails.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/options.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/providers/__init__.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/providers/anthropic_provider.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/providers/azure_openai_provider.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/providers/base.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/providers/gemini_provider.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/providers/ollama_provider.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/providers/openai_provider.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/py.typed +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/query_rewriting.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/rag_pipeline.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/reranker.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/retrieval.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python/vector_store.py +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python.egg-info/dependency_links.txt +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python.egg-info/entry_points.txt +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/src/rag_python.egg-info/top_level.txt +0 -0
- {rag_python-0.1.0 → rag_python-0.2.0}/tests/test_config.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: rag-python
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Production-grade RAG for Python: multi-LLM, query rewriting, reranking, guardrails, and evaluation.
|
|
5
5
|
Author-email: Raghav Singla <04raghavsingla28@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -33,6 +33,8 @@ Requires-Dist: requests>=2.31.0
|
|
|
33
33
|
Provides-Extra: rerank
|
|
34
34
|
Requires-Dist: sentence-transformers>=2.2.0; extra == "rerank"
|
|
35
35
|
Requires-Dist: torch>=2.0.0; extra == "rerank"
|
|
36
|
+
Provides-Extra: local
|
|
37
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "local"
|
|
36
38
|
Provides-Extra: anthropic
|
|
37
39
|
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
|
38
40
|
Provides-Extra: gemini
|
|
@@ -43,10 +45,12 @@ Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
|
43
45
|
Requires-Dist: build; extra == "dev"
|
|
44
46
|
Requires-Dist: twine; extra == "dev"
|
|
45
47
|
Provides-Extra: all
|
|
46
|
-
Requires-Dist: rag-python[anthropic,gemini,rerank]; extra == "all"
|
|
48
|
+
Requires-Dist: rag-python[anthropic,gemini,local,rerank]; extra == "all"
|
|
47
49
|
|
|
48
50
|
# rag-python
|
|
49
51
|
|
|
52
|
+
[](https://pypi.org/project/rag-python/)
|
|
53
|
+
[](https://pypi.org/project/rag-python/)
|
|
50
54
|
[](https://www.python.org/downloads/)
|
|
51
55
|
[](LICENSE)
|
|
52
56
|
[](https://github.com/RaghavOG/rag-python)
|
|
@@ -77,7 +81,7 @@ pip install rag-python
|
|
|
77
81
|
# or from source
|
|
78
82
|
pip install -e .
|
|
79
83
|
# with reranking + extra providers
|
|
80
|
-
pip install -e ".[rerank,anthropic,gemini,all]"
|
|
84
|
+
pip install -e ".[rerank,local,anthropic,gemini,all]"
|
|
81
85
|
```
|
|
82
86
|
|
|
83
87
|
---
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# rag-python
|
|
2
2
|
|
|
3
|
+
[](https://pypi.org/project/rag-python/)
|
|
4
|
+
[](https://pypi.org/project/rag-python/)
|
|
3
5
|
[](https://www.python.org/downloads/)
|
|
4
6
|
[](LICENSE)
|
|
5
7
|
[](https://github.com/RaghavOG/rag-python)
|
|
@@ -30,7 +32,7 @@ pip install rag-python
|
|
|
30
32
|
# or from source
|
|
31
33
|
pip install -e .
|
|
32
34
|
# with reranking + extra providers
|
|
33
|
-
pip install -e ".[rerank,anthropic,gemini,all]"
|
|
35
|
+
pip install -e ".[rerank,local,anthropic,gemini,all]"
|
|
34
36
|
```
|
|
35
37
|
|
|
36
38
|
---
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "rag-python"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Production-grade RAG for Python: multi-LLM, query rewriting, reranking, guardrails, and evaluation."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -38,10 +38,11 @@ dependencies = [
|
|
|
38
38
|
|
|
39
39
|
[project.optional-dependencies]
|
|
40
40
|
rerank = ["sentence-transformers>=2.2.0", "torch>=2.0.0"]
|
|
41
|
+
local = ["sentence-transformers>=2.2.0"]
|
|
41
42
|
anthropic = ["anthropic>=0.20.0"]
|
|
42
43
|
gemini = ["google-genai>=0.3.0"]
|
|
43
44
|
dev = ["pytest>=7.0", "ruff>=0.1.0", "build", "twine"]
|
|
44
|
-
all = ["rag-python[rerank,anthropic,gemini]"]
|
|
45
|
+
all = ["rag-python[rerank,local,anthropic,gemini]"]
|
|
45
46
|
|
|
46
47
|
[project.scripts]
|
|
47
48
|
rag-python = "rag_python.cli:main"
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""rag-python command-line interface."""
|
|
2
2
|
import argparse
|
|
3
|
+
from dataclasses import replace
|
|
3
4
|
|
|
5
|
+
from . import __version__
|
|
4
6
|
from .client import RAG
|
|
5
7
|
|
|
6
8
|
|
|
@@ -21,9 +23,17 @@ def _build_rag(args: argparse.Namespace) -> RAG:
|
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
def _add_provider_args(parser: argparse.ArgumentParser) -> None:
|
|
24
|
-
parser.add_argument(
|
|
26
|
+
parser.add_argument(
|
|
27
|
+
"--llm-provider",
|
|
28
|
+
default="openai",
|
|
29
|
+
choices=["openai", "azure_openai", "anthropic", "gemini", "ollama"],
|
|
30
|
+
)
|
|
25
31
|
parser.add_argument("--llm-model", default=None)
|
|
26
|
-
parser.add_argument(
|
|
32
|
+
parser.add_argument(
|
|
33
|
+
"--embedding-provider",
|
|
34
|
+
default="openai",
|
|
35
|
+
choices=["openai", "azure_openai", "ollama", "local"],
|
|
36
|
+
)
|
|
27
37
|
parser.add_argument("--embedding-model", default=None)
|
|
28
38
|
parser.add_argument("--ollama-base-url", default=None)
|
|
29
39
|
parser.add_argument("--azure-endpoint", default=None)
|
|
@@ -39,6 +49,7 @@ def main() -> None:
|
|
|
39
49
|
prog="rag-python",
|
|
40
50
|
description="rag-python — modular RAG with query rewriting, reranking, guardrails, and multi-LLM support.",
|
|
41
51
|
)
|
|
52
|
+
parser.add_argument("--version", action="version", version=f"rag-python {__version__}")
|
|
42
53
|
sub = parser.add_subparsers(dest="command", required=True)
|
|
43
54
|
|
|
44
55
|
ing = sub.add_parser("ingest", help="Ingest files/folders into the vector store")
|
|
@@ -63,7 +74,11 @@ def main() -> None:
|
|
|
63
74
|
if args.command == "query":
|
|
64
75
|
rag = _build_rag(args)
|
|
65
76
|
question = " ".join(args.question)
|
|
66
|
-
|
|
77
|
+
search = replace(
|
|
78
|
+
rag.config.search,
|
|
79
|
+
retriever="vector" if args.no_multi_query else "multi_query",
|
|
80
|
+
)
|
|
81
|
+
ans = rag.query(question, search=search)
|
|
67
82
|
print(ans.text)
|
|
68
83
|
if args.verbose:
|
|
69
84
|
print("\n--- evaluation ---")
|
|
@@ -9,10 +9,11 @@ from .azure_openai_provider import AzureOpenAIProvider
|
|
|
9
9
|
from .anthropic_provider import AnthropicProvider
|
|
10
10
|
from .gemini_provider import GeminiProvider
|
|
11
11
|
from .ollama_provider import OllamaProvider
|
|
12
|
+
from .local_provider import LocalEmbeddingProvider
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
LLMProviderName = Literal["openai", "azure_openai", "anthropic", "gemini", "ollama"]
|
|
15
|
-
EmbeddingProviderName = Literal["openai", "azure_openai", "ollama"]
|
|
16
|
+
EmbeddingProviderName = Literal["openai", "azure_openai", "ollama", "local"]
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
def make_llm_provider(name: LLMProviderName, **kwargs) -> LLMProvider:
|
|
@@ -49,5 +50,7 @@ def make_embedding_provider(name: EmbeddingProviderName, **kwargs) -> EmbeddingP
|
|
|
49
50
|
)
|
|
50
51
|
if name == "ollama":
|
|
51
52
|
return OllamaProvider(base_url=kwargs.get("base_url") or os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"))
|
|
53
|
+
if name == "local":
|
|
54
|
+
return LocalEmbeddingProvider(model_name=kwargs.get("model") or os.getenv("LOCAL_EMBEDDING_MODEL"))
|
|
52
55
|
raise ValueError(f"Unknown embedding provider: {name}")
|
|
53
56
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Local sentence-transformers embeddings (no API key required)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
_DEFAULT_MODEL = "all-MiniLM-L6-v2"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LocalEmbeddingProvider:
|
|
10
|
+
"""Offline embeddings via sentence-transformers."""
|
|
11
|
+
|
|
12
|
+
def __init__(self, model_name: str | None = None) -> None:
|
|
13
|
+
self.default_model = model_name or os.getenv("LOCAL_EMBEDDING_MODEL", _DEFAULT_MODEL)
|
|
14
|
+
self._models: dict[str, object] = {}
|
|
15
|
+
|
|
16
|
+
def _get_model(self, model_name: str):
|
|
17
|
+
if model_name not in self._models:
|
|
18
|
+
try:
|
|
19
|
+
from sentence_transformers import SentenceTransformer
|
|
20
|
+
except ImportError as e:
|
|
21
|
+
raise ImportError(
|
|
22
|
+
"Local embeddings require optional dependencies. "
|
|
23
|
+
"Install with: pip install rag-python[local]"
|
|
24
|
+
) from e
|
|
25
|
+
self._models[model_name] = SentenceTransformer(model_name)
|
|
26
|
+
return self._models[model_name]
|
|
27
|
+
|
|
28
|
+
def embed(self, texts: list[str], *, model: str | None = None) -> list[list[float]]:
|
|
29
|
+
if not texts:
|
|
30
|
+
return []
|
|
31
|
+
model_name = model or self.default_model
|
|
32
|
+
encoder = self._get_model(model_name)
|
|
33
|
+
vectors = encoder.encode(texts, convert_to_numpy=True)
|
|
34
|
+
return [v.tolist() for v in vectors]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: rag-python
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Production-grade RAG for Python: multi-LLM, query rewriting, reranking, guardrails, and evaluation.
|
|
5
5
|
Author-email: Raghav Singla <04raghavsingla28@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -33,6 +33,8 @@ Requires-Dist: requests>=2.31.0
|
|
|
33
33
|
Provides-Extra: rerank
|
|
34
34
|
Requires-Dist: sentence-transformers>=2.2.0; extra == "rerank"
|
|
35
35
|
Requires-Dist: torch>=2.0.0; extra == "rerank"
|
|
36
|
+
Provides-Extra: local
|
|
37
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "local"
|
|
36
38
|
Provides-Extra: anthropic
|
|
37
39
|
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
|
38
40
|
Provides-Extra: gemini
|
|
@@ -43,10 +45,12 @@ Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
|
43
45
|
Requires-Dist: build; extra == "dev"
|
|
44
46
|
Requires-Dist: twine; extra == "dev"
|
|
45
47
|
Provides-Extra: all
|
|
46
|
-
Requires-Dist: rag-python[anthropic,gemini,rerank]; extra == "all"
|
|
48
|
+
Requires-Dist: rag-python[anthropic,gemini,local,rerank]; extra == "all"
|
|
47
49
|
|
|
48
50
|
# rag-python
|
|
49
51
|
|
|
52
|
+
[](https://pypi.org/project/rag-python/)
|
|
53
|
+
[](https://pypi.org/project/rag-python/)
|
|
50
54
|
[](https://www.python.org/downloads/)
|
|
51
55
|
[](LICENSE)
|
|
52
56
|
[](https://github.com/RaghavOG/rag-python)
|
|
@@ -77,7 +81,7 @@ pip install rag-python
|
|
|
77
81
|
# or from source
|
|
78
82
|
pip install -e .
|
|
79
83
|
# with reranking + extra providers
|
|
80
|
-
pip install -e ".[rerank,anthropic,gemini,all]"
|
|
84
|
+
pip install -e ".[rerank,local,anthropic,gemini,all]"
|
|
81
85
|
```
|
|
82
86
|
|
|
83
87
|
---
|
|
@@ -30,8 +30,13 @@ src/rag_python/providers/azure_openai_provider.py
|
|
|
30
30
|
src/rag_python/providers/base.py
|
|
31
31
|
src/rag_python/providers/factory.py
|
|
32
32
|
src/rag_python/providers/gemini_provider.py
|
|
33
|
+
src/rag_python/providers/local_provider.py
|
|
33
34
|
src/rag_python/providers/ollama_provider.py
|
|
34
35
|
src/rag_python/providers/openai_provider.py
|
|
36
|
+
tests/test_chunking.py
|
|
35
37
|
tests/test_config.py
|
|
36
38
|
tests/test_import.py
|
|
37
|
-
tests/test_package.py
|
|
39
|
+
tests/test_loaders.py
|
|
40
|
+
tests/test_package.py
|
|
41
|
+
tests/test_pipeline.py
|
|
42
|
+
tests/test_providers.py
|
|
@@ -9,7 +9,7 @@ python-dotenv>=1.0.0
|
|
|
9
9
|
requests>=2.31.0
|
|
10
10
|
|
|
11
11
|
[all]
|
|
12
|
-
rag-python[anthropic,gemini,rerank]
|
|
12
|
+
rag-python[anthropic,gemini,local,rerank]
|
|
13
13
|
|
|
14
14
|
[anthropic]
|
|
15
15
|
anthropic>=0.20.0
|
|
@@ -23,6 +23,9 @@ twine
|
|
|
23
23
|
[gemini]
|
|
24
24
|
google-genai>=0.3.0
|
|
25
25
|
|
|
26
|
+
[local]
|
|
27
|
+
sentence-transformers>=2.2.0
|
|
28
|
+
|
|
26
29
|
[rerank]
|
|
27
30
|
sentence-transformers>=2.2.0
|
|
28
31
|
torch>=2.0.0
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from rag_python.chunking import chunk_recursive, chunk_structure_aware, chunk_text
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_chunk_recursive_splits_long_text():
|
|
5
|
+
text = "word " * 500
|
|
6
|
+
chunks = chunk_recursive(text, chunk_size=64, overlap=8, metadata={"source": "t.txt"})
|
|
7
|
+
assert len(chunks) > 1
|
|
8
|
+
assert all(c.metadata["chunk_strategy"] == "recursive" for c in chunks)
|
|
9
|
+
assert all(c.metadata["source"] == "t.txt" for c in chunks)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_chunk_structure_aware_respects_headings():
|
|
13
|
+
text = "# Policy\n\nAnnual leave is 20 days.\n\n# Benefits\n\nHealth insurance is provided."
|
|
14
|
+
chunks = chunk_structure_aware(text, chunk_size=512, overlap=0)
|
|
15
|
+
assert len(chunks) >= 2
|
|
16
|
+
sections = {c.metadata.get("section") for c in chunks}
|
|
17
|
+
assert "Policy" in sections
|
|
18
|
+
assert "Benefits" in sections
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_chunk_text_unified_entry():
|
|
22
|
+
text = "Simple paragraph for testing."
|
|
23
|
+
chunks = chunk_text(text, strategy="recursive", metadata={"source": "x"})
|
|
24
|
+
assert len(chunks) >= 1
|
|
25
|
+
assert chunks[0].text
|
|
@@ -2,7 +2,7 @@ def test_import_rag_python():
|
|
|
2
2
|
import rag_python
|
|
3
3
|
from rag_python import RAG, RAGAnswer, ingest, query
|
|
4
4
|
|
|
5
|
-
assert rag_python.__version__ == "0.1.0"
|
|
5
|
+
assert rag_python.__version__ == "0.2.0"
|
|
6
6
|
assert RAG is not None
|
|
7
7
|
assert RAGAnswer is not None
|
|
8
8
|
assert callable(ingest)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rag_python.document_loaders import load_file, load_directory
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_load_txt_file(tmp_path: Path):
|
|
7
|
+
f = tmp_path / "note.txt"
|
|
8
|
+
f.write_text("Hello from rag-python.", encoding="utf-8")
|
|
9
|
+
doc = load_file(f)
|
|
10
|
+
assert doc is not None
|
|
11
|
+
assert "Hello" in doc.content
|
|
12
|
+
assert doc.metadata["filename"] == "note.txt"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_load_markdown_file(tmp_path: Path):
|
|
16
|
+
f = tmp_path / "readme.md"
|
|
17
|
+
f.write_text("# Title\n\nBody text.", encoding="utf-8")
|
|
18
|
+
doc = load_file(f)
|
|
19
|
+
assert doc is not None
|
|
20
|
+
assert "Title" in doc.content
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_load_directory_skips_empty_files(tmp_path: Path):
|
|
24
|
+
(tmp_path / "a.txt").write_text("content a", encoding="utf-8")
|
|
25
|
+
(tmp_path / "empty.txt").write_text(" ", encoding="utf-8")
|
|
26
|
+
docs = list(load_directory(tmp_path))
|
|
27
|
+
assert len(docs) == 1
|
|
28
|
+
assert docs[0].metadata["filename"] == "a.txt"
|
|
@@ -4,7 +4,7 @@ import importlib.metadata
|
|
|
4
4
|
def test_package_metadata():
|
|
5
5
|
dist = importlib.metadata.metadata("rag-python")
|
|
6
6
|
assert dist["Name"] == "rag-python"
|
|
7
|
-
assert dist["Version"] == "0.1.0"
|
|
7
|
+
assert dist["Version"] == "0.2.0"
|
|
8
8
|
author = dist.get("Author") or dist.get("Author-email") or ""
|
|
9
9
|
assert "Raghav Singla" in author or "RaghavOG" in author
|
|
10
10
|
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from unittest.mock import MagicMock, patch
|
|
2
|
+
|
|
3
|
+
from rag_python.rag_pipeline import ingest, query
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_ingest_with_mocked_embedder(tmp_path):
|
|
7
|
+
f = tmp_path / "doc.txt"
|
|
8
|
+
f.write_text("Employees receive twenty days of annual leave per year.", encoding="utf-8")
|
|
9
|
+
|
|
10
|
+
embedder = MagicMock()
|
|
11
|
+
embedder.embed.return_value = [[0.1, 0.2, 0.3]]
|
|
12
|
+
|
|
13
|
+
with patch("rag_python.rag_pipeline.ingest_chunks") as mock_ingest:
|
|
14
|
+
n = ingest(
|
|
15
|
+
data_path=f,
|
|
16
|
+
clean=False,
|
|
17
|
+
chunk_strategy="recursive",
|
|
18
|
+
chunk_size=128,
|
|
19
|
+
chunk_overlap=0,
|
|
20
|
+
reindex=False,
|
|
21
|
+
embedder=embedder,
|
|
22
|
+
)
|
|
23
|
+
assert n >= 1
|
|
24
|
+
mock_ingest.assert_called_once()
|
|
25
|
+
assert mock_ingest.call_args.kwargs["embedder"] is embedder
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_query_with_mocked_providers():
|
|
29
|
+
llm = MagicMock()
|
|
30
|
+
llm.generate.return_value = "Twenty days of annual leave."
|
|
31
|
+
|
|
32
|
+
embedder = MagicMock()
|
|
33
|
+
embedder.embed.return_value = [[0.5, 0.5, 0.5]]
|
|
34
|
+
|
|
35
|
+
with (
|
|
36
|
+
patch("rag_python.rag_pipeline.check_prompt_injection", return_value=(True, "")),
|
|
37
|
+
patch("rag_python.rag_pipeline.rag_retrieve") as mock_retrieve,
|
|
38
|
+
patch("rag_python.rag_pipeline.check_hallucination", return_value=(True, "")),
|
|
39
|
+
patch("rag_python.rag_pipeline.evaluate_rag", return_value={"faithfulness": 0.9, "relevance": 0.9}),
|
|
40
|
+
patch("rag_python.rag_pipeline.should_retry", return_value=False),
|
|
41
|
+
):
|
|
42
|
+
mock_retrieve.return_value = [
|
|
43
|
+
("Employees receive twenty days of annual leave.", {"source": "doc.txt"}, 0.95),
|
|
44
|
+
]
|
|
45
|
+
resp = query(
|
|
46
|
+
"How many days of annual leave?",
|
|
47
|
+
use_guardrails=True,
|
|
48
|
+
use_retry=False,
|
|
49
|
+
llm=llm,
|
|
50
|
+
embedder=embedder,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
assert "twenty" in resp.answer.lower() or "Twenty" in resp.answer
|
|
54
|
+
assert resp.sources
|
|
55
|
+
mock_retrieve.assert_called_once()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|