kssrag 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kssrag/cli.py +59 -13
- kssrag/config.py +15 -1
- kssrag/core/agents.py +61 -10
- kssrag/core/chunkers.py +95 -1
- kssrag/core/vectorstores.py +103 -2
- kssrag/models/openrouter.py +77 -15
- kssrag/server.py +49 -0
- kssrag/utils/document_loaders.py +80 -2
- kssrag/utils/helpers.py +74 -31
- kssrag/utils/ocr.py +48 -0
- kssrag/utils/ocr_loader.py +151 -0
- kssrag-0.2.0.dist-info/METADATA +840 -0
- kssrag-0.2.0.dist-info/RECORD +33 -0
- tests/test_bm25s.py +74 -0
- tests/test_config.py +42 -0
- tests/test_image_chunker.py +17 -0
- tests/test_integration.py +35 -0
- tests/test_ocr.py +142 -0
- tests/test_streaming.py +41 -0
- kssrag-0.1.1.dist-info/METADATA +0 -407
- kssrag-0.1.1.dist-info/RECORD +0 -25
- {kssrag-0.1.1.dist-info → kssrag-0.2.0.dist-info}/WHEEL +0 -0
- {kssrag-0.1.1.dist-info → kssrag-0.2.0.dist-info}/entry_points.txt +0 -0
- {kssrag-0.1.1.dist-info → kssrag-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
kssrag/__init__.py,sha256=N1XfR8IRKtEJAzcOVyHnKXtgx-ztlrSFtFwiVkGbAX8,2041
|
|
2
|
+
kssrag/cli.py,sha256=9AbtUEV9X63bhRj4EU-eHhud8iPM7LJAGSbu_IPlMUE,9703
|
|
3
|
+
kssrag/config.py,sha256=zd978GZQ66TlLZnk9yP7uvoXoWD89BS8VHi7w_yGXrM,6529
|
|
4
|
+
kssrag/kssrag.py,sha256=vy3oCHeHFAp_dJW0JjLbTxeEwCcwtXuOL_Ejmv0qz8Y,5251
|
|
5
|
+
kssrag/server.py,sha256=CbnC0WhIKIi6iJ3q448swEdLDcvmUf80lsdlSKp0GpM,5942
|
|
6
|
+
kssrag/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
kssrag/core/agents.py,sha256=5zRSudh_4tbp4lDfAhaczeOe-INCpgfm6OJEhE6Ut4I,5421
|
|
8
|
+
kssrag/core/chunkers.py,sha256=HmWL3y2DhhobV5zIlIdZP2KK2N7TASqeirPqmc3_inI,7324
|
|
9
|
+
kssrag/core/retrievers.py,sha256=1e9c7ukUD4pFSVasOMTXSKoz_rapXQTl-FrSHK6Osqg,3037
|
|
10
|
+
kssrag/core/vectorstores.py,sha256=H8hTpjc6hAFMhqAO2Cjq-Jp6xrJhsJKiRN9qxb_-6XM,21003
|
|
11
|
+
kssrag/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
kssrag/models/local_llms.py,sha256=IsthEwiNG1QcvHrTpQWdd1kZuHa4-0bfGTxHe8F3i2M,1178
|
|
13
|
+
kssrag/models/openrouter.py,sha256=tplACtQ5J9YTemk0616dhg6H81_eAdsfeLs3AEytKE0,6429
|
|
14
|
+
kssrag/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
kssrag/utils/document_loaders.py,sha256=grXtU4sy8j23zJwanadO8rFXlsRJ2e2JF6MvoczsmqQ,4192
|
|
16
|
+
kssrag/utils/helpers.py,sha256=MoTZRgTTiHDnbELHLrDKOBoxxiwFyLKJXUnZeWOtHFg,3806
|
|
17
|
+
kssrag/utils/ocr.py,sha256=T2yZM-Z8B-1Y7K7CKxM5BrvNFPB5Cx0vjlk-XZnc3p8,1425
|
|
18
|
+
kssrag/utils/ocr_loader.py,sha256=0RvY56aSNulo4U1eHwSzOUBZUIzc1nBLt7395OYdkXM,6930
|
|
19
|
+
kssrag/utils/preprocessors.py,sha256=_kbeZOWnbqbKKSBiyRP8QZAKx9uYMXgHfARcWBqC3JU,938
|
|
20
|
+
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
tests/test_basic.py,sha256=JdBBRpP9wOo4BvvZTisidP40gGyK_azUoewJpoJaa5M,1275
|
|
22
|
+
tests/test_bm25s.py,sha256=tfvhWGxippGmNiLujc2OiaFewvvkJoOwrGXzBGZQMtU,2749
|
|
23
|
+
tests/test_config.py,sha256=zIawdV9xb-EuDl1BXKKOvgZY-uUc5Q0KeyJHBP85eIE,1398
|
|
24
|
+
tests/test_image_chunker.py,sha256=7cY3HucIFdNzcOVI2WA0nY5QmGcsv5umfE4c_yNnLfw,741
|
|
25
|
+
tests/test_integration.py,sha256=TY7MrTcAiu1KG4MlgIC7VVlzUTnOoqp9pieK8rhBNrg,1059
|
|
26
|
+
tests/test_ocr.py,sha256=PoGKLNISpAwaoPvGuS7qiOf6dsVnsFRFtYkG1WFi6TU,6202
|
|
27
|
+
tests/test_streaming.py,sha256=rMQ0w8_HQFFV0PbHDqQXRBqaNfbd3WqJVNT2hKVbsqw,1442
|
|
28
|
+
tests/test_vectorstores.py,sha256=YOwI2bfqprzbq8ahIw4pbbbEOaKGcg-XPcLCO7WiLxE,1474
|
|
29
|
+
kssrag-0.2.0.dist-info/METADATA,sha256=MK2r6XV1oT76WuQJ01vdAbJxER9ZkCDalAITwMo3tNg,24016
|
|
30
|
+
kssrag-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
31
|
+
kssrag-0.2.0.dist-info/entry_points.txt,sha256=g4tQj5YUqPK3Osb9BI85tsErxleSBUENiqlnX0fWK5M,43
|
|
32
|
+
kssrag-0.2.0.dist-info/top_level.txt,sha256=sO9LGINa0GEjLoHTtufpz01yM5SmeTw6M4zWHEF0R2s,13
|
|
33
|
+
kssrag-0.2.0.dist-info/RECORD,,
|
tests/test_bm25s.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import numpy as np
|
|
3
|
+
import tempfile
|
|
4
|
+
import os
|
|
5
|
+
from kssrag.core.vectorstores import BM25SVectorStore
|
|
6
|
+
|
|
7
|
+
def test_bm25s_vector_store_basic():
    """Index three short documents and check the top two hits for a Python query."""
    corpus = [
        ("This is a test document about Python programming.", "test1"),
        ("Another document about machine learning and AI.", "test2"),
        ("A third document on web development with JavaScript.", "test3"),
    ]
    documents = [
        {"content": text, "metadata": {"source": source}}
        for text, source in corpus
    ]

    store = BM25SVectorStore()
    store.add_documents(documents)

    hits = store.retrieve("Python programming", top_k=2)

    assert len(hits) == 2
    # The best-ranked hit is expected to be the Python document.
    assert "Python" in hits[0]["content"]
    for hit in hits:
        assert "metadata" in hit
|
|
23
|
+
|
|
24
|
+
def test_bm25s_persistence():
    """Persist a BM25S store to disk and query it through a freshly loaded store."""
    documents = [
        {"content": "Test document for persistence.", "metadata": {"source": "test1"}},
        {"content": "Another test document.", "metadata": {"source": "test2"}},
    ]

    # Reserve a path on disk; the handle is closed before the store uses it.
    handle = tempfile.NamedTemporaryFile(suffix='.pkl', delete=False)
    handle.close()
    temp_file = handle.name

    try:
        # First store: build the index and write it out.
        writer = BM25SVectorStore(persist_path=temp_file)
        writer.add_documents(documents)
        writer.persist()

        # Second store: read the index back from the same path.
        reader = BM25SVectorStore(persist_path=temp_file)
        reader.load()

        hits = reader.retrieve("persistence", top_k=1)
        assert len(hits) == 1
        assert "persistence" in hits[0]["content"]
    finally:
        # delete=False means the file must be removed by hand.
        if os.path.exists(temp_file):
            os.unlink(temp_file)
|
|
51
|
+
|
|
52
|
+
def test_bm25s_empty_query():
    """Query a one-document BM25S store with an empty string.

    The store may return nothing or may return the indexed document; both
    outcomes are accepted, so the content is checked only when a result
    comes back.
    """
    store = BM25SVectorStore()
    store.add_documents(
        [{"content": "Test document.", "metadata": {"source": "test1"}}]
    )

    results = store.retrieve("", top_k=1)

    if len(results) == 0:
        # No hits for an empty query is an acceptable outcome.
        return
    # Any hit must be the single document that was indexed.
    assert results[0]["content"] == "Test document."
|
|
68
|
+
|
|
69
|
+
def test_bm25s_no_documents():
    """Retrieving from a store that never received documents raises ValueError."""
    empty_store = BM25SVectorStore()
    expect_uninitialized = pytest.raises(
        ValueError, match="BM25S index not initialized"
    )

    with expect_uninitialized:
        empty_store.retrieve("test query")
|
tests/test_config.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import os
|
|
3
|
+
from kssrag.config import Config, VectorStoreType, ChunkerType
|
|
4
|
+
|
|
5
|
+
def test_config_new_options():
    """Config keeps the OCR mode, streaming flag and store type passed as keywords."""
    overrides = {
        "OCR_DEFAULT_MODE": "handwritten",
        "ENABLE_STREAMING": True,
        "VECTOR_STORE_TYPE": VectorStoreType.BM25S,
    }

    config = Config(**overrides)

    # Each option must read back equal to the value it was constructed with.
    for option, expected in overrides.items():
        assert getattr(config, option) == expected
|
|
16
|
+
|
|
17
|
+
def test_config_vector_store_types():
    """The BM25S and BM25 store types compare equal to their string values."""
    expectations = (
        (VectorStoreType.BM25S, "bm25s"),
        (VectorStoreType.BM25, "bm25"),
    )

    for store_type, raw_value in expectations:
        config = Config(VECTOR_STORE_TYPE=store_type)
        assert config.VECTOR_STORE_TYPE == raw_value
|
|
24
|
+
|
|
25
|
+
def test_config_chunker_types():
    """The IMAGE chunker type compares equal to the string "image"."""
    configured_chunker = Config(CHUNKER_TYPE=ChunkerType.IMAGE).CHUNKER_TYPE
    assert configured_chunker == "image"
|
|
29
|
+
|
|
30
|
+
def test_config_environment_variables():
    """Config reads OCR_DEFAULT_MODE and ENABLE_STREAMING from the environment.

    The variables are injected with ``mock.patch.dict`` so that ``os.environ``
    is put back exactly as it was when the block exits. That covers a failing
    assertion, and it restores any values the variables had before the test
    instead of deleting them.
    """
    # Imported locally so the test does not depend on the module's import block.
    from unittest import mock

    env_overrides = {
        "OCR_DEFAULT_MODE": "handwritten",
        "ENABLE_STREAMING": "true",
    }

    with mock.patch.dict(os.environ, env_overrides):
        config = Config()

        assert config.OCR_DEFAULT_MODE == "handwritten"
        assert config.ENABLE_STREAMING == True
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from kssrag.core.chunkers import ImageChunker, OCR_AVAILABLE
|
|
3
|
+
|
|
4
|
+
@pytest.mark.skipif(not OCR_AVAILABLE, reason="OCR dependencies not available")
def test_image_chunker_basic():
    """Smoke test: an ImageChunker built in typed mode reports that mode."""
    requested_mode = "typed"
    typed_chunker = ImageChunker(ocr_mode=requested_mode)
    assert typed_chunker.ocr_mode == requested_mode
|
|
9
|
+
|
|
10
|
+
@pytest.mark.skipif(not OCR_AVAILABLE, reason="OCR dependencies not available")
def test_image_chunker_modes():
    """ImageChunker accepts both the typed and the handwritten OCR mode."""
    # Build every chunker first, then verify each one kept its mode.
    chunkers = {
        mode: ImageChunker(ocr_mode=mode)
        for mode in ("typed", "handwritten")
    }

    for mode, chunker in chunkers.items():
        assert chunker.ocr_mode == mode
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import tempfile
|
|
3
|
+
import os
|
|
4
|
+
from kssrag import KSSRAG, Config
|
|
5
|
+
|
|
6
|
+
def test_bm25s_integration():
    """Load a text file into KSSRAG configured for BM25S and run one query."""
    source = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False)
    with source:
        source.write("Test document about Python programming and machine learning.")
    temp_file = source.name

    try:
        settings = Config(VECTOR_STORE_TYPE="bm25s", MAX_DOCS_FOR_TESTING=1)
        rag = KSSRAG(config=settings)
        rag.load_document(temp_file, format="text")

        answer = rag.query("Python programming")

        # Only the shape of the answer is checked: a non-empty string.
        assert isinstance(answer, str)
        assert len(answer) > 0
    finally:
        # delete=False means the file must be removed by hand.
        os.unlink(temp_file)
|
|
28
|
+
|
|
29
|
+
def test_streaming_integration():
    """Config accepts ENABLE_STREAMING=True; no streaming call is made."""
    # Only the configuration surface is exercised here, because real
    # streaming would require API calls.
    streaming_config = Config(ENABLE_STREAMING=True)
    assert streaming_config.ENABLE_STREAMING == True
|
tests/test_ocr.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import tempfile
|
|
3
|
+
import os
|
|
4
|
+
from unittest.mock import Mock, patch, MagicMock
|
|
5
|
+
from kssrag.utils.ocr_loader import OCRLoader
|
|
6
|
+
|
|
7
|
+
def test_ocr_loader_initialization():
    """OCRLoader exposes the object returned by the patched PaddleOCR as paddle_ocr."""
    fake_engine = Mock()

    with patch('kssrag.utils.ocr_loader.PaddleOCR', return_value=fake_engine):
        loader = OCRLoader()
        assert loader.paddle_ocr == fake_engine
|
|
15
|
+
|
|
16
|
+
def test_ocr_loader_invalid_mode():
    """extract_text raises ValueError when given an unknown OCR mode."""
    with patch('kssrag.utils.ocr_loader.PaddleOCR', return_value=Mock()):
        loader = OCRLoader()

        expect_invalid_mode = pytest.raises(ValueError, match="Invalid OCR mode")
        with expect_invalid_mode:
            loader.extract_text("test.jpg", "invalid_mode")
|
|
26
|
+
|
|
27
|
+
def test_ocr_loader_file_not_found():
    """extract_text raises FileNotFoundError for a path that does not exist."""
    with patch('kssrag.utils.ocr_loader.PaddleOCR', return_value=Mock()):
        loader = OCRLoader()

        with pytest.raises(FileNotFoundError):
            loader.extract_text("nonexistent.jpg", "typed")
|
|
37
|
+
|
|
38
|
+
def test_ocr_loader_typed_mode():
    """Typed mode returns the pytesseract result for the opened image.

    PaddleOCR, pytesseract, Image and os.path.exists are all patched inside
    the ocr_loader module, so no real file or OCR engine is touched.
    """
    with patch('kssrag.utils.ocr_loader.PaddleOCR') as paddle_cls, \
            patch('kssrag.utils.ocr_loader.pytesseract') as tesseract, \
            patch('kssrag.utils.ocr_loader.Image') as image_module, \
            patch('kssrag.utils.ocr_loader.os.path.exists') as path_exists:
        # Pretend the image file is present on disk.
        path_exists.return_value = True

        # Image.open hands back a stand-in image object.
        opened_image = MagicMock()
        image_module.open.return_value = opened_image

        # The OCR engine "recognizes" a fixed string.
        tesseract.image_to_string.return_value = "Typed text content"

        paddle_cls.return_value = Mock()

        extracted = OCRLoader().extract_text("test.jpg", "typed")

        assert extracted == "Typed text content"
        tesseract.image_to_string.assert_called_once_with(opened_image)
|
|
62
|
+
|
|
63
|
+
def test_ocr_loader_handwritten_mode():
    """Handwritten mode returns the text taken from the PaddleOCR result.

    PaddleOCR, cv2 and os.path.exists are patched inside the ocr_loader
    module, so no real image is read.
    """
    with patch('kssrag.utils.ocr_loader.PaddleOCR') as paddle_cls, \
            patch('kssrag.utils.ocr_loader.cv2') as cv2_module, \
            patch('kssrag.utils.ocr_loader.os.path.exists') as path_exists:
        # Pretend the image file is present on disk.
        path_exists.return_value = True

        # cv2.imread hands back a placeholder instead of pixel data.
        cv2_module.imread.return_value = "mock_image"

        # One recognized entry: no box, then [text, confidence].
        engine = Mock()
        engine.ocr.return_value = [[[None, ["Handwritten text", 0.9]]]]
        paddle_cls.return_value = engine

        extracted = OCRLoader().extract_text("test.jpg", "handwritten")

        assert extracted == "Handwritten text"
        engine.ocr.assert_called_once_with("mock_image", cls=True)
|
|
84
|
+
|
|
85
|
+
def test_ocr_loader_paddle_not_initialized():
    """Handwritten extraction raises RuntimeError when paddle_ocr is None."""
    # The patched constructor returns None to simulate a failed initialization.
    with patch('kssrag.utils.ocr_loader.PaddleOCR', return_value=None):
        loader = OCRLoader()
        # Force the failure state regardless of what __init__ stored.
        loader.paddle_ocr = None

        expect_uninitialized = pytest.raises(
            RuntimeError, match="PaddleOCR not initialized"
        )
        with expect_uninitialized:
            loader.extract_text("test.jpg", "handwritten")
|
|
95
|
+
|
|
96
|
+
def test_ocr_loader_empty_text():
    """Typed mode returns an empty string when OCR yields only whitespace.

    PaddleOCR, pytesseract, Image and os.path.exists are all patched inside
    the ocr_loader module, so no real file or OCR engine is touched.
    """
    with patch('kssrag.utils.ocr_loader.PaddleOCR') as paddle_cls, \
            patch('kssrag.utils.ocr_loader.pytesseract') as tesseract, \
            patch('kssrag.utils.ocr_loader.Image') as image_module, \
            patch('kssrag.utils.ocr_loader.os.path.exists') as path_exists:
        # Pretend the image file is present on disk.
        path_exists.return_value = True

        # Image.open hands back a stand-in image object.
        image_module.open.return_value = MagicMock()

        # The OCR engine finds nothing but whitespace.
        tesseract.image_to_string.return_value = "   "

        paddle_cls.return_value = Mock()

        extracted = OCRLoader().extract_text("test.jpg", "typed")

        # Whitespace-only output is expected to come back as an empty string.
        assert extracted == ""
|
|
119
|
+
|
|
120
|
+
@pytest.mark.skipif(not os.getenv('TEST_OCR'), reason="OCR tests require actual OCR dependencies")
def test_ocr_loader_integration():
    """Run OCRLoader in typed mode on a generated PNG (only when TEST_OCR is set)."""
    # Reserve a path on disk; the handle is closed before the image is saved.
    handle = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
    handle.close()
    temp_file = handle.name

    try:
        # Draw black text on a white canvas and save it to the reserved path.
        from PIL import Image, ImageDraw
        canvas = Image.new('RGB', (400, 100), color='white')
        ImageDraw.Draw(canvas).text((50, 40), "OCR Test Text", fill='black')
        canvas.save(temp_file)

        extracted = OCRLoader().extract_text(temp_file, "typed")

        # Recognized text can vary between OCR setups, so only the type is checked.
        assert isinstance(extracted, str)
    finally:
        # delete=False means the file must be removed by hand.
        if os.path.exists(temp_file):
            os.unlink(temp_file)
|
tests/test_streaming.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import asyncio
|
|
3
|
+
from kssrag.models.openrouter import OpenRouterLLM
|
|
4
|
+
|
|
5
|
+
def test_openrouter_streaming_initialization():
    """An OpenRouterLLM built with stream=True reports stream as true."""
    streaming_llm = OpenRouterLLM(stream=True)
    stream_flag = streaming_llm.stream
    assert stream_flag == True
|
|
9
|
+
|
|
10
|
+
def test_openrouter_non_streaming_initialization():
    """An OpenRouterLLM built with stream=False reports stream as false."""
    blocking_llm = OpenRouterLLM(stream=False)
    stream_flag = blocking_llm.stream
    assert stream_flag == False
|
|
14
|
+
|
|
15
|
+
def test_streaming_generator():
    """A subclass overriding predict_stream yields its chunks in order."""

    # Stub out predict_stream so that no request is made.
    class TestOpenRouterLLM(OpenRouterLLM):
        def predict_stream(self, messages):
            yield from ("Hello ", "World", "!")

    llm = TestOpenRouterLLM(stream=True)
    conversation = [{"role": "user", "content": "test"}]

    received = [chunk for chunk in llm.predict_stream(conversation)]

    assert received == ["Hello ", "World", "!"]
|
|
29
|
+
|
|
30
|
+
def test_streaming_fallback_to_non_streaming():
    """predict returns a str on an LLM whose predict_stream yields nothing."""

    class TestOpenRouterLLM(OpenRouterLLM):
        def predict_stream(self, messages):
            # Still a generator function, but it produces no chunks at all.
            yield from ()

    llm = TestOpenRouterLLM(stream=True)
    conversation = [{"role": "user", "content": "test"}]

    # The call is expected to complete without raising.
    answer = llm.predict(conversation)

    assert isinstance(answer, str)
|