piragi 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- piragi-0.1.0.dist-info/METADATA +149 -0
- piragi-0.1.0.dist-info/RECORD +14 -0
- piragi-0.1.0.dist-info/WHEEL +4 -0
- piragi-0.1.0.dist-info/licenses/LICENSE +21 -0
- ragi/__init__.py +28 -0
- ragi/async_updater.py +345 -0
- ragi/change_detection.py +211 -0
- ragi/chunking.py +150 -0
- ragi/core.py +318 -0
- ragi/embeddings.py +150 -0
- ragi/loader.py +131 -0
- ragi/retrieval.py +125 -0
- ragi/store.py +177 -0
- ragi/types.py +54 -0
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: piragi
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The best RAG interface yet - Built-in vector store, embeddings, citations, and auto-updates
|
|
5
|
+
Project-URL: Homepage, https://github.com/hemanth/ragi
|
|
6
|
+
Project-URL: Documentation, https://github.com/hemanth/ragi#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/hemanth/ragi
|
|
8
|
+
Project-URL: Issues, https://github.com/hemanth/ragi/issues
|
|
9
|
+
Author-email: Hemanth HM <hemanth.hm@gmail.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: ai,embeddings,llm,rag,retrieval,vector-search
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Requires-Dist: lancedb>=0.3.0
|
|
23
|
+
Requires-Dist: markitdown[all]>=0.0.1
|
|
24
|
+
Requires-Dist: openai>=1.0.0
|
|
25
|
+
Requires-Dist: pydantic>=2.0.0
|
|
26
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
27
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
28
|
+
Requires-Dist: torch>=2.0.0
|
|
29
|
+
Requires-Dist: transformers>=4.51.0
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# Ragi
|
|
40
|
+
|
|
41
|
+
**The best RAG interface yet.**
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from ragi import Ragi
|
|
45
|
+
|
|
46
|
+
kb = Ragi(["./docs", "./code/**/*.py", "https://api.example.com/docs"])
|
|
47
|
+
answer = kb.ask("How do I deploy this?")
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
That's it. Built-in vector store, embeddings, citations, and auto-updates. Free & local by default.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install piragi
|
|
58
|
+
|
|
59
|
+
# Optional: Install Ollama for local LLM
|
|
60
|
+
curl -fsSL https://ollama.com/install.sh | sh
|
|
61
|
+
ollama pull llama3.2
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Features
|
|
67
|
+
|
|
68
|
+
- **Simple Setup** - Works with free local models out of the box
|
|
69
|
+
- **All Formats** - PDF, Word, Excel, Markdown, Code, URLs, Images, Audio
|
|
70
|
+
- **Auto-Updates** - Background refresh, queries never blocked
|
|
71
|
+
- **Smart Citations** - Every answer includes sources
|
|
72
|
+
- **OpenAI Compatible** - Drop-in support for any OpenAI-compatible API
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Examples
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
# Basic
|
|
80
|
+
kb = Ragi("./docs")
|
|
81
|
+
answer = kb("What is this?")
|
|
82
|
+
|
|
83
|
+
# Multiple sources
|
|
84
|
+
kb = Ragi(["./docs/*.pdf", "https://api.docs.com", "./code/**/*.py"])
|
|
85
|
+
|
|
86
|
+
# OpenAI
|
|
87
|
+
kb = Ragi("./docs", config={
|
|
88
|
+
"llm": {"model": "gpt-4o-mini", "api_key": "sk-..."},
|
|
89
|
+
"embedding": {"model": "text-embedding-3-small", "api_key": "sk-..."}
|
|
90
|
+
})
|
|
91
|
+
|
|
92
|
+
# Filter
|
|
93
|
+
answer = kb.filter(file_type="pdf").ask("What's in the PDFs?")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Configuration
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# Defaults (all optional)
|
|
102
|
+
config = {
|
|
103
|
+
"llm": {
|
|
104
|
+
"model": "llama3.2",
|
|
105
|
+
"base_url": "http://localhost:11434/v1"
|
|
106
|
+
},
|
|
107
|
+
"embedding": {
|
|
108
|
+
"model": "nvidia/llama-embed-nemotron-8b"
|
|
109
|
+
},
|
|
110
|
+
"auto_update": {
|
|
111
|
+
"enabled": True,
|
|
112
|
+
"interval": 300 # seconds
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Auto-Updates
|
|
120
|
+
|
|
121
|
+
Changes are detected and refreshed automatically in the background, so update checks add zero latency to queries.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
kb = Ragi(["./docs", "https://api.docs.com"])
|
|
125
|
+
# That's it - auto-updates enabled by default
|
|
126
|
+
|
|
127
|
+
# Disable if needed
|
|
128
|
+
kb = Ragi("./docs", config={"auto_update": {"enabled": False}})
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## API
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
kb = Ragi(sources, persist_dir=".ragi", config=None)
|
|
137
|
+
kb.add("./more-docs")
|
|
138
|
+
kb.ask(query, top_k=5)
|
|
139
|
+
kb(query) # Shorthand
|
|
140
|
+
kb.filter(**metadata).ask(query)
|
|
141
|
+
kb.count()
|
|
142
|
+
kb.clear()
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Full docs: [API.md](API.md)
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
MIT License | **Ragi** = **R**etrieval **A**ugmented **G**eneration **I**nterface
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
ragi/__init__.py,sha256=BKXHlsfnnCP9bIc-WfbztwKAUbQUAI38oOy8iEHMO9k,781
|
|
2
|
+
ragi/async_updater.py,sha256=rjiaMUPSJ0cDzCdKRdifZ1yU6c70D_N0t9zkj281Jws,11218
|
|
3
|
+
ragi/change_detection.py,sha256=m2-bMywK9WDVrjniKJyTrrkUzzsOvGXAWf5ZFIFVLiU,6863
|
|
4
|
+
ragi/chunking.py,sha256=L9tRiLL0Hz10bD4gaLc-eYyviTTfXka2_QdPpZ_FBmQ,4525
|
|
5
|
+
ragi/core.py,sha256=IT558mpiUhE7JpeTczC3f05vtSs8U_MOEk5MPiO1btQ,10194
|
|
6
|
+
ragi/embeddings.py,sha256=tPVB8fc73BWXMvwAbOfOmiyVYrrs84d9lJfeKf5l00M,5094
|
|
7
|
+
ragi/loader.py,sha256=WBiDn6iuJLGHq31O0Q2nuiDNVtc3hzbCOIxwevqOtMo,3978
|
|
8
|
+
ragi/retrieval.py,sha256=uTMqLCZwpjI03_xVq8OIGmtKyrbCc6-sVpoNH2eh8r4,3788
|
|
9
|
+
ragi/store.py,sha256=7mYDnD9DW065nklrUjgngVDeY97rF8G4i-tLfI469wc,5176
|
|
10
|
+
ragi/types.py,sha256=5EY50FJNB7CceGQkpX-5O3vLkWRsElKvpf0oxizxPOs,2142
|
|
11
|
+
piragi-0.1.0.dist-info/METADATA,sha256=NDSacGNkfd4Ms_Yit9CKIGFtWpq_2TGoeM7F3BRrP9M,3747
|
|
12
|
+
piragi-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
13
|
+
piragi-0.1.0.dist-info/licenses/LICENSE,sha256=p8azJxT7o36aVnN6FDXGGTs0JjfOMab1J2REesqgoJM,1074
|
|
14
|
+
piragi-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Ragi Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
ragi/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Ragi - Zero-setup RAG library with auto-chunking, embeddings, and smart citations.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
>>> from ragi import Ragi
|
|
6
|
+
>>>
|
|
7
|
+
>>> # One-liner setup and query
|
|
8
|
+
>>> kb = Ragi("./docs")
|
|
9
|
+
>>> answer = kb.ask("How do I install this?")
|
|
10
|
+
>>>
|
|
11
|
+
>>> # Access answer and citations
|
|
12
|
+
>>> print(answer.text)
|
|
13
|
+
>>> for citation in answer.citations:
|
|
14
|
+
... print(f"Source: {citation.source}")
|
|
15
|
+
... print(f"Relevance: {citation.score:.2f}")
|
|
16
|
+
>>>
|
|
17
|
+
>>> # Callable shorthand
|
|
18
|
+
>>> answer = kb("What's the API?")
|
|
19
|
+
>>>
|
|
20
|
+
>>> # Filter by metadata
|
|
21
|
+
>>> answer = kb.filter(type="documentation").ask("How to configure?")
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from .core import Ragi
|
|
25
|
+
from .types import Answer, Citation
|
|
26
|
+
|
|
27
|
+
__version__ = "0.1.0"
|
|
28
|
+
__all__ = ["Ragi", "Answer", "Citation"]
|
ragi/async_updater.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
"""Async background updater for automatic document refresh."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
|
7
|
+
from queue import Queue, Empty
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
from .change_detection import ChangeDetector
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class UpdateTask:
    """A single queued request to check one source for changes."""

    def __init__(self, source: str, priority: int = 0):
        """
        Capture what to check and when the request was created.

        Args:
            source: File path or URL the task refers to
            priority: Caller-supplied priority value (defaults to 0)
        """
        # created_at is a wall-clock timestamp taken at construction time.
        self.source, self.priority, self.created_at = source, priority, time.time()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AsyncUpdater:
    """
    Background updater that checks and refreshes sources asynchronously.

    A scheduler thread periodically looks for registered sources that are due
    for a check and puts an UpdateTask for each on a queue. Worker threads take
    tasks off the queue, ask ChangeDetector whether the source changed and, if
    so, invoke the refresh callback. All threads are daemon threads, so they
    never keep the interpreter alive on their own.
    """

    # Seconds between two scheduler passes over the registered sources.
    _SCHEDULER_TICK = 10.0
    # Seconds a worker blocks on the queue before re-checking the stop flag.
    _QUEUE_POLL = 1.0
    # Seconds stop() waits for each thread to finish its current work.
    _JOIN_TIMEOUT = 5.0

    def __init__(
        self,
        refresh_callback: Callable[[Union[str, List[str]]], None],
        check_interval: float = 300.0,
        max_workers: int = 2,
    ):
        """
        Initialize the async updater.

        Args:
            refresh_callback: Function to call when refresh is needed (e.g., kb.refresh)
            check_interval: Default interval between checks in seconds
            max_workers: Number of background workers
        """
        self.refresh_callback = refresh_callback
        self.check_interval = check_interval
        self.max_workers = max_workers

        # Source metadata tracking (source -> metadata dict from ChangeDetector)
        self.sources_metadata: Dict[str, Dict[str, Any]] = {}

        # Update queue and control
        self.update_queue: Queue = Queue()
        self.running = False
        self.workers: List[threading.Thread] = []
        self._lock = threading.RLock()
        # Set by stop(); lets sleeping threads wake up immediately. A fresh
        # event is created on every start() so that a thread from a previous
        # run that outlived its join timeout can never be revived.
        self._stop_event = threading.Event()
        # Sources that are queued (or being processed) right now. Used to
        # keep the scheduler from queueing the same source over and over
        # while a slow check is still in flight.
        self._pending = set()

        # Statistics
        self.stats = {
            "checks_performed": 0,
            "updates_performed": 0,
            "last_check_time": None,
        }

    def register_source(
        self,
        source: str,
        content: str,
        check_interval: Optional[float] = None,
    ) -> None:
        """
        Register a source for automatic update checking.

        Args:
            source: File path or URL
            content: Current content
            check_interval: Custom check interval (uses default if None)
        """
        # Building the metadata does not touch shared state, so do it before
        # taking the lock.
        if ChangeDetector.is_url(source):
            metadata = ChangeDetector.get_url_metadata(source, content)
        else:
            metadata = ChangeDetector.get_file_metadata(source, content)

        if check_interval is not None:
            metadata["check_interval"] = check_interval
        else:
            # Fall back to this updater's default only when ChangeDetector
            # did not already provide an interval of its own.
            metadata.setdefault("check_interval", self.check_interval)
        # The scheduler reads this key unconditionally; make sure it exists.
        metadata.setdefault("last_checked", time.time())

        with self._lock:
            self.sources_metadata[source] = metadata
        logger.info(f"Registered source for auto-update: {source}")

    def unregister_source(self, source: str) -> None:
        """Remove a source from auto-update tracking."""
        with self._lock:
            removed = self.sources_metadata.pop(source, None) is not None
        if removed:
            logger.info(f"Unregistered source: {source}")

    def start(self) -> None:
        """Start background update workers."""
        with self._lock:
            if self.running:
                logger.warning("Updater already running")
                return

            self.running = True
            stop_event = threading.Event()
            self._stop_event = stop_event

            # Worker threads
            threads = [
                threading.Thread(
                    target=self._worker_loop,
                    args=(stop_event,),
                    name=f"AsyncUpdater-Worker-{i}",
                    daemon=True,
                )
                for i in range(self.max_workers)
            ]
            # Scheduler thread
            threads.append(
                threading.Thread(
                    target=self._scheduler_loop,
                    args=(stop_event,),
                    name="AsyncUpdater-Scheduler",
                    daemon=True,
                )
            )
            for thread in threads:
                thread.start()
            self.workers.extend(threads)

        logger.info(f"Started {self.max_workers} update workers + scheduler")

    def stop(self) -> None:
        """Stop background workers."""
        with self._lock:
            self.running = False
            self._stop_event.set()
            threads = list(self.workers)
            self.workers.clear()
        logger.info("Stopping async updater...")

        # Wait for workers to finish current tasks. The lock is NOT held
        # here because the threads need it to finish. Skip the calling
        # thread: stop() may be invoked from inside the refresh callback and
        # a thread cannot join itself.
        current = threading.current_thread()
        for thread in threads:
            if thread is not current and thread.is_alive():
                thread.join(timeout=self._JOIN_TIMEOUT)

        logger.info("Async updater stopped")

    def _scheduler_loop(self, stop_event: Optional[threading.Event] = None) -> None:
        """
        Main scheduler loop that checks which sources need updating.
        Runs in background thread.

        Args:
            stop_event: Event that ends this loop when set (defaults to the
                updater's current stop event)
        """
        if stop_event is None:
            stop_event = self._stop_event

        while self.running and not stop_event.is_set():
            try:
                current_time = time.time()

                with self._lock:
                    # A source is due when enough time has passed and it is
                    # not already waiting in the queue.
                    due = [
                        source
                        for source, metadata in self.sources_metadata.items()
                        if source not in self._pending
                        and ChangeDetector.should_check_now(
                            metadata["last_checked"], metadata["check_interval"]
                        )
                    ]
                    self._pending.update(due)

                # Queue checks
                for source in due:
                    self.update_queue.put(UpdateTask(source))

                with self._lock:
                    self.stats["last_check_time"] = current_time

            except Exception as e:
                logger.error(f"Scheduler error: {e}")

            # Look for due sources every tick; returns early when stop() is called.
            stop_event.wait(self._SCHEDULER_TICK)

    def _worker_loop(self, stop_event: Optional[threading.Event] = None) -> None:
        """
        Worker loop that processes update tasks.
        Runs in background thread.

        Args:
            stop_event: Event that ends this loop when set (defaults to the
                updater's current stop event)
        """
        if stop_event is None:
            stop_event = self._stop_event

        while self.running and not stop_event.is_set():
            # Get task from queue (timeout to allow re-checking the stop flag)
            try:
                task = self.update_queue.get(timeout=self._QUEUE_POLL)
            except Empty:
                continue

            try:
                self._check_and_refresh(task.source)
            except Exception as e:
                logger.error(f"Worker error: {e}")
            finally:
                # Always release the task, even when processing failed, so
                # the source can be scheduled again and Queue.join() callers
                # are not left hanging.
                with self._lock:
                    self._pending.discard(task.source)
                self.update_queue.task_done()

    def _check_and_refresh(self, source: str) -> None:
        """
        Check if source changed and refresh if needed.

        Errors are logged and swallowed so that one bad source cannot take a
        worker thread down.

        Args:
            source: Source to check
        """
        try:
            with self._lock:
                metadata = self.sources_metadata.get(source)
                if metadata is None:
                    return
                # Work on a snapshot so the (possibly slow) change check
                # below runs without holding the lock.
                snapshot = dict(metadata)
                self.stats["checks_performed"] += 1

            # Check for changes
            new_metadata: Dict[str, Any] = {}
            if ChangeDetector.is_url(source):
                result = ChangeDetector.check_url_changed(
                    source,
                    snapshot.get("etag"),
                    snapshot.get("last_modified"),
                )
                changed = result.get("changed", False)
                for key in ("etag", "last_modified"):
                    if key in result:
                        new_metadata[key] = result[key]
            else:
                changed = ChangeDetector.check_file_changed(
                    source, snapshot.get("mtime"), snapshot["content_hash"]
                )

            # Update last_checked timestamp (and any new URL validators)
            new_metadata["last_checked"] = time.time()
            with self._lock:
                current = self.sources_metadata.get(source)
                if current is None:
                    # Unregistered while we were checking: nothing to record
                    # and nothing to refresh.
                    return
                current.update(new_metadata)

            # Refresh if changed
            if changed:
                logger.info(f"Change detected in {source}, refreshing...")
                self.refresh_callback(source)
                with self._lock:
                    self.stats["updates_performed"] += 1

                # NOTE(review): the stored mtime/content_hash are not updated
                # after a refresh, so presumably a changed file keeps being
                # reported as changed on every later check until it is
                # re-registered with its new content - confirm against
                # ChangeDetector.check_file_changed.

        except Exception as e:
            logger.error(f"Error checking source {source}: {e}")

    def queue_update(self, source: str, priority: int = 0) -> None:
        """
        Manually queue a source for update check.

        Args:
            source: Source to check
            priority: Priority (higher = more urgent). Stored on the task;
                the queue itself is FIFO and does not reorder by priority.
        """
        with self._lock:
            self._pending.add(source)
        self.update_queue.put(UpdateTask(source, priority))
        logger.debug(f"Queued update check for {source}")

    def get_stats(self) -> Dict[str, Any]:
        """Get updater statistics."""
        with self._lock:
            return {
                **self.stats,
                "registered_sources": len(self.sources_metadata),
                "queue_size": self.update_queue.qsize(),
                "running": self.running,
            }
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class AsyncUpdaterMixin:
    """
    Mixin to add async update capabilities to Ragi.

    Usage:
        class AutoUpdateRagi(AsyncUpdaterMixin, Ragi):
            pass

        kb = AutoUpdateRagi("./docs", auto_update=True)

    The host class must provide ``add(sources, **kwargs)`` and ``refresh``;
    the latter is handed to the updater as its refresh callback.
    """

    def __init__(self, *args, auto_update: bool = False, **kwargs):
        """
        Initialize with optional auto-update.

        Args:
            auto_update: Enable background auto-updates
            **kwargs: Forwarded to the next class in the MRO. When a
                ``config`` keyword is present, its ``"auto_update"`` dict is
                read for:
                - interval: Check interval in seconds (default 300.0)
                - workers: Number of background workers (default 2)
        """
        super().__init__(*args, **kwargs)

        self.auto_update_enabled = auto_update
        self.updater: Optional[AsyncUpdater] = None

        if auto_update:
            # Extract auto-update config. `or {}` covers an explicit
            # config=None (or "auto_update": None), which would otherwise
            # crash on .get().
            config = kwargs.get("config") or {}
            auto_config = config.get("auto_update") or {}

            interval = auto_config.get("interval", 300.0)
            workers = auto_config.get("workers", 2)

            # Initialize updater
            self.updater = AsyncUpdater(
                refresh_callback=self.refresh,
                check_interval=interval,
                max_workers=workers,
            )

            # Start background workers
            self.updater.start()

    def add(self, sources, **kwargs):
        """Override add to register sources with auto-updater."""
        result = super().add(sources, **kwargs)

        # getattr: add() may be called by the base constructor, i.e. before
        # __init__ above has set these attributes. Sources added that early
        # are not registered for auto-update.
        updater = getattr(self, "updater", None)
        if getattr(self, "auto_update_enabled", False) and updater:
            # Note: Would need access to loaded documents to register properly
            # This is a simplified version
            source_list = list(sources) if isinstance(sources, (list, tuple)) else [sources]
            for source in source_list:
                # Register with default metadata (empty content placeholder;
                # the actual loaded content is not available here)
                updater.register_source(source, "", check_interval=None)

        return result

    def get_update_stats(self) -> Dict[str, Any]:
        """Get auto-update statistics."""
        updater = getattr(self, "updater", None)
        if updater:
            return updater.get_stats()
        return {"auto_update_enabled": False}

    def stop_auto_update(self) -> None:
        """Stop auto-update background workers."""
        updater = getattr(self, "updater", None)
        if updater:
            updater.stop()

    def __del__(self):
        """Cleanup on deletion."""
        # The attribute may be missing if construction failed part-way.
        # Note that running updater threads hold the refresh callback (a
        # bound method of this object), so call stop_auto_update() explicitly
        # rather than relying on garbage collection.
        updater = getattr(self, "updater", None)
        if updater:
            updater.stop()
|