agent-runtime-core 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. agent_runtime_core/__init__.py +108 -1
  2. agent_runtime_core/agentic_loop.py +254 -0
  3. agent_runtime_core/config.py +54 -4
  4. agent_runtime_core/config_schema.py +307 -0
  5. agent_runtime_core/interfaces.py +106 -0
  6. agent_runtime_core/json_runtime.py +509 -0
  7. agent_runtime_core/llm/__init__.py +80 -7
  8. agent_runtime_core/llm/anthropic.py +133 -12
  9. agent_runtime_core/llm/models_config.py +180 -0
  10. agent_runtime_core/memory/__init__.py +70 -0
  11. agent_runtime_core/memory/manager.py +554 -0
  12. agent_runtime_core/memory/mixin.py +294 -0
  13. agent_runtime_core/multi_agent.py +569 -0
  14. agent_runtime_core/persistence/__init__.py +2 -0
  15. agent_runtime_core/persistence/file.py +277 -0
  16. agent_runtime_core/rag/__init__.py +65 -0
  17. agent_runtime_core/rag/chunking.py +224 -0
  18. agent_runtime_core/rag/indexer.py +253 -0
  19. agent_runtime_core/rag/retriever.py +261 -0
  20. agent_runtime_core/runner.py +193 -15
  21. agent_runtime_core/tool_calling_agent.py +88 -130
  22. agent_runtime_core/tools.py +179 -0
  23. agent_runtime_core/vectorstore/__init__.py +193 -0
  24. agent_runtime_core/vectorstore/base.py +138 -0
  25. agent_runtime_core/vectorstore/embeddings.py +242 -0
  26. agent_runtime_core/vectorstore/sqlite_vec.py +328 -0
  27. agent_runtime_core/vectorstore/vertex.py +295 -0
  28. {agent_runtime_core-0.7.0.dist-info → agent_runtime_core-0.7.1.dist-info}/METADATA +202 -1
  29. agent_runtime_core-0.7.1.dist-info/RECORD +57 -0
  30. agent_runtime_core-0.7.0.dist-info/RECORD +0 -39
  31. {agent_runtime_core-0.7.0.dist-info → agent_runtime_core-0.7.1.dist-info}/WHEEL +0 -0
  32. {agent_runtime_core-0.7.0.dist-info → agent_runtime_core-0.7.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,253 @@
1
+ """
2
+ Portable knowledge indexing service for RAG.
3
+
4
+ Handles chunking, embedding, and storing knowledge in vector stores.
5
+ This module has no Django dependencies and can be used standalone.
6
+ """
7
+
8
+ import hashlib
9
+ import logging
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime
12
+ from typing import Optional, Protocol
13
+ from uuid import uuid4
14
+
15
+ from agent_runtime_core.rag.chunking import chunk_text, ChunkingConfig, TextChunk
16
+ from agent_runtime_core.vectorstore.base import VectorStore
17
+ from agent_runtime_core.vectorstore.embeddings import EmbeddingClient
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ @dataclass
23
+ class IndexedDocument:
24
+ """Represents an indexed document with its metadata."""
25
+
26
+ source_id: str
27
+ """Unique identifier for the source document."""
28
+
29
+ name: str = ""
30
+ """Human-readable name for the document."""
31
+
32
+ content_hash: str = ""
33
+ """Hash of the content for change detection."""
34
+
35
+ chunk_count: int = 0
36
+ """Number of chunks created from this document."""
37
+
38
+ indexed_at: Optional[datetime] = None
39
+ """When the document was last indexed."""
40
+
41
+ metadata: dict = field(default_factory=dict)
42
+ """Additional metadata about the document."""
43
+
44
+
45
+ @dataclass
46
+ class IndexingResult:
47
+ """Result of an indexing operation."""
48
+
49
+ status: str
50
+ """Status: 'success', 'skipped', or 'error'."""
51
+
52
+ source_id: str
53
+ """ID of the indexed source."""
54
+
55
+ chunks_indexed: int = 0
56
+ """Number of chunks that were indexed."""
57
+
58
+ message: str = ""
59
+ """Optional message with details."""
60
+
61
+ error: Optional[str] = None
62
+ """Error message if status is 'error'."""
63
+
64
+
65
+ def _compute_content_hash(content: str) -> str:
66
+ """Compute a hash of the content for change detection."""
67
+ return hashlib.sha256(content.encode()).hexdigest()[:16]
68
+
69
+
70
+ class KnowledgeIndexer:
71
+ """
72
+ Portable service to index knowledge sources for RAG retrieval.
73
+
74
+ Handles:
75
+ - Chunking text into appropriate sizes
76
+ - Generating embeddings via configured provider
77
+ - Storing vectors in the configured vector store
78
+
79
+ This class has no Django dependencies and can be used in standalone
80
+ Python scripts or any other context.
81
+
82
+ Usage:
83
+ from agent_runtime_core.vectorstore import get_vector_store, get_embedding_client
84
+ from agent_runtime_core.rag import KnowledgeIndexer
85
+
86
+ vector_store = get_vector_store("sqlite_vec", path="./vectors.db")
87
+ embedding_client = get_embedding_client("openai")
88
+
89
+ indexer = KnowledgeIndexer(vector_store, embedding_client)
90
+ result = await indexer.index_text(
91
+ text="Your document content...",
92
+ source_id="doc-1",
93
+ metadata={"name": "My Document"},
94
+ )
95
+ """
96
+
97
+ def __init__(
98
+ self,
99
+ vector_store: VectorStore,
100
+ embedding_client: EmbeddingClient,
101
+ default_chunking_config: Optional[ChunkingConfig] = None,
102
+ ):
103
+ """
104
+ Initialize the indexer.
105
+
106
+ Args:
107
+ vector_store: VectorStore instance for storing embeddings
108
+ embedding_client: EmbeddingClient instance for generating embeddings
109
+ default_chunking_config: Default chunking configuration
110
+ """
111
+ self._vector_store = vector_store
112
+ self._embedding_client = embedding_client
113
+ self._default_chunking_config = default_chunking_config or ChunkingConfig()
114
+
115
+ async def index_text(
116
+ self,
117
+ text: str,
118
+ source_id: str,
119
+ metadata: Optional[dict] = None,
120
+ chunking_config: Optional[ChunkingConfig] = None,
121
+ force: bool = False,
122
+ content_hash: Optional[str] = None,
123
+ ) -> IndexingResult:
124
+ """
125
+ Index text content for RAG retrieval.
126
+
127
+ Args:
128
+ text: The text content to index
129
+ source_id: Unique identifier for this content source
130
+ metadata: Optional metadata to store with each chunk
131
+ chunking_config: Optional chunking configuration override
132
+ force: If True, re-index even if content hasn't changed
133
+ content_hash: Optional pre-computed content hash for change detection
134
+
135
+ Returns:
136
+ IndexingResult with status and details
137
+ """
138
+ if metadata is None:
139
+ metadata = {}
140
+
141
+ if chunking_config is None:
142
+ chunking_config = self._default_chunking_config
143
+
144
+ # Compute content hash for change detection
145
+ computed_hash = content_hash or _compute_content_hash(text)
146
+
147
+ try:
148
+ if not text or not text.strip():
149
+ return IndexingResult(
150
+ status='error',
151
+ source_id=source_id,
152
+ error='No content to index',
153
+ )
154
+
155
+ # Chunk the content
156
+ chunks = chunk_text(
157
+ text,
158
+ config=chunking_config,
159
+ metadata={
160
+ 'source_id': source_id,
161
+ **metadata,
162
+ },
163
+ )
164
+
165
+ if not chunks:
166
+ return IndexingResult(
167
+ status='error',
168
+ source_id=source_id,
169
+ error='Content produced no chunks',
170
+ )
171
+
172
+ # Delete existing vectors for this source
173
+ await self._delete_existing_vectors(source_id)
174
+
175
+ # Generate embeddings and store
176
+ chunk_ids = await self._embed_and_store_chunks(source_id, chunks, metadata)
177
+
178
+ return IndexingResult(
179
+ status='success',
180
+ source_id=source_id,
181
+ chunks_indexed=len(chunk_ids),
182
+ )
183
+
184
+ except Exception as e:
185
+ logger.exception(f"Error indexing source {source_id}")
186
+ return IndexingResult(
187
+ status='error',
188
+ source_id=source_id,
189
+ error=str(e),
190
+ )
191
+
192
+ async def _delete_existing_vectors(self, source_id: str) -> int:
193
+ """Delete existing vectors for a source."""
194
+ try:
195
+ deleted = await self._vector_store.delete_by_filter({
196
+ 'source_id': str(source_id),
197
+ })
198
+ logger.debug(f"Deleted {deleted} existing vectors for source {source_id}")
199
+ return deleted
200
+ except Exception as e:
201
+ logger.warning(f"Error deleting existing vectors: {e}")
202
+ return 0
203
+
204
+ async def _embed_and_store_chunks(
205
+ self,
206
+ source_id: str,
207
+ chunks: list[TextChunk],
208
+ metadata: dict,
209
+ ) -> list[str]:
210
+ """Generate embeddings and store chunks in vector store."""
211
+ chunk_ids = []
212
+
213
+ # Batch embed for efficiency
214
+ texts = [chunk.text for chunk in chunks]
215
+ embeddings = await self._embedding_client.embed_batch(texts)
216
+
217
+ # Store each chunk
218
+ items = []
219
+ for chunk, embedding in zip(chunks, embeddings):
220
+ chunk_id = f"{source_id}_{chunk.index}"
221
+ items.append((
222
+ chunk_id,
223
+ embedding,
224
+ chunk.text,
225
+ {
226
+ 'source_id': str(source_id),
227
+ 'chunk_index': chunk.index,
228
+ 'total_chunks': len(chunks),
229
+ **metadata,
230
+ },
231
+ ))
232
+ chunk_ids.append(chunk_id)
233
+
234
+ await self._vector_store.add_batch(items)
235
+ return chunk_ids
236
+
237
+ async def delete_source(self, source_id: str) -> int:
238
+ """
239
+ Delete all vectors for a source.
240
+
241
+ Args:
242
+ source_id: The source ID to delete vectors for
243
+
244
+ Returns:
245
+ Number of vectors deleted
246
+ """
247
+ return await self._delete_existing_vectors(source_id)
248
+
249
+ async def close(self) -> None:
250
+ """Close the indexer and release resources."""
251
+ await self._vector_store.close()
252
+ await self._embedding_client.close()
253
+
@@ -0,0 +1,261 @@
1
+ """
2
+ Portable knowledge retrieval service for RAG.
3
+
4
+ Retrieves relevant knowledge from vector stores based on query similarity.
5
+ This module has no Django dependencies and can be used standalone.
6
+ """
7
+
8
+ import logging
9
+ from dataclasses import dataclass, field
10
+ from typing import Optional
11
+
12
+ from agent_runtime_core.vectorstore.base import VectorStore, VectorSearchResult
13
+ from agent_runtime_core.vectorstore.embeddings import EmbeddingClient
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ @dataclass
19
+ class RetrievalConfig:
20
+ """Configuration for knowledge retrieval."""
21
+
22
+ top_k: int = 5
23
+ """Maximum number of chunks to retrieve."""
24
+
25
+ similarity_threshold: float = 0.0
26
+ """Minimum similarity score (0-1) to include in results."""
27
+
28
+ include_metadata: bool = True
29
+ """Whether to include metadata in results."""
30
+
31
+
32
+ @dataclass
33
+ class RetrievedChunk:
34
+ """A retrieved chunk with its metadata and score."""
35
+
36
+ content: str
37
+ """The chunk text content."""
38
+
39
+ score: float
40
+ """Similarity score (higher = more similar)."""
41
+
42
+ source_id: Optional[str] = None
43
+ """ID of the source document."""
44
+
45
+ source_name: Optional[str] = None
46
+ """Human-readable name of the source."""
47
+
48
+ chunk_index: Optional[int] = None
49
+ """Index of this chunk within the source."""
50
+
51
+ metadata: dict = field(default_factory=dict)
52
+ """Additional metadata."""
53
+
54
+
55
+ class KnowledgeRetriever:
56
+ """
57
+ Portable service to retrieve relevant knowledge for RAG at runtime.
58
+
59
+ Handles:
60
+ - Embedding user queries
61
+ - Searching vector store for similar content
62
+ - Filtering by source and similarity threshold
63
+ - Formatting retrieved content for inclusion in prompts
64
+
65
+ This class has no Django dependencies and can be used in standalone
66
+ Python scripts or any other context.
67
+
68
+ Usage:
69
+ from agent_runtime_core.vectorstore import get_vector_store, get_embedding_client
70
+ from agent_runtime_core.rag import KnowledgeRetriever
71
+
72
+ vector_store = get_vector_store("sqlite_vec", path="./vectors.db")
73
+ embedding_client = get_embedding_client("openai")
74
+
75
+ retriever = KnowledgeRetriever(vector_store, embedding_client)
76
+ results = await retriever.retrieve(
77
+ query="What is the return policy?",
78
+ top_k=5,
79
+ )
80
+ """
81
+
82
+ def __init__(
83
+ self,
84
+ vector_store: VectorStore,
85
+ embedding_client: EmbeddingClient,
86
+ default_config: Optional[RetrievalConfig] = None,
87
+ ):
88
+ """
89
+ Initialize the retriever.
90
+
91
+ Args:
92
+ vector_store: VectorStore instance for searching embeddings
93
+ embedding_client: EmbeddingClient instance for embedding queries
94
+ default_config: Default retrieval configuration
95
+ """
96
+ self._vector_store = vector_store
97
+ self._embedding_client = embedding_client
98
+ self._default_config = default_config or RetrievalConfig()
99
+
100
+ async def retrieve(
101
+ self,
102
+ query: str,
103
+ top_k: Optional[int] = None,
104
+ similarity_threshold: Optional[float] = None,
105
+ filter: Optional[dict] = None,
106
+ ) -> list[RetrievedChunk]:
107
+ """
108
+ Retrieve relevant knowledge chunks for a query.
109
+
110
+ Args:
111
+ query: The user's query to find relevant content for
112
+ top_k: Maximum number of chunks to retrieve (overrides default)
113
+ similarity_threshold: Minimum similarity score (overrides default)
114
+ filter: Optional metadata filter for the search
115
+
116
+ Returns:
117
+ List of RetrievedChunk objects ordered by relevance
118
+ """
119
+ if top_k is None:
120
+ top_k = self._default_config.top_k
121
+ if similarity_threshold is None:
122
+ similarity_threshold = self._default_config.similarity_threshold
123
+
124
+ # Embed the query
125
+ query_vector = await self._embedding_client.embed(query)
126
+
127
+ # Search vector store
128
+ results = await self._vector_store.search(
129
+ query_vector=query_vector,
130
+ limit=top_k,
131
+ filter=filter,
132
+ )
133
+
134
+ # Filter by similarity threshold and convert to RetrievedChunk
135
+ retrieved = []
136
+ for result in results:
137
+ if result.score >= similarity_threshold:
138
+ retrieved.append(RetrievedChunk(
139
+ content=result.content,
140
+ score=result.score,
141
+ source_id=result.metadata.get('source_id'),
142
+ source_name=result.metadata.get('name') or result.metadata.get('source_name'),
143
+ chunk_index=result.metadata.get('chunk_index'),
144
+ metadata=result.metadata if self._default_config.include_metadata else {},
145
+ ))
146
+
147
+ return retrieved
148
+
149
+ async def retrieve_for_sources(
150
+ self,
151
+ query: str,
152
+ source_ids: list[str],
153
+ top_k: Optional[int] = None,
154
+ similarity_threshold: Optional[float] = None,
155
+ ) -> list[RetrievedChunk]:
156
+ """
157
+ Retrieve relevant chunks from specific sources.
158
+
159
+ Args:
160
+ query: The user's query
161
+ source_ids: List of source IDs to search within
162
+ top_k: Maximum number of chunks to retrieve
163
+ similarity_threshold: Minimum similarity score
164
+
165
+ Returns:
166
+ List of RetrievedChunk objects
167
+ """
168
+ # For now, we search all and filter
169
+ # TODO: Support OR filters in vector stores for efficiency
170
+ all_results = []
171
+
172
+ for source_id in source_ids:
173
+ results = await self.retrieve(
174
+ query=query,
175
+ top_k=top_k,
176
+ similarity_threshold=similarity_threshold,
177
+ filter={'source_id': source_id},
178
+ )
179
+ all_results.extend(results)
180
+
181
+ # Sort by score and limit
182
+ all_results.sort(key=lambda x: x.score, reverse=True)
183
+ if top_k:
184
+ all_results = all_results[:top_k]
185
+
186
+ return all_results
187
+
188
+ async def retrieve_formatted(
189
+ self,
190
+ query: str,
191
+ top_k: Optional[int] = None,
192
+ similarity_threshold: Optional[float] = None,
193
+ filter: Optional[dict] = None,
194
+ header: str = "## Relevant Knowledge\n",
195
+ ) -> str:
196
+ """
197
+ Retrieve and format knowledge for inclusion in a prompt.
198
+
199
+ Args:
200
+ query: The user's query
201
+ top_k: Maximum number of chunks to retrieve
202
+ similarity_threshold: Minimum similarity score
203
+ filter: Optional metadata filter
204
+ header: Header text for the formatted output
205
+
206
+ Returns:
207
+ Formatted string of retrieved knowledge for prompt inclusion
208
+ """
209
+ results = await self.retrieve(
210
+ query=query,
211
+ top_k=top_k,
212
+ similarity_threshold=similarity_threshold,
213
+ filter=filter,
214
+ )
215
+
216
+ if not results:
217
+ return ""
218
+
219
+ return self.format_results(results, header=header)
220
+
221
+ def format_results(
222
+ self,
223
+ results: list[RetrievedChunk],
224
+ header: str = "## Relevant Knowledge\n",
225
+ ) -> str:
226
+ """
227
+ Format retrieved results for inclusion in a prompt.
228
+
229
+ Args:
230
+ results: List of RetrievedChunk objects
231
+ header: Header text for the formatted output
232
+
233
+ Returns:
234
+ Formatted string
235
+ """
236
+ if not results:
237
+ return ""
238
+
239
+ parts = [header]
240
+ parts.append("The following information may be relevant to the user's question:\n")
241
+
242
+ # Group by source
243
+ by_source = {}
244
+ for r in results:
245
+ source = r.source_name or r.source_id or 'Unknown'
246
+ if source not in by_source:
247
+ by_source[source] = []
248
+ by_source[source].append(r)
249
+
250
+ for source, chunks in by_source.items():
251
+ parts.append(f"\n### {source}\n")
252
+ for chunk in chunks:
253
+ parts.append(f"{chunk.content}\n")
254
+
255
+ return "\n".join(parts)
256
+
257
+ async def close(self) -> None:
258
+ """Close the retriever and release resources."""
259
+ await self._vector_store.close()
260
+ await self._embedding_client.close()
261
+