dataknobs-bots 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. dataknobs_bots/__init__.py +42 -0
  2. dataknobs_bots/api/__init__.py +42 -0
  3. dataknobs_bots/api/dependencies.py +140 -0
  4. dataknobs_bots/api/exceptions.py +289 -0
  5. dataknobs_bots/bot/__init__.py +15 -0
  6. dataknobs_bots/bot/base.py +1091 -0
  7. dataknobs_bots/bot/context.py +102 -0
  8. dataknobs_bots/bot/manager.py +430 -0
  9. dataknobs_bots/bot/registry.py +629 -0
  10. dataknobs_bots/config/__init__.py +39 -0
  11. dataknobs_bots/config/resolution.py +353 -0
  12. dataknobs_bots/knowledge/__init__.py +82 -0
  13. dataknobs_bots/knowledge/query/__init__.py +25 -0
  14. dataknobs_bots/knowledge/query/expander.py +262 -0
  15. dataknobs_bots/knowledge/query/transformer.py +288 -0
  16. dataknobs_bots/knowledge/rag.py +738 -0
  17. dataknobs_bots/knowledge/retrieval/__init__.py +23 -0
  18. dataknobs_bots/knowledge/retrieval/formatter.py +249 -0
  19. dataknobs_bots/knowledge/retrieval/merger.py +279 -0
  20. dataknobs_bots/memory/__init__.py +56 -0
  21. dataknobs_bots/memory/base.py +38 -0
  22. dataknobs_bots/memory/buffer.py +58 -0
  23. dataknobs_bots/memory/vector.py +188 -0
  24. dataknobs_bots/middleware/__init__.py +11 -0
  25. dataknobs_bots/middleware/base.py +92 -0
  26. dataknobs_bots/middleware/cost.py +421 -0
  27. dataknobs_bots/middleware/logging.py +184 -0
  28. dataknobs_bots/reasoning/__init__.py +65 -0
  29. dataknobs_bots/reasoning/base.py +50 -0
  30. dataknobs_bots/reasoning/react.py +299 -0
  31. dataknobs_bots/reasoning/simple.py +51 -0
  32. dataknobs_bots/registry/__init__.py +41 -0
  33. dataknobs_bots/registry/backend.py +181 -0
  34. dataknobs_bots/registry/memory.py +244 -0
  35. dataknobs_bots/registry/models.py +102 -0
  36. dataknobs_bots/registry/portability.py +210 -0
  37. dataknobs_bots/tools/__init__.py +5 -0
  38. dataknobs_bots/tools/knowledge_search.py +113 -0
  39. dataknobs_bots/utils/__init__.py +1 -0
  40. dataknobs_bots-0.2.4.dist-info/METADATA +591 -0
  41. dataknobs_bots-0.2.4.dist-info/RECORD +42 -0
  42. dataknobs_bots-0.2.4.dist-info/WHEEL +4 -0
@@ -0,0 +1,353 @@
1
+ """Resource resolution utilities for DynaBot configuration.
2
+
3
+ This module provides utilities to create a ConfigBindingResolver with
4
+ DynaBot-specific factories registered, enabling direct resource instantiation
5
+ from logical names.
6
+
7
+ Example:
8
+ ```python
9
+ from dataknobs_config import EnvironmentConfig
10
+ from dataknobs_bots.config import create_bot_resolver
11
+
12
+ # Load environment
13
+ env = EnvironmentConfig.load("production")
14
+
15
+ # Create resolver with all DynaBot factories registered
16
+ resolver = create_bot_resolver(env)
17
+
18
+ # Resolve resources by logical name
19
+ llm = resolver.resolve("llm_providers", "default")
20
+ db = await resolver.resolve_async("databases", "conversations")
21
+ vector_store = resolver.resolve("vector_stores", "knowledge")
22
+ embedding = resolver.resolve("embedding_providers", "default")
23
+ ```
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import logging
29
+ from typing import TYPE_CHECKING, Any
30
+
31
+ if TYPE_CHECKING:
32
+ from dataknobs_config import ConfigBindingResolver, EnvironmentConfig
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
def create_bot_resolver(
    environment: EnvironmentConfig,
    resolve_env_vars: bool = True,
    register_defaults: bool = True,
) -> ConfigBindingResolver:
    """Build a ConfigBindingResolver preloaded with the DynaBot factories.

    The returned resolver turns logical names from the environment
    configuration into resource instances. When ``register_defaults`` is
    true, factories are registered for these resource types, in this order:

    - **llm_providers**: LLM providers
    - **databases**: async database backends
    - **vector_stores**: vector store backends
    - **embedding_providers**: embedding providers (served by the LLM
      provider factory)

    Args:
        environment: Environment configuration used for resource lookup.
        resolve_env_vars: Forwarded to ``ConfigBindingResolver``; whether
            environment variables inside configs are resolved.
        register_defaults: If True, register every default DynaBot factory.
            Pass False to get a bare resolver and register only the
            factories you need.

    Returns:
        The ConfigBindingResolver, with or without the default factories.

    Example:
        ```python
        from dataknobs_config import EnvironmentConfig
        from dataknobs_bots.config import create_bot_resolver

        env = EnvironmentConfig.load()
        resolver = create_bot_resolver(env)

        llm = resolver.resolve("llm_providers", "default")
        await llm.initialize()

        db = await resolver.resolve_async("databases", "conversations")
        ```

    Note:
        The resolver caches created instances by default. Use
        `resolver.resolve(..., use_cache=False)` to create fresh instances.
    """
    # Imported lazily: at module level the name is only available to type
    # checkers (TYPE_CHECKING guard).
    from dataknobs_config import ConfigBindingResolver

    bot_resolver = ConfigBindingResolver(
        environment, resolve_env_vars=resolve_env_vars
    )
    if not register_defaults:
        return bot_resolver

    # Tuple order is the registration order.
    for register in (
        register_llm_factory,
        register_database_factory,
        register_vector_store_factory,
        register_embedding_factory,
    ):
        register(bot_resolver)
    logger.debug("Registered all DynaBot resource factories")
    return bot_resolver
98
+
99
+
100
def register_llm_factory(resolver: ConfigBindingResolver) -> None:
    """Attach an async LLM provider factory under ``"llm_providers"``.

    Args:
        resolver: ConfigBindingResolver that receives the factory.

    Example:
        ```python
        resolver = ConfigBindingResolver(env)
        register_llm_factory(resolver)

        llm = resolver.resolve("llm_providers", "default")
        ```
    """
    # Local import keeps dataknobs_llm off the module import path until an
    # LLM factory is actually requested.
    from dataknobs_llm.llm import LLMProviderFactory

    resolver.register_factory("llm_providers", LLMProviderFactory(is_async=True))
    logger.debug("Registered LLM provider factory")
120
+
121
+
122
def register_database_factory(resolver: ConfigBindingResolver) -> None:
    """Attach an ``AsyncDatabaseFactory`` under ``"databases"``.

    Args:
        resolver: ConfigBindingResolver that receives the factory.

    Example:
        ```python
        resolver = ConfigBindingResolver(env)
        register_database_factory(resolver)

        db = await resolver.resolve_async("databases", "conversations")
        ```
    """
    # Local import keeps dataknobs_data off the module import path until a
    # database factory is actually requested.
    from dataknobs_data.factory import AsyncDatabaseFactory

    resolver.register_factory("databases", AsyncDatabaseFactory())
    logger.debug("Registered database factory")
142
+
143
+
144
def register_vector_store_factory(resolver: ConfigBindingResolver) -> None:
    """Attach a ``VectorStoreFactory`` under ``"vector_stores"``.

    Args:
        resolver: ConfigBindingResolver that receives the factory.

    Example:
        ```python
        resolver = ConfigBindingResolver(env)
        register_vector_store_factory(resolver)

        vs = resolver.resolve("vector_stores", "knowledge")
        await vs.initialize()
        ```
    """
    # Local import keeps the vector store package off the module import
    # path until a vector store factory is actually requested.
    from dataknobs_data.vector.stores import VectorStoreFactory

    resolver.register_factory("vector_stores", VectorStoreFactory())
    logger.debug("Registered vector store factory")
165
+
166
+
167
def register_embedding_factory(resolver: ConfigBindingResolver) -> None:
    """Attach an async LLM provider factory under ``"embedding_providers"``.

    Embedding providers are built by the same ``LLMProviderFactory`` used
    for ``"llm_providers"``: the embedding capability is expected to come
    from the provider's own ``embed()`` method.

    Args:
        resolver: ConfigBindingResolver that receives the factory.

    Example:
        ```python
        resolver = ConfigBindingResolver(env)
        register_embedding_factory(resolver)

        embedder = resolver.resolve("embedding_providers", "default")
        await embedder.initialize()
        embedding = await embedder.embed("Hello world")
        ```

    Note:
        The resolved provider should have an `embed()` method. Whether a
        given backend actually offers embeddings is decided by that
        provider, not by this function -- confirm it for the provider you
        configure.
    """
    # A separate factory instance is created here; it is not shared with
    # the one registered for "llm_providers".
    from dataknobs_llm.llm import LLMProviderFactory

    resolver.register_factory(
        "embedding_providers", LLMProviderFactory(is_async=True)
    )
    logger.debug("Registered embedding provider factory")
197
+
198
+
199
class BotResourceResolver:
    """High-level resource resolver for DynaBot.

    Wraps the ConfigBindingResolver built by ``create_bot_resolver`` and
    adds async getters that resolve a resource by logical name and then run
    its async setup hook (``initialize()`` or ``connect()``).

    Instances this object has already set up are remembered, so a cached
    instance handed back again by the underlying resolver is not set up a
    second time. Fresh instances (``use_cache=False``) are always set up.
    Call ``clear_cache()`` to forget tracked instances together with the
    resolver's own cache.

    Example:
        ```python
        from dataknobs_config import EnvironmentConfig
        from dataknobs_bots.config import BotResourceResolver

        env = EnvironmentConfig.load("production")
        resolver = BotResourceResolver(env)

        # Get initialized LLM provider
        llm = await resolver.get_llm("default")

        # Get initialized database
        db = await resolver.get_database("conversations")

        # Get initialized vector store
        vs = await resolver.get_vector_store("knowledge")
        ```
    """

    def __init__(
        self,
        environment: EnvironmentConfig,
        resolve_env_vars: bool = True,
    ):
        """Initialize the resource resolver.

        Args:
            environment: Environment configuration
            resolve_env_vars: Whether to resolve env vars in configs
        """
        self._resolver = create_bot_resolver(
            environment,
            resolve_env_vars=resolve_env_vars,
        )
        self._environment = environment
        # resource type -> {id(instance): instance} for instances whose
        # setup hook has already completed. Keeping a reference to the
        # instance guarantees its id() is not reused while it is tracked.
        self._initialized: dict[str, dict[int, Any]] = {}

    @property
    def environment(self) -> EnvironmentConfig:
        """Get the environment configuration."""
        return self._environment

    @property
    def resolver(self) -> ConfigBindingResolver:
        """Get the underlying ConfigBindingResolver."""
        return self._resolver

    async def _resolve_ready(
        self,
        resource_type: str,
        name: str,
        use_cache: bool,
        overrides: dict[str, Any],
        setup_method: str,
        *,
        required: bool,
    ) -> Any:
        """Resolve a resource and run its async setup hook at most once.

        Args:
            resource_type: Factory key registered on the resolver.
            name: Logical name of the resource.
            use_cache: Forwarded to the resolver.
            overrides: Config overrides forwarded to the resolver.
            setup_method: Name of the coroutine method that prepares the
                instance (``"initialize"`` or ``"connect"``).
            required: If True the hook must exist (a missing hook raises
                AttributeError); if False a missing hook is skipped.

        Returns:
            The resolved instance, after its setup hook has run.
        """
        instance = self._resolver.resolve(
            resource_type, name, use_cache=use_cache, **overrides
        )
        tracked = self._initialized.setdefault(resource_type, {})
        if use_cache and id(instance) in tracked:
            # Same cached object we already prepared: do not re-run setup.
            return instance

        if required or hasattr(instance, setup_method):
            await getattr(instance, setup_method)()

        # Recorded only after setup succeeded, so a failed setup is retried
        # on the next call. Resolutions with overrides are not recorded:
        # whether the resolver caches them is not visible here, and
        # tracking potentially fresh objects would grow without bound.
        if use_cache and not overrides:
            tracked[id(instance)] = instance
        return instance

    async def get_llm(
        self,
        name: str = "default",
        use_cache: bool = True,
        **overrides: Any,
    ) -> Any:
        """Get an initialized LLM provider.

        Args:
            name: Logical name of the LLM provider
            use_cache: Whether to return cached instance
            **overrides: Config overrides for this resolution

        Returns:
            The resolved provider after its ``initialize()`` coroutine ran
        """
        return await self._resolve_ready(
            "llm_providers", name, use_cache, overrides, "initialize",
            required=True,
        )

    async def get_database(
        self,
        name: str = "default",
        use_cache: bool = True,
        **overrides: Any,
    ) -> Any:
        """Get an initialized database backend.

        Args:
            name: Logical name of the database
            use_cache: Whether to return cached instance
            **overrides: Config overrides for this resolution

        Returns:
            The resolved backend; ``connect()`` is awaited when the backend
            defines it
        """
        return await self._resolve_ready(
            "databases", name, use_cache, overrides, "connect",
            required=False,
        )

    async def get_vector_store(
        self,
        name: str = "default",
        use_cache: bool = True,
        **overrides: Any,
    ) -> Any:
        """Get an initialized vector store.

        Args:
            name: Logical name of the vector store
            use_cache: Whether to return cached instance
            **overrides: Config overrides for this resolution

        Returns:
            The resolved store; ``initialize()`` is awaited when the store
            defines it
        """
        return await self._resolve_ready(
            "vector_stores", name, use_cache, overrides, "initialize",
            required=False,
        )

    async def get_embedding_provider(
        self,
        name: str = "default",
        use_cache: bool = True,
        **overrides: Any,
    ) -> Any:
        """Get an initialized embedding provider.

        Args:
            name: Logical name of the embedding provider
            use_cache: Whether to return cached instance
            **overrides: Config overrides for this resolution

        Returns:
            The resolved provider after its ``initialize()`` coroutine ran
        """
        return await self._resolve_ready(
            "embedding_providers", name, use_cache, overrides, "initialize",
            required=True,
        )

    def clear_cache(self, resource_type: str | None = None) -> None:
        """Clear cached resource instances.

        Also forgets which instances were already set up, so resources
        created after the clear are set up again.

        Args:
            resource_type: Specific type to clear, or None for all
        """
        self._resolver.clear_cache(resource_type)
        if resource_type is None:
            self._initialized.clear()
        else:
            self._initialized.pop(resource_type, None)

    def __repr__(self) -> str:
        """String representation."""
        types = self._resolver.get_registered_types()
        return f"BotResourceResolver(environment={self._environment.name!r}, types={types})"
@@ -0,0 +1,82 @@
1
+ """Knowledge base implementations for DynaBot."""
2
+
3
+ from typing import Any
4
+
5
+ from .rag import RAGKnowledgeBase
6
+ from .retrieval import (
7
+ ChunkMerger,
8
+ ContextFormatter,
9
+ FormatterConfig,
10
+ MergedChunk,
11
+ MergerConfig,
12
+ )
13
+ from .query import (
14
+ ContextualExpander,
15
+ Message,
16
+ QueryTransformer,
17
+ TransformerConfig,
18
+ create_transformer,
19
+ is_ambiguous_query,
20
+ )
21
+
22
+ __all__ = [
23
+ # Main knowledge base
24
+ "RAGKnowledgeBase",
25
+ "create_knowledge_base_from_config",
26
+ # Retrieval utilities
27
+ "ChunkMerger",
28
+ "MergedChunk",
29
+ "MergerConfig",
30
+ "ContextFormatter",
31
+ "FormatterConfig",
32
+ # Query utilities
33
+ "QueryTransformer",
34
+ "TransformerConfig",
35
+ "create_transformer",
36
+ "ContextualExpander",
37
+ "Message",
38
+ "is_ambiguous_query",
39
+ ]
40
+
41
+
42
async def create_knowledge_base_from_config(
    config: dict[str, Any],
) -> RAGKnowledgeBase:
    """Create knowledge base from configuration.

    Args:
        config: Knowledge base configuration with:
            - type: Type of knowledge base (currently only 'rag' supported;
              matched case-insensitively, defaults to 'rag' when missing
              or null)
            - vector_store: Vector store configuration
            - embedding_provider: LLM provider for embeddings
            - embedding_model: Model to use for embeddings
            - chunking: Optional chunking configuration
            - documents_path: Optional path to load documents
            - document_pattern: Optional file pattern

    Returns:
        Configured knowledge base instance

    Raises:
        ValueError: If knowledge base type is not supported, including a
            ``type`` value that is not a string

    Example:
        ```python
        config = {
            "type": "rag",
            "vector_store": {
                "backend": "memory",
                "dimensions": 384
            },
            "embedding_provider": "echo",
            "embedding_model": "test"
        }
        kb = await create_knowledge_base_from_config(config)
        ```
    """
    raw_type = config.get("type")
    if raw_type is None:
        # A null ``type`` (for example an empty value in a config file) is
        # treated like a missing key instead of failing on ``.lower()``.
        raw_type = "rag"

    # Only strings can name a knowledge base type; anything else falls
    # through to the documented ValueError rather than an AttributeError.
    kb_type = raw_type.lower() if isinstance(raw_type, str) else raw_type

    if kb_type == "rag":
        # The full config is handed over unchanged.
        return await RAGKnowledgeBase.from_config(config)

    raise ValueError(
        f"Unknown knowledge base type: {kb_type}. Available types: rag"
    )
@@ -0,0 +1,25 @@
1
+ """Query transformation utilities for RAG knowledge bases.
2
+
3
+ This module provides query preprocessing to improve retrieval quality
4
+ by transforming user input into optimized search queries.
5
+ """
6
+
7
+ from dataknobs_bots.knowledge.query.expander import (
8
+ ContextualExpander,
9
+ Message,
10
+ is_ambiguous_query,
11
+ )
12
+ from dataknobs_bots.knowledge.query.transformer import (
13
+ QueryTransformer,
14
+ TransformerConfig,
15
+ create_transformer,
16
+ )
17
+
18
+ __all__ = [
19
+ "QueryTransformer",
20
+ "TransformerConfig",
21
+ "create_transformer",
22
+ "ContextualExpander",
23
+ "Message",
24
+ "is_ambiguous_query",
25
+ ]