dataknobs-bots 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. dataknobs_bots/__init__.py +42 -0
  2. dataknobs_bots/api/__init__.py +42 -0
  3. dataknobs_bots/api/dependencies.py +140 -0
  4. dataknobs_bots/api/exceptions.py +289 -0
  5. dataknobs_bots/bot/__init__.py +15 -0
  6. dataknobs_bots/bot/base.py +1091 -0
  7. dataknobs_bots/bot/context.py +102 -0
  8. dataknobs_bots/bot/manager.py +430 -0
  9. dataknobs_bots/bot/registry.py +629 -0
  10. dataknobs_bots/config/__init__.py +39 -0
  11. dataknobs_bots/config/resolution.py +353 -0
  12. dataknobs_bots/knowledge/__init__.py +82 -0
  13. dataknobs_bots/knowledge/query/__init__.py +25 -0
  14. dataknobs_bots/knowledge/query/expander.py +262 -0
  15. dataknobs_bots/knowledge/query/transformer.py +288 -0
  16. dataknobs_bots/knowledge/rag.py +738 -0
  17. dataknobs_bots/knowledge/retrieval/__init__.py +23 -0
  18. dataknobs_bots/knowledge/retrieval/formatter.py +249 -0
  19. dataknobs_bots/knowledge/retrieval/merger.py +279 -0
  20. dataknobs_bots/memory/__init__.py +56 -0
  21. dataknobs_bots/memory/base.py +38 -0
  22. dataknobs_bots/memory/buffer.py +58 -0
  23. dataknobs_bots/memory/vector.py +188 -0
  24. dataknobs_bots/middleware/__init__.py +11 -0
  25. dataknobs_bots/middleware/base.py +92 -0
  26. dataknobs_bots/middleware/cost.py +421 -0
  27. dataknobs_bots/middleware/logging.py +184 -0
  28. dataknobs_bots/reasoning/__init__.py +65 -0
  29. dataknobs_bots/reasoning/base.py +50 -0
  30. dataknobs_bots/reasoning/react.py +299 -0
  31. dataknobs_bots/reasoning/simple.py +51 -0
  32. dataknobs_bots/registry/__init__.py +41 -0
  33. dataknobs_bots/registry/backend.py +181 -0
  34. dataknobs_bots/registry/memory.py +244 -0
  35. dataknobs_bots/registry/models.py +102 -0
  36. dataknobs_bots/registry/portability.py +210 -0
  37. dataknobs_bots/tools/__init__.py +5 -0
  38. dataknobs_bots/tools/knowledge_search.py +113 -0
  39. dataknobs_bots/utils/__init__.py +1 -0
  40. dataknobs_bots-0.2.4.dist-info/METADATA +591 -0
  41. dataknobs_bots-0.2.4.dist-info/RECORD +42 -0
  42. dataknobs_bots-0.2.4.dist-info/WHEEL +4 -0
@@ -0,0 +1,262 @@
1
+ """Contextual query expansion using conversation history.
2
+
3
+ This module provides query expansion without requiring LLM calls,
4
+ using recent conversation context to enrich ambiguous queries.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any, Callable
11
+
12
+
13
@dataclass
class Message:
    """One turn of a conversation.

    Attributes:
        role: Who produced the turn ("user", "assistant", "system")
        content: Text of the turn
    """

    # Speaker of the turn, e.g. "user" or "assistant".
    role: str
    # Raw text of the turn.
    content: str
24
+
25
+
26
class ContextualExpander:
    """Expands queries using conversation context.

    This expander enriches ambiguous or context-dependent queries
    by prepending keywords taken from recent conversation turns.
    Unlike QueryTransformer, it doesn't require LLM calls.

    Example:
        ```python
        expander = ContextualExpander(max_context_turns=3)

        # User asks: "Show me an example"
        # Recent context: discussing chain-of-thought prompting
        expanded = expander.expand(
            "Show me an example",
            conversation_history
        )
        # Returns something like:
        # "chain-of-thought prompting examples Show me an example"
        ```
    """

    # Class-level fallback so instances that bypassed __init__ still have a cap.
    max_keywords: int = 5

    def __init__(
        self,
        max_context_turns: int = 3,
        include_assistant: bool = False,
        keyword_weight: int = 2,
        max_keywords: int = 5,
    ):
        """Initialize the contextual expander.

        Args:
            max_context_turns: Maximum number of matching messages to consider
            include_assistant: Whether to include assistant messages
            keyword_weight: Intended repeat count for extracted keywords.
                Stored on the instance; no method of this class reads it.
            max_keywords: Maximum number of keywords/topics prepended to
                the query
        """
        self.max_context_turns = max_context_turns
        self.include_assistant = include_assistant
        self.keyword_weight = keyword_weight
        self.max_keywords = max_keywords

        # Common words to filter out
        self._stop_words = {
            "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
            "have", "has", "had", "do", "does", "did", "will", "would", "could",
            "should", "may", "might", "must", "can", "this", "that", "these",
            "those", "i", "you", "he", "she", "it", "we", "they", "what", "which",
            "who", "when", "where", "why", "how", "all", "each", "every", "both",
            "few", "more", "most", "other", "some", "such", "no", "not", "only",
            "own", "same", "so", "than", "too", "very", "just", "also", "now",
            "here", "there", "about", "into", "through", "during", "before",
            "after", "above", "below", "to", "from", "up", "down", "in", "out",
            "on", "off", "over", "under", "again", "further", "then", "once",
            "and", "but", "or", "nor", "for", "yet", "because", "as", "until",
            "while", "of", "at", "by", "with", "without", "between", "me", "my",
            "your", "his", "her", "its", "our", "their", "please", "help", "want",
            "need", "like", "show", "tell", "give", "make", "let", "get", "see",
        }

    def expand(
        self,
        user_input: str,
        conversation_history: list[Message] | list[dict[str, Any]],
    ) -> str:
        """Expand query with conversation context.

        Args:
            user_input: The user's current message
            conversation_history: Recent conversation messages

        Returns:
            The extracted keywords followed by ``user_input``, or
            ``user_input`` unchanged when no keywords were found
        """
        messages = self._normalize_messages(conversation_history)
        recent = self._get_recent_context(messages)
        keywords = self._extract_keywords(recent)
        return self._prepend_terms(keywords, user_input)

    @staticmethod
    def _prepend_terms(terms: list[str], user_input: str) -> str:
        """Prefix ``user_input`` with space-joined ``terms`` if there are any."""
        prefix = " ".join(terms)
        if prefix:
            return f"{prefix} {user_input}"
        return user_input

    @staticmethod
    def _as_text(content: Any) -> str:
        """Coerce message content to a string.

        ``None`` becomes an empty string and other non-string values are
        passed through ``str()``, so later ``" ".join`` calls cannot fail.
        """
        if isinstance(content, str):
            return content
        if content is None:
            return ""
        return str(content)

    def _normalize_messages(
        self,
        history: list[Message] | list[dict[str, Any]],
    ) -> list[Message]:
        """Normalize history to Message objects.

        Args:
            history: Conversation history as Message objects and/or dicts
                with "role" and "content" keys

        Returns:
            List of Message objects; items that are neither a Message nor
            a dict are skipped
        """
        messages: list[Message] = []
        for item in history or []:
            if isinstance(item, dict):
                messages.append(Message(
                    role=item.get("role", "user"),
                    content=item.get("content", ""),
                ))
            elif isinstance(item, Message):
                messages.append(item)
        return messages

    def _get_recent_context(self, messages: list[Message]) -> list[str]:
        """Get recent relevant context from conversation.

        Args:
            messages: Conversation messages, oldest first

        Returns:
            Content of up to ``max_context_turns`` of the latest matching
            messages, in their original (oldest-first) order
        """
        collected: list[str] = []

        # Walk backwards so the newest matching messages are picked first.
        for msg in reversed(messages):
            if len(collected) >= self.max_context_turns:
                break

            wanted = msg.role == "user" or (
                self.include_assistant and msg.role == "assistant"
            )
            if wanted:
                # Content may be None (or another non-string); coerce it
                # here so joining the context never raises.
                collected.append(self._as_text(msg.content))

        # Restore chronological order.
        collected.reverse()
        return collected

    def _extract_keywords(self, context: list[str]) -> list[str]:
        """Extract meaningful keywords from context.

        Args:
            context: List of context strings

        Returns:
            Up to ``max_keywords`` unique lowercase keywords, in order of
            first appearance
        """
        keywords: list[str] = []
        seen: set[str] = set()

        for word in " ".join(context).lower().split():
            # Clean punctuation
            cleaned = word.strip(".,!?\"'()[]{}:;")

            # Skip short words, stop words, and numbers
            if (
                len(cleaned) < 3
                or cleaned in self._stop_words
                or cleaned.isdigit()
            ):
                continue

            # Set membership keeps deduplication O(1) per word.
            if cleaned not in seen:
                seen.add(cleaned)
                keywords.append(cleaned)

        # Context is oldest-first, so the cap keeps the earliest keywords
        # inside the selected window.
        return keywords[: max(0, self.max_keywords)]

    def expand_with_topics(
        self,
        user_input: str,
        conversation_history: list[Message] | list[dict[str, Any]],
        topic_extractor: Callable[[str], list[str]] | None = None,
    ) -> str:
        """Expand query with extracted topics.

        Enhanced expansion that uses a custom topic extractor.

        Args:
            user_input: The user's current message
            conversation_history: Recent conversation messages
            topic_extractor: Optional function to extract topics from text;
                when omitted, built-in keyword extraction is used

        Returns:
            Expanded query string
        """
        messages = self._normalize_messages(conversation_history)
        recent = self._get_recent_context(messages)

        if topic_extractor is not None:
            # Use custom topic extraction, one call per context string.
            topics = [
                topic for text in recent for topic in topic_extractor(text)
            ]
            terms = topics[: max(0, self.max_keywords)]
        else:
            # Fall back to keyword extraction
            terms = self._extract_keywords(recent)

        return self._prepend_terms(terms, user_input)
229
+
230
+
231
def is_ambiguous_query(query: str) -> bool:
    """Check if a query is likely ambiguous and needs expansion.

    A query is treated as ambiguous when it has fewer than four
    whitespace-separated words, or when one of its words is a
    context-dependent marker such as "this", "it" or "example".
    Markers are matched against whole words, so "write" or "unit" do
    not count as containing "it".

    Args:
        query: The query to check

    Returns:
        True if query appears ambiguous

    Example:
        ```python
        is_ambiguous_query("Show me an example")  # True
        is_ambiguous_query("How do I configure OAuth?")  # False
        ```
    """
    # Short queries are often ambiguous
    words = query.lower().split()
    if len(words) < 4:
        return True

    # Demonstratives and similar words usually point back at earlier context
    context_markers = {
        "this", "that", "these", "those", "it", "them",
        "example", "examples", "more", "another", "same", "similar",
    }

    for word in words:
        # Normalise a typographic apostrophe, then drop surrounding punctuation.
        token = word.replace("\u2019", "'").strip(".,!?\"'()[]{}:;")
        # Reduce contractions to their head word ("it's" -> "it").
        token = token.partition("'")[0]
        if token in context_markers:
            return True

    return False
@@ -0,0 +1,288 @@
1
+ """Query transformation using LLM for improved retrieval.
2
+
3
+ This module provides LLM-based query transformation to generate
4
+ optimized search queries from user input.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any
11
+
12
+
13
@dataclass
class TransformerConfig:
    """Settings that control LLM-based query transformation.

    Attributes:
        enabled: Whether transformation is enabled
        llm_provider: LLM provider name (e.g., "ollama", "openai")
        llm_model: Model to use for transformation
        num_queries: Number of alternative queries to generate
        domain_context: Domain-specific context for better queries
    """

    # Transformation is off unless explicitly enabled.
    enabled: bool = False
    # Provider and model used to generate the queries.
    llm_provider: str = "ollama"
    llm_model: str = "llama3.2"
    # Default number of queries requested per transformation.
    num_queries: int = 3
    # Optional domain hint inserted into the prompt; empty means none.
    domain_context: str = ""
30
+
31
+
32
class QueryTransformer:
    """LLM-based query transformation for improved RAG retrieval.

    Transforms user input into optimized search queries by using an LLM
    to extract key concepts and generate alternative phrasings.

    This is particularly useful when:
    - User input contains literal text to analyze (not queries)
    - User asks vague questions that need expansion
    - Domain-specific terminology needs translation

    Example:
        ```python
        config = TransformerConfig(
            enabled=True,
            llm_provider="ollama",
            llm_model="llama3.2",
            domain_context="prompt engineering"
        )
        transformer = QueryTransformer(config)
        await transformer.initialize()

        # Transform user input to search queries
        queries = await transformer.transform(
            "Analyze this: Write a poem about cats"
        )
        # Returns: ["prompt analysis techniques", "evaluating prompt quality", ...]
        ```
    """

    def __init__(self, config: TransformerConfig | None = None):
        """Initialize the query transformer.

        Args:
            config: Transformer configuration, uses defaults if not provided
        """
        self.config = config or TransformerConfig()
        self._llm = None
        self._initialized = False

    async def initialize(self) -> None:
        """Initialize the LLM provider.

        Must be called before using transform() if enabled. Does nothing
        when transformation is disabled or the provider is already
        initialized, so repeated calls do not create extra providers.
        """
        if not self.config.enabled or self._initialized:
            return

        from dataknobs_llm.llm import LLMProviderFactory

        factory = LLMProviderFactory(is_async=True)
        self._llm = factory.create({
            "provider": self.config.llm_provider,
            "model": self.config.llm_model,
        })
        await self._llm.initialize()
        self._initialized = True

    async def close(self) -> None:
        """Close the LLM provider and release resources."""
        # Reset state first so the transformer is marked unusable even if
        # the provider's close() raises.
        llm, self._llm = self._llm, None
        self._initialized = False
        if llm is not None and hasattr(llm, "close"):
            await llm.close()

    async def transform(
        self,
        user_input: str,
        num_queries: int | None = None,
    ) -> list[str]:
        """Transform user input into optimized search queries.

        Args:
            user_input: The user's message or question
            num_queries: Number of queries to generate (overrides config)

        Returns:
            List of optimized search queries; ``[user_input]`` when
            transformation is disabled

        Raises:
            RuntimeError: If transformer is enabled but not initialized
        """
        # If disabled, return the original input as a single query
        if not self.config.enabled:
            return [user_input]

        self._require_initialized()
        num = num_queries or self.config.num_queries
        prompt = self._build_prompt(user_input, num)
        return await self._generate_queries(prompt, user_input, num)

    def _require_initialized(self) -> None:
        """Raise RuntimeError unless initialize() has completed."""
        if not self._initialized:
            raise RuntimeError(
                "QueryTransformer not initialized. Call initialize() first."
            )

    async def _generate_queries(
        self, prompt: str, fallback: str, num: int
    ) -> list[str]:
        """Send ``prompt`` to the LLM and return at most ``num`` parsed queries."""
        response = await self._llm.generate(prompt)
        return self._parse_response(response, fallback)[:num]

    def _domain_clause(self) -> str:
        """Return the " in the context of ..." prompt fragment, or ""."""
        if self.config.domain_context:
            return f" in the context of {self.config.domain_context}"
        return ""

    def _build_prompt(self, user_input: str, num_queries: int) -> str:
        """Build the transformation prompt.

        Args:
            user_input: User's message
            num_queries: Number of queries to generate

        Returns:
            Prompt string for LLM
        """
        domain_context = self._domain_clause()

        return f"""Generate {num_queries} search queries to find relevant knowledge base content for the following user message{domain_context}.

User message: "{user_input}"

Focus on:
- Key concepts and techniques being discussed
- The underlying intent, not the literal text
- Related topics that would provide useful context

Return ONLY the search queries, one per line, without numbering or explanation.
Keep each query concise (2-6 words).
"""

    def _parse_response(self, response: str, fallback: str) -> list[str]:
        """Parse LLM response into list of queries.

        Each non-empty line becomes one query after a leading list marker
        ("1.", "2)", "-", "*", "•") and surrounding quotes are removed.
        Digits that belong to the query itself ("3D rendering",
        "2024 tax rules") are kept.

        Args:
            response: Raw LLM response. A non-string object is read via its
                ``content`` attribute when it has one.
                NOTE(review): confirm what the provider's generate() returns.
            fallback: Fallback query if parsing fails

        Returns:
            List of parsed queries, or ``[fallback]`` if none were found
        """
        import re

        if isinstance(response, str):
            text = response
        else:
            text = str(getattr(response, "content", response))

        # Numbered marker ("1." / "(1)" / "2:") not followed by another digit,
        # a run of dashes, or a single "*" / bullet followed by whitespace.
        marker = re.compile(
            r"^(?:\(?\d+[.):]\s*(?=\D)|-+\s*|[*\u2022]\s+)"
        )

        queries = []
        for line in text.strip().split("\n"):
            cleaned = marker.sub("", line.strip(), count=1).strip()
            cleaned = cleaned.strip('"\'')

            # Fragments of one or two characters are treated as noise.
            if len(cleaned) > 2:
                queries.append(cleaned)

        # Ensure we have at least one query
        return queries or [fallback]

    async def transform_with_context(
        self,
        user_input: str,
        conversation_context: str,
        num_queries: int | None = None,
    ) -> list[str]:
        """Transform with additional conversation context.

        Args:
            user_input: The user's message
            conversation_context: Recent conversation history
            num_queries: Number of queries to generate

        Returns:
            List of optimized search queries; ``[user_input]`` when
            transformation is disabled

        Raises:
            RuntimeError: If transformer is enabled but not initialized
        """
        if not self.config.enabled:
            return [user_input]

        self._require_initialized()
        num = num_queries or self.config.num_queries

        # Build enhanced prompt with context
        prompt = self._build_contextual_prompt(
            user_input, conversation_context, num
        )
        return await self._generate_queries(prompt, user_input, num)

    def _build_contextual_prompt(
        self,
        user_input: str,
        conversation_context: str,
        num_queries: int,
    ) -> str:
        """Build prompt with conversation context.

        Args:
            user_input: User's message
            conversation_context: Recent conversation
            num_queries: Number of queries to generate

        Returns:
            Prompt string for LLM
        """
        domain_context = self._domain_clause()

        return f"""Generate {num_queries} search queries to find relevant knowledge base content for the user's message{domain_context}.

Recent conversation context:
{conversation_context}

Current user message: "{user_input}"

Focus on:
- Key concepts relevant to what the user is asking
- Context from the conversation that clarifies the query
- Related topics that would provide useful information

Return ONLY the search queries, one per line, without numbering or explanation.
Keep each query concise (2-6 words).
"""
262
+
263
+
264
async def create_transformer(config: dict[str, Any]) -> QueryTransformer:
    """Build a QueryTransformer from a plain dict and initialize it.

    The dict is unpacked as keyword arguments into TransformerConfig,
    so its keys must be TransformerConfig field names.

    Args:
        config: Configuration dictionary with TransformerConfig fields

    Returns:
        Initialized QueryTransformer

    Example:
        ```python
        transformer = await create_transformer({
            "enabled": True,
            "llm_provider": "ollama",
            "llm_model": "llama3.2",
            "domain_context": "prompt engineering"
        })
        ```
    """
    instance = QueryTransformer(TransformerConfig(**config))
    await instance.initialize()
    return instance