qtype 0.0.12__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qtype/application/commons/tools.py +1 -1
- qtype/application/converters/tools_from_api.py +476 -11
- qtype/application/converters/tools_from_module.py +38 -14
- qtype/application/converters/types.py +15 -30
- qtype/application/documentation.py +1 -1
- qtype/application/facade.py +102 -85
- qtype/base/types.py +227 -7
- qtype/cli.py +5 -1
- qtype/commands/convert.py +52 -6
- qtype/commands/generate.py +44 -4
- qtype/commands/run.py +78 -36
- qtype/commands/serve.py +74 -44
- qtype/commands/validate.py +37 -14
- qtype/commands/visualize.py +46 -25
- qtype/dsl/__init__.py +6 -5
- qtype/dsl/custom_types.py +1 -1
- qtype/dsl/domain_types.py +86 -5
- qtype/dsl/linker.py +384 -0
- qtype/dsl/loader.py +315 -0
- qtype/dsl/model.py +751 -263
- qtype/dsl/parser.py +200 -0
- qtype/dsl/types.py +50 -0
- qtype/interpreter/api.py +63 -136
- qtype/interpreter/auth/aws.py +19 -9
- qtype/interpreter/auth/generic.py +93 -16
- qtype/interpreter/base/base_step_executor.py +436 -0
- qtype/interpreter/base/batch_step_executor.py +171 -0
- qtype/interpreter/base/exceptions.py +50 -0
- qtype/interpreter/base/executor_context.py +91 -0
- qtype/interpreter/base/factory.py +84 -0
- qtype/interpreter/base/progress_tracker.py +110 -0
- qtype/interpreter/base/secrets.py +339 -0
- qtype/interpreter/base/step_cache.py +74 -0
- qtype/interpreter/base/stream_emitter.py +469 -0
- qtype/interpreter/conversions.py +471 -22
- qtype/interpreter/converters.py +79 -0
- qtype/interpreter/endpoints.py +355 -0
- qtype/interpreter/executors/agent_executor.py +242 -0
- qtype/interpreter/executors/aggregate_executor.py +93 -0
- qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
- qtype/interpreter/executors/decoder_executor.py +163 -0
- qtype/interpreter/executors/doc_to_text_executor.py +112 -0
- qtype/interpreter/executors/document_embedder_executor.py +107 -0
- qtype/interpreter/executors/document_search_executor.py +113 -0
- qtype/interpreter/executors/document_source_executor.py +118 -0
- qtype/interpreter/executors/document_splitter_executor.py +105 -0
- qtype/interpreter/executors/echo_executor.py +63 -0
- qtype/interpreter/executors/field_extractor_executor.py +165 -0
- qtype/interpreter/executors/file_source_executor.py +101 -0
- qtype/interpreter/executors/file_writer_executor.py +110 -0
- qtype/interpreter/executors/index_upsert_executor.py +232 -0
- qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
- qtype/interpreter/executors/invoke_flow_executor.py +51 -0
- qtype/interpreter/executors/invoke_tool_executor.py +358 -0
- qtype/interpreter/executors/llm_inference_executor.py +272 -0
- qtype/interpreter/executors/prompt_template_executor.py +78 -0
- qtype/interpreter/executors/sql_source_executor.py +106 -0
- qtype/interpreter/executors/vector_search_executor.py +91 -0
- qtype/interpreter/flow.py +173 -22
- qtype/interpreter/logging_progress.py +61 -0
- qtype/interpreter/metadata_api.py +115 -0
- qtype/interpreter/resource_cache.py +5 -4
- qtype/interpreter/rich_progress.py +225 -0
- qtype/interpreter/stream/chat/__init__.py +15 -0
- qtype/interpreter/stream/chat/converter.py +391 -0
- qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
- qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
- qtype/interpreter/stream/chat/vercel.py +609 -0
- qtype/interpreter/stream/utils/__init__.py +15 -0
- qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
- qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
- qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
- qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
- qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
- qtype/interpreter/telemetry.py +135 -8
- qtype/interpreter/tools/__init__.py +5 -0
- qtype/interpreter/tools/function_tool_helper.py +265 -0
- qtype/interpreter/types.py +330 -0
- qtype/interpreter/typing.py +83 -89
- qtype/interpreter/ui/404/index.html +1 -1
- qtype/interpreter/ui/404.html +1 -1
- qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
- qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
- qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
- qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
- qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
- qtype/interpreter/ui/icon.png +0 -0
- qtype/interpreter/ui/index.html +1 -1
- qtype/interpreter/ui/index.txt +5 -5
- qtype/semantic/checker.py +643 -0
- qtype/semantic/generate.py +268 -85
- qtype/semantic/loader.py +95 -0
- qtype/semantic/model.py +535 -163
- qtype/semantic/resolver.py +63 -19
- qtype/semantic/visualize.py +50 -35
- {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/METADATA +21 -4
- qtype-0.1.3.dist-info/RECORD +137 -0
- qtype/dsl/base_types.py +0 -38
- qtype/dsl/validator.py +0 -464
- qtype/interpreter/batch/__init__.py +0 -0
- qtype/interpreter/batch/flow.py +0 -95
- qtype/interpreter/batch/sql_source.py +0 -95
- qtype/interpreter/batch/step.py +0 -63
- qtype/interpreter/batch/types.py +0 -41
- qtype/interpreter/batch/utils.py +0 -179
- qtype/interpreter/chat/chat_api.py +0 -237
- qtype/interpreter/chat/vercel.py +0 -314
- qtype/interpreter/exceptions.py +0 -10
- qtype/interpreter/step.py +0 -67
- qtype/interpreter/steps/__init__.py +0 -0
- qtype/interpreter/steps/agent.py +0 -114
- qtype/interpreter/steps/condition.py +0 -36
- qtype/interpreter/steps/decoder.py +0 -88
- qtype/interpreter/steps/llm_inference.py +0 -150
- qtype/interpreter/steps/prompt_template.py +0 -54
- qtype/interpreter/steps/search.py +0 -24
- qtype/interpreter/steps/tool.py +0 -53
- qtype/interpreter/streaming_helpers.py +0 -123
- qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
- qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
- qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
- qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
- qtype/interpreter/ui/favicon.ico +0 -0
- qtype/loader.py +0 -389
- qtype-0.0.12.dist-info/RECORD +0 -105
- /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
- {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/WHEEL +0 -0
- {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/entry_points.txt +0 -0
- {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/top_level.txt +0 -0
qtype/interpreter/conversions.py
CHANGED

@@ -1,5 +1,9 @@
 from __future__ import annotations

+import importlib
+import uuid
+from typing import Any
+
 from llama_index.core.base.embeddings.base import BaseEmbedding
 from llama_index.core.base.llms.base import BaseLLM
 from llama_index.core.base.llms.types import AudioBlock
@@ -9,18 +13,152 @@ from llama_index.core.base.llms.types import (
     DocumentBlock,
     ImageBlock,
     TextBlock,
+    ThinkingBlock,
 )
 from llama_index.core.memory import Memory as LlamaMemory
+from llama_index.core.schema import Document as LlamaDocument
+from llama_index.core.vector_stores.types import BasePydanticVectorStore
+from opensearchpy import AsyncOpenSearch, AWSV4SignerAuth

-from qtype.
-from qtype.dsl.domain_types import
+from qtype.base.types import PrimitiveTypeEnum
+from qtype.dsl.domain_types import (
+    ChatContent,
+    ChatMessage,
+    RAGDocument,
+    RAGSearchResult,
+)
 from qtype.dsl.model import Memory
-from qtype.interpreter.
-from qtype.
+from qtype.interpreter.auth.aws import aws
+from qtype.interpreter.auth.generic import auth
+from qtype.interpreter.base.secrets import SecretManagerBase
+from qtype.interpreter.types import InterpreterError
+from qtype.semantic.model import (
+    APIKeyAuthProvider,
+    DocumentIndex,
+    DocumentSplitter,
+    Model,
+    VectorIndex,
+)

 from .resource_cache import cached_resource


+def to_llama_document(doc: RAGDocument) -> LlamaDocument:
+    """Convert a RAGDocument to a LlamaDocument."""
+    from llama_index.core.schema import MediaResource
+
+    # Prepare metadata, adding file_name and uri if available
+    metadata = doc.metadata.copy() if doc.metadata else {}
+    if doc.file_name:
+        metadata["file_name"] = doc.file_name
+    if doc.uri:
+        metadata["url"] = (
+            doc.uri
+        )  # url is more commonly used in LlamaIndex metadata
+
+    # Default text content
+    text = ""
+    if isinstance(doc.content, str):
+        text = doc.content
+
+    # Handle different content types
+    if doc.type == PrimitiveTypeEnum.text:
+        # Text content - store as text field
+        return LlamaDocument(text=text, doc_id=doc.file_id, metadata=metadata)
+    elif doc.type == PrimitiveTypeEnum.image and isinstance(
+        doc.content, bytes
+    ):
+        # Image content - store in image_resource
+        return LlamaDocument(
+            text=text,  # Keep text empty or use as description
+            doc_id=doc.file_id,
+            metadata=metadata,
+            image_resource=MediaResource(data=doc.content),
+        )
+    elif doc.type == PrimitiveTypeEnum.audio and isinstance(
+        doc.content, bytes
+    ):
+        # Audio content - store in audio_resource
+        return LlamaDocument(
+            text=text,
+            doc_id=doc.file_id,
+            metadata=metadata,
+            audio_resource=MediaResource(data=doc.content),
+        )
+    elif doc.type == PrimitiveTypeEnum.video and isinstance(
+        doc.content, bytes
+    ):
+        # Video content - store in video_resource
+        return LlamaDocument(
+            text=text,
+            doc_id=doc.file_id,
+            metadata=metadata,
+            video_resource=MediaResource(data=doc.content),
+        )
+    else:
+        # Fallback for other types - store as text
+        return LlamaDocument(
+            text=str(doc.content) if doc.content else "",
+            doc_id=doc.file_id,
+            metadata=metadata,
+        )
+
+
+def from_llama_document(doc: LlamaDocument) -> RAGDocument:
+    """Convert a LlamaDocument to a RAGDocument."""
+    # Extract file_id from doc_id or id_
+    file_id = doc.doc_id
+
+    # Extract file_name from metadata or use file_id as fallback
+    file_name = (
+        doc.metadata.get("file_name", file_id) if doc.metadata else file_id
+    )
+
+    # Extract URI from metadata if available
+    uri = (
+        doc.metadata.get("url") or doc.metadata.get("uri")
+        if doc.metadata
+        else None
+    )
+
+    # Determine content type and extract content based on resource fields
+    content_type = PrimitiveTypeEnum.text
+    content: str | bytes = doc.text  # default to text
+
+    # Check for media resources in priority order
+    if hasattr(doc, "image_resource") and doc.image_resource is not None:
+        content_type = PrimitiveTypeEnum.image
+        # MediaResource has a 'data' field containing the bytes
+        content = (
+            doc.image_resource.data
+            if hasattr(doc.image_resource, "data")
+            else doc.text
+        )  # type: ignore
+    elif hasattr(doc, "audio_resource") and doc.audio_resource is not None:
+        content_type = PrimitiveTypeEnum.audio
+        content = (
+            doc.audio_resource.data
+            if hasattr(doc.audio_resource, "data")
+            else doc.text
+        )  # type: ignore
+    elif hasattr(doc, "video_resource") and doc.video_resource is not None:
+        content_type = PrimitiveTypeEnum.video
+        content = (
+            doc.video_resource.data
+            if hasattr(doc.video_resource, "data")
+            else doc.text
+        )  # type: ignore
+
+    return RAGDocument(
+        content=content,
+        file_id=file_id,
+        file_name=file_name,
+        uri=uri,
+        metadata=doc.metadata.copy() if doc.metadata else {},
+        type=content_type,
+    )
+
+
 @cached_resource
 def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
     return LlamaMemory.from_defaults(
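
A minimal round-trip sketch of the two new document converters above (not part of the diff itself). The RAGDocument field names are taken from the added code; the concrete values are illustrative:

from qtype.base.types import PrimitiveTypeEnum
from qtype.dsl.domain_types import RAGDocument
from qtype.interpreter.conversions import from_llama_document, to_llama_document

# Hypothetical document; only the field names are confirmed by the diff.
doc = RAGDocument(
    content="QType programs are declarative.",
    file_id="doc-001",
    file_name="intro.txt",
    uri="s3://bucket/intro.txt",  # surfaces as metadata["url"] on the LlamaDocument
    metadata={"source": "docs"},
    type=PrimitiveTypeEnum.text,
)

llama_doc = to_llama_document(doc)         # text lands in LlamaDocument.text
restored = from_llama_document(llama_doc)  # round-trips file_id, file_name, url
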
@@ -32,17 +170,38 @@ def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:


 @cached_resource
-def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
-
+def to_llm(
+    model: Model,
+    system_prompt: str | None,
+    secret_manager: SecretManagerBase,
+) -> BaseLLM:
+    """
+    Convert a qtype Model to a LlamaIndex Model.

-
-
-
-
-
-
+    Args:
+        model: The semantic model configuration
+        system_prompt: Optional system prompt for the model
+        secret_manager: Optional secret manager for resolving SecretReferences
+
+    Returns:
+        A LlamaIndex LLM instance
+    """
+
+    if model.provider == "aws-bedrock":
+        from llama_index.llms.bedrock_converse import BedrockConverse
+
+        from qtype.semantic.model import AWSAuthProvider
+
+        if model.auth:
+            # Type hint for mypy - we know it's AWSAuthProvider for aws-bedrock
+            assert isinstance(model.auth, AWSAuthProvider)
+            with aws(model.auth, secret_manager) as session:
+                session = session._session
+        else:
+            session = None

         brv: BaseLLM = BedrockConverse(
+            botocore_session=session,
             model=model.model_id if model.model_id else model.id,
             system_prompt=system_prompt,
             **(model.inference_params if model.inference_params else {}),
@@ -51,45 +210,109 @@ def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
     elif model.provider == "openai":
         from llama_index.llms.openai import OpenAI

+        from qtype.interpreter.auth.generic import auth
+        from qtype.semantic.model import APIKeyAuthProvider
+
+        api_key: str | None = None
+        if model.auth:
+            with auth(model.auth, secret_manager) as provider:
+                if not isinstance(provider, APIKeyAuthProvider):
+                    raise InterpreterError(
+                        f"OpenAI provider requires APIKeyAuthProvider, "
+                        f"got {type(provider).__name__}"
+                    )
+                # api_key is guaranteed to be str after auth() resolves it
+                api_key = provider.api_key  # type: ignore[assignment]
+
         return OpenAI(
             model=model.model_id if model.model_id else model.id,
             system_prompt=system_prompt,
             **(model.inference_params if model.inference_params else {}),
-            api_key=
-            if model.auth
-            else None,
+            api_key=api_key,
         )
     elif model.provider == "anthropic":
         from llama_index.llms.anthropic import (  # type: ignore[import-untyped]
             Anthropic,
         )

+        from qtype.interpreter.auth.generic import auth
+        from qtype.semantic.model import APIKeyAuthProvider
+
+        api_key: str | None = None
+        if model.auth:
+            with auth(model.auth, secret_manager) as provider:
+                if not isinstance(provider, APIKeyAuthProvider):
+                    raise InterpreterError(
+                        f"Anthropic provider requires APIKeyAuthProvider, "
+                        f"got {type(provider).__name__}"
+                    )
+                # api_key is guaranteed to be str after auth() resolves it
+                api_key = provider.api_key  # type: ignore[assignment]
+
         arv: BaseLLM = Anthropic(
             model=model.model_id if model.model_id else model.id,
             system_prompt=system_prompt,
             **(model.inference_params if model.inference_params else {}),
-            api_key=
-            if model.auth
-            else None,
+            api_key=api_key,
         )
         return arv
+    elif model.provider == "gcp-vertex":
+        from llama_index.llms.vertex import Vertex
+
+        project_name = getattr(
+            getattr(model, "auth", None), "profile_name", None
+        )
+
+        vgv: BaseLLM = Vertex(
+            model=model.model_id if model.model_id else model.id,
+            project=project_name,
+            system_prompt=system_prompt,
+            **(model.inference_params if model.inference_params else {}),
+        )
+
+        return vgv
     else:
         raise InterpreterError(
             f"Unsupported model provider: {model.provider}."
         )


+@cached_resource
+def to_vector_store(
+    index: VectorIndex, secret_manager: SecretManagerBase
+) -> BasePydanticVectorStore:
+    """Convert a qtype Index to a LlamaIndex vector store."""
+    module_path = ".".join(index.module.split(".")[:-1])
+    class_name = index.module.split(".")[-1]
+    # Dynamically import the reader module
+    try:
+        reader_module = importlib.import_module(module_path)
+        reader_class = getattr(reader_module, class_name)
+    except (ImportError, AttributeError) as e:
+        raise ImportError(
+            f"Failed to import reader class '{class_name}' from '{module_path}': {e}"
+        ) from e
+
+    # Resolve any SecretReferences in args
+    context = f"index '{index.id}'"
+    resolved_args = secret_manager.resolve_secrets_in_dict(index.args, context)
+    index_instance = reader_class(**resolved_args)
+
+    return index_instance
+
+
 @cached_resource
 def to_embedding_model(model: Model) -> BaseEmbedding:
     """Convert a qtype Model to a LlamaIndex embedding model."""

-    if model.provider
+    if model.provider == "aws-bedrock":
         from llama_index.embeddings.bedrock import (  # type: ignore[import-untyped]
             BedrockEmbedding,
         )

         bedrock_embedding: BaseEmbedding = BedrockEmbedding(
-            model_name=model.model_id if model.model_id else model.id
+            model_name=model.model_id if model.model_id else model.id,
+            max_retries=100,
         )
         return bedrock_embedding
     elif model.provider == "openai":
@@ -107,6 +330,61 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
     )


+@cached_resource
+def to_opensearch_client(
+    index: DocumentIndex, secret_manager: SecretManagerBase
+) -> AsyncOpenSearch:
+    """
+    Convert a DocumentIndex to an OpenSearch/Elasticsearch client.
+
+    Args:
+        index: DocumentIndex configuration with endpoint, auth, etc.
+
+    Returns:
+        OpenSearch client instance configured with authentication
+
+    Raises:
+        InterpreterError: If authentication fails or configuration is invalid
+    """
+    client_kwargs: dict[str, Any] = {
+        "hosts": [index.endpoint],
+        **index.args,
+    }
+
+    # Handle authentication if provided
+    if index.auth:
+        if isinstance(index.auth, APIKeyAuthProvider):
+            # Use API key authentication
+            client_kwargs["api_key"] = index.auth.api_key
+        elif hasattr(index.auth, "type") and index.auth.type == "aws":
+            # Use AWS authentication with boto3 session
+            # Get AWS credentials from auth provider using context manager
+            with auth(index.auth, secret_manager) as auth_session:
+                # Type checker doesn't know this is a boto3.Session
+                # but runtime validation ensures it for AWS auth
+                credentials = auth_session.get_credentials()  # type: ignore
+                if credentials is None:
+                    raise InterpreterError(
+                        f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
+                    )
+
+                # Use opensearch-py's built-in AWS auth
+                aws_auth = AWSV4SignerAuth(
+                    credentials,
+                    auth_session.region_name or "us-east-1",  # type: ignore
+                )
+
+                client_kwargs["http_auth"] = aws_auth
+                client_kwargs["use_ssl"] = True
+                client_kwargs["verify_certs"] = True
+        else:
+            raise InterpreterError(
+                f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
+            )
+
+    return AsyncOpenSearch(**client_kwargs)
+
+
 def to_content_block(content: ChatContent) -> ContentBlock:
     if content.type == PrimitiveTypeEnum.text:
         if isinstance(content.content, str):
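
to_llm now threads a SecretManagerBase through every provider branch instead of reading credentials off the auth provider inline. A sketch of a call site, assuming a secret_manager built elsewhere; the constructor arguments below are illustrative (the field names id, provider, model_id, inference_params, auth, and api_key appear in the diff, the required schema does not):

from qtype.interpreter.conversions import to_llm
from qtype.semantic.model import APIKeyAuthProvider, Model

# Illustrative values; only the field names are visible in the diff above.
model = Model(
    id="gpt-4o",                            # used as the model name when model_id is None
    provider="openai",                      # "aws-bedrock", "anthropic", "gcp-vertex" also handled
    model_id=None,
    inference_params={"temperature": 0.0},
    auth=APIKeyAuthProvider(id="openai-key", api_key="sk-..."),
)

# secret_manager: a SecretManagerBase from qtype.interpreter.base.secrets,
# constructed by the interpreter; its setup is not shown in this diff.
llm = to_llm(model, system_prompt="Be concise.", secret_manager=secret_manager)
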
@@ -132,6 +410,61 @@ def to_content_block(content: ChatContent) -> ContentBlock:
     )


+def variable_to_chat_message(
+    value: Any, variable: Any, default_role: str = "user"
+) -> ChatMessage:
+    """Convert any variable value to a ChatMessage based on the variable's type.
+
+    Args:
+        value: The value to convert (can be any primitive type or ChatMessage)
+        variable: The Variable definition with type information
+        default_role: The default message role to use (default: "user")
+
+    Returns:
+        ChatMessage with appropriate content blocks
+
+    Raises:
+        InterpreterError: If the value type cannot be converted
+    """
+    # If already a ChatMessage, return as-is
+    if isinstance(value, ChatMessage):
+        return value
+
+    # Convert based on the variable's declared type
+    var_type = variable.type
+    # Handle primitive types based on variable declaration
+    if isinstance(var_type, PrimitiveTypeEnum):
+        # Numeric/boolean types get converted to text
+        if var_type in (
+            PrimitiveTypeEnum.int,
+            PrimitiveTypeEnum.float,
+            PrimitiveTypeEnum.boolean,
+        ):
+            content = ChatContent(
+                type=PrimitiveTypeEnum.text, content=str(value)
+            )
+        # All other primitive types pass through as-is
+        else:
+            content = ChatContent(type=var_type, content=value)
+    elif isinstance(var_type, str) and (
+        var_type.startswith("list[") or var_type.startswith("dict[")
+    ):
+        # Handle list and dict types - convert to JSON string
+        import json
+
+        content = ChatContent(
+            type=PrimitiveTypeEnum.text, content=json.dumps(value)
+        )
+    else:
+        # Unsupported type - raise an error
+        raise InterpreterError(
+            f"Cannot convert variable '{variable.id}' of unsupported type "
+            f"'{var_type}' to ChatMessage"
+        )
+
+    return ChatMessage(role=default_role, blocks=[content])  # type: ignore
+
+
 def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
     """Convert a ChatMessage to a LlamaChatMessage."""
     blocks = [to_content_block(content) for content in message.blocks]
@@ -140,7 +473,7 @@ def to_chat_message(message: ChatMessage) -> LlamaChatMessage:

 def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
     """Convert a LlamaChatMessage to a ChatMessage."""
-    blocks = []
+    blocks: list[ChatContent] = []
     for block in message.blocks:
         if isinstance(block, TextBlock):
             blocks.append(
@@ -158,9 +491,125 @@ def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
             blocks.append(
                 ChatContent(type=PrimitiveTypeEnum.file, content=block.data)
             )
+        elif isinstance(block, ThinkingBlock):
+            continue
         else:
             raise InterpreterError(
                 f"Unsupported content block type: {type(block)}"
             )

-    return ChatMessage(role=message.role, blocks=blocks)
+    return ChatMessage(role=message.role, blocks=blocks)
+
+
+def to_text_splitter(splitter: DocumentSplitter) -> Any:
+    """Convert a DocumentSplitter to a LlamaIndex text splitter.
+
+    Args:
+        splitter: The DocumentSplitter configuration.
+
+    Returns:
+        An instance of the appropriate LlamaIndex text splitter class.
+
+    Raises:
+        InterpreterError: If the splitter class cannot be found or instantiated.
+    """
+
+    module_path = "llama_index.core.node_parser"
+    class_name = splitter.splitter_name
+    try:
+        reader_module = importlib.import_module(module_path)
+        splitter_class = getattr(reader_module, class_name)
+    except (ImportError, AttributeError) as e:
+        raise ImportError(
+            f"Failed to import reader class '{class_name}' from '{module_path}': {e}"
+        ) from e
+    from llama_index.core.schema import BaseNode
+
+    # TODO: let the user specify a custom ID namespace
+    namespace = uuid.UUID("12345678-1234-5678-1234-567812345678")
+
+    def id_func(i: int, doc: BaseNode) -> str:
+        u = uuid.uuid5(namespace, f"{doc.node_id}_{i}")
+        return str(u)
+
+    # Prepare arguments for the splitter
+    splitter_args = {
+        "chunk_size": splitter.chunk_size,
+        "chunk_overlap": splitter.chunk_overlap,
+        "id_func": id_func,
+        **splitter.args,
+    }
+
+    # Instantiate and return the splitter
+    try:
+        return splitter_class(**splitter_args)
+    except Exception as e:
+        raise InterpreterError(
+            f"Failed to instantiate {splitter.splitter_name}: {e}"
+        ) from e
+
+
+def to_llama_vector_store_and_retriever(
+    index: VectorIndex, secret_manager: SecretManagerBase
+) -> tuple[BasePydanticVectorStore, Any]:
+    """Create a LlamaIndex vector store and retriever from a VectorIndex.
+
+    Args:
+        index: VectorIndex configuration
+
+    Returns:
+        Tuple of (vector_store, retriever)
+    """
+    from llama_index.core import VectorStoreIndex
+
+    # Get the vector store using existing function
+    vector_store = to_vector_store(index, secret_manager)
+
+    # Get the embedding model
+    embedding_model = to_embedding_model(index.embedding_model)
+
+    # Create a VectorStoreIndex with the vector store and embedding model
+    vector_index = VectorStoreIndex.from_vector_store(
+        vector_store=vector_store,
+        embed_model=embedding_model,
+    )
+
+    # Create retriever with optional top_k configuration
+    retriever = vector_index.as_retriever()
+
+    return vector_store, retriever
+
+
+def from_node_with_score(node_with_score) -> RAGSearchResult:
+    """Convert a LlamaIndex NodeWithScore to a RAGSearchResult.
+
+    Args:
+        node_with_score: LlamaIndex NodeWithScore object
+
+    Returns:
+        RAGSearchResult with chunk and score
+    """
+    from qtype.dsl.domain_types import RAGChunk, RAGSearchResult
+
+    node = node_with_score.node
+
+    # Extract vector if available
+    vector = None
+    if hasattr(node, "embedding") and node.embedding is not None:
+        vector = node.embedding
+
+    # Create RAGChunk from node
+    chunk = RAGChunk(
+        content=node.text or "",
+        chunk_id=node.node_id,
+        document_id=node.metadata.get("document_id", node.node_id),
+        vector=vector,
+        metadata=node.metadata or {},
+    )
+
+    # Wrap in RAGSearchResult with score
+    return RAGSearchResult(
+        content=chunk,
+        doc_id=chunk.document_id,
+        score=node_with_score.score or 0.0,
+    )
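
The conversion rules in variable_to_chat_message can be exercised end to end with a small stand-in for qtype's Variable type, since the function only reads .id and .type; the stand-in class below is hypothetical, not part of the package:

from dataclasses import dataclass
from typing import Any

from qtype.base.types import PrimitiveTypeEnum
from qtype.interpreter.conversions import variable_to_chat_message


@dataclass
class StubVariable:
    # Stand-in for qtype's Variable; only .id and .type are read.
    id: str
    type: Any


# An int is stringified into a text block: one ChatContent with content "42", role "user".
msg = variable_to_chat_message(42, StubVariable(id="count", type=PrimitiveTypeEnum.int))

# list[...]/dict[...] type strings are JSON-encoded into a text block: "[1, 2]".
msg = variable_to_chat_message([1, 2], StubVariable(id="xs", type="list[int]"))
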
qtype/interpreter/converters.py
ADDED

@@ -0,0 +1,79 @@
+"""Converters between DataFrames and FlowMessages."""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+from typing import Any, cast
+
+import pandas as pd
+from pydantic import BaseModel
+
+from qtype.interpreter.types import FlowMessage, Session
+from qtype.semantic.model import Flow
+
+
+async def dataframe_to_flow_messages(
+    df: pd.DataFrame, session: Session
+) -> AsyncIterator[FlowMessage]:
+    """
+    Convert a DataFrame to an async generator of FlowMessages.
+
+    Each row in the DataFrame becomes a FlowMessage with the same session.
+
+    Args:
+        df: DataFrame where each row represents one set of inputs
+        session: Session object to use for all messages
+
+    Yields:
+        FlowMessages, one per DataFrame row
+    """
+    # Use to_dict with orient='records' - much faster than iterrows
+    # This returns a list of dicts directly without Series overhead
+    records = cast(list[dict[str, Any]], df.to_dict(orient="records"))
+
+    for record in records:
+        yield FlowMessage(session=session, variables=record)
+
+
+def flow_messages_to_dataframe(
+    messages: list[FlowMessage], flow: Flow
+) -> pd.DataFrame:
+    """
+    Convert a list of FlowMessages to a DataFrame.
+
+    Extracts output variables from each message based on the flow's outputs.
+
+    Args:
+        messages: List of FlowMessages with results
+        flow: Flow definition containing output variable specifications
+
+    Returns:
+        DataFrame with one row per message, columns for each output variable
+    """
+    results = []
+    for idx, message in enumerate(messages):
+        row_data: dict[str, Any] = {"row": idx}
+
+        # Extract output variables
+        for var in flow.outputs:
+            if var.id in message.variables:
+                value = message.variables[var.id]
+                if isinstance(value, BaseModel):
+                    value = value.model_dump()
+                row_data[var.id] = value
+            else:
+                row_data[var.id] = None
+
+        # Include error if present
+        if message.is_failed():
+            row_data["error"] = (
+                message.error.error_message
+                if message.error
+                else "Unknown error"
+            )
+        else:
+            row_data["error"] = None
+
+        results.append(row_data)
+
+    return pd.DataFrame(results)