qtype 0.0.12__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +476 -11
  3. qtype/application/converters/tools_from_module.py +38 -14
  4. qtype/application/converters/types.py +15 -30
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +102 -85
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +5 -1
  9. qtype/commands/convert.py +52 -6
  10. qtype/commands/generate.py +44 -4
  11. qtype/commands/run.py +78 -36
  12. qtype/commands/serve.py +74 -44
  13. qtype/commands/validate.py +37 -14
  14. qtype/commands/visualize.py +46 -25
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +86 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +753 -264
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +63 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +91 -0
  30. qtype/interpreter/base/factory.py +84 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +495 -24
  36. qtype/interpreter/converters.py +79 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  41. qtype/interpreter/executors/decoder_executor.py +163 -0
  42. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  43. qtype/interpreter/executors/document_embedder_executor.py +123 -0
  44. qtype/interpreter/executors/document_search_executor.py +113 -0
  45. qtype/interpreter/executors/document_source_executor.py +118 -0
  46. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  47. qtype/interpreter/executors/echo_executor.py +63 -0
  48. qtype/interpreter/executors/field_extractor_executor.py +165 -0
  49. qtype/interpreter/executors/file_source_executor.py +101 -0
  50. qtype/interpreter/executors/file_writer_executor.py +110 -0
  51. qtype/interpreter/executors/index_upsert_executor.py +232 -0
  52. qtype/interpreter/executors/invoke_embedding_executor.py +104 -0
  53. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  54. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  55. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  56. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  57. qtype/interpreter/executors/sql_source_executor.py +106 -0
  58. qtype/interpreter/executors/vector_search_executor.py +91 -0
  59. qtype/interpreter/flow.py +172 -22
  60. qtype/interpreter/logging_progress.py +61 -0
  61. qtype/interpreter/metadata_api.py +115 -0
  62. qtype/interpreter/resource_cache.py +5 -4
  63. qtype/interpreter/rich_progress.py +225 -0
  64. qtype/interpreter/stream/chat/__init__.py +15 -0
  65. qtype/interpreter/stream/chat/converter.py +391 -0
  66. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  67. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  68. qtype/interpreter/stream/chat/vercel.py +609 -0
  69. qtype/interpreter/stream/utils/__init__.py +15 -0
  70. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  71. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  72. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  73. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  74. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  75. qtype/interpreter/telemetry.py +135 -8
  76. qtype/interpreter/tools/__init__.py +5 -0
  77. qtype/interpreter/tools/function_tool_helper.py +265 -0
  78. qtype/interpreter/types.py +330 -0
  79. qtype/interpreter/typing.py +83 -89
  80. qtype/interpreter/ui/404/index.html +1 -1
  81. qtype/interpreter/ui/404.html +1 -1
  82. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  83. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
  84. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  85. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  86. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  87. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  88. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  89. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  90. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  91. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  92. qtype/interpreter/ui/icon.png +0 -0
  93. qtype/interpreter/ui/index.html +1 -1
  94. qtype/interpreter/ui/index.txt +5 -5
  95. qtype/semantic/checker.py +643 -0
  96. qtype/semantic/generate.py +268 -85
  97. qtype/semantic/loader.py +95 -0
  98. qtype/semantic/model.py +535 -163
  99. qtype/semantic/resolver.py +63 -19
  100. qtype/semantic/visualize.py +50 -35
  101. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/METADATA +22 -5
  102. qtype-0.1.7.dist-info/RECORD +137 -0
  103. qtype/dsl/base_types.py +0 -38
  104. qtype/dsl/validator.py +0 -464
  105. qtype/interpreter/batch/__init__.py +0 -0
  106. qtype/interpreter/batch/flow.py +0 -95
  107. qtype/interpreter/batch/sql_source.py +0 -95
  108. qtype/interpreter/batch/step.py +0 -63
  109. qtype/interpreter/batch/types.py +0 -41
  110. qtype/interpreter/batch/utils.py +0 -179
  111. qtype/interpreter/chat/chat_api.py +0 -237
  112. qtype/interpreter/chat/vercel.py +0 -314
  113. qtype/interpreter/exceptions.py +0 -10
  114. qtype/interpreter/step.py +0 -67
  115. qtype/interpreter/steps/__init__.py +0 -0
  116. qtype/interpreter/steps/agent.py +0 -114
  117. qtype/interpreter/steps/condition.py +0 -36
  118. qtype/interpreter/steps/decoder.py +0 -88
  119. qtype/interpreter/steps/llm_inference.py +0 -150
  120. qtype/interpreter/steps/prompt_template.py +0 -54
  121. qtype/interpreter/steps/search.py +0 -24
  122. qtype/interpreter/steps/tool.py +0 -53
  123. qtype/interpreter/streaming_helpers.py +0 -123
  124. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  125. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  126. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  127. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  128. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  129. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  130. qtype/interpreter/ui/favicon.ico +0 -0
  131. qtype/loader.py +0 -389
  132. qtype-0.0.12.dist-info/RECORD +0 -105
  133. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  134. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/WHEEL +0 -0
  135. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/entry_points.txt +0 -0
  136. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/licenses/LICENSE +0 -0
  137. {qtype-0.0.12.dist-info → qtype-0.1.7.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import importlib
4
+ import uuid
5
+ from typing import Any
6
+
3
7
  from llama_index.core.base.embeddings.base import BaseEmbedding
4
8
  from llama_index.core.base.llms.base import BaseLLM
5
9
  from llama_index.core.base.llms.types import AudioBlock
@@ -9,18 +13,153 @@ from llama_index.core.base.llms.types import (
9
13
  DocumentBlock,
10
14
  ImageBlock,
11
15
  TextBlock,
16
+ ThinkingBlock,
12
17
  )
13
18
  from llama_index.core.memory import Memory as LlamaMemory
19
+ from llama_index.core.schema import Document as LlamaDocument
20
+ from llama_index.core.vector_stores.types import BasePydanticVectorStore
21
+ from opensearchpy import AsyncOpenSearch, AWSV4SignerAuth
14
22
 
15
- from qtype.dsl.base_types import PrimitiveTypeEnum
16
- from qtype.dsl.domain_types import ChatContent, ChatMessage
23
+ from qtype.base.types import PrimitiveTypeEnum
24
+ from qtype.dsl.domain_types import (
25
+ ChatContent,
26
+ ChatMessage,
27
+ RAGDocument,
28
+ RAGSearchResult,
29
+ )
17
30
  from qtype.dsl.model import Memory
18
- from qtype.interpreter.exceptions import InterpreterError
19
- from qtype.semantic.model import Model
31
+ from qtype.interpreter.auth.aws import aws
32
+ from qtype.interpreter.auth.generic import auth
33
+ from qtype.interpreter.base.secrets import SecretManagerBase
34
+ from qtype.interpreter.types import InterpreterError
35
+ from qtype.semantic.model import (
36
+ APIKeyAuthProvider,
37
+ AWSAuthProvider,
38
+ DocumentIndex,
39
+ DocumentSplitter,
40
+ Model,
41
+ VectorIndex,
42
+ )
20
43
 
21
44
  from .resource_cache import cached_resource
22
45
 
23
46
 
47
def to_llama_document(doc: RAGDocument) -> LlamaDocument:
    """Build a LlamaDocument from a RAGDocument.

    The document's ``file_id`` becomes the ``doc_id``. Metadata is copied and
    extended with ``file_name`` and ``url`` (taken from ``doc.uri``) when those
    are set. Image, audio and video documents whose content is ``bytes`` are
    stored in the matching media resource field; every other combination is
    stored as text.
    """
    from llama_index.core.schema import MediaResource

    # Work on a copy so the source document's metadata is never mutated.
    extras = doc.metadata.copy() if doc.metadata else {}
    if doc.file_name:
        extras["file_name"] = doc.file_name
    if doc.uri:
        # The URI is stored under the "url" key.
        extras["url"] = doc.uri

    raw = doc.content
    shared = {"doc_id": doc.file_id, "metadata": extras}
    # Only string content is used verbatim as the text field.
    plain_text = raw if isinstance(raw, str) else ""

    if doc.type == PrimitiveTypeEnum.text:
        return LlamaDocument(text=plain_text, **shared)

    # Media types map onto the resource field that carries their bytes.
    media_fields = (
        (PrimitiveTypeEnum.image, "image_resource"),
        (PrimitiveTypeEnum.audio, "audio_resource"),
        (PrimitiveTypeEnum.video, "video_resource"),
    )
    if isinstance(raw, bytes):
        for media_type, field_name in media_fields:
            if doc.type == media_type:
                resource = {field_name: MediaResource(data=raw)}
                return LlamaDocument(text=plain_text, **shared, **resource)

    # Anything else (unknown type, or media without bytes) is stringified.
    return LlamaDocument(text=str(raw) if raw else "", **shared)
106
+
107
+
108
def from_llama_document(doc: LlamaDocument) -> RAGDocument:
    """Build a RAGDocument from a LlamaDocument.

    ``doc_id`` is used as the file id. The file name comes from the
    ``file_name`` metadata entry (falling back to the file id) and the URI
    from the ``url`` or ``uri`` metadata entry. The first non-empty media
    resource (image, then audio, then video) decides the content type;
    otherwise the document is treated as text.
    """
    identifier = doc.doc_id
    meta = doc.metadata

    if meta:
        name = meta.get("file_name", identifier)
        location = meta.get("url") or meta.get("uri")
    else:
        name = identifier
        location = None

    # Text is the default until a media resource says otherwise.
    kind = PrimitiveTypeEnum.text
    payload: str | bytes = doc.text

    # Checked in priority order; the first populated resource wins.
    candidates = (
        ("image_resource", PrimitiveTypeEnum.image),
        ("audio_resource", PrimitiveTypeEnum.audio),
        ("video_resource", PrimitiveTypeEnum.video),
    )
    for attr_name, media_kind in candidates:
        resource = getattr(doc, attr_name, None)
        if resource is None:
            continue
        kind = media_kind
        # Fall back to the text when the resource exposes no 'data' field.
        payload = resource.data if hasattr(resource, "data") else doc.text  # type: ignore
        break

    return RAGDocument(
        content=payload,
        file_id=identifier,
        file_name=name,
        uri=location,
        metadata=doc.metadata.copy() if doc.metadata else {},
        type=kind,
    )
161
+
162
+
24
163
  @cached_resource
25
164
  def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
26
165
  return LlamaMemory.from_defaults(
@@ -32,17 +171,38 @@ def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
32
171
 
33
172
 
34
173
  @cached_resource
35
- def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
36
- """Convert a qtype Model to a LlamaIndex Model."""
174
+ def to_llm(
175
+ model: Model,
176
+ system_prompt: str | None,
177
+ secret_manager: SecretManagerBase,
178
+ ) -> BaseLLM:
179
+ """
180
+ Convert a qtype Model to a LlamaIndex Model.
37
181
 
38
- if model.provider in "aws-bedrock":
39
- # BedrockConverse requires a model_id and system_prompt
40
- # Inference params can be passed as additional kwargs
41
- from llama_index.llms.bedrock_converse import ( # type: ignore[import]
42
- BedrockConverse,
43
- )
182
+ Args:
183
+ model: The semantic model configuration
184
+ system_prompt: Optional system prompt for the model
185
+ secret_manager: Optional secret manager for resolving SecretReferences
186
+
187
+ Returns:
188
+ A LlamaIndex LLM instance
189
+ """
190
+
191
+ if model.provider == "aws-bedrock":
192
+ from llama_index.llms.bedrock_converse import BedrockConverse
193
+
194
+ from qtype.semantic.model import AWSAuthProvider
195
+
196
+ if model.auth:
197
+ # Type hint for mypy - we know it's AWSAuthProvider for aws-bedrock
198
+ assert isinstance(model.auth, AWSAuthProvider)
199
+ with aws(model.auth, secret_manager) as session:
200
+ session = session._session
201
+ else:
202
+ session = None
44
203
 
45
204
  brv: BaseLLM = BedrockConverse(
205
+ botocore_session=session,
46
206
  model=model.model_id if model.model_id else model.id,
47
207
  system_prompt=system_prompt,
48
208
  **(model.inference_params if model.inference_params else {}),
@@ -51,28 +211,67 @@ def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
51
211
  elif model.provider == "openai":
52
212
  from llama_index.llms.openai import OpenAI
53
213
 
214
+ from qtype.interpreter.auth.generic import auth
215
+ from qtype.semantic.model import APIKeyAuthProvider
216
+
217
+ api_key: str | None = None
218
+ if model.auth:
219
+ with auth(model.auth, secret_manager) as provider:
220
+ if not isinstance(provider, APIKeyAuthProvider):
221
+ raise InterpreterError(
222
+ f"OpenAI provider requires APIKeyAuthProvider, "
223
+ f"got {type(provider).__name__}"
224
+ )
225
+ # api_key is guaranteed to be str after auth() resolves it
226
+ api_key = provider.api_key # type: ignore[assignment]
227
+
54
228
  return OpenAI(
55
229
  model=model.model_id if model.model_id else model.id,
56
230
  system_prompt=system_prompt,
57
231
  **(model.inference_params if model.inference_params else {}),
58
- api_key=getattr(model.auth, "api_key", None)
59
- if model.auth
60
- else None,
232
+ api_key=api_key,
61
233
  )
62
234
  elif model.provider == "anthropic":
63
235
  from llama_index.llms.anthropic import ( # type: ignore[import-untyped]
64
236
  Anthropic,
65
237
  )
66
238
 
239
+ from qtype.interpreter.auth.generic import auth
240
+ from qtype.semantic.model import APIKeyAuthProvider
241
+
242
+ api_key: str | None = None
243
+ if model.auth:
244
+ with auth(model.auth, secret_manager) as provider:
245
+ if not isinstance(provider, APIKeyAuthProvider):
246
+ raise InterpreterError(
247
+ f"Anthropic provider requires APIKeyAuthProvider, "
248
+ f"got {type(provider).__name__}"
249
+ )
250
+ # api_key is guaranteed to be str after auth() resolves it
251
+ api_key = provider.api_key # type: ignore[assignment]
252
+
67
253
  arv: BaseLLM = Anthropic(
68
254
  model=model.model_id if model.model_id else model.id,
69
255
  system_prompt=system_prompt,
70
256
  **(model.inference_params if model.inference_params else {}),
71
- api_key=getattr(model.auth, "api_key", None)
72
- if model.auth
73
- else None,
257
+ api_key=api_key,
74
258
  )
75
259
  return arv
260
+ elif model.provider == "gcp-vertex":
261
+ from llama_index.llms.vertex import Vertex
262
+
263
+ project_name = getattr(
264
+ getattr(model, "auth", None), "profile_name", None
265
+ )
266
+
267
+ vgv: BaseLLM = Vertex(
268
+ model=model.model_id if model.model_id else model.id,
269
+ project=project_name,
270
+ system_prompt=system_prompt,
271
+ **(model.inference_params if model.inference_params else {}),
272
+ )
273
+
274
+ return vgv
76
275
  else:
77
276
  raise InterpreterError(
78
277
  f"Unsupported model provider: {model.provider}."
@@ -80,16 +279,50 @@ def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
80
279
 
81
280
 
82
281
  @cached_resource
83
- def to_embedding_model(model: Model) -> BaseEmbedding:
282
def to_vector_store(
    index: VectorIndex, secret_manager: SecretManagerBase
) -> BasePydanticVectorStore:
    """Instantiate the vector store class named by a VectorIndex.

    ``index.module`` is a dotted path whose last component is the class name
    and whose leading components are the module to import, i.e.
    ``"<module>.<ClassName>"``.

    Args:
        index: VectorIndex configuration providing ``module``, ``args`` and
            ``id``.
        secret_manager: Resolves any secret references found in
            ``index.args`` before they are passed to the store constructor.

    Returns:
        An instance of the configured class, built from the resolved args.

    Raises:
        ImportError: If ``index.module`` is not a dotted path, the module
            cannot be imported, or it does not define the class.
    """
    module_path, _, class_name = index.module.rpartition(".")
    if not module_path or not class_name:
        # Without this guard an undotted path reaches import_module("") and
        # surfaces as a ValueError instead of the ImportError raised below.
        raise ImportError(
            f"Invalid vector store module '{index.module}': "
            "expected '<module>.<ClassName>'"
        )

    # Dynamically import the vector store module and look up the class
    try:
        store_module = importlib.import_module(module_path)
        store_class = getattr(store_module, class_name)
    except (ImportError, AttributeError) as e:
        raise ImportError(
            f"Failed to import vector store class '{class_name}' from '{module_path}': {e}"
        ) from e

    # Resolve any SecretReferences in args
    context = f"index '{index.id}'"
    resolved_args = secret_manager.resolve_secrets_in_dict(index.args, context)
    return store_class(**resolved_args)
303
+
304
+
305
+ @cached_resource
306
+ def to_embedding_model(
307
+ model: Model, secret_manager: SecretManagerBase
308
+ ) -> BaseEmbedding:
84
309
  """Convert a qtype Model to a LlamaIndex embedding model."""
85
310
 
86
- if model.provider in {"bedrock", "aws", "aws-bedrock"}:
311
+ if model.provider == "aws-bedrock":
87
312
  from llama_index.embeddings.bedrock import ( # type: ignore[import-untyped]
88
313
  BedrockEmbedding,
89
314
  )
90
315
 
316
+ session = None
317
+ if model.auth is not None:
318
+ assert isinstance(model.auth, AWSAuthProvider)
319
+ with aws(model.auth, secret_manager) as session:
320
+ session = session._session
321
+
91
322
  bedrock_embedding: BaseEmbedding = BedrockEmbedding(
92
- model_name=model.model_id if model.model_id else model.id
323
+ botocore_session=session,
324
+ model_name=model.model_id if model.model_id else model.id,
325
+ max_retries=100,
93
326
  )
94
327
  return bedrock_embedding
95
328
  elif model.provider == "openai":
@@ -97,8 +330,20 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
97
330
  OpenAIEmbedding,
98
331
  )
99
332
 
333
+ api_key = None
334
+ if model.auth:
335
+ with auth(model.auth, secret_manager) as provider:
336
+ if not isinstance(provider, APIKeyAuthProvider):
337
+ raise InterpreterError(
338
+ f"OpenAI provider requires APIKeyAuthProvider, "
339
+ f"got {type(provider).__name__}"
340
+ )
341
+ # api_key is guaranteed to be str after auth() resolves it
342
+ api_key = provider.api_key # type: ignore[assignment]
343
+
100
344
  openai_embedding: BaseEmbedding = OpenAIEmbedding(
101
- model_name=model.model_id if model.model_id else model.id
345
+ api_key=api_key,
346
+ model_name=model.model_id if model.model_id else model.id,
102
347
  )
103
348
  return openai_embedding
104
349
  else:
@@ -107,6 +352,61 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
107
352
  )
108
353
 
109
354
 
355
@cached_resource
def to_opensearch_client(
    index: DocumentIndex, secret_manager: SecretManagerBase
) -> AsyncOpenSearch:
    """
    Convert a DocumentIndex to an async OpenSearch client.

    Args:
        index: DocumentIndex configuration. ``endpoint`` becomes the single
            host, ``args`` are forwarded as extra client keyword arguments,
            and ``auth`` (if set) selects the authentication scheme.
        secret_manager: Resolves secret references in ``index.args`` and in
            the auth provider before they reach the client.

    Returns:
        AsyncOpenSearch client instance configured with authentication

    Raises:
        InterpreterError: If AWS credentials cannot be obtained or the auth
            provider type is not supported.
    """
    # Resolve secrets in the extra args the same way to_vector_store does,
    # so unresolved references are never handed to the client.
    context = f"index '{index.id}'"
    client_kwargs: dict[str, Any] = {
        "hosts": [index.endpoint],
        **secret_manager.resolve_secrets_in_dict(index.args, context),
    }

    # Handle authentication if provided
    if index.auth:
        if isinstance(index.auth, APIKeyAuthProvider):
            # Go through auth() so the API key is resolved by the secret
            # manager, matching how the LLM and embedding converters obtain it.
            with auth(index.auth, secret_manager) as provider:
                client_kwargs["api_key"] = provider.api_key  # type: ignore
        elif hasattr(index.auth, "type") and index.auth.type == "aws":
            # Get AWS credentials from auth provider using context manager
            with auth(index.auth, secret_manager) as auth_session:
                # Type checker doesn't know this is a boto3.Session
                # but runtime validation ensures it for AWS auth
                credentials = auth_session.get_credentials()  # type: ignore
                if credentials is None:
                    raise InterpreterError(
                        f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
                    )

                # Use opensearch-py's built-in AWS auth
                aws_auth = AWSV4SignerAuth(
                    credentials,
                    auth_session.region_name or "us-east-1",  # type: ignore
                )

                client_kwargs["http_auth"] = aws_auth
                client_kwargs["use_ssl"] = True
                client_kwargs["verify_certs"] = True
        else:
            raise InterpreterError(
                f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
            )

    return AsyncOpenSearch(**client_kwargs)
408
+
409
+
110
410
  def to_content_block(content: ChatContent) -> ContentBlock:
111
411
  if content.type == PrimitiveTypeEnum.text:
112
412
  if isinstance(content.content, str):
@@ -132,6 +432,61 @@ def to_content_block(content: ChatContent) -> ContentBlock:
132
432
  )
133
433
 
134
434
 
435
def variable_to_chat_message(
    value: Any, variable: Any, default_role: str = "user"
) -> ChatMessage:
    """Wrap a variable's value in a single-block ChatMessage.

    Args:
        value: The value to wrap. A ChatMessage is returned unchanged.
        variable: The variable definition; its ``type`` selects the
            conversion and its ``id`` is used in error messages.
        default_role: Role assigned to the created message (default: "user")

    Returns:
        The original ChatMessage, or a new one holding one content block.

    Raises:
        InterpreterError: If the variable's type is neither a primitive type
            nor a ``list[...]`` / ``dict[...]`` type string.
    """
    # Nothing to do for values that are already messages.
    if isinstance(value, ChatMessage):
        return value

    declared = variable.type

    if isinstance(declared, PrimitiveTypeEnum):
        rendered_as_text = (
            PrimitiveTypeEnum.int,
            PrimitiveTypeEnum.float,
            PrimitiveTypeEnum.boolean,
        )
        if declared in rendered_as_text:
            # Numbers and booleans are sent as their string form.
            block = ChatContent(
                type=PrimitiveTypeEnum.text, content=str(value)
            )
        else:
            # Every other primitive keeps its declared type and raw value.
            block = ChatContent(type=declared, content=value)
    elif isinstance(declared, str) and declared.startswith(
        ("list[", "dict[")
    ):
        import json

        # Collections are serialised to a JSON text block.
        block = ChatContent(
            type=PrimitiveTypeEnum.text, content=json.dumps(value)
        )
    else:
        raise InterpreterError(
            f"Cannot convert variable '{variable.id}' of unsupported type "
            f"'{declared}' to ChatMessage"
        )

    return ChatMessage(role=default_role, blocks=[block])  # type: ignore
488
+
489
+
135
490
  def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
136
491
  """Convert a ChatMessage to a LlamaChatMessage."""
137
492
  blocks = [to_content_block(content) for content in message.blocks]
@@ -140,7 +495,7 @@ def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
140
495
 
141
496
  def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
142
497
  """Convert a LlamaChatMessage to a ChatMessage."""
143
- blocks = []
498
+ blocks: list[ChatContent] = []
144
499
  for block in message.blocks:
145
500
  if isinstance(block, TextBlock):
146
501
  blocks.append(
@@ -158,9 +513,125 @@ def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
158
513
  blocks.append(
159
514
  ChatContent(type=PrimitiveTypeEnum.file, content=block.data)
160
515
  )
516
+ elif isinstance(block, ThinkingBlock):
517
+ continue
161
518
  else:
162
519
  raise InterpreterError(
163
520
  f"Unsupported content block type: {type(block)}"
164
521
  )
165
522
 
166
- return ChatMessage(role=message.role, blocks=blocks) # type: ignore
523
+ return ChatMessage(role=message.role, blocks=blocks)
524
+
525
+
526
def to_text_splitter(splitter: DocumentSplitter) -> Any:
    """Convert a DocumentSplitter to a LlamaIndex text splitter.

    The class named by ``splitter.splitter_name`` is looked up in
    ``llama_index.core.node_parser`` and instantiated with the configured
    chunk size and overlap, a deterministic ID function, and any extra
    ``splitter.args`` (which override the former on key collisions).

    Args:
        splitter: The DocumentSplitter configuration.

    Returns:
        An instance of the requested splitter class.

    Raises:
        ImportError: If the node parser module cannot be imported or does
            not define ``splitter.splitter_name``.
        InterpreterError: If the splitter class cannot be instantiated.
    """

    module_path = "llama_index.core.node_parser"
    class_name = splitter.splitter_name
    try:
        parser_module = importlib.import_module(module_path)
        splitter_class = getattr(parser_module, class_name)
    except (ImportError, AttributeError) as e:
        raise ImportError(
            f"Failed to import splitter class '{class_name}' from '{module_path}': {e}"
        ) from e
    from llama_index.core.schema import BaseNode

    # TODO: let the user specify a custom ID namespace
    namespace = uuid.UUID("12345678-1234-5678-1234-567812345678")

    def id_func(i: int, doc: BaseNode) -> str:
        # uuid5 is name-based, so the same node id and chunk index always
        # produce the same chunk id.
        return str(uuid.uuid5(namespace, f"{doc.node_id}_{i}"))

    # Prepare arguments for the splitter; user-supplied args come last so
    # they take precedence over the defaults above them.
    splitter_args = {
        "chunk_size": splitter.chunk_size,
        "chunk_overlap": splitter.chunk_overlap,
        "id_func": id_func,
        **splitter.args,
    }

    # Instantiate and return the splitter
    try:
        return splitter_class(**splitter_args)
    except Exception as e:
        raise InterpreterError(
            f"Failed to instantiate {splitter.splitter_name}: {e}"
        ) from e
572
+
573
+
574
def to_llama_vector_store_and_retriever(
    index: VectorIndex, secret_manager: SecretManagerBase
) -> tuple[BasePydanticVectorStore, Any]:
    """Build the vector store for a VectorIndex plus a retriever over it.

    Args:
        index: VectorIndex configuration; its ``embedding_model`` is used to
            embed queries.
        secret_manager: Passed through to the vector store and embedding
            model converters.

    Returns:
        Tuple of (vector_store, retriever)
    """
    from llama_index.core import VectorStoreIndex

    # The store is created first, then the embedding model it is paired with.
    store = to_vector_store(index, secret_manager)
    embedder = to_embedding_model(index.embedding_model, secret_manager)

    # Wrap the existing store in an index; the retriever uses default settings.
    wrapped_index = VectorStoreIndex.from_vector_store(
        vector_store=store,
        embed_model=embedder,
    )
    return store, wrapped_index.as_retriever()
603
+
604
+
605
def from_node_with_score(node_with_score: Any) -> RAGSearchResult:
    """Convert a LlamaIndex NodeWithScore to a RAGSearchResult.

    Args:
        node_with_score: Object exposing ``node`` and ``score``; the node
            must provide ``text``, ``node_id`` and ``metadata``.

    Returns:
        RAGSearchResult wrapping a RAGChunk built from the node. A missing
        score is reported as 0.0, and the chunk's document id falls back to
        the node id when the metadata has no ``document_id`` entry.
    """
    from qtype.dsl.domain_types import RAGChunk, RAGSearchResult

    node = node_with_score.node

    # Normalise once so a missing metadata mapping cannot break the
    # document_id lookup below.
    metadata = node.metadata or {}

    # Extract vector if available
    vector = getattr(node, "embedding", None)

    # Create RAGChunk from node
    chunk = RAGChunk(
        content=node.text or "",
        chunk_id=node.node_id,
        document_id=metadata.get("document_id", node.node_id),
        vector=vector,
        metadata=metadata,
    )

    # Wrap in RAGSearchResult with score
    return RAGSearchResult(
        content=chunk,
        doc_id=chunk.document_id,
        score=node_with_score.score or 0.0,
    )
@@ -0,0 +1,79 @@
1
+ """Converters between DataFrames and FlowMessages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import AsyncIterator
6
+ from typing import Any, cast
7
+
8
+ import pandas as pd
9
+ from pydantic import BaseModel
10
+
11
+ from qtype.interpreter.types import FlowMessage, Session
12
+ from qtype.semantic.model import Flow
13
+
14
+
15
async def dataframe_to_flow_messages(
    df: pd.DataFrame, session: Session
) -> AsyncIterator[FlowMessage]:
    """
    Yield one FlowMessage per DataFrame row.

    Args:
        df: DataFrame where each row represents one set of inputs
        session: Session object attached to every yielded message

    Yields:
        FlowMessages whose variables are the row's column/value mapping
    """
    # orient="records" gives one plain dict per row, keyed by column name.
    rows = cast(list[dict[str, Any]], df.to_dict(orient="records"))
    for row in rows:
        yield FlowMessage(session=session, variables=row)
36
+
37
+
38
def flow_messages_to_dataframe(
    messages: list[FlowMessage], flow: Flow
) -> pd.DataFrame:
    """
    Collect the flow's output variables from each message into a DataFrame.

    Args:
        messages: List of FlowMessages with results
        flow: Flow definition whose ``outputs`` name the columns to extract

    Returns:
        DataFrame with one row per message: a ``row`` index column, one
        column per output variable (None when the message lacks it), and an
        ``error`` column (None for messages that did not fail).
    """
    rows = []
    for position, msg in enumerate(messages):
        row: dict[str, Any] = {"row": position}

        for output in flow.outputs:
            if output.id not in msg.variables:
                row[output.id] = None
                continue
            extracted = msg.variables[output.id]
            # Pydantic models are flattened to plain dicts for the frame.
            if isinstance(extracted, BaseModel):
                extracted = extracted.model_dump()
            row[output.id] = extracted

        # Failed messages report their error text; others leave it empty.
        if not msg.is_failed():
            row["error"] = None
        elif msg.error:
            row["error"] = msg.error.error_message
        else:
            row["error"] = "Unknown error"

        rows.append(row)

    return pd.DataFrame(rows)