qtype 0.0.12__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +476 -11
  3. qtype/application/converters/tools_from_module.py +38 -14
  4. qtype/application/converters/types.py +15 -30
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +102 -85
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +5 -1
  9. qtype/commands/convert.py +52 -6
  10. qtype/commands/generate.py +44 -4
  11. qtype/commands/run.py +78 -36
  12. qtype/commands/serve.py +74 -44
  13. qtype/commands/validate.py +37 -14
  14. qtype/commands/visualize.py +46 -25
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +86 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +751 -263
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +63 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +91 -0
  30. qtype/interpreter/base/factory.py +84 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +471 -22
  36. qtype/interpreter/converters.py +79 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/bedrock_reranker_executor.py +195 -0
  41. qtype/interpreter/executors/decoder_executor.py +163 -0
  42. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  43. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  44. qtype/interpreter/executors/document_search_executor.py +113 -0
  45. qtype/interpreter/executors/document_source_executor.py +118 -0
  46. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  47. qtype/interpreter/executors/echo_executor.py +63 -0
  48. qtype/interpreter/executors/field_extractor_executor.py +165 -0
  49. qtype/interpreter/executors/file_source_executor.py +101 -0
  50. qtype/interpreter/executors/file_writer_executor.py +110 -0
  51. qtype/interpreter/executors/index_upsert_executor.py +232 -0
  52. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  53. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  54. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  55. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  56. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  57. qtype/interpreter/executors/sql_source_executor.py +106 -0
  58. qtype/interpreter/executors/vector_search_executor.py +91 -0
  59. qtype/interpreter/flow.py +173 -22
  60. qtype/interpreter/logging_progress.py +61 -0
  61. qtype/interpreter/metadata_api.py +115 -0
  62. qtype/interpreter/resource_cache.py +5 -4
  63. qtype/interpreter/rich_progress.py +225 -0
  64. qtype/interpreter/stream/chat/__init__.py +15 -0
  65. qtype/interpreter/stream/chat/converter.py +391 -0
  66. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  67. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  68. qtype/interpreter/stream/chat/vercel.py +609 -0
  69. qtype/interpreter/stream/utils/__init__.py +15 -0
  70. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  71. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  72. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  73. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  74. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  75. qtype/interpreter/telemetry.py +135 -8
  76. qtype/interpreter/tools/__init__.py +5 -0
  77. qtype/interpreter/tools/function_tool_helper.py +265 -0
  78. qtype/interpreter/types.py +330 -0
  79. qtype/interpreter/typing.py +83 -89
  80. qtype/interpreter/ui/404/index.html +1 -1
  81. qtype/interpreter/ui/404.html +1 -1
  82. qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  83. qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +36 -0
  84. qtype/interpreter/ui/_next/static/chunks/{964-ed4ab073db645007.js → 964-2b041321a01cbf56.js} +1 -1
  85. qtype/interpreter/ui/_next/static/chunks/app/{layout-5ccbc44fd528d089.js → layout-a05273ead5de2c41.js} +1 -1
  86. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  87. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  88. qtype/interpreter/ui/_next/static/chunks/{main-6d261b6c5d6fb6c2.js → main-e26b9cb206da2cac.js} +1 -1
  89. qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +1 -0
  90. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  91. qtype/interpreter/ui/_next/static/media/4cf2300e9c8272f7-s.p.woff2 +0 -0
  92. qtype/interpreter/ui/icon.png +0 -0
  93. qtype/interpreter/ui/index.html +1 -1
  94. qtype/interpreter/ui/index.txt +5 -5
  95. qtype/semantic/checker.py +643 -0
  96. qtype/semantic/generate.py +268 -85
  97. qtype/semantic/loader.py +95 -0
  98. qtype/semantic/model.py +535 -163
  99. qtype/semantic/resolver.py +63 -19
  100. qtype/semantic/visualize.py +50 -35
  101. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/METADATA +21 -4
  102. qtype-0.1.3.dist-info/RECORD +137 -0
  103. qtype/dsl/base_types.py +0 -38
  104. qtype/dsl/validator.py +0 -464
  105. qtype/interpreter/batch/__init__.py +0 -0
  106. qtype/interpreter/batch/flow.py +0 -95
  107. qtype/interpreter/batch/sql_source.py +0 -95
  108. qtype/interpreter/batch/step.py +0 -63
  109. qtype/interpreter/batch/types.py +0 -41
  110. qtype/interpreter/batch/utils.py +0 -179
  111. qtype/interpreter/chat/chat_api.py +0 -237
  112. qtype/interpreter/chat/vercel.py +0 -314
  113. qtype/interpreter/exceptions.py +0 -10
  114. qtype/interpreter/step.py +0 -67
  115. qtype/interpreter/steps/__init__.py +0 -0
  116. qtype/interpreter/steps/agent.py +0 -114
  117. qtype/interpreter/steps/condition.py +0 -36
  118. qtype/interpreter/steps/decoder.py +0 -88
  119. qtype/interpreter/steps/llm_inference.py +0 -150
  120. qtype/interpreter/steps/prompt_template.py +0 -54
  121. qtype/interpreter/steps/search.py +0 -24
  122. qtype/interpreter/steps/tool.py +0 -53
  123. qtype/interpreter/streaming_helpers.py +0 -123
  124. qtype/interpreter/ui/_next/static/chunks/736-7fc606e244fedcb1.js +0 -36
  125. qtype/interpreter/ui/_next/static/chunks/app/page-c72e847e888e549d.js +0 -1
  126. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  127. qtype/interpreter/ui/_next/static/chunks/webpack-8289c17c67827f22.js +0 -1
  128. qtype/interpreter/ui/_next/static/css/a262c53826df929b.css +0 -3
  129. qtype/interpreter/ui/_next/static/media/569ce4b8f30dc480-s.p.woff2 +0 -0
  130. qtype/interpreter/ui/favicon.ico +0 -0
  131. qtype/loader.py +0 -389
  132. qtype-0.0.12.dist-info/RECORD +0 -105
  133. /qtype/interpreter/ui/_next/static/{OT8QJQW3J70VbDWWfrEMT → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  134. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/WHEEL +0 -0
  135. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/entry_points.txt +0 -0
  136. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/licenses/LICENSE +0 -0
  137. {qtype-0.0.12.dist-info → qtype-0.1.3.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import importlib
4
+ import uuid
5
+ from typing import Any
6
+
3
7
  from llama_index.core.base.embeddings.base import BaseEmbedding
4
8
  from llama_index.core.base.llms.base import BaseLLM
5
9
  from llama_index.core.base.llms.types import AudioBlock
@@ -9,18 +13,152 @@ from llama_index.core.base.llms.types import (
9
13
  DocumentBlock,
10
14
  ImageBlock,
11
15
  TextBlock,
16
+ ThinkingBlock,
12
17
  )
13
18
  from llama_index.core.memory import Memory as LlamaMemory
19
+ from llama_index.core.schema import Document as LlamaDocument
20
+ from llama_index.core.vector_stores.types import BasePydanticVectorStore
21
+ from opensearchpy import AsyncOpenSearch, AWSV4SignerAuth
14
22
 
15
- from qtype.dsl.base_types import PrimitiveTypeEnum
16
- from qtype.dsl.domain_types import ChatContent, ChatMessage
23
+ from qtype.base.types import PrimitiveTypeEnum
24
+ from qtype.dsl.domain_types import (
25
+ ChatContent,
26
+ ChatMessage,
27
+ RAGDocument,
28
+ RAGSearchResult,
29
+ )
17
30
  from qtype.dsl.model import Memory
18
- from qtype.interpreter.exceptions import InterpreterError
19
- from qtype.semantic.model import Model
31
+ from qtype.interpreter.auth.aws import aws
32
+ from qtype.interpreter.auth.generic import auth
33
+ from qtype.interpreter.base.secrets import SecretManagerBase
34
+ from qtype.interpreter.types import InterpreterError
35
+ from qtype.semantic.model import (
36
+ APIKeyAuthProvider,
37
+ DocumentIndex,
38
+ DocumentSplitter,
39
+ Model,
40
+ VectorIndex,
41
+ )
20
42
 
21
43
  from .resource_cache import cached_resource
22
44
 
23
45
 
46
def to_llama_document(doc: RAGDocument) -> LlamaDocument:
    """Build a LlamaIndex document from a qtype RAGDocument.

    The source metadata is copied and extended with ``file_name`` and
    ``url`` (taken from ``doc.uri``) when those are set. Text documents
    are stored in the text field. Image, audio and video documents whose
    content is ``bytes`` are stored in the matching media resource field.
    Everything else falls back to the string form of the content.

    Args:
        doc: The RAGDocument to convert.

    Returns:
        A LlamaDocument whose ``doc_id`` is ``doc.file_id``.
    """
    from llama_index.core.schema import MediaResource

    extra = doc.metadata.copy() if doc.metadata else {}
    if doc.file_name:
        extra["file_name"] = doc.file_name
    if doc.uri:
        # "url" is the key more commonly used in LlamaIndex metadata.
        extra["url"] = doc.uri

    # Only string content is carried over as text; otherwise it stays empty.
    body = doc.content if isinstance(doc.content, str) else ""

    if doc.type == PrimitiveTypeEnum.text:
        return LlamaDocument(text=body, doc_id=doc.file_id, metadata=extra)

    if isinstance(doc.content, bytes):
        # Binary media goes into the resource field matching its type.
        media_fields = (
            (PrimitiveTypeEnum.image, "image_resource"),
            (PrimitiveTypeEnum.audio, "audio_resource"),
            (PrimitiveTypeEnum.video, "video_resource"),
        )
        for kind, field_name in media_fields:
            if doc.type == kind:
                resource = MediaResource(data=doc.content)
                return LlamaDocument(
                    text=body,
                    doc_id=doc.file_id,
                    metadata=extra,
                    **{field_name: resource},
                )

    # Fallback for every other type/content combination: store as text.
    return LlamaDocument(
        text=str(doc.content) if doc.content else "",
        doc_id=doc.file_id,
        metadata=extra,
    )
105
+
106
+
107
def from_llama_document(doc: LlamaDocument) -> RAGDocument:
    """Build a qtype RAGDocument from a LlamaIndex document.

    ``doc.doc_id`` becomes the file id. The file name comes from the
    ``file_name`` metadata entry (falling back to the file id) and the URI
    from the ``url`` or ``uri`` metadata entry. The content defaults to
    ``doc.text``; the first non-None media resource, checked in the order
    image, audio, video, overrides both the content and the reported type.

    Args:
        doc: The LlamaDocument to convert.

    Returns:
        The equivalent RAGDocument, carrying a copy of the metadata.
    """
    identifier = doc.doc_id
    meta = doc.metadata

    if meta:
        name = meta.get("file_name", identifier)
        location = meta.get("url") or meta.get("uri")
    else:
        name = identifier
        location = None

    # Start from plain text and upgrade if a media resource is present.
    kind = PrimitiveTypeEnum.text
    payload: str | bytes = doc.text

    resource_kinds = (
        ("image_resource", PrimitiveTypeEnum.image),
        ("audio_resource", PrimitiveTypeEnum.audio),
        ("video_resource", PrimitiveTypeEnum.video),
    )
    for attr, media_kind in resource_kinds:
        resource = getattr(doc, attr, None)
        if resource is None:
            continue
        kind = media_kind
        # The resource's 'data' attribute is used as the content when present.
        if hasattr(resource, "data"):
            payload = resource.data  # type: ignore
        else:
            payload = doc.text
        break

    return RAGDocument(
        content=payload,
        file_id=identifier,
        file_name=name,
        uri=location,
        metadata=meta.copy() if meta else {},
        type=kind,
    )
160
+
161
+
24
162
  @cached_resource
25
163
  def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
26
164
  return LlamaMemory.from_defaults(
@@ -32,17 +170,38 @@ def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
32
170
 
33
171
 
34
172
  @cached_resource
35
- def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
36
- """Convert a qtype Model to a LlamaIndex Model."""
173
+ def to_llm(
174
+ model: Model,
175
+ system_prompt: str | None,
176
+ secret_manager: SecretManagerBase,
177
+ ) -> BaseLLM:
178
+ """
179
+ Convert a qtype Model to a LlamaIndex Model.
37
180
 
38
- if model.provider in "aws-bedrock":
39
- # BedrockConverse requires a model_id and system_prompt
40
- # Inference params can be passed as additional kwargs
41
- from llama_index.llms.bedrock_converse import ( # type: ignore[import]
42
- BedrockConverse,
43
- )
181
+ Args:
182
+ model: The semantic model configuration
183
+ system_prompt: Optional system prompt for the model
184
+ secret_manager: Optional secret manager for resolving SecretReferences
185
+
186
+ Returns:
187
+ A LlamaIndex LLM instance
188
+ """
189
+
190
+ if model.provider == "aws-bedrock":
191
+ from llama_index.llms.bedrock_converse import BedrockConverse
192
+
193
+ from qtype.semantic.model import AWSAuthProvider
194
+
195
+ if model.auth:
196
+ # Type hint for mypy - we know it's AWSAuthProvider for aws-bedrock
197
+ assert isinstance(model.auth, AWSAuthProvider)
198
+ with aws(model.auth, secret_manager) as session:
199
+ session = session._session
200
+ else:
201
+ session = None
44
202
 
45
203
  brv: BaseLLM = BedrockConverse(
204
+ botocore_session=session,
46
205
  model=model.model_id if model.model_id else model.id,
47
206
  system_prompt=system_prompt,
48
207
  **(model.inference_params if model.inference_params else {}),
@@ -51,45 +210,109 @@ def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
51
210
  elif model.provider == "openai":
52
211
  from llama_index.llms.openai import OpenAI
53
212
 
213
+ from qtype.interpreter.auth.generic import auth
214
+ from qtype.semantic.model import APIKeyAuthProvider
215
+
216
+ api_key: str | None = None
217
+ if model.auth:
218
+ with auth(model.auth, secret_manager) as provider:
219
+ if not isinstance(provider, APIKeyAuthProvider):
220
+ raise InterpreterError(
221
+ f"OpenAI provider requires APIKeyAuthProvider, "
222
+ f"got {type(provider).__name__}"
223
+ )
224
+ # api_key is guaranteed to be str after auth() resolves it
225
+ api_key = provider.api_key # type: ignore[assignment]
226
+
54
227
  return OpenAI(
55
228
  model=model.model_id if model.model_id else model.id,
56
229
  system_prompt=system_prompt,
57
230
  **(model.inference_params if model.inference_params else {}),
58
- api_key=getattr(model.auth, "api_key", None)
59
- if model.auth
60
- else None,
231
+ api_key=api_key,
61
232
  )
62
233
  elif model.provider == "anthropic":
63
234
  from llama_index.llms.anthropic import ( # type: ignore[import-untyped]
64
235
  Anthropic,
65
236
  )
66
237
 
238
+ from qtype.interpreter.auth.generic import auth
239
+ from qtype.semantic.model import APIKeyAuthProvider
240
+
241
+ api_key: str | None = None
242
+ if model.auth:
243
+ with auth(model.auth, secret_manager) as provider:
244
+ if not isinstance(provider, APIKeyAuthProvider):
245
+ raise InterpreterError(
246
+ f"Anthropic provider requires APIKeyAuthProvider, "
247
+ f"got {type(provider).__name__}"
248
+ )
249
+ # api_key is guaranteed to be str after auth() resolves it
250
+ api_key = provider.api_key # type: ignore[assignment]
251
+
67
252
  arv: BaseLLM = Anthropic(
68
253
  model=model.model_id if model.model_id else model.id,
69
254
  system_prompt=system_prompt,
70
255
  **(model.inference_params if model.inference_params else {}),
71
- api_key=getattr(model.auth, "api_key", None)
72
- if model.auth
73
- else None,
256
+ api_key=api_key,
74
257
  )
75
258
  return arv
259
+ elif model.provider == "gcp-vertex":
260
+ from llama_index.llms.vertex import Vertex
261
+
262
+ project_name = getattr(
263
+ getattr(model, "auth", None), "profile_name", None
264
+ )
265
+
266
+ vgv: BaseLLM = Vertex(
267
+ model=model.model_id if model.model_id else model.id,
268
+ project=project_name,
269
+ system_prompt=system_prompt,
270
+ **(model.inference_params if model.inference_params else {}),
271
+ )
272
+
273
+ return vgv
76
274
  else:
77
275
  raise InterpreterError(
78
276
  f"Unsupported model provider: {model.provider}."
79
277
  )
80
278
 
81
279
 
280
@cached_resource
def to_vector_store(
    index: VectorIndex, secret_manager: SecretManagerBase
) -> BasePydanticVectorStore:
    """Instantiate the LlamaIndex vector store described by a VectorIndex.

    ``index.module`` is read as a dotted path: the last component is the
    class name and everything before it is the module to import. The class
    is called with ``index.args`` after ``secret_manager`` has resolved the
    secrets in them.

    Args:
        index: The VectorIndex configuration (``module``, ``args``, ``id``).
        secret_manager: Used to resolve secrets inside ``index.args``.

    Returns:
        The instantiated vector store.

    Raises:
        ImportError: If ``index.module`` is not a dotted ``module.Class``
            path, or the module or class cannot be imported.
    """
    module_path, _, class_name = index.module.rpartition(".")
    if not module_path or not class_name:
        # Without this guard an undotted value reaches import_module(""),
        # which raises ValueError instead of a readable import error.
        raise ImportError(
            f"Invalid vector store module '{index.module}' for index "
            f"'{index.id}': expected a dotted path like 'package.module.Class'"
        )

    # Dynamically import the vector store class.
    try:
        store_module = importlib.import_module(module_path)
        store_class = getattr(store_module, class_name)
    except (ImportError, AttributeError) as e:
        raise ImportError(
            f"Failed to import vector store class '{class_name}' from '{module_path}': {e}"
        ) from e

    # Resolve any SecretReferences in args before handing them to the class.
    context = f"index '{index.id}'"
    resolved_args = secret_manager.resolve_secrets_in_dict(index.args, context)
    return store_class(**resolved_args)
302
+
303
+
82
304
  @cached_resource
83
305
  def to_embedding_model(model: Model) -> BaseEmbedding:
84
306
  """Convert a qtype Model to a LlamaIndex embedding model."""
85
307
 
86
- if model.provider in {"bedrock", "aws", "aws-bedrock"}:
308
+ if model.provider == "aws-bedrock":
87
309
  from llama_index.embeddings.bedrock import ( # type: ignore[import-untyped]
88
310
  BedrockEmbedding,
89
311
  )
90
312
 
91
313
  bedrock_embedding: BaseEmbedding = BedrockEmbedding(
92
- model_name=model.model_id if model.model_id else model.id
314
+ model_name=model.model_id if model.model_id else model.id,
315
+ max_retries=100,
93
316
  )
94
317
  return bedrock_embedding
95
318
  elif model.provider == "openai":
@@ -107,6 +330,61 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
107
330
  )
108
331
 
109
332
 
333
@cached_resource
def to_opensearch_client(
    index: DocumentIndex, secret_manager: SecretManagerBase
) -> AsyncOpenSearch:
    """
    Convert a DocumentIndex to an OpenSearch/Elasticsearch client.

    The client is created with ``index.endpoint`` as its only host plus any
    extra keyword arguments from ``index.args`` (which may override
    ``hosts``). Authentication is added when ``index.auth`` is set.

    Args:
        index: DocumentIndex configuration with endpoint, auth, etc.
        secret_manager: Passed to ``auth()`` when resolving credentials.

    Returns:
        AsyncOpenSearch client instance configured with authentication

    Raises:
        InterpreterError: If AWS credentials cannot be obtained, the API key
            provider cannot be resolved, or the auth type is unsupported
    """
    client_kwargs: dict[str, Any] = {
        "hosts": [index.endpoint],
        **index.args,
    }

    # No authentication configured: connect with the plain settings.
    if not index.auth:
        return AsyncOpenSearch(**client_kwargs)

    if isinstance(index.auth, APIKeyAuthProvider):
        # Go through auth() rather than reading index.auth.api_key directly,
        # matching how the LLM providers in this module obtain their key.
        # NOTE(review): presumably api_key can hold an unresolved secret
        # reference before auth() runs - confirm against auth().
        with auth(index.auth, secret_manager) as provider:
            if not isinstance(provider, APIKeyAuthProvider):
                raise InterpreterError(
                    f"DocumentIndex '{index.id}' requires APIKeyAuthProvider, "
                    f"got {type(provider).__name__}"
                )
            client_kwargs["api_key"] = provider.api_key
    elif hasattr(index.auth, "type") and index.auth.type == "aws":
        # Use AWS authentication with the session yielded by auth().
        with auth(index.auth, secret_manager) as auth_session:
            # Type checker doesn't know this is a boto3.Session
            # but runtime validation ensures it for AWS auth
            credentials = auth_session.get_credentials()  # type: ignore
            if credentials is None:
                raise InterpreterError(
                    f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
                )

            # Use opensearch-py's built-in AWS request signing; the region
            # falls back to us-east-1 when the session has none.
            aws_auth = AWSV4SignerAuth(
                credentials,
                auth_session.region_name or "us-east-1",  # type: ignore
            )

            client_kwargs["http_auth"] = aws_auth
            client_kwargs["use_ssl"] = True
            client_kwargs["verify_certs"] = True
    else:
        raise InterpreterError(
            f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
        )

    return AsyncOpenSearch(**client_kwargs)
386
+
387
+
110
388
  def to_content_block(content: ChatContent) -> ContentBlock:
111
389
  if content.type == PrimitiveTypeEnum.text:
112
390
  if isinstance(content.content, str):
@@ -132,6 +410,61 @@ def to_content_block(content: ChatContent) -> ContentBlock:
132
410
  )
133
411
 
134
412
 
413
def variable_to_chat_message(
    value: Any, variable: Any, default_role: str = "user"
) -> ChatMessage:
    """Wrap a variable's value in a ChatMessage according to its declared type.

    Args:
        value: The value to wrap. A ChatMessage is returned unchanged.
        variable: The variable definition; its ``type`` selects the
            conversion and its ``id`` is used in error messages.
        default_role: Role assigned to the created message (default: "user").

    Returns:
        A ChatMessage holding a single content block.

    Raises:
        InterpreterError: If the declared type is neither a primitive type
            nor a ``list[...]`` / ``dict[...]`` type string.
    """
    if isinstance(value, ChatMessage):
        return value

    declared = variable.type

    if isinstance(declared, PrimitiveTypeEnum):
        stringified_kinds = (
            PrimitiveTypeEnum.int,
            PrimitiveTypeEnum.float,
            PrimitiveTypeEnum.boolean,
        )
        if declared in stringified_kinds:
            # Numbers and booleans are sent as their text representation.
            block = ChatContent(
                type=PrimitiveTypeEnum.text, content=str(value)
            )
        else:
            # Every other primitive keeps its declared type and raw value.
            block = ChatContent(type=declared, content=value)
    elif isinstance(declared, str) and declared.startswith(
        ("list[", "dict[")
    ):
        # Collections are serialized to a JSON string.
        import json

        block = ChatContent(
            type=PrimitiveTypeEnum.text, content=json.dumps(value)
        )
    else:
        raise InterpreterError(
            f"Cannot convert variable '{variable.id}' of unsupported type "
            f"'{declared}' to ChatMessage"
        )

    return ChatMessage(role=default_role, blocks=[block])  # type: ignore
466
+
467
+
135
468
  def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
136
469
  """Convert a ChatMessage to a LlamaChatMessage."""
137
470
  blocks = [to_content_block(content) for content in message.blocks]
@@ -140,7 +473,7 @@ def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
140
473
 
141
474
  def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
142
475
  """Convert a LlamaChatMessage to a ChatMessage."""
143
- blocks = []
476
+ blocks: list[ChatContent] = []
144
477
  for block in message.blocks:
145
478
  if isinstance(block, TextBlock):
146
479
  blocks.append(
@@ -158,9 +491,125 @@ def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
158
491
  blocks.append(
159
492
  ChatContent(type=PrimitiveTypeEnum.file, content=block.data)
160
493
  )
494
+ elif isinstance(block, ThinkingBlock):
495
+ continue
161
496
  else:
162
497
  raise InterpreterError(
163
498
  f"Unsupported content block type: {type(block)}"
164
499
  )
165
500
 
166
- return ChatMessage(role=message.role, blocks=blocks) # type: ignore
501
+ return ChatMessage(role=message.role, blocks=blocks)
502
+
503
+
504
def to_text_splitter(splitter: DocumentSplitter) -> Any:
    """Convert a DocumentSplitter to a LlamaIndex text splitter.

    The class named by ``splitter.splitter_name`` is looked up in
    ``llama_index.core.node_parser`` and instantiated with the configured
    chunk size and overlap, an ``id_func`` that derives chunk ids from the
    parent node id and chunk index via UUIDv5, and any extra
    ``splitter.args`` (which take precedence over those defaults).

    Args:
        splitter: The DocumentSplitter configuration.

    Returns:
        An instance of the appropriate LlamaIndex text splitter class.

    Raises:
        ImportError: If the splitter class cannot be imported or found.
        InterpreterError: If the splitter class cannot be instantiated.
    """

    module_path = "llama_index.core.node_parser"
    class_name = splitter.splitter_name
    try:
        parser_module = importlib.import_module(module_path)
        splitter_class = getattr(parser_module, class_name)
    except (ImportError, AttributeError) as e:
        raise ImportError(
            f"Failed to import splitter class '{class_name}' from '{module_path}': {e}"
        ) from e
    from llama_index.core.schema import BaseNode

    # TODO: let the user specify a custom ID namespace
    namespace = uuid.UUID("12345678-1234-5678-1234-567812345678")

    def id_func(i: int, doc: BaseNode) -> str:
        # uuid5 is a pure function of (namespace, name), so the same node id
        # and chunk index always produce the same chunk id.
        return str(uuid.uuid5(namespace, f"{doc.node_id}_{i}"))

    # User-supplied args come last so they can override the defaults.
    splitter_args = {
        "chunk_size": splitter.chunk_size,
        "chunk_overlap": splitter.chunk_overlap,
        "id_func": id_func,
        **splitter.args,
    }

    try:
        return splitter_class(**splitter_args)
    except Exception as e:
        raise InterpreterError(
            f"Failed to instantiate {splitter.splitter_name}: {e}"
        ) from e
550
+
551
+
552
def to_llama_vector_store_and_retriever(
    index: VectorIndex, secret_manager: SecretManagerBase
) -> tuple[BasePydanticVectorStore, Any]:
    """Build the vector store for a VectorIndex together with a retriever.

    The store comes from ``to_vector_store`` and the embedding model from
    ``to_embedding_model(index.embedding_model)``. Both are combined into a
    ``VectorStoreIndex``, whose default retriever is returned.

    Args:
        index: VectorIndex configuration
        secret_manager: Forwarded to ``to_vector_store``

    Returns:
        Tuple of (vector_store, retriever)
    """
    from llama_index.core import VectorStoreIndex

    store = to_vector_store(index, secret_manager)
    embedder = to_embedding_model(index.embedding_model)

    store_index = VectorStoreIndex.from_vector_store(
        vector_store=store,
        embed_model=embedder,
    )

    # The retriever is created with its default settings.
    return store, store_index.as_retriever()
581
+
582
+
583
def from_node_with_score(node_with_score: Any) -> RAGSearchResult:
    """Convert a LlamaIndex NodeWithScore to a RAGSearchResult.

    The wrapped node becomes a RAGChunk: its text is the content, its
    ``node_id`` the chunk id, and the ``document_id`` metadata entry the
    document id (falling back to the node id). The node's ``embedding``,
    when present, is carried over as the chunk vector.

    Args:
        node_with_score: LlamaIndex NodeWithScore object

    Returns:
        RAGSearchResult with chunk and score; a missing (or zero) score is
        reported as 0.0
    """
    from qtype.dsl.domain_types import RAGChunk, RAGSearchResult

    node = node_with_score.node

    # Normalize metadata once so the document_id lookup below cannot fail
    # on a node whose metadata is None.
    metadata = node.metadata or {}

    chunk = RAGChunk(
        content=node.text or "",
        chunk_id=node.node_id,
        document_id=metadata.get("document_id", node.node_id),
        # None when the node has no embedding attribute or it is unset.
        vector=getattr(node, "embedding", None),
        metadata=metadata,
    )

    return RAGSearchResult(
        content=chunk,
        doc_id=chunk.document_id,
        score=node_with_score.score or 0.0,
    )
@@ -0,0 +1,79 @@
1
+ """Converters between DataFrames and FlowMessages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import AsyncIterator
6
+ from typing import Any, cast
7
+
8
+ import pandas as pd
9
+ from pydantic import BaseModel
10
+
11
+ from qtype.interpreter.types import FlowMessage, Session
12
+ from qtype.semantic.model import Flow
13
+
14
+
15
async def dataframe_to_flow_messages(
    df: pd.DataFrame, session: Session
) -> AsyncIterator[FlowMessage]:
    """
    Yield one FlowMessage per DataFrame row.

    Every message shares the given session; a row's column/value pairs
    become the message's variables.

    Args:
        df: DataFrame where each row represents one set of inputs
        session: Session object attached to every message

    Yields:
        FlowMessages, in DataFrame row order
    """
    # orient="records" gives a list of plain dicts, one per row, avoiding
    # the per-row Series objects that iterrows() would create.
    rows = cast(list[dict[str, Any]], df.to_dict(orient="records"))

    for row in rows:
        yield FlowMessage(session=session, variables=row)
36
+
37
+
38
def flow_messages_to_dataframe(
    messages: list[FlowMessage], flow: Flow
) -> pd.DataFrame:
    """
    Convert a list of FlowMessages to a DataFrame.

    Each message becomes one row holding its position (``row``), one column
    per flow output variable (``None`` when the message lacks it, pydantic
    models dumped to dicts), and an ``error`` column that is ``None`` for
    successful messages.

    Args:
        messages: List of FlowMessages with results
        flow: Flow definition containing output variable specifications

    Returns:
        DataFrame with one row per message, columns for each output
        variable. An empty ``messages`` list yields an empty DataFrame that
        still carries the ``row``, output and ``error`` columns.
    """
    output_ids = [var.id for var in flow.outputs]

    if not messages:
        # A bare pd.DataFrame([]) would have no columns at all; keep the
        # schema so callers can select output columns on empty results.
        # dict.fromkeys drops duplicate names while preserving order.
        columns = list(dict.fromkeys(["row", *output_ids, "error"]))
        return pd.DataFrame(columns=columns)

    results = []
    for idx, message in enumerate(messages):
        row_data: dict[str, Any] = {"row": idx}

        # Extract output variables
        for var_id in output_ids:
            if var_id in message.variables:
                value = message.variables[var_id]
                if isinstance(value, BaseModel):
                    value = value.model_dump()
                row_data[var_id] = value
            else:
                row_data[var_id] = None

        # Include error if present
        if message.is_failed():
            row_data["error"] = (
                message.error.error_message
                if message.error
                else "Unknown error"
            )
        else:
            row_data["error"] = None

        results.append(row_data)

    return pd.DataFrame(results)