qtype 0.0.15__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +5 -5
  3. qtype/application/converters/tools_from_module.py +2 -2
  4. qtype/application/converters/types.py +14 -43
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +92 -71
  7. qtype/base/types.py +227 -7
  8. qtype/commands/convert.py +20 -8
  9. qtype/commands/generate.py +19 -27
  10. qtype/commands/run.py +54 -36
  11. qtype/commands/serve.py +74 -54
  12. qtype/commands/validate.py +34 -8
  13. qtype/commands/visualize.py +46 -22
  14. qtype/dsl/__init__.py +6 -5
  15. qtype/dsl/custom_types.py +1 -1
  16. qtype/dsl/domain_types.py +65 -5
  17. qtype/dsl/linker.py +384 -0
  18. qtype/dsl/loader.py +315 -0
  19. qtype/dsl/model.py +612 -363
  20. qtype/dsl/parser.py +200 -0
  21. qtype/dsl/types.py +50 -0
  22. qtype/interpreter/api.py +58 -135
  23. qtype/interpreter/auth/aws.py +19 -9
  24. qtype/interpreter/auth/generic.py +93 -16
  25. qtype/interpreter/base/base_step_executor.py +429 -0
  26. qtype/interpreter/base/batch_step_executor.py +171 -0
  27. qtype/interpreter/base/exceptions.py +50 -0
  28. qtype/interpreter/base/executor_context.py +74 -0
  29. qtype/interpreter/base/factory.py +117 -0
  30. qtype/interpreter/base/progress_tracker.py +75 -0
  31. qtype/interpreter/base/secrets.py +339 -0
  32. qtype/interpreter/base/step_cache.py +73 -0
  33. qtype/interpreter/base/stream_emitter.py +469 -0
  34. qtype/interpreter/conversions.py +455 -21
  35. qtype/interpreter/converters.py +73 -0
  36. qtype/interpreter/endpoints.py +355 -0
  37. qtype/interpreter/executors/agent_executor.py +242 -0
  38. qtype/interpreter/executors/aggregate_executor.py +93 -0
  39. qtype/interpreter/executors/decoder_executor.py +163 -0
  40. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  41. qtype/interpreter/executors/document_embedder_executor.py +75 -0
  42. qtype/interpreter/executors/document_search_executor.py +122 -0
  43. qtype/interpreter/executors/document_source_executor.py +118 -0
  44. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  45. qtype/interpreter/executors/echo_executor.py +63 -0
  46. qtype/interpreter/executors/field_extractor_executor.py +160 -0
  47. qtype/interpreter/executors/file_source_executor.py +101 -0
  48. qtype/interpreter/executors/file_writer_executor.py +110 -0
  49. qtype/interpreter/executors/index_upsert_executor.py +228 -0
  50. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  51. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  52. qtype/interpreter/executors/invoke_tool_executor.py +353 -0
  53. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  54. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  55. qtype/interpreter/executors/sql_source_executor.py +106 -0
  56. qtype/interpreter/executors/vector_search_executor.py +91 -0
  57. qtype/interpreter/flow.py +147 -22
  58. qtype/interpreter/metadata_api.py +115 -0
  59. qtype/interpreter/resource_cache.py +5 -4
  60. qtype/interpreter/stream/chat/__init__.py +15 -0
  61. qtype/interpreter/stream/chat/converter.py +391 -0
  62. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  63. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  64. qtype/interpreter/stream/chat/vercel.py +609 -0
  65. qtype/interpreter/stream/utils/__init__.py +15 -0
  66. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  67. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  68. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  69. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  70. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  71. qtype/interpreter/telemetry.py +135 -8
  72. qtype/interpreter/tools/__init__.py +5 -0
  73. qtype/interpreter/tools/function_tool_helper.py +265 -0
  74. qtype/interpreter/types.py +328 -0
  75. qtype/interpreter/typing.py +83 -89
  76. qtype/interpreter/ui/404/index.html +1 -1
  77. qtype/interpreter/ui/404.html +1 -1
  78. qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  79. qtype/interpreter/ui/_next/static/chunks/{393-8fd474427f8e19ce.js → 434-b2112d19f25c44ff.js} +3 -3
  80. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  81. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  82. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  83. qtype/interpreter/ui/icon.png +0 -0
  84. qtype/interpreter/ui/index.html +1 -1
  85. qtype/interpreter/ui/index.txt +4 -4
  86. qtype/semantic/checker.py +583 -0
  87. qtype/semantic/generate.py +262 -83
  88. qtype/semantic/loader.py +95 -0
  89. qtype/semantic/model.py +436 -159
  90. qtype/semantic/resolver.py +59 -17
  91. qtype/semantic/visualize.py +28 -31
  92. {qtype-0.0.15.dist-info → qtype-0.1.0.dist-info}/METADATA +16 -3
  93. qtype-0.1.0.dist-info/RECORD +134 -0
  94. qtype/dsl/base_types.py +0 -38
  95. qtype/dsl/validator.py +0 -465
  96. qtype/interpreter/batch/__init__.py +0 -0
  97. qtype/interpreter/batch/file_sink_source.py +0 -162
  98. qtype/interpreter/batch/flow.py +0 -95
  99. qtype/interpreter/batch/sql_source.py +0 -92
  100. qtype/interpreter/batch/step.py +0 -74
  101. qtype/interpreter/batch/types.py +0 -41
  102. qtype/interpreter/batch/utils.py +0 -178
  103. qtype/interpreter/chat/chat_api.py +0 -237
  104. qtype/interpreter/chat/vercel.py +0 -314
  105. qtype/interpreter/exceptions.py +0 -10
  106. qtype/interpreter/step.py +0 -67
  107. qtype/interpreter/steps/__init__.py +0 -0
  108. qtype/interpreter/steps/agent.py +0 -114
  109. qtype/interpreter/steps/condition.py +0 -36
  110. qtype/interpreter/steps/decoder.py +0 -88
  111. qtype/interpreter/steps/llm_inference.py +0 -171
  112. qtype/interpreter/steps/prompt_template.py +0 -54
  113. qtype/interpreter/steps/search.py +0 -24
  114. qtype/interpreter/steps/tool.py +0 -219
  115. qtype/interpreter/streaming_helpers.py +0 -123
  116. qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +0 -1
  117. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  118. qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +0 -3
  119. qtype/interpreter/ui/favicon.ico +0 -0
  120. qtype/loader.py +0 -390
  121. qtype-0.0.15.dist-info/RECORD +0 -106
  122. /qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  123. {qtype-0.0.15.dist-info → qtype-0.1.0.dist-info}/WHEEL +0 -0
  124. {qtype-0.0.15.dist-info → qtype-0.1.0.dist-info}/entry_points.txt +0 -0
  125. {qtype-0.0.15.dist-info → qtype-0.1.0.dist-info}/licenses/LICENSE +0 -0
  126. {qtype-0.0.15.dist-info → qtype-0.1.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import importlib
4
+ from typing import Any
5
+
3
6
  from llama_index.core.base.embeddings.base import BaseEmbedding
4
7
  from llama_index.core.base.llms.base import BaseLLM
5
8
  from llama_index.core.base.llms.types import AudioBlock
@@ -9,18 +12,147 @@ from llama_index.core.base.llms.types import (
9
12
  DocumentBlock,
10
13
  ImageBlock,
11
14
  TextBlock,
15
+ ThinkingBlock,
12
16
  )
13
17
  from llama_index.core.memory import Memory as LlamaMemory
18
+ from llama_index.core.schema import Document as LlamaDocument
19
+ from llama_index.core.vector_stores.types import BasePydanticVectorStore
20
+ from opensearchpy import AWSV4SignerAuth, OpenSearch
14
21
 
15
- from qtype.dsl.base_types import PrimitiveTypeEnum
16
- from qtype.dsl.domain_types import ChatContent, ChatMessage
22
+ from qtype.base.types import PrimitiveTypeEnum
23
+ from qtype.dsl.domain_types import ChatContent, ChatMessage, RAGDocument
17
24
  from qtype.dsl.model import Memory
18
- from qtype.interpreter.exceptions import InterpreterError
19
- from qtype.semantic.model import Model
25
+ from qtype.interpreter.auth.aws import aws
26
+ from qtype.interpreter.auth.generic import auth
27
+ from qtype.interpreter.base.secrets import SecretManagerBase
28
+ from qtype.interpreter.types import InterpreterError
29
+ from qtype.semantic.model import (
30
+ APIKeyAuthProvider,
31
+ DocumentIndex,
32
+ DocumentSplitter,
33
+ Model,
34
+ VectorIndex,
35
+ )
20
36
 
21
37
  from .resource_cache import cached_resource
22
38
 
23
39
 
40
def to_llama_document(doc: RAGDocument) -> LlamaDocument:
    """Build a LlamaIndex document from a qtype RAGDocument.

    ``file_name`` and ``uri`` (stored under the ``url`` key) are added to a
    copy of the metadata when they are set. Text documents carry their
    content in ``text``; image, audio and video documents whose content is
    ``bytes`` carry it in the matching ``*_resource`` field. Every other
    combination falls back to the string form of the content.
    """
    from llama_index.core.schema import MediaResource

    # Work on a copy so the metadata held by ``doc`` is not mutated.
    metadata = doc.metadata.copy() if doc.metadata else {}
    if doc.file_name:
        metadata["file_name"] = doc.file_name
    if doc.uri:
        # "url" is the key more commonly used in LlamaIndex metadata.
        metadata["url"] = doc.uri

    raw = doc.content
    text = raw if isinstance(raw, str) else ""
    shared = {"doc_id": doc.file_id, "metadata": metadata}

    if doc.type == PrimitiveTypeEnum.text:
        return LlamaDocument(text=text, **shared)

    if isinstance(raw, bytes):
        # Each binary media type maps onto one resource field of the document.
        media_fields = (
            (PrimitiveTypeEnum.image, "image_resource"),
            (PrimitiveTypeEnum.audio, "audio_resource"),
            (PrimitiveTypeEnum.video, "video_resource"),
        )
        for media_type, field_name in media_fields:
            if doc.type == media_type:
                resource = MediaResource(data=raw)
                return LlamaDocument(
                    text=text, **shared, **{field_name: resource}
                )

    # Fallback for every other type/content combination: store as text.
    return LlamaDocument(text=str(raw) if raw else "", **shared)
99
+
100
+
101
def from_llama_document(doc: LlamaDocument) -> RAGDocument:
    """Build a qtype RAGDocument from a LlamaIndex document.

    The file id comes from ``doc.doc_id``. The file name is read from the
    ``file_name`` metadata key (defaulting to the file id) and the URI from
    the ``url`` or ``uri`` metadata key. The content defaults to the
    document text; the first non-None resource among image, audio and video
    (in that order) replaces it and sets the content type accordingly.
    """
    meta = doc.metadata
    file_id = doc.doc_id

    if meta:
        file_name = meta.get("file_name", file_id)
        uri = meta.get("url") or meta.get("uri")
    else:
        file_name = file_id
        uri = None

    # Start from plain text; a media resource below may override both values.
    content_type = PrimitiveTypeEnum.text
    content: str | bytes = doc.text

    # Priority order of the resource fields that can override the text.
    media_fields = (
        ("image_resource", PrimitiveTypeEnum.image),
        ("audio_resource", PrimitiveTypeEnum.audio),
        ("video_resource", PrimitiveTypeEnum.video),
    )
    for field_name, media_type in media_fields:
        resource = getattr(doc, field_name, None)
        if resource is None:
            continue
        content_type = media_type
        # The resource's ``data`` attribute holds the payload when present.
        if hasattr(resource, "data"):
            content = resource.data  # type: ignore
        else:
            content = doc.text
        break

    return RAGDocument(
        content=content,
        file_id=file_id,
        file_name=file_name,
        uri=uri,
        metadata=meta.copy() if meta else {},
        type=content_type,
    )
154
+
155
+
24
156
  @cached_resource
25
157
  def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
26
158
  return LlamaMemory.from_defaults(
@@ -32,17 +164,38 @@ def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
32
164
 
33
165
 
34
166
  @cached_resource
35
- def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
36
- """Convert a qtype Model to a LlamaIndex Model."""
167
+ def to_llm(
168
+ model: Model,
169
+ system_prompt: str | None,
170
+ secret_manager: SecretManagerBase,
171
+ ) -> BaseLLM:
172
+ """
173
+ Convert a qtype Model to a LlamaIndex Model.
37
174
 
38
- if model.provider in "aws-bedrock":
39
- # BedrockConverse requires a model_id and system_prompt
40
- # Inference params can be passed as additional kwargs
41
- from llama_index.llms.bedrock_converse import ( # type: ignore[import]
42
- BedrockConverse,
43
- )
175
+ Args:
176
+ model: The semantic model configuration
177
+ system_prompt: Optional system prompt for the model
178
+ secret_manager: Optional secret manager for resolving SecretReferences
179
+
180
+ Returns:
181
+ A LlamaIndex LLM instance
182
+ """
183
+
184
+ if model.provider == "aws-bedrock":
185
+ from llama_index.llms.bedrock_converse import BedrockConverse
186
+
187
+ from qtype.semantic.model import AWSAuthProvider
188
+
189
+ if model.auth:
190
+ # Type hint for mypy - we know it's AWSAuthProvider for aws-bedrock
191
+ assert isinstance(model.auth, AWSAuthProvider)
192
+ with aws(model.auth, secret_manager) as session:
193
+ session = session._session
194
+ else:
195
+ session = None
44
196
 
45
197
  brv: BaseLLM = BedrockConverse(
198
+ botocore_session=session,
46
199
  model=model.model_id if model.model_id else model.id,
47
200
  system_prompt=system_prompt,
48
201
  **(model.inference_params if model.inference_params else {}),
@@ -51,39 +204,102 @@ def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
51
204
  elif model.provider == "openai":
52
205
  from llama_index.llms.openai import OpenAI
53
206
 
207
+ from qtype.interpreter.auth.generic import auth
208
+ from qtype.semantic.model import APIKeyAuthProvider
209
+
210
+ api_key: str | None = None
211
+ if model.auth:
212
+ with auth(model.auth, secret_manager) as provider:
213
+ if not isinstance(provider, APIKeyAuthProvider):
214
+ raise InterpreterError(
215
+ f"OpenAI provider requires APIKeyAuthProvider, "
216
+ f"got {type(provider).__name__}"
217
+ )
218
+ # api_key is guaranteed to be str after auth() resolves it
219
+ api_key = provider.api_key # type: ignore[assignment]
220
+
54
221
  return OpenAI(
55
222
  model=model.model_id if model.model_id else model.id,
56
223
  system_prompt=system_prompt,
57
224
  **(model.inference_params if model.inference_params else {}),
58
- api_key=getattr(model.auth, "api_key", None)
59
- if model.auth
60
- else None,
225
+ api_key=api_key,
61
226
  )
62
227
  elif model.provider == "anthropic":
63
228
  from llama_index.llms.anthropic import ( # type: ignore[import-untyped]
64
229
  Anthropic,
65
230
  )
66
231
 
232
+ from qtype.interpreter.auth.generic import auth
233
+ from qtype.semantic.model import APIKeyAuthProvider
234
+
235
+ api_key: str | None = None
236
+ if model.auth:
237
+ with auth(model.auth, secret_manager) as provider:
238
+ if not isinstance(provider, APIKeyAuthProvider):
239
+ raise InterpreterError(
240
+ f"Anthropic provider requires APIKeyAuthProvider, "
241
+ f"got {type(provider).__name__}"
242
+ )
243
+ # api_key is guaranteed to be str after auth() resolves it
244
+ api_key = provider.api_key # type: ignore[assignment]
245
+
67
246
  arv: BaseLLM = Anthropic(
68
247
  model=model.model_id if model.model_id else model.id,
69
248
  system_prompt=system_prompt,
70
249
  **(model.inference_params if model.inference_params else {}),
71
- api_key=getattr(model.auth, "api_key", None)
72
- if model.auth
73
- else None,
250
+ api_key=api_key,
74
251
  )
75
252
  return arv
253
+ elif model.provider == "gcp-vertex":
254
+ from llama_index.llms.vertex import Vertex
255
+
256
+ project_name = getattr(
257
+ getattr(model, "auth", None), "profile_name", None
258
+ )
259
+
260
+ vgv: BaseLLM = Vertex(
261
+ model=model.model_id if model.model_id else model.id,
262
+ project=project_name,
263
+ system_prompt=system_prompt,
264
+ **(model.inference_params if model.inference_params else {}),
265
+ )
266
+
267
+ return vgv
76
268
  else:
77
269
  raise InterpreterError(
78
270
  f"Unsupported model provider: {model.provider}."
79
271
  )
80
272
 
81
273
 
274
@cached_resource
def to_vector_store(
    index: VectorIndex, secret_manager: SecretManagerBase
) -> BasePydanticVectorStore:
    """Instantiate the LlamaIndex vector store described by a VectorIndex.

    ``index.module`` is a dotted path whose last component is the class name
    and whose prefix is the module to import. ``index.args`` is passed to the
    class constructor after secret references in it have been resolved.

    Args:
        index: VectorIndex configuration providing ``module``, ``args``
            and ``id``.
        secret_manager: Used to resolve secret references found in
            ``index.args``.

    Returns:
        The instantiated vector store.

    Raises:
        ImportError: If ``index.module`` is not a dotted ``module.Class``
            path, or the module or class cannot be imported.
    """
    module_path, _, class_name = index.module.rpartition(".")
    if not module_path or not class_name:
        # Without this guard importlib raises a bare ValueError for the
        # empty module name, which bypasses the ImportError handling below.
        raise ImportError(
            f"Invalid vector store class path '{index.module}' for "
            f"index '{index.id}': expected 'package.module.ClassName'"
        )

    # Dynamically import the vector store class
    try:
        store_module = importlib.import_module(module_path)
        store_class = getattr(store_module, class_name)
    except (ImportError, AttributeError) as e:
        raise ImportError(
            f"Failed to import vector store class '{class_name}' from '{module_path}': {e}"
        ) from e

    # Resolve any SecretReferences in args
    context = f"index '{index.id}'"
    resolved_args = secret_manager.resolve_secrets_in_dict(index.args, context)
    return store_class(**resolved_args)
296
+
297
+
82
298
  @cached_resource
83
299
  def to_embedding_model(model: Model) -> BaseEmbedding:
84
300
  """Convert a qtype Model to a LlamaIndex embedding model."""
85
301
 
86
- if model.provider in {"bedrock", "aws", "aws-bedrock"}:
302
+ if model.provider == "aws-bedrock":
87
303
  from llama_index.embeddings.bedrock import ( # type: ignore[import-untyped]
88
304
  BedrockEmbedding,
89
305
  )
@@ -107,6 +323,61 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
107
323
  )
108
324
 
109
325
 
326
@cached_resource
def to_opensearch_client(
    index: DocumentIndex, secret_manager: SecretManagerBase
) -> OpenSearch:
    """
    Convert a DocumentIndex to an OpenSearch client.

    Args:
        index: DocumentIndex configuration with endpoint, args and
            optional auth.
        secret_manager: Secret manager handed to ``auth()`` so that
            credentials are resolved before they reach the client.

    Returns:
        OpenSearch client instance configured with authentication

    Raises:
        InterpreterError: If AWS credentials cannot be obtained or the
            auth provider type is not supported.
    """
    client_kwargs: dict[str, Any] = {
        "hosts": [index.endpoint],
        **index.args,
    }

    # Handle authentication if provided
    if index.auth:
        if isinstance(index.auth, APIKeyAuthProvider):
            # Resolve the key through auth(), as the LLM providers in this
            # module do, so a secret reference is never passed to the
            # client unresolved.
            with auth(index.auth, secret_manager) as provider:
                client_kwargs["api_key"] = provider.api_key  # type: ignore
        elif hasattr(index.auth, "type") and index.auth.type == "aws":
            # Get AWS credentials from the auth provider's session
            with auth(index.auth, secret_manager) as auth_session:
                # Type checker doesn't know this is a boto3.Session
                # but runtime validation ensures it for AWS auth
                credentials = auth_session.get_credentials()  # type: ignore
                if credentials is None:
                    raise InterpreterError(
                        f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
                    )

                # Use opensearch-py's built-in AWS request signing
                aws_auth = AWSV4SignerAuth(
                    credentials,
                    auth_session.region_name or "us-east-1",  # type: ignore
                )

                client_kwargs["http_auth"] = aws_auth
                client_kwargs["use_ssl"] = True
                client_kwargs["verify_certs"] = True
        else:
            raise InterpreterError(
                f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
            )

    return OpenSearch(**client_kwargs)
379
+
380
+
110
381
  def to_content_block(content: ChatContent) -> ContentBlock:
111
382
  if content.type == PrimitiveTypeEnum.text:
112
383
  if isinstance(content.content, str):
@@ -132,6 +403,61 @@ def to_content_block(content: ChatContent) -> ContentBlock:
132
403
  )
133
404
 
134
405
 
406
def variable_to_chat_message(
    value: Any, variable: Any, default_role: str = "user"
) -> ChatMessage:
    """Wrap a variable's value in a single-block ChatMessage.

    Args:
        value: The value to wrap; an existing ChatMessage is returned as-is.
        variable: The variable definition; its ``type`` selects the
            conversion and its ``id`` is used in error messages.
        default_role: Role assigned to the created message.

    Returns:
        The original ChatMessage, or a new one holding one content block.

    Raises:
        InterpreterError: If the variable's type is neither a primitive type
            nor a ``list[...]`` / ``dict[...]`` type string.
    """
    if isinstance(value, ChatMessage):
        return value

    declared = variable.type

    if isinstance(declared, PrimitiveTypeEnum):
        rendered_as_text = (
            PrimitiveTypeEnum.int,
            PrimitiveTypeEnum.float,
            PrimitiveTypeEnum.boolean,
        )
        if declared in rendered_as_text:
            # Numbers and booleans are sent as their string form.
            block = ChatContent(
                type=PrimitiveTypeEnum.text, content=str(value)
            )
        else:
            # Every other primitive keeps its declared type and raw value.
            block = ChatContent(type=declared, content=value)
    elif isinstance(declared, str) and declared.startswith(
        ("list[", "dict[")
    ):
        import json

        # Collections are serialized to a JSON string.
        block = ChatContent(
            type=PrimitiveTypeEnum.text, content=json.dumps(value)
        )
    else:
        raise InterpreterError(
            f"Cannot convert variable '{variable.id}' of unsupported type "
            f"'{declared}' to ChatMessage"
        )

    return ChatMessage(role=default_role, blocks=[block])  # type: ignore
459
+
460
+
135
461
  def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
136
462
  """Convert a ChatMessage to a LlamaChatMessage."""
137
463
  blocks = [to_content_block(content) for content in message.blocks]
@@ -140,7 +466,7 @@ def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
140
466
 
141
467
  def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
142
468
  """Convert a LlamaChatMessage to a ChatMessage."""
143
- blocks = []
469
+ blocks: list[ChatContent] = []
144
470
  for block in message.blocks:
145
471
  if isinstance(block, TextBlock):
146
472
  blocks.append(
@@ -158,9 +484,117 @@ def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
158
484
  blocks.append(
159
485
  ChatContent(type=PrimitiveTypeEnum.file, content=block.data)
160
486
  )
487
+ elif isinstance(block, ThinkingBlock):
488
+ continue
161
489
  else:
162
490
  raise InterpreterError(
163
491
  f"Unsupported content block type: {type(block)}"
164
492
  )
165
493
 
166
- return ChatMessage(role=message.role, blocks=blocks) # type: ignore
494
+ return ChatMessage(role=message.role, blocks=blocks)
495
+
496
+
497
def to_text_splitter(splitter: DocumentSplitter) -> Any:
    """Instantiate the LlamaIndex text splitter named by a DocumentSplitter.

    Args:
        splitter: Configuration providing ``splitter_name``, ``chunk_size``,
            ``chunk_overlap`` and extra ``args``.

    Returns:
        An instance of the selected LlamaIndex splitter class.

    Raises:
        InterpreterError: If the splitter name is not supported or the
            splitter cannot be instantiated.
    """
    from llama_index.core.node_parser import SentenceSplitter

    # Registry of supported splitter names.
    registry = {
        "SentenceSplitter": SentenceSplitter,
    }

    name = splitter.splitter_name
    if name not in registry:
        raise InterpreterError(
            f"Unsupported text splitter: {name}. "
            f"Supported splitters: {', '.join(registry)}"
        )
    factory = registry[name]

    # Explicit chunk settings first; entries in ``args`` are applied last
    # and therefore win on a key clash.
    kwargs = {
        "chunk_size": splitter.chunk_size,
        "chunk_overlap": splitter.chunk_overlap,
    }
    kwargs.update(splitter.args)

    try:
        instance = factory(**kwargs)
    except Exception as e:
        raise InterpreterError(
            f"Failed to instantiate {splitter.splitter_name}: {e}"
        ) from e
    return instance
539
+
540
+
541
def to_llama_vector_store_and_retriever(
    index: VectorIndex, secret_manager: SecretManagerBase
) -> tuple[BasePydanticVectorStore, Any]:
    """Build the vector store for a VectorIndex plus a retriever over it.

    Args:
        index: VectorIndex configuration; its ``embedding_model`` is used to
            embed queries.
        secret_manager: Passed through to ``to_vector_store``.

    Returns:
        Tuple of (vector_store, retriever). The retriever is created with
        ``as_retriever()`` and no extra arguments.
    """
    from llama_index.core import VectorStoreIndex

    store = to_vector_store(index, secret_manager)

    # Wrap the existing store in an index bound to the embedding model.
    store_index = VectorStoreIndex.from_vector_store(
        vector_store=store,
        embed_model=to_embedding_model(index.embedding_model),
    )

    return store, store_index.as_retriever()
570
+
571
+
572
def from_node_with_score(node_with_score) -> Any:
    """Convert a LlamaIndex NodeWithScore to a RAGSearchResult.

    Args:
        node_with_score: Object exposing ``node`` and ``score``; the node's
            ``text``, ``node_id``, ``metadata`` and (if present)
            ``embedding`` are read.

    Returns:
        RAGSearchResult wrapping a RAGChunk built from the node. A falsy
        score is reported as 0.0.
    """
    from qtype.dsl.domain_types import RAGChunk, RAGSearchResult

    node = node_with_score.node

    # Normalize once so the document_id lookup and the chunk metadata agree;
    # calling .get() on a None metadata would raise.
    metadata = node.metadata or {}

    chunk = RAGChunk(
        content=node.text or "",
        chunk_id=node.node_id,
        # Fall back to the node's own id when no document_id was recorded.
        document_id=metadata.get("document_id", node.node_id),
        # None when the node has no embedding attribute or it is unset.
        vector=getattr(node, "embedding", None),
        metadata=metadata,
    )

    return RAGSearchResult(chunk=chunk, score=node_with_score.score or 0.0)
@@ -0,0 +1,73 @@
1
+ """Converters between DataFrames and FlowMessages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pandas as pd
6
+
7
+ from qtype.interpreter.types import FlowMessage, Session
8
+ from qtype.semantic.model import Flow
9
+
10
+
11
def dataframe_to_flow_messages(
    df: pd.DataFrame, session: Session
) -> list[FlowMessage]:
    """
    Turn every DataFrame row into a FlowMessage.

    Args:
        df: DataFrame where each row is one set of input variables
        session: Session object attached to every created message

    Returns:
        One FlowMessage per row, in row order, whose variables are the
        row's column/value pairs
    """
    return [
        FlowMessage(session=session, variables=row.to_dict())
        for _, row in df.iterrows()
    ]
31
+
32
+
33
def flow_messages_to_dataframe(
    messages: list[FlowMessage], flow: Flow
) -> pd.DataFrame:
    """
    Convert a list of FlowMessages to a DataFrame.

    Each message becomes one row holding its position (``row``), the value
    of every output variable declared by the flow (None when the message
    does not carry it) and an ``error`` column.

    Args:
        messages: List of FlowMessages with results
        flow: Flow definition containing output variable specifications

    Returns:
        DataFrame with one row per message and the columns ``row``, one per
        output variable, and ``error``. The columns are present even when
        ``messages`` is empty.
    """
    from typing import Any

    output_ids = [var.id for var in flow.outputs]
    # Fix the column layout up front so an empty message list still yields
    # the expected columns; dict.fromkeys drops duplicates, keeping order.
    columns = list(dict.fromkeys(["row", *output_ids, "error"]))

    results: list[dict[str, Any]] = []
    for idx, message in enumerate(messages):
        row_data: dict[str, Any] = {"row": idx}

        # Extract output variables, defaulting to None when absent
        for var_id in output_ids:
            row_data[var_id] = message.variables.get(var_id)

        # Include error if present
        if message.is_failed():
            row_data["error"] = (
                message.error.error_message
                if message.error
                else "Unknown error"
            )
        else:
            row_data["error"] = None

        results.append(row_data)

    return pd.DataFrame(results, columns=columns)