qtype 0.0.16__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +5 -5
  3. qtype/application/converters/tools_from_module.py +2 -2
  4. qtype/application/converters/types.py +14 -43
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +94 -73
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +4 -0
  9. qtype/commands/convert.py +20 -8
  10. qtype/commands/generate.py +19 -27
  11. qtype/commands/run.py +73 -36
  12. qtype/commands/serve.py +74 -54
  13. qtype/commands/validate.py +34 -8
  14. qtype/commands/visualize.py +46 -22
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +65 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +612 -363
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +57 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +74 -0
  30. qtype/interpreter/base/factory.py +117 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +462 -22
  36. qtype/interpreter/converters.py +77 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/decoder_executor.py +163 -0
  41. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  42. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  43. qtype/interpreter/executors/document_search_executor.py +122 -0
  44. qtype/interpreter/executors/document_source_executor.py +118 -0
  45. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  46. qtype/interpreter/executors/echo_executor.py +63 -0
  47. qtype/interpreter/executors/field_extractor_executor.py +160 -0
  48. qtype/interpreter/executors/file_source_executor.py +101 -0
  49. qtype/interpreter/executors/file_writer_executor.py +110 -0
  50. qtype/interpreter/executors/index_upsert_executor.py +228 -0
  51. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  52. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  53. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  54. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  55. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  56. qtype/interpreter/executors/sql_source_executor.py +106 -0
  57. qtype/interpreter/executors/vector_search_executor.py +91 -0
  58. qtype/interpreter/flow.py +159 -22
  59. qtype/interpreter/metadata_api.py +115 -0
  60. qtype/interpreter/resource_cache.py +5 -4
  61. qtype/interpreter/rich_progress.py +225 -0
  62. qtype/interpreter/stream/chat/__init__.py +15 -0
  63. qtype/interpreter/stream/chat/converter.py +391 -0
  64. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  65. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  66. qtype/interpreter/stream/chat/vercel.py +609 -0
  67. qtype/interpreter/stream/utils/__init__.py +15 -0
  68. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  69. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  70. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  71. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  72. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  73. qtype/interpreter/telemetry.py +135 -8
  74. qtype/interpreter/tools/__init__.py +5 -0
  75. qtype/interpreter/tools/function_tool_helper.py +265 -0
  76. qtype/interpreter/types.py +330 -0
  77. qtype/interpreter/typing.py +83 -89
  78. qtype/interpreter/ui/404/index.html +1 -1
  79. qtype/interpreter/ui/404.html +1 -1
  80. qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  81. qtype/interpreter/ui/_next/static/chunks/{393-8fd474427f8e19ce.js → 434-b2112d19f25c44ff.js} +3 -3
  82. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  83. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  84. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  85. qtype/interpreter/ui/icon.png +0 -0
  86. qtype/interpreter/ui/index.html +1 -1
  87. qtype/interpreter/ui/index.txt +4 -4
  88. qtype/semantic/checker.py +583 -0
  89. qtype/semantic/generate.py +262 -83
  90. qtype/semantic/loader.py +95 -0
  91. qtype/semantic/model.py +436 -159
  92. qtype/semantic/resolver.py +63 -19
  93. qtype/semantic/visualize.py +28 -31
  94. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/METADATA +16 -3
  95. qtype-0.1.1.dist-info/RECORD +135 -0
  96. qtype/dsl/base_types.py +0 -38
  97. qtype/dsl/validator.py +0 -465
  98. qtype/interpreter/batch/__init__.py +0 -0
  99. qtype/interpreter/batch/file_sink_source.py +0 -162
  100. qtype/interpreter/batch/flow.py +0 -95
  101. qtype/interpreter/batch/sql_source.py +0 -92
  102. qtype/interpreter/batch/step.py +0 -74
  103. qtype/interpreter/batch/types.py +0 -41
  104. qtype/interpreter/batch/utils.py +0 -178
  105. qtype/interpreter/chat/chat_api.py +0 -237
  106. qtype/interpreter/chat/vercel.py +0 -314
  107. qtype/interpreter/exceptions.py +0 -10
  108. qtype/interpreter/step.py +0 -67
  109. qtype/interpreter/steps/__init__.py +0 -0
  110. qtype/interpreter/steps/agent.py +0 -114
  111. qtype/interpreter/steps/condition.py +0 -36
  112. qtype/interpreter/steps/decoder.py +0 -88
  113. qtype/interpreter/steps/llm_inference.py +0 -171
  114. qtype/interpreter/steps/prompt_template.py +0 -54
  115. qtype/interpreter/steps/search.py +0 -24
  116. qtype/interpreter/steps/tool.py +0 -219
  117. qtype/interpreter/streaming_helpers.py +0 -123
  118. qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +0 -1
  119. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  120. qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +0 -3
  121. qtype/interpreter/ui/favicon.ico +0 -0
  122. qtype/loader.py +0 -390
  123. qtype-0.0.16.dist-info/RECORD +0 -106
  124. /qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  125. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/WHEEL +0 -0
  126. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/entry_points.txt +0 -0
  127. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/licenses/LICENSE +0 -0
  128. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import importlib
4
+ import uuid
5
+ from typing import Any
6
+
3
7
  from llama_index.core.base.embeddings.base import BaseEmbedding
4
8
  from llama_index.core.base.llms.base import BaseLLM
5
9
  from llama_index.core.base.llms.types import AudioBlock
@@ -9,18 +13,147 @@ from llama_index.core.base.llms.types import (
9
13
  DocumentBlock,
10
14
  ImageBlock,
11
15
  TextBlock,
16
+ ThinkingBlock,
12
17
  )
13
18
  from llama_index.core.memory import Memory as LlamaMemory
19
+ from llama_index.core.schema import Document as LlamaDocument
20
+ from llama_index.core.vector_stores.types import BasePydanticVectorStore
21
+ from opensearchpy import AWSV4SignerAuth, OpenSearch
14
22
 
15
- from qtype.dsl.base_types import PrimitiveTypeEnum
16
- from qtype.dsl.domain_types import ChatContent, ChatMessage
23
+ from qtype.base.types import PrimitiveTypeEnum
24
+ from qtype.dsl.domain_types import ChatContent, ChatMessage, RAGDocument
17
25
  from qtype.dsl.model import Memory
18
- from qtype.interpreter.exceptions import InterpreterError
19
- from qtype.semantic.model import Model
26
+ from qtype.interpreter.auth.aws import aws
27
+ from qtype.interpreter.auth.generic import auth
28
+ from qtype.interpreter.base.secrets import SecretManagerBase
29
+ from qtype.interpreter.types import InterpreterError
30
+ from qtype.semantic.model import (
31
+ APIKeyAuthProvider,
32
+ DocumentIndex,
33
+ DocumentSplitter,
34
+ Model,
35
+ VectorIndex,
36
+ )
20
37
 
21
38
  from .resource_cache import cached_resource
22
39
 
23
40
 
41
def to_llama_document(doc: RAGDocument) -> LlamaDocument:
    """Convert a RAGDocument to a LlamaDocument.

    Args:
        doc: The RAGDocument to convert.

    Returns:
        A LlamaDocument whose text / media-resource fields are populated
        according to the document's primitive type.
    """
    from llama_index.core.schema import MediaResource

    # Prepare metadata, adding file_name and uri if available.
    metadata = doc.metadata.copy() if doc.metadata else {}
    if doc.file_name:
        metadata["file_name"] = doc.file_name
    if doc.uri:
        # "url" is more commonly used in LlamaIndex metadata than "uri".
        metadata["url"] = doc.uri

    # Default text content.
    text = ""
    if isinstance(doc.content, str):
        text = doc.content
    elif doc.type == PrimitiveTypeEnum.text and isinstance(doc.content, bytes):
        # Bug fix: previously a text-typed document whose content arrived as
        # bytes produced an empty LlamaDocument. Decode it instead.
        text = doc.content.decode("utf-8", errors="replace")

    # Handle the different content types.
    if doc.type == PrimitiveTypeEnum.text:
        # Text content - store as the text field.
        return LlamaDocument(text=text, doc_id=doc.file_id, metadata=metadata)
    elif doc.type == PrimitiveTypeEnum.image and isinstance(
        doc.content, bytes
    ):
        # Image content - store in image_resource.
        return LlamaDocument(
            text=text,  # Keep text empty or use as description
            doc_id=doc.file_id,
            metadata=metadata,
            image_resource=MediaResource(data=doc.content),
        )
    elif doc.type == PrimitiveTypeEnum.audio and isinstance(
        doc.content, bytes
    ):
        # Audio content - store in audio_resource.
        return LlamaDocument(
            text=text,
            doc_id=doc.file_id,
            metadata=metadata,
            audio_resource=MediaResource(data=doc.content),
        )
    elif doc.type == PrimitiveTypeEnum.video and isinstance(
        doc.content, bytes
    ):
        # Video content - store in video_resource.
        return LlamaDocument(
            text=text,
            doc_id=doc.file_id,
            metadata=metadata,
            video_resource=MediaResource(data=doc.content),
        )
    else:
        # Fallback for other types - stringify and store as text.
        return LlamaDocument(
            text=str(doc.content) if doc.content else "",
            doc_id=doc.file_id,
            metadata=metadata,
        )
100
+
101
+
102
def from_llama_document(doc: LlamaDocument) -> RAGDocument:
    """Convert a LlamaDocument back into a RAGDocument."""
    doc_identifier = doc.doc_id
    meta = doc.metadata

    # file_name falls back to the document id; uri prefers "url" over "uri".
    if meta:
        name = meta.get("file_name", doc_identifier)
        source_uri = meta.get("url") or meta.get("uri")
    else:
        name = doc_identifier
        source_uri = None

    # Text is the default content; media resources override it, checked in
    # priority order: image, then audio, then video.
    kind = PrimitiveTypeEnum.text
    payload: str | bytes = doc.text

    for attr, resource_kind in (
        ("image_resource", PrimitiveTypeEnum.image),
        ("audio_resource", PrimitiveTypeEnum.audio),
        ("video_resource", PrimitiveTypeEnum.video),
    ):
        resource = getattr(doc, attr, None)
        if resource is not None:
            kind = resource_kind
            # MediaResource carries the raw bytes in its 'data' field.
            payload = getattr(resource, "data", doc.text)  # type: ignore
            break

    return RAGDocument(
        content=payload,
        file_id=doc_identifier,
        file_name=name,
        uri=source_uri,
        metadata=meta.copy() if meta else {},
        type=kind,
    )
155
+
156
+
24
157
  @cached_resource
25
158
  def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
26
159
  return LlamaMemory.from_defaults(
@@ -32,17 +165,38 @@ def to_memory(session_id: str | None, memory: Memory) -> LlamaMemory:
32
165
 
33
166
 
34
167
  @cached_resource
35
- def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
36
- """Convert a qtype Model to a LlamaIndex Model."""
168
+ def to_llm(
169
+ model: Model,
170
+ system_prompt: str | None,
171
+ secret_manager: SecretManagerBase,
172
+ ) -> BaseLLM:
173
+ """
174
+ Convert a qtype Model to a LlamaIndex Model.
37
175
 
38
- if model.provider in "aws-bedrock":
39
- # BedrockConverse requires a model_id and system_prompt
40
- # Inference params can be passed as additional kwargs
41
- from llama_index.llms.bedrock_converse import ( # type: ignore[import]
42
- BedrockConverse,
43
- )
176
+ Args:
177
+ model: The semantic model configuration
178
+ system_prompt: Optional system prompt for the model
179
+ secret_manager: Optional secret manager for resolving SecretReferences
180
+
181
+ Returns:
182
+ A LlamaIndex LLM instance
183
+ """
184
+
185
+ if model.provider == "aws-bedrock":
186
+ from llama_index.llms.bedrock_converse import BedrockConverse
187
+
188
+ from qtype.semantic.model import AWSAuthProvider
189
+
190
+ if model.auth:
191
+ # Type hint for mypy - we know it's AWSAuthProvider for aws-bedrock
192
+ assert isinstance(model.auth, AWSAuthProvider)
193
+ with aws(model.auth, secret_manager) as session:
194
+ session = session._session
195
+ else:
196
+ session = None
44
197
 
45
198
  brv: BaseLLM = BedrockConverse(
199
+ botocore_session=session,
46
200
  model=model.model_id if model.model_id else model.id,
47
201
  system_prompt=system_prompt,
48
202
  **(model.inference_params if model.inference_params else {}),
@@ -51,45 +205,109 @@ def to_llm(model: Model, system_prompt: str | None) -> BaseLLM:
51
205
  elif model.provider == "openai":
52
206
  from llama_index.llms.openai import OpenAI
53
207
 
208
+ from qtype.interpreter.auth.generic import auth
209
+ from qtype.semantic.model import APIKeyAuthProvider
210
+
211
+ api_key: str | None = None
212
+ if model.auth:
213
+ with auth(model.auth, secret_manager) as provider:
214
+ if not isinstance(provider, APIKeyAuthProvider):
215
+ raise InterpreterError(
216
+ f"OpenAI provider requires APIKeyAuthProvider, "
217
+ f"got {type(provider).__name__}"
218
+ )
219
+ # api_key is guaranteed to be str after auth() resolves it
220
+ api_key = provider.api_key # type: ignore[assignment]
221
+
54
222
  return OpenAI(
55
223
  model=model.model_id if model.model_id else model.id,
56
224
  system_prompt=system_prompt,
57
225
  **(model.inference_params if model.inference_params else {}),
58
- api_key=getattr(model.auth, "api_key", None)
59
- if model.auth
60
- else None,
226
+ api_key=api_key,
61
227
  )
62
228
  elif model.provider == "anthropic":
63
229
  from llama_index.llms.anthropic import ( # type: ignore[import-untyped]
64
230
  Anthropic,
65
231
  )
66
232
 
233
+ from qtype.interpreter.auth.generic import auth
234
+ from qtype.semantic.model import APIKeyAuthProvider
235
+
236
+ api_key: str | None = None
237
+ if model.auth:
238
+ with auth(model.auth, secret_manager) as provider:
239
+ if not isinstance(provider, APIKeyAuthProvider):
240
+ raise InterpreterError(
241
+ f"Anthropic provider requires APIKeyAuthProvider, "
242
+ f"got {type(provider).__name__}"
243
+ )
244
+ # api_key is guaranteed to be str after auth() resolves it
245
+ api_key = provider.api_key # type: ignore[assignment]
246
+
67
247
  arv: BaseLLM = Anthropic(
68
248
  model=model.model_id if model.model_id else model.id,
69
249
  system_prompt=system_prompt,
70
250
  **(model.inference_params if model.inference_params else {}),
71
- api_key=getattr(model.auth, "api_key", None)
72
- if model.auth
73
- else None,
251
+ api_key=api_key,
74
252
  )
75
253
  return arv
254
+ elif model.provider == "gcp-vertex":
255
+ from llama_index.llms.vertex import Vertex
256
+
257
+ project_name = getattr(
258
+ getattr(model, "auth", None), "profile_name", None
259
+ )
260
+
261
+ vgv: BaseLLM = Vertex(
262
+ model=model.model_id if model.model_id else model.id,
263
+ project=project_name,
264
+ system_prompt=system_prompt,
265
+ **(model.inference_params if model.inference_params else {}),
266
+ )
267
+
268
+ return vgv
76
269
  else:
77
270
  raise InterpreterError(
78
271
  f"Unsupported model provider: {model.provider}."
79
272
  )
80
273
 
81
274
 
275
@cached_resource
def to_vector_store(
    index: VectorIndex, secret_manager: SecretManagerBase
) -> BasePydanticVectorStore:
    """Convert a qtype VectorIndex to a LlamaIndex vector store.

    Args:
        index: Vector index configuration; ``index.module`` is the fully
            qualified class path of the vector store implementation.
        secret_manager: Used to resolve SecretReferences in ``index.args``.

    Returns:
        An instantiated vector store.

    Raises:
        ImportError: If the configured vector store class cannot be imported.
    """
    # Split "pkg.mod.Class" into module path and class name in one pass.
    module_path, _, class_name = index.module.rpartition(".")
    # Dynamically import the vector store module.
    try:
        store_module = importlib.import_module(module_path)
        store_class = getattr(store_module, class_name)
    except (ImportError, AttributeError) as e:
        # Bug fix: the message previously said "reader class", a copy-paste
        # artifact that was misleading for a vector store.
        raise ImportError(
            f"Failed to import vector store class '{class_name}' from '{module_path}': {e}"
        ) from e

    # Resolve any SecretReferences in args before instantiating.
    context = f"index '{index.id}'"
    resolved_args = secret_manager.resolve_secrets_in_dict(index.args, context)
    return store_class(**resolved_args)
297
+
298
+
82
299
  @cached_resource
83
300
  def to_embedding_model(model: Model) -> BaseEmbedding:
84
301
  """Convert a qtype Model to a LlamaIndex embedding model."""
85
302
 
86
- if model.provider in {"bedrock", "aws", "aws-bedrock"}:
303
+ if model.provider == "aws-bedrock":
87
304
  from llama_index.embeddings.bedrock import ( # type: ignore[import-untyped]
88
305
  BedrockEmbedding,
89
306
  )
90
307
 
91
308
  bedrock_embedding: BaseEmbedding = BedrockEmbedding(
92
- model_name=model.model_id if model.model_id else model.id
309
+ model_name=model.model_id if model.model_id else model.id,
310
+ max_retries=100,
93
311
  )
94
312
  return bedrock_embedding
95
313
  elif model.provider == "openai":
@@ -107,6 +325,61 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
107
325
  )
108
326
 
109
327
 
328
@cached_resource
def to_opensearch_client(
    index: DocumentIndex, secret_manager: SecretManagerBase
) -> OpenSearch:
    """
    Convert a DocumentIndex to an OpenSearch/Elasticsearch client.

    Args:
        index: DocumentIndex configuration with endpoint, auth, etc.
        secret_manager: Secret manager used when resolving AWS credentials.

    Returns:
        OpenSearch client instance configured with authentication

    Raises:
        InterpreterError: If authentication fails or configuration is invalid
    """
    kwargs: dict[str, Any] = {"hosts": [index.endpoint], **index.args}

    # No auth configured: plain client.
    if not index.auth:
        return OpenSearch(**kwargs)

    # API-key authentication.
    if isinstance(index.auth, APIKeyAuthProvider):
        kwargs["api_key"] = index.auth.api_key
        return OpenSearch(**kwargs)

    # AWS SigV4 authentication via a boto3 session from the auth provider.
    if getattr(index.auth, "type", None) == "aws":
        with auth(index.auth, secret_manager) as aws_session:
            # Type checker doesn't know this is a boto3.Session,
            # but runtime validation ensures it for AWS auth.
            credentials = aws_session.get_credentials()  # type: ignore
            if credentials is None:
                raise InterpreterError(
                    f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
                )
            # opensearch-py's built-in request signer.
            signer = AWSV4SignerAuth(
                credentials,
                aws_session.region_name or "us-east-1",  # type: ignore
            )
        kwargs["http_auth"] = signer
        kwargs["use_ssl"] = True
        kwargs["verify_certs"] = True
        return OpenSearch(**kwargs)

    raise InterpreterError(
        f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
    )
381
+
382
+
110
383
  def to_content_block(content: ChatContent) -> ContentBlock:
111
384
  if content.type == PrimitiveTypeEnum.text:
112
385
  if isinstance(content.content, str):
@@ -132,6 +405,61 @@ def to_content_block(content: ChatContent) -> ContentBlock:
132
405
  )
133
406
 
134
407
 
408
def variable_to_chat_message(
    value: Any, variable: Any, default_role: str = "user"
) -> ChatMessage:
    """Convert any variable value to a ChatMessage based on the variable's type.

    Args:
        value: The value to convert (any primitive type or ChatMessage).
        variable: The Variable definition carrying type information.
        default_role: Message role to apply (default: "user").

    Returns:
        ChatMessage with appropriate content blocks.

    Raises:
        InterpreterError: If the value type cannot be converted.
    """
    # A ChatMessage needs no conversion.
    if isinstance(value, ChatMessage):
        return value

    declared = variable.type

    if isinstance(declared, PrimitiveTypeEnum):
        # Numeric and boolean values are rendered as text; every other
        # primitive passes through with its declared type.
        if declared in (
            PrimitiveTypeEnum.int,
            PrimitiveTypeEnum.float,
            PrimitiveTypeEnum.boolean,
        ):
            block = ChatContent(
                type=PrimitiveTypeEnum.text, content=str(value)
            )
        else:
            block = ChatContent(type=declared, content=value)
    elif isinstance(declared, str) and declared.startswith(
        ("list[", "dict[")
    ):
        # Collection types are serialized to a JSON string.
        import json

        block = ChatContent(
            type=PrimitiveTypeEnum.text, content=json.dumps(value)
        )
    else:
        raise InterpreterError(
            f"Cannot convert variable '{variable.id}' of unsupported type "
            f"'{declared}' to ChatMessage"
        )

    return ChatMessage(role=default_role, blocks=[block])  # type: ignore
461
+
462
+
135
463
  def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
136
464
  """Convert a ChatMessage to a LlamaChatMessage."""
137
465
  blocks = [to_content_block(content) for content in message.blocks]
@@ -140,7 +468,7 @@ def to_chat_message(message: ChatMessage) -> LlamaChatMessage:
140
468
 
141
469
  def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
142
470
  """Convert a LlamaChatMessage to a ChatMessage."""
143
- blocks = []
471
+ blocks: list[ChatContent] = []
144
472
  for block in message.blocks:
145
473
  if isinstance(block, TextBlock):
146
474
  blocks.append(
@@ -158,9 +486,121 @@ def from_chat_message(message: LlamaChatMessage) -> ChatMessage:
158
486
  blocks.append(
159
487
  ChatContent(type=PrimitiveTypeEnum.file, content=block.data)
160
488
  )
489
+ elif isinstance(block, ThinkingBlock):
490
+ continue
161
491
  else:
162
492
  raise InterpreterError(
163
493
  f"Unsupported content block type: {type(block)}"
164
494
  )
165
495
 
166
- return ChatMessage(role=message.role, blocks=blocks) # type: ignore
496
+ return ChatMessage(role=message.role, blocks=blocks)
497
+
498
+
499
def to_text_splitter(splitter: DocumentSplitter) -> Any:
    """Convert a DocumentSplitter to a LlamaIndex text splitter.

    Args:
        splitter: The DocumentSplitter configuration.

    Returns:
        An instance of the appropriate LlamaIndex text splitter class.

    Raises:
        ImportError: If the splitter class cannot be found in
            ``llama_index.core.node_parser``.
        InterpreterError: If the splitter class cannot be instantiated.
    """

    module_path = "llama_index.core.node_parser"
    class_name = splitter.splitter_name
    try:
        parser_module = importlib.import_module(module_path)
        splitter_class = getattr(parser_module, class_name)
    except (ImportError, AttributeError) as e:
        # Bug fix: the message previously said "reader class", a copy-paste
        # artifact that was misleading for a text splitter.
        raise ImportError(
            f"Failed to import splitter class '{class_name}' from '{module_path}': {e}"
        ) from e
    from llama_index.core.schema import BaseNode

    # TODO: let the user specify a custom ID namespace
    namespace = uuid.UUID("12345678-1234-5678-1234-567812345678")

    def id_func(i: int, doc: BaseNode) -> str:
        # uuid5 is deterministic: the same node id and chunk position always
        # yield the same chunk ID.
        return str(uuid.uuid5(namespace, f"{doc.node_id}_{i}"))

    # Prepare arguments for the splitter; user-supplied args may override
    # the defaults.
    splitter_args = {
        "chunk_size": splitter.chunk_size,
        "chunk_overlap": splitter.chunk_overlap,
        "id_func": id_func,
        **splitter.args,
    }

    # Instantiate and return the splitter.
    try:
        return splitter_class(**splitter_args)
    except Exception as e:
        raise InterpreterError(
            f"Failed to instantiate {splitter.splitter_name}: {e}"
        ) from e
545
+
546
+
547
def to_llama_vector_store_and_retriever(
    index: VectorIndex, secret_manager: SecretManagerBase
) -> tuple[BasePydanticVectorStore, Any]:
    """Create a LlamaIndex vector store and retriever from a VectorIndex.

    Args:
        index: VectorIndex configuration.
        secret_manager: Secret manager for resolving index credentials.

    Returns:
        Tuple of (vector_store, retriever).
    """
    from llama_index.core import VectorStoreIndex

    # Reuse the cached conversion helpers for the store and embedder.
    store = to_vector_store(index, secret_manager)
    embedder = to_embedding_model(index.embedding_model)

    # Wrap the store in a VectorStoreIndex so we can hand back a retriever
    # bound to the configured embedding model.
    wrapped_index = VectorStoreIndex.from_vector_store(
        vector_store=store,
        embed_model=embedder,
    )

    return store, wrapped_index.as_retriever()
576
+
577
+
578
def from_node_with_score(node_with_score) -> Any:
    """Convert a LlamaIndex NodeWithScore to a RAGSearchResult.

    Args:
        node_with_score: LlamaIndex NodeWithScore object.

    Returns:
        RAGSearchResult with chunk and score.
    """
    from qtype.dsl.domain_types import RAGChunk, RAGSearchResult

    source_node = node_with_score.node

    # Carry the embedding through only when the node actually has one.
    embedding = getattr(source_node, "embedding", None)

    # Build the chunk; document_id falls back to the node's own id.
    chunk = RAGChunk(
        content=source_node.text or "",
        chunk_id=source_node.node_id,
        document_id=source_node.metadata.get(
            "document_id", source_node.node_id
        ),
        vector=embedding,
        metadata=source_node.metadata or {},
    )

    # A missing score is reported as 0.0.
    return RAGSearchResult(chunk=chunk, score=node_with_score.score or 0.0)
@@ -0,0 +1,77 @@
1
+ """Converters between DataFrames and FlowMessages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pandas as pd
6
+ from pydantic import BaseModel
7
+
8
+ from qtype.interpreter.types import FlowMessage, Session
9
+ from qtype.semantic.model import Flow
10
+
11
+
12
def dataframe_to_flow_messages(
    df: pd.DataFrame, session: Session
) -> list[FlowMessage]:
    """
    Convert a DataFrame to a list of FlowMessages.

    Each row in the DataFrame becomes a FlowMessage bound to the same
    session.

    Args:
        df: DataFrame where each row represents one set of inputs
        session: Session object to use for all messages

    Returns:
        List of FlowMessages, one per DataFrame row
    """
    return [
        FlowMessage(session=session, variables=row.to_dict())
        for _, row in df.iterrows()
    ]
+ return messages
32
+
33
+
34
def flow_messages_to_dataframe(
    messages: list[FlowMessage], flow: Flow
) -> pd.DataFrame:
    """
    Convert a list of FlowMessages to a DataFrame.

    Extracts output variables from each message based on the flow's outputs.

    Args:
        messages: List of FlowMessages with results
        flow: Flow definition containing output variable specifications

    Returns:
        DataFrame with one row per message, columns for each output variable
    """
    from typing import Any

    rows: list[dict[str, Any]] = []
    for position, message in enumerate(messages):
        record: dict[str, Any] = {"row": position}

        # Pull each declared output variable; missing ones become None.
        for output_var in flow.outputs:
            if output_var.id in message.variables:
                extracted = message.variables[output_var.id]
                record[output_var.id] = (
                    extracted.model_dump()
                    if isinstance(extracted, BaseModel)
                    else extracted
                )
            else:
                record[output_var.id] = None

        # Surface a failure message in the "error" column, if any.
        if not message.is_failed():
            record["error"] = None
        elif message.error:
            record["error"] = message.error.error_message
        else:
            record["error"] = "Unknown error"

        rows.append(record)

    return pd.DataFrame(rows)
+ return pd.DataFrame(results)