qtype 0.1.7__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qtype/application/converters/tools_from_module.py +15 -0
- qtype/base/types.py +1 -9
- qtype/base/ui_shapes.py +38 -0
- qtype/commands/convert.py +3 -0
- qtype/dsl/model.py +71 -22
- qtype/interpreter/base/base_step_executor.py +10 -10
- qtype/interpreter/base/factory.py +6 -0
- qtype/interpreter/base/step_cache.py +3 -3
- qtype/interpreter/conversions.py +7 -4
- qtype/interpreter/executors/agent_executor.py +1 -2
- qtype/interpreter/executors/bedrock_reranker_executor.py +1 -2
- qtype/interpreter/executors/collect_executor.py +76 -0
- qtype/interpreter/executors/construct_executor.py +68 -0
- qtype/interpreter/executors/decoder_executor.py +1 -2
- qtype/interpreter/executors/doc_to_text_executor.py +1 -2
- qtype/interpreter/executors/document_embedder_executor.py +1 -2
- qtype/interpreter/executors/document_search_executor.py +1 -2
- qtype/interpreter/executors/document_source_executor.py +1 -2
- qtype/interpreter/executors/document_splitter_executor.py +5 -3
- qtype/interpreter/executors/echo_executor.py +1 -2
- qtype/interpreter/executors/explode_executor.py +49 -0
- qtype/interpreter/executors/field_extractor_executor.py +1 -2
- qtype/interpreter/executors/file_writer_executor.py +12 -13
- qtype/interpreter/executors/index_upsert_executor.py +3 -4
- qtype/interpreter/executors/invoke_embedding_executor.py +1 -2
- qtype/interpreter/executors/invoke_tool_executor.py +1 -2
- qtype/interpreter/executors/llm_inference_executor.py +1 -2
- qtype/interpreter/executors/prompt_template_executor.py +1 -2
- qtype/interpreter/executors/sql_source_executor.py +1 -2
- qtype/interpreter/executors/vector_search_executor.py +1 -2
- qtype/interpreter/flow.py +5 -2
- qtype/interpreter/types.py +18 -10
- qtype/interpreter/typing.py +53 -4
- qtype/interpreter/ui/404/index.html +1 -1
- qtype/interpreter/ui/404.html +1 -1
- qtype/interpreter/ui/_next/static/{20HoJN6otZ_LyHLHpCPE6 → YR7FgQl_Kpwjcqho0ogG4}/_buildManifest.js +1 -1
- qtype/interpreter/ui/_next/static/chunks/230-e22a519211de9624.js +33 -0
- qtype/interpreter/ui/_next/static/chunks/255-268261a96ffee0a9.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/app/_not-found/page-8f0e059659d8afd7.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/app/layout-1615bcffa82ff884.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/app/page-bfbb263eb356c6eb.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-8d3b3efcaddd4728.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/framework-d7de93249215fb06.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/main-090112344d9a19ce.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/main-app-a5425c8070223ccc.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/pages/{_app-0a0020ddd67f79cf.js → _app-7d307437aca18ad4.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/pages/{_error-03529f2c21436739.js → _error-cb2a52f75f2162e2.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/webpack-bd0771cb30dd12fa.js +1 -0
- qtype/interpreter/ui/_next/static/css/7906798f56607e8f.css +3 -0
- qtype/interpreter/ui/assets/video-sample.mp4 +0 -0
- qtype/interpreter/ui/index.html +1 -1
- qtype/interpreter/ui/index.txt +12 -12
- qtype/semantic/checker.py +83 -0
- qtype/semantic/generate.py +0 -1
- qtype/semantic/model.py +25 -18
- {qtype-0.1.7.dist-info → qtype-0.1.11.dist-info}/METADATA +1 -1
- {qtype-0.1.7.dist-info → qtype-0.1.11.dist-info}/RECORD +63 -58
- qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +0 -36
- qtype/interpreter/ui/_next/static/chunks/4bd1b696-cf72ae8a39fa05aa.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/964-2b041321a01cbf56.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/app/_not-found/page-e110d2a9d0a83d82.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/app/layout-a05273ead5de2c41.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/framework-7c95b8e5103c9e90.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/main-app-6fc6346bc8f7f163.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/main-e26b9cb206da2cac.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +0 -1
- qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +0 -3
- /qtype/interpreter/ui/_next/static/{20HoJN6otZ_LyHLHpCPE6 → YR7FgQl_Kpwjcqho0ogG4}/_ssgManifest.js +0 -0
- {qtype-0.1.7.dist-info → qtype-0.1.11.dist-info}/WHEEL +0 -0
- {qtype-0.1.7.dist-info → qtype-0.1.11.dist-info}/entry_points.txt +0 -0
- {qtype-0.1.7.dist-info → qtype-0.1.11.dist-info}/licenses/LICENSE +0 -0
- {qtype-0.1.7.dist-info → qtype-0.1.11.dist-info}/top_level.txt +0 -0
|
@@ -144,6 +144,11 @@ def _create_tool_from_function(
|
|
|
144
144
|
for p in func_info["parameters"]
|
|
145
145
|
}
|
|
146
146
|
|
|
147
|
+
# # quick hack
|
|
148
|
+
# for k, v in inputs.items():
|
|
149
|
+
# if inspect.isclass(v.type) and issubclass(v.type, BaseModel):
|
|
150
|
+
# v.type = str(v.type.__name__)
|
|
151
|
+
|
|
147
152
|
# Create output parameter based on return type
|
|
148
153
|
tool_id = func_info["module"] + "." + func_name
|
|
149
154
|
|
|
@@ -152,6 +157,7 @@ def _create_tool_from_function(
|
|
|
152
157
|
)
|
|
153
158
|
|
|
154
159
|
outputs = {"result": ToolParameter(type=output_type, optional=False)}
|
|
160
|
+
# outputs['result'].type =
|
|
155
161
|
|
|
156
162
|
return PythonFunctionTool(
|
|
157
163
|
id=tool_id,
|
|
@@ -264,6 +270,15 @@ def _map_python_type_to_variable_type(
|
|
|
264
270
|
elif python_type in get_args(VariableType):
|
|
265
271
|
# If it's a domain type, return its name
|
|
266
272
|
return python_type # type: ignore[no-any-return]
|
|
273
|
+
elif any(
|
|
274
|
+
[
|
|
275
|
+
(python_type is get_args(t)[0])
|
|
276
|
+
for t in get_args(VariableType)
|
|
277
|
+
if get_origin(t) is type
|
|
278
|
+
]
|
|
279
|
+
):
|
|
280
|
+
# It's the domain type, but the actual class (the user imported it)
|
|
281
|
+
return python_type.__name__
|
|
267
282
|
elif inspect.isclass(python_type) and issubclass(python_type, BaseModel):
|
|
268
283
|
# If it's a Pydantic model, create or retrieve its CustomType definition
|
|
269
284
|
return _pydantic_to_custom_types(python_type, custom_types)
|
qtype/base/types.py
CHANGED
|
@@ -54,21 +54,13 @@ class PrimitiveTypeEnum(str, Enum):
|
|
|
54
54
|
citation_url = "citation_url"
|
|
55
55
|
date = "date"
|
|
56
56
|
datetime = "datetime"
|
|
57
|
-
int = "int"
|
|
58
57
|
file = "file"
|
|
59
58
|
float = "float"
|
|
60
59
|
image = "image"
|
|
60
|
+
int = "int"
|
|
61
61
|
text = "text"
|
|
62
62
|
time = "time"
|
|
63
63
|
video = "video"
|
|
64
|
-
thinking = "thinking"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class StepCardinality(str, Enum):
|
|
68
|
-
"""Does this step emit 1 (one) or 0...N (many) items?"""
|
|
69
|
-
|
|
70
|
-
one = "one"
|
|
71
|
-
many = "many"
|
|
72
64
|
|
|
73
65
|
|
|
74
66
|
ReferenceT = TypeVar("ReferenceT")
|
qtype/base/ui_shapes.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from pydantic import ConfigDict as PydanticConfigDict
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
|
|
8
|
+
from qtype.base.types import PrimitiveTypeEnum
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TextWidget(str, Enum):
|
|
12
|
+
text = "text" # Simple text widget input, default
|
|
13
|
+
textarea = "textarea" # A paragraph editor
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TextInputUI(BaseModel):
|
|
17
|
+
model_config = PydanticConfigDict(extra="forbid")
|
|
18
|
+
|
|
19
|
+
widget: TextWidget = Field(
|
|
20
|
+
default=TextWidget.text,
|
|
21
|
+
description="What kind of text ui to present",
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FileUploadUI(BaseModel):
|
|
26
|
+
model_config = PydanticConfigDict(extra="forbid")
|
|
27
|
+
accept: str = Field(
|
|
28
|
+
default="*/*",
|
|
29
|
+
description="The mime type(s) to accept in the file upload.",
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
UIType = Union[TextInputUI, FileUploadUI]
|
|
34
|
+
|
|
35
|
+
UI_INPUT_TO_TYPE = {
|
|
36
|
+
(TextInputUI, PrimitiveTypeEnum.text),
|
|
37
|
+
(FileUploadUI, PrimitiveTypeEnum.file),
|
|
38
|
+
}
|
qtype/commands/convert.py
CHANGED
|
@@ -23,6 +23,9 @@ def _convert_to_yaml(doc: Application | ToolList) -> str:
|
|
|
23
23
|
else:
|
|
24
24
|
wrapped = doc
|
|
25
25
|
|
|
26
|
+
import pprint
|
|
27
|
+
|
|
28
|
+
pprint.pprint(wrapped)
|
|
26
29
|
# NOTE: We use exclude_none but NOT exclude_unset because discriminator
|
|
27
30
|
# fields like 'type' have default values and must be included in output
|
|
28
31
|
return to_yaml_str(wrapped, exclude_none=True)
|
qtype/dsl/model.py
CHANGED
|
@@ -12,6 +12,7 @@ from pydantic import (
|
|
|
12
12
|
Field,
|
|
13
13
|
RootModel,
|
|
14
14
|
ValidationInfo,
|
|
15
|
+
model_serializer,
|
|
15
16
|
model_validator,
|
|
16
17
|
)
|
|
17
18
|
|
|
@@ -23,15 +24,19 @@ from qtype.base.types import (
|
|
|
23
24
|
ConcurrentStepMixin,
|
|
24
25
|
PrimitiveTypeEnum,
|
|
25
26
|
Reference,
|
|
26
|
-
StepCardinality,
|
|
27
27
|
StrictBaseModel,
|
|
28
28
|
)
|
|
29
|
+
from qtype.base.ui_shapes import UI_INPUT_TO_TYPE, UIType
|
|
29
30
|
from qtype.dsl.domain_types import (
|
|
31
|
+
AggregateStats,
|
|
30
32
|
ChatContent,
|
|
31
33
|
ChatMessage,
|
|
32
34
|
Embedding,
|
|
35
|
+
MessageRole,
|
|
33
36
|
RAGChunk,
|
|
34
37
|
RAGDocument,
|
|
38
|
+
RAGSearchResult,
|
|
39
|
+
SearchResult,
|
|
35
40
|
)
|
|
36
41
|
|
|
37
42
|
DOMAIN_CLASSES = {
|
|
@@ -212,12 +217,24 @@ class Variable(StrictBaseModel):
|
|
|
212
217
|
),
|
|
213
218
|
)
|
|
214
219
|
|
|
220
|
+
ui: UIType | None = Field(None, description="Hints for the UI if needed.")
|
|
221
|
+
|
|
215
222
|
@model_validator(mode="before")
|
|
216
223
|
@classmethod
|
|
217
224
|
def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
|
|
218
225
|
"""Resolve string-based type references using the shared validator."""
|
|
219
226
|
return _resolve_type_field_validator(data, info)
|
|
220
227
|
|
|
228
|
+
@model_validator(mode="after")
|
|
229
|
+
def validate_ui_type(self) -> Variable:
|
|
230
|
+
"""Ensure the UI hint, if provided, is compatible with the variable's type."""
|
|
231
|
+
if self.ui is not None:
|
|
232
|
+
if (type(self.ui), self.type) not in UI_INPUT_TO_TYPE:
|
|
233
|
+
raise ValueError(
|
|
234
|
+
f"Variable of {self.type} is not comptabile with UI configuration {self.ui}"
|
|
235
|
+
)
|
|
236
|
+
return self
|
|
237
|
+
|
|
221
238
|
|
|
222
239
|
class SecretReference(StrictBaseModel):
|
|
223
240
|
"""
|
|
@@ -257,6 +274,21 @@ class ToolParameter(BaseModel):
|
|
|
257
274
|
"""Resolve string-based type references using the shared validator."""
|
|
258
275
|
return _resolve_type_field_validator(data, info)
|
|
259
276
|
|
|
277
|
+
@staticmethod
|
|
278
|
+
def _serialize_type(value):
|
|
279
|
+
if isinstance(value, type):
|
|
280
|
+
return value.__name__
|
|
281
|
+
elif hasattr(value, "__name__"):
|
|
282
|
+
return value.__name__
|
|
283
|
+
return value
|
|
284
|
+
|
|
285
|
+
@model_serializer
|
|
286
|
+
def _model_serializer(self):
|
|
287
|
+
# Use the default serialization, but ensure 'type' is a string
|
|
288
|
+
data = self.model_dump()
|
|
289
|
+
data["type"] = self._serialize_type(data.get("type"))
|
|
290
|
+
return data
|
|
291
|
+
|
|
260
292
|
|
|
261
293
|
class ListType(BaseModel):
|
|
262
294
|
"""Represents a list type with a specific element type."""
|
|
@@ -276,12 +308,16 @@ class ListType(BaseModel):
|
|
|
276
308
|
|
|
277
309
|
VariableType = (
|
|
278
310
|
PrimitiveTypeEnum
|
|
279
|
-
| Type[
|
|
280
|
-
| Type[ChatMessage]
|
|
281
|
-
| Type[ChatContent]
|
|
311
|
+
| Type[AggregateStats]
|
|
282
312
|
| Type[BaseModel]
|
|
283
|
-
| Type[
|
|
313
|
+
| Type[ChatContent]
|
|
314
|
+
| Type[ChatMessage]
|
|
315
|
+
| Type[Embedding]
|
|
316
|
+
| Type[MessageRole]
|
|
284
317
|
| Type[RAGChunk]
|
|
318
|
+
| Type[RAGDocument]
|
|
319
|
+
| Type[RAGSearchResult]
|
|
320
|
+
| Type[SearchResult]
|
|
285
321
|
| ListType
|
|
286
322
|
)
|
|
287
323
|
|
|
@@ -350,10 +386,6 @@ class Step(CachedStepMixin, StrictBaseModel, ABC):
|
|
|
350
386
|
|
|
351
387
|
id: str = Field(..., description="Unique ID of this component.")
|
|
352
388
|
type: str = Field(..., description="Type of the step component.")
|
|
353
|
-
cardinality: StepCardinality = Field(
|
|
354
|
-
default=StepCardinality.one,
|
|
355
|
-
description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
|
|
356
|
-
)
|
|
357
389
|
inputs: list[Reference[Variable] | str] = Field(
|
|
358
390
|
default_factory=list,
|
|
359
391
|
description="References to the variables required by this step.",
|
|
@@ -364,6 +396,33 @@ class Step(CachedStepMixin, StrictBaseModel, ABC):
|
|
|
364
396
|
)
|
|
365
397
|
|
|
366
398
|
|
|
399
|
+
class Explode(Step):
|
|
400
|
+
"""A step that takes a list input and produces multiple outputs, one per item in the list."""
|
|
401
|
+
|
|
402
|
+
type: Literal["Explode"] = "Explode"
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
class Collect(Step, BatchableStepMixin):
|
|
406
|
+
"""A step that collects all inputs and creates a single list to return."""
|
|
407
|
+
|
|
408
|
+
type: Literal["Collect"] = "Collect"
|
|
409
|
+
|
|
410
|
+
batch_config: BatchConfig = Field(
|
|
411
|
+
default_factory=partial(BatchConfig, batch_size=sys.maxsize),
|
|
412
|
+
description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
class Construct(Step):
|
|
417
|
+
"""A step that converts variables into an instance of a Custom or Domain Type"""
|
|
418
|
+
|
|
419
|
+
type: Literal["Construct"] = "Construct"
|
|
420
|
+
field_mapping: dict[str, str] = Field(
|
|
421
|
+
...,
|
|
422
|
+
description="Mapping of type inputs to variable names, if needed.",
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
|
|
367
426
|
class PromptTemplate(Step):
|
|
368
427
|
"""Defines a prompt template with a string format and variable bindings.
|
|
369
428
|
This is used to generate prompts dynamically based on input variables."""
|
|
@@ -892,10 +951,6 @@ class Source(Step):
|
|
|
892
951
|
"""Base class for data sources"""
|
|
893
952
|
|
|
894
953
|
id: str = Field(..., description="Unique ID of the data source.")
|
|
895
|
-
cardinality: Literal[StepCardinality.many] = Field(
|
|
896
|
-
default=StepCardinality.many,
|
|
897
|
-
description="Sources always emit 0...N instances of the outputs.",
|
|
898
|
-
)
|
|
899
954
|
|
|
900
955
|
|
|
901
956
|
class SQLSource(Source):
|
|
@@ -952,7 +1007,6 @@ class Aggregate(Step):
|
|
|
952
1007
|
"""
|
|
953
1008
|
|
|
954
1009
|
type: Literal["Aggregate"] = "Aggregate"
|
|
955
|
-
cardinality: Literal[StepCardinality.one] = StepCardinality.one
|
|
956
1010
|
|
|
957
1011
|
# Outputs are now optional. The user can provide 0, 1, 2, or 3 names.
|
|
958
1012
|
# The order will be: success_count, error_count, total_count
|
|
@@ -1006,10 +1060,6 @@ class DocumentSplitter(Step, ConcurrentStepMixin):
|
|
|
1006
1060
|
"""Configuration for chunking/splitting documents into embeddable nodes/chunks."""
|
|
1007
1061
|
|
|
1008
1062
|
type: Literal["DocumentSplitter"] = "DocumentSplitter"
|
|
1009
|
-
cardinality: Literal[StepCardinality.many] = Field(
|
|
1010
|
-
default=StepCardinality.many,
|
|
1011
|
-
description="Consumes one document and emits 0...N nodes/chunks.",
|
|
1012
|
-
)
|
|
1013
1063
|
|
|
1014
1064
|
splitter_name: str = Field(
|
|
1015
1065
|
default="SentenceSplitter",
|
|
@@ -1029,10 +1079,6 @@ class DocumentEmbedder(Step, ConcurrentStepMixin):
|
|
|
1029
1079
|
"""Embeds document chunks using a specified embedding model."""
|
|
1030
1080
|
|
|
1031
1081
|
type: Literal["DocumentEmbedder"] = "DocumentEmbedder"
|
|
1032
|
-
cardinality: Literal[StepCardinality.many] = Field(
|
|
1033
|
-
default=StepCardinality.many,
|
|
1034
|
-
description="Consumes one chunk and emits one embedded chunk.",
|
|
1035
|
-
)
|
|
1036
1082
|
model: Reference[EmbeddingModel] | str = Field(
|
|
1037
1083
|
..., description="Embedding model to use for vectorization."
|
|
1038
1084
|
)
|
|
@@ -1200,6 +1246,8 @@ StepType = Annotated[
|
|
|
1200
1246
|
Agent,
|
|
1201
1247
|
Aggregate,
|
|
1202
1248
|
BedrockReranker,
|
|
1249
|
+
Collect,
|
|
1250
|
+
Construct,
|
|
1203
1251
|
Decoder,
|
|
1204
1252
|
DocToTextConverter,
|
|
1205
1253
|
DocumentEmbedder,
|
|
@@ -1207,6 +1255,7 @@ StepType = Annotated[
|
|
|
1207
1255
|
DocumentSplitter,
|
|
1208
1256
|
DocumentSource,
|
|
1209
1257
|
Echo,
|
|
1258
|
+
Explode,
|
|
1210
1259
|
FieldExtractor,
|
|
1211
1260
|
FileSource,
|
|
1212
1261
|
FileWriter,
|
|
@@ -230,21 +230,12 @@ class StepExecutor(ABC):
|
|
|
230
230
|
prepared_messages, process_item, task_limit=num_workers
|
|
231
231
|
)
|
|
232
232
|
|
|
233
|
-
# Combine all streams
|
|
234
|
-
async def emit_failed_messages() -> AsyncIterator[FlowMessage]:
|
|
235
|
-
for msg in failed_messages:
|
|
236
|
-
yield msg
|
|
237
|
-
|
|
238
|
-
all_results = stream.concat(
|
|
239
|
-
stream.iterate([result_stream, emit_failed_messages()])
|
|
240
|
-
)
|
|
241
|
-
|
|
242
233
|
# Track message counts for telemetry
|
|
243
234
|
message_count = 0
|
|
244
235
|
error_count = 0
|
|
245
236
|
|
|
246
237
|
# Stream results and track progress
|
|
247
|
-
async with
|
|
238
|
+
async with result_stream.stream() as streamer:
|
|
248
239
|
result: FlowMessage
|
|
249
240
|
async for result in streamer:
|
|
250
241
|
message_count += 1
|
|
@@ -255,6 +246,15 @@ class StepExecutor(ABC):
|
|
|
255
246
|
)
|
|
256
247
|
yield result
|
|
257
248
|
|
|
249
|
+
# Emit failed messages after processing completes
|
|
250
|
+
for msg in failed_messages:
|
|
251
|
+
message_count += 1
|
|
252
|
+
error_count += 1
|
|
253
|
+
self.progress.update_for_message(
|
|
254
|
+
msg, self.context.on_progress
|
|
255
|
+
)
|
|
256
|
+
yield msg
|
|
257
|
+
|
|
258
258
|
# Finalize and track those messages too
|
|
259
259
|
async for msg in self.finalize():
|
|
260
260
|
message_count += 1
|
|
@@ -2,6 +2,8 @@ from qtype.semantic.model import (
|
|
|
2
2
|
Agent,
|
|
3
3
|
Aggregate,
|
|
4
4
|
BedrockReranker,
|
|
5
|
+
Collect,
|
|
6
|
+
Construct,
|
|
5
7
|
Decoder,
|
|
6
8
|
DocToTextConverter,
|
|
7
9
|
DocumentEmbedder,
|
|
@@ -9,6 +11,7 @@ from qtype.semantic.model import (
|
|
|
9
11
|
DocumentSource,
|
|
10
12
|
DocumentSplitter,
|
|
11
13
|
Echo,
|
|
14
|
+
Explode,
|
|
12
15
|
FieldExtractor,
|
|
13
16
|
FileSource,
|
|
14
17
|
FileWriter,
|
|
@@ -32,6 +35,8 @@ EXECUTOR_REGISTRY = {
|
|
|
32
35
|
Agent: "qtype.interpreter.executors.agent_executor.AgentExecutor",
|
|
33
36
|
Aggregate: "qtype.interpreter.executors.aggregate_executor.AggregateExecutor",
|
|
34
37
|
BedrockReranker: "qtype.interpreter.executors.bedrock_reranker_executor.BedrockRerankerExecutor",
|
|
38
|
+
Collect: "qtype.interpreter.executors.collect_executor.CollectExecutor",
|
|
39
|
+
Construct: "qtype.interpreter.executors.construct_executor.ConstructExecutor",
|
|
35
40
|
Decoder: "qtype.interpreter.executors.decoder_executor.DecoderExecutor",
|
|
36
41
|
DocToTextConverter: "qtype.interpreter.executors.doc_to_text_executor.DocToTextConverterExecutor",
|
|
37
42
|
DocumentEmbedder: "qtype.interpreter.executors.document_embedder_executor.DocumentEmbedderExecutor",
|
|
@@ -39,6 +44,7 @@ EXECUTOR_REGISTRY = {
|
|
|
39
44
|
DocumentSource: "qtype.interpreter.executors.document_source_executor.DocumentSourceExecutor",
|
|
40
45
|
DocumentSplitter: "qtype.interpreter.executors.document_splitter_executor.DocumentSplitterExecutor",
|
|
41
46
|
Echo: "qtype.interpreter.executors.echo_executor.EchoExecutor",
|
|
47
|
+
Explode: "qtype.interpreter.executors.explode_executor.ExplodeExecutor",
|
|
42
48
|
FieldExtractor: "qtype.interpreter.executors.field_extractor_executor.FieldExtractorExecutor",
|
|
43
49
|
FileSource: "qtype.interpreter.executors.file_source_executor.FileSourceExecutor",
|
|
44
50
|
FileWriter: "qtype.interpreter.executors.file_writer_executor.FileWriterExecutor",
|
|
@@ -67,8 +67,8 @@ def from_cache_value(
|
|
|
67
67
|
) -> FlowMessage:
|
|
68
68
|
"""Reconstructs a FlowMessage from cached output values."""
|
|
69
69
|
if "FlowMessage.__error__" in cache_value:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
return message.model_copy(
|
|
71
|
+
deep=True, update={"error": cache_value["FlowMessage.__error__"]}
|
|
72
|
+
)
|
|
73
73
|
else:
|
|
74
74
|
return message.copy_with_variables(cache_value)
|
qtype/interpreter/conversions.py
CHANGED
|
@@ -18,7 +18,8 @@ from llama_index.core.base.llms.types import (
|
|
|
18
18
|
from llama_index.core.memory import Memory as LlamaMemory
|
|
19
19
|
from llama_index.core.schema import Document as LlamaDocument
|
|
20
20
|
from llama_index.core.vector_stores.types import BasePydanticVectorStore
|
|
21
|
-
from opensearchpy import
|
|
21
|
+
from opensearchpy import AsyncHttpConnection, AsyncOpenSearch
|
|
22
|
+
from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth
|
|
22
23
|
|
|
23
24
|
from qtype.base.types import PrimitiveTypeEnum
|
|
24
25
|
from qtype.dsl.domain_types import (
|
|
@@ -369,7 +370,7 @@ def to_opensearch_client(
|
|
|
369
370
|
InterpreterError: If authentication fails or configuration is invalid
|
|
370
371
|
"""
|
|
371
372
|
client_kwargs: dict[str, Any] = {
|
|
372
|
-
"hosts":
|
|
373
|
+
"hosts": index.endpoint,
|
|
373
374
|
**index.args,
|
|
374
375
|
}
|
|
375
376
|
|
|
@@ -390,15 +391,17 @@ def to_opensearch_client(
|
|
|
390
391
|
f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
|
|
391
392
|
)
|
|
392
393
|
|
|
393
|
-
# Use opensearch-py's
|
|
394
|
-
aws_auth =
|
|
394
|
+
# Use opensearch-py's async AWS auth
|
|
395
|
+
aws_auth = AWSV4SignerAsyncAuth(
|
|
395
396
|
credentials,
|
|
396
397
|
auth_session.region_name or "us-east-1", # type: ignore
|
|
398
|
+
"aoss", # service name for OpenSearch Serverless
|
|
397
399
|
)
|
|
398
400
|
|
|
399
401
|
client_kwargs["http_auth"] = aws_auth
|
|
400
402
|
client_kwargs["use_ssl"] = True
|
|
401
403
|
client_kwargs["verify_certs"] = True
|
|
404
|
+
client_kwargs["connection_class"] = AsyncHttpConnection
|
|
402
405
|
else:
|
|
403
406
|
raise InterpreterError(
|
|
404
407
|
f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
|
|
@@ -93,8 +93,7 @@ class AgentExecutor(StepExecutor, ToolExecutionMixin, FunctionToolHelper):
|
|
|
93
93
|
logger.error(f"Agent execution failed: {e}", exc_info=True)
|
|
94
94
|
# Emit error event to stream so frontend can display it
|
|
95
95
|
await self.stream_emitter.error(str(e))
|
|
96
|
-
message.
|
|
97
|
-
yield message
|
|
96
|
+
yield message.copy_with_error(self.step.id, e)
|
|
98
97
|
|
|
99
98
|
async def _process_chat(
|
|
100
99
|
self,
|
|
@@ -160,8 +160,7 @@ class BedrockRerankerExecutor(StepExecutor):
|
|
|
160
160
|
logger.error(f"Reranking failed: {e}", exc_info=True)
|
|
161
161
|
# Emit error event to stream so frontend can display it
|
|
162
162
|
await self.stream_emitter.error(str(e))
|
|
163
|
-
message.
|
|
164
|
-
yield message
|
|
163
|
+
yield message.copy_with_error(self.step.id, e)
|
|
165
164
|
|
|
166
165
|
def _query(self, message: FlowMessage) -> str:
|
|
167
166
|
"""Extract the query string from the FlowMessage.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from typing import Any, AsyncIterator
|
|
2
|
+
|
|
3
|
+
from qtype.interpreter.base.batch_step_executor import BatchedStepExecutor
|
|
4
|
+
from qtype.interpreter.base.executor_context import ExecutorContext
|
|
5
|
+
from qtype.interpreter.types import FlowMessage
|
|
6
|
+
from qtype.semantic.model import Collect
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _find_common_ancestors(messages: list[FlowMessage]) -> dict[str, Any]:
|
|
10
|
+
if not messages:
|
|
11
|
+
return {}
|
|
12
|
+
|
|
13
|
+
# 1. Start with all variables from the first message
|
|
14
|
+
common_vars = messages[0].variables.copy()
|
|
15
|
+
|
|
16
|
+
for msg in messages[1:]:
|
|
17
|
+
# 2. Identify keys that either don't exist in this message
|
|
18
|
+
# OR have a different value (diverged)
|
|
19
|
+
diverged_keys = [
|
|
20
|
+
k
|
|
21
|
+
for k, v in common_vars.items()
|
|
22
|
+
if k not in msg.variables or msg.variables[k] != v
|
|
23
|
+
]
|
|
24
|
+
# 3. Remove diverged keys to leave only the "Common Ancestors"
|
|
25
|
+
for k in diverged_keys:
|
|
26
|
+
common_vars.pop(k)
|
|
27
|
+
|
|
28
|
+
return common_vars
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CollectExecutor(BatchedStepExecutor):
|
|
32
|
+
"""Executor for Collect steps."""
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
step: Collect,
|
|
37
|
+
context: ExecutorContext,
|
|
38
|
+
**dependencies,
|
|
39
|
+
):
|
|
40
|
+
super().__init__(step, context, **dependencies)
|
|
41
|
+
if not isinstance(step, Collect):
|
|
42
|
+
raise ValueError("CollectExecutor can only execute Collect steps.")
|
|
43
|
+
self.step = step
|
|
44
|
+
|
|
45
|
+
async def process_batch(
|
|
46
|
+
self,
|
|
47
|
+
batch: list[FlowMessage],
|
|
48
|
+
) -> AsyncIterator[FlowMessage]:
|
|
49
|
+
"""Process a batch of FlowMessages for the Collect step.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
batch: A list of FlowMessages to process.
|
|
53
|
+
|
|
54
|
+
Yields:
|
|
55
|
+
FlowMessages with the results of processing.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
# Note that the batch processor accumulates the messages that we need,
|
|
59
|
+
# so this function isn't called until collection is ready.
|
|
60
|
+
|
|
61
|
+
# outputs[0] and inputs[0] are safe here since semantic validation ensures only one output
|
|
62
|
+
output_name = self.step.outputs[0].id
|
|
63
|
+
input_name = self.step.inputs[0].id
|
|
64
|
+
|
|
65
|
+
if len(batch) == 0:
|
|
66
|
+
# No messages to process -- yield nothing
|
|
67
|
+
return
|
|
68
|
+
|
|
69
|
+
results = []
|
|
70
|
+
for msg in batch:
|
|
71
|
+
results.append(msg.variables[input_name])
|
|
72
|
+
|
|
73
|
+
# Only variables common to all input messages are propagated at the end
|
|
74
|
+
common_ancestors = _find_common_ancestors(batch)
|
|
75
|
+
new_variables = {output_name: results} | common_ancestors
|
|
76
|
+
yield FlowMessage(session=batch[0].session, variables=new_variables)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from typing import AsyncIterator
|
|
2
|
+
|
|
3
|
+
from qtype.dsl.model import ListType
|
|
4
|
+
from qtype.interpreter.base.base_step_executor import StepExecutor
|
|
5
|
+
from qtype.interpreter.base.executor_context import ExecutorContext
|
|
6
|
+
from qtype.interpreter.types import FlowMessage
|
|
7
|
+
from qtype.interpreter.typing import instantiate_variable
|
|
8
|
+
from qtype.semantic.model import Construct
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ConstructExecutor(StepExecutor):
|
|
12
|
+
"""Executor for Construct steps."""
|
|
13
|
+
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
step: Construct,
|
|
17
|
+
context: ExecutorContext,
|
|
18
|
+
**dependencies,
|
|
19
|
+
):
|
|
20
|
+
super().__init__(step, context, **dependencies)
|
|
21
|
+
if not isinstance(step, Construct):
|
|
22
|
+
raise ValueError(
|
|
23
|
+
"ConstructExecutor can only execute Construct steps."
|
|
24
|
+
)
|
|
25
|
+
self.step = step
|
|
26
|
+
|
|
27
|
+
async def process_message(
|
|
28
|
+
self,
|
|
29
|
+
message: FlowMessage,
|
|
30
|
+
) -> AsyncIterator[FlowMessage]:
|
|
31
|
+
"""Process a FlowMessage for the Construct step.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
message: The FlowMessage to process.
|
|
35
|
+
Yields:
|
|
36
|
+
FlowMessages with the results of processing.
|
|
37
|
+
"""
|
|
38
|
+
try:
|
|
39
|
+
# Safe since semantic validation ensures exactly one output variable
|
|
40
|
+
output_var = self.step.outputs[0]
|
|
41
|
+
|
|
42
|
+
if (
|
|
43
|
+
isinstance(output_var.type, ListType)
|
|
44
|
+
or len(self.step.inputs) == 1
|
|
45
|
+
):
|
|
46
|
+
inputs = message.variables[self.step.inputs[0].id]
|
|
47
|
+
elif hasattr(output_var.type, "model_validate"):
|
|
48
|
+
# This is a custom type (Pydantic model)
|
|
49
|
+
# So input should be a dict
|
|
50
|
+
input_values = {
|
|
51
|
+
input_var.id: message.variables[input_var.id]
|
|
52
|
+
for input_var in self.step.inputs
|
|
53
|
+
}
|
|
54
|
+
# use the mapping to convert variable names to the output type's field names
|
|
55
|
+
inputs = {
|
|
56
|
+
self.step.field_mapping.get(var_name, var_name): value
|
|
57
|
+
for var_name, value in input_values.items()
|
|
58
|
+
}
|
|
59
|
+
else:
|
|
60
|
+
raise ValueError(
|
|
61
|
+
"Construct step must have either a single input or output of a custom type."
|
|
62
|
+
)
|
|
63
|
+
constructed_value = instantiate_variable(output_var, inputs)
|
|
64
|
+
yield message.copy_with_variables(
|
|
65
|
+
{output_var.id: constructed_value}
|
|
66
|
+
)
|
|
67
|
+
except Exception as e:
|
|
68
|
+
yield message.copy_with_error(self.step.id, e)
|
|
@@ -159,5 +159,4 @@ class DecoderExecutor(StepExecutor):
|
|
|
159
159
|
except Exception as e:
|
|
160
160
|
# Emit error event to stream so frontend can display it
|
|
161
161
|
await self.stream_emitter.error(str(e))
|
|
162
|
-
message.
|
|
163
|
-
yield message
|
|
162
|
+
yield message.copy_with_error(self.step.id, e)
|
|
@@ -74,8 +74,7 @@ class DocToTextConverterExecutor(StepExecutor):
|
|
|
74
74
|
except Exception as e:
|
|
75
75
|
# Emit error event to stream so frontend can display it
|
|
76
76
|
await self.stream_emitter.error(str(e))
|
|
77
|
-
message.
|
|
78
|
-
yield message
|
|
77
|
+
yield message.copy_with_error(self.step.id, e)
|
|
79
78
|
|
|
80
79
|
def _convert_doc(self, doc: RAGDocument) -> RAGDocument:
|
|
81
80
|
"""Convert a RAGDocument to text/markdown format.
|
|
@@ -109,5 +109,4 @@ class DocumentSearchExecutor(StepExecutor):
|
|
|
109
109
|
except Exception as e:
|
|
110
110
|
# Emit error event to stream so frontend can display it
|
|
111
111
|
await self.stream_emitter.error(str(e))
|
|
112
|
-
message.
|
|
113
|
-
yield message
|
|
112
|
+
yield message.copy_with_error(self.step.id, e)
|
|
@@ -114,5 +114,4 @@ class DocumentSourceExecutor(StepExecutor):
|
|
|
114
114
|
except Exception as e:
|
|
115
115
|
# Emit error event to stream so frontend can display it
|
|
116
116
|
await self.stream_emitter.error(str(e))
|
|
117
|
-
message.
|
|
118
|
-
yield message
|
|
117
|
+
yield message.copy_with_error(self.step.id, e)
|
|
@@ -96,10 +96,12 @@ class DocumentSplitterExecutor(StepExecutor):
|
|
|
96
96
|
vector=None, # Embedding will be added later
|
|
97
97
|
metadata=merged_metadata,
|
|
98
98
|
)
|
|
99
|
-
|
|
99
|
+
if (
|
|
100
|
+
chunk.content and chunk.content.strip()
|
|
101
|
+
): # Only emit non-empty chunks
|
|
102
|
+
yield message.copy_with_variables({output_id: chunk})
|
|
100
103
|
|
|
101
104
|
except Exception as e:
|
|
102
105
|
# Emit error event to stream so frontend can display it
|
|
103
106
|
await self.stream_emitter.error(str(e))
|
|
104
|
-
message.
|
|
105
|
-
yield message
|
|
107
|
+
yield message.copy_with_error(self.step.id, e)
|