qtype 0.1.3__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qtype/base/types.py +1 -9
- qtype/base/ui_shapes.py +38 -0
- qtype/dsl/model.py +45 -19
- qtype/dsl/parser.py +3 -3
- qtype/interpreter/base/base_step_executor.py +10 -10
- qtype/interpreter/base/factory.py +6 -0
- qtype/interpreter/base/step_cache.py +3 -3
- qtype/interpreter/conversions.py +32 -7
- qtype/interpreter/executors/agent_executor.py +1 -2
- qtype/interpreter/executors/bedrock_reranker_executor.py +1 -2
- qtype/interpreter/executors/collect_executor.py +76 -0
- qtype/interpreter/executors/construct_executor.py +68 -0
- qtype/interpreter/executors/decoder_executor.py +1 -2
- qtype/interpreter/executors/doc_to_text_executor.py +1 -2
- qtype/interpreter/executors/document_embedder_executor.py +19 -4
- qtype/interpreter/executors/document_search_executor.py +1 -2
- qtype/interpreter/executors/document_source_executor.py +1 -2
- qtype/interpreter/executors/document_splitter_executor.py +1 -2
- qtype/interpreter/executors/echo_executor.py +1 -2
- qtype/interpreter/executors/explode_executor.py +49 -0
- qtype/interpreter/executors/field_extractor_executor.py +1 -2
- qtype/interpreter/executors/file_writer_executor.py +12 -13
- qtype/interpreter/executors/index_upsert_executor.py +3 -4
- qtype/interpreter/executors/invoke_embedding_executor.py +37 -26
- qtype/interpreter/executors/invoke_tool_executor.py +1 -2
- qtype/interpreter/executors/llm_inference_executor.py +1 -2
- qtype/interpreter/executors/prompt_template_executor.py +1 -2
- qtype/interpreter/executors/sql_source_executor.py +1 -2
- qtype/interpreter/executors/vector_search_executor.py +1 -2
- qtype/interpreter/flow.py +6 -4
- qtype/interpreter/types.py +18 -10
- qtype/interpreter/typing.py +53 -4
- qtype/interpreter/ui/404/index.html +1 -1
- qtype/interpreter/ui/404.html +1 -1
- qtype/interpreter/ui/_next/static/{20HoJN6otZ_LyHLHpCPE6 → YR7FgQl_Kpwjcqho0ogG4}/_buildManifest.js +1 -1
- qtype/interpreter/ui/_next/static/chunks/230-e22a519211de9624.js +33 -0
- qtype/interpreter/ui/_next/static/chunks/255-268261a96ffee0a9.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/app/_not-found/page-8f0e059659d8afd7.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/app/layout-1615bcffa82ff884.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/app/page-bfbb263eb356c6eb.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-8d3b3efcaddd4728.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/framework-d7de93249215fb06.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/main-090112344d9a19ce.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/main-app-a5425c8070223ccc.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/pages/{_app-0a0020ddd67f79cf.js → _app-7d307437aca18ad4.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/pages/{_error-03529f2c21436739.js → _error-cb2a52f75f2162e2.js} +1 -1
- qtype/interpreter/ui/_next/static/chunks/webpack-bd0771cb30dd12fa.js +1 -0
- qtype/interpreter/ui/_next/static/css/7906798f56607e8f.css +3 -0
- qtype/interpreter/ui/assets/video-sample.mp4 +0 -0
- qtype/interpreter/ui/index.html +1 -1
- qtype/interpreter/ui/index.txt +12 -12
- qtype/semantic/checker.py +83 -0
- qtype/semantic/generate.py +0 -1
- qtype/semantic/model.py +25 -18
- {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/METADATA +2 -2
- {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/RECORD +62 -57
- qtype/interpreter/ui/_next/static/chunks/434-b2112d19f25c44ff.js +0 -36
- qtype/interpreter/ui/_next/static/chunks/4bd1b696-cf72ae8a39fa05aa.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/964-2b041321a01cbf56.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/app/_not-found/page-e110d2a9d0a83d82.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/app/layout-a05273ead5de2c41.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/framework-7c95b8e5103c9e90.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/main-app-6fc6346bc8f7f163.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/main-e26b9cb206da2cac.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/webpack-08642e441b39b6c2.js +0 -1
- qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +0 -3
- /qtype/interpreter/ui/_next/static/{20HoJN6otZ_LyHLHpCPE6 → YR7FgQl_Kpwjcqho0ogG4}/_ssgManifest.js +0 -0
- {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/WHEEL +0 -0
- {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/entry_points.txt +0 -0
- {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {qtype-0.1.3.dist-info → qtype-0.1.9.dist-info}/top_level.txt +0 -0
qtype/base/types.py
CHANGED
|
@@ -54,21 +54,13 @@ class PrimitiveTypeEnum(str, Enum):
|
|
|
54
54
|
citation_url = "citation_url"
|
|
55
55
|
date = "date"
|
|
56
56
|
datetime = "datetime"
|
|
57
|
-
int = "int"
|
|
58
57
|
file = "file"
|
|
59
58
|
float = "float"
|
|
60
59
|
image = "image"
|
|
60
|
+
int = "int"
|
|
61
61
|
text = "text"
|
|
62
62
|
time = "time"
|
|
63
63
|
video = "video"
|
|
64
|
-
thinking = "thinking"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class StepCardinality(str, Enum):
|
|
68
|
-
"""Does this step emit 1 (one) or 0...N (many) items?"""
|
|
69
|
-
|
|
70
|
-
one = "one"
|
|
71
|
-
many = "many"
|
|
72
64
|
|
|
73
65
|
|
|
74
66
|
ReferenceT = TypeVar("ReferenceT")
|
qtype/base/ui_shapes.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from pydantic import ConfigDict as PydanticConfigDict
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
|
|
8
|
+
from qtype.base.types import PrimitiveTypeEnum
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TextWidget(str, Enum):
|
|
12
|
+
text = "text" # Simple text widget input, default
|
|
13
|
+
textarea = "textarea" # A paragraph editor
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TextInputUI(BaseModel):
|
|
17
|
+
model_config = PydanticConfigDict(extra="forbid")
|
|
18
|
+
|
|
19
|
+
widget: TextWidget = Field(
|
|
20
|
+
default=TextWidget.text,
|
|
21
|
+
description="What kind of text ui to present",
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FileUploadUI(BaseModel):
|
|
26
|
+
model_config = PydanticConfigDict(extra="forbid")
|
|
27
|
+
accept: str = Field(
|
|
28
|
+
default="*/*",
|
|
29
|
+
description="The mime type(s) to accept in the file upload.",
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
UIType = Union[TextInputUI, FileUploadUI]
|
|
34
|
+
|
|
35
|
+
UI_INPUT_TO_TYPE = {
|
|
36
|
+
(TextInputUI, PrimitiveTypeEnum.text),
|
|
37
|
+
(FileUploadUI, PrimitiveTypeEnum.file),
|
|
38
|
+
}
|
qtype/dsl/model.py
CHANGED
|
@@ -23,9 +23,9 @@ from qtype.base.types import (
|
|
|
23
23
|
ConcurrentStepMixin,
|
|
24
24
|
PrimitiveTypeEnum,
|
|
25
25
|
Reference,
|
|
26
|
-
StepCardinality,
|
|
27
26
|
StrictBaseModel,
|
|
28
27
|
)
|
|
28
|
+
from qtype.base.ui_shapes import UI_INPUT_TO_TYPE, UIType
|
|
29
29
|
from qtype.dsl.domain_types import (
|
|
30
30
|
ChatContent,
|
|
31
31
|
ChatMessage,
|
|
@@ -212,12 +212,24 @@ class Variable(StrictBaseModel):
|
|
|
212
212
|
),
|
|
213
213
|
)
|
|
214
214
|
|
|
215
|
+
ui: UIType | None = Field(None, description="Hints for the UI if needed.")
|
|
216
|
+
|
|
215
217
|
@model_validator(mode="before")
|
|
216
218
|
@classmethod
|
|
217
219
|
def resolve_type(cls, data: Any, info: ValidationInfo) -> Any:
|
|
218
220
|
"""Resolve string-based type references using the shared validator."""
|
|
219
221
|
return _resolve_type_field_validator(data, info)
|
|
220
222
|
|
|
223
|
+
@model_validator(mode="after")
|
|
224
|
+
def validate_ui_type(self) -> Variable:
|
|
225
|
+
"""Ensure the UI hint, if provided, is compatible with the variable's type."""
|
|
226
|
+
if self.ui is not None:
|
|
227
|
+
if (type(self.ui), self.type) not in UI_INPUT_TO_TYPE:
|
|
228
|
+
raise ValueError(
|
|
229
|
+
f"Variable of {self.type} is not comptabile with UI configuration {self.ui}"
|
|
230
|
+
)
|
|
231
|
+
return self
|
|
232
|
+
|
|
221
233
|
|
|
222
234
|
class SecretReference(StrictBaseModel):
|
|
223
235
|
"""
|
|
@@ -350,10 +362,6 @@ class Step(CachedStepMixin, StrictBaseModel, ABC):
|
|
|
350
362
|
|
|
351
363
|
id: str = Field(..., description="Unique ID of this component.")
|
|
352
364
|
type: str = Field(..., description="Type of the step component.")
|
|
353
|
-
cardinality: StepCardinality = Field(
|
|
354
|
-
default=StepCardinality.one,
|
|
355
|
-
description="Does this step emit 1 (one) or 0...N (many) instances of the outputs?",
|
|
356
|
-
)
|
|
357
365
|
inputs: list[Reference[Variable] | str] = Field(
|
|
358
366
|
default_factory=list,
|
|
359
367
|
description="References to the variables required by this step.",
|
|
@@ -364,6 +372,33 @@ class Step(CachedStepMixin, StrictBaseModel, ABC):
|
|
|
364
372
|
)
|
|
365
373
|
|
|
366
374
|
|
|
375
|
+
class Explode(Step):
|
|
376
|
+
"""A step that takes a list input and produces multiple outputs, one per item in the list."""
|
|
377
|
+
|
|
378
|
+
type: Literal["Explode"] = "Explode"
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
class Collect(Step, BatchableStepMixin):
|
|
382
|
+
"""A step that collects all inputs and creates a single list to return."""
|
|
383
|
+
|
|
384
|
+
type: Literal["Collect"] = "Collect"
|
|
385
|
+
|
|
386
|
+
batch_config: BatchConfig = Field(
|
|
387
|
+
default_factory=partial(BatchConfig, batch_size=sys.maxsize),
|
|
388
|
+
description="Configuration for processing the input stream in batches. If omitted, the step processes items one by one.",
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
class Construct(Step):
|
|
393
|
+
"""A step that converts variables into an instance of a Custom or Domain Type"""
|
|
394
|
+
|
|
395
|
+
type: Literal["Construct"] = "Construct"
|
|
396
|
+
field_mapping: dict[str, str] = Field(
|
|
397
|
+
...,
|
|
398
|
+
description="Mapping of type inputs to variable names, if needed.",
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
|
|
367
402
|
class PromptTemplate(Step):
|
|
368
403
|
"""Defines a prompt template with a string format and variable bindings.
|
|
369
404
|
This is used to generate prompts dynamically based on input variables."""
|
|
@@ -765,8 +800,9 @@ class AWSAuthProvider(AuthorizationProvider):
|
|
|
765
800
|
has_keys = self.access_key_id and self.secret_access_key
|
|
766
801
|
has_profile = self.profile_name
|
|
767
802
|
has_role = self.role_arn
|
|
803
|
+
has_region = self.region
|
|
768
804
|
|
|
769
|
-
if not (has_keys or has_profile or has_role):
|
|
805
|
+
if not (has_keys or has_profile or has_role or has_region):
|
|
770
806
|
raise ValueError(
|
|
771
807
|
"AWSAuthProvider must specify at least one authentication method: "
|
|
772
808
|
"access keys, profile name, or role ARN."
|
|
@@ -891,10 +927,6 @@ class Source(Step):
|
|
|
891
927
|
"""Base class for data sources"""
|
|
892
928
|
|
|
893
929
|
id: str = Field(..., description="Unique ID of the data source.")
|
|
894
|
-
cardinality: Literal[StepCardinality.many] = Field(
|
|
895
|
-
default=StepCardinality.many,
|
|
896
|
-
description="Sources always emit 0...N instances of the outputs.",
|
|
897
|
-
)
|
|
898
930
|
|
|
899
931
|
|
|
900
932
|
class SQLSource(Source):
|
|
@@ -951,7 +983,6 @@ class Aggregate(Step):
|
|
|
951
983
|
"""
|
|
952
984
|
|
|
953
985
|
type: Literal["Aggregate"] = "Aggregate"
|
|
954
|
-
cardinality: Literal[StepCardinality.one] = StepCardinality.one
|
|
955
986
|
|
|
956
987
|
# Outputs are now optional. The user can provide 0, 1, 2, or 3 names.
|
|
957
988
|
# The order will be: success_count, error_count, total_count
|
|
@@ -1005,10 +1036,6 @@ class DocumentSplitter(Step, ConcurrentStepMixin):
|
|
|
1005
1036
|
"""Configuration for chunking/splitting documents into embeddable nodes/chunks."""
|
|
1006
1037
|
|
|
1007
1038
|
type: Literal["DocumentSplitter"] = "DocumentSplitter"
|
|
1008
|
-
cardinality: Literal[StepCardinality.many] = Field(
|
|
1009
|
-
default=StepCardinality.many,
|
|
1010
|
-
description="Consumes one document and emits 0...N nodes/chunks.",
|
|
1011
|
-
)
|
|
1012
1039
|
|
|
1013
1040
|
splitter_name: str = Field(
|
|
1014
1041
|
default="SentenceSplitter",
|
|
@@ -1028,10 +1055,6 @@ class DocumentEmbedder(Step, ConcurrentStepMixin):
|
|
|
1028
1055
|
"""Embeds document chunks using a specified embedding model."""
|
|
1029
1056
|
|
|
1030
1057
|
type: Literal["DocumentEmbedder"] = "DocumentEmbedder"
|
|
1031
|
-
cardinality: Literal[StepCardinality.many] = Field(
|
|
1032
|
-
default=StepCardinality.many,
|
|
1033
|
-
description="Consumes one chunk and emits one embedded chunk.",
|
|
1034
|
-
)
|
|
1035
1058
|
model: Reference[EmbeddingModel] | str = Field(
|
|
1036
1059
|
..., description="Embedding model to use for vectorization."
|
|
1037
1060
|
)
|
|
@@ -1199,6 +1222,8 @@ StepType = Annotated[
|
|
|
1199
1222
|
Agent,
|
|
1200
1223
|
Aggregate,
|
|
1201
1224
|
BedrockReranker,
|
|
1225
|
+
Collect,
|
|
1226
|
+
Construct,
|
|
1202
1227
|
Decoder,
|
|
1203
1228
|
DocToTextConverter,
|
|
1204
1229
|
DocumentEmbedder,
|
|
@@ -1206,6 +1231,7 @@ StepType = Annotated[
|
|
|
1206
1231
|
DocumentSplitter,
|
|
1207
1232
|
DocumentSource,
|
|
1208
1233
|
Echo,
|
|
1234
|
+
Explode,
|
|
1209
1235
|
FieldExtractor,
|
|
1210
1236
|
FileSource,
|
|
1211
1237
|
FileWriter,
|
qtype/dsl/parser.py
CHANGED
|
@@ -153,12 +153,12 @@ def _format_validation_errors(
|
|
|
153
153
|
error_msg = "Validation failed (see details above)"
|
|
154
154
|
else:
|
|
155
155
|
error_msg = "Validation failed:\n"
|
|
156
|
-
for error in relevant_errors[:
|
|
156
|
+
for error in relevant_errors[:30]: # Show max 30 errors
|
|
157
157
|
loc_path = _simplify_field_path(error["loc"])
|
|
158
158
|
error_msg += f" {loc_path}: {error['msg']}\n"
|
|
159
159
|
|
|
160
|
-
if len(relevant_errors) >
|
|
161
|
-
error_msg += f" ... and {len(relevant_errors) -
|
|
160
|
+
if len(relevant_errors) > 30:
|
|
161
|
+
error_msg += f" ... and {len(relevant_errors) - 30} more errors\n"
|
|
162
162
|
|
|
163
163
|
if source_name:
|
|
164
164
|
error_msg = f"In {source_name}:\n{error_msg}"
|
|
qtype/interpreter/base/base_step_executor.py
CHANGED
|
@@ -230,21 +230,12 @@ class StepExecutor(ABC):
|
|
|
230
230
|
prepared_messages, process_item, task_limit=num_workers
|
|
231
231
|
)
|
|
232
232
|
|
|
233
|
-
# Combine all streams
|
|
234
|
-
async def emit_failed_messages() -> AsyncIterator[FlowMessage]:
|
|
235
|
-
for msg in failed_messages:
|
|
236
|
-
yield msg
|
|
237
|
-
|
|
238
|
-
all_results = stream.concat(
|
|
239
|
-
stream.iterate([result_stream, emit_failed_messages()])
|
|
240
|
-
)
|
|
241
|
-
|
|
242
233
|
# Track message counts for telemetry
|
|
243
234
|
message_count = 0
|
|
244
235
|
error_count = 0
|
|
245
236
|
|
|
246
237
|
# Stream results and track progress
|
|
247
|
-
async with
|
|
238
|
+
async with result_stream.stream() as streamer:
|
|
248
239
|
result: FlowMessage
|
|
249
240
|
async for result in streamer:
|
|
250
241
|
message_count += 1
|
|
@@ -255,6 +246,15 @@ class StepExecutor(ABC):
|
|
|
255
246
|
)
|
|
256
247
|
yield result
|
|
257
248
|
|
|
249
|
+
# Emit failed messages after processing completes
|
|
250
|
+
for msg in failed_messages:
|
|
251
|
+
message_count += 1
|
|
252
|
+
error_count += 1
|
|
253
|
+
self.progress.update_for_message(
|
|
254
|
+
msg, self.context.on_progress
|
|
255
|
+
)
|
|
256
|
+
yield msg
|
|
257
|
+
|
|
258
258
|
# Finalize and track those messages too
|
|
259
259
|
async for msg in self.finalize():
|
|
260
260
|
message_count += 1
|
|
qtype/interpreter/base/factory.py
CHANGED
|
@@ -2,6 +2,8 @@ from qtype.semantic.model import (
|
|
|
2
2
|
Agent,
|
|
3
3
|
Aggregate,
|
|
4
4
|
BedrockReranker,
|
|
5
|
+
Collect,
|
|
6
|
+
Construct,
|
|
5
7
|
Decoder,
|
|
6
8
|
DocToTextConverter,
|
|
7
9
|
DocumentEmbedder,
|
|
@@ -9,6 +11,7 @@ from qtype.semantic.model import (
|
|
|
9
11
|
DocumentSource,
|
|
10
12
|
DocumentSplitter,
|
|
11
13
|
Echo,
|
|
14
|
+
Explode,
|
|
12
15
|
FieldExtractor,
|
|
13
16
|
FileSource,
|
|
14
17
|
FileWriter,
|
|
@@ -32,6 +35,8 @@ EXECUTOR_REGISTRY = {
|
|
|
32
35
|
Agent: "qtype.interpreter.executors.agent_executor.AgentExecutor",
|
|
33
36
|
Aggregate: "qtype.interpreter.executors.aggregate_executor.AggregateExecutor",
|
|
34
37
|
BedrockReranker: "qtype.interpreter.executors.bedrock_reranker_executor.BedrockRerankerExecutor",
|
|
38
|
+
Collect: "qtype.interpreter.executors.collect_executor.CollectExecutor",
|
|
39
|
+
Construct: "qtype.interpreter.executors.construct_executor.ConstructExecutor",
|
|
35
40
|
Decoder: "qtype.interpreter.executors.decoder_executor.DecoderExecutor",
|
|
36
41
|
DocToTextConverter: "qtype.interpreter.executors.doc_to_text_executor.DocToTextConverterExecutor",
|
|
37
42
|
DocumentEmbedder: "qtype.interpreter.executors.document_embedder_executor.DocumentEmbedderExecutor",
|
|
@@ -39,6 +44,7 @@ EXECUTOR_REGISTRY = {
|
|
|
39
44
|
DocumentSource: "qtype.interpreter.executors.document_source_executor.DocumentSourceExecutor",
|
|
40
45
|
DocumentSplitter: "qtype.interpreter.executors.document_splitter_executor.DocumentSplitterExecutor",
|
|
41
46
|
Echo: "qtype.interpreter.executors.echo_executor.EchoExecutor",
|
|
47
|
+
Explode: "qtype.interpreter.executors.explode_executor.ExplodeExecutor",
|
|
42
48
|
FieldExtractor: "qtype.interpreter.executors.field_extractor_executor.FieldExtractorExecutor",
|
|
43
49
|
FileSource: "qtype.interpreter.executors.file_source_executor.FileSourceExecutor",
|
|
44
50
|
FileWriter: "qtype.interpreter.executors.file_writer_executor.FileWriterExecutor",
|
|
qtype/interpreter/base/step_cache.py
CHANGED
|
@@ -67,8 +67,8 @@ def from_cache_value(
|
|
|
67
67
|
) -> FlowMessage:
|
|
68
68
|
"""Reconstructs a FlowMessage from cached output values."""
|
|
69
69
|
if "FlowMessage.__error__" in cache_value:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
return message.model_copy(
|
|
71
|
+
deep=True, update={"error": cache_value["FlowMessage.__error__"]}
|
|
72
|
+
)
|
|
73
73
|
else:
|
|
74
74
|
return message.copy_with_variables(cache_value)
|
qtype/interpreter/conversions.py
CHANGED
|
@@ -18,7 +18,8 @@ from llama_index.core.base.llms.types import (
|
|
|
18
18
|
from llama_index.core.memory import Memory as LlamaMemory
|
|
19
19
|
from llama_index.core.schema import Document as LlamaDocument
|
|
20
20
|
from llama_index.core.vector_stores.types import BasePydanticVectorStore
|
|
21
|
-
from opensearchpy import
|
|
21
|
+
from opensearchpy import AsyncHttpConnection, AsyncOpenSearch
|
|
22
|
+
from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth
|
|
22
23
|
|
|
23
24
|
from qtype.base.types import PrimitiveTypeEnum
|
|
24
25
|
from qtype.dsl.domain_types import (
|
|
@@ -34,6 +35,7 @@ from qtype.interpreter.base.secrets import SecretManagerBase
|
|
|
34
35
|
from qtype.interpreter.types import InterpreterError
|
|
35
36
|
from qtype.semantic.model import (
|
|
36
37
|
APIKeyAuthProvider,
|
|
38
|
+
AWSAuthProvider,
|
|
37
39
|
DocumentIndex,
|
|
38
40
|
DocumentSplitter,
|
|
39
41
|
Model,
|
|
@@ -302,7 +304,9 @@ def to_vector_store(
|
|
|
302
304
|
|
|
303
305
|
|
|
304
306
|
@cached_resource
|
|
305
|
-
def to_embedding_model(
|
|
307
|
+
def to_embedding_model(
|
|
308
|
+
model: Model, secret_manager: SecretManagerBase
|
|
309
|
+
) -> BaseEmbedding:
|
|
306
310
|
"""Convert a qtype Model to a LlamaIndex embedding model."""
|
|
307
311
|
|
|
308
312
|
if model.provider == "aws-bedrock":
|
|
@@ -310,7 +314,14 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
|
|
|
310
314
|
BedrockEmbedding,
|
|
311
315
|
)
|
|
312
316
|
|
|
317
|
+
session = None
|
|
318
|
+
if model.auth is not None:
|
|
319
|
+
assert isinstance(model.auth, AWSAuthProvider)
|
|
320
|
+
with aws(model.auth, secret_manager) as session:
|
|
321
|
+
session = session._session
|
|
322
|
+
|
|
313
323
|
bedrock_embedding: BaseEmbedding = BedrockEmbedding(
|
|
324
|
+
botocore_session=session,
|
|
314
325
|
model_name=model.model_id if model.model_id else model.id,
|
|
315
326
|
max_retries=100,
|
|
316
327
|
)
|
|
@@ -320,8 +331,20 @@ def to_embedding_model(model: Model) -> BaseEmbedding:
|
|
|
320
331
|
OpenAIEmbedding,
|
|
321
332
|
)
|
|
322
333
|
|
|
334
|
+
api_key = None
|
|
335
|
+
if model.auth:
|
|
336
|
+
with auth(model.auth, secret_manager) as provider:
|
|
337
|
+
if not isinstance(provider, APIKeyAuthProvider):
|
|
338
|
+
raise InterpreterError(
|
|
339
|
+
f"OpenAI provider requires APIKeyAuthProvider, "
|
|
340
|
+
f"got {type(provider).__name__}"
|
|
341
|
+
)
|
|
342
|
+
# api_key is guaranteed to be str after auth() resolves it
|
|
343
|
+
api_key = provider.api_key # type: ignore[assignment]
|
|
344
|
+
|
|
323
345
|
openai_embedding: BaseEmbedding = OpenAIEmbedding(
|
|
324
|
-
|
|
346
|
+
api_key=api_key,
|
|
347
|
+
model_name=model.model_id if model.model_id else model.id,
|
|
325
348
|
)
|
|
326
349
|
return openai_embedding
|
|
327
350
|
else:
|
|
@@ -347,7 +370,7 @@ def to_opensearch_client(
|
|
|
347
370
|
InterpreterError: If authentication fails or configuration is invalid
|
|
348
371
|
"""
|
|
349
372
|
client_kwargs: dict[str, Any] = {
|
|
350
|
-
"hosts":
|
|
373
|
+
"hosts": index.endpoint,
|
|
351
374
|
**index.args,
|
|
352
375
|
}
|
|
353
376
|
|
|
@@ -368,15 +391,17 @@ def to_opensearch_client(
|
|
|
368
391
|
f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
|
|
369
392
|
)
|
|
370
393
|
|
|
371
|
-
# Use opensearch-py's
|
|
372
|
-
aws_auth =
|
|
394
|
+
# Use opensearch-py's async AWS auth
|
|
395
|
+
aws_auth = AWSV4SignerAsyncAuth(
|
|
373
396
|
credentials,
|
|
374
397
|
auth_session.region_name or "us-east-1", # type: ignore
|
|
398
|
+
"aoss", # service name for OpenSearch Serverless
|
|
375
399
|
)
|
|
376
400
|
|
|
377
401
|
client_kwargs["http_auth"] = aws_auth
|
|
378
402
|
client_kwargs["use_ssl"] = True
|
|
379
403
|
client_kwargs["verify_certs"] = True
|
|
404
|
+
client_kwargs["connection_class"] = AsyncHttpConnection
|
|
380
405
|
else:
|
|
381
406
|
raise InterpreterError(
|
|
382
407
|
f"Unsupported authentication type for DocumentIndex: {type(index.auth)}"
|
|
@@ -566,7 +591,7 @@ def to_llama_vector_store_and_retriever(
|
|
|
566
591
|
vector_store = to_vector_store(index, secret_manager)
|
|
567
592
|
|
|
568
593
|
# Get the embedding model
|
|
569
|
-
embedding_model = to_embedding_model(index.embedding_model)
|
|
594
|
+
embedding_model = to_embedding_model(index.embedding_model, secret_manager)
|
|
570
595
|
|
|
571
596
|
# Create a VectorStoreIndex with the vector store and embedding model
|
|
572
597
|
vector_index = VectorStoreIndex.from_vector_store(
|
|
qtype/interpreter/executors/agent_executor.py
CHANGED
|
@@ -93,8 +93,7 @@ class AgentExecutor(StepExecutor, ToolExecutionMixin, FunctionToolHelper):
|
|
|
93
93
|
logger.error(f"Agent execution failed: {e}", exc_info=True)
|
|
94
94
|
# Emit error event to stream so frontend can display it
|
|
95
95
|
await self.stream_emitter.error(str(e))
|
|
96
|
-
message.
|
|
97
|
-
yield message
|
|
96
|
+
yield message.copy_with_error(self.step.id, e)
|
|
98
97
|
|
|
99
98
|
async def _process_chat(
|
|
100
99
|
self,
|
|
qtype/interpreter/executors/bedrock_reranker_executor.py
CHANGED
|
@@ -160,8 +160,7 @@ class BedrockRerankerExecutor(StepExecutor):
|
|
|
160
160
|
logger.error(f"Reranking failed: {e}", exc_info=True)
|
|
161
161
|
# Emit error event to stream so frontend can display it
|
|
162
162
|
await self.stream_emitter.error(str(e))
|
|
163
|
-
message.
|
|
164
|
-
yield message
|
|
163
|
+
yield message.copy_with_error(self.step.id, e)
|
|
165
164
|
|
|
166
165
|
def _query(self, message: FlowMessage) -> str:
|
|
167
166
|
"""Extract the query string from the FlowMessage.
|
|
qtype/interpreter/executors/collect_executor.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from typing import Any, AsyncIterator
|
|
2
|
+
|
|
3
|
+
from qtype.interpreter.base.batch_step_executor import BatchedStepExecutor
|
|
4
|
+
from qtype.interpreter.base.executor_context import ExecutorContext
|
|
5
|
+
from qtype.interpreter.types import FlowMessage
|
|
6
|
+
from qtype.semantic.model import Collect
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _find_common_ancestors(messages: list[FlowMessage]) -> dict[str, Any]:
|
|
10
|
+
if not messages:
|
|
11
|
+
return {}
|
|
12
|
+
|
|
13
|
+
# 1. Start with all variables from the first message
|
|
14
|
+
common_vars = messages[0].variables.copy()
|
|
15
|
+
|
|
16
|
+
for msg in messages[1:]:
|
|
17
|
+
# 2. Identify keys that either don't exist in this message
|
|
18
|
+
# OR have a different value (diverged)
|
|
19
|
+
diverged_keys = [
|
|
20
|
+
k
|
|
21
|
+
for k, v in common_vars.items()
|
|
22
|
+
if k not in msg.variables or msg.variables[k] != v
|
|
23
|
+
]
|
|
24
|
+
# 3. Remove diverged keys to leave only the "Common Ancestors"
|
|
25
|
+
for k in diverged_keys:
|
|
26
|
+
common_vars.pop(k)
|
|
27
|
+
|
|
28
|
+
return common_vars
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CollectExecutor(BatchedStepExecutor):
|
|
32
|
+
"""Executor for Collect steps."""
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
step: Collect,
|
|
37
|
+
context: ExecutorContext,
|
|
38
|
+
**dependencies,
|
|
39
|
+
):
|
|
40
|
+
super().__init__(step, context, **dependencies)
|
|
41
|
+
if not isinstance(step, Collect):
|
|
42
|
+
raise ValueError("CollectExecutor can only execute Collect steps.")
|
|
43
|
+
self.step = step
|
|
44
|
+
|
|
45
|
+
async def process_batch(
|
|
46
|
+
self,
|
|
47
|
+
batch: list[FlowMessage],
|
|
48
|
+
) -> AsyncIterator[FlowMessage]:
|
|
49
|
+
"""Process a batch of FlowMessages for the Collect step.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
batch: A list of FlowMessages to process.
|
|
53
|
+
|
|
54
|
+
Yields:
|
|
55
|
+
FlowMessages with the results of processing.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
# Note that the batch processor accumulates the messages that we need,
|
|
59
|
+
# so this function isn't called until collection is ready.
|
|
60
|
+
|
|
61
|
+
# outputs[0] and inputs[0] are safe here since semantic validation ensures only one output
|
|
62
|
+
output_name = self.step.outputs[0].id
|
|
63
|
+
input_name = self.step.inputs[0].id
|
|
64
|
+
|
|
65
|
+
if len(batch) == 0:
|
|
66
|
+
# No messages to process -- yield nothing
|
|
67
|
+
return
|
|
68
|
+
|
|
69
|
+
results = []
|
|
70
|
+
for msg in batch:
|
|
71
|
+
results.append(msg.variables[input_name])
|
|
72
|
+
|
|
73
|
+
# Only variables common to all input messages are propagated at the end
|
|
74
|
+
common_ancestors = _find_common_ancestors(batch)
|
|
75
|
+
new_variables = {output_name: results} | common_ancestors
|
|
76
|
+
yield FlowMessage(session=batch[0].session, variables=new_variables)
|
|
qtype/interpreter/executors/construct_executor.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from typing import AsyncIterator
|
|
2
|
+
|
|
3
|
+
from qtype.dsl.model import ListType
|
|
4
|
+
from qtype.interpreter.base.base_step_executor import StepExecutor
|
|
5
|
+
from qtype.interpreter.base.executor_context import ExecutorContext
|
|
6
|
+
from qtype.interpreter.types import FlowMessage
|
|
7
|
+
from qtype.interpreter.typing import instantiate_variable
|
|
8
|
+
from qtype.semantic.model import Construct
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ConstructExecutor(StepExecutor):
|
|
12
|
+
"""Executor for Construct steps."""
|
|
13
|
+
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
step: Construct,
|
|
17
|
+
context: ExecutorContext,
|
|
18
|
+
**dependencies,
|
|
19
|
+
):
|
|
20
|
+
super().__init__(step, context, **dependencies)
|
|
21
|
+
if not isinstance(step, Construct):
|
|
22
|
+
raise ValueError(
|
|
23
|
+
"ConstructExecutor can only execute Construct steps."
|
|
24
|
+
)
|
|
25
|
+
self.step = step
|
|
26
|
+
|
|
27
|
+
async def process_message(
|
|
28
|
+
self,
|
|
29
|
+
message: FlowMessage,
|
|
30
|
+
) -> AsyncIterator[FlowMessage]:
|
|
31
|
+
"""Process a FlowMessage for the Construct step.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
message: The FlowMessage to process.
|
|
35
|
+
Yields:
|
|
36
|
+
FlowMessages with the results of processing.
|
|
37
|
+
"""
|
|
38
|
+
try:
|
|
39
|
+
# Safe since semantic validation ensures exactly one output variable
|
|
40
|
+
output_var = self.step.outputs[0]
|
|
41
|
+
|
|
42
|
+
if (
|
|
43
|
+
isinstance(output_var.type, ListType)
|
|
44
|
+
or len(self.step.inputs) == 1
|
|
45
|
+
):
|
|
46
|
+
inputs = message.variables[self.step.inputs[0].id]
|
|
47
|
+
elif hasattr(output_var.type, "model_validate"):
|
|
48
|
+
# This is a custom type (Pydantic model)
|
|
49
|
+
# So input should be a dict
|
|
50
|
+
input_values = {
|
|
51
|
+
input_var.id: message.variables[input_var.id]
|
|
52
|
+
for input_var in self.step.inputs
|
|
53
|
+
}
|
|
54
|
+
# use the mapping to convert variable names to the keys passed to instantiate_variable
|
|
55
|
+
inputs = {
|
|
56
|
+
self.step.field_mapping.get(var_name, var_name): value
|
|
57
|
+
for var_name, value in input_values.items()
|
|
58
|
+
}
|
|
59
|
+
else:
|
|
60
|
+
raise ValueError(
|
|
61
|
+
"Construct step must have either a single input or output of a custom type."
|
|
62
|
+
)
|
|
63
|
+
constructed_value = instantiate_variable(output_var, inputs)
|
|
64
|
+
yield message.copy_with_variables(
|
|
65
|
+
{output_var.id: constructed_value}
|
|
66
|
+
)
|
|
67
|
+
except Exception as e:
|
|
68
|
+
yield message.copy_with_error(self.step.id, e)
|
|
qtype/interpreter/executors/decoder_executor.py
CHANGED
|
@@ -159,5 +159,4 @@ class DecoderExecutor(StepExecutor):
|
|
|
159
159
|
except Exception as e:
|
|
160
160
|
# Emit error event to stream so frontend can display it
|
|
161
161
|
await self.stream_emitter.error(str(e))
|
|
162
|
-
message.
|
|
163
|
-
yield message
|
|
162
|
+
yield message.copy_with_error(self.step.id, e)
|
|
qtype/interpreter/executors/doc_to_text_executor.py
CHANGED
|
@@ -74,8 +74,7 @@ class DocToTextConverterExecutor(StepExecutor):
|
|
|
74
74
|
except Exception as e:
|
|
75
75
|
# Emit error event to stream so frontend can display it
|
|
76
76
|
await self.stream_emitter.error(str(e))
|
|
77
|
-
message.
|
|
78
|
-
yield message
|
|
77
|
+
yield message.copy_with_error(self.step.id, e)
|
|
79
78
|
|
|
80
79
|
def _convert_doc(self, doc: RAGDocument) -> RAGDocument:
|
|
81
80
|
"""Convert a RAGDocument to text/markdown format.
|
|
qtype/interpreter/executors/document_embedder_executor.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
1
3
|
from typing import AsyncIterator
|
|
2
4
|
|
|
3
5
|
from botocore.exceptions import ClientError
|
|
@@ -41,7 +43,7 @@ class DocumentEmbedderExecutor(StepExecutor):
|
|
|
41
43
|
self.step: DocumentEmbedder = step
|
|
42
44
|
# Initialize the embedding model once for the executor
|
|
43
45
|
self.embedding_model: BaseEmbedding = to_embedding_model(
|
|
44
|
-
self.step.model
|
|
46
|
+
self.step.model, context.secret_manager
|
|
45
47
|
)
|
|
46
48
|
|
|
47
49
|
# TODO: properly abstract this into a mixin
|
|
@@ -58,7 +60,17 @@ class DocumentEmbedderExecutor(StepExecutor):
|
|
|
58
60
|
Returns:
|
|
59
61
|
The embedding vector as a list of floats.
|
|
60
62
|
"""
|
|
61
|
-
|
|
63
|
+
|
|
64
|
+
# TODO: switch back to async once aws auth supports it.
|
|
65
|
+
# https://github.com/bazaarvoice/qtype/issues/108
|
|
66
|
+
def _call():
|
|
67
|
+
return self.embedding_model.get_text_embedding(text=text)
|
|
68
|
+
|
|
69
|
+
loop = asyncio.get_running_loop()
|
|
70
|
+
response = await loop.run_in_executor(self.context.thread_pool, _call)
|
|
71
|
+
|
|
72
|
+
return response
|
|
73
|
+
# return await self.embedding_model.aget_text_embedding(text=text)
|
|
62
74
|
|
|
63
75
|
async def process_message(
|
|
64
76
|
self,
|
|
@@ -103,5 +115,8 @@ class DocumentEmbedderExecutor(StepExecutor):
|
|
|
103
115
|
except Exception as e:
|
|
104
116
|
# Emit error event to stream so frontend can display it
|
|
105
117
|
await self.stream_emitter.error(str(e))
|
|
106
|
-
|
|
107
|
-
|
|
118
|
+
logging.error(
|
|
119
|
+
f"Error processing DocumentEmbedder step {self.step.id}",
|
|
120
|
+
exc_info=e,
|
|
121
|
+
)
|
|
122
|
+
yield message.copy_with_error(self.step.id, e)
|
|
qtype/interpreter/executors/document_search_executor.py
CHANGED
|
@@ -109,5 +109,4 @@ class DocumentSearchExecutor(StepExecutor):
|
|
|
109
109
|
except Exception as e:
|
|
110
110
|
# Emit error event to stream so frontend can display it
|
|
111
111
|
await self.stream_emitter.error(str(e))
|
|
112
|
-
message.
|
|
113
|
-
yield message
|
|
112
|
+
yield message.copy_with_error(self.step.id, e)
|
|
qtype/interpreter/executors/document_source_executor.py
CHANGED
|
@@ -114,5 +114,4 @@ class DocumentSourceExecutor(StepExecutor):
|
|
|
114
114
|
except Exception as e:
|
|
115
115
|
# Emit error event to stream so frontend can display it
|
|
116
116
|
await self.stream_emitter.error(str(e))
|
|
117
|
-
message.
|
|
118
|
-
yield message
|
|
117
|
+
yield message.copy_with_error(self.step.id, e)
|
|
qtype/interpreter/executors/document_splitter_executor.py
CHANGED
|
@@ -101,5 +101,4 @@ class DocumentSplitterExecutor(StepExecutor):
|
|
|
101
101
|
except Exception as e:
|
|
102
102
|
# Emit error event to stream so frontend can display it
|
|
103
103
|
await self.stream_emitter.error(str(e))
|
|
104
|
-
message.
|
|
105
|
-
yield message
|
|
104
|
+
yield message.copy_with_error(self.step.id, e)
|