qtype 0.0.16__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. qtype/application/commons/tools.py +1 -1
  2. qtype/application/converters/tools_from_api.py +5 -5
  3. qtype/application/converters/tools_from_module.py +2 -2
  4. qtype/application/converters/types.py +14 -43
  5. qtype/application/documentation.py +1 -1
  6. qtype/application/facade.py +94 -73
  7. qtype/base/types.py +227 -7
  8. qtype/cli.py +4 -0
  9. qtype/commands/convert.py +20 -8
  10. qtype/commands/generate.py +19 -27
  11. qtype/commands/run.py +73 -36
  12. qtype/commands/serve.py +74 -54
  13. qtype/commands/validate.py +34 -8
  14. qtype/commands/visualize.py +46 -22
  15. qtype/dsl/__init__.py +6 -5
  16. qtype/dsl/custom_types.py +1 -1
  17. qtype/dsl/domain_types.py +65 -5
  18. qtype/dsl/linker.py +384 -0
  19. qtype/dsl/loader.py +315 -0
  20. qtype/dsl/model.py +612 -363
  21. qtype/dsl/parser.py +200 -0
  22. qtype/dsl/types.py +50 -0
  23. qtype/interpreter/api.py +57 -136
  24. qtype/interpreter/auth/aws.py +19 -9
  25. qtype/interpreter/auth/generic.py +93 -16
  26. qtype/interpreter/base/base_step_executor.py +436 -0
  27. qtype/interpreter/base/batch_step_executor.py +171 -0
  28. qtype/interpreter/base/exceptions.py +50 -0
  29. qtype/interpreter/base/executor_context.py +74 -0
  30. qtype/interpreter/base/factory.py +117 -0
  31. qtype/interpreter/base/progress_tracker.py +110 -0
  32. qtype/interpreter/base/secrets.py +339 -0
  33. qtype/interpreter/base/step_cache.py +74 -0
  34. qtype/interpreter/base/stream_emitter.py +469 -0
  35. qtype/interpreter/conversions.py +462 -22
  36. qtype/interpreter/converters.py +77 -0
  37. qtype/interpreter/endpoints.py +355 -0
  38. qtype/interpreter/executors/agent_executor.py +242 -0
  39. qtype/interpreter/executors/aggregate_executor.py +93 -0
  40. qtype/interpreter/executors/decoder_executor.py +163 -0
  41. qtype/interpreter/executors/doc_to_text_executor.py +112 -0
  42. qtype/interpreter/executors/document_embedder_executor.py +107 -0
  43. qtype/interpreter/executors/document_search_executor.py +122 -0
  44. qtype/interpreter/executors/document_source_executor.py +118 -0
  45. qtype/interpreter/executors/document_splitter_executor.py +105 -0
  46. qtype/interpreter/executors/echo_executor.py +63 -0
  47. qtype/interpreter/executors/field_extractor_executor.py +160 -0
  48. qtype/interpreter/executors/file_source_executor.py +101 -0
  49. qtype/interpreter/executors/file_writer_executor.py +110 -0
  50. qtype/interpreter/executors/index_upsert_executor.py +228 -0
  51. qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
  52. qtype/interpreter/executors/invoke_flow_executor.py +51 -0
  53. qtype/interpreter/executors/invoke_tool_executor.py +358 -0
  54. qtype/interpreter/executors/llm_inference_executor.py +272 -0
  55. qtype/interpreter/executors/prompt_template_executor.py +78 -0
  56. qtype/interpreter/executors/sql_source_executor.py +106 -0
  57. qtype/interpreter/executors/vector_search_executor.py +91 -0
  58. qtype/interpreter/flow.py +159 -22
  59. qtype/interpreter/metadata_api.py +115 -0
  60. qtype/interpreter/resource_cache.py +5 -4
  61. qtype/interpreter/rich_progress.py +225 -0
  62. qtype/interpreter/stream/chat/__init__.py +15 -0
  63. qtype/interpreter/stream/chat/converter.py +391 -0
  64. qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
  65. qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
  66. qtype/interpreter/stream/chat/vercel.py +609 -0
  67. qtype/interpreter/stream/utils/__init__.py +15 -0
  68. qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
  69. qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
  70. qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
  71. qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
  72. qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
  73. qtype/interpreter/telemetry.py +135 -8
  74. qtype/interpreter/tools/__init__.py +5 -0
  75. qtype/interpreter/tools/function_tool_helper.py +265 -0
  76. qtype/interpreter/types.py +330 -0
  77. qtype/interpreter/typing.py +83 -89
  78. qtype/interpreter/ui/404/index.html +1 -1
  79. qtype/interpreter/ui/404.html +1 -1
  80. qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
  81. qtype/interpreter/ui/_next/static/chunks/{393-8fd474427f8e19ce.js → 434-b2112d19f25c44ff.js} +3 -3
  82. qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
  83. qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
  84. qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
  85. qtype/interpreter/ui/icon.png +0 -0
  86. qtype/interpreter/ui/index.html +1 -1
  87. qtype/interpreter/ui/index.txt +4 -4
  88. qtype/semantic/checker.py +583 -0
  89. qtype/semantic/generate.py +262 -83
  90. qtype/semantic/loader.py +95 -0
  91. qtype/semantic/model.py +436 -159
  92. qtype/semantic/resolver.py +63 -19
  93. qtype/semantic/visualize.py +28 -31
  94. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/METADATA +16 -3
  95. qtype-0.1.1.dist-info/RECORD +135 -0
  96. qtype/dsl/base_types.py +0 -38
  97. qtype/dsl/validator.py +0 -465
  98. qtype/interpreter/batch/__init__.py +0 -0
  99. qtype/interpreter/batch/file_sink_source.py +0 -162
  100. qtype/interpreter/batch/flow.py +0 -95
  101. qtype/interpreter/batch/sql_source.py +0 -92
  102. qtype/interpreter/batch/step.py +0 -74
  103. qtype/interpreter/batch/types.py +0 -41
  104. qtype/interpreter/batch/utils.py +0 -178
  105. qtype/interpreter/chat/chat_api.py +0 -237
  106. qtype/interpreter/chat/vercel.py +0 -314
  107. qtype/interpreter/exceptions.py +0 -10
  108. qtype/interpreter/step.py +0 -67
  109. qtype/interpreter/steps/__init__.py +0 -0
  110. qtype/interpreter/steps/agent.py +0 -114
  111. qtype/interpreter/steps/condition.py +0 -36
  112. qtype/interpreter/steps/decoder.py +0 -88
  113. qtype/interpreter/steps/llm_inference.py +0 -171
  114. qtype/interpreter/steps/prompt_template.py +0 -54
  115. qtype/interpreter/steps/search.py +0 -24
  116. qtype/interpreter/steps/tool.py +0 -219
  117. qtype/interpreter/streaming_helpers.py +0 -123
  118. qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +0 -1
  119. qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
  120. qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +0 -3
  121. qtype/interpreter/ui/favicon.ico +0 -0
  122. qtype/loader.py +0 -390
  123. qtype-0.0.16.dist-info/RECORD +0 -106
  124. /qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
  125. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/WHEEL +0 -0
  126. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/entry_points.txt +0 -0
  127. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/licenses/LICENSE +0 -0
  128. {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/top_level.txt +0 -0
qtype/interpreter/executors/decoder_executor.py
@@ -0,0 +1,163 @@
+ import json
+ import xml.etree.ElementTree as ET
+ from typing import Any, AsyncIterator
+
+ from qtype.dsl.model import DecoderFormat
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import Decoder
+
+
+ class DecoderExecutor(StepExecutor):
+     """Executor for Decoder steps."""
+
+     def __init__(
+         self, step: Decoder, context: ExecutorContext, **dependencies
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, Decoder):
+             raise ValueError("DecoderExecutor can only execute Decoder steps.")
+         self.step: Decoder = step
+
+     def _parse_json(self, input_str: str) -> dict[str, Any]:
+         """Parse a JSON string into a Python object.
+
+         Args:
+             input_str: The JSON string to parse.
+
+         Returns:
+             A dictionary parsed from the JSON.
+
+         Raises:
+             ValueError: If the JSON is invalid or not an object.
+         """
+         try:
+             cleaned_response = input_str.strip()
+             # Remove markdown code fences if present
+             if cleaned_response.startswith("```json"):
+                 cleaned_response = cleaned_response[7:]
+             if cleaned_response.endswith("```"):
+                 cleaned_response = cleaned_response[:-3]
+             cleaned_response = cleaned_response.strip()
+
+             # Parse the JSON
+             parsed = json.loads(cleaned_response)
+             if not isinstance(parsed, dict):
+                 raise ValueError(f"Parsed JSON is not an object: {parsed}")
+             return parsed
+         except json.JSONDecodeError as e:
+             raise ValueError(f"Invalid JSON input: {e}") from e
+
+     def _parse_xml(self, input_str: str) -> dict[str, Any]:
+         """Parse an XML string into a Python object.
+
+         Args:
+             input_str: The XML string to parse.
+
+         Returns:
+             A dictionary with tag names as keys and text content as values.
+
+         Raises:
+             ValueError: If the XML is invalid.
+         """
+         try:
+             cleaned_response = input_str.strip()
+             # Remove markdown code fences if present
+             if cleaned_response.startswith("```xml"):
+                 cleaned_response = cleaned_response[6:]
+             if cleaned_response.endswith("```"):
+                 cleaned_response = cleaned_response[:-3]
+             cleaned_response = cleaned_response.strip()
+
+             # Escape bare ampersands before parsing
+             cleaned_response = cleaned_response.replace("&", "&amp;")
+             tree = ET.fromstring(cleaned_response)
+             result = {c.tag: c.text for c in tree}
+
+             return result
+         except Exception as e:
+             raise ValueError(f"Invalid XML input: {e}") from e
+
+     def _parse(self, input_str: str) -> dict[str, Any]:
+         """Parse input string based on the decoder format.
+
+         Args:
+             input_str: The string to parse.
+
+         Returns:
+             A dictionary parsed from the input.
+
+         Raises:
+             ValueError: If the format is unsupported or parsing fails.
+         """
+         if self.step.format == DecoderFormat.json:
+             return self._parse_json(input_str)
+         elif self.step.format == DecoderFormat.xml:
+             return self._parse_xml(input_str)
+         else:
+             raise ValueError(
+                 (
+                     f"Unsupported decoder format: {self.step.format}. "
+                     f"Supported formats are: {DecoderFormat.json}, "
+                     f"{DecoderFormat.xml}."
+                 )
+             )
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the Decoder step.
+
+         Args:
+             message: The FlowMessage to process.
+
+         Yields:
+             A FlowMessage with decoded outputs or an error.
+         """
+         input_id = self.step.inputs[0].id
+
+         try:
+             # Get the input string to decode
+             input_value = message.variables.get(input_id)
+             if not isinstance(input_value, str):
+                 raise ValueError(
+                     (
+                         f"Input to decoder step {self.step.id} must be "
+                         f"a string, found {type(input_value).__name__}."
+                     )
+                 )
+
+             await self.stream_emitter.status(
+                 f"Decoding {self.step.format.value} input"
+             )
+
+             # Parse the input
+             result_dict = self._parse(input_value)
+
+             # Extract output variables from the parsed result
+             output_vars = {}
+             for output in self.step.outputs:
+                 if output.id in result_dict:
+                     output_vars[output.id] = result_dict[output.id]
+                 else:
+                     raise ValueError(
+                         (
+                             f"Output variable {output.id} not found in "
+                             f"decoded result: {result_dict}"
+                         )
+                     )
+
+             await self.stream_emitter.status(
+                 f"Decoded {len(output_vars)} output variables"
+             )
+
+             # Yield the result
+             yield message.copy_with_variables(output_vars)
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
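For context, the fence-stripping path in _parse_json can be reproduced standalone; a minimal sketch (the wrapped LLM output string is made up):

import json

def strip_json_fences(raw: str) -> dict:
    # Mirrors DecoderExecutor._parse_json's cleanup: drop a leading
    # ```json fence and a trailing ``` fence, then parse.
    cleaned = raw.strip()
    if cleaned.startswith("```json"):
        cleaned = cleaned[7:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    parsed = json.loads(cleaned.strip())
    if not isinstance(parsed, dict):
        raise ValueError(f"Parsed JSON is not an object: {parsed}")
    return parsed

print(strip_json_fences('```json\n{"answer": 42}\n```'))  # {'answer': 42}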
qtype/interpreter/executors/doc_to_text_executor.py
@@ -0,0 +1,112 @@
+ from io import BytesIO
+ from typing import AsyncIterator
+
+ from docling.document_converter import DocumentConverter
+ from docling_core.types.io import DocumentStream
+
+ from qtype.base.types import PrimitiveTypeEnum
+ from qtype.dsl.domain_types import RAGDocument
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import DocToTextConverter
+
+
+ class DocToTextConverterExecutor(StepExecutor):
+     """Executor for DocToTextConverter steps."""
+
+     def __init__(
+         self,
+         step: DocToTextConverter,
+         context: ExecutorContext,
+         **dependencies,
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, DocToTextConverter):
+             raise ValueError(
+                 (
+                     "DocToTextConverterExecutor can only execute "
+                     "DocToTextConverter steps."
+                 )
+             )
+         self.step: DocToTextConverter = step
+         # Initialize the Docling converter once for the executor
+         self.docling_converter = DocumentConverter()
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the DocToTextConverter step.
+
+         Args:
+             message: The FlowMessage to process.
+         Yields:
+             FlowMessage with converted document.
+         """
+         input_id = self.step.inputs[0].id
+         output_id = self.step.outputs[0].id
+
+         try:
+             # Get the input document
+             if input_id not in message.variables:
+                 raise ValueError(f"Input variable '{input_id}' is missing")
+             doc = message.variables.get(input_id)
+             if not isinstance(doc, RAGDocument):
+                 raise ValueError(
+                     f"Input variable '{input_id}' must be a RAGDocument"
+                 )
+
+             await self.stream_emitter.status(
+                 f"Converting document: {doc.file_name}",
+             )
+
+             # Convert the document
+             converted_doc = self._convert_doc(doc)
+
+             await self.stream_emitter.status(
+                 f"Converted {doc.file_name} to markdown text",
+             )
+
+             # Yield the result
+             yield message.copy_with_variables({output_id: converted_doc})
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
+
+     def _convert_doc(self, doc: RAGDocument) -> RAGDocument:
+         """Convert a RAGDocument to text/markdown format.
+
+         Args:
+             doc: The document to convert.
+
+         Returns:
+             A RAGDocument with markdown text content.
+         """
+         # If already text, no conversion needed
+         if doc.type == PrimitiveTypeEnum.text:
+             return doc
+
+         # Convert based on content type
+         if isinstance(doc.content, bytes):
+             # Use DocumentStream for bytes content
+             stream = DocumentStream(
+                 name=doc.file_name, stream=BytesIO(doc.content)
+             )
+             document = self.docling_converter.convert(stream).document
+         else:
+             # Convert string content directly
+             document = self.docling_converter.convert(doc.content).document
+
+         # Export to markdown
+         markdown = document.export_to_markdown()
+
+         # Return new RAGDocument with markdown content
+         return RAGDocument(
+             **doc.model_dump(exclude={"content", "type"}),
+             content=markdown,
+             type=PrimitiveTypeEnum.text,
+         )
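_convert_doc routes bytes through a DocumentStream and hands string content (a path or URL) to docling directly; a standalone sketch of that routing, assuming docling is installed and using a hypothetical sample.pdf:

from io import BytesIO

from docling.document_converter import DocumentConverter
from docling_core.types.io import DocumentStream

converter = DocumentConverter()

# Bytes in memory: wrap in a DocumentStream so docling can detect the format.
with open("sample.pdf", "rb") as f:  # hypothetical input file
    result = converter.convert(
        DocumentStream(name="sample.pdf", stream=BytesIO(f.read()))
    )

# A string (path or URL) can be passed to convert() directly instead:
# result = converter.convert("sample.pdf")

print(result.document.export_to_markdown()[:200])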
qtype/interpreter/executors/document_embedder_executor.py
@@ -0,0 +1,107 @@
+ from typing import AsyncIterator
+
+ from botocore.exceptions import ClientError
+ from llama_index.core.base.embeddings.base import BaseEmbedding
+ from tenacity import (
+     retry,
+     retry_if_exception,
+     stop_after_attempt,
+     wait_exponential,
+ )
+
+ from qtype.dsl.domain_types import RAGChunk
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.conversions import to_embedding_model
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import DocumentEmbedder
+
+
+ def is_throttling_error(e) -> bool:
+     """Return True if e is an AWS ClientError for a ThrottlingException."""
+     return (
+         isinstance(e, ClientError)
+         and e.response["Error"]["Code"] == "ThrottlingException"
+     )
+
+
+ class DocumentEmbedderExecutor(StepExecutor):
+     """Executor for DocumentEmbedder steps."""
+
+     def __init__(
+         self, step: DocumentEmbedder, context: ExecutorContext, **dependencies
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, DocumentEmbedder):
+             raise ValueError(
+                 (
+                     "DocumentEmbedderExecutor can only execute "
+                     "DocumentEmbedder steps."
+                 )
+             )
+         self.step: DocumentEmbedder = step
+         # Initialize the embedding model once for the executor
+         self.embedding_model: BaseEmbedding = to_embedding_model(
+             self.step.model
+         )
+
+     # TODO: properly abstract this into a mixin
+     @retry(
+         retry=retry_if_exception(is_throttling_error),
+         wait=wait_exponential(multiplier=0.5, min=1, max=30),
+         stop=stop_after_attempt(10),
+     )
+     async def _embed(self, text: str) -> list[float]:
+         """Generate embedding for the given text using the embedding model.
+
+         Args:
+             text: The text to embed.
+         Returns:
+             The embedding vector as a list of floats.
+         """
+         return await self.embedding_model.aget_text_embedding(text=text)
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the DocumentEmbedder step.
+
+         Args:
+             message: The FlowMessage to process.
+         Yields:
+             FlowMessage with embedded chunk.
+         """
+         input_id = self.step.inputs[0].id
+         output_id = self.step.outputs[0].id
+
+         try:
+             # Get the input chunk
+             chunk = message.variables.get(input_id)
+             if not isinstance(chunk, RAGChunk):
+                 raise ValueError(
+                     (
+                         f"Input variable '{input_id}' must be a RAGChunk, "
+                         f"got {type(chunk)}"
+                     )
+                 )
+
+             # Generate embedding for the chunk content
+             vector = await self._embed(str(chunk.content))
+
+             # Create the output chunk with the vector
+             embedded_chunk = RAGChunk(
+                 vector=vector,
+                 content=chunk.content,
+                 chunk_id=chunk.chunk_id,
+                 document_id=chunk.document_id,
+                 metadata=chunk.metadata,
+             )
+
+             # Yield the result
+             yield message.copy_with_variables({output_id: embedded_chunk})
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
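The @retry on _embed is a standard tenacity pattern: retry only when the predicate matches, backing off exponentially. A self-contained sketch with a simulated throttling error (FakeThrottle and flaky_embed are made up for illustration):

from tenacity import (
    retry,
    retry_if_exception,
    stop_after_attempt,
    wait_exponential,
)

class FakeThrottle(Exception):
    """Stand-in for botocore's ThrottlingException."""

calls = {"n": 0}

@retry(
    retry=retry_if_exception(lambda e: isinstance(e, FakeThrottle)),
    wait=wait_exponential(multiplier=0.5, min=1, max=30),  # bounded backoff
    stop=stop_after_attempt(10),
)
def flaky_embed() -> str:
    calls["n"] += 1
    if calls["n"] < 3:
        raise FakeThrottle()  # first two calls are throttled
    return "ok"

print(flaky_embed(), "after", calls["n"], "attempts")  # ok after 3 attempts

Non-throttling errors bypass the predicate and surface immediately, which is why the executor keeps its own try/except around process_message.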
qtype/interpreter/executors/document_search_executor.py
@@ -0,0 +1,122 @@
+ from typing import AsyncIterator
+
+ from qtype.dsl.domain_types import RAGChunk, RAGSearchResult
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.conversions import to_opensearch_client
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import DocumentSearch
+
+
+ class DocumentSearchExecutor(StepExecutor):
+     """Executor for DocumentSearch steps using OpenSearch/Elasticsearch."""
+
+     def __init__(
+         self, step: DocumentSearch, context: ExecutorContext, **dependencies
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, DocumentSearch):
+             raise ValueError(
+                 (
+                     "DocumentSearchExecutor can only execute "
+                     "DocumentSearch steps."
+                 )
+             )
+         self.step: DocumentSearch = step
+         # Initialize the OpenSearch client once for the executor
+         self.client = to_opensearch_client(
+             self.step.index, self._secret_manager
+         )
+         self.index_name = self.step.index.name
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the DocumentSearch step.
+
+         Args:
+             message: The FlowMessage to process.
+
+         Yields:
+             FlowMessage with search results as RAGSearchResult instances.
+         """
+         input_id = self.step.inputs[0].id
+         output_id = self.step.outputs[0].id
+
+         try:
+             # Get the search query text
+             query_text = message.variables.get(input_id)
+             if not isinstance(query_text, str):
+                 raise ValueError(
+                     (
+                         f"Input variable '{input_id}' must be a string "
+                         f"(text query), got {type(query_text)}"
+                     )
+                 )
+
+             # Build the search query
+             search_body = {
+                 "query": {
+                     "multi_match": {
+                         "query": query_text,
+                         "fields": ["content^2", "title", "*"],
+                         "type": "best_fields",
+                     }
+                 },
+                 "size": 10,  # Default top 10 results
+             }
+
+             # Apply any filters if specified
+             if self.step.filters:
+                 if "query" in search_body:
+                     search_body["query"] = {
+                         "bool": {
+                             "must": [search_body["query"]],
+                             "filter": [
+                                 {"term": {k: v}}
+                                 for k, v in self.step.filters.items()
+                             ],
+                         }
+                     }
+
+             # Execute the search
+             response = self.client.search(
+                 index=self.index_name, body=search_body
+             )
+
+             # Process each hit and yield as RAGSearchResult
+             for hit in response["hits"]["hits"]:
+                 source = hit["_source"]
+                 doc_id = hit["_id"]
+                 score = hit["_score"]
+
+                 # Extract content (adjust field name based on your schema)
+                 content = source.get("content", "")
+
+                 # Build metadata from the source, excluding content field
+                 metadata = {
+                     k: v for k, v in source.items() if k not in ["content"]
+                 }
+
+                 # Create a RAGChunk from the search result
+                 # Use the document ID as both chunk_id and document_id
+                 chunk = RAGChunk(
+                     content=content,
+                     chunk_id=doc_id,
+                     document_id=source.get("document_id", doc_id),
+                     vector=None,  # Document search doesn't return embeddings
+                     metadata=metadata,
+                 )
+
+                 # Wrap in RAGSearchResult with the score
+                 search_result = RAGSearchResult(chunk=chunk, score=score)
+
+                 # Yield result for each document
+                 yield message.copy_with_variables({output_id: search_result})
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
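When step.filters is set, the multi_match query above gets wrapped in a bool clause. For a made-up query "vector databases" and a hypothetical filter {"lang": "en"}, the body sent to OpenSearch would come out as:

search_body = {
    "query": {
        "bool": {
            "must": [
                {
                    "multi_match": {
                        "query": "vector databases",  # made-up query text
                        "fields": ["content^2", "title", "*"],
                        "type": "best_fields",
                    }
                }
            ],
            "filter": [{"term": {"lang": "en"}}],  # one term clause per filter
        }
    },
    "size": 10,
}

Filters constrain which documents are considered without affecting relevance scoring, which stays driven by the multi_match clause.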
qtype/interpreter/executors/document_source_executor.py
@@ -0,0 +1,118 @@
+ import importlib
+ from typing import AsyncIterator
+
+ from qtype.interpreter.base.base_step_executor import StepExecutor
+ from qtype.interpreter.base.executor_context import ExecutorContext
+ from qtype.interpreter.conversions import from_llama_document
+ from qtype.interpreter.types import FlowMessage
+ from qtype.semantic.model import DocumentSource
+
+
+ class DocumentSourceExecutor(StepExecutor):
+     """Executor for DocumentSource steps."""
+
+     def __init__(
+         self, step: DocumentSource, context: ExecutorContext, **dependencies
+     ):
+         super().__init__(step, context, **dependencies)
+         if not isinstance(step, DocumentSource):
+             raise ValueError(
+                 (
+                     "DocumentSourceExecutor can only execute "
+                     "DocumentSource steps."
+                 )
+             )
+         self.step: DocumentSource = step
+         # Initialize the reader class once for the executor
+         self.reader_class = self._load_reader_class()
+
+     def _load_reader_class(self) -> type:
+         """Load the LlamaIndex reader class dynamically.
+
+         Returns:
+             The reader class.
+
+         Raises:
+             ImportError: If the reader class cannot be imported.
+         """
+         # Split the dotted reader path into module path and class name,
+         # e.g. 'llama_index.core.SimpleDirectoryReader' ->
+         # module 'llama_index.core', class 'SimpleDirectoryReader'
+         parts = self.step.reader_module.split(".")
+         module_path = ".".join(parts[:-1])
+         class_name = parts[-1]
+
+         # Dynamically import the reader module and get the class
+         try:
+             module = importlib.import_module(module_path)
+             reader_class = getattr(module, class_name)
+             return reader_class
+         except (ImportError, AttributeError) as e:
+             raise ImportError(
+                 (
+                     f"Failed to import reader class '{class_name}' "
+                     f"from '{module_path}': {e}"
+                 )
+             ) from e
+
+     async def process_message(
+         self,
+         message: FlowMessage,
+     ) -> AsyncIterator[FlowMessage]:
+         """Process a single FlowMessage for the DocumentSource step.
+
+         Args:
+             message: The FlowMessage to process.
+         Yields:
+             FlowMessages with loaded documents.
+         """
+         output_id = self.step.outputs[0].id
+
+         try:
+             # Resolve any SecretReferences in step args
+             context = f"step '{self.step.id}'"
+             resolved_args = self._secret_manager.resolve_secrets_in_dict(
+                 self.step.args, context
+             )
+
+             # Combine resolved step args with message variables as runtime args
+             runtime_args = {
+                 key: message.variables.get(key)
+                 for key in message.variables.keys()
+             }
+             combined_args = {**resolved_args, **runtime_args}
+
+             # Instantiate the reader with combined arguments
+             loader = self.reader_class(**combined_args)
+
+             # Load documents using the loader
+             if not hasattr(loader, "load_data"):
+                 raise AttributeError(
+                     (
+                         f"Reader class '{self.reader_class.__name__}' "
+                         "does not have a 'load_data' method"
+                     )
+                 )
+             load_args = self.step.loader_args or {}
+
+             llama_documents = loader.load_data(**load_args)
+
+             # Convert LlamaIndex Documents to RAGDocuments
+             rag_documents = [
+                 from_llama_document(doc) for doc in llama_documents
+             ]
+
+             # Emit feedback about total documents loaded
+             await self.stream_emitter.status(
+                 f"Loaded {len(rag_documents)} documents"
+             )
+
+             # Yield one message per document (fan-out)
+             for doc in rag_documents:
+                 yield message.copy_with_variables({output_id: doc})
+
+         except Exception as e:
+             # Emit error event to stream so frontend can display it
+             await self.stream_emitter.error(str(e))
+             message.set_error(self.step.id, e)
+             yield message
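_load_reader_class is a thin dotted-path import helper; a standalone sketch (the reader_module value and ./docs directory are illustrative, and assume llama-index is installed):

import importlib

def load_reader_class(reader_module: str) -> type:
    # Split "pkg.module.ClassName" into a module path and a class name.
    module_path, _, class_name = reader_module.rpartition(".")
    return getattr(importlib.import_module(module_path), class_name)

ReaderCls = load_reader_class("llama_index.core.SimpleDirectoryReader")
docs = ReaderCls(input_dir="./docs").load_data()  # ./docs is a made-up path
print(f"Loaded {len(docs)} documents")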