qtype 0.1.13__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qtype/base/__init__.py +8 -2
- qtype/base/logging.py +0 -17
- qtype/base/resources.py +193 -0
- qtype/cli.py +5 -9
- qtype/commands/generate.py +6 -1
- qtype/commands/run.py +37 -10
- qtype/docs/Gallery/dataflow_pipelines.md +15 -2
- qtype/docs/Gallery/recipe_chatbot.md +103 -0
- qtype/docs/Gallery/recipe_chatbot.mermaid +62 -0
- qtype/docs/Gallery/recipe_chatbot.png +0 -0
- qtype/docs/Gallery/research_assistant.md +1 -1
- qtype/docs/How To/Command Line Usage/pass_inputs_on_the_cli.md +4 -1
- qtype/docs/How To/Data Processing/load_documents.md +74 -0
- qtype/docs/How To/Data Processing/read_sql_databases.md +2 -0
- qtype/docs/Reference/cli.md +3 -2
- qtype/docs/Reference/plugins.md +0 -4
- qtype/docs/Reference/semantic-validation-rules.md +1 -6
- qtype/docs/Tutorials/01-first-qtype-application.md +1 -1
- qtype/docs/Tutorials/03-structured-data.md +1 -1
- qtype/docs/Tutorials/04-tools-and-function-calling.md +1 -1
- qtype/examples/conversational_ai/simple_chatbot_with_auth.qtype.yaml +48 -0
- qtype/examples/data_processing/load_documents.qtype.yaml +31 -0
- qtype/examples/invoke_models/invoke_embedding_aws.qtype.yaml +45 -0
- qtype/examples/rag/recipe_chatbot.qtype.yaml +216 -0
- qtype/interpreter/auth/aws.py +94 -17
- qtype/interpreter/auth/generic.py +11 -12
- qtype/interpreter/base/secrets.py +4 -2
- qtype/interpreter/conversions.py +15 -14
- qtype/interpreter/converters.py +1 -1
- qtype/interpreter/executors/bedrock_reranker_executor.py +17 -28
- qtype/interpreter/executors/document_embedder_executor.py +1 -12
- qtype/interpreter/executors/invoke_embedding_executor.py +23 -33
- qtype/interpreter/executors/llm_inference_executor.py +2 -0
- qtype/interpreter/executors/sql_source_executor.py +6 -2
- qtype/interpreter/flow.py +11 -1
- qtype/mcp/server.py +11 -158
- qtype/semantic/visualize.py +10 -3
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/METADATA +2 -2
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/RECORD +42 -33
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/WHEEL +0 -0
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/entry_points.txt +0 -0
- {qtype-0.1.13.dist-info → qtype-0.1.14.dist-info}/licenses/LICENSE +0 -0
qtype/interpreter/conversions.py
CHANGED
|
@@ -194,16 +194,15 @@ def to_llm(
|
|
|
194
194
|
|
|
195
195
|
from qtype.semantic.model import AWSAuthProvider
|
|
196
196
|
|
|
197
|
+
creds_kwargs = {}
|
|
197
198
|
if model.auth:
|
|
198
199
|
# Type hint for mypy - we know it's AWSAuthProvider for aws-bedrock
|
|
199
200
|
assert isinstance(model.auth, AWSAuthProvider)
|
|
200
|
-
with aws(model.auth, secret_manager) as
|
|
201
|
-
|
|
202
|
-
else:
|
|
203
|
-
session = None
|
|
201
|
+
with aws(model.auth, secret_manager) as creds:
|
|
202
|
+
creds_kwargs = creds.as_kwargs()
|
|
204
203
|
|
|
205
204
|
brv: BaseLLM = BedrockConverse(
|
|
206
|
-
|
|
205
|
+
**creds_kwargs,
|
|
207
206
|
model=model.model_id if model.model_id else model.id,
|
|
208
207
|
system_prompt=system_prompt,
|
|
209
208
|
**(model.inference_params if model.inference_params else {}),
|
|
@@ -314,14 +313,14 @@ def to_embedding_model(
|
|
|
314
313
|
BedrockEmbedding,
|
|
315
314
|
)
|
|
316
315
|
|
|
317
|
-
|
|
316
|
+
creds_kwargs = {}
|
|
318
317
|
if model.auth is not None:
|
|
319
318
|
assert isinstance(model.auth, AWSAuthProvider)
|
|
320
|
-
with aws(model.auth, secret_manager) as
|
|
321
|
-
|
|
319
|
+
with aws(model.auth, secret_manager) as creds:
|
|
320
|
+
creds_kwargs = creds.as_kwargs()
|
|
322
321
|
|
|
323
322
|
bedrock_embedding: BaseEmbedding = BedrockEmbedding(
|
|
324
|
-
|
|
323
|
+
**creds_kwargs,
|
|
325
324
|
model_name=model.model_id if model.model_id else model.id,
|
|
326
325
|
max_retries=100,
|
|
327
326
|
)
|
|
@@ -382,10 +381,12 @@ def to_opensearch_client(
|
|
|
382
381
|
elif hasattr(index.auth, "type") and index.auth.type == "aws":
|
|
383
382
|
# Use AWS authentication with boto3 session
|
|
384
383
|
# Get AWS credentials from auth provider using context manager
|
|
385
|
-
with auth(index.auth, secret_manager) as
|
|
386
|
-
#
|
|
387
|
-
|
|
388
|
-
|
|
384
|
+
with auth(index.auth, secret_manager) as creds:
|
|
385
|
+
# Create a boto3 session from credentials to get boto3.Credentials
|
|
386
|
+
import boto3
|
|
387
|
+
|
|
388
|
+
session = boto3.Session(**creds.as_kwargs())
|
|
389
|
+
credentials = session.get_credentials()
|
|
389
390
|
if credentials is None:
|
|
390
391
|
raise InterpreterError(
|
|
391
392
|
f"Failed to obtain AWS credentials for DocumentIndex '{index.id}'"
|
|
@@ -394,7 +395,7 @@ def to_opensearch_client(
|
|
|
394
395
|
# Use opensearch-py's async AWS auth
|
|
395
396
|
aws_auth = AWSV4SignerAsyncAuth(
|
|
396
397
|
credentials,
|
|
397
|
-
|
|
398
|
+
creds.region_name or "us-east-1",
|
|
398
399
|
"aoss", # service name for OpenSearch Serverless
|
|
399
400
|
)
|
|
400
401
|
|
qtype/interpreter/converters.py
CHANGED
|
@@ -126,7 +126,7 @@ def read_dataframe_from_file(
|
|
|
126
126
|
with fsspec.open(file_path, "rb") as file_handle:
|
|
127
127
|
# Read based on MIME type
|
|
128
128
|
if mime_type == "text/csv" or mime_type == "text/plain":
|
|
129
|
-
df = pd.read_csv(file_handle) # type: ignore[arg-type]
|
|
129
|
+
df = pd.read_csv(file_handle, keep_default_na=False) # type: ignore[arg-type]
|
|
130
130
|
elif mime_type in ["application/json", "application/jsonlines"]:
|
|
131
131
|
# Check if it's JSONL by extension
|
|
132
132
|
if Path(file_path).suffix.lower() == ".jsonl":
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import asyncio
|
|
6
5
|
import logging
|
|
7
6
|
from typing import AsyncIterator
|
|
8
7
|
|
|
@@ -56,10 +55,10 @@ class BedrockRerankerExecutor(StepExecutor):
|
|
|
56
55
|
)
|
|
57
56
|
return
|
|
58
57
|
|
|
59
|
-
# Get
|
|
58
|
+
# Get region from auth or default session
|
|
60
59
|
if self.step.auth is not None:
|
|
61
|
-
with aws(self.step.auth, self.context.secret_manager) as
|
|
62
|
-
region_name =
|
|
60
|
+
with aws(self.step.auth, self.context.secret_manager) as creds:
|
|
61
|
+
region_name = creds.region_name
|
|
63
62
|
else:
|
|
64
63
|
import boto3
|
|
65
64
|
|
|
@@ -120,31 +119,21 @@ class BedrockRerankerExecutor(StepExecutor):
|
|
|
120
119
|
},
|
|
121
120
|
}
|
|
122
121
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
if self.step.auth is not None:
|
|
126
|
-
with aws(self.step.auth, self.context.secret_manager) as s:
|
|
127
|
-
client = s.client("bedrock-agent-runtime")
|
|
128
|
-
return client.rerank(
|
|
129
|
-
queries=queries,
|
|
130
|
-
sources=documents,
|
|
131
|
-
rerankingConfiguration=reranking_configuration,
|
|
132
|
-
)
|
|
133
|
-
else:
|
|
134
|
-
import boto3
|
|
135
|
-
|
|
136
|
-
session = boto3.Session()
|
|
137
|
-
client = session.client("bedrock-agent-runtime")
|
|
138
|
-
return client.rerank(
|
|
139
|
-
queries=queries,
|
|
140
|
-
sources=documents,
|
|
141
|
-
rerankingConfiguration=reranking_configuration,
|
|
142
|
-
)
|
|
122
|
+
# Create async bedrock client and call rerank
|
|
123
|
+
import aioboto3
|
|
143
124
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
self.
|
|
147
|
-
|
|
125
|
+
creds_kwargs = {}
|
|
126
|
+
if self.step.auth is not None:
|
|
127
|
+
with aws(self.step.auth, self.context.secret_manager) as creds:
|
|
128
|
+
creds_kwargs = creds.as_kwargs()
|
|
129
|
+
|
|
130
|
+
session = aioboto3.Session(**creds_kwargs)
|
|
131
|
+
async with session.client("bedrock-agent-runtime") as client:
|
|
132
|
+
response = await client.rerank(
|
|
133
|
+
queries=queries,
|
|
134
|
+
sources=documents,
|
|
135
|
+
rerankingConfiguration=reranking_configuration,
|
|
136
|
+
)
|
|
148
137
|
|
|
149
138
|
results = []
|
|
150
139
|
for d in response["results"]:
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import asyncio
|
|
2
1
|
import logging
|
|
3
2
|
from typing import AsyncIterator
|
|
4
3
|
|
|
@@ -60,17 +59,7 @@ class DocumentEmbedderExecutor(StepExecutor):
|
|
|
60
59
|
Returns:
|
|
61
60
|
The embedding vector as a list of floats.
|
|
62
61
|
"""
|
|
63
|
-
|
|
64
|
-
# TODO: switch back to async once aws auth supports it.
|
|
65
|
-
# https://github.com/bazaarvoice/qtype/issues/108
|
|
66
|
-
def _call():
|
|
67
|
-
return self.embedding_model.get_text_embedding(text=text)
|
|
68
|
-
|
|
69
|
-
loop = asyncio.get_running_loop()
|
|
70
|
-
response = await loop.run_in_executor(self.context.thread_pool, _call)
|
|
71
|
-
|
|
72
|
-
return response
|
|
73
|
-
# return await self.embedding_model.aget_text_embedding(text=text)
|
|
62
|
+
return await self.embedding_model.aget_text_embedding(text=text)
|
|
74
63
|
|
|
75
64
|
async def process_message(
|
|
76
65
|
self,
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import asyncio
|
|
2
1
|
from typing import AsyncIterator
|
|
3
2
|
|
|
4
3
|
from openinference.semconv.trace import OpenInferenceSpanKindValues
|
|
@@ -54,41 +53,32 @@ class InvokeEmbeddingExecutor(StepExecutor):
|
|
|
54
53
|
# Get the input value
|
|
55
54
|
input_value = message.get_variable(input_id)
|
|
56
55
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
if
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
f"{input_type}. Must be 'text' or 'image'."
|
|
77
|
-
)
|
|
56
|
+
# Generate embedding based on input type
|
|
57
|
+
if input_type == PrimitiveTypeEnum.text:
|
|
58
|
+
if not isinstance(input_value, str):
|
|
59
|
+
input_value = str(input_value)
|
|
60
|
+
vector = await self.embedding_model.aget_text_embedding(
|
|
61
|
+
text=input_value
|
|
62
|
+
)
|
|
63
|
+
content = input_value
|
|
64
|
+
elif input_type == PrimitiveTypeEnum.image:
|
|
65
|
+
# For image embeddings
|
|
66
|
+
vector = await self.embedding_model.aget_image_embedding(
|
|
67
|
+
image_path=input_value
|
|
68
|
+
)
|
|
69
|
+
content = input_value
|
|
70
|
+
else:
|
|
71
|
+
raise ValueError(
|
|
72
|
+
(
|
|
73
|
+
f"Unsupported input type for embedding: "
|
|
74
|
+
f"{input_type}. Must be 'text' or 'image'."
|
|
78
75
|
)
|
|
79
|
-
|
|
80
|
-
# Create the Embedding object
|
|
81
|
-
embedding = Embedding(
|
|
82
|
-
vector=vector,
|
|
83
|
-
content=content,
|
|
84
76
|
)
|
|
85
|
-
return embedding
|
|
86
77
|
|
|
87
|
-
#
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
self.context.thread_pool, _call
|
|
78
|
+
# Create the Embedding object
|
|
79
|
+
embedding = Embedding(
|
|
80
|
+
vector=vector,
|
|
81
|
+
content=content,
|
|
92
82
|
)
|
|
93
83
|
|
|
94
84
|
# Yield the result
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from typing import AsyncIterator
|
|
2
3
|
|
|
3
4
|
from llama_cloud import MessageRole as LlamaMessageRole
|
|
@@ -85,6 +86,7 @@ class LLMInferenceExecutor(StepExecutor):
|
|
|
85
86
|
|
|
86
87
|
except Exception as e:
|
|
87
88
|
# Emit error event to stream so frontend can display it
|
|
89
|
+
logging.error(f"LLM Inference search failed: {e}", exc_info=True)
|
|
88
90
|
await self.stream_emitter.error(str(e))
|
|
89
91
|
yield message.copy_with_error(self.step.id, e)
|
|
90
92
|
|
|
@@ -42,8 +42,12 @@ class SQLSourceExecutor(StepExecutor):
|
|
|
42
42
|
connect_args = {}
|
|
43
43
|
if self.step.auth:
|
|
44
44
|
with auth(self.step.auth, self._secret_manager) as creds:
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
# For AWS auth, create a boto3 session from credentials
|
|
46
|
+
from qtype.interpreter.auth.aws import AWSCredentials
|
|
47
|
+
|
|
48
|
+
if isinstance(creds, AWSCredentials):
|
|
49
|
+
session = boto3.Session(**creds.as_kwargs())
|
|
50
|
+
connect_args["session"] = session
|
|
47
51
|
engine = create_engine(connection_string, connect_args=connect_args)
|
|
48
52
|
|
|
49
53
|
output_columns = {output.id for output in self.step.outputs}
|
qtype/interpreter/flow.py
CHANGED
|
@@ -17,7 +17,7 @@ from qtype.interpreter.base import factory
|
|
|
17
17
|
from qtype.interpreter.base.executor_context import ExecutorContext
|
|
18
18
|
from qtype.interpreter.logging_progress import LoggingProgressCallback
|
|
19
19
|
from qtype.interpreter.rich_progress import RichProgressCallback
|
|
20
|
-
from qtype.interpreter.types import FlowMessage, ProgressCallback
|
|
20
|
+
from qtype.interpreter.types import FlowMessage, ProgressCallback, Session
|
|
21
21
|
from qtype.semantic.model import Flow
|
|
22
22
|
|
|
23
23
|
logger = logging.getLogger(__name__)
|
|
@@ -106,6 +106,16 @@ async def run_flow(
|
|
|
106
106
|
initial = [initial]
|
|
107
107
|
|
|
108
108
|
if isinstance(initial, list):
|
|
109
|
+
# Handle empty list by creating a single empty message
|
|
110
|
+
if len(initial) == 0:
|
|
111
|
+
session_id = kwargs.get("session_id", "default")
|
|
112
|
+
initial = [
|
|
113
|
+
FlowMessage(
|
|
114
|
+
session=Session(session_id=session_id),
|
|
115
|
+
variables={},
|
|
116
|
+
)
|
|
117
|
+
]
|
|
118
|
+
|
|
109
119
|
span.set_attribute("flow.input_count", len(initial))
|
|
110
120
|
|
|
111
121
|
# convert to async iterator
|
qtype/mcp/server.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
-
import re
|
|
5
4
|
import tempfile
|
|
6
5
|
from functools import lru_cache
|
|
7
6
|
from importlib.resources import files
|
|
@@ -12,133 +11,15 @@ import tantivy
|
|
|
12
11
|
from mcp.server.fastmcp import FastMCP
|
|
13
12
|
from pydantic import BaseModel
|
|
14
13
|
|
|
14
|
+
from qtype.base.resources import get_docs_resource, get_examples_resource
|
|
15
15
|
from qtype.commands.convert import convert_to_yaml
|
|
16
16
|
|
|
17
17
|
# Initialize FastMCP server
|
|
18
18
|
mcp = FastMCP("qtype", host="0.0.0.0")
|
|
19
19
|
|
|
20
|
-
#
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
# ============================================================================
|
|
25
|
-
# Resource Abstraction Layer
|
|
26
|
-
# ============================================================================
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class ResourceDirectory:
|
|
30
|
-
"""Abstraction for accessing resource directories (docs, examples, etc.)."""
|
|
31
|
-
|
|
32
|
-
def __init__(
|
|
33
|
-
self, name: str, file_extension: str, resolve_snippets: bool = False
|
|
34
|
-
):
|
|
35
|
-
"""Initialize a resource directory.
|
|
36
|
-
|
|
37
|
-
Args:
|
|
38
|
-
name: Directory name (e.g., "docs", "examples")
|
|
39
|
-
file_extension: File extension to search for (e.g., ".md", ".yaml")
|
|
40
|
-
resolve_snippets: Whether to resolve MkDocs snippets in file content
|
|
41
|
-
"""
|
|
42
|
-
self.name = name
|
|
43
|
-
self.file_extension = file_extension
|
|
44
|
-
self.resolve_snippets = resolve_snippets
|
|
45
|
-
self._path_cache: Path | None = None
|
|
46
|
-
|
|
47
|
-
def get_path(self) -> Path:
|
|
48
|
-
"""Get the path to this resource directory.
|
|
49
|
-
|
|
50
|
-
Returns:
|
|
51
|
-
Path to the resource directory, trying installed package first,
|
|
52
|
-
then falling back to development path.
|
|
53
|
-
"""
|
|
54
|
-
if self._path_cache is not None:
|
|
55
|
-
return self._path_cache
|
|
56
|
-
|
|
57
|
-
try:
|
|
58
|
-
# Try to get from installed package
|
|
59
|
-
resource_root = files("qtype") / self.name
|
|
60
|
-
# Check if it exists by trying to iterate
|
|
61
|
-
list(resource_root.iterdir())
|
|
62
|
-
self._path_cache = Path(str(resource_root))
|
|
63
|
-
except (FileNotFoundError, AttributeError, TypeError):
|
|
64
|
-
# Fall back to development path
|
|
65
|
-
self._path_cache = Path(__file__).parent.parent.parent / self.name
|
|
66
|
-
|
|
67
|
-
return self._path_cache
|
|
68
|
-
|
|
69
|
-
def get_file(self, file_path: str) -> str:
|
|
70
|
-
"""Get the content of a specific file.
|
|
71
|
-
|
|
72
|
-
Args:
|
|
73
|
-
file_path: Relative path to the file from the resource root.
|
|
74
|
-
|
|
75
|
-
Returns:
|
|
76
|
-
The full content of the file.
|
|
77
|
-
|
|
78
|
-
Raises:
|
|
79
|
-
FileNotFoundError: If the specified file doesn't exist.
|
|
80
|
-
ValueError: If the path tries to access files outside the directory.
|
|
81
|
-
"""
|
|
82
|
-
resource_path = self.get_path()
|
|
83
|
-
|
|
84
|
-
# Resolve the requested file path
|
|
85
|
-
requested_file = (resource_path / file_path).resolve()
|
|
86
|
-
|
|
87
|
-
# Security check: ensure the resolved path is within resource directory
|
|
88
|
-
try:
|
|
89
|
-
requested_file.relative_to(resource_path.resolve())
|
|
90
|
-
except ValueError:
|
|
91
|
-
raise ValueError(
|
|
92
|
-
f"Invalid path: '{file_path}' is outside {self.name} directory"
|
|
93
|
-
)
|
|
94
|
-
|
|
95
|
-
if not requested_file.exists():
|
|
96
|
-
raise FileNotFoundError(
|
|
97
|
-
f"{self.name.capitalize()} file not found: '{file_path}'. "
|
|
98
|
-
f"Use list_{self.name} to see available files."
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
if not requested_file.is_file():
|
|
102
|
-
raise ValueError(f"Path is not a file: '{file_path}'")
|
|
103
|
-
|
|
104
|
-
content = requested_file.read_text(encoding="utf-8")
|
|
105
|
-
|
|
106
|
-
# Apply snippet resolution if enabled
|
|
107
|
-
if self.resolve_snippets:
|
|
108
|
-
content = _resolve_snippets(content, requested_file)
|
|
109
|
-
|
|
110
|
-
return content
|
|
111
|
-
|
|
112
|
-
def list_files(self) -> list[str]:
|
|
113
|
-
"""List all files in this resource directory.
|
|
114
|
-
|
|
115
|
-
Returns:
|
|
116
|
-
Sorted list of relative paths to all files with the configured extension.
|
|
117
|
-
|
|
118
|
-
Raises:
|
|
119
|
-
FileNotFoundError: If the resource directory doesn't exist.
|
|
120
|
-
"""
|
|
121
|
-
resource_path = self.get_path()
|
|
122
|
-
|
|
123
|
-
if not resource_path.exists():
|
|
124
|
-
raise FileNotFoundError(
|
|
125
|
-
f"{self.name.capitalize()} directory not found: {resource_path}"
|
|
126
|
-
)
|
|
127
|
-
|
|
128
|
-
# Find all files with the configured extension
|
|
129
|
-
pattern = f"*{self.file_extension}"
|
|
130
|
-
files_list = []
|
|
131
|
-
for file in resource_path.rglob(pattern):
|
|
132
|
-
# Get relative path from resource root
|
|
133
|
-
rel_path = file.relative_to(resource_path)
|
|
134
|
-
files_list.append(str(rel_path))
|
|
135
|
-
|
|
136
|
-
return sorted(files_list)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
# Initialize resource directories
|
|
140
|
-
_docs_resource = ResourceDirectory("docs", ".md", resolve_snippets=True)
|
|
141
|
-
_examples_resource = ResourceDirectory("examples", ".yaml")
|
|
20
|
+
# Get resource directories from base layer
|
|
21
|
+
_docs_resource = get_docs_resource()
|
|
22
|
+
_examples_resource = get_examples_resource()
|
|
142
23
|
|
|
143
24
|
|
|
144
25
|
# ============================================================================
|
|
@@ -171,40 +52,6 @@ def _load_schema() -> dict[str, Any]:
|
|
|
171
52
|
return json.load(f)
|
|
172
53
|
|
|
173
54
|
|
|
174
|
-
def _resolve_snippets(content: str, base_path: Path) -> str:
|
|
175
|
-
"""
|
|
176
|
-
Recursively finds and replaces MkDocs snippets in markdown content.
|
|
177
|
-
Mimics the behavior of pymdownx.snippets.
|
|
178
|
-
|
|
179
|
-
Args:
|
|
180
|
-
content: The markdown content to process
|
|
181
|
-
base_path: Path to the file being processed (used to resolve relative paths)
|
|
182
|
-
"""
|
|
183
|
-
docs_root = _docs_resource.get_path()
|
|
184
|
-
project_root = docs_root.parent
|
|
185
|
-
|
|
186
|
-
def replace_match(match):
|
|
187
|
-
snippet_path = match.group(1)
|
|
188
|
-
|
|
189
|
-
# pymdownx logic: try relative to current file, then relative to docs, then project root
|
|
190
|
-
candidates = [
|
|
191
|
-
base_path.parent / snippet_path, # Relative to the doc file
|
|
192
|
-
docs_root / snippet_path, # Relative to docs root
|
|
193
|
-
project_root / snippet_path, # Relative to project root
|
|
194
|
-
]
|
|
195
|
-
|
|
196
|
-
for candidate in candidates:
|
|
197
|
-
if candidate.exists() and candidate.is_file():
|
|
198
|
-
# Recursively resolve snippets inside the included file
|
|
199
|
-
return _resolve_snippets(
|
|
200
|
-
candidate.read_text(encoding="utf-8"), candidate
|
|
201
|
-
)
|
|
202
|
-
|
|
203
|
-
return f"> [!WARNING] Could not resolve snippet: {snippet_path}"
|
|
204
|
-
|
|
205
|
-
return SNIPPET_REGEX.sub(replace_match, content)
|
|
206
|
-
|
|
207
|
-
|
|
208
55
|
@lru_cache(maxsize=1)
|
|
209
56
|
def _build_search_index() -> tantivy.Index:
|
|
210
57
|
"""Build and cache a Tantivy search index for docs and examples.
|
|
@@ -268,13 +115,19 @@ def _build_search_index() -> tantivy.Index:
|
|
|
268
115
|
return line[2:].strip()
|
|
269
116
|
return file_path.stem
|
|
270
117
|
|
|
118
|
+
# For snippet resolution in search indexing
|
|
119
|
+
def resolve_for_indexing(content: str, file_path: Path) -> str:
|
|
120
|
+
from qtype.base.resources import _resolve_snippets
|
|
121
|
+
|
|
122
|
+
return _resolve_snippets(content, file_path, _docs_resource)
|
|
123
|
+
|
|
271
124
|
# Index documentation and examples
|
|
272
125
|
index_files(
|
|
273
126
|
docs_path,
|
|
274
127
|
"*.md",
|
|
275
128
|
"documentation",
|
|
276
129
|
"docs",
|
|
277
|
-
process_content=
|
|
130
|
+
process_content=resolve_for_indexing,
|
|
278
131
|
extract_title=extract_md_title,
|
|
279
132
|
)
|
|
280
133
|
index_files(examples_path, "*.yaml", "example", "examples")
|
qtype/semantic/visualize.py
CHANGED
|
@@ -11,7 +11,6 @@ from typing import Any
|
|
|
11
11
|
|
|
12
12
|
from pydantic import BaseModel
|
|
13
13
|
|
|
14
|
-
from qtype.dsl.model import Index
|
|
15
14
|
from qtype.semantic.model import (
|
|
16
15
|
Agent,
|
|
17
16
|
APITool,
|
|
@@ -21,6 +20,8 @@ from qtype.semantic.model import (
|
|
|
21
20
|
DocumentIndex,
|
|
22
21
|
DocumentSearch,
|
|
23
22
|
Flow,
|
|
23
|
+
Index,
|
|
24
|
+
IndexUpsert,
|
|
24
25
|
InvokeFlow,
|
|
25
26
|
InvokeTool,
|
|
26
27
|
LLMInference,
|
|
@@ -238,6 +239,12 @@ def _generate_step_node(
|
|
|
238
239
|
)
|
|
239
240
|
index_id = f"INDEX_{_sanitize_id(step.index.id)}"
|
|
240
241
|
external_connections.append(f" {node_id} -.-> {index_id}")
|
|
242
|
+
elif isinstance(step, IndexUpsert):
|
|
243
|
+
lines.append(
|
|
244
|
+
f' {node_id}@{{shape: rect, label: "💾 {step.id}"}}'
|
|
245
|
+
)
|
|
246
|
+
index_id = f"INDEX_{_sanitize_id(step.index.id)}"
|
|
247
|
+
external_connections.append(f" {node_id} -.->|writes| {index_id}")
|
|
241
248
|
else:
|
|
242
249
|
# Generic step
|
|
243
250
|
lines.append(
|
|
@@ -382,7 +389,7 @@ def _generate_shared_resources(app: Application) -> list[str]:
|
|
|
382
389
|
index_id = f"INDEX_{_sanitize_id(index.id)}"
|
|
383
390
|
if isinstance(index, VectorIndex):
|
|
384
391
|
lines.append(
|
|
385
|
-
f' {index_id}@{{shape: cyl, label: "
|
|
392
|
+
f' {index_id}@{{shape: cyl, label: "�️ {index.id}"}}'
|
|
386
393
|
)
|
|
387
394
|
# Connect to embedding model
|
|
388
395
|
emb_model_id = f"EMB_{_sanitize_id(index.embedding_model.id)}"
|
|
@@ -396,7 +403,7 @@ def _generate_shared_resources(app: Application) -> list[str]:
|
|
|
396
403
|
)
|
|
397
404
|
else:
|
|
398
405
|
lines.append(
|
|
399
|
-
f' {index_id}@{{shape: cyl, label: "
|
|
406
|
+
f' {index_id}@{{shape: cyl, label: "�️ {index.id}"}}'
|
|
400
407
|
)
|
|
401
408
|
|
|
402
409
|
if index.auth:
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: qtype
|
|
3
|
-
Version: 0.1.13
|
|
3
|
+
Version: 0.1.14
|
|
4
4
|
Summary: DSL for Generative AI Prototyping
|
|
5
5
|
Project-URL: Homepage, https://github.com/bazaarvoice/qtype
|
|
6
6
|
Author-email: Lou Kratz <lou.kratz+qtype@bazaarvoice.com>
|
|
7
7
|
License-Expression: Apache-2.0
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
Requires-Python: >=3.10
|
|
10
|
+
Requires-Dist: cachetools>=6.2.1
|
|
10
11
|
Requires-Dist: fsspec>=2025.5.1
|
|
11
12
|
Requires-Dist: google-cloud-aiplatform>=1.120.0
|
|
12
13
|
Requires-Dist: jsonschema>=4.24.0
|
|
@@ -51,7 +52,6 @@ Requires-Dist: s3fs>=2025.7.0; extra == 'interpreter'
|
|
|
51
52
|
Requires-Dist: sqlalchemy>=2.0.42; extra == 'interpreter'
|
|
52
53
|
Requires-Dist: uvicorn[standard]>=0.35.0; extra == 'interpreter'
|
|
53
54
|
Provides-Extra: mcp
|
|
54
|
-
Requires-Dist: cachetools>=6.2.1; extra == 'mcp'
|
|
55
55
|
Requires-Dist: httpx>=0.28.1; extra == 'mcp'
|
|
56
56
|
Requires-Dist: mcp[cli]>=1.25.0; extra == 'mcp'
|
|
57
57
|
Requires-Dist: tantivy>=0.25.1; extra == 'mcp'
|