qtype 0.0.16__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qtype/application/commons/tools.py +1 -1
- qtype/application/converters/tools_from_api.py +5 -5
- qtype/application/converters/tools_from_module.py +2 -2
- qtype/application/converters/types.py +14 -43
- qtype/application/documentation.py +1 -1
- qtype/application/facade.py +94 -73
- qtype/base/types.py +227 -7
- qtype/cli.py +4 -0
- qtype/commands/convert.py +20 -8
- qtype/commands/generate.py +19 -27
- qtype/commands/run.py +73 -36
- qtype/commands/serve.py +74 -54
- qtype/commands/validate.py +34 -8
- qtype/commands/visualize.py +46 -22
- qtype/dsl/__init__.py +6 -5
- qtype/dsl/custom_types.py +1 -1
- qtype/dsl/domain_types.py +65 -5
- qtype/dsl/linker.py +384 -0
- qtype/dsl/loader.py +315 -0
- qtype/dsl/model.py +612 -363
- qtype/dsl/parser.py +200 -0
- qtype/dsl/types.py +50 -0
- qtype/interpreter/api.py +57 -136
- qtype/interpreter/auth/aws.py +19 -9
- qtype/interpreter/auth/generic.py +93 -16
- qtype/interpreter/base/base_step_executor.py +436 -0
- qtype/interpreter/base/batch_step_executor.py +171 -0
- qtype/interpreter/base/exceptions.py +50 -0
- qtype/interpreter/base/executor_context.py +74 -0
- qtype/interpreter/base/factory.py +117 -0
- qtype/interpreter/base/progress_tracker.py +110 -0
- qtype/interpreter/base/secrets.py +339 -0
- qtype/interpreter/base/step_cache.py +74 -0
- qtype/interpreter/base/stream_emitter.py +469 -0
- qtype/interpreter/conversions.py +462 -22
- qtype/interpreter/converters.py +77 -0
- qtype/interpreter/endpoints.py +355 -0
- qtype/interpreter/executors/agent_executor.py +242 -0
- qtype/interpreter/executors/aggregate_executor.py +93 -0
- qtype/interpreter/executors/decoder_executor.py +163 -0
- qtype/interpreter/executors/doc_to_text_executor.py +112 -0
- qtype/interpreter/executors/document_embedder_executor.py +107 -0
- qtype/interpreter/executors/document_search_executor.py +122 -0
- qtype/interpreter/executors/document_source_executor.py +118 -0
- qtype/interpreter/executors/document_splitter_executor.py +105 -0
- qtype/interpreter/executors/echo_executor.py +63 -0
- qtype/interpreter/executors/field_extractor_executor.py +160 -0
- qtype/interpreter/executors/file_source_executor.py +101 -0
- qtype/interpreter/executors/file_writer_executor.py +110 -0
- qtype/interpreter/executors/index_upsert_executor.py +228 -0
- qtype/interpreter/executors/invoke_embedding_executor.py +92 -0
- qtype/interpreter/executors/invoke_flow_executor.py +51 -0
- qtype/interpreter/executors/invoke_tool_executor.py +358 -0
- qtype/interpreter/executors/llm_inference_executor.py +272 -0
- qtype/interpreter/executors/prompt_template_executor.py +78 -0
- qtype/interpreter/executors/sql_source_executor.py +106 -0
- qtype/interpreter/executors/vector_search_executor.py +91 -0
- qtype/interpreter/flow.py +159 -22
- qtype/interpreter/metadata_api.py +115 -0
- qtype/interpreter/resource_cache.py +5 -4
- qtype/interpreter/rich_progress.py +225 -0
- qtype/interpreter/stream/chat/__init__.py +15 -0
- qtype/interpreter/stream/chat/converter.py +391 -0
- qtype/interpreter/{chat → stream/chat}/file_conversions.py +2 -2
- qtype/interpreter/stream/chat/ui_request_to_domain_type.py +140 -0
- qtype/interpreter/stream/chat/vercel.py +609 -0
- qtype/interpreter/stream/utils/__init__.py +15 -0
- qtype/interpreter/stream/utils/build_vercel_ai_formatter.py +74 -0
- qtype/interpreter/stream/utils/callback_to_stream.py +66 -0
- qtype/interpreter/stream/utils/create_streaming_response.py +18 -0
- qtype/interpreter/stream/utils/default_chat_extract_text.py +20 -0
- qtype/interpreter/stream/utils/error_streaming_response.py +20 -0
- qtype/interpreter/telemetry.py +135 -8
- qtype/interpreter/tools/__init__.py +5 -0
- qtype/interpreter/tools/function_tool_helper.py +265 -0
- qtype/interpreter/types.py +330 -0
- qtype/interpreter/typing.py +83 -89
- qtype/interpreter/ui/404/index.html +1 -1
- qtype/interpreter/ui/404.html +1 -1
- qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_buildManifest.js +1 -1
- qtype/interpreter/ui/_next/static/chunks/{393-8fd474427f8e19ce.js → 434-b2112d19f25c44ff.js} +3 -3
- qtype/interpreter/ui/_next/static/chunks/app/page-8c67d16ac90d23cb.js +1 -0
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-546f2714ff8abc66.js +1 -0
- qtype/interpreter/ui/_next/static/css/8a8d1269e362fef7.css +3 -0
- qtype/interpreter/ui/icon.png +0 -0
- qtype/interpreter/ui/index.html +1 -1
- qtype/interpreter/ui/index.txt +4 -4
- qtype/semantic/checker.py +583 -0
- qtype/semantic/generate.py +262 -83
- qtype/semantic/loader.py +95 -0
- qtype/semantic/model.py +436 -159
- qtype/semantic/resolver.py +63 -19
- qtype/semantic/visualize.py +28 -31
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/METADATA +16 -3
- qtype-0.1.1.dist-info/RECORD +135 -0
- qtype/dsl/base_types.py +0 -38
- qtype/dsl/validator.py +0 -465
- qtype/interpreter/batch/__init__.py +0 -0
- qtype/interpreter/batch/file_sink_source.py +0 -162
- qtype/interpreter/batch/flow.py +0 -95
- qtype/interpreter/batch/sql_source.py +0 -92
- qtype/interpreter/batch/step.py +0 -74
- qtype/interpreter/batch/types.py +0 -41
- qtype/interpreter/batch/utils.py +0 -178
- qtype/interpreter/chat/chat_api.py +0 -237
- qtype/interpreter/chat/vercel.py +0 -314
- qtype/interpreter/exceptions.py +0 -10
- qtype/interpreter/step.py +0 -67
- qtype/interpreter/steps/__init__.py +0 -0
- qtype/interpreter/steps/agent.py +0 -114
- qtype/interpreter/steps/condition.py +0 -36
- qtype/interpreter/steps/decoder.py +0 -88
- qtype/interpreter/steps/llm_inference.py +0 -171
- qtype/interpreter/steps/prompt_template.py +0 -54
- qtype/interpreter/steps/search.py +0 -24
- qtype/interpreter/steps/tool.py +0 -219
- qtype/interpreter/streaming_helpers.py +0 -123
- qtype/interpreter/ui/_next/static/chunks/app/page-7e26b6156cfb55d3.js +0 -1
- qtype/interpreter/ui/_next/static/chunks/ba12c10f-22556063851a6df2.js +0 -1
- qtype/interpreter/ui/_next/static/css/b40532b0db09cce3.css +0 -3
- qtype/interpreter/ui/favicon.ico +0 -0
- qtype/loader.py +0 -390
- qtype-0.0.16.dist-info/RECORD +0 -106
- /qtype/interpreter/ui/_next/static/{nUaw6_IwRwPqkzwe5s725 → 20HoJN6otZ_LyHLHpCPE6}/_ssgManifest.js +0 -0
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/WHEEL +0 -0
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/entry_points.txt +0 -0
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {qtype-0.0.16.dist-info → qtype-0.1.1.dist-info}/top_level.txt +0 -0
qtype/interpreter/batch/flow.py
DELETED
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
from typing import Any, Tuple
|
|
5
|
-
|
|
6
|
-
import pandas as pd
|
|
7
|
-
|
|
8
|
-
from qtype.interpreter.batch.step import batch_execute_step
|
|
9
|
-
from qtype.interpreter.batch.types import BatchConfig
|
|
10
|
-
from qtype.interpreter.batch.utils import reconcile_results_and_errors
|
|
11
|
-
from qtype.semantic.model import Flow, Sink
|
|
12
|
-
|
|
13
|
-
logger = logging.getLogger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def batch_execute_flow(
    flow: Flow,
    inputs: pd.DataFrame,
    batch_config: BatchConfig,
    **kwargs: dict[Any, Any],
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Execute every step of a flow over a DataFrame of inputs.

    Steps run in order; the successful rows produced by one step become the
    input rows of the next. A ``Sink`` step receives the whole frame in a
    single call, every other step receives slices of at most
    ``batch_config.batch_size`` rows.

    Args:
        flow: The flow whose ``steps`` are executed.
        inputs: The rows fed to the first step.
        batch_config: Batch size and error-output settings.
        **kwargs: Accepted for interface compatibility; not forwarded to
            the individual steps.

    Returns:
        A tuple ``(results, errors)``: the output rows of the last step and
        the concatenation of the error rows collected from all steps (an
        empty DataFrame when no step reported errors).
    """
    previous_outputs = inputs
    all_errors: list[pd.DataFrame] = []

    for step in flow.steps:
        results: list[pd.DataFrame] = []
        errors: list[pd.DataFrame] = []

        if isinstance(step, Sink):
            # A sink must see the entire data set at once.
            batches = [previous_outputs]
        else:
            # Slice the current data into frames of at most batch_size rows.
            batch_size = batch_config.batch_size
            batches = [
                previous_outputs.iloc[start : start + batch_size].copy()
                for start in range(0, len(previous_outputs), batch_size)
            ]

        for batch in batches:
            batch_results, batch_errors = batch_execute_step(
                step, batch, batch_config
            )
            results.append(batch_results)
            # Keep every non-empty error frame. The previous ``> 1`` test
            # discarded batches that contained exactly one failed row.
            if len(batch_errors) > 0:
                errors.append(batch_errors)

        previous_outputs, errors_df = reconcile_results_and_errors(
            results, errors
        )

        if len(errors_df):
            all_errors.append(errors_df)
            if batch_config.write_errors_to:
                output_file = (
                    f"{batch_config.write_errors_to}/{step.id}.errors.parquet"
                )
                try:
                    errors_df.to_parquet(
                        output_file, engine="pyarrow", compression="snappy"
                    )
                    logger.info(
                        f"Saved errors for step {step.id} to {output_file}"
                    )
                except Exception as e:
                    # Persisting errors is best effort: a failed write must
                    # not abort the flow, so it is only logged.
                    logger.warning(
                        f"Could not save errors step {step.id} to {output_file}",
                        exc_info=e,
                        stack_info=True,
                    )

    # Return the last step's results and the errors of all steps.
    rv_errors = (
        pd.concat(all_errors, ignore_index=True)
        if len(all_errors)
        else pd.DataFrame({})
    )
    return previous_outputs, rv_errors
|
|
qtype/interpreter/batch/sql_source.py
DELETED
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
from typing import Any, Tuple
|
|
2
|
-
|
|
3
|
-
import boto3 # type: ignore[import-untyped]
|
|
4
|
-
import pandas as pd
|
|
5
|
-
import sqlalchemy
|
|
6
|
-
from sqlalchemy import create_engine
|
|
7
|
-
from sqlalchemy.exc import SQLAlchemyError
|
|
8
|
-
|
|
9
|
-
from qtype.base.exceptions import InterpreterError
|
|
10
|
-
from qtype.interpreter.auth.generic import auth
|
|
11
|
-
from qtype.interpreter.batch.types import BatchConfig, ErrorMode
|
|
12
|
-
from qtype.interpreter.batch.utils import reconcile_results_and_errors
|
|
13
|
-
from qtype.semantic.model import SQLSource
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def to_output_columns(
    df: pd.DataFrame, output_columns: set[str]
) -> pd.DataFrame:
    """Restrict a query result to the columns a step declares as outputs.

    Args:
        df: The DataFrame built from the query result.
        output_columns: Names of the columns to keep.

    Returns:
        ``df`` unchanged when it has no rows; otherwise a DataFrame holding
        only the requested columns, in the order they appear in ``df``.

    Raises:
        InterpreterError: If a requested column is absent from a non-empty
            ``df``.
    """
    # A result without rows is passed through untouched (no column check).
    if not len(df):
        return df

    missing = output_columns.difference(df.columns)
    if missing:
        raise InterpreterError(
            f"SQL Result was missing expected columns: {','.join(missing)}, it has columns: {','.join(df.columns)}"
        )

    # Walk df.columns (not the set) so the original column order survives.
    kept = [name for name in df.columns if name in output_columns]
    return df[kept]
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def execute_sql_source(
    step: SQLSource,
    inputs: pd.DataFrame,
    batch_config: BatchConfig,
    **kwargs: dict[Any, Any],
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Execute a SQLSource step once per input row.

    For each row of ``inputs`` the step's query is run with the row's values
    for the step's declared inputs as bind parameters. The result is reduced
    to the step's output columns and every column of the input row is then
    attached to each result row.

    Args:
        step: The SQLSource step to execute.
        inputs: One query execution is performed per row of this frame.
        batch_config: Supplies ``error_mode``, which decides whether a
            database error aborts the run.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        A tuple containing two DataFrames:
            - The first DataFrame contains the successfully retrieved data.
            - The second DataFrame has one row per failed query, with an
              'error' column holding the error message.

    Raises:
        SQLAlchemyError: Re-raised when a query fails and
            ``batch_config.error_mode`` is ``ErrorMode.FAIL``.
        InterpreterError: Raised by ``to_output_columns`` when a result
            lacks a declared output column.
    """
    # Create a database engine
    connect_args = {}
    if step.auth:
        with auth(step.auth) as creds:
            if isinstance(creds, boto3.Session):
                connect_args["session"] = creds
    engine = create_engine(step.connection, connect_args=connect_args)

    output_columns = {output.id for output in step.outputs}
    step_inputs = {i.id for i in step.inputs}
    # The statement does not depend on the row, so build it once.
    query = sqlalchemy.text(step.query)

    results = []
    errors = []
    try:
        for _, row in inputs.iterrows():
            try:
                # Bind only the columns the step declares as inputs.
                params = {
                    col: row[col] for col in row.index if col in step_inputs
                }
                # Execute the query and fetch the results into a DataFrame
                with engine.connect() as connection:
                    result = connection.execute(
                        query,
                        parameters=params if len(params) else None,
                    )
                    df = pd.DataFrame(
                        result.fetchall(), columns=list(result.keys())
                    )
                df = to_output_columns(df, output_columns)
                # Augment with all input row columns (fan-out-right)
                df = df.assign(**row.to_dict())
                results.append(df)
            except SQLAlchemyError as e:
                if batch_config.error_mode == ErrorMode.FAIL:
                    raise
                # Otherwise record the message and move on to the next row.
                errors.append(pd.DataFrame([{"error": str(e)}]))
    finally:
        # The engine is private to this call; release its connection pool
        # so repeated executions do not accumulate open connections.
        engine.dispose()

    return reconcile_results_and_errors(results, errors)
|
qtype/interpreter/batch/step.py
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
from functools import partial
|
|
2
|
-
from typing import Any, Tuple
|
|
3
|
-
|
|
4
|
-
import pandas as pd
|
|
5
|
-
|
|
6
|
-
from qtype.interpreter.batch.file_sink_source import (
|
|
7
|
-
execute_file_sink,
|
|
8
|
-
execute_file_source,
|
|
9
|
-
)
|
|
10
|
-
from qtype.interpreter.batch.sql_source import execute_sql_source
|
|
11
|
-
from qtype.interpreter.batch.types import BatchConfig
|
|
12
|
-
from qtype.interpreter.batch.utils import (
|
|
13
|
-
batch_iterator,
|
|
14
|
-
single_step_adapter,
|
|
15
|
-
validate_inputs,
|
|
16
|
-
)
|
|
17
|
-
from qtype.interpreter.exceptions import InterpreterError
|
|
18
|
-
from qtype.semantic.model import (
|
|
19
|
-
Condition,
|
|
20
|
-
Decoder,
|
|
21
|
-
FileSink,
|
|
22
|
-
FileSource,
|
|
23
|
-
Flow,
|
|
24
|
-
PromptTemplate,
|
|
25
|
-
Search,
|
|
26
|
-
SQLSource,
|
|
27
|
-
Step,
|
|
28
|
-
Tool,
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
# Step classes that batch_execute_step runs row by row through
# batch_iterator + single_step_adapter. Membership is tested with
# ``type(step) in ...``, i.e. by exact class.
SINGLE_WRAP_STEPS = {Decoder, Condition, PromptTemplate, Search, Tool}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def batch_execute_step(
    step: Step,
    inputs: pd.DataFrame,
    batch_config: BatchConfig,
    **kwargs: dict[str, Any],
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Dispatch one step of a batch pipeline to the executor for its type.

    Args:
        step (Step): The step to be executed.
        inputs (pd.DataFrame): The input data for the step.
        batch_config (BatchConfig): Configuration for batch processing.
        **kwargs: Forwarded to the flow, SQL and file executors.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: The output rows and the rows
        that produced errors.

    Raises:
        InterpreterError: If no executor exists for the step's type.
    """
    # Fail early when a column required by the step is not present.
    validate_inputs(inputs, step)

    if isinstance(step, Flow):
        # Imported here because the flow module imports this one.
        from qtype.interpreter.batch.flow import batch_execute_flow

        return batch_execute_flow(step, inputs, batch_config, **kwargs)

    # Executors that consume the whole frame themselves, tried in order.
    frame_executors = (
        (SQLSource, execute_sql_source),
        (FileSource, execute_file_source),
        (FileSink, execute_file_sink),
    )
    for step_type, executor in frame_executors:
        if isinstance(step, step_type):
            return executor(step, inputs, batch_config, **kwargs)

    if type(step) in SINGLE_WRAP_STEPS:
        # Single-row steps are adapted and applied to each row in turn.
        per_row = partial(single_step_adapter, step=step)
        return batch_iterator(
            f=per_row,
            batch=inputs,
            batch_config=batch_config,
        )

    # TODO: implement batching for multi-row steps. For example, llm inference can be sped up in batch...
    raise InterpreterError(f"Unsupported step type: {type(step).__name__}")
|
qtype/interpreter/batch/types.py
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from enum import Enum
|
|
4
|
-
|
|
5
|
-
from pydantic import BaseModel, Field
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class ErrorMode(str, Enum):
    """How batch processing reacts when a row fails."""

    # The failure is re-raised to the caller.
    FAIL = "fail"
    # The failure is captured and reported with the error rows instead.
    DROP = "drop"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class BatchConfig(BaseModel):
    """Settings for a batch execution.

    Attributes:
        num_workers: Number of async workers for batch operations (> 0).
        batch_size: Maximum number of rows handed to a step at a time (> 0).
        error_mode: Error handling mode for batch processing.
        write_errors_to: Optional directory that receives the per-step
            error files.
    """

    num_workers: int = Field(
        4,
        gt=0,
        description="Number of async workers for batch operations",
    )
    batch_size: int = Field(
        512,
        gt=0,
        description="Max number of rows to send to a step at a time",
    )
    error_mode: ErrorMode = Field(
        ErrorMode.FAIL,
        description="Error handling mode for batch processing",
    )
    write_errors_to: str | None = Field(
        None,
        description="If error mode is DROP, the errors for any step are saved to this directory",
    )
|
qtype/interpreter/batch/utils.py
DELETED
|
@@ -1,178 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import copy
|
|
4
|
-
from typing import Any, Callable, Tuple
|
|
5
|
-
|
|
6
|
-
import pandas as pd
|
|
7
|
-
|
|
8
|
-
from qtype.interpreter.batch.types import BatchConfig, ErrorMode
|
|
9
|
-
from qtype.semantic.model import Step
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class InputMissingError(Exception):
    """Signals that the DataFrame lacks a column a step needs as input."""
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def validate_inputs(batch: pd.DataFrame, step: Step) -> None:
    """Check that each input variable of the step is a column of the batch.

    Args:
        batch: The DataFrame about to be handed to the step.
        step: The step whose ``inputs`` are checked.

    Raises:
        InputMissingError: For the first input variable (in declaration
            order) that has no matching column.
    """
    required = [variable.id for variable in step.inputs]
    available = batch.columns
    for input_var in required:
        if input_var in available:
            continue
        raise InputMissingError(
            f"Input DataFrame must contain column '{input_var}' for step {step.id}."
        )
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def fail_mode_wrapper(
    f: Callable[..., dict[str, Any]],
    row: pd.Series,
    batch_config: BatchConfig,
    **kwargs: dict[str, Any],
) -> dict | Exception:
    """Call ``f`` for one row, handling failures per the batch configuration.

    Args:
        f: Function called with the row's values and ``kwargs`` as keyword
            arguments; it returns a dict of results.
        row: The input row. Its values are merged into the returned dict
            and take precedence over keys produced by ``f``.
        batch_config: Supplies the ``error_mode`` consulted on failure.
        **kwargs: Extra keyword arguments for ``f``; they override row
            values of the same name.

    Returns:
        The merged result dict, or the caught exception when the error mode
        is anything other than ``ErrorMode.FAIL``.

    Raises:
        Exception: Whatever ``f`` raised, when the error mode is
            ``ErrorMode.FAIL``.
    """
    try:
        row_values = row.to_dict()
        call_arguments = {**row_values, **kwargs}
        produced = f(**call_arguments)
        return {**produced, **row_values}
    except Exception as e:
        if batch_config.error_mode != ErrorMode.FAIL:
            return e
        raise e
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def single_step_adapter(
    step: Step, **inputs: dict[str, Any]
) -> dict[str, Any]:
    """Run a single-row step on a private copy and return its outputs.

    The step is deep-copied, the copy's input variables are bound from
    ``inputs``, the copy is executed, and the values of its output
    variables are returned keyed by variable id. The ``step`` object passed
    in is not assigned to.

    Raises:
        ValueError: If ``inputs`` has no entry for one of the step's input
            variables.
    """
    from qtype.interpreter.step import execute_step

    isolated_step = copy.deepcopy(step)
    for input_var in isolated_step.inputs:
        if input_var.id not in inputs:
            raise ValueError(
                f"Input variable '{input_var.id}' not found in inputs."
            )
        input_var.value = inputs[input_var.id]

    execute_step(isolated_step)

    collected: dict[str, Any] = {}
    for output_var in isolated_step.outputs:
        collected[output_var.id] = output_var.value
    return collected
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def to_series(
    rv: dict | Exception, error_col_name: str = "error"
) -> pd.Series:
    """Convert a per-row outcome into a pandas Series.

    A dict becomes a Series indexed by its keys. An exception becomes a
    one-element Series whose only entry, labelled ``error_col_name``, is
    the exception's string form.
    """
    payload = (
        {error_col_name: str(rv)} if isinstance(rv, Exception) else rv
    )
    return pd.Series(payload)  # type: ignore[no-any-return]
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def batch_iterator(
    f: Callable[..., dict[str, Any]],
    batch: pd.DataFrame,
    batch_config: BatchConfig,
    **kwargs: Any,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Apply ``f`` to every row of a batch and split successes from failures.

    Args:
        f: Function executed once per row (through ``fail_mode_wrapper``).
        batch: The input DataFrame to process.
        batch_config: Configuration for error handling.
        **kwargs: Additional keyword arguments passed on to ``f``.

    Returns:
        A tuple containing two DataFrames:
            - The first DataFrame contains the rows that succeeded, without
              the error column.
            - The second DataFrame contains the rows that failed: their
              original input columns plus an error column named
              ``"error_" + str(id(f))``.

    Raises:
        ValueError: If the generated error column name is already a column
            of ``batch``.
    """
    # Use a unique column name for errors
    error_col = "error_" + str(id(f))

    # If error_col is already in the dataframe, throw an exception
    if error_col in batch.columns:
        raise ValueError(
            f"Error column name '{error_col}' already exists in the batch DataFrame."
        )

    original_columns = batch.columns.tolist()

    # Nothing to do for an empty batch. Returning here also keeps
    # DataFrame.apply from probing ``f`` with a placeholder row, which it
    # may do for frames without rows.
    if len(batch) == 0:
        return batch.copy(), pd.DataFrame(
            columns=original_columns + [error_col]
        )

    def the_pipe(row: pd.Series) -> pd.Series:
        outcome = fail_mode_wrapper(
            f, row, batch_config=batch_config, **kwargs
        )
        if isinstance(outcome, Exception):
            # Keep the input values next to the message. A Series holding
            # only the error would leave the original columns as NaN (or
            # absent altogether when every row of the batch fails).
            return pd.Series({**row.to_dict(), error_col: str(outcome)})
        return to_series(outcome, error_col_name=error_col)

    results = batch.apply(the_pipe, axis=1)

    # If no row failed the error column is absent; add it so the masks
    # below can always be computed.
    if error_col not in results.columns:
        results[error_col] = pd.NA

    # A row failed exactly when its error column is populated.
    failed_mask = results[error_col].notna()
    success_mask = ~failed_mask

    # Create success DataFrame (drop the error column)
    success_df = results[success_mask].drop(columns=[error_col])

    if failed_mask.any():
        # Drop the output columns, keep only original input columns + error
        failed_df = results[failed_mask][original_columns + [error_col]]
    else:
        # No failed rows, create empty DataFrame with expected structure
        failed_df = pd.DataFrame(columns=original_columns + [error_col])

    return success_df, failed_df
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def reconcile_results_and_errors(
    results: list[pd.DataFrame], errors: list[pd.DataFrame]
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Collapse per-batch result and error frames into one frame each.

    An empty list is treated as a single empty DataFrame, so both returned
    values are always DataFrames. Row indices are renumbered from zero.

    Args:
        results (list[pd.DataFrame]): DataFrames containing results.
        errors (list[pd.DataFrame]): DataFrames containing errors.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: The concatenated results followed
        by the concatenated errors.
    """

    def _stack(frames: list[pd.DataFrame]) -> pd.DataFrame:
        # pd.concat rejects an empty list, hence the empty placeholder.
        return pd.concat(frames or [pd.DataFrame({})], ignore_index=True)

    combined_results = _stack(results)
    combined_errors = _stack(errors)
    return combined_results, combined_errors
|