outerproduct-http-types 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/PKG-INFO +1 -1
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/pyproject.toml +1 -1
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/__init__.py +22 -4
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/common.py +3 -1
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/inference.py +110 -4
- outerproduct_http_types-0.3.0/src/outerproduct_http_types/patterns.py +213 -0
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/reasoning.py +39 -26
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/trainer.py +57 -26
- outerproduct_http_types-0.3.0/src/outerproduct_http_types/uploads.py +58 -0
- outerproduct_http_types-0.2.0/src/outerproduct_http_types/segment.py +0 -59
- outerproduct_http_types-0.2.0/src/outerproduct_http_types/uploads.py +0 -34
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/.gitignore +0 -0
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/LICENSE +0 -0
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/README.md +0 -0
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/agentic_documents.py +0 -0
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/connectors.py +0 -0
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/py.typed +0 -0
- {outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/workers.py +0 -0
|
@@ -47,8 +47,19 @@ from .inference import (
|
|
|
47
47
|
ScenarioResponse,
|
|
48
48
|
ScenarioResultItem,
|
|
49
49
|
)
|
|
50
|
+
from .patterns import (
|
|
51
|
+
FilterPatternSchema,
|
|
52
|
+
PatternTrackerApplyRequest,
|
|
53
|
+
PatternTrackerDistributionResponse,
|
|
54
|
+
PatternTrackerFitRequest,
|
|
55
|
+
PatternTrackerFitResponse,
|
|
56
|
+
PatternTrackerPartitionResponse,
|
|
57
|
+
PatternTrackerResponse,
|
|
58
|
+
PatternTrackerTransformResponse,
|
|
59
|
+
PredicateSchema,
|
|
60
|
+
SchemaInfoSchema,
|
|
61
|
+
)
|
|
50
62
|
from .reasoning import ReasoningFitRequest, ReasoningFitResponse
|
|
51
|
-
from .segment import ClusterPersonaSchema, SegmentRequest, SegmentResultResponse
|
|
52
63
|
from .trainer import (
|
|
53
64
|
HardwareSpec,
|
|
54
65
|
ModalHardwareSpec,
|
|
@@ -62,7 +73,6 @@ __version__ = "0.1.0"
|
|
|
62
73
|
|
|
63
74
|
__all__ = [
|
|
64
75
|
"AnswerType",
|
|
65
|
-
"ClusterPersonaSchema",
|
|
66
76
|
"ConnectorResponse",
|
|
67
77
|
"ConnectorType",
|
|
68
78
|
"CreateConnectorRequest",
|
|
@@ -88,6 +98,16 @@ __all__ = [
|
|
|
88
98
|
"ListTablesRequest",
|
|
89
99
|
"ListTablesResponse",
|
|
90
100
|
"ModalHardwareSpec",
|
|
101
|
+
"PatternTrackerApplyRequest",
|
|
102
|
+
"PatternTrackerDistributionResponse",
|
|
103
|
+
"PatternTrackerFitRequest",
|
|
104
|
+
"PatternTrackerFitResponse",
|
|
105
|
+
"PatternTrackerPartitionResponse",
|
|
106
|
+
"PatternTrackerResponse",
|
|
107
|
+
"PatternTrackerTransformResponse",
|
|
108
|
+
"PredicateSchema",
|
|
109
|
+
"FilterPatternSchema",
|
|
110
|
+
"SchemaInfoSchema",
|
|
91
111
|
"PredictAndExplainRequest",
|
|
92
112
|
"PredictAndExplainResponse",
|
|
93
113
|
"PredictRequest",
|
|
@@ -102,8 +122,6 @@ __all__ = [
|
|
|
102
122
|
"ScenarioResultItem",
|
|
103
123
|
"Schema",
|
|
104
124
|
"SchemaResultResponse",
|
|
105
|
-
"SegmentRequest",
|
|
106
|
-
"SegmentResultResponse",
|
|
107
125
|
"StatusResponse",
|
|
108
126
|
"TabularizeJobResponse",
|
|
109
127
|
"TabularizeRequest",
|
|
@@ -31,7 +31,9 @@ class StatusResponse(BaseModel):
|
|
|
31
31
|
"""Returned by GET /models/{model_id}/status."""
|
|
32
32
|
|
|
33
33
|
model_id: str
|
|
34
|
-
job_type: str = Field(
|
|
34
|
+
job_type: str = Field(
|
|
35
|
+
description="One of: trainer_run, reasoning_fit, patterns_fit:<tracker_id>."
|
|
36
|
+
)
|
|
35
37
|
status: JobStatus
|
|
36
38
|
progress: dict[str, Any] | None = Field(
|
|
37
39
|
None, description='Progress info, e.g. {"step": 3, "total_steps": 5}'
|
|
@@ -2,7 +2,49 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import Any, Literal
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
6
|
+
|
|
7
|
+
# --- Request size caps ---------------------------------------------------- #
|
|
8
|
+
# Tune in review. Defined as module-level constants so callers and tests can
|
|
9
|
+
# reference them, and so the team can adjust without hunting for the values.
|
|
10
|
+
|
|
11
|
+
MAX_PREDICT_ROWS = 100_000
|
|
12
|
+
"""Hard cap on `samples` length for /predict requests."""
|
|
13
|
+
|
|
14
|
+
MAX_EXPLAIN_ROWS = 10_000
|
|
15
|
+
"""Hard cap on `samples` length for /explain and /predict_and_explain.
|
|
16
|
+
|
|
17
|
+
Lower than /predict because explanation computes per-row feature
|
|
18
|
+
attributions, which are roughly an order of magnitude more expensive
|
|
19
|
+
than prediction.
|
|
20
|
+
|
|
21
|
+
Note: real per-call cost also depends on training-time choices made
|
|
22
|
+
when the model was fit (cached sample counts, etc.). This row cap is
|
|
23
|
+
tight only when training stayed near defaults; models trained with
|
|
24
|
+
large internal caches can make explain expensive even within this
|
|
25
|
+
budget."""
|
|
26
|
+
|
|
27
|
+
MAX_SCENARIO_QUERIES = 100
|
|
28
|
+
"""Hard cap on `queries` length for /scenario.
|
|
29
|
+
|
|
30
|
+
Each query runs `n_walks` random walks of up to `max_steps` steps; total
|
|
31
|
+
work scales as n_queries x n_walks x max_steps."""
|
|
32
|
+
|
|
33
|
+
MAX_SCENARIO_WALK_BUDGET = 50_000
|
|
34
|
+
"""Hard cap on `n_walks x max_steps` per request.
|
|
35
|
+
|
|
36
|
+
Bounds the per-query inner work regardless of how the user splits the
|
|
37
|
+
budget between width (more walks) and depth (more steps)."""
|
|
38
|
+
|
|
39
|
+
# TODO(reviewers): consider adding MAX_SCENARIO_TOTAL_EXPLAINS to bound the
|
|
40
|
+
# aggregate explain work across a scenario request. Each walk step calls
|
|
41
|
+
# explain() on currently-active walks; worst-case totals (100 queries ×
|
|
42
|
+
# 500 walks × ~30 steps) can run for minutes of wall-clock even though
|
|
43
|
+
# each individual explain is small. A cap like
|
|
44
|
+
# MAX_SCENARIO_QUERIES * default n_walks would prevent the worst pile-ups
|
|
45
|
+
# without affecting typical usage. Not enforced today — wait until we see
|
|
46
|
+
# real timeouts before adding it.
|
|
47
|
+
|
|
6
48
|
|
|
7
49
|
# --- POST /v1/models/{model_id}/predict ---
|
|
8
50
|
|
|
@@ -19,6 +61,16 @@ class PredictRequest(BaseModel):
|
|
|
19
61
|
"schema in name and order."
|
|
20
62
|
)
|
|
21
63
|
|
|
64
|
+
@model_validator(mode="after")
|
|
65
|
+
def _check_sample_count(self):
|
|
66
|
+
if len(self.samples) > MAX_PREDICT_ROWS:
|
|
67
|
+
raise ValueError(
|
|
68
|
+
f"samples has {len(self.samples)} rows; the per-request cap is "
|
|
69
|
+
f"{MAX_PREDICT_ROWS}. Batch the request or use a connector-backed "
|
|
70
|
+
"workflow."
|
|
71
|
+
)
|
|
72
|
+
return self
|
|
73
|
+
|
|
22
74
|
|
|
23
75
|
class PredictResponse(BaseModel):
|
|
24
76
|
model_id: str
|
|
@@ -40,6 +92,15 @@ class ExplainRequest(BaseModel):
|
|
|
40
92
|
"schema in name and order."
|
|
41
93
|
)
|
|
42
94
|
|
|
95
|
+
@model_validator(mode="after")
|
|
96
|
+
def _check_sample_count(self):
|
|
97
|
+
if len(self.samples) > MAX_EXPLAIN_ROWS:
|
|
98
|
+
raise ValueError(
|
|
99
|
+
f"samples has {len(self.samples)} rows; the per-request cap for "
|
|
100
|
+
f"/explain is {MAX_EXPLAIN_ROWS}."
|
|
101
|
+
)
|
|
102
|
+
return self
|
|
103
|
+
|
|
43
104
|
|
|
44
105
|
class ExplainResponse(BaseModel):
|
|
45
106
|
"""Batch-shaped explanation arrays. Dimension 0 is the batch.
|
|
@@ -88,6 +149,15 @@ class PredictAndExplainRequest(BaseModel):
|
|
|
88
149
|
),
|
|
89
150
|
)
|
|
90
151
|
|
|
152
|
+
@model_validator(mode="after")
|
|
153
|
+
def _check_sample_count(self):
|
|
154
|
+
if len(self.samples) > MAX_EXPLAIN_ROWS:
|
|
155
|
+
raise ValueError(
|
|
156
|
+
f"samples has {len(self.samples)} rows; the per-request cap for "
|
|
157
|
+
f"/predict_and_explain is {MAX_EXPLAIN_ROWS}."
|
|
158
|
+
)
|
|
159
|
+
return self
|
|
160
|
+
|
|
91
161
|
|
|
92
162
|
class PredictAndExplainResponse(BaseModel):
|
|
93
163
|
"""Batch-shaped predict + explain arrays. Dimension 0 is the batch."""
|
|
@@ -155,6 +225,27 @@ class FeatureConstraintSchema(BaseModel):
|
|
|
155
225
|
value_range: tuple[float | None, float | None] | None = None
|
|
156
226
|
allowed_values: list[Any] | None = None
|
|
157
227
|
|
|
228
|
+
@model_validator(mode="after")
|
|
229
|
+
def _check_invariants(self):
|
|
230
|
+
if self.immutable and (
|
|
231
|
+
self.monotonic is not None
|
|
232
|
+
or self.value_range is not None
|
|
233
|
+
or self.allowed_values is not None
|
|
234
|
+
):
|
|
235
|
+
raise ValueError(
|
|
236
|
+
"immutable=True cannot be combined with monotonic, value_range, "
|
|
237
|
+
"or allowed_values."
|
|
238
|
+
)
|
|
239
|
+
if self.value_range is not None:
|
|
240
|
+
lo, hi = self.value_range
|
|
241
|
+
if lo is not None and hi is not None and lo > hi:
|
|
242
|
+
raise ValueError(
|
|
243
|
+
f"value_range lower bound {lo} exceeds upper bound {hi}."
|
|
244
|
+
)
|
|
245
|
+
if self.allowed_values is not None and len(self.allowed_values) == 0:
|
|
246
|
+
raise ValueError("allowed_values must be non-empty when provided.")
|
|
247
|
+
return self
|
|
248
|
+
|
|
158
249
|
|
|
159
250
|
class ScenarioRequest(BaseModel):
|
|
160
251
|
"""POST /v1/models/{model_id}/scenario -- Counterfactual search with constraints."""
|
|
@@ -168,12 +259,27 @@ class ScenarioRequest(BaseModel):
|
|
|
168
259
|
"schema in name and order."
|
|
169
260
|
)
|
|
170
261
|
desired_class: int = 1
|
|
171
|
-
n_walks: int = 500
|
|
172
|
-
max_steps: int = 30
|
|
173
|
-
epsilon: float = 0.2
|
|
262
|
+
n_walks: int = Field(500, ge=1)
|
|
263
|
+
max_steps: int = Field(30, ge=1)
|
|
264
|
+
epsilon: float = Field(0.2, gt=0.0, le=1.0)
|
|
174
265
|
random_state: int | None = 42
|
|
175
266
|
constraints: dict[str, FeatureConstraintSchema] = Field(default_factory=dict)
|
|
176
267
|
|
|
268
|
+
@model_validator(mode="after")
|
|
269
|
+
def _check_request_budget(self):
|
|
270
|
+
if len(self.queries) > MAX_SCENARIO_QUERIES:
|
|
271
|
+
raise ValueError(
|
|
272
|
+
f"queries has {len(self.queries)} entries; the per-request cap is "
|
|
273
|
+
f"{MAX_SCENARIO_QUERIES}."
|
|
274
|
+
)
|
|
275
|
+
budget = self.n_walks * self.max_steps
|
|
276
|
+
if budget > MAX_SCENARIO_WALK_BUDGET:
|
|
277
|
+
raise ValueError(
|
|
278
|
+
f"n_walks × max_steps = {budget} exceeds the per-query cap of "
|
|
279
|
+
f"{MAX_SCENARIO_WALK_BUDGET}. Reduce n_walks or max_steps."
|
|
280
|
+
)
|
|
281
|
+
return self
|
|
282
|
+
|
|
177
283
|
|
|
178
284
|
class ScenarioChange(BaseModel):
|
|
179
285
|
"""Single-feature diff between a query and one counterfactual row."""
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Request/response schemas for the /v1/models/{model_id}/patterns/* endpoints.
|
|
2
|
+
|
|
3
|
+
PatternTracker aggregates per-sample local-rule explanations into a small set
|
|
4
|
+
of executable, labeled filter patterns. Fit runs server-side; the SDK
|
|
5
|
+
client holds an ``id`` and calls transform/distribution/partition by
|
|
6
|
+
``tracker_id``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
from .common import JobStatus
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PredicateSchema(BaseModel):
|
|
17
|
+
"""One literal of a conjunctive filter: ``feature <op> value``.
|
|
18
|
+
|
|
19
|
+
Mirrors ``outerproduct_reasoning.internal.local_rules.Predicate``: ``op``
|
|
20
|
+
is ``<=``/``>=`` for continuous features (numeric ``value``) or ``==`` for
|
|
21
|
+
categoricals (string/bool/int ``value``).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
feature: str
|
|
25
|
+
op: Literal["<=", ">=", "=="]
|
|
26
|
+
value: float | int | str | bool
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class FilterPatternSchema(BaseModel):
|
|
30
|
+
"""One executable conjunctive filter with coverage stats and a label."""
|
|
31
|
+
|
|
32
|
+
predicates: list[PredicateSchema]
|
|
33
|
+
label: str
|
|
34
|
+
support_rejects: float = Field(
|
|
35
|
+
description="Share of fit-time rejected rows the pattern matches."
|
|
36
|
+
)
|
|
37
|
+
precision: float = Field(
|
|
38
|
+
description="Share of all matched rows that are rejects."
|
|
39
|
+
)
|
|
40
|
+
lift: float = Field(description="precision / base_reject_rate.")
|
|
41
|
+
n_local_rules_covered: int
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class SchemaInfoSchema(BaseModel):
|
|
45
|
+
"""Column / dtype / categorical-level snapshot captured at fit time."""
|
|
46
|
+
|
|
47
|
+
columns: list[str]
|
|
48
|
+
dtypes: dict[str, str]
|
|
49
|
+
# frozensets don't survive JSON; the server emits sorted lists, the SDK
|
|
50
|
+
# rehydrates into frozensets on its side.
|
|
51
|
+
categorical_levels: dict[str, list[Any]]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# --- POST /v1/models/{model_id}/patterns/fit ---
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class PatternTrackerFitRequest(BaseModel):
|
|
58
|
+
"""POST /v1/models/{model_id}/patterns/fit -- Submit a pattern-tracker fit job.
|
|
59
|
+
|
|
60
|
+
Carries the fit hyperparameters plus a dataset reference that mirrors
|
|
61
|
+
:class:`ReasoningFitRequest`. The dataset is resolved on the API server
|
|
62
|
+
(inline parquet upload / pre-uploaded passthrough / connector validation)
|
|
63
|
+
before the heavy compute is dispatched to a Modal CPU function; no inline
|
|
64
|
+
data is held in memory on the API tier.
|
|
65
|
+
|
|
66
|
+
Exactly one of the three dataset modes must be set:
|
|
67
|
+
|
|
68
|
+
- **Connector-backed** (``data_connector=True``): ``connector_id`` +
|
|
69
|
+
``table_name`` reference a registered connector. The Modal worker
|
|
70
|
+
materializes the table at compute time.
|
|
71
|
+
- **Pre-uploaded** (``data_uploaded=True``): ``data_model_id`` is the
|
|
72
|
+
upload-scope pointer returned by ``POST /v1/uploads`` (independent
|
|
73
|
+
of the route's owning ``model_id``).
|
|
74
|
+
- **Inline** (default): ``data`` and ``feature_names`` carry a small
|
|
75
|
+
tabular payload directly in the request body.
|
|
76
|
+
"""
|
|
77
|
+
|
|
78
|
+
target_range: tuple[float | None, float | None] = Field(
|
|
79
|
+
description=(
|
|
80
|
+
"(lo, hi) bounds defining the 'rejected' band of predictions. "
|
|
81
|
+
"Either side may be null for an open bound; at least one must be "
|
|
82
|
+
"set."
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
mode: Literal["cover", "discovery"] = "discovery"
|
|
86
|
+
max_patterns: int = 25
|
|
87
|
+
coverage_target: float = 0.95
|
|
88
|
+
min_pattern_support: float = 0.005
|
|
89
|
+
min_precision: float = 0.5
|
|
90
|
+
max_pattern_size: int = 3
|
|
91
|
+
threshold_n_bins: int = 10
|
|
92
|
+
ensure_coverage: bool = True
|
|
93
|
+
min_wracc: float = 0.0
|
|
94
|
+
diversity_threshold: float = 0.5
|
|
95
|
+
drop_redundant: bool = True
|
|
96
|
+
child_overlap_threshold: float = 0.9
|
|
97
|
+
explained_lift_threshold: float = 1.1
|
|
98
|
+
rule_kwargs: dict[str, Any] | None = None
|
|
99
|
+
|
|
100
|
+
# --- Dataset routing (mirrors ReasoningFitRequest) ----------------------
|
|
101
|
+
data_connector: bool = Field(
|
|
102
|
+
default=False,
|
|
103
|
+
description="If True, fit over a connector-backed table.",
|
|
104
|
+
)
|
|
105
|
+
connector_id: str | None = None
|
|
106
|
+
table_name: str | None = None
|
|
107
|
+
data_uploaded: bool = Field(
|
|
108
|
+
default=False,
|
|
109
|
+
description=(
|
|
110
|
+
"If True, fit over a dataset uploaded via POST /v1/uploads. "
|
|
111
|
+
"``data_model_id`` is the upload-scope pointer."
|
|
112
|
+
),
|
|
113
|
+
)
|
|
114
|
+
data_model_id: str | None = Field(
|
|
115
|
+
default=None,
|
|
116
|
+
description=(
|
|
117
|
+
"Upload-scope model_id pointer for pre-uploaded datasets; "
|
|
118
|
+
"named distinctly to avoid colliding with the route's "
|
|
119
|
+
"owning ``model_id``."
|
|
120
|
+
),
|
|
121
|
+
)
|
|
122
|
+
data: list[list[float | str | bool | None]] | None = Field(
|
|
123
|
+
default=None,
|
|
124
|
+
description=(
|
|
125
|
+
"Inline 2D feature matrix for small datasets. The API "
|
|
126
|
+
"server writes this to S3 as parquet before dispatching the "
|
|
127
|
+
"Modal compute."
|
|
128
|
+
),
|
|
129
|
+
)
|
|
130
|
+
feature_names: list[str] | None = Field(
|
|
131
|
+
default=None,
|
|
132
|
+
description="Column names aligned to ``data``. Required when ``data`` is set.",
|
|
133
|
+
)
|
|
134
|
+
label_column: str | None = Field(
|
|
135
|
+
default=None,
|
|
136
|
+
description=(
|
|
137
|
+
"Optional label column name to drop from the dataset before "
|
|
138
|
+
"fitting. Only meaningful for pre-uploaded and connector-backed "
|
|
139
|
+
"datasets."
|
|
140
|
+
),
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class PatternTrackerFitResponse(BaseModel):
|
|
145
|
+
"""Returned immediately by POST /patterns/fit; the actual artifact is
|
|
146
|
+
retrieved via GET /patterns/{tracker_id} once the job completes."""
|
|
147
|
+
|
|
148
|
+
model_id: str
|
|
149
|
+
tracker_id: str
|
|
150
|
+
status: JobStatus
|
|
151
|
+
message: str | None = None
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# --- GET /v1/models/{model_id}/patterns/{tracker_id} ---
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class PatternTrackerResponse(BaseModel):
|
|
158
|
+
"""The fitted pattern tracker. Optional fields are None while the job is
|
|
159
|
+
pending/running/failed; populated once the job completes successfully.
|
|
160
|
+
"""
|
|
161
|
+
|
|
162
|
+
model_id: str
|
|
163
|
+
tracker_id: str
|
|
164
|
+
status: JobStatus
|
|
165
|
+
patterns: list[FilterPatternSchema] | None = None
|
|
166
|
+
schema_info: SchemaInfoSchema | None = None
|
|
167
|
+
target_range: tuple[float | None, float | None] | None = None
|
|
168
|
+
n_rejected_fit: int | None = None
|
|
169
|
+
coverage_fit: float | None = None
|
|
170
|
+
error_message: str | None = None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# --- POST /v1/models/{model_id}/patterns/{tracker_id}/transform ---
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class PatternTrackerApplyRequest(BaseModel):
|
|
177
|
+
"""Body shared by /transform, /distribution, and /partition."""
|
|
178
|
+
|
|
179
|
+
samples: list[list[float | str | bool | None]] = Field(
|
|
180
|
+
description=(
|
|
181
|
+
"2D array, shape (n_samples, n_features). Cells may be numeric, "
|
|
182
|
+
"string (categorical), or bool."
|
|
183
|
+
)
|
|
184
|
+
)
|
|
185
|
+
feature_names: list[str] = Field(
|
|
186
|
+
description=(
|
|
187
|
+
"Column names. Must include every column the tracker's frozen "
|
|
188
|
+
"schema requires (extras are ignored)."
|
|
189
|
+
)
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class PatternTrackerTransformResponse(BaseModel):
|
|
194
|
+
"""Boolean match matrix aligned to ``labels``."""
|
|
195
|
+
|
|
196
|
+
labels: list[str]
|
|
197
|
+
matrix: list[list[bool]] = Field(
|
|
198
|
+
description="Shape (n_samples, n_patterns); cell `[i, j]` true iff "
|
|
199
|
+
"sample i matches pattern labels[j]."
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class PatternTrackerDistributionResponse(BaseModel):
|
|
204
|
+
"""Per-pattern match rate over the supplied samples."""
|
|
205
|
+
|
|
206
|
+
match_rate: dict[str, float]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class PatternTrackerPartitionResponse(BaseModel):
|
|
210
|
+
"""Matching row indices (positional, into the request's `samples`) keyed
|
|
211
|
+
by pattern label."""
|
|
212
|
+
|
|
213
|
+
indices: dict[str, list[int]]
|
|
@@ -14,21 +14,35 @@ from pydantic import BaseModel, Field, model_validator
|
|
|
14
14
|
from .common import JobResponse
|
|
15
15
|
from .trainer import HardwareSpec
|
|
16
16
|
|
|
17
|
+
# --- Compute caps --------------------------------------------------------- #
|
|
18
|
+
|
|
19
|
+
MAX_REASONING_HYPEROPT_STEPS = 200
|
|
20
|
+
"""Hard cap on `n_hyperopt_steps`.
|
|
21
|
+
|
|
22
|
+
reasoning.fit pins the surrogate to a single model type, so the budget
|
|
23
|
+
is just n_hyperopt_steps. Each step is a Modal trial."""
|
|
24
|
+
|
|
17
25
|
|
|
18
26
|
class ReasoningFitRequest(BaseModel):
|
|
19
27
|
"""POST /v1/reasoning/fit -- Fit a ReasoningModel.
|
|
20
28
|
|
|
21
|
-
Dataset delivery
|
|
22
|
-
* Inline: set `data` and `labels`.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
29
|
+
Dataset delivery has exactly one of three modes:
|
|
30
|
+
* Inline: set `data` and `labels`. The server creates a transient
|
|
31
|
+
dataset row and uploads the data to the dataset's canonical S3
|
|
32
|
+
location before training begins.
|
|
33
|
+
* Pre-uploaded: set `dataset_id` to the id returned by
|
|
34
|
+
``POST /v1/uploads``, plus `label_column`.
|
|
35
|
+
* Connector: set `data_connector=True`, `connector_id`, and
|
|
36
|
+
`table_name`.
|
|
37
|
+
|
|
38
|
+
When `teacher_predict_url` is set, labels become evaluation-only (the
|
|
39
|
+
teacher provides the training target).
|
|
26
40
|
"""
|
|
27
41
|
|
|
28
42
|
# --- dataset
|
|
29
43
|
data: list[list[float | str | bool | None]] | None = Field(
|
|
30
44
|
None,
|
|
31
|
-
description="2D feature matrix (n_samples, n_features). Omit when
|
|
45
|
+
description="2D feature matrix (n_samples, n_features). Omit when dataset_id is set.",
|
|
32
46
|
)
|
|
33
47
|
labels: list[float] | None = Field(
|
|
34
48
|
None,
|
|
@@ -44,24 +58,22 @@ class ReasoningFitRequest(BaseModel):
|
|
|
44
58
|
None,
|
|
45
59
|
description="Optional per-column schema for inline data.",
|
|
46
60
|
)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
description="
|
|
50
|
-
"
|
|
61
|
+
dataset_id: str | None = Field(
|
|
62
|
+
None,
|
|
63
|
+
description="Identifier of a previously uploaded dataset (from "
|
|
64
|
+
"``POST /v1/uploads``). Mutually exclusive with inline data and "
|
|
65
|
+
"data_connector.",
|
|
51
66
|
)
|
|
52
67
|
label_column: str | None = Field(
|
|
53
68
|
None,
|
|
54
|
-
description="Target column name in the
|
|
55
|
-
)
|
|
56
|
-
model_id: str | None = Field(
|
|
57
|
-
None, description="Custom model ID; required when data_uploaded=True."
|
|
69
|
+
description="Target column name in the dataset.",
|
|
58
70
|
)
|
|
59
71
|
|
|
60
72
|
# --- connector-based data source
|
|
61
73
|
data_connector: bool = Field(
|
|
62
74
|
False,
|
|
63
75
|
description="If true, read the dataset from a registered connector. "
|
|
64
|
-
"Mutually exclusive with inline data and
|
|
76
|
+
"Mutually exclusive with inline data and dataset_id.",
|
|
65
77
|
)
|
|
66
78
|
connector_id: str | None = Field(
|
|
67
79
|
None,
|
|
@@ -78,7 +90,7 @@ class ReasoningFitRequest(BaseModel):
|
|
|
78
90
|
description="Candidate model-family identifiers. reasoning.fit pins the "
|
|
79
91
|
"surrogate via force_model_type, so at most one entry is accepted.",
|
|
80
92
|
)
|
|
81
|
-
n_hyperopt_steps: int = 5
|
|
93
|
+
n_hyperopt_steps: int = Field(5, ge=1, le=MAX_REASONING_HYPEROPT_STEPS)
|
|
82
94
|
device: str | None = Field(None, description="'auto' | 'cuda' | 'cpu'.")
|
|
83
95
|
random_state: int = 42
|
|
84
96
|
task_type: Literal["regression", "binclass", "multiclass"] | None = Field(
|
|
@@ -113,11 +125,13 @@ class ReasoningFitRequest(BaseModel):
|
|
|
113
125
|
|
|
114
126
|
@model_validator(mode="after")
|
|
115
127
|
def _check_dataset_source(self):
|
|
116
|
-
# Exactly one of three modes: inline,
|
|
117
|
-
if self.data_connector and self.data_uploaded:
|
|
118
|
-
raise ValueError("data_connector and data_uploaded are mutually exclusive")
|
|
128
|
+
# Exactly one of three modes: inline, dataset_id, or connector.
|
|
129
|
+
if self.data_connector and self.dataset_id is not None:
|
|
130
|
+
raise ValueError("data_connector and dataset_id are mutually exclusive")
|
|
119
131
|
if self.data_connector and self.data is not None:
|
|
120
132
|
raise ValueError("data_connector and inline data are mutually exclusive")
|
|
133
|
+
if self.dataset_id is not None and self.data is not None:
|
|
134
|
+
raise ValueError("dataset_id and inline data are mutually exclusive")
|
|
121
135
|
|
|
122
136
|
if self.data_connector:
|
|
123
137
|
if not self.connector_id:
|
|
@@ -129,21 +143,20 @@ class ReasoningFitRequest(BaseModel):
|
|
|
129
143
|
"label_column is required when data_connector=True (unless "
|
|
130
144
|
"teacher_predict_url is set)"
|
|
131
145
|
)
|
|
132
|
-
elif self.data_uploaded:
|
|
133
|
-
if not self.model_id:
|
|
134
|
-
raise ValueError("model_id is required when data_uploaded=True")
|
|
146
|
+
elif self.dataset_id is not None:
|
|
135
147
|
if not self.label_column and not self.teacher_predict_url:
|
|
136
148
|
raise ValueError(
|
|
137
|
-
"label_column is required when
|
|
149
|
+
"label_column is required when dataset_id is set (unless "
|
|
138
150
|
"teacher_predict_url is set)"
|
|
139
151
|
)
|
|
140
152
|
else:
|
|
141
153
|
if self.data is None:
|
|
142
|
-
raise ValueError(
|
|
154
|
+
raise ValueError(
|
|
155
|
+
"data is required for inline mode (or supply dataset_id / data_connector=True)"
|
|
156
|
+
)
|
|
143
157
|
if self.labels is None and self.teacher_predict_url is None:
|
|
144
158
|
raise ValueError(
|
|
145
|
-
"labels is required
|
|
146
|
-
"teacher_predict_url is set)"
|
|
159
|
+
"labels is required for inline mode (unless teacher_predict_url is set)"
|
|
147
160
|
)
|
|
148
161
|
return self
|
|
149
162
|
|
|
@@ -12,6 +12,14 @@ from pydantic import BaseModel, Field, model_validator
|
|
|
12
12
|
|
|
13
13
|
from .common import JobResponse
|
|
14
14
|
|
|
15
|
+
# --- Compute caps --------------------------------------------------------- #
|
|
16
|
+
|
|
17
|
+
MAX_HYPEROPT_BUDGET = 200
|
|
18
|
+
"""Hard cap on `n_trials × max(1, len(model_types))`.
|
|
19
|
+
|
|
20
|
+
Bounds total HPO trials per training request. Each trial spawns a Modal
|
|
21
|
+
container, so this is the primary cost gate."""
|
|
22
|
+
|
|
15
23
|
|
|
16
24
|
class ModalHardwareSpec(BaseModel):
|
|
17
25
|
"""Fan trials out to additional Modal containers.
|
|
@@ -36,20 +44,27 @@ HardwareSpec = ModalHardwareSpec
|
|
|
36
44
|
class TrainerRunRequest(BaseModel):
|
|
37
45
|
"""POST /v1/trainer/run -- Configure a Trainer and run HPO across a model matrix.
|
|
38
46
|
|
|
39
|
-
Dataset delivery
|
|
40
|
-
* Inline: set `data` and `labels`.
|
|
41
|
-
|
|
42
|
-
|
|
47
|
+
Dataset delivery has exactly one of three modes:
|
|
48
|
+
* Inline: set `data` and `labels`. The server creates a transient
|
|
49
|
+
dataset row and uploads the data to the dataset's canonical S3
|
|
50
|
+
location before training begins.
|
|
51
|
+
* Pre-uploaded: set `dataset_id` to the id returned by
|
|
52
|
+
``POST /v1/uploads``, plus `label_column` (the column in the uploaded
|
|
53
|
+
table that holds the target).
|
|
54
|
+
* Connector: set `data_connector=True`, `connector_id`, and
|
|
55
|
+
`table_name`.
|
|
56
|
+
|
|
57
|
+
The produced model id is returned in the response.
|
|
43
58
|
"""
|
|
44
59
|
|
|
45
60
|
# --- dataset
|
|
46
61
|
data: list[list[float | str | bool | None]] | None = Field(
|
|
47
62
|
None,
|
|
48
|
-
description="2D feature matrix (n_samples, n_features). Omit when
|
|
63
|
+
description="2D feature matrix (n_samples, n_features). Omit when dataset_id is set.",
|
|
49
64
|
)
|
|
50
65
|
labels: list[float] | None = Field(
|
|
51
66
|
None,
|
|
52
|
-
description="Target values, length n_samples. Omit when
|
|
67
|
+
description="Target values, length n_samples. Omit when dataset_id is set.",
|
|
53
68
|
)
|
|
54
69
|
feature_names: list[str] | None = Field(
|
|
55
70
|
None,
|
|
@@ -61,24 +76,23 @@ class TrainerRunRequest(BaseModel):
|
|
|
61
76
|
description="Optional per-column schema for inline data: "
|
|
62
77
|
"{name: {dtype: 'float' | 'int' | 'bool' | 'categorical'}}.",
|
|
63
78
|
)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
description="
|
|
67
|
-
"
|
|
79
|
+
dataset_id: str | None = Field(
|
|
80
|
+
None,
|
|
81
|
+
description="Identifier of a previously uploaded dataset (from "
|
|
82
|
+
"``POST /v1/uploads``). Mutually exclusive with inline data and "
|
|
83
|
+
"data_connector.",
|
|
68
84
|
)
|
|
69
85
|
label_column: str | None = Field(
|
|
70
86
|
None,
|
|
71
|
-
description="Target column name in the
|
|
72
|
-
|
|
73
|
-
model_id: str | None = Field(
|
|
74
|
-
None, description="Custom model ID; required when data_uploaded=True."
|
|
87
|
+
description="Target column name in the dataset. Required when dataset_id "
|
|
88
|
+
"is set (unless teacher_predict_url is also set).",
|
|
75
89
|
)
|
|
76
90
|
|
|
77
91
|
# --- connector-based data source
|
|
78
92
|
data_connector: bool = Field(
|
|
79
93
|
False,
|
|
80
94
|
description="If true, read the dataset from a registered connector. "
|
|
81
|
-
"Mutually exclusive with inline data and
|
|
95
|
+
"Mutually exclusive with inline data and dataset_id.",
|
|
82
96
|
)
|
|
83
97
|
connector_id: str | None = Field(
|
|
84
98
|
None,
|
|
@@ -106,7 +120,7 @@ class TrainerRunRequest(BaseModel):
|
|
|
106
120
|
"random",
|
|
107
121
|
description="HPO strategy: 'random' or 'optuna'. Resolved server-side.",
|
|
108
122
|
)
|
|
109
|
-
n_trials: int = Field(4, description="Number of HPO trials per matrix row.")
|
|
123
|
+
n_trials: int = Field(4, ge=1, description="Number of HPO trials per matrix row.")
|
|
110
124
|
n_splits: int | None = Field(
|
|
111
125
|
None,
|
|
112
126
|
description="K-fold cross-validation folds. None means a single holdout split.",
|
|
@@ -149,11 +163,13 @@ class TrainerRunRequest(BaseModel):
|
|
|
149
163
|
|
|
150
164
|
@model_validator(mode="after")
|
|
151
165
|
def _check_dataset_source(self):
|
|
152
|
-
# Exactly one of three modes: inline,
|
|
153
|
-
if self.data_connector and self.data_uploaded:
|
|
154
|
-
raise ValueError("data_connector and data_uploaded are mutually exclusive")
|
|
166
|
+
# Exactly one of three modes: inline, dataset_id, or connector.
|
|
167
|
+
if self.data_connector and self.dataset_id is not None:
|
|
168
|
+
raise ValueError("data_connector and dataset_id are mutually exclusive")
|
|
155
169
|
if self.data_connector and self.data is not None:
|
|
156
170
|
raise ValueError("data_connector and inline data are mutually exclusive")
|
|
171
|
+
if self.dataset_id is not None and self.data is not None:
|
|
172
|
+
raise ValueError("dataset_id and inline data are mutually exclusive")
|
|
157
173
|
|
|
158
174
|
if self.data_connector:
|
|
159
175
|
if not self.connector_id:
|
|
@@ -165,22 +181,21 @@ class TrainerRunRequest(BaseModel):
|
|
|
165
181
|
"label_column is required when data_connector=True (unless "
|
|
166
182
|
"teacher_predict_url is set)"
|
|
167
183
|
)
|
|
168
|
-
elif self.data_uploaded:
|
|
169
|
-
if not self.model_id:
|
|
170
|
-
raise ValueError("model_id is required when data_uploaded=True")
|
|
184
|
+
elif self.dataset_id is not None:
|
|
171
185
|
if not self.label_column and not self.teacher_predict_url:
|
|
172
186
|
raise ValueError(
|
|
173
|
-
"label_column is required when
|
|
187
|
+
"label_column is required when dataset_id is set (unless "
|
|
174
188
|
"teacher_predict_url is set, in which case the teacher provides "
|
|
175
189
|
"the training target)"
|
|
176
190
|
)
|
|
177
191
|
else:
|
|
178
192
|
if self.data is None:
|
|
179
|
-
raise ValueError(
|
|
193
|
+
raise ValueError(
|
|
194
|
+
"data is required for inline mode (or supply dataset_id / data_connector=True)"
|
|
195
|
+
)
|
|
180
196
|
if self.labels is None and self.teacher_predict_url is None:
|
|
181
197
|
raise ValueError(
|
|
182
|
-
"labels is required
|
|
183
|
-
"teacher_predict_url is set)"
|
|
198
|
+
"labels is required for inline mode (unless teacher_predict_url is set)"
|
|
184
199
|
)
|
|
185
200
|
return self
|
|
186
201
|
|
|
@@ -192,6 +207,22 @@ class TrainerRunRequest(BaseModel):
|
|
|
192
207
|
raise ValueError("feature_names is required when data contains non-numeric values")
|
|
193
208
|
return self
|
|
194
209
|
|
|
210
|
+
@model_validator(mode="after")
|
|
211
|
+
def _check_hyperopt_budget(self):
|
|
212
|
+
# When model_types is None the server picks a default set, so the
|
|
213
|
+
# client-visible budget is just n_trials. We cap that as a lower
|
|
214
|
+
# bound on the real budget; the server should re-check after
|
|
215
|
+
# resolving the default model list.
|
|
216
|
+
n_models = max(1, len(self.model_types)) if self.model_types else 1
|
|
217
|
+
budget = self.n_trials * n_models
|
|
218
|
+
if budget > MAX_HYPEROPT_BUDGET:
|
|
219
|
+
raise ValueError(
|
|
220
|
+
f"n_trials x len(model_types) = {budget} exceeds the per-request "
|
|
221
|
+
f"cap of {MAX_HYPEROPT_BUDGET}. Reduce n_trials or shrink "
|
|
222
|
+
"model_types."
|
|
223
|
+
)
|
|
224
|
+
return self
|
|
225
|
+
|
|
195
226
|
|
|
196
227
|
class TrainerRunResponse(JobResponse):
|
|
197
228
|
"""POST /v1/trainer/run -- async trainer job submission response."""
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Request/response schemas for the presigned-upload endpoint."""
|
|
2
|
+
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
# --- Upload size policy --------------------------------------------------- #
|
|
8
|
+
# Tunable in review. These constants are the single source of truth for the
|
|
9
|
+
# SDK's client-side guards. Server-side enforcement of MAX_UPLOAD_BYTES will
|
|
10
|
+
# be wired when the upload flow switches from presigned PUT to presigned POST
|
|
11
|
+
# (where S3's `content-length-range` condition becomes available).
|
|
12
|
+
|
|
13
|
+
MAX_UPLOAD_BYTES = 10_000_000_000
|
|
14
|
+
"""Hard cap on uploaded file size, all formats.
|
|
15
|
+
|
|
16
|
+
Picked to comfortably hold 1M x 1k Parquet workloads with headroom for
|
|
17
|
+
~10x growth. Enforced today by the SDK before upload starts. When the
|
|
18
|
+
presigned POST switch lands, S3 will enforce server-side via
|
|
19
|
+
`content-length-range`."""
|
|
20
|
+
|
|
21
|
+
MAX_CSV_UPLOAD_BYTES = 3_000_000_000
|
|
22
|
+
"""Hard cap on CSV uploads specifically.
|
|
23
|
+
|
|
24
|
+
CSV is ~10-20x larger than the same data as Parquet, so a CSV near
|
|
25
|
+
MAX_UPLOAD_BYTES would almost always be a misformatted workload. Reject
|
|
26
|
+
early with a clear message rather than burn upload bandwidth on a file
|
|
27
|
+
that should have been Parquet."""
|
|
28
|
+
|
|
29
|
+
CSV_UPLOAD_WARN_BYTES = 500_000_000
|
|
30
|
+
"""SDK emits a UserWarning above this CSV size, suggesting Parquet.
|
|
31
|
+
|
|
32
|
+
Soft signal — at this scale Parquet would be ~50x100 MB for the same
|
|
33
|
+
data, and subsequent training reads are also faster."""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class CreateUploadRequest(BaseModel):
|
|
37
|
+
"""POST /v1/uploads -- request a presigned URL for direct-to-S3 upload."""
|
|
38
|
+
|
|
39
|
+
file_format: Literal["pkl", "csv", "parquet"] = Field(
|
|
40
|
+
...,
|
|
41
|
+
description=(
|
|
42
|
+
"Format of the dataset you will PUT to the returned URL. "
|
|
43
|
+
"'pkl' = a pickled pandas DataFrame, 'csv' = RFC4180 CSV with a "
|
|
44
|
+
"header row, 'parquet' = Apache Parquet. The label column must be "
|
|
45
|
+
"present in the uploaded table and its name is supplied on the "
|
|
46
|
+
"subsequent /v1/trainer/run or /v1/reasoning/fit call as "
|
|
47
|
+
"`label_column`."
|
|
48
|
+
),
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class CreateUploadResponse(BaseModel):
|
|
53
|
+
dataset_id: str
|
|
54
|
+
upload_url: str
|
|
55
|
+
upload_key: str
|
|
56
|
+
file_format: Literal["pkl", "csv", "parquet"]
|
|
57
|
+
content_type: str
|
|
58
|
+
expires_in: int
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
"""Request/response schemas for segmentation endpoints."""
|
|
2
|
-
|
|
3
|
-
from typing import Any
|
|
4
|
-
|
|
5
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class ClusterPersonaSchema(BaseModel):
|
|
9
|
-
"""One cluster's persona description as exposed over HTTP.
|
|
10
|
-
|
|
11
|
-
Mirrors the shape written into ``segments.json`` by the segment Lambda;
|
|
12
|
-
the API repo aliases that S3 wire type onto this HTTP type at the
|
|
13
|
-
response boundary.
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
model_config = ConfigDict(from_attributes=True)
|
|
17
|
-
|
|
18
|
-
cluster_id: int
|
|
19
|
-
persona_name: str
|
|
20
|
-
persona_description: str
|
|
21
|
-
stats: dict[str, Any]
|
|
22
|
-
differentiating_features: list[dict[str, Any]] | None = None
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class SegmentRequest(BaseModel):
|
|
26
|
-
"""POST /v1/models/{model_id}/segment -- Supervised segmentation (async)."""
|
|
27
|
-
|
|
28
|
-
data: list[list[float | str | bool | None]] | None = Field(
|
|
29
|
-
None,
|
|
30
|
-
description="Dataset to segment; uses training data if omitted. "
|
|
31
|
-
"Cells may be numeric, string (categorical), or bool.",
|
|
32
|
-
)
|
|
33
|
-
target_values: list[float] | None = None
|
|
34
|
-
feature_names: list[str] | None = None
|
|
35
|
-
min_clusters: int = 4
|
|
36
|
-
max_clusters: int | None = 10
|
|
37
|
-
n_search_steps: int = 50
|
|
38
|
-
use_agent: bool | None = None
|
|
39
|
-
kpi_field: str | None = None
|
|
40
|
-
problem_context: str | None = None
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class SegmentResultResponse(BaseModel):
|
|
44
|
-
"""GET /v1/models/{model_id}/segments -- Retrieve segmentation results.
|
|
45
|
-
|
|
46
|
-
Result fields are Optional because pending/running/failed jobs return
|
|
47
|
-
only model_id + status; populated only once the Lambda has uploaded
|
|
48
|
-
segments.json and the SegmentsResult is available.
|
|
49
|
-
"""
|
|
50
|
-
|
|
51
|
-
model_id: str
|
|
52
|
-
status: str
|
|
53
|
-
n_clusters: int | None = None
|
|
54
|
-
cluster_ids: list[int] | None = None
|
|
55
|
-
resolution: float | None = None
|
|
56
|
-
quality: float | None = None
|
|
57
|
-
personas: list[ClusterPersonaSchema] | None = None
|
|
58
|
-
agent_score: float | None = None
|
|
59
|
-
agent_reasoning: str | None = None
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
"""Request/response schemas for the presigned-upload endpoint."""
|
|
2
|
-
|
|
3
|
-
from typing import Literal
|
|
4
|
-
|
|
5
|
-
from pydantic import BaseModel, Field
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class CreateUploadRequest(BaseModel):
|
|
9
|
-
"""POST /v1/uploads -- request a presigned URL for direct-to-S3 upload."""
|
|
10
|
-
|
|
11
|
-
model_id: str | None = Field(
|
|
12
|
-
None,
|
|
13
|
-
description="Custom model ID; auto-generated if omitted.",
|
|
14
|
-
)
|
|
15
|
-
file_format: Literal["pkl", "csv", "parquet"] = Field(
|
|
16
|
-
...,
|
|
17
|
-
description=(
|
|
18
|
-
"Format of the dataset you will PUT to the returned URL. "
|
|
19
|
-
"'pkl' = a pickled pandas DataFrame, 'csv' = RFC4180 CSV with a "
|
|
20
|
-
"header row, 'parquet' = Apache Parquet. The label column must be "
|
|
21
|
-
"present in the uploaded table and its name is supplied on the "
|
|
22
|
-
"subsequent /v1/trainer/run or /v1/reasoning/fit call as "
|
|
23
|
-
"`label_column`."
|
|
24
|
-
),
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class CreateUploadResponse(BaseModel):
|
|
29
|
-
model_id: str
|
|
30
|
-
upload_url: str
|
|
31
|
-
upload_key: str
|
|
32
|
-
file_format: Literal["pkl", "csv", "parquet"]
|
|
33
|
-
content_type: str
|
|
34
|
-
expires_in: int
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{outerproduct_http_types-0.2.0 → outerproduct_http_types-0.3.0}/src/outerproduct_http_types/py.typed
RENAMED
|
File without changes
|
|
File without changes
|