mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/api/api/deps.py +14 -1
- mlrun/api/api/endpoints/frontend_spec.py +0 -2
- mlrun/api/api/endpoints/functions.py +15 -27
- mlrun/api/api/endpoints/grafana_proxy.py +435 -74
- mlrun/api/api/endpoints/healthz.py +5 -18
- mlrun/api/api/endpoints/model_endpoints.py +33 -37
- mlrun/api/api/utils.py +6 -13
- mlrun/api/crud/__init__.py +14 -16
- mlrun/api/crud/logs.py +5 -7
- mlrun/api/crud/model_monitoring/__init__.py +2 -2
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
- mlrun/api/crud/pipelines.py +2 -3
- mlrun/api/db/sqldb/models/models_mysql.py +52 -19
- mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
- mlrun/api/db/sqldb/session.py +19 -26
- mlrun/api/schemas/__init__.py +2 -0
- mlrun/api/schemas/constants.py +0 -13
- mlrun/api/schemas/frontend_spec.py +0 -1
- mlrun/api/schemas/model_endpoints.py +38 -195
- mlrun/api/schemas/schedule.py +2 -2
- mlrun/api/utils/clients/log_collector.py +5 -0
- mlrun/builder.py +9 -41
- mlrun/config.py +1 -76
- mlrun/data_types/__init__.py +1 -6
- mlrun/data_types/data_types.py +1 -3
- mlrun/datastore/__init__.py +2 -9
- mlrun/datastore/sources.py +20 -25
- mlrun/datastore/store_resources.py +1 -1
- mlrun/datastore/targets.py +34 -67
- mlrun/datastore/utils.py +4 -26
- mlrun/db/base.py +2 -4
- mlrun/db/filedb.py +5 -13
- mlrun/db/httpdb.py +32 -64
- mlrun/db/sqldb.py +2 -4
- mlrun/errors.py +0 -5
- mlrun/execution.py +0 -2
- mlrun/feature_store/api.py +8 -24
- mlrun/feature_store/feature_set.py +6 -28
- mlrun/feature_store/feature_vector.py +0 -2
- mlrun/feature_store/ingestion.py +11 -8
- mlrun/feature_store/retrieval/base.py +43 -271
- mlrun/feature_store/retrieval/dask_merger.py +153 -55
- mlrun/feature_store/retrieval/job.py +3 -12
- mlrun/feature_store/retrieval/local_merger.py +130 -48
- mlrun/feature_store/retrieval/spark_merger.py +125 -126
- mlrun/features.py +2 -7
- mlrun/model_monitoring/constants.py +6 -48
- mlrun/model_monitoring/helpers.py +35 -118
- mlrun/model_monitoring/model_monitoring_batch.py +260 -293
- mlrun/model_monitoring/stream_processing_fs.py +253 -220
- mlrun/platforms/iguazio.py +0 -33
- mlrun/projects/project.py +72 -34
- mlrun/runtimes/base.py +0 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/function.py +3 -29
- mlrun/runtimes/kubejob.py +15 -39
- mlrun/runtimes/local.py +45 -7
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/pod.py +0 -2
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/serving.py +0 -6
- mlrun/runtimes/sparkjob/abstract.py +2 -39
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +1 -2
- mlrun/serving/routers.py +35 -35
- mlrun/serving/server.py +12 -22
- mlrun/serving/states.py +30 -162
- mlrun/serving/v2_serving.py +10 -13
- mlrun/utils/clones.py +1 -1
- mlrun/utils/model_monitoring.py +96 -122
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
- mlrun/api/crud/model_monitoring/grafana.py +0 -427
- mlrun/datastore/spark_udf.py +0 -40
- mlrun/model_monitoring/__init__.py +0 -44
- mlrun/model_monitoring/common.py +0 -112
- mlrun/model_monitoring/model_endpoint.py +0 -141
- mlrun/model_monitoring/stores/__init__.py +0 -106
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -23
- mlrun/model_monitoring/stores/models/base.py +0 -18
- mlrun/model_monitoring/stores/models/mysql.py +0 -100
- mlrun/model_monitoring/stores/models/sqlite.py +0 -98
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
- mlrun/utils/db.py +0 -52
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/api/schemas/model_endpoints.py CHANGED

```diff
@@ -12,87 +12,52 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-import enum
-import json
-import typing
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from pydantic import BaseModel, Field
 from pydantic.main import Extra
 
-import mlrun.model_monitoring
+import mlrun.api.utils.helpers
 from mlrun.api.schemas.object import ObjectKind, ObjectSpec, ObjectStatus
+from mlrun.utils.model_monitoring import EndpointType, create_model_endpoint_id
 
 
 class ModelMonitoringStoreKinds:
-    # TODO: do changes in examples & demos In 1.5.0 remove
     ENDPOINTS = "endpoints"
     EVENTS = "events"
 
 
 class ModelEndpointMetadata(BaseModel):
-    project: Optional[str]
+    project: Optional[str]
     labels: Optional[dict] = {}
-    uid: Optional[str]
+    uid: Optional[str]
 
     class Config:
         extra = Extra.allow
 
-    @classmethod
-    def from_flat_dict(cls, endpoint_dict: dict, json_parse_values: typing.List = None):
-        """Create a `ModelEndpointMetadata` object from an endpoint dictionary
-
-        :param endpoint_dict:     Model endpoint dictionary.
-        :param json_parse_values: List of dictionary keys with a JSON string value that will be parsed into a
-                                  dictionary using json.loads().
-        """
-        new_object = cls()
-        if json_parse_values is None:
-            json_parse_values = [mlrun.model_monitoring.EventFieldType.LABELS]
-
-        return _mapping_attributes(
-            base_model=new_object,
-            flattened_dictionary=endpoint_dict,
-            json_parse_values=json_parse_values,
-        )
+
+class ModelMonitoringMode(mlrun.api.utils.helpers.StrEnum):
+    enabled = "enabled"
+    disabled = "disabled"
 
 
 class ModelEndpointSpec(ObjectSpec):
-    function_uri: Optional[str]
-    model: Optional[str]
-    model_class: Optional[str]
-    model_uri: Optional[str]
-    feature_names: Optional[List[str]]
-    label_names: Optional[List[str]]
-    stream_path: Optional[str]
-    algorithm: Optional[str]
+    function_uri: Optional[str]  # <project_name>/<function_name>:<tag>
+    model: Optional[str]  # <model_name>:<version>
+    model_class: Optional[str]
+    model_uri: Optional[str]
+    feature_names: Optional[List[str]]
+    label_names: Optional[List[str]]
+    stream_path: Optional[str]
+    algorithm: Optional[str]
     monitor_configuration: Optional[dict] = {}
-    active: Optional[bool]
-    monitoring_mode: Optional[
-        mlrun.model_monitoring.ModelMonitoringMode
-    ] = mlrun.model_monitoring.ModelMonitoringMode.disabled.value
+    active: Optional[bool]
+    monitoring_mode: Optional[str] = ModelMonitoringMode.disabled
 
-    @classmethod
-    def from_flat_dict(cls, endpoint_dict: dict, json_parse_values: typing.List = None):
-        """Create a `ModelEndpointSpec` object from an endpoint dictionary
-
-        :param endpoint_dict:     Model endpoint dictionary.
-        :param json_parse_values: List of dictionary keys with a JSON string value that will be parsed into a
-                                  dictionary using json.loads().
-        """
-        new_object = cls()
-        if json_parse_values is None:
-            json_parse_values = [
-                mlrun.model_monitoring.EventFieldType.FEATURE_NAMES,
-                mlrun.model_monitoring.EventFieldType.LABEL_NAMES,
-                mlrun.model_monitoring.EventFieldType.MONITOR_CONFIGURATION,
-            ]
-        return _mapping_attributes(
-            base_model=new_object,
-            flattened_dictionary=endpoint_dict,
-            json_parse_values=json_parse_values,
-        )
+
+class Metric(BaseModel):
+    name: str
+    values: List[Tuple[str, float]]
 
 
 class Histogram(BaseModel):
@@ -143,60 +108,28 @@ class Features(BaseModel):
 class ModelEndpointStatus(ObjectStatus):
     feature_stats: Optional[dict] = {}
     current_stats: Optional[dict] = {}
-    first_request: Optional[str]
-    last_request: Optional[str]
-
-
+    first_request: Optional[str]
+    last_request: Optional[str]
+    accuracy: Optional[float]
+    error_count: Optional[int]
+    drift_status: Optional[str]
     drift_measures: Optional[dict] = {}
-    metrics: Optional[Dict[str,
-
-
-
-
-
-    features: Optional[List[Features]] = []
-    children: Optional[List[str]] = []
-    children_uids: Optional[List[str]] = []
-    endpoint_type: Optional[
-        mlrun.model_monitoring.EndpointType
-    ] = mlrun.model_monitoring.EndpointType.NODE_EP.value
-    monitoring_feature_set_uri: Optional[str] = ""
-    state: Optional[str] = ""
+    metrics: Optional[Dict[str, Metric]]
+    features: Optional[List[Features]]
+    children: Optional[List[str]]
+    children_uids: Optional[List[str]]
+    endpoint_type: Optional[EndpointType]
+    monitoring_feature_set_uri: Optional[str]
 
     class Config:
         extra = Extra.allow
 
-    @classmethod
-    def from_flat_dict(cls, endpoint_dict: dict, json_parse_values: typing.List = None):
-        """Create a `ModelEndpointStatus` object from an endpoint dictionary
-
-        :param endpoint_dict:     Model endpoint dictionary.
-        :param json_parse_values: List of dictionary keys with a JSON string value that will be parsed into a
-                                  dictionary using json.loads().
-        """
-        new_object = cls()
-        if json_parse_values is None:
-            json_parse_values = [
-                mlrun.model_monitoring.EventFieldType.FEATURE_STATS,
-                mlrun.model_monitoring.EventFieldType.CURRENT_STATS,
-                mlrun.model_monitoring.EventFieldType.DRIFT_MEASURES,
-                mlrun.model_monitoring.EventFieldType.METRICS,
-                mlrun.model_monitoring.EventFieldType.CHILDREN,
-                mlrun.model_monitoring.EventFieldType.CHILDREN_UIDS,
-                mlrun.model_monitoring.EventFieldType.ENDPOINT_TYPE,
-            ]
-        return _mapping_attributes(
-            base_model=new_object,
-            flattened_dictionary=endpoint_dict,
-            json_parse_values=json_parse_values,
-        )
-
 
 class ModelEndpoint(BaseModel):
     kind: ObjectKind = Field(ObjectKind.model_endpoint, const=True)
-    metadata: ModelEndpointMetadata
-    spec: ModelEndpointSpec
-    status: ModelEndpointStatus
+    metadata: ModelEndpointMetadata
+    spec: ModelEndpointSpec
+    status: ModelEndpointStatus
 
     class Config:
         extra = Extra.allow
@@ -204,68 +137,15 @@ class ModelEndpoint(BaseModel):
     def __init__(self, **data: Any):
         super().__init__(**data)
         if self.metadata.uid is None:
-            uid =
+            uid = create_model_endpoint_id(
                 function_uri=self.spec.function_uri,
                 versioned_model=self.spec.model,
             )
             self.metadata.uid = str(uid)
 
-    def flat_dict(self):
-        """Generate a flattened `ModelEndpoint` dictionary. The flattened dictionary result is important for storing
-        the model endpoint object in the database.
-
-        :return: Flattened `ModelEndpoint` dictionary.
-        """
-        # Convert the ModelEndpoint object into a dictionary using BaseModel dict() function
-        # In addition, remove the BaseModel kind as it is not required by the DB schema
-        model_endpoint_dictionary = self.dict(exclude={"kind"})
-
-        # Initialize a flattened dictionary that will be filled with the model endpoint dictionary attributes
-        flatten_dict = {}
-        for k_object in model_endpoint_dictionary:
-            for key in model_endpoint_dictionary[k_object]:
-                # Extract the value of the current field
-                current_value = model_endpoint_dictionary[k_object][key]
-
-                # If the value is not from type str or bool (e.g. dict), convert it into a JSON string
-                # for matching the database required format
-                if not isinstance(current_value, (str, bool, int)) or isinstance(
-                    current_value, enum.IntEnum
-                ):
-                    flatten_dict[key] = json.dumps(current_value)
-                else:
-                    flatten_dict[key] = current_value
-
-        if mlrun.model_monitoring.EventFieldType.METRICS not in flatten_dict:
-            # Initialize metrics dictionary
-            flatten_dict[mlrun.model_monitoring.EventFieldType.METRICS] = {
-                mlrun.model_monitoring.EventKeyMetrics.GENERIC: {
-                    mlrun.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
-                    mlrun.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
-                }
-            }
-
-        # Remove the features from the dictionary as this field will be filled only within the feature analysis process
-        flatten_dict.pop(mlrun.model_monitoring.EventFieldType.FEATURES, None)
-        return flatten_dict
-
-    @classmethod
-    def from_flat_dict(cls, endpoint_dict: dict):
-        """Create a `ModelEndpoint` object from an endpoint flattened dictionary. Because the provided dictionary
-        is flattened, we pass it as is to the subclasses without splitting the keys into spec, metadata, and status.
-
-        :param endpoint_dict: Model endpoint dictionary.
-        """
-
-        return cls(
-            metadata=ModelEndpointMetadata.from_flat_dict(endpoint_dict=endpoint_dict),
-            spec=ModelEndpointSpec.from_flat_dict(endpoint_dict=endpoint_dict),
-            status=ModelEndpointStatus.from_flat_dict(endpoint_dict=endpoint_dict),
-        )
-
 
 class ModelEndpointList(BaseModel):
-    endpoints: List[ModelEndpoint]
+    endpoints: List[ModelEndpoint]
 
 
 class GrafanaColumn(BaseModel):
@@ -303,40 +183,3 @@ class GrafanaTimeSeriesTarget(BaseModel):
 
     def add_data_point(self, data_point: GrafanaDataPoint):
         self.datapoints.append((data_point.value, data_point.timestamp))
-
-
-def _mapping_attributes(
-    base_model: BaseModel,
-    flattened_dictionary: dict,
-    json_parse_values: typing.List = None,
-):
-    """Generate a `BaseModel` object with the provided dictionary attributes.
-
-    :param base_model:           `BaseModel` object (e.g. `ModelEndpointMetadata`).
-    :param flattened_dictionary: Flattened dictionary that contains the model endpoint attributes.
-    :param json_parse_values:    List of dictionary keys with a JSON string value that will be parsed into a
-                                 dictionary using json.loads().
-    """
-    # Get the fields of the provided base model object. These fields will be used to filter to relevent keys
-    # from the flattened dictionary.
-    wanted_keys = base_model.__fields__.keys()
-
-    # Generate a filtered flattened dictionary that will be parsed into the BaseModel object
-    dict_to_parse = {}
-    for field_key in wanted_keys:
-        if field_key in flattened_dictionary:
-            if field_key in json_parse_values:
-                # Parse the JSON value into a valid dictionary
-                dict_to_parse[field_key] = _json_loads_if_not_none(
-                    flattened_dictionary[field_key]
-                )
-            else:
-                dict_to_parse[field_key] = flattened_dictionary[field_key]
-
-    return base_model.parse_obj(dict_to_parse)
-
-
-def _json_loads_if_not_none(field: Any) -> Any:
-    return (
-        json.loads(field) if field and field != "null" and field is not None else None
-    )
```
mlrun/api/schemas/schedule.py CHANGED

```diff
@@ -36,8 +36,8 @@ class ScheduleCronTrigger(BaseModel):
     hour: Optional[Union[int, str]]
     minute: Optional[Union[int, str]]
     second: Optional[Union[int, str]]
-    start_date: Union[datetime, str]
-    end_date: Union[datetime, str]
+    start_date: Optional[Union[datetime, str]]
+    end_date: Optional[Union[datetime, str]]
 
     # APScheduler also supports datetime.tzinfo type, but Pydantic doesn't - so we don't
     timezone: Optional[str]
```
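Since `start_date` and `end_date` are now optional on the pydantic model, a cron trigger can be declared without them. A small hedged example (the field value is illustrative):

```python
# Minimal sketch: an open-ended schedule with no start_date/end_date supplied.
from mlrun.api.schemas.schedule import ScheduleCronTrigger

trigger = ScheduleCronTrigger(minute="*/10")
print(trigger.start_date, trigger.end_date)  # None None
```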
mlrun/api/utils/clients/log_collector.py CHANGED

```diff
@@ -152,6 +152,11 @@ class LogCollectorClient(
         try:
             has_logs = await self.has_logs(run_uid, project, verbose, raise_on_error)
             if not has_logs:
+                logger.debug(
+                    "Run has no logs to collect",
+                    run_uid=run_uid,
+                    project=project,
+                )
 
                 # run has no logs - return empty logs and exit so caller won't wait for logs or retry
                 yield b""
```
mlrun/builder.py CHANGED

```diff
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+
 import pathlib
 import re
 import tarfile
@@ -55,20 +55,20 @@ def make_dockerfile(
         dock += f"ARG {build_arg_key}={build_arg_value}\n"
 
     if source:
+        dock += f"RUN mkdir -p {workdir}\n"
         dock += f"WORKDIR {workdir}\n"
         # 'ADD' command does not extract zip files - add extraction stage to the dockerfile
         if source.endswith(".zip"):
-            source_dir = os.path.join(workdir, "source")
            stage1 = f"""
 FROM {base_image} AS extractor
 RUN apt-get update -qqy && apt install --assume-yes unzip
-RUN mkdir -p
-COPY {source}
-RUN cd
+RUN mkdir -p /source
+COPY {source} /source
+RUN cd /source && unzip {source} && rm {source}
 """
             dock = stage1 + "\n" + dock
 
-            dock += f"COPY --from=extractor
+            dock += f"COPY --from=extractor /source/ {workdir}\n"
        else:
            dock += f"ADD {source} {workdir}\n"
 
@@ -221,21 +221,7 @@ def make_kaniko_pod(
         if end == -1:
             end = len(dest)
         repo = dest[dest.find("/") + 1 : end]
-
-        # if no secret is given, assume ec2 instance has attached role which provides read/write access to ECR
-        assume_instance_role = not config.httpdb.builder.docker_registry_secret
-        configure_kaniko_ecr_init_container(kpod, registry, repo, assume_instance_role)
-
-        # project secret might conflict with the attached instance role
-        # ensure "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY" have no values or else kaniko will fail
-        # due to credentials conflict / lack of permission on given credentials
-        if assume_instance_role:
-            kpod.pod.spec.containers[0].env.extend(
-                [
-                    client.V1EnvVar(name="AWS_ACCESS_KEY_ID", value=""),
-                    client.V1EnvVar(name="AWS_SECRET_ACCESS_KEY", value=""),
-                ]
-            )
+        configure_kaniko_ecr_init_container(kpod, registry, repo)
 
         # mount regular docker config secret
     elif secret_name:
@@ -245,9 +231,7 @@ def make_kaniko_pod(
     return kpod
 
 
-def configure_kaniko_ecr_init_container(
-    kpod, registry, repo, assume_instance_role=True
-):
+def configure_kaniko_ecr_init_container(kpod, registry, repo):
     region = registry.split(".")[3]
 
     # fail silently in order to ignore "repository already exists" errors
@@ -258,13 +242,12 @@ def configure_kaniko_ecr_init_container(
     )
     init_container_env = {}
 
-    if
+    if not config.httpdb.builder.docker_registry_secret:
 
         # assume instance role has permissions to register and store a container image
         # https://github.com/GoogleContainerTools/kaniko#pushing-to-amazon-ecr
         # we only need this in the kaniko container
         kpod.env.append(client.V1EnvVar(name="AWS_SDK_LOAD_CONFIG", value="true"))
-
     else:
         aws_credentials_file_env_key = "AWS_SHARED_CREDENTIALS_FILE"
         aws_credentials_file_env_value = "/tmp/credentials"
@@ -413,20 +396,6 @@ def build_image(
     user_unix_id = runtime.spec.security_context.run_as_user
     enriched_group_id = runtime.spec.security_context.run_as_group
 
-    if source_to_copy and (
-        not runtime.spec.clone_target_dir
-        or not os.path.isabs(runtime.spec.clone_target_dir)
-    ):
-        # use a temp dir for permissions and set it as the workdir
-        tmpdir = tempfile.mkdtemp()
-        relative_workdir = runtime.spec.clone_target_dir or ""
-        if relative_workdir.startswith("./"):
-            # TODO: use 'removeprefix' when we drop python 3.7 support
-            # relative_workdir.removeprefix("./")
-            relative_workdir = relative_workdir[2:]
-
-        runtime.spec.clone_target_dir = path.join(tmpdir, "mlrun", relative_workdir)
-
     dock = make_dockerfile(
         base_image,
         commands,
@@ -435,7 +404,6 @@ def build_image(
         extra=extra,
         user_unix_id=user_unix_id,
        enriched_group_id=enriched_group_id,
-        workdir=runtime.spec.clone_target_dir,
     )
 
     kpod = make_kaniko_pod(
```
mlrun/config.py CHANGED

```diff
@@ -376,24 +376,14 @@ default_config = {
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
         "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
-        # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
-        # stream, and endpoints.
         "store_prefixes": {
             "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
             "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
-            "stream": "",
         },
-        # Offline storage path can be either relative or a full path. This path is used for general offline data
-        # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
-        "offline_storage_path": "model-endpoints/{kind}",
-        # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
-        # when the user is working in CE environment and has not provided any stream path.
-        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
         "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10000,
         # See mlrun.api.schemas.ModelEndpointStoreType for available options
-        "store_type": "
-        "endpoint_store_connection": "",
+        "store_type": "kv",
     },
     "secret_stores": {
         "vault": {
@@ -936,68 +926,6 @@ class Config:
         # Get v3io access key from the environment
         return os.environ.get("V3IO_ACCESS_KEY")
 
-    def get_model_monitoring_file_target_path(
-        self,
-        project: str = "",
-        kind: str = "",
-        target: str = "online",
-        artifact_path: str = None,
-    ) -> str:
-        """Get the full path from the configuration based on the provided project and kind.
-
-        :param project:       Project name.
-        :param kind:          Kind of target path (e.g. events, log_stream, endpoints, etc.)
-        :param target:        Can be either online or offline. If the target is online, then we try to get a specific
-                              path for the provided kind. If it doesn't exist, use the default path.
-                              If the target path is offline and the offline path is already a full path in the
-                              configuration, then the result will be that path as-is. If the offline path is a
-                              relative path, then the result will be based on the project artifact path and the offline
-                              relative path. If project artifact path wasn't provided, then we use MLRun artifact
-                              path instead.
-        :param artifact_path: Optional artifact path that will be used as a relative path. If not provided, the
-                              relative artifact path will be taken from the global MLRun artifact path.
-
-        :return: Full configured path for the provided kind.
-        """
-
-        if target != "offline":
-            store_prefix_dict = (
-                mlrun.mlconf.model_endpoint_monitoring.store_prefixes.to_dict()
-            )
-            if store_prefix_dict.get(kind):
-                # Target exist in store prefix and has a valid string value
-                return store_prefix_dict[kind].format(project=project)
-            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-                project=project, kind=kind
-            )
-
-        # Get the current offline path from the configuration
-        file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
-            project=project, kind=kind
-        )
-
-        # Absolute path
-        if any(value in file_path for value in ["://", ":///"]) or os.path.isabs(
-            file_path
-        ):
-            return file_path
-
-        # Relative path
-        else:
-            artifact_path = artifact_path or config.artifact_path
-            if artifact_path[-1] != "/":
-                artifact_path += "/"
-
-            return mlrun.utils.helpers.fill_artifact_path_template(
-                artifact_path=artifact_path + file_path, project=project
-            )
-
-    def is_ce_mode(self) -> bool:
-        # True if the setup is in CE environment
-        return isinstance(mlrun.mlconf.ce, mlrun.config.Config) and any(
-            ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
-        )
-
 
 # Global configuration
 config = Config.from_dict(default_config)
@@ -1120,18 +1048,15 @@ def read_env(env=None, prefix=env_prefix):
             cfg[path[0]] = value
 
     env_dbpath = env.get("MLRUN_DBPATH", "")
-    # expected format: https://mlrun-api.tenant.default-tenant.app.some-system.some-namespace.com
     is_remote_mlrun = (
         env_dbpath.startswith("https://mlrun-api.") and "tenant." in env_dbpath
     )
-
     # It's already a standard to set this env var to configure the v3io api, so we're supporting it (instead
     # of MLRUN_V3IO_API), in remote usage this can be auto detected from the DBPATH
     v3io_api = env.get("V3IO_API")
     if v3io_api:
         config["v3io_api"] = v3io_api
     elif is_remote_mlrun:
-        # in remote mlrun we can't use http, so we'll use https
         config["v3io_api"] = env_dbpath.replace("https://mlrun-api.", "https://webapi.")
 
     # It's already a standard to set this env var to configure the v3io framesd, so we're supporting it (instead
```
mlrun/data_types/__init__.py CHANGED

```diff
@@ -14,12 +14,7 @@
 #
 # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 
-from .data_types import (
-    InferOptions,
-    ValueType,
-    pd_schema_to_value_type,
-    python_type_to_value_type,
-)
+from .data_types import InferOptions, ValueType, pd_schema_to_value_type
 from .infer import DFDataInfer
 
 
```
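A small hedged sketch of the import surface after this change, together with the stricter helper behaviour shown in the data_types.py hunk that follows:

```python
# InferOptions, ValueType and pd_schema_to_value_type are still re-exported from the package;
# python_type_to_value_type now has to be imported from the module itself.
from mlrun.data_types import InferOptions, ValueType, pd_schema_to_value_type
from mlrun.data_types.data_types import python_type_to_value_type

assert python_type_to_value_type(int) == ValueType.INT64   # mapping shown in the next hunk
assert python_type_to_value_type(str) == ValueType.STRING

# With the str() fallback removed (next hunk), passing something without __name__,
# e.g. the string "int", would now raise AttributeError instead of being coerced.
```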
mlrun/data_types/data_types.py CHANGED

```diff
@@ -79,9 +79,7 @@ def pa_type_to_value_type(type_):
 
 
 def python_type_to_value_type(value_type):
-    type_name = (
-        value_type.__name__ if hasattr(value_type, "__name__") else str(value_type)
-    )
+    type_name = value_type.__name__
     type_map = {
         "int": ValueType.INT64,
         "str": ValueType.STRING,
```
mlrun/datastore/__init__.py CHANGED

```diff
@@ -33,12 +33,7 @@ __all__ = [
 
 import mlrun.datastore.wasbfs
 
-from ..platforms.iguazio import (
-    HTTPOutputStream,
-    KafkaOutputStream,
-    OutputStream,
-    parse_path,
-)
+from ..platforms.iguazio import KafkaOutputStream, OutputStream, parse_path
 from ..utils import logger
 from .base import DataItem
 from .datastore import StoreManager, in_memory_store, uri_to_ipython
@@ -74,7 +69,7 @@ def get_in_memory_items():
 
 
 def get_stream_pusher(stream_path: str, **kwargs):
-    """get a stream pusher object from URL
+    """get a stream pusher object from URL, currently only support v3io stream
 
     common kwargs::
 
@@ -92,8 +87,6 @@ def get_stream_pusher(stream_path: str, **kwargs):
         return KafkaOutputStream(
             topic, bootstrap_servers, kwargs.get("kafka_producer_options")
         )
-    elif stream_path.startswith("http://") or stream_path.startswith("https://"):
-        return HTTPOutputStream(stream_path=stream_path)
     elif "://" not in stream_path:
         return OutputStream(stream_path, **kwargs)
     elif stream_path.startswith("v3io"):
```