mlrun-1.3.2rc1-py3-none-any.whl → mlrun-1.3.2rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (93)
  1. mlrun/api/api/deps.py +14 -1
  2. mlrun/api/api/endpoints/frontend_spec.py +0 -2
  3. mlrun/api/api/endpoints/functions.py +15 -27
  4. mlrun/api/api/endpoints/grafana_proxy.py +435 -74
  5. mlrun/api/api/endpoints/healthz.py +5 -18
  6. mlrun/api/api/endpoints/model_endpoints.py +33 -37
  7. mlrun/api/api/utils.py +6 -13
  8. mlrun/api/crud/__init__.py +14 -16
  9. mlrun/api/crud/logs.py +5 -7
  10. mlrun/api/crud/model_monitoring/__init__.py +2 -2
  11. mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
  12. mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
  13. mlrun/api/crud/pipelines.py +2 -3
  14. mlrun/api/db/sqldb/models/models_mysql.py +52 -19
  15. mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
  16. mlrun/api/db/sqldb/session.py +19 -26
  17. mlrun/api/schemas/__init__.py +2 -0
  18. mlrun/api/schemas/constants.py +0 -13
  19. mlrun/api/schemas/frontend_spec.py +0 -1
  20. mlrun/api/schemas/model_endpoints.py +38 -195
  21. mlrun/api/schemas/schedule.py +2 -2
  22. mlrun/api/utils/clients/log_collector.py +5 -0
  23. mlrun/builder.py +9 -41
  24. mlrun/config.py +1 -76
  25. mlrun/data_types/__init__.py +1 -6
  26. mlrun/data_types/data_types.py +1 -3
  27. mlrun/datastore/__init__.py +2 -9
  28. mlrun/datastore/sources.py +20 -25
  29. mlrun/datastore/store_resources.py +1 -1
  30. mlrun/datastore/targets.py +34 -67
  31. mlrun/datastore/utils.py +4 -26
  32. mlrun/db/base.py +2 -4
  33. mlrun/db/filedb.py +5 -13
  34. mlrun/db/httpdb.py +32 -64
  35. mlrun/db/sqldb.py +2 -4
  36. mlrun/errors.py +0 -5
  37. mlrun/execution.py +0 -2
  38. mlrun/feature_store/api.py +8 -24
  39. mlrun/feature_store/feature_set.py +6 -28
  40. mlrun/feature_store/feature_vector.py +0 -2
  41. mlrun/feature_store/ingestion.py +11 -8
  42. mlrun/feature_store/retrieval/base.py +43 -271
  43. mlrun/feature_store/retrieval/dask_merger.py +153 -55
  44. mlrun/feature_store/retrieval/job.py +3 -12
  45. mlrun/feature_store/retrieval/local_merger.py +130 -48
  46. mlrun/feature_store/retrieval/spark_merger.py +125 -126
  47. mlrun/features.py +2 -7
  48. mlrun/model_monitoring/constants.py +6 -48
  49. mlrun/model_monitoring/helpers.py +35 -118
  50. mlrun/model_monitoring/model_monitoring_batch.py +260 -293
  51. mlrun/model_monitoring/stream_processing_fs.py +253 -220
  52. mlrun/platforms/iguazio.py +0 -33
  53. mlrun/projects/project.py +72 -34
  54. mlrun/runtimes/base.py +0 -5
  55. mlrun/runtimes/daskjob.py +0 -2
  56. mlrun/runtimes/function.py +3 -29
  57. mlrun/runtimes/kubejob.py +15 -39
  58. mlrun/runtimes/local.py +45 -7
  59. mlrun/runtimes/mpijob/abstract.py +0 -2
  60. mlrun/runtimes/mpijob/v1.py +0 -2
  61. mlrun/runtimes/pod.py +0 -2
  62. mlrun/runtimes/remotesparkjob.py +0 -2
  63. mlrun/runtimes/serving.py +0 -6
  64. mlrun/runtimes/sparkjob/abstract.py +2 -39
  65. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  66. mlrun/serving/__init__.py +1 -2
  67. mlrun/serving/routers.py +35 -35
  68. mlrun/serving/server.py +12 -22
  69. mlrun/serving/states.py +30 -162
  70. mlrun/serving/v2_serving.py +10 -13
  71. mlrun/utils/clones.py +1 -1
  72. mlrun/utils/model_monitoring.py +96 -122
  73. mlrun/utils/version/version.json +2 -2
  74. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
  75. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
  76. mlrun/api/crud/model_monitoring/grafana.py +0 -427
  77. mlrun/datastore/spark_udf.py +0 -40
  78. mlrun/model_monitoring/__init__.py +0 -44
  79. mlrun/model_monitoring/common.py +0 -112
  80. mlrun/model_monitoring/model_endpoint.py +0 -141
  81. mlrun/model_monitoring/stores/__init__.py +0 -106
  82. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
  83. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  84. mlrun/model_monitoring/stores/models/__init__.py +0 -23
  85. mlrun/model_monitoring/stores/models/base.py +0 -18
  86. mlrun/model_monitoring/stores/models/mysql.py +0 -100
  87. mlrun/model_monitoring/stores/models/sqlite.py +0 -98
  88. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
  89. mlrun/utils/db.py +0 -52
  90. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
  91. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
  92. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
  93. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/api/api/endpoints/healthz.py CHANGED
@@ -12,33 +12,20 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
- import http
16
-
17
15
  from fastapi import APIRouter
18
16
 
17
+ import mlrun.api.crud
19
18
  import mlrun.api.schemas
20
- from mlrun.config import config as mlconfig
21
19
 
22
20
  router = APIRouter()
23
21
 
24
22
 
25
23
  @router.get(
26
24
  "/healthz",
27
- status_code=http.HTTPStatus.OK.value,
25
+ response_model=mlrun.api.schemas.ClientSpec,
28
26
  )
29
27
  def health():
30
28
 
31
- # offline is the initial state
32
- # waiting for chief is set for workers waiting for chief to be ready and then clusterize against it
33
- if mlconfig.httpdb.state in [
34
- mlrun.api.schemas.APIStates.offline,
35
- mlrun.api.schemas.APIStates.waiting_for_chief,
36
- ]:
37
- raise mlrun.errors.MLRunServiceUnavailableError()
38
-
39
- return {
40
- # for old `align_mlrun.sh` scripts expecting `version` in the response
41
- # TODO: remove on mlrun >= 1.6.0
42
- "version": mlconfig.version,
43
- "status": "ok",
44
- }
29
+ # TODO: From 0.7.0 client uses the /client-spec endpoint,
30
+ # when this is the oldest relevant client, remove this logic from the healthz endpoint
31
+ return mlrun.api.crud.ClientSpec().get_client_spec()
mlrun/api/api/endpoints/model_endpoints.py CHANGED
@@ -33,7 +33,7 @@ router = APIRouter()
33
33
 
34
34
  @router.put(
35
35
  "/projects/{project}/model-endpoints/{endpoint_id}",
36
- response_model=mlrun.api.schemas.ModelEndpoint,
36
+ status_code=HTTPStatus.NO_CONTENT.value,
37
37
  )
38
38
  async def create_or_patch(
39
39
  project: str,
@@ -43,9 +43,9 @@ async def create_or_patch(
43
43
  mlrun.api.api.deps.authenticate_request
44
44
  ),
45
45
  db_session: Session = Depends(mlrun.api.api.deps.get_db_session),
46
- ) -> mlrun.api.schemas.ModelEndpoint:
46
+ ):
47
47
  """
48
- Either create or update the record of a given `ModelEndpoint` object.
48
+ Either create or updates the record of a given ModelEndpoint object.
49
49
  Leaving here for backwards compatibility.
50
50
  """
51
51
 
@@ -76,7 +76,7 @@ async def create_or_patch(
76
76
  )
77
77
  # Since the endpoint records are created automatically, at point of serving function deployment, we need to use
78
78
  # V3IO_ACCESS_KEY here
79
- return await run_in_threadpool(
79
+ await run_in_threadpool(
80
80
  mlrun.api.crud.ModelEndpoints().create_or_patch,
81
81
  db_session=db_session,
82
82
  access_key=os.environ.get("V3IO_ACCESS_KEY"),
@@ -99,7 +99,7 @@ async def create_model_endpoint(
99
99
  db_session: Session = Depends(mlrun.api.api.deps.get_db_session),
100
100
  ) -> mlrun.api.schemas.ModelEndpoint:
101
101
  """
102
- Create a DB record of a given `ModelEndpoint` object.
102
+ Create a DB record of a given ModelEndpoint object.
103
103
 
104
104
  :param project: The name of the project.
105
105
  :param endpoint_id: The unique id of the model endpoint.
@@ -111,7 +111,6 @@ async def create_model_endpoint(
111
111
 
112
112
  :return: A Model endpoint object.
113
113
  """
114
-
115
114
  await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
116
115
  resource_type=mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
117
116
  project_name=project,
@@ -150,13 +149,14 @@ async def patch_model_endpoint(
150
149
  ),
151
150
  ) -> mlrun.api.schemas.ModelEndpoint:
152
151
  """
153
- Update a DB record of a given `ModelEndpoint` object.
152
+ Update a DB record of a given ModelEndpoint object.
154
153
 
155
154
  :param project: The name of the project.
156
155
  :param endpoint_id: The unique id of the model endpoint.
157
156
  :param attributes: Attributes that will be updated. The input is provided in a json structure that will be
158
157
  converted into a dictionary before applying the patch process. Note that the keys of
159
- the dictionary should exist in the DB target.
158
+ dictionary should exist in the DB target. More details about the model endpoint available
159
+ attributes can be found under :py:class:`~mlrun.api.schemas.ModelEndpoint`.
160
160
 
161
161
  example::
162
162
 
@@ -245,7 +245,7 @@ async def list_model_endpoints(
245
245
  labels or top level. By default, when no filters are applied, all available endpoints for the given project will be
246
246
  listed.
247
247
 
248
- If uids are passed: will return `ModelEndpointList` of endpoints with uid in uids
248
+ If uids are passed: will return ModelEndpointList of endpoints with uid in uids
249
249
  Labels can be used to filter on the existence of a label:
250
250
  api/projects/{project}/model-endpoints/?label=mylabel
251
251
 
@@ -264,11 +264,11 @@ async def list_model_endpoints(
264
264
  :param model: The name of the model to filter by.
265
265
  :param function: The name of the function to filter by.
266
266
  :param labels: A list of labels to filter by. Label filters work by either filtering a specific value of a label
267
- (i.e. list("key=value")) or by looking for the existence of a given key (i.e. "key").
268
- :param metrics: A list of real-time metrics to return for each endpoint. There are pre-defined real-time metrics
269
- for model endpoints such as predictions_per_second and latency_avg_5m but also custom metrics
270
- defined by the user. Please note that these metrics are stored in the time series DB and the
271
- results will be appeared under model_endpoint.spec.metrics of each endpoint.
267
+ (i.e. list("key==value")) or by looking for the existence of a given key (i.e. "key").
268
+ :param metrics: A list of metrics to return for each endpoint. There are pre-defined metrics for model endpoints
269
+ such as predictions_per_second and latency_avg_5m but also custom metrics defined by the user.
270
+ Please note that these metrics are stored in the time series DB and the results will be appeared
271
+ under model_endpoint.spec.metrics of each endpoint.
272
272
  :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
273
273
  time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
274
274
  `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
@@ -276,9 +276,9 @@ async def list_model_endpoints(
276
276
  time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
277
277
  `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
278
278
  :param top_level: If True will return only routers and endpoint that are NOT children of any router.
279
- :param uids: Will return `ModelEndpointList` of endpoints with uid in uids.
279
+ :param uids: Will return ModelEndpointList of endpoints with uid in uids.
280
280
 
281
- :return: An object of `ModelEndpointList` which is literally a list of model endpoints along with some metadata. To
281
+ :return: An object of ModelEndpointList which is literally a list of model endpoints along with some metadata. To
282
282
  get a standard list of model endpoints use ModelEndpointList.endpoints.
283
283
  """
284
284
 
@@ -333,27 +333,23 @@ async def get_model_endpoint(
333
333
  """Get a single model endpoint object. You can apply different time series metrics that will be added to the
334
334
  result.
335
335
 
336
-
337
- :param project: The name of the project
338
- :param endpoint_id: The unique id of the model endpoint.
339
- :param start: The start time of the metrics. Can be represented by a string containing an
340
- RFC 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
341
- `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or
342
- 0 for the earliest time.
343
- :param end: The end time of the metrics. Can be represented by a string containing an
344
- RFC 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
345
- `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or
346
- 0 for the earliest time.
347
- :param metrics: A list of real-time metrics to return for the model endpoint. There are
348
- pre-defined real-time metrics for model endpoints such as predictions_per_second
349
- and latency_avg_5m but also custom metrics defined by the user. Please note that
350
- these metrics are stored in the time series DB and the results will be
351
- appeared under model_endpoint.spec.metrics.
352
- :param feature_analysis: When True, the base feature statistics and current feature statistics will
353
- be added to the output of the resulting object.
354
- :param auth_info: The auth info of the request
355
-
356
- :return: A `ModelEndpoint` object.
336
+ :param project: The name of the project.
337
+ :param endpoint_id: The unique id of the model endpoint.
338
+ :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
339
+ time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`,
340
+ where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
341
+ :param end: The end time of the metrics. Can be represented by a string containing an RFC 3339
342
+ time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`,
343
+ where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
344
+ :param metrics: A list of metrics to return for the model endpoint. There are pre-defined metrics for model
345
+ endpoints such as predictions_per_second and latency_avg_5m but also custom metrics
346
+ defined by the user. Please note that these metrics are stored in the time series DB and
347
+ the results will be appeared under model_endpoint.spec.metrics.
348
+ :param feature_analysis: When True, the base feature statistics and current feature statistics will be added to
349
+ the output of the resulting object.
350
+ :param auth_info: The auth info of the request.
351
+
352
+ :return: A ModelEndpoint object.
357
353
  """
358
354
  await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
359
355
  mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
mlrun/api/api/utils.py CHANGED
@@ -98,18 +98,8 @@ def get_obj_path(schema, path, user=""):
98
98
  if not path.startswith(schema_prefix):
99
99
  path = f"{schema_prefix}{path}"
100
100
 
101
- allowed_paths_list = get_allowed_path_prefixes_list()
102
- if not any(path.startswith(allowed_path) for allowed_path in allowed_paths_list):
103
- raise mlrun.errors.MLRunAccessDeniedError("Unauthorized path")
104
- return path
105
-
106
-
107
- def get_allowed_path_prefixes_list() -> typing.List[str]:
108
- """
109
- Get list of allowed paths - v3io:// is always allowed, and also the real_path parameter if specified.
110
- We never allow local files in the allowed paths list. Allowed paths must contain a schema (://).
111
- """
112
- real_path = config.httpdb.real_path
101
+ # Check if path is allowed - v3io:// is always allowed, and also the real_path parameter if specified.
102
+ # We never allow local files in the allowed paths list. Allowed paths must contain a schema (://)
113
103
  allowed_file_paths = config.httpdb.allowed_file_paths or ""
114
104
  allowed_paths_list = [
115
105
  path.strip() for path in allowed_file_paths.split(",") if "://" in path
@@ -117,7 +107,10 @@ def get_allowed_path_prefixes_list() -> typing.List[str]:
117
107
  if real_path:
118
108
  allowed_paths_list.append(real_path)
119
109
  allowed_paths_list.append("v3io://")
120
- return allowed_paths_list
110
+
111
+ if not any(path.startswith(allowed_path) for allowed_path in allowed_paths_list):
112
+ raise mlrun.errors.MLRunAccessDeniedError("Unauthorized path")
113
+ return path
121
114
 
122
115
 
123
116
  def get_secrets(auth_info: mlrun.api.schemas.AuthInfo):
mlrun/api/crud/__init__.py CHANGED
@@ -12,19 +12,17 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
- # flake8: noqa: F401 - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
-
17
- from .artifacts import Artifacts
18
- from .client_spec import ClientSpec
19
- from .clusterization_spec import ClusterizationSpec
20
- from .feature_store import FeatureStore
21
- from .functions import Functions
22
- from .logs import Logs
23
- from .marketplace import Marketplace
24
- from .model_monitoring import ModelEndpoints
25
- from .pipelines import Pipelines
26
- from .projects import Projects
27
- from .runs import Runs
28
- from .runtime_resources import RuntimeResources
29
- from .secrets import Secrets, SecretsClientType
30
- from .tags import Tags
15
+ from .artifacts import Artifacts # noqa: F401
16
+ from .client_spec import ClientSpec # noqa: F401
17
+ from .clusterization_spec import ClusterizationSpec # noqa: F401
18
+ from .feature_store import FeatureStore # noqa: F401
19
+ from .functions import Functions # noqa: F401
20
+ from .logs import Logs # noqa: F401
21
+ from .marketplace import Marketplace # noqa: F401
22
+ from .model_monitoring import ModelEndpoints, ModelEndpointStoreType # noqa: F401
23
+ from .pipelines import Pipelines # noqa: F401
24
+ from .projects import Projects # noqa: F401
25
+ from .runs import Runs # noqa: F401
26
+ from .runtime_resources import RuntimeResources # noqa: F401
27
+ from .secrets import Secrets, SecretsClientType # noqa: F401
28
+ from .tags import Tags # noqa: F401
mlrun/api/crud/logs.py CHANGED
@@ -242,10 +242,7 @@ class Logs(
242
242
  def log_file_exists_for_run_uid(project: str, uid: str) -> (bool, pathlib.Path):
243
243
  """
244
244
  Checks if the log file exists for the given project and uid
245
- There could be two types of log files:
246
- 1. Log file which was created by the legacy logger with the following file format - project/<run-uid>)
247
- 2. Log file which was created by the new logger with the following file format- /project/<run-uid>-<pod-name>
248
- Therefore, we check if the log file exists for both formats
245
+ A Run's log file path is: /mlrun/logs/{project}/{uid}
249
246
  :param project: project name
250
247
  :param uid: run uid
251
248
  :return: True if the log file exists, False otherwise, and the log file path
@@ -253,9 +250,10 @@ class Logs(
253
250
  project_logs_dir = project_logs_path(project)
254
251
  if not project_logs_dir.exists():
255
252
  return False, None
256
- for file in os.listdir(str(project_logs_dir)):
257
- if file.startswith(uid):
258
- return True, project_logs_dir / file
253
+
254
+ log_file = log_path(project, uid)
255
+ if log_file.exists():
256
+ return True, log_file
259
257
 
260
258
  return False, None
261
259
 
mlrun/api/crud/model_monitoring/__init__.py CHANGED
@@ -12,6 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
- # flake8: noqa: F401 - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
15
 
17
- from .model_endpoints import ModelEndpoints
16
+ from .model_endpoint_store import ModelEndpointStoreType # noqa: F401
17
+ from .model_endpoints import ModelEndpoints # noqa: F401