mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged as possibly problematic.

Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/db/nopdb.py CHANGED
@@ -16,6 +16,9 @@
 import datetime
 from typing import Optional, Union
 
+import mlrun.alerts
+import mlrun.common.formatters
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -79,7 +82,10 @@ class NopDB(RunDBInterface):
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
@@ -128,7 +134,18 @@ class NopDB(RunDBInterface):
     ):
         pass
 
-    def del_artifact(self, key, tag="", project="", tree=None, uid=None):
+    def del_artifact(
+        self,
+        key,
+        tag="",
+        project="",
+        tree=None,
+        uid=None,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
         pass
 
     def del_artifacts(self, name="", project="", tag="", labels=None):
@@ -196,7 +213,7 @@ class NopDB(RunDBInterface):
     def list_projects(
         self,
         owner: str = None,
-        format_: mlrun.common.schemas.ProjectsFormat = mlrun.common.schemas.ProjectsFormat.name_only,
+        format_: mlrun.common.formatters.ProjectFormat = mlrun.common.formatters.ProjectFormat.name_only,
         labels: list[str] = None,
         state: mlrun.common.schemas.ProjectState = None,
     ) -> mlrun.common.schemas.ProjectsOutput:
@@ -351,8 +368,8 @@ class NopDB(RunDBInterface):
         namespace: str = None,
         timeout: int = 30,
         format_: Union[
-            str, mlrun.common.schemas.PipelinesFormat
-        ] = mlrun.common.schemas.PipelinesFormat.summary,
+            str, mlrun.common.formatters.PipelineFormat
+        ] = mlrun.common.formatters.PipelineFormat.summary,
         project: str = None,
     ):
         pass
@@ -365,8 +382,8 @@ class NopDB(RunDBInterface):
         page_token: str = "",
         filter_: str = "",
         format_: Union[
-            str, mlrun.common.schemas.PipelinesFormat
-        ] = mlrun.common.schemas.PipelinesFormat.metadata_only,
+            str, mlrun.common.formatters.PipelineFormat
+        ] = mlrun.common.formatters.PipelineFormat.metadata_only,
         page_size: int = None,
     ) -> mlrun.common.schemas.PipelinesOutput:
         pass
@@ -506,12 +523,100 @@ class NopDB(RunDBInterface):
     ):
         pass
 
+    def store_api_gateway(
+        self,
+        api_gateway: Union[
+            mlrun.common.schemas.APIGateway,
+            mlrun.runtimes.nuclio.api_gateway.APIGateway,
+        ],
+        project: str = None,
+    ) -> mlrun.common.schemas.APIGateway:
+        pass
+
+    def list_api_gateways(self, project=None):
+        pass
+
+    def get_api_gateway(self, name, project=None):
+        pass
+
+    def delete_api_gateway(self, name, project=None):
+        pass
+
     def verify_authorization(
         self,
         authorization_verification_input: mlrun.common.schemas.AuthorizationVerificationInput,
     ):
         pass
 
+    def remote_builder(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
+    ):
+        pass
+
+    def deploy_nuclio_function(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        builder_env: Optional[dict] = None,
+    ):
+        pass
+
+    def get_builder_status(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        offset: int = 0,
+        logs: bool = True,
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def get_nuclio_deploy_status(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def set_run_notifications(
+        self,
+        project: str,
+        runs: list[mlrun.model.RunObject],
+        notifications: list[mlrun.model.Notification],
+    ):
+        pass
+
+    def store_run_notifications(
+        self,
+        notification_objects: list[mlrun.model.Notification],
+        run_uid: str,
+        project: str = None,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def store_alert_notifications(
+        self,
+        session,
+        notification_objects: list[mlrun.model.Notification],
+        alert_id: str,
+        project: str,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def get_log_size(self, uid, project=""):
+        pass
+
+    def watch_log(self, uid, project="", watch=True, offset=0):
+        pass
+
     def get_datastore_profile(
         self, name: str, project: str
     ) -> Optional[mlrun.common.schemas.DatastoreProfile]:
@@ -529,3 +634,82 @@ class NopDB(RunDBInterface):
         self, profile: mlrun.common.schemas.DatastoreProfile, project: str
     ):
         pass
+
+    def function_status(self, project, name, kind, selector):
+        pass
+
+    def start_function(
+        self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
+    ):
+        pass
+
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass
+
+    def update_model_monitoring_controller(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+    ):
+        pass
+
+    def enable_model_monitoring(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        pass
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError
+
+    def generate_event(
+        self, name: str, event_data: Union[dict, mlrun.common.schemas.Event], project=""
+    ):
+        pass
+
+    def store_alert_config(
+        self,
+        alert_name: str,
+        alert_data: Union[dict, mlrun.alerts.alert.AlertConfig],
+        project="",
+    ):
+        pass
+
+    def get_alert_config(self, alert_name: str, project=""):
+        pass
+
+    def list_alerts_configs(self, project=""):
+        pass
+
+    def delete_alert_config(self, alert_name: str, project=""):
+        pass
+
+    def reset_alert_config(self, alert_name: str, project=""):
+        pass
+
+    def get_alert_template(self, template_name: str):
+        pass
+
+    def list_alert_templates(self):
+        pass
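
Note: the list_runs change above keeps the singular state argument for backward compatibility and adds a plural states filter. A minimal usage sketch against a real RunDBInterface implementation (the state constant names are assumptions based on mlrun's RunStates class in common/runtimes/constants.py, which this release relocates):

    import mlrun
    from mlrun.common.runtimes.constants import RunStates

    # Connect to the configured run DB and filter runs by several states at once.
    db = mlrun.get_run_db()
    runs = db.list_runs(
        project="default",
        states=[RunStates.completed, RunStates.error],  # plural filter added in this diff
    )
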
mlrun/errors.py CHANGED
@@ -155,6 +155,10 @@ class MLRunNotFoundError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.NOT_FOUND.value
 
 
+class MLRunPaginationEndOfResultsError(MLRunNotFoundError):
+    pass
+
+
 class MLRunBadRequestError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.BAD_REQUEST.value
 
@@ -183,6 +187,10 @@ class MLRunInternalServerError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.INTERNAL_SERVER_ERROR.value
 
 
+class MLRunNotImplementedServerError(MLRunHTTPStatusError):
+    error_status_code = HTTPStatus.NOT_IMPLEMENTED.value
+
+
 class MLRunServiceUnavailableError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.SERVICE_UNAVAILABLE.value
 
@@ -234,4 +242,7 @@ STATUS_ERRORS = {
     HTTPStatus.PRECONDITION_FAILED.value: MLRunPreconditionFailedError,
     HTTPStatus.INTERNAL_SERVER_ERROR.value: MLRunInternalServerError,
     HTTPStatus.SERVICE_UNAVAILABLE.value: MLRunServiceUnavailableError,
+    HTTPStatus.NOT_IMPLEMENTED.value: MLRunNotImplementedServerError,
 }
+
+EXPECTED_ERRORS = (MLRunPaginationEndOfResultsError,)
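
Note: with the new entry above, STATUS_ERRORS now maps HTTP 501 to MLRunNotImplementedServerError. A minimal sketch of how such a status-to-exception table can be used (the helper name is hypothetical, not part of mlrun's API):

    from http import HTTPStatus

    import mlrun.errors

    def error_for_status(status_code: int, message: str) -> Exception:
        # Look up the mapped MLRun error class; fall back to a plain Exception
        # for codes that have no dedicated class in STATUS_ERRORS.
        error_class = mlrun.errors.STATUS_ERRORS.get(status_code)
        if error_class is None:
            return Exception(f"HTTP {status_code}: {message}")
        return error_class(message)

    err = error_for_status(HTTPStatus.NOT_IMPLEMENTED.value, "not supported")
    assert isinstance(err, mlrun.errors.MLRunNotImplementedServerError)
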
mlrun/execution.py CHANGED
@@ -22,6 +22,7 @@ import yaml
 from dateutil import parser
 
 import mlrun
+import mlrun.common.constants as mlrun_constants
 from mlrun.artifacts import ModelArtifact
 from mlrun.datastore.store_resources import get_store_resource
 from mlrun.errors import MLRunInvalidArgumentError
@@ -129,7 +130,9 @@ class MLClientCtx:
     @property
     def tag(self):
         """Run tag (uid or workflow id if exists)"""
-        return self._labels.get("workflow") or self._uid
+        return (
+            self._labels.get(mlrun_constants.MLRunInternalLabels.workflow) or self._uid
+        )
 
     @property
     def state(self):
@@ -224,12 +227,12 @@ class MLClientCtx:
             with context.get_child_context(myparam=param) as child:
                 accuracy = child_handler(child, df, **child.parameters)
                 accuracy_sum += accuracy
-                child.log_result('accuracy', accuracy)
+                child.log_result("accuracy", accuracy)
                 if accuracy > best_accuracy:
                     child.mark_as_best()
                     best_accuracy = accuracy
 
-            context.log_result('avg_accuracy', accuracy_sum / len(param_list))
+            context.log_result("avg_accuracy", accuracy_sum / len(param_list))
 
         :param params: Extra (or override) params to parent context
         :param with_parent_params: Child will copy the parent parameters and add to them
@@ -289,7 +292,9 @@ class MLClientCtx:
 
         Example::
 
-            feature_vector = context.get_store_resource("store://feature-vectors/default/myvec")
+            feature_vector = context.get_store_resource(
+                "store://feature-vectors/default/myvec"
+            )
             dataset = context.get_store_resource("store://artifacts/default/mydata")
 
         :param url: Store resource uri/path, store://<type>/<project>/<name>:<version>
@@ -327,8 +332,10 @@ class MLClientCtx:
             "uri": uri,
             "owner": get_in(self._labels, "owner"),
         }
-        if "workflow" in self._labels:
-            resp["workflow"] = self._labels["workflow"]
+        if mlrun_constants.MLRunInternalLabels.workflow in self._labels:
+            resp[mlrun_constants.MLRunInternalLabels.workflow] = self._labels[
+                mlrun_constants.MLRunInternalLabels.workflow
+            ]
         return resp
 
     @classmethod
@@ -394,7 +401,7 @@ class MLClientCtx:
             self._set_input(k, v)
 
         if host and not is_api:
-            self.set_label("host", host)
+            self.set_label(mlrun_constants.MLRunInternalLabels.host, host)
 
         start = get_in(attrs, "status.start_time")
         if start:
@@ -421,7 +428,7 @@ class MLClientCtx:
 
         Example::
 
-            data_path=context.artifact_subpath('data')
+            data_path = context.artifact_subpath("data")
 
         """
         return os.path.join(self.artifact_path, *subpaths)
@@ -525,7 +532,7 @@ class MLClientCtx:
 
         Example::
 
-            context.log_result('accuracy', 0.85)
+            context.log_result("accuracy", 0.85)
 
         :param key: Result key
         :param value: Result value
@@ -539,7 +546,7 @@ class MLClientCtx:
 
         Example::
 
-            context.log_results({'accuracy': 0.85, 'loss': 0.2})
+            context.log_results({"accuracy": 0.85, "loss": 0.2})
 
         :param results: Key/value dict or results
         :param commit: Commit (write to DB now vs wait for the end of the run)
@@ -674,7 +681,9 @@ class MLClientCtx:
                 "age": [42, 52, 36, 24, 73],
                 "testScore": [25, 94, 57, 62, 70],
             }
-            df = pd.DataFrame(raw_data, columns=["first_name", "last_name", "age", "testScore"])
+            df = pd.DataFrame(
+                raw_data, columns=["first_name", "last_name", "age", "testScore"]
+            )
             context.log_dataset("mydf", df=df, stats=True)
 
         :param key: Artifact key
@@ -752,13 +761,16 @@ class MLClientCtx:
 
         Example::
 
-            context.log_model("model", body=dumps(model),
-                              model_file="model.pkl",
-                              metrics=context.results,
-                              training_set=training_df,
-                              label_column='label',
-                              feature_vector=feature_vector_uri,
-                              labels={"app": "fraud"})
+            context.log_model(
+                "model",
+                body=dumps(model),
+                model_file="model.pkl",
+                metrics=context.results,
+                training_set=training_df,
+                label_column="label",
+                feature_vector=feature_vector_uri,
+                labels={"app": "fraud"},
+            )
 
         :param key: Artifact key or artifact class ()
         :param body: Will use the body as the artifact content
@@ -983,10 +995,15 @@ class MLClientCtx:
983
995
  # If it's a OpenMPI job, get the global rank and compare to the logging rank (worker) set in MLRun's
984
996
  # configuration:
985
997
  labels = self.labels
986
- if "host" in labels and labels.get("kind", "job") == "mpijob":
998
+ if (
999
+ mlrun_constants.MLRunInternalLabels.host in labels
1000
+ and labels.get(mlrun_constants.MLRunInternalLabels.kind, "job") == "mpijob"
1001
+ ):
987
1002
  # The host (pod name) of each worker is created by k8s, and by default it uses the rank number as the id in
988
1003
  # the following template: ...-worker-<rank>
989
- rank = int(labels["host"].rsplit("-", 1)[1])
1004
+ rank = int(
1005
+ labels[mlrun_constants.MLRunInternalLabels.host].rsplit("-", 1)[1]
1006
+ )
990
1007
  return rank == mlrun.mlconf.packagers.logging_worker
991
1008
 
992
1009
  # Single worker is always the logging worker:
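
Note: the last hunk above only swaps the hard-coded "host"/"kind" label keys for MLRunInternalLabels constants; the rank extraction itself is unchanged. A small self-contained sketch of that logic (the function name is illustrative only):

    def rank_from_worker_pod_name(pod_name: str) -> int:
        # OpenMPI worker pods are named "...-worker-<rank>" by k8s, so the
        # global rank can be recovered from the suffix after the last dash.
        return int(pod_name.rsplit("-", 1)[1])

    assert rank_from_worker_pod_name("train-job-worker-3") == 3
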
mlrun/feature_store/__init__.py CHANGED
@@ -19,7 +19,6 @@ __all__ = [
     "get_online_feature_service",
     "ingest",
     "preview",
-    "deploy_ingestion_service",
     "deploy_ingestion_service_v2",
     "delete_feature_set",
     "delete_feature_vector",
@@ -41,7 +40,6 @@ from ..features import Entity, Feature
 from .api import (
     delete_feature_set,
     delete_feature_vector,
-    deploy_ingestion_service,
     deploy_ingestion_service_v2,
     get_feature_set,
     get_feature_vector,
mlrun/feature_store/api.py CHANGED
@@ -113,6 +113,7 @@ def get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters: list = None,
 ):
     """retrieve offline feature vector results
 
@@ -136,7 +137,10 @@ def get_offline_features(
         ]
         vector = FeatureVector(features=features)
         resp = get_offline_features(
-            vector, entity_rows=trades, entity_timestamp_column="time", query="ticker in ['GOOG'] and bid>100"
+            vector,
+            entity_rows=trades,
+            entity_timestamp_column="time",
+            query="ticker in ['GOOG'] and bid>100",
         )
         print(resp.to_dataframe())
         print(vector.get_stats_table())
@@ -172,6 +176,13 @@ def get_offline_features(
                                      By default, the filter executes on the timestamp_key of each feature set.
                                      Note: the time filtering is performed on each feature set before the
                                      merge process using start_time and end_time params.
+    :param additional_filters: List of additional_filter conditions as tuples.
+                               Each tuple should be in the format (column_name, operator, value).
+                               Supported operators: "=", ">=", "<=", ">", "<".
+                               Example: [("Product", "=", "Computer")]
+                               For all supported filters, please see:
+                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
+
 
     """
     return _get_offline_features(
@@ -191,6 +202,7 @@ def get_offline_features(
         order_by,
         spark_service,
         timestamp_for_filtering,
+        additional_filters,
     )
 
 
@@ -211,6 +223,7 @@ def _get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters=None,
 ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -249,6 +262,7 @@ def _get_offline_features(
         start_time=start_time,
         end_time=end_time,
         timestamp_for_filtering=timestamp_for_filtering,
+        additional_filters=additional_filters,
     )
 
     merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -264,6 +278,7 @@ def _get_offline_features(
         update_stats=update_stats,
         query=query,
         order_by=order_by,
+        additional_filters=additional_filters,
     )
 
 
@@ -307,7 +322,7 @@ def get_online_feature_service(
 
     Example::
 
-        svc = get_online_feature_service(vector_uri, entity_keys=['ticker'])
+        svc = get_online_feature_service(vector_uri, entity_keys=["ticker"])
         try:
             resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
             print(resp)
@@ -456,7 +471,7 @@ def ingest(
         df = ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())
 
         # for running as remote job
-        config = RunConfig(image='mlrun/mlrun')
+        config = RunConfig(image="mlrun/mlrun")
         df = ingest(stocks_set, stocks, run_config=config)
 
         # specify source and targets
@@ -1002,53 +1017,6 @@ def _deploy_ingestion_service_v2(
     return function.deploy(), function
 
 
-@deprecated(
-    version="1.5.0",
-    reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> str:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        my_set.deploy_ingestion_service(source, run_config=config)
-
-    :param featureset: feature set object or uri
-    :param source: data source object describing the online or offline source
-    :param targets: list of data target objects
-    :param name: name for the job/function
-    :param run_config: service runtime configuration (function object/uri, resources, etc..)
-    :param verbose: verbose log
-
-    :return: URL to access the deployed ingestion service
-    """
-    endpoint, _ = featureset.deploy_ingestion_service(
-        source=source,
-        targets=targets,
-        name=name,
-        run_config=run_config,
-        verbose=verbose,
-    )
-    return endpoint
-
-
 def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
@@ -1121,9 +1089,10 @@ def _ingest_with_spark(
             df_to_write = target.prepare_spark_df(
                 df_to_write, key_columns, timestamp_key, spark_options
             )
+            write_format = spark_options.pop("format", None)
             if overwrite:
                 write_spark_dataframe_with_options(
-                    spark_options, df_to_write, "overwrite"
+                    spark_options, df_to_write, "overwrite", write_format=write_format
                 )
             else:
                 # appending an empty dataframe may cause an empty file to be created (e.g. when writing to parquet)
@@ -1131,7 +1100,7 @@ def _ingest_with_spark(
                 df_to_write.persist()
                 if df_to_write.count() > 0:
                     write_spark_dataframe_with_options(
-                        spark_options, df_to_write, "append"
+                        spark_options, df_to_write, "append", write_format=write_format
                     )
             target.update_resource_status("ready")
 
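
Note: the additional_filters parameter threaded through get_offline_features and _get_offline_features above takes (column, operator, value) tuples, per the new docstring. A minimal usage sketch (the vector definition is shortened and the feature/column names are placeholders):

    import mlrun.feature_store as fstore

    vector = fstore.FeatureVector(name="my-vector", features=["transactions.*"])
    resp = fstore.get_offline_features(
        vector,
        additional_filters=[("Product", "=", "Computer")],  # (column, operator, value)
    )
    df = resp.to_dataframe()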