mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged as potentially problematic.

Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/config.py CHANGED
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml

+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors

@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
     "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -188,6 +189,7 @@ default_config = {
     "background_tasks": {
         # enabled / disabled
         "timeout_mode": "enabled",
+        "function_deletion_batch_size": 10,
         # timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
         "default_timeouts": {
             "operations": {
@@ -196,6 +198,7 @@ default_config = {
                 "run_abortion": "600",
                 "abort_grace_period": "10",
                 "delete_project": "900",
+                "delete_function": "900",
             },
             "runtimes": {"dask": "600"},
         },
@@ -230,6 +233,10 @@ default_config = {
         "databricks": {
             "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
         },
+        "application": {
+            "default_sidecar_internal_port": 8050,
+            "default_authentication_mode": "accessKey",
+        },
     },
     # TODO: function defaults should be moved to the function spec config above
     "function_defaults": {
@@ -240,6 +247,7 @@ default_config = {
         "remote": "mlrun/mlrun",
         "dask": "mlrun/ml-base",
         "mpijob": "mlrun/mlrun",
+        "application": "python:3.9-slim",
     },
     # see enrich_function_preemption_spec for more info,
     # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -324,7 +332,13 @@ default_config = {
             # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
             #
             # if set to "nil" or "none", nothing would be set
-            "modes": "STRICT_TRANS_TABLES",
+            "modes": (
+                "STRICT_TRANS_TABLES"
+                ",NO_ZERO_IN_DATE"
+                ",NO_ZERO_DATE"
+                ",ERROR_FOR_DIVISION_BY_ZERO"
+                ",NO_ENGINE_SUBSTITUTION"
+            ),
         },
     },
     "jobs": {
@@ -352,10 +366,12 @@ default_config = {
         # is set to ClusterIP
         # ---------------------------------------------------------------------
         # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 0,
     },
     "logs": {
         "decode": {
@@ -443,7 +459,7 @@ default_config = {
         # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
         # git+https://github.com/mlrun/mlrun@development. by default uses the version
         "mlrun_version_specifier": "",
-        "kaniko_image": "gcr.io/kaniko-project/executor:v1.8.0",  # kaniko builder image
+        "kaniko_image": "gcr.io/kaniko-project/executor:v1.21.1",  # kaniko builder image
         "kaniko_init_container_image": "alpine:3.18",
         # image for kaniko init container when docker registry is ECR
         "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
@@ -473,6 +489,14 @@ default_config = {
            # if set to true, will log a warning for trying to use run db functionality while in nop db mode
            "verbose": True,
        },
+        "pagination": {
+            "default_page_size": 20,
+            "pagination_cache": {
+                "interval": 60,
+                "ttl": 3600,
+                "max_size": 10000,
+            },
+        },
     },
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -484,6 +508,7 @@ default_config = {
             "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
             "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
             "stream": "",
+            "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
         },
         # Offline storage path can be either relative or a full path. This path is used for general offline data
         # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -492,12 +517,14 @@ default_config = {
         # when the user is working in CE environment and has not provided any stream path.
         "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
         "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-        "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
+        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
        "store_type": "v3io-nosql",
        "endpoint_store_connection": "",
+        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
+        "tsdb_connector_type": "v3io-tsdb",
+        "tsdb_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -533,9 +560,10 @@ default_config = {
     "feature_store": {
         "data_prefixes": {
             "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
-            "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
+            "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
             # "authority" is optional and generalizes [userinfo "@"] host [":" port]
-            "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
+            "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
+            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
         },
         "default_targets": "parquet,nosql",
         "default_job_image": "mlrun/mlrun",
@@ -610,8 +638,9 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
-        # Default timeout seconds for retrieving workflow id after execution:
-        "timeouts": {"local": 120, "kfp": 30, "remote": 90},
+        # Default timeout seconds for retrieving workflow id after execution
+        # Remote workflow timeout is the maximum between remote and the inner engine timeout
+        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
     },
     "log_collector": {
         "address": "localhost:8282",
@@ -670,6 +699,14 @@ default_config = {
         "access_key": "",
     },
     "grafana_url": "",
+    "alerts": {
+        # supported modes: "enabled", "disabled".
+        "mode": "enabled"
+    },
+    "auth_with_client_id": {
+        "enabled": False,
+        "request_timeout": 5,
+    },
 }

 _is_running_as_api = None
@@ -931,6 +968,10 @@ class Config:
             self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url

+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
@@ -1060,7 +1101,8 @@ class Config:
         target: str = "online",
         artifact_path: str = None,
         function_name: str = None,
-    ) -> str:
+        **kwargs,
+    ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.

         :param project: Project name.
@@ -1076,7 +1118,8 @@ class Config:
                               relative artifact path will be taken from the global MLRun artifact path.
         :param function_name: Application name, None for model_monitoring_stream.

-        :return: Full configured path for the provided kind.
+        :return: Full configured path for the provided kind. Can be either a single path
+                 or a list of paths in the case of the online model monitoring stream path.
         """

         if target != "offline":
@@ -1085,7 +1128,7 @@ class Config:
             )
             if store_prefix_dict.get(kind):
                 # Target exist in store prefix and has a valid string value
-                return store_prefix_dict[kind].format(project=project)
+                return store_prefix_dict[kind].format(project=project, **kwargs)

             if (
                 function_name
@@ -1098,10 +1141,22 @@ class Config:
                     if function_name is None
                     else f"{kind}-{function_name.lower()}",
                 )
-            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-                project=project,
-                kind=kind,
-            )
+            elif kind == "stream":  # return list for mlrun<1.6.3 BC
+                return [
+                    mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                        project=project,
+                        kind=kind,
+                    ),  # old stream uri (pipelines) for BC ML-6043
+                    mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                        project=project,
+                        kind=kind,
+                    ),  # new stream uri (projects)
+                ]
+            else:
+                return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                    project=project,
+                    kind=kind,
+                )

         # Get the current offline path from the configuration
         file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
@@ -1348,12 +1403,25 @@ def read_env(env=None, prefix=env_prefix):
     if igz_domain:
         config["ui_url"] = f"https://mlrun-ui.{igz_domain}"

-    if config.get("log_level"):
+    if log_level := config.get("log_level"):
         import mlrun.utils.logger

         # logger created (because of imports mess) before the config is loaded (in tests), therefore we're changing its
         # level manually
-        mlrun.utils.logger.set_logger_level(config["log_level"])
+        mlrun.utils.logger.set_logger_level(log_level)
+
+    if log_formatter_name := config.get("log_formatter"):
+        import mlrun.utils.logger
+
+        log_formatter = mlrun.utils.resolve_formatter_by_kind(
+            mlrun.utils.FormatterKinds(log_formatter_name)
+        )
+        current_handler = mlrun.utils.logger.get_handler("default")
+        current_formatter_name = current_handler.formatter.__class__.__name__
+        desired_formatter_name = log_formatter.__name__
+        if current_formatter_name != desired_formatter_name:
+            current_handler.setFormatter(log_formatter())
+
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
     _convert_resources_to_str(config)
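As a quick orientation to the new defaults above, the following minimal sketch reads a few of them back through mlrun's config singleton. It is an assumption-laden illustration, not part of the diff: the attribute paths mirror the nesting shown in the hunks (for example, pagination is assumed to sit under httpdb), and MLRUN_LOG_FORMATTER is assumed to follow the usual MLRUN_ environment-variable mapping for the log_formatter key.

import os

# Opt in to the new extended human-readable formatter before mlrun loads its config
# (assumption: the standard MLRUN_ env prefix maps onto the "log_formatter" key).
os.environ["MLRUN_LOG_FORMATTER"] = "human_extended"

import mlrun  # imported after setting the env var so the config picks it up

# Attribute paths below follow the nesting shown in the hunks above.
print(mlrun.mlconf.model_endpoint_monitoring.tsdb_connector_type)  # "v3io-tsdb" by default
print(mlrun.mlconf.model_endpoint_monitoring.tsdb_connection)      # "" until configured
print(mlrun.mlconf.httpdb.pagination.default_page_size)            # 20, assuming pagination lives under httpdb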
mlrun/data_types/data_types.py CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"


 def pd_schema_to_value_type(value):
@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }

     if type_name in type_map:
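The two new ValueType members can be exercised directly; a minimal sketch relying only on what the hunks above show:

from mlrun.data_types.data_types import ValueType

# New enum members introduced in this diff.
assert ValueType.LIST.value == "List"
assert ValueType.Tuple.value == "Tuple"

# Pre-existing members are unchanged.
assert ValueType.BOOL_LIST.value == "List[bool]"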
mlrun/data_types/to_pandas.py CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
             msg = (
                 "toPandas attempted Arrow optimization because "
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                "failed by the reason below:\n %s\n"
+                f"failed by the reason below:\n {e}\n"
                 "Attempting non-optimization as "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                "true." % str(e)
+                "true."
             )
             warnings.warn(msg)
             use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                 "reached the error below and will not continue because automatic fallback "
                 "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                "false.\n %s" % str(e)
+                f"false.\n {e}"
             )
             warnings.warn(msg)
             raise
@@ -144,7 +144,7 @@ def toPandas(spark_df):
                 "reached the error below and can not continue. Note that "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                 "effect on failures in the middle of "
-                "computation.\n %s" % str(e)
+                f"computation.\n {e}"
             )
             warnings.warn(msg)
             raise
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)

     dtype = [None] * len(spark_df.schema)
-    for fieldIdx, field in enumerate(spark_df.schema):
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:, fieldIdx]
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]

@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
             and pandas_col.isnull().any()
         ):
-            dtype[fieldIdx] = pandas_type
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = np.float64
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = object
+            dtype[field_idx] = object

     df = pd.DataFrame()
     for index, t in enumerate(dtype):
mlrun/datastore/__init__.py CHANGED
@@ -64,7 +64,7 @@ from .store_resources import (
     parse_store_uri,
 )
 from .targets import CSVTarget, NoSqlTarget, ParquetTarget, StreamTarget
-from .utils import parse_kafka_url
+from .utils import get_kafka_brokers_from_dict, parse_kafka_url

 store_manager = StoreManager()

@@ -107,13 +107,10 @@ def get_stream_pusher(stream_path: str, **kwargs):
     :param stream_path: path/url of stream
     """

-    if stream_path.startswith("kafka://") or "kafka_bootstrap_servers" in kwargs:
-        topic, bootstrap_servers = parse_kafka_url(
-            stream_path, kwargs.get("kafka_bootstrap_servers")
-        )
-        return KafkaOutputStream(
-            topic, bootstrap_servers, kwargs.get("kafka_producer_options")
-        )
+    kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+    if stream_path.startswith("kafka://") or kafka_brokers:
+        topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
+        return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
     elif stream_path.startswith("http://") or stream_path.startswith("https://"):
         return HTTPOutputStream(stream_path=stream_path)
     elif "://" not in stream_path:
mlrun/datastore/alibaba_oss.py ADDED
@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
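The new OSSStore resolves its credentials from ALIBABA_ACCESS_KEY_ID, ALIBABA_SECRET_ACCESS_KEY and ALIBABA_ENDPOINT_URL, as its __init__ above shows. A minimal usage sketch follows; it assumes the store is registered under an oss:// scheme elsewhere in the package (the registration is not part of this file), and the bucket, object path, and endpoint are placeholders.

import os

import mlrun

# Credentials read by OSSStore.__init__ (see the diff above); values are placeholders.
os.environ["ALIBABA_ACCESS_KEY_ID"] = "<access-key-id>"
os.environ["ALIBABA_SECRET_ACCESS_KEY"] = "<secret-access-key>"
os.environ["ALIBABA_ENDPOINT_URL"] = "https://oss-cn-hangzhou.aliyuncs.com"

# Assumes the datastore is registered under the "oss://" scheme.
item = mlrun.get_dataitem("oss://my-bucket/path/to/data.csv")
df = item.as_df()
print(df.head())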
mlrun/datastore/azure_blob.py CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
             st[key] = parsed_value

         account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
             host = primary_url
-        else:
+        elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
         if "account_key" in st:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]