mlrun 1.7.0rc6__py3-none-any.whl → 1.7.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (84)
  1. mlrun/__main__.py +2 -0
  2. mlrun/common/constants.py +6 -0
  3. mlrun/common/schemas/__init__.py +5 -0
  4. mlrun/common/schemas/api_gateway.py +8 -1
  5. mlrun/common/schemas/hub.py +7 -9
  6. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +36 -19
  8. mlrun/{model_monitoring/stores/models/__init__.py → common/schemas/pagination.py} +9 -10
  9. mlrun/common/schemas/project.py +16 -10
  10. mlrun/common/types.py +7 -1
  11. mlrun/config.py +35 -10
  12. mlrun/data_types/data_types.py +4 -0
  13. mlrun/datastore/__init__.py +3 -7
  14. mlrun/datastore/alibaba_oss.py +130 -0
  15. mlrun/datastore/azure_blob.py +4 -5
  16. mlrun/datastore/base.py +22 -16
  17. mlrun/datastore/datastore.py +4 -0
  18. mlrun/datastore/datastore_profile.py +19 -1
  19. mlrun/datastore/google_cloud_storage.py +1 -1
  20. mlrun/datastore/snowflake_utils.py +43 -0
  21. mlrun/datastore/sources.py +11 -29
  22. mlrun/datastore/targets.py +131 -11
  23. mlrun/datastore/utils.py +10 -5
  24. mlrun/db/base.py +58 -6
  25. mlrun/db/httpdb.py +183 -77
  26. mlrun/db/nopdb.py +110 -0
  27. mlrun/feature_store/api.py +3 -2
  28. mlrun/feature_store/retrieval/spark_merger.py +27 -23
  29. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  30. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  31. mlrun/kfpops.py +2 -5
  32. mlrun/launcher/base.py +1 -1
  33. mlrun/launcher/client.py +2 -2
  34. mlrun/model.py +1 -0
  35. mlrun/model_monitoring/__init__.py +1 -1
  36. mlrun/model_monitoring/api.py +104 -295
  37. mlrun/model_monitoring/controller.py +25 -25
  38. mlrun/model_monitoring/db/__init__.py +16 -0
  39. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
  40. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  41. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
  42. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  43. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
  44. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
  45. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
  46. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
  47. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
  48. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  49. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
  50. mlrun/model_monitoring/helpers.py +3 -3
  51. mlrun/model_monitoring/stream_processing.py +41 -9
  52. mlrun/model_monitoring/tracking_policy.py +7 -1
  53. mlrun/model_monitoring/writer.py +4 -36
  54. mlrun/projects/pipelines.py +14 -2
  55. mlrun/projects/project.py +141 -122
  56. mlrun/run.py +8 -2
  57. mlrun/runtimes/__init__.py +16 -0
  58. mlrun/runtimes/base.py +10 -1
  59. mlrun/runtimes/kubejob.py +26 -121
  60. mlrun/runtimes/nuclio/api_gateway.py +243 -66
  61. mlrun/runtimes/nuclio/application/application.py +79 -1
  62. mlrun/runtimes/nuclio/application/reverse_proxy.go +9 -1
  63. mlrun/runtimes/nuclio/function.py +14 -8
  64. mlrun/runtimes/nuclio/serving.py +30 -34
  65. mlrun/runtimes/pod.py +171 -0
  66. mlrun/runtimes/utils.py +0 -28
  67. mlrun/serving/remote.py +2 -3
  68. mlrun/serving/routers.py +4 -3
  69. mlrun/serving/server.py +5 -7
  70. mlrun/serving/states.py +40 -23
  71. mlrun/serving/v2_serving.py +4 -3
  72. mlrun/utils/helpers.py +34 -0
  73. mlrun/utils/http.py +1 -1
  74. mlrun/utils/retryer.py +1 -0
  75. mlrun/utils/version/version.json +2 -2
  76. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/METADATA +25 -16
  77. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/RECORD +81 -75
  78. mlrun/model_monitoring/batch.py +0 -933
  79. mlrun/model_monitoring/stores/models/mysql.py +0 -34
  80. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  81. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/LICENSE +0 -0
  82. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/WHEEL +0 -0
  83. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/entry_points.txt +0 -0
  84. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/top_level.txt +0 -0
mlrun/__main__.py CHANGED
@@ -505,6 +505,8 @@ def build(
     if kfp:
         print("Runtime:")
         pprint(runtime)
+        # use kind = "job" by default if not specified
+        runtime.setdefault("kind", "job")
         func = new_function(runtime=runtime)
 
     elif func_url:
mlrun/common/constants.py CHANGED
@@ -14,4 +14,10 @@
 #
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_CREATED_LABEL = "mlrun-created"
+MLRUN_MODEL_CONF = "model-conf"
+MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_MODEL_CONF}"
+MLRUN_SERVING_SPEC_FILENAME = "serving_spec.json"
+MLRUN_SERVING_SPEC_PATH = (
+    f"{MLRUN_SERVING_SPEC_MOUNT_PATH}/{MLRUN_SERVING_SPEC_FILENAME}"
+)
 MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
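For reference, the new constants compose into a single mount path for the serving spec; a minimal check derived purely from the definitions above:

    from mlrun.common.constants import MLRUN_SERVING_SPEC_PATH

    # "/tmp/mlrun/model-conf" + "/" + "serving_spec.json"
    assert MLRUN_SERVING_SPEC_PATH == "/tmp/mlrun/model-conf/serving_spec.json"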
mlrun/common/schemas/__init__.py CHANGED
@@ -21,6 +21,7 @@ from .api_gateway import (
     APIGatewayMetadata,
     APIGatewaysOutput,
     APIGatewaySpec,
+    APIGatewayState,
     APIGatewayStatus,
     APIGatewayUpstream,
 )
@@ -124,6 +125,7 @@ from .model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
     Features,
+    FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
     GrafanaDataPoint,
@@ -139,6 +141,7 @@ from .model_monitoring import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    PrometheusEndpoints,
     TimeSeriesTarget,
 )
 from .notification import (
@@ -149,12 +152,14 @@ from .notification import (
     SetNotificationRequest,
 )
 from .object import ObjectKind, ObjectMetadata, ObjectSpec, ObjectStatus
+from .pagination import PaginationInfo
 from .pipeline import PipelinesFormat, PipelinesOutput, PipelinesPagination
 from .project import (
     IguazioProject,
     Project,
     ProjectDesiredState,
     ProjectMetadata,
+    ProjectOutput,
     ProjectOwner,
     ProjectsFormat,
     ProjectsOutput,
mlrun/common/schemas/api_gateway.py CHANGED
@@ -36,6 +36,13 @@ class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
     )
 
 
+class APIGatewayState(mlrun.common.types.StrEnum):
+    none = ""
+    ready = "ready"
+    error = "error"
+    waiting_for_provisioning = "waitingForProvisioning"
+
+
 class _APIGatewayBaseModel(pydantic.BaseModel):
     class Config:
         extra = pydantic.Extra.allow
@@ -72,7 +79,7 @@ class APIGatewaySpec(_APIGatewayBaseModel):
 
 class APIGatewayStatus(_APIGatewayBaseModel):
     name: Optional[str]
-    state: Optional[str]
+    state: Optional[APIGatewayState]
 
 
 class APIGateway(_APIGatewayBaseModel):
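A minimal sketch of consuming the now-typed gateway state; the sample status object is illustrative:

    import mlrun.common.schemas as schemas

    status = schemas.APIGatewayStatus(name="my-gateway", state="waitingForProvisioning")
    # state is coerced to the APIGatewayState enum instead of a free-form string
    assert status.state == schemas.APIGatewayState.waiting_for_provisioning
    assert status.state != schemas.APIGatewayState.ready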
mlrun/common/schemas/hub.py CHANGED
@@ -59,28 +59,26 @@ class HubSource(BaseModel):
         return f"{self.spec.path}/{self.spec.object_type}/{self.spec.channel}/{relative_path}"
 
     def get_catalog_uri(self):
-        return self.get_full_uri(mlrun.config.config.hub.catalog_filename)
+        return self.get_full_uri(mlrun.mlconf.hub.catalog_filename)
 
     @classmethod
     def generate_default_source(cls):
-        if not mlrun.config.config.hub.default_source.create:
+        if not mlrun.mlconf.hub.default_source.create:
             return None
 
         now = datetime.now(timezone.utc)
         hub_metadata = HubObjectMetadata(
-            name=mlrun.config.config.hub.default_source.name,
-            description=mlrun.config.config.hub.default_source.description,
+            name=mlrun.mlconf.hub.default_source.name,
+            description=mlrun.mlconf.hub.default_source.description,
             created=now,
             updated=now,
         )
         return cls(
             metadata=hub_metadata,
             spec=HubSourceSpec(
-                path=mlrun.config.config.hub.default_source.url,
-                channel=mlrun.config.config.hub.default_source.channel,
-                object_type=HubSourceType(
-                    mlrun.config.config.hub.default_source.object_type
-                ),
+                path=mlrun.mlconf.hub.default_source.url,
+                channel=mlrun.mlconf.hub.default_source.channel,
+                object_type=HubSourceType(mlrun.mlconf.hub.default_source.object_type),
             ),
             status=ObjectStatus(state="created"),
         )
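The hub changes are a pure refactor: mlrun.mlconf is the same configuration singleton as mlrun.config.config, so behavior is unchanged. A quick illustration:

    import mlrun
    import mlrun.config

    # mlrun.mlconf is an alias exported from mlrun/__init__.py
    assert mlrun.mlconf is mlrun.config.config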
mlrun/common/schemas/model_monitoring/__init__.py CHANGED
@@ -22,6 +22,7 @@ from .constants import (
     EventFieldType,
     EventKeyMetrics,
     EventLiveStats,
+    FeatureSetFeatures,
     FileTargetKind,
     FunctionURI,
     ModelEndpointTarget,
@@ -29,9 +30,12 @@ from .constants import (
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
     ProjectSecretKeys,
+    PrometheusEndpoints,
     PrometheusMetric,
+    SchedulingKeys,
     TimeSeriesTarget,
     VersionedModel,
+    WriterEvent,
 )
 from .grafana import (
     GrafanaColumn,
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -21,6 +21,12 @@ import mlrun.common.helpers
 from mlrun.common.types import StrEnum
 
 
+class MonitoringStrEnum(StrEnum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
 class EventFieldType:
     FUNCTION_URI = "function_uri"
     FUNCTION = "function"
@@ -77,6 +83,20 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
 
 
+class FeatureSetFeatures(MonitoringStrEnum):
+    LATENCY = EventFieldType.LATENCY
+    ERROR_COUNT = EventFieldType.ERROR_COUNT
+    METRICS = EventFieldType.METRICS
+
+    @classmethod
+    def time_stamp(cls):
+        return EventFieldType.TIMESTAMP
+
+    @classmethod
+    def entity(cls):
+        return EventFieldType.ENDPOINT_ID
+
+
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
     CURRENT_STATS = "current_stats"
@@ -89,7 +109,7 @@ class ApplicationEvent:
     OUTPUT_STREAM_URI = "output_stream_uri"
 
 
-class WriterEvent(StrEnum):
+class WriterEvent(MonitoringStrEnum):
     APPLICATION_NAME = "application_name"
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
@@ -101,10 +121,6 @@ class WriterEvent(StrEnum):
     RESULT_EXTRA_DATA = "result_extra_data"
     CURRENT_STATS = "current_stats"
 
-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-
 
 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
@@ -135,7 +151,7 @@ class ProjectSecretKeys:
     ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
     PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
-    KAFKA_BOOTSTRAP_SERVERS = "KAFKA_BOOTSTRAP_SERVERS"
+    KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
 
 
@@ -146,6 +162,9 @@ class ModelMonitoringStoreKinds:
 
 class SchedulingKeys:
     LAST_ANALYZED = "last_analyzed"
+    ENDPOINT_ID = "endpoint_id"
+    APPLICATION_NAME = "application_name"
+    UID = "uid"
 
 
 class FileTargetKind:
@@ -155,6 +174,8 @@ class FileTargetKind:
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
+    APP_RESULTS = "app_results"
+    MONITORING_SCHEDULES = "monitoring_schedules"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -177,20 +198,16 @@ class PrometheusMetric:
     DRIFT_STATUS = "drift_status"
 
 
-class MonitoringFunctionNames:
-    WRITER = "model-monitoring-writer"
-    BATCH = "model-monitoring-batch"
-    APPLICATION_CONTROLLER = "model-monitoring-controller"
-    STREAM = "model-monitoring-stream"
+class PrometheusEndpoints(MonitoringStrEnum):
+    MODEL_MONITORING_METRICS = "/model-monitoring-metrics"
+    MONITORING_BATCH_METRICS = "/monitoring-batch-metrics"
+    MONITORING_DRIFT_STATUS = "/monitoring-drift-status"
+
 
-    @staticmethod
-    def all():
-        return [
-            MonitoringFunctionNames.WRITER,
-            MonitoringFunctionNames.STREAM,
-            MonitoringFunctionNames.BATCH,
-            MonitoringFunctionNames.APPLICATION_CONTROLLER,
-        ]
+class MonitoringFunctionNames(MonitoringStrEnum):
+    STREAM = "model-monitoring-stream"
+    APPLICATION_CONTROLLER = "model-monitoring-controller"
+    WRITER = "model-monitoring-writer"
 
 
 @dataclass
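The new MonitoringStrEnum base gives every monitoring enum a shared list() helper; for example, the function names above materialize in definition order:

    from mlrun.common.schemas.model_monitoring import MonitoringFunctionNames

    assert MonitoringFunctionNames.list() == [
        "model-monitoring-stream",
        "model-monitoring-controller",
        "model-monitoring-writer",
    ]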
mlrun/{model_monitoring/stores/models/__init__.py → common/schemas/pagination.py} RENAMED
@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Union
+import typing
 
-from .mysql import ModelEndpointsTable as MySQLModelEndpointsTable
-from .sqlite import ModelEndpointsTable as SQLiteModelEndpointsTable
+import pydantic
 
 
-def get_model_endpoints_table(
-    connection_string: Optional[str] = None,
-) -> Union[type[MySQLModelEndpointsTable], type[SQLiteModelEndpointsTable]]:
-    """Return ModelEndpointsTable based on the provided connection string"""
-    if connection_string and "mysql:" in connection_string:
-        return MySQLModelEndpointsTable
-    return SQLiteModelEndpointsTable
+class PaginationInfo(pydantic.BaseModel):
+    class Config:
+        allow_population_by_field_name = True
+
+    page: typing.Optional[int]
+    page_size: typing.Optional[int] = pydantic.Field(alias="page-size")
+    page_token: typing.Optional[str] = pydantic.Field(alias="page-token")
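A minimal sketch of the new PaginationInfo schema, which accepts either the query-parameter aliases or the plain field names (sample values are illustrative):

    from mlrun.common.schemas import PaginationInfo

    by_alias = PaginationInfo(**{"page": 2, "page-size": 50, "page-token": "abc"})
    by_name = PaginationInfo(page=2, page_size=50, page_token="abc")
    assert by_alias == by_name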
mlrun/common/schemas/project.py CHANGED
@@ -87,6 +87,7 @@ class ProjectSpec(pydantic.BaseModel):
     custom_packagers: typing.Optional[list[tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
     build: typing.Optional[ImageBuilder] = None
+    default_function_node_selector: typing.Optional[dict] = {}
 
     class Config:
         extra = pydantic.Extra.allow
@@ -119,17 +120,22 @@ class IguazioProject(pydantic.BaseModel):
     data: dict
 
 
+# The format query param controls the project type used:
+# full - Project
+# name_only - str
+# summary - ProjectSummary
+# leader - currently only IguazioProject supported
+# The way pydantic handles typing.Union is that it takes the object and tries to coerce it to be the types of the
+# union by the definition order. Therefore we can't currently add generic dict for all leader formats, but we need
+# to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
+# https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
+ProjectOutput = typing.TypeVar(
+    "ProjectOutput", Project, str, ProjectSummary, IguazioProject
+)
+
+
 class ProjectsOutput(pydantic.BaseModel):
-    # The format query param controls the project type used:
-    # full - Project
-    # name_only - str
-    # summary - ProjectSummary
-    # leader - currently only IguazioProject supported
-    # The way pydantic handles typing.Union is that it takes the object and tries to coerce it to be the types of the
-    # union by the definition order. Therefore we can't currently add generic dict for all leader formats, but we need
-    # to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
-    # https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
-    projects: list[typing.Union[Project, str, ProjectSummary, IguazioProject]]
+    projects: list[ProjectOutput]
 
 
 class ProjectSummariesOutput(pydantic.BaseModel):
mlrun/common/types.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 
@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):
 
     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
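Because the backported HTTPMethod inherits from StrEnum (a str subclass), its members compare equal to plain verb strings:

    from mlrun.common.types import HTTPMethod

    assert HTTPMethod.GET == "GET"
    assert HTTPMethod.POST.value == "POST"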
mlrun/config.py CHANGED
@@ -240,6 +240,7 @@ default_config = {
         "remote": "mlrun/mlrun",
         "dask": "mlrun/ml-base",
         "mpijob": "mlrun/mlrun",
+        "application": "python:3.9-slim",
     },
     # see enrich_function_preemption_spec for more info,
     # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -362,6 +363,8 @@ default_config = {
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 4096,
     },
     "logs": {
         "decode": {
@@ -479,6 +482,14 @@ default_config = {
            # if set to true, will log a warning for trying to use run db functionality while in nop db mode
            "verbose": True,
        },
+        "pagination": {
+            "default_page_size": 20,
+            "pagination_cache": {
+                "interval": 60,
+                "ttl": 3600,
+                "max_size": 10000,
+            },
+        },
     },
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -498,10 +509,9 @@ default_config = {
         # when the user is working in CE environment and has not provided any stream path.
         "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
         "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-        "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
+        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
     },
@@ -542,6 +552,7 @@ default_config = {
         "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
         # "authority" is optional and generalizes [userinfo "@"] host [":" port]
         "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
+        "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
     },
     "default_targets": "parquet,nosql",
     "default_job_image": "mlrun/mlrun",
@@ -616,8 +627,9 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
-        # Default timeout seconds for retrieving workflow id after execution:
-        "timeouts": {"local": 120, "kfp": 30, "remote": 90},
+        # Default timeout seconds for retrieving workflow id after execution
+        # Remote workflow timeout is the maximum between remote and the inner engine timeout
+        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
     },
     "log_collector": {
         "address": "localhost:8282",
@@ -1066,7 +1078,7 @@ class Config:
         target: str = "online",
         artifact_path: str = None,
         function_name: str = None,
-    ) -> str:
+    ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.
 
         :param project: Project name.
@@ -1082,7 +1094,8 @@ class Config:
                               relative artifact path will be taken from the global MLRun artifact path.
         :param function_name: Application name, None for model_monitoring_stream.
 
-        :return: Full configured path for the provided kind.
+        :return: Full configured path for the provided kind. Can be either a single path
+                 or a list of paths in the case of the online model monitoring stream path.
         """
 
         if target != "offline":
@@ -1104,10 +1117,22 @@ class Config:
                 if function_name is None
                 else f"{kind}-{function_name.lower()}",
             )
-        return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-            project=project,
-            kind=kind,
-        )
+        elif kind == "stream":  # return list for mlrun<1.6.3 BC
+            return [
+                mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                    project=project,
+                    kind=kind,
+                ),  # old stream uri (pipelines) for BC ML-6043
+                mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                    project=project,
+                    kind=kind,
+                ),  # new stream uri (projects)
+            ]
+        else:
+            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                project=project,
+                kind=kind,
+            )
 
         # Get the current offline path from the configuration
         file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
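Because the path helper can now return either a single path or a list (for kind="stream"), callers may need to normalize the result. A hedged sketch, assuming the method shown above is exposed as mlrun.mlconf.get_model_monitoring_file_target_path:

    import mlrun

    path = mlrun.mlconf.get_model_monitoring_file_target_path(
        project="my-project", kind="stream"
    )
    # kind="stream" yields [old pipelines URI, new projects URI]; other kinds return a string
    stream_uris = path if isinstance(path, list) else [path]
    for uri in stream_uris:
        print(uri)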
mlrun/data_types/data_types.py CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"
 
 
 def pd_schema_to_value_type(value):
@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }
 
     if type_name in type_map:
mlrun/datastore/__init__.py CHANGED
@@ -107,13 +107,9 @@ def get_stream_pusher(stream_path: str, **kwargs):
     :param stream_path: path/url of stream
     """
 
-    if stream_path.startswith("kafka://") or "kafka_bootstrap_servers" in kwargs:
-        topic, bootstrap_servers = parse_kafka_url(
-            stream_path, kwargs.get("kafka_bootstrap_servers")
-        )
-        return KafkaOutputStream(
-            topic, bootstrap_servers, kwargs.get("kafka_producer_options")
-        )
+    if stream_path.startswith("kafka://") or "kafka_brokers" in kwargs:
+        topic, brokers = parse_kafka_url(stream_path, kwargs.get("kafka_brokers"))
+        return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
     elif stream_path.startswith("http://") or stream_path.startswith("https://"):
         return HTTPOutputStream(stream_path=stream_path)
     elif "://" not in stream_path:
mlrun/datastore/alibaba_oss.py ADDED
@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
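A hedged usage sketch for the new Alibaba OSS datastore; the oss:// URL layout, bucket, and object path are assumptions for illustration, and the three ALIBABA_* variables must be available as secrets or environment variables, as the constructor above requires:

    import os

    import mlrun

    # credentials the OSSStore constructor looks up via _get_secret_or_env
    os.environ["ALIBABA_ACCESS_KEY_ID"] = "<access-key-id>"
    os.environ["ALIBABA_SECRET_ACCESS_KEY"] = "<secret-access-key>"
    os.environ["ALIBABA_ENDPOINT_URL"] = "https://oss-cn-hangzhou.aliyuncs.com"

    # assumed URL layout: oss://<bucket>/<object-path>
    item = mlrun.get_dataitem("oss://my-bucket/path/to/data.csv")
    df = item.as_df()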
mlrun/datastore/azure_blob.py CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
             st[key] = parsed_value
 
         account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
             host = primary_url
-        else:
+        elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
         if "account_key" in st:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]