mlrun-1.3.1rc5-py3-none-any.whl → mlrun-1.4.0rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (67)
  1. mlrun/__main__.py +57 -4
  2. mlrun/api/api/endpoints/marketplace.py +57 -4
  3. mlrun/api/api/endpoints/runs.py +2 -0
  4. mlrun/api/api/utils.py +102 -0
  5. mlrun/api/crud/__init__.py +1 -0
  6. mlrun/api/crud/marketplace.py +133 -44
  7. mlrun/api/crud/notifications.py +80 -0
  8. mlrun/api/crud/runs.py +2 -0
  9. mlrun/api/crud/secrets.py +1 -0
  10. mlrun/api/db/base.py +32 -0
  11. mlrun/api/db/session.py +3 -11
  12. mlrun/api/db/sqldb/db.py +162 -1
  13. mlrun/api/db/sqldb/models/models_mysql.py +41 -0
  14. mlrun/api/db/sqldb/models/models_sqlite.py +35 -0
  15. mlrun/api/main.py +54 -1
  16. mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +70 -0
  17. mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +61 -0
  18. mlrun/api/schemas/__init__.py +1 -0
  19. mlrun/api/schemas/marketplace.py +18 -8
  20. mlrun/api/{db/filedb/__init__.py → schemas/notification.py} +17 -1
  21. mlrun/api/utils/singletons/db.py +8 -14
  22. mlrun/builder.py +37 -26
  23. mlrun/config.py +12 -2
  24. mlrun/data_types/spark.py +9 -2
  25. mlrun/datastore/base.py +10 -1
  26. mlrun/datastore/sources.py +1 -1
  27. mlrun/db/__init__.py +6 -4
  28. mlrun/db/base.py +1 -2
  29. mlrun/db/httpdb.py +32 -6
  30. mlrun/db/nopdb.py +463 -0
  31. mlrun/db/sqldb.py +47 -7
  32. mlrun/execution.py +3 -0
  33. mlrun/feature_store/api.py +26 -12
  34. mlrun/feature_store/common.py +1 -1
  35. mlrun/feature_store/steps.py +110 -13
  36. mlrun/k8s_utils.py +10 -0
  37. mlrun/model.py +43 -0
  38. mlrun/projects/operations.py +5 -2
  39. mlrun/projects/pipelines.py +4 -3
  40. mlrun/projects/project.py +50 -10
  41. mlrun/run.py +5 -4
  42. mlrun/runtimes/__init__.py +2 -6
  43. mlrun/runtimes/base.py +82 -31
  44. mlrun/runtimes/function.py +22 -0
  45. mlrun/runtimes/kubejob.py +10 -8
  46. mlrun/runtimes/serving.py +1 -1
  47. mlrun/runtimes/sparkjob/__init__.py +0 -1
  48. mlrun/runtimes/sparkjob/abstract.py +0 -2
  49. mlrun/serving/states.py +2 -2
  50. mlrun/utils/helpers.py +1 -1
  51. mlrun/utils/notifications/notification/__init__.py +1 -1
  52. mlrun/utils/notifications/notification/base.py +14 -13
  53. mlrun/utils/notifications/notification/console.py +6 -3
  54. mlrun/utils/notifications/notification/git.py +19 -12
  55. mlrun/utils/notifications/notification/ipython.py +6 -3
  56. mlrun/utils/notifications/notification/slack.py +13 -12
  57. mlrun/utils/notifications/notification_pusher.py +185 -37
  58. mlrun/utils/version/version.json +2 -2
  59. {mlrun-1.3.1rc5.dist-info → mlrun-1.4.0rc2.dist-info}/METADATA +6 -2
  60. {mlrun-1.3.1rc5.dist-info → mlrun-1.4.0rc2.dist-info}/RECORD +64 -63
  61. mlrun/api/db/filedb/db.py +0 -518
  62. mlrun/db/filedb.py +0 -899
  63. mlrun/runtimes/sparkjob/spark2job.py +0 -59
  64. {mlrun-1.3.1rc5.dist-info → mlrun-1.4.0rc2.dist-info}/LICENSE +0 -0
  65. {mlrun-1.3.1rc5.dist-info → mlrun-1.4.0rc2.dist-info}/WHEEL +0 -0
  66. {mlrun-1.3.1rc5.dist-info → mlrun-1.4.0rc2.dist-info}/entry_points.txt +0 -0
  67. {mlrun-1.3.1rc5.dist-info → mlrun-1.4.0rc2.dist-info}/top_level.txt +0 -0
mlrun/db/nopdb.py ADDED
@@ -0,0 +1,463 @@
+# Copyright 2022 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import datetime
+from typing import List, Optional, Union
+
+import mlrun.errors
+
+from ..api import schemas
+from ..api.schemas import ModelEndpoint
+from ..config import config
+from ..utils import logger
+from .base import RunDBInterface
+
+
+class NopDB(RunDBInterface):
+    def __init__(self, url=None, *args, **kwargs):
+        self.url = url
+
+    def __getattribute__(self, attr):
+        def nop(*args, **kwargs):
+            env_var_message = (
+                "MLRUN_DBPATH is not set. Set this environment variable to the URL of the API "
+                "server in order to connect"
+            )
+            if config.httpdb.nop_db.raise_error:
+                raise mlrun.errors.MLRunBadRequestError(env_var_message)
+
+            if config.httpdb.nop_db.verbose:
+                logger.warning(
+                    "Could not detect path to API server, not connected to API server!"
+                )
+                logger.warning(env_var_message)
+
+            return
+
+        if attr == "connect":
+            return super().__getattribute__(attr)
+        else:
+            nop()
+            return super().__getattribute__(attr)
+
+    def connect(self, secrets=None):
+        pass
+
+    def store_log(self, uid, project="", body=None, append=False):
+        pass
+
+    def get_log(self, uid, project="", offset=0, size=0):
+        pass
+
+    def store_run(self, struct, uid, project="", iter=0):
+        pass
+
+    def update_run(self, updates: dict, uid, project="", iter=0):
+        pass
+
+    def abort_run(self, uid, project="", iter=0):
+        pass
+
+    def read_run(self, uid, project="", iter=0):
+        pass
+
+    def list_runs(
+        self,
+        name="",
+        uid: Optional[Union[str, List[str]]] = None,
+        project="",
+        labels=None,
+        state="",
+        sort=True,
+        last=0,
+        iter=False,
+        start_time_from: datetime.datetime = None,
+        start_time_to: datetime.datetime = None,
+        last_update_time_from: datetime.datetime = None,
+        last_update_time_to: datetime.datetime = None,
+        partition_by: Union[schemas.RunPartitionByField, str] = None,
+        rows_per_partition: int = 1,
+        partition_sort_by: Union[schemas.SortField, str] = None,
+        partition_order: Union[schemas.OrderType, str] = schemas.OrderType.desc,
+        max_partitions: int = 0,
+    ):
+        pass
+
+    def del_run(self, uid, project="", iter=0):
+        pass
+
+    def del_runs(self, name="", project="", labels=None, state="", days_ago=0):
+        pass
+
+    def store_artifact(self, key, artifact, uid, iter=None, tag="", project=""):
+        pass
+
+    def read_artifact(self, key, tag="", iter=None, project=""):
+        pass
+
+    def list_artifacts(
+        self,
+        name="",
+        project="",
+        tag="",
+        labels=None,
+        since=None,
+        until=None,
+        iter: int = None,
+        best_iteration: bool = False,
+        kind: str = None,
+        category: Union[str, schemas.ArtifactCategories] = None,
+    ):
+        pass
+
+    def del_artifact(self, key, tag="", project=""):
+        pass
+
+    def del_artifacts(self, name="", project="", tag="", labels=None):
+        pass
+
+    def store_function(self, function, name, project="", tag="", versioned=False):
+        pass
+
+    def get_function(self, name, project="", tag="", hash_key=""):
+        pass
+
+    def delete_function(self, name: str, project: str = ""):
+        pass
+
+    def list_functions(self, name=None, project="", tag="", labels=None):
+        pass
+
+    def tag_objects(
+        self,
+        project: str,
+        tag_name: str,
+        tag_objects: schemas.TagObjects,
+        replace: bool = False,
+    ):
+        pass
+
+    def delete_objects_tag(
+        self, project: str, tag_name: str, tag_objects: schemas.TagObjects
+    ):
+        pass
+
+    def tag_artifacts(
+        self, artifacts, project: str, tag_name: str, replace: bool = False
+    ):
+        pass
+
+    def delete_artifacts_tags(self, artifacts, project: str, tag_name: str):
+        pass
+
+    def delete_project(
+        self,
+        name: str,
+        deletion_strategy: schemas.DeletionStrategy = schemas.DeletionStrategy.default(),
+    ):
+        pass
+
+    def store_project(self, name: str, project: schemas.Project) -> schemas.Project:
+        pass
+
+    def patch_project(
+        self,
+        name: str,
+        project: dict,
+        patch_mode: schemas.PatchMode = schemas.PatchMode.replace,
+    ) -> schemas.Project:
+        pass
+
+    def create_project(self, project: schemas.Project) -> schemas.Project:
+        pass
+
+    def list_projects(
+        self,
+        owner: str = None,
+        format_: schemas.ProjectsFormat = schemas.ProjectsFormat.full,
+        labels: List[str] = None,
+        state: schemas.ProjectState = None,
+    ) -> schemas.ProjectsOutput:
+        pass
+
+    def get_project(self, name: str) -> schemas.Project:
+        pass
+
+    def list_artifact_tags(
+        self, project=None, category: Union[str, schemas.ArtifactCategories] = None
+    ):
+        pass
+
+    def create_feature_set(
+        self, feature_set: Union[dict, schemas.FeatureSet], project="", versioned=True
+    ) -> dict:
+        pass
+
+    def get_feature_set(
+        self, name: str, project: str = "", tag: str = None, uid: str = None
+    ) -> dict:
+        pass
+
+    def list_features(
+        self,
+        project: str,
+        name: str = None,
+        tag: str = None,
+        entities: List[str] = None,
+        labels: List[str] = None,
+    ) -> schemas.FeaturesOutput:
+        pass
+
+    def list_entities(
+        self, project: str, name: str = None, tag: str = None, labels: List[str] = None
+    ) -> schemas.EntitiesOutput:
+        pass
+
+    def list_feature_sets(
+        self,
+        project: str = "",
+        name: str = None,
+        tag: str = None,
+        state: str = None,
+        entities: List[str] = None,
+        features: List[str] = None,
+        labels: List[str] = None,
+        partition_by: Union[schemas.FeatureStorePartitionByField, str] = None,
+        rows_per_partition: int = 1,
+        partition_sort_by: Union[schemas.SortField, str] = None,
+        partition_order: Union[schemas.OrderType, str] = schemas.OrderType.desc,
+    ) -> List[dict]:
+        pass
+
+    def store_feature_set(
+        self,
+        feature_set: Union[dict, schemas.FeatureSet],
+        name=None,
+        project="",
+        tag=None,
+        uid=None,
+        versioned=True,
+    ):
+        pass
+
+    def patch_feature_set(
+        self,
+        name,
+        feature_set: dict,
+        project="",
+        tag=None,
+        uid=None,
+        patch_mode: Union[str, schemas.PatchMode] = schemas.PatchMode.replace,
+    ):
+        pass
+
+    def delete_feature_set(self, name, project="", tag=None, uid=None):
+        pass
+
+    def create_feature_vector(
+        self,
+        feature_vector: Union[dict, schemas.FeatureVector],
+        project="",
+        versioned=True,
+    ) -> dict:
+        pass
+
+    def get_feature_vector(
+        self, name: str, project: str = "", tag: str = None, uid: str = None
+    ) -> dict:
+        pass
+
+    def list_feature_vectors(
+        self,
+        project: str = "",
+        name: str = None,
+        tag: str = None,
+        state: str = None,
+        labels: List[str] = None,
+        partition_by: Union[schemas.FeatureStorePartitionByField, str] = None,
+        rows_per_partition: int = 1,
+        partition_sort_by: Union[schemas.SortField, str] = None,
+        partition_order: Union[schemas.OrderType, str] = schemas.OrderType.desc,
+    ) -> List[dict]:
+        pass
+
+    def store_feature_vector(
+        self,
+        feature_vector: Union[dict, schemas.FeatureVector],
+        name=None,
+        project="",
+        tag=None,
+        uid=None,
+        versioned=True,
+    ):
+        pass
+
+    def patch_feature_vector(
+        self,
+        name,
+        feature_vector_update: dict,
+        project="",
+        tag=None,
+        uid=None,
+        patch_mode: Union[str, schemas.PatchMode] = schemas.PatchMode.replace,
+    ):
+        pass
+
+    def delete_feature_vector(self, name, project="", tag=None, uid=None):
+        pass
+
+    def list_pipelines(
+        self,
+        project: str,
+        namespace: str = None,
+        sort_by: str = "",
+        page_token: str = "",
+        filter_: str = "",
+        format_: Union[
+            str, schemas.PipelinesFormat
+        ] = schemas.PipelinesFormat.metadata_only,
+        page_size: int = None,
+    ) -> schemas.PipelinesOutput:
+        pass
+
+    def create_project_secrets(
+        self,
+        project: str,
+        provider: Union[
+            str, schemas.SecretProviderName
+        ] = schemas.SecretProviderName.kubernetes,
+        secrets: dict = None,
+    ):
+        pass
+
+    def list_project_secrets(
+        self,
+        project: str,
+        token: str,
+        provider: Union[
+            str, schemas.SecretProviderName
+        ] = schemas.SecretProviderName.kubernetes,
+        secrets: List[str] = None,
+    ) -> schemas.SecretsData:
+        pass
+
+    def list_project_secret_keys(
+        self,
+        project: str,
+        provider: Union[
+            str, schemas.SecretProviderName
+        ] = schemas.SecretProviderName.kubernetes,
+        token: str = None,
+    ) -> schemas.SecretKeysData:
+        pass
+
+    def delete_project_secrets(
+        self,
+        project: str,
+        provider: Union[
+            str, schemas.SecretProviderName
+        ] = schemas.SecretProviderName.kubernetes,
+        secrets: List[str] = None,
+    ):
+        pass
+
+    def create_user_secrets(
+        self,
+        user: str,
+        provider: Union[
+            str, schemas.SecretProviderName
+        ] = schemas.SecretProviderName.vault,
+        secrets: dict = None,
+    ):
+        pass
+
+    def create_model_endpoint(
+        self, project: str, endpoint_id: str, model_endpoint: ModelEndpoint
+    ):
+        pass
+
+    def delete_model_endpoint(self, project: str, endpoint_id: str):
+        pass
+
+    def list_model_endpoints(
+        self,
+        project: str,
+        model: Optional[str] = None,
+        function: Optional[str] = None,
+        labels: List[str] = None,
+        start: str = "now-1h",
+        end: str = "now",
+        metrics: Optional[List[str]] = None,
+    ):
+        pass
+
+    def get_model_endpoint(
+        self,
+        project: str,
+        endpoint_id: str,
+        start: Optional[str] = None,
+        end: Optional[str] = None,
+        metrics: Optional[List[str]] = None,
+        features: bool = False,
+    ):
+        pass
+
+    def patch_model_endpoint(self, project: str, endpoint_id: str, attributes: dict):
+        pass
+
+    def create_marketplace_source(
+        self, source: Union[dict, schemas.IndexedMarketplaceSource]
+    ):
+        pass
+
+    def store_marketplace_source(
+        self, source_name: str, source: Union[dict, schemas.IndexedMarketplaceSource]
+    ):
+        pass
+
+    def list_marketplace_sources(self):
+        pass
+
+    def get_marketplace_source(self, source_name: str):
+        pass
+
+    def delete_marketplace_source(self, source_name: str):
+        pass
+
+    def get_marketplace_catalog(
+        self,
+        source_name: str,
+        channel: str = None,
+        version: str = None,
+        tag: str = None,
+        force_refresh: bool = False,
+    ):
+        pass
+
+    def get_marketplace_item(
+        self,
+        source_name: str,
+        item_name: str,
+        channel: str = "development",
+        version: str = None,
+        tag: str = "latest",
+        force_refresh: bool = False,
+    ):
+        pass
+
+    def verify_authorization(
+        self, authorization_verification_input: schemas.AuthorizationVerificationInput
+    ):
+        pass
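
Note: per the file list above, mlrun/db/nopdb.py is added in the same release that removes mlrun/db/filedb.py, and it turns every client call into a no-op (warning or error, per config) when no API server URL is set. Below is a minimal standalone sketch of the same `__getattribute__` interception pattern; the class name, message, and methods are illustrative, not mlrun's actual code:

# Hypothetical standalone sketch of NopDB's interception pattern: every
# attribute lookup first runs a warning side effect, then falls through
# to the real attribute. "connect" is exempt, as in the diff above.
class NoOpProxy:
    def __getattribute__(self, attr):
        if attr != "connect":
            print(f"warning: no API server configured (accessed {attr!r})")
        return super().__getattribute__(attr)

    def connect(self, secrets=None):
        return self

    def store_log(self, uid, project=""):
        pass  # no-op, mirroring NopDB's stub methods


db = NoOpProxy()
db.store_log("some-uid")  # prints the warning, then does nothing
db.connect()              # exempt: no warning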
mlrun/db/sqldb.py CHANGED
@@ -128,6 +128,7 @@ class SQLDB(RunDBInterface):
         partition_sort_by: Union[schemas.SortField, str] = None,
         partition_order: Union[schemas.OrderType, str] = schemas.OrderType.desc,
         max_partitions: int = 0,
+        with_notifications: bool = False,
     ):
         import mlrun.api.crud

@@ -151,6 +152,7 @@ class SQLDB(RunDBInterface):
             partition_sort_by,
             partition_order,
             max_partitions,
+            with_notifications,
         )

     def del_run(self, uid, project=None, iter=None):
@@ -394,7 +396,17 @@ class SQLDB(RunDBInterface):
         name: str,
         project: mlrun.api.schemas.Project,
     ) -> mlrun.api.schemas.Project:
-        raise NotImplementedError()
+        import mlrun.api.crud
+
+        if isinstance(project, dict):
+            project = mlrun.api.schemas.Project(**project)
+
+        return self._transform_db_error(
+            mlrun.api.crud.Projects().store_project,
+            self.session,
+            name=name,
+            project=project,
+        )

     def patch_project(
         self,
@@ -402,20 +414,41 @@ class SQLDB(RunDBInterface):
         project: dict,
         patch_mode: mlrun.api.schemas.PatchMode = mlrun.api.schemas.PatchMode.replace,
     ) -> mlrun.api.schemas.Project:
-        raise NotImplementedError()
+        import mlrun.api.crud
+
+        return self._transform_db_error(
+            mlrun.api.crud.Projects().patch_project,
+            self.session,
+            name=name,
+            project=project,
+            patch_mode=patch_mode,
+        )

     def create_project(
         self,
         project: mlrun.api.schemas.Project,
     ) -> mlrun.api.schemas.Project:
-        raise NotImplementedError()
+        import mlrun.api.crud
+
+        return self._transform_db_error(
+            mlrun.api.crud.Projects().create_project,
+            self.session,
+            project=project,
+        )

     def delete_project(
         self,
         name: str,
         deletion_strategy: mlrun.api.schemas.DeletionStrategy = mlrun.api.schemas.DeletionStrategy.default(),
     ):
-        raise NotImplementedError()
+        import mlrun.api.crud
+
+        return self._transform_db_error(
+            mlrun.api.crud.Projects().delete_project,
+            self.session,
+            name=name,
+            deletion_strategy=deletion_strategy,
+        )

     def get_project(
         self, name: str = None, project_id: int = None
@@ -435,7 +468,16 @@ class SQLDB(RunDBInterface):
         labels: List[str] = None,
         state: mlrun.api.schemas.ProjectState = None,
     ) -> mlrun.api.schemas.ProjectsOutput:
-        raise NotImplementedError()
+        import mlrun.api.crud
+
+        return self._transform_db_error(
+            mlrun.api.crud.Projects().list_projects,
+            self.session,
+            owner=owner,
+            format_=format_,
+            labels=labels,
+            state=state,
+        )

     @staticmethod
     def _transform_db_error(func, *args, **kwargs):
@@ -839,7 +881,6 @@ class SQLDB(RunDBInterface):
     def get_marketplace_catalog(
         self,
         source_name: str,
-        channel: str = None,
         version: str = None,
         tag: str = None,
         force_refresh: bool = False,
@@ -850,7 +891,6 @@ class SQLDB(RunDBInterface):
         self,
         source_name: str,
         item_name: str,
-        channel: str = "development",
         version: str = None,
         tag: str = "latest",
         force_refresh: bool = False,
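
These sqldb.py hunks replace the previously unimplemented project CRUD methods with delegation to mlrun.api.crud.Projects, funneled through the existing _transform_db_error helper. A rough standalone sketch of that error-translation wrapper pattern, with illustrative names rather than mlrun's actual classes:

# Illustrative sketch: run a CRUD callable and re-raise backend failures
# as a client-facing error type, hiding SQL-level details.
class BackendError(Exception):
    pass

class ClientFacingError(Exception):
    pass

def transform_db_error(func, *args, **kwargs):
    try:
        return func(*args, **kwargs)
    except BackendError as exc:
        raise ClientFacingError(str(exc)) from exc

def create_project(session, project):
    # stand-in for a CRUD call that hits the database
    raise BackendError("UNIQUE constraint failed: projects.name")

try:
    transform_db_error(create_project, "session", {"name": "demo"})
except ClientFacingError as err:
    print(f"create failed: {err}")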
mlrun/execution.py CHANGED
@@ -80,6 +80,7 @@ class MLClientCtx(object):
         self._log_level = "info"
         self._matrics_db = None
         self._autocommit = autocommit
+        self._notifications = []

         self._labels = {}
         self._annotations = {}
@@ -299,6 +300,7 @@ class MLClientCtx(object):
         self.artifact_path = spec.get(run_keys.output_path, self.artifact_path)
         self._in_path = spec.get(run_keys.input_path, self._in_path)
         inputs = spec.get(run_keys.inputs)
+        self._notifications = spec.get("notifications", self._notifications)

         self._init_dbs(rundb)

@@ -944,6 +946,7 @@ class MLClientCtx(object):
                 "outputs": self._outputs,
                 run_keys.output_path: self.artifact_path,
                 run_keys.inputs: {k: v.artifact_url for k, v in self._inputs.items()},
+                "notifications": self._notifications,
             },
             "status": {
                 "results": self._results,
mlrun/feature_store/api.py CHANGED
@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import copy
+import importlib.util
+import pathlib
+import sys
 import warnings
 from datetime import datetime
-from typing import List, Optional, Union
-from urllib.parse import urlparse
+from typing import Any, Dict, List, Optional, Union

 import pandas as pd

@@ -327,6 +329,21 @@ def _rename_source_dataframe_columns(df):
     return df


+def _get_namespace(run_config: RunConfig) -> Dict[str, Any]:
+    # if running locally, we need to import the file dynamically to get its namespace
+    if run_config and run_config.local and run_config.function:
+        filename = run_config.function.spec.filename
+        if filename:
+            module_name = pathlib.Path(filename).name.rsplit(".", maxsplit=1)[0]
+            spec = importlib.util.spec_from_file_location(module_name, filename)
+            module = importlib.util.module_from_spec(spec)
+            sys.modules[module_name] = module
+            spec.loader.exec_module(module)
+            return vars(__import__(module_name))
+    else:
+        return get_caller_globals()
+
+
 def ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
@@ -501,7 +518,8 @@ def ingest(
     featureset.spec.source = source
     featureset.spec.validate_no_processing_for_passthrough()

-    namespace = namespace or get_caller_globals()
+    if not namespace:
+        namespace = _get_namespace(run_config)

     targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
     targets_to_ingest = copy.deepcopy(targets_to_ingest)
@@ -846,7 +864,11 @@ def _ingest_with_spark(
                 f"{featureset.metadata.project}-{featureset.metadata.name}"
             )

-        spark = pyspark.sql.SparkSession.builder.appName(session_name).getOrCreate()
+        spark = (
+            pyspark.sql.SparkSession.builder.appName(session_name)
+            .config("spark.sql.session.timeZone", "UTC")
+            .getOrCreate()
+        )
         created_spark_context = True

     timestamp_key = featureset.spec.timestamp_key
@@ -877,14 +899,6 @@ def _ingest_with_spark(
             target.set_resource(featureset)
             if featureset.spec.passthrough and target.is_offline:
                 continue
-            if target.path and urlparse(target.path).scheme == "":
-                if mlrun_context:
-                    mlrun_context.logger.error(
-                        "Paths for spark ingest must contain schema, i.e v3io, s3, az"
-                    )
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    "Paths for spark ingest must contain schema, i.e v3io, s3, az"
-                )
             spark_options = target.get_spark_options(
                 key_columns, timestamp_key, overwrite
             )
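
The new _get_namespace helper uses the stdlib importlib machinery to execute a user's source file and hand its globals to the ingestion engine when running locally. A self-contained sketch of that dynamic-import pattern (the function name and path here are illustrative):

# Self-contained sketch of the pattern: load an arbitrary .py file as a
# module and return its globals as a dict.
import importlib.util
import pathlib
import sys

def load_namespace(filename: str) -> dict:
    module_name = pathlib.Path(filename).stem         # "steps" for /tmp/steps.py
    spec = importlib.util.spec_from_file_location(module_name, filename)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module                 # register before executing
    spec.loader.exec_module(module)                   # run the file's top level
    return vars(module)                               # module globals as a dict

# usage (illustrative path):
# ns = load_namespace("/tmp/steps.py"); step_cls = ns["MyStep"]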
mlrun/feature_store/common.py CHANGED
@@ -218,7 +218,7 @@ class RunConfig:
         config = RunConfig("mycode.py", image="mlrun/mlrun", requirements=["spacy"])

         # config for using function object
-        function = mlrun.import_function("hub://some_function")
+        function = mlrun.import_function("hub://some-function")
         config = RunConfig(function)

     :param function: this can be function uri or function object or path to function code (.py/.ipynb)