mlrun 1.6.0rc7__py3-none-any.whl → 1.6.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +27 -27
- mlrun/common/schemas/auth.py +2 -0
- mlrun/config.py +2 -2
- mlrun/datastore/dbfs_store.py +0 -3
- mlrun/datastore/sources.py +12 -2
- mlrun/datastore/targets.py +3 -0
- mlrun/db/httpdb.py +15 -0
- mlrun/feature_store/feature_set.py +5 -2
- mlrun/feature_store/retrieval/spark_merger.py +7 -1
- mlrun/kfpops.py +1 -1
- mlrun/launcher/client.py +1 -6
- mlrun/launcher/remote.py +5 -3
- mlrun/model.py +1 -1
- mlrun/model_monitoring/batch_application.py +48 -85
- mlrun/package/packager.py +115 -89
- mlrun/package/packagers/default_packager.py +66 -65
- mlrun/package/packagers/numpy_packagers.py +109 -62
- mlrun/package/packagers/pandas_packagers.py +12 -23
- mlrun/package/packagers/python_standard_library_packagers.py +35 -57
- mlrun/package/packagers_manager.py +16 -13
- mlrun/package/utils/_pickler.py +8 -18
- mlrun/package/utils/_supported_format.py +1 -1
- mlrun/projects/pipelines.py +11 -6
- mlrun/projects/project.py +11 -4
- mlrun/runtimes/__init__.py +6 -0
- mlrun/runtimes/base.py +8 -0
- mlrun/runtimes/daskjob.py +73 -5
- mlrun/runtimes/local.py +9 -9
- mlrun/runtimes/remotesparkjob.py +1 -0
- mlrun/runtimes/utils.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/METADATA +2 -2
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/RECORD +38 -38
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/WHEEL +1 -1
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/top_level.txt +0 -0
|
@@ -261,8 +261,7 @@ class NumPyNDArrayPackager(DefaultPackager):
|
|
|
261
261
|
# method:
|
|
262
262
|
_ARRAY_SIZE_AS_RESULT = 10
|
|
263
263
|
|
|
264
|
-
|
|
265
|
-
def get_default_packing_artifact_type(cls, obj: np.ndarray) -> str:
|
|
264
|
+
def get_default_packing_artifact_type(self, obj: np.ndarray) -> str:
|
|
266
265
|
"""
|
|
267
266
|
Get the default artifact type. Will be a result if the array size is less than 10, otherwise file.
|
|
268
267
|
|
|
@@ -270,12 +269,11 @@ class NumPyNDArrayPackager(DefaultPackager):
|
|
|
270
269
|
|
|
271
270
|
:return: The default artifact type.
|
|
272
271
|
"""
|
|
273
|
-
if obj.size <
|
|
272
|
+
if obj.size < self._ARRAY_SIZE_AS_RESULT:
|
|
274
273
|
return ArtifactType.RESULT
|
|
275
274
|
return ArtifactType.FILE
|
|
276
275
|
|
|
277
|
-
|
|
278
|
-
def get_default_unpacking_artifact_type(cls, data_item: DataItem) -> str:
|
|
276
|
+
def get_default_unpacking_artifact_type(self, data_item: DataItem) -> str:
|
|
279
277
|
"""
|
|
280
278
|
Get the default artifact type used for unpacking. Returns dataset if the data item represents a
|
|
281
279
|
`DatasetArtifact` and otherwise, file.
|
|
@@ -289,8 +287,7 @@ class NumPyNDArrayPackager(DefaultPackager):
|
|
|
289
287
|
return ArtifactType.DATASET
|
|
290
288
|
return ArtifactType.FILE
|
|
291
289
|
|
|
292
|
-
|
|
293
|
-
def pack_result(cls, obj: np.ndarray, key: str) -> dict:
|
|
290
|
+
def pack_result(self, obj: np.ndarray, key: str) -> dict:
|
|
294
291
|
"""
|
|
295
292
|
Pack an array as a result.
|
|
296
293
|
|
|
@@ -307,9 +304,8 @@ class NumPyNDArrayPackager(DefaultPackager):
|
|
|
307
304
|
|
|
308
305
|
return super().pack_result(obj=obj, key=key)
|
|
309
306
|
|
|
310
|
-
@classmethod
|
|
311
307
|
def pack_file(
|
|
312
|
-
|
|
308
|
+
self,
|
|
313
309
|
obj: np.ndarray,
|
|
314
310
|
key: str,
|
|
315
311
|
file_format: str = DEFAULT_NUMPY_ARRAY_FORMAT,
|
|
@@ -328,19 +324,21 @@ class NumPyNDArrayPackager(DefaultPackager):
|
|
|
328
324
|
# Save to file:
|
|
329
325
|
formatter = NumPySupportedFormat.get_format_handler(fmt=file_format)
|
|
330
326
|
temp_directory = pathlib.Path(tempfile.mkdtemp())
|
|
331
|
-
|
|
327
|
+
self.add_future_clearing_path(path=temp_directory)
|
|
332
328
|
file_path = temp_directory / f"{key}.{file_format}"
|
|
333
329
|
formatter.save(obj=obj, file_path=str(file_path), **save_kwargs)
|
|
334
330
|
|
|
335
|
-
# Create the artifact and instructions:
|
|
331
|
+
# Create the artifact and instructions (Note: only the 'npy' format supports saving object arrays and that will
|
|
332
|
+
# require pickling, hence we set the required instruction):
|
|
336
333
|
artifact = Artifact(key=key, src_path=os.path.abspath(file_path))
|
|
337
334
|
instructions = {"file_format": file_format}
|
|
335
|
+
if file_format == NumPySupportedFormat.NPY and obj.dtype == np.object_:
|
|
336
|
+
instructions["allow_pickle"] = True
|
|
338
337
|
|
|
339
338
|
return artifact, instructions
|
|
340
339
|
|
|
341
|
-
@classmethod
|
|
342
340
|
def pack_dataset(
|
|
343
|
-
|
|
341
|
+
self,
|
|
344
342
|
obj: np.ndarray,
|
|
345
343
|
key: str,
|
|
346
344
|
file_format: str = "",
|
|
@@ -372,20 +370,22 @@ class NumPyNDArrayPackager(DefaultPackager):
|
|
|
372
370
|
|
|
373
371
|
return artifact, {}
|
|
374
372
|
|
|
375
|
-
|
|
376
|
-
|
|
373
|
+
def unpack_file(
|
|
374
|
+
self, data_item: DataItem, file_format: str = None, allow_pickle: bool = False
|
|
375
|
+
) -> np.ndarray:
|
|
377
376
|
"""
|
|
378
377
|
Unpack a numpy array from file.
|
|
379
378
|
|
|
380
|
-
:param data_item:
|
|
381
|
-
:param file_format:
|
|
382
|
-
|
|
379
|
+
:param data_item: The data item to unpack.
|
|
380
|
+
:param file_format: The file format to use for reading the array. Default is None - will be read by the file
|
|
381
|
+
extension.
|
|
382
|
+
:param allow_pickle: Whether to allow loading pickled arrays in case of object type arrays. Only relevant to
|
|
383
|
+
'npy' format. Default is False for security reasons.
|
|
383
384
|
|
|
384
385
|
:return: The unpacked array.
|
|
385
386
|
"""
|
|
386
387
|
# Get the file:
|
|
387
|
-
file_path =
|
|
388
|
-
cls.add_future_clearing_path(path=file_path)
|
|
388
|
+
file_path = self.get_data_item_local_path(data_item=data_item)
|
|
389
389
|
|
|
390
390
|
# Get the archive format by the file extension if needed:
|
|
391
391
|
if file_format is None:
|
|
@@ -401,12 +401,14 @@ class NumPyNDArrayPackager(DefaultPackager):
|
|
|
401
401
|
|
|
402
402
|
# Read the object:
|
|
403
403
|
formatter = NumPySupportedFormat.get_format_handler(fmt=file_format)
|
|
404
|
-
|
|
404
|
+
load_kwargs = {}
|
|
405
|
+
if file_format == NumPySupportedFormat.NPY:
|
|
406
|
+
load_kwargs["allow_pickle"] = allow_pickle
|
|
407
|
+
obj = formatter.load(file_path=file_path, **load_kwargs)
|
|
405
408
|
|
|
406
409
|
return obj
|
|
407
410
|
|
|
408
|
-
|
|
409
|
-
def unpack_dataset(cls, data_item: DataItem) -> np.ndarray:
|
|
411
|
+
def unpack_dataset(self, data_item: DataItem) -> np.ndarray:
|
|
410
412
|
"""
|
|
411
413
|
Unpack a numpy array from a dataset artifact.
|
|
412
414
|
|
|
@@ -434,9 +436,8 @@ class _NumPyNDArrayCollectionPackager(DefaultPackager):
|
|
|
434
436
|
DEFAULT_UNPACKING_ARTIFACT_TYPE = ArtifactType.FILE
|
|
435
437
|
PRIORITY = 4
|
|
436
438
|
|
|
437
|
-
@classmethod
|
|
438
439
|
def pack_file(
|
|
439
|
-
|
|
440
|
+
self,
|
|
440
441
|
obj: NumPyArrayCollectionType,
|
|
441
442
|
key: str,
|
|
442
443
|
file_format: str = DEFAULT_NUMPPY_ARRAY_COLLECTION_FORMAT,
|
|
@@ -455,31 +456,40 @@ class _NumPyNDArrayCollectionPackager(DefaultPackager):
|
|
|
455
456
|
# Save to file:
|
|
456
457
|
formatter = NumPySupportedFormat.get_format_handler(fmt=file_format)
|
|
457
458
|
temp_directory = pathlib.Path(tempfile.mkdtemp())
|
|
458
|
-
|
|
459
|
+
self.add_future_clearing_path(path=temp_directory)
|
|
459
460
|
file_path = temp_directory / f"{key}.{file_format}"
|
|
460
461
|
formatter.save(obj=obj, file_path=str(file_path), **save_kwargs)
|
|
461
462
|
|
|
462
|
-
# Create the artifact and instructions:
|
|
463
|
+
# Create the artifact and instructions (Note: only the 'npz' format supports saving object arrays and that will
|
|
464
|
+
# require pickling, hence we set the required instruction):
|
|
463
465
|
artifact = Artifact(key=key, src_path=os.path.abspath(file_path))
|
|
466
|
+
instructions = {"file_format": file_format}
|
|
467
|
+
if file_format == NumPySupportedFormat.NPZ and self._is_any_object_dtype(
|
|
468
|
+
array_collection=obj
|
|
469
|
+
):
|
|
470
|
+
instructions["allow_pickle"] = True
|
|
464
471
|
|
|
465
|
-
return artifact,
|
|
472
|
+
return artifact, instructions
|
|
466
473
|
|
|
467
|
-
@classmethod
|
|
468
474
|
def unpack_file(
|
|
469
|
-
|
|
475
|
+
self,
|
|
476
|
+
data_item: DataItem,
|
|
477
|
+
file_format: str = None,
|
|
478
|
+
allow_pickle: bool = False,
|
|
470
479
|
) -> Dict[str, np.ndarray]:
|
|
471
480
|
"""
|
|
472
481
|
Unpack a numpy array collection from file.
|
|
473
482
|
|
|
474
|
-
:param data_item:
|
|
475
|
-
:param file_format:
|
|
476
|
-
|
|
483
|
+
:param data_item: The data item to unpack.
|
|
484
|
+
:param file_format: The file format to use for reading the array collection. Default is None - will be read by
|
|
485
|
+
the file extension.
|
|
486
|
+
:param allow_pickle: Whether to allow loading pickled arrays in case of object type arrays. Only relevant to
|
|
487
|
+
'npz' format. Default is False for security reasons.
|
|
477
488
|
|
|
478
489
|
:return: The unpacked array collection.
|
|
479
490
|
"""
|
|
480
491
|
# Get the file:
|
|
481
|
-
file_path =
|
|
482
|
-
cls.add_future_clearing_path(path=file_path)
|
|
492
|
+
file_path = self.get_data_item_local_path(data_item=data_item)
|
|
483
493
|
|
|
484
494
|
# Get the archive format by the file extension if needed:
|
|
485
495
|
if file_format is None:
|
|
@@ -495,10 +505,40 @@ class _NumPyNDArrayCollectionPackager(DefaultPackager):
|
|
|
495
505
|
|
|
496
506
|
# Read the object:
|
|
497
507
|
formatter = NumPySupportedFormat.get_format_handler(fmt=file_format)
|
|
498
|
-
|
|
508
|
+
load_kwargs = {}
|
|
509
|
+
if file_format == NumPySupportedFormat.NPZ:
|
|
510
|
+
load_kwargs["allow_pickle"] = allow_pickle
|
|
511
|
+
obj = formatter.load(file_path=file_path, **load_kwargs)
|
|
499
512
|
|
|
500
513
|
return obj
|
|
501
514
|
|
|
515
|
+
@staticmethod
|
|
516
|
+
def _is_any_object_dtype(
|
|
517
|
+
array_collection: Union[np.ndarray, NumPyArrayCollectionType]
|
|
518
|
+
):
|
|
519
|
+
"""
|
|
520
|
+
Check if any of the arrays in a collection is of type `object`.
|
|
521
|
+
|
|
522
|
+
:param array_collection: The collection to check for `object` dtype.
|
|
523
|
+
|
|
524
|
+
:return: True if at least one array in the collection is an `object` array.
|
|
525
|
+
"""
|
|
526
|
+
if isinstance(array_collection, list):
|
|
527
|
+
return any(
|
|
528
|
+
_NumPyNDArrayCollectionPackager._is_any_object_dtype(
|
|
529
|
+
array_collection=array
|
|
530
|
+
)
|
|
531
|
+
for array in array_collection
|
|
532
|
+
)
|
|
533
|
+
elif isinstance(array_collection, dict):
|
|
534
|
+
return any(
|
|
535
|
+
_NumPyNDArrayCollectionPackager._is_any_object_dtype(
|
|
536
|
+
array_collection=array
|
|
537
|
+
)
|
|
538
|
+
for array in array_collection.values()
|
|
539
|
+
)
|
|
540
|
+
return array_collection.dtype == np.object_
|
|
541
|
+
|
|
502
542
|
|
|
503
543
|
class NumPyNDArrayDictPackager(_NumPyNDArrayCollectionPackager):
|
|
504
544
|
"""
|
|
@@ -507,9 +547,8 @@ class NumPyNDArrayDictPackager(_NumPyNDArrayCollectionPackager):
|
|
|
507
547
|
|
|
508
548
|
PACKABLE_OBJECT_TYPE = Dict[str, np.ndarray]
|
|
509
549
|
|
|
510
|
-
@classmethod
|
|
511
550
|
def is_packable(
|
|
512
|
-
|
|
551
|
+
self, obj: Any, artifact_type: str = None, configurations: dict = None
|
|
513
552
|
) -> bool:
|
|
514
553
|
"""
|
|
515
554
|
Check if the object provided is a dictionary of numpy arrays.
|
|
@@ -531,7 +570,7 @@ class NumPyNDArrayDictPackager(_NumPyNDArrayCollectionPackager):
|
|
|
531
570
|
return False
|
|
532
571
|
|
|
533
572
|
# Check the artifact type is supported:
|
|
534
|
-
if artifact_type and artifact_type not in
|
|
573
|
+
if artifact_type and artifact_type not in self.get_supported_artifact_types():
|
|
535
574
|
return False
|
|
536
575
|
|
|
537
576
|
# Check an edge case where the dictionary is empty (this packager will pack empty dictionaries only if given
|
|
@@ -539,13 +578,12 @@ class NumPyNDArrayDictPackager(_NumPyNDArrayCollectionPackager):
|
|
|
539
578
|
if not obj:
|
|
540
579
|
return (
|
|
541
580
|
configurations.get("file_format", None)
|
|
542
|
-
in NumPySupportedFormat.get_multi_array_formats()
|
|
581
|
+
in NumPySupportedFormat().get_multi_array_formats()
|
|
543
582
|
)
|
|
544
583
|
|
|
545
584
|
return True
|
|
546
585
|
|
|
547
|
-
|
|
548
|
-
def pack_result(cls, obj: Dict[str, np.ndarray], key: str) -> dict:
|
|
586
|
+
def pack_result(self, obj: Dict[str, np.ndarray], key: str) -> dict:
|
|
549
587
|
"""
|
|
550
588
|
Pack a dictionary of numpy arrays as a result.
|
|
551
589
|
|
|
@@ -561,21 +599,27 @@ class NumPyNDArrayDictPackager(_NumPyNDArrayCollectionPackager):
|
|
|
561
599
|
}
|
|
562
600
|
}
|
|
563
601
|
|
|
564
|
-
@classmethod
|
|
565
602
|
def unpack_file(
|
|
566
|
-
|
|
603
|
+
self,
|
|
604
|
+
data_item: DataItem,
|
|
605
|
+
file_format: str = None,
|
|
606
|
+
allow_pickle: bool = False,
|
|
567
607
|
) -> Dict[str, np.ndarray]:
|
|
568
608
|
"""
|
|
569
609
|
Unpack a numpy array dictionary from file.
|
|
570
610
|
|
|
571
|
-
:param data_item:
|
|
572
|
-
:param file_format:
|
|
573
|
-
|
|
611
|
+
:param data_item: The data item to unpack.
|
|
612
|
+
:param file_format: The file format to use for reading the arrays dictionary. Default is None - will be read by
|
|
613
|
+
the file extension.
|
|
614
|
+
:param allow_pickle: Whether to allow loading pickled arrays in case of object type arrays. Only relevant to
|
|
615
|
+
'npz' format. Default is False for security reasons.
|
|
574
616
|
|
|
575
617
|
:return: The unpacked array.
|
|
576
618
|
"""
|
|
577
619
|
# Load the object:
|
|
578
|
-
obj = super().unpack_file(
|
|
620
|
+
obj = super().unpack_file(
|
|
621
|
+
data_item=data_item, file_format=file_format, allow_pickle=allow_pickle
|
|
622
|
+
)
|
|
579
623
|
|
|
580
624
|
# The returned object is a mapping of type NpzFile, so we cast it to a dictionary:
|
|
581
625
|
return {key: array for key, array in obj.items()}
|
|
@@ -588,9 +632,8 @@ class NumPyNDArrayListPackager(_NumPyNDArrayCollectionPackager):
|
|
|
588
632
|
|
|
589
633
|
PACKABLE_OBJECT_TYPE = List[np.ndarray]
|
|
590
634
|
|
|
591
|
-
@classmethod
|
|
592
635
|
def is_packable(
|
|
593
|
-
|
|
636
|
+
self, obj: Any, artifact_type: str = None, configurations: dict = None
|
|
594
637
|
) -> bool:
|
|
595
638
|
"""
|
|
596
639
|
Check if the object provided is a list of numpy arrays.
|
|
@@ -609,7 +652,7 @@ class NumPyNDArrayListPackager(_NumPyNDArrayCollectionPackager):
|
|
|
609
652
|
return False
|
|
610
653
|
|
|
611
654
|
# Check the artifact type is supported:
|
|
612
|
-
if artifact_type and artifact_type not in
|
|
655
|
+
if artifact_type and artifact_type not in self.get_supported_artifact_types():
|
|
613
656
|
return False
|
|
614
657
|
|
|
615
658
|
# Check an edge case where the list is empty (this packager will pack empty lists only if given specific file
|
|
@@ -617,13 +660,12 @@ class NumPyNDArrayListPackager(_NumPyNDArrayCollectionPackager):
|
|
|
617
660
|
if not obj:
|
|
618
661
|
return (
|
|
619
662
|
configurations.get("file_format", None)
|
|
620
|
-
in NumPySupportedFormat.get_multi_array_formats()
|
|
663
|
+
in NumPySupportedFormat().get_multi_array_formats()
|
|
621
664
|
)
|
|
622
665
|
|
|
623
666
|
return True
|
|
624
667
|
|
|
625
|
-
|
|
626
|
-
def pack_result(cls, obj: List[np.ndarray], key: str) -> dict:
|
|
668
|
+
def pack_result(self, obj: List[np.ndarray], key: str) -> dict:
|
|
627
669
|
"""
|
|
628
670
|
Pack a list of numpy arrays as a result.
|
|
629
671
|
|
|
@@ -634,21 +676,27 @@ class NumPyNDArrayListPackager(_NumPyNDArrayCollectionPackager):
|
|
|
634
676
|
"""
|
|
635
677
|
return {key: [array.tolist() for array in obj]}
|
|
636
678
|
|
|
637
|
-
@classmethod
|
|
638
679
|
def unpack_file(
|
|
639
|
-
|
|
680
|
+
self,
|
|
681
|
+
data_item: DataItem,
|
|
682
|
+
file_format: str = None,
|
|
683
|
+
allow_pickle: bool = False,
|
|
640
684
|
) -> List[np.ndarray]:
|
|
641
685
|
"""
|
|
642
686
|
Unpack a numpy array list from file.
|
|
643
687
|
|
|
644
|
-
:param data_item:
|
|
645
|
-
:param file_format:
|
|
646
|
-
|
|
688
|
+
:param data_item: The data item to unpack.
|
|
689
|
+
:param file_format: The file format to use for reading the arrays list. Default is None - will be read by the
|
|
690
|
+
file extension.
|
|
691
|
+
:param allow_pickle: Whether to allow loading pickled arrays in case of object type arrays. Only relevant to
|
|
692
|
+
'npz' format. Default is False for security reasons.
|
|
647
693
|
|
|
648
694
|
:return: The unpacked array.
|
|
649
695
|
"""
|
|
650
696
|
# Load the object:
|
|
651
|
-
obj = super().unpack_file(
|
|
697
|
+
obj = super().unpack_file(
|
|
698
|
+
data_item=data_item, file_format=file_format, allow_pickle=allow_pickle
|
|
699
|
+
)
|
|
652
700
|
|
|
653
701
|
# The returned object is a mapping of type NpzFile, so we cast it to a list:
|
|
654
702
|
return list(obj.values())
|
|
@@ -663,8 +711,7 @@ class NumPyNumberPackager(DefaultPackager):
|
|
|
663
711
|
DEFAULT_PACKING_ARTIFACT_TYPE = ArtifactType.RESULT
|
|
664
712
|
PACK_SUBCLASSES = True # To include all dtypes ('float32', 'uint8', ...)
|
|
665
713
|
|
|
666
|
-
|
|
667
|
-
def pack_result(cls, obj: np.number, key: str) -> dict:
|
|
714
|
+
def pack_result(self, obj: np.number, key: str) -> dict:
|
|
668
715
|
"""
|
|
669
716
|
Pack a numpy number as a result.
|
|
670
717
|
|
|
@@ -682,8 +682,7 @@ class PandasDataFramePackager(DefaultPackager):
|
|
|
682
682
|
PACKABLE_OBJECT_TYPE = pd.DataFrame
|
|
683
683
|
DEFAULT_PACKING_ARTIFACT_TYPE = ArtifactType.DATASET
|
|
684
684
|
|
|
685
|
-
|
|
686
|
-
def get_default_unpacking_artifact_type(cls, data_item: DataItem) -> str:
|
|
685
|
+
def get_default_unpacking_artifact_type(self, data_item: DataItem) -> str:
|
|
687
686
|
"""
|
|
688
687
|
Get the default artifact type used for unpacking. Returns dataset if the data item represents a
|
|
689
688
|
`DatasetArtifact` and otherwise, file.
|
|
@@ -697,8 +696,7 @@ class PandasDataFramePackager(DefaultPackager):
|
|
|
697
696
|
return ArtifactType.DATASET
|
|
698
697
|
return ArtifactType.FILE
|
|
699
698
|
|
|
700
|
-
|
|
701
|
-
def pack_result(cls, obj: pd.DataFrame, key: str) -> dict:
|
|
699
|
+
def pack_result(self, obj: pd.DataFrame, key: str) -> dict:
|
|
702
700
|
"""
|
|
703
701
|
Pack a dataframe as a result.
|
|
704
702
|
|
|
@@ -728,9 +726,8 @@ class PandasDataFramePackager(DefaultPackager):
|
|
|
728
726
|
|
|
729
727
|
return super().pack_result(obj=dataframe_dictionary, key=key)
|
|
730
728
|
|
|
731
|
-
@classmethod
|
|
732
729
|
def pack_file(
|
|
733
|
-
|
|
730
|
+
self,
|
|
734
731
|
obj: pd.DataFrame,
|
|
735
732
|
key: str,
|
|
736
733
|
file_format: str = None,
|
|
@@ -762,7 +759,7 @@ class PandasDataFramePackager(DefaultPackager):
|
|
|
762
759
|
# Save to file:
|
|
763
760
|
formatter = PandasSupportedFormat.get_format_handler(fmt=file_format)
|
|
764
761
|
temp_directory = pathlib.Path(tempfile.mkdtemp())
|
|
765
|
-
|
|
762
|
+
self.add_future_clearing_path(path=temp_directory)
|
|
766
763
|
file_path = temp_directory / f"{key}.{file_format}"
|
|
767
764
|
read_kwargs = formatter.to(
|
|
768
765
|
obj=obj, file_path=str(file_path), flatten=flatten, **to_kwargs
|
|
@@ -773,8 +770,7 @@ class PandasDataFramePackager(DefaultPackager):
|
|
|
773
770
|
|
|
774
771
|
return artifact, {"file_format": file_format, "read_kwargs": read_kwargs}
|
|
775
772
|
|
|
776
|
-
|
|
777
|
-
def pack_dataset(cls, obj: pd.DataFrame, key: str, file_format: str = "parquet"):
|
|
773
|
+
def pack_dataset(self, obj: pd.DataFrame, key: str, file_format: str = "parquet"):
|
|
778
774
|
"""
|
|
779
775
|
Pack a pandas dataframe as a dataset.
|
|
780
776
|
|
|
@@ -786,9 +782,8 @@ class PandasDataFramePackager(DefaultPackager):
|
|
|
786
782
|
"""
|
|
787
783
|
return DatasetArtifact(key=key, df=obj, format=file_format), {}
|
|
788
784
|
|
|
789
|
-
@classmethod
|
|
790
785
|
def unpack_file(
|
|
791
|
-
|
|
786
|
+
self,
|
|
792
787
|
data_item: DataItem,
|
|
793
788
|
file_format: str = None,
|
|
794
789
|
read_kwargs: dict = None,
|
|
@@ -804,8 +799,7 @@ class PandasDataFramePackager(DefaultPackager):
|
|
|
804
799
|
:return: The unpacked series.
|
|
805
800
|
"""
|
|
806
801
|
# Get the file:
|
|
807
|
-
file_path =
|
|
808
|
-
cls.add_future_clearing_path(path=file_path)
|
|
802
|
+
file_path = self.get_data_item_local_path(data_item=data_item)
|
|
809
803
|
|
|
810
804
|
# Get the archive format by the file extension if needed:
|
|
811
805
|
if file_format is None:
|
|
@@ -822,8 +816,7 @@ class PandasDataFramePackager(DefaultPackager):
|
|
|
822
816
|
read_kwargs = {}
|
|
823
817
|
return formatter.read(file_path=file_path, **read_kwargs)
|
|
824
818
|
|
|
825
|
-
|
|
826
|
-
def unpack_dataset(cls, data_item: DataItem):
|
|
819
|
+
def unpack_dataset(self, data_item: DataItem):
|
|
827
820
|
"""
|
|
828
821
|
Unpack a pandas dataframe from a dataset artifact.
|
|
829
822
|
|
|
@@ -864,8 +857,7 @@ class PandasSeriesPackager(PandasDataFramePackager):
|
|
|
864
857
|
PACKABLE_OBJECT_TYPE = pd.Series
|
|
865
858
|
DEFAULT_PACKING_ARTIFACT_TYPE = ArtifactType.FILE
|
|
866
859
|
|
|
867
|
-
|
|
868
|
-
def get_supported_artifact_types(cls) -> List[str]:
|
|
860
|
+
def get_supported_artifact_types(self) -> List[str]:
|
|
869
861
|
"""
|
|
870
862
|
Get all the supported artifact types on this packager. It will be the same as `PandasDataFramePackager` but
|
|
871
863
|
without the 'dataset' artifact type support.
|
|
@@ -876,8 +868,7 @@ class PandasSeriesPackager(PandasDataFramePackager):
|
|
|
876
868
|
supported_artifacts.remove("dataset")
|
|
877
869
|
return supported_artifacts
|
|
878
870
|
|
|
879
|
-
|
|
880
|
-
def pack_result(cls, obj: pd.Series, key: str) -> dict:
|
|
871
|
+
def pack_result(self, obj: pd.Series, key: str) -> dict:
|
|
881
872
|
"""
|
|
882
873
|
Pack a series as a result.
|
|
883
874
|
|
|
@@ -888,9 +879,8 @@ class PandasSeriesPackager(PandasDataFramePackager):
|
|
|
888
879
|
"""
|
|
889
880
|
return super().pack_result(obj=pd.DataFrame(obj), key=key)
|
|
890
881
|
|
|
891
|
-
@classmethod
|
|
892
882
|
def pack_file(
|
|
893
|
-
|
|
883
|
+
self,
|
|
894
884
|
obj: pd.Series,
|
|
895
885
|
key: str,
|
|
896
886
|
file_format: str = None,
|
|
@@ -926,9 +916,8 @@ class PandasSeriesPackager(PandasDataFramePackager):
|
|
|
926
916
|
# Return the artifact with the updated instructions:
|
|
927
917
|
return artifact, {**instructions, "column_name": column_name}
|
|
928
918
|
|
|
929
|
-
@classmethod
|
|
930
919
|
def unpack_file(
|
|
931
|
-
|
|
920
|
+
self,
|
|
932
921
|
data_item: DataItem,
|
|
933
922
|
file_format: str = None,
|
|
934
923
|
read_kwargs: dict = None,
|