sinapsis-data-tools 0.2.26__tar.gz → 0.2.28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/PKG-INFO +1 -1
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/__init__.py +7 -5
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/inference}/ml_base_inference.py +5 -5
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/inference}/sklearn_inference.py +1 -2
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/inference}/xgboost_inference.py +1 -2
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training}/ml_base_training.py +32 -28
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training}/sklearn_manifold.py +24 -27
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training}/sklearn_train.py +2 -3
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training}/xgboost_train.py +1 -2
- sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/transformation/categorical_to_numerical.py +45 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/csv_datasets.py +3 -2
- sinapsis_data_tools-0.2.28/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/dataset_splitter.py +66 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/sklearn_datasets.py +16 -18
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/sktime_datasets.py +43 -35
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_tools.egg-info/PKG-INFO +1 -1
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_tools.egg-info/SOURCES.txt +12 -7
- sinapsis_data_tools-0.2.28/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/audio_writers/__init__.py +0 -0
- sinapsis_data_tools-0.2.28/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers/__init__.py +0 -0
- sinapsis_data_tools-0.2.28/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers/__init__.py +0 -0
- sinapsis_data_tools-0.2.28/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/pyproject.toml +1 -1
- sinapsis_data_tools-0.2.26/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/dataset_splitter.py +0 -245
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/LICENSE +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/README.md +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src → sinapsis_data_tools-0.2.28/packages}/sinapsis_data_analysis/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/helpers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_readers/src/sinapsis_data_readers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_readers/src/sinapsis_data_readers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis}/helpers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/helpers/excluded_models.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/helpers/model_metrics.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/inference}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_readers/src/sinapsis_data_readers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/text_readers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/coco_dataclasses.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/csv_reader.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/file_path_helpers.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/sklearn_dataset_subset.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/sktime_datasets_subset.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/text_input_helpers.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/audio_reader_pydub.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/audio_reader_soundfile.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/audio_reader_to_bytes.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/base_audio_reader.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/base_file_data_loader.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_visualization/src → sinapsis_data_tools-0.2.28/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_visualization/src/sinapsis_data_visualization → sinapsis_data_tools-0.2.28/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/base_image_folder_data_loader.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/coco_dataset_reader.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/csv_dataset_reader.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/image_folder_reader_cv2.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/image_folder_reader_kornia.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/text_readers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/text_readers/text_input.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_writers/src/sinapsis_data_writers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/base_video_reader.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/video_reader_cv2.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/video_reader_dali.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/video_reader_ffmpeg.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/video_reader_torchcodec.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_tools.egg-info/dependency_links.txt +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_tools.egg-info/requires.txt +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_tools.egg-info/top_level.txt +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_writers/src/sinapsis_data_writers/helpers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_visualization/src}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/annotation_writers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_visualization/src/sinapsis_data_visualization}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/audio_writers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/annotation_drawer_tools.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/annotation_drawer_types.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/color_utils.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/detection_utils.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/plot_distributions.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/scikit_pca_analysis.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/base_annotation_drawer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/base_visualization_template.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/bbox_drawer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/data_distribution_visualization.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/key_points_drawer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/label_drawer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/oriented_bbox_drawer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/segmentation_mask_drawer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/tabular_data_visualization.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_writers/src/sinapsis_data_writers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_writers/src/sinapsis_data_writers/helpers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers → sinapsis_data_tools-0.2.28/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/annotation_writers}/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/annotation_writers/base_annotation_writer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/annotation_writers/coco_annotation_writer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/audio_writers/audio_writer_soundfile.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/generic_data_writers/generic_data_json_writer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers/image_saver.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers/pdf_to_image_converter.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers/base_video_writer.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers/video_writer_cv2.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers/video_writer_ffmpeg.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/audio_encoder.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/encode_img_base64.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/file_downloader.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/image_color_space_converter_cv.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/image_color_space_converter_torch.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/__init__.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/image_color_conversion_np.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/mask_non_roi.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/packet_buffer_queue.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/source_history_aggregator.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/text_content_filter.py +0 -0
- {sinapsis_data_tools-0.2.26 → sinapsis_data_tools-0.2.28}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sinapsis-data-tools
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.28
|
|
4
4
|
Summary: Module that contains different packages to perform data-related operations through Sinapsis templates.
|
|
5
5
|
Author-email: SinapsisAI <dev@sinapsis.tech>
|
|
6
6
|
Project-URL: Homepage, https://sinapsis.tech
|
|
@@ -7,13 +7,15 @@ from sinapsis_core.template_base import Template
|
|
|
7
7
|
_root_lib_path: str = "sinapsis_data_analysis.templates"
|
|
8
8
|
|
|
9
9
|
_ADDITIONAL_TEMPLATE_MODULES = [
|
|
10
|
-
f"{_root_lib_path}.sklearn_manifold",
|
|
11
|
-
f"{_root_lib_path}.sklearn_train",
|
|
12
|
-
f"{_root_lib_path}.xgboost_train",
|
|
10
|
+
f"{_root_lib_path}.training.sklearn_manifold",
|
|
11
|
+
f"{_root_lib_path}.training.sklearn_train",
|
|
12
|
+
f"{_root_lib_path}.training.xgboost_train",
|
|
13
|
+
|
|
13
14
|
]
|
|
14
15
|
_template_lookup: dict = {
|
|
15
|
-
"SKLearnInference": f"{_root_lib_path}.sklearn_inference",
|
|
16
|
-
"XGBoostInference": f"{_root_lib_path}.xgboost_inference",
|
|
16
|
+
"SKLearnInference": f"{_root_lib_path}.inference.sklearn_inference",
|
|
17
|
+
"XGBoostInference": f"{_root_lib_path}.inference.xgboost_inference",
|
|
18
|
+
"CategoricalToNumerical": f"{_root_lib_path}.transformation.categorical_to_numerical",
|
|
17
19
|
}
|
|
18
20
|
for t_module in _ADDITIONAL_TEMPLATE_MODULES:
|
|
19
21
|
_template_lookup |= _import_template_package(t_module)
|
|
@@ -7,7 +7,7 @@ import numpy as np
|
|
|
7
7
|
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
8
8
|
from sinapsis_core.template_base.base_models import TemplateAttributes
|
|
9
9
|
from sinapsis_core.template_base.template import Template
|
|
10
|
-
from sinapsis_core.utils.env_var_keys import
|
|
10
|
+
from sinapsis_core.utils.env_var_keys import WORKING_DIR
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class MLBaseInference(Template):
|
|
@@ -26,8 +26,9 @@ class MLBaseInference(Template):
|
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
28
|
model_path: str
|
|
29
|
-
root_dir : str =
|
|
29
|
+
root_dir : str = WORKING_DIR
|
|
30
30
|
generic_field_key: str
|
|
31
|
+
target_key : str
|
|
31
32
|
|
|
32
33
|
def __init__(self, attributes: TemplateAttributes) -> None:
|
|
33
34
|
super().__init__(attributes)
|
|
@@ -42,7 +43,7 @@ class MLBaseInference(Template):
|
|
|
42
43
|
Returns:
|
|
43
44
|
Any: The data from the generic field
|
|
44
45
|
"""
|
|
45
|
-
return
|
|
46
|
+
return container.data_frames[-1].content
|
|
46
47
|
|
|
47
48
|
@staticmethod
|
|
48
49
|
def data_is_valid(data: Any) -> bool:
|
|
@@ -68,7 +69,7 @@ class MLBaseInference(Template):
|
|
|
68
69
|
Any: The preprocessed data
|
|
69
70
|
"""
|
|
70
71
|
try:
|
|
71
|
-
data.pop(
|
|
72
|
+
data.pop(self.attributes.target_key)
|
|
72
73
|
except (KeyError, IndexError):
|
|
73
74
|
self.logger.info("No target column")
|
|
74
75
|
return data
|
|
@@ -95,7 +96,6 @@ class MLBaseInference(Template):
|
|
|
95
96
|
Returns:
|
|
96
97
|
np.ndarray: The model's predictions
|
|
97
98
|
"""
|
|
98
|
-
|
|
99
99
|
return self.model.predict(data)
|
|
100
100
|
|
|
101
101
|
def execute(self, container: DataContainer) -> DataContainer:
|
|
@@ -3,9 +3,8 @@ from typing import Any
|
|
|
3
3
|
|
|
4
4
|
import joblib
|
|
5
5
|
from sinapsis_core.template_base.base_models import UIPropertiesMetadata
|
|
6
|
-
|
|
7
6
|
from sinapsis_data_analysis.helpers.tags import Tags
|
|
8
|
-
from sinapsis_data_analysis.templates.ml_base_inference import MLBaseInference
|
|
7
|
+
from sinapsis_data_analysis.templates.inference.ml_base_inference import MLBaseInference
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
class SKLearnInference(MLBaseInference):
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
from sinapsis_core.template_base.base_models import UIPropertiesMetadata
|
|
5
|
-
|
|
6
5
|
from sinapsis_data_analysis.helpers.tags import Tags
|
|
7
|
-
from sinapsis_data_analysis.templates.sklearn_inference import SKLearnInference
|
|
6
|
+
from sinapsis_data_analysis.templates.inference.sklearn_inference import SKLearnInference
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
class XGBoostInference(SKLearnInference):
|
|
@@ -4,11 +4,14 @@ from abc import abstractmethod
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
|
-
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
7
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket
|
|
8
8
|
from sinapsis_core.template_base.base_models import TemplateAttributes
|
|
9
9
|
from sinapsis_core.template_base.dynamic_template import BaseDynamicWrapperTemplate
|
|
10
10
|
from sinapsis_core.utils.env_var_keys import WORKING_DIR
|
|
11
|
-
from
|
|
11
|
+
from sinapsis_data_analysis.helpers.model_metrics import (
|
|
12
|
+
ModelMetrics,
|
|
13
|
+
ModelPredictionResults,
|
|
14
|
+
)
|
|
12
15
|
from sklearn.base import is_classifier, is_regressor
|
|
13
16
|
from sklearn.metrics import (
|
|
14
17
|
accuracy_score,
|
|
@@ -20,21 +23,14 @@ from sklearn.metrics import (
|
|
|
20
23
|
recall_score,
|
|
21
24
|
)
|
|
22
25
|
|
|
23
|
-
from sinapsis_data_analysis.helpers.model_metrics import (
|
|
24
|
-
ModelMetrics,
|
|
25
|
-
ModelPredictionResults,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
26
|
|
|
29
27
|
class MLBaseAttributes(TemplateAttributes):
|
|
30
28
|
"""Base attributes for machine learning model templates.
|
|
31
29
|
|
|
32
30
|
Attributes:
|
|
33
|
-
generic_field_key (str): Key of the generic field where datasets are stored.
|
|
34
31
|
model_save_path (str): Path where the trained model will be saved.
|
|
35
32
|
"""
|
|
36
33
|
|
|
37
|
-
generic_field_key: str | None = None
|
|
38
34
|
root_dir : str = WORKING_DIR
|
|
39
35
|
model_save_path: str
|
|
40
36
|
|
|
@@ -69,7 +65,9 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
69
65
|
Returns:
|
|
70
66
|
Any: The dataset from the generic field.
|
|
71
67
|
"""
|
|
72
|
-
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
return container.data_frames
|
|
73
71
|
|
|
74
72
|
@staticmethod
|
|
75
73
|
def dataset_is_valid(dataset: Any) -> bool:
|
|
@@ -83,7 +81,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
83
81
|
"""
|
|
84
82
|
return dataset is not None
|
|
85
83
|
|
|
86
|
-
def process_dataset(self, dataset:
|
|
84
|
+
def process_dataset(self, dataset: list[DataFramePacket]) -> tuple | None:
|
|
87
85
|
"""
|
|
88
86
|
Extracts x_train, y_train, x_test, y_test from the dataset
|
|
89
87
|
|
|
@@ -94,18 +92,23 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
94
92
|
tuple | None: A tuple containing (x_train, y_train, x_test, y_test)
|
|
95
93
|
or None if the dataset doesn't have the expected attributes
|
|
96
94
|
"""
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
95
|
+
x_train, y_train, x_test, y_test = None, None, None, None
|
|
96
|
+
for set in dataset:
|
|
97
|
+
if "x_train" in set.source:
|
|
98
|
+
x_train = set.content
|
|
99
|
+
elif "x_test" in set.source:
|
|
100
|
+
x_test = set.content
|
|
101
|
+
elif "y_train" in set.source:
|
|
102
|
+
y_train = set.content
|
|
103
|
+
elif "y_test" in set.source:
|
|
104
|
+
y_test = set.content
|
|
105
|
+
else:
|
|
106
|
+
if "x_dataset" in set.source:
|
|
107
|
+
x_train = set.content
|
|
108
|
+
elif "y_dataset" in set.source:
|
|
109
|
+
y_train = set.content
|
|
110
|
+
|
|
111
|
+
return x_train, y_train, x_test, y_test
|
|
109
112
|
|
|
110
113
|
def train_model(self, x_train: Any, y_train: Any) -> None:
|
|
111
114
|
"""Train the model using the training data
|
|
@@ -114,6 +117,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
114
117
|
x_train (Any): The training features
|
|
115
118
|
y_train (Any): The training targets
|
|
116
119
|
"""
|
|
120
|
+
|
|
117
121
|
self.trained_model = self.model.fit(x_train, y_train)
|
|
118
122
|
|
|
119
123
|
@staticmethod
|
|
@@ -152,7 +156,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
152
156
|
|
|
153
157
|
return metrics
|
|
154
158
|
|
|
155
|
-
def calculate_metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> ModelMetrics:
|
|
159
|
+
def calculate_metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> ModelMetrics | None:
|
|
156
160
|
"""
|
|
157
161
|
Detects whether the model is a classifier or regressor and calculates
|
|
158
162
|
the appropriate metrics
|
|
@@ -169,7 +173,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
169
173
|
return self.calculate_classification_metrics(y_true, y_pred)
|
|
170
174
|
elif is_regressor(self.trained_model):
|
|
171
175
|
return self.calculate_regression_metrics(y_true, y_pred)
|
|
172
|
-
return
|
|
176
|
+
return None
|
|
173
177
|
|
|
174
178
|
def generate_predictions(self, x_test: np.ndarray, y_test: np.ndarray) -> ModelPredictionResults | None:
|
|
175
179
|
"""
|
|
@@ -183,7 +187,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
183
187
|
Returns:
|
|
184
188
|
ModelPredictionResults: Object containing predictions and metrics
|
|
185
189
|
"""
|
|
186
|
-
if self.trained_model is not None:
|
|
190
|
+
if self.trained_model is not None and x_test is not None:
|
|
187
191
|
predictions = self.trained_model.predict(x_test)
|
|
188
192
|
|
|
189
193
|
metrics = self.calculate_metrics(y_test, predictions)
|
|
@@ -204,8 +208,8 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
204
208
|
ModelPredictionResults: Object containing predictions and metrics.
|
|
205
209
|
"""
|
|
206
210
|
x_train, y_train, x_test, y_test = processed_data
|
|
207
|
-
|
|
208
211
|
self.train_model(x_train, y_train)
|
|
212
|
+
|
|
209
213
|
return self.generate_predictions(x_test, y_test)
|
|
210
214
|
|
|
211
215
|
def save_model(self) -> None:
|
|
@@ -221,7 +225,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
221
225
|
try:
|
|
222
226
|
os.makedirs(os.path.dirname(full_path), exist_ok=True)
|
|
223
227
|
self._save_model_implementation(full_path)
|
|
224
|
-
self.logger.info(f"Model saved at {
|
|
228
|
+
self.logger.info(f"Model saved at {full_path}")
|
|
225
229
|
except (MemoryError, TypeError) as e:
|
|
226
230
|
self.logger.error(f"Error saving model: {e}")
|
|
227
231
|
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
from typing import cast
|
|
3
2
|
|
|
4
3
|
import numpy as np
|
|
5
4
|
import pandas as pd
|
|
@@ -13,12 +12,8 @@ from sinapsis_core.template_base.dynamic_template import (
|
|
|
13
12
|
)
|
|
14
13
|
from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
|
|
15
14
|
from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
|
|
16
|
-
from sinapsis_data_readers.templates.datasets_readers.dataset_splitter import (
|
|
17
|
-
TabularDatasetSplit,
|
|
18
|
-
)
|
|
19
|
-
from sklearn import manifold
|
|
20
|
-
|
|
21
15
|
from sinapsis_data_analysis.helpers.tags import Tags
|
|
16
|
+
from sklearn import manifold
|
|
22
17
|
|
|
23
18
|
|
|
24
19
|
class ManifoldResults(BaseModel):
|
|
@@ -42,6 +37,14 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
42
37
|
TSNE, MDS, Isomap, etc.
|
|
43
38
|
"""
|
|
44
39
|
|
|
40
|
+
class AttributesBaseModel(TemplateAttributes):
|
|
41
|
+
"""Attributes for the MLBaseInference template.
|
|
42
|
+
|
|
43
|
+
Attributes:
|
|
44
|
+
target_key (str): Key of the generic field where data is stored.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
target_key : str = 'target'
|
|
45
48
|
WrapperEntry = WrapperEntryConfig(
|
|
46
49
|
wrapped_object=manifold,
|
|
47
50
|
signature_from_doc_string=True,
|
|
@@ -54,15 +57,6 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
54
57
|
tags=[Tags.DATA_ANALYSIS, Tags.DYNAMIC, Tags.MANIFOLD, Tags.SKLEARN, Tags.MODELS],
|
|
55
58
|
)
|
|
56
59
|
|
|
57
|
-
class AttributesBaseModel(TemplateAttributes):
|
|
58
|
-
"""Attributes for the SKLearnManifold template.
|
|
59
|
-
|
|
60
|
-
Attributes:
|
|
61
|
-
generic_field_key (str): Key of the generic field
|
|
62
|
-
where the input data is stored.
|
|
63
|
-
"""
|
|
64
|
-
|
|
65
|
-
generic_field_key: str
|
|
66
60
|
|
|
67
61
|
def __init__(self, attributes: TemplateAttributes) -> None:
|
|
68
62
|
super().__init__(attributes)
|
|
@@ -83,7 +77,7 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
83
77
|
array_data = np.array(feature_arrays)
|
|
84
78
|
return array_data.reshape(array_data.shape[0], -1)
|
|
85
79
|
|
|
86
|
-
def get_dataset(self, container: DataContainer) ->
|
|
80
|
+
def get_dataset(self, container: DataContainer) -> list:
|
|
87
81
|
"""Get the dataset from the data container
|
|
88
82
|
|
|
89
83
|
Args:
|
|
@@ -93,13 +87,10 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
93
87
|
TabularDatasetSplit | None: The dataset from the generic field,
|
|
94
88
|
or None if not found
|
|
95
89
|
"""
|
|
96
|
-
|
|
97
|
-
dataset = cast(TabularDatasetSplit, dataset)
|
|
98
|
-
if dataset:
|
|
99
|
-
return dataset
|
|
100
|
-
return None
|
|
90
|
+
return container.data_frames
|
|
101
91
|
|
|
102
|
-
|
|
92
|
+
|
|
93
|
+
def process_dataset(self, dataset: list) -> ManifoldResults | None:
|
|
103
94
|
"""
|
|
104
95
|
Extracts the training data, reshapes it, and applies the
|
|
105
96
|
manifold learning transformation
|
|
@@ -111,11 +102,17 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
111
102
|
ManifoldResults | None: Results of the manifold transformation,
|
|
112
103
|
or None if the dataset is empty
|
|
113
104
|
"""
|
|
114
|
-
x_train =
|
|
115
|
-
|
|
105
|
+
x_train, y_train = None, None
|
|
106
|
+
for set in dataset:
|
|
107
|
+
if "x_train" in set.source:
|
|
108
|
+
x_train = set.content
|
|
109
|
+
|
|
110
|
+
elif "y_train" in set.source:
|
|
111
|
+
y_train = set.content
|
|
112
|
+
else:
|
|
113
|
+
y_train = set.content[self.attributes.target_key]
|
|
114
|
+
x_train = set.content.pop(self.attributes.target_key)
|
|
116
115
|
|
|
117
|
-
if x_train is None or x_train.empty:
|
|
118
|
-
return None
|
|
119
116
|
|
|
120
117
|
x_train_reshaped = self.reshape_arrays(x_train)
|
|
121
118
|
x_transformed = self.manifold_model.fit_transform(x_train_reshaped)
|
|
@@ -141,7 +138,7 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
141
138
|
results = self.process_dataset(dataset)
|
|
142
139
|
|
|
143
140
|
if results is not None:
|
|
144
|
-
self._set_generic_data(container, results)
|
|
141
|
+
self._set_generic_data(container, results.model_dump())
|
|
145
142
|
|
|
146
143
|
return container
|
|
147
144
|
|
|
@@ -5,8 +5,6 @@ from sinapsis_core.template_base.base_models import UIPropertiesMetadata
|
|
|
5
5
|
from sinapsis_core.template_base.dynamic_template import WrapperEntryConfig
|
|
6
6
|
from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
|
|
7
7
|
from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
|
|
8
|
-
from sklearn import linear_model, neighbors, neural_network, svm, tree
|
|
9
|
-
|
|
10
8
|
from sinapsis_data_analysis.helpers.excluded_models import (
|
|
11
9
|
excluded_linear_models,
|
|
12
10
|
excluded_neighbors_models,
|
|
@@ -14,7 +12,8 @@ from sinapsis_data_analysis.helpers.excluded_models import (
|
|
|
14
12
|
excluded_tree_models,
|
|
15
13
|
)
|
|
16
14
|
from sinapsis_data_analysis.helpers.tags import Tags
|
|
17
|
-
from sinapsis_data_analysis.templates.ml_base_training import MLBaseTraining
|
|
15
|
+
from sinapsis_data_analysis.templates.training.ml_base_training import MLBaseTraining
|
|
16
|
+
from sklearn import linear_model, neighbors, neural_network, svm, tree
|
|
18
17
|
|
|
19
18
|
|
|
20
19
|
class SKLearnLinearModelsTrain(MLBaseTraining):
|
|
@@ -5,9 +5,8 @@ from sinapsis_core.template_base.base_models import UIPropertiesMetadata
|
|
|
5
5
|
from sinapsis_core.template_base.dynamic_template import WrapperEntryConfig
|
|
6
6
|
from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
|
|
7
7
|
from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
|
|
8
|
-
|
|
9
8
|
from sinapsis_data_analysis.helpers.tags import Tags
|
|
10
|
-
from sinapsis_data_analysis.templates.sklearn_train import SKLearnLinearModelsTrain
|
|
9
|
+
from sinapsis_data_analysis.templates.training.sklearn_train import SKLearnLinearModelsTrain
|
|
11
10
|
|
|
12
11
|
INCLUDED_MODELS = [
|
|
13
12
|
"XGBClassifier",
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket
|
|
3
|
+
from sinapsis_core.template_base.base_models import TemplateAttributes
|
|
4
|
+
from sinapsis_core.template_base.template import Template
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CategoricalToNumerical(Template):
    """Template that encodes categorical data frame columns as integer codes.

    For every packet present in ``container.data_frames`` when ``execute`` is
    called, the object, string and category columns of its content are replaced
    by integer codes. A new ``DataFramePacket`` holding the encoded data, with
    the per-column category mappings as ``generic_data``, is appended to the
    container.
    """

    class AttributesBaseModel(TemplateAttributes):
        """Attributes for the CategoricalToNumerical template."""

        # Required attribute; none of the methods of this class reads it.
        generic_key: str

    @staticmethod
    def map_categorical_to_numerical(df: pd.DataFrame | pd.Series) -> tuple:
        """Replace categorical values with integer codes.

        Codes are assigned per column in order of first appearance of the
        string representation of each value.

        Args:
            df (pd.DataFrame | pd.Series): Data to encode. A DataFrame is
                modified in place; a Series is not modified, a new encoded
                Series is returned instead.

        Returns:
            tuple: ``(encoded_data, category_maps)`` where ``category_maps``
                maps each encoded column name (or the Series name) to its
                ``{category: code}`` dictionary. It is empty when nothing
                was encoded.
        """
        categorical_dtypes = ["object", "string", "category"]

        def build_mapping(column: pd.Series) -> dict:
            categories = column.astype("string").unique()
            return {cat: idx for idx, cat in enumerate(categories)}

        if isinstance(df, pd.Series):
            # A Series has no columns: ``df[df.name]`` would be a label lookup
            # on its index, so the Series itself is encoded directly.
            if df.dtype not in categorical_dtypes:
                return df, {}
            mapping = build_mapping(df)
            return df.map(mapping), {df.name: mapping}

        categorical_cols = df.select_dtypes(include=categorical_dtypes).columns.tolist()
        category_maps = {col: build_mapping(df[col]) for col in categorical_cols}
        for col, mapping in category_maps.items():
            df[col] = df[col].map(mapping)
        return df, category_maps

    @staticmethod
    def unmap_numerical_to_categorical(
        df: pd.DataFrame | pd.Series, categories: dict
    ) -> pd.DataFrame | pd.Series:
        """Restore the original categories from their integer codes.

        Args:
            df (pd.DataFrame | pd.Series): Encoded data. A DataFrame is
                modified in place; a Series is not modified.
            categories (dict): Per-column ``{category: code}`` mappings, as
                returned by ``map_categorical_to_numerical``.

        Returns:
            pd.DataFrame | pd.Series: The data with codes replaced by the
                corresponding categories.
        """
        inverse_maps = {
            col: {code: cat for cat, code in mapping.items()}
            for col, mapping in categories.items()
        }

        if isinstance(df, pd.Series):
            # Same reasoning as in the encoder: look the mapping up by the
            # Series name instead of indexing the Series with it.
            inverse = inverse_maps.get(df.name)
            return df if inverse is None else df.map(inverse)

        for col, inverse in inverse_maps.items():
            df[col] = df[col].map(inverse)
        return df

    def execute(self, container: DataContainer) -> DataContainer:
        """Encode every data frame packet and append the result to the container.

        Args:
            container (DataContainer): Container whose ``data_frames`` are read.

        Returns:
            DataContainer: The same container, with one additional packet per
                data frame packet that was present on entry.
        """
        # Iterate over a snapshot: packets are appended to the same list below,
        # and looping over the live list would process its own output forever.
        for data_frame in list(container.data_frames):
            transformed_dataset, labels = self.map_categorical_to_numerical(data_frame.content)
            container.data_frames.append(DataFramePacket(content=transformed_dataset, generic_data=labels))

        return container
|
|
44
|
+
|
|
45
|
+
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
import os
|
|
3
3
|
|
|
4
|
-
from sinapsis_core.data_containers.data_packet import DataContainer, TimeSeriesPacket
|
|
4
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket, TimeSeriesPacket
|
|
5
5
|
from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType
|
|
6
6
|
from sinapsis_core.template_base.template import Template
|
|
7
7
|
from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
|
|
@@ -25,6 +25,7 @@ class CSVDatasetReader(Template):
|
|
|
25
25
|
packet = TimeSeriesPacket(content=self.csv_file)
|
|
26
26
|
container.time_series.append(packet)
|
|
27
27
|
else:
|
|
28
|
-
|
|
28
|
+
packet = DataFramePacket(content=self.csv_file)
|
|
29
|
+
container.data_frames.append(packet)
|
|
29
30
|
|
|
30
31
|
return container
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket
|
|
5
|
+
from sinapsis_core.template_base import Template
|
|
6
|
+
from sinapsis_core.template_base.base_models import TemplateAttributes
|
|
7
|
+
from sklearn.model_selection import train_test_split
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CSVDatasetSplitter(Template):
    """
    Template to split a tabular data set into test and train samples.
    The template retrieves the dataset from the dataframe packets of the
    container and stores the features and targets as new dataframe packets,
    whose source indicates whether they hold train or test samples.
    """

    class AttributesBaseModel(TemplateAttributes):
        """Attributes for the CSVDatasetSplitter template.

        Attributes:
            target_key (str): Column holding the labels.
            feature_key (str | None): Column holding the features. When not
                set, every column except ``target_key`` is used.
            random_state (int): Seed forwarded to ``train_test_split``.
            train_size (float): Value forwarded to ``train_test_split`` as
                ``train_size``. A falsy value disables the split.
        """

        target_key: str = "target"  # labels
        feature_key: str | None = None  # arrays
        random_state: int = 42
        train_size: float = 0.2

    def extract_x_y_from_packet(
        self, data_frame: DataFramePacket
    ) -> tuple[pd.DataFrame | pd.Series | None, pd.Series | None]:
        """Separate the features and the target stored in a packet.

        Args:
            data_frame (DataFramePacket): Packet whose content is read.

        Returns:
            tuple: ``(feature, target)``. Entries obtained through ``.get``
                are None when the requested key is not present.
        """
        content = data_frame.content
        target = content.get(self.attributes.target_key)
        if self.attributes.feature_key:
            feature = content.get(self.attributes.feature_key)
        else:
            # Drop from the packet's content: the packet itself has no ``drop``.
            feature = content.drop(columns=[self.attributes.target_key])
        return feature, target

    def split_dataset(self, x_data: pd.DataFrame, y_data: pd.DataFrame) -> dict:
        """Split features and targets into train and test samples.

        Args:
            x_data (pd.DataFrame): Features.
            y_data (pd.DataFrame): Targets.

        Returns:
            dict: The samples under the keys ``x_train``, ``y_train``,
                ``x_test`` and ``y_test``.
        """
        # Only train_size is passed; train_test_split then uses the complement
        # for the test sample. Passing ``1 - train_size`` explicitly is
        # redundant and is exposed to floating point rounding of the complement.
        x_train, x_test, y_train, y_test = train_test_split(
            x_data,
            y_data,
            train_size=self.attributes.train_size,
            random_state=self.attributes.random_state,
        )
        return {
            "x_train": x_train,
            "y_train": y_train,
            "x_test": x_test,
            "y_test": y_test,
        }

    def assign_to_dataframe_packets(self, container: DataContainer, source: str, data: pd.DataFrame) -> DataContainer:
        """Append ``data`` to the container as a new data frame packet.

        Args:
            container (DataContainer): Container receiving the packet.
            source (str): Sample name, appended to the instance name to build
                the packet source.
            data (pd.DataFrame): Content of the new packet.

        Returns:
            DataContainer: The same container.
        """
        df_packet = DataFramePacket(content=data, source=f"{self.instance_name}_{source}")
        container.data_frames.append(df_packet)
        return container

    def execute(self, container: DataContainer) -> DataContainer:
        """Split every data frame packet and store the samples as new packets.

        Args:
            container (DataContainer): Container whose ``data_frames`` are read.

        Returns:
            DataContainer: The same container with the new packets appended.
        """
        # Iterate over a snapshot: the samples are appended to the same list,
        # and looping over the live list would try to split them again.
        for packet in list(container.data_frames):
            feature, target = self.extract_x_y_from_packet(packet)
            if self.attributes.train_size:
                sample = self.split_dataset(feature, target)
            else:
                # Without a split the whole dataset is stored as the train
                # sample, so there are no test entries to add.
                sample = {"x_train": feature, "y_train": target}
            for name, data in sample.items():
                container = self.assign_to_dataframe_packets(container, name, data)
        return container
|
|
66
|
+
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
5
|
-
from sinapsis_core.data_containers.data_packet import DataContainer, TimeSeriesPacket
|
|
5
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket, TimeSeriesPacket
|
|
6
6
|
from sinapsis_core.template_base import Template
|
|
7
7
|
from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType, UIPropertiesMetadata
|
|
8
8
|
from sinapsis_core.template_base.dynamic_template import (
|
|
@@ -19,9 +19,6 @@ from sklearn.utils import Bunch
|
|
|
19
19
|
|
|
20
20
|
from sinapsis_data_readers.helpers import sklearn_dataset_subset
|
|
21
21
|
from sinapsis_data_readers.helpers.tags import Tags
|
|
22
|
-
from sinapsis_data_readers.templates.datasets_readers.dataset_splitter import (
|
|
23
|
-
TabularDatasetSplit,
|
|
24
|
-
)
|
|
25
22
|
|
|
26
23
|
TARGET: str = "target"
|
|
27
24
|
|
|
@@ -137,39 +134,40 @@ class SKLearnDatasets(BaseDynamicWrapperTemplate):
|
|
|
137
134
|
results: pd.DataFrame, feature_name_cols: list, target_name_cols: list, n_features: int, split_size: float
|
|
138
135
|
) -> dict:
|
|
139
136
|
"""Method to split the dataset into training and testing samples"""
|
|
140
|
-
if feature_name_cols:
|
|
137
|
+
if feature_name_cols is not None:
|
|
141
138
|
X = results[feature_name_cols]
|
|
142
139
|
y = results[target_name_cols]
|
|
143
140
|
else:
|
|
144
141
|
X = results.iloc[:, :n_features]
|
|
145
142
|
y = results.iloc[:, n_features:]
|
|
146
143
|
|
|
147
|
-
|
|
148
144
|
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=split_size, random_state=0)
|
|
149
|
-
|
|
150
|
-
x_train
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
y_test
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
return split_data.model_dump()
|
|
145
|
+
data_map = {
|
|
146
|
+
"x_train": x_train,
|
|
147
|
+
"y_train": y_train,
|
|
148
|
+
"x_test": x_test,
|
|
149
|
+
"y_test": y_test
|
|
150
|
+
}
|
|
151
|
+
return data_map
|
|
157
152
|
|
|
158
153
|
def execute(self, container: DataContainer) -> DataContainer:
|
|
159
154
|
sklearn_dataset = self.wrapped_callable.__func__(**self.dataset_attributes.model_dump())
|
|
160
155
|
dataset, feature_columns, target_columns, n_features = self.parse_results(sklearn_dataset)
|
|
156
|
+
|
|
161
157
|
if self.attributes.store_as_time_series:
|
|
162
158
|
time_series_packet = TimeSeriesPacket(content=dataset)
|
|
163
159
|
container.time_series.append(time_series_packet)
|
|
164
160
|
|
|
165
161
|
if self.attributes.split_dataset:
|
|
166
|
-
|
|
162
|
+
data_map = self.split_dataset(
|
|
167
163
|
dataset, feature_columns, target_columns, n_features, split_size=self.attributes.train_size
|
|
168
164
|
)
|
|
169
|
-
self._set_generic_data(container, split_dataset)
|
|
170
|
-
if sklearn_dataset and not self.attributes.split_dataset:
|
|
171
|
-
self._set_generic_data(container, dataset)
|
|
172
165
|
|
|
166
|
+
for name, df in data_map.items():
|
|
167
|
+
container.data_frames.append(DataFramePacket(content=df, source=f"{self.instance_name}_{name}"))
|
|
168
|
+
|
|
169
|
+
if sklearn_dataset and not self.attributes.split_dataset:
|
|
170
|
+
container.data_frames.append(DataFramePacket(content=dataset, source=self.instance_name))
|
|
173
171
|
return container
|
|
174
172
|
|
|
175
173
|
|