sinapsis-data-tools 0.2.27__tar.gz → 0.2.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/PKG-INFO +1 -1
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/helpers/excluded_models.py +13 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/inference/ml_base_inference.py +3 -4
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/inference/sklearn_inference.py +0 -1
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/inference/xgboost_inference.py +0 -1
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training/ml_base_training.py +31 -31
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training/sklearn_manifold.py +24 -27
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training/sklearn_train.py +43 -2
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training/xgboost_train.py +0 -1
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/transformation/categorical_to_numerical.py +8 -8
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/__init__.py +1 -2
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/csv_datasets.py +3 -2
- sinapsis_data_tools-0.2.29/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/dataset_splitter.py +69 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/sklearn_datasets.py +16 -17
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/sktime_datasets.py +43 -40
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_tools.egg-info/PKG-INFO +1 -1
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/pyproject.toml +1 -1
- sinapsis_data_tools-0.2.27/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/dataset_splitter.py +0 -254
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/LICENSE +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/README.md +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/helpers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/helpers/model_metrics.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/inference/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_analysis/src/sinapsis_data_analysis/templates/training/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/coco_dataclasses.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/csv_reader.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/file_path_helpers.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/sklearn_dataset_subset.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/sktime_datasets_subset.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/helpers/text_input_helpers.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/audio_reader_pydub.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/audio_reader_soundfile.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/audio_reader_to_bytes.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/audio_readers/base_audio_reader.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/base_file_data_loader.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/datasets_readers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/base_image_folder_data_loader.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/coco_dataset_reader.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/csv_dataset_reader.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/image_folder_reader_cv2.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/image_readers/image_folder_reader_kornia.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/text_readers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/text_readers/text_input.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/base_video_reader.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/video_reader_cv2.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/video_reader_dali.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/video_reader_ffmpeg.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_readers/src/sinapsis_data_readers/templates/video_readers/video_reader_torchcodec.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_tools.egg-info/SOURCES.txt +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_tools.egg-info/dependency_links.txt +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_tools.egg-info/requires.txt +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_tools.egg-info/top_level.txt +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/annotation_drawer_tools.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/annotation_drawer_types.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/color_utils.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/detection_utils.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/plot_distributions.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/scikit_pca_analysis.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/base_annotation_drawer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/base_visualization_template.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/bbox_drawer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/data_distribution_visualization.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/key_points_drawer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/label_drawer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/oriented_bbox_drawer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/segmentation_mask_drawer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_visualization/src/sinapsis_data_visualization/templates/tabular_data_visualization.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/helpers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/annotation_writers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/annotation_writers/base_annotation_writer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/annotation_writers/coco_annotation_writer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/audio_writers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/audio_writers/audio_writer_soundfile.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/generic_data_writers/generic_data_json_writer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers/image_saver.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/image_writers/pdf_to_image_converter.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers/base_video_writer.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers/video_writer_cv2.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_data_writers/src/sinapsis_data_writers/templates/video_writers/video_writer_ffmpeg.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/audio_encoder.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/encode_img_base64.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/file_downloader.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/image_color_space_converter_cv.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/image_color_space_converter_torch.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/helpers/tags.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/__init__.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/image_color_conversion_np.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/mask_non_roi.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/packet_buffer_queue.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/source_history_aggregator.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/packages/sinapsis_generic_data_tools/src/sinapsis_generic_data_tools/templates/text_content_filter.py +0 -0
- {sinapsis_data_tools-0.2.27 → sinapsis_data_tools-0.2.29}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sinapsis-data-tools
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.29
|
|
4
4
|
Summary: Module that contains different packages to perform data-related operations through Sinapsis templates.
|
|
5
5
|
Author-email: SinapsisAI <dev@sinapsis.tech>
|
|
6
6
|
Project-URL: Homepage, https://sinapsis.tech
|
|
@@ -25,3 +25,16 @@ excluded_neighbors_models = [
|
|
|
25
25
|
]
|
|
26
26
|
excluded_tree_models = ["plot_tree", "export_text", "export_graphviz", "BaseDecisionTree"]
|
|
27
27
|
excluded_svm_models = ["l1_min_c"]
|
|
28
|
+
excluded_cluster_models = ["affinity_propagation",
|
|
29
|
+
"cluster_optics_dbscan",
|
|
30
|
+
"cluster_optics_xi",
|
|
31
|
+
"compute_optics_graph",
|
|
32
|
+
"dbscan",
|
|
33
|
+
"estimate_bandwidth",
|
|
34
|
+
"get_bin_seeds",
|
|
35
|
+
"k_means",
|
|
36
|
+
"kmeans_plusplus",
|
|
37
|
+
"linkage_tree",
|
|
38
|
+
"mean_shift",
|
|
39
|
+
"spectral_clustering",
|
|
40
|
+
"ward_tree"]
|
|
@@ -7,7 +7,7 @@ import numpy as np
|
|
|
7
7
|
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
8
8
|
from sinapsis_core.template_base.base_models import TemplateAttributes
|
|
9
9
|
from sinapsis_core.template_base.template import Template
|
|
10
|
-
from sinapsis_core.utils.env_var_keys import
|
|
10
|
+
from sinapsis_core.utils.env_var_keys import WORKING_DIR
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class MLBaseInference(Template):
|
|
@@ -26,7 +26,7 @@ class MLBaseInference(Template):
|
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
28
|
model_path: str
|
|
29
|
-
root_dir : str =
|
|
29
|
+
root_dir : str = WORKING_DIR
|
|
30
30
|
generic_field_key: str
|
|
31
31
|
target_key : str
|
|
32
32
|
|
|
@@ -43,7 +43,7 @@ class MLBaseInference(Template):
|
|
|
43
43
|
Returns:
|
|
44
44
|
Any: The data from the generic field
|
|
45
45
|
"""
|
|
46
|
-
return
|
|
46
|
+
return container.data_frames[-1].content
|
|
47
47
|
|
|
48
48
|
@staticmethod
|
|
49
49
|
def data_is_valid(data: Any) -> bool:
|
|
@@ -96,7 +96,6 @@ class MLBaseInference(Template):
|
|
|
96
96
|
Returns:
|
|
97
97
|
np.ndarray: The model's predictions
|
|
98
98
|
"""
|
|
99
|
-
|
|
100
99
|
return self.model.predict(data)
|
|
101
100
|
|
|
102
101
|
def execute(self, container: DataContainer) -> DataContainer:
|
|
@@ -4,12 +4,14 @@ from abc import abstractmethod
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
|
-
import
|
|
8
|
-
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
7
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket
|
|
9
8
|
from sinapsis_core.template_base.base_models import TemplateAttributes
|
|
10
9
|
from sinapsis_core.template_base.dynamic_template import BaseDynamicWrapperTemplate
|
|
11
10
|
from sinapsis_core.utils.env_var_keys import WORKING_DIR
|
|
12
|
-
from
|
|
11
|
+
from sinapsis_data_analysis.helpers.model_metrics import (
|
|
12
|
+
ModelMetrics,
|
|
13
|
+
ModelPredictionResults,
|
|
14
|
+
)
|
|
13
15
|
from sklearn.base import is_classifier, is_regressor
|
|
14
16
|
from sklearn.metrics import (
|
|
15
17
|
accuracy_score,
|
|
@@ -21,21 +23,14 @@ from sklearn.metrics import (
|
|
|
21
23
|
recall_score,
|
|
22
24
|
)
|
|
23
25
|
|
|
24
|
-
from sinapsis_data_analysis.helpers.model_metrics import (
|
|
25
|
-
ModelMetrics,
|
|
26
|
-
ModelPredictionResults,
|
|
27
|
-
)
|
|
28
|
-
|
|
29
26
|
|
|
30
27
|
class MLBaseAttributes(TemplateAttributes):
|
|
31
28
|
"""Base attributes for machine learning model templates.
|
|
32
29
|
|
|
33
30
|
Attributes:
|
|
34
|
-
generic_field_key (str): Key of the generic field where datasets are stored.
|
|
35
31
|
model_save_path (str): Path where the trained model will be saved.
|
|
36
32
|
"""
|
|
37
33
|
|
|
38
|
-
generic_field_key: str | None = None
|
|
39
34
|
root_dir : str = WORKING_DIR
|
|
40
35
|
model_save_path: str
|
|
41
36
|
|
|
@@ -70,7 +65,9 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
70
65
|
Returns:
|
|
71
66
|
Any: The dataset from the generic field.
|
|
72
67
|
"""
|
|
73
|
-
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
return container.data_frames
|
|
74
71
|
|
|
75
72
|
@staticmethod
|
|
76
73
|
def dataset_is_valid(dataset: Any) -> bool:
|
|
@@ -84,7 +81,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
84
81
|
"""
|
|
85
82
|
return dataset is not None
|
|
86
83
|
|
|
87
|
-
def process_dataset(self, dataset:
|
|
84
|
+
def process_dataset(self, dataset: list[DataFramePacket]) -> tuple | None:
|
|
88
85
|
"""
|
|
89
86
|
Extracts x_train, y_train, x_test, y_test from the dataset
|
|
90
87
|
|
|
@@ -95,19 +92,24 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
95
92
|
tuple | None: A tuple containing (x_train, y_train, x_test, y_test)
|
|
96
93
|
or None if the dataset doesn't have the expected attributes
|
|
97
94
|
"""
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
95
|
+
x_train, y_train, x_test, y_test = None, None, None, None
|
|
96
|
+
for data_set in dataset:
|
|
97
|
+
if data_set.source:
|
|
98
|
+
if "x_train" in data_set.source:
|
|
99
|
+
x_train = data_set.content
|
|
100
|
+
elif "x_test" in data_set.source:
|
|
101
|
+
x_test = data_set.content
|
|
102
|
+
elif "y_train" in data_set.source:
|
|
103
|
+
y_train = data_set.content
|
|
104
|
+
elif "y_test" in data_set.source:
|
|
105
|
+
y_test = data_set.content
|
|
106
|
+
else:
|
|
107
|
+
if "x_dataset" in data_set.source:
|
|
108
|
+
x_train = data_set.content
|
|
109
|
+
elif "y_dataset" in data_set.source:
|
|
110
|
+
y_train = data_set.content
|
|
111
|
+
|
|
112
|
+
return x_train, y_train, x_test, y_test
|
|
111
113
|
|
|
112
114
|
def train_model(self, x_train: Any, y_train: Any) -> None:
|
|
113
115
|
"""Train the model using the training data
|
|
@@ -117,7 +119,6 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
117
119
|
y_train (Any): The training targets
|
|
118
120
|
"""
|
|
119
121
|
|
|
120
|
-
|
|
121
122
|
self.trained_model = self.model.fit(x_train, y_train)
|
|
122
123
|
|
|
123
124
|
@staticmethod
|
|
@@ -156,7 +157,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
156
157
|
|
|
157
158
|
return metrics
|
|
158
159
|
|
|
159
|
-
def calculate_metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> ModelMetrics:
|
|
160
|
+
def calculate_metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> ModelMetrics | None:
|
|
160
161
|
"""
|
|
161
162
|
Detects whether the model is a classifier or regressor and calculates
|
|
162
163
|
the appropriate metrics
|
|
@@ -173,7 +174,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
173
174
|
return self.calculate_classification_metrics(y_true, y_pred)
|
|
174
175
|
elif is_regressor(self.trained_model):
|
|
175
176
|
return self.calculate_regression_metrics(y_true, y_pred)
|
|
176
|
-
return
|
|
177
|
+
return None
|
|
177
178
|
|
|
178
179
|
def generate_predictions(self, x_test: np.ndarray, y_test: np.ndarray) -> ModelPredictionResults | None:
|
|
179
180
|
"""
|
|
@@ -187,7 +188,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
187
188
|
Returns:
|
|
188
189
|
ModelPredictionResults: Object containing predictions and metrics
|
|
189
190
|
"""
|
|
190
|
-
if self.trained_model is not None:
|
|
191
|
+
if self.trained_model is not None and x_test is not None:
|
|
191
192
|
predictions = self.trained_model.predict(x_test)
|
|
192
193
|
|
|
193
194
|
metrics = self.calculate_metrics(y_test, predictions)
|
|
@@ -208,7 +209,6 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
208
209
|
ModelPredictionResults: Object containing predictions and metrics.
|
|
209
210
|
"""
|
|
210
211
|
x_train, y_train, x_test, y_test = processed_data
|
|
211
|
-
|
|
212
212
|
self.train_model(x_train, y_train)
|
|
213
213
|
|
|
214
214
|
return self.generate_predictions(x_test, y_test)
|
|
@@ -226,7 +226,7 @@ class MLBaseTraining(BaseDynamicWrapperTemplate):
|
|
|
226
226
|
try:
|
|
227
227
|
os.makedirs(os.path.dirname(full_path), exist_ok=True)
|
|
228
228
|
self._save_model_implementation(full_path)
|
|
229
|
-
self.logger.info(f"Model saved at {
|
|
229
|
+
self.logger.info(f"Model saved at {full_path}")
|
|
230
230
|
except (MemoryError, TypeError) as e:
|
|
231
231
|
self.logger.error(f"Error saving model: {e}")
|
|
232
232
|
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
from typing import cast
|
|
3
2
|
|
|
4
3
|
import numpy as np
|
|
5
4
|
import pandas as pd
|
|
@@ -13,12 +12,8 @@ from sinapsis_core.template_base.dynamic_template import (
|
|
|
13
12
|
)
|
|
14
13
|
from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
|
|
15
14
|
from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
|
|
16
|
-
from sinapsis_data_readers.templates.datasets_readers.dataset_splitter import (
|
|
17
|
-
TabularDatasetSplit,
|
|
18
|
-
)
|
|
19
|
-
from sklearn import manifold
|
|
20
|
-
|
|
21
15
|
from sinapsis_data_analysis.helpers.tags import Tags
|
|
16
|
+
from sklearn import manifold
|
|
22
17
|
|
|
23
18
|
|
|
24
19
|
class ManifoldResults(BaseModel):
|
|
@@ -42,6 +37,14 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
42
37
|
TSNE, MDS, Isomap, etc.
|
|
43
38
|
"""
|
|
44
39
|
|
|
40
|
+
class AttributesBaseModel(TemplateAttributes):
|
|
41
|
+
"""Attributes for the MLBaseInference template.
|
|
42
|
+
|
|
43
|
+
Attributes:
|
|
44
|
+
target_key (str): Key of the generic field where data is stored.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
target_key : str = 'target'
|
|
45
48
|
WrapperEntry = WrapperEntryConfig(
|
|
46
49
|
wrapped_object=manifold,
|
|
47
50
|
signature_from_doc_string=True,
|
|
@@ -54,15 +57,6 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
54
57
|
tags=[Tags.DATA_ANALYSIS, Tags.DYNAMIC, Tags.MANIFOLD, Tags.SKLEARN, Tags.MODELS],
|
|
55
58
|
)
|
|
56
59
|
|
|
57
|
-
class AttributesBaseModel(TemplateAttributes):
|
|
58
|
-
"""Attributes for the SKLearnManifold template.
|
|
59
|
-
|
|
60
|
-
Attributes:
|
|
61
|
-
generic_field_key (str): Key of the generic field
|
|
62
|
-
where the input data is stored.
|
|
63
|
-
"""
|
|
64
|
-
|
|
65
|
-
generic_field_key: str
|
|
66
60
|
|
|
67
61
|
def __init__(self, attributes: TemplateAttributes) -> None:
|
|
68
62
|
super().__init__(attributes)
|
|
@@ -83,7 +77,7 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
83
77
|
array_data = np.array(feature_arrays)
|
|
84
78
|
return array_data.reshape(array_data.shape[0], -1)
|
|
85
79
|
|
|
86
|
-
def get_dataset(self, container: DataContainer) ->
|
|
80
|
+
def get_dataset(self, container: DataContainer) -> list:
|
|
87
81
|
"""Get the dataset from the data container
|
|
88
82
|
|
|
89
83
|
Args:
|
|
@@ -93,13 +87,10 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
93
87
|
TabularDatasetSplit | None: The dataset from the generic field,
|
|
94
88
|
or None if not found
|
|
95
89
|
"""
|
|
96
|
-
|
|
97
|
-
dataset = cast(TabularDatasetSplit, dataset)
|
|
98
|
-
if dataset:
|
|
99
|
-
return dataset
|
|
100
|
-
return None
|
|
90
|
+
return container.data_frames
|
|
101
91
|
|
|
102
|
-
|
|
92
|
+
|
|
93
|
+
def process_dataset(self, dataset: list) -> ManifoldResults | None:
|
|
103
94
|
"""
|
|
104
95
|
Extracts the training data, reshapes it, and applies the
|
|
105
96
|
manifold learning transformation
|
|
@@ -111,11 +102,17 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
111
102
|
ManifoldResults | None: Results of the manifold transformation,
|
|
112
103
|
or None if the dataset is empty
|
|
113
104
|
"""
|
|
114
|
-
x_train =
|
|
115
|
-
|
|
105
|
+
x_train, y_train = None, None
|
|
106
|
+
for set in dataset:
|
|
107
|
+
if "x_train" in set.source:
|
|
108
|
+
x_train = set.content
|
|
109
|
+
|
|
110
|
+
elif "y_train" in set.source:
|
|
111
|
+
y_train = set.content
|
|
112
|
+
else:
|
|
113
|
+
y_train = set.content[self.attributes.target_key]
|
|
114
|
+
x_train = set.content.pop(self.attributes.target_key)
|
|
116
115
|
|
|
117
|
-
if x_train is None or x_train.empty:
|
|
118
|
-
return None
|
|
119
116
|
|
|
120
117
|
x_train_reshaped = self.reshape_arrays(x_train)
|
|
121
118
|
x_transformed = self.manifold_model.fit_transform(x_train_reshaped)
|
|
@@ -141,7 +138,7 @@ class SKLearnManifold(BaseDynamicWrapperTemplate):
|
|
|
141
138
|
results = self.process_dataset(dataset)
|
|
142
139
|
|
|
143
140
|
if results is not None:
|
|
144
|
-
self._set_generic_data(container, results)
|
|
141
|
+
self._set_generic_data(container, results.model_dump())
|
|
145
142
|
|
|
146
143
|
return container
|
|
147
144
|
|
|
@@ -5,16 +5,16 @@ from sinapsis_core.template_base.base_models import UIPropertiesMetadata
|
|
|
5
5
|
from sinapsis_core.template_base.dynamic_template import WrapperEntryConfig
|
|
6
6
|
from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
|
|
7
7
|
from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
|
|
8
|
-
from sklearn import linear_model, neighbors, neural_network, svm, tree
|
|
9
|
-
|
|
10
8
|
from sinapsis_data_analysis.helpers.excluded_models import (
|
|
11
9
|
excluded_linear_models,
|
|
12
10
|
excluded_neighbors_models,
|
|
13
11
|
excluded_svm_models,
|
|
14
12
|
excluded_tree_models,
|
|
13
|
+
excluded_cluster_models
|
|
15
14
|
)
|
|
16
15
|
from sinapsis_data_analysis.helpers.tags import Tags
|
|
17
16
|
from sinapsis_data_analysis.templates.training.ml_base_training import MLBaseTraining
|
|
17
|
+
from sklearn import linear_model, neighbors, neural_network, svm, tree, cluster
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class SKLearnLinearModelsTrain(MLBaseTraining):
|
|
@@ -213,6 +213,44 @@ class SKLearnSVMModelsTrain(SKLearnLinearModelsTrain):
|
|
|
213
213
|
)
|
|
214
214
|
|
|
215
215
|
|
|
216
|
+
|
|
217
|
+
class SKLearnClusterModelsTrain(SKLearnLinearModelsTrain):
    """
    This template dynamically wraps sklearn's cluster module.
    The names listed in `excluded_cluster_models` are handed to the wrapper
    configuration as module attributes to exclude.

    Usage example (illustrative: it assumes KMeans is not part of
    `excluded_cluster_models` and that the wrapper / init-attribute names
    follow the `<Model>Wrapper` / `<model>_init` pattern -- confirm before use):

    agent:
      name: my_test_agent
    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: KMeansWrapper
      class_name: KMeansWrapper
      template_input: InputTemplate
      attributes:
        generic_field_key: 'data_loader_key'
        model_save_path: 'artifacts/kmeans_model.joblib'
        kmeans_init:
          n_clusters: 3
          random_state: 42

    """

    WrapperEntry = WrapperEntryConfig(
        wrapped_object=cluster,
        signature_from_doc_string=True,
        exclude_module_atts=excluded_cluster_models,
        force_init_as_method=False,
    )
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
|
|
216
254
|
def __getattr__(name: str) -> Template:
|
|
217
255
|
"""
|
|
218
256
|
Only create a template if it's imported, this avoids creating all the base models for all templates
|
|
@@ -228,6 +266,8 @@ def __getattr__(name: str) -> Template:
|
|
|
228
266
|
return make_dynamic_template(name, SKLearnTreeModelsTrain)
|
|
229
267
|
if name in SKLearnSVMModelsTrain.WrapperEntry.module_att_names:
|
|
230
268
|
return make_dynamic_template(name, SKLearnSVMModelsTrain)
|
|
269
|
+
if name in SKLearnClusterModelsTrain.WrapperEntry.module_att_names:
|
|
270
|
+
return make_dynamic_template(name, SKLearnClusterModelsTrain)
|
|
231
271
|
raise AttributeError(f"template `{name}` not found in {__name__}")
|
|
232
272
|
|
|
233
273
|
|
|
@@ -237,6 +277,7 @@ __all__ = (
|
|
|
237
277
|
+ SKLearnNNModelsTrain.WrapperEntry.module_att_names
|
|
238
278
|
+ SKLearnTreeModelsTrain.WrapperEntry.module_att_names
|
|
239
279
|
+ SKLearnSVMModelsTrain.WrapperEntry.module_att_names
|
|
280
|
+
+ SKLearnClusterModelsTrain.WrapperEntry.module_att_names
|
|
240
281
|
)
|
|
241
282
|
|
|
242
283
|
|
|
@@ -5,7 +5,6 @@ from sinapsis_core.template_base.base_models import UIPropertiesMetadata
|
|
|
5
5
|
from sinapsis_core.template_base.dynamic_template import WrapperEntryConfig
|
|
6
6
|
from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
|
|
7
7
|
from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
|
|
8
|
-
|
|
9
8
|
from sinapsis_data_analysis.helpers.tags import Tags
|
|
10
9
|
from sinapsis_data_analysis.templates.training.sklearn_train import SKLearnLinearModelsTrain
|
|
11
10
|
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
-
from sinapsis_core.data_containers.data_packet import DataContainer
|
|
3
|
-
from sinapsis_core.template_base.template import Template
|
|
2
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket
|
|
4
3
|
from sinapsis_core.template_base.base_models import TemplateAttributes
|
|
5
|
-
|
|
4
|
+
from sinapsis_core.template_base.template import Template
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
class CategoricalToNumerical(Template):
|
|
@@ -10,7 +9,7 @@ class CategoricalToNumerical(Template):
|
|
|
10
9
|
generic_key: str
|
|
11
10
|
|
|
12
11
|
@staticmethod
|
|
13
|
-
def map_categorical_to_numerical(df: pd.DataFrame| pd.Series)->
|
|
12
|
+
def map_categorical_to_numerical(df: pd.DataFrame| pd.Series)->tuple:
|
|
14
13
|
|
|
15
14
|
if isinstance(df, pd.Series):
|
|
16
15
|
categorical_cols = [df.name] if df.dtype in ["object", "string", "category"] else []
|
|
@@ -36,10 +35,11 @@ class CategoricalToNumerical(Template):
|
|
|
36
35
|
|
|
37
36
|
|
|
38
37
|
def execute(self, container: DataContainer) -> DataContainer:
    """
    Encode the categorical columns of every dataframe packet in the container.

    For each dataframe packet present when the method is called, the content
    is passed through `map_categorical_to_numerical` and the result is
    appended to the container as a new `DataFramePacket`, with the returned
    labels stored in the packet's `generic_data`.

    Args:
        container (DataContainer): container holding the dataframe packets.

    Returns:
        DataContainer: the same container, extended with one transformed
            packet per original dataframe packet.
    """
    # Iterate over a snapshot: appending to the very list being iterated
    # would make the loop pick up its own output and never terminate.
    for data_frame in list(container.data_frames):
        transformed_dataset, labels = self.map_categorical_to_numerical(data_frame.content)
        container.data_frames.append(DataFramePacket(content=transformed_dataset, generic_data=labels))

    return container
|
|
44
44
|
|
|
45
45
|
|
|
@@ -20,7 +20,6 @@ _template_lookup = {
|
|
|
20
20
|
"ExecuteNTimesLazyAudioReaderPydub": f"{_root_lib_path}.audio_readers.audio_reader_pydub",
|
|
21
21
|
"ExecuteNTimesLazyAudioReaderSoundfile": f"{_root_lib_path}.audio_readers.audio_reader_soundfile",
|
|
22
22
|
"FolderImageDatasetCV2": f"{_root_lib_path}.image_readers.image_folder_reader_cv2",
|
|
23
|
-
"ImageDatasetSplitter": f"{_root_lib_path}.datasets_readers.dataset_splitter",
|
|
24
23
|
"LazyAudioReaderPydub": f"{_root_lib_path}.audio_readers.audio_reader_pydub",
|
|
25
24
|
"LazyAudioReaderSoundfile": f"{_root_lib_path}.audio_readers.audio_reader_soundfile",
|
|
26
25
|
"LiveVideoReaderCV2": f"{_root_lib_path}.video_readers.video_reader_cv2",
|
|
@@ -29,7 +28,7 @@ _template_lookup = {
|
|
|
29
28
|
"MultiVideoReaderPytorch": f"{_root_lib_path}.video_readers.video_reader_dali",
|
|
30
29
|
"MultiVideoReaderFFMPEG": f"{_root_lib_path}.video_readers.video_reader_ffmpeg",
|
|
31
30
|
"MultiVideoReaderTorchCodec": f"{_root_lib_path}.video_readers.video_reader_torchcodec",
|
|
32
|
-
"
|
|
31
|
+
"CSVDatasetSplitter": f"{_root_lib_path}.datasets_readers.dataset_splitter",
|
|
33
32
|
"TextInput": f"{_root_lib_path}.text_readers.text_input",
|
|
34
33
|
"VideoReaderCV2": f"{_root_lib_path}.video_readers.video_reader_cv2",
|
|
35
34
|
"VideoReaderDali": f"{_root_lib_path}.video_readers.video_reader_dali",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
import os
|
|
3
3
|
|
|
4
|
-
from sinapsis_core.data_containers.data_packet import DataContainer, TimeSeriesPacket
|
|
4
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket, TimeSeriesPacket
|
|
5
5
|
from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType
|
|
6
6
|
from sinapsis_core.template_base.template import Template
|
|
7
7
|
from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
|
|
@@ -25,6 +25,7 @@ class CSVDatasetReader(Template):
|
|
|
25
25
|
packet = TimeSeriesPacket(content=self.csv_file)
|
|
26
26
|
container.time_series.append(packet)
|
|
27
27
|
else:
|
|
28
|
-
self.
|
|
28
|
+
packet = DataFramePacket(content=self.csv_file, source=f"{self.instance_name}_x_dataset")
|
|
29
|
+
container.data_frames.append(packet)
|
|
29
30
|
|
|
30
31
|
return container
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket
|
|
5
|
+
from sinapsis_core.template_base import Template
|
|
6
|
+
from sinapsis_core.template_base.base_models import TemplateAttributes
|
|
7
|
+
from sklearn.model_selection import train_test_split
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CSVDatasetSplitter(Template):
    """
    Template that divides tabular datasets into train and test samples.

    Each dataframe packet found in the container is separated into features
    and target, split with `train_test_split`, and the resulting pieces are
    appended to the container as new dataframe packets. The `source` of every
    new packet ends in `x_train`, `y_train`, `x_test` or `y_test`, telling
    which subset it holds.
    """

    class AttributesBaseModel(TemplateAttributes):
        target_key: str = "target"  # key of the labels inside the dataframe
        feature_key: str | None = None  # key of the features; when unset, everything but the target is used
        random_state: int = 42
        train_size: float = 0.2

    def extract_x_y_from_packet(self, data_frame: DataFramePacket) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Return the `(feature, target)` pair taken from the packet content."""
        content = data_frame.content
        target = content.get(self.attributes.target_key)
        if self.attributes.feature_key:
            feature = content.get(self.attributes.feature_key)
        else:
            # No explicit feature key: keep every column except the target.
            feature = content.drop(columns=[self.attributes.target_key])
        return feature, target

    def split_dataset(self, x_data: pd.DataFrame, y_data: pd.DataFrame) -> dict:
        """Split features and target, returning the four subsets keyed by name."""
        train_fraction = self.attributes.train_size
        # train_test_split yields the pieces in the order
        # x_train, x_test, y_train, y_test.
        x_train, x_test, y_train, y_test = train_test_split(
            x_data,
            y_data,
            train_size=train_fraction,
            test_size=1 - train_fraction,
            random_state=self.attributes.random_state,
        )
        return {"x_train": x_train, "y_train": y_train, "x_test": x_test, "y_test": y_test}

    def assign_to_dataframe_packets(self, container: DataContainer, source: str, data: pd.DataFrame) -> DataContainer:
        """Append `data` to the container as a dataframe packet tagged with `source`."""
        container.data_frames.append(
            DataFramePacket(content=data, source=f"{self.instance_name}_{source}")
        )
        return container

    def execute(self, container: DataContainer) -> DataContainer:
        """Split every dataframe packet of the container and store the subsets."""
        # The splits are gathered first and stored afterwards, so the list of
        # packets is not modified while it is being traversed.
        pending_splits = []
        for packet in container.data_frames:
            feature, target = self.extract_x_y_from_packet(packet)
            if self.attributes.train_size:
                pending_splits.append(self.split_dataset(feature, target))

        for split in pending_splits:
            for subset_name, subset in split.items():
                container = self.assign_to_dataframe_packets(container, subset_name, subset)
        return container
|
|
69
|
+
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
5
|
-
from sinapsis_core.data_containers.data_packet import DataContainer, TimeSeriesPacket
|
|
5
|
+
from sinapsis_core.data_containers.data_packet import DataContainer, DataFramePacket, TimeSeriesPacket
|
|
6
6
|
from sinapsis_core.template_base import Template
|
|
7
7
|
from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType, UIPropertiesMetadata
|
|
8
8
|
from sinapsis_core.template_base.dynamic_template import (
|
|
@@ -19,9 +19,6 @@ from sklearn.utils import Bunch
|
|
|
19
19
|
|
|
20
20
|
from sinapsis_data_readers.helpers import sklearn_dataset_subset
|
|
21
21
|
from sinapsis_data_readers.helpers.tags import Tags
|
|
22
|
-
from sinapsis_data_readers.templates.datasets_readers.dataset_splitter import (
|
|
23
|
-
TabularDatasetSplit,
|
|
24
|
-
)
|
|
25
22
|
|
|
26
23
|
TARGET: str = "target"
|
|
27
24
|
|
|
@@ -137,7 +134,7 @@ class SKLearnDatasets(BaseDynamicWrapperTemplate):
|
|
|
137
134
|
results: pd.DataFrame, feature_name_cols: list, target_name_cols: list, n_features: int, split_size: float
|
|
138
135
|
) -> dict:
|
|
139
136
|
"""Method to split the dataset into training and testing samples"""
|
|
140
|
-
if feature_name_cols:
|
|
137
|
+
if feature_name_cols is not None:
|
|
141
138
|
X = results[feature_name_cols]
|
|
142
139
|
y = results[target_name_cols]
|
|
143
140
|
else:
|
|
@@ -145,30 +142,32 @@ class SKLearnDatasets(BaseDynamicWrapperTemplate):
|
|
|
145
142
|
y = results.iloc[:, n_features:]
|
|
146
143
|
|
|
147
144
|
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=split_size, random_state=0)
|
|
148
|
-
|
|
149
|
-
x_train
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
y_test
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
return split_data.model_dump()
|
|
145
|
+
data_map = {
|
|
146
|
+
"x_train": x_train,
|
|
147
|
+
"y_train": y_train,
|
|
148
|
+
"x_test": x_test,
|
|
149
|
+
"y_test": y_test
|
|
150
|
+
}
|
|
151
|
+
return data_map
|
|
156
152
|
|
|
157
153
|
def execute(self, container: DataContainer) -> DataContainer:
    """
    Load the wrapped sklearn dataset and store it in the container.

    The wrapped callable is invoked with the dumped dataset attributes and
    its output is parsed with `parse_results`. Depending on the template
    attributes, the parsed dataset is appended as a time series packet, as
    one dataframe packet per entry of the split mapping, or as a single
    unsplit dataframe packet.
    """
    loader = self.wrapped_callable.__func__
    sklearn_dataset = loader(**self.dataset_attributes.model_dump())
    parsed = self.parse_results(sklearn_dataset)
    dataset, feature_columns, target_columns, n_features = parsed

    if self.attributes.store_as_time_series:
        container.time_series.append(TimeSeriesPacket(content=dataset))

    if self.attributes.split_dataset:
        split_parts = self.split_dataset(
            dataset,
            feature_columns,
            target_columns,
            n_features,
            split_size=self.attributes.train_size,
        )
        # One packet per subset; the subset name is suffixed to the source.
        for part_name, part_frame in split_parts.items():
            packet = DataFramePacket(content=part_frame, source=f"{self.instance_name}_{part_name}")
            container.data_frames.append(packet)

    if sklearn_dataset and not self.attributes.split_dataset:
        whole_packet = DataFramePacket(content=dataset, source=self.instance_name)
        container.data_frames.append(whole_packet)
    return container
|
|
173
172
|
|
|
174
173
|
|