sinapsis-data-analysis 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ # -*- coding: utf-8 -*-
2
+ import joblib
3
+ from sinapsis_core.template_base import Template
4
+ from sinapsis_core.template_base.dynamic_template import WrapperEntryConfig
5
+ from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
6
+ from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
7
+ from sklearn import linear_model, neighbors, neural_network, tree
8
+
9
+ from sinapsis_data_analysis.helpers.excluded_models import (
10
+ excluded_linear_models,
11
+ excluded_neighbors_models,
12
+ excluded_tree_models,
13
+ )
14
+ from sinapsis_data_analysis.templates.ml_base_training import MLBaseTraining
15
+
16
+
17
class SKLearnLinearModelsTrain(MLBaseTraining):
    """
    Training template that dynamically wraps the ``sklearn.linear_model`` module.

    Each attribute of ``linear_model`` that is not listed in
    ``excluded_linear_models`` is exposed through ``WrapperEntry`` so it can be
    turned into its own dynamic template. Trained models are written to disk
    with ``joblib``.

    Usage example (the dataset-loading template referenced by
    ``template_input`` is expected to be defined earlier in the agent):

    agent:
      name: my_test_agent
    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: LinearRegressionWrapper
      class_name: LinearRegressionWrapper
      template_input: load_diabetesWrapper
      attributes:
        generic_field_key: InputTemplate
        model_save_path: "artifacts/linear_regression.joblib"
        linearregression_init:
          fit_intercept: true
          copy_X: true
          n_jobs: 8
          positive: false
    """

    # Describes which module is wrapped and which of its attributes are skipped.
    WrapperEntry = WrapperEntryConfig(
        wrapped_object=linear_model,
        exclude_module_atts=excluded_linear_models,
        signature_from_doc_string=True,
        force_init_as_method=False,
    )

    CATEGORY = "SKLearn"

    def _save_model_implementation(self) -> None:
        """
        Concrete implementation of the base class's model-saving hook.

        Dumps ``self.trained_model`` with ``joblib`` to the location given by
        ``self.attributes.model_save_path``.
        """
        model = self.trained_model
        destination = self.attributes.model_save_path
        joblib.dump(model, destination)
58
+
59
+
60
class SKLearnNeighborsModelsTrain(SKLearnLinearModelsTrain):
    """
    Training template that dynamically wraps the ``sklearn.neighbors`` module,
    giving access to models such as KNeighborsClassifier and
    KNeighborsRegressor.

    Only ``WrapperEntry`` is overridden; everything else, including the
    joblib-based model saving, is inherited from ``SKLearnLinearModelsTrain``.

    Usage example:

    agent:
      name: my_test_agent
    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: KNeighborsClassifierWrapper
      class_name: KNeighborsClassifierWrapper
      template_input: InputTemplate
      attributes:
        generic_field_key: 'input_template'
        model_save_path: 'kneighbors.joblib'
        kneighborsclassifier_init:
          n_neighbors: 5
          weights: uniform
          algorithm: auto
          leaf_size: 30
          p: 2
          metric: minkowski
          n_jobs: 2
    """

    # Same configuration shape as the parent, pointed at sklearn.neighbors.
    WrapperEntry = WrapperEntryConfig(
        wrapped_object=neighbors,
        exclude_module_atts=excluded_neighbors_models,
        signature_from_doc_string=True,
        force_init_as_method=False,
    )
96
+
97
+
98
class SKLearnNNModelsTrain(SKLearnLinearModelsTrain):
    """
    Training template that dynamically wraps the ``sklearn.neural_network``
    module, giving access to models such as MLPClassifier and MLPRegressor.

    Only ``WrapperEntry`` is overridden, and no exclusion list is passed for
    this module; the rest is inherited from ``SKLearnLinearModelsTrain``.

    Usage example:

    agent:
      name: my_test_agent
    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: BernoulliRBMWrapper
      class_name: BernoulliRBMWrapper
      template_input: InputTemplate
      attributes:
        generic_field_key: 'input_template'
        model_save_path: 'artifacts/bernoulli.joblib'
        bernoullirbm_init:
          n_components: 256
          learning_rate: 0.1
          batch_size: 10
          n_iter: 10
          verbose: 0
          random_state: null

    """

    WrapperEntry = WrapperEntryConfig(
        wrapped_object=neural_network,
        signature_from_doc_string=True,
        force_init_as_method=False,
    )
130
+
131
+
132
class SKLearnTreeModelsTrain(SKLearnLinearModelsTrain):
    """
    Training template that dynamically wraps the ``sklearn.tree`` module,
    giving access to models such as DecisionTreeClassifier and
    DecisionTreeRegressor.

    Only ``WrapperEntry`` is overridden; everything else, including the
    joblib-based model saving, is inherited from ``SKLearnLinearModelsTrain``.

    Usage example:

    agent:
      name: my_test_agent
    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: DecisionTreeClassifierWrapper
      class_name: DecisionTreeClassifierWrapper
      template_input: InputTemplate
      attributes:
        generic_field_key: 'input_template'
        model_save_path: 'artifacts/decision_tree.joblib'
        decisiontreeclassifier_init:
          criterion: gini
          splitter: best
          max_depth: null
          min_samples_split: 2
          min_samples_leaf: 1
          min_weight_fraction_leaf: 0.0
          max_features: sqrt
          random_state: 1
          max_leaf_nodes: 2
          min_impurity_decrease: 0.0
          class_weight: balanced
          ccp_alpha: 0.0

    """

    # Same configuration shape as the parent, pointed at sklearn.tree.
    WrapperEntry = WrapperEntryConfig(
        wrapped_object=tree,
        exclude_module_atts=excluded_tree_models,
        signature_from_doc_string=True,
        force_init_as_method=False,
    )
174
+
175
+
176
def __getattr__(name: str) -> Template:
    """
    Build a dynamic template lazily, the first time its name is requested.

    Creating templates only on import avoids instantiating the base models for
    every template up front, and avoids import errors caused by packages that
    are not available.

    The wrapper bases are checked in a fixed order (linear, neighbors,
    neural network, tree); the first one whose ``WrapperEntry.module_att_names``
    contains ``name`` is used to build the template.

    Raises:
        AttributeError: if ``name`` is not exposed by any of the wrapper bases.
    """
    wrapper_bases = (
        SKLearnLinearModelsTrain,
        SKLearnNeighborsModelsTrain,
        SKLearnNNModelsTrain,
        SKLearnTreeModelsTrain,
    )
    for base_template in wrapper_bases:
        if name in base_template.WrapperEntry.module_att_names:
            return make_dynamic_template(name, base_template)
    raise AttributeError(f"template `{name}` not found in {__name__}")
190
+
191
+
192
# Public names of this module: the wrapped attribute names of every sklearn
# wrapper base, concatenated in linear, neighbors, neural-network, tree order.
__all__ = (
    SKLearnLinearModelsTrain.WrapperEntry.module_att_names
    + SKLearnNeighborsModelsTrain.WrapperEntry.module_att_names
    + SKLearnNNModelsTrain.WrapperEntry.module_att_names
    + SKLearnTreeModelsTrain.WrapperEntry.module_att_names
)


if SINAPSIS_BUILD_DOCS:
    # With the docs-build flag set, eagerly create every dynamic template and
    # bind it as a real module global under its own class name.
    dynamic_templates = [__getattr__(exported_name) for exported_name in __all__]
    for template in dynamic_templates:
        globals()[template.__name__] = template
        # Drop the loop variable so it is not left behind as a module attribute.
        del template
@@ -0,0 +1,26 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+
4
+ from sinapsis_data_analysis.templates.sklearn_inference import SKLearnInference
5
+
6
+
7
class XGBoostInference(SKLearnInference):
    """Dynamic templates to run inference with XGBoost models.

    This class adds no behaviour of its own; everything is inherited from
    ``SKLearnInference``. The templates wrap the functionality of the xgb
    module and call the model's predict method on execute, using a dataset
    obtained through the container.

    Usage example:
    agent:
      name: my_test_agent
    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: XGBRFClassifierInference
      class_name: XGBRFClassifierInference
      template_input: InputTemplate
      attributes:
        model_path: 'artifacts/xgb_rf_classifier.model'
        generic_field_key: 'generic_dataset'

    """
@@ -0,0 +1,75 @@
1
+ # -*- coding: utf-8 -*-
2
+ import xgboost as xgb
3
+ from sinapsis_core.template_base import Template
4
+ from sinapsis_core.template_base.dynamic_template import WrapperEntryConfig
5
+ from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
6
+ from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
7
+
8
+ from sinapsis_data_analysis.templates.sklearn_train import SKLearnLinearModelsTrain
9
+
10
# xgboost attributes that get a dynamic training template.
INCLUDED_MODELS = [
    "XGBClassifier",
    "XGBRegressor",
    "XGBRanker",
    "XGBRFClassifier",
    "XGBRFRegressor",
    "Booster",
]

# Every other name reported by ``dir(xgb)`` is excluded from wrapping.
EXCLUDED_MODELS = [
    attr_name for attr_name in dir(xgb) if attr_name not in INCLUDED_MODELS
]
20
+
21
+
22
class XGBoostModelsTraining(SKLearnLinearModelsTrain):
    """Dynamic templates for XGBoost classification, regression and boosting models.

    These templates wrap the functionality provided by the xgb module and use
    the fit method on a dataset provided by the container. Only
    ``WrapperEntry`` and ``CATEGORY`` are overridden; model saving is inherited
    from ``SKLearnLinearModelsTrain``.

    Usage example (the dataset-loading template referenced by
    ``template_input`` is expected to be defined earlier in the agent):

    agent:
      name: my_test_agent

    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: XGBClassifierWrapper
      class_name: XGBClassifierWrapper
      template_input: load_irisWrapper
      attributes:
        generic_field_key: load_irisWrapper
        model_save_path: "artifacts/xgb_classifier.model"
        xgbclassifier_init:
          n_estimators: 100
          max_depth: 3
          learning_rate: 0.1
    """

    # Wraps the xgboost package, skipping every attribute in EXCLUDED_MODELS
    # and leaving the "kwargs" and "objective" method attributes unexposed.
    WrapperEntry = WrapperEntryConfig(
        wrapped_object=xgb,
        exclude_module_atts=EXCLUDED_MODELS,
        exclude_method_attributes=["kwargs", "objective"],
        parse_entire_mro=True,
        force_init_as_method=False,
    )

    CATEGORY = "XGBoost"
60
+
61
+
62
def __getattr__(name: str) -> Template:
    """
    Build the dynamic XGBoost training template for ``name`` on demand.

    Raises:
        AttributeError: if ``name`` is not one of the attribute names exposed
            by ``XGBoostModelsTraining.WrapperEntry``.
    """
    if name not in XGBoostModelsTraining.WrapperEntry.module_att_names:
        raise AttributeError(f"template `{name}` not found in {__name__}")
    return make_dynamic_template(name, XGBoostModelsTraining)
66
+
67
+
68
# Public names of this module: the wrapped xgboost attribute names.
__all__ = XGBoostModelsTraining.WrapperEntry.module_att_names


if SINAPSIS_BUILD_DOCS:
    # With the docs-build flag set, eagerly create every dynamic template and
    # bind it as a real module global under its own class name.
    dynamic_templates = [__getattr__(exported_name) for exported_name in __all__]
    for template in dynamic_templates:
        globals()[template.__name__] = template
        # Drop the loop variable so it is not left behind as a module attribute.
        del template
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: sinapsis-data-analysis
3
+ Version: 0.1.0
4
+ Summary: Templates to work with models for classification, regression and clustering with xgboost and sklearn.
5
+ Author-email: SinapsisAI <dev@sinapsis.tech>
6
+ Project-URL: Homepage, https://sinapsis.tech
7
+ Project-URL: Documentation, https://docs.sinapsis.tech/docs
8
+ Project-URL: Tutorials, https://docs.sinapsis.tech/tutorials
9
+ Project-URL: Repository, https://github.com/Sinapsis-AI/sinapsis-data-tools.git
10
+ Requires-Python: >=3.10
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: scikit-learn>=1.6.1
14
+ Requires-Dist: sinapsis>=0.1.1
15
+ Requires-Dist: sinapsis-data-readers
16
+ Requires-Dist: xgboost>=3.0.0
17
+ Provides-Extra: all
18
+ Dynamic: license-file
19
+
20
+ <h1 align="center">
21
+ <br>
22
+ <a href="https://sinapsis.tech/">
23
+ <img
24
+ src="https://github.com/Sinapsis-AI/brand-resources/blob/main/sinapsis_logo/4x/logo.png?raw=true"
25
+ alt="" width="300">
26
+ </a><br>
27
+ Sinapsis Data Analysis
28
+ <br>
29
+ </h1>
30
+
31
+ <h4 align="center">Module for machine learning model training, analysis, and inference, using the Scikit-learn and XGBoost libraries.</h4>
32
+
33
+ <p align="center">
34
+ <a href="#installation">🐍 Installation</a> •
35
+ <a href="#features"> 🚀 Features</a> •
36
+ <a href="#example"> 📚 Usage Example</a> •
37
+ <a href="#documentation">📙 Documentation</a> •
38
+ <a href="#license"> 🔍 License </a>
39
+ </p>
40
+
41
+ **Sinapsis Data Analysis** provides a comprehensive set of tools for machine learning model training, evaluation, and inference using industry-standard libraries like scikit-learn and XGBoost.
42
+
43
+ <h2 id="installation"> 🐍 Installation </h2>
44
+
45
+ Install using your package manager of choice. We encourage the use of <code>uv</code>.
46
+
47
+ Example with <code>uv</code>:
48
+
49
+ ```bash
50
+ uv pip install sinapsis-data-analysis --extra-index-url https://pypi.sinapsis.tech
51
+ ```
52
+ or with raw <code>pip</code>:
53
+ ```bash
54
+ pip install sinapsis-data-analysis --extra-index-url https://pypi.sinapsis.tech
55
+ ```
56
+
57
+
58
+ <h2 id="features">🚀 Features</h2>
59
+
60
+ <h3> Templates Supported</h3>
61
+
62
+ **Sinapsis Data Analysis** provides a variety of templates for machine learning workflows:
63
+
64
+ <details>
65
+ <summary><strong><span style="font-size: 1.25em;">Scikit-Learn Models</span></strong></summary>
66
+
67
+ The following model types are supported:
68
+
69
+ - **Linear Models**: LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression, etc.
70
+ - **Neighbors Models**: KNeighborsClassifier, KNeighborsRegressor, RadiusNeighborsClassifier, etc.
71
+ - **Neural Network Models**: MLPClassifier, MLPRegressor, BernoulliRBM
72
+ - **Tree Models**: DecisionTreeClassifier, DecisionTreeRegressor, ExtraTreeClassifier, etc.
73
+
74
+ Each template uses the same base attributes:
75
+ - **`generic_field_key` (str, required)**: Key of the generic field where datasets are stored
76
+ - **`model_save_path` (str, required)**: Path where the trained model will be saved
77
+ </details>
78
+
79
+ <details>
80
+ <summary><strong><span style="font-size: 1.25em;">XGBoost Models</span></strong></summary>
81
+
82
+ XGBoost model templates include:
83
+ - XGBClassifier
84
+ - XGBRegressor
85
+ - XGBRanker
86
+ - XGBRFClassifier
87
+ - XGBRFRegressor
88
+ - Booster
89
+
90
+ Attributes are the same as those for Scikit-learn templates.
91
+ </details>
92
+
93
+ <details>
94
+ <summary><strong><span style="font-size: 1.25em;">Manifold Learning</span></strong></summary>
95
+
96
+ Templates for dimensionality reduction using scikit-learn's manifold learning techniques:
97
+
98
+ - **SKLearnManifold**: Base class for all manifold learning algorithms
99
+ - **`generic_field_key` (str, required)**: Key of the generic field where the input data is stored
100
+
101
+ Specific algorithms include t-SNE, MDS, Isomap, LocallyLinearEmbedding, and more.
102
+ </details>
103
+
104
+ <details>
105
+ <summary><strong><span style="font-size: 1.25em;">Inference Templates</span></strong></summary>
106
+
107
+ Templates for using trained models to make predictions on new data:
108
+
109
+ - **SKLearnInference**: For inference with scikit-learn models
110
+ - **XGBoostInference**: For inference with XGBoost models
111
+
112
+ To use these templates, set the **`model_path`** attribute to the path of the trained model.
113
+ </details>
114
+
115
+ > [!TIP]
116
+ > Use the CLI command ```sinapsis info --all-template-names``` to show a list of all the available Template names installed with Sinapsis Data Analysis.
117
+
118
+ > [!TIP]
119
+ > Use the CLI command ```sinapsis info --example-template-config TEMPLATE_NAME``` to produce an example Agent config for the Template specified in ***TEMPLATE_NAME***.
120
+
121
+ For example, for ***LinearRegressionWrapper*** use ```sinapsis info --example-template-config LinearRegressionWrapper``` to produce an example config.
122
+
123
+ <h2 id="example"> 📚 Usage Example </h2>
124
+ Below is an example configuration for **Sinapsis Data Analysis** using LinearRegressionWrapper for regression.
125
+
126
+ <details>
127
+ <summary><strong><span style="font-size: 1.25em;">Example config</span></strong></summary>
128
+
129
+ ```yaml
130
+ agent:
131
+ name: sklearn_linear_models_agent
132
+ description: agent to train a LinearRegression model from scikit-learn using the load_diabetes dataset
133
+
134
+ templates:
135
+ - template_name: InputTemplate
136
+ class_name: InputTemplate
137
+ attributes: {}
138
+
139
+ - template_name: load_diabetesWrapper
140
+ class_name: load_diabetesWrapper
141
+ template_input: InputTemplate
142
+ attributes:
143
+ split_dataset: true
144
+ train_size: 0.8
145
+ load_diabetes:
146
+ return_X_y: false
147
+ as_frame: true
148
+
149
+ - template_name: LinearRegressionWrapper
150
+ class_name: LinearRegressionWrapper
151
+ template_input: load_diabetesWrapper
152
+ attributes:
153
+ generic_field_key: load_diabetesWrapper
154
+ model_save_path: "artifacts/linear_regression.joblib"
155
+ linearregression_init:
156
+ fit_intercept: true
157
+ copy_X: true
158
+ n_jobs: null
159
+ positive: false
160
+ ```
161
+ </details>
162
+
163
+ To run the config, use the CLI:
164
+ ```bash
165
+ sinapsis run name_of_config.yml
166
+ ```
167
+
168
+ <h2 id="documentation">📙 Documentation</h2>
169
+
170
+ Documentation for this and other sinapsis packages is available on the [sinapsis website](https://docs.sinapsis.tech/docs).
171
+
172
+ Tutorials for different projects within sinapsis are available on the [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials).
173
+
174
+ <h2 id="license">🔍 License</h2>
175
+
176
+ This project is licensed under the AGPLv3 license, which encourages open collaboration and sharing. For more details, please refer to the [LICENSE](LICENSE) file.
177
+
178
+ For commercial use, please refer to our [official Sinapsis website](https://sinapsis.tech) for information on obtaining a commercial license.
@@ -0,0 +1,17 @@
1
+ sinapsis_data_analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ sinapsis_data_analysis/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ sinapsis_data_analysis/helpers/excluded_models.py,sha256=qrxx3ct44nZYYOIF5QJKYMdSlyCPI0ch3t_AqVaSGTs,537
4
+ sinapsis_data_analysis/helpers/model_metrics.py,sha256=mR-ZLD8PrGgOh1PrYG8TOjFhJz6l2EDRtn6jtMMyI7A,729
5
+ sinapsis_data_analysis/templates/__init__.py,sha256=Q898W-pTXGVjsMZn7wgmMYjrIstoo2bVzAXAgP75qaA,883
6
+ sinapsis_data_analysis/templates/ml_base_inference.py,sha256=gtieXQ955QxlXQLaWJqTppwhjGHU6dcJfj46IoESyHE,3455
7
+ sinapsis_data_analysis/templates/ml_base_training.py,sha256=sW4isOU8B8_s32-IKzLLzz_0xvvpB4M8LhcBmtYxN_s,9043
8
+ sinapsis_data_analysis/templates/sklearn_inference.py,sha256=r_7eGi1Z1F24HojTK1b5YYJLw7uOjNn2Mvf8-JIcJB0,689
9
+ sinapsis_data_analysis/templates/sklearn_manifold.py,sha256=BXLACGI2M5_zvlPxsKlFtTj3QNKXtawNP3_W8uBU-Ug,5200
10
+ sinapsis_data_analysis/templates/sklearn_train.py,sha256=ZDBMAabDKuopyvkLd9poaRKJFY2tUbx9h6MTkvEmpvs,6489
11
+ sinapsis_data_analysis/templates/xgboost_inference.py,sha256=fjHzjFeIxtRv1oSm5P-TgYUBvwB49JUVdNaPlszl1fI,782
12
+ sinapsis_data_analysis/templates/xgboost_train.py,sha256=9oI570dMn32D6pVxLas_XhtcUO0a53PIHkf9UwNe_l8,2260
13
+ sinapsis_data_analysis-0.1.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
14
+ sinapsis_data_analysis-0.1.0.dist-info/METADATA,sha256=VEinA85NDwJQzrpDA-swhD0q5TQAUbL28LvNU_y4G94,6316
15
+ sinapsis_data_analysis-0.1.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
16
+ sinapsis_data_analysis-0.1.0.dist-info/top_level.txt,sha256=Mc5OyqBINgXFLrAyVBmjg25MQd6Lbg7z-rwotzEeygQ,23
17
+ sinapsis_data_analysis-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (79.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+