sklearn-migrator 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Alberto Valdés
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,46 @@
1
+ Metadata-Version: 2.4
2
+ Name: sklearn-migrator
3
+ Version: 0.1.0
4
+ Summary: A utility to migrate scikit-learn models between versions.
5
+ Home-page: https://github.com/anvaldes/sklearn_migrator
6
+ Author: Alberto Valdés
7
+ Author-email: alberto.valdes.gonzalez.96.2@gmail.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: scikit-learn>=0.21.3
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: classifier
18
+ Dynamic: description
19
+ Dynamic: description-content-type
20
+ Dynamic: home-page
21
+ Dynamic: license-file
22
+ Dynamic: requires-dist
23
+ Dynamic: requires-python
24
+ Dynamic: summary
25
+
26
+ # sklearn_migrator
27
+
28
+ **sklearn_migrator** is a lightweight Python library that helps you serialize and migrate scikit-learn models across different versions.
29
+ It is especially useful when models are trained using older versions of scikit-learn and need to be loaded or interpreted in newer environments.
30
+
31
+ ---
32
+
33
+ ## 🔧 Features
34
+
35
+ - ✅ Serialize `DecisionTreeRegressor` models safely
36
+ - 🔄 Add compatibility adjustments for internal node structures
37
+ - 🧪 Facilitate model version migration without retraining
38
+ - 📦 Easy to install and use
39
+
40
+ ---
41
+
42
+ ## 📦 Installation
43
+
44
+ ```bash
45
+ pip install git+https://github.com/anvaldes/sklearn_migrator.git
46
+ ```
@@ -0,0 +1,21 @@
1
+ # sklearn_migrator
2
+
3
+ **sklearn_migrator** is a lightweight Python library that helps you serialize and migrate scikit-learn models across different versions.
4
+ It is especially useful when models are trained using older versions of scikit-learn and need to be loaded or interpreted in newer environments.
5
+
6
+ ---
7
+
8
+ ## 🔧 Features
9
+
10
+ - ✅ Serialize `DecisionTreeRegressor` models safely
11
+ - 🔄 Add compatibility adjustments for internal node structures
12
+ - 🧪 Facilitate model version migration without retraining
13
+ - 📦 Easy to install and use
14
+
15
+ ---
16
+
17
+ ## 📦 Installation
18
+
19
+ ```bash
20
+ pip install git+https://github.com/anvaldes/sklearn_migrator.git
21
+ ```
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,25 @@
1
import setuptools

# The README is reused verbatim as the long description of the distribution.
with open("README.md", "r", encoding="utf-8") as readme_file:
    readme_text = readme_file.read()

# Collect the metadata first so the setup() call itself stays a one-liner.
setup_options = dict(
    name='sklearn-migrator',  # This is the name shown on PyPI
    version='0.1.0',
    author="Alberto Valdés",
    author_email="alberto.valdes.gonzalez.96.2@gmail.com",
    description="A utility to migrate scikit-learn models between versions.",
    long_description=readme_text,
    long_description_content_type="text/markdown",
    url="https://github.com/anvaldes/sklearn_migrator",
    packages=setuptools.find_packages(),
    install_requires=[
        'scikit-learn>=0.21.3',
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.8',
)

setuptools.setup(**setup_options)
File without changes
@@ -0,0 +1,144 @@
1
+ import numpy as np
2
+ from sklearn.tree._tree import Tree
3
+ from sklearn.tree import DecisionTreeRegressor
4
+
5
+ import warnings
6
+ warnings.filterwarnings("ignore")
7
+
8
+
9
# Oldest scikit-learn release this module knows how to handle.
_MIN_SUPPORTED_VERSION = (0, 21, 3)
# First release whose tree node records carry the ``missing_go_to_left`` field.
_MISSING_GO_TO_LEFT_VERSION = (1, 3, 0)


def _parse_version(version):
    """Convert a version string into a tuple of ints that compares numerically.

    Only the leading numeric release components are kept, so suffixes such as
    ``.post1``, ``rc1`` or ``.dev0`` are ignored. The result is padded with
    zeros to at least three components (``'1.3'`` -> ``(1, 3, 0)``).

    Comparing the raw strings is wrong because it is lexicographic:
    ``'1.10.0' < '1.3.0'`` is true for strings but false for versions.

    Raises:
        ValueError: if ``version`` does not start with a numeric component.
    """
    release = []
    for piece in str(version).split('.'):
        digits = ''
        for char in piece:
            if char not in '0123456789':
                break
            digits += char
        if not digits:
            break
        release.append(int(digits))
        if len(digits) != len(piece):
            # A trailing suffix such as "0rc1" ends the numeric release part.
            break
    if not release:
        raise ValueError('Unrecognized scikit-learn version: %r' % (version,))
    release.extend([0] * (3 - len(release)))
    return tuple(release)


def _version_range_check(version, lower, upper):
    """Return True when ``lower <= version <= upper``, compared numerically."""
    return _parse_version(lower) <= _parse_version(version) <= _parse_version(upper)


def _lacks_missing_go_to_left(version):
    """Return True for supported versions whose nodes have only seven fields."""
    parsed = _parse_version(version)
    return _MIN_SUPPORTED_VERSION <= parsed < _MISSING_GO_TO_LEFT_VERSION


def _get_extended_nodes(nodes, version_in):
    """Return the node tuples padded to the eight-field layout.

    Nodes produced by a version in [0.21.3, 1.3.0) get a trailing ``0`` for
    the ``missing_go_to_left`` field; nodes from any other version are
    returned unchanged.
    """
    if _lacks_missing_go_to_left(version_in):
        return [node + (0,) for node in nodes]
    return nodes


def _build_dtype_dict(dtypes, version_in):
    """Describe the node dtype as a plain dict of names/formats/offsets/itemsize.

    For versions in [0.21.3, 1.3.0) the description is extended with the
    ``missing_go_to_left`` field so that it matches the padded nodes returned
    by ``_get_extended_nodes``.
    """
    field_names = dtypes.names
    formats = [dtypes.fields[name][0] for name in field_names]
    offsets = [dtypes.fields[name][1] for name in field_names]

    if _lacks_missing_go_to_left(version_in):
        # NOTE(review): offset 56 / itemsize 64 are hard-coded; they presumably
        # correspond to the node layout on 64-bit builds -- confirm before
        # relying on this for other platforms.
        return {
            'field_names': list(field_names + ('missing_go_to_left',)),
            'formats': [str(fmt) for fmt in formats + [np.dtype('uint8')]],
            'offsets': [int(off) for off in offsets + [56]],
            'itemsize': 64
        }

    return {
        'field_names': list(field_names),
        'formats': [str(fmt) for fmt in formats],
        'offsets': [int(off) for off in offsets],
        'itemsize': int(dtypes.itemsize)
    }


def _get_metadata(model, version_in):
    """Collect the estimator attributes that exist in ``version_in``.

    Attributes that are not read for a given version are stored as ``None``.
    The version boundaries are contiguous, so releases that fall between the
    previously listed ranges (e.g. 0.22.2.post1) are handled as well.
    Versions older than 0.21.3 are unsupported and yield an empty dict.
    """
    version = _parse_version(version_in)

    if version < _MIN_SUPPORTED_VERSION:
        return {}
    if version < (0, 23, 0):
        return {
            'n_features_in': None,
            'n_features': model.n_features_,
            'n_outputs': model.n_outputs_,
            'n_classes': model.n_classes_
        }
    if version < (0, 24, 0):
        return {
            'n_features_in': model.n_features_in_,
            'n_features': model.n_features_,
            'n_outputs': model.n_outputs_,
            'n_classes': model.n_classes_
        }
    if version < (1, 2, 0):
        return {
            'n_features_in': model.n_features_in_,
            'n_features': model.n_features_,
            'n_outputs': model.n_outputs_,
            'n_classes': None
        }
    return {
        'n_features_in': model.n_features_in_,
        'n_features': None,
        'n_outputs': model.n_outputs_,
        'n_classes': None
    }


def serialize_decision_tree_reg(model, version_in):
    """Serialize a fitted ``DecisionTreeRegressor`` into a plain dict.

    Args:
        model: fitted estimator; its ``tree_.__getstate__()`` is read.
        version_in: scikit-learn version string the model was trained with.

    Returns:
        dict with the estimator metadata (see ``_get_metadata``), the
        ``'serialized_tree'`` description (max depth, node count, values,
        nodes in the eight-field layout, node dtype description) and
        ``'version_sklearn_in'``.
    """
    tree = model.tree_
    state = tree.__getstate__()

    serialized_tree = {
        'max_depth': int(state['max_depth']),
        'node_count': int(state['node_count']),
        'values': state['values'].tolist(),
        'nodes': [list(n) for n in _get_extended_nodes(state['nodes'].tolist(), version_in)],
        'dtypes': _build_dtype_dict(state['nodes'].dtype, version_in)
    }

    metadata = _get_metadata(model, version_in)
    metadata['serialized_tree'] = serialized_tree
    metadata['version_sklearn_in'] = version_in

    return metadata


def _build_tree_dtype(dtypes_dict, version_out):
    """Build the numpy node dtype expected by ``version_out``.

    Returns:
        tuple ``(dtype, num_elements)`` where ``num_elements`` is 7 for
        versions before 1.3.0 and 8 otherwise.
    """
    version_lt_1_3 = _parse_version(version_out) < _MISSING_GO_TO_LEFT_VERSION
    num_elements = 7 if version_lt_1_3 else 8

    field_names = dtypes_dict['field_names'][:num_elements]
    formats = [np.dtype(fmt) for fmt in dtypes_dict['formats'][:num_elements]]
    offsets = dtypes_dict['offsets'][:num_elements]
    itemsize = 56 if version_lt_1_3 else 64

    return np.dtype({
        'names': field_names,
        'formats': formats,
        'offsets': offsets,
        'itemsize': itemsize
    }), num_elements


def deserialize_decision_tree_reg(data, version_out):
    """Rebuild a ``DecisionTreeRegressor`` from ``serialize_decision_tree_reg`` output.

    Args:
        data: dict produced by ``serialize_decision_tree_reg``. It is not
            modified.
        version_out: scikit-learn version string of the running environment.

    Returns:
        A ``DecisionTreeRegressor`` whose ``tree_`` and feature/output count
        attributes are restored for ``version_out``.
    """
    serialized = data['serialized_tree']

    tree_dtype, num_elements = _build_tree_dtype(serialized['dtypes'], version_out)

    # Build the truncated node tuples locally instead of writing them back
    # into the caller's dict.
    node_tuples = [tuple(n[:num_elements]) for n in serialized['nodes']]
    nodes_array = np.array(node_tuples, dtype=tree_dtype)
    values_array = np.array(serialized['values'], dtype=np.float64)

    n_outputs = data['n_outputs']
    n_features = (data['n_features'] or data['n_features_in'])
    # Regression trees use a single pseudo-class per output, so the array
    # must have one entry for every output, not just one entry overall.
    n_classes = np.ones(n_outputs, dtype=np.intp)

    tree_obj = Tree(n_features, n_classes, n_outputs)
    tree_obj.__setstate__({
        'max_depth': serialized['max_depth'],
        'node_count': serialized['node_count'],
        'nodes': nodes_array,
        'values': values_array
    })

    new_tree = DecisionTreeRegressor(max_depth=serialized['max_depth'], random_state=42)
    new_tree.tree_ = tree_obj
    new_tree.n_outputs_ = n_outputs

    # Contiguous boundaries: every version gets the attribute(s) it reads.
    parsed_out = _parse_version(version_out)
    if parsed_out < (0, 23, 0):
        new_tree.n_features_ = n_features
    elif parsed_out < (1, 0, 0):
        new_tree.n_features_ = n_features
        new_tree.n_features_in_ = n_features
    else:
        new_tree.n_features_in_ = n_features

    return new_tree
@@ -0,0 +1,46 @@
1
+ Metadata-Version: 2.4
2
+ Name: sklearn-migrator
3
+ Version: 0.1.0
4
+ Summary: A utility to migrate scikit-learn models between versions.
5
+ Home-page: https://github.com/anvaldes/sklearn_migrator
6
+ Author: Alberto Valdés
7
+ Author-email: alberto.valdes.gonzalez.96.2@gmail.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: scikit-learn>=0.21.3
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: classifier
18
+ Dynamic: description
19
+ Dynamic: description-content-type
20
+ Dynamic: home-page
21
+ Dynamic: license-file
22
+ Dynamic: requires-dist
23
+ Dynamic: requires-python
24
+ Dynamic: summary
25
+
26
+ # sklearn_migrator
27
+
28
+ **sklearn_migrator** is a lightweight Python library that helps you serialize and migrate scikit-learn models across different versions.
29
+ It is especially useful when models are trained using older versions of scikit-learn and need to be loaded or interpreted in newer environments.
30
+
31
+ ---
32
+
33
+ ## 🔧 Features
34
+
35
+ - ✅ Serialize `DecisionTreeRegressor` models safely
36
+ - 🔄 Add compatibility adjustments for internal node structures
37
+ - 🧪 Facilitate model version migration without retraining
38
+ - 📦 Easy to install and use
39
+
40
+ ---
41
+
42
+ ## 📦 Installation
43
+
44
+ ```bash
45
+ pip install git+https://github.com/anvaldes/sklearn_migrator.git
46
+ ```
@@ -0,0 +1,12 @@
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ sklearn_migrator/__init__.py
5
+ sklearn_migrator.egg-info/PKG-INFO
6
+ sklearn_migrator.egg-info/SOURCES.txt
7
+ sklearn_migrator.egg-info/dependency_links.txt
8
+ sklearn_migrator.egg-info/requires.txt
9
+ sklearn_migrator.egg-info/top_level.txt
10
+ sklearn_migrator/regression/__init__.py
11
+ sklearn_migrator/regression/decision_tree_reg.py
12
+ tests/test_decision_tree_reg.py
@@ -0,0 +1 @@
1
+ scikit-learn>=0.21.3
@@ -0,0 +1 @@
1
+ sklearn_migrator
@@ -0,0 +1,29 @@
1
+ from sklearn.tree import DecisionTreeRegressor
2
+ from sklearn_migrator.regression.decision_tree_reg import serialize_decision_tree_reg
3
+ from sklearn_migrator.regression.decision_tree_reg import deserialize_decision_tree_reg
4
+ import sklearn
5
+
6
def test_decision_tree_reg():
    """Round-trip a fitted regressor through serialize/deserialize.

    Both directions use the installed scikit-learn version, so the restored
    model must be structurally valid and predict exactly like the original.
    """
    X = [[0], [1], [2], [3]]
    y = [0, 1, 2, 3]

    model = DecisionTreeRegressor()
    model.fit(X, y)

    version = sklearn.__version__
    result = serialize_decision_tree_reg(model, version_in=version)
    new_model = deserialize_decision_tree_reg(result, version_out=version)

    assert isinstance(result, dict)

    expected_keys = (
        'n_features_in',
        'n_features',
        'n_outputs',
        'n_classes',
        'serialized_tree',
        'version_sklearn_in',
    )
    for key in expected_keys:
        assert key in result

    assert result['serialized_tree']['max_depth'] > 0
    assert isinstance(result['serialized_tree']['nodes'], list)

    assert isinstance(new_model, DecisionTreeRegressor)

    # The point of the migration is an equivalent model, so check behaviour
    # and not only the type of the restored object.
    assert list(new_model.predict(X)) == list(model.predict(X))