orbital 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. orbital-0.2.2/LICENSE.md +21 -0
  2. orbital-0.2.2/PKG-INFO +243 -0
  3. orbital-0.2.2/README.md +197 -0
  4. orbital-0.2.2/pyproject.toml +116 -0
  5. orbital-0.2.2/setup.cfg +4 -0
  6. orbital-0.2.2/src/orbital/__init__.py +14 -0
  7. orbital-0.2.2/src/orbital/_utils/__init__.py +0 -0
  8. orbital-0.2.2/src/orbital/_utils/onnx.py +39 -0
  9. orbital-0.2.2/src/orbital/_utils/repr_pipeline.py +79 -0
  10. orbital-0.2.2/src/orbital/ast.py +135 -0
  11. orbital-0.2.2/src/orbital/sql.py +81 -0
  12. orbital-0.2.2/src/orbital/translate.py +223 -0
  13. orbital-0.2.2/src/orbital/translation/__init__.py +1 -0
  14. orbital-0.2.2/src/orbital/translation/optimizer.py +313 -0
  15. orbital-0.2.2/src/orbital/translation/steps/__init__.py +1 -0
  16. orbital-0.2.2/src/orbital/translation/steps/add.py +67 -0
  17. orbital-0.2.2/src/orbital/translation/steps/argmax.py +79 -0
  18. orbital-0.2.2/src/orbital/translation/steps/arrayfeatureextractor.py +82 -0
  19. orbital-0.2.2/src/orbital/translation/steps/cast.py +100 -0
  20. orbital-0.2.2/src/orbital/translation/steps/concat.py +105 -0
  21. orbital-0.2.2/src/orbital/translation/steps/div.py +90 -0
  22. orbital-0.2.2/src/orbital/translation/steps/gather.py +57 -0
  23. orbital-0.2.2/src/orbital/translation/steps/identity.py +19 -0
  24. orbital-0.2.2/src/orbital/translation/steps/imputer.py +37 -0
  25. orbital-0.2.2/src/orbital/translation/steps/labelencoder.py +75 -0
  26. orbital-0.2.2/src/orbital/translation/steps/linearclass.py +107 -0
  27. orbital-0.2.2/src/orbital/translation/steps/linearreg.py +86 -0
  28. orbital-0.2.2/src/orbital/translation/steps/matmul.py +151 -0
  29. orbital-0.2.2/src/orbital/translation/steps/mul.py +67 -0
  30. orbital-0.2.2/src/orbital/translation/steps/onehotencoder.py +48 -0
  31. orbital-0.2.2/src/orbital/translation/steps/reshape.py +48 -0
  32. orbital-0.2.2/src/orbital/translation/steps/scaler.py +69 -0
  33. orbital-0.2.2/src/orbital/translation/steps/softmax.py +74 -0
  34. orbital-0.2.2/src/orbital/translation/steps/sub.py +70 -0
  35. orbital-0.2.2/src/orbital/translation/steps/trees/__init__.py +6 -0
  36. orbital-0.2.2/src/orbital/translation/steps/trees/classifier.py +212 -0
  37. orbital-0.2.2/src/orbital/translation/steps/trees/regressor.py +93 -0
  38. orbital-0.2.2/src/orbital/translation/steps/trees/tree.py +175 -0
  39. orbital-0.2.2/src/orbital/translation/steps/where.py +76 -0
  40. orbital-0.2.2/src/orbital/translation/steps/zipmap.py +54 -0
  41. orbital-0.2.2/src/orbital/translation/translator.py +128 -0
  42. orbital-0.2.2/src/orbital/translation/variables.py +204 -0
  43. orbital-0.2.2/src/orbital/types.py +222 -0
  44. orbital-0.2.2/src/orbital.egg-info/PKG-INFO +243 -0
  45. orbital-0.2.2/src/orbital.egg-info/SOURCES.txt +52 -0
  46. orbital-0.2.2/src/orbital.egg-info/dependency_links.txt +1 -0
  47. orbital-0.2.2/src/orbital.egg-info/requires.txt +17 -0
  48. orbital-0.2.2/src/orbital.egg-info/top_level.txt +1 -0
  49. orbital-0.2.2/tests/test_ast.py +90 -0
  50. orbital-0.2.2/tests/test_pipeline_e2e.py +560 -0
  51. orbital-0.2.2/tests/test_pipeline_str.py +53 -0
  52. orbital-0.2.2/tests/test_sql.py +133 -0
  53. orbital-0.2.2/tests/test_translation.py +42 -0
  54. orbital-0.2.2/tests/test_types.py +78 -0
@@ -0,0 +1,21 @@
1
+ # MIT License
2
+
3
+ Copyright (c) 2024 orbital authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
orbital-0.2.2/PKG-INFO ADDED
@@ -0,0 +1,243 @@
1
+ Metadata-Version: 2.4
2
+ Name: orbital
3
+ Version: 0.2.2
4
+ Summary: Allow SKLearn predictions to run on database systems in pure SQL.
5
+ Author: Posit Software PBC
6
+ Author-email: Alessandro Molina <alessandro.molina@posit.co>
7
+ License-Expression: MIT
8
+ Keywords: database,machine learning,sql
9
+ Classifier: Operating System :: Microsoft :: Windows
10
+ Classifier: Operating System :: POSIX
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Operating System :: MacOS
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Intended Audience :: Science/Research
20
+ Classifier: Intended Audience :: Developers
21
+ Classifier: Topic :: Software Development
22
+ Classifier: Topic :: Scientific/Engineering
23
+ Classifier: Topic :: Database
24
+ Classifier: Programming Language :: Python
25
+ Classifier: Programming Language :: SQL
26
+ Requires-Python: >=3.9
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE.md
29
+ Requires-Dist: packaging
30
+ Requires-Dist: scikit-learn
31
+ Requires-Dist: skl2onnx~=1.19.1
32
+ Requires-Dist: onnx~=1.18.0
33
+ Requires-Dist: ibis-framework<11.0.0
34
+ Provides-Extra: test
35
+ Requires-Dist: pandas; extra == "test"
36
+ Requires-Dist: polars-lts-cpu; extra == "test"
37
+ Requires-Dist: pyarrow>=19.0.1; extra == "test"
38
+ Requires-Dist: pyarrow-hotfix; extra == "test"
39
+ Requires-Dist: ibis-framework[duckdb]>=5.1.0; extra == "test"
40
+ Requires-Dist: pytest-cov>=5.0.0; extra == "test"
41
+ Requires-Dist: pytest>=8.3.2; extra == "test"
42
+ Requires-Dist: sqlalchemy; extra == "test"
43
+ Requires-Dist: psycopg2-binary; extra == "test"
44
+ Requires-Dist: duckdb; extra == "test"
45
+ Dynamic: license-file
46
+
47
+ # orbital
48
+
49
+ Convert SKLearn pipelines into SQL queries for execution in a database
50
+ without the need for a Python environment.
51
+
52
+ See `examples` directory for [example pipelines](https://github.com/posit-dev/orbital/tree/main/examples) and [Documentation](https://posit-dev.github.io/orbital/)
53
+
54
+ **Warning**:
55
+
56
+ This is a work in progress.
57
+ You might encounter bugs or missing features.
58
+
59
+ **Note**:
60
+
61
+ Not all transformations and models can be represented as SQL queries,
62
+ so orbital might not be able to implement the specific pipeline you are using.
63
+
64
+ ## Getting Started
65
+
66
+ Install orbital:
67
+
68
+ ```bash
69
+ $ pip install orbital
70
+ ```
71
+
72
+ Prepare some data:
73
+
74
+ ```python
75
+ from sklearn.datasets import load_iris
76
+ from sklearn.model_selection import train_test_split
77
+
78
+ COLUMNS = ["sepal.length", "sepal.width", "petal.length", "petal.width"]
79
+
80
+ iris = load_iris(as_frame=True)
81
+ iris_x = iris.data.set_axis(COLUMNS, axis=1)
82
+
83
+ # SQL and orbital don't like dots in column names, replace them with underscores
84
+ iris_x.columns = COLUMNS = [cname.replace(".", "_") for cname in COLUMNS]
85
+
86
+ X_train, X_test, y_train, y_test = train_test_split(
87
+ iris_x, iris.target, test_size=0.2, random_state=42
88
+ )
89
+ ```
90
+
91
+ Define a Scikit-Learn pipeline and train it:
92
+
93
+ ```python
94
+ from sklearn.compose import ColumnTransformer
95
+ from sklearn.linear_model import LinearRegression
96
+ from sklearn.pipeline import Pipeline
97
+ from sklearn.preprocessing import StandardScaler
98
+
99
+ pipeline = Pipeline(
100
+ [
101
+ ("preprocess", ColumnTransformer([("scaler", StandardScaler(with_std=False), COLUMNS)],
102
+ remainder="passthrough")),
103
+ ("linear_regression", LinearRegression()),
104
+ ]
105
+ )
106
+ pipeline.fit(X_train, y_train)
107
+ ```
108
+
109
+ Convert the pipeline to orbital:
110
+
111
+ ```python
112
+ import orbital
113
+ import orbital.types
114
+
115
+ orbital_pipeline = orbital.parse_pipeline(pipeline, features={
116
+ "sepal_length": orbital.types.DoubleColumnType(),
117
+ "sepal_width": orbital.types.DoubleColumnType(),
118
+ "petal_length": orbital.types.DoubleColumnType(),
119
+ "petal_width": orbital.types.DoubleColumnType(),
120
+ })
121
+ ```
122
+
123
+ You can print the pipeline to see the result:
124
+
125
+ ```python
126
+ >>> print(orbital_pipeline)
127
+
128
+ ParsedPipeline(
129
+ features={
130
+ sepal_length: DoubleColumnType()
131
+ sepal_width: DoubleColumnType()
132
+ petal_length: DoubleColumnType()
133
+ petal_width: DoubleColumnType()
134
+ },
135
+ steps=[
136
+ merged_columns=Concat(
137
+ inputs: sepal_length, sepal_width, petal_length, petal_width,
138
+ attributes:
139
+ axis=1
140
+ )
141
+ variable1=Sub(
142
+ inputs: merged_columns, Su_Subcst=[5.809166666666666, 3.0616666666666665, 3.7266666666666666, 1.18333333...,
143
+ attributes:
144
+ )
145
+ multiplied=MatMul(
146
+ inputs: variable1, coef=[-0.11633479416518255, -0.05977785171980231, 0.25491374699772246, 0.5475959...,
147
+ attributes:
148
+ )
149
+ resh=Add(
150
+ inputs: multiplied, intercept=[0.9916666666666668],
151
+ attributes:
152
+ )
153
+ variable=Reshape(
154
+ inputs: resh, shape_tensor=[-1, 1],
155
+ attributes:
156
+ )
157
+ ],
158
+ )
159
+ ```
160
+
161
+ Now we can generate the SQL from the pipeline:
162
+
163
+ ```python
164
+ sql = orbital.export_sql("DATA_TABLE", orbital_pipeline, dialect="duckdb")
165
+ ```
166
+
167
+ And check the resulting query:
168
+
169
+ ```python
170
+ >>> print(sql)
171
+
172
+ SELECT ("t0"."sepal_length" - 5.809166666666666) * -0.11633479416518255 + 0.9916666666666668 +
173
+ ("t0"."sepal_width" - 3.0616666666666665) * -0.05977785171980231 +
174
+ ("t0"."petal_length" - 3.7266666666666666) * 0.25491374699772246 +
175
+ ("t0"."petal_width" - 1.1833333333333333) * 0.5475959809777828
176
+ AS "variable" FROM "DATA_TABLE" AS "t0"
177
+ ```
178
+
179
+ Once the SQL is generate, you can use it to run the pipeline on a
180
+ database. From here on the SQL can be exported and reused in other
181
+ places:
182
+
183
+ ```python
184
+ >>> print("\nPrediction with SQL")
185
+ >>> duckdb.register("DATA_TABLE", X_test)
186
+ >>> print(duckdb.sql(sql).df()["variable"][:5].to_numpy())
187
+
188
+ Prediction with SQL
189
+ [ 1.23071715 -0.04010441 2.21970287 1.34966889 1.28429336]
190
+ ```
191
+
192
+ We can verify that the prediction matches the one done by Scikit-Learn
193
+ by running the scikitlearn pipeline on the same set of data:
194
+
195
+ ```python
196
+ >>> print("\nPrediction with SciKit-Learn")
197
+ >>> print(pipeline.predict(X_test)[:5])
198
+
199
+ Prediction with SciKit-Learn
200
+ [ 1.23071715 -0.04010441 2.21970287 1.34966889 1.28429336 ]
201
+ ```
202
+
203
+ ## Supported Models
204
+
205
+ orbital currently supports the following models:
206
+
207
+ - Linear Regression
208
+ - Logistic Regression
209
+ - Lasso Regression
210
+ - Elastic Net
211
+ - Decision Tree Regressor
212
+ - Decision Tree Classifier
213
+ - Random Forest Classifier
214
+ - Gradient Boosting Regressor
215
+ - Gradient Boosting Classifier
216
+
217
+ # Testing
218
+
219
+ Setup testing environment:
220
+
221
+ ```bash
222
+ $ uv sync --no-dev --extra test
223
+ ```
224
+
225
+ Run Tests:
226
+
227
+ ```bash
228
+ $ uv run pytest -v
229
+ ```
230
+
231
+ Try Examples:
232
+
233
+ ```bash
234
+ $ uv run examples/pipeline_lineareg.py
235
+ ```
236
+
237
+ # Development
238
+
239
+ Setup a development environment:
240
+
241
+ ```bash
242
+ $ uv sync
243
+ ```
@@ -0,0 +1,197 @@
1
+ # orbital
2
+
3
+ Convert SKLearn pipelines into SQL queries for execution in a database
4
+ without the need for a Python environment.
5
+
6
+ See `examples` directory for [example pipelines](https://github.com/posit-dev/orbital/tree/main/examples) and [Documentation](https://posit-dev.github.io/orbital/)
7
+
8
+ **Warning**:
9
+
10
+ This is a work in progress.
11
+ You might encounter bugs or missing features.
12
+
13
+ **Note**:
14
+
15
+ Not all transformations and models can be represented as SQL queries,
16
+ so orbital might not be able to implement the specific pipeline you are using.
17
+
18
+ ## Getting Started
19
+
20
+ Install orbital:
21
+
22
+ ```bash
23
+ $ pip install orbital
24
+ ```
25
+
26
+ Prepare some data:
27
+
28
+ ```python
29
+ from sklearn.datasets import load_iris
30
+ from sklearn.model_selection import train_test_split
31
+
32
+ COLUMNS = ["sepal.length", "sepal.width", "petal.length", "petal.width"]
33
+
34
+ iris = load_iris(as_frame=True)
35
+ iris_x = iris.data.set_axis(COLUMNS, axis=1)
36
+
37
+ # SQL and orbital don't like dots in column names, replace them with underscores
38
+ iris_x.columns = COLUMNS = [cname.replace(".", "_") for cname in COLUMNS]
39
+
40
+ X_train, X_test, y_train, y_test = train_test_split(
41
+ iris_x, iris.target, test_size=0.2, random_state=42
42
+ )
43
+ ```
44
+
45
+ Define a Scikit-Learn pipeline and train it:
46
+
47
+ ```python
48
+ from sklearn.compose import ColumnTransformer
49
+ from sklearn.linear_model import LinearRegression
50
+ from sklearn.pipeline import Pipeline
51
+ from sklearn.preprocessing import StandardScaler
52
+
53
+ pipeline = Pipeline(
54
+ [
55
+ ("preprocess", ColumnTransformer([("scaler", StandardScaler(with_std=False), COLUMNS)],
56
+ remainder="passthrough")),
57
+ ("linear_regression", LinearRegression()),
58
+ ]
59
+ )
60
+ pipeline.fit(X_train, y_train)
61
+ ```
62
+
63
+ Convert the pipeline to orbital:
64
+
65
+ ```python
66
+ import orbital
67
+ import orbital.types
68
+
69
+ orbital_pipeline = orbital.parse_pipeline(pipeline, features={
70
+ "sepal_length": orbital.types.DoubleColumnType(),
71
+ "sepal_width": orbital.types.DoubleColumnType(),
72
+ "petal_length": orbital.types.DoubleColumnType(),
73
+ "petal_width": orbital.types.DoubleColumnType(),
74
+ })
75
+ ```
76
+
77
+ You can print the pipeline to see the result:
78
+
79
+ ```python
80
+ >>> print(orbital_pipeline)
81
+
82
+ ParsedPipeline(
83
+ features={
84
+ sepal_length: DoubleColumnType()
85
+ sepal_width: DoubleColumnType()
86
+ petal_length: DoubleColumnType()
87
+ petal_width: DoubleColumnType()
88
+ },
89
+ steps=[
90
+ merged_columns=Concat(
91
+ inputs: sepal_length, sepal_width, petal_length, petal_width,
92
+ attributes:
93
+ axis=1
94
+ )
95
+ variable1=Sub(
96
+ inputs: merged_columns, Su_Subcst=[5.809166666666666, 3.0616666666666665, 3.7266666666666666, 1.18333333...,
97
+ attributes:
98
+ )
99
+ multiplied=MatMul(
100
+ inputs: variable1, coef=[-0.11633479416518255, -0.05977785171980231, 0.25491374699772246, 0.5475959...,
101
+ attributes:
102
+ )
103
+ resh=Add(
104
+ inputs: multiplied, intercept=[0.9916666666666668],
105
+ attributes:
106
+ )
107
+ variable=Reshape(
108
+ inputs: resh, shape_tensor=[-1, 1],
109
+ attributes:
110
+ )
111
+ ],
112
+ )
113
+ ```
114
+
115
+ Now we can generate the SQL from the pipeline:
116
+
117
+ ```python
118
+ sql = orbital.export_sql("DATA_TABLE", orbital_pipeline, dialect="duckdb")
119
+ ```
120
+
121
+ And check the resulting query:
122
+
123
+ ```python
124
+ >>> print(sql)
125
+
126
+ SELECT ("t0"."sepal_length" - 5.809166666666666) * -0.11633479416518255 + 0.9916666666666668 +
127
+ ("t0"."sepal_width" - 3.0616666666666665) * -0.05977785171980231 +
128
+ ("t0"."petal_length" - 3.7266666666666666) * 0.25491374699772246 +
129
+ ("t0"."petal_width" - 1.1833333333333333) * 0.5475959809777828
130
+ AS "variable" FROM "DATA_TABLE" AS "t0"
131
+ ```
132
+
133
+ Once the SQL is generate, you can use it to run the pipeline on a
134
+ database. From here on the SQL can be exported and reused in other
135
+ places:
136
+
137
+ ```python
138
+ >>> print("\nPrediction with SQL")
139
+ >>> duckdb.register("DATA_TABLE", X_test)
140
+ >>> print(duckdb.sql(sql).df()["variable"][:5].to_numpy())
141
+
142
+ Prediction with SQL
143
+ [ 1.23071715 -0.04010441 2.21970287 1.34966889 1.28429336]
144
+ ```
145
+
146
+ We can verify that the prediction matches the one done by Scikit-Learn
147
+ by running the scikitlearn pipeline on the same set of data:
148
+
149
+ ```python
150
+ >>> print("\nPrediction with SciKit-Learn")
151
+ >>> print(pipeline.predict(X_test)[:5])
152
+
153
+ Prediction with SciKit-Learn
154
+ [ 1.23071715 -0.04010441 2.21970287 1.34966889 1.28429336 ]
155
+ ```
156
+
157
+ ## Supported Models
158
+
159
+ orbital currently supports the following models:
160
+
161
+ - Linear Regression
162
+ - Logistic Regression
163
+ - Lasso Regression
164
+ - Elastic Net
165
+ - Decision Tree Regressor
166
+ - Decision Tree Classifier
167
+ - Random Forest Classifier
168
+ - Gradient Boosting Regressor
169
+ - Gradient Boosting Classifier
170
+
171
+ # Testing
172
+
173
+ Setup testing environment:
174
+
175
+ ```bash
176
+ $ uv sync --no-dev --extra test
177
+ ```
178
+
179
+ Run Tests:
180
+
181
+ ```bash
182
+ $ uv run pytest -v
183
+ ```
184
+
185
+ Try Examples:
186
+
187
+ ```bash
188
+ $ uv run examples/pipeline_lineareg.py
189
+ ```
190
+
191
+ # Development
192
+
193
+ Setup a development environment:
194
+
195
+ ```bash
196
+ $ uv sync
197
+ ```
@@ -0,0 +1,116 @@
1
+ [build-system]
2
+ requires = [
3
+ "setuptools>=77.0.0",
4
+ "wheel"
5
+ ]
6
+ build-backend = "setuptools.build_meta"
7
+
8
+ [project]
9
+ name = "orbital"
10
+ version = "0.2.2"
11
+ description = "Allow SKLearn predictions to run on database systems in pure SQL."
12
+ keywords = ["database", "machine learning", "sql"]
13
+ readme = { file = "README.md", content-type = "text/markdown" }
14
+ license = "MIT"
15
+ authors = [
16
+ { name = "Alessandro Molina", email = "alessandro.molina@posit.co" },
17
+ { name = "Posit Software PBC" }
18
+ ]
19
+ classifiers = [
20
+ "Operating System :: Microsoft :: Windows",
21
+ "Operating System :: POSIX",
22
+ "Operating System :: Unix",
23
+ "Operating System :: MacOS",
24
+ "Programming Language :: Python :: 3",
25
+ "Programming Language :: Python :: 3.9",
26
+ "Programming Language :: Python :: 3.10",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Programming Language :: Python :: 3.13",
30
+ "Intended Audience :: Science/Research",
31
+ "Intended Audience :: Developers",
32
+ "Topic :: Software Development",
33
+ "Topic :: Scientific/Engineering",
34
+ "Topic :: Database",
35
+ "Programming Language :: Python",
36
+ "Programming Language :: SQL",
37
+ ]
38
+
39
+ requires-python = ">=3.9"
40
+ dependencies = [
41
+ "packaging", # somehow required by skl2onnx/operator_converters
42
+ "scikit-learn",
43
+ "skl2onnx~=1.19.1",
44
+ "onnx~=1.18.0",
45
+ "ibis-framework<11.0.0",
46
+ ]
47
+
48
+ [project.optional-dependencies]
49
+ test = [
50
+ "pandas",
51
+ "polars-lts-cpu", # For testing we prefer compatibility over performance.
52
+ "pyarrow>=19.0.1",
53
+ "pyarrow-hotfix", # Ibis seems to demand this even on versions without hotfixes
54
+ "ibis-framework[duckdb]>=5.1.0",
55
+ "pytest-cov>=5.0.0",
56
+ "pytest>=8.3.2",
57
+ "sqlalchemy",
58
+ "psycopg2-binary",
59
+ "duckdb",
60
+ ]
61
+
62
+ [tool.uv]
63
+ dev-dependencies = [
64
+ # Need to repeat test dependencies
65
+ # uv doesn't allow to merge them with extras
66
+ # and we don't want users to know that they need to run
67
+ # uv sync with extras.
68
+ "pandas",
69
+ "polars-lts-cpu",
70
+ "pyarrow",
71
+ "pyarrow-hotfix",
72
+ "pytest-cov>=5.0.0",
73
+ "pytest>=8.3.2",
74
+ "ibis-framework[duckdb]>=5.1.0",
75
+ # Necessary for development workflow
76
+ "mypy>=1.11.2",
77
+ "pre-commit",
78
+ "ruff>=0.6.3",
79
+ "mkdocs-material",
80
+ "mkdocstrings[python]",
81
+ "pydot",
82
+ "onnxruntime",
83
+ "onnxscript",
84
+ "sqlalchemy",
85
+ "psycopg2-binary",
86
+ "duckdb",
87
+ ]
88
+
89
+
90
+ [tool.setuptools.packages.find]
91
+ where = ["src"]
92
+ exclude = ["tests"]
93
+
94
+ [tool.pytest.ini_options]
95
+ addopts = "--doctest-modules --cov=src --cov-report term-missing"
96
+ testpaths = ["tests"]
97
+
98
+ [tool.mypy]
99
+ ignore_missing_imports = true
100
+ mypy_path = "$MYPY_CONFIG_FILE_DIR/src"
101
+ packages = ["orbital"]
102
+
103
+ [tool.ruff]
104
+ target-version = "py39"
105
+ src = ["src"]
106
+ extend-exclude = ["docs", "tests", "examples", "proxypackage"]
107
+ output-format = "grouped"
108
+ lint.select = [
109
+ "E4", "E7", "E9",
110
+ "F",
111
+ "I001",
112
+ "D100", "D101", "D102", "D103",
113
+ "D104", "D107", "D417",
114
+ "ANN001", "ANN201", "ANN202",
115
+ "ANN204", "ANN205", "ANN206"
116
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
+ """orbital, translate scikit-learn pipelines into SQL queries
2
+
3
+ orbital is a library for translating **scikit-learn** pipelines
4
+ into **SQL queries** and **Ibis expressions**.
5
+
6
+ It provides a way to execute machine learning models on databases without
7
+ the need for a python runtime environment.
8
+ """
9
+
10
+ from .ast import parse_pipeline
11
+ from .sql import export_sql
12
+ from .translate import ResultsProjection, translate
13
+
14
+ __all__ = ["parse_pipeline", "translate", "export_sql", "ResultsProjection"]
File without changes
@@ -0,0 +1,39 @@
1
+ import typing
2
+
3
+ import onnx
4
+ import onnx.helper
5
+
6
# Value kinds that can be carried by an ONNX initializer or node attribute:
# either a scalar (int, float, str) or a homogeneous list of one of those.
ListVariableTypes = typing.Union[list[int], list[float], list[str]]
VariableTypes = typing.Union[float, int, str, ListVariableTypes]
8
+
9
+
10
def get_initializer_data(var: onnx.TensorProto) -> VariableTypes:
    """Given a constant initializer, return its value.

    A tensor with no dimensions holding exactly one entry is treated
    as a scalar and returned unwrapped; anything else comes back as a
    plain Python list of the tensor's entries.
    """
    # Resolve which protobuf field stores the payload for this dtype
    # (e.g. "float_data", "int64_data") and copy it out as a list.
    field_name = onnx.helper.tensor_dtype_to_field(var.data_type)
    data = [entry for entry in getattr(var, field_name)]
    # NOTE(review): tensors serialized via raw_data would not be seen by
    # the typed field accessor — presumably upstream always emits typed
    # fields; confirm against the producer (skl2onnx).
    dims = getattr(var, "dims", None)
    if dims or len(data) != 1:
        return data
    # Dimensionless single-entry tensor: it's a scalar.
    return data[0]
21
+
22
+
23
def get_attr_value(attr: onnx.AttributeProto) -> VariableTypes:
    """Given an attribute, return its value"""
    # TODO: Check if it can be replaced with onnx.helper.get_attribute_value
    def _as_text(value):
        # Attribute strings arrive as bytes; surface them as str.
        return value.decode("utf-8") if isinstance(value, bytes) else value

    kind = attr.type
    if kind == attr.INT:
        return attr.i
    if kind == attr.FLOAT:
        return attr.f
    if kind == attr.STRING:
        return _as_text(attr.s)
    if kind == attr.INTS:
        return [i for i in attr.ints]
    if kind == attr.FLOATS:
        return [f for f in attr.floats]
    if kind == attr.STRINGS:
        return [_as_text(s) for s in attr.strings]
    raise ValueError(f"Unsupported attribute type: {attr.type}")