featurecanvas 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- featurecanvas-0.1.0/PKG-INFO +16 -0
- featurecanvas-0.1.0/README.md +0 -0
- featurecanvas-0.1.0/featurecanvas/__init__.py +4 -0
- featurecanvas-0.1.0/featurecanvas/client.py +252 -0
- featurecanvas-0.1.0/featurecanvas.egg-info/PKG-INFO +16 -0
- featurecanvas-0.1.0/featurecanvas.egg-info/SOURCES.txt +9 -0
- featurecanvas-0.1.0/featurecanvas.egg-info/dependency_links.txt +1 -0
- featurecanvas-0.1.0/featurecanvas.egg-info/requires.txt +1 -0
- featurecanvas-0.1.0/featurecanvas.egg-info/top_level.txt +1 -0
- featurecanvas-0.1.0/pyproject.toml +28 -0
- featurecanvas-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: featurecanvas
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for FeatureCanvas — the no-code feature engineering studio
|
|
5
|
+
Author-email: "G. Preetham Saxon" <gpreethamsaxon@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/GPREETHAMSAXON/FeatureCanvas
|
|
8
|
+
Project-URL: Documentation, https://github.com/GPREETHAMSAXON/FeatureCanvas#readme
|
|
9
|
+
Keywords: feature-engineering,machine-learning,data-science,featurecanvas
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Requires-Python: >=3.9
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: requests>=2.31.0
|
|
File without changes
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FeatureCanvas Python SDK client.
|
|
3
|
+
|
|
4
|
+
Wraps the FeatureCanvas REST API so you can build feature pipelines in code,
|
|
5
|
+
run them, and export generated Python scripts — all without opening a browser.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import requests
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FeatureCanvas:
|
|
17
|
+
"""Entry point. Create one per backend URL.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
base_url: URL of your deployed FeatureCanvas backend, e.g.
|
|
21
|
+
"https://featurecanvas.onrender.com"
|
|
22
|
+
timeout: Request timeout in seconds (default 60 — transforms on large
|
|
23
|
+
datasets can take a moment on a cold Render instance).
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __init__(self, base_url: str, timeout: int = 60):
|
|
27
|
+
self.base_url = base_url.rstrip("/")
|
|
28
|
+
self.timeout = timeout
|
|
29
|
+
self._session = requests.Session()
|
|
30
|
+
|
|
31
|
+
def _get(self, path: str, **kwargs) -> Any:
|
|
32
|
+
resp = self._session.get(f"{self.base_url}{path}", timeout=self.timeout, **kwargs)
|
|
33
|
+
resp.raise_for_status()
|
|
34
|
+
return resp.json()
|
|
35
|
+
|
|
36
|
+
def _post(self, path: str, **kwargs) -> Any:
|
|
37
|
+
resp = self._session.post(f"{self.base_url}{path}", timeout=self.timeout, **kwargs)
|
|
38
|
+
resp.raise_for_status()
|
|
39
|
+
return resp.json()
|
|
40
|
+
|
|
41
|
+
def health(self) -> dict:
|
|
42
|
+
"""Check if the backend is reachable."""
|
|
43
|
+
return self._get("/api/health")
|
|
44
|
+
|
|
45
|
+
def transforms(self) -> list[dict]:
|
|
46
|
+
"""List all available transforms with their keys and param schemas."""
|
|
47
|
+
return self._get("/api/transforms")["transforms"]
|
|
48
|
+
|
|
49
|
+
def upload(self, path: str | Path) -> "Session":
|
|
50
|
+
"""Upload a CSV file and return a Session you can build transforms on.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
path: Local path to a .csv file.
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
Session object bound to the new session ID.
|
|
57
|
+
|
|
58
|
+
Example:
|
|
59
|
+
session = fc.upload("data/train.csv")
|
|
60
|
+
"""
|
|
61
|
+
p = Path(path)
|
|
62
|
+
if not p.exists():
|
|
63
|
+
raise FileNotFoundError(f"CSV not found: {path}")
|
|
64
|
+
with open(p, "rb") as f:
|
|
65
|
+
resp = self._session.post(
|
|
66
|
+
f"{self.base_url}/api/upload",
|
|
67
|
+
files={"file": (p.name, f, "text/csv")},
|
|
68
|
+
timeout=self.timeout,
|
|
69
|
+
)
|
|
70
|
+
resp.raise_for_status()
|
|
71
|
+
data = resp.json()
|
|
72
|
+
return Session(client=self, session_id=data["session_id"], _profile=data["profile"])
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class Session:
|
|
76
|
+
"""A FeatureCanvas session tied to one uploaded dataset.
|
|
77
|
+
|
|
78
|
+
Don't construct this directly — use ``FeatureCanvas.upload()``.
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
def __init__(self, client: FeatureCanvas, session_id: str, _profile: dict):
|
|
82
|
+
self._client = client
|
|
83
|
+
self.session_id = session_id
|
|
84
|
+
self._profile = _profile
|
|
85
|
+
self.target_col: str | None = None
|
|
86
|
+
|
|
87
|
+
def profile(self) -> dict:
|
|
88
|
+
"""Column stats for the raw uploaded data."""
|
|
89
|
+
return self._profile
|
|
90
|
+
|
|
91
|
+
def columns(self, parent_id: str | None = None) -> list[str]:
|
|
92
|
+
"""Columns available at a given point in the graph.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
parent_id: Backend node ID to scope to. None = raw upload columns.
|
|
96
|
+
"""
|
|
97
|
+
params = {}
|
|
98
|
+
if parent_id:
|
|
99
|
+
params["parent_id"] = parent_id
|
|
100
|
+
return self._client._get(f"/api/columns/{self.session_id}", params=params)["columns"]
|
|
101
|
+
|
|
102
|
+
def set_target(self, column: str, has_train_test_split: bool = False) -> "Session":
|
|
103
|
+
"""Set the target column for impact scoring and leakage detection.
|
|
104
|
+
|
|
105
|
+
Returns self so you can chain: ``session.set_target("churned").apply(...)``
|
|
106
|
+
"""
|
|
107
|
+
self._client._post("/api/target", json={
|
|
108
|
+
"session_id": self.session_id,
|
|
109
|
+
"target_column": column,
|
|
110
|
+
"has_train_test_split": has_train_test_split,
|
|
111
|
+
})
|
|
112
|
+
self.target_col = column
|
|
113
|
+
return self
|
|
114
|
+
|
|
115
|
+
def apply(
|
|
116
|
+
self,
|
|
117
|
+
transform_key: str,
|
|
118
|
+
parent_id: str | None = None,
|
|
119
|
+
**params,
|
|
120
|
+
) -> "Node":
|
|
121
|
+
"""Apply a transform and return a Node representing its output.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
transform_key: One of the transform keys from ``fc.transforms()``,
|
|
125
|
+
e.g. "log", "standard_scale", "frequency_encode".
|
|
126
|
+
parent_id: Backend node ID to attach to. None = attach to the
|
|
127
|
+
raw uploaded data (root of the graph).
|
|
128
|
+
**params: Transform parameters, e.g. ``column="age"``,
|
|
129
|
+
``n_bins=10``, ``strategy="quantile"``.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
Node — call ``.apply()`` on it to chain further transforms,
|
|
133
|
+
``.profile()`` to inspect output stats, or ``.code()`` to export.
|
|
134
|
+
|
|
135
|
+
Example:
|
|
136
|
+
node = session.apply("log", column="monthly_income")
|
|
137
|
+
node2 = node.apply("standard_scale", column="monthly_income_log")
|
|
138
|
+
"""
|
|
139
|
+
result = self._client._post("/api/transform/apply", json={
|
|
140
|
+
"session_id": self.session_id,
|
|
141
|
+
"transform_key": transform_key,
|
|
142
|
+
"params": params,
|
|
143
|
+
"parent_id": parent_id,
|
|
144
|
+
})
|
|
145
|
+
return Node(
|
|
146
|
+
session=self,
|
|
147
|
+
node_id=result["node_id"],
|
|
148
|
+
transform_key=transform_key,
|
|
149
|
+
params=params,
|
|
150
|
+
output_columns=result["output_columns"],
|
|
151
|
+
_profile=result["profile"],
|
|
152
|
+
_leakage=result["leakage_findings"],
|
|
153
|
+
_quick_scores=result.get("quick_scores", {}),
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class Node:
    """A single applied transform node in the pipeline graph.

    Chain further transforms with ``.apply()``, or read results with
    ``.profile()``, ``.leakage()``, ``.code()``, ``.scores()``.

    The profile, leakage findings and quick scores are the values returned
    by the backend when the transform was applied; the accessors return them
    as stored and do not call the backend again.
    """

    # Ranking used by has_leakage(); labels are compared case-insensitively.
    _SEVERITY_RANK = {"low": 0, "medium": 1, "high": 2}

    def __init__(
        self,
        session: "Session",
        node_id: str,
        transform_key: str,
        params: dict,
        output_columns: list[str],
        _profile: dict,
        _leakage: list[dict],
        _quick_scores: dict,
    ):
        self._session = session
        self.node_id = node_id
        self.transform_key = transform_key
        self.params = params
        self.output_columns = output_columns
        self._profile = _profile
        self._leakage = _leakage
        self._quick_scores = _quick_scores

    def apply(self, transform_key: str, **params) -> "Node":
        """Chain a new transform onto this node's output.

        This node becomes the parent — the new transform only sees columns
        that exist at this point in the graph, not sibling branches.

        Example:
            log_node = session.apply("log", column="income")
            scaled = log_node.apply("standard_scale", column="income_log")
        """
        return self._session.apply(
            transform_key=transform_key,
            parent_id=self.node_id,
            **params,
        )

    def profile(self) -> dict:
        """Column stats for this node's output dataframe."""
        return self._profile

    def columns(self) -> list[str]:
        """Column names available at this node's output."""
        return [c["name"] for c in self._profile["columns"]]

    def leakage(self) -> list[dict]:
        """Leakage findings scoped to this node's branch."""
        return self._leakage

    def scores(self) -> dict:
        """Quick MI/correlation scores for each output column against the target."""
        return self._quick_scores

    def has_leakage(self, severity: str = "high") -> bool:
        """True if any leakage finding at or above the given severity exists.

        Args:
            severity: "low", "medium" or "high" (case-insensitive). An
                unrecognised value falls back to "high".

        A finding whose ``severity`` is missing or unrecognised is ranked as
        the lowest level, so it only counts when the threshold is "low".
        """
        rank = self._SEVERITY_RANK
        threshold = rank.get(str(severity).lower(), rank["high"])
        return any(
            rank.get(str(finding.get("severity", "")).lower(), 0) >= threshold
            for finding in self._leakage
        )

    def code(self) -> str:
        """Export the Python script for this node's entire branch — ready to
        paste into a notebook or production pipeline with no FeatureCanvas dependency."""
        # The export endpoint's body is returned as text, so this goes to the
        # HTTP session directly instead of the JSON-decoding _get helper.
        client = self._session._client
        resp = client._session.get(
            f"{client.base_url}/api/export/code/{self._session.session_id}",
            params={"node_id": self.node_id},
            timeout=client.timeout,
        )
        resp.raise_for_status()
        return resp.text

    def spec(self) -> dict:
        """Export the portable JSON spec for this node's branch."""
        return self._session._client._get(
            f"/api/export/spec/{self._session.session_id}",
            params={"node_id": self.node_id},
        )

    def remove(self) -> None:
        """Remove this node and all its descendants from the session graph."""
        self._session._client._post("/api/transform/remove", json={
            "session_id": self._session.session_id,
            "node_id": self.node_id,
        })

    def __repr__(self) -> str:
        return (
            f"Node(transform={self.transform_key!r}, "
            f"output_columns={self.output_columns!r}, "
            f"leakage_findings={len(self._leakage)})"
        )
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: featurecanvas
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for FeatureCanvas — the no-code feature engineering studio
|
|
5
|
+
Author-email: "G. Preetham Saxon" <gpreethamsaxon@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/GPREETHAMSAXON/FeatureCanvas
|
|
8
|
+
Project-URL: Documentation, https://github.com/GPREETHAMSAXON/FeatureCanvas#readme
|
|
9
|
+
Keywords: feature-engineering,machine-learning,data-science,featurecanvas
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Requires-Python: >=3.9
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: requests>=2.31.0
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
featurecanvas/__init__.py
|
|
4
|
+
featurecanvas/client.py
|
|
5
|
+
featurecanvas.egg-info/PKG-INFO
|
|
6
|
+
featurecanvas.egg-info/SOURCES.txt
|
|
7
|
+
featurecanvas.egg-info/dependency_links.txt
|
|
8
|
+
featurecanvas.egg-info/requires.txt
|
|
9
|
+
featurecanvas.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
requests>=2.31.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
featurecanvas
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
# "wheel" is not listed: the setuptools backend asks for it itself when it
# needs it, and naming it here forces it to be installed for sdist-only builds.
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "featurecanvas"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python SDK for FeatureCanvas — the no-code feature engineering studio"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
authors = [{ name = "G. Preetham Saxon", email = "gpreethamsaxon@gmail.com" }]
|
|
12
|
+
requires-python = ">=3.9"
|
|
13
|
+
dependencies = ["requests>=2.31.0"]
|
|
14
|
+
keywords = ["feature-engineering", "machine-learning", "data-science", "featurecanvas"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
Homepage = "https://github.com/GPREETHAMSAXON/FeatureCanvas"
|
|
24
|
+
Documentation = "https://github.com/GPREETHAMSAXON/FeatureCanvas#readme"
|
|
25
|
+
|
|
26
|
+
[tool.setuptools.packages.find]
|
|
27
|
+
where = ["."]
|
|
28
|
+
include = ["featurecanvas*"]
|