skweights 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skweights/__init__.py +4 -0
- skweights/weighter.py +55 -0
- skweights/wrapper.py +117 -0
- skweights-0.1.0.dist-info/METADATA +17 -0
- skweights-0.1.0.dist-info/RECORD +7 -0
- skweights-0.1.0.dist-info/WHEEL +5 -0
- skweights-0.1.0.dist-info/top_level.txt +1 -0
skweights/__init__.py
ADDED
skweights/weighter.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
|
4
|
+
from sklearn.utils.validation import check_is_fitted
|
|
5
|
+
|
|
6
|
+
class FeatureWeighter(BaseEstimator, TransformerMixin):
    """
    A transformer that applies a priori scalar weights to specific features.

    Parameters
    ----------
    weights : dict, default=None
        A dictionary mapping column names (for DataFrames) or
        column indices (for NumPy arrays) to their scalar weights.
        ``None`` or an empty dict turns ``transform`` into a pass-through.

    Notes
    -----
    Keys that do not match a column of the input (an unknown DataFrame
    column name, a non-integer key or an out-of-range/negative index for
    a NumPy array) are skipped without raising.
    """

    def __init__(self, weights=None):
        self.weights = weights

    def fit(self, X, y=None):
        """
        Stateless fit method: nothing is learned from ``X`` or ``y``.

        Returns
        -------
        self : FeatureWeighter
            The same instance, now flagged as fitted.
        """
        # Marker attribute that ``transform`` looks up via check_is_fitted.
        self._is_fitted_ = True
        return self

    def transform(self, X):
        """
        Applies the scalar weights to the defined features.

        Parameters
        ----------
        X : pandas.DataFrame or numpy.ndarray
            Data whose columns are to be scaled.

        Returns
        -------
        X_transformed : same container type as ``X``
            A scaled copy of ``X``. When no weights are configured, ``X``
            itself is returned (no copy is made).

        Raises
        ------
        TypeError
            If weights are configured and ``X`` is neither a DataFrame
            nor a NumPy array.
        """
        # 1. State validation
        check_is_fitted(self, '_is_fitted_')

        # 2. Bypass: nothing to scale (covers both None and an empty dict)
        if not self.weights:
            return X

        # 3. DataFrame input: keys are column labels
        if isinstance(X, pd.DataFrame):
            X_transformed = X.copy()  # never mutate the caller's data
            for col, weight in self.weights.items():
                if col in X_transformed.columns:
                    X_transformed[col] = X_transformed[col] * weight
            return X_transformed

        # 4. ndarray input: keys are non-negative integer positions
        if isinstance(X, np.ndarray):
            X_transformed = X.copy()  # never mutate the caller's data
            for col_idx, weight in self.weights.items():
                # Accept NumPy integer scalars as well as Python ints.
                if not isinstance(col_idx, (int, np.integer)):
                    continue
                if not 0 <= col_idx < X_transformed.shape[1]:
                    continue

                scaled = X_transformed[:, col_idx] * weight
                # Writing a float product into an integer/bool array would
                # silently truncate it (e.g. 3 * 0.5 -> 1). Promote the whole
                # array first whenever the product needs a wider kind.
                if not np.can_cast(scaled.dtype, X_transformed.dtype,
                                   casting='same_kind'):
                    X_transformed = X_transformed.astype(
                        np.result_type(X_transformed.dtype, scaled.dtype)
                    )
                X_transformed[:, col_idx] = scaled
            return X_transformed

        raise TypeError("Input must be a Pandas DataFrame or a NumPy array.")
|
skweights/wrapper.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import operator
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone
|
|
5
|
+
from sklearn.utils.validation import check_is_fitted
|
|
6
|
+
|
|
7
|
+
# Whitelist of comparison symbols a rule may use. Each symbol is bound to
# the matching function from the ``operator`` module so that rule strings
# never have to be passed to eval().
OPERATOR_MAP = {
    symbol: getattr(operator, func_name)
    for symbol, func_name in (
        ('==', 'eq'),
        ('!=', 'ne'),
        ('>', 'gt'),
        ('<', 'lt'),
        ('>=', 'ge'),
        ('<=', 'le'),
    )
}
|
|
16
|
+
|
|
17
|
+
class RuleConstraintWrapper(BaseEstimator, MetaEstimatorMixin):
    """
    A meta-estimator that evaluates deterministic business rules before
    delegating predictions to an underlying supervised machine learning model.

    Parameters
    ----------
    estimator : estimator object
        The base scikit-learn estimator (e.g., LogisticRegression).
    rules : list of dict
        A cascade of rules. Format:
        [{'column': 'age', 'operator': '<', 'value': 18, 'outcome': 0}]
        ``column`` is a column label for DataFrame input and an integer
        position for NumPy input; ``operator`` must be a key of
        ``OPERATOR_MAP``. Rules are tried in order and the first rule
        that matches a row decides its outcome.
    """

    def __init__(self, estimator, rules=None):
        self.estimator = estimator
        # NOTE(review): scikit-learn convention is to store constructor
        # arguments unmodified; the None -> [] substitution is kept so that
        # existing callers reading ``rules`` still see a list.
        self.rules = rules if rules is not None else []

    def _apply_rules(self, X):
        """
        Internal method to evaluate the rule cascade using vectorized masking.

        Rules that reference a column not present in ``X`` are skipped.

        Returns
        -------
        handled_mask : ndarray of bool, shape (n_samples,)
            True for every row matched by at least one rule.
        rule_predictions : ndarray of object, shape (n_samples,)
            The outcome of the first matching rule for handled rows; other
            entries are left unset.

        Raises
        ------
        KeyError
            If a rule lacks a required key or names an operator that is
            not in ``OPERATOR_MAP``.
        """
        n_samples = X.shape[0]
        handled_mask = np.zeros(n_samples, dtype=bool)
        rule_predictions = np.empty(n_samples, dtype=object)

        if not self.rules:
            return handled_mask, rule_predictions

        for rule in self.rules:
            col = rule['column']
            op_func = OPERATOR_MAP[rule['operator']]
            val = rule['value']
            outcome = rule['outcome']

            # Extract the column data, skipping rules that cannot apply
            if isinstance(X, pd.DataFrame):
                if col not in X.columns:
                    continue
                col_data = X[col].values
            else:
                # Array input: col must be an integer position. NumPy
                # integer scalars (e.g. produced by np.where) count too.
                if not isinstance(col, (int, np.integer)):
                    continue
                n_features = X.shape[1]
                # Negative positions index from the end; anything outside
                # [-n_features, n_features) is skipped rather than raising.
                if not -n_features <= col < n_features:
                    continue
                col_data = X[:, col]

            # Vectorized condition check over every row of the column
            condition_mask = op_func(col_data, val)

            # Keep only rows that meet the condition AND haven't been
            # claimed by an earlier rule (first match wins)
            active_mask = condition_mask & ~handled_mask

            # Apply the outcome and update the handled mask
            rule_predictions[active_mask] = outcome
            handled_mask |= active_mask

        return handled_mask, rule_predictions

    def fit(self, X, y):
        """
        Filters out data that triggers deterministic rules, then fits the
        underlying estimator only on the remaining valid data.

        Parameters
        ----------
        X : pandas.DataFrame or numpy.ndarray
            Training features.
        y : array-like
            Training targets, aligned row-by-row with ``X``.

        Returns
        -------
        self : RuleConstraintWrapper
            The fitted wrapper; the trained clone is stored in
            ``estimator_``.

        Raises
        ------
        ValueError
            If every training sample is claimed by a rule, leaving nothing
            to train on.
        """
        # Train a clone so the estimator passed to __init__ stays unfitted.
        self.estimator_ = clone(self.estimator)

        handled_mask, _ = self._apply_rules(X)

        # We only train the model on data that no rule has claimed
        keep_mask = ~handled_mask
        X_passed = X[keep_mask]
        y_passed = np.array(y)[keep_mask]

        if len(X_passed) == 0:
            raise ValueError("All training samples were filtered out by the business rules.")

        self.estimator_.fit(X_passed, y_passed)
        self._is_fitted_ = True

        return self

    def predict(self, X):
        """
        Predicts outcomes by first applying rules, then delegating the remainder.

        Parameters
        ----------
        X : pandas.DataFrame or numpy.ndarray
            Samples to predict.

        Returns
        -------
        final_predictions : ndarray of object, shape (n_samples,)
            Rule outcomes for rows matched by a rule, predictions of
            ``estimator_`` for all other rows.
        """
        check_is_fitted(self, '_is_fitted_')

        n_samples = X.shape[0]
        # Object dtype so rule outcomes and model predictions of different
        # types can share one array.
        final_predictions = np.empty(n_samples, dtype=object)

        # 1. Evaluate rules (The Gatekeeper)
        handled_mask, rule_preds = self._apply_rules(X)
        final_predictions[handled_mask] = rule_preds[handled_mask]

        # 2. Delegate remainder to the underlying model
        unhandled_mask = ~handled_mask
        if np.any(unhandled_mask):
            # Pass only the unhandled rows to the trained estimator
            X_unhandled = X[unhandled_mask] if isinstance(X, pd.DataFrame) else X[unhandled_mask, :]
            model_preds = self.estimator_.predict(X_unhandled)
            final_predictions[unhandled_mask] = model_preds

        return final_predictions
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skweights
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Scikit-learn compatible meta-estimators for heuristic business rules and feature weighting.
|
|
5
|
+
Author-email: Aron Kipkurui <aronidengeno@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/wizard-hash2/skweights
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/wizard-hash2/skweights/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
16
|
+
Requires-Dist: pandas>=1.0.0
|
|
17
|
+
Requires-Dist: numpy>=1.20.0
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
skweights/__init__.py,sha256=UYuQ4yLE_svvpdKXhogeMtFxqSixKleYYqR73wIaL5E,139
|
|
2
|
+
skweights/weighter.py,sha256=-bhAYNhq7_cJCKv3nS_hefMXP-AmSrN8ypA7H8SLC2A,2021
|
|
3
|
+
skweights/wrapper.py,sha256=EsRfy8hqpm3ZyojdEMJo-votEf1SJ7_Ni_wixOo5bzg,4302
|
|
4
|
+
skweights-0.1.0.dist-info/METADATA,sha256=g3ZSPKxru2Rshk5H4qwV1HqitrsfctQ4tiL5bHMs39A,781
|
|
5
|
+
skweights-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
skweights-0.1.0.dist-info/top_level.txt,sha256=8PGJQi3n3gM1dVIQYLFjPoKAAwh-1bIsoWIY_za4-xY,10
|
|
7
|
+
skweights-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
skweights
|