skweights 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
skweights/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .weighter import FeatureWeighter
2
+ from .wrapper import RuleConstraintWrapper
3
+
4
+ __all__ = ["FeatureWeighter", "RuleConstraintWrapper"]
skweights/weighter.py ADDED
@@ -0,0 +1,55 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.base import BaseEstimator, TransformerMixin
4
+ from sklearn.utils.validation import check_is_fitted
5
+
6
class FeatureWeighter(BaseEstimator, TransformerMixin):
    """
    A transformer that applies a priori scalar weights to specific features.

    Parameters
    ----------
    weights : dict, default=None
        A dictionary mapping column names (for DataFrames) or
        column indices (for NumPy arrays) to their scalar weights.
        ``None`` or an empty dict makes ``transform`` a pass-through.
    """

    def __init__(self, weights=None):
        self.weights = weights

    def fit(self, X, y=None):
        """
        Stateless fit: nothing is learned from ``X`` or ``y``.

        Only records that ``fit`` has been called so that ``transform``
        can verify it through ``check_is_fitted``.

        Returns
        -------
        self : FeatureWeighter
        """
        self._is_fitted_ = True
        return self

    def transform(self, X):
        """
        Applies the scalar weights to the defined features.

        Parameters
        ----------
        X : pandas.DataFrame or numpy.ndarray
            Data to weight. DataFrames are matched by column name, arrays
            by non-negative integer column index. Keys in ``weights`` that
            do not match a column are ignored.

        Returns
        -------
        X_transformed : same container type as ``X``
            A weighted copy of ``X``. When ``weights`` is ``None`` or empty
            the input object itself is returned, untouched and uncopied.

        Raises
        ------
        TypeError
            If weights are configured and ``X`` is neither a DataFrame nor
            a NumPy array.
        """
        # 1. State Validation
        check_is_fitted(self, '_is_fitted_')

        # 2. Bypass Logic (covers both None and an empty mapping)
        if not self.weights:
            return X

        # 3. Type Routing. The type is checked before any copy is made so
        #    that unsupported inputs always fail with the TypeError below
        #    rather than with an AttributeError from a missing ``.copy()``.
        if isinstance(X, pd.DataFrame):
            return self._weight_frame(X)
        if isinstance(X, np.ndarray):
            return self._weight_array(X)
        raise TypeError("Input must be a Pandas DataFrame or a NumPy array.")

    def _weight_frame(self, X):
        """Return a copy of DataFrame ``X`` with the named columns scaled."""
        X_transformed = X.copy()
        for col, weight in self.weights.items():
            if col in X_transformed.columns:
                X_transformed[col] = X_transformed[col] * weight
        return X_transformed

    def _weight_array(self, X):
        """Return a copy of array ``X`` with the indexed columns scaled."""
        # Keep only keys that are usable column positions. NumPy integer
        # scalars are accepted alongside built-in ints.
        selected = {}
        for col_idx, weight in self.weights.items():
            if isinstance(col_idx, (int, np.integer)) and 0 <= col_idx < X.shape[1]:
                selected[col_idx] = weight

        # Writing a fractional product back into an integer/boolean array
        # would drop the fractional part, so such arrays are promoted to
        # float first. Arrays that are already floating keep their dtype.
        has_fractional_weight = any(
            not isinstance(weight, (int, np.integer)) for weight in selected.values()
        )
        if X.dtype.kind in "biu" and has_fractional_weight:
            X_transformed = X.astype(np.float64)
        else:
            X_transformed = X.copy()

        for col_idx, weight in selected.items():
            X_transformed[:, col_idx] = X_transformed[:, col_idx] * weight
        return X_transformed
skweights/wrapper.py ADDED
@@ -0,0 +1,117 @@
1
+ import operator
2
+ import numpy as np
3
+ import pandas as pd
4
+ from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone
5
+ from sklearn.utils.validation import check_is_fitted
6
+
7
# Whitelist of comparison operators, keyed by their textual symbol, so that a
# rule can name an operator without the code ever resorting to eval().
OPERATOR_MAP = dict(
    (
        ('==', operator.eq),
        ('!=', operator.ne),
        ('>', operator.gt),
        ('<', operator.lt),
        ('>=', operator.ge),
        ('<=', operator.le),
    )
)
16
+
17
class RuleConstraintWrapper(BaseEstimator, MetaEstimatorMixin):
    """
    A meta-estimator that evaluates deterministic business rules before
    delegating predictions to an underlying supervised machine learning model.

    Parameters
    ----------
    estimator : estimator object
        The base scikit-learn estimator (e.g., LogisticRegression).
    rules : list of dict
        A cascade of rules, evaluated in order; the first rule that matches
        a row decides that row. Format:
        [{'column': 'age', 'operator': '<', 'value': 18, 'outcome': 0}]
        ``None`` is stored as an empty list.
    """

    def __init__(self, estimator, rules=None):
        self.estimator = estimator
        self.rules = rules if rules is not None else []

    def _apply_rules(self, X):
        """
        Internal method to evaluate the rule cascade using vectorized masking.

        Rules whose column cannot be located in ``X`` (unknown DataFrame
        column, non-integer or out-of-range array index) are skipped.

        Returns
        -------
        handled_mask : ndarray of bool, shape (n_samples,)
            True for rows decided by some rule.
        rule_predictions : ndarray of object, shape (n_samples,)
            The rule outcome for handled rows; other slots are unset.

        Raises
        ------
        KeyError
            If a rule lacks one of its keys or names an operator that is
            not in ``OPERATOR_MAP``.
        """
        n_samples = X.shape[0]
        handled_mask = np.zeros(n_samples, dtype=bool)
        rule_predictions = np.empty(n_samples, dtype=object)

        if not self.rules:
            return handled_mask, rule_predictions

        for rule in self.rules:
            col = rule['column']
            operator_key = rule['operator']
            try:
                op_func = OPERATOR_MAP[operator_key]
            except KeyError:
                # Same exception type as a plain lookup, but the message
                # tells the user which operators are accepted.
                raise KeyError(
                    f"Unsupported rule operator {operator_key!r}; "
                    f"expected one of {sorted(OPERATOR_MAP)}"
                ) from None
            val = rule['value']
            outcome = rule['outcome']

            # Extract the column data safely
            if isinstance(X, pd.DataFrame):
                if col not in X.columns:
                    continue
                col_data = X[col].values
            else:
                # Non-DataFrame input is addressed positionally. NumPy
                # integer scalars count as valid indices too.
                if not isinstance(col, (int, np.integer)):
                    continue
                n_features = X.shape[1]
                # Negative indices keep their from-the-end meaning, but an
                # index outside either bound is skipped instead of raising.
                if not -n_features <= col < n_features:
                    continue
                col_data = X[:, col]

            # Vectorized condition check over every row; rows already
            # claimed by an earlier rule are masked out just below.
            condition_mask = op_func(col_data, val)

            # Rows that meet the condition AND haven't been handled yet
            active_mask = condition_mask & ~handled_mask

            # Apply the outcome and update the handled mask
            rule_predictions[active_mask] = outcome
            handled_mask |= active_mask

        return handled_mask, rule_predictions

    @staticmethod
    def _narrow_dtype(values):
        """
        Convert an object array to a native dtype when that loses nothing.

        Falls back to returning ``values`` unchanged when the elements do
        not form a homogeneous 1-D array (e.g. mixed strings and numbers).
        """
        items = values.tolist()
        if not items:
            return values
        try:
            narrowed = np.array(items)
        except ValueError:
            # Ragged / non-scalar outcomes cannot be represented natively.
            return values
        if narrowed.shape != values.shape:
            return values
        # np.array() would stringify numbers that sit next to strings;
        # keep the object array in that case so values stay intact.
        if narrowed.dtype.kind == "U" and not all(isinstance(v, str) for v in items):
            return values
        if narrowed.dtype.kind == "S" and not all(isinstance(v, bytes) for v in items):
            return values
        return narrowed

    def fit(self, X, y):
        """
        Filters out data that triggers deterministic rules, then fits the
        underlying estimator only on the remaining valid data.

        Returns
        -------
        self : RuleConstraintWrapper

        Raises
        ------
        ValueError
            If every training row is claimed by a rule, leaving nothing to
            fit the estimator on.
        """
        self.estimator_ = clone(self.estimator)

        handled_mask, _ = self._apply_rules(X)

        # We only train the model on data that passes the rules
        keep_mask = ~handled_mask
        X_passed = X[keep_mask]
        y_passed = np.array(y)[keep_mask]

        if len(X_passed) == 0:
            raise ValueError("All training samples were filtered out by the business rules.")

        self.estimator_.fit(X_passed, y_passed)
        self._is_fitted_ = True

        return self

    def predict(self, X):
        """
        Predicts outcomes by first applying rules, then delegating the remainder.

        Returns
        -------
        predictions : ndarray, shape (n_samples,)
            Rule outcomes for rows matched by a rule and estimator
            predictions for the rest. The array uses a native dtype when
            all values share one (so it can be fed to metric functions);
            it stays ``object`` only for genuinely mixed values.
        """
        check_is_fitted(self, '_is_fitted_')

        n_samples = X.shape[0]

        # 1. Evaluate rules (The Gatekeeper)
        handled_mask, rule_preds = self._apply_rules(X)
        unhandled_mask = ~handled_mask

        if not np.any(handled_mask):
            if n_samples == 0:
                return np.empty(0, dtype=object)
            # No rule fired: hand back the estimator's own predictions
            # with their dtype untouched.
            return np.asarray(self.estimator_.predict(X))

        final_predictions = np.empty(n_samples, dtype=object)
        final_predictions[handled_mask] = rule_preds[handled_mask]

        # 2. Delegate remainder to the underlying model
        if np.any(unhandled_mask):
            # Pass only the unhandled rows to the trained estimator
            if isinstance(X, pd.DataFrame):
                X_unhandled = X[unhandled_mask]
            else:
                X_unhandled = X[unhandled_mask, :]
            final_predictions[unhandled_mask] = self.estimator_.predict(X_unhandled)

        return self._narrow_dtype(final_predictions)
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: skweights
3
+ Version: 0.1.0
4
+ Summary: Scikit-learn compatible meta-estimators for heuristic business rules and feature weighting.
5
+ Author-email: Aron Kipkurui <aronidengeno@gmail.com>
6
+ Project-URL: Homepage, https://github.com/wizard-hash2/skweights
7
+ Project-URL: Bug Tracker, https://github.com/wizard-hash2/skweights/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: scikit-learn>=1.0.0
16
+ Requires-Dist: pandas>=1.0.0
17
+ Requires-Dist: numpy>=1.20.0
@@ -0,0 +1,7 @@
1
+ skweights/__init__.py,sha256=UYuQ4yLE_svvpdKXhogeMtFxqSixKleYYqR73wIaL5E,139
2
+ skweights/weighter.py,sha256=-bhAYNhq7_cJCKv3nS_hefMXP-AmSrN8ypA7H8SLC2A,2021
3
+ skweights/wrapper.py,sha256=EsRfy8hqpm3ZyojdEMJo-votEf1SJ7_Ni_wixOo5bzg,4302
4
+ skweights-0.1.0.dist-info/METADATA,sha256=g3ZSPKxru2Rshk5H4qwV1HqitrsfctQ4tiL5bHMs39A,781
5
+ skweights-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ skweights-0.1.0.dist-info/top_level.txt,sha256=8PGJQi3n3gM1dVIQYLFjPoKAAwh-1bIsoWIY_za4-xY,10
7
+ skweights-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ skweights