PyPI - skweights - Versions diffs - 0.1.0__tar.gz - Mend

skweights 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

skweights-0.1.0/PKG-INFO +17 -0
skweights-0.1.0/README.md +0 -0
skweights-0.1.0/pyproject.toml +29 -0
skweights-0.1.0/setup.cfg +4 -0
skweights-0.1.0/skweights/__init__.py +4 -0
skweights-0.1.0/skweights/weighter.py +55 -0
skweights-0.1.0/skweights/wrapper.py +117 -0
skweights-0.1.0/skweights.egg-info/PKG-INFO +17 -0
skweights-0.1.0/skweights.egg-info/SOURCES.txt +12 -0
skweights-0.1.0/skweights.egg-info/dependency_links.txt +1 -0
skweights-0.1.0/skweights.egg-info/requires.txt +3 -0
skweights-0.1.0/skweights.egg-info/top_level.txt +1 -0
skweights-0.1.0/tests/test_weighter.py +94 -0
skweights-0.1.0/tests/test_wrapper.py +74 -0

skweights-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,17 @@
+Metadata-Version: 2.4
+Name: skweights
+Version: 0.1.0
+Summary: Scikit-learn compatible meta-estimators for heuristic business rules and feature weighting.
+Author-email: Aron Kipkurui <aronidengeno@gmail.com>
+Project-URL: Homepage, https://github.com/wizard-hash2/skweights
+Project-URL: Bug Tracker, https://github.com/wizard-hash2/skweights/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: scikit-learn>=1.0.0
+Requires-Dist: pandas>=1.0.0
+Requires-Dist: numpy>=1.20.0

skweights-0.1.0/README.md ADDED Viewed

File without changes

skweights-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,29 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "skweights"  # distribution name as published on PyPI
+version = "0.1.0"
+authors = [
+  { name="Aron Kipkurui", email="aronidengeno@gmail.com" },
+]
+description = "Scikit-learn compatible meta-estimators for heuristic business rules and feature weighting."
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Developers",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence"
+]
+dependencies = [
+    "scikit-learn>=1.0.0",
+    "pandas>=1.0.0",
+    "numpy>=1.20.0"
+]
+[project.urls]
+"Homepage" = "https://github.com/wizard-hash2/skweights"
+"Bug Tracker" = "https://github.com/wizard-hash2/skweights/issues"

skweights-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

skweights-0.1.0/skweights/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""Public API of the skweights package."""
+from .weighter import FeatureWeighter
+from .wrapper import RuleConstraintWrapper
+# Names exported by `from skweights import *`.
+__all__ = ["FeatureWeighter", "RuleConstraintWrapper"]

skweights-0.1.0/skweights/weighter.py ADDED Viewed

@@ -0,0 +1,55 @@
+import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.utils.validation import check_is_fitted
+class FeatureWeighter(BaseEstimator, TransformerMixin):
+    """
+    A transformer that applies a priori scalar weights to specific features.
+    Parameters
+    ----------
+    weights : dict, default=None
+        A dictionary mapping column names (for DataFrames) or
+        column indices (for NumPy arrays) to their scalar weights.
+    """
+    def __init__(self, weights=None):
+        self.weights = weights
+    def fit(self, X, y=None):
+        """
+        Stateless fit method. Validates initialization and returns self.
+        """
+        # _is_fitted is a scikit-learn convention to prove the model
+        # has passed through the fit step of a pipeline.
+        self._is_fitted_ = True
+        return self
+    def transform(self, X):
+        """
+        Applies the scalar weights to the defined features.
+        """
+        # 1. State Validation
+        check_is_fitted(self, '_is_fitted_')
+        # 2. Bypass Logic
+        if self.weights is None or not self.weights:
+            return X
+        # 3. Immutability Principle
+        X_transformed = X.copy()
+        # 4. Type Routing & Matrix Operations
+        if isinstance(X_transformed, pd.DataFrame):
+            for col, weight in self.weights.items():
+                if col in X_transformed.columns:
+                    X_transformed[col] = X_transformed[col] * weight
+        elif isinstance(X_transformed, np.ndarray):
+            for col_idx, weight in self.weights.items():
+                if isinstance(col_idx, int) and 0 <= col_idx < X_transformed.shape[1]:
+                    X_transformed[:, col_idx] = X_transformed[:, col_idx] * weight
+        else:
+            raise TypeError("Input must be a Pandas DataFrame or a NumPy array.")
+        return X_transformed

skweights-0.1.0/skweights/wrapper.py ADDED Viewed

@@ -0,0 +1,117 @@
+import operator
+import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone
+from sklearn.utils.validation import check_is_fitted
+# Safe mapping to avoid using eval() in production
+OPERATOR_MAP = {
+    '==': operator.eq,
+    '!=': operator.ne,
+    '>': operator.gt,
+    '<': operator.lt,
+    '>=': operator.ge,
+    '<=': operator.le
+}
+class RuleConstraintWrapper(BaseEstimator, MetaEstimatorMixin):
+    """
+    A meta-estimator that evaluates deterministic business rules before
+    delegating predictions to an underlying supervised machine learning model.
+    Parameters
+    ----------
+    estimator : estimator object
+        The base scikit-learn estimator (e.g., LogisticRegression).
+    rules : list of dict
+        A cascade of rules. Format:
+        [{'column': 'age', 'operator': '<', 'value': 18, 'outcome': 0}]
+    """
+    def __init__(self, estimator, rules=None):
+        self.estimator = estimator
+        self.rules = rules if rules is not None else []
+    def _apply_rules(self, X):
+        """
+        Internal method to evaluate the rule cascade using vectorized masking.
+        Returns a boolean mask of handled rows and their hardcoded predictions.
+        """
+        n_samples = X.shape[0]
+        handled_mask = np.zeros(n_samples, dtype=bool)
+        rule_predictions = np.empty(n_samples, dtype=object)
+        if not self.rules:
+            return handled_mask, rule_predictions
+        for rule in self.rules:
+            col = rule['column']
+            op_func = OPERATOR_MAP[rule['operator']]
+            val = rule['value']
+            outcome = rule['outcome']
+            # Extract the column data safely
+            if isinstance(X, pd.DataFrame):
+                if col not in X.columns:
+                    continue
+                col_data = X[col].values
+            else:
+                # Assume col is an integer index if X is a NumPy array
+                if not isinstance(col, int) or col >= X.shape[1]:
+                    continue
+                col_data = X[:, col]
+            # Vectorized condition check (only on rows not yet handled)
+            condition_mask = op_func(col_data, val)
+            # Find rows that meet the condition AND haven't been handled by previous rules
+            active_mask = condition_mask & ~handled_mask
+            # Apply the outcome and update the handled mask
+            rule_predictions[active_mask] = outcome
+            handled_mask |= active_mask
+        return handled_mask, rule_predictions
+    def fit(self, X, y):
+        """
+        Filters out data that triggers deterministic rules, then fits the
+        underlying estimator only on the remaining valid data.
+        """
+        self.estimator_ = clone(self.estimator)
+        handled_mask, _ = self._apply_rules(X)
+        # We only train the model on data that passes the rules
+        X_passed = X[~handled_mask]
+        y_passed = np.array(y)[~handled_mask]
+        if len(X_passed) == 0:
+            raise ValueError("All training samples were filtered out by the business rules.")
+        self.estimator_.fit(X_passed, y_passed)
+        self._is_fitted_ = True
+        return self
+    def predict(self, X):
+        """
+        Predicts outcomes by first applying rules, then delegating the remainder.
+        """
+        check_is_fitted(self, '_is_fitted_')
+        n_samples = X.shape[0]
+        final_predictions = np.empty(n_samples, dtype=object)
+        # 1. Evaluate rules (The Gatekeeper)
+        handled_mask, rule_preds = self._apply_rules(X)
+        final_predictions[handled_mask] = rule_preds[handled_mask]
+        # 2. Delegate remainder to the underlying model
+        unhandled_mask = ~handled_mask
+        if np.any(unhandled_mask):
+            # Pass only the unhandled rows to the trained estimator
+            X_unhandled = X[unhandled_mask] if isinstance(X, pd.DataFrame) else X[unhandled_mask, :]
+            model_preds = self.estimator_.predict(X_unhandled)
+            final_predictions[unhandled_mask] = model_preds
+        return final_predictions

skweights-0.1.0/skweights.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,17 @@
+Metadata-Version: 2.4
+Name: skweights
+Version: 0.1.0
+Summary: Scikit-learn compatible meta-estimators for heuristic business rules and feature weighting.
+Author-email: Aron Kipkurui <aronidengeno@gmail.com>
+Project-URL: Homepage, https://github.com/wizard-hash2/skweights
+Project-URL: Bug Tracker, https://github.com/wizard-hash2/skweights/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: scikit-learn>=1.0.0
+Requires-Dist: pandas>=1.0.0
+Requires-Dist: numpy>=1.20.0

skweights-0.1.0/skweights.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,12 @@
+README.md
+pyproject.toml
+skweights/__init__.py
+skweights/weighter.py
+skweights/wrapper.py
+skweights.egg-info/PKG-INFO
+skweights.egg-info/SOURCES.txt
+skweights.egg-info/dependency_links.txt
+skweights.egg-info/requires.txt
+skweights.egg-info/top_level.txt
+tests/test_weighter.py
+tests/test_wrapper.py

skweights-0.1.0/skweights.egg-info/dependency_links.txt ADDED Viewed

@@ -0,0 +1 @@
+

skweights-0.1.0/skweights.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,3 @@
+scikit-learn>=1.0.0
+pandas>=1.0.0
+numpy>=1.20.0

skweights-0.1.0/skweights.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+skweights

skweights-0.1.0/tests/test_weighter.py ADDED Viewed

@@ -0,0 +1,94 @@
+import pytest
+import numpy as np
+import pandas as pd
+from sklearn.pipeline import Pipeline
+from sklearn.linear_model import LogisticRegression
+from skweights.weighter import FeatureWeighter
+# ---------------------------------------------------------
+# Test 1: Mathematical Accuracy & Pandas Integration
+# ---------------------------------------------------------
+def test_pandas_dataframe_weighting():
+    # Setup original data
+    df = pd.DataFrame({
+        'years_experience': [1.0, 2.0, 3.0],
+        'age': [25.0, 30.0, 35.0],
+        'github_commits': [100.0, 200.0, 300.0]
+    })
+    # We want to double experience and halve commits. Age should be untouched.
+    weights = {'years_experience': 2.0, 'github_commits': 0.5}
+    weighter = FeatureWeighter(weights=weights)
+    # Execute
+    df_transformed = weighter.fit_transform(df)
+    # Assertions
+    assert list(df_transformed['years_experience']) == [2.0, 4.0, 6.0], "Failed to multiply correctly."
+    assert list(df_transformed['github_commits']) == [50.0, 100.0, 150.0], "Failed to multiply fractional weight."
+    assert list(df_transformed['age']) == [25.0, 30.0, 35.0], "Untouched column was altered."
+# ---------------------------------------------------------
+# Test 2: NumPy Array Resilience
+# ---------------------------------------------------------
+def test_numpy_array_weighting():
+    # 3 rows, 3 columns
+    X = np.array([
+        [1.0, 25.0, 100.0],
+        [2.0, 30.0, 200.0],
+        [3.0, 35.0, 300.0]
+    ])
+    # Weight index 0 (col 1) by 2.0, index 2 (col 3) by 0.5
+    weights = {0: 2.0, 2: 0.5}
+    weighter = FeatureWeighter(weights=weights)
+    X_transformed = weighter.fit_transform(X)
+    # Assertions using np.testing for safe float comparisons
+    np.testing.assert_array_equal(X_transformed[:, 0], np.array([2.0, 4.0, 6.0]))
+    np.testing.assert_array_equal(X_transformed[:, 2], np.array([50.0, 100.0, 150.0]))
+    np.testing.assert_array_equal(X_transformed[:, 1], np.array([25.0, 30.0, 35.0]))
+# ---------------------------------------------------------
+# Test 3: The Immutability Principle
+# ---------------------------------------------------------
+def test_original_data_not_mutated():
+    df = pd.DataFrame({'feature_a': [10.0, 20.0]})
+    weighter = FeatureWeighter(weights={'feature_a': 5.0})
+    _ = weighter.fit_transform(df)
+    # The original dataframe should still have the original values
+    assert list(df['feature_a']) == [10.0, 20.0], "Original DataFrame was mutated in place!"
+# ---------------------------------------------------------
+# Test 4: Pipeline Passthrough (None or Empty Weights)
+# ---------------------------------------------------------
+def test_empty_weights_passthrough():
+    df = pd.DataFrame({'feature_a': [1.0, 2.0]})
+    # If a developer initiates it without weights, it should just return the data untouched
+    weighter = FeatureWeighter(weights=None)
+    df_transformed = weighter.fit_transform(df)
+    assert list(df_transformed['feature_a']) == [1.0, 2.0]
+# ---------------------------------------------------------
+# Test 5: End-to-End Pipeline Integration
+# ---------------------------------------------------------
+def test_pipeline_integration_runs():
+    X = pd.DataFrame({'feature_a': [1.0, 2.0, 3.0, 4.0], 'feature_b': [4.0, 3.0, 2.0, 1.0]})
+    y = np.array([0, 0, 1, 1])
+    # Build a standard Scikit-learn Pipeline
+    pipe = Pipeline([
+        ('weighter', FeatureWeighter(weights={'feature_a': 10.0})),
+        ('classifier', LogisticRegression())
+    ])
+    # If fit and predict execute without shape or type errors, integration works
+    pipe.fit(X, y)
+    predictions = pipe.predict(X)
+    assert len(predictions) == 4

skweights-0.1.0/tests/test_wrapper.py ADDED Viewed

@@ -0,0 +1,74 @@
+import pytest
+import numpy as np
+import pandas as pd
+from sklearn.linear_model import LogisticRegression
+from skweights.wrapper import RuleConstraintWrapper
+# ---------------------------------------------------------
+# Test 1: The Gatekeeper Intercept (Hard Constraint)
+# ---------------------------------------------------------
+def test_rule_cascade_blocking():
+    # 1. Setup the data
+    # Row 0 triggers the rule (laptop=0). Rows 1, 2, and 3 pass.
+    df = pd.DataFrame({
+        'laptop_status': [0, 1, 1, 1],
+        'experience': [5.0, 2.0, 10.0, 1.0]
+    })
+    # We need multiple classes (0 and 1) in the passing rows
+    # so LogisticRegression doesn't crash during fit().
+    y = np.array([1, 1, 0, 0])
+    # 2. Define the business logic
+    rules = [{'column': 'laptop_status', 'operator': '==', 'value': 0, 'outcome': 0}]
+    # 3. Initialize and fit the wrapper
+    wrapper = RuleConstraintWrapper(estimator=LogisticRegression(), rules=rules)
+    wrapper.fit(df, y)
+    # 4. Predict
+    predictions = wrapper.predict(df)
+    # 5. Assertions
+    # The first row MUST be 0 because of the rule, completely ignoring
+    # the fact that in the original `y` array it was labeled as 1.
+    assert predictions[0] == 0
+    # The output shape must perfectly match the input shape
+    assert len(predictions) == 4
+# ---------------------------------------------------------
+# Test 2: NumPy Array Fallback
+# ---------------------------------------------------------
+def test_numpy_rule_evaluation():
+    # Matrix where column 0 is the constraint feature
+    X = np.array([
+        [0.0, 5.0], # Blocked
+        [1.0, 2.0], # Passed
+        [1.0, 10.0] # Passed
+    ])
+    y = np.array([1, 0, 1])
+    # Rule checks index 0 instead of a column string
+    rules = [{'column': 0, 'operator': '==', 'value': 0.0, 'outcome': 0}]
+    wrapper = RuleConstraintWrapper(estimator=LogisticRegression(), rules=rules)
+    wrapper.fit(X, y)
+    predictions = wrapper.predict(X)
+    assert predictions[0] == 0
+    assert len(predictions) == 3
+# ---------------------------------------------------------
+# Test 3: Empty Rules (Standard Estimator Behavior)
+# ---------------------------------------------------------
+def test_empty_rules_pass_through():
+    X = pd.DataFrame({'feature': [1, 2, 3, 4]})
+    y = np.array([0, 0, 1, 1])
+    # Passing no rules should just make it act like a normal LogisticRegression
+    wrapper = RuleConstraintWrapper(estimator=LogisticRegression(), rules=[])
+    wrapper.fit(X, y)
+    predictions = wrapper.predict(X)
+    assert len(predictions) == 4