id3-classification 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: id3-classification
3
+ Version: 0.2.0
4
+ Summary: Reusable ID3 Decision Tree Classifier
5
+ Author: nanashi
6
+ License: MIT
7
+ Requires-Python: >=3.8
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: numpy
11
+ Requires-Dist: pandas
12
+ Dynamic: license-file
13
+
14
+ # ID3 Classifier
15
+
16
+ Reusable ID3 Decision Tree Algorithm.
@@ -0,0 +1,7 @@
1
+ id3_classification-0.2.0.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ myid3/__init__.py,sha256=ICY5EHtGpg6LpP9ds8E2cuOuW5BWaRte1he9kEMKzCk,30
3
+ myid3/id3.py,sha256=QLOmYQ4HH2fWy0yN0W8oE03Tc6PjbQ3uzG43lz29row,3350
4
+ id3_classification-0.2.0.dist-info/METADATA,sha256=TIqxGOY0yOBaWXXF3hvQvx8yPmBzvy-agqiTDEM07sI,361
5
+ id3_classification-0.2.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
6
+ id3_classification-0.2.0.dist-info/top_level.txt,sha256=rABkUX8bSJrfUweW7BUmElF9KsRSAkSLbAuHYOsUSf0,6
7
+ id3_classification-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
File without changes
@@ -0,0 +1 @@
1
+ myid3
myid3/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .id3 import ID3Classifier
myid3/id3.py ADDED
@@ -0,0 +1,163 @@
1
+ import numpy as np
2
+ import math
3
+
4
+
5
class ID3Classifier:
    """ID3 decision tree classifier for categorical features.

    Learns a tree by recursively choosing the feature with the highest
    information gain. The learned tree is a nested dict of the form
    ``{feature: {value: subtree_or_label}}``; a leaf is a bare label.

    Expects a pandas-like DataFrame (column indexing, ``.mode()``,
    ``.iterrows()``) with discrete/categorical columns.
    """

    def __init__(self):
        # Learned tree (nested dict / leaf label); None until fit() is called.
        self.tree = None
        # Name of the target (label) column; set by fit().
        self.target = None
        # Feature column names used for training; set by fit().
        self.features = None

    # ===========================
    # ENTROPY
    # ===========================
    def entropy(self, data):
        """Return the Shannon entropy (in bits) of the label sequence *data*.

        Parameters
        ----------
        data : array-like
            Sequence of class labels.

        Returns
        -------
        float
            Entropy in bits; 0 for a pure (single-class) or empty sequence.
        """
        _, counts = np.unique(data, return_counts=True)
        total = counts.sum()  # hoisted: was recomputed once per class
        ent = 0.0
        for count in counts:
            # counts from np.unique are always >= 1, so prob > 0 and
            # log2 is safe without an explicit guard.
            prob = count / total
            ent -= prob * math.log2(prob)
        return ent

    # ===========================
    # INFORMATION GAIN
    # ===========================
    def information_gain(self, df, feature):
        """Return the information gain of splitting *df* on *feature*.

        Gain = H(target) - sum_v P(feature=v) * H(target | feature=v).

        Parameters
        ----------
        df : DataFrame
            Data containing both *feature* and the target column.
        feature : str
            Column name to evaluate as a split.

        Returns
        -------
        float
            Reduction in entropy of ``self.target`` achieved by the split.
        """
        total_entropy = self.entropy(df[self.target])

        values, counts = np.unique(df[feature], return_counts=True)
        total = counts.sum()  # hoisted out of the loop

        weighted_entropy = 0.0
        for value, count in zip(values, counts):
            subset = df[df[feature] == value]
            weighted_entropy += (count / total) * self.entropy(subset[self.target])

        return total_entropy - weighted_entropy

    # ===========================
    # ID3 TREE BUILDER
    # ===========================
    def _id3(self, data, features):
        """Recursively build a subtree for *data* using *features*.

        Returns either a leaf label (pure node, or majority label when no
        features remain) or a nested dict ``{best_feature: {value: subtree}}``.
        """
        # Pure node: all rows share one label.
        labels = np.unique(data[self.target])
        if len(labels) == 1:
            return labels[0]

        # No features left: fall back to the majority label.
        if len(features) == 0:
            return data[self.target].mode()[0]

        # Split on the feature with maximal information gain.
        gains = [self.information_gain(data, f) for f in features]
        best = features[int(np.argmax(gains))]

        tree = {best: {}}
        remaining = [f for f in features if f != best]

        for value in np.unique(data[best]):
            subset = data[data[best] == value]

            if len(subset) == 0:
                # Defensive: np.unique over data[best] should never yield an
                # empty subset, but keep the majority-label fallback.
                tree[best][value] = data[self.target].mode()[0]
            else:
                tree[best][value] = self._id3(subset, remaining)

        return tree

    # ===========================
    # FIT MODEL
    # ===========================
    def fit(self, df, features, target):
        """Train the tree on *df*.

        Parameters
        ----------
        df : DataFrame
            Training data.
        features : list of str
            Feature column names to consider for splits.
        target : str
            Name of the label column.

        Returns
        -------
        ID3Classifier
            ``self``, to allow call chaining (backward compatible: the
            original returned None, which callers ignored).
        """
        self.target = target
        self.features = features
        self.tree = self._id3(df, features)
        return self

    # ===========================
    # PRINT TREE
    # ===========================
    def print_tree(self, tree=None, indent=""):
        """Pretty-print *tree* (defaults to the fitted tree) to stdout."""
        if tree is None:
            tree = self.tree

        # Leaf: print the label and stop.
        if not isinstance(tree, dict):
            print(indent + "→", tree)
            return

        for key in tree:
            print(indent + key)
            for value in tree[key]:
                print(indent + " ├─", value)
                self.print_tree(tree[key][value], indent + " │ ")

    # ===========================
    # SINGLE PREDICT
    # ===========================
    def _predict(self, tree, sample):
        """Classify one *sample* (mapping feature -> value) with *tree*.

        Returns the predicted label, or None when the sample has a feature
        value that was never seen at this node during training.
        """
        if not isinstance(tree, dict):
            return tree  # reached a leaf

        root = list(tree.keys())[0]
        value = sample[root]

        if value not in tree[root]:
            return None  # unseen feature value

        return self._predict(tree[root][value], sample)

    # ===========================
    # MULTIPLE PREDICT
    # ===========================
    def predict(self, df):
        """Return an array of predicted labels, one per row of *df*.

        Rows with unseen feature values yield None.
        """
        return np.array([self._predict(self.tree, row) for _, row in df.iterrows()])