id3-classification 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- id3_classification-0.2.0/LICENSE +0 -0
- id3_classification-0.2.0/PKG-INFO +16 -0
- id3_classification-0.2.0/README.md +3 -0
- id3_classification-0.2.0/pyproject.toml +28 -0
- id3_classification-0.2.0/setup.cfg +4 -0
- id3_classification-0.2.0/src/id3_classification.egg-info/PKG-INFO +16 -0
- id3_classification-0.2.0/src/id3_classification.egg-info/SOURCES.txt +10 -0
- id3_classification-0.2.0/src/id3_classification.egg-info/dependency_links.txt +1 -0
- id3_classification-0.2.0/src/id3_classification.egg-info/requires.txt +2 -0
- id3_classification-0.2.0/src/id3_classification.egg-info/top_level.txt +1 -0
- id3_classification-0.2.0/src/myid3/__init__.py +1 -0
- id3_classification-0.2.0/src/myid3/id3.py +163 -0
|
File without changes
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: id3-classification
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Reusable ID3 Decision Tree Classifier
|
|
5
|
+
Author: nanashi
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# ID3 Classifier
|
|
15
|
+
|
|
16
|
+
Reusable ID3 Decision Tree Algorithm.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
|
|
7
|
+
name = "id3-classification" # MUST be unique worldwide
|
|
8
|
+
version = "0.2.0"
|
|
9
|
+
|
|
10
|
+
description = "Reusable ID3 Decision Tree Classifier"
|
|
11
|
+
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
|
|
14
|
+
requires-python = ">=3.8"
|
|
15
|
+
|
|
16
|
+
authors = [
|
|
17
|
+
{name="nanashi"}
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
license = {text="MIT"}
|
|
21
|
+
|
|
22
|
+
dependencies = [
|
|
23
|
+
"numpy",
|
|
24
|
+
"pandas"
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.packages.find]
|
|
28
|
+
where = ["src"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: id3-classification
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Reusable ID3 Decision Tree Classifier
|
|
5
|
+
Author: nanashi
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# ID3 Classifier
|
|
15
|
+
|
|
16
|
+
Reusable ID3 Decision Tree Algorithm.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/id3_classification.egg-info/PKG-INFO
|
|
5
|
+
src/id3_classification.egg-info/SOURCES.txt
|
|
6
|
+
src/id3_classification.egg-info/dependency_links.txt
|
|
7
|
+
src/id3_classification.egg-info/requires.txt
|
|
8
|
+
src/id3_classification.egg-info/top_level.txt
|
|
9
|
+
src/myid3/__init__.py
|
|
10
|
+
src/myid3/id3.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
myid3
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .id3 import ID3Classifier
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import math
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ID3Classifier:
    """ID3 decision-tree classifier for categorical features.

    Train with :meth:`fit`, then classify with :meth:`predict`.
    The learned tree is a nested dict of the form
    ``{feature: {value: subtree-or-leaf-label}}``; a leaf is any
    non-dict value (a target label).
    """

    def __init__(self):
        # Learned nested-dict tree; None until fit() has been called.
        self.tree = None
        # Name of the target column in the training DataFrame.
        self.target = None
        # Feature column names supplied to fit().
        self.features = None

    # ===========================
    # ENTROPY
    # ===========================
    def entropy(self, data):
        """Return the Shannon entropy (base 2) of the labels in ``data``.

        Parameters
        ----------
        data : array-like
            Sequence of (hashable) class labels.

        Returns
        -------
        float
            Entropy in bits; 0.0 for empty input (by convention).
        """
        _, counts = np.unique(data, return_counts=True)
        total = counts.sum()
        # Guard empty input; the original would divide by zero here.
        if total == 0:
            return 0.0
        ent = 0.0
        for count in counts:
            # `total` hoisted out of the loop — the original recomputed
            # sum(counts) on every iteration.
            prob = count / total
            if prob > 0:
                ent -= prob * math.log2(prob)
        return ent

    # ===========================
    # INFORMATION GAIN
    # ===========================
    def information_gain(self, df, feature):
        """Return the information gain of splitting ``df`` on ``feature``.

        IG = H(target) - sum_v P(feature = v) * H(target | feature = v).

        Parameters
        ----------
        df : pandas.DataFrame
            Data containing both ``feature`` and the target column.
        feature : str
            Candidate split column.

        Returns
        -------
        float
            Reduction in target entropy achieved by the split.
        """
        total_entropy = self.entropy(df[self.target])
        values, counts = np.unique(df[feature], return_counts=True)
        total = counts.sum()  # hoisted; original summed per iteration
        weighted_entropy = 0.0
        for value, count in zip(values, counts):
            subset = df[df[feature] == value]
            weighted_entropy += (count / total) * self.entropy(subset[self.target])
        return total_entropy - weighted_entropy

    # ===========================
    # ID3 TREE BUILDER
    # ===========================
    def _id3(self, data, features):
        """Recursively build a (sub)tree for ``data`` over ``features``.

        Returns either a leaf label or a ``{feature: {value: subtree}}``
        dict. Ties in information gain are broken by np.argmax, i.e. the
        earliest feature in ``features`` wins.
        """
        labels = np.unique(data[self.target])
        # Pure node: every remaining row carries the same label.
        if len(labels) == 1:
            return labels[0]
        # No features left to split on: fall back to the majority label.
        if len(features) == 0:
            return data[self.target].mode()[0]

        # Greedy ID3 choice: split on the highest-information-gain feature.
        igs = [self.information_gain(data, f) for f in features]
        best = features[np.argmax(igs)]
        tree = {best: {}}
        remaining = [f for f in features if f != best]

        for value in np.unique(data[best]):
            subset = data[data[best] == value]
            if len(subset) == 0:
                # Defensive only — values come from data[best] itself, so the
                # subset cannot actually be empty; keep the majority-label
                # fallback for safety.
                tree[best][value] = data[self.target].mode()[0]
            else:
                tree[best][value] = self._id3(subset, remaining)
        return tree

    # ===========================
    # FIT MODEL
    # ===========================
    def fit(self, df, features, target):
        """Learn a decision tree from ``df``.

        Parameters
        ----------
        df : pandas.DataFrame
            Training data.
        features : list[str]
            Categorical feature columns to split on.
        target : str
            Name of the label column.
        """
        self.target = target
        self.features = features
        self.tree = self._id3(df, features)

    # ===========================
    # PRINT TREE
    # ===========================
    def print_tree(self, tree=None, indent=""):
        """Pretty-print the learned tree (or ``tree``) to stdout.

        Leaves are prefixed with an arrow; branch values with box-drawing
        glyphs. ``indent`` accumulates across recursive calls.
        """
        if tree is None:
            tree = self.tree

        # Leaf node: print the label and stop.
        if not isinstance(tree, dict):
            print(indent + "→", tree)
            return

        for key in tree:
            print(indent + key)
            for value in tree[key]:
                print(indent + " ├─", value)
                self.print_tree(tree[key][value], indent + " │ ")

    # ===========================
    # SINGLE PREDICT
    # ===========================
    def _predict(self, tree, sample):
        """Classify one ``sample`` (mapping of feature -> value) with ``tree``.

        Returns the leaf label, or ``None`` when the sample carries a
        feature value the tree never saw during training.
        """
        if not isinstance(tree, dict):
            return tree

        root = list(tree.keys())[0]
        value = sample[root]

        # Unseen category: no branch to follow, signal with None.
        if value not in tree[root]:
            return None

        return self._predict(tree[root][value], sample)

    # ===========================
    # MULTIPLE PREDICT
    # ===========================
    def predict(self, df):
        """Classify every row of ``df``.

        Returns
        -------
        numpy.ndarray
            One predicted label per row (``None`` for unseen feature
            values), in row order.
        """
        preds = [self._predict(self.tree, row) for _, row in df.iterrows()]
        return np.array(preds)
|