PyPI - BoiiiSplit - Versions diffs - 0.1__tar.gz - Mend

BoiiiSplit 0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

boiiisplit-0.1/BoiiiSplit/__init__.py +1 -0
boiiisplit-0.1/BoiiiSplit/main.py +47 -0
boiiisplit-0.1/BoiiiSplit.egg-info/PKG-INFO +5 -0
boiiisplit-0.1/BoiiiSplit.egg-info/SOURCES.txt +9 -0
boiiisplit-0.1/BoiiiSplit.egg-info/dependency_links.txt +1 -0
boiiisplit-0.1/BoiiiSplit.egg-info/requires.txt +1 -0
boiiisplit-0.1/BoiiiSplit.egg-info/top_level.txt +1 -0
boiiisplit-0.1/PKG-INFO +5 -0
boiiisplit-0.1/README.md +0 -0
boiiisplit-0.1/setup.cfg +4 -0
boiiisplit-0.1/setup.py +12 -0

boiiisplit-0.1/BoiiiSplit/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
# Re-export the package's public splitter. main.py defines only
# train_test_cv_split, so importing any other name from it fails at import time.
from .main import train_test_cv_split

boiiisplit-0.1/BoiiiSplit/main.py ADDED Viewed

@@ -0,0 +1,47 @@
import pandas as pd


def train_test_cv_split(X, y, test_size=20, cv_size=20, random_state=42):
    """Split features and target into train, test and cross-validation sets.

    The rows are shuffled once (seeded by ``random_state``) and then cut by
    position: the first ``test_size`` percent form the test set, the next
    ``cv_size`` percent form the CV set, and the remainder is the train set.
    ``X`` and ``y`` are paired row-by-row by position, so ``y`` may be a
    list, an array, or a pandas object whose index differs from ``X``'s.

    Args:
        X: Features; anything ``pd.DataFrame`` accepts.
        y: Target; a ``pd.DataFrame`` is used as-is, anything else is
            wrapped in a ``pd.Series``.
        test_size: Percentage of rows for the test set (0-100).
        cv_size: Percentage of rows for the cross-validation set (0-100).
        random_state: Seed passed to ``Series.sample`` for reproducibility.

    Returns:
        Tuple ``(X_train, X_test, X_cv, y_train, y_test, y_cv)``. Each part
        is a positional slice of the shuffled data, whose index was reset to
        ``0..n-1`` before slicing.

    Raises:
        ValueError: If a percentage is negative, if the percentages sum to
            100 or more, or if ``X`` and ``y`` have different lengths.
    """
    # Ensure inputs are pandas objects.
    X = pd.DataFrame(X)
    y = y if isinstance(y, pd.DataFrame) else pd.Series(y)

    # 1. Validation
    if test_size < 0 or cv_size < 0:
        raise ValueError("test_size and cv_size must be non-negative.")
    if test_size + cv_size >= 100:
        raise ValueError("Sum of test_size and cv_size must be less than 100.")
    n = len(X)
    if len(y) != n:
        raise ValueError("X and y must have the same number of rows.")

    # 2. Shuffle by position rather than by label: label lookups break when
    # y carries a different index than X and duplicate rows when the index
    # contains repeated labels.
    order = pd.Series(range(n)).sample(frac=1, random_state=random_state).values
    X_shuffled = X.iloc[order].reset_index(drop=True)
    y_shuffled = y.iloc[order].reset_index(drop=True)

    # 3. Split points. Multiply before dividing so exact percentages are not
    # truncated by float rounding (0.29 * 100 evaluates to 28.999...).
    test_cutoff = int(n * test_size / 100)
    cv_cutoff = test_cutoff + int(n * cv_size / 100)

    # 4. Slice: [0, test_cutoff) -> test, [test_cutoff, cv_cutoff) -> CV,
    # [cv_cutoff, n) -> train.
    X_test, y_test = X_shuffled.iloc[:test_cutoff], y_shuffled.iloc[:test_cutoff]
    X_cv = X_shuffled.iloc[test_cutoff:cv_cutoff]
    y_cv = y_shuffled.iloc[test_cutoff:cv_cutoff]
    X_train, y_train = X_shuffled.iloc[cv_cutoff:], y_shuffled.iloc[cv_cutoff:]
    return X_train, X_test, X_cv, y_train, y_test, y_cv

boiiisplit-0.1/BoiiiSplit.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,5 @@
+Metadata-Version: 2.4
+Name: BoiiiSplit
+Version: 0.1
+Requires-Dist: pandas
+Dynamic: requires-dist

boiiisplit-0.1/BoiiiSplit.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,9 @@
+README.md
+setup.py
+BoiiiSplit/__init__.py
+BoiiiSplit/main.py
+BoiiiSplit.egg-info/PKG-INFO
+BoiiiSplit.egg-info/SOURCES.txt
+BoiiiSplit.egg-info/dependency_links.txt
+BoiiiSplit.egg-info/requires.txt
+BoiiiSplit.egg-info/top_level.txt

boiiisplit-0.1/BoiiiSplit.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

boiiisplit-0.1/BoiiiSplit.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ pandas

boiiisplit-0.1/BoiiiSplit.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ BoiiiSplit

boiiisplit-0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,5 @@
+Metadata-Version: 2.4
+Name: BoiiiSplit
+Version: 0.1
+Requires-Dist: pandas
+Dynamic: requires-dist

boiiisplit-0.1/README.md ADDED Viewed

File without changes

boiiisplit-0.1/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

boiiisplit-0.1/setup.py ADDED Viewed

@@ -0,0 +1,12 @@
"""Packaging script for the BoiiiSplit distribution."""
from setuptools import find_packages, setup

# Runtime dependencies installed together with the package.
INSTALL_REQUIRES = ['pandas']

setup(
    name='BoiiiSplit',
    version='0.1',
    packages=find_packages(),
    install_requires=INSTALL_REQUIRES,
)