BoiiiSplit 0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ from .main import BoiiiSplit
@@ -0,0 +1,47 @@
1
+ import pandas as pd
2
+
3
+ def train_test_cv_split(X, y, test_size=20, cv_size=20, random_state=42):
4
+ """
5
+ Splits data into Train, Test, and CV sets using Pandas.
6
+
7
+ Args:
8
+ X (pd.DataFrame): Features
9
+ y (pd.Series or pd.DataFrame): Target
10
+ test_size (int): Percentage for test set (0-100)
11
+ cv_size (int): Percentage for cross-validation/validation set (0-100)
12
+ random_state (int): Seed for reproducibility
13
+ """
14
+
15
+ # Ensure inputs are Pandas objects
16
+ X = pd.DataFrame(X)
17
+ y = pd.Series(y) if not isinstance(y, pd.DataFrame) else y
18
+
19
+ # 1. Verification
20
+ if test_size + cv_size >= 100:
21
+ raise ValueError("Sum of test_size and cv_size must be less than 100.")
22
+
23
+ # 2. Shuffle the data using the random_state
24
+ # We use the index of X to shuffle both X and y in sync
25
+ indices = X.index.tolist()
26
+ shuffled_indices = pd.Series(indices).sample(frac=1, random_state=random_state).values
27
+
28
+ X_shuffled = X.loc[shuffled_indices].reset_index(drop=True)
29
+ y_shuffled = y.loc[shuffled_indices].reset_index(drop=True)
30
+
31
+ n = len(X_shuffled)
32
+
33
+ # 3. Calculate split points
34
+ test_cutoff = int(n * (test_size / 100))
35
+ cv_cutoff = int(n * (cv_size / 100)) + test_cutoff
36
+
37
+ # 4. Slice the data
38
+ # Test: 0 to test_cutoff
39
+ X_test, y_test = X_shuffled.iloc[:test_cutoff], y_shuffled.iloc[:test_cutoff]
40
+
41
+ # CV: test_cutoff to cv_cutoff
42
+ X_cv, y_cv = X_shuffled.iloc[test_cutoff:cv_cutoff], y_shuffled.iloc[test_cutoff:cv_cutoff]
43
+
44
+ # Train: cv_cutoff to the end
45
+ X_train, y_train = X_shuffled.iloc[cv_cutoff:], y_shuffled.iloc[cv_cutoff:]
46
+
47
+ return X_train, X_test, X_cv, y_train, y_test, y_cv
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.4
2
+ Name: BoiiiSplit
3
+ Version: 0.1
4
+ Requires-Dist: pandas
5
+ Dynamic: requires-dist
@@ -0,0 +1,9 @@
1
+ README.md
2
+ setup.py
3
+ BoiiiSplit/__init__.py
4
+ BoiiiSplit/main.py
5
+ BoiiiSplit.egg-info/PKG-INFO
6
+ BoiiiSplit.egg-info/SOURCES.txt
7
+ BoiiiSplit.egg-info/dependency_links.txt
8
+ BoiiiSplit.egg-info/requires.txt
9
+ BoiiiSplit.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ pandas
@@ -0,0 +1 @@
1
+ BoiiiSplit
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.4
2
+ Name: BoiiiSplit
3
+ Version: 0.1
4
+ Requires-Dist: pandas
5
+ Dynamic: requires-dist
File without changes
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,12 @@
1
+ from setuptools import setup, find_packages
2
+
3
# Register the BoiiiSplit distribution with setuptools: version 0.1, shipping
# whatever packages find_packages() discovers, with pandas as the only
# declared install requirement.
setup(
    install_requires=['pandas'],
    packages=find_packages(),
    version='0.1',
    name='BoiiiSplit',
)