dipencsv 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dipencsv/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ # dipencsv/__init__.py
2
+
3
+ from dipencsv.core import Data
4
+
5
+ __version__ = "0.1.0"
6
+ __author__ = "Dipendra"
7
+
8
+ __all__ = ["Data"]
dipencsv/analytics.py ADDED
@@ -0,0 +1,70 @@
1
+ # dipencsv/analytics.py
2
+
3
+ import pandas as pd
4
+ from dipencsv.errors import format_error, format_success
5
+
6
def describe(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise the numeric columns of ``df``.

    Returns ``DataFrame.describe()`` for the numeric columns, rounded to two
    decimals. When the numeric selection is empty a notice is printed and an
    empty DataFrame is returned instead.
    """
    numbers_only = df.select_dtypes(include="number")
    if not numbers_only.empty:
        return numbers_only.describe().round(2)
    print("⚠️ No numeric columns found")
    return pd.DataFrame()
12
+
13
+
14
def value_counts(df: pd.DataFrame, col: str) -> pd.Series:
    """Return how often each distinct value occurs in column ``col``."""
    column = df[col]
    return column.value_counts()
16
+
17
+
18
def correlation_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """Return the pairwise correlation of the numeric columns of ``df``.

    Values are rounded to two decimals. When the numeric selection is empty
    a notice is printed and an empty DataFrame is returned.
    """
    numbers_only = df.select_dtypes(include="number")
    if not numbers_only.empty:
        return numbers_only.corr().round(2)
    print("⚠️ No numeric columns found")
    return pd.DataFrame()
24
+
25
+
26
def outliers(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Return the rows whose ``col`` value lies outside the 1.5 * IQR fences.

    The number of rows found is printed before the result is returned.
    """
    series = df[col]
    first_quartile = series.quantile(0.25)
    third_quartile = series.quantile(0.75)
    spread = third_quartile - first_quartile
    low_fence = first_quartile - 1.5 * spread
    high_fence = third_quartile + 1.5 * spread

    too_low = series < low_fence
    too_high = series > high_fence
    flagged = df[too_low | too_high]
    print(f"📊 Outliers in '{col}': {len(flagged)} rows")
    return flagged
35
+
36
+
37
def distribution(df: pd.DataFrame, col: str) -> dict:
    """Return basic distribution statistics of column ``col``.

    The result maps ``mean``, ``median``, ``std``, ``min``, ``max`` and
    ``skew`` to plain floats rounded to two decimals.
    """
    series = df[col]
    stat_names = ("mean", "median", "std", "min", "max", "skew")
    # Each name is also the Series method that computes the statistic.
    return {
        name: round(float(getattr(series, name)()), 2)
        for name in stat_names
    }
46
def sort(df: pd.DataFrame, col: str, asc: bool = True) -> pd.DataFrame:
    """Return ``df`` sorted by ``col`` with a fresh 0..n-1 index.

    If ``col`` is not a column, an error line is printed and ``df`` is
    returned unchanged.
    """
    if col in df.columns:
        ordered = df.sort_values(by=col, ascending=asc).reset_index(drop=True)
        direction = "ascending" if asc else "descending"
        print(format_success(f"Sorted by '{col}' ({direction})"))
        return ordered
    print(format_error(f"Column '{col}' not found"))
    return df
54
+
55
+
56
def first(df: pd.DataFrame, n: int = 5) -> pd.DataFrame:
    """Return the first ``n`` rows of ``df`` (same slicing rule as ``head``)."""
    return df.iloc[:n]
58
+
59
+
60
def last(df: pd.DataFrame, n: int = 5) -> pd.DataFrame:
    """Return the last ``n`` rows of ``df``."""
    tail_rows = df.tail(n=n)
    return tail_rows
62
+
63
+
64
def find(df: pd.DataFrame, col: str, value) -> pd.DataFrame:
    """Return the rows of ``df`` where column ``col`` equals ``value``.

    If ``col`` is not a column, an error line is printed and an empty
    DataFrame is returned. Otherwise the number of matches is printed.
    """
    if col in df.columns:
        matches = df[df[col] == value]
        print(format_success(f"Found {len(matches)} rows where '{col}' = '{value}'"))
        return matches
    print(format_error(f"Column '{col}' not found"))
    return pd.DataFrame()
dipencsv/cleaner.py ADDED
@@ -0,0 +1,154 @@
1
+ # dipencsv/cleaner.py
2
+
3
+ import pandas as pd
4
+
5
+ from dipencsv.errors import format_error, format_warning, format_success
6
+
7
+
8
def clean(df: pd.DataFrame, strategy: str = "safe") -> pd.DataFrame:
    """Return a cleaned copy of ``df``; the input frame is left untouched.

    Steps, in order: normalise column names, drop duplicate rows (printing
    how many were removed), handle missing values according to ``strategy``
    and convert mostly-numeric text columns to numbers.
    """
    # Work on a copy so the helpers below may modify the frame freely.
    cleaned = _fix_columns(df.copy())

    rows_before = len(cleaned)
    cleaned = cleaned.drop_duplicates()
    duplicates = rows_before - len(cleaned)
    if duplicates > 0:
        print(format_warning(f"Removed {duplicates} duplicate rows"))

    cleaned = _handle_missing(cleaned, strategy)
    cleaned = _fix_numeric(cleaned)

    print(format_success("Cleaning done"))
    return cleaned
29
+
30
+
31
+ def _fix_columns(df: pd.DataFrame) -> pd.DataFrame:
32
+ df.columns = (
33
+ df.columns
34
+ .str.strip()
35
+ .str.lower()
36
+ .str.replace(" ", "_")
37
+ .str.replace(r"[^\w]", "", regex=True)
38
+ )
39
+ return df
40
+
41
+
42
+ def _handle_missing(df: pd.DataFrame, strategy: str) -> pd.DataFrame:
43
+ if strategy == "safe":
44
+ for col in df.columns:
45
+ if df[col].isnull().sum() == 0:
46
+ continue
47
+ if df[col].dtype in ["float64", "int64"]:
48
+ df[col] = df[col].fillna(df[col].median())
49
+ else:
50
+ mode = df[col].mode()
51
+ if not mode.empty:
52
+ df[col] = df[col].fillna(mode[0])
53
+ elif strategy == "aggressive":
54
+ before = len(df)
55
+ df = df.dropna()
56
+ print(format_warning(f"Dropped {before - len(df)} rows with missing values"))
57
+ return df
58
+
59
+
60
+ def _fix_numeric(df: pd.DataFrame) -> pd.DataFrame:
61
+ for col in df.select_dtypes(include=["object", "str"]).columns:
62
+ converted = pd.to_numeric(df[col], errors="coerce")
63
+ if converted.notna().sum() > len(df) * 0.7:
64
+ df[col] = converted
65
+ return df
66
+
67
def drop(df: pd.DataFrame, cols) -> pd.DataFrame:
    """Return ``df`` without the given column(s).

    ``cols`` may be a single name or a list of names. If any name is not a
    column, an error line listing the unknown names is printed and ``df`` is
    returned unchanged.
    """
    wanted = [cols] if isinstance(cols, str) else cols
    unknown = [name for name in wanted if name not in df.columns]
    if unknown:
        print(format_error(f"Columns not found: {unknown}"))
        return df
    remaining = df.drop(columns=wanted)
    print(format_success(f"Dropped columns: {wanted}"))
    return remaining
77
+
78
+
79
def rename(df: pd.DataFrame, old: str, new: str) -> pd.DataFrame:
    """Return ``df`` with column ``old`` renamed to ``new``.

    If ``old`` is not a column, an error line is printed and ``df`` is
    returned unchanged.
    """
    if old in df.columns:
        renamed = df.rename(columns={old: new})
        print(format_success(f"Renamed '{old}' → '{new}'"))
        return renamed
    print(format_error(f"Column '{old}' not found"))
    return df
86
+
87
+
88
def fill(df: pd.DataFrame, col: str, value) -> pd.DataFrame:
    """Replace the missing values of column ``col`` with ``value``.

    The column is overwritten on the frame that was passed in, which is also
    returned. If ``col`` is not a column, an error line is printed and
    ``df`` is returned unchanged.
    """
    if col in df.columns:
        filled = df[col].fillna(value)
        df[col] = filled
        print(format_success(f"Filled missing values in '{col}' with {value}"))
        return df
    print(format_error(f"Column '{col}' not found"))
    return df
95
+
96
def encode(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Replace the values of column ``col`` with integer codes.

    Codes are assigned 0, 1, 2, ... in order of first appearance and the
    mapping is printed. The column is overwritten on the frame that was
    passed in, which is also returned. If ``col`` is not a column, an error
    line is printed and ``df`` is returned unchanged.
    """
    if col not in df.columns:
        print(format_error(f"Column '{col}' not found"))
        return df
    distinct = df[col].unique()
    mapping = dict(zip(distinct, range(len(distinct))))
    df[col] = df[col].map(mapping)
    print(format_success(f"Encoded '{col}': {mapping}"))
    return df
105
+
106
+
107
def normalize(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Min-max scale column ``col`` to the range [0, 1].

    The column is overwritten on the frame that was passed in, which is also
    returned. If ``col`` is not a column, or all of its values are equal, an
    error line is printed and ``df`` is returned unchanged.
    """
    if col not in df.columns:
        print(format_error(f"Column '{col}' not found"))
        return df
    series = df[col]
    lowest, highest = series.min(), series.max()
    if highest == lowest:
        # A zero-width range would mean dividing by zero below.
        print(format_error(f"Cannot normalize '{col}' — all values are the same"))
        return df
    width = highest - lowest
    df[col] = (series - lowest) / width
    print(format_success(f"Normalized '{col}' to range [0, 1]"))
    return df
119
+
120
+
121
def standardize(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Rescale column ``col`` to zero mean and unit standard deviation.

    The column is overwritten on the frame that was passed in, which is also
    returned. ``df`` is returned unchanged, after printing an error line,
    when ``col`` is not a column, when its standard deviation is 0, or when
    the standard deviation cannot be computed (it is NaN, e.g. for a column
    with fewer than two values).
    """
    if col not in df.columns:
        print(format_error(f"Column '{col}' not found"))
        return df
    mean = df[col].mean()
    std = df[col].std()
    if pd.isna(std):
        # Dividing by a NaN std would silently turn the whole column into
        # NaN, so refuse instead of destroying the data.
        print(format_error(f"Cannot standardize '{col}' — not enough values to compute std"))
        return df
    if std == 0:
        print(format_error(f"Cannot standardize '{col}' — std is 0"))
        return df
    df[col] = (df[col] - mean) / std
    print(format_success(f"Standardized '{col}' — mean=0, std=1"))
    return df
133
+
134
def remove_outliers(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``col`` value is within the IQR fences.

    Rows are kept when the value lies in
    ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``; the number of removed rows is
    printed. If ``col`` is not a column, an error line is printed and ``df``
    is returned unchanged.
    """
    if col not in df.columns:
        print(format_error(f"Column '{col}' not found"))
        return df
    series = df[col]
    first_quartile = series.quantile(0.25)
    third_quartile = series.quantile(0.75)
    spread = third_quartile - first_quartile
    low_fence = first_quartile - 1.5 * spread
    high_fence = third_quartile + 1.5 * spread

    kept = df[(series >= low_fence) & (series <= high_fence)]
    removed = len(df) - len(kept)
    print(format_success(f"Removed {removed} outliers from '{col}'"))
    return kept
148
+
149
+
150
def split(df: pd.DataFrame, test_size: float = 0.2, random_state: int = 42):
    """Split ``df`` into a ``(train, test)`` pair with scikit-learn.

    ``test_size`` and ``random_state`` are passed straight to
    ``train_test_split``; the sizes of both parts are printed.
    """
    # Imported here so scikit-learn is only needed when split() is used.
    from sklearn.model_selection import train_test_split

    parts = train_test_split(df, test_size=test_size, random_state=random_state)
    train, test = parts
    print(format_success(f"Split done — train: {len(train)} rows, test: {len(test)} rows"))
    return train, test
dipencsv/core.py ADDED
@@ -0,0 +1,226 @@
1
+ # dipencsv/core.py
2
+
3
+ from dipencsv.loader import load_file, get_file_size_mb
4
+ from dipencsv.engine.pandas_engine import PandasEngine
5
+ from dipencsv.engine.stream_engine import StreamEngine
6
+ from dipencsv.summary import summary, report
7
+ from dipencsv.cleaner import clean
8
+ from dipencsv.analytics import describe, value_counts, correlation_matrix, outliers, distribution
9
+ from dipencsv.intelligence import ask, explain, magic
10
+ from dipencsv.exporter import export
11
+ from dipencsv.cleaner import clean, drop, rename, fill
12
+ from dipencsv.cleaner import clean, drop, rename, fill, encode, normalize, standardize
13
+ from dipencsv.cleaner import clean, drop, rename, fill, encode, normalize, standardize, remove_outliers, split
14
+ from dipencsv.analytics import describe, value_counts, correlation_matrix, outliers, distribution, sort, find, first, last
15
+
16
+ SIZE_THRESHOLD_MB = 500
17
+
18
+
19
class Data:
    """Facade over a CSV file, backed by one of two engines.

    A ``PandasEngine`` holds the whole file in memory as a DataFrame; a
    ``StreamEngine`` reads the file in chunks. The streaming engine is used
    when ``stream=True`` or, with ``auto_mode=True``, when the file is larger
    than ``SIZE_THRESHOLD_MB``.

    The basic statistics (``mean``, ``max``, ``filter`` ...) are delegated to
    the engine. Every other feature needs the in-memory DataFrame: in stream
    mode those methods print a "not supported in stream mode" notice and
    return ``None``.
    """

    def __init__(self, filepath: str, stream: bool = False, auto_mode: bool = True):
        self._filepath = filepath
        self._engine = self._init_engine(filepath, stream, auto_mode)

    def _init_engine(self, filepath, stream, auto_mode):
        """Create the engine for ``filepath`` according to the mode flags."""
        if stream:
            print("🌊 Stream mode activated")
            return StreamEngine(filepath)

        if auto_mode:
            size = get_file_size_mb(filepath)
            if size > SIZE_THRESHOLD_MB:
                print(f"🌊 File is {size:.1f}MB — auto switching to stream mode")
                return StreamEngine(filepath)
            print(f"🐼 File is {size:.1f}MB — using pandas mode")

        return PandasEngine(load_file(filepath))

    def _has_frame(self, feature: str) -> bool:
        """Tell whether the engine holds an in-memory DataFrame.

        When it does not (stream mode), print the notice for ``feature`` and
        return ``False`` so the caller can simply fall through.
        """
        if hasattr(self._engine, "_df"):
            return True
        print(f"⚠️ {feature}() not supported in stream mode")
        return False

    # --- statistics delegated to the engine (both modes) ---
    def mean(self, col: str) -> float:
        return self._engine.mean(col)

    def median(self, col: str) -> float:
        return self._engine.median(col)

    def max(self, col: str) -> float:
        return self._engine.max(col)

    def min(self, col: str) -> float:
        return self._engine.min(col)

    def count(self, col: str) -> int:
        return self._engine.count(col)

    def group_mean(self, group_col: str, value_col: str) -> dict:
        return self._engine.group_mean(group_col, value_col)

    def correlation(self, col1: str, col2: str) -> float:
        return self._engine.correlation(col1, col2)

    def filter(self, condition: str):
        return self._engine.filter(condition)

    def top_n(self, col: str, n: int = 10):
        return self._engine.top_n(col, n)

    # --- reports printed to stdout (pandas mode only) ---
    def summary(self):
        if self._has_frame("summary"):
            summary(self._engine._df)

    def report(self):
        if self._has_frame("report"):
            report(self._engine._df)

    def explain(self):
        if self._has_frame("explain"):
            explain(self._engine._df)

    def magic(self):
        if self._has_frame("magic"):
            magic(self._engine._df)

    def export(self, filepath: str):
        if self._has_frame("export"):
            export(self._engine._df, filepath)

    # --- queries returning a result (pandas mode only) ---
    def describe(self):
        if self._has_frame("describe"):
            return describe(self._engine._df)
        return None

    def value_counts(self, col: str):
        if self._has_frame("value_counts"):
            return value_counts(self._engine._df, col)
        return None

    def correlation_matrix(self):
        if self._has_frame("correlation_matrix"):
            return correlation_matrix(self._engine._df)
        return None

    def outliers(self, col: str):
        if self._has_frame("outliers"):
            return outliers(self._engine._df, col)
        return None

    def distribution(self, col: str):
        if self._has_frame("distribution"):
            return distribution(self._engine._df, col)
        return None

    def ask(self, question: str):
        if self._has_frame("ask"):
            return ask(self._engine._df, question)
        return None

    def split(self, test_size: float = 0.2, random_state: int = 42):
        if self._has_frame("split"):
            return split(self._engine._df, test_size, random_state)
        return None

    def first(self, n: int = 5):
        if self._has_frame("first"):
            return first(self._engine._df, n)
        return None

    def last(self, n: int = 5):
        if self._has_frame("last"):
            return last(self._engine._df, n)
        return None

    def find(self, col: str, value):
        if self._has_frame("find"):
            return find(self._engine._df, col, value)
        return None

    # --- transformations replacing the stored DataFrame (pandas mode only) ---
    def clean(self, strategy: str = "safe"):
        if self._has_frame("clean"):
            self._engine._df = clean(self._engine._df, strategy)

    def drop(self, cols):
        if self._has_frame("drop"):
            self._engine._df = drop(self._engine._df, cols)

    def rename(self, old: str, new: str):
        if self._has_frame("rename"):
            self._engine._df = rename(self._engine._df, old, new)

    def fill(self, col: str, value):
        if self._has_frame("fill"):
            self._engine._df = fill(self._engine._df, col, value)

    def encode(self, col: str):
        if self._has_frame("encode"):
            self._engine._df = encode(self._engine._df, col)

    def normalize(self, col: str):
        if self._has_frame("normalize"):
            self._engine._df = normalize(self._engine._df, col)

    def standardize(self, col: str):
        if self._has_frame("standardize"):
            self._engine._df = standardize(self._engine._df, col)

    def remove_outliers(self, col: str):
        if self._has_frame("remove_outliers"):
            self._engine._df = remove_outliers(self._engine._df, col)

    def sort(self, col: str, asc: bool = True):
        if self._has_frame("sort"):
            self._engine._df = sort(self._engine._df, col, asc)

    # --- metadata ---
    @property
    def columns(self) -> list:
        """Column names, taken from the DataFrame or from the stream engine."""
        if hasattr(self._engine, "_df"):
            return list(self._engine._df.columns)
        return self._engine._columns

    @property
    def shape(self) -> tuple:
        """``(rows, columns)`` of the DataFrame; ``None`` in stream mode."""
        if hasattr(self._engine, "_df"):
            return self._engine._df.shape
        return None
@@ -0,0 +1,6 @@
1
+ # dipencsv/engine/__init__.py
2
+
3
+ from dipencsv.engine.pandas_engine import PandasEngine
4
+ from dipencsv.engine.stream_engine import StreamEngine
5
+
6
+ __all__ = ["PandasEngine", "StreamEngine"]
@@ -0,0 +1,42 @@
1
+ # dipencsv/engine/base.py
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+
6
class BaseEngine(ABC):
    """Abstract interface every dipencsv engine has to implement."""

    @abstractmethod
    def mean(self, col: str) -> float:
        """Return the mean of column ``col``."""

    @abstractmethod
    def median(self, col: str) -> float:
        """Return the median of column ``col``."""

    @abstractmethod
    def max(self, col: str) -> float:
        """Return the largest value of column ``col``."""

    @abstractmethod
    def min(self, col: str) -> float:
        """Return the smallest value of column ``col``."""

    @abstractmethod
    def count(self, col: str) -> int:
        """Return the number of values in column ``col``."""

    @abstractmethod
    def group_mean(self, group_col: str, value_col: str) -> dict:
        """Return the mean of ``value_col`` for each value of ``group_col``."""

    @abstractmethod
    def correlation(self, col1: str, col2: str) -> float:
        """Return the correlation between columns ``col1`` and ``col2``."""

    @abstractmethod
    def filter(self, condition: str):
        """Return the rows selected by ``condition``."""

    @abstractmethod
    def top_n(self, col: str, n: int):
        """Return the ``n`` rows ranked highest by column ``col``."""
@@ -0,0 +1,53 @@
1
+ # dipencsv/engine/pandas_engine.py
2
+
3
+ import pandas as pd
4
+ from dipencsv.engine.base import BaseEngine
5
+ from dipencsv.errors import suggest_column
6
+
7
+
8
class PandasEngine(BaseEngine):
    """Engine that answers every query from a DataFrame held in memory."""

    def __init__(self, df: pd.DataFrame):
        self._df = df

    def _check_col(self, col: str):
        """Raise ``ValueError`` with name suggestions if ``col`` is unknown."""
        if col in self._df.columns:
            return
        hint = suggest_column(col, list(self._df.columns))
        raise ValueError(f"❌ Column '{col}' not found.\n{hint}")

    def _series(self, col: str) -> pd.Series:
        """Return column ``col`` after validating that it exists."""
        self._check_col(col)
        return self._df[col]

    def mean(self, col: str) -> float:
        return self._series(col).mean()

    def median(self, col: str) -> float:
        return self._series(col).median()

    def max(self, col: str) -> float:
        return self._series(col).max()

    def min(self, col: str) -> float:
        return self._series(col).min()

    def count(self, col: str) -> int:
        return self._series(col).count()

    def group_mean(self, group_col: str, value_col: str) -> dict:
        for name in (group_col, value_col):
            self._check_col(name)
        grouped = self._df.groupby(group_col)[value_col]
        return grouped.mean().to_dict()

    def correlation(self, col1: str, col2: str) -> float:
        for name in (col1, col2):
            self._check_col(name)
        left, right = self._df[col1], self._df[col2]
        return left.corr(right)

    def filter(self, condition: str) -> pd.DataFrame:
        # ``condition`` is handed to DataFrame.query as-is.
        return self._df.query(condition)

    def top_n(self, col: str, n: int = 10) -> pd.DataFrame:
        self._check_col(col)
        return self._df.nlargest(n, col)
@@ -0,0 +1,91 @@
1
+ # dipencsv/engine/stream_engine.py
2
+
3
+ import pandas as pd
4
+ from dipencsv.engine.base import BaseEngine
5
+ from dipencsv.errors import suggest_column
6
+
7
+ CHUNK_SIZE = 100_000
8
+
9
+
10
class StreamEngine(BaseEngine):
    """Engine that computes results by reading the CSV file chunk by chunk.

    Each query re-reads the file with ``pd.read_csv(..., chunksize=...)``.
    ``median`` and ``correlation`` are not available and raise
    ``NotImplementedError``.
    """

    def __init__(self, filepath: str, chunk_size: int = CHUNK_SIZE):
        self._filepath = filepath
        self._chunk_size = chunk_size
        self._columns = self._get_columns()

    def _get_columns(self) -> list:
        """Read the column names from the first row of the file."""
        df = pd.read_csv(self._filepath, nrows=1)
        return list(df.columns)

    def _chunks(self):
        """Return a fresh chunk iterator over the file."""
        return pd.read_csv(self._filepath, chunksize=self._chunk_size)

    def mean(self, col: str) -> float:
        """Return the mean of ``col``; 0 when the column has no values."""
        self._check_col(col)
        total, count = 0, 0
        for chunk in self._chunks():
            total += chunk[col].sum()
            count += chunk[col].count()
        return total / count if count else 0

    def median(self, col: str) -> float:
        raise NotImplementedError("⚠️ median() not supported in stream mode — load a sample instead")

    def max(self, col: str) -> float:
        """Return the largest value of ``col``, ignoring missing values.

        Returns ``None`` when the file yields no chunk, and NaN when the
        column holds no value at all.
        """
        self._check_col(col)
        best = None
        for chunk in self._chunks():
            candidate = chunk[col].max()
            if best is None or pd.isna(best):
                # Nothing usable so far: a NaN from an all-empty chunk must
                # not stick, because every comparison with NaN is False.
                best = candidate
            elif not pd.isna(candidate) and candidate > best:
                best = candidate
        return best

    def min(self, col: str) -> float:
        """Return the smallest value of ``col``, ignoring missing values.

        Returns ``None`` when the file yields no chunk, and NaN when the
        column holds no value at all.
        """
        self._check_col(col)
        best = None
        for chunk in self._chunks():
            candidate = chunk[col].min()
            if best is None or pd.isna(best):
                # See max(): never let a NaN from an empty chunk stick.
                best = candidate
            elif not pd.isna(candidate) and candidate < best:
                best = candidate
        return best

    def count(self, col: str) -> int:
        """Return the number of non-missing values in ``col``."""
        self._check_col(col)
        total = 0
        for chunk in self._chunks():
            total += chunk[col].count()
        return total

    def group_mean(self, group_col: str, value_col: str) -> dict:
        """Return ``{group value: mean of value_col}`` over the whole file.

        A group without any non-missing value maps to NaN.
        """
        self._check_col(group_col)
        self._check_col(value_col)
        agg = {}
        for chunk in self._chunks():
            for key, grp in chunk.groupby(group_col):
                slot = agg.setdefault(key, {"sum": 0, "count": 0})
                slot["sum"] += grp[value_col].sum()
                slot["count"] += grp[value_col].count()
        # Guard the division: a group whose values are all missing has a
        # count of zero.
        return {
            key: (slot["sum"] / slot["count"] if slot["count"] else float("nan"))
            for key, slot in agg.items()
        }

    def correlation(self, col1: str, col2: str) -> float:
        raise NotImplementedError("⚠️ correlation() not supported in stream mode")

    def filter(self, condition: str) -> pd.DataFrame:
        """Return all rows matching ``condition`` (a ``DataFrame.query`` string)."""
        results = [chunk.query(condition) for chunk in self._chunks()]
        if not results:
            # pd.concat([]) raises; a file without data rows has no matches.
            return pd.DataFrame(columns=self._columns)
        return pd.concat(results, ignore_index=True)

    def top_n(self, col: str, n: int = 10) -> pd.DataFrame:
        """Return the ``n`` rows with the largest ``col`` values."""
        self._check_col(col)
        best = None
        for chunk in self._chunks():
            candidates = chunk.nlargest(n, col)
            # Keep only the running top n so memory stays bounded; start
            # from the first chunk instead of concatenating onto an empty
            # frame.
            if best is None:
                best = candidates
            else:
                best = pd.concat([best, candidates]).nlargest(n, col)
        return pd.DataFrame() if best is None else best

    def _check_col(self, col: str):
        """Raise ``ValueError`` with name suggestions if ``col`` is unknown."""
        if col not in self._columns:
            suggestion = suggest_column(col, self._columns)
            raise ValueError(f"❌ Column '{col}' not found.\n{suggestion}")
dipencsv/errors.py ADDED
@@ -0,0 +1,23 @@
1
+ # dipencsv/errors.py
2
+
3
+ from difflib import get_close_matches
4
+
5
+
6
def suggest_column(col: str, available_cols: list) -> str:
    """Build a hint for an unknown column name.

    Returns a "Did you mean" list with up to three close matches of ``col``
    among ``available_cols``; when nothing is similar enough, returns the
    list of all available columns instead.
    """
    # Column labels are not necessarily strings (e.g. integer headers), and
    # get_close_matches only works on strings, so compare textual forms.
    names = [str(name) for name in available_cols]
    matches = get_close_matches(str(col), names, n=3, cutoff=0.5)
    if matches:
        suggestions = "\n".join(f" - {m}" for m in matches)
        return f"💡 Did you mean:\n{suggestions}"
    return "💡 Available columns:\n" + "\n".join(f" - {c}" for c in names)
12
+
13
+
14
def format_error(msg: str) -> str:
    """Return ``msg`` prefixed with the error marker."""
    return "❌ {}".format(msg)
16
+
17
+
18
def format_warning(msg: str) -> str:
    """Return ``msg`` prefixed with the warning marker."""
    return "⚠️ {}".format(msg)
20
+
21
+
22
def format_success(msg: str) -> str:
    """Return ``msg`` prefixed with the success marker."""
    return "✅ {}".format(msg)
dipencsv/exporter.py ADDED
@@ -0,0 +1,45 @@
1
+ # dipencsv/exporter.py
2
+
3
+ import json
4
+ import pandas as pd
5
+ from dipencsv.errors import format_success, format_error
6
+
7
+
8
def export(df: pd.DataFrame, filepath: str):
    """Write ``df`` to ``filepath`` in the format named by its extension.

    ``.csv``, ``.json`` and ``.xlsx`` are supported; for anything else an
    error and the list of supported extensions are printed.
    """
    writers = (
        (".csv", _export_csv),
        (".json", _export_json),
        (".xlsx", _export_excel),
    )
    for suffix, writer in writers:
        if filepath.endswith(suffix):
            writer(df, filepath)
            return
    print(format_error(f"Unsupported format: {filepath}"))
    print("💡 Supported: .csv, .json, .xlsx")
18
+
19
+
20
def _export_csv(df: pd.DataFrame, filepath: str):
    """Write ``df`` to ``filepath`` as CSV without the index column.

    Failures are not raised; they are reported with a printed error line.
    """
    try:
        df.to_csv(filepath, index=False)
        confirmation = format_success(f"Exported to {filepath}")
        print(confirmation)
    except Exception as exc:
        print(format_error(f"Failed to export CSV: {exc}"))
26
+
27
+
28
def _export_json(df: pd.DataFrame, filepath: str):
    """Write ``df`` to ``filepath`` as a JSON array with one object per row.

    Missing values are written as ``null``. Failures are not raised; they
    are reported with a printed error line.
    """
    try:
        # json.dump would emit the bare token NaN for missing values, which
        # is not valid JSON. Convert to object dtype first so the missing
        # cells can really hold None, then serialise.
        records = df.astype(object).where(df.notna(), None).to_dict(orient="records")
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(records, f, indent=2)
        print(format_success(f"Exported to {filepath}"))
    except Exception as e:
        print(format_error(f"Failed to export JSON: {e}"))
36
+
37
+
38
def _export_excel(df: pd.DataFrame, filepath: str):
    """Write ``df`` to ``filepath`` as an Excel workbook without the index.

    Failures are not raised; a missing Excel writer dependency and any other
    error are each reported with a printed error line.
    """
    try:
        df.to_excel(filepath, index=False)
        confirmation = format_success(f"Exported to {filepath}")
        print(confirmation)
    except ImportError:
        print(format_error("openpyxl not installed — run: pip install openpyxl"))
    except Exception as exc:
        print(format_error(f"Failed to export Excel: {exc}"))
@@ -0,0 +1,194 @@
1
+ # dipencsv/intelligence.py
2
+
3
+ import pandas as pd
4
+ from dipencsv.errors import suggest_column
5
+
6
+
7
def ask(df: pd.DataFrame, question: str):
    """Answer a simple plain-English question about ``df``.

    Recognised forms, checked in this order: "<value> by <group>" (group
    mean), "top N <column>", "average/mean <column>", "highest/maximum/max
    <column>", "lowest/minimum/min <column>" and "count / how many
    <column>". The answer is printed and returned. When the question cannot
    be answered, hints are printed and ``None`` is returned.
    """
    import re

    q = question.lower().strip()

    def has_word(keyword: str) -> bool:
        # The short keywords also occur inside ordinary words and column
        # names ("nearby", "laptop_price", "minutes"), so they only count
        # when they appear as a word of their own.
        return re.search(rf"\b{re.escape(keyword)}\b", q) is not None

    # group by — check FIRST before average/mean
    if has_word("by"):
        result = _parse_group(q, df)
        if result is not None:
            print(result.to_string())
            return result
        print("🤔 Could not parse group query.")
        return None

    # top N
    if has_word("top"):
        n, col = _parse_top(q, df)
        if col:
            if not pd.api.types.is_numeric_dtype(df[col]):
                # nlargest raises on text columns; report it like the
                # average branch does.
                print(f"❌ '{col}' is a text column — cannot rank top values")
                return None
            result = df.nlargest(n, col)
            print(f"🏆 Top {n} by '{col}':")
            print(result[[col]].to_string())
            return result

    # average / mean
    if "average" in q or "mean" in q:
        col = _find_col(q, df)
        if col:
            if not pd.api.types.is_numeric_dtype(df[col]):
                print(f"❌ '{col}' is a text column — cannot calculate average")
                return None
            result = df[col].mean()
            print(f"📊 Average {col}: {result:.2f}")
            return result

    # highest / maximum
    if "highest" in q or "maximum" in q or has_word("max"):
        col = _find_col(q, df)
        if col:
            result = df[col].max()
            print(f"📈 Highest {col}: {result}")
            return result

    # lowest / minimum
    if "lowest" in q or "minimum" in q or has_word("min"):
        col = _find_col(q, df)
        if col:
            result = df[col].min()
            print(f"📉 Lowest {col}: {result}")
            return result

    # count
    if "count" in q or "how many" in q:
        col = _find_col(q, df)
        if col:
            result = df[col].count()
            print(f"🔢 Count of {col}: {result}")
            return result

    numeric_cols = list(df.select_dtypes(include="number").columns)
    # Text columns are picked through dtype predicates: the "str" selector of
    # select_dtypes is rejected by pandas releases older than 3.0.
    text_cols = [
        name
        for name, dtype in df.dtypes.items()
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)
    ]

    print("🤔 Could not understand the question.")
    print(f"\n💡 Numeric columns (use for average/top/highest): {numeric_cols}")
    print(f"💡 Text columns (use for grouping): {text_cols}")
    print("\n💡 Try:")
    if numeric_cols:
        print(f" - 'average {numeric_cols[0]}'")
        print(f" - 'top 10 {numeric_cols[0]}'")
    if numeric_cols and text_cols:
        print(f" - 'highest {numeric_cols[0]} by {text_cols[0]}'")
    return None
77
+
78
+
79
def explain(df: pd.DataFrame):
    """Print a plain-language overview of ``df``.

    The output covers the size of the data, numeric and text columns, mean /
    std / skew label per numeric column, missing values and duplicate rows,
    and finally suggestions. Nothing is returned.
    """
    print("=" * 40)
    print("🧠 DATASET EXPLANATION")
    print("=" * 40)

    rows, cols = df.shape
    print(f"\n📦 Dataset has {rows} rows and {cols} columns.")

    numeric = df.select_dtypes(include="number")
    # Text columns are picked through dtype predicates: the "str" selector of
    # select_dtypes is rejected by pandas releases older than 3.0.
    text_cols = [
        name
        for name, dtype in df.dtypes.items()
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)
    ]

    print(f"🔢 Numeric columns : {list(numeric.columns)}")
    print(f"🔤 Text columns : {text_cols}")

    # trends
    print("\n📈 TRENDS")
    print("-" * 40)
    skews = {}
    for col in numeric.columns:
        mean = numeric[col].mean()
        std = numeric[col].std()
        skew = numeric[col].skew()
        skews[col] = skew  # reused for the suggestions below
        skew_label = "right-skewed" if skew > 1 else "left-skewed" if skew < -1 else "normal"
        print(f" {col}: mean={mean:.2f}, std={std:.2f}, distribution={skew_label}")

    # issues
    print("\n⚠️ ISSUES")
    print("-" * 40)
    missing = df.isnull().sum()
    missing = missing[missing > 0]
    if missing.empty:
        print(" ✅ No missing values")
    else:
        for col, count in missing.items():
            pct = (count / rows) * 100
            print(f" ⚠️ '{col}' has {count} missing values ({pct:.1f}%)")

    dupes = df.duplicated().sum()
    if dupes > 0:
        print(f" ⚠️ {dupes} duplicate rows found")
    else:
        print(" ✅ No duplicates")

    # suggestions
    print("\n💡 SUGGESTIONS")
    print("-" * 40)
    if not missing.empty:
        print(" - Run data.clean() to fix missing values")
    if dupes > 0:
        print(" - Run data.clean() to remove duplicates")
    high_skew = [c for c in numeric.columns if abs(skews[c]) > 1]
    if high_skew:
        print(f" - Columns with high skew: {high_skew} — consider log transform")
    # Only call the data ready when there is neither a missing value nor a
    # duplicate row.
    if missing.empty and dupes == 0:
        print(" - Data looks ready for analysis ✅")
    print("=" * 40)
134
+
135
+
136
def magic(df: pd.DataFrame):
    """Print a one-shot overview of ``df``.

    Shows the size, the total number of missing cells and duplicate rows,
    min / mean / max of every numeric column and a recommendation on whether
    cleaning is needed. Nothing is returned.
    """
    banner = "=" * 40
    rule = "-" * 40

    print(banner)
    print("✨ MAGIC MODE")
    print(banner)

    row_count, col_count = df.shape
    missing_cells = df.isnull().sum().sum()
    duplicate_rows = df.duplicated().sum()
    numbers_only = df.select_dtypes(include="number")

    print(f"\n📦 {row_count} rows × {col_count} cols")
    print(f"⚠️ Missing values : {missing_cells}")
    print(f"⚠️ Duplicates : {duplicate_rows}")

    print("\n📈 KEY INSIGHTS")
    print(rule)
    for name in numbers_only.columns:
        series = numbers_only[name]
        print(f" {name}: min={series.min():.2f}, mean={series.mean():.2f}, max={series.max():.2f}")

    print("\n🏆 RECOMMENDATION")
    print(rule)
    needs_cleaning = missing_cells > 0 or duplicate_rows > 0
    if not needs_cleaning:
        print(" ✅ Data is clean — ready to analyze")
    else:
        print(" ⚠️ Run data.clean() before analysis")
    print(banner)
162
+
163
+
164
+ # --- internal helpers ---
165
+
166
+ def _find_col(question: str, df: pd.DataFrame):
167
+ words = question.lower().split()
168
+ for col in df.columns:
169
+ if col.lower() in words: # match whole word not substring
170
+ return col
171
+ print("❌ No matching column found in your question.")
172
+ print(f"💡 Your columns: {list(df.columns)}")
173
+ return None
174
+
175
def _parse_top(question: str, df: pd.DataFrame):
    """Extract ``(n, column)`` from a "top N <column>" question.

    ``n`` is the number following the word "top" and defaults to 10 when no
    number is given; ``column`` is whatever ``_find_col`` returns.
    """
    import re

    found = re.search(r"top\s+(\d+)", question)
    limit = int(found.group(1)) if found else 10
    column = _find_col(question, df)
    return limit, column
183
+
184
+
185
+ def _parse_group(question: str, df: pd.DataFrame):
186
+ words = question.lower().split()
187
+ cols = [c for c in df.columns if c.lower() in words] # whole word match
188
+ numeric = df.select_dtypes(include="number")
189
+ if len(cols) >= 2:
190
+ group_col = cols[0]
191
+ value_col = cols[1]
192
+ if value_col in numeric.columns:
193
+ return df.groupby(group_col)[value_col].mean().round(2)
194
+ return None
dipencsv/loader.py ADDED
@@ -0,0 +1,24 @@
1
+ # dipencsv/loader.py
2
+
3
+ import os
4
+ import pandas as pd
5
+
6
+
7
def load_file(filepath: str) -> pd.DataFrame:
    """Validate ``filepath``, read it as CSV and return the DataFrame.

    The shape of the loaded data is printed. Validation errors raised by
    ``_validate`` propagate to the caller.
    """
    _validate(filepath)
    frame = pd.read_csv(filepath)
    row_count, col_count = frame.shape[0], frame.shape[1]
    print(f"✅ Loaded: {row_count} rows × {col_count} cols")
    return frame
12
+
13
+
14
+ def _validate(filepath: str):
15
+ if not os.path.exists(filepath):
16
+ raise FileNotFoundError(f"❌ File not found: {filepath}")
17
+ if not filepath.endswith(".csv"):
18
+ raise ValueError(f"❌ Only CSV files supported: {filepath}")
19
+ if os.path.getsize(filepath) == 0:
20
+ raise ValueError(f"❌ File is empty: {filepath}")
21
+
22
+
23
def get_file_size_mb(filepath: str) -> float:
    """Return the size of ``filepath`` in mebibytes (1024 * 1024 bytes)."""
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.path.getsize(filepath)
    return size_in_bytes / bytes_per_mb
dipencsv/summary.py ADDED
@@ -0,0 +1,103 @@
1
+ # dipencsv/summary.py
2
+
3
+ import pandas as pd
4
+ from dipencsv.errors import format_warning
5
+
6
+
7
def summary(df: pd.DataFrame):
    """Print an overview of ``df``.

    The output lists the size of the data, the dtype of every column, the
    columns with missing values (count and percentage) and the names of the
    numeric and text columns. Nothing is returned.
    """
    print("=" * 40)
    print("📊 DATASET SUMMARY")
    print("=" * 40)
    print(f" Rows : {df.shape[0]}")
    print(f" Columns : {df.shape[1]}")
    print(f" Total cells : {df.shape[0] * df.shape[1]}")
    print()

    print("📋 COLUMN TYPES")
    print("-" * 40)
    for col, dtype in df.dtypes.items():
        print(f" {col:<25} {str(dtype)}")
    print()

    missing = df.isnull().sum()
    missing = missing[missing > 0]
    if not missing.empty:
        print("⚠️ MISSING VALUES")
        print("-" * 40)
        for col, count in missing.items():
            pct = (count / len(df)) * 100
            print(f" {col:<25} {count} missing ({pct:.1f}%)")
        print()
    else:
        print("✅ No missing values found")
        print()

    print("🔢 NUMERIC COLUMNS")
    print("-" * 40)
    numeric = df.select_dtypes(include="number").columns.tolist()
    if numeric:
        for col in numeric:
            print(f" - {col}")
    else:
        print(" none")
    print()

    print("🔤 TEXT COLUMNS")
    print("-" * 40)
    # Text columns are picked through dtype predicates: the "str" selector of
    # select_dtypes is rejected by pandas releases older than 3.0.
    text = [
        name
        for name, dtype in df.dtypes.items()
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)
    ]
    if text:
        for col in text:
            print(f" - {col}")
    else:
        print(" none")
    print("=" * 40)
54
+
55
+
56
def report(df: pd.DataFrame):
    """Print a data-quality report for ``df``.

    Sections: duplicate rows, percentage of missing values per column,
    ``describe()`` statistics of the numeric columns, and warnings for
    duplicates and for columns with more than 30% missing values. Nothing is
    returned.
    """
    banner = "=" * 40
    rule = "-" * 40

    print(banner)
    print("📝 FULL REPORT")
    print(banner)

    # duplicates
    duplicate_rows = df.duplicated().sum()
    if duplicate_rows > 0:
        print(format_warning(f"{duplicate_rows} duplicate rows found"))
    else:
        print("✅ No duplicates found")
    print()

    # missing %
    print("📉 MISSING VALUES %")
    print(rule)
    missing_pct = (df.isnull().sum() / len(df) * 100).round(2)
    for col, pct in missing_pct.items():
        marker = "⚠️ " if pct > 0 else "✅"
        print(f" {marker} {col:<23} {pct}%")
    print()

    # basic stats
    print("📈 BASIC STATS")
    print(rule)
    numbers_only = df.select_dtypes(include="number")
    if numbers_only.empty:
        print(" No numeric columns")
    else:
        print(numbers_only.describe().round(2).to_string())
    print()

    # warnings
    print("⚠️ WARNINGS")
    print(rule)
    notes = []
    if duplicate_rows > 0:
        notes.append(f"Duplicate rows: {duplicate_rows}")
    notes.extend(
        f"'{col}' has {pct}% missing values"
        for col, pct in missing_pct[missing_pct > 30].items()
    )
    if notes:
        for note in notes:
            print(f" ⚠️ {note}")
    else:
        print(" ✅ No major warnings")
    print(banner)
@@ -0,0 +1,239 @@
1
+ Metadata-Version: 2.4
2
+ Name: dipencsv
3
+ Version: 0.1.0
4
+ Summary: A beginner-friendly CSV analysis and ML data preparation toolkit
5
+ Author: Dipendra
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.8
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: pandas>=1.5.0
10
+ Requires-Dist: openpyxl>=3.0.0
11
+ Requires-Dist: scikit-learn>=1.0.0
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest; extra == "dev"
14
+
15
+ # DipenCSV 🐼
16
+
17
+ > DipenCSV is to pandas what seaborn is to matplotlib — a friendlier, higher-level API.
18
+
19
+ A beginner-friendly CSV analysis and ML data preparation toolkit built on top of pandas. Helps students and small teams clean, analyze, and prepare CSV data for machine learning — without needing to know pandas.
20
+
21
+ ## Who is it for?
22
+ - 🎓 Students cleaning data for ML assignments
23
+ - 🚀 Small startups needing quick data insights
24
+ - 👨‍💻 Developers who work with CSVs occasionally
25
+ - 📊 Anyone who finds pandas too complex
26
+
27
+ ## Installation
28
+
29
+ ```bash
30
+ pip install dipencsv
31
+ ```
32
+
33
+ ## Quick Start
34
+
35
+ ```python
36
+ from dipencsv import Data
37
+
38
+ data = Data("your_file.csv")
39
+ data.magic() # one click everything
40
+ ```
41
+
42
+ ## Full ML Workflow in Under 20 Lines
43
+
44
+ ```python
45
+ from dipencsv import Data
46
+ from sklearn.ensemble import RandomForestClassifier
47
+
48
+ data = Data("titanic.csv")
49
+ data.clean()
50
+ data.drop(["passengerid", "name", "ticket", "cabin"])
51
+ data.encode("sex")
52
+ data.encode("embarked")
53
+ data.remove_outliers("fare")
54
+ data.normalize("fare")
55
+ data.normalize("age")
56
+
57
+ train, test = data.split(test_size=0.2)
58
+
59
+ X_train = train.drop("survived", axis=1)
60
+ y_train = train["survived"]
61
+ X_test = test.drop("survived", axis=1)
62
+ y_test = test["survived"]
63
+
64
+ model = RandomForestClassifier()
65
+ model.fit(X_train, y_train)
66
+ print(f"Accuracy: {model.score(X_test, y_test):.2f}")
67
+ # Accuracy: 0.76
68
+ ```
69
+
70
+ ---
71
+
72
+ ## All Commands
73
+
74
+ ### Loading
75
+ ```python
76
+ data = Data("file.csv") # auto mode
77
+ data = Data("bigfile.csv", stream=True) # force stream mode
78
+ data = Data("file.csv", auto_mode=False) # force pandas mode
79
+ ```
80
+
81
+ ### Properties
82
+ ```python
83
+ print(data.columns) # list of column names
84
+ print(data.shape) # (rows, columns)
85
+ ```
86
+
87
+ ### Understanding Data
88
+ ```python
89
+ data.summary() # rows, cols, types, missing values
90
+ data.report() # duplicates, missing %, basic stats
91
+ data.explain() # trends, issues, suggestions
92
+ data.magic() # ⭐ one click full analysis
93
+ ```
94
+
95
+ ### Cleaning
96
+ ```python
97
+ data.clean() # auto clean (safe mode)
98
+ data.clean(strategy="aggressive") # drop rows with missing values
99
+ data.drop("column") # drop one column
100
+ data.drop(["col1", "col2"]) # drop multiple columns
101
+ data.rename("old_name", "new_name") # rename column
102
+ data.fill("column", value) # fill missing values manually
103
+ ```
104
+
105
+ > ⚠️ After clean(), column names become lowercase with underscores.
106
+ > Always check: `print(data.columns)`
107
+
108
+ ### ML Preparation
109
+ ```python
110
+ data.encode("gender") # text → numbers (label encoding)
111
+ data.normalize("age") # scale to [0, 1]
112
+ data.standardize("salary") # scale to mean=0, std=1
113
+ data.remove_outliers("price") # remove extreme values (IQR method)
114
+ train, test = data.split(test_size=0.2) # train/test split
115
+ ```
116
+
117
+ ### Analytics
118
+ ```python
119
+ data.mean("salary")
120
+ data.median("age")
121
+ data.max("salary")
122
+ data.min("salary")
123
+ data.count("city")
124
+ data.correlation("age", "salary")
125
+ data.correlation_matrix()
126
+ data.distribution("salary")
127
+ data.outliers("salary")
128
+ data.describe()
129
+ data.value_counts("city")
130
+ data.group_mean("city", "salary")
131
+ ```
132
+
133
+ ### Querying
134
+ ```python
135
+ data.first() # first 5 rows
136
+ data.first(10) # first 10 rows
137
+ data.last() # last 5 rows
138
+ data.sort("age") # sort ascending
139
+ data.sort("age", asc=False) # sort descending
140
+ data.find("city", "Bangalore") # find rows by value
141
+ ```
142
+
143
+ ### Intelligence
144
+ ```python
145
+ data.ask("average salary")
146
+ data.ask("top 10 salary")
147
+ data.ask("highest salary by city")
148
+ data.ask("lowest age")
149
+ data.ask("count city")
150
+ data.explain()
151
+ data.magic()
152
+ ```
153
+
154
+ ### Export
155
+ ```python
156
+ data.export("output.csv") # CSV
157
+ data.export("output.json") # JSON
158
+ data.export("output.xlsx") # Excel (requires openpyxl)
159
+ ```
160
+
161
+ ---
162
+
163
+ ## Stream Mode (Big Files)
164
+
165
+ DipenCSV auto-detects the file size and switches to stream mode for files larger than 500 MB:
166
+
167
+ ```python
168
+ data = Data("hugefile.csv") # auto detects
169
+ data = Data("hugefile.csv", stream=True) # force stream
170
+ ```
171
+
172
+ Supported in stream mode: `mean()`, `max()`, `min()`, `count()`, `group_mean()`, `filter()`, `top_n()`
173
+
174
+ ---
175
+
176
+ ## Smart Error Handling
177
+
178
+ ```python
179
+ data.mean("salry")
180
+ # ❌ Column 'salry' not found.
181
+ # 💡 Did you mean:
182
+ # - salary
183
+ # - salary_usd
184
+ ```
185
+
186
+ ---
187
+
188
+ ## Common Issues
189
+
190
+ **KeyError after clean()**
191
+ ```python
192
+ data.clean()
193
+ print(data.columns) # check actual column names
194
+ data.mean("salary") # use lowercase
195
+ ```
196
+
197
+ **Excel export failing**
198
+ ```bash
199
+ pip install openpyxl
200
+ ```
201
+
202
+ **Big file crashes**
203
+ ```python
204
+ data = Data("bigfile.csv", stream=True)
205
+ ```
206
+
207
+ ---
208
+
209
+ ## Dependencies
210
+ - `pandas >= 1.5.0`
211
+ - `scikit-learn >= 1.0.0`
212
+ - `openpyxl >= 3.0.0` (installed automatically; used for Excel export)
213
+
214
+ ## Documentation
215
+ Full docs are available in the `docs/` folder:
216
+
217
+
218
+ | Doc | What it covers |
219
+ |-----|----------------|
220
+ | [Getting Started](docs/getting_started.md) | installation, quick start |
221
+ | [Loading Data](docs/loading_data.md) | Data(), auto mode, stream mode |
222
+ | [Properties](docs/properties.md) | columns, shape |
223
+ | [Understanding Data](docs/understanding_data.md) | summary, report, explain, magic |
224
+ | [Cleaning Data](docs/cleaning_data.md) | clean, drop, rename, fill |
225
+ | [ML Preparation](docs/ml_prep.md) | encode, normalize, split etc |
226
+ | [Analytics](docs/analytics.md) | mean, correlation, outliers etc |
227
+ | [Query](docs/query.md) | sort, find, first, last |
228
+ | [Intelligence](docs/intelligence.md) | ask, explain, magic |
229
+ | [Stream Mode](docs/stream_mode.md) | big file handling |
230
+ | [Export](docs/export.md) | csv, json, xlsx |
231
+ | [Examples](docs/examples.md) | real world examples |
232
+ | [FAQ](docs/faq.md) | common student questions |
233
+ | [ML Workflow](docs/ml_workflow.md) | full end-to-end ML example |
234
+
235
+ ## License
236
+ MIT License
237
+
238
+ ## Author
239
+ Built by Dipendra — a CS student who wanted pandas to be less painful.
@@ -0,0 +1,17 @@
1
+ dipencsv/__init__.py,sha256=tt4CaUaGC7ReFCulu9Aiot29paRWhQWzDali3B3O7JE,121
2
+ dipencsv/analytics.py,sha256=nqni-CL9ekMaEOvSLk1VAcYiShMfsi4XMUFvhWA6EZc,2257
3
+ dipencsv/cleaner.py,sha256=jVyyegwGWyK0UUPYmrKDjebmci6hYvq7t5DmuT5htrI,4860
4
+ dipencsv/core.py,sha256=Z2GGR7s7qoni5zLXB8o5TYRqnI0wLX9IYuv5Ot3lOs0,8050
5
+ dipencsv/errors.py,sha256=NMLDt01VSr4KZMLdH5qeuJkrUwvYLOopjWGDG5Ebu8U,598
6
+ dipencsv/exporter.py,sha256=pUhjidm4F1r6VYOYNDxufeRfI-bNdKUuWqhN-X_PDPY,1428
7
+ dipencsv/intelligence.py,sha256=NYQFwT68sLLj4oECcknZLC4fYgBcBEwXG6Mq4p8t5PY,6089
8
+ dipencsv/loader.py,sha256=7UnG2Hzde08SN0FN-22pRrruiZEUHqx9Tyy-AuwnUWI,678
9
+ dipencsv/summary.py,sha256=_qwYZsytcO7SzGXorYtK5T1FO-Kbes9TeAGdjSi0Y8E,2711
10
+ dipencsv/engine/__init__.py,sha256=6f-E6-atDCyRVVgDAg4--OJal5aBBs2xKAD4y46xUu4,184
11
+ dipencsv/engine/base.py,sha256=D8CFeRUd_d7HKrIF48sgCfN3_rGny3ebrvSsxsJIXdU,788
12
+ dipencsv/engine/pandas_engine.py,sha256=hzilZ-12C_5nTTiz2lMNn3EkObnzBl4gyCnpKzrX1gk,1594
13
+ dipencsv/engine/stream_engine.py,sha256=IhCx42n5fZ59fe6RGaG-8-GsXi20-odX698PahSsAhM,3138
14
+ dipencsv-0.1.0.dist-info/METADATA,sha256=sM7xwAtwHkh38RQzQTIw5NY2Gp5tASgzDbcy7VRL_Zo,6181
15
+ dipencsv-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
16
+ dipencsv-0.1.0.dist-info/top_level.txt,sha256=vT6kHGNkvpiVjSve9w6c8fovS-RX9qun6DprSl3bSeI,9
17
+ dipencsv-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ dipencsv