multifunctionplotter 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multifunctionplotter/mfp.py +989 -0
- multifunctionplotter/mfp_data_manipulator.py +192 -0
- multifunctionplotter/mfp_dmanp.py +931 -0
- multifunctionplotter/mfp_dmanp_help.py +741 -0
- multifunctionplotter/mfp_help.py +396 -0
- multifunctionplotter/mfp_server.py +603 -0
- multifunctionplotter/prophet_pred.py +214 -0
- multifunctionplotter-1.0.3.dist-info/METADATA +881 -0
- multifunctionplotter-1.0.3.dist-info/RECORD +13 -0
- multifunctionplotter-1.0.3.dist-info/WHEEL +5 -0
- multifunctionplotter-1.0.3.dist-info/entry_points.txt +3 -0
- multifunctionplotter-1.0.3.dist-info/licenses/LICENSE +201 -0
- multifunctionplotter-1.0.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,931 @@
|
|
|
1
|
+
"""
|
|
2
|
+
mfp_dmanp.py
|
|
3
|
+
========================
|
|
4
|
+
Interactive CLI tool for slicing, sorting, merging, generating, appending,
|
|
5
|
+
deleting, modifying, filtering, renaming, casting, deduplicating, and
|
|
6
|
+
computing new columns on CSV / Excel / JSON data files.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python mfp_dmanp.py [datafile]
|
|
10
|
+
|
|
11
|
+
Author : Swarnadeep Seth
|
|
12
|
+
Version: 1.0.3
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
# Standard-library imports
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
import warnings
|
|
21
|
+
from collections import deque
|
|
22
|
+
from typing import Optional
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Third-party imports
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
import numpy as np
|
|
28
|
+
import pandas as pd
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
# Global configuration
# ---------------------------------------------------------------------------
warnings.filterwarnings("ignore")

__version__ = "1.0.3"

# Maximum number of undo snapshots kept in memory.
_UNDO_LIMIT = 20

# File extensions recognised by load() / save().
_READERS: dict[str, object] = {
    ".csv": pd.read_csv,
    ".xlsx": pd.read_excel,
    ".xls": pd.read_excel,
    ".json": pd.read_json,
}
_WRITERS: dict[str, str] = {
    ".csv": "to_csv",
    ".xlsx": "to_excel",
    ".xls": "to_excel",
    ".json": "to_json",
}

# Actions the REPL recognises, with one-line descriptions.
ACTION_HELP: dict[str, str] = {
    # ── Inspection ──────────────────────────────────────────────────────────
    "show": "Print the current DataFrame.",
    "head": "Print the first N rows (default 10).",
    "tail": "Print the last N rows (default 10).",
    "properties": "Column index, dtypes, NaN counts, and summary statistics.",
    "props": "Alias for 'properties'.",
    "counts": "Frequency count of unique values in a column.",
    # ── Transformation ───────────────────────────────────────────────────────
    "filter": "Keep rows matching a pandas query expression.",
    "slice": "Keep rows at positions [start, end).",
    "sort": "Sort by a column (asc / desc).",
    "rename": "Rename one or more columns (old:new, ...).",
    "cast": "Change a column's dtype (int / float / str / datetime).",
    "addcol": "Add a new column from an expression (uses df.eval).",
    "modify": "Replace a specific value inside a column.",
    "delete": "Drop columns (by name) or rows (by integer index).",
    "del": "Alias for 'delete'.",
    "dedup": "Remove duplicate rows, optionally over chosen columns.",
    "fillna": "Fill empty / NaN cells in a column with a given value.",
    "dropna": "Drop rows that have empty / NaN in a column (or any col).",
    # ── I/O ──────────────────────────────────────────────────────────────────
    "load": "Load a new CSV / Excel / JSON file, replacing current data.",
    "generate": "Build a numeric x/y table from an expression.",
    "gen": "Alias for 'generate'.",
    "append": "Append rows from a second file.",
    "merge": "Merge with a second file on a shared column.",
    "save": "Write the current DataFrame to a file (CSV / Excel / JSON).",
    # ── History ──────────────────────────────────────────────────────────────
    "undo": "Revert the last mutating operation.",
    "redo": "Re-apply the last undone operation.",
    # ── Meta ─────────────────────────────────────────────────────────────────
    "help": "List all available actions.",
    "exit": "Quit the program.",
    "q": "Alias for 'exit'.",
}

_BANNER = f"""
{'=' * 70}
MFP Data Manipulator v{__version__}
CSV · Excel · JSON | Filter · Cast · Dedup · Undo/Redo · and more
{'=' * 70}
Type help for a list of commands.
"""


# ---------------------------------------------------------------------------
# Core class
# ---------------------------------------------------------------------------

class MFPDataManipulator:
    """
    Wraps a pandas DataFrame and exposes named operations for interactive
    data exploration and cleaning.

    Design rules
    ------------
    - Every mutating method records an undo snapshot *before* making
      changes so that ``undo`` can restore the previous state. Operations
      that can fail part-way (``cast``, ``load``, ``append``, ``merge``)
      take care not to leave a stale snapshot behind on failure.
    - Every mutating method returns ``self.df`` so callers can print or
      inspect the result immediately.
    - I/O helpers are format-aware: the file extension drives the
      reader / writer, so CSV, Excel, and JSON all work transparently.
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def __init__(self, datafile: Optional[str] = None) -> None:
        """
        Parameters
        ----------
        datafile:
            Path to a data file (CSV / Excel / JSON) to load on startup.
            When *None* an empty DataFrame is created so the user can
            start from a ``generate`` or ``load`` call.
        """
        self._undo_stack: deque[pd.DataFrame] = deque(maxlen=_UNDO_LIMIT)
        self._redo_stack: deque[pd.DataFrame] = deque(maxlen=_UNDO_LIMIT)

        self.datafile: Optional[str] = None
        self.df = pd.DataFrame()

        if datafile:
            self._load_file(datafile, announce=True)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _save_snapshot(self) -> None:
        """Push a deep copy of the current DataFrame onto the undo stack."""
        self._undo_stack.append(self.df.copy(deep=True))
        self._redo_stack.clear()  # new mutation invalidates redo history

    def _load_file(self, path: str, announce: bool = True) -> None:
        """
        Read *path* into ``self.df`` using the appropriate pandas reader.

        Raises
        ------
        FileNotFoundError
            When the file does not exist.
        ValueError
            When the file extension is not supported.
        """
        abs_path = os.path.abspath(path)
        ext = os.path.splitext(abs_path)[1].lower()
        if ext not in _READERS:
            raise ValueError(
                f"Unsupported file type '{ext}'. "
                f"Supported: {', '.join(_READERS)}"
            )
        reader = _READERS[ext]
        # Read fully *before* touching instance state so that a failed read
        # (missing file, parse error) leaves self.df / self.datafile intact.
        loaded = reader(abs_path).replace(np.nan, "", regex=True)
        self.datafile = abs_path
        self.df = loaded
        if announce:
            print(
                f"Loaded '{abs_path}' — "
                f"{len(self.df):,} rows × {len(self.df.columns)} cols."
            )
            print(self.df)

    # ------------------------------------------------------------------
    # Inspection (non-mutating — no snapshot needed)
    # ------------------------------------------------------------------

    def show(self) -> pd.DataFrame:
        """Print and return the current DataFrame."""
        print(self.df)
        return self.df

    def head(self, n: int = 10) -> pd.DataFrame:
        """
        Print and return the first *n* rows.

        Parameters
        ----------
        n : int  Number of rows to display (default 10).
        """
        result = self.df.head(int(n))
        print(result)
        return result

    def tail(self, n: int = 10) -> pd.DataFrame:
        """
        Print and return the last *n* rows.

        Parameters
        ----------
        n : int  Number of rows to display (default 10).
        """
        result = self.df.tail(int(n))
        print(result)
        return result

    def properties(self) -> pd.DataFrame:
        """
        Display a column-index table, NaN / empty counts, and summary
        statistics.  Returns the ``describe()`` DataFrame.
        """
        col_table = pd.DataFrame({
            "Index": range(len(self.df.columns)),
            "Column": self.df.columns,
            "dtype": self.df.dtypes.values,
        })
        print("\nColumns:")
        print(col_table.to_string(index=False))

        # Empty strings stand in for NaN throughout this tool, so count both.
        nan_counts = self.df.replace("", np.nan).isnull().sum()
        print("\nNaN / empty counts:")
        print(nan_counts.to_string())

        summary = self.df.describe(include="all")
        print("\nSummary statistics:")
        print(summary)
        return summary

    def counts(self, col: str) -> pd.Series:
        """
        Print and return the frequency of each unique value in *col*,
        sorted most-frequent first.

        Parameters
        ----------
        col : str  Column name.
        """
        result = self.df[col].value_counts(dropna=False)
        print(result.to_string())
        return result

    # ------------------------------------------------------------------
    # Transformation operations (mutating)
    # ------------------------------------------------------------------

    def filter(self, query: str) -> pd.DataFrame:
        """
        Keep only rows that satisfy a pandas query expression.

        Parameters
        ----------
        query : str
            A boolean expression understood by ``DataFrame.query()``.
            Examples: ``'age > 30'``, ``'city == "Roanoke"'``,
            ``'score >= 90 and grade == "A"'``.

        Notes
        -----
        Column names with spaces must be wrapped in backticks:
        ``'`first name` == "Alice"'``.
        """
        self._save_snapshot()
        before = len(self.df)
        self.df = self.df.query(query).reset_index(drop=True)
        print(f"Filter kept {len(self.df):,} of {before:,} rows.")
        return self.df

    def slice(self, start: int, end: int) -> pd.DataFrame:
        """
        Keep rows in the half-open interval [start, end).

        Parameters
        ----------
        start, end : int  Row positions (0-based).
        """
        self._save_snapshot()
        # reset_index keeps the row labels consistent with the positions
        # shown to the user, matching filter / dedup / dropna behaviour.
        self.df = self.df.iloc[int(start):int(end)].reset_index(drop=True)
        return self.df

    def sort(self, col: str, order: str = "asc") -> pd.DataFrame:
        """
        Sort the DataFrame by *col*.

        Parameters
        ----------
        col : str    Column name.
        order : str  ``'asc'`` / ``'ascending'`` or ``'desc'`` / ``'descending'``.

        Raises
        ------
        ValueError  When *order* is not recognised.
        """
        order = order.strip().lower()
        if order in {"asc", "ascending"}:
            ascending = True
        elif order in {"desc", "descending"}:
            ascending = False
        else:
            raise ValueError(f"Unknown sort order '{order}'. Use 'asc' or 'desc'.")

        self._save_snapshot()
        self.df = self.df.sort_values(by=[col], ascending=ascending)
        return self.df

    def rename(self, mapping: str) -> pd.DataFrame:
        """
        Rename one or more columns.

        Parameters
        ----------
        mapping : str
            Comma-separated ``old:new`` pairs.
            Example: ``'first name:first_name, Last Name:last_name'``

        Raises
        ------
        ValueError  When a pair is malformed.
        KeyError    When an old column name is not found.
        """
        pairs: dict[str, str] = {}
        for token in mapping.split(","):
            token = token.strip()
            if ":" not in token:
                raise ValueError(
                    f"Bad rename pair '{token}'. Expected format: old:new"
                )
            old, new = token.split(":", 1)
            pairs[old.strip()] = new.strip()

        # Validate before snapshotting so a bad request leaves no trace.
        missing = [k for k in pairs if k not in self.df.columns]
        if missing:
            raise KeyError(f"Column(s) not found: {missing}")

        self._save_snapshot()
        self.df = self.df.rename(columns=pairs)
        print(f"Renamed: {pairs}")
        return self.df

    def cast(self, col: str, dtype: str) -> pd.DataFrame:
        """
        Change the dtype of *col*.

        Parameters
        ----------
        col : str    Column name.
        dtype : str  Target type: ``'int'``, ``'float'``, ``'str'``,
                     or ``'datetime'``.

        Raises
        ------
        ValueError  When *dtype* is not a supported keyword or conversion fails.
        """
        dtype = dtype.strip().lower()
        self._save_snapshot()
        try:
            if dtype == "int":
                self.df[col] = pd.to_numeric(self.df[col], errors="raise").astype(int)
            elif dtype == "float":
                self.df[col] = pd.to_numeric(self.df[col], errors="raise").astype(float)
            elif dtype == "str":
                self.df[col] = self.df[col].astype(str)
            elif dtype == "datetime":
                self.df[col] = pd.to_datetime(self.df[col], errors="raise")
            else:
                raise ValueError(
                    f"Unknown dtype '{dtype}'. Use: int, float, str, datetime."
                )
        except Exception:
            self._undo_stack.pop()  # roll back the snapshot — nothing changed
            raise

        print(f"Column '{col}' cast to {dtype}.")
        return self.df

    def addcol(self, name: str, expr: str) -> pd.DataFrame:
        """
        Add a new column derived from an expression over existing columns.

        Parameters
        ----------
        name : str  Name for the new column.
        expr : str  A ``DataFrame.eval()``-compatible expression.
                    Examples: ``'price * qty'``, ``'score / score.max()'``.
        """
        self._save_snapshot()
        self.df[name] = self.df.eval(expr)
        print(f"Column '{name}' added.")
        return self.df

    def modify(self, col: str, old_val: str, new_val: str) -> pd.DataFrame:
        """
        Replace occurrences of *old_val* with *new_val* in column *col*.

        Parameters
        ----------
        col : str      Target column name.
        old_val : str  Value to search for (exact match).
        new_val : str  Replacement value.
        """
        self._save_snapshot()
        self.df[col] = self.df[col].replace(old_val, new_val)
        return self.df

    def delete(self, targets: str) -> pd.DataFrame:
        """
        Drop rows or columns.

        Parameters
        ----------
        targets : str
            Comma-separated list of **integer row positions** *or*
            **column names** — not both at once.  Negative positions
            count from the end (``-1`` is the last row).

        Logic
        -----
        If every token is a plain integer → treat as row positions.
        Otherwise → treat as column names.
        """
        tokens = [t.strip() for t in targets.split(",")]
        self._save_snapshot()
        if all(t.lstrip("-").isnumeric() for t in tokens):
            positions = [int(t) for t in tokens]
            # Resolve positions (including negatives) through the index so
            # the drop works on any index, not just the default RangeIndex.
            self.df = self.df.drop(index=self.df.index[positions])
        else:
            self.df = self.df.drop(columns=tokens)
        return self.df

    def dedup(self, cols: Optional[str] = None) -> pd.DataFrame:
        """
        Remove duplicate rows.

        Parameters
        ----------
        cols : str | None
            Comma-separated column names to consider.  When *None* (or
            empty string), all columns are used.
        """
        self._save_snapshot()
        before = len(self.df)
        subset = (
            [c.strip() for c in cols.split(",")]
            if cols and cols.strip()
            else None
        )
        self.df = self.df.drop_duplicates(subset=subset).reset_index(drop=True)
        print(f"Removed {before - len(self.df):,} duplicate row(s). {len(self.df):,} remain.")
        return self.df

    def fillna(self, col: str, value: str) -> pd.DataFrame:
        """
        Fill empty / NaN cells in *col* with *value*.

        Parameters
        ----------
        col : str    Column name.
        value : str  Replacement value.  Automatically converted to int or
                     float when the string represents a number (including
                     forms like '1e5' that have no decimal point).
        """
        self._save_snapshot()
        # Try the narrowest numeric type first; fall back to the raw string.
        typed_value: object
        try:
            typed_value = int(value)
        except (TypeError, ValueError):
            try:
                typed_value = float(value)
            except (TypeError, ValueError):
                typed_value = value

        self.df[col] = self.df[col].replace("", np.nan)
        self.df[col] = self.df[col].fillna(typed_value)
        print(f"Filled NaN / empty values in '{col}' with {typed_value!r}.")
        return self.df

    def dropna(self, col: Optional[str] = None) -> pd.DataFrame:
        """
        Drop rows that contain empty / NaN values.

        Parameters
        ----------
        col : str | None
            When given, only rows where *col* is empty / NaN are dropped.
            When *None* / empty string, any row with at least one empty /
            NaN cell is dropped.
        """
        self._save_snapshot()
        before = len(self.df)
        tmp = self.df.replace("", np.nan)
        if col and col.strip():
            self.df = tmp.dropna(subset=[col.strip()]).reset_index(drop=True)
            scope = f"column '{col.strip()}'"
        else:
            self.df = tmp.dropna().reset_index(drop=True)
            scope = "any column"
        print(f"Dropped {before - len(self.df):,} row(s) with NaN / empty in {scope}.")
        return self.df

    # ------------------------------------------------------------------
    # I/O operations
    # ------------------------------------------------------------------

    def load(self, path: str) -> pd.DataFrame:
        """
        Replace the current DataFrame by loading a new file.

        Parameters
        ----------
        path : str  Path to a CSV / Excel / JSON file.

        Notes
        -----
        The REPL calls ``_dirty_check`` before invoking this; calling it
        directly skips the unsaved-changes prompt.  The undo snapshot is
        only recorded once the file has loaded successfully, so a typo'd
        path does not pollute the undo history.
        """
        snapshot = self.df.copy(deep=True)
        self._load_file(path, announce=True)  # raises without mutating state
        self._undo_stack.append(snapshot)
        self._redo_stack.clear()
        return self.df

    def generate(self, xr: str, expr: str) -> pd.DataFrame:
        """
        Replace the current DataFrame with a numeric x/y table.

        Parameters
        ----------
        xr : str    Range string ``'start:end'`` (inclusive on both ends).
        expr : str  Python expression in terms of *x* and *np*.
                    Example: ``'5*x**2 / np.exp(x)'``

        Security note
        -------------
        ``eval`` is used intentionally for a numeric DSL.
        Do **not** expose this to untrusted input.
        """
        self._save_snapshot()
        lo, hi = (int(v) for v in xr.split(":"))
        x = np.linspace(lo, hi, hi - lo + 1)
        y = eval(expr)  # noqa: S307 — deliberate numeric mini-DSL
        self.df = pd.DataFrame({"x": x, "y": y})
        return self.df

    def append(self, datafile: str) -> pd.DataFrame:
        """
        Append rows from *datafile* to the current DataFrame.

        Parameters
        ----------
        datafile : str  Path to a CSV / Excel / JSON file.
        """
        ext = os.path.splitext(datafile)[1].lower()
        reader = _READERS.get(ext, pd.read_csv)
        # Read before snapshotting so a failed read leaves undo untouched.
        extra = reader(datafile).replace(np.nan, "", regex=True)
        self._save_snapshot()
        before = len(self.df)
        self.df = pd.concat([self.df, extra], ignore_index=True)
        print(f"Appended {len(self.df) - before:,} rows.")
        return self.df

    def merge(
        self,
        other_file: str,
        on_column: str,
        how: str = "inner",
    ) -> pd.DataFrame:
        """
        Merge ``self.df`` with a second file on a shared column.

        Parameters
        ----------
        other_file : str  Path to the second file (CSV / Excel / JSON).
        on_column : str   Column name present in both DataFrames.
        how : str         ``'inner'``, ``'outer'``, ``'left'``, or ``'right'``.
        """
        how = how.strip().lower()
        try:
            ext = os.path.splitext(other_file)[1].lower()
            reader = _READERS.get(ext, pd.read_csv)
            df2 = reader(other_file).replace(np.nan, "", regex=True)
        except FileNotFoundError:
            print(f"[ERROR] File not found: '{other_file}'")
            return self.df

        self._save_snapshot()
        try:
            self.df = pd.merge(self.df, df2, on=on_column, how=how)
        except KeyError:
            self._undo_stack.pop()  # merge failed — nothing changed
            print(f"[ERROR] Column '{on_column}' not found in one of the files.")
        return self.df

    def save(self, filename: str) -> str:
        """
        Save the current DataFrame to a file.

        The format is determined by *filename*'s extension (CSV / Excel /
        JSON; anything else falls back to CSV).  The file is written next
        to the originally loaded file, or to the current working directory
        if no file was loaded.

        Parameters
        ----------
        filename : str  Output file name or relative path.

        Returns
        -------
        str  Absolute path of the saved file.
        """
        directory = (
            os.path.dirname(self.datafile)
            if self.datafile
            else os.getcwd()
        )
        save_path = os.path.join(directory, filename)
        ext = os.path.splitext(filename)[1].lower()
        writer_name = _WRITERS.get(ext, "to_csv")
        writer = getattr(self.df, writer_name)

        if writer_name == "to_json":
            writer(save_path, orient="records", indent=2)
        else:
            # to_csv and to_excel share the same index=False signature.
            writer(save_path, index=False)

        print(f"Saved → {save_path}")
        return save_path

    # ------------------------------------------------------------------
    # Undo / redo
    # ------------------------------------------------------------------

    def undo(self) -> pd.DataFrame:
        """
        Revert the last mutating operation.

        Raises
        ------
        IndexError  When the undo stack is empty.
        """
        if not self._undo_stack:
            raise IndexError("Nothing to undo.")
        self._redo_stack.append(self.df.copy(deep=True))
        self.df = self._undo_stack.pop()
        print(f"Undone. DataFrame is now {len(self.df):,} rows × {len(self.df.columns)} cols.")
        return self.df

    def redo(self) -> pd.DataFrame:
        """
        Re-apply the last undone operation.

        Raises
        ------
        IndexError  When the redo stack is empty.
        """
        if not self._redo_stack:
            raise IndexError("Nothing to redo.")
        self._undo_stack.append(self.df.copy(deep=True))
        self.df = self._redo_stack.pop()
        print(f"Redone. DataFrame is now {len(self.df):,} rows × {len(self.df.columns)} cols.")
        return self.df
+
|
|
653
|
+
|
|
654
|
+
# ---------------------------------------------------------------------------
|
|
655
|
+
# REPL helpers
|
|
656
|
+
# ---------------------------------------------------------------------------
|
|
657
|
+
|
|
658
|
+
def _print_help() -> None:
    """Show the command reference, grouped into titled sections."""
    grouped: tuple[tuple[str, tuple[str, ...]], ...] = (
        ("Inspection", ("show", "head", "tail", "properties", "props", "counts")),
        ("Transformation", ("filter", "slice", "sort", "rename", "cast", "addcol",
                            "modify", "delete", "del", "dedup", "fillna", "dropna")),
        ("I/O", ("load", "generate", "gen", "append", "merge", "save")),
        ("History", ("undo", "redo")),
        ("Meta", ("help", "exit", "q")),
    )
    for title, names in grouped:
        print(f"\n {title}")
        print(" " + "-" * 44)
        for name in names:
            # Skip silently when a listed action has no registered help text.
            description = ACTION_HELP.get(name)
            if description is not None:
                print(f" {name:<14} {description}")
    print()
+
|
|
676
|
+
|
|
677
|
+
def _require_data(dm: MFPDataManipulator, action: str) -> bool:
    """Guard helper: report an error and return *False* when *dm* holds no data."""
    if not dm.df.empty:
        return True
    print(
        f"[ERROR] No data loaded. "
        f"Run 'load' or 'generate' before '{action}'."
    )
    return False
|
+
|
|
687
|
+
|
|
688
|
+
def _dirty_check(dm: MFPDataManipulator) -> bool:
    """
    Ask the user before discarding potential unsaved changes.

    A non-empty undo stack is the signal that mutations occurred since the
    last load.  Returns *True* if it is safe to proceed, *False* if the
    user cancelled.
    """
    if not dm._undo_stack:
        return True
    answer = input(
        " You may have unsaved changes. Continue anyway? (y / n): "
    ).strip().lower()
    return answer in {"y", "yes"}
|
+
|
|
702
|
+
|
|
703
|
+
def _run_repl(dm: MFPDataManipulator) -> None:
    """
    Enter the interactive command loop for *dm*.

    Each iteration reads one action keyword, prompts for its arguments,
    dispatches to the corresponding method, and prints the result.
    Exceptions raised by method calls — including ``OSError`` (e.g. a
    ``FileNotFoundError`` from 'load' / 'append' / 'merge' with a bad
    path) — are caught and displayed without crashing the loop.

    Parameters
    ----------
    dm : MFPDataManipulator
        The data-manipulator instance all actions operate on.
    """
    while True:
        # Read the next action keyword; Ctrl-D / Ctrl-C at the prompt exits.
        try:
            action = input("\nAction> ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            print("\nInterrupted — exiting.")
            sys.exit(0)

        try:
            # ----------------------------------------------------------------
            # Meta
            # ----------------------------------------------------------------
            if action in {"exit", "q"}:
                # A non-empty undo stack means there may be unsaved changes.
                if dm._undo_stack:
                    ans = input(
                        " You may have unsaved changes. Quit anyway? (y / n): "
                    ).strip().lower()
                    if ans not in {"y", "yes"}:
                        continue
                print("Goodbye.")
                sys.exit(0)

            elif action == "help":
                _print_help()

            # ----------------------------------------------------------------
            # Inspection
            # ----------------------------------------------------------------
            elif action == "show":
                if not _require_data(dm, action): continue
                dm.show()

            elif action == "head":
                if not _require_data(dm, action): continue
                n = input(" Number of rows (default 10): ").strip()
                dm.head(int(n) if n else 10)

            elif action == "tail":
                if not _require_data(dm, action): continue
                n = input(" Number of rows (default 10): ").strip()
                dm.tail(int(n) if n else 10)

            elif action in {"properties", "props"}:
                if not _require_data(dm, action): continue
                dm.properties()

            elif action == "counts":
                if not _require_data(dm, action): continue
                col = input(" Column name: ").strip()
                dm.counts(col)

            # ----------------------------------------------------------------
            # I/O (load / generate allowed without existing data)
            # ----------------------------------------------------------------
            elif action == "load":
                # Replacing the frame discards unsaved edits — confirm first.
                if not _dirty_check(dm): continue
                path = input(" File path (CSV / Excel / JSON): ").strip()
                dm.load(path)

            elif action in {"generate", "gen"}:
                if not _dirty_check(dm): continue
                xr = input(" x range (e.g. 0:10): ").strip()
                expr = input(" y expression (e.g. 5*x**2/np.exp(x)): ").strip()
                print(dm.generate(xr, expr))

            elif action == "append":
                if not _require_data(dm, action): continue
                path = input(" File to append: ").strip()
                print(dm.append(path))

            elif action == "merge":
                if not _require_data(dm, action): continue
                other = input(" File to merge with: ").strip()
                col = input(" Column to merge on: ").strip()
                how = input(" Merge type (inner / outer / left / right): ").strip()
                print(dm.merge(other, col, how))

            elif action == "save":
                if not _require_data(dm, action): continue
                filename = input(
                    " Output file name (e.g. output.csv / .xlsx / .json): "
                ).strip()
                dm.save(filename)

            # ----------------------------------------------------------------
            # Transformation
            # ----------------------------------------------------------------
            elif action == "filter":
                if not _require_data(dm, action): continue
                query = input(" Query expression (e.g. age > 30): ").strip()
                print(dm.filter(query))

            elif action == "slice":
                if not _require_data(dm, action): continue
                start = input(" Start index: ").strip()
                end = input(" End index : ").strip()
                print(dm.slice(start, end))

            elif action == "sort":
                if not _require_data(dm, action): continue
                col = input(" Column to sort by: ").strip()
                order = input(" Order (asc / desc): ").strip()
                print(dm.sort(col, order))

            elif action == "rename":
                if not _require_data(dm, action): continue
                mapping = input(
                    " Rename pairs, comma-separated (old:new, ...): "
                ).strip()
                print(dm.rename(mapping))

            elif action == "cast":
                if not _require_data(dm, action): continue
                col = input(" Column name: ").strip()
                dtype = input(" Target dtype (int / float / str / datetime): ").strip()
                print(dm.cast(col, dtype))

            elif action == "addcol":
                if not _require_data(dm, action): continue
                name = input(" New column name: ").strip()
                expr = input(" Expression (e.g. price * qty): ").strip()
                print(dm.addcol(name, expr))

            elif action == "modify":
                if not _require_data(dm, action): continue
                col = input(" Column name : ").strip()
                old_val = input(" Old value : ").strip()
                new_val = input(" New value : ").strip()
                print(dm.modify(col, old_val, new_val))

            elif action in {"delete", "del"}:
                if not _require_data(dm, action): continue
                targets = input(
                    " Column names or row indices (comma-separated): "
                ).strip()
                print(dm.delete(targets))

            elif action == "dedup":
                if not _require_data(dm, action): continue
                cols = input(
                    " Columns to check (comma-separated, or Enter for all): "
                ).strip()
                print(dm.dedup(cols if cols else None))

            elif action == "fillna":
                if not _require_data(dm, action): continue
                col = input(" Column name: ").strip()
                value = input(" Fill value : ").strip()
                print(dm.fillna(col, value))

            elif action == "dropna":
                if not _require_data(dm, action): continue
                col = input(
                    " Column name (or Enter to drop rows with any NaN): "
                ).strip()
                print(dm.dropna(col if col else None))

            # ----------------------------------------------------------------
            # Undo / redo
            # ----------------------------------------------------------------
            elif action == "undo":
                dm.undo()

            elif action == "redo":
                dm.redo()

            # ----------------------------------------------------------------
            # Unknown action
            # ----------------------------------------------------------------
            else:
                print(f"[WARN] Unknown action '{action}'. Type 'help' for options.")

        except (KeyError, ValueError, IndexError, TypeError, OSError) as exc:
            # OSError covers FileNotFoundError / PermissionError raised by
            # 'load', 'append', and 'merge' on a bad path. Without it a typo
            # in a filename would crash the whole REPL, even though main()
            # treats the same FileNotFoundError as a recoverable user error.
            print(f"[ERROR] {exc}")
# ---------------------------------------------------------------------------
|
|
888
|
+
# Entry point
|
|
889
|
+
# ---------------------------------------------------------------------------
|
|
890
|
+
|
|
891
|
+
def main(argv: Optional[list[str]] = None) -> None:
    """
    Program entry point: parse arguments, set up the manipulator, run the REPL.

    Parameters
    ----------
    argv : list[str] | None
        Overrides ``sys.argv[1:]``; useful for tests that need to pass
        arguments without patching the global.
    """
    args = sys.argv[1:] if argv is None else argv

    print(_BANNER)

    if args:
        # A datafile was supplied on the command line — load it or bail out.
        try:
            dm = MFPDataManipulator(args[0])
        except (FileNotFoundError, ValueError) as exc:
            print(f"[ERROR] {exc}")
            sys.exit(1)
    else:
        # Interactive path: keep asking until a file loads or the user skips.
        while True:
            path = input(
                "File to load (CSV / Excel / JSON), or Enter to skip: "
            ).strip()
            if not path:
                dm = MFPDataManipulator()
                print("No file loaded — run 'generate' or 'load' to get started.")
                break
            try:
                dm = MFPDataManipulator(path)
            except (FileNotFoundError, ValueError) as exc:
                print(f"[ERROR] {exc} Please try again.")
            else:
                break

    _run_repl(dm)
# Script entry point: ``python mfp_dmanp.py [datafile]``.
if __name__ == "__main__":
    main()