multifunctionplotter 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,931 @@
1
+ """
2
+ mfp_dmanp.py
3
+ ========================
4
+ Interactive CLI tool for slicing, sorting, merging, generating, appending,
5
+ deleting, modifying, filtering, renaming, casting, deduplicating, and
6
+ computing new columns on CSV / Excel / JSON data files.
7
+
8
+ Usage:
9
+ python mfp_dmanp.py [datafile]
10
+
11
+ Author : Swarnadeep Seth
12
+ Version: 1.0.3
13
+ """
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # Standard-library imports
17
+ # ---------------------------------------------------------------------------
18
+ import os
19
+ import sys
20
+ import warnings
21
+ from collections import deque
22
+ from typing import Optional
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Third-party imports
26
+ # ---------------------------------------------------------------------------
27
+ import numpy as np
28
+ import pandas as pd
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Global configuration
32
+ # ---------------------------------------------------------------------------
33
# Silence library warnings so pandas/numpy chatter does not clutter the REPL.
warnings.filterwarnings("ignore")

__version__ = "1.0.3"

# Maximum number of undo snapshots kept in memory.
# Older snapshots fall off the deque automatically (maxlen).
_UNDO_LIMIT = 20

# File extensions recognised by load() / save().
# Maps extension -> pandas reader callable used by _load_file()/append()/merge().
_READERS: dict[str, object] = {
    ".csv": pd.read_csv,
    ".xlsx": pd.read_excel,
    ".xls": pd.read_excel,
    ".json": pd.read_json,
}
# Maps extension -> name of the DataFrame writer *method*; resolved with
# getattr(self.df, name) at save() time.
_WRITERS: dict[str, str] = {
    ".csv": "to_csv",
    ".xlsx": "to_excel",
    ".xls": "to_excel",
    ".json": "to_json",
}

# Actions the REPL recognises, with one-line descriptions.
# _print_help() renders these grouped into sections.
ACTION_HELP: dict[str, str] = {
    # ── Inspection ──────────────────────────────────────────────────────────
    "show": "Print the current DataFrame.",
    "head": "Print the first N rows (default 10).",
    "tail": "Print the last N rows (default 10).",
    "properties": "Column index, dtypes, NaN counts, and summary statistics.",
    "props": "Alias for 'properties'.",
    "counts": "Frequency count of unique values in a column.",
    # ── Transformation ───────────────────────────────────────────────────────
    "filter": "Keep rows matching a pandas query expression.",
    "slice": "Keep rows at positions [start, end).",
    "sort": "Sort by a column (asc / desc).",
    "rename": "Rename one or more columns (old:new, ...).",
    "cast": "Change a column's dtype (int / float / str / datetime).",
    "addcol": "Add a new column from an expression (uses df.eval).",
    "modify": "Replace a specific value inside a column.",
    "delete": "Drop columns (by name) or rows (by integer index).",
    "del": "Alias for 'delete'.",
    "dedup": "Remove duplicate rows, optionally over chosen columns.",
    "fillna": "Fill empty / NaN cells in a column with a given value.",
    "dropna": "Drop rows that have empty / NaN in a column (or any col).",
    # ── I/O ──────────────────────────────────────────────────────────────────
    "load": "Load a new CSV / Excel / JSON file, replacing current data.",
    "generate": "Build a numeric x/y table from an expression.",
    "gen": "Alias for 'generate'.",
    "append": "Append rows from a second file.",
    "merge": "Merge with a second file on a shared column.",
    "save": "Write the current DataFrame to a file (CSV / Excel / JSON).",
    # ── History ──────────────────────────────────────────────────────────────
    "undo": "Revert the last mutating operation.",
    "redo": "Re-apply the last undone operation.",
    # ── Meta ─────────────────────────────────────────────────────────────────
    "help": "List all available actions.",
    "exit": "Quit the program.",
    "q": "Alias for 'exit'.",
}

# Start-up banner printed once by main().
_BANNER = f"""
{'=' * 70}
MFP Data Manipulator v{__version__}
CSV · Excel · JSON | Filter · Cast · Dedup · Undo/Redo · and more
{'=' * 70}
Type help for a list of commands.
"""
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Core class
103
+ # ---------------------------------------------------------------------------
104
+
105
+ class MFPDataManipulator:
106
+ """
107
+ Wraps a pandas DataFrame and exposes named operations for interactive
108
+ data exploration and cleaning.
109
+
110
+ Design rules
111
+ ------------
112
+ - Every mutating method calls ``self._save_snapshot()`` *before* making
113
+ changes so that ``undo`` can restore the previous state.
114
+ - Every mutating method returns ``self.df`` so callers can print or
115
+ inspect the result immediately.
116
+ - I/O helpers are format-aware: the file extension drives the
117
+ reader / writer, so CSV, Excel, and JSON all work transparently.
118
+ """
119
+
120
+ # ------------------------------------------------------------------
121
+ # Construction
122
+ # ------------------------------------------------------------------
123
+
124
+ def __init__(self, datafile: Optional[str] = None) -> None:
125
+ """
126
+ Parameters
127
+ ----------
128
+ datafile:
129
+ Path to a data file (CSV / Excel / JSON) to load on startup.
130
+ When *None* an empty DataFrame is created so the user can
131
+ start from a ``generate`` or ``load`` call.
132
+ """
133
+ self._undo_stack: deque[pd.DataFrame] = deque(maxlen=_UNDO_LIMIT)
134
+ self._redo_stack: deque[pd.DataFrame] = deque(maxlen=_UNDO_LIMIT)
135
+
136
+ self.datafile: Optional[str] = None
137
+ self.df = pd.DataFrame()
138
+
139
+ if datafile:
140
+ self._load_file(datafile, announce=True)
141
+
142
+ # ------------------------------------------------------------------
143
+ # Internal helpers
144
+ # ------------------------------------------------------------------
145
+
146
+ def _save_snapshot(self) -> None:
147
+ """Push a deep copy of the current DataFrame onto the undo stack."""
148
+ self._undo_stack.append(self.df.copy(deep=True))
149
+ self._redo_stack.clear() # new mutation invalidates redo history
150
+
151
+ def _load_file(self, path: str, announce: bool = True) -> None:
152
+ """
153
+ Read *path* into ``self.df`` using the appropriate pandas reader.
154
+
155
+ Raises
156
+ ------
157
+ FileNotFoundError
158
+ When the file does not exist.
159
+ ValueError
160
+ When the file extension is not supported.
161
+ """
162
+ abs_path = os.path.abspath(path)
163
+ ext = os.path.splitext(abs_path)[1].lower()
164
+ if ext not in _READERS:
165
+ raise ValueError(
166
+ f"Unsupported file type '{ext}'. "
167
+ f"Supported: {', '.join(_READERS)}"
168
+ )
169
+ reader = _READERS[ext]
170
+ self.datafile = abs_path
171
+ self.df = reader(abs_path).replace(np.nan, "", regex=True)
172
+ if announce:
173
+ print(
174
+ f"Loaded '{abs_path}' — "
175
+ f"{len(self.df):,} rows × {len(self.df.columns)} cols."
176
+ )
177
+ print(self.df)
178
+
179
+ # ------------------------------------------------------------------
180
+ # Inspection (non-mutating — no snapshot needed)
181
+ # ------------------------------------------------------------------
182
+
183
+ def show(self) -> pd.DataFrame:
184
+ """Print and return the current DataFrame."""
185
+ print(self.df)
186
+ return self.df
187
+
188
+ def head(self, n: int = 10) -> pd.DataFrame:
189
+ """
190
+ Print and return the first *n* rows.
191
+
192
+ Parameters
193
+ ----------
194
+ n : int Number of rows to display (default 10).
195
+ """
196
+ print(self.df.head(int(n)))
197
+ return self.df.head(int(n))
198
+
199
+ def tail(self, n: int = 10) -> pd.DataFrame:
200
+ """
201
+ Print and return the last *n* rows.
202
+
203
+ Parameters
204
+ ----------
205
+ n : int Number of rows to display (default 10).
206
+ """
207
+ print(self.df.tail(int(n)))
208
+ return self.df.tail(int(n))
209
+
210
+ def properties(self) -> pd.DataFrame:
211
+ """
212
+ Display a column-index table, NaN / empty counts, and summary
213
+ statistics. Returns the ``describe()`` DataFrame.
214
+ """
215
+ col_table = pd.DataFrame({
216
+ "Index": range(len(self.df.columns)),
217
+ "Column": self.df.columns,
218
+ "dtype": self.df.dtypes.values,
219
+ })
220
+ print("\nColumns:")
221
+ print(col_table.to_string(index=False))
222
+
223
+ nan_counts = self.df.replace("", np.nan).isnull().sum()
224
+ print("\nNaN / empty counts:")
225
+ print(nan_counts.to_string())
226
+
227
+ summary = self.df.describe(include="all")
228
+ print("\nSummary statistics:")
229
+ print(summary)
230
+ return summary
231
+
232
+ def counts(self, col: str) -> pd.Series:
233
+ """
234
+ Print and return the frequency of each unique value in *col*,
235
+ sorted most-frequent first.
236
+
237
+ Parameters
238
+ ----------
239
+ col : str Column name.
240
+ """
241
+ result = self.df[col].value_counts(dropna=False)
242
+ print(result.to_string())
243
+ return result
244
+
245
+ # ------------------------------------------------------------------
246
+ # Transformation operations (mutating)
247
+ # ------------------------------------------------------------------
248
+
249
+ def filter(self, query: str) -> pd.DataFrame:
250
+ """
251
+ Keep only rows that satisfy a pandas query expression.
252
+
253
+ Parameters
254
+ ----------
255
+ query : str
256
+ A boolean expression understood by ``DataFrame.query()``.
257
+ Examples: ``'age > 30'``, ``'city == "Roanoke"'``,
258
+ ``'score >= 90 and grade == "A"'``.
259
+
260
+ Notes
261
+ -----
262
+ Column names with spaces must be wrapped in backticks:
263
+ ``'`first name` == "Alice"'``.
264
+ """
265
+ self._save_snapshot()
266
+ before = len(self.df)
267
+ self.df = self.df.query(query).reset_index(drop=True)
268
+ print(f"Filter kept {len(self.df):,} of {before:,} rows.")
269
+ return self.df
270
+
271
+ def slice(self, start: int, end: int) -> pd.DataFrame:
272
+ """
273
+ Keep rows in the half-open interval [start, end).
274
+
275
+ Parameters
276
+ ----------
277
+ start, end : int Row positions (0-based).
278
+ """
279
+ self._save_snapshot()
280
+ self.df = self.df.iloc[int(start):int(end)]
281
+ return self.df
282
+
283
+ def sort(self, col: str, order: str = "asc") -> pd.DataFrame:
284
+ """
285
+ Sort the DataFrame by *col*.
286
+
287
+ Parameters
288
+ ----------
289
+ col : str Column name.
290
+ order : str ``'asc'`` / ``'ascending'`` or ``'desc'`` / ``'descending'``.
291
+
292
+ Raises
293
+ ------
294
+ ValueError When *order* is not recognised.
295
+ """
296
+ order = order.strip().lower()
297
+ if order in {"asc", "ascending"}:
298
+ ascending = True
299
+ elif order in {"desc", "descending"}:
300
+ ascending = False
301
+ else:
302
+ raise ValueError(f"Unknown sort order '{order}'. Use 'asc' or 'desc'.")
303
+
304
+ self._save_snapshot()
305
+ self.df = self.df.sort_values(by=[col], ascending=ascending)
306
+ return self.df
307
+
308
+ def rename(self, mapping: str) -> pd.DataFrame:
309
+ """
310
+ Rename one or more columns.
311
+
312
+ Parameters
313
+ ----------
314
+ mapping : str
315
+ Comma-separated ``old:new`` pairs.
316
+ Example: ``'first name:first_name, Last Name:last_name'``
317
+
318
+ Raises
319
+ ------
320
+ ValueError When a pair is malformed.
321
+ KeyError When an old column name is not found.
322
+ """
323
+ pairs: dict[str, str] = {}
324
+ for token in mapping.split(","):
325
+ token = token.strip()
326
+ if ":" not in token:
327
+ raise ValueError(
328
+ f"Bad rename pair '{token}'. Expected format: old:new"
329
+ )
330
+ old, new = token.split(":", 1)
331
+ pairs[old.strip()] = new.strip()
332
+
333
+ missing = [k for k in pairs if k not in self.df.columns]
334
+ if missing:
335
+ raise KeyError(f"Column(s) not found: {missing}")
336
+
337
+ self._save_snapshot()
338
+ self.df = self.df.rename(columns=pairs)
339
+ print(f"Renamed: {pairs}")
340
+ return self.df
341
+
342
+ def cast(self, col: str, dtype: str) -> pd.DataFrame:
343
+ """
344
+ Change the dtype of *col*.
345
+
346
+ Parameters
347
+ ----------
348
+ col : str Column name.
349
+ dtype : str Target type: ``'int'``, ``'float'``, ``'str'``,
350
+ or ``'datetime'``.
351
+
352
+ Raises
353
+ ------
354
+ ValueError When *dtype* is not a supported keyword or conversion fails.
355
+ """
356
+ dtype = dtype.strip().lower()
357
+ self._save_snapshot()
358
+ try:
359
+ if dtype == "int":
360
+ self.df[col] = pd.to_numeric(self.df[col], errors="raise").astype(int)
361
+ elif dtype == "float":
362
+ self.df[col] = pd.to_numeric(self.df[col], errors="raise").astype(float)
363
+ elif dtype == "str":
364
+ self.df[col] = self.df[col].astype(str)
365
+ elif dtype == "datetime":
366
+ self.df[col] = pd.to_datetime(self.df[col], errors="raise")
367
+ else:
368
+ raise ValueError(
369
+ f"Unknown dtype '{dtype}'. Use: int, float, str, datetime."
370
+ )
371
+ except Exception as exc:
372
+ self._undo_stack.pop() # roll back the snapshot — nothing changed
373
+ raise exc
374
+
375
+ print(f"Column '{col}' cast to {dtype}.")
376
+ return self.df
377
+
378
+ def addcol(self, name: str, expr: str) -> pd.DataFrame:
379
+ """
380
+ Add a new column derived from an expression over existing columns.
381
+
382
+ Parameters
383
+ ----------
384
+ name : str Name for the new column.
385
+ expr : str A ``DataFrame.eval()``-compatible expression.
386
+ Examples: ``'price * qty'``, ``'score / score.max()'``.
387
+ """
388
+ self._save_snapshot()
389
+ self.df[name] = self.df.eval(expr)
390
+ print(f"Column '{name}' added.")
391
+ return self.df
392
+
393
+ def modify(self, col: str, old_val: str, new_val: str) -> pd.DataFrame:
394
+ """
395
+ Replace occurrences of *old_val* with *new_val* in column *col*.
396
+
397
+ Parameters
398
+ ----------
399
+ col : str Target column name.
400
+ old_val : str Value to search for (exact match).
401
+ new_val : str Replacement value.
402
+ """
403
+ self._save_snapshot()
404
+ self.df[col] = self.df[col].replace(old_val, new_val)
405
+ return self.df
406
+
407
+ def delete(self, targets: str) -> pd.DataFrame:
408
+ """
409
+ Drop rows or columns.
410
+
411
+ Parameters
412
+ ----------
413
+ targets : str
414
+ Comma-separated list of **integer row indices** *or*
415
+ **column names** — not both at once.
416
+
417
+ Logic
418
+ -----
419
+ If every token is a plain integer → treat as row indices.
420
+ Otherwise → treat as column names.
421
+ """
422
+ tokens = [t.strip() for t in targets.split(",")]
423
+ self._save_snapshot()
424
+ if all(t.lstrip("-").isnumeric() for t in tokens):
425
+ self.df = self.df.drop(index=[int(t) for t in tokens])
426
+ else:
427
+ self.df = self.df.drop(columns=tokens)
428
+ return self.df
429
+
430
+ def dedup(self, cols: Optional[str] = None) -> pd.DataFrame:
431
+ """
432
+ Remove duplicate rows.
433
+
434
+ Parameters
435
+ ----------
436
+ cols : str | None
437
+ Comma-separated column names to consider. When *None* (or
438
+ empty string), all columns are used.
439
+ """
440
+ self._save_snapshot()
441
+ before = len(self.df)
442
+ subset = (
443
+ [c.strip() for c in cols.split(",")]
444
+ if cols and cols.strip()
445
+ else None
446
+ )
447
+ self.df = self.df.drop_duplicates(subset=subset).reset_index(drop=True)
448
+ print(f"Removed {before - len(self.df):,} duplicate row(s). {len(self.df):,} remain.")
449
+ return self.df
450
+
451
+ def fillna(self, col: str, value: str) -> pd.DataFrame:
452
+ """
453
+ Fill empty / NaN cells in *col* with *value*.
454
+
455
+ Parameters
456
+ ----------
457
+ col : str Column name.
458
+ value : str Replacement value. Automatically converted to int or
459
+ float when the string represents a number.
460
+ """
461
+ self._save_snapshot()
462
+ try:
463
+ typed_value: object = float(value) if "." in value else int(value)
464
+ except (ValueError, AttributeError):
465
+ typed_value = value
466
+
467
+ self.df[col] = self.df[col].replace("", np.nan)
468
+ self.df[col] = self.df[col].fillna(typed_value)
469
+ print(f"Filled NaN / empty values in '{col}' with {typed_value!r}.")
470
+ return self.df
471
+
472
+ def dropna(self, col: Optional[str] = None) -> pd.DataFrame:
473
+ """
474
+ Drop rows that contain empty / NaN values.
475
+
476
+ Parameters
477
+ ----------
478
+ col : str | None
479
+ When given, only rows where *col* is empty / NaN are dropped.
480
+ When *None* / empty string, any row with at least one empty /
481
+ NaN cell is dropped.
482
+ """
483
+ self._save_snapshot()
484
+ before = len(self.df)
485
+ tmp = self.df.replace("", np.nan)
486
+ if col and col.strip():
487
+ self.df = tmp.dropna(subset=[col.strip()]).reset_index(drop=True)
488
+ scope = f"column '{col.strip()}'"
489
+ else:
490
+ self.df = tmp.dropna().reset_index(drop=True)
491
+ scope = "any column"
492
+ print(f"Dropped {before - len(self.df):,} row(s) with NaN / empty in {scope}.")
493
+ return self.df
494
+
495
+ # ------------------------------------------------------------------
496
+ # I/O operations
497
+ # ------------------------------------------------------------------
498
+
499
+ def load(self, path: str) -> pd.DataFrame:
500
+ """
501
+ Replace the current DataFrame by loading a new file.
502
+
503
+ Parameters
504
+ ----------
505
+ path : str Path to a CSV / Excel / JSON file.
506
+
507
+ Notes
508
+ -----
509
+ The REPL calls ``_dirty_check`` before invoking this; calling it
510
+ directly skips the unsaved-changes prompt.
511
+ """
512
+ self._save_snapshot()
513
+ self._load_file(path, announce=True)
514
+ return self.df
515
+
516
+ def generate(self, xr: str, expr: str) -> pd.DataFrame:
517
+ """
518
+ Replace the current DataFrame with a numeric x/y table.
519
+
520
+ Parameters
521
+ ----------
522
+ xr : str Range string ``'start:end'`` (inclusive on both ends).
523
+ expr : str Python expression in terms of *x* and *np*.
524
+ Example: ``'5*x**2 / np.exp(x)'``
525
+
526
+ Security note
527
+ -------------
528
+ ``eval`` is used intentionally for a numeric DSL.
529
+ Do **not** expose this to untrusted input.
530
+ """
531
+ self._save_snapshot()
532
+ lo, hi = (int(v) for v in xr.split(":"))
533
+ x = np.linspace(lo, hi, hi - lo + 1)
534
+ y = eval(expr) # noqa: S307
535
+ self.df = pd.DataFrame({"x": x, "y": y})
536
+ return self.df
537
+
538
+ def append(self, datafile: str) -> pd.DataFrame:
539
+ """
540
+ Append rows from *datafile* to the current DataFrame.
541
+
542
+ Parameters
543
+ ----------
544
+ datafile : str Path to a CSV / Excel / JSON file.
545
+ """
546
+ self._save_snapshot()
547
+ ext = os.path.splitext(datafile)[1].lower()
548
+ reader = _READERS.get(ext, pd.read_csv)
549
+ df2 = reader(datafile).replace(np.nan, "", regex=True)
550
+ before = len(self.df)
551
+ self.df = pd.concat([self.df, df2], ignore_index=True)
552
+ print(f"Appended {len(self.df) - before:,} rows.")
553
+ return self.df
554
+
555
+ def merge(
556
+ self,
557
+ other_file: str,
558
+ on_column: str,
559
+ how: str = "inner",
560
+ ) -> pd.DataFrame:
561
+ """
562
+ Merge ``self.df`` with a second file on a shared column.
563
+
564
+ Parameters
565
+ ----------
566
+ other_file : str Path to the second file (CSV / Excel / JSON).
567
+ on_column : str Column name present in both DataFrames.
568
+ how : str ``'inner'``, ``'outer'``, ``'left'``, or ``'right'``.
569
+ """
570
+ how = how.strip().lower()
571
+ try:
572
+ ext = os.path.splitext(other_file)[1].lower()
573
+ reader = _READERS.get(ext, pd.read_csv)
574
+ df2 = reader(other_file).replace(np.nan, "", regex=True)
575
+ self._save_snapshot()
576
+ self.df = pd.merge(self.df, df2, on=on_column, how=how)
577
+ except FileNotFoundError:
578
+ print(f"[ERROR] File not found: '{other_file}'")
579
+ except KeyError:
580
+ print(f"[ERROR] Column '{on_column}' not found in one of the files.")
581
+ return self.df
582
+
583
+ def save(self, filename: str) -> str:
584
+ """
585
+ Save the current DataFrame to a file.
586
+
587
+ The format is determined by *filename*'s extension (CSV / Excel /
588
+ JSON). The file is written next to the originally loaded file,
589
+ or to the current working directory if no file was loaded.
590
+
591
+ Parameters
592
+ ----------
593
+ filename : str Output file name or relative path.
594
+
595
+ Returns
596
+ -------
597
+ str Absolute path of the saved file.
598
+ """
599
+ directory = (
600
+ os.path.dirname(self.datafile)
601
+ if self.datafile
602
+ else os.getcwd()
603
+ )
604
+ save_path = os.path.join(directory, filename)
605
+ ext = os.path.splitext(filename)[1].lower()
606
+ writer_name = _WRITERS.get(ext, "to_csv")
607
+ writer = getattr(self.df, writer_name)
608
+
609
+ if writer_name == "to_csv":
610
+ writer(save_path, index=False)
611
+ elif writer_name == "to_excel":
612
+ writer(save_path, index=False)
613
+ elif writer_name == "to_json":
614
+ writer(save_path, orient="records", indent=2)
615
+
616
+ print(f"Saved → {save_path}")
617
+ return save_path
618
+
619
+ # ------------------------------------------------------------------
620
+ # Undo / redo
621
+ # ------------------------------------------------------------------
622
+
623
+ def undo(self) -> pd.DataFrame:
624
+ """
625
+ Revert the last mutating operation.
626
+
627
+ Raises
628
+ ------
629
+ IndexError When the undo stack is empty.
630
+ """
631
+ if not self._undo_stack:
632
+ raise IndexError("Nothing to undo.")
633
+ self._redo_stack.append(self.df.copy(deep=True))
634
+ self.df = self._undo_stack.pop()
635
+ print(f"Undone. DataFrame is now {len(self.df):,} rows × {len(self.df.columns)} cols.")
636
+ return self.df
637
+
638
+ def redo(self) -> pd.DataFrame:
639
+ """
640
+ Re-apply the last undone operation.
641
+
642
+ Raises
643
+ ------
644
+ IndexError When the redo stack is empty.
645
+ """
646
+ if not self._redo_stack:
647
+ raise IndexError("Nothing to redo.")
648
+ self._undo_stack.append(self.df.copy(deep=True))
649
+ self.df = self._redo_stack.pop()
650
+ print(f"Redone. DataFrame is now {len(self.df):,} rows × {len(self.df.columns)} cols.")
651
+ return self.df
652
+
653
+
654
+ # ---------------------------------------------------------------------------
655
+ # REPL helpers
656
+ # ---------------------------------------------------------------------------
657
+
658
def _print_help() -> None:
    """Print the available actions grouped into titled sections."""
    sections: dict[str, list[str]] = {
        "Inspection": ["show", "head", "tail", "properties", "props", "counts"],
        "Transformation": ["filter", "slice", "sort", "rename", "cast", "addcol",
                           "modify", "delete", "del", "dedup", "fillna", "dropna"],
        "I/O": ["load", "generate", "gen", "append", "merge", "save"],
        "History": ["undo", "redo"],
        "Meta": ["help", "exit", "q"],
    }
    for title, names in sections.items():
        print(f"\n {title}")
        print(" " + "-" * 44)
        # Only show actions that actually have a description registered.
        for name in (n for n in names if n in ACTION_HELP):
            print(f" {name:<14} {ACTION_HELP[name]}")
    print()
675
+
676
+
677
def _require_data(dm: MFPDataManipulator, action: str) -> bool:
    """Return True when *dm* holds data; print an error and return False otherwise."""
    if not dm.df.empty:
        return True
    print(
        f"[ERROR] No data loaded. "
        f"Run 'load' or 'generate' before '{action}'."
    )
    return False
686
+
687
+
688
def _dirty_check(dm: MFPDataManipulator) -> bool:
    """
    Ask for confirmation when *dm* has undo history (i.e. changes that
    may not have been saved).

    Returns *True* when it is safe to proceed, *False* on cancel.
    """
    if not dm._undo_stack:
        return True
    answer = input(
        " You may have unsaved changes. Continue anyway? (y / n): "
    ).strip().lower()
    return answer in {"y", "yes"}
701
+
702
+
703
def _run_repl(dm: MFPDataManipulator) -> None:
    """
    Enter the interactive command loop for *dm*.

    Each iteration reads one action keyword, prompts for its arguments,
    dispatches to the corresponding method, and prints the result.
    Exceptions raised by method calls are caught and displayed without
    crashing the loop.
    """
    while True:
        try:
            action = input("\nAction> ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            print("\nInterrupted — exiting.")
            sys.exit(0)

        try:
            # ----------------------------------------------------------------
            # Meta
            # ----------------------------------------------------------------
            if action in {"exit", "q"}:
                if dm._undo_stack:
                    ans = input(
                        " You may have unsaved changes. Quit anyway? (y / n): "
                    ).strip().lower()
                    if ans not in {"y", "yes"}:
                        continue
                print("Goodbye.")
                sys.exit(0)

            elif action == "help":
                _print_help()

            # ----------------------------------------------------------------
            # Inspection
            # ----------------------------------------------------------------
            elif action == "show":
                if not _require_data(dm, action): continue
                dm.show()

            elif action == "head":
                if not _require_data(dm, action): continue
                n = input(" Number of rows (default 10): ").strip()
                dm.head(int(n) if n else 10)

            elif action == "tail":
                if not _require_data(dm, action): continue
                n = input(" Number of rows (default 10): ").strip()
                dm.tail(int(n) if n else 10)

            elif action in {"properties", "props"}:
                if not _require_data(dm, action): continue
                dm.properties()

            elif action == "counts":
                if not _require_data(dm, action): continue
                col = input(" Column name: ").strip()
                dm.counts(col)

            # ----------------------------------------------------------------
            # I/O (load / generate allowed without existing data)
            # ----------------------------------------------------------------
            elif action == "load":
                if not _dirty_check(dm): continue
                path = input(" File path (CSV / Excel / JSON): ").strip()
                dm.load(path)

            elif action in {"generate", "gen"}:
                if not _dirty_check(dm): continue
                xr = input(" x range (e.g. 0:10): ").strip()
                expr = input(" y expression (e.g. 5*x**2/np.exp(x)): ").strip()
                print(dm.generate(xr, expr))

            elif action == "append":
                if not _require_data(dm, action): continue
                path = input(" File to append: ").strip()
                print(dm.append(path))

            elif action == "merge":
                if not _require_data(dm, action): continue
                other = input(" File to merge with: ").strip()
                col = input(" Column to merge on: ").strip()
                how = input(" Merge type (inner / outer / left / right): ").strip()
                print(dm.merge(other, col, how))

            elif action == "save":
                if not _require_data(dm, action): continue
                filename = input(
                    " Output file name (e.g. output.csv / .xlsx / .json): "
                ).strip()
                dm.save(filename)

            # ----------------------------------------------------------------
            # Transformation
            # ----------------------------------------------------------------
            elif action == "filter":
                if not _require_data(dm, action): continue
                query = input(" Query expression (e.g. age > 30): ").strip()
                print(dm.filter(query))

            elif action == "slice":
                if not _require_data(dm, action): continue
                start = input(" Start index: ").strip()
                end = input(" End index : ").strip()
                print(dm.slice(start, end))

            elif action == "sort":
                if not _require_data(dm, action): continue
                col = input(" Column to sort by: ").strip()
                order = input(" Order (asc / desc): ").strip()
                print(dm.sort(col, order))

            elif action == "rename":
                if not _require_data(dm, action): continue
                mapping = input(
                    " Rename pairs, comma-separated (old:new, ...): "
                ).strip()
                print(dm.rename(mapping))

            elif action == "cast":
                if not _require_data(dm, action): continue
                col = input(" Column name: ").strip()
                dtype = input(" Target dtype (int / float / str / datetime): ").strip()
                print(dm.cast(col, dtype))

            elif action == "addcol":
                if not _require_data(dm, action): continue
                name = input(" New column name: ").strip()
                expr = input(" Expression (e.g. price * qty): ").strip()
                print(dm.addcol(name, expr))

            elif action == "modify":
                if not _require_data(dm, action): continue
                col = input(" Column name : ").strip()
                old_val = input(" Old value : ").strip()
                new_val = input(" New value : ").strip()
                print(dm.modify(col, old_val, new_val))

            elif action in {"delete", "del"}:
                if not _require_data(dm, action): continue
                targets = input(
                    " Column names or row indices (comma-separated): "
                ).strip()
                print(dm.delete(targets))

            elif action == "dedup":
                if not _require_data(dm, action): continue
                cols = input(
                    " Columns to check (comma-separated, or Enter for all): "
                ).strip()
                print(dm.dedup(cols if cols else None))

            elif action == "fillna":
                if not _require_data(dm, action): continue
                col = input(" Column name: ").strip()
                value = input(" Fill value : ").strip()
                print(dm.fillna(col, value))

            elif action == "dropna":
                if not _require_data(dm, action): continue
                col = input(
                    " Column name (or Enter to drop rows with any NaN): "
                ).strip()
                print(dm.dropna(col if col else None))

            # ----------------------------------------------------------------
            # Undo / redo
            # ----------------------------------------------------------------
            elif action == "undo":
                dm.undo()

            elif action == "redo":
                dm.redo()

            # ----------------------------------------------------------------
            # Unknown action
            # ----------------------------------------------------------------
            else:
                print(f"[WARN] Unknown action '{action}'. Type 'help' for options.")

        except Exception as exc:  # noqa: BLE001 — top-level REPL boundary
            # Fix: the original caught only (KeyError, ValueError,
            # IndexError, TypeError), so a missing file in 'load'/'append'
            # (FileNotFoundError) or an unknown column in a query/eval
            # expression (pandas UndefinedVariableError, a NameError
            # subclass) crashed the whole loop. The REPL is the program's
            # top-level boundary, so report and keep going. SystemExit
            # (from the 'exit' branch) and KeyboardInterrupt are not
            # Exception subclasses and still propagate.
            print(f"[ERROR] {exc}")
885
+
886
+
887
+ # ---------------------------------------------------------------------------
888
+ # Entry point
889
+ # ---------------------------------------------------------------------------
890
+
891
def main(argv: Optional[list[str]] = None) -> None:
    """
    Parse CLI arguments, optionally load a datafile, and start the REPL.

    Parameters
    ----------
    argv : list[str] | None
        Overrides ``sys.argv[1:]``; useful for tests that need to pass
        arguments without patching the global.
    """
    argv = sys.argv[1:] if argv is None else argv

    print(_BANNER)

    if argv:
        # Non-interactive path: a bad file on the command line is fatal.
        try:
            dm = MFPDataManipulator(argv[0])
        except (FileNotFoundError, ValueError) as exc:
            print(f"[ERROR] {exc}")
            sys.exit(1)
    else:
        # Interactive path: keep prompting until a file loads or the
        # user opts to start empty.
        while True:
            path = input(
                "File to load (CSV / Excel / JSON), or Enter to skip: "
            ).strip()
            if not path:
                dm = MFPDataManipulator()
                print("No file loaded — run 'generate' or 'load' to get started.")
                break
            try:
                dm = MFPDataManipulator(path)
            except (FileNotFoundError, ValueError) as exc:
                print(f"[ERROR] {exc} Please try again.")
            else:
                break

    _run_repl(dm)
928
+
929
+
930
# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()