multifunctionplotter 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multifunctionplotter/mfp.py +989 -0
- multifunctionplotter/mfp_data_manipulator.py +192 -0
- multifunctionplotter/mfp_dmanp.py +931 -0
- multifunctionplotter/mfp_dmanp_help.py +741 -0
- multifunctionplotter/mfp_help.py +396 -0
- multifunctionplotter/mfp_server.py +603 -0
- multifunctionplotter/prophet_pred.py +214 -0
- multifunctionplotter-1.0.3.dist-info/METADATA +881 -0
- multifunctionplotter-1.0.3.dist-info/RECORD +13 -0
- multifunctionplotter-1.0.3.dist-info/WHEEL +5 -0
- multifunctionplotter-1.0.3.dist-info/entry_points.txt +3 -0
- multifunctionplotter-1.0.3.dist-info/licenses/LICENSE +201 -0
- multifunctionplotter-1.0.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,741 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
mfp_dmanp_help.py — MFP Data Manipulator Manual Page
|
|
4
|
+
================================================
|
|
5
|
+
Import and call ``show()`` to print the full manual, or call individual
|
|
6
|
+
section printers for targeted help.
|
|
7
|
+
|
|
8
|
+
Usage from CLI:
|
|
9
|
+
python mfp_dmanp_help.py # full manual
|
|
10
|
+
python mfp_dmanp_help.py overview # startup & file formats
|
|
11
|
+
python mfp_dmanp_help.py inspection # show, head, tail, properties, counts
|
|
12
|
+
python mfp_dmanp_help.py filter # filter / slice / sort
|
|
13
|
+
python mfp_dmanp_help.py transform # rename, cast, addcol, modify, delete
|
|
14
|
+
python mfp_dmanp_help.py clean # dedup, fillna, dropna
|
|
15
|
+
python mfp_dmanp_help.py io # load, generate, append, merge, save
|
|
16
|
+
python mfp_dmanp_help.py history # undo / redo
|
|
17
|
+
python mfp_dmanp_help.py tips # query syntax, eval expressions, gotchas
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import os
import sys
from collections.abc import Callable
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# ANSI colour helpers (auto-disabled on non-TTY / Windows cmd)
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
def _detect_no_color() -> bool:
    """Return True when ANSI colour sequences should be suppressed.

    Colour is disabled when the ``NO_COLOR`` environment variable is set to
    a non-empty value, when running on Windows (``os.name == "nt"``), or
    when standard output is not an interactive terminal.
    """
    if os.environ.get("NO_COLOR"):
        return True
    if os.name == "nt":
        return True
    # sys.stdout may be None (e.g. pythonw) or a replacement object without
    # isatty(); treat both as "not a terminal" instead of failing on import.
    isatty = getattr(sys.stdout, "isatty", None)
    return not (callable(isatty) and isatty())


_NO_COLOR = _detect_no_color()


def _c(code: str, text: str) -> str:
    """Wrap *text* in the ANSI SGR sequence *code*, or return it unchanged.

    Parameters
    ----------
    code : str
        SGR parameter string such as ``"1;36"``.
    text : str
        Text to colourise.

    Returns
    -------
    str
        ``text`` itself when colour is disabled, otherwise ``text``
        preceded by ``ESC[<code>m`` and followed by the reset ``ESC[0m``.
    """
    return text if _NO_COLOR else f"\033[{code}m{text}\033[0m"
|
|
31
|
+
|
|
32
|
+
def H1(t: str) -> str:
    """Bold cyan — top-level heading."""
    return _c("1;36", t)


def H2(t: str) -> str:
    """Bold yellow — section heading."""
    return _c("1;33", t)


def H3(t: str) -> str:
    """Bold white — sub-heading / command name."""
    return _c("1;37", t)


def KW(t: str) -> str:
    """Bold green — keyword / argument token."""
    return _c("1;32", t)


def EX(t: str) -> str:
    """Dark grey — example / prompt line."""
    return _c("90", t)


def NOTE(t: str) -> str:
    """Magenta — note / tip."""
    return _c("35", t)


def WARN(t: str) -> str:
    """Bold red — warning."""
    return _c("1;31", t)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Reusable layout helpers
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
def _rule(char: str = "─", width: int = 70) -> str:
    """Return a dark-grey horizontal rule made of *char* repeated *width* times."""
    line = char * width
    return _c("90", line)
|
|
47
|
+
|
|
48
|
+
def _h1(title: str) -> None:
    """Print *title* inside a boxed top-level heading, preceded by a blank line.

    The box interior is 68 columns wide and widens when the title needs
    more room, so the title row always has the same width as the top and
    bottom borders.
    """
    # One leading space plus at least one trailing space around the title.
    inner = max(68, len(title) + 2)
    bar = "─" * inner
    print()
    print(H1(f"┌{bar}┐"))
    print(H1(f"│ {title:<{inner - 1}}│"))
    print(H1(f"└{bar}┘"))
|
|
53
|
+
|
|
54
|
+
def _h2(title: str) -> None:
    """Print a section heading with an underline two characters longer than *title*."""
    underline = " " + "─" * (len(title) + 2)
    print()
    print(H2(f" {title}"))
    print(_c("33", underline))
|
|
58
|
+
|
|
59
|
+
def _h3(cmd: str, synopsis: str) -> None:
    """Print a blank line, the highlighted command name, then its synopsis line."""
    heading = H3(cmd)
    # A single write producing the same two lines the command/synopsis pair needs.
    print(f"\n {heading}\n {synopsis}")
|
|
62
|
+
|
|
63
|
+
def _args(*rows: tuple[str, str]) -> None:
    """Print an aligned argument table.

    Parameters
    ----------
    *rows : tuple[str, str]
        ``(name, description)`` pairs. Names are highlighted as keywords
        and padded to the width of the longest name. Calling with no rows
        prints nothing.
    """
    if not rows:
        # max() over an empty sequence would raise ValueError.
        return
    widest = max(len(name) for name, _ in rows)
    for name, desc in rows:
        # Pad the plain text before colouring it: padding the coloured string
        # counts the invisible escape bytes, which makes the column width
        # depend on whether colour is enabled.
        print(f" {KW(name.ljust(widest))} {desc}")
|
|
68
|
+
|
|
69
|
+
def _ex(*lines: str) -> None:
    """Print example prompt lines.

    A line that already starts with a space is printed as-is (a
    continuation / sub-prompt); any other line is shown as typed at the
    ``Action>`` prompt.
    """
    for entry in lines:
        if entry.startswith(" "):
            rendered = EX(entry)
        else:
            rendered = EX(f" Action> {entry}")
        print(rendered)
|
|
73
|
+
|
|
74
|
+
def _note(text: str) -> None:
    """Print *text* prefixed with a highlighted ``Note:`` label."""
    label = NOTE('Note:')
    print(f" {label} {text}")
|
|
76
|
+
|
|
77
|
+
def _warn(text: str) -> None:
    """Print *text* prefixed with a highlighted ``Warning:`` label."""
    label = WARN('Warning:')
    print(f" {label} {text}")
|
|
79
|
+
|
|
80
|
+
def _blank() -> None:
    """Print one empty line."""
    print("")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
# Section: Overview
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
def section_overview() -> None:
    """Print the overview section of the manual.

    Covers what the tool does, how to start it, the supported file
    formats and the general anatomy of a command.
    """
    _h1("MFP Data Manipulator v3.0.0 — Overview")

    print(f"""
{H2('What it does')}

MFP Data Manipulator is an interactive command-line tool for exploring
and cleaning tabular data without writing any code. Load a file, type
commands at the prompt, inspect results immediately, and save when done.

{H2('Starting the program')}

{EX('python mfp_data_manipulator.py')} # interactive file prompt
{EX('python mfp_data_manipulator.py data.csv')} # load a file directly

On startup you will see the {KW('Action>')} prompt. Type a command name
and press Enter. The program will ask for any required arguments one
at a time.

{H2('Supported file formats')}""")

    rows = [
        (".csv", "Comma-separated values (read + write)"),
        (".xlsx", "Excel workbook (read + write, requires openpyxl)"),
        (".xls", "Legacy Excel (read + write, requires xlrd)"),
        (".json", "JSON array of objects (read + write)"),
    ]
    w = max(len(ext) for ext, _ in rows)
    for ext, desc in rows:
        # Pad the plain extension, then colour it. Padding the coloured
        # string counts the escape bytes, which left the descriptions
        # misaligned whenever colour was enabled.
        print(f" {KW(ext.ljust(w))} {desc}")

    print(f"""
The format is detected automatically from the file extension for every
{KW('load')}, {KW('save')}, {KW('append')}, and {KW('merge')} operation.

{H2('Command anatomy')}

Every command follows the same pattern:

1. Type the command name at {KW('Action>')} and press Enter.
2. The program prompts for each argument in turn.
3. The result is printed immediately.
4. The change is recorded on the undo stack — type {KW('undo')} to revert.

{NOTE('Tip:')} Type {KW('help')} at the prompt for a compact command listing.
Type {KW('q')} or {KW('exit')} to quit (you will be warned about unsaved changes).""")
    _blank()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# Section: Inspection
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
def section_inspection() -> None:
    """Print the "Inspection Commands" section of the manual.

    Documents ``show``, ``head``, ``tail``, ``properties`` (alias
    ``props``) and ``counts``, each with a synopsis, its arguments where
    it has any, and an example session.
    """
    _h1("Inspection Commands")

    print(f"""
These commands are read-only — they never modify the DataFrame and do
not push to the undo stack.""")

    # show ─────────────────────────────────────────────────────────────────
    _h2("show")
    _h3("show", "Print the entire DataFrame to the terminal.")
    _blank()
    _ex("show")
    _note("For large files use head or tail instead to avoid flooding the terminal.")

    # head ─────────────────────────────────────────────────────────────────
    _h2("head")
    _h3("head", "Print the first N rows.")
    _args(("N", "Number of rows to show. Default: 10 (press Enter to accept)."))
    _blank()
    _ex("head", " Number of rows (default 10): 20")

    # tail ─────────────────────────────────────────────────────────────────
    _h2("tail")
    _h3("tail", "Print the last N rows.")
    _args(("N", "Number of rows to show. Default: 10."))
    _blank()
    _ex("tail", " Number of rows (default 10): 5")

    # properties / props ───────────────────────────────────────────────────
    _h2("properties (alias: props)")
    _h3("properties", "Display a full diagnostic summary of the current DataFrame.")
    print(f"""
Output includes three sections:

{KW('Columns')} — index number, name, and dtype for every column.
{KW('NaN counts')} — number of empty / NaN cells per column.
{KW('Statistics')} — pandas describe() covering count, mean, std, min,
quartiles, max for numeric columns; top/freq for others.""")
    _blank()
    _ex("properties")
    _ex("props")

    # counts ───────────────────────────────────────────────────────────────
    _h2("counts")
    _h3("counts", "Show the frequency of each unique value in a column.")
    _args(("Column name", "Name of the column to count."))
    _blank()
    _ex("counts", " Column name: status")
    _note("Results are sorted most-frequent first. NaN / empty values are included.")
    _blank()
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
# Section: Filter / Slice / Sort
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
def section_filter() -> None:
    """Print the "Row Selection" section of the manual.

    Documents the ``filter``, ``slice`` and ``sort`` commands: synopsis,
    arguments, example sessions and the notes/warnings attached to each.
    """
    _h1("Row Selection — filter, slice, sort")

    # filter ───────────────────────────────────────────────────────────────
    _h2("filter")
    _h3("filter", "Keep only rows that satisfy a boolean expression.")
    _args(
        ("Query expression",
         "A pandas query string (see Tips section for full syntax)."),
    )
    print(f"""
{H3('Examples')}""")
    # Each entry: (command typed at the prompt, query expression, trailing comment).
    examples = [
        ("filter", "age > 30", "rows where age is over 30"),
        ("filter", 'city == "Roanoke"', "exact string match"),
        ("filter", "score >= 90 and grade == \"A\"","compound condition"),
        ("filter", "price != 0", "exclude zero-price rows"),
        ("filter", "`first name` == \"Alice\"", "column names with spaces need backticks"),
    ]
    for cmd, expr, comment in examples:
        print(f" {EX(f'Action> {cmd}')}")
        print(f" {EX(f' Query expression: {expr}')}{_c('90', f' # {comment}')}")
        _blank()

    _note("filter resets the row index after removing rows.")
    _warn("filter is permanent until you run undo. Use head to preview first.")

    # slice ────────────────────────────────────────────────────────────────
    _h2("slice")
    _h3("slice", "Keep rows at integer positions [start, end) (end is excluded).")
    _args(
        ("Start index", "First row to keep (0-based)."),
        ("End index", "Row after the last one to keep."),
    )
    _blank()
    _ex("slice",
        " Start index: 0",
        " End index : 100")
    _note("Equivalent to df.iloc[start:end]. Use filter for condition-based selection.")

    # sort ─────────────────────────────────────────────────────────────────
    _h2("sort")
    _h3("sort", "Sort the DataFrame by a single column.")
    _args(
        ("Column to sort by", "Column name."),
        ("Order", "asc (ascending) or desc (descending)."),
    )
    _blank()
    _ex("sort", " Column to sort by: price", " Order (asc / desc): desc")
    _note("Aliases ascending and descending are also accepted.")
    _blank()
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# ---------------------------------------------------------------------------
|
|
251
|
+
# Section: Transform
|
|
252
|
+
# ---------------------------------------------------------------------------
|
|
253
|
+
|
|
254
|
+
def section_transform() -> None:
    """Print the "Transformation Commands" section of the manual.

    Documents ``rename``, ``cast``, ``addcol``, ``modify`` and ``delete``
    (alias ``del``), including the dtype reference table for ``cast`` and
    expression examples for ``addcol``.
    """
    _h1("Transformation Commands")

    # rename ───────────────────────────────────────────────────────────────
    _h2("rename")
    _h3("rename", "Rename one or more columns in a single operation.")
    _args(
        ("Rename pairs",
         "Comma-separated old:new pairs."),
    )
    _blank()
    _ex("rename",
        " Rename pairs, comma-separated (old:new, ...): first name:first_name, Last Name:last_name")
    _note("All old names are validated before any renaming takes place.")

    # cast ─────────────────────────────────────────────────────────────────
    _h2("cast")
    _h3("cast", "Change the data type of a column.")
    _args(
        ("Column name", "The column to convert."),
        ("Target dtype", "One of: int | float | str | datetime"),
    )
    print(f"""
{H3('dtype reference')}

{KW('int')} Converts via pd.to_numeric then casts to Python int.
Fails loudly if any value cannot be converted.
{KW('float')} Converts via pd.to_numeric then casts to float64.
{KW('str')} Converts every value to a Python string (never fails).
{KW('datetime')} Parses dates with pd.to_datetime. Many formats are
recognised automatically (ISO 8601, US, EU, etc.).
""")
    _ex("cast",
        " Column name: purchase_date",
        " Target dtype (int / float / str / datetime): datetime")
    _note("If conversion fails, the DataFrame is NOT modified (the snapshot is rolled back).")

    # addcol ───────────────────────────────────────────────────────────────
    _h2("addcol")
    _h3("addcol", "Add a new column computed from existing columns.")
    _args(
        ("New column name", "Name for the derived column."),
        ("Expression", "A pandas eval expression over existing column names."),
    )
    print(f"""
{H3('Expression examples')}

{KW('price * qty')} multiply two columns
{KW('revenue - cost')} subtract
{KW('score / score.max()')} normalise to [0, 1]
{KW('(a + b) / 2')} average of two columns
{KW('tax_rate * subtotal + subtotal')} compound expression
""")
    _ex("addcol",
        " New column name: total",
        " Expression (e.g. price * qty): price * qty")
    _note("Uses DataFrame.eval() — column names with spaces must be wrapped in backticks.")

    # modify ───────────────────────────────────────────────────────────────
    _h2("modify")
    _h3("modify", "Replace a specific value anywhere in a column.")
    _args(
        ("Column name", "Column to search."),
        ("Old value", "Exact value to find (string comparison)."),
        ("New value", "Replacement value."),
    )
    _blank()
    _ex("modify",
        " Column name : status",
        " Old value : PNDG",
        " New value : PENDING")
    _note("Only exact matches are replaced. For pattern-based replacement, use cast to str first.")

    # delete / del ─────────────────────────────────────────────────────────
    _h2("delete (alias: del)")
    _h3("delete", "Drop rows (by index) or columns (by name).")
    _args(
        ("Targets",
         "Comma-separated integers to drop rows, OR column names to drop columns."),
    )
    print(f"""
{H3('Row deletion')} — supply integer row indices:
""")
    _ex("delete", " Column names or row indices (comma-separated): 0, 5, 12")
    print(f"""
{H3('Column deletion')} — supply column names:
""")
    _ex("delete", " Column names or row indices (comma-separated): notes, internal_id")
    _warn("You cannot mix row indices and column names in the same call.")
    _blank()
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
# ---------------------------------------------------------------------------
|
|
347
|
+
# Section: Clean
|
|
348
|
+
# ---------------------------------------------------------------------------
|
|
349
|
+
|
|
350
|
+
def section_clean() -> None:
    """Print the "Data Cleaning" section of the manual.

    Documents ``dedup``, ``fillna`` and ``dropna`` with their arguments
    and example sessions; lines starting with ``→`` explain what the
    preceding example does.
    """
    _h1("Data Cleaning — dedup, fillna, dropna")

    # dedup ────────────────────────────────────────────────────────────────
    _h2("dedup")
    _h3("dedup", "Remove duplicate rows, keeping the first occurrence.")
    _args(
        ("Columns (optional)",
         "Comma-separated column names to compare. Press Enter to use all columns."),
    )
    _blank()
    _ex("dedup", " Columns to check (comma-separated, or Enter for all): ")
    print(EX(" → uses all columns (full-row duplicates)"))
    _blank()
    _ex("dedup", " Columns to check (comma-separated, or Enter for all): email")
    print(EX(" → keeps the first row for each unique email address"))
    _note("The row index is reset after deduplication.")

    # fillna ───────────────────────────────────────────────────────────────
    _h2("fillna")
    _h3("fillna", "Fill empty / NaN cells in one column with a fixed value.")
    _args(
        ("Column name", "Column containing the missing values."),
        # The description carries its own line break and continuation indent.
        ("Fill value", "Replacement value. Automatically cast to int or float\n"
         " when the string is a valid number."),
    )
    _blank()
    _ex("fillna",
        " Column name: age",
        " Fill value : 0")
    _blank()
    _ex("fillna",
        " Column name: country",
        " Fill value : Unknown")
    _note("Both empty strings ('') and NaN are treated as missing.")

    # dropna ───────────────────────────────────────────────────────────────
    _h2("dropna")
    _h3("dropna", "Drop rows that contain missing values.")
    _args(
        ("Column name (optional)",
         "Drop rows where this specific column is empty / NaN.\n"
         " Press Enter to drop rows missing in ANY column."),
    )
    _blank()
    _ex("dropna", " Column name (or Enter to drop rows with any NaN): ")
    print(EX(" → drops every row that has at least one empty / NaN cell"))
    _blank()
    _ex("dropna", " Column name (or Enter to drop rows with any NaN): email")
    print(EX(" → drops only rows where the email column is empty / NaN"))
    _note("The row index is reset after dropping.")
    _blank()
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# ---------------------------------------------------------------------------
|
|
405
|
+
# Section: I/O
|
|
406
|
+
# ---------------------------------------------------------------------------
|
|
407
|
+
|
|
408
|
+
def section_io() -> None:
    """Print the "I/O Commands" section of the manual.

    Documents ``load``, ``generate`` (alias ``gen``), ``append``,
    ``merge`` and ``save``, including expression examples for
    ``generate`` and the merge-type reference table.
    """
    _h1("I/O Commands — load, generate, append, merge, save")

    # load ─────────────────────────────────────────────────────────────────
    _h2("load")
    _h3("load", "Replace the current DataFrame by loading a new file.")
    _args(
        ("File path", "Path to a CSV, Excel (.xlsx / .xls), or JSON file."),
    )
    _blank()
    _ex("load", " File path (CSV / Excel / JSON): /data/customers.xlsx")
    _note("If you have unsaved changes you will be prompted to confirm before loading.")
    _note("The previous state is pushed to the undo stack, so undo recovers it.")

    # generate / gen ───────────────────────────────────────────────────────
    _h2("generate (alias: gen)")
    _h3("generate", "Build a numeric x/y DataFrame from a mathematical expression.")
    _args(
        ("x range", "Integer range in the form start:end (both inclusive)."),
        ("y expression", "A Python/NumPy expression in terms of x and np."),
    )
    # The last example row previously had no separator, so the keyword and
    # its description were printed run together.
    print(f"""
{H3('Expression examples')}

{KW('x**2')} square
{KW('np.sin(x)')} sine wave
{KW('5*x**2 / np.exp(x)')} damped parabola
{KW('np.log(x + 1)')} shifted log (safe for x = 0)
{KW('np.where(x > 5, 1, 0)')} step function
""")
    _ex("generate",
        " x range (e.g. 0:10): 0:100",
        " y expression (e.g. 5*x**2/np.exp(x)): np.sin(x / 10)")
    _warn("generate replaces the current DataFrame. Use undo to recover previous data.")

    # append ───────────────────────────────────────────────────────────────
    _h2("append")
    _h3("append", "Append rows from a second file to the bottom of the current DataFrame.")
    _args(
        ("File to append", "Path to a file whose columns match the current DataFrame."),
    )
    _blank()
    _ex("append", " File to append: new_orders.csv")
    _note("The row index is reset after appending. Column names must match exactly.")

    # merge ────────────────────────────────────────────────────────────────
    _h2("merge")
    _h3("merge", "Join the current DataFrame with a second file on a shared column.")
    _args(
        ("File to merge with", "Path to the second file (any supported format)."),
        ("Column to merge on", "Column name that exists in both DataFrames."),
        ("Merge type", "inner | outer | left | right"),
    )
    print(f"""
{H3('Merge type reference')}

{KW('inner')} Keep only rows where the key exists in BOTH tables.
{KW('outer')} Keep all rows from both tables; fill gaps with NaN.
{KW('left')} Keep all rows from the left (current) table.
{KW('right')} Keep all rows from the right (new) table.
""")
    _ex("merge",
        " File to merge with: products.csv",
        " Column to merge on: product_id",
        " Merge type (inner / outer / left / right): left")

    # save ─────────────────────────────────────────────────────────────────
    _h2("save")
    _h3("save", "Write the current DataFrame to a file.")
    _args(
        ("Output file name",
         "File name with extension (.csv / .xlsx / .json).\n"
         " Saved next to the loaded file, or in the current\n"
         " working directory if no file was loaded."),
    )
    _blank()
    _ex("save", " Output file name (e.g. output.csv / .xlsx / .json): cleaned_data.csv")
    _ex("save", " Output file name (e.g. output.csv / .xlsx / .json): report.xlsx")
    _note("The format is determined entirely by the extension you choose.")
    _note("Saving does NOT clear the undo stack.")
    _blank()
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
# ---------------------------------------------------------------------------
|
|
492
|
+
# Section: History (undo / redo)
|
|
493
|
+
# ---------------------------------------------------------------------------
|
|
494
|
+
|
|
495
|
+
def section_history() -> None:
    """Print the "History" section of the manual.

    Explains the snapshot mechanism described in the text, documents
    ``undo`` and ``redo`` with sample output, and ends with a typical
    filter / undo / redo workflow.
    """
    _h1("History — undo & redo")

    print(f"""
Every command that modifies the DataFrame automatically saves a snapshot
of the previous state before making any change. Up to {KW('20')} snapshots are
kept in memory at a time.

{NOTE('Non-mutating commands')} (show, head, tail, properties, counts) do NOT push
to the undo stack.
""")

    # undo ─────────────────────────────────────────────────────────────────
    _h2("undo")
    _h3("undo", "Revert the last mutating operation.")
    print(f"""
Pops the most recent snapshot off the undo stack and restores it as
the current DataFrame. The reverted state is pushed onto the redo
stack so it can be re-applied with {KW('redo')}.
""")
    _ex("undo")
    # Sample program output shown beneath the example command.
    print(EX(" Undone. DataFrame is now 1 200 rows × 8 cols."))

    # redo ─────────────────────────────────────────────────────────────────
    _h2("redo")
    _h3("redo", "Re-apply the last undone operation.")
    print(f"""
Pops the most recent entry off the redo stack. Any new mutating
command clears the redo stack entirely (standard editor behaviour).
""")
    _ex("redo")
    print(EX(" Redone. DataFrame is now 900 rows × 8 cols."))

    print(f"""
{H3('Typical workflow')}

{EX('Action> filter')}
{EX(' Query expression: age > 18')}
{EX(' Filter kept 820 of 1 000 rows.')}

{EX('Action> undo')}
{EX(' Undone. DataFrame is now 1 000 rows × 5 cols.')}

{EX('Action> redo')}
{EX(' Redone. DataFrame is now 820 rows × 5 cols.')}

{NOTE('Tip:')} The exit / load / generate commands warn you if the undo stack
is non-empty, indicating you may have unsaved changes.
""")
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
# ---------------------------------------------------------------------------
|
|
547
|
+
# Section: Tips
|
|
548
|
+
# ---------------------------------------------------------------------------
|
|
549
|
+
|
|
550
|
+
def section_tips() -> None:
    """Print the "Tips, Query Syntax & Common Patterns" section of the manual.

    Covers the query syntax used by ``filter``, expression syntax for
    ``addcol`` and ``generate``, a date-handling walkthrough, the bulk
    rename pattern and a numbered ten-step cleaning workflow.
    """
    _h1("Tips, Query Syntax & Common Patterns")

    # Filter / query syntax ────────────────────────────────────────────────
    _h2("filter — query expression syntax")
    print(f"""
The {KW('filter')} command uses pandas DataFrame.query() under the hood.
The full expression language is a subset of Python with these rules:

{H3('Comparison operators')}
{KW('>')} {KW('>=')} {KW('<')} {KW('<=')} {KW('==')} {KW('!=')}

{H3('Logical operators')}
{KW('and')} {KW('or')} {KW('not')}

{H3('String values')} — use double or single quotes inside the expression:
{EX('Action> filter')}
{EX(' Query expression: city == "New York"')}

{H3('Column names with spaces')} — wrap in backticks:
{EX('Action> filter')}
{EX(' Query expression: `first name` == "Alice" and `zip code` == "24060"')}

{H3('Membership test')} — check if a value is in a list:
{EX('Action> filter')}
{EX(' Query expression: status in ["open", "pending"]')}

{H3('Negation')}:
{EX('Action> filter')}
{EX(' Query expression: status not in ["closed", "cancelled"]')}

{H3('Null / empty check')} — find non-empty rows:
{EX('Action> filter')}
{EX(' Query expression: email != ""')}

{NOTE('Tip:')} Run {KW('cast')} to convert a column to the right dtype before
filtering on numeric comparisons — CSV columns are often read as strings.
""")

    # addcol / generate expression syntax ─────────────────────────────────
    _h2("addcol & generate — expression syntax")
    # The last generate example previously had no separator between the
    # expression and its description.
    print(f"""
{KW('addcol')} uses DataFrame.eval() which understands all standard Python
arithmetic operators over column names.

{KW('generate')} uses Python's built-in eval() with x (a NumPy array) and
the np module in scope.

{H3('addcol examples')}
{EX('price * qty')} multiply two columns → total revenue
{EX('(high + low) / 2')} midpoint
{EX('revenue - cost')} profit
{EX('score / score.max()')} normalise to [0, 1]
{EX('tax * (subtotal + shipping)')}

{H3('generate examples')}
{EX('x**2')} parabola
{EX('np.sqrt(x)')} square root
{EX('np.sin(x) * np.exp(-x/10)')} damped sine
{EX('np.log1p(x)')} natural log of (x + 1)
{EX('np.where(x % 2 == 0, 1, -1)')} alternate sign by parity

{NOTE('Tip:')} For generate, always wrap potentially unsafe domains:
e.g. {KW('np.log(x + 1e-9)')} instead of {KW('np.log(x)')} to avoid -inf at 0.
""")

    # cast datetime ────────────────────────────────────────────────────────
    _h2("Working with dates")
    print(f"""
Step 1 — cast the column to datetime:
{EX('Action> cast')}
{EX(' Column name: order_date')}
{EX(' Target dtype (int / float / str / datetime): datetime')}

Step 2 — filter using ISO 8601 strings (pandas parses them automatically):
{EX('Action> filter')}
{EX(' Query expression: order_date >= "2024-01-01"')}

Step 3 — add a derived year column:
{EX('Action> addcol')}
{EX(' New column name: year')}
{EX(" Expression: order_date.dt.year")}

{NOTE('Tip:')} After cast datetime, the column supports .dt accessor
expressions inside addcol (e.g. order_date.dt.month).
""")

    # rename patterns ──────────────────────────────────────────────────────
    _h2("Bulk rename pattern")
    print(f"""
Rename multiple columns in one command by separating pairs with commas:
{EX('Action> rename')}
{EX(' Rename pairs: First Name:first_name, Last Name:last_name, ZIP Code:zip_code')}

Handy after loading files that have inconsistent column name formatting.
""")

    # Common cleaning workflow ─────────────────────────────────────────────
    _h2("Common cleaning workflow")
    steps = [
        ("load", "Load the raw file."),
        ("properties", "Inspect dtypes, spot NaN columns."),
        ("cast", "Fix columns read as wrong dtype (e.g. dates, numbers)."),
        ("rename", "Standardise column names."),
        ("dedup", "Remove duplicate rows."),
        ("fillna", "Fill known-default columns (e.g. quantity → 0)."),
        ("dropna", "Remove rows missing critical fields (e.g. id, email)."),
        ("filter", "Discard out-of-range or invalid rows."),
        ("addcol", "Compute derived columns (e.g. total = price * qty)."),
        ("save", "Write the clean file."),
    ]
    w = max(len(cmd) for cmd, _ in steps)
    for i, (cmd, desc) in enumerate(steps, 1):
        # Pad the plain command name, then colour it. Padding the coloured
        # string counts the escape bytes, which left the longest command
        # unpadded and the description column ragged in colour mode.
        print(f" {i:>2}. {KW(cmd.ljust(w))} {desc}")
    _blank()

    print(f" {NOTE('Tip:')} Between each step, run {KW('undo')} if the result looks wrong,")
    print(f" then try a different approach. Run {KW('head 5')} to spot-check quickly.")
    _blank()
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
# ---------------------------------------------------------------------------
|
|
672
|
+
# Master index
|
|
673
|
+
# ---------------------------------------------------------------------------
|
|
674
|
+
|
|
675
|
+
# Maps each CLI section key to (table-of-contents title, printer).
# The printer is a zero-argument callable that writes the section to stdout;
# dict order is the order used for the full manual.
_SECTIONS: dict[str, tuple[str, Callable[[], None]]] = {
    "overview": ("Overview & startup", section_overview),
    "inspection": ("Inspection — show, head, tail, ...", section_inspection),
    "filter": ("Row selection — filter, slice, sort", section_filter),
    "transform": ("Transformation — rename, cast, ...", section_transform),
    "clean": ("Data cleaning — dedup, fillna, ...", section_clean),
    "io": ("I/O — load, generate, save, ...", section_io),
    "history": ("History — undo & redo", section_history),
    "tips": ("Tips, query syntax & patterns", section_tips),
}
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
# ---------------------------------------------------------------------------
|
|
688
|
+
# Public API
|
|
689
|
+
# ---------------------------------------------------------------------------
|
|
690
|
+
|
|
691
|
+
def show(section: str = "") -> None:
    """
    Print the whole manual, or a single section of it.

    Parameters
    ----------
    section : str
        Optional section key (surrounding whitespace and letter case are
        ignored). An empty string, the default, prints a table of
        contents followed by every section in ``_SECTIONS`` order.

    Raises
    ------
    KeyError
        When *section* is non-empty and is not a key of ``_SECTIONS``.
    """
    if not section:
        # Full manual — table of contents first, then every section.
        _h1("MFP Data Manipulator — Full Manual")
        print(f"\n {H2('Table of contents')}\n")
        for name, entry in _SECTIONS.items():
            command = KW(f'python mfp_dmanp_help.py {name}')
            print(f" {command:<52} {entry[0]}")
        _blank()

        for entry in _SECTIONS.values():
            entry[1]()
            print(_rule())
        return

    key = section.strip().lower()
    if key in _SECTIONS:
        _SECTIONS[key][1]()
        return

    valid = ", ".join(_SECTIONS)
    raise KeyError(f"Unknown section '{key}'. Valid: {valid}")
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
# ---------------------------------------------------------------------------
|
|
728
|
+
# CLI entry point
|
|
729
|
+
# ---------------------------------------------------------------------------
|
|
730
|
+
|
|
731
|
+
if __name__ == "__main__":
    # Optional first argument selects a section; anything after it is ignored.
    arg = sys.argv[1].strip().lower() if len(sys.argv) > 1 else ""
    if arg in {"-h", "--help"}:
        print(__doc__)
        sys.exit(0)
    try:
        show(arg)
    except KeyError as exc:
        # str(KeyError) is the repr of its argument, so formatting the
        # exception directly would wrap the message in quotes.
        message = exc.args[0] if exc.args else exc
        print(f"Error: {message}")
        print(f"Usage: python mfp_dmanp_help.py [{' | '.join(_SECTIONS)}]")
        sys.exit(1)
|