dataframe-textual 1.0.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
 
 from .data_frame_help_panel import DataFrameHelpPanel
 from .data_frame_table import DataFrameTable, History
-from .data_frame_viewer import DataFrameViewer, _load_dataframe
+from .data_frame_viewer import DataFrameViewer
 from .table_screen import FrequencyScreen, RowDetailScreen, TableScreen
 from .yes_no_screen import (
     ConfirmScreen,
@@ -31,5 +31,4 @@ __all__ = [
     "FilterScreen",
     "FreezeScreen",
     "OpenFileScreen",
-    "_load_dataframe",
 ]
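
The net effect of the two `__init__` hunks: the private `_load_dataframe` helper is dropped from the public surface, and loading now goes through `load_dataframe` in the `common` module (added further down). A minimal sketch of programmatic use against 1.4.0; the top-level import name `dataframe_viewer` is an assumption taken from the docstring in `common.py`, not something this diff confirms:

```python
# Hypothetical usage sketch, not from the package docs. The import name
# `dataframe_viewer` is assumed from the "Common utilities ... for
# dataframe_viewer" docstring below; adjust to the installed package layout.
from dataframe_viewer import DataFrameViewer
from dataframe_viewer.common import load_dataframe

# load_dataframe() returns (DataFrame, filename, tabname) tuples, which the
# viewer now takes directly as positional sources (see the new main() below).
sources = load_dataframe(["data.csv"], file_format="csv")
DataFrameViewer(*sources).run()
```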
@@ -4,31 +4,24 @@ import argparse
 import sys
 from pathlib import Path
 
+from .common import SUPPORTED_FORMATS, load_dataframe
 from .data_frame_viewer import DataFrameViewer
 
-SUPPORTED_FORMATS = ["csv", "excel", "tsv", "parquet", "json", "ndjson"]
 
+def cli() -> argparse.Namespace:
+    """Parse command-line arguments.
 
-def main() -> None:
-    """Run the DataFrame Viewer application.
-
-    Parses command-line arguments to determine input files or stdin, validates
-    file existence, and launches the interactive DataFrame Viewer application.
-
-    Returns:
-        None
-
-    Raises:
-        SystemExit: If invalid arguments are provided or required files are missing.
+    Determines input files or stdin and validates file existence
     """
     parser = argparse.ArgumentParser(
+        prog="dv",
         description="Interactive terminal based viewer/editor for tabular data (e.g., CSV/Excel).",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="Examples:\n"
-        " dataframe-textual data.csv\n"
-        " dataframe-textual file1.csv file2.csv file3.csv\n"
-        " dataframe-textual data.xlsx (opens all sheets in tabs)\n"
-        " cat data.csv | dataframe-textual --format csv\n",
+        " %(prog)s data.csv\n"
+        " %(prog)s file1.csv file2.csv file3.csv\n"
+        " %(prog)s data.xlsx (opens each sheet in separate tab)\n"
+        " cat data.csv | %(prog)s --format csv\n",
     )
     parser.add_argument("files", nargs="*", help="Files to view (or read from stdin)")
     parser.add_argument(
@@ -37,27 +30,59 @@ def main() -> None:
         choices=SUPPORTED_FORMATS,
         help="Specify the format of the input files (csv, excel, tsv etc.)",
     )
-    parser.add_argument("-H", "--no-header", action="store_true", help="Specify that input files have no header row")
+    parser.add_argument(
+        "-H",
+        "--no-header",
+        action="store_true",
+        help="Specify that input files have no header row when reading CSV/TSV",
+    )
+    parser.add_argument(
+        "-I", "--no-inferrence", action="store_true", help="Do not infer data types when reading CSV/TSV"
+    )
+    parser.add_argument(
+        "-C", "--comment-prefix", nargs="?", const="#", help="Comment lines are skipped when reading CSV/TSV"
+    )
+    parser.add_argument("-L", "--skip-lines", type=int, default=0, help="Skip lines when reading CSV/TSV")
+    parser.add_argument(
+        "-K", "--skip-rows-after-header", type=int, default=0, help="Skip rows after header when reading CSV/TSV"
+    )
+    parser.add_argument("-U", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
 
     args = parser.parse_args()
-    filenames = []
+    if args.files is None:
+        args.files = []
 
     # Check if reading from stdin (pipe or redirect)
     if not sys.stdin.isatty():
-        filenames.append("-")
-    if args.files:
+        args.files.append("-")
+    else:
         # Validate all files exist
         for filename in args.files:
             if not Path(filename).exists():
                 print(f"File not found: {filename}")
                 sys.exit(1)
-        filenames.extend(args.files)
 
-    if not filenames:
+    if not args.files:
         parser.print_help()
         sys.exit(1)
 
-    app = DataFrameViewer(*filenames, file_format=args.format, has_header=not args.no_header)
+    return args
+
+
+def main() -> None:
+    """Run the DataFrame Viewer application."""
+    args = cli()
+    sources = load_dataframe(
+        args.files,
+        file_format=args.format,
+        has_header=not args.no_header,
+        infer_schema=not args.no_inferrence,
+        comment_prefix=args.comment_prefix,
+        skip_lines=args.skip_lines,
+        skip_rows_after_header=args.skip_rows_after_header,
+        null_values=args.null,
+    )
+    app = DataFrameViewer(*sources)
     app.run()
 
 
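
The CLI grows five reader flags (`-I`, `-C`, `-L`, `-K`, `-U`) alongside the reworked `-H`, and `main()` now simply forwards them to `load_dataframe`. For reference, a sketch of the roughly equivalent polars reader call these flags end up driving (see `load_file` below; exact semantics depend on the installed polars version):

```python
import polars as pl

# Roughly what `dv data.tsv -H -I -C '#' -L 2 -U NA null` feeds into pl.scan_csv:
df = pl.scan_csv(
    "data.tsv",
    separator="\t",
    has_header=False,            # -H / --no-header
    infer_schema=False,          # -I / --no-inferrence
    comment_prefix="#",          # -C / --comment-prefix (const "#" when given bare)
    skip_lines=2,                # -L / --skip-lines
    skip_rows_after_header=0,    # -K / --skip-rows-after-header (default 0)
    null_values=["NA", "null"],  # -U / --null
).collect()
```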
@@ -1,15 +1,17 @@
 """Common utilities and constants for dataframe_viewer."""
 
 import re
+import sys
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Any
 
 import polars as pl
 from rich.text import Text
 
-# Special string to represent null value
-NULL = "NULL"
-NULL_DISPLAY = "-"
+# Supported file formats
+SUPPORTED_FORMATS = {"tsv", "csv", "excel", "xlsx", "xls", "parquet", "json", "ndjson"}
+
 
 
 # Boolean string mappings
@@ -25,6 +27,10 @@ BOOLS = {
     "0": False,
 }
 
+# Special string to represent null value
+NULL = "NULL"
+NULL_DISPLAY = "-"
+
 
 @dataclass
 class DtypeClass:
@@ -51,15 +57,15 @@ STYLES = {
     pl.UInt32: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
     pl.UInt64: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
     # float
-    pl.Float32: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
-    pl.Float64: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
-    pl.Decimal: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
+    pl.Float32: DtypeClass(gtype="float", style="yellow", justify="right", itype="number", convert=float),
+    pl.Float64: DtypeClass(gtype="float", style="yellow", justify="right", itype="number", convert=float),
+    pl.Decimal: DtypeClass(gtype="float", style="yellow", justify="right", itype="number", convert=float),
     # bool
     pl.Boolean: DtypeClass(gtype="boolean", style="blue", justify="center", itype="text", convert=lambda x: BOOLS[x.lower()]),
     # temporal
-    pl.Date: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
-    pl.Datetime: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
-    pl.Time: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
+    pl.Date: DtypeClass(gtype="temporal", style="magenta", justify="center", itype="text", convert=str),
+    pl.Datetime: DtypeClass(gtype="temporal", style="magenta", justify="center", itype="text", convert=str),
+    pl.Time: DtypeClass(gtype="temporal", style="magenta", justify="center", itype="text", convert=str),
     # unknown
     pl.Unknown: DtypeClass(gtype="unknown", style="", justify="", itype="text", convert=str),
 }
@@ -111,7 +117,27 @@ def DtypeConfig(dtype: pl.DataType) -> DtypeClass:
     return STYLES[pl.Unknown]
 
 
-def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> list[Text]:
+def format_float(value: float, thousand_separator: bool = False, precision: int = 2) -> str:
+    """Format a float value, keeping integers without decimal point.
+
+    Args:
+        val: The float value to format.
+        thousand_separator: Whether to include thousand separators. Defaults to False.
+
+    Returns:
+        The formatted float as a string.
+    """
+
+    if (val := int(value)) == value:
+        return f"{val:,}" if thousand_separator else str(val)
+    else:
+        if precision > 0:
+            return f"{value:,.{precision}f}" if thousand_separator else f"{value:.{precision}f}"
+        else:
+            return f"{value:,f}" if thousand_separator else str(value)
+
+
+def format_row(vals, dtypes, styles=None, apply_justify=True, thousand_separator=False) -> list[Text]:
     """Format a single row with proper styling and justification.
 
     Converts raw row values to formatted Rich Text objects with appropriate
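
`format_float` replaces the old thousand-separator-only float branch in `format_row`: integral floats render without a decimal point, and the default precision of 2 applies only to fractional values. The expected outputs below are derived by tracing the code above, not taken from package docs (note that non-finite values such as NaN would raise in the `int(value)` check):

```python
format_float(3.0)                               # "3": integral floats drop the decimal point
format_float(1234.5)                            # "1234.50" (default precision of 2)
format_float(1234.5, thousand_separator=True)   # "1,234.50"
format_float(12345.0, thousand_separator=True)  # "12,345"
```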
@@ -127,7 +153,7 @@ def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> li
     """
     formatted_row = []
 
-    for val, dtype in zip(vals, dtypes, strict=True):
+    for idx, (val, dtype) in enumerate(zip(vals, dtypes, strict=True)):
         dc = DtypeConfig(dtype)
 
         # Format the value
@@ -135,16 +161,18 @@ def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> li
             text_val = NULL_DISPLAY
         elif dc.gtype == "integer" and thousand_separator:
             text_val = f"{val:,}"
-        elif dc.gtype == "float" and thousand_separator:
-            text_val = f"{val:,}"
+        elif dc.gtype == "float":
+            text_val = format_float(val, thousand_separator)
         else:
             text_val = str(val)
 
         formatted_row.append(
             Text(
                 text_val,
-                style=dc.style,
+                style=styles[idx] if styles and styles[idx] else dc.style,
                 justify=dc.justify if apply_justify else "",
+                overflow="ellipsis",
+                no_wrap=True,
             )
         )
 
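
Two behavioral changes ride along here: an optional `styles` list lets callers override the per-dtype color cell by cell (falling back to the dtype default where the entry is falsy), and cells are now ellipsized rather than wrapped. A small sketch of the new parameter; the dtype list and override values are invented for illustration:

```python
import polars as pl

# One optional style per column; None falls back to the dtype default
# from STYLES (cyan integers, yellow floats, and so on).
row = format_row(
    [1234567, None, 2.5],
    [pl.Int64, pl.String, pl.Float64],
    styles=[None, "red", None],
    thousand_separator=True,
)
# -> Text("1,234,567"), Text("-", style="red"), Text("2.50")
```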
@@ -194,7 +222,7 @@ def get_next_item(lst: list[Any], current, offset=1) -> Any:
     return lst[next_index]
 
 
-def parse_polars_expression(expression: str, df: pl.DataFrame, current_col_idx: int) -> str:
+def parse_polars_expression(expression: str, columns: list[str], current_col_idx: int) -> str:
     """Parse and convert an expression to Polars syntax.
 
     Replaces column references with Polars col() expressions:
@@ -212,7 +240,7 @@ def parse_polars_expression(expression: str, df: pl.DataFrame, current_col_idx:
 
     Args:
         expression: The input expression as a string.
-        df: The DataFrame to validate column references.
+        columns: The list of column names in the DataFrame.
         current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.
 
     Returns:
@@ -242,19 +270,19 @@
 
         if col_ref == "_":
             # Current selected column
-            col_name = df.columns[current_col_idx]
+            col_name = columns[current_col_idx]
         elif col_ref == "#":
             # RIDX is used to store 0-based row index; add 1 for 1-based index
             return f"(pl.col('{RIDX}') + 1)"
         elif col_ref.isdigit():
             # Column by 1-based index
             col_idx = int(col_ref) - 1
-            if col_idx < 0 or col_idx >= len(df.columns):
+            if col_idx < 0 or col_idx >= len(columns):
                 raise ValueError(f"Column index out of range: ${col_ref}")
-            col_name = df.columns[col_idx]
+            col_name = columns[col_idx]
         else:
             # Column by name
-            if col_ref not in df.columns:
+            if col_ref not in columns:
                 raise ValueError(f"Column not found: ${col_ref}")
             col_name = col_ref
 
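
Passing `columns` instead of the whole DataFrame makes these helpers cheap to call from UI code that only has a schema at hand. From the replacement rules above, a sketch of the `$` reference forms (the exact wrapping of the generated expression string is an assumption):

```python
columns = ["name", "price", "qty"]

# "$name" style references become pl.col(...) expressions,
# e.g. something like "(pl.col('price') * pl.col('qty'))".
parse_polars_expression("$price * $qty", columns, current_col_idx=0)

# "$_" resolves to the currently selected column (here: "price"),
# and "$#" to the 1-based row index via the internal RIDX column.
parse_polars_expression("$_ > 10", columns, current_col_idx=1)

# Out-of-range or unknown references raise ValueError:
parse_polars_expression("$4 == 1", columns, current_col_idx=0)   # "Column index out of range: $4"
parse_polars_expression("$price2", columns, current_col_idx=0)   # "Column not found: $price2"
```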
@@ -283,7 +311,7 @@ def tentative_expr(term: str) -> bool:
     return False
 
 
-def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr | None:
+def validate_expr(term: str, columns: list[str], current_col_idx: int) -> pl.Expr | None:
     """Validate and return the expression.
 
     Parses a user-provided expression string and validates it as a valid Polars expression.
@@ -291,7 +319,7 @@ def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr
 
     Args:
         term: The input expression as a string.
-        df: The DataFrame to validate column references against.
+        columns: The list of column names in the DataFrame.
         current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.
 
     Returns:
@@ -304,7 +332,7 @@ def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr
 
     try:
         # Parse the expression
-        expr_str = parse_polars_expression(term, df, current_col_idx)
+        expr_str = parse_polars_expression(term, columns, current_col_idx)
 
         # Validate by evaluating it
         try:
@@ -318,3 +346,324 @@
             raise ValueError(f"Failed to evaluate expression `{expr_str}`: {e}") from e
     except Exception as ve:
         raise ValueError(f"Failed to validate expression `{term}`: {ve}") from ve
+
+
+def load_dataframe(
+    filenames: list[str],
+    file_format: str | None = None,
+    has_header: bool = True,
+    infer_schema: bool = True,
+    comment_prefix: str | None = None,
+    skip_lines: int = 0,
+    skip_rows_after_header: int = 0,
+    null_values: list[str] | None = None,
+) -> list[tuple[pl.DataFrame, str, str]]:
+    """Load DataFrames from file specifications.
+
+    Handles loading from multiple files, single files, or stdin. For Excel files,
+    loads all sheets as separate entries. For other formats, loads as single file.
+
+    Args:
+        filenames: List of filenames to load. If single filename is "-", read from stdin.
+        file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
+        has_header: Whether the input files have a header row. Defaults to True.
+        infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
+        comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
+        skip_lines: Number of lines to skip when reading CSV/TSV files. Defaults to 0.
+        skip_rows_after_header: Number of rows to skip after header. Defaults to 0.
+
+    Returns:
+        List of tuples of (DataFrame, filename, tabname) ready for display.
+    """
+    sources = []
+    prefix_sheet = len(filenames) > 1
+
+    for filename in filenames:
+        # Determine file format if not specified
+        if not file_format:
+            ext = Path(filename).suffix.lower()
+            if ext == ".gz" or ext == ".bz2" or ext == ".xz":
+                ext = Path(filename).with_suffix("").suffix.lower()
+            fmt = ext.removeprefix(".")
+
+            # Default to TSV
+            file_format = fmt if fmt in SUPPORTED_FORMATS else "tsv"
+
+        # Load each file
+        sources.extend(
+            load_file(
+                filename,
+                prefix_sheet=prefix_sheet,
+                file_format=file_format,
+                has_header=has_header,
+                infer_schema=infer_schema,
+                comment_prefix=comment_prefix,
+                skip_lines=skip_lines,
+                skip_rows_after_header=skip_rows_after_header,
+                null_values=null_values,
+            )
+        )
+
+    return sources
+
+
+RE_COMPUTE_ERROR = re.compile(r"at column '(.*?)' \(column number \d+\)")
+
+
+def handle_compute_error(
+    err_msg: str,
+    file_format: str | None,
+    infer_schema: bool,
+    schema_overrides: dict[str, pl.DataType] | None = None,
+) -> tuple[bool, dict[str, pl.DataType] | None]:
+    """Handle ComputeError during schema inference and determine retry strategy.
+
+    Analyzes the error message and determines whether to retry with schema overrides,
+    disable schema inference, or exit with an error.
+
+    Args:
+        err_msg: The error message from the ComputeError exception.
+        file_format: The file format being loaded (tsv, csv, etc.).
+        infer_schema: Whether schema inference is currently enabled.
+        schema_overrides: Current schema overrides, if any.
+
+    Returns:
+        A tuple of (infer_schema, schema_overrides):
+
+    Raises:
+        SystemExit: If the error is unrecoverable.
+    """
+    # Already disabled schema inference, cannot recover
+    if not infer_schema:
+        print(f"Error loading with schema inference disabled:\n{err_msg}", file=sys.stderr)
+        sys.exit(1)
+
+    # Schema mismatch error
+    if "found more fields than defined in 'Schema'" in err_msg:
+        print(f"Input might be malformed:\n{err_msg}", file=sys.stderr)
+        sys.exit(1)
+
+    # ComputeError: could not parse `n.a. as of 04.01.022` as `dtype` i64 at column 'PubChemCID' (column number 16)
+    if file_format in ("tsv", "csv") and (m := RE_COMPUTE_ERROR.search(err_msg)):
+        col_name = m.group(1)
+
+        if schema_overrides is None:
+            schema_overrides = {}
+        schema_overrides.update({col_name: pl.String})
+    else:
+        infer_schema = False
+
+    return infer_schema, schema_overrides
+
+
+def load_stdin(
+    stdin_data=None,
+    file_format: str | None = None,
+    has_header: bool = True,
+    infer_schema: bool = True,
+    comment_prefix: str | None = None,
+    skip_lines: int = 0,
+    skip_rows_after_header: int = 0,
+    schema_overrides: dict[str, pl.DataType] | None = None,
+    null_values: list[str] | None = None,
+) -> list[tuple[pl.DataFrame, str, str]]:
+    """Load DataFrame from stdin.
+
+    If a ComputeError occurs during schema inference for a column, attempts to recover
+    by treating that column as a string and retrying the load. This process repeats until
+    all columns are successfully loaded or no further recovery is possible.
+
+    Args:
+        stdin_data: Optional stdin data as string. If None, read from sys.stdin.
+        file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
+        has_header: Whether the input files have a header row. Defaults to True.
+        infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
+        comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
+        skip_lines: Number of lines to skip when reading CSV/TSV files. Defaults to 0.
+        skip_rows_after_header: Number of rows to skip after header. Defaults to 0.
+
+    Returns:
+        List of tuples of (DataFrame, filename, tabname) ready for display.
+    """
+    import os
+    from io import StringIO
+
+    sources = []
+
+    # Read from stdin into memory first (stdin is not seekable)
+    if stdin_data is None:
+        stdin_data = sys.stdin.read()
+
+    # Reopen stdin to /dev/tty for proper terminal interaction
+    try:
+        tty = open("/dev/tty")
+        os.dup2(tty.fileno(), sys.stdin.fileno())
+    except (OSError, FileNotFoundError):
+        pass
+
+    lf = pl.scan_csv(
+        StringIO(stdin_data),
+        separator="," if file_format == "csv" else "\t",
+        has_header=has_header,
+        infer_schema=infer_schema,
+        comment_prefix=comment_prefix,
+        skip_lines=skip_lines,
+        skip_rows_after_header=skip_rows_after_header,
+        schema_overrides=schema_overrides,
+        null_values=null_values,
+    )
+
+    sources = [(lf, f"stdin.{file_format}" if file_format else "stdin", "stdin")]
+
+    # Attempt to collect, handling ComputeError for schema inference issues
+    try:
+        sources = [(lf.collect(), fn, tn) for lf, fn, tn in sources]
+    except pl.exceptions.ComputeError as ce:
+        # Handle the error and determine retry strategy
+        infer_schema, schema_overrides = handle_compute_error(str(ce), file_format, infer_schema, schema_overrides)
+
+        # Retry loading with updated schema overrides
+        return load_stdin(
+            stdin_data,
+            file_format=file_format,
+            has_header=has_header,
+            infer_schema=infer_schema,
+            comment_prefix=comment_prefix,
+            skip_lines=skip_lines,
+            skip_rows_after_header=skip_rows_after_header,
+            schema_overrides=schema_overrides,
+            null_values=null_values,
+        )
+
+    return sources
+
+
+def load_file(
+    filename: str,
+    first_sheet: bool = False,
+    prefix_sheet: bool = False,
+    file_format: str | None = None,
+    has_header: bool = True,
+    infer_schema: bool = True,
+    comment_prefix: str | None = None,
+    skip_lines: int = 0,
+    skip_rows_after_header: int = 0,
+    schema_overrides: dict[str, pl.DataType] | None = None,
+    null_values: list[str] | None = None,
+) -> list[tuple[pl.DataFrame, str, str]]:
+    """Load a single file.
+
+    For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
+    For other files or multiple files, returns one entry per file.
+
+    If a ComputeError occurs during schema inference for a column, attempts to recover
+    by treating that column as a string and retrying the load. This process repeats until
+    all columns are successfully loaded or no further recovery is possible.
+
+    Args:
+        filename: Path to file to load.
+        first_sheet: If True, only load first sheet for Excel files. Defaults to False.
+        prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
+        file_format: Optional format specifier (i.e., 'tsv', 'csv', 'excel', 'parquet', 'json', 'ndjson') for input files.
+            By default, infers from file extension.
+        has_header: Whether the input files have a header row. Defaults to True.
+        infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
+        comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
+        skip_lines: Number of lines to skip when reading CSV/TSV files. The header will be parsed at this offset. Defaults to 0.
+        skip_rows_after_header: Number of rows to skip after header when reading CSV/TSV files. Defaults to 0.
+
+    Returns:
+        List of tuples of (DataFrame, filename, tabname).
+    """
+    sources = []
+
+    if filename == "-":
+        return load_stdin(
+            file_format=file_format,
+            has_header=has_header,
+            infer_schema=infer_schema,
+            comment_prefix=comment_prefix,
+            skip_lines=skip_lines,
+            skip_rows_after_header=skip_rows_after_header,
+            schema_overrides=schema_overrides,
+            null_values=null_values,
+        )
+
+    filepath = Path(filename)
+
+    # Load based on file format
+    if file_format in ("tsv", "csv"):
+        lf = pl.scan_csv(
+            filename,
+            separator="\t" if file_format == "tsv" else ",",
+            has_header=has_header,
+            infer_schema=infer_schema,
+            comment_prefix=comment_prefix,
+            skip_lines=skip_lines,
+            skip_rows_after_header=skip_rows_after_header,
+            schema_overrides=schema_overrides,
+            null_values=null_values,
+        )
+        sources.append((lf, filename, filepath.stem))
+    elif file_format in ("xlsx", "xls", "excel"):
+        if first_sheet:
+            # Read only the first sheet for multiple files
+            lf = pl.read_excel(filename).lazy()
+            sources.append((lf, filename, filepath.stem))
+        else:
+            # For single file, expand all sheets
+            sheets = pl.read_excel(filename, sheet_id=0)
+            for sheet_name, df in sheets.items():
+                tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
+                sources.append((df.lazy(), filename, tabname))
+    elif file_format == "parquet":
+        lf = pl.scan_parquet(filename)
+        sources.append((lf, filename, filepath.stem))
+    elif file_format == "json":
+        lf = pl.read_json(filename).lazy()
+        sources.append((lf, filename, filepath.stem))
+    elif file_format == "ndjson":
+        lf = pl.scan_ndjson(filename, schema_overrides=schema_overrides)
+        sources.append((lf, filename, filepath.stem))
+    else:
+        raise ValueError(f"Unsupported file format: {file_format}. Supported formats are: {SUPPORTED_FORMATS}")
+
+    # Attempt to collect, handling ComputeError for schema inference issues
+    try:
+        sources = [(lf.collect(), fn, tn) for lf, fn, tn in sources]
+    except pl.exceptions.ComputeError as ce:
+        # Handle the error and determine retry strategy
+        infer_schema, schema_overrides = handle_compute_error(str(ce), file_format, infer_schema, schema_overrides)
+
+        # Retry loading with updated schema overrides
+        return load_file(
+            filename,
+            file_format=file_format,
+            has_header=has_header,
+            infer_schema=infer_schema,
+            comment_prefix=comment_prefix,
+            skip_lines=skip_lines,
+            skip_rows_after_header=skip_rows_after_header,
+            schema_overrides=schema_overrides,
+            null_values=null_values,
+        )
+
+    return sources
+
+
+def now() -> str:
+    """Get the current local time as a formatted string."""
+    import time
+
+    return time.strftime("%m/%d/%Y %H:%M:%S", time.localtime())
+
+
+async def sleep_async(seconds: float) -> None:
+    """Async sleep to yield control back to the event loop.
+
+    Args:
+        seconds: The number of seconds to sleep.
+    """
+    import asyncio
+
+    await asyncio.sleep(seconds)
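
The interesting part of the new loaders is the recovery loop: `load_file` and `load_stdin` collect lazily, and on a `ComputeError` they consult `handle_compute_error`, which either pins the offending column to `pl.String` via `schema_overrides` and retries, disables inference outright, or gives up. Its contract can be exercised directly (error strings below are invented to match `RE_COMPUTE_ERROR`):

```python
import polars as pl

# A parse failure that names a column: keep inferring, but pin that column.
msg = "could not parse `n.a.` as `dtype` i64 at column 'PubChemCID' (column number 16)"
assert handle_compute_error(msg, "csv", infer_schema=True) == (True, {"PubChemCID": pl.String})

# An error the regex doesn't recognize: retry once more with inference disabled.
assert handle_compute_error("some other failure", "csv", infer_schema=True) == (False, None)

# With inference already disabled there is nothing left to try:
# handle_compute_error(msg, "csv", infer_schema=False) prints to stderr and exits.
```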
@@ -79,10 +79,12 @@ class DataFrameHelpPanel(Widget):
         None
         """
 
-        def update_help(focused_widget: Widget | None):
-            self.update_help(focused_widget)
+        # def update_help(focused_widget: Widget | None):
+        #     self.update_help(focused_widget)
 
-        self.watch(self.screen, "focused", update_help)
+        # self.watch(self.screen, "focused", update_help)
+
+        self.update_help(self.screen.focused)
 
     def update_help(self, focused_widget: Widget | None) -> None:
         """Update the help for the focused widget.
@@ -96,7 +98,7 @@ class DataFrameHelpPanel(Widget):
             return
         self.set_class(focused_widget is not None, "-show-help")
         if focused_widget is not None:
-            help = self.app.HELP + "\n" + focused_widget.HELP or ""
+            help = (self.app.HELP or "") + "\n" + (focused_widget.HELP or "")
             if not help:
                 self.remove_class("-show-help")
             try: