dataframe-textual 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -4,23 +4,14 @@ import argparse
  import sys
  from pathlib import Path
  
- from .common import load_dataframe
+ from .common import SUPPORTED_FORMATS, load_dataframe
  from .data_frame_viewer import DataFrameViewer
  
- SUPPORTED_FORMATS = ["csv", "excel", "tsv", "parquet", "json", "ndjson"]
  
+ def cli() -> argparse.Namespace:
+     """Parse command-line arguments.
  
- def main() -> None:
-     """Run the DataFrame Viewer application.
- 
-     Parses command-line arguments to determine input files or stdin, validates
-     file existence, and launches the interactive DataFrame Viewer application.
- 
-     Returns:
-         None
- 
-     Raises:
-         SystemExit: If invalid arguments are provided or required files are missing.
+     Determines input files or stdin and validates file existence
      """
      parser = argparse.ArgumentParser(
          prog="dv",
@@ -39,27 +30,58 @@ def main() -> None:
          choices=SUPPORTED_FORMATS,
          help="Specify the format of the input files (csv, excel, tsv etc.)",
      )
-     parser.add_argument("-H", "--no-header", action="store_true", help="Specify that input files have no header row")
+     parser.add_argument(
+         "-H",
+         "--no-header",
+         action="store_true",
+         help="Specify that input files have no header row when reading CSV/TSV",
+     )
+     parser.add_argument(
+         "-I", "--no-inferrence", action="store_true", help="Do not infer data types when reading CSV/TSV"
+     )
+     parser.add_argument(
+         "-C", "--comment-prefix", nargs="?", const="#", help="Comment lines are skipped when reading CSV/TSV"
+     )
+     parser.add_argument("-L", "--skip-lines", type=int, default=0, help="Skip lines when reading CSV/TSV")
+     parser.add_argument(
+         "-K", "--skip-rows-after-header", type=int, default=0, help="Skip rows after header when reading CSV/TSV"
+     )
+     parser.add_argument("-U", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
  
      args = parser.parse_args()
-     filenames = []
+     if args.files is None:
+         args.files = []
  
      # Check if reading from stdin (pipe or redirect)
      if not sys.stdin.isatty():
-         filenames.append("-")
-     if args.files:
+         args.files.append("-")
+     else:
          # Validate all files exist
          for filename in args.files:
             if not Path(filename).exists():
                 print(f"File not found: {filename}")
                 sys.exit(1)
-         filenames.extend(args.files)
  
-     if not filenames:
+     if not args.files:
          parser.print_help()
          sys.exit(1)
  
-     sources = load_dataframe(filenames, file_format=args.format, has_header=not args.no_header)
+     return args
+ 
+ 
+ def main() -> None:
+     """Run the DataFrame Viewer application."""
+     args = cli()
+     sources = load_dataframe(
+         args.files,
+         file_format=args.format,
+         has_header=not args.no_header,
+         infer_schema=not args.no_inferrence,
+         comment_prefix=args.comment_prefix,
+         skip_lines=args.skip_lines,
+         skip_rows_after_header=args.skip_rows_after_header,
+         null_values=args.null,
+     )
      app = DataFrameViewer(*sources)
      app.run()
  
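Note: the new `-H/-I/-C/-L/-K/-U` flags map one-to-one onto parameters of polars' CSV reader, which `load_dataframe` forwards to `pl.scan_csv`. A rough sketch of the equivalent direct call (filename and values are made up for illustration; parameter availability depends on the installed polars version):

    import polars as pl

    # Rough equivalent of: dv data.tsv -H -I -C "#" -L 2 -K 1 -U NA null
    lf = pl.scan_csv(
        "data.tsv",                  # hypothetical input file
        separator="\t",
        has_header=False,            # -H / --no-header
        infer_schema=False,          # -I / --no-inferrence
        comment_prefix="#",          # -C / --comment-prefix
        skip_lines=2,                # -L / --skip-lines
        skip_rows_after_header=1,    # -K / --skip-rows-after-header
        null_values=["NA", "null"],  # -U / --null
    )
    df = lf.collect()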
@@ -9,9 +9,9 @@ from typing import Any
  import polars as pl
  from rich.text import Text
  
- # Special string to represent null value
- NULL = "NULL"
- NULL_DISPLAY = "-"
+ # Supported file formats
+ SUPPORTED_FORMATS = {"tsv", "csv", "excel", "xlsx", "xls", "parquet", "json", "ndjson"}
+ 
  
  # Boolean string mappings
  BOOLS = {
@@ -27,6 +27,10 @@ BOOLS = {
      "0": False,
  }
  
+ # Special string to represent null value
+ NULL = "NULL"
+ NULL_DISPLAY = "-"
+ 
  
  @dataclass
  class DtypeClass:
@@ -53,15 +57,15 @@ STYLES = {
      pl.UInt32: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
      pl.UInt64: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
      # float
-     pl.Float32: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
-     pl.Float64: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
-     pl.Decimal: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
+     pl.Float32: DtypeClass(gtype="float", style="yellow", justify="right", itype="number", convert=float),
+     pl.Float64: DtypeClass(gtype="float", style="yellow", justify="right", itype="number", convert=float),
+     pl.Decimal: DtypeClass(gtype="float", style="yellow", justify="right", itype="number", convert=float),
      # bool
      pl.Boolean: DtypeClass(gtype="boolean", style="blue", justify="center", itype="text", convert=lambda x: BOOLS[x.lower()]),
      # temporal
-     pl.Date: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
-     pl.Datetime: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
-     pl.Time: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
+     pl.Date: DtypeClass(gtype="temporal", style="magenta", justify="center", itype="text", convert=str),
+     pl.Datetime: DtypeClass(gtype="temporal", style="magenta", justify="center", itype="text", convert=str),
+     pl.Time: DtypeClass(gtype="temporal", style="magenta", justify="center", itype="text", convert=str),
      # unknown
      pl.Unknown: DtypeClass(gtype="unknown", style="", justify="", itype="text", convert=str),
  }
@@ -133,7 +137,7 @@ def format_float(value: float, thousand_separator: bool = False, precision: int
      return f"{value:,f}" if thousand_separator else str(value)
  
  
- def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> list[Text]:
+ def format_row(vals, dtypes, styles=None, apply_justify=True, thousand_separator=False) -> list[Text]:
      """Format a single row with proper styling and justification.
  
      Converts raw row values to formatted Rich Text objects with appropriate
@@ -149,7 +153,7 @@ def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> li
      """
      formatted_row = []
  
-     for val, dtype in zip(vals, dtypes, strict=True):
+     for idx, (val, dtype) in enumerate(zip(vals, dtypes, strict=True)):
          dc = DtypeConfig(dtype)
  
          # Format the value
@@ -165,8 +169,10 @@
          formatted_row.append(
              Text(
                  text_val,
-                 style=dc.style,
+                 style=styles[idx] if styles and styles[idx] else dc.style,
                  justify=dc.justify if apply_justify else "",
+                 overflow="ellipsis",
+                 no_wrap=True,
              )
          )
  
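Note: per the new signature, `styles` is an optional per-column list that overrides the dtype-derived style; `None` entries fall back to the default. A minimal sketch (the `format_row` import path is assumed):

    import polars as pl

    row = [1, 2.5, "x"]
    dtypes = [pl.Int64, pl.Float64, pl.String]

    # Second cell renders "bold red"; the others keep their dtype defaults.
    texts = format_row(row, dtypes, styles=[None, "bold red", None])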
@@ -216,7 +222,7 @@ def get_next_item(lst: list[Any], current, offset=1) -> Any:
      return lst[next_index]
  
  
- def parse_polars_expression(expression: str, df: pl.DataFrame, current_col_idx: int) -> str:
+ def parse_polars_expression(expression: str, columns: list[str], current_col_idx: int) -> str:
      """Parse and convert an expression to Polars syntax.
  
      Replaces column references with Polars col() expressions:
@@ -234,7 +240,7 @@ def parse_polars_expression(expression: str, df: pl.DataFrame, current_col_idx:
  
      Args:
          expression: The input expression as a string.
-         df: The DataFrame to validate column references.
+         columns: The list of column names in the DataFrame.
          current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.
  
      Returns:
@@ -264,19 +270,19 @@ def parse_polars_expression(expression: str, df: pl.DataFrame, current_col_idx:
  
          if col_ref == "_":
              # Current selected column
-             col_name = df.columns[current_col_idx]
+             col_name = columns[current_col_idx]
          elif col_ref == "#":
              # RIDX is used to store 0-based row index; add 1 for 1-based index
              return f"(pl.col('{RIDX}') + 1)"
          elif col_ref.isdigit():
              # Column by 1-based index
              col_idx = int(col_ref) - 1
-             if col_idx < 0 or col_idx >= len(df.columns):
+             if col_idx < 0 or col_idx >= len(columns):
                  raise ValueError(f"Column index out of range: ${col_ref}")
-             col_name = df.columns[col_idx]
+             col_name = columns[col_idx]
          else:
              # Column by name
-             if col_ref not in df.columns:
+             if col_ref not in columns:
                  raise ValueError(f"Column not found: ${col_ref}")
              col_name = col_ref
  
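Note: with this change, callers pass a plain list of column names instead of the DataFrame itself. Hedged examples of the reference forms handled above (exact output strings may differ from what is shown here):

    cols = ["name", "score"]

    parse_polars_expression("$score > 10", cols, current_col_idx=0)  # column by name
    parse_polars_expression("$2 > 10", cols, current_col_idx=0)      # 1-based index -> "score"
    parse_polars_expression("$_ == 'Ada'", cols, current_col_idx=0)  # current column -> "name"
    parse_polars_expression("$# > 5", cols, current_col_idx=0)       # 1-based row index via RIDX
    parse_polars_expression("$bogus", cols, current_col_idx=0)       # raises ValueError: Column not found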
@@ -305,7 +311,7 @@ def tentative_expr(term: str) -> bool:
      return False
  
  
- def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr | None:
+ def validate_expr(term: str, columns: list[str], current_col_idx: int) -> pl.Expr | None:
      """Validate and return the expression.
  
      Parses a user-provided expression string and validates it as a valid Polars expression.
@@ -313,7 +319,7 @@ def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr
  
      Args:
          term: The input expression as a string.
-         df: The DataFrame to validate column references against.
+         columns: The list of column names in the DataFrame.
          current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.
  
      Returns:
@@ -326,7 +332,7 @@ def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr
  
      try:
          # Parse the expression
-         expr_str = parse_polars_expression(term, df, current_col_idx)
+         expr_str = parse_polars_expression(term, columns, current_col_idx)
  
          # Validate by evaluating it
          try:
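Note: a hedged usage sketch of the new `columns`-based signature (assuming, per the docstring, that a valid term yields a `pl.Expr` and an invalid one yields `None`):

    import polars as pl

    df = pl.DataFrame({"name": ["Ada", "Bob"], "score": [12, 8]})

    expr = validate_expr("$score > 10", df.columns, current_col_idx=0)
    if expr is not None:        # None means the term did not validate
        print(df.filter(expr))  # keeps the "Ada" row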
@@ -343,8 +349,15 @@
  
  
  def load_dataframe(
-     filenames: list[str], file_format: str | None = None, has_header: bool = True
- ) -> list[tuple[pl.LazyFrame, str, str]]:
+     filenames: list[str],
+     file_format: str | None = None,
+     has_header: bool = True,
+     infer_schema: bool = True,
+     comment_prefix: str | None = None,
+     skip_lines: int = 0,
+     skip_rows_after_header: int = 0,
+     null_values: list[str] | None = None,
+ ) -> list[tuple[pl.DataFrame, str, str]]:
      """Load DataFrames from file specifications.
  
      Handles loading from multiple files, single files, or stdin. For Excel files,
@@ -354,16 +367,174 @@ def load_dataframe(
          filenames: List of filenames to load. If single filename is "-", read from stdin.
          file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
          has_header: Whether the input files have a header row. Defaults to True.
+         infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
+         comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
+         skip_lines: Number of lines to skip when reading CSV/TSV files. Defaults to 0.
+         skip_rows_after_header: Number of rows to skip after header. Defaults to 0.
  
      Returns:
-         List of tuples of (LazyFrame, filename, tabname) ready for display.
+         List of tuples of (DataFrame, filename, tabname) ready for display.
      """
      sources = []
- 
      prefix_sheet = len(filenames) > 1
  
      for filename in filenames:
-         sources.extend(load_file(filename, prefix_sheet=prefix_sheet, file_format=file_format, has_header=has_header))
+         # Determine file format if not specified
+         if not file_format:
+             ext = Path(filename).suffix.lower()
+             if ext == ".gz" or ext == ".bz2" or ext == ".xz":
+                 ext = Path(filename).with_suffix("").suffix.lower()
+             fmt = ext.removeprefix(".")
+ 
+             # Default to TSV
+             file_format = fmt if fmt in SUPPORTED_FORMATS else "tsv"
+ 
+         # Load each file
+         sources.extend(
+             load_file(
+                 filename,
+                 prefix_sheet=prefix_sheet,
+                 file_format=file_format,
+                 has_header=has_header,
+                 infer_schema=infer_schema,
+                 comment_prefix=comment_prefix,
+                 skip_lines=skip_lines,
+                 skip_rows_after_header=skip_rows_after_header,
+                 null_values=null_values,
+             )
+         )
+ 
+     return sources
+ 
+ 
+ RE_COMPUTE_ERROR = re.compile(r"at column '(.*?)' \(column number \d+\)")
+ 
+ 
+ def handle_compute_error(
+     err_msg: str,
+     file_format: str | None,
+     infer_schema: bool,
+     schema_overrides: dict[str, pl.DataType] | None = None,
+ ) -> tuple[bool, dict[str, pl.DataType] | None]:
+     """Handle ComputeError during schema inference and determine retry strategy.
+ 
+     Analyzes the error message and determines whether to retry with schema overrides,
+     disable schema inference, or exit with an error.
+ 
+     Args:
+         err_msg: The error message from the ComputeError exception.
+         file_format: The file format being loaded (tsv, csv, etc.).
+         infer_schema: Whether schema inference is currently enabled.
+         schema_overrides: Current schema overrides, if any.
+ 
+     Returns:
+         A tuple of (infer_schema, schema_overrides):
+ 
+     Raises:
+         SystemExit: If the error is unrecoverable.
+     """
+     # Already disabled schema inference, cannot recover
+     if not infer_schema:
+         print(f"Error loading with schema inference disabled:\n{err_msg}", file=sys.stderr)
+         sys.exit(1)
+ 
+     # Schema mismatch error
+     if "found more fields than defined in 'Schema'" in err_msg:
+         print(f"Input might be malformed:\n{err_msg}", file=sys.stderr)
+         sys.exit(1)
+ 
+     # ComputeError: could not parse `n.a. as of 04.01.022` as `dtype` i64 at column 'PubChemCID' (column number 16)
+     if file_format in ("tsv", "csv") and (m := RE_COMPUTE_ERROR.search(err_msg)):
+         col_name = m.group(1)
+ 
+         if schema_overrides is None:
+             schema_overrides = {}
+         schema_overrides.update({col_name: pl.String})
+     else:
+         infer_schema = False
+ 
+     return infer_schema, schema_overrides
+ 
+ 
+ def load_stdin(
+     stdin_data=None,
+     file_format: str | None = None,
+     has_header: bool = True,
+     infer_schema: bool = True,
+     comment_prefix: str | None = None,
+     skip_lines: int = 0,
+     skip_rows_after_header: int = 0,
+     schema_overrides: dict[str, pl.DataType] | None = None,
+     null_values: list[str] | None = None,
+ ) -> list[tuple[pl.DataFrame, str, str]]:
+     """Load DataFrame from stdin.
+ 
+     If a ComputeError occurs during schema inference for a column, attempts to recover
+     by treating that column as a string and retrying the load. This process repeats until
+     all columns are successfully loaded or no further recovery is possible.
+ 
+     Args:
+         stdin_data: Optional stdin data as string. If None, read from sys.stdin.
+         file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
+         has_header: Whether the input files have a header row. Defaults to True.
+         infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
+         comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
+         skip_lines: Number of lines to skip when reading CSV/TSV files. Defaults to 0.
+         skip_rows_after_header: Number of rows to skip after header. Defaults to 0.
+ 
+     Returns:
+         List of tuples of (DataFrame, filename, tabname) ready for display.
+     """
+     import os
+     from io import StringIO
+ 
+     sources = []
+ 
+     # Read from stdin into memory first (stdin is not seekable)
+     if stdin_data is None:
+         stdin_data = sys.stdin.read()
+ 
+     # Reopen stdin to /dev/tty for proper terminal interaction
+     try:
+         tty = open("/dev/tty")
+         os.dup2(tty.fileno(), sys.stdin.fileno())
+     except (OSError, FileNotFoundError):
+         pass
+ 
+     lf = pl.scan_csv(
+         StringIO(stdin_data),
+         separator="," if file_format == "csv" else "\t",
+         has_header=has_header,
+         infer_schema=infer_schema,
+         comment_prefix=comment_prefix,
+         skip_lines=skip_lines,
+         skip_rows_after_header=skip_rows_after_header,
+         schema_overrides=schema_overrides,
+         null_values=null_values,
+     )
+ 
+     sources = [(lf, f"stdin.{file_format}" if file_format else "stdin", "stdin")]
+ 
+     # Attempt to collect, handling ComputeError for schema inference issues
+     try:
+         sources = [(lf.collect(), fn, tn) for lf, fn, tn in sources]
+     except pl.exceptions.ComputeError as ce:
+         # Handle the error and determine retry strategy
+         infer_schema, schema_overrides = handle_compute_error(str(ce), file_format, infer_schema, schema_overrides)
+ 
+         # Retry loading with updated schema overrides
+         return load_stdin(
+             stdin_data,
+             file_format=file_format,
+             has_header=has_header,
+             infer_schema=infer_schema,
+             comment_prefix=comment_prefix,
+             skip_lines=skip_lines,
+             skip_rows_after_header=skip_rows_after_header,
+             schema_overrides=schema_overrides,
+             null_values=null_values,
+         )
+ 
      return sources
  
  
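Note: the recovery strategy above is: on a parse failure during schema inference, pin the offending column to `pl.String` via `schema_overrides` and reload; if that still fails, fall back to disabling inference entirely. A condensed, self-contained sketch of the same idea (polars' error-message format is assumed to match `RE_COMPUTE_ERROR`, and exact behavior depends on the polars version):

    import re
    from io import StringIO

    import polars as pl

    RE_COMPUTE_ERROR = re.compile(r"at column '(.*?)' \(column number \d+\)")

    def read_with_fallback(data: str, overrides=None) -> pl.DataFrame:
        try:
            return pl.scan_csv(StringIO(data), schema_overrides=overrides).collect()
        except pl.exceptions.ComputeError as ce:
            m = RE_COMPUTE_ERROR.search(str(ce))
            if m is None:
                raise
            # Pin the offending column to String, then retry
            overrides = {**(overrides or {}), m.group(1): pl.String}
            return read_with_fallback(data, overrides)

    # A column that looks numeric for the inference window but later turns
    # textual triggers one retry, after which "val" loads as String:
    csv = "id,val\n" + "1,2\n" * 200 + "3,n.a.\n"
    print(read_with_fallback(csv).schema)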
@@ -373,49 +544,68 @@ def load_file(
      prefix_sheet: bool = False,
      file_format: str | None = None,
      has_header: bool = True,
- ) -> list[tuple[pl.LazyFrame, str, str]]:
-     """Load a single file and return list of sources.
+     infer_schema: bool = True,
+     comment_prefix: str | None = None,
+     skip_lines: int = 0,
+     skip_rows_after_header: int = 0,
+     schema_overrides: dict[str, pl.DataType] | None = None,
+     null_values: list[str] | None = None,
+ ) -> list[tuple[pl.DataFrame, str, str]]:
+     """Load a single file.
  
      For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
      For other files or multiple files, returns one entry per file.
  
+     If a ComputeError occurs during schema inference for a column, attempts to recover
+     by treating that column as a string and retrying the load. This process repeats until
+     all columns are successfully loaded or no further recovery is possible.
+ 
      Args:
          filename: Path to file to load.
          first_sheet: If True, only load first sheet for Excel files. Defaults to False.
          prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
-         file_format: Optional format specifier (i.e., 'csv', 'excel', 'tsv', 'parquet', 'json', 'ndjson') for input files.
+         file_format: Optional format specifier (i.e., 'tsv', 'csv', 'excel', 'parquet', 'json', 'ndjson') for input files.
              By default, infers from file extension.
          has_header: Whether the input files have a header row. Defaults to True.
+         infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
+         comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
+         skip_lines: Number of lines to skip when reading CSV/TSV files. The header will be parsed at this offset. Defaults to 0.
+         skip_rows_after_header: Number of rows to skip after header when reading CSV/TSV files. Defaults to 0.
  
      Returns:
-         List of tuples of (LazyFrame, filename, tabname).
+         List of tuples of (DataFrame, filename, tabname).
      """
      sources = []
  
      if filename == "-":
-         import os
-         from io import StringIO
- 
-         # Read from stdin into memory first (stdin is not seekable)
-         stdin_data = sys.stdin.read()
-         lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator="," if file_format == "csv" else "\t")
- 
-         # Reopen stdin to /dev/tty for proper terminal interaction
-         try:
-             tty = open("/dev/tty")
-             os.dup2(tty.fileno(), sys.stdin.fileno())
-         except (OSError, FileNotFoundError):
-             pass
- 
-         sources.append((lf, f"stdin.{file_format}" if file_format else "stdin", "stdin"))
-         return sources
+         return load_stdin(
+             file_format=file_format,
+             has_header=has_header,
+             infer_schema=infer_schema,
+             comment_prefix=comment_prefix,
+             skip_lines=skip_lines,
+             skip_rows_after_header=skip_rows_after_header,
+             schema_overrides=schema_overrides,
+             null_values=null_values,
+         )
  
      filepath = Path(filename)
  
-     if file_format == "csv":
-         lf = pl.scan_csv(filename, has_header=has_header)
+     # Load based on file format
+     if file_format in ("tsv", "csv"):
+         lf = pl.scan_csv(
+             filename,
+             separator="\t" if file_format == "tsv" else ",",
+             has_header=has_header,
+             infer_schema=infer_schema,
+             comment_prefix=comment_prefix,
+             skip_lines=skip_lines,
+             skip_rows_after_header=skip_rows_after_header,
+             schema_overrides=schema_overrides,
+             null_values=null_values,
+         )
          sources.append((lf, filename, filepath.stem))
-     elif file_format == "excel":
+     elif file_format in ("xlsx", "xls", "excel"):
          if first_sheet:
              # Read only the first sheet for multiple files
              lf = pl.read_excel(filename).lazy()
@@ -426,36 +616,54 @@
          for sheet_name, df in sheets.items():
              tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
              sources.append((df.lazy(), filename, tabname))
-     elif file_format == "tsv":
-         lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
-         sources.append((lf, filename, filepath.stem))
      elif file_format == "parquet":
          lf = pl.scan_parquet(filename)
          sources.append((lf, filename, filepath.stem))
      elif file_format == "json":
-         df = pl.read_json(filename)
-         sources.append((df, filename, filepath.stem))
+         lf = pl.read_json(filename).lazy()
+         sources.append((lf, filename, filepath.stem))
      elif file_format == "ndjson":
-         lf = pl.scan_ndjson(filename)
+         lf = pl.scan_ndjson(filename, schema_overrides=schema_overrides)
          sources.append((lf, filename, filepath.stem))
      else:
-         ext = filepath.suffix.lower()
-         if ext == ".csv":
-             file_format = "csv"
-         elif ext in (".xlsx", ".xls"):
-             file_format = "excel"
-         elif ext in (".tsv", ".tab"):
-             file_format = "tsv"
-         elif ext == ".parquet":
-             file_format = "parquet"
-         elif ext == ".json":
-             file_format = "json"
-         elif ext == ".ndjson":
-             file_format = "ndjson"
-         else:
-             # Default to TSV
-             file_format = "tsv"
+         raise ValueError(f"Unsupported file format: {file_format}. Supported formats are: {SUPPORTED_FORMATS}")
  
-         sources.extend(load_file(filename, first_sheet, prefix_sheet, file_format, has_header))
+     # Attempt to collect, handling ComputeError for schema inference issues
+     try:
+         sources = [(lf.collect(), fn, tn) for lf, fn, tn in sources]
+     except pl.exceptions.ComputeError as ce:
+         # Handle the error and determine retry strategy
+         infer_schema, schema_overrides = handle_compute_error(str(ce), file_format, infer_schema, schema_overrides)
+ 
+         # Retry loading with updated schema overrides
+         return load_file(
+             filename,
+             file_format=file_format,
+             has_header=has_header,
+             infer_schema=infer_schema,
+             comment_prefix=comment_prefix,
+             skip_lines=skip_lines,
+             skip_rows_after_header=skip_rows_after_header,
+             schema_overrides=schema_overrides,
+             null_values=null_values,
+         )
  
      return sources
+ 
+ 
+ def now() -> str:
+     """Get the current local time as a formatted string."""
+     import time
+ 
+     return time.strftime("%m/%d/%Y %H:%M:%S", time.localtime())
+ 
+ 
+ async def sleep_async(seconds: float) -> None:
+     """Async sleep to yield control back to the event loop.
+ 
+     Args:
+         seconds: The number of seconds to sleep.
+     """
+     import asyncio
+ 
+     await asyncio.sleep(seconds)
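Note: the extension-based detection that the old `else:` branch performed inside `load_file` now runs once in `load_dataframe` (see the `@@ -354,16 +367,174 @@` hunk above). Restated standalone for clarity:

    from pathlib import Path

    SUPPORTED_FORMATS = {"tsv", "csv", "excel", "xlsx", "xls", "parquet", "json", "ndjson"}

    def infer_format(filename: str) -> str:
        ext = Path(filename).suffix.lower()
        if ext in (".gz", ".bz2", ".xz"):  # peel off a compression suffix first
            ext = Path(filename).with_suffix("").suffix.lower()
        fmt = ext.removeprefix(".")
        return fmt if fmt in SUPPORTED_FORMATS else "tsv"  # default to TSV

    assert infer_format("data.csv.gz") == "csv"
    assert infer_format("report.XLSX") == "xlsx"
    assert infer_format("notes.txt") == "tsv"

One behavioral wrinkle: `.tab` files previously mapped to TSV explicitly; they now reach TSV only through the default branch, so the result is unchanged.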
@@ -79,10 +79,12 @@ class DataFrameHelpPanel(Widget):
              None
          """
  
-         def update_help(focused_widget: Widget | None):
-             self.update_help(focused_widget)
+         # def update_help(focused_widget: Widget | None):
+         #     self.update_help(focused_widget)
  
-         self.watch(self.screen, "focused", update_help)
+         # self.watch(self.screen, "focused", update_help)
+ 
+         self.update_help(self.screen.focused)
  
      def update_help(self, focused_widget: Widget | None) -> None:
          """Update the help for the focused widget.
@@ -96,7 +98,7 @@
              return
          self.set_class(focused_widget is not None, "-show-help")
          if focused_widget is not None:
-             help = self.app.HELP + "\n" + focused_widget.HELP or ""
+             help = (self.app.HELP or "") + "\n" + (focused_widget.HELP or "")
              if not help:
                  self.remove_class("-show-help")
              try:
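Note: the old expression was an operator-precedence bug: `+` binds tighter than `or`, so the concatenation ran before the `or ""` fallback and raised `TypeError` whenever either `HELP` attribute was `None`. Illustrated in isolation:

    app_help, widget_help = "app bindings", None

    # Old: app_help + "\n" + widget_help or ""   -> TypeError (None concatenated first)
    # New: default each operand before concatenating:
    help_text = (app_help or "") + "\n" + (widget_help or "")
    print(repr(help_text))  # 'app bindings\n'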