dataframe-textual 2.4.2__py3-none-any.whl → 2.9.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -51,15 +51,34 @@ def cli() -> argparse.Namespace:
51
51
  )
52
52
  parser.add_argument("-E", "--ignore-errors", action="store_true", help="Ignore errors when reading CSV/TSV")
53
53
  parser.add_argument(
54
- "-c", "--comment-prefix", nargs="?", const="#", help="Comment lines are skipped when reading CSV/TSV"
54
+ "-c",
55
+ "--comment-prefix",
56
+ metavar="PREFIX",
57
+ nargs="?",
58
+ const="#",
59
+ help="Comment lines starting with `PREFIX` are skipped when reading CSV/TSV",
55
60
  )
56
61
  parser.add_argument(
57
- "-q", "--quote-char", nargs="?", const=None, default='"', help="Quote character for reading CSV/TSV"
62
+ "-q",
63
+ "--quote-char",
64
+ metavar="C",
65
+ nargs="?",
66
+ const=None,
67
+ default='"',
68
+ help="Use `C` as quote character for reading CSV/TSV",
58
69
  )
59
- parser.add_argument("-l", "--skip-lines", type=int, default=0, help="Skip lines when reading CSV/TSV")
60
70
  parser.add_argument(
61
- "-a", "--skip-rows-after-header", type=int, default=0, help="Skip rows after header when reading CSV/TSV"
71
+ "-L", "--skip-lines", metavar="N", type=int, default=0, help="Skip first N lines when reading CSV/TSV"
62
72
  )
73
+ parser.add_argument(
74
+ "-A",
75
+ "--skip-rows-after-header",
76
+ metavar="N",
77
+ type=int,
78
+ default=0,
79
+ help="Skip N rows after header when reading CSV/TSV",
80
+ )
81
+ parser.add_argument("-N", "--n-rows", metavar="N", type=int, help="Stop after reading N rows from CSV/TSV")
63
82
  parser.add_argument("-n", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
64
83
 
65
84
  args = parser.parse_args()
@@ -98,6 +117,7 @@ def main() -> None:
98
117
  null_values=args.null,
99
118
  ignore_errors=args.ignore_errors,
100
119
  truncate_ragged_lines=args.truncate_ragged_lines,
120
+ n_rows=args.n_rows,
101
121
  )
102
122
  app = DataFrameViewer(*sources)
103
123
  app.run()
@@ -12,7 +12,7 @@ import polars as pl
12
12
  from rich.text import Text
13
13
 
14
14
  # Supported file formats
15
- SUPPORTED_FORMATS = ["tsv", "csv", "psv", "excel", "parquet", "json", "ndjson"]
15
+ SUPPORTED_FORMATS = ["tsv", "csv", "psv", "xlsx", "xls", "parquet", "json", "ndjson"]
16
16
 
17
17
 
18
18
  # Boolean string mappings
@@ -485,6 +485,7 @@ def load_dataframe(
485
485
  null_values: list[str] | None = None,
486
486
  ignore_errors: bool = False,
487
487
  truncate_ragged_lines: bool = False,
488
+ n_rows: int | None = None,
488
489
  ) -> list[Source]:
489
490
  """Load DataFrames from file specifications.
490
491
 
@@ -502,6 +503,8 @@ def load_dataframe(
502
503
  skip_rows_after_header: Number of rows to skip after header. Defaults to 0.
503
504
  null_values: List of values to interpret as null when reading CSV/TSV files. Defaults to None.
504
505
  ignore_errors: Whether to ignore errors when reading CSV/TSV files. Defaults to False.
506
+ truncate_ragged_lines: Whether to truncate ragged lines when reading CSV/TSV files. Defaults to False.
507
+ n_rows: Number of rows to read from CSV/TSV files. Defaults to None (read all rows).
505
508
 
506
509
  Returns:
507
510
  List of `Source` objects.
@@ -531,8 +534,6 @@ def load_dataframe(
531
534
  ext = Path(filename).with_suffix("").suffix.lower()
532
535
 
533
536
  fmt = ext.removeprefix(".")
534
- if fmt in ("xls", "xlsx"):
535
- fmt = "excel"
536
537
 
537
538
  # Default to TSV
538
539
  if not fmt or fmt not in SUPPORTED_FORMATS:
@@ -553,6 +554,7 @@ def load_dataframe(
553
554
  null_values=null_values,
554
555
  ignore_errors=ignore_errors,
555
556
  truncate_ragged_lines=truncate_ragged_lines,
557
+ n_rows=n_rows,
556
558
  )
557
559
  )
558
560
 
@@ -637,6 +639,7 @@ def load_file(
637
639
  null_values: list[str] | None = None,
638
640
  ignore_errors: bool = False,
639
641
  truncate_ragged_lines: bool = False,
642
+ n_rows: int | None = None,
640
643
  ) -> list[Source]:
641
644
  """Load a single file.
642
645
 
@@ -662,6 +665,8 @@ def load_file(
662
665
  schema_overrides: Optional dictionary of column name to Polars data type to override inferred schema.
663
666
  null_values: List of values to interpret as null when reading CSV/TSV files. Defaults to None.
664
667
  ignore_errors: Whether to ignore errors when reading CSV/TSV files.
668
+ truncate_ragged_lines: Whether to truncate ragged lines when reading CSV/TSV files. Defaults to False.
669
+ n_rows: Number of rows to read from CSV/TSV files. Defaults to None (read all rows).
665
670
 
666
671
  Returns:
667
672
  List of `Source` objects.
@@ -686,9 +691,10 @@ def load_file(
686
691
  null_values=null_values,
687
692
  ignore_errors=ignore_errors,
688
693
  truncate_ragged_lines=truncate_ragged_lines,
694
+ n_rows=n_rows,
689
695
  )
690
696
  data.append(Source(lf, filename, filepath.stem))
691
- elif file_format == "excel":
697
+ elif file_format in ("xlsx", "xls"):
692
698
  if first_sheet:
693
699
  # Read only the first sheet for multiple files
694
700
  lf = pl.read_excel(source).lazy()
@@ -742,6 +748,8 @@ def load_file(
742
748
  schema_overrides=schema_overrides,
743
749
  null_values=null_values,
744
750
  ignore_errors=ignore_errors,
751
+ truncate_ragged_lines=truncate_ragged_lines,
752
+ n_rows=n_rows,
745
753
  )
746
754
 
747
755
  return data