dataframe-textual 1.5.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -50,7 +50,7 @@ def cli() -> argparse.Namespace:
50
50
  parser.add_argument(
51
51
  "-a", "--skip-rows-after-header", type=int, default=0, help="Skip rows after header when reading CSV/TSV"
52
52
  )
53
- parser.add_argument("-u", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
53
+ parser.add_argument("-n", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
54
54
 
55
55
  args = parser.parse_args()
56
56
  if args.files is None:
@@ -480,23 +480,24 @@ def load_dataframe(
480
480
  else:
481
481
  source = filename
482
482
 
483
- # Load from file
484
- # Determine file format if not specified
485
- if not file_format:
483
+ # If not specified, determine file format (may be different for each file)
484
+ fmt = file_format
485
+ if not fmt:
486
486
  ext = Path(filename).suffix.lower()
487
- if ext == ".gz" or ext == ".bz2" or ext == ".xz":
487
+ if ext == ".gz":
488
488
  ext = Path(filename).with_suffix("").suffix.lower()
489
489
  fmt = ext.removeprefix(".")
490
490
 
491
491
  # Default to TSV
492
- file_format = fmt if fmt in SUPPORTED_FORMATS else "tsv"
492
+ if not fmt or fmt not in SUPPORTED_FORMATS:
493
+ fmt = "tsv"
493
494
 
494
495
  # Load the file
495
496
  data.extend(
496
497
  load_file(
497
498
  source,
498
499
  prefix_sheet=prefix_sheet,
499
- file_format=file_format,
500
+ file_format=fmt,
500
501
  has_header=has_header,
501
502
  infer_schema=infer_schema,
502
503
  comment_prefix=comment_prefix,
@@ -611,11 +612,18 @@ def load_file(
611
612
  List of `Source` objects.
612
613
  """
613
614
  data: list[Source] = []
615
+
614
616
  filename = f"stdin.{file_format}" if isinstance(source, StringIO) else source
615
617
  filepath = Path(filename)
616
618
 
619
+ if not file_format:
620
+ ext = filepath.suffix.lower()
621
+ if ext == ".gz":
622
+ ext = Path(filename).with_suffix("").suffix.lower()
623
+ file_format = ext.removeprefix(".")
624
+
617
625
  # Load based on file format
618
- if file_format in ("tsv", "csv"):
626
+ if file_format in ("csv", "tsv"):
619
627
  lf = pl.scan_csv(
620
628
  source,
621
629
  separator="\t" if file_format == "tsv" else ",",