dataframe-textual 1.4.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,14 +39,18 @@ def cli() -> argparse.Namespace:
     parser.add_argument(
         "-I", "--no-inferrence", action="store_true", help="Do not infer data types when reading CSV/TSV"
     )
+    parser.add_argument("-E", "--ignore-errors", action="store_true", help="Ignore errors when reading CSV/TSV")
     parser.add_argument(
-        "-C", "--comment-prefix", nargs="?", const="#", help="Comment lines are skipped when reading CSV/TSV"
+        "-c", "--comment-prefix", nargs="?", const="#", help="Comment lines are skipped when reading CSV/TSV"
     )
-    parser.add_argument("-L", "--skip-lines", type=int, default=0, help="Skip lines when reading CSV/TSV")
     parser.add_argument(
-        "-K", "--skip-rows-after-header", type=int, default=0, help="Skip rows after header when reading CSV/TSV"
+        "-q", "--quote-char", nargs="?", const=None, default='"', help="Quote character for reading CSV/TSV"
     )
-    parser.add_argument("-U", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
+    parser.add_argument("-l", "--skip-lines", type=int, default=0, help="Skip lines when reading CSV/TSV")
+    parser.add_argument(
+        "-a", "--skip-rows-after-header", type=int, default=0, help="Skip rows after header when reading CSV/TSV"
+    )
+    parser.add_argument("-n", "--null", nargs="+", help="Values to interpret as null values when reading CSV/TSV")
 
     args = parser.parse_args()
     if args.files is None:
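
The new `-q/--quote-char` flag relies on argparse's three-state pattern: omitting the flag keeps the default `"`, a bare `-q` stores `const=None` (disabling quote handling), and `-q X` stores an explicit character. A minimal standalone sketch of that behavior (demo only, not package code):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-q", "--quote-char", nargs="?", const=None, default='"')

    print(parser.parse_args([]).quote_char)           # '"'  -> default quoting
    print(parser.parse_args(["-q"]).quote_char)       # None -> quoting disabled
    print(parser.parse_args(["-q", "'"]).quote_char)  # "'"  -> explicit quote char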
@@ -78,9 +82,11 @@ def main() -> None:
         has_header=not args.no_header,
         infer_schema=not args.no_inferrence,
         comment_prefix=args.comment_prefix,
+        quote_char=args.quote_char,
         skip_lines=args.skip_lines,
         skip_rows_after_header=args.skip_rows_after_header,
         null_values=args.null,
+        ignore_errors=args.ignore_errors,
     )
     app = DataFrameViewer(*sources)
     app.run()
@@ -1,8 +1,10 @@
 """Common utilities and constants for dataframe_viewer."""
 
+import os
 import re
 import sys
 from dataclasses import dataclass
+from io import StringIO
 from pathlib import Path
 from typing import Any
 
@@ -34,6 +36,16 @@ NULL_DISPLAY = "-"
 
 @dataclass
 class DtypeClass:
+    """Data type class configuration.
+
+    Attributes:
+        gtype: Generic, high-level type as a string.
+        style: Style string for display purposes.
+        justify: Text justification for display.
+        itype: Input type for validation.
+        convert: Conversion function for the data type.
+    """
+
     gtype: str  # generic, high-level type
     style: str
     justify: str
@@ -71,7 +83,6 @@ STYLES = {
 }
 # fmt: on
 
-
 # Subscript digits mapping for sort indicators
 SUBSCRIPT_DIGITS = {
     0: "₀",
@@ -93,6 +104,21 @@ CURSOR_TYPES = ["row", "column", "cell"]
 RIDX = "^_ridx_^"
 
 
+@dataclass
+class Source:
+    """Data source representation.
+
+    Attributes:
+        frame: The Polars DataFrame or LazyFrame.
+        filename: The name of the source file.
+        tabname: The name of the tab to display.
+    """
+
+    frame: pl.DataFrame | pl.LazyFrame
+    filename: str
+    tabname: str
+
+
 def DtypeConfig(dtype: pl.DataType) -> DtypeClass:
     """Get the DtypeClass configuration for a given Polars data type.
 
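The loaders below now return these records rather than bare `(frame, filename, tabname)` tuples, so call sites move from tuple unpacking to attribute access. A hypothetical construction, assuming `polars` is installed and `Source` is imported from this module:

    import polars as pl

    src = Source(frame=pl.DataFrame({"a": [1, 2]}), filename="demo.csv", tabname="demo")
    print(src.tabname, src.frame.shape)  # demo (2, 1)
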
@@ -222,14 +248,91 @@ def get_next_item(lst: list[Any], current, offset=1) -> Any:
     return lst[next_index]
 
 
-def parse_polars_expression(expression: str, columns: list[str], current_col_idx: int) -> str:
+def parse_placeholders(template: str, columns: list[str], current_cidx: int) -> list[str | pl.Expr]:
+    """Parse template string into a list of strings or Polars expressions
+
+    Supports multiple placeholder types:
+    - `$_` - Current column (based on current_cidx parameter)
+    - `$#` - Row index (1-based, requires '^__ridx__^' column to be present)
+    - `$1`, `$2`, etc. - Column index (1-based)
+    - `$name` - Column name (e.g., `$product_id`)
+
+    Args:
+        template: The template string containing placeholders and literal text
+        columns: List of column names in the dataframe
+        current_cidx: 0-based index of the current column for `$_` references in the columns list
+
+    Returns:
+        A list of strings (literal text) and Polars expressions (for column references)
+
+    Raises:
+        ValueError: If invalid column index or non-existent column name is referenced
+    """
+    if "$" not in template or template.endswith("$"):
+        return [template]
+
+    # Regex matches: $_ or $\d+ or $\w+ (column names)
+    placeholder_pattern = r"\$(_|#|\d+|[a-zA-Z_]\w*)"
+    placeholders = re.finditer(placeholder_pattern, template)
+
+    parts = []
+    last_end = 0
+
+    # Get current column name for $_ references
+    try:
+        col_name = columns[current_cidx]
+    except IndexError:
+        raise ValueError(f"Current column index {current_cidx} is out of range for columns list")
+
+    for match in placeholders:
+        # Add literal text before this placeholder
+        if match.start() > last_end:
+            parts.append(template[last_end : match.start()])
+
+        placeholder = match.group(1)  # Extract content after '$'
+
+        if placeholder == "_":
+            # $_ refers to current column (where cursor was)
+            parts.append(pl.col(col_name))
+        elif placeholder == "#":
+            # $# refers to row index (1-based)
+            parts.append(pl.col(RIDX))
+        elif placeholder.isdigit():
+            # $1, $2, etc. refer to columns by 1-based position index
+            col_idx = int(placeholder) - 1  # Convert to 0-based
+            try:
+                col_ref = columns[col_idx]
+                parts.append(pl.col(col_ref))
+            except IndexError:
+                raise ValueError(f"Invalid column index: ${placeholder} (valid range: $1 to ${len(columns)})")
+        else:
+            # $name refers to column by name
+            if placeholder in columns:
+                parts.append(pl.col(placeholder))
+            else:
+                raise ValueError(f"Column not found: ${placeholder} (available columns: {', '.join(columns)})")
+
+        last_end = match.end()
+
+    # Add remaining literal text after last placeholder
+    if last_end < len(template):
+        parts.append(template[last_end:])
+
+    # If no placeholders found, treat entire template as literal
+    if not parts:
+        parts = [template]
+
+    return parts
+
+
+def parse_polars_expression(expression: str, columns: list[str], current_cidx: int) -> str:
     """Parse and convert an expression to Polars syntax.
 
     Replaces column references with Polars col() expressions:
     - $_ - Current selected column
     - $# - Row index (1-based, requires '^__ridx__^' column to be present)
-    - $1, $2, etc. - Column by 1-based index
-    - $col_name - Column by name (valid identifier starting with _ or letter)
+    - $1, $2, etc. - Column index (1-based)
+    - $col_name - Column name (valid identifier starting with _ or letter)
 
     Examples:
     - "$_ > 50" -> "pl.col('current_col') > 50"
@@ -241,7 +344,7 @@ def parse_polars_expression(expression: str, columns: list[str], current_col_idx
     Args:
         expression: The input expression as a string.
         columns: The list of column names in the DataFrame.
-        current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.
+        current_cidx: The index of the currently selected column (0-based). Used for $_ reference.
 
     Returns:
         A Python expression string with $references replaced by pl.col() calls.
@@ -258,38 +361,18 @@ def parse_polars_expression(expression: str, columns: list[str], current_col_idx
         # Return as a literal string
         return f"pl.lit({expression})"
 
-    # Pattern to match $ followed by either:
-    # - _ (single underscore)
-    # - # (hash for row index)
-    # - digits (integer)
-    # - identifier (starts with letter or _, followed by letter/digit/_)
-    pattern = r"\$(_|#|\d+|[a-zA-Z_]\w*)"
-
-    def replace_column_ref(match):
-        col_ref = match.group(1)
-
-        if col_ref == "_":
-            # Current selected column
-            col_name = columns[current_col_idx]
-        elif col_ref == "#":
-            # RIDX is used to store 0-based row index; add 1 for 1-based index
-            return f"(pl.col('{RIDX}') + 1)"
-        elif col_ref.isdigit():
-            # Column by 1-based index
-            col_idx = int(col_ref) - 1
-            if col_idx < 0 or col_idx >= len(columns):
-                raise ValueError(f"Column index out of range: ${col_ref}")
-            col_name = columns[col_idx]
-        else:
-            # Column by name
-            if col_ref not in columns:
-                raise ValueError(f"Column not found: ${col_ref}")
-            col_name = col_ref
+    parts = parse_placeholders(expression, columns, current_cidx)
 
-        return f"pl.col('{col_name}')"
+    result = []
+    for part in parts:
+        if isinstance(part, pl.Expr):
+            col = part.meta.output_name()
 
-    result = re.sub(pattern, replace_column_ref, expression)
-    return result
+            result.append(f"pl.col('{col}')")
+        else:
+            result.append(part)
+
+    return "".join(result)
 
 
 def tentative_expr(term: str) -> bool:
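
Per the docstrings above, the two functions compose as follows. A sketch assuming both are importable from the package's common module (the exact import path is not shown in this diff):

    import polars as pl

    columns = ["name", "price", "qty"]

    parse_placeholders("$price * $3", columns, current_cidx=1)
    # -> [pl.col("price"), " * ", pl.col("qty")]

    parse_polars_expression("$_ > 50", columns, current_cidx=1)
    # -> "pl.col('price') > 50"  (column name recovered via Expr.meta.output_name())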
@@ -354,10 +437,12 @@ def load_dataframe(
     has_header: bool = True,
     infer_schema: bool = True,
     comment_prefix: str | None = None,
+    quote_char: str | None = '"',
     skip_lines: int = 0,
     skip_rows_after_header: int = 0,
     null_values: list[str] | None = None,
-) -> list[tuple[pl.DataFrame, str, str]]:
+    ignore_errors: bool = False,
+) -> list[Source]:
     """Load DataFrames from file specifications.
 
     Handles loading from multiple files, single files, or stdin. For Excel files,
@@ -369,42 +454,62 @@ def load_dataframe(
         has_header: Whether the input files have a header row. Defaults to True.
         infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
         comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
+        quote_char: Quote character for reading CSV/TSV files. Defaults to '"'.
         skip_lines: Number of lines to skip when reading CSV/TSV files. Defaults to 0.
         skip_rows_after_header: Number of rows to skip after header. Defaults to 0.
+        null_values: List of values to interpret as null when reading CSV/TSV files. Defaults to None.
+        ignore_errors: Whether to ignore errors when reading CSV/TSV files. Defaults to False.
 
     Returns:
-        List of tuples of (DataFrame, filename, tabname) ready for display.
+        List of `Source` objects.
     """
-    sources = []
+    data: list[Source] = []
     prefix_sheet = len(filenames) > 1
 
     for filename in filenames:
-        # Determine file format if not specified
-        if not file_format:
+        if filename == "-":
+            source = StringIO(sys.stdin.read())
+            file_format = file_format or "tsv"
+
+            # Reopen stdin to /dev/tty for proper terminal interaction
+            try:
+                tty = open("/dev/tty")
+                os.dup2(tty.fileno(), sys.stdin.fileno())
+            except (OSError, FileNotFoundError):
+                pass
+        else:
+            source = filename
+
+        # If not specified, determine file format (may be different for each file)
+        fmt = file_format
+        if not fmt:
             ext = Path(filename).suffix.lower()
-            if ext == ".gz" or ext == ".bz2" or ext == ".xz":
+            if ext == ".gz":
                 ext = Path(filename).with_suffix("").suffix.lower()
             fmt = ext.removeprefix(".")
 
         # Default to TSV
-        file_format = fmt if fmt in SUPPORTED_FORMATS else "tsv"
+        if not fmt or fmt not in SUPPORTED_FORMATS:
+            fmt = "tsv"
 
-        # Load each file
-        sources.extend(
+        # Load the file
+        data.extend(
             load_file(
-                filename,
+                source,
                 prefix_sheet=prefix_sheet,
-                file_format=file_format,
+                file_format=fmt,
                 has_header=has_header,
                 infer_schema=infer_schema,
                 comment_prefix=comment_prefix,
+                quote_char=quote_char,
                 skip_lines=skip_lines,
                 skip_rows_after_header=skip_rows_after_header,
                 null_values=null_values,
+                ignore_errors=ignore_errors,
             )
         )
 
-    return sources
+    return data
 
 
 RE_COMPUTE_ERROR = re.compile(r"at column '(.*?)' \(column number \d+\)")
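
The stdin branch above drains the pipe into memory and then re-points fd 0 at the controlling terminal so the TUI can still read keystrokes. The same pattern in isolation (a sketch; note that `FileNotFoundError` is a subclass of `OSError`, so one handler suffices):

    import os
    import sys
    from io import StringIO

    def read_stdin_then_reclaim_tty() -> StringIO:
        buf = StringIO(sys.stdin.read())  # drain the pipe (stdin is not seekable)
        try:
            tty = open("/dev/tty")        # controlling terminal, if present
            os.dup2(tty.fileno(), sys.stdin.fileno())
        except OSError:                   # no tty (e.g. under cron); keys unavailable
            pass
        return buf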
@@ -435,12 +540,19 @@ def handle_compute_error(
     """
     # Already disabled schema inference, cannot recover
     if not infer_schema:
-        print(f"Error loading with schema inference disabled:\n{err_msg}", file=sys.stderr)
+        print(f"Error loading even with schema inference disabled:\n{err_msg}", file=sys.stderr)
+
+        if "CSV malformed" in err_msg:
+            print(
+                "\nSometimes quote characters might be mismatched. Try again with `-q` or `-E` to ignore errors",
+                file=sys.stderr,
+            )
+
         sys.exit(1)
 
     # Schema mismatch error
     if "found more fields than defined in 'Schema'" in err_msg:
-        print(f"Input might be malformed:\n{err_msg}", file=sys.stderr)
+        print(f"Input might be malformed:\n{err_msg}.\nTry again with `-E` to ignore errors", file=sys.stderr)
         sys.exit(1)
 
     # ComputeError: could not parse `n.a. as of 04.01.022` as `dtype` i64 at column 'PubChemCID' (column number 16)
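
The recovery path behind these messages is driven by `RE_COMPUTE_ERROR`, shown above: the offending column is pulled out of the error text and forced to string before the caller retries the load. A simplified sketch of that idea (hypothetical helper, not package code):

    import re
    import polars as pl

    RE_COMPUTE_ERROR = re.compile(r"at column '(.*?)' \(column number \d+\)")

    def recover_overrides(err_msg: str, overrides: dict[str, pl.DataType] | None) -> dict:
        overrides = dict(overrides or {})
        m = RE_COMPUTE_ERROR.search(err_msg)
        if m:
            overrides[m.group(1)] = pl.String  # re-read the bad column as text
        return overrides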
@@ -456,101 +568,21 @@ def handle_compute_error(
     return infer_schema, schema_overrides
 
 
-def load_stdin(
-    stdin_data=None,
-    file_format: str | None = None,
-    has_header: bool = True,
-    infer_schema: bool = True,
-    comment_prefix: str | None = None,
-    skip_lines: int = 0,
-    skip_rows_after_header: int = 0,
-    schema_overrides: dict[str, pl.DataType] | None = None,
-    null_values: list[str] | None = None,
-) -> list[tuple[pl.DataFrame, str, str]]:
-    """Load DataFrame from stdin.
-
-    If a ComputeError occurs during schema inference for a column, attempts to recover
-    by treating that column as a string and retrying the load. This process repeats until
-    all columns are successfully loaded or no further recovery is possible.
-
-    Args:
-        stdin_data: Optional stdin data as string. If None, read from sys.stdin.
-        file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
-        has_header: Whether the input files have a header row. Defaults to True.
-        infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
-        comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
-        skip_lines: Number of lines to skip when reading CSV/TSV files. Defaults to 0.
-        skip_rows_after_header: Number of rows to skip after header. Defaults to 0.
-
-    Returns:
-        List of tuples of (DataFrame, filename, tabname) ready for display.
-    """
-    import os
-    from io import StringIO
-
-    sources = []
-
-    # Read from stdin into memory first (stdin is not seekable)
-    if stdin_data is None:
-        stdin_data = sys.stdin.read()
-
-    # Reopen stdin to /dev/tty for proper terminal interaction
-    try:
-        tty = open("/dev/tty")
-        os.dup2(tty.fileno(), sys.stdin.fileno())
-    except (OSError, FileNotFoundError):
-        pass
-
-    lf = pl.scan_csv(
-        StringIO(stdin_data),
-        separator="," if file_format == "csv" else "\t",
-        has_header=has_header,
-        infer_schema=infer_schema,
-        comment_prefix=comment_prefix,
-        skip_lines=skip_lines,
-        skip_rows_after_header=skip_rows_after_header,
-        schema_overrides=schema_overrides,
-        null_values=null_values,
-    )
-
-    sources = [(lf, f"stdin.{file_format}" if file_format else "stdin", "stdin")]
-
-    # Attempt to collect, handling ComputeError for schema inference issues
-    try:
-        sources = [(lf.collect(), fn, tn) for lf, fn, tn in sources]
-    except pl.exceptions.ComputeError as ce:
-        # Handle the error and determine retry strategy
-        infer_schema, schema_overrides = handle_compute_error(str(ce), file_format, infer_schema, schema_overrides)
-
-        # Retry loading with updated schema overrides
-        return load_stdin(
-            stdin_data,
-            file_format=file_format,
-            has_header=has_header,
-            infer_schema=infer_schema,
-            comment_prefix=comment_prefix,
-            skip_lines=skip_lines,
-            skip_rows_after_header=skip_rows_after_header,
-            schema_overrides=schema_overrides,
-            null_values=null_values,
-        )
-
-    return sources
-
-
 def load_file(
-    filename: str,
+    source: str | StringIO,
     first_sheet: bool = False,
     prefix_sheet: bool = False,
     file_format: str | None = None,
     has_header: bool = True,
     infer_schema: bool = True,
     comment_prefix: str | None = None,
+    quote_char: str | None = '"',
     skip_lines: int = 0,
     skip_rows_after_header: int = 0,
     schema_overrides: dict[str, pl.DataType] | None = None,
     null_values: list[str] | None = None,
-) -> list[tuple[pl.DataFrame, str, str]]:
+    ignore_errors: bool = False,
+) -> list[Source]:
     """Load a single file.
 
     For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
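
With `source: str | StringIO`, one loader now serves both file paths and pre-drained stdin, which is what allowed `load_stdin` to be deleted above. A hypothetical call, assuming `load_file` is imported from this module:

    from io import StringIO

    srcs = load_file(StringIO("a\tb\n1\t2\n"), file_format="tsv")
    print(srcs[0].filename, srcs[0].tabname)  # stdin.tsv stdin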
@@ -569,86 +601,92 @@ def load_file(
         has_header: Whether the input files have a header row. Defaults to True.
         infer_schema: Whether to infer data types for CSV/TSV files. Defaults to True.
         comment_prefix: Character(s) indicating comment lines in CSV/TSV files. Defaults to None.
+        quote_char: Quote character for reading CSV/TSV files. Defaults to '"'.
         skip_lines: Number of lines to skip when reading CSV/TSV files. The header will be parsed at this offset. Defaults to 0.
         skip_rows_after_header: Number of rows to skip after header when reading CSV/TSV files. Defaults to 0.
+        schema_overrides: Optional dictionary of column name to Polars data type to override inferred schema.
+        null_values: List of values to interpret as null when reading CSV/TSV files. Defaults to None.
+        ignore_errors: Whether to ignore errors when reading CSV/TSV files.
 
     Returns:
-        List of tuples of (DataFrame, filename, tabname).
+        List of `Source` objects.
     """
-    sources = []
-
-    if filename == "-":
-        return load_stdin(
-            file_format=file_format,
-            has_header=has_header,
-            infer_schema=infer_schema,
-            comment_prefix=comment_prefix,
-            skip_lines=skip_lines,
-            skip_rows_after_header=skip_rows_after_header,
-            schema_overrides=schema_overrides,
-            null_values=null_values,
-        )
+    data: list[Source] = []
 
+    filename = f"stdin.{file_format}" if isinstance(source, StringIO) else source
     filepath = Path(filename)
 
+    if not file_format:
+        ext = filepath.suffix.lower()
+        if ext == ".gz":
+            ext = Path(filename).with_suffix("").suffix.lower()
+        file_format = ext.removeprefix(".")
+
     # Load based on file format
-    if file_format in ("tsv", "csv"):
+    if file_format in ("csv", "tsv"):
         lf = pl.scan_csv(
-            filename,
+            source,
             separator="\t" if file_format == "tsv" else ",",
             has_header=has_header,
             infer_schema=infer_schema,
             comment_prefix=comment_prefix,
+            quote_char=quote_char,
             skip_lines=skip_lines,
             skip_rows_after_header=skip_rows_after_header,
             schema_overrides=schema_overrides,
             null_values=null_values,
+            ignore_errors=ignore_errors,
         )
-        sources.append((lf, filename, filepath.stem))
+        data.append(Source(lf, filename, filepath.stem))
     elif file_format in ("xlsx", "xls", "excel"):
         if first_sheet:
             # Read only the first sheet for multiple files
-            lf = pl.read_excel(filename).lazy()
-            sources.append((lf, filename, filepath.stem))
+            lf = pl.read_excel(source).lazy()
+            data.append(Source(lf, filename, filepath.stem))
         else:
             # For single file, expand all sheets
-            sheets = pl.read_excel(filename, sheet_id=0)
+            sheets = pl.read_excel(source, sheet_id=0)
             for sheet_name, df in sheets.items():
                 tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
-                sources.append((df.lazy(), filename, tabname))
+                data.append(Source(df.lazy(), filename, tabname))
     elif file_format == "parquet":
-        lf = pl.scan_parquet(filename)
-        sources.append((lf, filename, filepath.stem))
+        lf = pl.scan_parquet(source)
+        data.append(Source(lf, filename, filepath.stem))
     elif file_format == "json":
-        lf = pl.read_json(filename).lazy()
-        sources.append((lf, filename, filepath.stem))
+        lf = pl.read_json(source).lazy()
+        data.append(Source(lf, filename, filepath.stem))
     elif file_format == "ndjson":
-        lf = pl.scan_ndjson(filename, schema_overrides=schema_overrides)
-        sources.append((lf, filename, filepath.stem))
+        lf = pl.scan_ndjson(source, schema_overrides=schema_overrides)
+        data.append(Source(lf, filename, filepath.stem))
     else:
         raise ValueError(f"Unsupported file format: {file_format}. Supported formats are: {SUPPORTED_FORMATS}")
 
     # Attempt to collect, handling ComputeError for schema inference issues
     try:
-        sources = [(lf.collect(), fn, tn) for lf, fn, tn in sources]
+        data = [Source(src.frame.collect(), src.filename, src.tabname) for src in data]
     except pl.exceptions.ComputeError as ce:
         # Handle the error and determine retry strategy
         infer_schema, schema_overrides = handle_compute_error(str(ce), file_format, infer_schema, schema_overrides)
 
         # Retry loading with updated schema overrides
+        if isinstance(source, StringIO):
+            source.seek(0)
+
         return load_file(
-            filename,
+            source,
             file_format=file_format,
             has_header=has_header,
             infer_schema=infer_schema,
             comment_prefix=comment_prefix,
+            quote_char=quote_char,
             skip_lines=skip_lines,
             skip_rows_after_header=skip_rows_after_header,
             schema_overrides=schema_overrides,
             null_values=null_values,
+            ignore_errors=ignore_errors,
        )
 
-    return sources
+    return data
 
 
 def now() -> str:
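
One detail worth noting in the Excel branch above: with `sheet_id=0`, `pl.read_excel` returns a dict mapping sheet names to DataFrames, which is what lets a single workbook expand into one tab per sheet. A minimal sketch (`workbook.xlsx` is a placeholder path):

    import polars as pl

    sheets = pl.read_excel("workbook.xlsx", sheet_id=0)  # {sheet_name: DataFrame}
    for sheet_name, df in sheets.items():
        print(sheet_name, df.shape)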