dataframe-textual 1.10.1__py3-none-any.whl → 1.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,7 +37,7 @@ def cli() -> argparse.Namespace:
37
37
  help="Specify that input files have no header row when reading CSV/TSV",
38
38
  )
39
39
  parser.add_argument(
40
- "-I", "--no-inferrence", action="store_true", help="Do not infer data types when reading CSV/TSV"
40
+ "-I", "--no-inference", action="store_true", help="Do not infer data types when reading CSV/TSV"
41
41
  )
42
42
  parser.add_argument("-E", "--ignore-errors", action="store_true", help="Ignore errors when reading CSV/TSV")
43
43
  parser.add_argument(
@@ -80,7 +80,7 @@ def main() -> None:
80
80
  args.files,
81
81
  file_format=args.format,
82
82
  has_header=not args.no_header,
83
- infer_schema=not args.no_inferrence,
83
+ infer_schema=not args.no_inference,
84
84
  comment_prefix=args.comment_prefix,
85
85
  quote_char=args.quote_char,
86
86
  skip_lines=args.skip_lines,
@@ -12,7 +12,7 @@ import polars as pl
12
12
  from rich.text import Text
13
13
 
14
14
  # Supported file formats
15
- SUPPORTED_FORMATS = {"tsv", "csv", "excel", "xlsx", "xls", "parquet", "json", "ndjson"}
15
+ SUPPORTED_FORMATS = {"tsv", "tab", "csv", "excel", "xlsx", "xls", "parquet", "json", "ndjson"}
16
16
 
17
17
 
18
18
  # Boolean string mappings
@@ -34,6 +34,29 @@ NULL = "NULL"
34
34
  NULL_DISPLAY = "-"
35
35
 
36
36
 
37
+ def format_float(value: float, thousand_separator: bool = False, precision: int = 2) -> str:
38
+ """Format a float value, keeping integers without decimal point.
39
+
40
+ Args:
41
+ value: The float value to format.
42
+ thousand_separator: Whether to include thousand separators. Defaults to False.
43
+
44
+ Returns:
45
+ The formatted float as a string.
46
+ """
47
+
48
+ if (val := int(value)) == value:
49
+ if precision > 0:
50
+ return f"{val:,}" if thousand_separator else str(val)
51
+ else:
52
+ return f"{val:,.{-precision}f}" if thousand_separator else f"{val:.{-precision}f}"
53
+ else:
54
+ if precision > 0:
55
+ return f"{value:,.{precision}f}" if thousand_separator else f"{value:.{precision}f}"
56
+ else:
57
+ return f"{value:,f}" if thousand_separator else str(value)
58
+
59
+
37
60
  @dataclass
38
61
  class DtypeClass:
39
62
  """Data type class configuration.
@@ -52,6 +75,35 @@ class DtypeClass:
52
75
  itype: str
53
76
  convert: Any
54
77
 
78
+ def format(
79
+ self, val: Any, style: str | None = None, justify: str | None = None, thousand_separator: bool = False
80
+ ) -> str:
81
+ """Format the value according to its data type.
82
+
83
+ Args:
84
+ val: The value to format.
85
+
86
+ Returns:
87
+ The formatted value as a Text.
88
+ """
89
+ # Format the value
90
+ if val is None:
91
+ text_val = NULL_DISPLAY
92
+ elif self.gtype == "integer" and thousand_separator:
93
+ text_val = f"{val:,}"
94
+ elif self.gtype == "float":
95
+ text_val = format_float(val, thousand_separator)
96
+ else:
97
+ text_val = str(val)
98
+
99
+ return Text(
100
+ text_val,
101
+ style="" if style == "" else (style or self.style),
102
+ justify="" if justify == "" else (justify or self.justify),
103
+ overflow="ellipsis",
104
+ no_wrap=True,
105
+ )
106
+
55
107
 
56
108
  # itype is used by Input widget for input validation
57
109
  # fmt: off
@@ -143,27 +195,7 @@ def DtypeConfig(dtype: pl.DataType) -> DtypeClass:
143
195
  return STYLES[pl.Unknown]
144
196
 
145
197
 
146
- def format_float(value: float, thousand_separator: bool = False, precision: int = 2) -> str:
147
- """Format a float value, keeping integers without decimal point.
148
-
149
- Args:
150
- val: The float value to format.
151
- thousand_separator: Whether to include thousand separators. Defaults to False.
152
-
153
- Returns:
154
- The formatted float as a string.
155
- """
156
-
157
- if (val := int(value)) == value:
158
- return f"{val:,}" if thousand_separator else str(val)
159
- else:
160
- if precision > 0:
161
- return f"{value:,.{precision}f}" if thousand_separator else f"{value:.{precision}f}"
162
- else:
163
- return f"{value:,f}" if thousand_separator else str(value)
164
-
165
-
166
- def format_row(vals, dtypes, styles=None, apply_justify=True, thousand_separator=False) -> list[Text]:
198
+ def format_row(vals, dtypes, styles: list[str | None] | None = None, thousand_separator=False) -> list[Text]:
167
199
  """Format a single row with proper styling and justification.
168
200
 
169
201
  Converts raw row values to formatted Rich Text objects with appropriate
@@ -172,7 +204,7 @@ def format_row(vals, dtypes, styles=None, apply_justify=True, thousand_separator
172
204
  Args:
173
205
  vals: The list of values in the row.
174
206
  dtypes: The list of data types corresponding to each value.
175
- apply_justify: Whether to apply justification styling. Defaults to True.
207
+ styles: Optional list of style overrides for each value. Defaults to None.
176
208
 
177
209
  Returns:
178
210
  A list of Rich Text objects with proper formatting applied.
@@ -181,24 +213,11 @@ def format_row(vals, dtypes, styles=None, apply_justify=True, thousand_separator
181
213
 
182
214
  for idx, (val, dtype) in enumerate(zip(vals, dtypes, strict=True)):
183
215
  dc = DtypeConfig(dtype)
184
-
185
- # Format the value
186
- if val is None:
187
- text_val = NULL_DISPLAY
188
- elif dc.gtype == "integer" and thousand_separator:
189
- text_val = f"{val:,}"
190
- elif dc.gtype == "float":
191
- text_val = format_float(val, thousand_separator)
192
- else:
193
- text_val = str(val)
194
-
195
216
  formatted_row.append(
196
- Text(
197
- text_val,
198
- style=styles[idx] if styles and styles[idx] else dc.style,
199
- justify=dc.justify if apply_justify else "",
200
- overflow="ellipsis",
201
- no_wrap=True,
217
+ dc.format(
218
+ val,
219
+ style=styles[idx] if styles and styles[idx] else None,
220
+ thousand_separator=thousand_separator,
202
221
  )
203
222
  )
204
223
 
@@ -256,6 +275,7 @@ def parse_placeholders(template: str, columns: list[str], current_cidx: int) ->
256
275
  - `$#` - Row index (1-based, requires '^__ridx__^' column to be present)
257
276
  - `$1`, `$2`, etc. - Column index (1-based)
258
277
  - `$name` - Column name (e.g., `$product_id`)
278
+ - `` $`col name` `` - Column name with spaces (e.g., `` $`product id` ``)
259
279
 
260
280
  Args:
261
281
  template: The template string containing placeholders and literal text
@@ -271,8 +291,15 @@ def parse_placeholders(template: str, columns: list[str], current_cidx: int) ->
271
291
  if "$" not in template or template.endswith("$"):
272
292
  return [template]
273
293
 
274
- # Regex matches: $_ or $\d+ or $\w+ (column names)
275
- placeholder_pattern = r"\$(_|#|\d+|[a-zA-Z_]\w*)"
294
+ # Regex matches: $_ or $# or $\d+ or $`...` (backtick-quoted names with spaces) or $\w+ (column names)
295
+ # Pattern explanation:
296
+ # \$(_|#|\d+|`[^`]+`|[a-zA-Z_]\w*)
297
+ # - $_ : current column
298
+ # - $# : row index
299
+ # - $\d+ : column by index (1-based)
300
+ # - $`[^`]+` : column by name with spaces (backtick quoted)
301
+ # - $[a-zA-Z_]\w* : column by name without spaces
302
+ placeholder_pattern = r"\$(_|#|\d+|`[^`]+`|[a-zA-Z_]\w*)"
276
303
  placeholders = re.finditer(placeholder_pattern, template)
277
304
 
278
305
  parts = []
@@ -305,6 +332,13 @@ def parse_placeholders(template: str, columns: list[str], current_cidx: int) ->
305
332
  parts.append(pl.col(col_ref))
306
333
  except IndexError:
307
334
  raise ValueError(f"Invalid column index: ${placeholder} (valid range: $1 to ${len(columns)})")
335
+ elif placeholder.startswith("`") and placeholder.endswith("`"):
336
+ # $`col name` refers to column by name with spaces
337
+ col_ref = placeholder[1:-1] # Remove backticks
338
+ if col_ref in columns:
339
+ parts.append(pl.col(col_ref))
340
+ else:
341
+ raise ValueError(f"Column not found: ${placeholder} (available columns: {', '.join(columns)})")
308
342
  else:
309
343
  # $name refers to column by name
310
344
  if placeholder in columns:
@@ -333,6 +367,7 @@ def parse_polars_expression(expression: str, columns: list[str], current_cidx: i
333
367
  - $# - Row index (1-based, requires '^__ridx__^' column to be present)
334
368
  - $1, $2, etc. - Column index (1-based)
335
369
  - $col_name - Column name (valid identifier starting with _ or letter)
370
+ - $`col name` - Column name with spaces (backtick quoted)
336
371
 
337
372
  Examples:
338
373
  - "$_ > 50" -> "pl.col('current_col') > 50"
@@ -340,6 +375,7 @@ def parse_polars_expression(expression: str, columns: list[str], current_cidx: i
340
375
  - "$1 > 50" -> "pl.col('col0') > 50"
341
376
  - "$name == 'Alex'" -> "pl.col('name') == 'Alex'"
342
377
  - "$age < $salary" -> "pl.col('age') < pl.col('salary')"
378
+ - "$`product id` > 100" -> "pl.col('product id') > 100"
343
379
 
344
380
  Args:
345
381
  expression: The input expression as a string.
@@ -705,3 +741,29 @@ async def sleep_async(seconds: float) -> None:
705
741
  import asyncio
706
742
 
707
743
  await asyncio.sleep(seconds)
744
+
745
+
746
+ def round_to_nearest_hundreds(num: int, N: int = 100) -> tuple[int, int]:
747
+ """Return the two multiples of N (100 by default) that bracket a number.
748
+
749
+ Given a number, return a tuple of the two closest hundreds that bracket it.
750
+
751
+ Args:
752
+ num: The number to round.
753
+
754
+ Returns:
755
+ A tuple (lower_hundred, upper_hundred) where:
756
+ - lower_hundred is the largest multiple of N (default 100) <= num
756
+ - upper_hundred is the smallest multiple of N (default 100) > num
758
+
759
+ Examples:
760
+ >>> round_to_nearest_hundreds(0)
761
+ (0, 100)
762
+ >>> round_to_nearest_hundreds(150)
763
+ (100, 200)
764
+ >>> round_to_nearest_hundreds(200)
765
+ (200, 300)
766
+ """
767
+ lower = (num // N) * N
768
+ upper = lower + N
769
+ return (lower, upper)