dataframe-textual 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,41 +6,58 @@ from pathlib import Path
6
6
 
7
7
  from .data_frame_viewer import DataFrameViewer
8
8
 
9
# Input formats accepted by the -f/--format command-line option (used as its argparse `choices`).
SUPPORTED_FORMATS = ["csv", "excel", "tsv", "parquet", "json", "ndjson"]
9
10
 
10
- def main():
11
- """Run the DataFrame Viewer application."""
11
+
12
def main() -> None:
    """Run the DataFrame Viewer application.

    Parses command-line arguments, collects the inputs to open ("-" for stdin
    when stdin is not a TTY, followed by any file paths given), checks that
    every named file exists, and launches the interactive DataFrame Viewer.

    Raises:
        SystemExit: With status 1 if a named file does not exist or if there is
            no input at all (help is printed in that case). argparse itself
            also exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Interactive terminal based viewer/editor for tabular data (e.g., CSV/Excel).",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="Examples:\n"
        " dataframe-textual data.csv\n"
        " dataframe-textual file1.csv file2.csv file3.csv\n"
        " dataframe-textual data.xlsx (opens all sheets in tabs)\n"
        " cat data.csv | dataframe-textual --format csv\n",
    )
    parser.add_argument("files", nargs="*", help="Files to view (or read from stdin)")
    parser.add_argument(
        "-f",
        "--format",
        choices=SUPPORTED_FORMATS,
        help="Specify the format of the input files (csv, excel, tsv etc.)",
    )
    parser.add_argument("-H", "--no-header", action="store_true", help="Specify that input files have no header row")

    args = parser.parse_args()
    filenames = []

    # Check if reading from stdin (pipe or redirect)
    if not sys.stdin.isatty():
        filenames.append("-")

    if args.files:
        # Validate all files exist before opening anything
        for filename in args.files:
            if not Path(filename).exists():
                # Diagnostics belong on stderr so they never mix with piped stdout
                print(f"File not found: {filename}", file=sys.stderr)
                sys.exit(1)
        filenames.extend(args.files)

    # Nothing piped in and no files named: show usage and fail
    if not filenames:
        parser.print_help()
        sys.exit(1)

    app = DataFrameViewer(*filenames, file_format=args.format, has_header=not args.no_header)
    app.run()
45
62
 
46
63
 
@@ -7,6 +7,10 @@ from typing import Any
7
7
  import polars as pl
8
8
  from rich.text import Text
9
9
 
10
# Special strings for null values.
# NULL: textual token standing for a null value (presumably what users type/see
#       when editing — confirm against callers).
# NULL_DISPLAY: placeholder shown in a table cell whose value is None.
NULL = "NULL"
NULL_DISPLAY = "-"
13
+
10
14
  # Boolean string mappings
11
15
  BOOLS = {
12
16
  "true": True,
@@ -21,34 +25,45 @@ BOOLS = {
21
25
  "0": False,
22
26
  }
23
27
 
24
- # itype is used by Input widget for input validation
25
- # fmt: off
26
- STYLES = {
27
- "Int64": {"style": "cyan", "justify": "right", "itype": "integer", "convert": int},
28
- "Float64": {"style": "magenta", "justify": "right", "itype": "number", "convert": float},
29
- "String": {"style": "green", "justify": "left", "itype": "text", "convert": str},
30
- "Boolean": {"style": "blue", "justify": "center", "itype": "text", "convert": lambda x: BOOLS[x.lower()]},
31
- "Date": {"style": "blue", "justify": "center", "itype": "text", "convert": str},
32
- "Datetime": {"style": "blue", "justify": "center", "itype": "text", "convert": str},
33
- }
34
- # fmt: on
35
-
36
28
 
37
29
@dataclass
class DtypeClass:
    """Styling and input-handling configuration associated with a Polars dtype."""

    gtype: str  # generic, high-level type, e.g. "string", "integer", "float", "boolean", "temporal", "unknown"
    style: str  # Rich style (color) applied to cell text; "" means unstyled
    justify: str  # cell justification: "left", "right", "center", or "" for none
    itype: str  # input type used by the Input widget for validation ("text", "integer", "number")
    convert: Any  # callable that converts entered text into a value of this dtype
43
36
 
44
- def __init__(self, dtype: pl.DataType):
45
- dc = STYLES.get(
46
- str(dtype), {"style": "", "justify": "", "itype": "text", "convert": str}
47
- )
48
- self.style = dc["style"]
49
- self.justify = dc["justify"]
50
- self.itype = dc["itype"]
51
- self.convert = dc["convert"]
37
+
38
# Maps each supported Polars dtype to its display/input configuration.
# itype is used by Input widget for input validation
# fmt: off
STYLES = {
    # str
    pl.String: DtypeClass(gtype="string", style="green", justify="left", itype="text", convert=str),
    # int (signed and unsigned share one look; each dtype still gets its own instance)
    **{
        int_dtype: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int)
        for int_dtype in (
            pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.Int128,
            pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
        )
    },
    # float
    **{
        float_dtype: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float)
        for float_dtype in (pl.Float32, pl.Float64, pl.Decimal)
    },
    # bool
    pl.Boolean: DtypeClass(gtype="boolean", style="blue", justify="center", itype="text", convert=lambda x: BOOLS[x.lower()]),
    # temporal
    **{
        temporal_dtype: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str)
        for temporal_dtype in (pl.Date, pl.Datetime, pl.Time)
    },
    # unknown
    pl.Unknown: DtypeClass(gtype="unknown", style="", justify="", itype="text", convert=str),
}
# fmt: on
52
67
 
53
68
 
54
69
  # Subscript digits mapping for sort indicators
@@ -68,18 +83,47 @@ SUBSCRIPT_DIGITS = {
68
83
  # Cursor types ("none" removed)
69
84
  CURSOR_TYPES = ["row", "column", "cell"]
70
85
 
71
- # Pagination settings
72
- INITIAL_BATCH_SIZE = 100 # Load this many rows initially
73
- BATCH_SIZE = 50 # Load this many rows when scrolling
86
# Name of the helper column holding the row index (0-based); referenced by `$#`
# in user expressions, which adds 1 to present a 1-based index.
RIDX = "^_ridx_^"
88
+
74
89
 
90
def DtypeConfig(dtype: pl.DataType) -> DtypeClass:
    """Get the DtypeClass configuration for a given Polars data type.

    Looks the dtype up in STYLES. If the direct lookup misses, falls back to an
    isinstance check against the parameterized dtype families (Datetime, Date,
    Time, Decimal), and finally to the "unknown" configuration.

    Args:
        dtype: A Polars data type to get configuration for.

    Returns:
        A DtypeClass containing style, justification, input type, and conversion function.
    """
    if dc := STYLES.get(dtype):
        return dc

    # Parameterized dtype instances (e.g. Datetime with a time unit/zone, Decimal
    # with precision/scale) may not hit the class-keyed lookup above, so match
    # them by family. Decimal is included so decimal columns get the "float"
    # configuration registered for pl.Decimal instead of "unknown".
    for family in (pl.Datetime, pl.Date, pl.Time, pl.Decimal):
        if isinstance(dtype, family):
            return STYLES[family]

    return STYLES[pl.Unknown]
112
+
113
+
114
+ def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> list[Text]:
77
115
  """Format a single row with proper styling and justification.
78
116
 
117
+ Converts raw row values to formatted Rich Text objects with appropriate
118
+ styling (colors), justification, and null value handling based on data types.
119
+
79
120
  Args:
80
121
  vals: The list of values in the row.
81
122
  dtypes: The list of data types corresponding to each value.
82
123
  apply_justify: Whether to apply justification styling. Defaults to True.
124
+
125
+ Returns:
126
+ A list of Rich Text objects with proper formatting applied.
83
127
  """
84
128
  formatted_row = []
85
129
 
@@ -88,9 +132,11 @@ def _format_row(vals, dtypes, apply_justify=True) -> list[Text]:
88
132
 
89
133
  # Format the value
90
134
  if val is None:
91
- text_val = "-"
92
- elif str(dtype).startswith("Float"):
93
- text_val = f"{val:.4g}"
135
+ text_val = NULL_DISPLAY
136
+ elif dc.gtype == "integer" and thousand_separator:
137
+ text_val = f"{val:,}"
138
+ elif dc.gtype == "float" and thousand_separator:
139
+ text_val = f"{val:,}"
94
140
  else:
95
141
  text_val = str(val)
96
142
 
@@ -105,16 +151,42 @@ def _format_row(vals, dtypes, apply_justify=True) -> list[Text]:
105
151
  return formatted_row
106
152
 
107
153
 
108
- def _rindex(lst: list, value) -> int:
109
- """Return the last index of value in a list. Return -1 if not found."""
154
def rindex(lst: list, value) -> int:
    """Find the position of the last element equal to ``value``.

    Walks the list from its end towards its start and stops at the first
    element that compares equal to ``value``.

    Args:
        lst: The list to search through.
        value: The value to find.

    Returns:
        The 0-based index of the last occurrence, or -1 if there is none.
    """
    for pos in range(len(lst) - 1, -1, -1):
        if lst[pos] == value:
            return pos
    return -1
114
171
 
115
172
 
116
- def _next(lst: list[Any], current, offset=1) -> Any:
117
- """Return the next item in the list after the current item, cycling if needed."""
173
+ def get_next_item(lst: list[Any], current, offset=1) -> Any:
174
+ """Return the next item in the list after the current item, cycling if needed.
175
+
176
+ Finds the current item in the list and returns the item at position (current_index + offset),
177
+ wrapping around to the beginning if necessary.
178
+
179
+ Args:
180
+ lst: The list to cycle through.
181
+ current: The current item (must be in the list).
182
+ offset: The number of positions to advance. Defaults to 1.
183
+
184
+ Returns:
185
+ The next item in the list after advancing by the offset.
186
+
187
+ Raises:
188
+ ValueError: If the current item is not found in the list.
189
+ """
118
190
  if current not in lst:
119
191
  raise ValueError("Current item not in list")
120
192
  current_index = lst.index(current)
@@ -122,83 +194,127 @@ def _next(lst: list[Any], current, offset=1) -> Any:
122
194
  return lst[next_index]
123
195
 
124
196
 
125
- def parse_filter_expression(
126
- expression: str, df: pl.DataFrame, current_col_idx: int
127
- ) -> str:
128
- """Parse and convert a filter expression to Polars syntax.
197
def parse_polars_expression(expression: str, df: "pl.DataFrame", current_col_idx: int) -> str:
    """Parse and convert an expression to Polars syntax.

    Replaces column references with Polars col() expressions:
    - $_ - Current selected column
    - $# - Row index (1-based; requires the RIDX column to be present)
    - $1, $2, etc. - Column by 1-based index
    - $col_name - Column by name (valid identifier starting with _ or letter)

    An expression without any "$" is returned unchanged when it contains "pl."
    and is otherwise wrapped as ``pl.lit(<expression>)``.

    Examples:
    - "$_ > 50" -> "pl.col('current_col') > 50"
    - "$# > 10" -> "(pl.col('^_ridx_^') + 1) > 10"
    - "$1 > 50" -> "pl.col('col0') > 50"
    - "$name == 'Alex'" -> "pl.col('name') == 'Alex'"
    - "$age < $salary" -> "pl.col('age') < pl.col('salary')"

    Args:
        expression: The input expression as a string.
        df: The DataFrame to validate column references.
        current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.

    Returns:
        A Python expression string with $references replaced by pl.col() calls.

    Raises:
        ValueError: If a column reference is invalid.
    """
    # Early return if no $ present
    if "$" not in expression:
        if "pl." in expression:
            # This may be valid Polars expression already
            return expression
        # Wrap the raw text in pl.lit(); it is inserted verbatim, not quoted
        return f"pl.lit({expression})"

    # Pattern to match $ followed by either:
    # - # (hash for row index)
    # - digits (integer)
    # - identifier (starts with letter or _, followed by letter/digit/_)
    # A lone "_" is matched by the identifier branch and handled below. Listing
    # "_" as its own alternative would win over the identifier branch and split
    # "$_id" into "$_" followed by stray text "id".
    pattern = r"\$(#|\d+|[a-zA-Z_]\w*)"
    columns = df.columns

    def replace_column_ref(match):
        col_ref = match.group(1)

        if col_ref == "#":
            # RIDX is used to store 0-based row index; add 1 for 1-based index
            return f"(pl.col({RIDX!r}) + 1)"

        if col_ref == "_":
            # Current selected column
            try:
                col_name = columns[current_col_idx]
            except IndexError:
                raise ValueError(f"Current column index out of range: {current_col_idx}") from None
        elif col_ref.isdigit():
            # Column by 1-based index
            col_idx = int(col_ref) - 1
            if col_idx < 0 or col_idx >= len(columns):
                raise ValueError(f"Column index out of range: ${col_ref}")
            col_name = columns[col_idx]
        else:
            # Column by name
            if col_ref not in columns:
                raise ValueError(f"Column not found: ${col_ref}")
            col_name = col_ref

        # repr() yields a correctly quoted/escaped literal even when the column
        # name itself contains quotes or backslashes
        return f"pl.col({col_name!r})"

    return re.sub(pattern, replace_column_ref, expression)
265
+
266
+
267
def tentative_expr(term: str) -> bool:
    """Guess whether ``term`` is meant to be a Polars expression.

    This is a heuristic only: a term qualifies when it contains a "$" column
    reference marker (unless the "$" is the final character), or when it
    contains direct Polars syntax ("pl.").

    Args:
        term: The string to check.

    Returns:
        True if the term appears to be a Polars expression, False otherwise.
    """
    has_column_ref = "$" in term and not term.endswith("$")
    return has_column_ref or "pl." in term
284
+
285
+
286
def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr | None:
    """Validate and return the expression.

    Strips the input, rewrites $-style column references into pl.col() calls via
    parse_polars_expression, then evaluates the resulting string and checks that
    the result is a Polars expression object.

    NOTE(review): the string is evaluated with eval(). Only `pl` is passed in
    the globals, but Python's builtins remain reachable, so this must only be
    fed input from the local interactive user, never from untrusted sources.

    Args:
        term: The input expression as a string.
        df: The DataFrame to validate column references against.
        current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.

    Returns:
        A valid Polars expression object if validation succeeds. (No code path
        returns None; failures raise instead.)

    Raises:
        ValueError: If the expression is invalid, contains non-existent column references, or cannot be evaluated.
            Every failure is re-raised as ValueError with the original exception chained as its cause.
    """
    term = term.strip()

    try:
        # Parse the expression
        expr_str = parse_polars_expression(term, df, current_col_idx)

        # Validate by evaluating it
        try:
            expr = eval(expr_str, {"pl": pl})
            if not isinstance(expr, pl.Expr):
                # Caught by the handler just below, so this message ends up nested in the wrappers
                raise ValueError(f"Expression evaluated to `{type(expr).__name__}` instead of a Polars expression")

            # Expression is valid
            return expr
        except Exception as e:
            # Reports the rewritten expression string that was actually evaluated
            raise ValueError(f"Failed to evaluate expression `{expr_str}`: {e}") from e
    except Exception as ve:
        # Outer wrapper: covers parse errors and re-wraps evaluation errors, quoting the user's original term
        raise ValueError(f"Failed to validate expression `{term}`: {ve}") from ve
@@ -2,6 +2,7 @@
2
2
 
3
3
  from textwrap import dedent
4
4
 
5
+ from textual.app import ComposeResult
5
6
  from textual.containers import VerticalScroll
6
7
  from textual.css.query import NoMatches
7
8
  from textual.widget import Widget
@@ -19,8 +20,8 @@ class DataFrameHelpPanel(Widget):
19
20
  DataFrameHelpPanel {
20
21
  split: right;
21
22
  width: 33%;
22
- min-width: 30;
23
- max-width: 60;
23
+ min-width: 40;
24
+ max-width: 80;
24
25
  border-left: vkey $foreground 30%;
25
26
  padding: 0 1;
26
27
  height: 1fr;
@@ -68,7 +69,16 @@ class DataFrameHelpPanel(Widget):
68
69
 
69
70
  DEFAULT_CLASSES = "-textual-system"
70
71
 
71
- def on_mount(self):
72
+ def on_mount(self) -> None:
73
+ """Set up help panel when mounted.
74
+
75
+ Initializes the help panel by setting up a watcher for focused widget changes
76
+ to dynamically update help text based on which widget has focus.
77
+
78
+ Returns:
79
+ None
80
+ """
81
+
72
82
  def update_help(focused_widget: Widget | None):
73
83
  self.update_help(focused_widget)
74
84
 
@@ -94,5 +104,13 @@ class DataFrameHelpPanel(Widget):
94
104
  except NoMatches:
95
105
  pass
96
106
 
97
- def compose(self):
107
def compose(self) -> ComposeResult:
    """Build the child widgets of the help panel.

    The panel consists of a single scrollable container wrapping a Markdown
    widget with id "widget-help".

    Yields:
        VerticalScroll: The scroll container holding the Markdown help widget.
    """
    help_markdown = Markdown(id="widget-help")
    yield VerticalScroll(help_markdown)