dataframe-textual 0.3.2__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataframe_textual/__init__.py +1 -2
- dataframe_textual/__main__.py +32 -12
- dataframe_textual/common.py +348 -91
- dataframe_textual/data_frame_help_panel.py +22 -4
- dataframe_textual/data_frame_table.py +2188 -632
- dataframe_textual/data_frame_viewer.py +198 -159
- dataframe_textual/table_screen.py +271 -86
- dataframe_textual/yes_no_screen.py +428 -163
- dataframe_textual-1.2.0.dist-info/METADATA +756 -0
- dataframe_textual-1.2.0.dist-info/RECORD +13 -0
- dataframe_textual-1.2.0.dist-info/entry_points.txt +2 -0
- dataframe_textual-0.3.2.dist-info/METADATA +0 -548
- dataframe_textual-0.3.2.dist-info/RECORD +0 -13
- dataframe_textual-0.3.2.dist-info/entry_points.txt +0 -2
- {dataframe_textual-0.3.2.dist-info → dataframe_textual-1.2.0.dist-info}/WHEEL +0 -0
- {dataframe_textual-0.3.2.dist-info → dataframe_textual-1.2.0.dist-info}/licenses/LICENSE +0 -0
dataframe_textual/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from .data_frame_help_panel import DataFrameHelpPanel
|
|
4
4
|
from .data_frame_table import DataFrameTable, History
|
|
5
|
-
from .data_frame_viewer import DataFrameViewer
|
|
5
|
+
from .data_frame_viewer import DataFrameViewer
|
|
6
6
|
from .table_screen import FrequencyScreen, RowDetailScreen, TableScreen
|
|
7
7
|
from .yes_no_screen import (
|
|
8
8
|
ConfirmScreen,
|
|
@@ -31,5 +31,4 @@ __all__ = [
|
|
|
31
31
|
"FilterScreen",
|
|
32
32
|
"FreezeScreen",
|
|
33
33
|
"OpenFileScreen",
|
|
34
|
-
"_load_dataframe",
|
|
35
34
|
]
|
dataframe_textual/__main__.py
CHANGED
|
@@ -4,43 +4,63 @@ import argparse
|
|
|
4
4
|
import sys
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
|
|
7
|
+
from .common import load_dataframe
|
|
7
8
|
from .data_frame_viewer import DataFrameViewer
|
|
8
9
|
|
|
10
|
+
SUPPORTED_FORMATS = ["csv", "excel", "tsv", "parquet", "json", "ndjson"]
|
|
9
11
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
+
|
|
13
|
+
def main() -> None:
|
|
14
|
+
"""Run the DataFrame Viewer application.
|
|
15
|
+
|
|
16
|
+
Parses command-line arguments to determine input files or stdin, validates
|
|
17
|
+
file existence, and launches the interactive DataFrame Viewer application.
|
|
18
|
+
|
|
19
|
+
Returns:
|
|
20
|
+
None
|
|
21
|
+
|
|
22
|
+
Raises:
|
|
23
|
+
SystemExit: If invalid arguments are provided or required files are missing.
|
|
24
|
+
"""
|
|
12
25
|
parser = argparse.ArgumentParser(
|
|
13
|
-
|
|
26
|
+
prog="dv",
|
|
27
|
+
description="Interactive terminal based viewer/editor for tabular data (e.g., CSV/Excel).",
|
|
14
28
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
15
29
|
epilog="Examples:\n"
|
|
16
|
-
"
|
|
17
|
-
"
|
|
18
|
-
"
|
|
19
|
-
" cat data.csv |
|
|
30
|
+
" %(prog)s data.csv\n"
|
|
31
|
+
" %(prog)s file1.csv file2.csv file3.csv\n"
|
|
32
|
+
" %(prog)s data.xlsx (opens each sheet in separate tab)\n"
|
|
33
|
+
" cat data.csv | %(prog)s --format csv\n",
|
|
20
34
|
)
|
|
35
|
+
parser.add_argument("files", nargs="*", help="Files to view (or read from stdin)")
|
|
21
36
|
parser.add_argument(
|
|
22
|
-
"
|
|
37
|
+
"-f",
|
|
38
|
+
"--format",
|
|
39
|
+
choices=SUPPORTED_FORMATS,
|
|
40
|
+
help="Specify the format of the input files (csv, excel, tsv etc.)",
|
|
23
41
|
)
|
|
42
|
+
parser.add_argument("-H", "--no-header", action="store_true", help="Specify that input files have no header row")
|
|
24
43
|
|
|
25
44
|
args = parser.parse_args()
|
|
26
45
|
filenames = []
|
|
27
46
|
|
|
28
47
|
# Check if reading from stdin (pipe or redirect)
|
|
29
48
|
if not sys.stdin.isatty():
|
|
30
|
-
filenames
|
|
31
|
-
|
|
49
|
+
filenames.append("-")
|
|
50
|
+
if args.files:
|
|
32
51
|
# Validate all files exist
|
|
33
52
|
for filename in args.files:
|
|
34
53
|
if not Path(filename).exists():
|
|
35
54
|
print(f"File not found: {filename}")
|
|
36
55
|
sys.exit(1)
|
|
37
|
-
filenames
|
|
56
|
+
filenames.extend(args.files)
|
|
38
57
|
|
|
39
58
|
if not filenames:
|
|
40
59
|
parser.print_help()
|
|
41
60
|
sys.exit(1)
|
|
42
61
|
|
|
43
|
-
|
|
62
|
+
sources = load_dataframe(filenames, file_format=args.format, has_header=not args.no_header)
|
|
63
|
+
app = DataFrameViewer(*sources)
|
|
44
64
|
app.run()
|
|
45
65
|
|
|
46
66
|
|
dataframe_textual/common.py
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
"""Common utilities and constants for dataframe_viewer."""
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
+
import sys
|
|
4
5
|
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
5
7
|
from typing import Any
|
|
6
8
|
|
|
7
9
|
import polars as pl
|
|
8
10
|
from rich.text import Text
|
|
9
11
|
|
|
12
|
+
# Special string to represent null value
|
|
13
|
+
NULL = "NULL"
|
|
14
|
+
NULL_DISPLAY = "-"
|
|
15
|
+
|
|
10
16
|
# Boolean string mappings
|
|
11
17
|
BOOLS = {
|
|
12
18
|
"true": True,
|
|
@@ -21,34 +27,45 @@ BOOLS = {
|
|
|
21
27
|
"0": False,
|
|
22
28
|
}
|
|
23
29
|
|
|
24
|
-
# itype is used by Input widget for input validation
|
|
25
|
-
# fmt: off
|
|
26
|
-
STYLES = {
|
|
27
|
-
"Int64": {"style": "cyan", "justify": "right", "itype": "integer", "convert": int},
|
|
28
|
-
"Float64": {"style": "magenta", "justify": "right", "itype": "number", "convert": float},
|
|
29
|
-
"String": {"style": "green", "justify": "left", "itype": "text", "convert": str},
|
|
30
|
-
"Boolean": {"style": "blue", "justify": "center", "itype": "text", "convert": lambda x: BOOLS[x.lower()]},
|
|
31
|
-
"Date": {"style": "blue", "justify": "center", "itype": "text", "convert": str},
|
|
32
|
-
"Datetime": {"style": "blue", "justify": "center", "itype": "text", "convert": str},
|
|
33
|
-
}
|
|
34
|
-
# fmt: on
|
|
35
|
-
|
|
36
30
|
|
|
37
31
|
@dataclass
|
|
38
|
-
class
|
|
32
|
+
class DtypeClass:
|
|
33
|
+
gtype: str # generic, high-level type
|
|
39
34
|
style: str
|
|
40
35
|
justify: str
|
|
41
36
|
itype: str
|
|
42
37
|
convert: Any
|
|
43
38
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
39
|
+
|
|
40
|
+
# itype is used by Input widget for input validation
|
|
41
|
+
# fmt: off
|
|
42
|
+
STYLES = {
|
|
43
|
+
# str
|
|
44
|
+
pl.String: DtypeClass(gtype="string", style="green", justify="left", itype="text", convert=str),
|
|
45
|
+
# int
|
|
46
|
+
pl.Int8: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
47
|
+
pl.Int16: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
48
|
+
pl.Int32: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
49
|
+
pl.Int64: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
50
|
+
pl.Int128: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
51
|
+
pl.UInt8: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
52
|
+
pl.UInt16: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
53
|
+
pl.UInt32: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
54
|
+
pl.UInt64: DtypeClass(gtype="integer", style="cyan", justify="right", itype="integer", convert=int),
|
|
55
|
+
# float
|
|
56
|
+
pl.Float32: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
|
|
57
|
+
pl.Float64: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
|
|
58
|
+
pl.Decimal: DtypeClass(gtype="float", style="magenta", justify="right", itype="number", convert=float),
|
|
59
|
+
# bool
|
|
60
|
+
pl.Boolean: DtypeClass(gtype="boolean", style="blue", justify="center", itype="text", convert=lambda x: BOOLS[x.lower()]),
|
|
61
|
+
# temporal
|
|
62
|
+
pl.Date: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
|
|
63
|
+
pl.Datetime: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
|
|
64
|
+
pl.Time: DtypeClass(gtype="temporal", style="yellow", justify="center", itype="text", convert=str),
|
|
65
|
+
# unknown
|
|
66
|
+
pl.Unknown: DtypeClass(gtype="unknown", style="", justify="", itype="text", convert=str),
|
|
67
|
+
}
|
|
68
|
+
# fmt: on
|
|
52
69
|
|
|
53
70
|
|
|
54
71
|
# Subscript digits mapping for sort indicators
|
|
@@ -68,18 +85,67 @@ SUBSCRIPT_DIGITS = {
|
|
|
68
85
|
# Cursor types ("none" removed)
|
|
69
86
|
CURSOR_TYPES = ["row", "column", "cell"]
|
|
70
87
|
|
|
71
|
-
#
|
|
72
|
-
|
|
73
|
-
|
|
88
|
+
# For row index column
|
|
89
|
+
RIDX = "^_ridx_^"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def DtypeConfig(dtype: pl.DataType) -> DtypeClass:
|
|
93
|
+
"""Get the DtypeClass configuration for a given Polars data type.
|
|
94
|
+
|
|
95
|
+
Retrieves styling and formatting configuration based on the Polars data type,
|
|
96
|
+
including style (color), justification, and type conversion function.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
dtype: A Polars data type to get configuration for.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
A DtypeClass containing generic type, style, justification, input type, and conversion function.
|
|
103
|
+
"""
|
|
104
|
+
if dc := STYLES.get(dtype):
|
|
105
|
+
return dc
|
|
106
|
+
elif isinstance(dtype, pl.Datetime):
|
|
107
|
+
return STYLES[pl.Datetime]
|
|
108
|
+
elif isinstance(dtype, pl.Date):
|
|
109
|
+
return STYLES[pl.Date]
|
|
110
|
+
elif isinstance(dtype, pl.Time):
|
|
111
|
+
return STYLES[pl.Time]
|
|
112
|
+
else:
|
|
113
|
+
return STYLES[pl.Unknown]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def format_float(value: float, thousand_separator: bool = False, precision: int = 2) -> str:
|
|
117
|
+
"""Format a float value, keeping integers without decimal point.
|
|
74
118
|
|
|
119
|
+
Args:
|
|
120
|
+
value: The float value to format.
|
|
121
|
+
thousand_separator: Whether to include thousand separators. Defaults to False.
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
The formatted float as a string.
|
|
125
|
+
"""
|
|
126
|
+
|
|
127
|
+
if (val := int(value)) == value:
|
|
128
|
+
return f"{val:,}" if thousand_separator else str(val)
|
|
129
|
+
else:
|
|
130
|
+
if precision > 0:
|
|
131
|
+
return f"{value:,.{precision}f}" if thousand_separator else f"{value:.{precision}f}"
|
|
132
|
+
else:
|
|
133
|
+
return f"{value:,f}" if thousand_separator else str(value)
|
|
75
134
|
|
|
76
|
-
|
|
135
|
+
|
|
136
|
+
def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> list[Text]:
|
|
77
137
|
"""Format a single row with proper styling and justification.
|
|
78
138
|
|
|
139
|
+
Converts raw row values to formatted Rich Text objects with appropriate
|
|
140
|
+
styling (colors), justification, and null value handling based on data types.
|
|
141
|
+
|
|
79
142
|
Args:
|
|
80
143
|
vals: The list of values in the row.
|
|
81
144
|
dtypes: The list of data types corresponding to each value.
|
|
82
145
|
apply_justify: Whether to apply justification styling. Defaults to True.
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
A list of Rich Text objects with proper formatting applied.
|
|
83
149
|
"""
|
|
84
150
|
formatted_row = []
|
|
85
151
|
|
|
@@ -88,9 +154,11 @@ def _format_row(vals, dtypes, apply_justify=True) -> list[Text]:
|
|
|
88
154
|
|
|
89
155
|
# Format the value
|
|
90
156
|
if val is None:
|
|
91
|
-
text_val =
|
|
92
|
-
elif
|
|
93
|
-
text_val = f"{val
|
|
157
|
+
text_val = NULL_DISPLAY
|
|
158
|
+
elif dc.gtype == "integer" and thousand_separator:
|
|
159
|
+
text_val = f"{val:,}"
|
|
160
|
+
elif dc.gtype == "float":
|
|
161
|
+
text_val = format_float(val, thousand_separator)
|
|
94
162
|
else:
|
|
95
163
|
text_val = str(val)
|
|
96
164
|
|
|
@@ -105,16 +173,42 @@ def _format_row(vals, dtypes, apply_justify=True) -> list[Text]:
|
|
|
105
173
|
return formatted_row
|
|
106
174
|
|
|
107
175
|
|
|
108
|
-
def
|
|
109
|
-
"""Return the last index of value in a list. Return -1 if not found.
|
|
176
|
+
def rindex(lst: list, value) -> int:
|
|
177
|
+
"""Return the last index of value in a list. Return -1 if not found.
|
|
178
|
+
|
|
179
|
+
Searches through the list in reverse order to find the last occurrence
|
|
180
|
+
of the given value.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
lst: The list to search through.
|
|
184
|
+
value: The value to find.
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
The index (0-based) of the last occurrence, or -1 if not found.
|
|
188
|
+
"""
|
|
110
189
|
for i, item in enumerate(reversed(lst)):
|
|
111
190
|
if item == value:
|
|
112
191
|
return len(lst) - 1 - i
|
|
113
192
|
return -1
|
|
114
193
|
|
|
115
194
|
|
|
116
|
-
def
|
|
117
|
-
"""Return the next item in the list after the current item, cycling if needed.
|
|
195
|
+
def get_next_item(lst: list[Any], current, offset=1) -> Any:
|
|
196
|
+
"""Return the next item in the list after the current item, cycling if needed.
|
|
197
|
+
|
|
198
|
+
Finds the current item in the list and returns the item at position (current_index + offset),
|
|
199
|
+
wrapping around to the beginning if necessary.
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
lst: The list to cycle through.
|
|
203
|
+
current: The current item (must be in the list).
|
|
204
|
+
offset: The number of positions to advance. Defaults to 1.
|
|
205
|
+
|
|
206
|
+
Returns:
|
|
207
|
+
The next item in the list after advancing by the offset.
|
|
208
|
+
|
|
209
|
+
Raises:
|
|
210
|
+
ValueError: If the current item is not found in the list.
|
|
211
|
+
"""
|
|
118
212
|
if current not in lst:
|
|
119
213
|
raise ValueError("Current item not in list")
|
|
120
214
|
current_index = lst.index(current)
|
|
@@ -122,83 +216,246 @@ def _next(lst: list[Any], current, offset=1) -> Any:
|
|
|
122
216
|
return lst[next_index]
|
|
123
217
|
|
|
124
218
|
|
|
125
|
-
def
|
|
126
|
-
expression
|
|
127
|
-
) -> str:
|
|
128
|
-
"""Parse and convert a filter expression to Polars syntax.
|
|
219
|
+
def parse_polars_expression(expression: str, df: pl.DataFrame, current_col_idx: int) -> str:
|
|
220
|
+
"""Parse and convert an expression to Polars syntax.
|
|
129
221
|
|
|
130
|
-
|
|
222
|
+
Replaces column references with Polars col() expressions:
|
|
131
223
|
- $_ - Current selected column
|
|
224
|
+
- $# - Row index (1-based, requires '^_ridx_^' column to be present)
|
|
132
225
|
- $1, $2, etc. - Column by 1-based index
|
|
133
|
-
- $col_name - Column by name
|
|
134
|
-
- Comparison operators: ==, !=, <, >, <=, >=
|
|
135
|
-
- Logical operators: &&, ||
|
|
136
|
-
- String literals: 'text', "text"
|
|
137
|
-
- Numeric literals: integers and floats
|
|
226
|
+
- $col_name - Column by name (valid identifier starting with _ or letter)
|
|
138
227
|
|
|
139
228
|
Examples:
|
|
140
229
|
- "$_ > 50" -> "pl.col('current_col') > 50"
|
|
230
|
+
- "$# > 10" -> "(pl.col('^_ridx_^') + 1) > 10"
|
|
141
231
|
- "$1 > 50" -> "pl.col('col0') > 50"
|
|
142
232
|
- "$name == 'Alex'" -> "pl.col('name') == 'Alex'"
|
|
143
|
-
- "$1 > 3 && $name == 'Alex'" -> "(pl.col('col0') > 3) & (pl.col('name') == 'Alex')"
|
|
144
233
|
- "$age < $salary" -> "pl.col('age') < pl.col('salary')"
|
|
145
234
|
|
|
146
235
|
Args:
|
|
147
|
-
expression: The
|
|
236
|
+
expression: The input expression as a string.
|
|
148
237
|
df: The DataFrame to validate column references.
|
|
149
238
|
current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.
|
|
150
239
|
|
|
151
240
|
Returns:
|
|
152
|
-
A Python expression string
|
|
241
|
+
A Python expression string with $references replaced by pl.col() calls.
|
|
153
242
|
|
|
154
243
|
Raises:
|
|
155
|
-
ValueError: If
|
|
156
|
-
SyntaxError: If the expression has invalid syntax.
|
|
244
|
+
ValueError: If a column reference is invalid.
|
|
157
245
|
"""
|
|
158
|
-
#
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
#
|
|
168
|
-
|
|
169
|
-
for
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
converted_tokens.append(f"pl.col('{col_name}')")
|
|
190
|
-
|
|
191
|
-
elif token in ("&&", "||"):
|
|
192
|
-
# Convert logical operators and wrap surrounding expressions in parentheses
|
|
193
|
-
if token == "&&":
|
|
194
|
-
converted_tokens.append(") & (")
|
|
195
|
-
else:
|
|
196
|
-
converted_tokens.append(") | (")
|
|
197
|
-
|
|
246
|
+
# Early return if no $ present
|
|
247
|
+
if "$" not in expression:
|
|
248
|
+
if "pl." in expression:
|
|
249
|
+
# This may be valid Polars expression already
|
|
250
|
+
return expression
|
|
251
|
+
else:
|
|
252
|
+
# Wrap the raw expression text (inserted unquoted) in a Polars literal
|
|
253
|
+
return f"pl.lit({expression})"
|
|
254
|
+
|
|
255
|
+
# Pattern to match $ followed by either:
|
|
256
|
+
# - _ (single underscore)
|
|
257
|
+
# - # (hash for row index)
|
|
258
|
+
# - digits (integer)
|
|
259
|
+
# - identifier (starts with letter or _, followed by letter/digit/_)
|
|
260
|
+
pattern = r"\$(_|#|\d+|[a-zA-Z_]\w*)"
|
|
261
|
+
|
|
262
|
+
def replace_column_ref(match):
|
|
263
|
+
col_ref = match.group(1)
|
|
264
|
+
|
|
265
|
+
if col_ref == "_":
|
|
266
|
+
# Current selected column
|
|
267
|
+
col_name = df.columns[current_col_idx]
|
|
268
|
+
elif col_ref == "#":
|
|
269
|
+
# RIDX is used to store 0-based row index; add 1 for 1-based index
|
|
270
|
+
return f"(pl.col('{RIDX}') + 1)"
|
|
271
|
+
elif col_ref.isdigit():
|
|
272
|
+
# Column by 1-based index
|
|
273
|
+
col_idx = int(col_ref) - 1
|
|
274
|
+
if col_idx < 0 or col_idx >= len(df.columns):
|
|
275
|
+
raise ValueError(f"Column index out of range: ${col_ref}")
|
|
276
|
+
col_name = df.columns[col_idx]
|
|
198
277
|
else:
|
|
199
|
-
#
|
|
200
|
-
|
|
278
|
+
# Column by name
|
|
279
|
+
if col_ref not in df.columns:
|
|
280
|
+
raise ValueError(f"Column not found: ${col_ref}")
|
|
281
|
+
col_name = col_ref
|
|
282
|
+
|
|
283
|
+
return f"pl.col('{col_name}')"
|
|
201
284
|
|
|
202
|
-
|
|
203
|
-
result = "(" + " ".join(converted_tokens) + ")"
|
|
285
|
+
result = re.sub(pattern, replace_column_ref, expression)
|
|
204
286
|
return result
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def tentative_expr(term: str) -> bool:
|
|
290
|
+
"""Check if the given term could be a Polars expression.
|
|
291
|
+
|
|
292
|
+
Heuristically determines whether a string might represent a Polars expression
|
|
293
|
+
based on common patterns like column references ($) or direct Polars syntax (pl.).
|
|
294
|
+
|
|
295
|
+
Args:
|
|
296
|
+
term: The string to check.
|
|
297
|
+
|
|
298
|
+
Returns:
|
|
299
|
+
True if the term appears to be a Polars expression, False otherwise.
|
|
300
|
+
"""
|
|
301
|
+
if "$" in term and not term.endswith("$"):
|
|
302
|
+
return True
|
|
303
|
+
if "pl." in term:
|
|
304
|
+
return True
|
|
305
|
+
return False
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr | None:
|
|
309
|
+
"""Validate and return the expression.
|
|
310
|
+
|
|
311
|
+
Parses a user-provided expression string and validates it as a valid Polars expression.
|
|
312
|
+
Converts special syntax like $_ references to proper Polars col() expressions.
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
term: The input expression as a string.
|
|
316
|
+
df: The DataFrame to validate column references against.
|
|
317
|
+
current_col_idx: The index of the currently selected column (0-based). Used for $_ reference.
|
|
318
|
+
|
|
319
|
+
Returns:
|
|
320
|
+
A valid Polars expression object if validation succeeds.
|
|
321
|
+
|
|
322
|
+
Raises:
|
|
323
|
+
ValueError: If the expression is invalid, contains non-existent column references, or cannot be evaluated.
|
|
324
|
+
"""
|
|
325
|
+
term = term.strip()
|
|
326
|
+
|
|
327
|
+
try:
|
|
328
|
+
# Parse the expression
|
|
329
|
+
expr_str = parse_polars_expression(term, df, current_col_idx)
|
|
330
|
+
|
|
331
|
+
# Validate by evaluating it
|
|
332
|
+
try:
|
|
333
|
+
expr = eval(expr_str, {"pl": pl})
|
|
334
|
+
if not isinstance(expr, pl.Expr):
|
|
335
|
+
raise ValueError(f"Expression evaluated to `{type(expr).__name__}` instead of a Polars expression")
|
|
336
|
+
|
|
337
|
+
# Expression is valid
|
|
338
|
+
return expr
|
|
339
|
+
except Exception as e:
|
|
340
|
+
raise ValueError(f"Failed to evaluate expression `{expr_str}`: {e}") from e
|
|
341
|
+
except Exception as ve:
|
|
342
|
+
raise ValueError(f"Failed to validate expression `{term}`: {ve}") from ve
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def load_dataframe(
|
|
346
|
+
filenames: list[str], file_format: str | None = None, has_header: bool = True
|
|
347
|
+
) -> list[tuple[pl.LazyFrame, str, str]]:
|
|
348
|
+
"""Load DataFrames from file specifications.
|
|
349
|
+
|
|
350
|
+
Handles loading from multiple files, single files, or stdin. For Excel files,
|
|
351
|
+
loads all sheets as separate entries. For other formats, loads as single file.
|
|
352
|
+
|
|
353
|
+
Args:
|
|
354
|
+
filenames: List of filenames to load. Any filename equal to "-" is read from stdin.
|
|
355
|
+
file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
|
|
356
|
+
has_header: Whether the input files have a header row. Defaults to True.
|
|
357
|
+
|
|
358
|
+
Returns:
|
|
359
|
+
List of tuples of (LazyFrame, filename, tabname) ready for display.
|
|
360
|
+
"""
|
|
361
|
+
sources = []
|
|
362
|
+
|
|
363
|
+
prefix_sheet = len(filenames) > 1
|
|
364
|
+
|
|
365
|
+
for filename in filenames:
|
|
366
|
+
sources.extend(load_file(filename, prefix_sheet=prefix_sheet, file_format=file_format, has_header=has_header))
|
|
367
|
+
return sources
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def load_file(
|
|
371
|
+
filename: str,
|
|
372
|
+
first_sheet: bool = False,
|
|
373
|
+
prefix_sheet: bool = False,
|
|
374
|
+
file_format: str | None = None,
|
|
375
|
+
has_header: bool = True,
|
|
376
|
+
) -> list[tuple[pl.LazyFrame, str, str]]:
|
|
377
|
+
"""Load a single file and return list of sources.
|
|
378
|
+
|
|
379
|
+
For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
|
|
380
|
+
For other files or multiple files, returns one entry per file.
|
|
381
|
+
|
|
382
|
+
Args:
|
|
383
|
+
filename: Path to file to load.
|
|
384
|
+
first_sheet: If True, only load first sheet for Excel files. Defaults to False.
|
|
385
|
+
prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
|
|
386
|
+
file_format: Optional format specifier (i.e., 'csv', 'excel', 'tsv', 'parquet', 'json', 'ndjson') for input files.
|
|
387
|
+
By default, infers from file extension.
|
|
388
|
+
has_header: Whether the input files have a header row. Defaults to True.
|
|
389
|
+
|
|
390
|
+
Returns:
|
|
391
|
+
List of tuples of (LazyFrame, filename, tabname).
|
|
392
|
+
"""
|
|
393
|
+
sources = []
|
|
394
|
+
|
|
395
|
+
if filename == "-":
|
|
396
|
+
import os
|
|
397
|
+
from io import StringIO
|
|
398
|
+
|
|
399
|
+
# Read from stdin into memory first (stdin is not seekable)
|
|
400
|
+
stdin_data = sys.stdin.read()
|
|
401
|
+
lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator="," if file_format == "csv" else "\t")
|
|
402
|
+
|
|
403
|
+
# Reopen stdin to /dev/tty for proper terminal interaction
|
|
404
|
+
try:
|
|
405
|
+
tty = open("/dev/tty")
|
|
406
|
+
os.dup2(tty.fileno(), sys.stdin.fileno())
|
|
407
|
+
except (OSError, FileNotFoundError):
|
|
408
|
+
pass
|
|
409
|
+
|
|
410
|
+
sources.append((lf, f"stdin.{file_format}" if file_format else "stdin", "stdin"))
|
|
411
|
+
return sources
|
|
412
|
+
|
|
413
|
+
filepath = Path(filename)
|
|
414
|
+
|
|
415
|
+
if file_format == "csv":
|
|
416
|
+
lf = pl.scan_csv(filename, has_header=has_header)
|
|
417
|
+
sources.append((lf, filename, filepath.stem))
|
|
418
|
+
elif file_format == "excel":
|
|
419
|
+
if first_sheet:
|
|
420
|
+
# Read only the first sheet for multiple files
|
|
421
|
+
lf = pl.read_excel(filename).lazy()
|
|
422
|
+
sources.append((lf, filename, filepath.stem))
|
|
423
|
+
else:
|
|
424
|
+
# For single file, expand all sheets
|
|
425
|
+
sheets = pl.read_excel(filename, sheet_id=0)
|
|
426
|
+
for sheet_name, df in sheets.items():
|
|
427
|
+
tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
|
|
428
|
+
sources.append((df.lazy(), filename, tabname))
|
|
429
|
+
elif file_format == "tsv":
|
|
430
|
+
lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
|
|
431
|
+
sources.append((lf, filename, filepath.stem))
|
|
432
|
+
elif file_format == "parquet":
|
|
433
|
+
lf = pl.scan_parquet(filename)
|
|
434
|
+
sources.append((lf, filename, filepath.stem))
|
|
435
|
+
elif file_format == "json":
|
|
436
|
+
df = pl.read_json(filename)
|
|
437
|
+
sources.append((df, filename, filepath.stem))
|
|
438
|
+
elif file_format == "ndjson":
|
|
439
|
+
lf = pl.scan_ndjson(filename)
|
|
440
|
+
sources.append((lf, filename, filepath.stem))
|
|
441
|
+
else:
|
|
442
|
+
ext = filepath.suffix.lower()
|
|
443
|
+
if ext == ".csv":
|
|
444
|
+
file_format = "csv"
|
|
445
|
+
elif ext in (".xlsx", ".xls"):
|
|
446
|
+
file_format = "excel"
|
|
447
|
+
elif ext in (".tsv", ".tab"):
|
|
448
|
+
file_format = "tsv"
|
|
449
|
+
elif ext == ".parquet":
|
|
450
|
+
file_format = "parquet"
|
|
451
|
+
elif ext == ".json":
|
|
452
|
+
file_format = "json"
|
|
453
|
+
elif ext == ".ndjson":
|
|
454
|
+
file_format = "ndjson"
|
|
455
|
+
else:
|
|
456
|
+
# Default to TSV
|
|
457
|
+
file_format = "tsv"
|
|
458
|
+
|
|
459
|
+
sources.extend(load_file(filename, first_sheet, prefix_sheet, file_format, has_header))
|
|
460
|
+
|
|
461
|
+
return sources
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from textwrap import dedent
|
|
4
4
|
|
|
5
|
+
from textual.app import ComposeResult
|
|
5
6
|
from textual.containers import VerticalScroll
|
|
6
7
|
from textual.css.query import NoMatches
|
|
7
8
|
from textual.widget import Widget
|
|
@@ -19,8 +20,8 @@ class DataFrameHelpPanel(Widget):
|
|
|
19
20
|
DataFrameHelpPanel {
|
|
20
21
|
split: right;
|
|
21
22
|
width: 33%;
|
|
22
|
-
min-width:
|
|
23
|
-
max-width:
|
|
23
|
+
min-width: 40;
|
|
24
|
+
max-width: 80;
|
|
24
25
|
border-left: vkey $foreground 30%;
|
|
25
26
|
padding: 0 1;
|
|
26
27
|
height: 1fr;
|
|
@@ -68,7 +69,16 @@ class DataFrameHelpPanel(Widget):
|
|
|
68
69
|
|
|
69
70
|
DEFAULT_CLASSES = "-textual-system"
|
|
70
71
|
|
|
71
|
-
def on_mount(self):
|
|
72
|
+
def on_mount(self) -> None:
|
|
73
|
+
"""Set up help panel when mounted.
|
|
74
|
+
|
|
75
|
+
Initializes the help panel by setting up a watcher for focused widget changes
|
|
76
|
+
to dynamically update help text based on which widget has focus.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
None
|
|
80
|
+
"""
|
|
81
|
+
|
|
72
82
|
def update_help(focused_widget: Widget | None):
|
|
73
83
|
self.update_help(focused_widget)
|
|
74
84
|
|
|
@@ -94,5 +104,13 @@ class DataFrameHelpPanel(Widget):
|
|
|
94
104
|
except NoMatches:
|
|
95
105
|
pass
|
|
96
106
|
|
|
97
|
-
def compose(self):
|
|
107
|
+
def compose(self) -> ComposeResult:
|
|
108
|
+
"""Compose the help panel widget structure.
|
|
109
|
+
|
|
110
|
+
Creates and returns the widget hierarchy for the help panel,
|
|
111
|
+
including a VerticalScroll container with a Markdown display area.
|
|
112
|
+
|
|
113
|
+
Yields:
|
|
114
|
+
VerticalScroll: The main container with Markdown widget for help text.
|
|
115
|
+
"""
|
|
98
116
|
yield VerticalScroll(Markdown(id="widget-help"))
|