parqv 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parqv/__init__.py +31 -0
- parqv/app.py +97 -78
- parqv/cli.py +112 -0
- parqv/core/__init__.py +31 -0
- parqv/core/config.py +25 -0
- parqv/core/file_utils.py +88 -0
- parqv/core/handler_factory.py +89 -0
- parqv/core/logging.py +46 -0
- parqv/data_sources/__init__.py +44 -0
- parqv/data_sources/base/__init__.py +28 -0
- parqv/data_sources/base/exceptions.py +38 -0
- parqv/data_sources/base/handler.py +143 -0
- parqv/data_sources/formats/__init__.py +16 -0
- parqv/data_sources/formats/json.py +449 -0
- parqv/data_sources/formats/parquet.py +624 -0
- parqv/views/__init__.py +38 -0
- parqv/views/base.py +98 -0
- parqv/views/components/__init__.py +13 -0
- parqv/views/components/enhanced_data_table.py +152 -0
- parqv/views/components/error_display.py +72 -0
- parqv/views/components/loading_display.py +44 -0
- parqv/views/data_view.py +119 -46
- parqv/views/metadata_view.py +57 -13
- parqv/views/schema_view.py +197 -148
- parqv/views/utils/__init__.py +13 -0
- parqv/views/utils/data_formatters.py +162 -0
- parqv/views/utils/stats_formatters.py +160 -0
- parqv-0.2.1.dist-info/METADATA +104 -0
- parqv-0.2.1.dist-info/RECORD +34 -0
- {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/WHEEL +1 -1
- parqv/parquet_handler.py +0 -389
- parqv/views/row_group_view.py +0 -33
- parqv-0.1.0.dist-info/METADATA +0 -91
- parqv-0.1.0.dist-info/RECORD +0 -15
- {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/entry_points.txt +0 -0
- {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {parqv-0.1.0.dist-info → parqv-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,160 @@
|
|
1
|
+
"""
|
2
|
+
Statistics formatting utilities for parqv views.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Any, Dict, List, Union
|
6
|
+
|
7
|
+
from rich.text import Text
|
8
|
+
|
9
|
+
|
10
|
+
def format_stats_for_display(stats_data: Dict[str, Any]) -> List[Union[str, Text]]:
    """
    Turn a raw statistics dictionary into displayable lines.

    The output is assembled section by section: the column header always comes
    first, followed by the error block, the informational message and the
    calculated statistics, each only when the corresponding entry is truthy.

    Args:
        stats_data: Raw statistics dictionary from handler. The keys read here
            are "column", "type", "nullable", "error", "message" and
            "calculated".

    Returns:
        List of formatted lines ready for display. An empty or missing
        dictionary yields a single red "no data" line.
    """
    if not stats_data:
        return [Text.from_markup("[red]No statistics data available.[/red]")]

    # Header section: always present, falls back to placeholders for missing keys.
    output: List[Union[str, Text]] = list(
        _format_column_header(
            stats_data.get("column", "N/A"),
            stats_data.get("type", "Unknown"),
            stats_data.get("nullable"),
        )
    )

    # Error section (optional).
    failure = stats_data.get("error")
    if failure:
        output += _format_error_section(failure)

    # Informational message section (optional).
    note = stats_data.get("message")
    if note:
        output += _format_message_section(note)

    # Calculated statistics section (optional); the helper is told whether an
    # error was reported so it can suppress its "nothing calculated" hint.
    computed = stats_data.get("calculated")
    if computed:
        output += _format_calculated_stats(computed, has_error=bool(failure))

    return output
|
49
|
+
|
50
|
+
|
51
|
+
def format_column_info(column_name: str, column_type: str, nullable: Any) -> List[Union[str, Text]]:
    """
    Produce the header lines describing a single column.

    Public entry point that delegates to the private header formatter, so the
    output matches the header emitted by ``format_stats_for_display``.

    Args:
        column_name: Name of the column
        column_type: Type of the column
        nullable: Nullability information (``True``, ``False`` or anything
            else for "unknown")

    Returns:
        List of formatted lines for column info
    """
    header_lines = _format_column_header(column_name, column_type, nullable)
    return header_lines
|
64
|
+
|
65
|
+
|
66
|
+
def _format_column_header(col_name: str, col_type: str, nullable_val: Any) -> List[Union[str, Text]]:
    """
    Format the column header section.

    Args:
        col_name: Column name; rendered wrapped in backticks.
        col_type: Column type description.
        nullable_val: ``True`` renders "Nullable", ``False`` renders
            "Required"; any other value (including ``None``) renders
            "Unknown Nullability".

    Returns:
        Three lines: the "Column:" line, the "Type:" line and a horizontal
        rule whose width grows with the name and type lengths.
    """
    # Coerce once to text so a non-string label (e.g. an integer column name
    # or None) cannot make len() raise below. String inputs are unaffected.
    name_text = str(col_name)
    type_text = str(col_type)

    # Identity checks are deliberate: only the real booleans are recognised,
    # so truthy/falsy stand-ins such as 1, 0 or "" fall through to "unknown".
    if nullable_val is True:
        nullable_str = "Nullable"
    elif nullable_val is False:
        nullable_str = "Required"
    else:
        nullable_str = "Unknown Nullability"

    # The rule is sized from the header text plus a fixed allowance of 20.
    rule_width = len(name_text) + len(type_text) + 20

    return [
        Text.assemble(("Column: ", "bold"), f"`{name_text}`"),
        Text.assemble(("Type: ", "bold"), f"{type_text} ({nullable_str})"),
        "─" * rule_width,
    ]
|
83
|
+
|
84
|
+
|
85
|
+
def _format_error_section(calc_error: str) -> List[Union[str, Text]]:
    """
    Format the error section.

    Returns a bold red heading, the error text wrapped in a triple-backtick
    fence, and a trailing empty line that separates it from the next section.
    """
    heading = Text("Calculation Error:", style="bold red")
    fenced_error = f"```\n{calc_error}\n```"
    spacer = ""
    return [heading, fenced_error, spacer]
|
92
|
+
|
93
|
+
|
94
|
+
def _format_message_section(message: str) -> List[Union[str, Text]]:
    """
    Format the informational message section.

    Returns the message prefixed with "Info: " in italic cyan, followed by an
    empty line that separates it from the next section.
    """
    info_line = Text(f"Info: {message}", style="italic cyan")
    spacer = ""
    return [info_line, spacer]
|
100
|
+
|
101
|
+
|
102
|
+
def _format_calculated_stats(calculated: Dict[str, Any], has_error: bool = False) -> List[Union[str, Text]]:
    """
    Format the calculated statistics section.

    Well-known statistics are listed first in a fixed display order; any other
    keys present in ``calculated`` follow in the mapping's own iteration order.

    Args:
        calculated: Mapping of statistic name to value.
        has_error: Whether a calculation error was reported for this column;
            when true, the "nothing calculated" hint is suppressed.

    Returns:
        A bold section heading followed by one or more lines per statistic.
    """
    # Preferred display order for the statistics this module knows about.
    preferred = [
        "Total Count", "Valid Count", "Null Count", "Null Percentage",
        "Distinct Count", "Distinct Values (Approx)",
        "Min", "Max", "Mean", "Median (50%)", "StdDev", "Variance",
        "True Count", "False Count",
        "Value Counts"
    ]

    known = [name for name in preferred if name in calculated]
    extras = [name for name in calculated if name not in preferred]

    output = [Text("Calculated Statistics:", style="bold")]
    for name in known + extras:
        output.extend(_format_single_stat(name, calculated[name]))

    # Nothing to show and no error already explaining why: leave a dim hint.
    if not (known or extras or has_error):
        output.append(Text(" (No specific stats calculated for this type)", style="dim"))

    return output
|
134
|
+
|
135
|
+
|
136
|
+
def _format_single_stat(key: str, value: Any) -> List[Union[str, Text]]:
    """
    Format a single statistic entry.

    A "Value Counts" entry whose value is a dict expands into a heading line
    plus one line per sub-entry; every other statistic becomes one
    "key: value" line.
    """
    is_breakdown = key == "Value Counts" and isinstance(value, dict)
    if not is_breakdown:
        return [f" - {key}: {_format_stat_value(value)}"]

    rendered = [f" - {key}:"]
    rendered.extend(
        f" - {label}: {_format_stat_value(count)}" for label, count in value.items()
    )
    return rendered
|
150
|
+
|
151
|
+
|
152
|
+
def _format_stat_value(value: Any) -> str:
|
153
|
+
"""Format a single statistic value."""
|
154
|
+
if isinstance(value, (int, float)):
|
155
|
+
if isinstance(value, int):
|
156
|
+
return f"{value:,}"
|
157
|
+
else:
|
158
|
+
return f"{value:,.4f}"
|
159
|
+
else:
|
160
|
+
return str(value)
|
@@ -0,0 +1,104 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: parqv
|
3
|
+
Version: 0.2.1
|
4
|
+
Summary: An interactive Python TUI for visualizing, exploring, and analyzing files directly in your terminal.
|
5
|
+
Author-email: Sangmin Yoon <sanspareilsmyn@gmail.com>
|
6
|
+
License-Expression: Apache-2.0
|
7
|
+
Requires-Python: >=3.10
|
8
|
+
Description-Content-Type: text/markdown
|
9
|
+
License-File: LICENSE
|
10
|
+
Requires-Dist: textual>=1.0.0
|
11
|
+
Requires-Dist: pyarrow>=16.0.0
|
12
|
+
Requires-Dist: pandas>=2.0.0
|
13
|
+
Requires-Dist: numpy>=1.20.0
|
14
|
+
Requires-Dist: duckdb>=1.2.0
|
15
|
+
Dynamic: license-file
|
16
|
+
|
17
|
+
# parqv
|
18
|
+
|
19
|
+
[Python 3.10+](https://www.python.org/)
|
20
|
+
[License: Apache-2.0](LICENSE)
|
21
|
+
[PyPI version](https://badge.fury.io/py/parqv) <!-- TODO: Link after first PyPI release -->
|
22
|
+
[Built with Textual](https://textual.textualize.io/)
|
23
|
+
|
24
|
+
---
|
25
|
+
|
26
|
+
**Supported File Formats:** ✅ **Parquet** | ✅ **JSON** / **JSON Lines (ndjson)** | *(More planned!)*
|
27
|
+
|
28
|
+
---
|
29
|
+
|
30
|
+
**`parqv` is a Python-based interactive TUI (Text User Interface) tool designed to explore, analyze, and understand various data file formats directly within your terminal.** Initially supporting Parquet and JSON, `parqv` aims to provide a unified, visual experience for quick data inspection without leaving your console.
|
31
|
+
|
32
|
+
## 💻 Demo
|
33
|
+
|
34
|
+

|
35
|
+
*(Demo shows Parquet features; UI adapts for other formats)*
|
36
|
+
|
37
|
+
## 🤔 Why `parqv`?
|
38
|
+
1. **Unified Interface:** Launch `parqv <your_data_file>` to access **metadata, schema, data preview, and column statistics** all within a single, navigable terminal window. No more juggling different commands for different file types.
|
39
|
+
2. **Interactive Exploration:**
|
40
|
+
* **🖱️ Keyboard & Mouse Driven:** Navigate using familiar keys (arrows, `hjkl`, Tab) or even your mouse (thanks to `Textual`).
|
41
|
+
* **📜 Scrollable Views:** Easily scroll through large schemas, data tables, or column lists.
|
42
|
+
* **🌲 Clear Schema View:** Understand column names, data types, and nullability at a glance. (Complex nested structures visualization might vary by format).
|
43
|
+
* **📊 Dynamic Stats:** Select a column and instantly see its detailed statistics (counts, nulls, min/max, mean, distinct values, etc.).
|
44
|
+
3. **Cross-Format Consistency:**
|
45
|
+
* **🎨 Rich Display:** Leverages `rich` and `Textual` for colorful, readable tables and text across supported formats.
|
46
|
+
* **📈 Quick Stats:** Get key statistical insights consistently, regardless of the underlying file type.
|
47
|
+
* **🔌 Extensible:** Designed with a handler interface to easily add support for more file formats in the future (like CSV, Arrow IPC, etc.).
|
48
|
+
|
49
|
+
## ✨ Features (TUI Mode)
|
50
|
+
* **Multi-Format Support:** Currently supports **Parquet** (`.parquet`) and **JSON/JSON Lines** (`.json`, `.ndjson`). Run `parqv <your_file.{parquet,json,ndjson}>`.
|
51
|
+
* **Metadata Panel:** Displays key file information (path, format, size, total rows, column count, etc.). *Fields may vary slightly depending on the file format.*
|
52
|
+
* **Schema Explorer:**
|
53
|
+
* Interactive list view of columns.
|
54
|
+
* Clearly shows column names, data types, and nullability.
|
55
|
+
* **Data Table Viewer:**
|
56
|
+
* Scrollable table preview of the file's data.
|
57
|
+
* Attempts to preserve data types for better representation.
|
58
|
+
* **Column Statistics Viewer:**
|
59
|
+
* Select a column in the Schema tab to view detailed statistics.
|
60
|
+
* Shows counts (total, valid, null), percentages, and type-specific stats (min/max, mean, stddev, distinct counts, length stats, boolean value counts where applicable).
|
61
|
+
* **Row Group Inspector (Parquet Specific):**
|
62
|
+
* *This panel only appears when viewing Parquet files.*
|
63
|
+
* Lists row groups with stats (row count, compressed/uncompressed size).
|
64
|
+
* (Planned) Select a row group for more details.
|
65
|
+
|
66
|
+
## 🚀 Getting Started
|
67
|
+
|
68
|
+
**1. Prerequisites:**
|
69
|
+
* **Python:** Version 3.10 or higher.
|
70
|
+
* **pip:** The Python package installer.
|
71
|
+
|
72
|
+
**2. Install `parqv`:**
|
73
|
+
* Open your terminal and run:
|
74
|
+
```bash
|
75
|
+
pip install parqv
|
76
|
+
```
|
77
|
+
*(This will also install dependencies like `textual`, `pyarrow`, `pandas`, and `duckdb`)*
|
78
|
+
* **Updating `parqv`:**
|
79
|
+
```bash
|
80
|
+
pip install --upgrade parqv
|
81
|
+
```
|
82
|
+
|
83
|
+
**3. Run `parqv`:**
|
84
|
+
* Point `parqv` to your data file:
|
85
|
+
```bash
|
86
|
+
# parquet
|
87
|
+
parqv /path/to/your/data.parquet
|
88
|
+
|
89
|
+
# json
|
90
|
+
parqv /path/to/your/data.json
```
|
91
|
+
* The interactive TUI will launch. Use your keyboard (and mouse, if supported by your terminal) to navigate:
|
92
|
+
* **Arrow Keys / `j`,`k` (in lists):** Move selection up/down.
|
93
|
+
* **`Tab` / `Shift+Tab`:** Cycle focus between the main tab content and potentially other areas. (Focus handling might evolve).
|
94
|
+
* **`Enter` (in column list):** Select a column to view statistics.
|
95
|
+
* **View Switching:** Use `Ctrl+N` (Next Tab) and `Ctrl+P` (Previous Tab) or click on the tabs (Metadata, Schema, Data Preview).
|
96
|
+
* **Scrolling:** Use `PageUp` / `PageDown` / `Home` / `End` or arrow keys/mouse wheel within scrollable areas (like Schema stats or Data Preview).
|
97
|
+
* **`q` / `Ctrl+C`:** Quit `parqv`.
|
98
|
+
* *(Help Screen `?` is planned)*
|
99
|
+
|
100
|
+
---
|
101
|
+
|
102
|
+
## 📄 License
|
103
|
+
|
104
|
+
Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for the full license text.
|
@@ -0,0 +1,34 @@
|
|
1
|
+
parqv/__init__.py,sha256=CcARikIb8knQqd3bGu6Y9exgSbzdywjdORz15VKKxmU,611
|
2
|
+
parqv/app.py,sha256=KXfL-RfMOOngzxn0uEeWa5UQgHRISBBl1IHK0ffQlzI,5556
|
3
|
+
parqv/cli.py,sha256=9KPOYywA53vUVp_5RI2lFgKtZay2EHosOsTo-sz4rOU,3242
|
4
|
+
parqv/parqv.css,sha256=C42ZXUwMX1ZXfGo0AmixbHxz0CWKzWBHZ_hkhq5aehg,2920
|
5
|
+
parqv/core/__init__.py,sha256=C8P-wqP72hk54qcjRNjDQ9X4hTu9eRttAyVynCAeDQw,791
|
6
|
+
parqv/core/config.py,sha256=4dAyTkqOnqoin8OdB7QGl7JYlWx0EGV_q8JgY0TnMuI,542
|
7
|
+
parqv/core/file_utils.py,sha256=m7d8wD9nxXCmfUU1b6IGBqjW0mBL2Nr40r9M8NI67FY,2374
|
8
|
+
parqv/core/handler_factory.py,sha256=xUj5vNOwLkEReL0x8CJbEbsqJ6XXm44w4NEx--TT8d4,3035
|
9
|
+
parqv/core/logging.py,sha256=fom0zPykBgn7zpW_vlvaCDlCERI1lccdjtBRMES94-A,1173
|
10
|
+
parqv/data_sources/__init__.py,sha256=7l7rA1Q4-gqPFnYRCpGnC4aZq7layQhXTAIitnUmQfM,873
|
11
|
+
parqv/data_sources/base/__init__.py,sha256=Alo4tGZJdUsfvGvDgpRodur3xn-SO7hgeODmhx6tA6U,524
|
12
|
+
parqv/data_sources/base/exceptions.py,sha256=QjTQqPw_LgEYchN2OjWCOkUuRV-Sn9I1J3gmoiS4ekA,802
|
13
|
+
parqv/data_sources/base/handler.py,sha256=I0Hmsf-WYhWyLGUHEV902iPS7Gg4dDkRPgyXOfGZSTQ,4429
|
14
|
+
parqv/data_sources/formats/__init__.py,sha256=XRaoyDCh03UIC7YwHJJiPcd5XZw_Yh6w3obD5CQ1Eog,308
|
15
|
+
parqv/data_sources/formats/json.py,sha256=bqspvndHTHIQevvXckfdeEnDCSjtl7eyv-PXJfXmaFg,19998
|
16
|
+
parqv/data_sources/formats/parquet.py,sha256=L8JA3pxjinNTZY3lMPKs9CwVO59fdwnkeSui5tQxrew,31580
|
17
|
+
parqv/views/__init__.py,sha256=aQAsH9akPMBVf7F0KNiELAtb-yoThQsR41nXk3G_mbQ,943
|
18
|
+
parqv/views/base.py,sha256=9kRsUjTXdxEoiy9gSlNmU1ksZsN7FkyXWepX4C0vBFQ,2948
|
19
|
+
parqv/views/data_view.py,sha256=Xk7XdXCN82AmH3hyrmjDAIGtc212zc1lrqE_2ijWKeY,4344
|
20
|
+
parqv/views/metadata_view.py,sha256=6qdLEyO0te1TMiVtTxq-L3PWTOxCtFkkrDH_4yaXPaw,2058
|
21
|
+
parqv/views/schema_view.py,sha256=r1ZHJZ7g20KT_EfGrh71AU0-gNj1Z4Z-WUDQCwOu3BY,8966
|
22
|
+
parqv/views/components/__init__.py,sha256=rBsX9UH67GCLn275LhG2Xyf84gRtrm-otuj6zW5JAiU,267
|
23
|
+
parqv/views/components/enhanced_data_table.py,sha256=57pJD9rT-kiy2llw4p5pZOReUtv5CZxN-2tD6e7QDaI,4952
|
24
|
+
parqv/views/components/error_display.py,sha256=Ak1AbT9dikkF3izVtVFxdRB0nas6jqjb5tPG3l-_JjQ,2444
|
25
|
+
parqv/views/components/loading_display.py,sha256=1lLxvt5oMBGDiojODTVQ6O_hVNFHCWqpWamk863BMkY,1440
|
26
|
+
parqv/views/utils/__init__.py,sha256=3PVlUMR-317CJmgvvT1iFk-mYZEETZz3H88g2e-7HAo,343
|
27
|
+
parqv/views/utils/data_formatters.py,sha256=3Kp77Ue3z-8M3BGetwxY7H3CocUeBoWaXr8tTXus_lg,4386
|
28
|
+
parqv/views/utils/stats_formatters.py,sha256=vbsVI-mi7ITyS0CGPIBx1k_IrGgZYvmMhbcN1JHkm-8,4890
|
29
|
+
parqv-0.2.1.dist-info/licenses/LICENSE,sha256=Ewl2wCa8r6ncxHlpf-ZZXb77c82zdfxHuEeKzBbm6nM,11324
|
30
|
+
parqv-0.2.1.dist-info/METADATA,sha256=q2ngRgL0aUrRrgwb45HqRuYAHJtgS--5PNlJVPCS3JA,5369
|
31
|
+
parqv-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
32
|
+
parqv-0.2.1.dist-info/entry_points.txt,sha256=8Tm8rTiIB-tbVItoOA4M7seEmFnrtK25BMH9UKzqfXg,44
|
33
|
+
parqv-0.2.1.dist-info/top_level.txt,sha256=_t3_49ZluJbvl0QU_P3GNVuXxCffqiTp37dzZIa2GEw,6
|
34
|
+
parqv-0.2.1.dist-info/RECORD,,
|