inspecty 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspecty-0.1.0/PKG-INFO +137 -0
- inspecty-0.1.0/README.md +123 -0
- inspecty-0.1.0/inspecty/__init__.py +4 -0
- inspecty-0.1.0/inspecty/printdata.py +310 -0
- inspecty-0.1.0/inspecty.egg-info/PKG-INFO +137 -0
- inspecty-0.1.0/inspecty.egg-info/SOURCES.txt +9 -0
- inspecty-0.1.0/inspecty.egg-info/dependency_links.txt +1 -0
- inspecty-0.1.0/inspecty.egg-info/requires.txt +3 -0
- inspecty-0.1.0/inspecty.egg-info/top_level.txt +1 -0
- inspecty-0.1.0/setup.cfg +4 -0
- inspecty-0.1.0/setup.py +28 -0
inspecty-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: inspecty
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Universal data inspector — one function to understand any dataset
|
|
5
|
+
Home-page: https://github.com/manjur-ai/inspecty
|
|
6
|
+
Author: Manjur Alam
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering
|
|
11
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# inspecty
|
|
16
|
+
|
|
17
|
+
**Universal data inspector — one function to understand any dataset.**
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install inspecty
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Instead of `print()`, `type()`, `.info()`, `.describe()` — just run `ins.inspect(data)` and get a high-density statistical report for any data type.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Supported data types
|
|
28
|
+
|
|
29
|
+
DataFrame, Series, NumPy array (1D/2D/3D), list, tuple, set, dictionary, JSON string, CSV file path, Excel file path — any Python object.
|
|
30
|
+
|
|
31
|
+
## Quick example
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import inspecty as ins
|
|
35
|
+
import pandas as pd
|
|
36
|
+
|
|
37
|
+
df = pd.DataFrame({
|
|
38
|
+
"price": [100.5, 101.2, 100.8, 102.1, 101.5],
|
|
39
|
+
"volume": [1000, 1500, 1200, 1800, 2000],
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
ins.inspect(df)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Output:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
Data type: <class 'pandas.DataFrame'>
|
|
49
|
+
It is a pandas DataFrame. Total rows: 5. Total columns: 2. Summary:
|
|
50
|
+
|
|
51
|
+
column: price volume
|
|
52
|
+
dtype: float64 int64
|
|
53
|
+
- ----- -----
|
|
54
|
+
Row 0: 100.5 1000
|
|
55
|
+
Row 1: 101.2 1500
|
|
56
|
+
Row 2: 100.8 1200
|
|
57
|
+
---------- ----- -----
|
|
58
|
+
count: 5 5
|
|
59
|
+
max: 102.1 2000.0
|
|
60
|
+
min: 100.5 1000.0
|
|
61
|
+
mean: 101.22 1500.0
|
|
62
|
+
std: 0.622093 412.310563
|
|
63
|
+
median: 101.2 1500.0
|
|
64
|
+
mode: 100.5 1000.0
|
|
65
|
+
nan: 0 0
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## More examples
|
|
71
|
+
|
|
72
|
+
### Pandas Series
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
series = pd.Series([10, 20, 30, 40, 50], name="RSI")
|
|
76
|
+
ins.inspect(series)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Dictionary pivot
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
data = {
|
|
83
|
+
"Ticker": ["BTC", "ETH", "SOL"],
|
|
84
|
+
"Price": [62000.5, 3400.2, 145.1],
|
|
85
|
+
"Signal": ["Buy", "Hold", "Buy"],
|
|
86
|
+
}
|
|
87
|
+
ins.inspect(data)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### 3D NumPy tensor
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
tensor = np.random.randn(2, 100, 5) # (Tickers, Days, Features)
|
|
94
|
+
ins.inspect(tensor)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Auto-load files
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
ins.inspect("historical_prices.csv")
|
|
101
|
+
ins.inspect("data.xlsx")
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### JSON string
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
json_resp = '{"Symbol": ["AAPL", "TSLA"], "Price": [150.2, 700.5]}'
|
|
108
|
+
ins.inspect(json_resp)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### System info
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
ins.inspect("all_info")
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Parameters
|
|
120
|
+
|
|
121
|
+
| Param | Type | Default | Description |
|
|
122
|
+
|-------|------|---------|-------------|
|
|
123
|
+
| `df` | Any | required | Data object, file path, or JSON string |
|
|
124
|
+
| `count` | int | 3 | Rows shown from top & bottom |
|
|
125
|
+
| `silent` | bool | False | Hide headers, show only table |
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Why not `df.describe()`?
|
|
130
|
+
|
|
131
|
+
By default, `df.describe()` summarizes only numeric columns, does not report NaN counts directly, and has to be combined with other calls (`.info()`, `.head()`, `.isna().sum()`) to get a full picture. `inspect()` handles mixed types, shows NaN counts explicitly, and works on any data structure — not just DataFrames.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
MIT
|
inspecty-0.1.0/README.md
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# inspecty
|
|
2
|
+
|
|
3
|
+
**Universal data inspector — one function to understand any dataset.**
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install inspecty
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Instead of `print()`, `type()`, `.info()`, `.describe()` — just run `ins.inspect(data)` and get a high-density statistical report for any data type.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Supported data types
|
|
14
|
+
|
|
15
|
+
DataFrame, Series, NumPy array (1D/2D/3D), list, tuple, set, dictionary, JSON string, CSV file path, Excel file path — any Python object.
|
|
16
|
+
|
|
17
|
+
## Quick example
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
import inspecty as ins
|
|
21
|
+
import pandas as pd
|
|
22
|
+
|
|
23
|
+
df = pd.DataFrame({
|
|
24
|
+
"price": [100.5, 101.2, 100.8, 102.1, 101.5],
|
|
25
|
+
"volume": [1000, 1500, 1200, 1800, 2000],
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
ins.inspect(df)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Output:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
Data type: <class 'pandas.DataFrame'>
|
|
35
|
+
It is a pandas DataFrame. Total rows: 5. Total columns: 2. Summary:
|
|
36
|
+
|
|
37
|
+
column: price volume
|
|
38
|
+
dtype: float64 int64
|
|
39
|
+
- ----- -----
|
|
40
|
+
Row 0: 100.5 1000
|
|
41
|
+
Row 1: 101.2 1500
|
|
42
|
+
Row 2: 100.8 1200
|
|
43
|
+
---------- ----- -----
|
|
44
|
+
count: 5 5
|
|
45
|
+
max: 102.1 2000.0
|
|
46
|
+
min: 100.5 1000.0
|
|
47
|
+
mean: 101.22 1500.0
|
|
48
|
+
std: 0.622093 412.310563
|
|
49
|
+
median: 101.2 1500.0
|
|
50
|
+
mode: 100.5 1000.0
|
|
51
|
+
nan: 0 0
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## More examples
|
|
57
|
+
|
|
58
|
+
### Pandas Series
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
series = pd.Series([10, 20, 30, 40, 50], name="RSI")
|
|
62
|
+
ins.inspect(series)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Dictionary pivot
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
data = {
|
|
69
|
+
"Ticker": ["BTC", "ETH", "SOL"],
|
|
70
|
+
"Price": [62000.5, 3400.2, 145.1],
|
|
71
|
+
"Signal": ["Buy", "Hold", "Buy"],
|
|
72
|
+
}
|
|
73
|
+
ins.inspect(data)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 3D NumPy tensor
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
tensor = np.random.randn(2, 100, 5) # (Tickers, Days, Features)
|
|
80
|
+
ins.inspect(tensor)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Auto-load files
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
ins.inspect("historical_prices.csv")
|
|
87
|
+
ins.inspect("data.xlsx")
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### JSON string
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
json_resp = '{"Symbol": ["AAPL", "TSLA"], "Price": [150.2, 700.5]}'
|
|
94
|
+
ins.inspect(json_resp)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### System info
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
ins.inspect("all_info")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Parameters
|
|
106
|
+
|
|
107
|
+
| Param | Type | Default | Description |
|
|
108
|
+
|-------|------|---------|-------------|
|
|
109
|
+
| `df` | Any | required | Data object, file path, or JSON string |
|
|
110
|
+
| `count` | int | 3 | Rows shown from top & bottom |
|
|
111
|
+
| `silent` | bool | False | Hide headers, show only table |
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Why not `df.describe()`?
|
|
116
|
+
|
|
117
|
+
By default, `df.describe()` summarizes only numeric columns, does not report NaN counts directly, and has to be combined with other calls (`.info()`, `.head()`, `.isna().sum()`) to get a full picture. `inspect()` handles mixed types, shows NaN counts explicitly, and works on any data structure — not just DataFrames.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## License
|
|
122
|
+
|
|
123
|
+
MIT
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import os
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
import subprocess
|
|
7
|
+
import platform
|
|
8
|
+
import shutil
|
|
9
|
+
import importlib.metadata
|
|
10
|
+
from pandas.api.types import is_numeric_dtype
|
|
11
|
+
|
|
12
|
+
# ============================================================
|
|
13
|
+
# ENVIRONMENT INSPECTION (Silent Mode Enabled)
|
|
14
|
+
# ============================================================
|
|
15
|
+
def _inspect_all_info():
    """Print a best-effort report about the host system and Python setup.

    Sections are printed in this order: SYSTEM, GPU & CUDA, SCRIPT,
    CURRENT ENVIRONMENT (with pip and installed packages), DEFAULT PYTHON
    (CMD), OTHER PYTHON INSTALLATIONS, WARNINGS and TOOL.

    Every probe is best-effort: when an external command is missing or
    fails, a fallback line is printed instead of raising.  Only ordinary
    exceptions are absorbed, so Ctrl+C (``KeyboardInterrupt``) and
    ``SystemExit`` still propagate.

    Returns:
        None.  All output is written to stdout with ``print``.
    """
    # Setup for silent subprocess execution on Windows (prevents CMD flicker)
    on_windows = platform.system() == "Windows"
    startupinfo = None
    if on_windows:
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE

    def run(cmd):
        """Run *cmd* and return its stdout as text; raises if it fails."""
        return subprocess.check_output(cmd, text=True, startupinfo=startupinfo)

    def print_in_rows(items, per_row=4):
        """Print *items* joined by ' | ', at most *per_row* entries per line."""
        for start in range(0, len(items), per_row):
            print(" | ".join(items[start:start + per_row]))

    print("\n========== SYSTEM ==========")
    print("OS: ", platform.system(), platform.release())
    print("Architecture:", platform.machine())
    print("Processor: ", platform.processor())

    # --- LIGHTWEIGHT GPU & CUDA DETECTION ---
    print("\n========== GPU & CUDA ==========")
    try:
        gpu_raw = run(
            [
                "nvidia-smi",
                "--query-gpu=gpu_name,memory.total,driver_version",
                "--format=csv,noheader,nounits",
            ]
        ).strip()

        # nvidia-smi prints one CSV line per GPU.  Parse each line on its own
        # so that multi-GPU machines are not misreported as having no GPU.
        gpu_lines = [ln for ln in gpu_raw.splitlines() if ln.strip()]
        for gpu_idx, gpu_line in enumerate(gpu_lines):
            # rsplit keeps a comma inside the device name from breaking parsing.
            name, mem, driver = (part.strip() for part in gpu_line.rsplit(",", 2))
            if len(gpu_lines) > 1:
                print(f"GPU #{gpu_idx}")
            print(f"Hardware: {name}")
            print(f"Total Memory: {mem} MB")
            print(f"Driver Version: {driver}")

        # The CUDA version is only shown in the banner of the plain command.
        for line in run(["nvidia-smi"]).split('\n'):
            if "CUDA Version:" in line:
                v_part = line.split("CUDA Version:")[1].strip()
                print(f"CUDA Version: {v_part.split(' ')[0]}")
                break
    except Exception:
        print("Status: No NVIDIA GPU or Driver detected via nvidia-smi.")

    print("\n========== SCRIPT ==========")
    print("Current Working Directory:", os.getcwd())
    try:
        print("Script Path: ", os.path.abspath(sys.argv[0]))
    except (IndexError, TypeError, OSError):
        # sys.argv can be empty in embedded interpreters; nothing to report.
        pass

    # --- UPDATED ENVIRONMENT LOGIC (ml_env detection) ---
    print("\n========== CURRENT ENVIRONMENT ==========")
    is_venv = (hasattr(sys, 'real_prefix') or
               (getattr(sys, 'base_prefix', sys.prefix) != sys.prefix))
    env_name = os.path.basename(sys.prefix) if is_venv else "Global System"

    print("Name: ", env_name)
    print("Path: ", sys.prefix)
    print("Python: ", sys.version.split()[0])
    print("Python Executable:", sys.executable)

    try:
        pip_info = run([sys.executable, "-m", "pip", "--version"]).strip()
        print("\nPip: ", pip_info)
    except Exception:
        print("\nPip: Not detected")

    print("\nInstalled Packages:")
    try:
        packages = []
        for dist in importlib.metadata.distributions():
            dist_name = dist.metadata.get("Name")
            # Skip broken installs without a Name so one bad distribution
            # does not abort the whole listing.
            if dist_name:
                packages.append((dist_name, dist.version))
        packages.sort(key=lambda pkg: pkg[0].lower())
        print_in_rows([f"{name} {ver}" for name, ver in packages])
    except Exception:
        print("Could not read installed packages")

    print("\n========== DEFAULT PYTHON (CMD) ==========")
    default_python = shutil.which("python")
    if default_python:
        print("python command ->", default_python)
        try:
            print("Version: ", run([default_python, "--version"]).strip())
        except Exception:
            print("Version: ", "unknown")
    else:
        print("python command not found in PATH")

    print("\n========== OTHER PYTHON INSTALLATIONS ==========")
    # Seed with the running interpreter so that a single *other* installation
    # with a different version is enough to trigger the warning below.
    versions = {f"Python {sys.version.split()[0]}"}

    # `where` exists only on Windows; `which -a` is the POSIX counterpart.
    if on_windows:
        locate_cmd = ["where", "python"]
    else:
        locate_cmd = ["which", "-a", "python", "python3"]

    try:
        result = subprocess.run(
            locate_cmd,
            capture_output=True, text=True, startupinfo=startupinfo
        )
    except Exception:
        print("Could not detect other python installations")
        python_paths = []
    else:
        # dict.fromkeys de-duplicates while keeping the discovery order stable.
        python_paths = list(dict.fromkeys(
            p.strip() for p in result.stdout.splitlines() if p.strip()
        ))

    current_exe = os.path.abspath(sys.executable)
    idx = 1
    for path in python_paths:
        if os.path.abspath(path) == current_exe:
            continue

        print(f"\nEnvironment #{idx}")
        print("Python Path:", path)
        try:
            v = run([path, "--version"]).strip()
            print("Python: ", v)
            versions.add(v)
        except Exception:
            print("Python: ", "unknown")

        try:
            pipv = run([path, "-m", "pip", "--version"]).strip()
            print("pip: ", pipv)
        except Exception:
            print("pip: not installed")

        # RESTORED PACKAGE LISTING FOR OTHER ENVIRONMENTS
        print("\nInstalled Packages:")
        try:
            pkg_output = run([path, "-m", "pip", "list", "--format=freeze"])
            # Case-insensitive sort, matching the current-environment listing.
            pkg_lines = sorted(pkg_output.strip().splitlines(), key=str.lower)
            print_in_rows([p.replace("==", " ") for p in pkg_lines])
        except Exception:
            print("Could not read installed packages")

        idx += 1

    print("\n========== WARNINGS ==========")
    if len(versions) > 1:
        print("WARNING: multiple Python versions detected")
        for v in sorted(versions):
            print(" ", v)

    print("\n========== TOOL ==========")
    tool = "Terminal"
    if "VSCODE_PID" in os.environ:
        tool = "VS Code"
    elif "PYCHARM_HOSTED" in os.environ:
        tool = "PyCharm"
    elif "ipykernel" in sys.modules:
        tool = "Jupyter"
    elif "idlelib" in sys.modules:
        tool = "IDLE"
    print("Running in:", tool)
|
|
187
|
+
|
|
188
|
+
# ============================================================
|
|
189
|
+
# ORIGINAL INSPECT FUNCTION
|
|
190
|
+
# ============================================================
|
|
191
|
+
def inspect(df, count=None, silent=False):
    """Print a compact statistical summary of almost any data object.

    The input is normalised to a pandas DataFrame and printed as one table:
    dtypes, the first ``count`` rows, the last ``count`` rows (only when the
    data has more than ``2 * count`` rows), then count / max / min / mean /
    std / median / mode / nan per column.

    Args:
        df: The object to inspect.  Handled specially:
            * the string ``"all_info"`` (any case) prints the environment
              report instead of inspecting data;
            * a string ending in ``.csv`` / ``.xlsx`` is loaded from disk
              when the file exists;
            * a string that looks like a JSON object or array is parsed;
            * ``dict``, ``list``, ``tuple`` and ``set`` are converted to
              pandas objects;
            * ``pandas.DataFrame``, ``pandas.Series`` and 1D/2D/3D
              ``numpy.ndarray`` are summarised directly.
            Anything else is passed to ``print`` unchanged.
        count: Number of rows shown from the top (and bottom).  ``None`` or
            ``0`` falls back to 3.
        silent: When true, suppress the descriptive header lines and print
            only the table (or the raw object).

    Returns:
        None.  All output is written to stdout.
    """
    if isinstance(df, str) and df.lower() == "all_info":
        _inspect_all_info()
        return

    if not silent:
        print(f"\nData type: {type(df)}")

    is_native_type = False
    if isinstance(df, str):
        stripped = df.strip()
        if df.lower().endswith(('.csv', '.xlsx')):
            if os.path.exists(df):
                if not silent:
                    print(f"File detected: '{df}'. Loading for inspection...")
                try:
                    df = pd.read_csv(df) if df.lower().endswith('.csv') else pd.read_excel(df)
                except Exception as e:
                    # Best-effort: on failure the path string itself is printed below.
                    if not silent:
                        print(f"Error loading file: {e}")
            else:
                if not silent:
                    print(f"Path string detected, but file not found at: {df}")
        elif (stripped.startswith('{') and stripped.endswith('}')) or \
                (stripped.startswith('[') and stripped.endswith(']')):
            try:
                parsed = json.loads(df)
            except (ValueError, RecursionError):
                # Not valid JSON after all: keep the string and print it as-is.
                pass
            else:
                df = parsed
                if not silent:
                    print("JSON string detected. Parsed for inspection.")

    if isinstance(df, dict):
        is_native_type = True
        lengths = [len(v) for v in df.values() if isinstance(v, (list, tuple))]
        all_lists = all(isinstance(v, (list, tuple)) for v in df.values())
        if all_lists and len(set(lengths)) == 1 and len(lengths) > 0:
            # Column-oriented dict: every value is a sequence of equal length.
            df = pd.DataFrame(df)
            if not silent:
                print("Dictionary detected with equal-length lists. Dictionary keys used as Columns.")
        else:
            df = pd.Series(df).to_frame(name="value")
            if not silent:
                print("Input is a dictionary. Keys moved to Index for inspection.")
    elif isinstance(df, (list, tuple, set)):
        is_native_type = True
        original_type_name = type(df).__name__
        df = pd.Series(list(df))
        if not silent:
            print(f"Input is a {original_type_name}. Converted for inspection.")

    if not isinstance(df, (pd.DataFrame, pd.Series, np.ndarray)):
        print(df)
        return

    if isinstance(df, pd.Series):
        # Compare against None explicitly: falsy names such as 0 or "" are
        # legitimate (e.g. a column taken from an integer-labelled frame).
        series_name = df.name if df.name is not None else "it_is_series"
        new_df = df.to_frame(name=series_name)
        if not silent:
            prefix = "" if is_native_type else "It is a pandas Series. "
            print(f"{prefix}Total items: {len(new_df)}. Summary:")
    elif isinstance(df, np.ndarray):
        numpy_new = df.copy()
        if numpy_new.ndim == 1:
            new_df = pd.DataFrame(numpy_new, columns=["numpy_array"])
            if not silent:
                print(f"Input is a 1D numpy array (Shape: {numpy_new.shape}). Total items: {len(new_df)}. Summary:")
        elif numpy_new.ndim == 2:
            col_names = [f"col_{i}" for i in range(numpy_new.shape[1])]
            new_df = pd.DataFrame(numpy_new, columns=col_names)
            if not silent:
                print(f"Input is a 2D numpy array (Shape: {numpy_new.shape}). Total rows: {len(new_df)}. Total columns: {len(new_df.columns)}. Summary:")
        elif numpy_new.ndim == 3:
            # Stack the leading two axes so every row keeps the last axis as columns.
            depth, rows, cols = numpy_new.shape
            new_df = pd.DataFrame(numpy_new.reshape(-1, cols), columns=[f"feature_{i}" for i in range(cols)])
            if not silent:
                print(f"Input is a 3D numpy array (Shape: {numpy_new.shape}). Total items (flattened): {depth*rows}. Total columns: {cols}. Summary:")
        else:
            if not silent:
                print(f"Array is {numpy_new.ndim}D (Shape: {numpy_new.shape}). Default print:")
            print(numpy_new)
            return
    else:
        # Work on a copy: the index may be reset in place below.
        new_df = df.copy()
        if not silent:
            prefix = "" if is_native_type else "It is a pandas DataFrame. "
            print(f"{prefix}Total rows: {len(new_df)}. Total columns: {len(new_df.columns)}. Summary:")

    def safe_stat(col, func_name):
        """Return one statistic of *col*, or "-" when it does not apply or fails."""
        try:
            if func_name in ('max', 'min'):
                val = col.max() if func_name == 'max' else col.min()
                if pd.api.types.is_datetime64_any_dtype(col):
                    return val.strftime('%Y-%m-%d %H:%M:%S')
                return val
            if func_name in ('mean', 'std', 'median'):
                return getattr(col, func_name)() if is_numeric_dtype(col) else "-"
            if func_name == 'mode':
                m = col.mode()
                return m.iloc[0] if not m.empty else "-"
        except Exception:
            # e.g. max/min on a column mixing incomparable types.
            return "-"
        return "-"

    row_to_print = count if count else 3
    if new_df.empty:
        print("The DataFrame is empty.")
        return

    # A non-numeric index, or one that does not end at len-1, carries
    # information; promote it to a regular column so it shows up in the table.
    if not is_numeric_dtype(new_df.index) or (len(new_df) > 0 and new_df.index[-1] != (len(new_df) - 1)):
        new_df.reset_index(drop=False, inplace=True)

    headers = new_df.columns
    dashs = ['-----'] * len(headers)
    summary_rows = [["dtype:", *new_df.dtypes.astype(str)], ["-", *dashs]]

    first_rows = new_df.head(min(row_to_print, len(new_df)))
    for i, (_, row) in enumerate(first_rows.iterrows()):
        summary_rows.append([f"Row {i}:", *row.tolist()])

    # The tail is shown only when it cannot overlap the head.
    if len(new_df) > (row_to_print * 2):
        summary_rows.append(["--", *dashs])
        last_rows = new_df.tail(row_to_print)
        for i, (_, row) in enumerate(last_rows.iterrows()):
            summary_rows.append([f"Row {len(new_df)-row_to_print+i}:", *row.tolist()])

    summary_rows.append(["----------", *dashs])
    summary_rows.append(["count:", *new_df.count().tolist()])
    for stat in ('max', 'min', 'mean', 'std', 'median', 'mode'):
        # Bind `stat` as a default to avoid the late-binding closure pitfall.
        stat_values = new_df.apply(lambda c, s=stat: safe_stat(c, s)).tolist()
        summary_rows.append([f"{stat}:", *stat_values])
    summary_rows.append(["nan:", *new_df.isna().sum().tolist()])

    print(pd.DataFrame(summary_rows, columns=["column:", *headers]).to_string(index=False))
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: inspecty
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Universal data inspector — one function to understand any dataset
|
|
5
|
+
Home-page: https://github.com/manjur-ai/inspecty
|
|
6
|
+
Author: Manjur Alam
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering
|
|
11
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# inspecty
|
|
16
|
+
|
|
17
|
+
**Universal data inspector — one function to understand any dataset.**
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install inspecty
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Instead of `print()`, `type()`, `.info()`, `.describe()` — just run `ins.inspect(data)` and get a high-density statistical report for any data type.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Supported data types
|
|
28
|
+
|
|
29
|
+
DataFrame, Series, NumPy array (1D/2D/3D), list, tuple, set, dictionary, JSON string, CSV file path, Excel file path — any Python object.
|
|
30
|
+
|
|
31
|
+
## Quick example
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import inspecty as ins
|
|
35
|
+
import pandas as pd
|
|
36
|
+
|
|
37
|
+
df = pd.DataFrame({
|
|
38
|
+
"price": [100.5, 101.2, 100.8, 102.1, 101.5],
|
|
39
|
+
"volume": [1000, 1500, 1200, 1800, 2000],
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
ins.inspect(df)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Output:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
Data type: <class 'pandas.DataFrame'>
|
|
49
|
+
It is a pandas DataFrame. Total rows: 5. Total columns: 2. Summary:
|
|
50
|
+
|
|
51
|
+
column: price volume
|
|
52
|
+
dtype: float64 int64
|
|
53
|
+
- ----- -----
|
|
54
|
+
Row 0: 100.5 1000
|
|
55
|
+
Row 1: 101.2 1500
|
|
56
|
+
Row 2: 100.8 1200
|
|
57
|
+
---------- ----- -----
|
|
58
|
+
count: 5 5
|
|
59
|
+
max: 102.1 2000.0
|
|
60
|
+
min: 100.5 1000.0
|
|
61
|
+
mean: 101.22 1500.0
|
|
62
|
+
std: 0.622093 412.310563
|
|
63
|
+
median: 101.2 1500.0
|
|
64
|
+
mode: 100.5 1000.0
|
|
65
|
+
nan: 0 0
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## More examples
|
|
71
|
+
|
|
72
|
+
### Pandas Series
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
series = pd.Series([10, 20, 30, 40, 50], name="RSI")
|
|
76
|
+
ins.inspect(series)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Dictionary pivot
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
data = {
|
|
83
|
+
"Ticker": ["BTC", "ETH", "SOL"],
|
|
84
|
+
"Price": [62000.5, 3400.2, 145.1],
|
|
85
|
+
"Signal": ["Buy", "Hold", "Buy"],
|
|
86
|
+
}
|
|
87
|
+
ins.inspect(data)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### 3D NumPy tensor
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
tensor = np.random.randn(2, 100, 5) # (Tickers, Days, Features)
|
|
94
|
+
ins.inspect(tensor)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Auto-load files
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
ins.inspect("historical_prices.csv")
|
|
101
|
+
ins.inspect("data.xlsx")
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### JSON string
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
json_resp = '{"Symbol": ["AAPL", "TSLA"], "Price": [150.2, 700.5]}'
|
|
108
|
+
ins.inspect(json_resp)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### System info
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
ins.inspect("all_info")
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Parameters
|
|
120
|
+
|
|
121
|
+
| Param | Type | Default | Description |
|
|
122
|
+
|-------|------|---------|-------------|
|
|
123
|
+
| `df` | Any | required | Data object, file path, or JSON string |
|
|
124
|
+
| `count` | int | 3 | Rows shown from top & bottom |
|
|
125
|
+
| `silent` | bool | False | Hide headers, show only table |
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Why not `df.describe()`?
|
|
130
|
+
|
|
131
|
+
By default, `df.describe()` summarizes only numeric columns, does not report NaN counts directly, and has to be combined with other calls (`.info()`, `.head()`, `.isna().sum()`) to get a full picture. `inspect()` handles mixed types, shows NaN counts explicitly, and works on any data structure — not just DataFrames.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
inspecty
|
inspecty-0.1.0/setup.cfg
ADDED
inspecty-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Packaging script for the ``inspecty`` distribution.
from pathlib import Path

from setuptools import setup, find_packages

# Resolve the README next to this file rather than against the current
# working directory, so the build also works when started from elsewhere.
readme_path = Path(__file__).resolve().parent / "README.md"
long_description = readme_path.read_text(encoding="utf-8")

setup(
    name="inspecty",
    version="0.1.0",
    author="Manjur Alam",
    description="Universal data inspector — one function to understand any dataset",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/manjur-ai/inspecty",
    packages=find_packages(),
    install_requires=[
        "numpy>=1.21.0",
        "pandas>=1.3.0",
        # openpyxl is the engine pandas uses to read .xlsx files.
        "openpyxl>=3.0.0",
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Topic :: Scientific/Engineering",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    python_requires='>=3.9',
)
|