evolver-tools 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolver_tools/__init__.py +2 -0
- evolver_tools/__main__.py +3 -0
- evolver_tools/cli.py +89 -0
- evolver_tools/vendor/b64/__init__.py +2 -0
- evolver_tools/vendor/b64/b64.py +176 -0
- evolver_tools/vendor/cal_tool/__init__.py +1 -0
- evolver_tools/vendor/cal_tool/cli.py +234 -0
- evolver_tools/vendor/chart_cli/__init__.py +444 -0
- evolver_tools/vendor/chart_cli/__main__.py +3 -0
- evolver_tools/vendor/colors/__init__.py +5 -0
- evolver_tools/vendor/colors/__main__.py +97 -0
- evolver_tools/vendor/csv_stats/__init__.py +5 -0
- evolver_tools/vendor/csv_stats/__main__.py +4 -0
- evolver_tools/vendor/csv_stats/analyzer.py +258 -0
- evolver_tools/vendor/csv_stats/cli.py +45 -0
- evolver_tools/vendor/dirsize/__init__.py +183 -0
- evolver_tools/vendor/envcheck/__init__.py +426 -0
- evolver_tools/vendor/ff/__init__.py +427 -0
- evolver_tools/vendor/ff/__main__.py +3 -0
- evolver_tools/vendor/find_dups/__init__.py +7 -0
- evolver_tools/vendor/find_dups/cli.py +392 -0
- evolver_tools/vendor/hashsum/__init__.py +211 -0
- evolver_tools/vendor/hashsum/__main__.py +5 -0
- evolver_tools/vendor/http_live/__init__.py +265 -0
- evolver_tools/vendor/http_live/__main__.py +2 -0
- evolver_tools/vendor/ipinfo/__init__.py +3 -0
- evolver_tools/vendor/ipinfo/__main__.py +30 -0
- evolver_tools/vendor/jq_lite/__init__.py +257 -0
- evolver_tools/vendor/jq_lite/__main__.py +5 -0
- evolver_tools/vendor/json2csv/__init__.py +3 -0
- evolver_tools/vendor/json2csv/__main__.py +82 -0
- evolver_tools/vendor/jsonql/__init__.py +326 -0
- evolver_tools/vendor/jsonql/__main__.py +5 -0
- evolver_tools/vendor/license_cli/__init__.py +1 -0
- evolver_tools/vendor/license_cli/__main__.py +4 -0
- evolver_tools/vendor/license_cli/cli.py +289 -0
- evolver_tools/vendor/markdown_check/__init__.py +211 -0
- evolver_tools/vendor/nb/__init__.py +319 -0
- evolver_tools/vendor/nb/__main__.py +3 -0
- evolver_tools/vendor/passgen/__init__.py +224 -0
- evolver_tools/vendor/portcheck/__init__.py +2 -0
- evolver_tools/vendor/portcheck/__main__.py +66 -0
- evolver_tools/vendor/project_doctor/__init__.py +412 -0
- evolver_tools/vendor/project_doctor/__main__.py +3 -0
- evolver_tools/vendor/ren/__init__.py +283 -0
- evolver_tools/vendor/ren/__main__.py +3 -0
- evolver_tools/vendor/siege_lite/__init__.py +250 -0
- evolver_tools/vendor/siege_lite/__main__.py +3 -0
- evolver_tools/vendor/smellfinder/__init__.py +376 -0
- evolver_tools/vendor/smellfinder/__main__.py +3 -0
- evolver_tools/vendor/sqlite_cli/__init__.py +326 -0
- evolver_tools/vendor/sqlite_cli/__main__.py +5 -0
- evolver_tools/vendor/sysmon/__init__.py +299 -0
- evolver_tools/vendor/sysmon/__main__.py +3 -0
- evolver_tools/vendor/timer/__init__.py +127 -0
- evolver_tools/vendor/treedir/__init__.py +2 -0
- evolver_tools/vendor/treedir/__main__.py +128 -0
- evolver_tools/vendor/urlparse_tool/__init__.py +3 -0
- evolver_tools/vendor/urlparse_tool/cli.py +212 -0
- evolver_tools/vendor/web_summary/__init__.py +341 -0
- evolver_tools/vendor/web_summary/__main__.py +3 -0
- evolver_tools/vendor/wordcount/__init__.py +2 -0
- evolver_tools/vendor/wordcount/__main__.py +101 -0
- evolver_tools-1.4.0.dist-info/METADATA +107 -0
- evolver_tools-1.4.0.dist-info/RECORD +69 -0
- evolver_tools-1.4.0.dist-info/WHEEL +5 -0
- evolver_tools-1.4.0.dist-info/entry_points.txt +34 -0
- evolver_tools-1.4.0.dist-info/licenses/LICENSE +21 -0
- evolver_tools-1.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""csv-stats core analyzer — pure stdlib, zero dependencies."""
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import math
|
|
5
|
+
from collections import Counter
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def red(s):
    """Return *s* wrapped in the ANSI bright-red (91) escape and a reset."""
    return "\033[91m{}\033[0m".format(s)
|
|
9
|
+
def green(s):
    """Return *s* wrapped in the ANSI bright-green (92) escape and a reset."""
    return "\033[92m{}\033[0m".format(s)
|
|
10
|
+
def yellow(s):
    """Return *s* wrapped in the ANSI bright-yellow (93) escape and a reset."""
    return "\033[93m{}\033[0m".format(s)
|
|
11
|
+
def cyan(s):
    """Return *s* wrapped in the ANSI bright-cyan (96) escape and a reset."""
    return "\033[96m{}\033[0m".format(s)
|
|
12
|
+
def dim(s):
    """Return *s* wrapped in the ANSI dim/faint (2) escape and a reset."""
    return "\033[2m{}\033[0m".format(s)
|
|
13
|
+
def bold(s):
    """Return *s* wrapped in the ANSI bold (1) escape and a reset."""
    return "\033[1m{}\033[0m".format(s)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def infer_type(values):
    """Infer a column type from its raw cell values.

    Empty strings and ``None`` cells are ignored. If any remaining cell
    cannot be converted with ``float()``, the column is ``'text'``.

    Args:
        values: Iterable of raw cells (typically strings from ``csv.reader``).

    Returns:
        ``'int'`` when every parsed value is a whole number, ``'float'``
        when all cells parse but at least one is fractional or non-finite,
        and ``'text'`` otherwise (including when there are no usable cells).
    """
    nums = []
    for v in values:
        if v == '' or v is None:
            continue
        try:
            nums.append(float(v))
        except (ValueError, TypeError):
            # One non-numeric cell is enough to classify the column as text.
            return 'text'
    if not nums:
        return 'text'
    # float() accepts "inf" and "nan"; int() of those raises, so use
    # float.is_integer(), which simply reports False for non-finite values.
    return 'int' if all(n.is_integer() for n in nums) else 'float'
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def numeric_stats(values):
    """Compute summary statistics for the numeric cells of a column.

    Cells that are empty, ``None``, or not convertible with ``float()`` are
    excluded from the statistics and counted under ``'missing'``.

    Args:
        values: Sequence of raw cells (``len(values)`` is used for the
            missing count).

    Returns:
        ``None`` when no cell is numeric; otherwise a dict with ``count``,
        ``missing``, ``mean``, ``std`` (population standard deviation, i.e.
        divided by ``count``), ``min``, ``p25``, ``p50``, ``p75``, ``max``,
        ``unique`` and ``sum``. Float results are rounded to 2 decimals.
    """
    nums = []
    for v in values:
        if v == '' or v is None:
            continue
        try:
            nums.append(float(v))
        except (ValueError, TypeError):
            pass
    if not nums:
        return None

    n = len(nums)
    mean = sum(nums) / n
    variance = sum((x - mean) ** 2 for x in nums) / n
    std = math.sqrt(variance)
    sorted_nums = sorted(nums)

    def percentile(p):
        # Linear interpolation between the two closest ranks, so that the
        # 50th percentile of an even-sized sample is the true median
        # (e.g. 2.5 for [1, 2, 3, 4]) rather than the upper-middle element.
        pos = (n - 1) * p / 100
        lo = math.floor(pos)
        frac = pos - lo
        if frac == 0:
            # Exact rank: return the element itself (also avoids inf - inf).
            return sorted_nums[lo]
        hi = min(lo + 1, n - 1)
        return sorted_nums[lo] + (sorted_nums[hi] - sorted_nums[lo]) * frac

    return {
        'count': n, 'missing': len(values) - n,
        'mean': round(mean, 2), 'std': round(std, 2),
        'min': round(min(nums), 2), 'p25': round(percentile(25), 2),
        'p50': round(percentile(50), 2), 'p75': round(percentile(75), 2),
        'max': round(max(nums), 2), 'unique': len(set(nums)),
        'sum': round(sum(nums), 2),
    }
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def text_stats(values):
    """Summarise a text column: totals, missing cells and length figures.

    A cell counts as missing when it is the empty string or ``None``.
    Lengths are measured on ``str(cell)``; all length figures are 0 when
    the column has no non-empty cell.
    """
    total = len(values)
    filled = []
    for cell in values:
        if cell != '' and cell is not None:
            filled.append(cell)
    lengths = list(map(len, map(str, filled)))

    summary = {}
    summary['count'] = total
    summary['non_empty'] = len(filled)
    summary['missing'] = total - len(filled)
    summary['unique'] = len(set(filled))
    if lengths:
        summary['max_length'] = max(lengths)
        summary['min_length'] = min(lengths)
        summary['avg_length'] = round(sum(lengths) / len(lengths), 1)
    else:
        summary['max_length'] = 0
        summary['min_length'] = 0
        summary['avg_length'] = 0
    return summary
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def correlation(x_vals, y_vals):
    """Compute the Pearson correlation coefficient of two columns.

    Rows are paired positionally. A row is used only when both cells are
    non-empty, convertible with ``float()`` and finite.

    Args:
        x_vals: Raw cells of the first column.
        y_vals: Raw cells of the second column.

    Returns:
        The coefficient rounded to 4 decimals, or ``None`` when fewer than
        three usable pairs exist or either column has zero variance.
    """
    x_nums, y_nums = [], []
    for x, y in zip(x_vals, y_vals):
        if x == '' or y == '' or x is None or y is None:
            continue
        try:
            # Convert both cells before appending either one: appending x
            # and then failing on y would leave the two lists misaligned.
            fx, fy = float(x), float(y)
        except (ValueError, TypeError):
            continue
        if not (math.isfinite(fx) and math.isfinite(fy)):
            # inf/nan would turn the whole coefficient into nan.
            continue
        x_nums.append(fx)
        y_nums.append(fy)

    n = len(x_nums)
    if n < 3:
        return None
    mean_x = sum(x_nums) / n
    mean_y = sum(y_nums) / n
    num = sum((x - mean_x) * (y - mean_y) for x, y in zip(x_nums, y_nums))
    denom_x = math.sqrt(sum((x - mean_x) ** 2 for x in x_nums))
    denom_y = math.sqrt(sum((y - mean_y) ** 2 for y in y_nums))
    denom = denom_x * denom_y
    if denom == 0:
        # Constant column (or product underflow): correlation is undefined.
        return None
    # Clamp to [-1, 1] to absorb floating-point overshoot.
    return round(max(-1.0, min(1.0, num / denom)), 4)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def draw_histogram(values, bins=10, width=30):
    """Build the rows of a simple ASCII histogram.

    Cells that are empty, ``None``, not convertible with ``float()`` or
    non-finite (``inf``/``nan``) are ignored.

    Args:
        values: Raw cells of the column.
        bins: Number of equal-width buckets; values below 1 are treated as 1.
        width: Bar length, in characters, of the fullest bucket.

    Returns:
        A list of ``(label, count, bar)`` tuples, one per bucket, where
        ``label`` is the bucket's lower bound formatted with one decimal.
        Returns ``[]`` when there is no usable value, and a single full-width
        row when all values are equal.
    """
    nums = []
    for v in values:
        if v == '' or v is None:
            continue
        try:
            num = float(v)
        except (ValueError, TypeError):
            continue
        # Non-finite values make the bucket index nan, and int(nan) raises.
        if math.isfinite(num):
            nums.append(num)
    if not nums:
        return []

    # bins=0 would divide by zero and negative bins would index an empty list.
    bins = max(1, int(bins))
    min_v, max_v = min(nums), max(nums)
    if min_v == max_v:
        return [(f"{min_v:.1f}", len(nums), '█' * width)]

    bucket_size = (max_v - min_v) / bins
    buckets = [0] * bins
    for n in nums:
        # The maximum value lands exactly on the upper edge; fold it into
        # the last bucket.
        idx = min(int((n - min_v) / bucket_size), bins - 1)
        buckets[idx] += 1

    max_count = max(buckets)
    result = []
    for i, count in enumerate(buckets):
        label = f"{min_v + i * bucket_size:.1f}"
        bar_len = int(count / max_count * width) if max_count > 0 else 0
        result.append((label, count, '█' * bar_len))
    return result
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def analyze_csv(filepath, max_rows=None, delimiter=','):
    """Analyze a CSV file and return structured results.

    The first row is treated as the header. Header names are stripped, and
    repeated names are made unique by appending ``_2``, ``_3``, ... so that
    each physical column keeps its own data. Short rows are padded with
    empty strings; extra cells beyond the header width are ignored.

    Args:
        filepath: Path of the CSV file to read.
        max_rows: Maximum number of data rows to read; a falsy value means
            no limit.
        delimiter: Single-character field delimiter.

    Returns:
        On success, a dict with ``file``, ``rows``, ``columns``, ``headers``,
        ``col_stats`` (per-header statistics including ``type``),
        ``correlations`` (``None`` unless at least two numeric columns exist)
        and ``max_rows_reached``. On failure, a dict with an ``error`` key
        (plus ``file`` for I/O or parsing errors).
    """
    try:
        # newline='' is what the csv module requires for correct handling of
        # quoted fields containing line breaks; utf-8-sig drops a leading
        # BOM so it does not end up inside the first header name.
        with open(filepath, 'r', encoding='utf-8-sig', errors='replace',
                  newline='') as f:
            reader = csv.reader(f, delimiter=delimiter)
            raw_headers = next(reader, None)
            if not raw_headers:
                return {'error': 'Empty file or missing header row'}

            # Duplicate names would share one dict key and receive two
            # cells per row, so disambiguate them up front.
            headers = []
            used = set()
            for raw in raw_headers:
                base = raw.strip()
                name, suffix = base, 1
                while name in used:
                    suffix += 1
                    name = f"{base}_{suffix}"
                used.add(name)
                headers.append(name)

            columns = {h: [] for h in headers}
            total_rows = 0
            for row in reader:
                if max_rows and total_rows >= max_rows:
                    break
                total_rows += 1
                for i, h in enumerate(headers):
                    val = row[i].strip() if i < len(row) else ''
                    columns[h].append(val)
    except (OSError, csv.Error, TypeError, ValueError) as e:
        # Unreadable file, malformed CSV, or an invalid delimiter/path.
        return {'error': str(e), 'file': str(filepath)}

    n_rows = total_rows
    col_stats = {}
    for h in headers:
        vals = columns[h]
        ct = infer_type(vals)
        if ct in ('int', 'float'):
            stats = numeric_stats(vals)
            if stats:
                stats['type'] = ct
                stats['histogram'] = draw_histogram(vals)
            col_stats[h] = stats
        else:
            ts = text_stats(vals)
            ts['type'] = 'text'
            ts['top_values'] = Counter(vals).most_common(10)
            col_stats[h] = ts

    numeric_cols = [
        h for h in headers
        if col_stats[h] and col_stats[h].get('type') in ('int', 'float')
    ]
    corr_matrix = None
    if len(numeric_cols) >= 2:
        corr_matrix = {}
        for i, c1 in enumerate(numeric_cols):
            for c2 in numeric_cols[i + 1:]:
                r = correlation(columns[c1], columns[c2])
                if r is not None:
                    corr_matrix[f"{c1} × {c2}"] = r

    return {
        'file': str(filepath), 'rows': n_rows, 'columns': len(headers),
        'headers': headers, 'col_stats': col_stats,
        'correlations': corr_matrix,
        'max_rows_reached': bool(max_rows and n_rows >= max_rows),
    }
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def print_report(result):
    """Write a human-readable, ANSI-coloured report for an analysis result.

    ``result`` is the dict produced by ``analyze_csv``. When it carries an
    ``error`` key only the error line is printed. Otherwise the report has a
    file summary, one section per column (numeric or text layout depending
    on the column's ``type``), the ten strongest correlations, and a total
    of missing values when there are any.
    """
    if 'error' in result:
        message = str(result.get('error', ''))
        print("\n" + red('✗ Error: ' + message))
        return

    def emit(label, text):
        # One "label value" row with the label padded to 12 characters.
        print(f" {label:<12} {text}")

    # --- file summary -----------------------------------------------------
    print(f"\n{bold('📊 CSV Analysis Report')}")
    print(f" {dim('File:')} {result['file']}")
    print(f" {dim('Rows:')} {result['rows']:,}")
    print(f" {dim('Columns:')} {result['columns']}")
    if result.get('max_rows_reached'):
        print(f" {yellow('⚠ Sample limit reached')}")
    print()

    numeric_rows = (
        ('Mean', 'mean'), ('Std Dev', 'std'), ('Min', 'min'), ('P25', 'p25'),
        ('P50 (Median)', 'p50'), ('P75', 'p75'), ('Max', 'max'),
    )

    # --- per-column sections ----------------------------------------------
    for header in result['headers']:
        stats = result['col_stats'].get(header)
        if not stats:
            continue
        print(f" {bold(header)}")

        if stats['type'] in ('int', 'float'):
            emit('Type', green(stats['type']))
            count_text = f" {'Count':<12} {stats['count']:,}"
            if stats['missing']:
                count_text += yellow(f' (+{stats["missing"]} missing)')
            else:
                count_text += ""
            print(count_text)
            for label, key in numeric_rows:
                emit(label, f"{stats[key]:,.2f}")
            if stats.get('sum') is not None:
                emit('Sum', f"{stats['sum']:,.2f}")
            emit('Unique', stats['unique'])
            if stats.get('histogram'):
                print(f" {'Distribution':<12}")
                for bucket_label, bucket_count, bucket_bar in stats['histogram']:
                    print(f" {bucket_label:<8} {bucket_bar} {bucket_count}")
        else:
            missing = stats.get('missing', 0)
            emit('Type', cyan('text'))
            total_text = f" {'Total':<12} {stats['count']:,}"
            if missing:
                total_text += yellow(f' (+{missing} missing)')
            else:
                total_text += ''
            print(total_text)
            emit('Non-empty', f"{stats['non_empty']:,}")
            emit('Unique', f"{stats['unique']:,}")
            emit('Max Len', f"{stats['max_length']} chars")
            emit('Min Len', f"{stats['min_length']} chars")
            emit('Avg Len', f"{stats['avg_length']} chars")
            if stats.get('top_values'):
                print(f" {'Top 10':<12}")
                for value, occurrences in stats['top_values']:
                    shown = str(value)[:30]
                    filled = int(occurrences / max(1, stats['count']) * 20)
                    print(f" {shown:<32} {'█' * filled} {occurrences}")
        print()

    # --- correlations (strongest first, at most ten) ------------------------
    if result.get('correlations'):
        print(f" {bold('Correlation Matrix')}")
        ranked = sorted(
            result['correlations'].items(),
            key=lambda item: abs(item[1]),
            reverse=True,
        )
        for pair, coef in ranked[:10]:
            strength = abs(coef)
            if strength > 0.5:
                paint = green
            elif strength > 0.3:
                paint = yellow
            else:
                paint = dim
            solid = int(strength * 15)
            gauge = '█' * solid + '░' * (15 - solid)
            sign = '+' if coef >= 0 else '−'
            print(f" {pair:<40} {paint(f'{sign}{abs(coef):.4f}')} {gauge}")
        print()

    # --- missing-value footer ---------------------------------------------
    total_missing = 0
    for column_stats in result['col_stats'].values():
        if column_stats:
            total_missing += column_stats.get('missing', 0)
    if total_missing > 0:
        print(f" {yellow(f'⚠ {total_missing:,} total missing values')}")
        print()
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""csv-stats — CLI entry point."""
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .analyzer import analyze_csv, print_report
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main():
    """Run the csv-stats command-line interface.

    Parses the command line, analyzes the given CSV file and prints either
    a formatted report or, with ``--json``, the result as JSON (without the
    pre-rendered histogram rows).

    Exits with status 1 when the file does not exist or when the analysis
    reports an error, so that shell callers can detect the failure.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="csv-stats — CSV data analysis tool (zero dependencies)"
    )
    parser.add_argument("file", help="Path to CSV file")
    parser.add_argument("-d", "--delimiter", default=",", help="Delimiter (default: ,)")
    # NOTE(review): --max-rows is accepted but has never influenced the row
    # limit; only --sample does. Left as-is to avoid changing "--sample 0"
    # (read everything) semantics — confirm the intended behaviour.
    parser.add_argument("--max-rows", type=int, default=100000, help="Max rows to read")
    parser.add_argument("--json", action="store_true", help="JSON output")
    parser.add_argument(
        "--sample",
        type=int,
        default=10000,
        help="Only analyze first N rows (default: 10000, 0 = all)",
    )
    args = parser.parse_args()

    if not Path(args.file).exists():
        # Diagnostics belong on stderr so they do not pollute piped output.
        print(f"✗ File not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    max_rows = args.sample if args.sample > 0 else None
    result = analyze_csv(args.file, max_rows=max_rows, delimiter=args.delimiter)
    failed = "error" in result

    if args.json:
        import json
        for s in result.get("col_stats", {}).values():
            # Histogram rows are presentation-only; keep the JSON compact.
            if s and "histogram" in s:
                del s["histogram"]
        print(json.dumps(result, indent=2, ensure_ascii=False, default=str))
    else:
        print_report(result)

    if failed:
        # The error has already been reported above; signal it to the shell.
        sys.exit(1)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
if __name__ == "__main__":
|
|
45
|
+
main()
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""dirsize — 目录空间分析器 / Directory disk usage analyzer.
|
|
3
|
+
|
|
4
|
+
Zero-dependency CLI that shows directory sizes sorted with human-readable output.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import argparse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def human_size(bytes_val: int) -> str:
    """Format a byte count with a binary unit suffix (B, KB, MB, GB, TB, PB).

    The number is right-aligned in 7 characters with one decimal place.
    Anything that is still 1024 or more after reaching TB is shown in PB.
    """
    value = bytes_val
    units = ("B", "KB", "MB", "GB", "TB")
    position = 0
    while position < len(units):
        if abs(value) < 1024:
            return f"{value:>7.1f} {units[position]}"
        value /= 1024
        position += 1
    return f"{value:>7.1f} PB"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def calc_size(path: str) -> tuple[int, int]:
    """Sum the sizes of all files below *path*.

    Hidden directories (names starting with ".") are pruned from the walk,
    except those directly inside *path* itself. Files whose size cannot be
    read are skipped silently.

    Returns:
        ``(total_bytes, file_count)``.
    """
    size_sum = 0
    n_files = 0
    try:
        for root, subdirs, names in os.walk(path):
            if root != path:
                # In-place assignment is what tells os.walk not to descend.
                subdirs[:] = [sub for sub in subdirs if not sub.startswith(".")]
            for name in names:
                full_path = os.path.join(root, name)
                try:
                    size_sum += os.path.getsize(full_path)
                except OSError:
                    continue
                n_files += 1
    except PermissionError:
        pass
    return size_sum, n_files
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def scan_directory(path: str, max_depth: int = 1, top_n: int = 20,
                   min_size: int = 0, show_hidden: bool = False) -> list:
    """List the entries directly inside *path*, largest first.

    Args:
        path: Directory to scan.
        max_depth: A sub-directory is measured recursively with
            ``calc_size`` only when its depth below *path* is smaller than
            this value; otherwise only its immediate files are summed.
        top_n: Maximum number of entries to return.
        min_size: Entries smaller than this many bytes are dropped.
        show_hidden: Include entries whose name starts with ".".

    Returns:
        A list of ``(name, size_bytes, file_count, kind)`` tuples sorted by
        size in descending order, where ``kind`` is ``"D"`` for directories
        and ``"F"`` for everything else (``file_count`` is 0 for those).

    Raises:
        OSError: If *path* itself cannot be scanned.
    """
    results = []
    base_depth = path.rstrip(os.sep).count(os.sep)

    # Context managers release the directory handles even on early exit.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.name.startswith(".") and not show_hidden:
                continue
            try:
                if entry.is_dir(follow_symlinks=False):
                    depth = entry.path.rstrip(os.sep).count(os.sep) - base_depth
                    if depth < max_depth:
                        size, files = calc_size(entry.path)
                    else:
                        # Shallow estimate: immediate files only, no recursion.
                        size = 0
                        files = 0
                        with os.scandir(entry.path) as children:
                            for child in children:
                                try:
                                    # Sub-directories are not files: counting
                                    # them (and their inode size) would
                                    # inflate both totals.
                                    if child.is_dir():
                                        continue
                                    size += os.path.getsize(child.path)
                                    files += 1
                                except OSError:
                                    pass
                    entry_type = "D"
                else:
                    size = entry.stat().st_size
                    files = 0
                    entry_type = "F"

                if size >= min_size:
                    results.append((entry.name, size, files, entry_type))
            except OSError:
                # Unreadable entry (permissions, broken symlink): skip it.
                pass

    results.sort(key=lambda x: x[1], reverse=True)
    return results[:top_n]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def entry():
    """Command-line entry point for dirsize.

    Without ``--list`` it prints the total size of the given path (and the
    file count for a directory). With ``--list`` it prints a table of the
    directory's entries sorted by size, followed by a total over the rows
    shown.

    Exits with status 1 when the path does not exist, when ``--list`` is
    used on something that is not a directory, or when the directory cannot
    be read.
    """
    args = parse_args()

    # Validate the path before either mode runs: previously --list skipped
    # this check and crashed with a traceback on a missing path.
    if not os.path.exists(args.path):
        print(f"Error: path not found: {args.path}", file=sys.stderr)
        sys.exit(1)

    if args.list:
        # List mode: show directory contents sorted by size
        if not os.path.isdir(args.path):
            print(f"Error: not a directory: {args.path}", file=sys.stderr)
            sys.exit(1)
        try:
            results = scan_directory(
                args.path,
                max_depth=args.depth,
                top_n=args.top,
                min_size=args.min_size,
                show_hidden=args.all,
            )
        except OSError as exc:
            print(f"Error: cannot list {args.path}: {exc}", file=sys.stderr)
            sys.exit(1)
        if not results:
            print(f"(empty or no entries >= {human_size(args.min_size)})")
            return

        # Find longest name for alignment, clamped to the 20..50 range
        name_width = max(len(r[0]) for r in results)
        name_width = min(name_width, 50)
        name_width = max(name_width, 20)

        separator = f" {'----' * ((name_width // 4) + 5)}"
        print(f" TYPE {'NAME'.ljust(name_width)} {'SIZE'.rjust(10)} {'FILES'.rjust(5)}")
        print(separator)
        total_size = 0
        total_files = 0
        for name, size, files, etype in results:
            print(f" [{etype}] {name.ljust(name_width)} {human_size(size)} {files:>5}")
            total_size += size
            total_files += files
        print(separator)
        print(f" Total: {human_size(total_size)} in {total_files} files")
        return

    # Single path mode
    if os.path.isfile(args.path):
        size = os.path.getsize(args.path)
        print(f"{human_size(size)} {args.path}")
    else:
        size, files = calc_size(args.path)
        print(f"{human_size(size)} {args.path} ({files} files)")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def parse_args(argv=None):
    """Parse the dirsize command line.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which keeps existing ``parse_args()``
            callers working while allowing programmatic use.

    Returns:
        The ``argparse.Namespace`` with ``path``, ``list``, ``depth``,
        ``top``, ``all`` and ``min_size``; ``min_size`` is converted from
        its textual form to an integer number of bytes.
    """
    parser = argparse.ArgumentParser(
        description="dirsize — 目录空间分析器",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""Examples:
dirsize # Show current dir size
dirsize /home # Show /home size
dirsize --list ~/Projects # List Projects contents sorted by size
dirsize --list --depth 2 --top 10 # Top 10 dirs, 2 levels deep
dirsize --list --min-size 1MB # Only entries >= 1 MB
dirsize --list --all # Include hidden dirs/files
""")
    parser.add_argument("path", nargs="?", default=".",
                        help="Directory or file to analyze (default: .)")
    parser.add_argument("-l", "--list", action="store_true",
                        help="List directory contents sorted by size")
    parser.add_argument("-d", "--depth", type=int, default=1,
                        help="Max recursion depth for --list (default: 1)")
    parser.add_argument("-t", "--top", type=int, default=20,
                        help="Number of entries to show (default: 20)")
    parser.add_argument("-m", "--min-size", type=str, default="0",
                        help="Minimum size filter like 1MB, 500KB (default: 0)")
    parser.add_argument("-a", "--all", action="store_true",
                        help="Include hidden files/dirs")

    args = parser.parse_args(argv)

    # Parse min-size: "0" (the default) and empty input both mean no filter.
    if args.min_size and args.min_size != "0":
        args.min_size = parse_size(args.min_size)
    else:
        args.min_size = 0

    return args
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def parse_size(s: str) -> int:
    """Parse a size string such as '1MB', '500KB', '2GB' or '4096' to bytes.

    Units are binary (1 KB = 1024 bytes) and case-insensitive; the numeric
    part may be fractional when a unit is given. A bare integer is taken as
    a number of bytes.

    Returns:
        The size in bytes, or 0 (after printing a warning to stderr) when
        the string cannot be interpreted.
    """
    s = s.strip().upper()
    # Longest suffixes first: every unit ends in "B", so testing "B" first
    # would swallow "1MB" as float("1M") and reject it.
    units = (
        ("TB", 1024**4),
        ("GB", 1024**3),
        ("MB", 1024**2),
        ("KB", 1024),
        ("B", 1),
    )
    for suffix, multiplier in units:
        if s.endswith(suffix):
            try:
                num = float(s[: -len(suffix)])
                return int(num * multiplier)
            except (ValueError, OverflowError):
                # Non-numeric or non-finite prefix: fall through to the
                # plain-number attempt and, failing that, the warning.
                break
    # Try parsing as plain number (bytes)
    try:
        return int(s)
    except ValueError:
        print(f"Warning: invalid size '{s}', using 0", file=sys.stderr)
        return 0
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
if __name__ == "__main__":
|
|
183
|
+
entry()
|