codesize 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codesize/__init__.py +24 -0
- codesize/__main__.py +28 -0
- codesize/_version.py +1 -0
- codesize/argbuilder.py +1426 -0
- codesize/cli.py +169 -0
- codesize/output.py +281 -0
- codesize/scanner.py +237 -0
- codesize/version.py +35 -0
- codesize-1.0.0.dist-info/METADATA +109 -0
- codesize-1.0.0.dist-info/RECORD +13 -0
- codesize-1.0.0.dist-info/WHEEL +4 -0
- codesize-1.0.0.dist-info/entry_points.txt +2 -0
- codesize-1.0.0.dist-info/licenses/LICENSE +24 -0
codesize/cli.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------
|
|
2
|
+
# cli.py
|
|
3
|
+
# ------
|
|
4
|
+
#
|
|
5
|
+
# Command-line interface entry point for codesize. Supports Python, Go,
|
|
6
|
+
# Rust, and Elixir.
|
|
7
|
+
#
|
|
8
|
+
# (c) 2026 WaterJuice — Unlicense; see LICENSE in the project root.
|
|
9
|
+
#
|
|
10
|
+
# Authors
|
|
11
|
+
# -------
|
|
12
|
+
# bena (via Claude)
|
|
13
|
+
#
|
|
14
|
+
# Version History
|
|
15
|
+
# ---------------
|
|
16
|
+
# Mar 2026 - Created
|
|
17
|
+
# ----------------------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
# ----------------------------------------------------------------------------------------
|
|
20
|
+
# Imports
|
|
21
|
+
# ----------------------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
import sys
|
|
24
|
+
import traceback
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from codesize.output import display_stats
|
|
27
|
+
from codesize.scanner import scan_directory
|
|
28
|
+
from codesize.version import VERSION_STR
|
|
29
|
+
from .argbuilder import ArgsParser
|
|
30
|
+
from .argbuilder import Namespace
|
|
31
|
+
|
|
32
|
+
# ----------------------------------------------------------------------------------------
|
|
33
|
+
# Constants
|
|
34
|
+
# ----------------------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
LICENSE_TEXT = """\
|
|
37
|
+
This is free and unencumbered software released into the public domain.
|
|
38
|
+
|
|
39
|
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
|
40
|
+
distribute this software, either in source code form or as a compiled
|
|
41
|
+
binary, for any purpose, commercial or non-commercial, and by any
|
|
42
|
+
means.
|
|
43
|
+
|
|
44
|
+
In jurisdictions that recognize copyright laws, the author or authors
|
|
45
|
+
of this software dedicate any and all copyright interest in the
|
|
46
|
+
software to the public domain. We make this dedication for the benefit
|
|
47
|
+
of the public at large and to the detriment of our heirs and
|
|
48
|
+
successors. We intend this dedication to be an overt act of
|
|
49
|
+
relinquishment in perpetuity of all present and future rights to this
|
|
50
|
+
software under copyright law.
|
|
51
|
+
|
|
52
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
53
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
54
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
55
|
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
56
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
57
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
58
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
|
59
|
+
|
|
60
|
+
For more information, please refer to <https://unlicense.org>"""
|
|
61
|
+
|
|
62
|
+
# ----------------------------------------------------------------------------------------
|
|
63
|
+
# Functions
|
|
64
|
+
# ----------------------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ----------------------------------------------------------------------------------------
|
|
68
|
+
def parse_args(argv: list[str]) -> Namespace:
    """
    Build the codesize argument parser and run it over ``argv``.

    The parser accepts an optional positional ``directory`` (defaulting to
    "."), a ``--license`` option that prints the license text and exits, and
    the mutually exclusive output switches ``--plain`` and ``--json``.

    Parameters:
        argv: Command line arguments (without program name).

    Returns:
        The namespace produced by the parser.
    """
    banner = (
        f"codesize: {VERSION_STR}\n"
        "(c) 2026 WaterJuice. Unlicense.\n\n"
        "Analyse code statistics in a directory.\n"
        "Supports Python, Go, Rust, and Elixir."
    )
    # Only the leading "X.Y.Z" part of sys.version is reported.
    python_version = sys.version.split()[0]

    parser = ArgsParser(
        prog="codesize",
        description=banner,
        version=f"codesize: {VERSION_STR}\npython: {python_version}",
    )

    parser.add_argument(
        "directory",
        nargs="?",
        default=".",
        help="Directory to scan (default: current directory)",
    )
    # Reuses the "version" action so the license is printed and the program exits.
    parser.add_argument(
        "--license",
        action="version",
        version=LICENSE_TEXT,
        help="show license and exit",
    )

    # Output format switches: at most one of them may be given.
    exclusive = parser.add_mutex_group()
    exclusive.add_argument(
        "--plain",
        action="store_true",
        help="Plain text output (no box or colours)",
    )
    exclusive.add_argument(
        "--json",
        action="store_true",
        help="JSON output",
    )

    return parser.parse(argv)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# ----------------------------------------------------------------------------------------
|
|
110
|
+
def main(argv: list[str] | None = None) -> int:
|
|
111
|
+
"""
|
|
112
|
+
Main entry point for the codesize CLI.
|
|
113
|
+
|
|
114
|
+
Parameters:
|
|
115
|
+
argv: Command line arguments (without program name). If None, uses sys.argv[1:].
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
Exit code (0 for success, non-zero for error)
|
|
119
|
+
"""
|
|
120
|
+
if argv is None:
|
|
121
|
+
argv = sys.argv[1:]
|
|
122
|
+
|
|
123
|
+
try:
|
|
124
|
+
return _main_inner(argv)
|
|
125
|
+
except KeyboardInterrupt:
|
|
126
|
+
print()
|
|
127
|
+
print("---- Manually Terminated ----")
|
|
128
|
+
print()
|
|
129
|
+
return 1
|
|
130
|
+
except SystemExit:
|
|
131
|
+
raise
|
|
132
|
+
except BaseException as e:
|
|
133
|
+
t = "-----------------------------------------------------------------------------\n"
|
|
134
|
+
t += "UNHANDLED EXCEPTION OCCURRED!!\n"
|
|
135
|
+
t += "\n"
|
|
136
|
+
t += traceback.format_exc()
|
|
137
|
+
t += "\n"
|
|
138
|
+
t += f"EXCEPTION: {type(e)} {e}\n"
|
|
139
|
+
t += "-----------------------------------------------------------------------------\n"
|
|
140
|
+
t += "\n"
|
|
141
|
+
print(t, file=sys.stderr)
|
|
142
|
+
return 1
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ----------------------------------------------------------------------------------------
|
|
146
|
+
def _main_inner(argv: list[str]) -> int:
    """
    Parse the arguments, validate the target directory, scan it and print the result.

    Parameters:
        argv: Command line arguments (without program name).

    Returns:
        0 after the statistics were displayed; 1 when the target does not
        exist or is not a directory (an error is written to stderr).
    """
    options = parse_args(argv)
    target = Path(options.directory).resolve()

    if not target.exists():
        print(f"Error: Directory '{target}' does not exist.", file=sys.stderr)
        return 1
    if not target.is_dir():
        print(f"Error: '{target}' is not a directory.", file=sys.stderr)
        return 1

    # --json wins over --plain; the boxed layout is the default.
    output_format = "json" if options.json else "plain" if options.plain else "box"

    display_stats(scan_directory(target), target, output_format)
    return 0
|
codesize/output.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------
|
|
2
|
+
# output.py
|
|
3
|
+
# ---------
|
|
4
|
+
#
|
|
5
|
+
# Output formatting with a Unicode box-drawing frame and optional ANSI colour support.
|
|
6
|
+
# Displays per-language breakdown and totals.
|
|
7
|
+
#
|
|
8
|
+
# (c) 2026 WaterJuice — Unlicense; see LICENSE in the project root.
|
|
9
|
+
#
|
|
10
|
+
# Authors
|
|
11
|
+
# -------
|
|
12
|
+
# bena (via Claude)
|
|
13
|
+
#
|
|
14
|
+
# Version History
|
|
15
|
+
# ---------------
|
|
16
|
+
# Mar 2026 - Created
|
|
17
|
+
# ----------------------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
# ----------------------------------------------------------------------------------------
|
|
20
|
+
# Imports
|
|
21
|
+
# ----------------------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
import sys
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Literal
|
|
27
|
+
from codesize.scanner import CodeStats
|
|
28
|
+
from codesize.scanner import LangStats
|
|
29
|
+
|
|
30
|
+
# ----------------------------------------------------------------------------------------
|
|
31
|
+
# Constants
|
|
32
|
+
# ----------------------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
# ANSI colour codes (SGR escape sequences). RESET is appended by colour()
# after every coloured span so styling never leaks into following text.
RESET = "\033[0m"
BOLD = "\033[1m"
CYAN = "\033[36m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
MAGENTA = "\033[35m"

# Column colours (Files, Size, Lines, Code), indexed by value-column position.
COL_COLOURS = (CYAN, GREEN, YELLOW, MAGENTA)

# Box drawing characters (Unicode, rounded corners) used to frame the table.
BOX_TOP_LEFT = "╭"
BOX_TOP_RIGHT = "╮"
BOX_BOTTOM_LEFT = "╰"
BOX_BOTTOM_RIGHT = "╯"
BOX_HORIZONTAL = "─"
BOX_VERTICAL = "│"
# Tee pieces join a horizontal separator to the left/right border.
BOX_TEE_LEFT = "├"
BOX_TEE_RIGHT = "┤"
|
|
54
|
+
|
|
55
|
+
# ----------------------------------------------------------------------------------------
|
|
56
|
+
# Functions
|
|
57
|
+
# ----------------------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ----------------------------------------------------------------------------------------
|
|
61
|
+
def use_colour() -> bool:
    """Return True when standard output is attached to a terminal."""
    stream = sys.stdout
    return stream.isatty()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# ----------------------------------------------------------------------------------------
|
|
67
|
+
def colour(text: str, *codes: str) -> str:
    """
    Wrap ``text`` in ANSI codes when colour output is enabled.

    Parameters:
        text: The text to decorate.
        codes: ANSI escape sequences to emit before the text.

    Returns:
        The codes, the text and a trailing RESET when use_colour() is true;
        otherwise ``text`` unchanged.
    """
    if use_colour():
        prefix = "".join(codes)
        return f"{prefix}{text}{RESET}"
    return text
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ----------------------------------------------------------------------------------------
|
|
75
|
+
def format_size(kb: float) -> str:
    """
    Render a size given in kilobytes with two decimals and a unit.

    Values of 1024 KB or more are shown in MB, everything else in KB.
    """
    value, unit = (kb / 1024, "MB") if kb >= 1024 else (kb, "KB")
    return f"{value:.2f} {unit}"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ----------------------------------------------------------------------------------------
|
|
83
|
+
def format_number(n: int) -> str:
    """Return ``n`` as text with commas as thousand separators."""
    return format(n, ",")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ----------------------------------------------------------------------------------------
|
|
89
|
+
def display_stats(
    stats: CodeStats, directory: Path, output_format: Literal["box", "plain", "json"]
) -> None:
    """
    Print the statistics using the renderer selected by ``output_format``.

    "json" and "plain" select their dedicated renderers; any other value
    falls back to the boxed layout.
    """
    if output_format == "json":
        display_json(stats, directory)
        return
    if output_format == "plain":
        display_plain(stats, directory)
        return
    display_box(stats, directory)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ----------------------------------------------------------------------------------------
|
|
102
|
+
def _lang_to_dict(lang: LangStats) -> dict[str, int | float]:
|
|
103
|
+
"""Convert language stats to a dictionary for JSON output."""
|
|
104
|
+
return {
|
|
105
|
+
"files": lang.num_files,
|
|
106
|
+
"total_bytes": lang.total_bytes,
|
|
107
|
+
"total_kb": round(lang.total_kb, 2),
|
|
108
|
+
"total_lines": lang.total_lines,
|
|
109
|
+
"code_lines": lang.code_lines,
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ----------------------------------------------------------------------------------------
|
|
114
|
+
def display_json(stats: CodeStats, directory: Path) -> None:
    """
    Print the statistics as an indented JSON document.

    The document holds the directory's name and one entry per language; a
    "total" entry is added only when more than one language is present.
    """
    per_language: dict[str, object] = {}
    for entry in stats.languages:
        per_language[entry.name] = _lang_to_dict(entry)

    payload: dict[str, object] = {
        "directory": directory.name,
        "languages": per_language,
    }
    if len(stats.languages) > 1:
        payload["total"] = _lang_to_dict(stats.total)

    print(json.dumps(payload, indent=2))
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ----------------------------------------------------------------------------------------
|
|
126
|
+
def display_plain(stats: CodeStats, directory: Path) -> None:
    """
    Print the statistics as plain text, one line per language.

    A "Total" line follows the language lines only when more than one
    language is present.
    """
    print(f"Code Stats: {directory.name}")
    if not stats.languages:
        print("No supported code files found.")
        return

    def summary(label: str, entry: LangStats) -> str:
        # One report line: "<label>: <files> files, <size>, <lines> lines, <code> code lines".
        pieces = (
            f"{format_number(entry.num_files)} files",
            format_size(entry.total_kb),
            f"{format_number(entry.total_lines)} lines",
            f"{format_number(entry.code_lines)} code lines",
        )
        return f"{label}: " + ", ".join(pieces)

    for lang in stats.languages:
        print(summary(lang.name, lang))

    if len(stats.languages) > 1:
        print(summary("Total", stats.total))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ----------------------------------------------------------------------------------------
|
|
150
|
+
def display_box(stats: CodeStats, directory: Path) -> None:
    """
    Display statistics in a framed table with a per-language breakdown.

    The frame is drawn with the BOX_* box-drawing characters. It contains a
    centred title, a header row, one row per language and - only when more
    than one language is present - a separator followed by a "Total" row.
    All styling goes through colour(), which returns its text unchanged when
    colour is disabled, so a single code path serves both terminal and
    non-terminal output.

    Parameters:
        stats: Per-language statistics to render.
        directory: Scanned directory; only its final path component is shown.
    """
    if not stats.languages:
        print()
        print("No supported code files found.")
        print()
        return

    title = f" Code Stats: {directory.name} "
    show_total = len(stats.languages) > 1

    def to_row(label: str, entry: LangStats) -> tuple[str, ...]:
        # Table cells for one record: (label, files, size, lines, code).
        return (
            label,
            format_number(entry.num_files),
            format_size(entry.total_kb),
            format_number(entry.total_lines),
            format_number(entry.code_lines),
        )

    headers: tuple[str, ...] = ("", "Files", "Size", "Lines", "Code")
    rows = [to_row(lang.name, lang) for lang in stats.languages]
    total_row = to_row("Total", stats.total)
    all_rows = [*rows, total_row] if show_total else rows

    # Each column is as wide as its widest cell, header included.
    num_cols = len(headers)
    col_widths = [
        max(len(line[i]) for line in (headers, *all_rows)) for i in range(num_cols)
    ]

    def make_line(row: tuple[str, ...]) -> str:
        # Uncoloured content of a row: left-aligned label, right-aligned values.
        parts = [f" {row[0].ljust(col_widths[0])}"]
        parts.extend(
            f" {cell.rjust(width)}" for cell, width in zip(row[1:], col_widths[1:])
        )
        parts.append(" ")
        return "".join(parts)

    # Inner width (between the vertical bars) fits the title and every row.
    inner_width = max(
        len(title), *(len(make_line(line)) for line in (headers, *all_rows))
    )

    top_border = BOX_TOP_LEFT + BOX_HORIZONTAL * inner_width + BOX_TOP_RIGHT
    bottom_border = BOX_BOTTOM_LEFT + BOX_HORIZONTAL * inner_width + BOX_BOTTOM_RIGHT
    separator = BOX_TEE_LEFT + BOX_HORIZONTAL * inner_width + BOX_TEE_RIGHT

    def print_boxed(content: str) -> None:
        # Emit one content line between two bold vertical bars.
        print(colour(BOX_VERTICAL, BOLD) + content + colour(BOX_VERTICAL, BOLD))

    def print_data_row(row: tuple[str, ...], bold_label: bool = False) -> None:
        label = row[0].ljust(col_widths[0])
        if bold_label:
            label = colour(label, BOLD)
        # Value columns - each gets its own colour.
        values = "".join(
            " " + colour(cell.rjust(width), BOLD, COL_COLOURS[idx % len(COL_COLOURS)])
            for idx, (cell, width) in enumerate(zip(row[1:], col_widths[1:]))
        )
        # Escape codes take no screen space, so the padding is derived from the
        # uncoloured line. This keeps the right border aligned even when the
        # title is wider than the table.
        padding = " " * (inner_width - len(make_line(row)))
        print_boxed(f" {label}{values} {padding}")

    # Header: top border, title, separator, column headings.
    print()
    print(colour(top_border, BOLD))
    print_boxed(colour(title.center(inner_width), BOLD, CYAN))
    print(colour(separator, BOLD))
    print_boxed(colour(make_line(headers).ljust(inner_width), BOLD))

    for row in rows:
        print_data_row(row)

    # Total row (only if multiple languages)
    if show_total:
        print(colour(separator, BOLD))
        print_data_row(total_row, bold_label=True)

    # Footer
    print(colour(bottom_border, BOLD))
    print()
|
codesize/scanner.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------
|
|
2
|
+
# scanner.py
|
|
3
|
+
# ----------
|
|
4
|
+
#
|
|
5
|
+
# Scans directories for code files and collects statistics.
|
|
6
|
+
# Supports Python, Go, Rust, and Elixir.
|
|
7
|
+
#
|
|
8
|
+
# (c) 2026 WaterJuice — Unlicense; see LICENSE in the project root.
|
|
9
|
+
#
|
|
10
|
+
# Authors
|
|
11
|
+
# -------
|
|
12
|
+
# bena (via Claude)
|
|
13
|
+
#
|
|
14
|
+
# Version History
|
|
15
|
+
# ---------------
|
|
16
|
+
# Mar 2026 - Created
|
|
17
|
+
# ----------------------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
# ----------------------------------------------------------------------------------------
|
|
20
|
+
# Imports
|
|
21
|
+
# ----------------------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
import os
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from dataclasses import field
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
# ----------------------------------------------------------------------------------------
|
|
29
|
+
# Language Definitions
|
|
30
|
+
# ----------------------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class Language:
    """
    Configuration for a supported programming language.

    Describes which files belong to the language and which comment and
    docstring delimiters the line counter recognises. Instances are frozen.
    """

    # Display name; also used as the key when grouping statistics.
    name: str
    # File extensions including the leading dot (e.g. ".py").
    extensions: tuple[str, ...]
    # Token that starts a single-line comment.
    line_comment: str
    # Opening and closing delimiters of block comments; None when the
    # language entry defines no block comments.
    block_comment_start: str | None = None
    block_comment_end: str | None = None
    # Delimiters of multi-line docstrings/heredocs; empty when none apply.
    docstring_markers: tuple[str, ...] = ()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Supported languages, in the order they are reported. Python and Elixir use
# "#" line comments plus triple-quoted docstrings/heredocs; Go and Rust use
# "//" line comments plus "/* ... */" block comments.
LANGUAGES: tuple[Language, ...] = (
    Language("Python", (".py",), "#", docstring_markers=('"""', "'''")),
    Language("Go", (".go",), "//", block_comment_start="/*", block_comment_end="*/"),
    Language("Rust", (".rs",), "//", block_comment_start="/*", block_comment_end="*/"),
    Language("Elixir", (".ex", ".exs"), "#", docstring_markers=('"""',)),
)
|
|
51
|
+
|
|
52
|
+
# ----------------------------------------------------------------------------------------
|
|
53
|
+
# Data Classes
|
|
54
|
+
# ----------------------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class LangStats:
    """
    Statistics for files of a single language.

    All counters start at zero so a record can be created up front and
    incremented while files are scanned.
    """

    # Language display name ("Total" for the combined record).
    name: str
    # Number of files counted.
    num_files: int = 0
    # Combined size of those files in bytes.
    total_bytes: int = 0
    # Total number of lines, including blank and comment lines.
    total_lines: int = 0
    # Number of lines classified as code.
    code_lines: int = 0

    @property
    def total_kb(self) -> float:
        """Return total size in kilobytes (bytes divided by 1024)."""
        return self.total_bytes / 1024
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class CodeStats:
    """Container for the per-language statistics of one scanned directory."""

    languages: list[LangStats] = field(default_factory=lambda: list[LangStats]())

    @property
    def total(self) -> LangStats:
        """Return a record named "Total" holding the sums over all languages."""
        combined = LangStats(name="Total")
        for entry in self.languages:
            combined.num_files += entry.num_files
            combined.total_bytes += entry.total_bytes
            combined.total_lines += entry.total_lines
            combined.code_lines += entry.code_lines
        return combined
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ----------------------------------------------------------------------------------------
|
|
92
|
+
# Functions
|
|
93
|
+
# ----------------------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ----------------------------------------------------------------------------------------
|
|
97
|
+
def _is_code_line(stripped: str, comment_prefix: str) -> bool:
|
|
98
|
+
"""Determine if a line is a code line (not blank or decorative comment)."""
|
|
99
|
+
if not stripped:
|
|
100
|
+
return False
|
|
101
|
+
if stripped.startswith(comment_prefix):
|
|
102
|
+
# Count comments with meaningful content, but not bare markers or separator lines
|
|
103
|
+
comment_body = stripped[len(comment_prefix) :]
|
|
104
|
+
return any(ch.isalnum() for ch in comment_body)
|
|
105
|
+
return True
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ----------------------------------------------------------------------------------------
|
|
109
|
+
def count_lines(filepath: Path, lang: Language) -> tuple[int, int]:
|
|
110
|
+
"""
|
|
111
|
+
Count total lines and code lines in a file.
|
|
112
|
+
|
|
113
|
+
Handles single-line comments, block comments, and docstrings/heredocs
|
|
114
|
+
according to the language configuration.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
Tuple of (total_lines, code_lines).
|
|
118
|
+
"""
|
|
119
|
+
total_lines = 0
|
|
120
|
+
code_lines = 0
|
|
121
|
+
in_block_comment = False
|
|
122
|
+
block_end: str | None = None
|
|
123
|
+
in_docstring = False
|
|
124
|
+
docstring_marker: str | None = None
|
|
125
|
+
|
|
126
|
+
try:
|
|
127
|
+
content = filepath.read_text(encoding="utf-8", errors="replace")
|
|
128
|
+
except OSError:
|
|
129
|
+
return 0, 0
|
|
130
|
+
|
|
131
|
+
for line in content.splitlines():
|
|
132
|
+
total_lines += 1
|
|
133
|
+
stripped = line.strip()
|
|
134
|
+
|
|
135
|
+
# Inside a block comment (/* ... */)
|
|
136
|
+
if in_block_comment:
|
|
137
|
+
if block_end and block_end in stripped:
|
|
138
|
+
in_block_comment = False
|
|
139
|
+
block_end = None
|
|
140
|
+
continue
|
|
141
|
+
|
|
142
|
+
# Inside a docstring/heredoc
|
|
143
|
+
if in_docstring:
|
|
144
|
+
if docstring_marker and docstring_marker in stripped:
|
|
145
|
+
in_docstring = False
|
|
146
|
+
docstring_marker = None
|
|
147
|
+
continue
|
|
148
|
+
|
|
149
|
+
# Check for start of block comment
|
|
150
|
+
if lang.block_comment_start and stripped.startswith(lang.block_comment_start):
|
|
151
|
+
rest = stripped[len(lang.block_comment_start) :]
|
|
152
|
+
if lang.block_comment_end and lang.block_comment_end not in rest:
|
|
153
|
+
in_block_comment = True
|
|
154
|
+
block_end = lang.block_comment_end
|
|
155
|
+
continue
|
|
156
|
+
|
|
157
|
+
# Check for docstring/heredoc markers (Python: starts with """, Elixir: ends with """)
|
|
158
|
+
found_marker = False
|
|
159
|
+
for marker in lang.docstring_markers:
|
|
160
|
+
starts = stripped.startswith(marker)
|
|
161
|
+
ends = stripped.endswith(marker)
|
|
162
|
+
if starts or ends:
|
|
163
|
+
if starts:
|
|
164
|
+
rest = stripped[len(marker) :]
|
|
165
|
+
else:
|
|
166
|
+
rest = stripped[: -len(marker)]
|
|
167
|
+
if marker not in rest:
|
|
168
|
+
in_docstring = True
|
|
169
|
+
docstring_marker = marker
|
|
170
|
+
found_marker = True
|
|
171
|
+
break
|
|
172
|
+
if found_marker:
|
|
173
|
+
continue
|
|
174
|
+
|
|
175
|
+
if _is_code_line(stripped, lang.line_comment):
|
|
176
|
+
code_lines += 1
|
|
177
|
+
|
|
178
|
+
return total_lines, code_lines
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# ----------------------------------------------------------------------------------------
|
|
182
|
+
def scan_directory(directory: Path) -> CodeStats:
    """
    Walk a directory tree and gather statistics for every supported code file.

    Traversal uses os.walk and never descends into directories whose name
    starts with a dot (e.g. .venv, .git). Files are matched to a language by
    their extension. Languages without any matching file are left out of the
    result.

    Args:
        directory: The directory to scan.

    Returns:
        CodeStats object containing per-language and total statistics.
    """
    # Extension -> language lookup table.
    by_extension: dict[str, Language] = {
        ext: lang for lang in LANGUAGES for ext in lang.extensions
    }

    collected: dict[str, LangStats] = {}

    for root, subdirs, files in os.walk(directory):
        # In-place assignment is what makes os.walk skip the hidden directories.
        subdirs[:] = [name for name in subdirs if not name.startswith(".")]

        for filename in files:
            lang = by_extension.get(os.path.splitext(filename)[1])
            if lang is None:
                continue

            path = Path(root, filename)
            entry = collected.setdefault(lang.name, LangStats(name=lang.name))
            entry.num_files += 1

            # A file whose size cannot be read still counts as a file.
            try:
                entry.total_bytes += path.stat().st_size
            except OSError:
                pass

            total, code = count_lines(path, lang)
            entry.total_lines += total
            entry.code_lines += code

    # Report languages in the order they are declared in LANGUAGES.
    ordered = [collected[lang.name] for lang in LANGUAGES if lang.name in collected]
    return CodeStats(languages=ordered)
|