messy-python-tokensaver 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- messy_python/__init__.py +72 -0
- messy_python/cli.py +93 -0
- messy_python/compressor.py +176 -0
- messy_python/jupyter_magic.py +58 -0
- messy_python/transformer.py +269 -0
- messy_python/validator.py +133 -0
- messy_python_tokensaver-1.0.0.dist-info/METADATA +395 -0
- messy_python_tokensaver-1.0.0.dist-info/RECORD +12 -0
- messy_python_tokensaver-1.0.0.dist-info/WHEEL +5 -0
- messy_python_tokensaver-1.0.0.dist-info/entry_points.txt +2 -0
- messy_python_tokensaver-1.0.0.dist-info/licenses/LICENSE +170 -0
- messy_python_tokensaver-1.0.0.dist-info/top_level.txt +1 -0
messy_python/__init__.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# ─────────────────────────────────────────────────────
|
|
2
|
+
# Messy Python — Token-efficient Python syntax
|
|
3
|
+
# Powered by Bio-Neural .ai · bio.neural.ai@gmail.com
|
|
4
|
+
# Licensed under the Bio-Neural Community License (BNCL)
|
|
5
|
+
# ─────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
messy_python
|
|
9
|
+
============
|
|
10
|
+
A syntax preprocessor that lets you write Python with C-style braces and
|
|
11
|
+
semicolons, reducing token consumption by 20-30% for LLM code generation.
|
|
12
|
+
|
|
13
|
+
Usage
|
|
14
|
+
-----
|
|
15
|
+
from messy_python import transform, compress, validate
|
|
16
|
+
|
|
17
|
+
# Transform messy → standard Python
|
|
18
|
+
standard = transform("def foo() { return 1; }")
|
|
19
|
+
|
|
20
|
+
# Compress standard → messy Python
|
|
21
|
+
messy = compress(open("script.py").read())
|
|
22
|
+
|
|
23
|
+
# Validate messy Python syntax
|
|
24
|
+
result = validate(messy_code)
|
|
25
|
+
|
|
26
|
+
CLI
|
|
27
|
+
---
|
|
28
|
+
messy-python script.mpy -o script.py
|
|
29
|
+
messy-python --compress script.py -o script.mpy
|
|
30
|
+
messy-python --validate script.mpy
|
|
31
|
+
|
|
32
|
+
Jupyter / Colab
|
|
33
|
+
---------------
|
|
34
|
+
%load_ext messy_python
|
|
35
|
+
%%messy_python
|
|
36
|
+
def foo() { return 1; }
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
__version__ = "1.0.0"
|
|
40
|
+
__author__ = "Ava Billions & Chris Knight"
|
|
41
|
+
__organisation__ = "Bio-Neural .ai"
|
|
42
|
+
__email__ = "bio.neural.ai@gmail.com"
|
|
43
|
+
__license__ = "Bio-Neural Community License (BNCL) v1.0"
|
|
44
|
+
|
|
45
|
+
from .transformer import transform
|
|
46
|
+
from .compressor import compress
|
|
47
|
+
from .validator import validate
|
|
48
|
+
|
|
49
|
+
try:
    from .jupyter_magic import load_ipython_extension
except ImportError:
    # Importing the Jupyter integration failed, so expose a stand-in with the
    # same name that reports the problem when it is actually called.
    def load_ipython_extension(ipython=None):
        """Raise ImportError explaining that Jupyter support needs IPython."""
        message = (
            "Jupyter support requires IPython. "
            "Install with: pip install messy-python[jupyter]"
        )
        raise ImportError(message)
|
|
57
|
+
|
|
58
|
+
ATTRIBUTION_COMMENT = """\
|
|
59
|
+
# ─────────────────────────────────────────────────────
|
|
60
|
+
# Messy Python — Token-efficient Python syntax
|
|
61
|
+
# Powered by Bio-Neural .ai · bio.neural.ai@gmail.com
|
|
62
|
+
# Licensed under the Bio-Neural Community License (BNCL)
|
|
63
|
+
# ─────────────────────────────────────────────────────
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
__all__ = [
|
|
67
|
+
"transform",
|
|
68
|
+
"compress",
|
|
69
|
+
"validate",
|
|
70
|
+
"load_ipython_extension",
|
|
71
|
+
"ATTRIBUTION_COMMENT",
|
|
72
|
+
]
|
messy_python/cli.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# ─────────────────────────────────────────────────────
|
|
2
|
+
# Messy Python — Token-efficient Python syntax
|
|
3
|
+
# Powered by Bio-Neural .ai · bio.neural.ai@gmail.com
|
|
4
|
+
# Licensed under the Bio-Neural Community License (BNCL)
|
|
5
|
+
# ─────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
cli.py
|
|
9
|
+
Command-line interface for Messy Python.
|
|
10
|
+
|
|
11
|
+
Usage
|
|
12
|
+
-----
|
|
13
|
+
messy-python script.mpy # transform to stdout
|
|
14
|
+
messy-python script.mpy -o script.py # transform to file
|
|
15
|
+
messy-python --compress script.py # compress to stdout
|
|
16
|
+
messy-python --compress script.py -o out.mpy # compress to file
|
|
17
|
+
messy-python --validate script.mpy # validate only
|
|
18
|
+
messy-python --version # print version
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import sys
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
from . import __version__, transform, compress, validate
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def main(argv=None):
    """
    Run the ``messy-python`` command-line interface.

    Reads source text from the input file (or from standard input when no
    file is given), then validates, compresses or transforms it and writes
    the result to ``-o``/``--output`` or to standard output.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse.  ``None`` (the default) lets argparse read
        ``sys.argv[1:]``, so existing ``main()`` callers are unaffected.

    Notes
    -----
    Terminates through ``sys.exit``: status 1 when the input path does not
    exist or is not a regular file, status 0 after printing help because no
    input file was given and stdin is a terminal, and status 0/1 reflecting
    ``result.valid`` under ``--validate``.
    """
    parser = argparse.ArgumentParser(
        prog="messy-python",
        description="Messy Python — Token-efficient Python syntax by Bio-Neural .ai",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  messy-python script.mpy                 Transform messy → standard Python
  messy-python script.mpy -o out.py       Write output to file
  messy-python --compress script.py       Compress standard → messy Python
  messy-python --validate script.mpy      Validate messy Python syntax

Bio-Neural .ai · bio.neural.ai@gmail.com · BNCL v1.0
""",
    )

    parser.add_argument("input", nargs="?", help="Input file (.mpy or .py)")
    parser.add_argument("-o", "--output", help="Output file path")
    parser.add_argument("--compress", action="store_true",
                        help="Compress standard Python → Messy Python")
    parser.add_argument("--validate", action="store_true",
                        help="Validate Messy Python syntax only")
    parser.add_argument("--debug", action="store_true",
                        help="Show transformation steps")
    parser.add_argument("--version", action="version",
                        version=f"messy-python {__version__} by Bio-Neural .ai")

    args = parser.parse_args(argv)

    # ── Read input ────────────────────────────────────────────────────────────
    if args.input:
        input_path = Path(args.input)
        if not input_path.exists():
            print(f"Error: File not found: {args.input}", file=sys.stderr)
            sys.exit(1)
        if not input_path.is_file():
            # A directory (or other non-file) would otherwise crash in
            # read_text() with an unhandled traceback.
            print(f"Error: Not a file: {args.input}", file=sys.stderr)
            sys.exit(1)
        code = input_path.read_text(encoding="utf-8")
        source_name = args.input
    else:
        if sys.stdin.isatty():
            # Nothing piped in and no file named: show usage instead of
            # blocking on an interactive read.
            parser.print_help()
            sys.exit(0)
        code = sys.stdin.read()
        # Used in the status line below so it never prints "None".
        source_name = "<stdin>"

    # ── Validate ──────────────────────────────────────────────────────────────
    if args.validate:
        result = validate(code)
        print(result)
        sys.exit(0 if result.valid else 1)

    # ── Compress / Transform ──────────────────────────────────────────────────
    if args.compress:
        output = compress(code)
    else:
        output = transform(code, debug=args.debug)

    # ── Write output ──────────────────────────────────────────────────────────
    if args.output:
        Path(args.output).write_text(output, encoding="utf-8")
        action = "Compressed" if args.compress else "Transformed"
        print(f"{action}: {source_name} → {args.output}")
    else:
        print(output)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Run the CLI when this module is executed as a script rather than imported.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# ─────────────────────────────────────────────────────
|
|
2
|
+
# Messy Python — Token-efficient Python syntax
|
|
3
|
+
# Powered by Bio-Neural .ai · bio.neural.ai@gmail.com
|
|
4
|
+
# Licensed under the Bio-Neural Community License (BNCL)
|
|
5
|
+
# ─────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
compressor.py
|
|
9
|
+
Converts standard Python to Messy Python syntax for token efficiency.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
import tokenize
|
|
14
|
+
import io
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
# Keywords that may start a line which opens an indented block.
_BLOCK_KEYWORDS = frozenset([
    "if", "elif", "else", "for", "while", "with",
    "def", "class", "try", "except", "finally", "async",
])

# Matches a complete block-header line.  Groups:
#   1 - leading whitespace
#   2 - the block keyword
#   3 - the remainder of the header up to the final colon
#   4 - trailing whitespace plus an optional `#` comment
_BLOCK_OPEN = re.compile(
    r"^(\s*)(" + "|".join(_BLOCK_KEYWORDS) + r")\b(.*):(\s*(?:#.*)?)$"
)

# Matches a line that begins with a simple-statement keyword.
# NOTE(review): nothing in the code shown for this module references this
# pattern — confirm whether it is used elsewhere before relying on it.
_SIMPLE_STMT = re.compile(r"^\s*(return|pass|break|continue|raise|yield|del|assert|import|from)\b")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _indent_level(line: str) -> int:
    """Return how many leading whitespace characters *line* starts with."""
    without_indent = line.lstrip()
    return len(line) - len(without_indent)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def compress(code: str, max_line_length: int = 120) -> str:
    """
    Compress standard Python to token-efficient Messy Python.

    The conversion is purely line based: every physical line is treated as
    one statement, so statements spanning several lines (open brackets,
    multi-line strings, backslash continuations) are not handled.

    Transformations applied:
      - Block-opening colons replaced with ` {`
      - Closing blocks get `}` inserted at the indentation of their header
      - A block whose body is one short statement is inlined on one line
      - Pairs of short consecutive simple statements merged with `;`
      - Remaining simple statements get a trailing `;`

    Comment-only lines, blank lines and decorator lines are copied through
    unchanged.  Lines that carry a trailing `#` comment are never merged or
    inlined, because anything appended after the comment would become part
    of it.

    Parameters
    ----------
    code : str
        Standard Python source code.
    max_line_length : int
        Maximum output line length before avoiding merges.

    Returns
    -------
    str
        Messy Python source with attribution comment prepended (unless the
        attribution is already present in the output).

    Example
    -------
    >>> compress("def double(x):\\n    return x * 2\\n").splitlines()[-1]
    'def double(x) { return x * 2; }'
    """
    from . import ATTRIBUTION_COMMENT

    lines = code.splitlines()
    total = len(lines)
    result = []
    # Indentation of the header line of every block whose `{` is still open.
    open_blocks = []

    def next_nonblank(start):
        """Return the index of the first non-blank line at or after *start*, else None."""
        for idx in range(start, total):
            if lines[idx].strip():
                return idx
        return None

    def close_block():
        """Pop the innermost open block and emit its closing brace."""
        header_indent = open_blocks.pop()
        # Make sure the last statement of the block is terminated, but leave
        # comments, braces and already-terminated lines alone: `};` or a `;`
        # after an inlined `{ ... }` does not round-trip through transform.
        for j in range(len(result) - 1, -1, -1):
            tail = result[j].strip()
            if not tail:
                continue
            if not tail.startswith("#") and not tail.endswith((";", ":", "{", "}")):
                result[j] = result[j].rstrip() + ";"
            break
        result.append(" " * header_indent + "}")

    i = 0
    while i < total:
        stripped = lines[i].rstrip()
        text = stripped.strip()

        # Blank lines and comment-only lines pass straight through and never
        # close a block, whatever their indentation.
        if not text:
            result.append("")
            i += 1
            continue
        if text.startswith("#"):
            result.append(stripped)
            i += 1
            continue

        current_indent = _indent_level(stripped)

        # A code line indented no deeper than a block's header ends that
        # block.  Comparing against the header (rather than header + 4)
        # keeps this correct for any indentation width.
        while open_blocks and open_blocks[-1] >= current_indent:
            close_block()

        m = _BLOCK_OPEN.match(stripped)
        if m:
            indent_str, keyword, rest = m.group(1), m.group(2), m.group(3)
            comment = m.group(4).strip()

            open_line = f"{indent_str}{keyword}{rest} {{"
            if comment:
                open_line += f" {comment}"

            # Peek ahead: if the body is a single short statement, inline it.
            if i + 1 < total:
                body = lines[i + 1].strip()
                body_indent = _indent_level(lines[i + 1].rstrip())

                # Skip blank lines when looking for what follows the body: a
                # blank line inside a block does not end it.
                after_idx = next_nonblank(i + 2)
                after_indent = (
                    current_indent if after_idx is None
                    else _indent_level(lines[after_idx].rstrip())
                )

                is_single_body = (
                    bool(body)
                    and body_indent > current_indent
                    and after_indent <= current_indent
                    and not body.startswith("#")
                    and not _BLOCK_OPEN.match(body)
                )
                # Conservative: any `#` (even inside a string) disables
                # inlining so nothing can end up inside a comment.
                comment_free = not comment and "#" not in body

                if (is_single_body and comment_free
                        and len(open_line) + len(body) + 4 <= max_line_length):
                    # Inline: `def foo() { return x; }`
                    result.append(f"{open_line} {body}; }}")
                    i += 2
                    continue

            result.append(open_line)
            open_blocks.append(current_indent)
            i += 1
            continue

        # Decorators must stay on their own line without a terminator:
        # `@decorator;` is not valid Python.
        if text.startswith("@"):
            result.append(stripped)
            i += 1
            continue

        # Simple statement — try to merge with the next line if both are
        # short, at the same indent, and this line has no trailing comment.
        if "#" not in stripped and i + 1 < total:
            following = lines[i + 1].strip()
            if (following
                    and not following.startswith(("#", "@"))
                    and _indent_level(lines[i + 1].rstrip()) == current_indent
                    and not _BLOCK_OPEN.match(following)):
                merged = stripped + "; " + following
                if len(merged) <= max_line_length:
                    result.append(merged + ";")
                    i += 2
                    continue

        # Add semicolon to simple statement
        stmt = stripped
        if not stmt.endswith((":", ";")):
            stmt += ";"
        result.append(stmt)
        i += 1

    # Close any blocks still open at end of input.
    while open_blocks:
        close_block()

    output = "\n".join(result)

    # Prepend attribution
    if ATTRIBUTION_COMMENT.strip() not in output:
        output = ATTRIBUTION_COMMENT + "\n" + output

    return output
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# ─────────────────────────────────────────────────────
|
|
2
|
+
# Messy Python — Token-efficient Python syntax
|
|
3
|
+
# Powered by Bio-Neural .ai · bio.neural.ai@gmail.com
|
|
4
|
+
# Licensed under the Bio-Neural Community License (BNCL)
|
|
5
|
+
# ─────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
jupyter_magic.py
|
|
9
|
+
Provides the %%messy_python cell magic for Jupyter and Google Colab.
|
|
10
|
+
|
|
11
|
+
Usage
|
|
12
|
+
-----
|
|
13
|
+
%load_ext messy_python
|
|
14
|
+
|
|
15
|
+
%%messy_python
|
|
16
|
+
def foo() { return 1; }
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from IPython.core.magic import register_cell_magic
|
|
20
|
+
from IPython.core.magic_arguments import magic_arguments, argument, parse_argstring
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def load_ipython_extension(ipython):
    """
    Register the ``%%messy_python`` cell magic (called by ``%load_ext messy_python``).

    The magic transforms the cell body to standard Python and runs it through
    ``ipython.run_cell``; with ``--validate-only`` it only prints the
    validation result.  Two banner lines are printed once registration is done.
    """

    @magic_arguments()
    @argument("--debug", action="store_true", help="Print transformation steps")
    @argument("--validate-only", action="store_true", dest="validate_only",
              help="Only validate, do not execute")
    @register_cell_magic
    def messy_python(line, cell):
        """
        %%messy_python [--debug] [--validate-only]

        Transform and execute Messy Python cell.
        """
        from messy_python import transform, validate

        options = parse_argstring(messy_python, line)

        # Validation-only mode reports and stops before anything is executed.
        if options.validate_only:
            print(validate(cell))
            return

        python_source = transform(cell, debug=options.debug)

        if options.debug:
            rule = "─" * 60
            print(rule)
            print("Transformed output:")
            print(rule)
            print(python_source)
            print(rule)

        ipython.run_cell(python_source)

    print("Messy Python loaded. Use %%messy_python in cells.")
    print("Powered by Bio-Neural .ai · bio.neural.ai@gmail.com")
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# ─────────────────────────────────────────────────────
|
|
2
|
+
# Messy Python — Token-efficient Python syntax
|
|
3
|
+
# Powered by Bio-Neural .ai · bio.neural.ai@gmail.com
|
|
4
|
+
# Licensed under the Bio-Neural Community License (BNCL)
|
|
5
|
+
# ─────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
transformer.py
|
|
9
|
+
Converts Messy Python syntax to standard Python.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
import textwrap
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Keywords that precede a block-opening brace
_BLOCK_KEYWORDS = frozenset([
    "if", "elif", "else", "for", "while", "with",
    "def", "class", "try", "except", "finally", "async",
])

# Matches a line whose first word (after optional indentation) is one of the
# block keywords above.
# NOTE(review): nothing in the code shown for this module references this
# pattern — confirm whether it is used elsewhere before relying on it.
_KEYWORD_PATTERN = re.compile(
    r"^\s*(" + "|".join(_BLOCK_KEYWORDS) + r")\b"
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# In-band marker that _process_line emits wherever a brace-delimited block
# ends and that _fix_indentation consumes to dedent.  It contains a NUL
# character, which cannot occur in Python source, so it can never be mistaken
# for a real output line.
_BLOCK_END = "\x00}"

# `head { body }` with the closing brace as the last non-blank character.
# Applied to a copy of the line in which strings are masked out.
_INLINE_BLOCK = re.compile(r"^(\s*)(.*?)\s*\{(.*)\}\s*$")


def _tokenise_strings(line: str):
    """
    Split *line* into ``(text, is_opaque)`` segments.

    Opaque segments are string literals (single, double or triple quoted) and
    a trailing ``#`` comment; their contents must never be rewritten.  The
    remaining segments are plain code.  Concatenating all segment texts gives
    back *line* unchanged.  An unterminated string runs to the end of the line.

    Returns
    -------
    list of (str, bool)
    """
    segments = []
    buf = []
    length = len(line)
    i = 0

    def flush():
        # Emit the code collected so far as one non-opaque segment.
        if buf:
            segments.append(("".join(buf), False))
            buf.clear()

    while i < length:
        ch = line[i]

        if line[i:i + 3] in ('"""', "'''"):
            flush()
            quote = line[i:i + 3]
            end = line.find(quote, i + 3)
            if end == -1:
                segments.append((line[i:], True))
                return segments
            segments.append((line[i:end + 3], True))
            i = end + 3
        elif ch in ('"', "'"):
            flush()
            end = i + 1
            while end < length and line[end] != ch:
                # A backslash escapes the next character whatever it is, so
                # skip both.  Looking only at the character before a quote
                # would misread a string ending in an escaped backslash.
                end += 2 if line[end] == "\\" else 1
            segments.append((line[i:end + 1], True))
            i = end + 1
        elif ch == "#":
            # Comment — the rest of the line is not code.
            flush()
            segments.append((line[i:], True))
            return segments
        else:
            buf.append(ch)
            i += 1

    flush()
    return segments


def _split_trailing_comment(segments):
    """Return ``(segments_without_comment, comment_text)``; the text is "" if there is none."""
    if segments and segments[-1][1] and segments[-1][0].startswith("#"):
        return segments[:-1], segments[-1][0]
    return segments, ""


def _header_lines(head: str, comment: str, indent_str: str) -> list[str]:
    """
    Build the output for a block header whose opening brace was removed.

    A leading `}` (as in `} else {`) becomes a block-end marker in front of
    the header.  The colon is added to the code part, before any comment.
    """
    out = []
    head = head.strip()
    if head.startswith("}"):
        out.append(_BLOCK_END)
        head = head[1:].lstrip()
    if not head.endswith(":"):
        head += ":"
    if comment:
        head += "  " + comment
    out.append(indent_str + head)
    return out


def _process_line(line: str) -> list[str]:
    """
    Process a single logical line of Messy Python.

    Returns a list of output items: standard Python lines (several if
    semicolons split the input) and ``_BLOCK_END`` markers wherever a
    brace-delimited block closes.  The indentation of returned lines is not
    significant; ``_fix_indentation`` recomputes it.

    NOTE: any brace outside a string is treated as block syntax, so a
    statement that ends in a dict/set literal must be terminated with `;`
    (e.g. ``x = {"a": 1};``), and nested inline blocks on one line are not
    supported.
    """
    stripped = line.rstrip()
    if not stripped:
        return [""]

    indent_str = stripped[: len(stripped) - len(stripped.lstrip())]

    segments = _tokenise_strings(stripped)
    code_segments, comment = _split_trailing_comment(segments)
    # Code with string contents removed, for structural analysis.
    code_only = "".join(text for text, opaque in code_segments if not opaque)

    # ── Closing brace ─────────────────────────────────────────────────────────
    if code_only.strip() in ("}", "};"):
        return [_BLOCK_END]

    # ── Opening brace at end of line: `def foo() {` → `def foo():` ────────────
    # Also handles `} else {` and `} elif cond {`.
    if code_only.rstrip().endswith("{"):
        pieces = [text for text, _opaque in code_segments]
        # Drop the last `{` that is not inside a string.
        for idx in range(len(code_segments) - 1, -1, -1):
            text, opaque = code_segments[idx]
            if not opaque and "{" in text:
                cut = text.rfind("{")
                pieces[idx] = text[:cut] + text[cut + 1:]
                break
        return _header_lines("".join(pieces), comment, indent_str)

    # ── Inline block: `if cond { stmt; stmt; }` ───────────────────────────────
    if "{" in code_only:
        code_part = "".join(text for text, _opaque in code_segments)
        # Same length as code_part, with strings blanked out so the pattern
        # can only match braces that are real code.
        masked = "".join(
            "\x00" * len(text) if opaque else text
            for text, opaque in code_segments
        )
        match = _INLINE_BLOCK.match(masked)
        if match:
            head = code_part[match.start(2):match.end(2)].strip()
            body = code_part[match.start(3):match.end(3)].strip()
            lines_out = []
            if head:
                lines_out.extend(_header_lines(head, comment, indent_str))
                body_indent = indent_str + "    "
            else:
                # Bare `{ ... }`: no header, statements stay at this level.
                body_indent = indent_str
                if comment:
                    lines_out.append(indent_str + comment)
            lines_out.extend(
                body_indent + stmt.strip() for stmt in _split_semicolons(body)
            )
            if head:
                # The inline block is complete; close it again.
                lines_out.append(_BLOCK_END)
            return lines_out

    # ── Plain line: one output line per `;`-separated statement ───────────────
    # This also drops a single trailing `;`.
    return [indent_str + part.strip() for part in _split_semicolons(stripped)]


def _split_semicolons(code: str) -> list[str]:
    """Split a code string on semicolons that are outside strings/comments."""
    parts = []
    current = []
    for text, opaque in _tokenise_strings(code):
        if opaque:
            current.append(text)
            continue
        *complete, tail = text.split(";")
        for chunk in complete:
            current.append(chunk)
            parts.append("".join(current))
            current = []
        current.append(tail)
    if current:
        parts.append("".join(current))
    return [part for part in parts if part.strip()]


def _opens_block(stripped: str) -> bool:
    """Return True if the code part of *stripped* (ignoring a trailing comment) ends with `:`."""
    code_segments, _comment = _split_trailing_comment(_tokenise_strings(stripped))
    return "".join(text for text, _opaque in code_segments).rstrip().endswith(":")


def _fix_indentation(lines: list[str]) -> list[str]:
    """
    Re-indent the output of the brace/semicolon expansion pass.

    Indentation grows by one level (four spaces) after every line whose code
    ends with `:` and shrinks by one level at every ``_BLOCK_END`` marker.
    Markers are consumed and do not appear in the result.

    For input without markers, a line starting with else/elif/except/finally
    that does not directly follow a block end is dedented by one level.
    """
    result = []
    indent_level = 0
    indent_unit = "    "
    just_closed = False  # True while the last structural item was a block end

    for line in lines:
        if line == _BLOCK_END:
            indent_level = max(0, indent_level - 1)
            just_closed = True
            continue

        stripped = line.strip()
        if not stripped:
            result.append("")
            continue

        # Fallback for marker-less input only; after a marker the level is
        # already correct for the continuation clause.
        if not just_closed and re.match(r"^(else|elif|except|finally)\b", stripped):
            indent_level = max(0, indent_level - 1)
        if not stripped.startswith("#"):
            # A comment between `}` and `else` must not re-trigger the fallback.
            just_closed = False

        result.append(indent_unit * indent_level + stripped)

        if _opens_block(stripped):
            indent_level += 1

    return result
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def transform(code: str, debug: bool = False) -> str:
    """
    Convert Messy Python source into standard Python.

    Each input line is expanded with ``_process_line``; the combined result
    is then re-indented with ``_fix_indentation`` and joined with newlines.

    Parameters
    ----------
    code : str
        Messy Python source code.
    debug : bool
        If True, prints one ``[debug]`` line per input line to stdout showing
        the raw line and what it expanded to.

    Returns
    -------
    str
        Standard Python source, with the attribution comment prepended unless
        the attribution text is already present in the output.

    Example
    -------
    >>> transform("def double(x) { return x * 2; }").startswith("# ───")
    True
    """
    from . import ATTRIBUTION_COMMENT

    expanded: list[str] = []
    for source_line in code.splitlines():
        pieces = _process_line(source_line)
        expanded += pieces
        if debug:
            print(f"[debug] {source_line!r} → {pieces}")

    body = "\n".join(_fix_indentation(expanded))

    # Skip the banner when the output already carries it.
    if ATTRIBUTION_COMMENT.strip() in body:
        return body
    return ATTRIBUTION_COMMENT + "\n" + body
|