celes 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- celes-0.1.0/PKG-INFO +80 -0
- celes-0.1.0/README.md +66 -0
- celes-0.1.0/celes/__init__.py +17 -0
- celes-0.1.0/celes/celes_to_md.py +151 -0
- celes-0.1.0/celes/cli.py +115 -0
- celes-0.1.0/celes/core.py +140 -0
- celes-0.1.0/celes/md_to_celes.py +163 -0
- celes-0.1.0/celes/parser.py +264 -0
- celes-0.1.0/celes/validator.py +301 -0
- celes-0.1.0/celes.egg-info/PKG-INFO +80 -0
- celes-0.1.0/celes.egg-info/SOURCES.txt +16 -0
- celes-0.1.0/celes.egg-info/dependency_links.txt +1 -0
- celes-0.1.0/celes.egg-info/entry_points.txt +2 -0
- celes-0.1.0/celes.egg-info/top_level.txt +1 -0
- celes-0.1.0/pyproject.toml +25 -0
- celes-0.1.0/setup.cfg +4 -0
- celes-0.1.0/setup.py +22 -0
celes-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: celes
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Celes 0.1 — a tag-based markup language toolkit
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: markup,language,markdown,parser,celes
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Dynamic: requires-python
|
|
14
|
+
|
|
15
|
+
# Celes 0.1
|
|
16
|
+
|
|
17
|
+
A tag-based markup language and toolkit. Write documents in Celes, convert to HTML, validate, or round-trip with Markdown.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install celes
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## CLI Usage
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Convert a Celes file to HTML
|
|
29
|
+
celes parse doc.celes doc.html
|
|
30
|
+
|
|
31
|
+
# Validate a Celes file
|
|
32
|
+
celes validate doc.celes
|
|
33
|
+
|
|
34
|
+
# Convert Markdown to Celes
|
|
35
|
+
celes md README.md README.celes
|
|
36
|
+
|
|
37
|
+
# Convert Celes to Markdown
|
|
38
|
+
celes tomd doc.celes doc.md
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Python API
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from celes import parse_celes, validate_celes, convert_md_to_celes, convert_celes_to_md
|
|
45
|
+
|
|
46
|
+
# Parse to HTML
|
|
47
|
+
html = parse_celes(source)
|
|
48
|
+
|
|
49
|
+
# Validate
|
|
50
|
+
is_valid, errors = validate_celes(source)
|
|
51
|
+
for e in errors:
|
|
52
|
+
    print(e)  # " ✗ Line 3: <header> is missing required -size attribute"
|
|
53
|
+
|
|
54
|
+
# Convert
|
|
55
|
+
celes_source = convert_md_to_celes(markdown_source)
|
|
56
|
+
markdown_source = convert_celes_to_md(celes_source)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Celes Syntax (0.1)
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
<!Celes-0.1>
|
|
63
|
+
; This is a comment
|
|
64
|
+
<title>{My Page}
|
|
65
|
+
<header -size=1>{Hello, World!}
|
|
66
|
+
<line>{This is <bold>{bold} and <italic>{italic} text.}
|
|
67
|
+
<list -bullet=circle>{Item one}<sublist -bullet=circle>{Sub-item}
|
|
68
|
+
<list -bullet=number>{Numbered item}
|
|
69
|
+
<table>{Name, Age}
|
|
70
|
+
<item>{Alice, 30}
|
|
71
|
+
<codeblock>{print("hello")}
|
|
72
|
+
<image>{photo.png}
|
|
73
|
+
<linkimage -image=photo.png>{https://example.com}
|
|
74
|
+
<blockquote>{A quote <nestquote>{nested}}
|
|
75
|
+
<line -align=center>{Centered}
|
|
76
|
+
<newline>
|
|
77
|
+
<pagebreak>
|
|
78
|
+
<insertspace>
|
|
79
|
+
<empty>{<raw> text with <angle> brackets}
|
|
80
|
+
```
|
celes-0.1.0/README.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Celes 0.1
|
|
2
|
+
|
|
3
|
+
A tag-based markup language and toolkit. Write documents in Celes, convert to HTML, validate, or round-trip with Markdown.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install celes
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## CLI Usage
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Convert a Celes file to HTML
|
|
15
|
+
celes parse doc.celes doc.html
|
|
16
|
+
|
|
17
|
+
# Validate a Celes file
|
|
18
|
+
celes validate doc.celes
|
|
19
|
+
|
|
20
|
+
# Convert Markdown to Celes
|
|
21
|
+
celes md README.md README.celes
|
|
22
|
+
|
|
23
|
+
# Convert Celes to Markdown
|
|
24
|
+
celes tomd doc.celes doc.md
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Python API
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from celes import parse_celes, validate_celes, convert_md_to_celes, convert_celes_to_md
|
|
31
|
+
|
|
32
|
+
# Parse to HTML
|
|
33
|
+
html = parse_celes(source)
|
|
34
|
+
|
|
35
|
+
# Validate
|
|
36
|
+
is_valid, errors = validate_celes(source)
|
|
37
|
+
for e in errors:
|
|
38
|
+
    print(e)  # " ✗ Line 3: <header> is missing required -size attribute"
|
|
39
|
+
|
|
40
|
+
# Convert
|
|
41
|
+
celes_source = convert_md_to_celes(markdown_source)
|
|
42
|
+
markdown_source = convert_celes_to_md(celes_source)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Celes Syntax (0.1)
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
<!Celes-0.1>
|
|
49
|
+
; This is a comment
|
|
50
|
+
<title>{My Page}
|
|
51
|
+
<header -size=1>{Hello, World!}
|
|
52
|
+
<line>{This is <bold>{bold} and <italic>{italic} text.}
|
|
53
|
+
<list -bullet=circle>{Item one}<sublist -bullet=circle>{Sub-item}
|
|
54
|
+
<list -bullet=number>{Numbered item}
|
|
55
|
+
<table>{Name, Age}
|
|
56
|
+
<item>{Alice, 30}
|
|
57
|
+
<codeblock>{print("hello")}
|
|
58
|
+
<image>{photo.png}
|
|
59
|
+
<linkimage -image=photo.png>{https://example.com}
|
|
60
|
+
<blockquote>{A quote <nestquote>{nested}}
|
|
61
|
+
<line -align=center>{Centered}
|
|
62
|
+
<newline>
|
|
63
|
+
<pagebreak>
|
|
64
|
+
<insertspace>
|
|
65
|
+
<empty>{<raw> text with <angle> brackets}
|
|
66
|
+
```
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Celes 0.1 — A tag-based markup language.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .parser import parse_celes
|
|
6
|
+
from .validator import validate_celes, CelesError
|
|
7
|
+
from .md_to_celes import convert_md_to_celes
|
|
8
|
+
from .celes_to_md import convert_celes_to_md
|
|
9
|
+
|
|
10
|
+
# Version string of the celes package.
__version__ = "0.1.0"
# Names exported by ``from celes import *``; each one is imported above.
__all__ = [
    "parse_celes",
    "validate_celes",
    "CelesError",
    "convert_md_to_celes",
    "convert_celes_to_md",
]
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Celes 0.1 — Celes to Markdown converter.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from .core import tokenize, find_matching_brace, parse_attributes
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def inline_to_md(content):
    """Translate the inline Celes tags inside *content* into Markdown.

    Text outside tags is copied through unchanged.  Each ``<tag ...>{body}``
    is replaced by its Markdown form, with the body converted recursively
    (except for ``code`` and ``empty``, which keep their body verbatim).
    A ``<...>`` span whose header does not start with a tag name is copied
    through as-is, and unknown tags are reduced to their converted body.

    Args:
        content: Inline Celes markup, or ``None``.

    Returns:
        The Markdown text; ``''`` when *content* is ``None``.
    """
    if content is None:
        return ''

    # Tags whose Markdown form is just prefix + converted body + suffix.
    wrappers = {
        'bold': ('**', '**'),
        'italic': ('*', '*'),
        'bold+italic': ('***', '***'),
        'underline': ('<u>', '</u>'),
        'strike': ('~~', '~~'),
        'nestquote': ('>> ', ''),
    }

    pieces = []
    pos = 0
    total = len(content)
    while pos < total:
        lt = content.find('<', pos)
        if lt == -1:
            # No more tags: the remainder is plain text.
            pieces.append(content[pos:])
            break
        pieces.append(content[pos:lt])

        gt = content.find('>', lt)
        if gt == -1:
            # A '<' with no closing '>' is ordinary text.
            pieces.append(content[lt:])
            break

        parsed = re.match(r'^([\w+]+)(.*)', content[lt + 1:gt], re.DOTALL)
        if parsed is None:
            # Not a tag header (e.g. "< 2 >"): keep the span literally.
            pieces.append(content[lt:gt + 1])
            pos = gt + 1
            continue

        tagname = parsed.group(1).lower()
        attrs = parse_attributes(parsed.group(2))

        # Locate the optional {body}; without a matched brace pair the tag
        # has an empty body and scanning resumes right after the header.
        body_start = gt + 1
        inner = ''
        resume = body_start
        if content[body_start:body_start + 1] == '{':
            brace_end = find_matching_brace(content, body_start)
            if brace_end != -1:
                inner = content[body_start + 1:brace_end]
                resume = brace_end + 1
        inner_md = inline_to_md(inner)

        if tagname in wrappers:
            prefix, suffix = wrappers[tagname]
            pieces.append(f'{prefix}{inner_md}{suffix}')
        elif tagname == 'code':
            pieces.append(f'`{inner}`')
        elif tagname == 'link':
            # The body is the URL; the -body attribute is the link text.
            label = attrs.get('body', inner_md)
            pieces.append(f'[{label}]({inner})')
        elif tagname == 'newline':
            pieces.append('\n')
        elif tagname == 'empty':
            pieces.append(inner)
        elif tagname == 'checkmark':
            mark = 'x' if 'check' in attrs else ' '
            pieces.append(f'[{mark}] {inner_md}')
        else:
            pieces.append(inner_md)
        pos = resume

    return ''.join(pieces)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def convert_celes_to_md(source):
    """Convert a Celes document into Markdown text.

    The source is tokenized and each block-level tag is mapped to Markdown:
    headers to ``#`` lines, lines to paragraphs (or an aligned ``<div>``),
    blockquotes to ``>`` lines, code blocks to fenced blocks, images and
    link-images to Markdown image syntax, tables (with their following
    ``item`` rows) to pipe tables, lists (with their following ``sublist``
    entries) to bullet or numbered items, and ``newline`` /
    ``pagebreak`` / ``insertspace`` to a blank line or a horizontal rule.
    Comments become HTML comments; the declaration, error tokens and
    unknown tags are skipped.  A ``title`` tag is emitted as front matter
    at the top of the output.

    Args:
        source: Celes markup as a string.

    Returns:
        The Markdown text, with output entries joined by newlines.
    """
    tokens = tokenize(source)
    output = []
    title = None
    i = 0

    while i < len(tokens):
        _lineno, tagname, attrs, content = tokens[i]

        if tagname == 'declaration':
            i += 1
        elif tagname == 'comment':
            output.append(f'<!-- {content} -->')
            i += 1
        elif tagname == 'title':
            title = content
            i += 1
        elif tagname == 'header':
            # A malformed -size must not abort the whole conversion; fall
            # back to level 1 and keep the level within Markdown's 1-6.
            try:
                size = int(attrs.get('size', 1))
            except (TypeError, ValueError):
                size = 1
            size = max(1, min(size, 6))
            output.append(f'{"#" * size} {inline_to_md(content)}')
            i += 1
        elif tagname == 'line':
            align = attrs.get('align', '')
            text = inline_to_md(content)
            if align and align != 'left':
                output.append(f'<div align="{align}">{text}</div>')
            else:
                output.append(text)
            i += 1
        elif tagname == 'blockquote':
            text = inline_to_md(content)
            for bq_line in text.splitlines():
                output.append(f'> {bq_line}')
            i += 1
        elif tagname == 'codeblock':
            output.append('```')
            output.append(content or '')
            output.append('```')
            i += 1
        elif tagname == 'image':
            # Emit a Markdown image; the tag body is the image source.
            src = content or ''
            output.append(f'![]({src})')
            i += 1
        elif tagname == 'linkimage':
            # A clickable image: the -image attribute is the picture and
            # the tag body is the link target.
            img = attrs.get('image', '')
            if not isinstance(img, str):
                img = ''  # bare "-image" flag carries no path
            target = content or ''
            output.append(f'[![]({img})]({target})')
            i += 1
        elif tagname == 'table':
            cols = [c.strip() for c in (content or '').split(',')]
            output.append('| ' + ' | '.join(cols) + ' |')
            output.append('| ' + ' | '.join(['---'] * len(cols)) + ' |')
            i += 1
            # Every directly following <item> is a row of this table.
            while i < len(tokens) and tokens[i][1] == 'item':
                cells = [c.strip() for c in (tokens[i][3] or '').split(',')]
                output.append('| ' + ' | '.join(cells) + ' |')
                i += 1
        elif tagname == 'list':
            counter = 1
            # Consume the whole run of consecutive list entries so numbered
            # items share one counter.
            while i < len(tokens) and tokens[i][1] == 'list':
                t_attrs = tokens[i][2]
                t_content = tokens[i][3] or ''
                t_bullet = t_attrs.get('bullet', 'circle')
                text = inline_to_md(t_content)
                if t_bullet == 'number':
                    output.append(f'{counter}. {text}')
                    counter += 1
                else:
                    output.append(f'- {text}')
                i += 1
                while i < len(tokens) and tokens[i][1] == 'sublist':
                    s_attrs = tokens[i][2]
                    s_content = tokens[i][3] or ''
                    s_bullet = s_attrs.get('bullet', 'circle')
                    s_text = inline_to_md(s_content)
                    # Four spaces of indent so Markdown nests the entry
                    # under the preceding "- " or "N. " item instead of
                    # treating it as a sibling.
                    if s_bullet == 'number':
                        output.append(f'    1. {s_text}')
                    else:
                        output.append(f'    - {s_text}')
                    i += 1
        elif tagname == 'newline':
            output.append('')
            i += 1
        elif tagname in ('pagebreak', 'insertspace'):
            output.append('\n---\n')
            i += 1
        else:
            # Error tokens and unrecognized tags produce no output.
            i += 1

    if title:
        output.insert(0, f'---\ntitle: {title}\n---\n')

    return '\n'.join(output)
|
celes-0.1.0/celes/cli.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Celes CLI — command line interface for the Celes 0.1 toolkit.
|
|
3
|
+
|
|
4
|
+
Commands:
|
|
5
|
+
celes parse input.celes [output.html] — convert to HTML
|
|
6
|
+
celes validate input.celes — validate a file
|
|
7
|
+
celes md input.md [output.celes] — convert Markdown to Celes
|
|
8
|
+
celes tomd input.celes [output.md] — convert Celes to Markdown
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import sys
|
|
12
|
+
import os
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def read_file(path):
    """Return the UTF-8 text stored at *path*.

    Any failure to read the file (missing file, directory, permission
    problem, content that is not valid UTF-8) is reported on stderr and
    terminates the process with exit status 1 instead of a traceback.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file content.

    Raises:
        SystemExit: With code 1 when the file cannot be read.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        print(f'Error: File not found: {path}', file=sys.stderr)
    except UnicodeDecodeError as exc:
        print(f'Error: {path} is not valid UTF-8: {exc}', file=sys.stderr)
    except OSError as exc:
        # Covers directories, permission errors and other I/O failures.
        print(f'Error: Cannot read {path}: {exc}', file=sys.stderr)
    sys.exit(1)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def write_file(path, content):
    """Write *content* to *path* as UTF-8 and print a confirmation.

    A failure to open or write the file is reported on stderr and ends the
    process with exit status 1, matching how ``read_file`` reports errors.

    Args:
        path: Destination file path; an existing file is overwritten.
        content: Text to write.

    Raises:
        SystemExit: With code 1 when the file cannot be written.
    """
    try:
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
    except OSError as exc:
        print(f'Error: Cannot write {path}: {exc}', file=sys.stderr)
        sys.exit(1)
    print(f'✓ Output written to {path}')
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def cmd_parse(args):
    """Handle ``celes parse``: convert a Celes file to HTML.

    Args:
        args: Command arguments; the first is the input path and the
            optional second is the output path.  Without an output path
            the HTML is printed to stdout.  With no arguments at all the
            usage line is printed and the process exits with status 1.
    """
    if not args:
        print('Usage: celes parse <input.celes> [output.html]')
        sys.exit(1)

    from .parser import parse_celes

    input_path, *extra = args
    html = parse_celes(read_file(input_path))
    if extra:
        write_file(extra[0], html)
        return
    print(html)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def cmd_validate(args):
    """Handle ``celes validate``: validate a Celes file.

    The process always exits: with status 1 after printing the usage line
    when no input path is given, otherwise with whatever status
    ``main_validate`` returns for the file.

    Args:
        args: Command arguments; the first is the input path.
    """
    if not args:
        print('Usage: celes validate <input.celes>')
        sys.exit(1)

    from .validator import main_validate

    filename = args[0]
    sys.exit(main_validate(read_file(filename), filename=filename))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def cmd_md(args):
    """Handle ``celes md``: convert a Markdown file to Celes.

    Args:
        args: Command arguments; the first is the input path and the
            optional second is the output path.  Without an output path
            the Celes text is printed to stdout.  With no arguments at all
            the usage line is printed and the process exits with status 1.
    """
    if not args:
        print('Usage: celes md <input.md> [output.celes]')
        sys.exit(1)

    from .md_to_celes import convert_md_to_celes

    input_path, *extra = args
    celes_text = convert_md_to_celes(read_file(input_path))
    if extra:
        write_file(extra[0], celes_text)
        return
    print(celes_text)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def cmd_tomd(args):
    """Handle ``celes tomd``: convert a Celes file to Markdown.

    Args:
        args: Command arguments; the first is the input path and the
            optional second is the output path.  Without an output path
            the Markdown is printed to stdout.  With no arguments at all
            the usage line is printed and the process exits with status 1.
    """
    if not args:
        print('Usage: celes tomd <input.celes> [output.md]')
        sys.exit(1)

    from .celes_to_md import convert_celes_to_md

    input_path, *extra = args
    markdown = convert_celes_to_md(read_file(input_path))
    if extra:
        write_file(extra[0], markdown)
        return
    print(markdown)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Maps each sub-command name to its handler.  The insertion order is the
# order in which the names are listed in the "Available commands" message.
COMMANDS = dict(
    parse=cmd_parse,
    validate=cmd_validate,
    md=cmd_md,
    tomd=cmd_tomd,
)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def main():
    """Entry point of the ``celes`` command-line tool.

    With no command, or with ``-h`` / ``--help`` as the first argument, the
    usage text is printed and the process exits with status 0.  A command
    that is not a key of ``COMMANDS`` prints an error plus the list of
    available commands and exits with status 1.  Otherwise the remaining
    command-line arguments are passed to the selected handler.
    """
    if len(sys.argv) < 2 or sys.argv[1] in ('-h', '--help'):
        print("""Celes 0.1 Toolkit

Usage:
celes parse <input.celes> [output.html] Convert Celes → HTML
celes validate <input.celes> Validate a Celes file
celes md <input.md> [output.celes] Convert Markdown → Celes
celes tomd <input.celes> [output.md] Convert Celes → Markdown

Examples:
celes parse doc.celes doc.html
celes validate doc.celes
celes md README.md README.celes
celes tomd doc.celes doc.md
""")
        sys.exit(0)

    command = sys.argv[1]
    if command not in COMMANDS:
        print(f'Unknown command: {command!r}')
        print(f'Available commands: {", ".join(COMMANDS)}')
        sys.exit(1)

    # Everything after the command name belongs to the handler.
    COMMANDS[command](sys.argv[2:])
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Run the CLI when this module is executed as a script.
if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Celes core utilities — shared across parser, validator, and converters.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Tag names grouped as self-closing.
SELF_CLOSING_TAGS = {'newline', 'pagebreak', 'insertspace'}

# Block-level tag names; the self-closing tags are block tags as well.
BLOCK_TAGS = {
    'title',
    'header',
    'line',
    'blockquote',
    'codeblock',
    'image',
    'linkimage',
    'list',
    'sublist',
    'table',
    'item',
} | SELF_CLOSING_TAGS

# Inline tag names.
INLINE_TAGS = {
    'bold',
    'italic',
    'bold+italic',
    'underline',
    'strike',
    'code',
    'link',
    'checkmark',
    'nestquote',
    'empty',
}

# Attributes each tag must carry, keyed by tag name.
REQUIRED_ATTRS = {
    'header': ['size'],
    'link': ['body'],
    'linkimage': ['image'],
    'checkmark': [],  # needs either -check or -uncheck key
}

# Allowed values per attribute name.
VALID_ATTR_VALUES = {
    'size': {str(level) for level in range(1, 7)},
    'bullet': {'circle', 'number'},
    'align': {'left', 'center', 'right'},
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def find_matching_brace(s, start):
    """Find the ``}`` that closes the ``{`` located at ``s[start]``.

    Nested brace pairs are skipped by tracking the nesting depth.

    Args:
        s: The string to scan.
        start: Index at which scanning begins (expected to hold ``{``).

    Returns:
        The index of the matching closing brace, or -1 if there is none.
    """
    open_count = 0
    pos = start
    limit = len(s)
    while pos < limit:
        ch = s[pos]
        if ch == '}':
            open_count -= 1
            if not open_count:
                return pos
        elif ch == '{':
            open_count += 1
        pos += 1
    return -1
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def parse_attributes(attr_str):
    """Turn a tag's attribute text into a dictionary.

    Attributes are written ``-key=value`` or as a bare ``-flag``.  A value
    runs until the next ``-name`` attribute or the end of the text, so it
    may contain spaces.

    Args:
        attr_str: The text following the tag name inside ``<...>``.

    Returns:
        A dict mapping each key to its stripped string value, or to
        ``True`` for a flag without a value.
    """
    matcher = re.compile(r'-(\w+)(?:=(?=\S)(.*?))?(?=\s+-\w|$)', re.DOTALL)
    parsed = {}
    for found in matcher.finditer(attr_str.strip()):
        name, raw_value = found.groups()
        parsed[name] = True if raw_value is None else raw_value.strip()
    return parsed
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def split_multi_tag_line(line):
    """Split a line holding several tags into one string per tag.

    e.g. '<list>{Item}<sublist>{Sub}' → ['<list>{Item}', '<sublist>{Sub}']

    Comment lines (starting with ``;``) and ``<!`` lines are returned
    whole.  Whitespace between two tags is ignored.  Malformed input is
    passed through rather than trimmed, so that the tag parser can report
    it: an unclosed ``<`` header or an unclosed ``{`` body yields the rest
    of the line as a single element.  Scanning stops at the first
    character that does not start a tag.

    Args:
        line: One line of Celes source.

    Returns:
        A list of tag strings; ``[line]`` (stripped) when nothing could be
        split off.
    """
    line = line.strip()
    if line.startswith((';', '<!')):
        return [line]

    result = []
    length = len(line)
    i = 0
    while i < length:
        # Tags separated by spaces are still adjacent tags; without this a
        # single space would silently drop every tag after the first.
        while i < length and line[i].isspace():
            i += 1
        if i >= length or line[i] != '<':
            break
        tag_end = line.find('>', i)
        if tag_end == -1:
            result.append(line[i:])
            break
        after_tag = tag_end + 1
        if after_tag < length and line[after_tag] == '{':
            close = find_matching_brace(line, after_tag)
            if close == -1:
                # Keep the unterminated body attached to its tag so the
                # parser reports an unclosed brace instead of seeing a
                # bare tag with its content thrown away.
                result.append(line[i:])
                break
            result.append(line[i:close + 1])
            i = close + 1
            continue
        result.append(line[i:after_tag])
        i = after_tag
    return result if result else [line]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def parse_tag_line(line):
    """Parse one tag string into a ``(tagname, attrs, content)`` tuple.

    Special results:
      * ``None`` for a blank string.
      * ``('comment', {}, text)`` for a line starting with ``;``.
      * ``('declaration', {}, line)`` for a ``<!Celes...`` line.
      * ``('error', {}, message)`` when the string cannot be parsed.

    A tag with nothing after its ``<...>`` header has ``content`` set to
    ``None``; otherwise ``content`` is the text between the tag's braces.
    Tag names are lower-cased.
    """
    line = line.strip()
    if line == '':
        return None
    if line[0] == ';':
        return ('comment', {}, line[1:].strip())
    if line.startswith('<!Celes'):
        return ('declaration', {}, line)
    if line[0] != '<':
        return ('error', {}, f'Line does not start with a tag: {line!r}')

    header_close = line.find('>')
    if header_close == -1:
        return ('error', {}, f'Unclosed tag header: {line!r}')

    parsed = re.match(r'^([\w+]+)(.*)', line[1:header_close], re.DOTALL)
    if parsed is None:
        return ('error', {}, f'Cannot parse tag name from: {line!r}')
    raw_name, raw_attrs = parsed.groups()
    tagname = raw_name.lower()
    attrs = parse_attributes(raw_attrs)

    # Whatever follows the header must be empty or a {...} body.
    body = line[header_close + 1:].strip()
    if body == '':
        return (tagname, attrs, None)
    if body[0] != '{':
        return ('error', {}, f'Missing {{...}} content after tag in: {line!r}')

    brace_close = find_matching_brace(body, 0)
    if brace_close == -1:
        return ('error', {}, f'Unclosed brace in: {line!r}')
    return (tagname, attrs, body[1:brace_close])
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def tokenize(source):
    """Break Celes source text into a flat list of tag tokens.

    Each non-blank line is split into its individual tags, and every tag
    that parses to a result becomes one
    ``(line_number, tagname, attrs, content)`` tuple.  Line numbers start
    at 1 and refer to the line the tag was found on.
    """
    collected = []
    line_number = 0
    for raw_line in source.splitlines():
        line_number += 1
        if raw_line.strip() == '':
            continue
        for piece in split_multi_tag_line(raw_line):
            if piece.strip() == '':
                continue
            parsed = parse_tag_line(piece)
            if parsed:
                collected.append((line_number,) + tuple(parsed))
    return collected
|