diary-docs 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diary/__init__.py +1 -0
- diary/__main__.py +3 -0
- diary/aimb/__init__.py +48 -0
- diary/aimb/hasher.py +157 -0
- diary/aimb/merge.py +252 -0
- diary/aimb/parser.py +202 -0
- diary/cli.py +999 -0
- diary/git_utils.py +202 -0
- diary/indexer/__init__.py +44 -0
- diary/indexer/database.py +340 -0
- diary/indexer/extractors.py +468 -0
- diary/indexer/gitignore.py +62 -0
- diary/indexer/indexer.py +511 -0
- diary/indexer/reporter.py +137 -0
- diary/indexer/scanner.py +65 -0
- diary/sync/__init__.py +33 -0
- diary/sync/detector.py +405 -0
- diary/sync/engine.py +404 -0
- diary/sync/protocol.py +176 -0
- diary/templates.py +102 -0
- diary_docs-0.1.0.dist-info/METADATA +228 -0
- diary_docs-0.1.0.dist-info/RECORD +26 -0
- diary_docs-0.1.0.dist-info/WHEEL +5 -0
- diary_docs-0.1.0.dist-info/entry_points.txt +2 -0
- diary_docs-0.1.0.dist-info/licenses/LICENSE +21 -0
- diary_docs-0.1.0.dist-info/top_level.txt +1 -0
diary/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
diary/__main__.py
ADDED
diary/aimb/__init__.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""AIMB — AI Managed Block parser, hasher, and merge for markdown files."""
|
|
2
|
+
|
|
3
|
+
from .hasher import (
|
|
4
|
+
compute_block_hash,
|
|
5
|
+
verify_block_hash,
|
|
6
|
+
hash_all_blocks,
|
|
7
|
+
get_stored_hashes,
|
|
8
|
+
find_changed_blocks,
|
|
9
|
+
)
|
|
10
|
+
from .merge import (
|
|
11
|
+
ConflictInfo,
|
|
12
|
+
MergeDecision,
|
|
13
|
+
apply_replace,
|
|
14
|
+
decide_block_action,
|
|
15
|
+
format_conflict_report,
|
|
16
|
+
generate_block_diff,
|
|
17
|
+
)
|
|
18
|
+
from .parser import (
|
|
19
|
+
AIMBBlock,
|
|
20
|
+
parse_aimb_blocks,
|
|
21
|
+
parse_frontmatter,
|
|
22
|
+
extract_manual_blocks,
|
|
23
|
+
has_aimb_blocks,
|
|
24
|
+
get_block_by_id,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
# parser
|
|
29
|
+
"AIMBBlock",
|
|
30
|
+
"parse_aimb_blocks",
|
|
31
|
+
"parse_frontmatter",
|
|
32
|
+
"extract_manual_blocks",
|
|
33
|
+
"has_aimb_blocks",
|
|
34
|
+
"get_block_by_id",
|
|
35
|
+
# hasher
|
|
36
|
+
"compute_block_hash",
|
|
37
|
+
"verify_block_hash",
|
|
38
|
+
"hash_all_blocks",
|
|
39
|
+
"get_stored_hashes",
|
|
40
|
+
"find_changed_blocks",
|
|
41
|
+
# merge
|
|
42
|
+
"MergeDecision",
|
|
43
|
+
"ConflictInfo",
|
|
44
|
+
"decide_block_action",
|
|
45
|
+
"generate_block_diff",
|
|
46
|
+
"format_conflict_report",
|
|
47
|
+
"apply_replace",
|
|
48
|
+
]
|
diary/aimb/hasher.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""AIMB block hasher — SHA256 computation and integrity checking.
|
|
2
|
+
|
|
3
|
+
Functions
|
|
4
|
+
---------
|
|
5
|
+
compute_block_hash
|
|
6
|
+
SHA256 hex digest of block content.
|
|
7
|
+
verify_block_hash
|
|
8
|
+
Compare computed hash against an expected value.
|
|
9
|
+
hash_all_blocks
|
|
10
|
+
Compute hashes for every AIMB block in markdown content.
|
|
11
|
+
get_stored_hashes
|
|
12
|
+
Extract the hashes stored in AIMB metadata tags.
|
|
13
|
+
find_changed_blocks
|
|
14
|
+
Detect blocks whose stored hash differs from the current computed hash.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import hashlib
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from .parser import parse_aimb_blocks
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ── Helpers ──────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _sha256(content: str) -> str:
|
|
29
|
+
"""Return hex-encoded SHA-256 digest of *content*."""
|
|
30
|
+
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ── Public API ───────────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def compute_block_hash(content: str) -> str:
    """Return the SHA-256 hex digest of *content*.

    Parameters
    ----------
    content : str
        The raw text content of an AIMB block (text between the opening
        ``<!-- ai-managed ... -->`` and closing ``<!-- /ai-managed -->``
        tags).  The text is hashed exactly as given; no whitespace
        normalisation happens here.

    Returns
    -------
    str
        64-character hex-encoded SHA-256 digest.

    Examples
    --------
    >>> compute_block_hash("test")
    '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'
    """
    # The doctest above shows the repr of the result (single quotes) so it
    # passes when run through ``python -m doctest``.
    return _sha256(content)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def verify_block_hash(content: str, expected_hash: str) -> bool:
    """Check whether *content* hashes to *expected_hash*.

    Parameters
    ----------
    content : str
        Raw block content to hash.
    expected_hash : str
        Previously recorded hex digest to compare against.  The
        comparison is an exact, case-sensitive string match.

    Returns
    -------
    bool
        ``True`` when the freshly computed digest equals
        *expected_hash*, ``False`` otherwise.
    """
    actual_hash = compute_block_hash(content)
    return actual_hash == expected_hash
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def hash_all_blocks(content: str) -> dict[str, str]:
    """Hash the body of every AIMB block found in *content*.

    Only the text **between** the opening and closing tags is hashed,
    never the tags themselves.

    Parameters
    ----------
    content : str
        Full markdown content with AIMB blocks.

    Returns
    -------
    dict[str, str]
        ``{block_id: computed_hash}`` for each parsed block; empty when
        no AIMB blocks are present.  If two blocks share an id, the
        later one wins.
    """
    hashes: dict[str, str] = {}
    for blk in parse_aimb_blocks(content):
        hashes[blk.id] = compute_block_hash(blk.content)
    return hashes
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def get_stored_hashes(content: str) -> dict[str, str]:
    """Collect the ``hash="..."`` values recorded in AIMB opening tags.

    Nothing is recomputed here: the values are read straight from each
    ``<!-- ai-managed ... -->`` tag as parsed.

    Parameters
    ----------
    content : str
        Full markdown content with AIMB blocks.

    Returns
    -------
    dict[str, str]
        ``{block_id: stored_hash}``; empty when no AIMB blocks are
        present.  If two blocks share an id, the later one wins.
    """
    recorded: dict[str, str] = {}
    for blk in parse_aimb_blocks(content):
        recorded[blk.id] = blk.hash
    return recorded
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def find_changed_blocks(old_content: str, new_content: str) -> list[dict[str, Any]]:
    """List AIMB blocks whose recorded hash no longer matches their content.

    The **stored hash** of each block is taken from the tag metadata in
    *old_content*; the **computed hash** is calculated from the block
    body in *new_content*.  Blocks that appear in only one of the two
    documents are ignored.

    Parameters
    ----------
    old_content : str
        Previous version of the markdown content (source of stored
        hashes).
    new_content : str
        Current version of the markdown content (source of the block
        bodies that get hashed).

    Returns
    -------
    list[dict[str, Any]]
        One dict per changed block with the keys ``id``, ``old_hash``
        and ``new_hash``, in the order the blocks occur in
        *old_content*.  Empty when nothing changed or no AIMB blocks
        exist.
    """
    recorded = get_stored_hashes(old_content)
    current = hash_all_blocks(new_content)

    return [
        {
            "id": block_id,
            "old_hash": recorded[block_id],
            "new_hash": current[block_id],
        }
        for block_id in recorded
        if block_id in current and recorded[block_id] != current[block_id]
    ]
|
diary/aimb/merge.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Replace-or-flag conflict resolution for AIMB blocks.
|
|
2
|
+
|
|
3
|
+
Provides conflict-averse merge logic for AI-Managed Blocks:
|
|
4
|
+
|
|
5
|
+
- :class:`MergeDecision` — three-way action enum (REPLACE / FLAG_CONFLICT / SKIP)
|
|
6
|
+
- :class:`ConflictInfo` — details about a detected conflict
|
|
7
|
+
- :func:`decide_block_action` — compare stored vs. current hash to decide
|
|
8
|
+
- :func:`generate_block_diff` — unified diff between two block versions
|
|
9
|
+
- :func:`format_conflict_report` — human-readable conflict summary
|
|
10
|
+
- :func:`apply_replace` — update a block's content and its metadata tags
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import difflib
|
|
16
|
+
import enum
|
|
17
|
+
import re
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from datetime import date
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from .hasher import compute_block_hash
|
|
23
|
+
from .parser import AIMBBlock, get_block_by_id
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ── Enum ─────────────────────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MergeDecision(enum.Enum):
    """Outcome chosen for one AIMB block during a merge.

    Members
    -------
    REPLACE
        No manual edit detected; the block is safe to overwrite.
    FLAG_CONFLICT
        The block was manually edited and needs human review.
    SKIP
        The block no longer exists in the new content.
    """

    REPLACE = "replace"
    FLAG_CONFLICT = "flag_conflict"
    SKIP = "skip"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ── Dataclass ─────────────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class ConflictInfo:
    """Record describing one merge conflict on an AIMB block.

    Attributes
    ----------
    block_id : str
        Identifier of the conflicting block.
    file_path : str
        Path to the file that contains the block.
    old_hash : str
        Hash that was expected (from the stored/snapshot record).
    current_hash : str
        Hash actually present in the current file metadata.
    diff_preview : str
        Unified diff between the stored and current block content;
        defaults to an empty string when no preview is available.
    """

    # Required identification and hash fields.
    block_id: str
    file_path: str
    old_hash: str
    current_hash: str
    # Optional, human-oriented diff text.
    diff_preview: str = ""
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ── Public API ────────────────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def decide_block_action(
    block: AIMBBlock,
    stored_hash: str,
    new_content: str,
) -> MergeDecision:
    """Choose the merge action for *block*.

    The checks run in this order:

    1. ``block.hash`` (current metadata) equals *stored_hash* (previous
       snapshot) → :attr:`MergeDecision.REPLACE`.  This check comes
       first, so it applies even when the block is absent from
       *new_content*.
    2. The hashes differ and no block with ``block.id`` exists in
       *new_content* → :attr:`MergeDecision.SKIP`.
    3. The hashes differ and the block is still present →
       :attr:`MergeDecision.FLAG_CONFLICT`.

    Parameters
    ----------
    block : AIMBBlock
        The block as it currently exists in the file (parsed metadata).
    stored_hash : str
        The hash recorded when the block was last written (snapshot).
    new_content : str
        The full markdown content being merged in; only used to check
        whether the block still exists.

    Returns
    -------
    MergeDecision
        The recommended action.
    """
    if block.hash != stored_hash:
        # Metadata hash diverged from the snapshot: either the block is
        # gone from the incoming content, or it needs review.
        still_present = get_block_by_id(new_content, block.id) is not None
        if still_present:
            return MergeDecision.FLAG_CONFLICT
        return MergeDecision.SKIP

    # Hashes agree: nothing indicates a manual edit.
    return MergeDecision.REPLACE
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def generate_block_diff(content_before: str, content_after: str) -> str:
    """Return a unified diff string comparing two versions of block content.

    Parameters
    ----------
    content_before : str
        Previous block content.
    content_after : str
        Current block content.

    Returns
    -------
    str
        Unified diff (3 context lines) labelled ``before`` / ``after``.
        Returns an empty string when the two contents are identical.
        A diffed line that has no line terminator (typically the last
        line of a block) is followed by a
        ``\\ No newline at end of file`` marker line, so every diff
        entry sits on its own line.
    """
    diff_lines = difflib.unified_diff(
        content_before.splitlines(keepends=True),
        content_after.splitlines(keepends=True),
        fromfile="before",
        tofile="after",
        n=3,
    )

    pieces: list[str] = []
    for diff_line in diff_lines:
        pieces.append(diff_line)
        # ``splitlines()`` returns the line unchanged only when it carries
        # no line break at all.  Without a terminator the next diff entry
        # would be glued onto this one ("-old+new"), so end the line and
        # add the conventional marker.
        if diff_line.splitlines() == [diff_line]:
            pieces.append("\n\\ No newline at end of file\n")
    return "".join(pieces)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def format_conflict_report(conflicts: list[ConflictInfo]) -> str:
    """Render *conflicts* as a human-readable markdown report.

    Parameters
    ----------
    conflicts : list[ConflictInfo]
        Zero or more conflict records.

    Returns
    -------
    str
        Markdown report: a title, the total count, then one numbered
        section per conflict containing a small table of hashes and,
        when ``diff_preview`` is non-empty, a fenced ``diff`` block.
        With no conflicts, only the title and a zero count are emitted.
    """
    report: list[str] = [
        "# Merge Conflict Report",
        "",
        f"Total conflicts: {len(conflicts)}",
        "",
    ]

    for number, conflict in enumerate(conflicts, start=1):
        report.extend(
            (
                f"## {number}. Block ``{conflict.block_id}`` in ``{conflict.file_path}``",
                "",
                "| Field | Value |",
                "|-------|-------|",
                f"| Stored hash (expected) | ``{conflict.old_hash}`` |",
                f"| Current hash (in file) | ``{conflict.current_hash}`` |",
                "| Status | Manual edit detected — review required |",
                "",
            )
        )

        if not conflict.diff_preview:
            continue

        # Trailing newlines are trimmed so the closing fence directly
        # follows the last diff line.
        report.extend(
            (
                "### Diff Preview",
                "",
                "```diff",
                conflict.diff_preview.rstrip("\n"),
                "```",
                "",
            )
        )

    return "\n".join(report)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def apply_replace(content: str, block_id: str, new_block_content: str) -> str:
    """Replace the content of an AIMB block in-place.

    1. Locates the first ``<!-- ai-managed id="<block_id>" ... -->`` tag
       pair.  The opening tag must list its attributes in the order
       ``id``, ``hash``, ``updated``.
    2. Replaces the text *between* the open and close tags with
       *new_block_content* (trailing newlines trimmed, exactly one
       newline before the closing tag).
    3. Updates the ``hash`` attribute in the opening tag.  The hash is
       computed over the content as the AIMB block pattern reads it
       back: leading whitespace is consumed by the opening-tag match
       and edge newlines are stripped.  The stored hash therefore equals
       the hash computed from the written block.
    4. Updates the ``updated`` attribute to today's date (YYYY-MM-DD).

    Parameters
    ----------
    content : str
        Full markdown content containing the AIMB block.
    block_id : str
        The ``id`` of the block to replace.
    new_block_content : str
        New content for the block (text that goes between the tags).

    Returns
    -------
    str
        Updated markdown content with the block replaced and its metadata
        tags refreshed.

    Raises
    ------
    ValueError
        If no AIMB block with *block_id* exists in *content*.
    """
    body = new_block_content.rstrip("\n")

    # Hash what a later parse of the written block will yield, not the raw
    # argument.  The opening-tag regex ends in ``-->\s*\n?`` and so swallows
    # any leading whitespace of the body, and the parser strips edge
    # newlines.  Hashing the raw text would make a block passed with a
    # trailing newline look "changed" right after it was written.
    new_hash = compute_block_hash(body.lstrip())
    today = date.today().isoformat()

    escaped_id = re.escape(block_id)
    pattern = re.compile(
        r"(<!--\s*ai-managed\s+"
        r'id="' + escaped_id + r'"\s+)'      # group 1: tag start through id
        r'hash="[^"]*"\s+'
        r'updated="[^"]*"'
        r"(\s*-->)\s*\n?"                    # group 2: end of opening tag
        r"(.*?)"                             # group 3: old body (discarded)
        r"(<!--\s*/ai-managed\s*-->)",       # group 4: closing tag
        re.DOTALL,
    )

    def _replace(m: re.Match) -> str:
        opening = m.group(1)
        sep = m.group(2)
        close_tag = m.group(4)

        new_opening = f'{opening}hash="{new_hash}" updated="{today}"{sep}\n'
        return new_opening + body + "\n" + close_tag

    # A callable replacement is used so backslashes in the new body are
    # never interpreted as regex replacement escapes.
    result, count = pattern.subn(_replace, content, count=1)
    if count == 0:
        raise ValueError(
            f"AIMB block '{block_id}' not found in content"
        )
    return result
|
diary/aimb/parser.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""AIMB block parser — extracts AI Managed Blocks and manual blocks from markdown content.
|
|
2
|
+
|
|
3
|
+
AIMB format::
|
|
4
|
+
|
|
5
|
+
<!-- ai-managed id="arch" hash="abc123" updated="2024-01-01" -->
|
|
6
|
+
... managed content ...
|
|
7
|
+
<!-- /ai-managed -->
|
|
8
|
+
|
|
9
|
+
Manual block format::
|
|
10
|
+
|
|
11
|
+
<!-- manual -->
|
|
12
|
+
... user-only content ...
|
|
13
|
+
<!-- /manual -->
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import re
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
# ── Regex patterns ──────────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
_AIMB_OPEN_RE = re.compile(
|
|
25
|
+
r'<!--\s*ai-managed\s+'
|
|
26
|
+
r'id="([^"]+)"\s+'
|
|
27
|
+
r'hash="([^"]*)"\s+'
|
|
28
|
+
r'updated="([^"]*)"\s*'
|
|
29
|
+
r'-->\s*\n?'
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
_AIMB_CLOSE_RE = re.compile(r'<!--\s*/ai-managed\s*-->')
|
|
33
|
+
|
|
34
|
+
# Combined AIMB block (open → content → close)
|
|
35
|
+
_AIMB_BLOCK_RE = re.compile(
|
|
36
|
+
r'<!--\s*ai-managed\s+'
|
|
37
|
+
r'id="([^"]+)"\s+'
|
|
38
|
+
r'hash="([^"]*)"\s+'
|
|
39
|
+
r'updated="([^"]*)"\s*'
|
|
40
|
+
r'-->\s*\n?'
|
|
41
|
+
r'(.*?)'
|
|
42
|
+
r'<!--\s*/ai-managed\s*-->',
|
|
43
|
+
re.DOTALL,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
_MANUAL_BLOCK_RE = re.compile(
|
|
47
|
+
r'<!--\s*manual\s*-->\s*\n?'
|
|
48
|
+
r'(.*?)'
|
|
49
|
+
r'<!--\s*/manual\s*-->',
|
|
50
|
+
re.DOTALL,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ── Dataclass ───────────────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class AIMBBlock:
    """One AI-Managed Block extracted from a markdown document.

    Attributes
    ----------
    id : str
        Unique block identifier (e.g. ``"arch"``, ``"decisions"``).
    hash : str
        Content hash as recorded in the opening tag.
    updated : str
        Date string recorded in the opening tag (ISO-8601 by convention).
    content : str
        Raw markdown *between* the opening and closing tags, with
        leading and trailing newlines stripped.
    start_line : int
        0-based line index of the opening tag in the source content.
    end_line : int
        0-based line index of the closing tag.
    is_manual : bool
        ``True`` if this is a user-manual (non-AIMB) block.
    """

    # Values taken from the opening tag and the block body.
    id: str
    hash: str
    updated: str
    content: str
    # Position information; defaults apply when the caller supplies none.
    start_line: int = 0
    end_line: int = 0
    is_manual: bool = False
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ── Public API ─────────────────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def parse_aimb_blocks(content: str) -> list[AIMBBlock]:
    """Parse all AIMB blocks from *content*.

    Parameters
    ----------
    content : str
        Markdown text to scan.

    Returns
    -------
    list[AIMBBlock]
        One :class:`AIMBBlock` per ``<!-- ai-managed ... -->`` /
        ``<!-- /ai-managed -->`` pair, in document order.  Each block's
        ``content`` has leading and trailing newlines stripped, and
        ``start_line`` / ``end_line`` are the 0-based lines on which the
        match begins and ends.  Empty list if no AIMB blocks exist.
    """
    blocks: list[AIMBBlock] = []

    for m in _AIMB_BLOCK_RE.finditer(content):
        block_id, block_hash, block_updated, raw_body = m.groups()

        blocks.append(
            AIMBBlock(
                id=block_id,
                hash=block_hash,
                updated=block_updated,
                content=raw_body.strip("\n"),
                # Line positions are derived from the match's character
                # offsets; no separate line list is needed.
                start_line=_offset_to_line(content, m.start()),
                end_line=_offset_to_line(content, m.end()),
                is_manual=False,
            )
        )

    return blocks
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def parse_frontmatter(content: str) -> dict[str, str]:
    """Parse YAML-style frontmatter from markdown content.

    Expects a block delimited by ``---`` fence lines at the *very
    beginning* of the content.  Both LF and CRLF line endings are
    accepted.  Supports simple ``key: value`` pairs only (no nested
    YAML, no lists).  Blank lines and lines starting with ``#`` are
    skipped, lines that do not look like ``key: value`` are ignored,
    and one pair of matching surrounding quotes is removed from a value.

    Parameters
    ----------
    content : str
        Markdown text, possibly starting with a frontmatter block.

    Returns
    -------
    dict
        Parsed frontmatter fields (may be empty).  A key that occurs
        more than once keeps its last value.
    """
    result: dict[str, str] = {}
    # ``\r?`` before each newline lets files saved with Windows line
    # endings match; LF-only input matches exactly as before.
    m = re.match(r"^---\r?\n(.*?)\r?\n---\r?(?:\n|$)", content, re.DOTALL)
    if not m:
        return result

    for line in m.group(1).split("\n"):
        # strip() also drops the stray "\r" left over from CRLF input.
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        kv = re.match(r"(\w[\w_-]*)\s*:\s*(.*)", line)
        if kv:
            key = kv.group(1).strip()
            value = kv.group(2).strip()
            # Strip optional surrounding quotes
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            result[key] = value

    return result
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def extract_manual_blocks(content: str) -> list[dict[str, Any]]:
    """Collect every ``<!-- manual -->`` / ``<!-- /manual -->`` block.

    Returns
    -------
    list[dict[str, Any]]
        One dict per block, in document order, with the keys
        ``content`` (block body with leading and trailing newlines
        stripped), ``start_line`` and ``end_line`` (0-based lines on
        which the match begins and ends).
    """
    return [
        {
            "content": found.group(1).strip("\n"),
            "start_line": _offset_to_line(content, found.start()),
            "end_line": _offset_to_line(content, found.end()),
        }
        for found in _MANUAL_BLOCK_RE.finditer(content)
    ]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def has_aimb_blocks(content: str) -> bool:
    """Return ``True`` if *content* contains an AIMB opening tag.

    Only the opening ``<!-- ai-managed ... -->`` tag is searched for;
    the presence of a matching closing tag is not checked.
    """
    first_open_tag = _AIMB_OPEN_RE.search(content)
    return first_open_tag is not None
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def get_block_by_id(content: str, block_id: str) -> AIMBBlock | None:
    """Find the first :class:`AIMBBlock` in *content* whose id is *block_id*.

    Returns ``None`` when no parsed block carries that id.
    """
    candidates = (blk for blk in parse_aimb_blocks(content) if blk.id == block_id)
    return next(candidates, None)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ── Internal helpers ───────────────────────────────────────────────────
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _offset_to_line(content: str, offset: int) -> int:
|
|
201
|
+
"""Convert a byte/char offset into a 0-based line number."""
|
|
202
|
+
return content[:offset].count("\n")
|