diary-docs 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diary/__init__.py +1 -0
- diary/__main__.py +3 -0
- diary/aimb/__init__.py +48 -0
- diary/aimb/hasher.py +157 -0
- diary/aimb/merge.py +252 -0
- diary/aimb/parser.py +202 -0
- diary/cli.py +999 -0
- diary/git_utils.py +202 -0
- diary/indexer/__init__.py +44 -0
- diary/indexer/database.py +340 -0
- diary/indexer/extractors.py +468 -0
- diary/indexer/gitignore.py +62 -0
- diary/indexer/indexer.py +511 -0
- diary/indexer/reporter.py +137 -0
- diary/indexer/scanner.py +65 -0
- diary/sync/__init__.py +33 -0
- diary/sync/detector.py +405 -0
- diary/sync/engine.py +404 -0
- diary/sync/protocol.py +176 -0
- diary/templates.py +102 -0
- diary_docs-0.1.0.dist-info/METADATA +228 -0
- diary_docs-0.1.0.dist-info/RECORD +26 -0
- diary_docs-0.1.0.dist-info/WHEEL +5 -0
- diary_docs-0.1.0.dist-info/entry_points.txt +2 -0
- diary_docs-0.1.0.dist-info/licenses/LICENSE +21 -0
- diary_docs-0.1.0.dist-info/top_level.txt +1 -0
diary/sync/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""DIARY sync package — change detection, agent protocol, and git-backed sync logic."""
|
|
2
|
+
|
|
3
|
+
from diary.sync.detector import (
|
|
4
|
+
ChangeType,
|
|
5
|
+
DetectedChange,
|
|
6
|
+
detect_code_changes,
|
|
7
|
+
detect_doc_changes,
|
|
8
|
+
classify_changes,
|
|
9
|
+
)
|
|
10
|
+
from diary.sync.protocol import (
|
|
11
|
+
SyncReport,
|
|
12
|
+
SyncStats,
|
|
13
|
+
ChangeEntry,
|
|
14
|
+
ConflictEntry,
|
|
15
|
+
serialize_report,
|
|
16
|
+
parse_report,
|
|
17
|
+
get_ai_update_prompt,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"ChangeType",
|
|
22
|
+
"DetectedChange",
|
|
23
|
+
"detect_code_changes",
|
|
24
|
+
"detect_doc_changes",
|
|
25
|
+
"classify_changes",
|
|
26
|
+
"SyncReport",
|
|
27
|
+
"SyncStats",
|
|
28
|
+
"ChangeEntry",
|
|
29
|
+
"ConflictEntry",
|
|
30
|
+
"serialize_report",
|
|
31
|
+
"parse_report",
|
|
32
|
+
"get_ai_update_prompt",
|
|
33
|
+
]
|
diary/sync/detector.py
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
"""Change detection pipeline for workspace code and docs.
|
|
2
|
+
|
|
3
|
+
Detects what changed, how it changed, and which AIMB blocks are affected.
|
|
4
|
+
v1 uses SHA-256 only — no AST/Symbol comparison.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from diary.aimb.hasher import compute_block_hash
|
|
18
|
+
from diary.aimb.parser import parse_aimb_blocks
|
|
19
|
+
from diary.git_utils import get_git_diff
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Types
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ChangeType(Enum):
    """Kinds of change the sync detectors can attach to a workspace file.

    Every member's value is the lowercase spelling of its name.
    """

    # No difference. The detectors in this module omit unchanged files
    # from their results rather than emitting this member.
    UNCHANGED = "unchanged"

    # A file's current SHA-256 differs from the hash stored in the index.
    CODE_CHANGED = "code_changed"

    # A doc's content differs, but no AIMB block hash differs.
    DOC_CHANGED_MANUALLY = "doc_changed_manually"

    # At least one AIMB block hash differs between the two doc versions.
    AIMB_BLOCK_CHANGED = "aimb_block_changed"

    # Present on disk but unknown to the baseline (index or git HEAD).
    NEW_FILE = "new_file"

    # Known to the baseline but missing from the working tree.
    DELETED_FILE = "deleted_file"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class DetectedChange:
    """Record describing one file-level change found by a detector.

    Attributes
    ----------
    file_path : str
        Workspace-relative path of the file, using forward slashes.
    change_type : ChangeType
        The classification assigned to this change.
    affected_aimb_blocks : list[str]
        IDs of the AIMB blocks whose content hash differs. Defaults to a
        fresh empty list per instance; typically only populated for
        documentation changes.
    confidence : float
        Detector certainty about ``change_type``, from 0.0 to 1.0.
        Defaults to 1.0.
    """

    file_path: str
    change_type: ChangeType
    # default_factory gives each instance its own list (no shared default).
    affected_aimb_blocks: list[str] = field(default_factory=list)
    confidence: float = 1.0
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# Internal helpers
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _sha256(content: str) -> str:
|
|
66
|
+
"""Return hex-encoded SHA-256 digest of *content*."""
|
|
67
|
+
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _read_file_or_empty(path: Path) -> str:
|
|
71
|
+
"""Read a text file, returning ``""`` on any error."""
|
|
72
|
+
try:
|
|
73
|
+
return path.read_text(encoding="utf-8")
|
|
74
|
+
except (FileNotFoundError, OSError):
|
|
75
|
+
return ""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _git_show(workspace_path: Path, file_path: str) -> str:
|
|
79
|
+
"""Return the committed content of *file_path* at HEAD.
|
|
80
|
+
|
|
81
|
+
Returns empty string if the file is not tracked in git or on error.
|
|
82
|
+
"""
|
|
83
|
+
try:
|
|
84
|
+
result = subprocess.run(
|
|
85
|
+
["git", "show", f"HEAD:{file_path}"],
|
|
86
|
+
capture_output=True,
|
|
87
|
+
text=True,
|
|
88
|
+
cwd=str(workspace_path),
|
|
89
|
+
)
|
|
90
|
+
if result.returncode == 0:
|
|
91
|
+
return result.stdout
|
|
92
|
+
except (FileNotFoundError, OSError):
|
|
93
|
+
pass
|
|
94
|
+
return ""
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _strip_aimb_blocks(content: str) -> str:
|
|
98
|
+
"""Remove all AIMB block tags and their content from *content*.
|
|
99
|
+
|
|
100
|
+
Returns the remainder (text outside AIMB blocks).
|
|
101
|
+
"""
|
|
102
|
+
# Matches: <!-- ai-managed id="..." hash="..." updated="..." --> ... <!-- /ai-managed -->
|
|
103
|
+
block_re = re.compile(
|
|
104
|
+
r"<!--\s*ai-managed\s+"
|
|
105
|
+
r'id="[^"]*"\s+'
|
|
106
|
+
r'hash="[^"]*"\s+'
|
|
107
|
+
r'updated="[^"]*"\s*'
|
|
108
|
+
r"-->\s*\n?"
|
|
109
|
+
r".*?"
|
|
110
|
+
r"<!--\s*/ai-managed\s*-->",
|
|
111
|
+
re.DOTALL,
|
|
112
|
+
)
|
|
113
|
+
return block_re.sub("", content).strip()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
# Public API
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def detect_code_changes(workspace: Path, index_db: Any) -> list[DetectedChange]:
    """Report files whose working-tree content no longer matches the index.

    The files to inspect come from ``get_git_diff(workspace)``. Each entry is
    read for its ``"status"`` and ``"path"`` keys. For every file that still
    exists, the SHA-256 of its current text is compared with the ``sha256``
    value stored for that path in the index database's ``hashes`` table.

    Parameters
    ----------
    workspace : Path
        Repository root directory; entry paths are resolved against it.
    index_db : Any
        Object exposing a ``conn`` attribute whose ``execute()`` accepts a
        parameterised SQL query (an open
        :class:`~diary.indexer.database.IndexDatabase` instance).

    Returns
    -------
    list[DetectedChange]
        One entry per changed file, in the order git reported them:

        * ``DELETED_FILE`` — git status is ``"D"`` or the file is not on disk.
        * ``NEW_FILE`` — the path has no row in ``hashes``.
        * ``CODE_CHANGED`` — the stored hash differs from the current one.

        Files whose hash matches the stored value are omitted.
    """
    detected: list[DetectedChange] = []

    for item in get_git_diff(workspace):
        git_status = item["status"]
        rel_path = item["path"]

        # Deleted according to git, or vanished between the diff and now.
        # The disk check is skipped when git already reported a deletion.
        if git_status == "D" or not (workspace / rel_path).is_file():
            kind = ChangeType.DELETED_FILE
        else:
            digest = _sha256(_read_file_or_empty(workspace / rel_path))

            stored = index_db.conn.execute(
                "SELECT sha256 FROM hashes WHERE path = ?",
                (rel_path,),
            ).fetchone()

            if stored is None:
                # Never indexed, so treat it as a new file.
                kind = ChangeType.NEW_FILE
            elif stored[0] != digest:
                # Indexed, but modified since the last index run.
                kind = ChangeType.CODE_CHANGED
            else:
                # Hash matches the index: nothing to report.
                continue

        detected.append(
            DetectedChange(file_path=rel_path, change_type=kind, confidence=1.0)
        )

    return detected
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _candidate_doc_paths(workspace: Path) -> set[str]:
    """Collect workspace-relative POSIX paths of ``.md`` files to inspect."""
    candidates: set[str] = set()

    # Markdown files known to git, wherever they live in the repository.
    # "-z" makes git emit NUL-separated, unquoted paths; without it, paths
    # with non-ASCII or special characters are C-quoted and would not match
    # any real file. Splitting on NUL also keeps leading/trailing spaces.
    try:
        listing = subprocess.run(
            ["git", "ls-files", "-z", "--", "*.md"],
            capture_output=True,
            text=True,
            encoding="utf-8",
            cwd=str(workspace),
        )
    except (OSError, UnicodeDecodeError):
        # Best effort: git missing, cwd unusable, or undecodable output.
        listing = None

    if listing is not None and listing.returncode == 0:
        candidates.update(p for p in listing.stdout.split("\0") if p)

    # Markdown files on disk under docs/, to pick up untracked/new files.
    docs_dir = workspace / "docs"
    if docs_dir.is_dir():
        for md_file in docs_dir.rglob("*.md"):
            if not md_file.is_file():
                continue
            try:
                candidates.add(md_file.relative_to(workspace).as_posix())
            except ValueError:
                continue

    return candidates


def detect_doc_changes(workspace: Path) -> list[DetectedChange]:
    """Detect documentation changes by comparing working tree against git HEAD.

    Inspects every ``.md`` file listed by ``git ls-files`` plus every ``.md``
    file found on disk under ``docs/``. For files whose content differs from
    the version at ``HEAD``, AIMB blocks are parsed from both versions and
    their per-block hashes compared.

    Classification rules
    --------------------
    * ``DELETED_FILE`` — file is missing from disk but has content at HEAD.
    * ``NEW_FILE`` — file has content on disk but none at HEAD.
    * ``AIMB_BLOCK_CHANGED`` — at least one AIMB block hash differs (a block
      added or removed counts). Confidence is 1.0 when the text outside
      AIMB blocks is identical, otherwise 0.9.
    * ``DOC_CHANGED_MANUALLY`` — content differs but no AIMB block hash
      does (confidence 0.8).

    A file that still exists but has been emptied is classified by the
    rules for changed content, not as ``DELETED_FILE``.

    Parameters
    ----------
    workspace : Path
        Repository root directory.

    Returns
    -------
    list[DetectedChange]
        One entry per changed doc file, ordered by path. Unchanged files
        are omitted. ``affected_aimb_blocks`` is sorted so results are
        reproducible between runs.
    """
    changes: list[DetectedChange] = []

    for rel_path in sorted(_candidate_doc_paths(workspace)):
        full_path = workspace / rel_path
        on_disk = full_path.is_file()
        current = _read_file_or_empty(full_path) if on_disk else ""
        baseline = _git_show(workspace, rel_path)

        # Decide deletion from the file's presence on disk, not from its
        # content being empty.
        if not on_disk:
            if baseline:
                changes.append(
                    DetectedChange(
                        file_path=rel_path,
                        change_type=ChangeType.DELETED_FILE,
                        confidence=1.0,
                    )
                )
            continue

        # Content on disk but nothing at HEAD means a new file.
        if current and not baseline:
            changes.append(
                DetectedChange(
                    file_path=rel_path,
                    change_type=ChangeType.NEW_FILE,
                    confidence=1.0,
                )
            )
            continue

        if current == baseline:
            continue  # unchanged

        # --- Content differs: classify ---
        baseline_hashes = {
            b.id: compute_block_hash(b.content)
            for b in parse_aimb_blocks(baseline)
        }
        current_hashes = {
            b.id: compute_block_hash(b.content)
            for b in parse_aimb_blocks(current)
        }

        # A block present on only one side compares against None and so
        # counts as changed. Sorting avoids exposing set iteration order,
        # which varies between interpreter runs for strings.
        changed_block_ids = sorted(
            bid
            for bid in set(baseline_hashes) | set(current_hashes)
            if baseline_hashes.get(bid) != current_hashes.get(bid)
        )

        if not changed_block_ids:
            # Covers both "no AIMB blocks at all" and "blocks untouched".
            changes.append(
                DetectedChange(
                    file_path=rel_path,
                    change_type=ChangeType.DOC_CHANGED_MANUALLY,
                    confidence=0.8,
                )
            )
            continue

        # Lower the confidence slightly when text outside the AIMB blocks
        # changed as well.
        only_aimb = _strip_aimb_blocks(baseline) == _strip_aimb_blocks(current)
        changes.append(
            DetectedChange(
                file_path=rel_path,
                change_type=ChangeType.AIMB_BLOCK_CHANGED,
                affected_aimb_blocks=changed_block_ids,
                confidence=1.0 if only_aimb else 0.9,
            )
        )

    return changes
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def classify_changes(all_changes: list[DetectedChange]) -> dict[str, Any]:
    """Bucket detected changes by change type and by affected documentation.

    Parameters
    ----------
    all_changes : list[DetectedChange]
        Changes to group, e.g. the combined output of
        ``detect_code_changes`` and / or ``detect_doc_changes``.

    Returns
    -------
    dict
        A mapping with three keys:

        * ``"by_type"`` — ``{ChangeType member name: [DetectedChange, ...]}``.
        * ``"by_doc"`` — ``{key: [DetectedChange, ...]}``. For
          ``AIMB_BLOCK_CHANGED`` and ``DOC_CHANGED_MANUALLY`` changes the key
          is the file path. For any other change it is filed once under each
          ID in ``affected_aimb_blocks`` (none if that list is empty).
        * ``"summary"`` — counts per type, lower-cased and sorted by type
          name (e.g. ``"1 aimb_block_changed, 3 code_changed"``), or
          ``"no changes detected"`` when there are no changes.
    """
    grouped_by_type: dict[str, list[DetectedChange]] = {}
    grouped_by_doc: dict[str, list[DetectedChange]] = {}

    for item in all_changes:
        kind = item.change_type
        grouped_by_type.setdefault(kind.name, []).append(item)

        # Doc-level changes are filed under their own file; everything else
        # is filed under each AIMB block it touches, if any.
        if kind in (
            ChangeType.AIMB_BLOCK_CHANGED,
            ChangeType.DOC_CHANGED_MANUALLY,
        ):
            doc_keys = [item.file_path]
        else:
            doc_keys = item.affected_aimb_blocks or ()

        for key in doc_keys:
            grouped_by_doc.setdefault(key, []).append(item)

    # One "<count> <type>" fragment per type, in sorted type-name order.
    joined = ", ".join(
        f"{len(members)} {type_name.lower()}"
        for type_name, members in sorted(grouped_by_type.items())
    )

    return {
        "by_type": grouped_by_type,
        "by_doc": grouped_by_doc,
        "summary": joined or "no changes detected",
    }
|