diary-docs 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diary/sync/__init__.py ADDED
@@ -0,0 +1,33 @@
1
+ """DIARY sync package — change detection, agent protocol, and git-backed sync logic."""
2
+
3
+ from diary.sync.detector import (
4
+ ChangeType,
5
+ DetectedChange,
6
+ detect_code_changes,
7
+ detect_doc_changes,
8
+ classify_changes,
9
+ )
10
+ from diary.sync.protocol import (
11
+ SyncReport,
12
+ SyncStats,
13
+ ChangeEntry,
14
+ ConflictEntry,
15
+ serialize_report,
16
+ parse_report,
17
+ get_ai_update_prompt,
18
+ )
19
+
20
+ __all__ = [
21
+ "ChangeType",
22
+ "DetectedChange",
23
+ "detect_code_changes",
24
+ "detect_doc_changes",
25
+ "classify_changes",
26
+ "SyncReport",
27
+ "SyncStats",
28
+ "ChangeEntry",
29
+ "ConflictEntry",
30
+ "serialize_report",
31
+ "parse_report",
32
+ "get_ai_update_prompt",
33
+ ]
diary/sync/detector.py ADDED
@@ -0,0 +1,405 @@
1
+ """Change detection pipeline for workspace code and docs.
2
+
3
+ Detects what changed, how it changed, and which AIMB blocks are affected.
4
+ v1 uses SHA-256 only — no AST/Symbol comparison.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+ import re
11
+ import subprocess
12
+ from dataclasses import dataclass, field
13
+ from enum import Enum
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from diary.aimb.hasher import compute_block_hash
18
+ from diary.aimb.parser import parse_aimb_blocks
19
+ from diary.git_utils import get_git_diff
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Types
23
+ # ---------------------------------------------------------------------------
24
+
25
+
26
class ChangeType(Enum):
    """Kinds of change a workspace scan can report.

    Every member's value is the lowercase spelling of its name.
    """

    # No difference found.  The detectors in this module document that they
    # omit unchanged files from their results.
    UNCHANGED = "unchanged"
    # A source file's digest no longer matches the one stored in the index.
    CODE_CHANGED = "code_changed"
    # A doc's content differs but none of its AIMB block hashes do.
    DOC_CHANGED_MANUALLY = "doc_changed_manually"
    # At least one AIMB block hash differs between the two versions of a doc.
    AIMB_BLOCK_CHANGED = "aimb_block_changed"
    # The file has no stored counterpart to compare against.
    NEW_FILE = "new_file"
    # The file is known to git but is no longer present on disk.
    DELETED_FILE = "deleted_file"
35
+
36
+
37
@dataclass
class DetectedChange:
    """Record describing one change found while scanning the workspace.

    Attributes
    ----------
    file_path : str
        Location of the file, relative to the workspace root and written
        with forward slashes.
    change_type : ChangeType
        The category this change was assigned to.
    affected_aimb_blocks : list[str]
        IDs of the AIMB blocks whose content hash differs.  Defaults to a
        fresh empty list per instance; most non-doc changes leave it empty.
    confidence : float
        Certainty of the classification, from 0.0 to 1.0.  Defaults to 1.0.
    """

    file_path: str
    change_type: ChangeType
    # default_factory gives every instance its own list instead of a shared one
    affected_aimb_blocks: list[str] = field(default_factory=list)
    confidence: float = 1.0
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Internal helpers
62
+ # ---------------------------------------------------------------------------
63
+
64
+
65
def _sha256(content: str) -> str:
    """Hash *content* (encoded as UTF-8) with SHA-256 and return the hex digest."""
    hasher = hashlib.sha256()
    hasher.update(content.encode("utf-8"))
    return hasher.hexdigest()
68
+
69
+
70
def _read_file_or_empty(path: Path) -> str:
    """Read *path* as UTF-8 text, returning ``""`` when it cannot be read.

    Parameters
    ----------
    path : Path
        File to read.

    Returns
    -------
    str
        The decoded file content, or an empty string if the file is missing,
        inaccessible, not a regular file, or not valid UTF-8.
    """
    try:
        return path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # OSError already covers FileNotFoundError/PermissionError/etc.
        # UnicodeDecodeError is a ValueError, so it must be listed explicitly;
        # otherwise a binary or non-UTF-8 file would abort the whole scan.
        return ""
76
+
77
+
78
def _git_show(workspace_path: Path, file_path: str) -> str:
    """Return the committed content of *file_path* at HEAD.

    Runs ``git show HEAD:<file_path>`` inside *workspace_path*.

    Parameters
    ----------
    workspace_path : Path
        Directory in which git is invoked.
    file_path : str
        Path of the file as passed to ``git show`` after the ``HEAD:`` prefix.

    Returns
    -------
    str
        The file content decoded as UTF-8, or an empty string when git exits
        with a non-zero status (for example because the file is not present
        at HEAD), when git cannot be started, or when the content is not
        valid UTF-8.  An empty committed file is therefore indistinguishable
        from a missing one.
    """
    try:
        result = subprocess.run(
            ["git", "show", f"HEAD:{file_path}"],
            stdout=subprocess.PIPE,
            # stderr is never inspected; discarding it also keeps localized
            # git messages from being decoded at all.
            stderr=subprocess.DEVNULL,
            # Decode explicitly as UTF-8 so the baseline is comparable with
            # working-tree content, which is read as UTF-8 as well.  The
            # locale default used by ``text=True`` is not UTF-8 everywhere.
            encoding="utf-8",
            cwd=str(workspace_path),
        )
    except (OSError, UnicodeDecodeError):
        # OSError: git missing or cwd unusable.  UnicodeDecodeError: blob is
        # not UTF-8 text.
        return ""
    return result.stdout if result.returncode == 0 else ""
95
+
96
+
97
def _strip_aimb_blocks(content: str) -> str:
    """Return *content* with every AIMB block (tags and body) removed.

    A block runs from an opening
    ``<!-- ai-managed id="..." hash="..." updated="..." -->`` comment, with
    the attributes in exactly that order, to the nearest following
    ``<!-- /ai-managed -->`` comment.  The text left over is stripped of
    leading and trailing whitespace before being returned.
    """
    aimb_pattern = (
        r"<!--\s*ai-managed\s+"
        r'id="[^"]*"\s+'
        r'hash="[^"]*"\s+'
        r'updated="[^"]*"\s*'
        r"-->\s*\n?"
        r".*?"
        r"<!--\s*/ai-managed\s*-->"
    )
    # DOTALL lets the non-greedy body match span multiple lines.
    outside_blocks = re.sub(aimb_pattern, "", content, flags=re.DOTALL)
    return outside_blocks.strip()
114
+
115
+
116
+ # ---------------------------------------------------------------------------
117
+ # Public API
118
+ # ---------------------------------------------------------------------------
119
+
120
+
121
def detect_code_changes(workspace: Path, index_db: Any) -> list[DetectedChange]:
    """Report source files whose state differs from what the index recorded.

    The files to inspect come from ``get_git_diff(workspace)``.  Each one that
    still exists is hashed with SHA-256 and compared against the ``sha256``
    value stored for its path in the index database's ``hashes`` table.

    Parameters
    ----------
    workspace : Path
        Repository root directory.
    index_db : Any
        An open :class:`~diary.indexer.database.IndexDatabase` instance; its
        ``conn`` attribute is used to run the lookup query.

    Returns
    -------
    list[DetectedChange]
        One entry per file that is deleted, new, or modified, in the order
        the diff listed them.  Files whose digest matches the stored one are
        left out.
    """
    detected: list[DetectedChange] = []

    for diff_entry in get_git_diff(workspace):
        git_status = diff_entry["status"]
        rel_path = diff_entry["path"]

        # Deleted according to git, or gone from disk since the diff was taken.
        # The short-circuit keeps the filesystem untouched for "D" entries.
        if git_status == "D" or not (workspace / rel_path).is_file():
            detected.append(
                DetectedChange(
                    file_path=rel_path,
                    change_type=ChangeType.DELETED_FILE,
                    confidence=1.0,
                )
            )
            continue

        digest_now = _sha256(_read_file_or_empty(workspace / rel_path))

        stored_row = index_db.conn.execute(
            "SELECT sha256 FROM hashes WHERE path = ?",
            (rel_path,),
        ).fetchone()

        if stored_row is None:
            # Nothing indexed for this path yet, so treat it as new.
            kind = ChangeType.NEW_FILE
        elif stored_row[0] != digest_now:
            # Stored digest is stale: the file was modified after indexing.
            kind = ChangeType.CODE_CHANGED
        else:
            continue

        detected.append(
            DetectedChange(
                file_path=rel_path,
                change_type=kind,
                confidence=1.0,
            )
        )

    return detected
203
+
204
+
205
def _collect_doc_candidates(workspace: Path) -> set[str]:
    """Return workspace-relative POSIX paths of the Markdown files to inspect."""
    candidates: set[str] = set()

    # Git-tracked .md files, wherever they live in the repository.
    try:
        listing = subprocess.run(
            # -z yields NUL-separated, *unquoted* paths.  Without it git
            # C-quotes names containing non-ASCII or special characters, and
            # the quoted form matches nothing on disk or in ``git show``.
            ["git", "ls-files", "-z", "--", "*.md"],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            encoding="utf-8",
            # A path that is not valid UTF-8 is mangled instead of aborting
            # the scan; it then matches no file and drops out downstream.
            errors="replace",
            cwd=str(workspace),
        )
    except OSError:
        listing = None  # git missing or workspace unusable: rely on the disk walk
    if listing is not None and listing.returncode == 0:
        candidates.update(entry for entry in listing.stdout.split("\0") if entry)

    # Files under docs/ on disk, which picks up untracked / new documents.
    docs_dir = workspace / "docs"
    if docs_dir.is_dir():
        for md_path in docs_dir.rglob("*.md"):
            if not md_path.is_file():
                continue
            try:
                candidates.add(md_path.relative_to(workspace).as_posix())
            except ValueError:
                pass

    return candidates


def detect_doc_changes(workspace: Path) -> list[DetectedChange]:
    """Detect documentation changes by comparing working tree against git HEAD.

    Considers every ``.md`` file listed by ``git ls-files`` plus every ``.md``
    file found on disk under ``docs/``.  For each one the working-tree content
    is compared with the content at HEAD, and AIMB block hashes are compared
    when the two differ.

    Classification rules
    --------------------
    * ``AIMB_BLOCK_CHANGED`` — at least one AIMB block hash differs (blocks
      that were added or removed count as differing).  Confidence is 1.0 when
      the text outside AIMB blocks is identical, otherwise 0.9.
    * ``DOC_CHANGED_MANUALLY`` — file content changed but no AIMB block
      hash differs (confidence 0.8).
    * ``NEW_FILE`` — file has content on disk but no content at git HEAD.
    * ``DELETED_FILE`` — file has content at git HEAD but is missing from
      disk.  A file that still exists but was emptied is classified by the
      content rules above, not as deleted.

    Because the HEAD lookup yields an empty string both for "not in HEAD" and
    for "empty at HEAD", those two cases cannot be told apart here.

    Parameters
    ----------
    workspace : Path
        Repository root directory.

    Returns
    -------
    list[DetectedChange]
        One entry per changed doc file, ordered by path.  Unchanged files are
        omitted.  ``affected_aimb_blocks`` is sorted so results are stable
        across runs.
    """
    changes: list[DetectedChange] = []

    for rel_path in sorted(_collect_doc_candidates(workspace)):
        full_path = workspace / rel_path
        # Decide deletion from the filesystem, not from empty content, so an
        # emptied or unreadable file is not misreported as deleted.
        on_disk = full_path.is_file()
        current = _read_file_or_empty(full_path) if on_disk else ""
        baseline = _git_show(workspace, rel_path)

        if not on_disk:
            if baseline:
                changes.append(
                    DetectedChange(
                        file_path=rel_path,
                        change_type=ChangeType.DELETED_FILE,
                        confidence=1.0,
                    )
                )
            continue

        # Exists on disk but nothing at HEAD → new file
        if current and not baseline:
            changes.append(
                DetectedChange(
                    file_path=rel_path,
                    change_type=ChangeType.NEW_FILE,
                    confidence=1.0,
                )
            )
            continue

        # Identical text hashes identically, so compare the text directly.
        if current == baseline:
            continue

        # --- Content differs — classify by per-block hashes ---
        baseline_blocks = parse_aimb_blocks(baseline) if baseline else []
        current_blocks = parse_aimb_blocks(current) if current else []
        baseline_hashes = {
            b.id: compute_block_hash(b.content) for b in baseline_blocks
        }
        current_hashes = {
            b.id: compute_block_hash(b.content) for b in current_blocks
        }

        # A block present on only one side yields None on the other, which
        # never equals a hash.  Sorting removes set-iteration randomness.
        changed_block_ids = sorted(
            bid
            for bid in baseline_hashes.keys() | current_hashes.keys()
            if baseline_hashes.get(bid) != current_hashes.get(bid)
        )

        if changed_block_ids:
            # Lower the confidence when text outside the blocks changed too.
            only_aimb = _strip_aimb_blocks(baseline) == _strip_aimb_blocks(current)
            changes.append(
                DetectedChange(
                    file_path=rel_path,
                    change_type=ChangeType.AIMB_BLOCK_CHANGED,
                    affected_aimb_blocks=changed_block_ids,
                    confidence=1.0 if only_aimb else 0.9,
                )
            )
        else:
            # Covers both "no AIMB blocks at all" and "blocks all unchanged".
            changes.append(
                DetectedChange(
                    file_path=rel_path,
                    change_type=ChangeType.DOC_CHANGED_MANUALLY,
                    confidence=0.8,
                )
            )

    return changes
355
+
356
+
357
def classify_changes(all_changes: list[DetectedChange]) -> dict[str, Any]:
    """Bucket detected changes by change type and by affected document.

    Parameters
    ----------
    all_changes : list[DetectedChange]
        Changes gathered from ``detect_code_changes`` and / or
        ``detect_doc_changes``.

    Returns
    -------
    dict
        Keys:

        * ``"by_type"``
            Maps each ``ChangeType`` member *name* to the changes of that type.
        * ``"by_doc"``
            Doc-level changes (AIMB block or manual edits) are keyed by their
            file path; any other change is filed under each of its affected
            AIMB block IDs, if it has any.
        * ``"summary"``
            Comma-separated ``"<count> <type>"`` pairs with the type name
            lowercased, sorted by type name
            (e.g. ``"1 aimb_block_changed, 3 code_changed"``), or
            ``"no changes detected"`` when there is nothing to report.
    """
    grouped_by_type: dict[str, list[DetectedChange]] = {}
    grouped_by_doc: dict[str, list[DetectedChange]] = {}

    for item in all_changes:
        grouped_by_type.setdefault(item.change_type.name, []).append(item)

        is_doc_edit = (
            item.change_type == ChangeType.AIMB_BLOCK_CHANGED
            or item.change_type == ChangeType.DOC_CHANGED_MANUALLY
        )
        if is_doc_edit:
            # Doc edits are grouped under the document that contains them.
            doc_keys = [item.file_path]
        else:
            # Everything else is grouped under the block IDs it touches.
            doc_keys = item.affected_aimb_blocks or ()

        for key in doc_keys:
            grouped_by_doc.setdefault(key, []).append(item)

    counts = [
        f"{len(members)} {type_name.lower()}"
        for type_name, members in sorted(grouped_by_type.items())
    ]
    summary = ", ".join(counts) or "no changes detected"

    return {
        "by_type": grouped_by_type,
        "by_doc": grouped_by_doc,
        "summary": summary,
    }