docsync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docsync/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from docsync.commands.cascade import find_affected_docs
2
+ from docsync.commands.check import check_refs
3
+ from docsync.core.parser import parse_doc
4
+
5
+ __all__ = ["parse_doc", "check_refs", "find_affected_docs"]
docsync/cli.py ADDED
@@ -0,0 +1,50 @@
1
+ import argparse
2
+ import sys
3
+ from importlib.metadata import version
4
+ from pathlib import Path
5
+
6
+ from docsync.commands import cascade, check, init, sync, tree
7
+
8
# Package version resolved from installed metadata (importlib.metadata).
VERSION = version("docsync")
9
+
10
+
11
def main():
    """CLI entry point: parse args and dispatch to the matching subcommand.

    Each subcommand's run() returns a process exit code, which is passed
    straight to sys.exit().
    """
    parser = argparse.ArgumentParser(description="Keep docs in sync with code")
    parser.add_argument("-v", "--version", action="version", version=f"docsync {VERSION}")
    # required=True: invoking `docsync` with no subcommand is an error.
    subparsers = parser.add_subparsers(dest="command", required=True)

    check_parser = subparsers.add_parser("check", help="validate all refs exist")
    check_parser.add_argument("path", type=Path, help="docs directory to check")

    cascade_parser = subparsers.add_parser("cascade", help="list docs affected by git diff")
    cascade_parser.add_argument("commit", help="commit ref (e.g., HEAD~1, abc123)")
    cascade_parser.add_argument("--docs", type=Path, default=Path("docs"), help="docs directory")

    sync_parser = subparsers.add_parser("sync", help="generate prompt for AI to fix docs")
    sync_parser.add_argument("path", type=Path, help="docs directory")
    sync_parser.add_argument("--incremental", action="store_true", help="only include changed docs")
    sync_parser.add_argument("--json", action="store_true", help="output as JSON instead of text")
    sync_parser.add_argument("--parallel", action="store_true", help="ignore dependencies, sync all at once")
    sync_parser.add_argument("--update-lock", action="store_true", help="update lock.json with current commit")

    tree_parser = subparsers.add_parser("tree", help="show doc dependency tree")
    tree_parser.add_argument("path", type=Path, help="docs directory")

    subparsers.add_parser("init", help="create .docsync/ folder")

    args = parser.parse_args()

    if args.command == "check":
        sys.exit(check.run(args.path))
    elif args.command == "cascade":
        sys.exit(cascade.run(args.commit, args.docs))
    elif args.command == "sync":
        sys.exit(sync.run(args.path, args.incremental, args.json, args.parallel, args.update_lock))
    elif args.command == "tree":
        sys.exit(tree.run(args.path))
    elif args.command == "init":
        sys.exit(init.run())


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,120 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+ from typing import NamedTuple
7
+
8
+ from docsync.core.config import Config, find_repo_root
9
+ from docsync.core.parser import parse_doc
10
+
11
+
12
class CascadeResult(NamedTuple):
    """Outcome of a cascade analysis for one commit range."""

    affected_docs: list[Path]  # union of direct and cascade hits (deduplicated)
    direct_hits: list[Path]  # docs whose `related sources` mention a changed file
    cascade_hits: list[Path]  # docs that transitively reference a direct hit
    circular_refs: list[tuple[Path, Path]]  # (doc, referencing_doc) pairs seen twice
17
+
18
+
19
def find_affected_docs(
    docs_path: Path, commit_ref: str, config: Config, repo_root: Path | None = None
) -> CascadeResult:
    """Determine which docs need review after the changes since *commit_ref*.

    Direct hits reference a changed source file; cascade hits transitively
    reference a direct hit, capped at config.cascade_depth_limit levels.
    """
    root = find_repo_root(docs_path) if repo_root is None else repo_root
    changed = _get_changed_files(commit_ref, root)
    if not changed:
        # No diff (or git unavailable): nothing to report.
        return CascadeResult([], [], [], [])
    source_index, doc_index = _build_indexes(docs_path, root)
    direct = _find_direct_hits(changed, source_index)
    cascaded, cycles = _cascade(direct, doc_index, config.cascade_depth_limit)
    return CascadeResult(
        affected_docs=list(set(direct) | set(cascaded)),
        direct_hits=direct,
        cascade_hits=cascaded,
        circular_refs=cycles,
    )
34
+
35
+
36
+ def _get_changed_files(commit_ref: str, repo_root: Path) -> list[str]:
37
+ try:
38
+ result = subprocess.run(
39
+ ["git", "diff", "--name-only", commit_ref], capture_output=True, text=True, check=True, cwd=repo_root
40
+ )
41
+ return [f.strip() for f in result.stdout.splitlines() if f.strip()]
42
+ except subprocess.CalledProcessError:
43
+ return []
44
+
45
+
46
+ def _build_indexes(docs_path: Path, repo_root: Path) -> tuple[dict[str, list[Path]], dict[Path, list[Path]]]:
47
+ source_to_docs: dict[str, list[Path]] = defaultdict(list)
48
+ doc_to_docs: dict[Path, list[Path]] = defaultdict(list)
49
+ doc_files = list(docs_path.rglob("*.md"))
50
+ for doc_file in doc_files:
51
+ try:
52
+ parsed = parse_doc(doc_file)
53
+ except Exception:
54
+ continue
55
+ for ref in parsed.related_sources:
56
+ source_to_docs[ref.path].append(doc_file)
57
+ for ref in parsed.related_docs:
58
+ ref_path = repo_root / ref.path
59
+ if ref_path.exists():
60
+ doc_to_docs[ref_path].append(doc_file)
61
+ return source_to_docs, doc_to_docs
62
+
63
+
64
+ def _find_direct_hits(changed_files: list[str], source_to_docs: dict[str, list[Path]]) -> list[Path]:
65
+ hits = []
66
+ for changed in changed_files:
67
+ if changed in source_to_docs:
68
+ hits.extend(source_to_docs[changed])
69
+ for source_ref, docs in source_to_docs.items():
70
+ if source_ref.endswith("/") and changed.startswith(source_ref):
71
+ hits.extend(docs)
72
+ return list(set(hits))
73
+
74
+
75
def _cascade(
    initial_docs: list[Path], doc_to_docs: dict[Path, list[Path]], depth_limit: int | None
) -> tuple[list[Path], list[tuple[Path, Path]]]:
    """Breadth-first walk of reverse doc references starting from the direct hits.

    doc_to_docs maps a doc to the docs that reference it, so each BFS level
    adds the docs citing the previous level. depth_limit of None means
    unlimited depth. Returns (cascade hits in discovery order, pairs that
    would have revisited an already-seen doc).
    """
    cascade_hits = []
    circular_refs = []
    visited = set(initial_docs)
    current_level = set(initial_docs)
    depth = 0
    while current_level:
        if depth_limit is not None and depth >= depth_limit:
            break
        next_level = set()
        for doc in current_level:
            for referencing_doc in doc_to_docs.get(doc, []):
                if referencing_doc in visited:
                    # Re-reaching a cascade hit indicates a reference cycle;
                    # re-reaching an initial doc is expected and not flagged.
                    if referencing_doc not in initial_docs:
                        circular_refs.append((doc, referencing_doc))
                    continue
                visited.add(referencing_doc)
                cascade_hits.append(referencing_doc)
                next_level.add(referencing_doc)
        current_level = next_level
        depth += 1
    return cascade_hits, circular_refs
99
+
100
+
101
def run(commit_ref: str, docs_path: Path) -> int:
    """CLI entry for `docsync cascade`: print affected docs, grouped by kind."""
    # Lazy import keeps module load light and mirrors the other commands.
    from docsync.core.config import load_config

    config = load_config()
    result = find_affected_docs(docs_path, commit_ref, config)
    if not result.affected_docs:
        print("No docs affected")
        return 0
    print(f"Direct hits ({len(result.direct_hits)}):")
    for doc in result.direct_hits:
        print(f"  {doc}")
    if result.cascade_hits:
        print(f"\nCascade hits ({len(result.cascade_hits)}):")
        for doc in result.cascade_hits:
            print(f"  {doc}")
    if result.circular_refs:
        print("\nWarning: circular refs detected:")
        for src, dst in result.circular_refs:
            print(f"  {src} <-> {dst}")
    # Always success: affected docs are informational, not an error.
    return 0
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ import fnmatch
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ from typing import Iterator
7
+
8
+ from docsync.core.config import Config, find_repo_root
9
+ from docsync.core.parser import RefEntry, parse_doc
10
+
11
+
12
@dataclass
class RefError:
    """A single broken reference found while checking a doc."""

    doc_path: Path  # doc containing the bad ref
    ref: RefEntry  # offending entry; path == "" and line_number == 0 for parse failures
    message: str  # human-readable description printed by `docsync check`
17
+
18
+
19
@dataclass
class CheckResult:
    """Validation outcome for one markdown doc."""

    doc_path: Path
    errors: list[RefError] = field(default_factory=list)

    @property
    def ok(self) -> bool:
        """True when no reference errors were recorded for this doc."""
        return not self.errors
27
+
28
+
29
def check_refs(docs_path: Path, config: Config, repo_root: Path | None = None) -> Iterator[CheckResult]:
    """Yield a CheckResult for every non-ignored markdown doc under docs_path.

    Refs are resolved against repo_root (auto-detected via .git when omitted).
    """
    docs_path = docs_path.resolve()
    root = find_repo_root(docs_path) if repo_root is None else repo_root
    for md_file in docs_path.rglob("*.md"):
        if _is_ignored(md_file, config.ignored_paths, root):
            continue
        yield _check_single_doc(md_file, root)
38
+
39
+
40
def _check_single_doc(doc_path: Path, repo_root: Path) -> CheckResult:
    """Validate every ref in one doc; never raises.

    Parse failures become a single synthetic RefError (empty path, line 0).
    Source refs additionally pass if they match as a glob pattern.
    """
    result = CheckResult(doc_path=doc_path)
    try:
        parsed = parse_doc(doc_path)
    except Exception as e:
        result.errors.append(
            RefError(
                doc_path=doc_path,
                ref=RefEntry(path="", description="", line_number=0),
                message=f"failed to parse doc: {e}",
            )
        )
        return result
    for ref in parsed.related_docs:
        ref_path = repo_root / ref.path
        if not ref_path.exists():
            result.errors.append(RefError(doc_path=doc_path, ref=ref, message=f"related doc not found: {ref.path}"))
    for ref in parsed.related_sources:
        ref_path = repo_root / ref.path
        # A source ref may be a literal path or a glob; either form counts.
        if not ref_path.exists() and not _glob_matches(ref.path, repo_root):
            result.errors.append(RefError(doc_path=doc_path, ref=ref, message=f"related source not found: {ref.path}"))
    return result
62
+
63
+
64
+ def _glob_matches(pattern: str, repo_root: Path) -> bool:
65
+ if "*" in pattern or "?" in pattern:
66
+ matches = list(repo_root.glob(pattern))
67
+ return len(matches) > 0
68
+ return False
69
+
70
+
71
+ def _is_ignored(path: Path, ignored_patterns: list[str], repo_root: Path) -> bool:
72
+ rel_path = str(path.relative_to(repo_root))
73
+ for pattern in ignored_patterns:
74
+ if fnmatch.fnmatch(rel_path, pattern):
75
+ return True
76
+ return False
77
+
78
+
79
def run(docs_path: Path) -> int:
    """CLI entry for `docsync check`: print broken refs, return 1 if any."""
    from docsync.core.config import load_config

    config = load_config()
    has_errors = False
    for result in check_refs(docs_path, config):
        if not result.ok:
            has_errors = True
            for error in result.errors:
                # compiler-style "path:line: message" output
                print(f"{result.doc_path}:{error.ref.line_number}: {error.message}")
    if has_errors:
        return 1
    print("All refs valid")
    return 0
@@ -0,0 +1,9 @@
1
+ from pathlib import Path
2
+
3
+ from docsync.core.config import init_docsync
4
+
5
+
6
def run() -> int:
    """CLI entry for `docsync init`: scaffold .docsync/ in the current directory."""
    created = init_docsync(Path.cwd())
    print(f"Created {created}/")
    return 0
@@ -0,0 +1,172 @@
1
+ import fnmatch
2
+ import json
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from docsync.core.config import Config, find_repo_root
8
+ from docsync.core.constants import DOCSYNC_DIR, SYNC_FILENAME, SYNCS_DIR, load_default_prompt
9
+ from docsync.core.parser import parse_doc
10
+
11
+
12
def generate_validation_report(docs_path: Path, config: Config, incremental: bool = False) -> dict[str, Any]:
    """Collect every doc's metadata into a JSON-serializable report dict.

    With incremental=True, the doc set is restricted to docs affected since
    the commit recorded in lock.json (all docs when no commit is recorded).
    Note: unlike `docsync check`, parse failures here propagate to the caller.
    """
    docs_path = docs_path.resolve()
    repo_root = find_repo_root(docs_path)
    doc_files = list(docs_path.rglob("*.md"))
    metadata: dict[str, Any] = {"incremental": incremental}
    if incremental:
        # Lazy imports: only needed on the incremental path.
        from docsync.commands.cascade import find_affected_docs
        from docsync.core.lock import load_lock

        lock = load_lock(repo_root)
        if lock.last_analyzed_commit:
            result = find_affected_docs(docs_path, lock.last_analyzed_commit, config, repo_root)
            affected_set = set(result.affected_docs)
            doc_files = [f for f in doc_files if f in affected_set]
            metadata["since_commit"] = lock.last_analyzed_commit
        else:
            # No baseline recorded: fall back to the full doc set.
            metadata["since_commit"] = None
    docs = []
    for doc_file in doc_files:
        if _is_ignored(doc_file, config.ignored_paths, repo_root):
            continue
        parsed = parse_doc(doc_file)
        rel_path = str(doc_file.relative_to(repo_root))
        docs.append(
            {
                "path": rel_path,
                "related_docs": [ref.path for ref in parsed.related_docs],
                "related_sources": [ref.path for ref in parsed.related_sources],
            }
        )
    return {
        "repo_root": str(repo_root),
        "metadata": metadata,
        "docs": docs,
    }
47
+
48
+
49
+ def _is_ignored(path: Path, ignored_patterns: list[str], repo_root: Path) -> bool:
50
+ rel_path = str(path.relative_to(repo_root))
51
+ for pattern in ignored_patterns:
52
+ if fnmatch.fnmatch(rel_path, pattern):
53
+ return True
54
+ return False
55
+
56
+
57
def print_validation_report(docs_path: Path, config: Config, incremental: bool = False) -> str:
    """Return the validation report rendered as pretty-printed JSON.

    NOTE(review): despite the name this returns the string rather than
    printing it; run() does the actual printing.
    """
    report = generate_validation_report(docs_path, config, incremental)
    return json.dumps(report, indent=2)
60
+
61
+
62
def _load_prompt_template(repo_root: Path, parallel: bool) -> str:
    """Prefer the repo's .docsync/sync.md override; else the bundled default."""
    override = repo_root / DOCSYNC_DIR / SYNC_FILENAME
    if override.exists():
        return override.read_text()
    return load_default_prompt(parallel)
67
+
68
+
69
def _format_docs_list(docs: list[dict[str, Any]]) -> str:
    """Render report doc entries as a numbered list for the parallel prompt.

    Each entry is followed by optional indented "sources:" / "related docs:"
    lines and a blank separator line.
    """
    lines = []
    for i, doc in enumerate(docs, 1):
        lines.append(f"{i}. {doc['path']}")
        if doc["related_sources"]:
            sources = ", ".join(doc["related_sources"])
            lines.append(f"   sources: {sources}")
        if doc["related_docs"]:
            related = ", ".join(doc["related_docs"])
            lines.append(f"   related docs: {related}")
        lines.append("")
    return "\n".join(lines)
81
+
82
+
83
def _format_phases(levels: list[list[dict[str, Any]]]) -> str:
    """Render dependency levels as sequential phases for the phased prompt.

    Phase numbers are 1-based while level indices are 0-based; empty levels
    are skipped.
    """
    lines = []
    for i, level_docs in enumerate(levels):
        if not level_docs:
            continue
        if i == 0:
            lines.append("Phase 1 - Independent (launch parallel):")
        else:
            lines.append(f"\nPhase {i + 1} - Level {i} (after phase {i} completes):")
        for doc in level_docs:
            lines.append(f"  {doc['path']}")
            if doc["related_sources"]:
                sources = ", ".join(doc["related_sources"])
                lines.append(f"    sources: {sources}")
        lines.append("")
    return "\n".join(lines)
99
+
100
+
101
def _get_syncs_dir() -> str:
    """Repo-relative directory for this run's sync reports.

    Example: ".docsync/syncs/2024-01-01T12-00-00" (local time, path-safe).
    """
    timestamp = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
    # Use the shared constant instead of a hard-coded ".docsync" literal so a
    # rename of the dotdir cannot silently diverge from the rest of the tool.
    return f"{DOCSYNC_DIR}/{SYNCS_DIR}/{timestamp}"
104
+
105
+
106
def _build_sync_levels(docs: list[dict[str, Any]], repo_root: Path) -> list[list[dict[str, Any]]]:
    """Group report doc entries into dependency levels (level 0 = no deps).

    Only `related docs` pointing at other docs in this report count as
    dependencies. Cycles are broken by treating a doc already on the current
    recursion path as level 0. Empty levels are dropped from the result.
    """
    doc_paths = {repo_root / d["path"] for d in docs}
    doc_by_path = {repo_root / d["path"]: d for d in docs}
    deps: dict[Path, list[Path]] = {}
    for d in docs:
        path = repo_root / d["path"]
        # Keep only deps that are themselves part of this sync batch.
        deps[path] = [repo_root / rd for rd in d["related_docs"] if (repo_root / rd) in doc_paths]
    assigned: dict[Path, int] = {}

    def get_level(doc: Path, visiting: set[Path]) -> int:
        # Memoized depth-first level: 1 + max level of dependencies.
        if doc in assigned:
            return assigned[doc]
        if doc in visiting:
            # Cycle: cut it off at level 0 rather than recursing forever.
            return 0
        if not deps.get(doc):
            assigned[doc] = 0
            return 0
        visiting.add(doc)
        max_dep = max((get_level(dep, visiting) for dep in deps[doc]), default=-1)
        visiting.remove(doc)
        level = max_dep + 1
        assigned[doc] = level
        return level

    for path in doc_paths:
        get_level(path, set())
    max_level = max(assigned.values()) if assigned else 0
    levels: list[list[dict[str, Any]]] = [[] for _ in range(max_level + 1)]
    for path, level in assigned.items():
        levels[level].append(doc_by_path[path])
    return [level for level in levels if level]
137
+
138
+
139
def generate_sync_prompt(docs_path: Path, config: Config, incremental: bool = False, parallel: bool = False) -> str:
    """Render the AI sync prompt: flat doc list (parallel) or dependency phases.

    The template (bundled or the repo's .docsync/sync.md override) must use
    the placeholders for the chosen mode: {count}/{syncs_dir} plus
    {docs_list} (parallel) or {phases} (phased).
    """
    report = generate_validation_report(docs_path, config, incremental)
    docs = report["docs"]
    if not docs:
        return "No docs to sync."
    repo_root = Path(report["repo_root"])
    syncs_dir = _get_syncs_dir()
    template = _load_prompt_template(repo_root, parallel)

    if parallel:
        docs_list = _format_docs_list(docs)
        return template.format(count=len(docs), docs_list=docs_list, syncs_dir=syncs_dir)
    else:
        levels = _build_sync_levels(docs, repo_root)
        phases = _format_phases(levels)
        return template.format(count=len(docs), phases=phases, syncs_dir=syncs_dir)
155
+
156
+
157
def run(docs_path: Path, incremental: bool, as_json: bool, parallel: bool, update_lock: bool = False) -> int:
    """CLI entry for `docsync sync`: print the prompt (or JSON report).

    With update_lock=True, also records the current HEAD in lock.json so the
    next --incremental run diffs from this point.
    """
    from docsync.core.config import find_repo_root, load_config
    from docsync.core.lock import Lock, get_current_commit, save_lock

    config = load_config()
    if as_json:
        print(print_validation_report(docs_path, config, incremental))
    else:
        print(generate_sync_prompt(docs_path, config, incremental, parallel))
    if update_lock:
        repo_root = find_repo_root(docs_path)
        commit = get_current_commit()
        # commit is None outside a git repo; silently skip the lock update then.
        if commit:
            lock = Lock({"last_analyzed_commit": commit})
            save_lock(lock, repo_root)
    return 0
@@ -0,0 +1,121 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+ from pathlib import Path
5
+ from typing import NamedTuple
6
+
7
+ from docsync.core.config import Config, find_repo_root
8
+ from docsync.core.parser import parse_doc
9
+
10
+
11
class DependencyTree(NamedTuple):
    """Docs grouped by dependency depth."""

    levels: list[list[Path]]  # levels[0] = docs with no doc dependencies
    circular: list[tuple[Path, Path]]  # (doc, dep) pairs found on a cycle
    doc_deps: dict[Path, list[Path]]  # doc -> existing docs it references
15
+
16
+
17
def build_dependency_tree(docs_path: Path, config: Config, repo_root: Path | None = None) -> DependencyTree:
    """Build the doc dependency tree for all markdown docs under docs_path.

    NOTE(review): `config` is currently unused; kept for signature symmetry
    with the other commands.
    """
    root = find_repo_root(docs_path) if repo_root is None else repo_root
    dependencies = _build_doc_dependencies(docs_path, root)
    levels, circular = _compute_levels(dependencies)
    return DependencyTree(levels=levels, circular=circular, doc_deps=dependencies)
23
+
24
+
25
+ def _build_doc_dependencies(docs_path: Path, repo_root: Path) -> dict[Path, list[Path]]:
26
+ doc_deps: dict[Path, list[Path]] = defaultdict(list)
27
+ doc_files = list(docs_path.rglob("*.md"))
28
+ for doc_file in doc_files:
29
+ try:
30
+ parsed = parse_doc(doc_file)
31
+ except Exception:
32
+ continue
33
+ for ref in parsed.related_docs:
34
+ ref_path = repo_root / ref.path
35
+ if ref_path.exists():
36
+ doc_deps[doc_file].append(ref_path)
37
+ if doc_file not in doc_deps:
38
+ doc_deps[doc_file] = []
39
+ return dict(doc_deps)
40
+
41
+
42
def _compute_levels(doc_deps: dict[Path, list[Path]]) -> tuple[list[list[Path]], list[tuple[Path, Path]]]:
    """Assign each doc a level = 1 + max level of its dependencies.

    Cycles are detected via the recursion path (`visiting`): a dep already on
    the path reports -1, the (doc, dep) pair is recorded as circular, and that
    dep is excluded from the doc's level computation. Docs inside each level
    are sorted for stable output.
    """
    all_docs = set(doc_deps.keys())
    assigned: dict[Path, int] = {}
    circular: list[tuple[Path, Path]] = []

    def get_level(doc: Path, visiting: set[Path]) -> int:
        # Memoized; -1 is the in-cycle sentinel, never stored in `assigned`.
        if doc in assigned:
            return assigned[doc]
        if doc in visiting:
            return -1
        if doc not in doc_deps:
            # Referenced doc outside the scanned set: treat as a leaf.
            assigned[doc] = 0
            return 0
        deps = doc_deps[doc]
        if not deps:
            assigned[doc] = 0
            return 0
        visiting.add(doc)
        max_dep_level = -1
        for dep in deps:
            dep_level = get_level(dep, visiting)
            if dep_level == -1:
                circular.append((doc, dep))
                continue
            max_dep_level = max(max_dep_level, dep_level)
        visiting.remove(doc)
        # All deps circular -> level 0; otherwise one above the deepest dep.
        level = max_dep_level + 1 if max_dep_level >= 0 else 0
        assigned[doc] = level
        return level

    for doc in all_docs:
        get_level(doc, set())

    max_level = max(assigned.values()) if assigned else 0
    levels: list[list[Path]] = [[] for _ in range(max_level + 1)]
    for doc, level in assigned.items():
        levels[level].append(doc)

    for level_docs in levels:
        level_docs.sort()

    return levels, circular
84
+
85
+
86
def format_tree(tree: DependencyTree, repo_root: Path) -> str:
    """Render a DependencyTree as indented text, paths relative to repo_root."""
    lines = []
    for i, level_docs in enumerate(tree.levels):
        if not level_docs:
            continue
        if i == 0:
            lines.append(f"Level 0 - Independent ({len(level_docs)}):")
        else:
            lines.append(f"\nLevel {i} ({len(level_docs)}):")
        for doc in level_docs:
            rel_path = doc.relative_to(repo_root)
            deps = tree.doc_deps.get(doc, [])
            if deps:
                dep_names = ", ".join(str(d.relative_to(repo_root)) for d in deps)
                lines.append(f"  {rel_path}")
                lines.append(f"    └── depends on: {dep_names}")
            else:
                lines.append(f"  {rel_path}")
    if tree.circular:
        lines.append("\nCircular dependencies (warning):")
        for src, dst in tree.circular:
            src_rel = src.relative_to(repo_root)
            dst_rel = dst.relative_to(repo_root)
            lines.append(f"  {src_rel} <-> {dst_rel}")
    return "\n".join(lines)
111
+
112
+
113
def run(docs_path: Path) -> int:
    """CLI entry for `docsync tree`: print the doc dependency levels."""
    from docsync.core.config import load_config

    config = load_config()
    docs_path = docs_path.resolve()
    repo_root = find_repo_root(docs_path)
    tree = build_dependency_tree(docs_path, config, repo_root)
    print(format_tree(tree, repo_root))
    return 0
File without changes
docsync/core/config.py ADDED
@@ -0,0 +1,91 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from docsync.core.constants import CONFIG_FILENAME, DEFAULT_CONFIG, DOCSYNC_DIR, SYNCS_DIR
8
+
9
+
10
class ConfigError(Exception):
    """Raised by load_config when .docsync/config.json fails validation."""

    pass
12
+
13
+
14
+ class Config:
15
+ def __init__(self, data: dict[str, Any]):
16
+ self.ignored_paths: list[str] = data.get("ignored_paths", DEFAULT_CONFIG["ignored_paths"])
17
+ self.cascade_depth_limit: int | None = data.get("cascade_depth_limit", DEFAULT_CONFIG["cascade_depth_limit"])
18
+
19
+
20
def validate_config(data: dict[str, Any], config_path: Path | None = None) -> list[str]:
    """Return human-readable problems with a parsed config dict (empty = valid).

    config_path is accepted for interface compatibility; it is not used in
    the messages today.
    """
    errors = []
    valid_keys = {"ignored_paths", "cascade_depth_limit"}
    for key in data:
        if key not in valid_keys:
            errors.append(f"unknown key: {key}")
    if "ignored_paths" in data:
        if not isinstance(data["ignored_paths"], list):
            errors.append("ignored_paths must be a list")
        elif not all(isinstance(p, str) for p in data["ignored_paths"]):
            errors.append("ignored_paths must contain only strings")
    if "cascade_depth_limit" in data:
        val = data["cascade_depth_limit"]
        # bool is a subclass of int, so reject it explicitly: JSON `true`
        # must not slip through as a depth limit.
        if val is not None and (isinstance(val, bool) or not isinstance(val, int)):
            errors.append("cascade_depth_limit must be null or integer")
    return errors
36
+
37
+
38
def load_config(start_path: Path | None = None, validate: bool = True) -> Config:
    """Load the nearest .docsync/config.json above start_path (default: cwd).

    Returns a default Config when no config file exists. Raises ConfigError
    when validate=True and the file has invalid keys/values; raises
    json.JSONDecodeError on malformed JSON regardless of *validate*.
    """
    config_path = find_config(start_path or Path.cwd())
    if config_path is None:
        return Config({})
    with open(config_path) as f:
        data = json.load(f)
    if validate:
        errors = validate_config(data, config_path)
        if errors:
            raise ConfigError(f"{config_path}: {', '.join(errors)}")
    return Config(data)
49
+
50
+
51
def find_config(start_path: Path) -> Path | None:
    """Walk from start_path toward the filesystem root; return the first
    .docsync/config.json found, or None."""
    current = start_path.resolve()
    while current != current.parent:
        candidate = current / DOCSYNC_DIR / CONFIG_FILENAME
        if candidate.exists():
            return candidate
        current = current.parent
    return None
59
+
60
+
61
def find_docsync_dir(start_path: Path) -> Path | None:
    """Walk from start_path toward the filesystem root; return the first
    .docsync directory found, or None."""
    current = start_path.resolve()
    while current != current.parent:
        candidate = current / DOCSYNC_DIR
        if candidate.exists():
            return candidate
        current = current.parent
    return None
69
+
70
+
71
def find_repo_root(start_path: Path) -> Path:
    """Nearest ancestor of start_path containing .git.

    Falls back to start_path itself (resolved) when no repository is found;
    the filesystem root is never checked.
    """
    resolved = start_path.resolve()
    for candidate in (resolved, *resolved.parents):
        if candidate == candidate.parent:
            break
        if (candidate / ".git").exists():
            return candidate
    return resolved
78
+
79
+
80
def init_docsync(target_dir: Path) -> Path:
    """Create the .docsync/ scaffold under target_dir and return its path.

    Creates config.json (defaults), syncs/ and a .gitignore that keeps the
    syncs directory in git while ignoring the generated reports inside it.
    Safe to re-run: an existing config.json is preserved (the original
    unconditionally reset it to defaults on every init).
    """
    docsync_dir = target_dir / DOCSYNC_DIR
    docsync_dir.mkdir(exist_ok=True)
    config_path = docsync_dir / CONFIG_FILENAME
    if not config_path.exists():
        with open(config_path, "w") as f:
            json.dump(DEFAULT_CONFIG, f, indent=2)
    syncs_dir = docsync_dir / SYNCS_DIR
    syncs_dir.mkdir(exist_ok=True)
    gitignore_path = syncs_dir / ".gitignore"
    with open(gitignore_path, "w") as f:
        f.write("*\n!.gitignore\n")
    return docsync_dir
@@ -0,0 +1,26 @@
1
+ import re
2
+ from pathlib import Path
3
+
4
# NOTE(review): these three patterns duplicate the ones in docsync.core.parser
# (RELATED_DOCS_HEADER etc.), which compiles its own copies instead of
# importing these.
RELATED_DOCS_PATTERN = re.compile(r"^related docs:\s*$", re.MULTILINE | re.IGNORECASE)
RELATED_SOURCES_PATTERN = re.compile(r"^related sources:\s*$", re.MULTILINE | re.IGNORECASE)
LIST_ITEM_PATTERN = re.compile(r"^-\s+(\S+)\s+-\s+(.+)$")

# Well-known names under the repo root.
DOCSYNC_DIR = ".docsync"
CONFIG_FILENAME = "config.json"
LOCK_FILENAME = "lock.json"
SYNC_FILENAME = "sync.md"  # optional user override for the sync prompt template
SYNCS_DIR = "syncs"

# Bundled prompt templates ship inside the package ("docsync/prompts").
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"

DEFAULT_CONFIG = {
    "ignored_paths": [],  # glob patterns relative to the repo root
    "cascade_depth_limit": None,  # None = unlimited cascade depth
}

DEFAULT_LOCK = {"last_analyzed_commit": None, "last_run": None, "docs_validated": []}
22
+
23
+
24
def load_default_prompt(parallel: bool = False) -> str:
    """Read the bundled sync prompt template (parallel or phased variant)."""
    name = "sync-parallel.md" if parallel else "sync.md"
    return (PROMPTS_DIR / name).read_text()
docsync/core/lock.py ADDED
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import subprocess
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from docsync.core.constants import DOCSYNC_DIR, LOCK_FILENAME
10
+
11
+
12
class Lock:
    """State persisted in .docsync/lock.json between docsync runs."""

    def __init__(self, data: dict[str, Any]):
        # Commit recorded by `docsync sync --update-lock`; baseline for
        # --incremental runs.
        self.last_analyzed_commit: str | None = data.get("last_analyzed_commit")
        # ISO timestamp set by save_lock() at write time.
        self.last_run: str | None = data.get("last_run")
        self.docs_validated: list[str] = data.get("docs_validated", [])

    def to_dict(self) -> dict[str, Any]:
        """JSON-serializable form, mirroring the on-disk layout."""
        return {
            "last_analyzed_commit": self.last_analyzed_commit,
            "last_run": self.last_run,
            "docs_validated": self.docs_validated,
        }
24
+
25
+
26
def load_lock(start_path: Path | None = None) -> Lock:
    """Load the nearest lock.json above start_path (default cwd).

    Returns an empty Lock when no lock file exists yet.
    """
    lock_path = find_lock(start_path or Path.cwd())
    if lock_path is None:
        return Lock({})
    with open(lock_path) as f:
        return Lock(json.load(f))
33
+
34
+
35
def find_lock(start_path: Path) -> Path | None:
    """Walk from start_path toward the filesystem root; return the first
    .docsync/lock.json found, or None."""
    current = start_path.resolve()
    while current != current.parent:
        candidate = current / DOCSYNC_DIR / LOCK_FILENAME
        if candidate.exists():
            return candidate
        current = current.parent
    return None
43
+
44
+
45
def save_lock(lock: Lock, repo_root: Path) -> Path:
    """Write *lock* to <repo_root>/.docsync/lock.json and return that path.

    Side effect: mutates lock.last_run to the current UTC timestamp before
    serializing, so callers see the saved value on their instance too.
    """
    docsync_dir = repo_root / DOCSYNC_DIR
    docsync_dir.mkdir(exist_ok=True)
    lock_path = docsync_dir / LOCK_FILENAME
    lock.last_run = datetime.now(timezone.utc).isoformat()
    with open(lock_path, "w") as f:
        json.dump(lock.to_dict(), f, indent=2)
    return lock_path
53
+
54
+
55
def get_current_commit() -> str | None:
    """HEAD commit hash of the repo containing the cwd, or None when git
    fails or is not installed."""
    try:
        proc = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None
    return proc.stdout.strip()
docsync/core/parser.py ADDED
@@ -0,0 +1,47 @@
1
+ import re
2
+ from pathlib import Path
3
+ from typing import NamedTuple
4
+
5
# Section headers: a whole line reading "related docs:" / "related sources:"
# (case-insensitive, trailing whitespace allowed).
RELATED_DOCS_HEADER = re.compile(r"^related docs:\s*$", re.MULTILINE | re.IGNORECASE)
RELATED_SOURCES_HEADER = re.compile(r"^related sources:\s*$", re.MULTILINE | re.IGNORECASE)
# List entry "- <path> - <description>": the path may contain spaces; the
# lazy group makes the first " - " separator win.
LIST_ITEM = re.compile(r"^-\s+(\S+(?:\s+\S+)*?)\s+-\s+(.+)$")
8
+
9
+
10
class RefEntry(NamedTuple):
    """One "- path - description" entry from a doc's metadata section."""

    path: str  # as written in the doc, relative to the repo root
    description: str
    line_number: int  # 1-based line in the source doc (0 = synthetic entry)
14
+
15
+
16
class ParsedDoc(NamedTuple):
    """Metadata extracted from one markdown doc."""

    related_docs: list[RefEntry]  # entries under "related docs:"
    related_sources: list[RefEntry]  # entries under "related sources:"
19
+
20
+
21
def parse_doc(filepath: Path) -> ParsedDoc:
    """Parse the `related docs:` / `related sources:` sections of a doc.

    Propagates OSError/UnicodeDecodeError on unreadable files; tree-scanning
    callers catch Exception and skip such docs.
    """
    # Explicit UTF-8: plain read_text() uses the platform default encoding,
    # which breaks on non-ASCII docs under e.g. Windows code pages.
    content = filepath.read_text(encoding="utf-8")
    lines = content.splitlines()
    related_docs = _extract_section(lines, RELATED_DOCS_HEADER)
    related_sources = _extract_section(lines, RELATED_SOURCES_HEADER)
    return ParsedDoc(related_docs=related_docs, related_sources=related_sources)
27
+
28
+
29
def _extract_section(lines: list[str], header_pattern: re.Pattern) -> list[RefEntry]:
    """Collect RefEntry items from the list following a matching header line.

    The section runs from the header to the first non-blank line that is not
    a "-" list item; blank lines inside the section are skipped. Malformed
    "-" lines (no " - description" part) are silently ignored.
    """
    entries = []
    in_section = False
    # start=1 so line_number is 1-based, matching editor/compiler conventions.
    for i, line in enumerate(lines, start=1):
        if header_pattern.match(line):
            in_section = True
            continue
        if in_section:
            if not line.strip():
                continue
            if line.startswith("-"):
                match = LIST_ITEM.match(line)
                if match:
                    entries.append(
                        RefEntry(path=match.group(1).strip(), description=match.group(2).strip(), line_number=i)
                    )
            else:
                # Any other content ends the section.
                break
    return entries
@@ -0,0 +1,21 @@
1
+ Sync {count} docs by launching PARALLEL agents (one per doc).
2
+
3
+ Each agent will:
4
+ 1. Read the doc + all related sources
5
+ 2. Fix any outdated/incorrect content directly in the doc
6
+ 3. Write a report to {syncs_dir}
7
+
8
+ Report format ({syncs_dir}/{{doc-name}}.md):
9
+ ```markdown
10
+ ## Changes made
11
+ - what was fixed
12
+
13
+ ## Why it was wrong
14
+ - explanation referencing the source code
15
+ ```
16
+
17
+ IMPORTANT: Launch ALL agents in a SINGLE message for parallel execution.
18
+
19
+ Docs to sync:
20
+
21
+ {docs_list}
@@ -0,0 +1,17 @@
1
+ Sync {count} docs by launching agents in phases (respecting dependencies).
2
+
3
+ Each agent will:
4
+ 1. Read the doc + all related sources
5
+ 2. Fix any outdated/incorrect content directly in the doc
6
+ 3. Write a report to {syncs_dir}
7
+
8
+ Report format ({syncs_dir}/{{doc-name}}.md):
9
+ ```markdown
10
+ ## Changes made
11
+ - what was fixed
12
+
13
+ ## Why it was wrong
14
+ - explanation referencing the source code
15
+ ```
16
+
17
+ {phases}
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.4
2
+ Name: docsync
3
+ Version: 0.1.0
4
+ Summary: Auto-validate and update docs in large codebases
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.9
7
+ Provides-Extra: dev
8
+ Requires-Dist: bump2version>=1; extra == 'dev'
9
+ Requires-Dist: pytest>=7; extra == 'dev'
10
+ Requires-Dist: ruff>=0.9; extra == 'dev'
11
+ Requires-Dist: towncrier>=23; extra == 'dev'
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Overview
15
+
16
+ CLI tool that keeps documentation in sync with code in large codebases. Detects which docs are affected by code changes and generates reports for AI validation.
17
+
18
+ ```
19
+ src/booking/handler.ts changed
20
+
21
+ v
22
+ ┌─────────────────────────┐
23
+ │ docsync cascade HEAD~1 │
24
+ └───────────┬─────────────┘
25
+
26
+ v
27
+ ┌─────────────────────────┐ ┌─────────────────────────┐
28
+ │ Direct hits: │ │ docs/bookings.md │
29
+ │ - docs/bookings.md │ ──> │ │
30
+ └─────────────────────────┘ │ related sources: │
31
+ │ │ - src/booking/ <───── │ ← matched!
32
+ v └─────────────────────────┘
33
+ ┌─────────────────────────┐
34
+ │ Cascade hits: │ docs/bookings.md references
35
+ │ - docs/payments.md │ ──> docs/payments.md, so it
36
+ └─────────────────────────┘ might need review too
37
+ ```
38
+
39
+ <details>
40
+ <summary>How it works</summary>
41
+
42
+ Each doc ends with metadata sections:
43
+
44
+ ```markdown
45
+ # Booking System
46
+
47
+ How bookings work...
48
+
49
+ ---
50
+
51
+ related docs:
52
+ - docs/payments.md - payment integration
53
+
54
+ related sources:
55
+ - src/booking/ - booking module
56
+ - src/booking/commands/ - command handlers
57
+ ```
58
+
59
+ When `src/booking/handler.ts` changes:
60
+
61
+ ```
62
+ docsync cascade HEAD~1
63
+
64
+ Direct hits (1):
65
+ docs/bookings.md <- references src/booking/
66
+
67
+ Cascade hits (1):
68
+ docs/payments.md <- referenced BY docs/bookings.md
69
+ ```
70
+
71
+ The cascade propagates: if `bookings.md` might be outdated, then `payments.md` (which references it) might also need review.
72
+
73
+ </details>
74
+
75
+ ## Motivation
76
+
77
+ In large codebases, docs get outdated because:
78
+ 1. No one remembers which docs need updating when a file changes
79
+ 2. AI agents don't know which files to read to validate each doc
80
+
81
+ docsync solves this by adding "hints" to each doc - `related sources:` tells any AI exactly what to read.
82
+
83
+ ## Features
84
+
85
+ - check - validates all referenced paths exist
86
+ - cascade - finds docs affected by code changes (with directory matching)
87
+ - sync - generates prompt for AI to fix docs (ordered by deps)
88
+ - tree - shows doc dependency tree
89
+
90
+ ## Quickstart
91
+
92
+ ### 1. Install
93
+
94
+ ```bash
95
+ pipx install docsync
96
+ ```
97
+
98
+ ### 2. Add metadata to your docs
99
+
100
+ Each doc needs two sections at the end (after a `---` separator):
101
+
102
+ ```markdown
103
+ # My Feature
104
+
105
+ Documentation content here...
106
+
107
+ ---
108
+
109
+ related docs:
110
+ - docs/other-feature.md - brief description
111
+
112
+ related sources:
113
+ - src/feature/ - main module
114
+ - src/feature/utils.ts - helper functions
115
+ ```
116
+
117
+ ### 3. Initialize config (optional)
118
+
119
+ ```bash
120
+ docsync init # creates .docsync/ folder
121
+ ```
122
+
123
+ <details>
124
+ <summary>Config options</summary>
125
+
126
+ ```
127
+ .docsync/
128
+ ├── config.json # required
129
+ ├── sync.md # optional - custom prompt template
130
+ ├── lock.json # optional - tracks last synced commit
131
+ └── syncs/ # ignored - AI writes sync reports here
132
+ ```
133
+
134
+ config.json:
135
+ ```json
136
+ {
137
+ "ignored_paths": ["**/migrations/**", "**/*.test.ts"],
138
+ "cascade_depth_limit": null
139
+ }
140
+ ```
141
+
142
+ sync.md (custom template):
143
+ ```markdown
144
+ Sync {count} docs. Write reports to {syncs_dir}/
145
+
146
+ {phases}
147
+ ```
148
+
149
+ Placeholders: `{count}`, `{phases}`, `{docs_list}`, `{syncs_dir}`
150
+
151
+ </details>
152
+
153
+ ### 4. Validate your setup
154
+
155
+ ```bash
156
+ docsync check docs/ # ensures all paths exist
157
+ ```
158
+
159
+ ### 5. Use it
160
+
161
+ ```bash
162
+ docsync cascade HEAD~5 --docs docs/ # docs affected by last 5 commits
163
+ docsync sync docs/ | pbcopy # generate AI prompt
164
+ claude "$(docsync sync docs/)" # or pipe directly to AI
165
+ ```
166
+
167
+ ## Commands
168
+
169
+ ```bash
170
+ docsync check <path> # validate refs exist
171
+ docsync cascade <commit> --docs <dir> # list affected docs
172
+ docsync sync <path> # generate prompt (ordered by deps)
173
+ docsync sync <path> --parallel # ignore deps, all at once
174
+ docsync sync <path> --incremental # only include changed docs
175
+ docsync sync <path> --update-lock # update lock.json after sync
176
+ docsync sync <path> --json # output as JSON for scripts
177
+ docsync tree <path> # show doc dependency tree
178
+ docsync init # create .docsync/ folder
179
+ docsync --version # show version
180
+ ```
181
+
182
+ ### AI Sync
183
+
184
+ The `sync` command generates a prompt for AI to fix docs in phases (respecting dependencies):
185
+
186
+ ```
187
+ Sync 5 docs by launching agents in phases (respecting dependencies).
188
+
189
+ Each agent will:
190
+ 1. Read the doc + all related sources
191
+ 2. Fix any outdated/incorrect content directly in the doc
192
+ 3. Write a report to .docsync/syncs/2024-01-15T10-30-00/
193
+
194
+ Phase 1 - Independent (launch parallel):
195
+ docs/utils.md
196
+ docs/config.md
197
+
198
+ Phase 2 - Level 1 (after phase 1 completes):
199
+ docs/auth.md
200
+ sources: src/auth/
201
+
202
+ Phase 3 - Level 2 (after phase 2 completes):
203
+ docs/login.md
204
+ sources: src/login/
205
+ ```
206
+
207
+ Use `--parallel` to ignore dependencies and sync all at once.
208
+
209
+ ## Development
210
+
211
+ ```bash
212
+ make install # create venv + install
213
+ make check # lint
214
+ make test # run tests
215
+ docsync check docs/ # practical test
216
+ ```
@@ -0,0 +1,20 @@
1
+ docsync/__init__.py,sha256=ukKr35GuAF_rKmKQJNTKfsz9HEmZ87kt0Qz8RL-hYGE,205
2
+ docsync/cli.py,sha256=l4g5VUtgiF9kyq6XNNe6mhKwI3NlS01X-5gq4kkbgkk,2184
3
+ docsync/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ docsync/commands/cascade.py,sha256=vjM84402_KD1_ljR6qmLRgDbmihYI-46zhudgxDiHks,4403
5
+ docsync/commands/check.py,sha256=DYOKMhB6lzO3YpzMCw69ulYRKCMLoH0nhhAdMtAyWd0,2819
6
+ docsync/commands/init.py,sha256=9zLQVgiYjd7X-cXZ0jQ3023_HOic0EST8xTuMxZ9THk,184
7
+ docsync/commands/sync.py,sha256=WnChiBqoQxp2dGsIMx7Gb-xtI1fB2yO4UyHN90tnjkc,6365
8
+ docsync/commands/tree.py,sha256=IqiFbGzZrQyys5wPlU9T35s_94qfMOQTdx8lYha303E,4041
9
+ docsync/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ docsync/core/config.py,sha256=GBMP41Aum4gS0LH4p8VLIrGq6HfdaM92oLuF-sAHY8U,3046
11
+ docsync/core/constants.py,sha256=nXS35iNPIxW3vnFOJu6fWiGcNRiz-1v919I9n_wzKtM,796
12
+ docsync/core/lock.py,sha256=4Fzs_8nfcIVyCZ3hSmNdjVxkfJ2hXz48eOfLTBr0MRQ,1855
13
+ docsync/core/parser.py,sha256=rmnlZfhuWjVGa_XCZvHwAnNCANr6vx--xr1tS6O_fW8,1505
14
+ docsync/prompts/sync-parallel.md,sha256=6LJ8BrvzJwEmvkDiyt3sAQtlRJrmDmR93hi8DYIqLko,469
15
+ docsync/prompts/sync.md,sha256=nBaAPwNK9LU8c3EXSkfCL1hIDvJZ5ijfxZwB9DDOEa0,390
16
+ docsync-0.1.0.dist-info/METADATA,sha256=wPOvVF-wPhMRR_7fH7PEC2UfonsDXxUOP_EMvjx-EGk,5944
17
+ docsync-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
18
+ docsync-0.1.0.dist-info/entry_points.txt,sha256=Mk67DbEgbGkQmp5NRqOgivotQv1R3RcaD4ncUlZXabk,45
19
+ docsync-0.1.0.dist-info/licenses/LICENSE,sha256=njaGk8b8NcBMEtm31x6Vt9w5HYU3kMPZNHN2DidK2TU,1069
20
+ docsync-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ docsync = docsync.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Lucas Vieira
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.