codealmanac 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. codealmanac/__init__.py +13 -0
  2. codealmanac/app.py +175 -0
  3. codealmanac/cli/__init__.py +1 -0
  4. codealmanac/cli/dispatch/__init__.py +0 -0
  5. codealmanac/cli/dispatch/admin.py +124 -0
  6. codealmanac/cli/dispatch/config.py +50 -0
  7. codealmanac/cli/dispatch/root.py +328 -0
  8. codealmanac/cli/main.py +28 -0
  9. codealmanac/cli/parser/__init__.py +0 -0
  10. codealmanac/cli/parser/admin.py +81 -0
  11. codealmanac/cli/parser/lifecycle.py +57 -0
  12. codealmanac/cli/parser/root.py +19 -0
  13. codealmanac/cli/parser/wiki.py +87 -0
  14. codealmanac/cli/render/__init__.py +0 -0
  15. codealmanac/cli/render/admin.py +191 -0
  16. codealmanac/cli/render/root.py +290 -0
  17. codealmanac/core/__init__.py +1 -0
  18. codealmanac/core/errors.py +45 -0
  19. codealmanac/core/models.py +14 -0
  20. codealmanac/core/paths.py +25 -0
  21. codealmanac/core/slug.py +7 -0
  22. codealmanac/core/text.py +5 -0
  23. codealmanac/database/__init__.py +15 -0
  24. codealmanac/database/sqlite.py +54 -0
  25. codealmanac/integrations/__init__.py +1 -0
  26. codealmanac/integrations/automation/__init__.py +3 -0
  27. codealmanac/integrations/automation/scheduler/__init__.py +5 -0
  28. codealmanac/integrations/automation/scheduler/launchd.py +163 -0
  29. codealmanac/integrations/command.py +56 -0
  30. codealmanac/integrations/harnesses/__init__.py +7 -0
  31. codealmanac/integrations/harnesses/claude/__init__.py +1 -0
  32. codealmanac/integrations/harnesses/claude/adapter.py +217 -0
  33. codealmanac/integrations/harnesses/codex/__init__.py +3 -0
  34. codealmanac/integrations/harnesses/codex/adapter.py +221 -0
  35. codealmanac/integrations/harnesses/git_status.py +49 -0
  36. codealmanac/integrations/sources/__init__.py +29 -0
  37. codealmanac/integrations/sources/filesystem/__init__.py +5 -0
  38. codealmanac/integrations/sources/filesystem/adapter.py +685 -0
  39. codealmanac/integrations/sources/filesystem/selection.py +209 -0
  40. codealmanac/integrations/sources/git/__init__.py +3 -0
  41. codealmanac/integrations/sources/git/adapter.py +132 -0
  42. codealmanac/integrations/sources/github/__init__.py +3 -0
  43. codealmanac/integrations/sources/github/adapter.py +413 -0
  44. codealmanac/integrations/sources/runtime.py +22 -0
  45. codealmanac/integrations/sources/transcripts/__init__.py +33 -0
  46. codealmanac/integrations/sources/transcripts/claude.py +61 -0
  47. codealmanac/integrations/sources/transcripts/codex.py +69 -0
  48. codealmanac/integrations/sources/transcripts/jsonl.py +84 -0
  49. codealmanac/integrations/sources/transcripts/runtime.py +387 -0
  50. codealmanac/integrations/sources/web/__init__.py +3 -0
  51. codealmanac/integrations/sources/web/adapter.py +303 -0
  52. codealmanac/integrations/updates/__init__.py +7 -0
  53. codealmanac/integrations/updates/package.py +85 -0
  54. codealmanac/integrations/workspaces/__init__.py +1 -0
  55. codealmanac/integrations/workspaces/git/__init__.py +3 -0
  56. codealmanac/integrations/workspaces/git/probe.py +128 -0
  57. codealmanac/manual/README.md +24 -0
  58. codealmanac/manual/__init__.py +19 -0
  59. codealmanac/manual/build.md +20 -0
  60. codealmanac/manual/evidence.md +23 -0
  61. codealmanac/manual/garden.md +20 -0
  62. codealmanac/manual/ingest.md +17 -0
  63. codealmanac/manual/library.py +84 -0
  64. codealmanac/manual/models.py +83 -0
  65. codealmanac/manual/pages.md +28 -0
  66. codealmanac/manual/requests.py +6 -0
  67. codealmanac/manual/sources.md +18 -0
  68. codealmanac/manual/style.md +19 -0
  69. codealmanac/prompts/__init__.py +5 -0
  70. codealmanac/prompts/base/notability.md +14 -0
  71. codealmanac/prompts/base/purpose.md +23 -0
  72. codealmanac/prompts/base/syntax.md +19 -0
  73. codealmanac/prompts/models.py +9 -0
  74. codealmanac/prompts/operations/garden.md +26 -0
  75. codealmanac/prompts/operations/ingest.md +18 -0
  76. codealmanac/prompts/renderer.py +24 -0
  77. codealmanac/prompts/requests.py +22 -0
  78. codealmanac/server/__init__.py +1 -0
  79. codealmanac/server/app.py +202 -0
  80. codealmanac/server/assets/__init__.py +1 -0
  81. codealmanac/server/assets/app.css +865 -0
  82. codealmanac/server/assets/app.js +3 -0
  83. codealmanac/server/assets/index.html +80 -0
  84. codealmanac/server/assets/viewer/api.js +30 -0
  85. codealmanac/server/assets/viewer/components.js +197 -0
  86. codealmanac/server/assets/viewer/main.js +126 -0
  87. codealmanac/server/assets/viewer/renderers.js +122 -0
  88. codealmanac/server/assets/viewer/routes.js +36 -0
  89. codealmanac/services/__init__.py +1 -0
  90. codealmanac/services/automation/__init__.py +3 -0
  91. codealmanac/services/automation/models.py +83 -0
  92. codealmanac/services/automation/ports.py +14 -0
  93. codealmanac/services/automation/requests.py +40 -0
  94. codealmanac/services/automation/service.py +294 -0
  95. codealmanac/services/config/__init__.py +17 -0
  96. codealmanac/services/config/models.py +61 -0
  97. codealmanac/services/config/requests.py +21 -0
  98. codealmanac/services/config/service.py +55 -0
  99. codealmanac/services/config/store.py +26 -0
  100. codealmanac/services/diagnostics/__init__.py +1 -0
  101. codealmanac/services/diagnostics/models.py +22 -0
  102. codealmanac/services/diagnostics/requests.py +8 -0
  103. codealmanac/services/diagnostics/service.py +283 -0
  104. codealmanac/services/harnesses/__init__.py +1 -0
  105. codealmanac/services/harnesses/models.py +104 -0
  106. codealmanac/services/harnesses/ports.py +18 -0
  107. codealmanac/services/harnesses/requests.py +19 -0
  108. codealmanac/services/harnesses/service.py +38 -0
  109. codealmanac/services/health/__init__.py +1 -0
  110. codealmanac/services/health/requests.py +8 -0
  111. codealmanac/services/health/service.py +20 -0
  112. codealmanac/services/index/__init__.py +1 -0
  113. codealmanac/services/index/models.py +135 -0
  114. codealmanac/services/index/requests.py +26 -0
  115. codealmanac/services/index/service.py +86 -0
  116. codealmanac/services/index/store.py +411 -0
  117. codealmanac/services/index/views.py +524 -0
  118. codealmanac/services/pages/__init__.py +1 -0
  119. codealmanac/services/pages/requests.py +17 -0
  120. codealmanac/services/pages/service.py +26 -0
  121. codealmanac/services/runs/__init__.py +1 -0
  122. codealmanac/services/runs/models.py +91 -0
  123. codealmanac/services/runs/requests.py +76 -0
  124. codealmanac/services/runs/service.py +86 -0
  125. codealmanac/services/runs/store.py +256 -0
  126. codealmanac/services/search/__init__.py +1 -0
  127. codealmanac/services/search/requests.py +23 -0
  128. codealmanac/services/search/service.py +31 -0
  129. codealmanac/services/sources/__init__.py +1 -0
  130. codealmanac/services/sources/models.py +126 -0
  131. codealmanac/services/sources/ports.py +30 -0
  132. codealmanac/services/sources/requests.py +76 -0
  133. codealmanac/services/sources/service.py +351 -0
  134. codealmanac/services/tagging/__init__.py +1 -0
  135. codealmanac/services/tagging/models.py +9 -0
  136. codealmanac/services/tagging/requests.py +35 -0
  137. codealmanac/services/tagging/service.py +43 -0
  138. codealmanac/services/topics/__init__.py +1 -0
  139. codealmanac/services/topics/models.py +36 -0
  140. codealmanac/services/topics/requests.py +115 -0
  141. codealmanac/services/topics/service.py +297 -0
  142. codealmanac/services/updates/__init__.py +4 -0
  143. codealmanac/services/updates/models.py +83 -0
  144. codealmanac/services/updates/ports.py +17 -0
  145. codealmanac/services/updates/requests.py +10 -0
  146. codealmanac/services/updates/service.py +113 -0
  147. codealmanac/services/viewer/__init__.py +1 -0
  148. codealmanac/services/viewer/models.py +80 -0
  149. codealmanac/services/viewer/renderer.py +89 -0
  150. codealmanac/services/viewer/requests.py +86 -0
  151. codealmanac/services/viewer/service.py +211 -0
  152. codealmanac/services/wiki/__init__.py +1 -0
  153. codealmanac/services/wiki/documents.py +83 -0
  154. codealmanac/services/wiki/frontmatter.py +94 -0
  155. codealmanac/services/wiki/frontmatter_rewrite.py +142 -0
  156. codealmanac/services/wiki/models.py +69 -0
  157. codealmanac/services/wiki/paths.py +42 -0
  158. codealmanac/services/wiki/service.py +57 -0
  159. codealmanac/services/wiki/templates.py +73 -0
  160. codealmanac/services/wiki/topics.py +266 -0
  161. codealmanac/services/wiki/wikilinks.py +58 -0
  162. codealmanac/services/workspaces/__init__.py +1 -0
  163. codealmanac/services/workspaces/models.py +124 -0
  164. codealmanac/services/workspaces/ports.py +9 -0
  165. codealmanac/services/workspaces/requests.py +82 -0
  166. codealmanac/services/workspaces/roots.py +74 -0
  167. codealmanac/services/workspaces/service.py +303 -0
  168. codealmanac/services/workspaces/store.py +127 -0
  169. codealmanac/workflows/__init__.py +1 -0
  170. codealmanac/workflows/build/__init__.py +1 -0
  171. codealmanac/workflows/build/models.py +8 -0
  172. codealmanac/workflows/build/service.py +45 -0
  173. codealmanac/workflows/garden/__init__.py +3 -0
  174. codealmanac/workflows/garden/models.py +30 -0
  175. codealmanac/workflows/garden/requests.py +22 -0
  176. codealmanac/workflows/garden/service.py +239 -0
  177. codealmanac/workflows/ingest/__init__.py +1 -0
  178. codealmanac/workflows/ingest/models.py +26 -0
  179. codealmanac/workflows/ingest/requests.py +39 -0
  180. codealmanac/workflows/ingest/service.py +302 -0
  181. codealmanac/workflows/lifecycle.py +197 -0
  182. codealmanac/workflows/sync/__init__.py +3 -0
  183. codealmanac/workflows/sync/models.py +157 -0
  184. codealmanac/workflows/sync/requests.py +63 -0
  185. codealmanac/workflows/sync/service.py +651 -0
  186. codealmanac/workflows/sync/store.py +51 -0
  187. codealmanac-0.1.0.dev0.dist-info/METADATA +248 -0
  188. codealmanac-0.1.0.dev0.dist-info/RECORD +192 -0
  189. codealmanac-0.1.0.dev0.dist-info/WHEEL +5 -0
  190. codealmanac-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  191. codealmanac-0.1.0.dev0.dist-info/licenses/LICENSE.md +201 -0
  192. codealmanac-0.1.0.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,86 @@
1
+ from pathlib import Path
2
+
3
+ from pydantic import field_validator
4
+
5
+ from codealmanac.core.models import CodeAlmanacModel
6
+ from codealmanac.core.text import required_text
7
+ from codealmanac.services.wiki.paths import (
8
+ looks_like_dir,
9
+ normalize_reference_path_preserving_case,
10
+ )
11
+
12
+
13
class ViewerOverviewRequest(CodeAlmanacModel):
    """Request for the viewer overview of a wiki."""

    # Directory the workspace lookup starts from.
    cwd: Path
    # Optional explicit wiki selector; ``None`` means "resolve from cwd".
    wiki: str | None = None
    # Maximum number of pages to list in the overview.
    page_limit: int = 30

    @field_validator("page_limit")
    @classmethod
    def non_negative_page_limit(cls, value: int) -> int:
        """Accept zero or positive limits and reject negative ones."""
        if value >= 0:
            return value
        raise ValueError("page_limit must be non-negative")
24
+
25
+
26
class ViewerPageRequest(CodeAlmanacModel):
    """Request for a single wiki page, addressed by slug."""

    # Directory the workspace lookup starts from.
    cwd: Path
    # Slug of the page to show; validated by ``require_slug``.
    slug: str
    # Optional explicit wiki selector.
    wiki: str | None = None

    @field_validator("slug")
    @classmethod
    def require_slug(cls, value: str) -> str:
        """Run the slug through ``required_text`` under the label "slug"."""
        checked = required_text(value, "slug")
        return checked
35
+
36
+
37
class ViewerSearchRequest(CodeAlmanacModel):
    """Request for a page search inside a wiki."""

    # Directory the workspace lookup starts from.
    cwd: Path
    # Optional explicit wiki selector.
    wiki: str | None = None
    # Optional search text; ``None`` is passed through unchanged.
    query: str | None = None
    # Maximum number of results.
    limit: int = 50

    @field_validator("limit")
    @classmethod
    def non_negative_limit(cls, value: int) -> int:
        """Accept zero or positive limits and reject negative ones."""
        if value >= 0:
            return value
        raise ValueError("limit must be non-negative")
49
+
50
+
51
class ViewerFileRequest(CodeAlmanacModel):
    """Request for the pages that mention a given file or directory path."""

    # Directory the workspace lookup starts from.
    cwd: Path
    # Path being looked up; stored in normalized form (see validator).
    path: str
    # Optional explicit wiki selector.
    wiki: str | None = None
    # Maximum number of pages to return.
    limit: int = 50

    @field_validator("path")
    @classmethod
    def normalize_file_path(cls, value: str) -> str:
        """Require a non-empty path and store its case-preserving normal form.

        Raises ``ValueError`` when normalization yields an empty result.
        """
        text = required_text(value, "file path")
        is_dir = looks_like_dir(text)
        cleaned = normalize_reference_path_preserving_case(text, is_dir)
        if cleaned:
            return cleaned
        raise ValueError("file path must be repo-relative")

    @field_validator("limit")
    @classmethod
    def non_negative_limit(cls, value: int) -> int:
        """Accept zero or positive limits and reject negative ones."""
        if value >= 0:
            return value
        raise ValueError("limit must be non-negative")
75
+
76
+
77
class ViewerTopicRequest(CodeAlmanacModel):
    """Request for a single topic, addressed by slug."""

    # Directory the workspace lookup starts from.
    cwd: Path
    # Slug of the topic to show; validated by ``require_slug``.
    slug: str
    # Optional explicit wiki selector.
    wiki: str | None = None
    # Forwarded to the index topic lookup.
    include_descendants: bool = False

    @field_validator("slug")
    @classmethod
    def require_slug(cls, value: str) -> str:
        """Run the slug through ``required_text`` under the label "topic slug"."""
        checked = required_text(value, "topic slug")
        return checked
@@ -0,0 +1,211 @@
1
+ from pathlib import Path
2
+
3
+ from codealmanac.core.errors import NotFoundError
4
+ from codealmanac.core.slug import to_kebab_case
5
+ from codealmanac.services.index.models import PageView, SearchPageResult
6
+ from codealmanac.services.index.requests import SearchIndexRequest
7
+ from codealmanac.services.index.service import IndexService
8
+ from codealmanac.services.viewer.models import (
9
+ ViewerFile,
10
+ ViewerFileKind,
11
+ ViewerFileReference,
12
+ ViewerOverview,
13
+ ViewerPage,
14
+ ViewerPageSummary,
15
+ ViewerSearch,
16
+ ViewerTopic,
17
+ ViewerTopicSummary,
18
+ ViewerWorkspace,
19
+ )
20
+ from codealmanac.services.viewer.renderer import MarkdownRenderer
21
+ from codealmanac.services.viewer.requests import (
22
+ ViewerFileRequest,
23
+ ViewerOverviewRequest,
24
+ ViewerPageRequest,
25
+ ViewerSearchRequest,
26
+ ViewerTopicRequest,
27
+ )
28
+ from codealmanac.services.wiki.paths import looks_like_dir
29
+ from codealmanac.services.workspaces.models import Workspace
30
+ from codealmanac.services.workspaces.requests import SelectWorkspaceRequest
31
+ from codealmanac.services.workspaces.service import WorkspacesService
32
+
33
+
34
class ViewerService:
    """Assemble viewer models from workspace selection, index lookups and rendering."""

    def __init__(
        self,
        workspaces: WorkspacesService,
        index: IndexService,
        renderer: MarkdownRenderer,
    ):
        self.workspaces = workspaces
        self.index = index
        self.renderer = renderer

    def overview(self, request: ViewerOverviewRequest) -> ViewerOverview:
        """Return counts, a limited page listing, all topics and the featured page."""
        workspace = self.select_workspace(request.cwd, request.wiki)
        counts = self.index.summary(workspace.workspace_id)
        listing = self.index.search(
            workspace.workspace_id,
            SearchIndexRequest(limit=request.page_limit),
        )
        indexed_topics = self.index.list_topics(workspace.workspace_id)

        page_summaries = tuple(page_summary_from_search(hit) for hit in listing)
        topic_summaries = tuple(
            ViewerTopicSummary(
                slug=entry.slug,
                title=entry.title,
                description=entry.description,
                page_count=entry.page_count,
            )
            for entry in indexed_topics
        )
        return ViewerOverview(
            workspace=viewer_workspace(workspace),
            page_count=counts.pages,
            topic_count=counts.topics,
            pages=page_summaries,
            topics=topic_summaries,
            # Looked up last, after all other index calls have completed.
            featured_page=self.get_featured_page(workspace),
        )

    def page(self, request: ViewerPageRequest) -> ViewerPage:
        """Return one page with rendered HTML, links, file refs and related pages.

        Raises ``NotFoundError`` when the page is not in the index.
        """
        workspace = self.select_workspace(request.cwd, request.wiki)
        found = self.get_page_or_raise(workspace, request.slug)
        neighbours = self.related_pages(workspace, found)
        references = tuple(
            ViewerFileReference(path=ref.path, is_dir=ref.is_dir)
            for ref in found.file_refs
        )
        return ViewerPage(
            workspace=viewer_workspace(workspace),
            slug=found.slug,
            title=found.title,
            summary=found.summary,
            topics=found.topics,
            body=found.body,
            html=self.renderer.render(found.body),
            backlinks=found.wikilinks_in,
            outgoing_links=found.wikilinks_out,
            file_refs=references,
            related_pages=neighbours,
        )

    def search(self, request: ViewerSearchRequest) -> ViewerSearch:
        """Run the index search for ``request.query`` and wrap the hits."""
        workspace = self.select_workspace(request.cwd, request.wiki)
        criteria = SearchIndexRequest(query=request.query, limit=request.limit)
        hits = self.index.search(workspace.workspace_id, criteria)
        return ViewerSearch(
            workspace=viewer_workspace(workspace),
            query=request.query,
            pages=tuple(page_summary_from_search(hit) for hit in hits),
        )

    def file(self, request: ViewerFileRequest) -> ViewerFile:
        """Return the pages whose index entry mentions ``request.path``."""
        workspace = self.select_workspace(request.cwd, request.wiki)
        criteria = SearchIndexRequest(mentions=request.path, limit=request.limit)
        hits = self.index.search(workspace.workspace_id, criteria)
        if looks_like_dir(request.path):
            kind = ViewerFileKind.DIRECTORY
        else:
            kind = ViewerFileKind.FILE
        return ViewerFile(
            workspace=viewer_workspace(workspace),
            path=request.path,
            kind=kind,
            pages=tuple(page_summary_from_search(hit) for hit in hits),
        )

    def topic(self, request: ViewerTopicRequest) -> ViewerTopic:
        """Return one topic together with summaries of its pages.

        Raises ``NotFoundError`` when the topic is not in the index. Page slugs
        that no longer resolve to an indexed page are silently left out.
        """
        workspace = self.select_workspace(request.cwd, request.wiki)
        slug = to_kebab_case(request.slug)
        found = self.index.get_topic(
            workspace.workspace_id,
            slug,
            request.include_descendants,
        )
        if found is None:
            # The error carries the slug as the caller supplied it.
            raise NotFoundError("topic", request.slug)

        summaries: list[ViewerPageSummary] = []
        for page_slug in found.pages:
            summary = self.page_summary(workspace, page_slug)
            if summary is not None:
                summaries.append(summary)

        return ViewerTopic(
            workspace=viewer_workspace(workspace),
            slug=found.slug,
            title=found.title,
            description=found.description,
            parents=found.parents,
            children=found.children,
            pages=tuple(summaries),
        )

    def select_workspace(self, cwd: Path, wiki: str | None) -> Workspace:
        """Resolve the workspace from ``cwd``, or select it by ``wiki`` when given."""
        if wiki is not None:
            return self.workspaces.select(
                SelectWorkspaceRequest(selector=wiki, base_path=cwd)
            )
        return self.workspaces.resolve(cwd)

    def get_page_or_raise(self, workspace: Workspace, slug: str) -> PageView:
        """Fetch a page by kebab-cased slug or raise ``NotFoundError``."""
        found = self.index.get_page(workspace.workspace_id, to_kebab_case(slug))
        if found is not None:
            return found
        # The error carries the slug as the caller supplied it.
        raise NotFoundError("page", slug)

    def page_summary(
        self,
        workspace: Workspace,
        slug: str,
    ) -> ViewerPageSummary | None:
        """Return a summary for ``slug`` (used as given), or ``None`` if not indexed."""
        found = self.index.get_page(workspace.workspace_id, slug)
        return None if found is None else page_summary_from_view(found)

    def get_featured_page(self, workspace: Workspace) -> ViewerPageSummary | None:
        """Return the summary of the "getting-started" page, if it exists."""
        return self.page_summary(workspace, "getting-started")

    def related_pages(
        self,
        workspace: Workspace,
        page: PageView,
    ) -> tuple[ViewerPageSummary, ...]:
        """Summarise every distinct page linked to or from ``page``.

        Incoming links come first, then outgoing ones; the page itself and
        repeated slugs are skipped, as are slugs that do not resolve.
        """
        visited: set[str] = set()
        collected: list[ViewerPageSummary] = []
        for linked_slug in (*page.wikilinks_in, *page.wikilinks_out):
            if linked_slug == page.slug or linked_slug in visited:
                continue
            visited.add(linked_slug)
            summary = self.page_summary(workspace, linked_slug)
            if summary is None:
                continue
            collected.append(summary)
        return tuple(collected)
188
+
189
+
190
def viewer_workspace(workspace: Workspace) -> ViewerWorkspace:
    """Project a workspace onto the name/root pair exposed to the viewer."""
    name, root_path = workspace.name, workspace.root_path
    return ViewerWorkspace(name=name, root_path=root_path)
192
+
193
+
194
def page_summary_from_search(page: SearchPageResult) -> ViewerPageSummary:
    """Convert a search hit into a viewer page summary."""
    # A page counts as archived as soon as any archive timestamp is set.
    is_archived = page.archived_at is not None
    return ViewerPageSummary(
        slug=page.slug,
        title=page.title,
        summary=page.summary,
        topics=page.topics,
        archived=is_archived,
    )
202
+
203
+
204
def page_summary_from_view(page: PageView) -> ViewerPageSummary:
    """Convert a full page view into a viewer page summary."""
    # A page counts as archived as soon as any archive timestamp is set.
    is_archived = page.archived_at is not None
    return ViewerPageSummary(
        slug=page.slug,
        title=page.title,
        summary=page.summary,
        topics=page.topics,
        archived=is_archived,
    )
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,83 @@
1
+ from hashlib import sha256
2
+ from pathlib import Path
3
+
4
+ from codealmanac.core.slug import to_kebab_case
5
+ from codealmanac.services.wiki.frontmatter import first_h1, parse_frontmatter
6
+ from codealmanac.services.wiki.models import (
7
+ CrossWikiLink,
8
+ FileLink,
9
+ FileReference,
10
+ FolderLink,
11
+ PageDocument,
12
+ PageLink,
13
+ )
14
+ from codealmanac.services.wiki.paths import (
15
+ looks_like_dir,
16
+ normalize_reference_path,
17
+ normalize_reference_path_preserving_case,
18
+ )
19
+ from codealmanac.services.wiki.wikilinks import extract_wikilinks
20
+
21
+
22
def load_page_document(page_path: Path, pages_path: Path) -> PageDocument | None:
    """Read one Markdown page from disk and turn it into a ``PageDocument``.

    Args:
        page_path: The page file to read (decoded as UTF-8).
        pages_path: The pages root; ``page_path`` must live underneath it,
            otherwise ``Path.relative_to`` raises ``ValueError``.

    Returns:
        The parsed document, or ``None`` when neither the frontmatter page id
        nor the file stem produces a non-empty kebab-case slug.
    """
    raw = page_path.read_text(encoding="utf-8")
    frontmatter = parse_frontmatter(raw)
    relative_path = page_path.relative_to(pages_path).as_posix()
    slug = to_kebab_case(frontmatter.page_id or page_path.stem)
    if not slug:
        return None

    # Title precedence: frontmatter, then the first H1 in the body, then the stem.
    title = frontmatter.title or first_h1(frontmatter.body) or page_path.stem

    # File references come from the frontmatter list and from body wikilinks.
    file_refs = list(frontmatter_file_refs(frontmatter.files))
    page_links: list[str] = []
    cross_wiki_links: list[tuple[str, str]] = []
    for link in extract_wikilinks(frontmatter.body):
        if isinstance(link, PageLink):
            page_links.append(link.target)
        elif isinstance(link, FileLink | FolderLink):
            file_refs.append(link.ref)
        elif isinstance(link, CrossWikiLink):
            cross_wiki_links.append((link.wiki, link.target))

    # Different spellings can collapse onto the same slug ("Auth" / "auth") and
    # punctuation-only entries can collapse to "". Keep the first occurrence of
    # each non-empty slug so the document never carries duplicate or empty topics.
    topic_slugs = dict.fromkeys(to_kebab_case(topic) for topic in frontmatter.topics)
    topics = tuple(topic for topic in topic_slugs if topic)

    return PageDocument(
        slug=slug,
        title=title,
        summary=frontmatter.summary,
        file_path=page_path,
        relative_path=relative_path,
        content_hash=sha256(raw.encode("utf-8")).hexdigest(),
        updated_at=int(page_path.stat().st_mtime),
        archived_at=frontmatter.archived_at,
        superseded_by=frontmatter.superseded_by,
        topics=topics,
        file_refs=dedupe_file_refs(file_refs),
        page_links=tuple(sorted(set(page_links))),
        cross_wiki_links=tuple(sorted(set(cross_wiki_links))),
        body=frontmatter.body,
    )
60
+
61
+
62
def frontmatter_file_refs(files: tuple[str, ...]) -> tuple[FileReference, ...]:
    """Turn frontmatter ``files`` entries into file references.

    Entries whose normalized path comes back empty are dropped.
    """
    collected: list[FileReference] = []
    for entry in files:
        directory = looks_like_dir(entry)
        canonical = normalize_reference_path(entry, directory)
        preserved = normalize_reference_path_preserving_case(entry, directory)
        if not canonical:
            continue
        reference = FileReference(
            path=canonical,
            original_path=preserved,
            is_dir=directory,
        )
        collected.append(reference)
    return tuple(collected)
77
+
78
+
79
def dedupe_file_refs(refs: list[FileReference]) -> tuple[FileReference, ...]:
    """Collapse references sharing ``(path, is_dir)`` and return them sorted.

    When several references share a key, the last one in ``refs`` is kept.
    """

    def identity(ref: FileReference) -> tuple[str, bool]:
        return (ref.path, ref.is_dir)

    latest = {identity(ref): ref for ref in refs}
    ordered = list(latest.values())
    ordered.sort(key=identity)
    return tuple(ordered)
@@ -0,0 +1,94 @@
1
+ import re
2
+ from datetime import UTC, date, datetime
3
+ from typing import Any
4
+
5
+ import frontmatter
6
+ from pydantic import BaseModel, ConfigDict, ValidationError, field_validator
7
+ from yaml import YAMLError
8
+
9
+ from codealmanac.services.wiki.models import ParsedFrontmatter
10
+
11
+
12
def parse_frontmatter(raw: str) -> ParsedFrontmatter:
    """Parse a page's frontmatter, falling back to "no metadata" on failure.

    When loading or validating the metadata raises ``YAMLError``,
    ``ValueError`` or ``ValidationError``, the whole input is returned as the
    body with every metadata field left at its default.
    """
    try:
        post = frontmatter.loads(raw)
        fields = FrontmatterFields.model_validate(post.metadata)
    except (YAMLError, ValueError, ValidationError):
        return ParsedFrontmatter(body=raw)
    else:
        body = post.content
        return ParsedFrontmatter(
            page_id=fields.page_id,
            title=fields.title,
            summary=fields.summary,
            topics=fields.topics,
            files=fields.files,
            archived_at=fields.archived_at,
            superseded_by=fields.superseded_by,
            body=body,
        )
28
+
29
+
30
def strip_frontmatter(raw: str) -> str:
    """Return only the body part of ``raw`` as seen by ``parse_frontmatter``."""
    parsed = parse_frontmatter(raw)
    return parsed.body
32
+
33
+
34
+ def first_h1(body: str) -> str | None:
35
+ for line in body.splitlines()[:40]:
36
+ match = re.match(r"^#\s+(.+?)\s*#*\s*$", line)
37
+ if match is not None:
38
+ return match.group(1)
39
+ return None
40
+
41
+
42
class FrontmatterFields(BaseModel):
    """Lenient view of the frontmatter keys this package understands.

    Unknown keys are ignored. Every validator runs in ``before`` mode and
    coerces unusable input to an empty value instead of rejecting it.
    """

    model_config = ConfigDict(extra="ignore", frozen=True)

    page_id: str | None = None
    title: str | None = None
    summary: str | None = None
    topics: tuple[str, ...] = ()
    files: tuple[str, ...] = ()
    archived_at: int | None = None
    superseded_by: str | None = None

    @field_validator("page_id", "title", "summary", "superseded_by", mode="before")
    @classmethod
    def optional_text(cls, value: Any) -> str | None:
        """Return the stripped string, or ``None`` for blanks and non-strings."""
        if isinstance(value, str) and value.strip():
            return value.strip()
        return None

    @field_validator("topics", "files", mode="before")
    @classmethod
    def text_tuple(cls, value: Any) -> tuple[str, ...]:
        """Keep the non-blank string items of a list/tuple, stripped.

        Anything that is not a list or tuple yields an empty tuple.
        """
        if not isinstance(value, list | tuple):
            return ()
        return tuple(
            item.strip() for item in value if isinstance(item, str) and item.strip()
        )

    @field_validator("archived_at", mode="before")
    @classmethod
    def epoch_seconds(cls, value: Any) -> int | None:
        """Coerce a date-like value to whole epoch seconds.

        Accepts ``datetime`` and ``date`` objects, numbers, and ISO-8601
        strings. Naive values are treated as UTC. Anything that cannot be
        converted yields ``None``.
        """
        # datetime must be tested before date: datetime is a date subclass.
        if isinstance(value, datetime):
            return timestamp_seconds(value)
        if isinstance(value, date):
            return timestamp_seconds(
                datetime(value.year, value.month, value.day, tzinfo=UTC)
            )
        if isinstance(value, int | float):
            try:
                return int(value)
            except (OverflowError, ValueError):
                # YAML ".inf" / ".nan" parse to floats that int() rejects.
                # OverflowError is not turned into a ValidationError by
                # pydantic, so it would otherwise escape parse_frontmatter.
                return None
        if isinstance(value, str) and value.strip():
            try:
                parsed = datetime.fromisoformat(value.strip())
            except ValueError:
                return None
            return timestamp_seconds(parsed)
        return None
89
+
90
+
91
+ def timestamp_seconds(value: datetime) -> int:
92
+ if value.tzinfo is None:
93
+ value = value.replace(tzinfo=UTC)
94
+ return int(value.timestamp())
@@ -0,0 +1,142 @@
1
+ from collections.abc import Callable
2
+ from io import StringIO
3
+ from pathlib import Path
4
+ from uuid import uuid4
5
+
6
+ from ruamel.yaml import YAML
7
+ from ruamel.yaml.comments import CommentedMap, CommentedSeq
8
+
9
+ from codealmanac.core.errors import ValidationFailed
10
+ from codealmanac.core.models import CodeAlmanacModel
11
+ from codealmanac.core.slug import to_kebab_case
12
+
13
+
14
class PageTopicsRewrite(CodeAlmanacModel):
    """A planned change: the page file to rewrite and the topics it should get."""

    # Page file whose frontmatter is to be rewritten.
    path: Path
    # Topic list to write into that file.
    topics: tuple[str, ...]
17
+
18
+
19
def rewrite_page_topics(path: Path, topics: tuple[str, ...]) -> None:
    """Rewrite the ``topics`` list in a page's frontmatter, in place.

    The frontmatter is round-tripped through ruamel.yaml with quote
    preservation. The body is copied through untouched, and a page without
    frontmatter gets a new block. The result is written to a sibling
    temporary file and then moved over the original.

    Args:
        path: Page file to rewrite (read and written as UTF-8 bytes).
        topics: Topic list the page should end up with.

    Raises:
        ValidationFailed: If the existing frontmatter cannot be parsed or is
            not a YAML mapping.
        OSError: If writing or replacing the file fails; the temporary file
            is removed before the error propagates.
    """
    raw = path.read_bytes().decode("utf-8")
    split = split_frontmatter(raw)
    # Keep the line-ending style the existing frontmatter uses.
    line_ending = "\r\n" if "\r\n" in split.frontmatter else "\n"
    yaml = YAML(typ="rt")
    yaml.preserve_quotes = True
    if split.frontmatter.strip():
        try:
            data = yaml.load(split.frontmatter) or CommentedMap()
        except Exception as error:
            # Same error contract as read_page_topics for unparsable YAML.
            raise ValidationFailed(f"invalid frontmatter: {path}") from error
        if not isinstance(data, CommentedMap):
            raise ValidationFailed(f"frontmatter must be a YAML mapping: {path}")
    else:
        data = CommentedMap()

    apply_topics(data, topics)

    output = StringIO()
    yaml.dump(data, output)
    frontmatter = output.getvalue().rstrip("\n")
    if line_ending != "\n":
        frontmatter = frontmatter.replace("\n", line_ending)
    next_raw = f"---{line_ending}{frontmatter}{line_ending}---{line_ending}"
    next_raw += split.body

    temporary = path.with_name(f".{path.name}.{uuid4().hex}.tmp")
    try:
        temporary.write_bytes(next_raw.encode("utf-8"))
        temporary.replace(path)
    except OSError:
        # Do not leave a stray hidden ".tmp" file next to the page.
        temporary.unlink(missing_ok=True)
        raise
44
+
45
+
46
def plan_page_topic_rewrites(
    pages_path: Path,
    transform: Callable[[tuple[str, ...]], tuple[str, ...]],
) -> tuple[PageTopicsRewrite, ...]:
    """Work out which pages would change if ``transform`` were applied.

    Every ``*.md`` file under ``pages_path`` is visited in sorted order. A
    rewrite is planned only when the canonicalized transform result differs
    from the page's current canonical topics. Nothing is written here.
    """
    if not pages_path.is_dir():
        return ()
    planned: list[PageTopicsRewrite] = []
    for markdown_file in sorted(pages_path.rglob("*.md")):
        current = read_page_topics(markdown_file)
        updated = canonical_topic_tuple(transform(current))
        if updated == current:
            continue
        planned.append(PageTopicsRewrite(path=markdown_file, topics=updated))
    return tuple(planned)
59
+
60
+
61
def apply_page_topic_rewrites(rewrites: tuple[PageTopicsRewrite, ...]) -> int:
    """Carry out each planned rewrite in order and return how many there were."""
    for planned in rewrites:
        rewrite_page_topics(planned.path, planned.topics)
    applied = len(rewrites)
    return applied
65
+
66
+
67
def read_page_topics(path: Path) -> tuple[str, ...]:
    """Return the canonical topics currently listed in a page's frontmatter.

    A page with no (or blank) frontmatter has no topics. Raises
    ``ValidationFailed`` when the frontmatter cannot be loaded or is not a
    YAML mapping.
    """
    text = path.read_bytes().decode("utf-8")
    block = split_frontmatter(text).frontmatter
    if not block.strip():
        return ()
    loader = YAML(typ="rt")
    loader.preserve_quotes = True
    try:
        loaded = loader.load(block) or CommentedMap()
    except Exception as error:
        raise ValidationFailed(f"invalid frontmatter: {path}") from error
    if not isinstance(loaded, CommentedMap):
        raise ValidationFailed(f"frontmatter must be a YAML mapping: {path}")
    listed = tuple(str(item) for item in topic_sequence(loaded))
    return canonical_topic_tuple(listed)
81
+
82
+
83
class FrontmatterSplit:
    """Result of splitting a page into frontmatter text and body text.

    ``frontmatter`` is the text between the ``---`` fences (without the
    fences); ``body`` is everything after the closing fence line. When no
    frontmatter block is recognised, ``frontmatter`` is empty and ``body`` is
    the whole input.
    """

    def __init__(self, frontmatter: str, body: str):
        self.frontmatter = frontmatter
        self.body = body


def split_frontmatter(raw: str) -> FrontmatterSplit:
    """Split ``raw`` on its leading ``---`` block, if it starts with one.

    The line-ending style (CRLF or LF) is taken from the opening fence line.
    """
    if raw.startswith("---\r\n"):
        return split_with_delimiter(raw, "\r\n")
    if raw.startswith("---\n"):
        return split_with_delimiter(raw, "\n")
    return FrontmatterSplit(frontmatter="", body=raw)


def split_with_delimiter(raw: str, line_ending: str) -> FrontmatterSplit:
    """Split ``raw``, which starts with ``---`` + ``line_ending``, at its closing fence.

    Returns an empty frontmatter and the untouched input as body when no
    closing fence is found.
    """
    opener = f"---{line_ending}"
    closer = f"{line_ending}---"
    # Begin the search on the opener's own line ending: for an empty block
    # ("---\n---\n") that line ending is also the start of the closer, and
    # searching from after it would miss the closing fence entirely.
    end = raw.find(closer, len(opener) - len(line_ending))
    if end == -1:
        return FrontmatterSplit(frontmatter="", body=raw)
    # For the empty-block case ``end`` lies before ``len(opener)`` and the
    # slice is simply empty.
    frontmatter = raw[len(opener) : end]
    body_start = end + len(closer)
    # Drop the line ending that terminates the closing fence, if present.
    if raw.startswith(line_ending, body_start):
        body_start += len(line_ending)
    return FrontmatterSplit(frontmatter=frontmatter, body=raw[body_start:])
109
+
110
+
111
def apply_topics(data: CommentedMap, topics: tuple[str, ...]) -> None:
    """Make ``data["topics"]`` contain exactly ``topics``, reusing the sequence.

    Existing entries that are still wanted stay in place (as plain strings).
    Unwanted entries and repeats are removed, and missing topics are appended
    in the order given.
    """
    sequence = topic_sequence(data)
    wanted = set(topics)
    kept: set[str] = set()
    # Walk backwards so deleting an entry never shifts an index still to visit.
    for position in reversed(range(len(sequence))):
        text = str(sequence[position])
        if text in wanted and text not in kept:
            sequence[position] = text
            kept.add(text)
        else:
            del sequence[position]
    for topic in topics:
        if topic in kept:
            continue
        sequence.append(topic)
        kept.add(topic)
    data["topics"] = sequence
128
+
129
+
130
def topic_sequence(data: CommentedMap) -> CommentedSeq:
    """Return the ``topics`` entry of ``data`` as a ``CommentedSeq``.

    An existing ``CommentedSeq`` is returned as-is (same object). A plain list
    is copied into a new sequence with each item stringified. Anything else,
    including a missing key, yields a new empty sequence.
    """
    current = data.get("topics")
    if isinstance(current, CommentedSeq):
        return current
    fresh = CommentedSeq()
    if isinstance(current, list):
        for item in current:
            fresh.append(str(item))
    return fresh
138
+
139
+
140
def canonical_topic_tuple(topics: tuple[str, ...]) -> tuple[str, ...]:
    """Kebab-case every topic, dropping empty results and later duplicates."""
    ordered: dict[str, None] = {}
    for topic in topics:
        slug = to_kebab_case(str(topic))
        if slug:
            # setdefault keeps the position of the first occurrence.
            ordered.setdefault(slug, None)
    return tuple(ordered)
@@ -0,0 +1,69 @@
1
+ from enum import StrEnum
2
+ from pathlib import Path
3
+
4
+ from codealmanac.core.models import CodeAlmanacModel
5
+
6
+
7
+ class WikilinkKind(StrEnum):
8
+ PAGE = "page"
9
+ FILE = "file"
10
+ FOLDER = "folder"
11
+ CROSS_WIKI = "xwiki"
12
+
13
+
14
class FileReference(CodeAlmanacModel):
    """A reference from a page to a file or directory path."""

    # Normalized path.
    path: str
    # Path in its case-preserving normalized form.
    original_path: str
    # True when the reference points at a directory.
    is_dir: bool
18
+
19
+
20
class PageLink(CodeAlmanacModel):
    """A wikilink that targets another page."""

    kind: WikilinkKind
    # Target of the link.
    target: str
23
+
24
+
25
class FileLink(CodeAlmanacModel):
    """A wikilink that carries a file reference."""

    kind: WikilinkKind
    # The file reference the link carries.
    ref: FileReference
28
+
29
+
30
class FolderLink(CodeAlmanacModel):
    """A wikilink that carries a folder reference."""

    kind: WikilinkKind
    # The folder reference the link carries.
    ref: FileReference
33
+
34
+
35
class CrossWikiLink(CodeAlmanacModel):
    """A wikilink that targets something in a different wiki."""

    kind: WikilinkKind
    # Name of the other wiki.
    wiki: str
    # Target inside that wiki.
    target: str
39
+
40
+
41
+ Wikilink = PageLink | FileLink | FolderLink | CrossWikiLink
42
+
43
+
44
class ParsedFrontmatter(CodeAlmanacModel):
    """Frontmatter values of a page together with the page body.

    Every metadata field has an empty default, so a page without usable
    frontmatter can be represented by its body alone.
    """

    page_id: str | None = None
    title: str | None = None
    summary: str | None = None
    topics: tuple[str, ...] = ()
    files: tuple[str, ...] = ()
    # Archive time as an integer timestamp, if set.
    archived_at: int | None = None
    superseded_by: str | None = None
    # Page text; the only required field.
    body: str
53
+
54
+
55
class PageDocument(CodeAlmanacModel):
    """A wiki page as loaded from disk, with its links and references extracted."""

    slug: str
    title: str
    summary: str | None
    # Location of the page file.
    file_path: Path
    # Page path relative to the pages directory.
    relative_path: str
    # Hash of the page content.
    content_hash: str
    # Last-modified time as an integer timestamp.
    updated_at: int
    archived_at: int | None
    superseded_by: str | None
    topics: tuple[str, ...]
    file_refs: tuple[FileReference, ...]
    # Targets of links to other pages.
    page_links: tuple[str, ...]
    # (wiki, target) pairs for links into other wikis.
    cross_wiki_links: tuple[tuple[str, str], ...]
    # Page text.
    body: str