diary-docs 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diary/__init__.py +1 -0
- diary/__main__.py +3 -0
- diary/aimb/__init__.py +48 -0
- diary/aimb/hasher.py +157 -0
- diary/aimb/merge.py +252 -0
- diary/aimb/parser.py +202 -0
- diary/cli.py +999 -0
- diary/git_utils.py +202 -0
- diary/indexer/__init__.py +44 -0
- diary/indexer/database.py +340 -0
- diary/indexer/extractors.py +468 -0
- diary/indexer/gitignore.py +62 -0
- diary/indexer/indexer.py +511 -0
- diary/indexer/reporter.py +137 -0
- diary/indexer/scanner.py +65 -0
- diary/sync/__init__.py +33 -0
- diary/sync/detector.py +405 -0
- diary/sync/engine.py +404 -0
- diary/sync/protocol.py +176 -0
- diary/templates.py +102 -0
- diary_docs-0.1.0.dist-info/METADATA +228 -0
- diary_docs-0.1.0.dist-info/RECORD +26 -0
- diary_docs-0.1.0.dist-info/WHEEL +5 -0
- diary_docs-0.1.0.dist-info/entry_points.txt +2 -0
- diary_docs-0.1.0.dist-info/licenses/LICENSE +21 -0
- diary_docs-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Regex-based symbol extractors for 10 programming/markup languages.
|
|
3
|
+
|
|
4
|
+
Limitations (by design):
|
|
5
|
+
- No syntax validation — purely regex match on lines.
|
|
6
|
+
- No brace/scope depth tracking — parent assignment uses heuristics
|
|
7
|
+
(indentation or closest preceding declaration).
|
|
8
|
+
- Multi-line declarations (e.g. TypeScript generics spanning several lines)
|
|
9
|
+
are matched on the starting line only.
|
|
10
|
+
- Nested classes/record-inside-method in brace languages are not tracked
|
|
11
|
+
— only the first-level nesting is captured for methods inside types.
|
|
12
|
+
|
|
13
|
+
Language support:
|
|
14
|
+
.py — Python
|
|
15
|
+
.ts — TypeScript
|
|
16
|
+
.js — JavaScript
|
|
17
|
+
.php — PHP
|
|
18
|
+
.java — Java
|
|
19
|
+
.go — Go
|
|
20
|
+
.cs — C#
|
|
21
|
+
.yaml / .yml — YAML
|
|
22
|
+
.json — JSON
|
|
23
|
+
.md — Markdown
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import json
import re
from collections.abc import Callable
from pathlib import Path
|
|
29
|
+
|
|
30
|
+
# ── Type definitions ──────────────────────────────────────────────────
|
|
31
|
+
# Each symbol dict:
|
|
32
|
+
# name – identifier name
|
|
33
|
+
# type – 'class' | 'function' | 'method' | 'type' | 'interface' | 'struct' | 'enum'
|
|
34
|
+
# | 'trait' | 'record' | 'heading' | 'key'
|
|
35
|
+
# line – 1-based line number
|
|
36
|
+
# parent – name of enclosing type (or None)
|
|
37
|
+
# namespace – qualified scope (future use; currently empty)
|
|
38
|
+
# signature – raw declaration text (not populated yet; currently always "")
|
|
39
|
+
# end_line – same as line (no block-size inference)
|
|
40
|
+
|
|
41
|
+
# ── Helpers ────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
def _find_parent(lines, type_decls, method_line, method_indent):
|
|
44
|
+
"""
|
|
45
|
+
Find the most recent type-declaration whose indentation is *strictly*
|
|
46
|
+
less than *method_indent* and whose line precedes *method_line*.
|
|
47
|
+
Returns the type name or None.
|
|
48
|
+
"""
|
|
49
|
+
parent = None
|
|
50
|
+
for name, _line, _indent in reversed(type_decls):
|
|
51
|
+
if _line < method_line and _indent < method_indent:
|
|
52
|
+
parent = name
|
|
53
|
+
break
|
|
54
|
+
return parent
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _make_symbol(name, typ, line, parent=None, namespace=""):
|
|
58
|
+
return {
|
|
59
|
+
"name": name,
|
|
60
|
+
"type": typ,
|
|
61
|
+
"line": line,
|
|
62
|
+
"parent": parent,
|
|
63
|
+
"namespace": namespace,
|
|
64
|
+
"signature": "",
|
|
65
|
+
"end_line": line,
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ── Language handlers ──────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
def _extract_python(content: str) -> list[dict]:
|
|
72
|
+
lines = content.split("\n")
|
|
73
|
+
classes: list[tuple[str, int, int]] = [] # (name, line, indent)
|
|
74
|
+
symbols: list[dict] = []
|
|
75
|
+
|
|
76
|
+
for i, line in enumerate(lines):
|
|
77
|
+
stripped = line.lstrip()
|
|
78
|
+
if not stripped or stripped.startswith("#"):
|
|
79
|
+
continue
|
|
80
|
+
indent = len(line) - len(stripped)
|
|
81
|
+
|
|
82
|
+
cm = re.match(r"class\s+(\w+)", stripped)
|
|
83
|
+
if cm:
|
|
84
|
+
classes.append((cm.group(1), i + 1, indent))
|
|
85
|
+
symbols.append(_make_symbol(cm.group(1), "class", i + 1))
|
|
86
|
+
continue
|
|
87
|
+
|
|
88
|
+
fm = re.match(r"(?:async\s+)?def\s+(\w+)", stripped)
|
|
89
|
+
if fm:
|
|
90
|
+
name = fm.group(1)
|
|
91
|
+
line_no = i + 1
|
|
92
|
+
parent = _find_parent(lines, classes, line_no, indent)
|
|
93
|
+
symbols.append(_make_symbol(name, "function", line_no, parent))
|
|
94
|
+
|
|
95
|
+
return symbols
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _extract_typescript(content: str) -> list[dict]:
|
|
99
|
+
lines = content.split("\n")
|
|
100
|
+
type_decls: list[tuple[str, int, int]] = []
|
|
101
|
+
symbols: list[dict] = []
|
|
102
|
+
|
|
103
|
+
type_pattern = re.compile(
|
|
104
|
+
r"(?:export\s+)?(?:default\s+)?(?:abstract\s+)?"
|
|
105
|
+
r"(class|interface|type|enum)\s+(\w+)"
|
|
106
|
+
)
|
|
107
|
+
func_pattern = re.compile(
|
|
108
|
+
r"(?:export\s+)?(?:default\s+)?"
|
|
109
|
+
r"(?:async\s+)?function\s+(\w+)"
|
|
110
|
+
)
|
|
111
|
+
# Arrow-function shorthand: `const foo = (...) =>` or `let foo = (...) =>`
|
|
112
|
+
arrow_pattern = re.compile(r"(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s*)?\(")
|
|
113
|
+
|
|
114
|
+
for i, line in enumerate(lines):
|
|
115
|
+
stripped = line.strip()
|
|
116
|
+
if not stripped or stripped.startswith("//") or stripped.startswith("/*"):
|
|
117
|
+
continue
|
|
118
|
+
indent = len(line) - len(line.lstrip())
|
|
119
|
+
|
|
120
|
+
tm = type_pattern.search(stripped)
|
|
121
|
+
if tm:
|
|
122
|
+
kind, name = tm.group(1), tm.group(2)
|
|
123
|
+
type_decls.append((name, i + 1, indent))
|
|
124
|
+
symbols.append(_make_symbol(name, kind, i + 1))
|
|
125
|
+
continue
|
|
126
|
+
|
|
127
|
+
fm = func_pattern.search(stripped)
|
|
128
|
+
if fm:
|
|
129
|
+
name = fm.group(1)
|
|
130
|
+
line_no = i + 1
|
|
131
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
132
|
+
symbols.append(_make_symbol(name, "function", line_no, parent))
|
|
133
|
+
continue
|
|
134
|
+
|
|
135
|
+
am = arrow_pattern.match(stripped)
|
|
136
|
+
if am:
|
|
137
|
+
name = am.group(1)
|
|
138
|
+
line_no = i + 1
|
|
139
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
140
|
+
symbols.append(_make_symbol(name, "function", line_no, parent))
|
|
141
|
+
|
|
142
|
+
return symbols
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _extract_javascript(content: str) -> list[dict]:
|
|
146
|
+
lines = content.split("\n")
|
|
147
|
+
type_decls: list[tuple[str, int, int]] = []
|
|
148
|
+
symbols: list[dict] = []
|
|
149
|
+
|
|
150
|
+
class_pattern = re.compile(r"class\s+(\w+)")
|
|
151
|
+
func_pattern = re.compile(
|
|
152
|
+
r"(?:async\s+)?function\s+(?:\*\s+)?(\w+)"
|
|
153
|
+
)
|
|
154
|
+
arrow_pattern = re.compile(
|
|
155
|
+
r"(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s*)?(?:\(|function)"
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
for i, line in enumerate(lines):
|
|
159
|
+
stripped = line.strip()
|
|
160
|
+
if not stripped or stripped.startswith("//") or stripped.startswith("/*"):
|
|
161
|
+
continue
|
|
162
|
+
indent = len(line) - len(line.lstrip())
|
|
163
|
+
|
|
164
|
+
cm = class_pattern.search(stripped)
|
|
165
|
+
if cm:
|
|
166
|
+
name = cm.group(1)
|
|
167
|
+
type_decls.append((name, i + 1, indent))
|
|
168
|
+
symbols.append(_make_symbol(name, "class", i + 1))
|
|
169
|
+
continue
|
|
170
|
+
|
|
171
|
+
fm = func_pattern.search(stripped)
|
|
172
|
+
if fm:
|
|
173
|
+
name = fm.group(1)
|
|
174
|
+
line_no = i + 1
|
|
175
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
176
|
+
symbols.append(_make_symbol(name, "function", line_no, parent))
|
|
177
|
+
continue
|
|
178
|
+
|
|
179
|
+
am = arrow_pattern.match(stripped)
|
|
180
|
+
if am:
|
|
181
|
+
name = am.group(1)
|
|
182
|
+
line_no = i + 1
|
|
183
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
184
|
+
symbols.append(_make_symbol(name, "function", line_no, parent))
|
|
185
|
+
|
|
186
|
+
return symbols
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _extract_php(content: str) -> list[dict]:
|
|
190
|
+
lines = content.split("\n")
|
|
191
|
+
type_decls: list[tuple[str, int, int]] = []
|
|
192
|
+
symbols: list[dict] = []
|
|
193
|
+
|
|
194
|
+
type_pattern = re.compile(
|
|
195
|
+
r"(?:abstract\s+)?(class|interface|trait)\s+(\w+)"
|
|
196
|
+
)
|
|
197
|
+
func_pattern = re.compile(
|
|
198
|
+
r"(?:public|private|protected)?\s*(?:static\s+)?function\s+(\w+)"
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
for i, line in enumerate(lines):
|
|
202
|
+
stripped = line.strip()
|
|
203
|
+
if not stripped or stripped.startswith("//") or stripped.startswith("#"):
|
|
204
|
+
continue
|
|
205
|
+
if stripped.startswith("<?") or stripped.startswith("//") or stripped.startswith("/*"):
|
|
206
|
+
continue
|
|
207
|
+
indent = len(line) - len(line.lstrip())
|
|
208
|
+
|
|
209
|
+
tm = type_pattern.search(stripped)
|
|
210
|
+
if tm:
|
|
211
|
+
kind, name = tm.group(1), tm.group(2)
|
|
212
|
+
type_decls.append((name, i + 1, indent))
|
|
213
|
+
symbols.append(_make_symbol(name, kind, i + 1))
|
|
214
|
+
continue
|
|
215
|
+
|
|
216
|
+
fm = func_pattern.search(stripped)
|
|
217
|
+
if fm:
|
|
218
|
+
name = fm.group(1)
|
|
219
|
+
line_no = i + 1
|
|
220
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
221
|
+
symbols.append(_make_symbol(name, "function", line_no, parent))
|
|
222
|
+
|
|
223
|
+
return symbols
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _extract_java(content: str) -> list[dict]:
|
|
227
|
+
lines = content.split("\n")
|
|
228
|
+
type_decls: list[tuple[str, int, int]] = []
|
|
229
|
+
symbols: list[dict] = []
|
|
230
|
+
|
|
231
|
+
type_pattern = re.compile(
|
|
232
|
+
r"(?:public|private|protected)?\s*"
|
|
233
|
+
r"(?:abstract|final|static\s+)?"
|
|
234
|
+
r"(class|interface|enum)\s+(\w+)"
|
|
235
|
+
)
|
|
236
|
+
# Java methods: optional modifiers + return-type + name + (
|
|
237
|
+
# Use a broad pattern and exclude non-method lines
|
|
238
|
+
method_pattern = re.compile(
|
|
239
|
+
r"(?:public|private|protected)?\s*"
|
|
240
|
+
r"(?:static|final|abstract|synchronized|native|transient|volatile)?\s*"
|
|
241
|
+
r"(?:<[^>]*>\s*)?" # generics before return type
|
|
242
|
+
r"\w+(?:<[^>]*>)?(?:\[\])?\s+" # return type (simplified)
|
|
243
|
+
r"(\w+)\s*\("
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
# Keywords that look like method definitions but aren't
|
|
247
|
+
non_method = {"if", "for", "while", "switch", "catch", "class", "interface", "enum", "return", "new", "this", "super", "throws"}
|
|
248
|
+
|
|
249
|
+
for i, line in enumerate(lines):
|
|
250
|
+
stripped = line.strip()
|
|
251
|
+
if not stripped or stripped.startswith("//") or stripped.startswith("/*") or stripped.startswith("*"):
|
|
252
|
+
continue
|
|
253
|
+
if stripped.startswith("import") or stripped.startswith("package"):
|
|
254
|
+
continue
|
|
255
|
+
indent = len(line) - len(line.lstrip())
|
|
256
|
+
|
|
257
|
+
tm = type_pattern.search(stripped)
|
|
258
|
+
if tm:
|
|
259
|
+
kind, name = tm.group(1), tm.group(2)
|
|
260
|
+
type_decls.append((name, i + 1, indent))
|
|
261
|
+
symbols.append(_make_symbol(name, kind, i + 1))
|
|
262
|
+
continue
|
|
263
|
+
|
|
264
|
+
# Check for annotations
|
|
265
|
+
if stripped.startswith("@"):
|
|
266
|
+
continue
|
|
267
|
+
|
|
268
|
+
mm = method_pattern.search(stripped)
|
|
269
|
+
if mm and mm.group(1) not in non_method:
|
|
270
|
+
name = mm.group(1)
|
|
271
|
+
line_no = i + 1
|
|
272
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
273
|
+
symbols.append(_make_symbol(name, "method", line_no, parent))
|
|
274
|
+
|
|
275
|
+
return symbols
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _extract_go(content: str) -> list[dict]:
|
|
279
|
+
lines = content.split("\n")
|
|
280
|
+
type_decls: list[tuple[str, int, int]] = []
|
|
281
|
+
symbols: list[dict] = []
|
|
282
|
+
|
|
283
|
+
struct_pattern = re.compile(r"type\s+(\w+)\s+struct")
|
|
284
|
+
iface_pattern = re.compile(r"type\s+(\w+)\s+interface")
|
|
285
|
+
# Top-level func: func Name(...)
|
|
286
|
+
func_pattern = re.compile(r"func\s+(\w+)\s*\(")
|
|
287
|
+
# Method on type: func (recv *Type|Type) Name(...)
|
|
288
|
+
method_pattern = re.compile(r"func\s+\([\w\s\*\[\]]+\)\s+(\w+)\s*\(")
|
|
289
|
+
|
|
290
|
+
for i, line in enumerate(lines):
|
|
291
|
+
stripped = line.strip()
|
|
292
|
+
if not stripped or stripped.startswith("//") or stripped.startswith("/*"):
|
|
293
|
+
continue
|
|
294
|
+
indent = len(line) - len(line.lstrip())
|
|
295
|
+
|
|
296
|
+
sm = struct_pattern.search(stripped)
|
|
297
|
+
if sm:
|
|
298
|
+
name = sm.group(1)
|
|
299
|
+
type_decls.append((name, i + 1, indent))
|
|
300
|
+
symbols.append(_make_symbol(name, "struct", i + 1))
|
|
301
|
+
continue
|
|
302
|
+
|
|
303
|
+
im = iface_pattern.search(stripped)
|
|
304
|
+
if im:
|
|
305
|
+
name = im.group(1)
|
|
306
|
+
type_decls.append((name, i + 1, indent))
|
|
307
|
+
symbols.append(_make_symbol(name, "interface", i + 1))
|
|
308
|
+
continue
|
|
309
|
+
|
|
310
|
+
mm = method_pattern.search(stripped)
|
|
311
|
+
if mm:
|
|
312
|
+
name = mm.group(1)
|
|
313
|
+
line_no = i + 1
|
|
314
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
315
|
+
symbols.append(_make_symbol(name, "function", line_no, parent))
|
|
316
|
+
continue
|
|
317
|
+
|
|
318
|
+
fm = func_pattern.search(stripped)
|
|
319
|
+
if fm:
|
|
320
|
+
name = fm.group(1)
|
|
321
|
+
# Skip if it's actually a method (already matched above)
|
|
322
|
+
if not stripped.strip().startswith("func ("):
|
|
323
|
+
line_no = i + 1
|
|
324
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
325
|
+
symbols.append(_make_symbol(name, "function", line_no, parent))
|
|
326
|
+
|
|
327
|
+
return symbols
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _extract_csharp(content: str) -> list[dict]:
|
|
331
|
+
lines = content.split("\n")
|
|
332
|
+
type_decls: list[tuple[str, int, int]] = []
|
|
333
|
+
symbols: list[dict] = []
|
|
334
|
+
|
|
335
|
+
type_pattern = re.compile(
|
|
336
|
+
r"(?:public|private|protected|internal)?\s*"
|
|
337
|
+
r"(?:abstract|sealed|static|partial|readonly)?\s*"
|
|
338
|
+
r"(class|interface|struct|enum|record)\s+(\w+)"
|
|
339
|
+
)
|
|
340
|
+
method_pattern = re.compile(
|
|
341
|
+
r"(?:public|private|protected|internal)?\s*"
|
|
342
|
+
r"(?:static|virtual|override|abstract|sealed|async|unsafe|new|partial)?\s*"
|
|
343
|
+
r"\w+(?:<[^>]*>)?(?:\[\])?\s+" # return type (simplified)
|
|
344
|
+
r"(\w+)\s*\("
|
|
345
|
+
)
|
|
346
|
+
|
|
347
|
+
non_method = {"if", "for", "while", "switch", "catch", "class", "interface",
|
|
348
|
+
"struct", "enum", "record", "return", "new", "this", "base",
|
|
349
|
+
"sizeof", "typeof", "nameof", "throw", "yield", "using"}
|
|
350
|
+
|
|
351
|
+
for i, line in enumerate(lines):
|
|
352
|
+
stripped = line.strip()
|
|
353
|
+
if not stripped or stripped.startswith("//") or stripped.startswith("/*") or stripped.startswith("*"):
|
|
354
|
+
continue
|
|
355
|
+
if stripped.startswith("using ") or stripped.startswith("namespace ") or stripped.startswith("#"):
|
|
356
|
+
continue
|
|
357
|
+
indent = len(line) - len(line.lstrip())
|
|
358
|
+
|
|
359
|
+
# Skip attributes
|
|
360
|
+
if stripped.startswith("["):
|
|
361
|
+
continue
|
|
362
|
+
|
|
363
|
+
tm = type_pattern.search(stripped)
|
|
364
|
+
if tm:
|
|
365
|
+
kind, name = tm.group(1), tm.group(2)
|
|
366
|
+
type_decls.append((name, i + 1, indent))
|
|
367
|
+
symbols.append(_make_symbol(name, kind, i + 1))
|
|
368
|
+
continue
|
|
369
|
+
|
|
370
|
+
mm = method_pattern.search(stripped)
|
|
371
|
+
if mm and mm.group(1) not in non_method:
|
|
372
|
+
name = mm.group(1)
|
|
373
|
+
line_no = i + 1
|
|
374
|
+
parent = _find_parent(lines, type_decls, line_no, indent)
|
|
375
|
+
symbols.append(_make_symbol(name, "method", line_no, parent))
|
|
376
|
+
|
|
377
|
+
return symbols
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _extract_yaml(content: str) -> list[dict]:
|
|
381
|
+
symbols: list[dict] = []
|
|
382
|
+
for i, line in enumerate(content.split("\n")):
|
|
383
|
+
# Top-level key: no leading whitespace, not a comment, not empty
|
|
384
|
+
if line and not line.startswith(" ") and not line.startswith("\t") and not line.startswith("#"):
|
|
385
|
+
m = re.match(r"(\w[\w_-]*)\s*:", line)
|
|
386
|
+
if m:
|
|
387
|
+
symbols.append(_make_symbol(m.group(1), "key", i + 1))
|
|
388
|
+
return symbols
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def _extract_json(content: str) -> list[dict]:
|
|
392
|
+
try:
|
|
393
|
+
data = json.loads(content)
|
|
394
|
+
except json.JSONDecodeError:
|
|
395
|
+
return []
|
|
396
|
+
if not isinstance(data, dict):
|
|
397
|
+
return []
|
|
398
|
+
symbols: list[dict] = []
|
|
399
|
+
# Find each key in the source to get its line number.
|
|
400
|
+
for m in re.finditer(r'"([^"]+)"\s*:', content):
|
|
401
|
+
key = m.group(1)
|
|
402
|
+
if key in data:
|
|
403
|
+
line_no = content[: m.start()].count("\n") + 1
|
|
404
|
+
symbols.append(_make_symbol(key, "key", line_no))
|
|
405
|
+
return symbols
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _extract_markdown(content: str) -> list[dict]:
|
|
409
|
+
symbols: list[dict] = []
|
|
410
|
+
pattern = re.compile(r"^(#{1,6})\s+(.+)", re.MULTILINE)
|
|
411
|
+
for m in pattern.finditer(content):
|
|
412
|
+
level = len(m.group(1))
|
|
413
|
+
text = m.group(2).strip()
|
|
414
|
+
# Calculate 1-based line number
|
|
415
|
+
line_no = content[: m.start()].count("\n") + 1
|
|
416
|
+
symbols.append(_make_symbol(text, "heading", line_no))
|
|
417
|
+
return symbols
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# ── Extension map ─────────────────────────────────────────────────────
|
|
421
|
+
# Maps a lower-cased file suffix (including the leading dot) to the handler
# that extracts symbols for it.  Every handler takes the file content as a
# string and returns a list of symbol dicts.  `.tsx`, `.jsx` and `.mdx`
# reuse the TypeScript, JavaScript and Markdown handlers respectively.
EXTENSION_MAP: dict[str, Callable[[str], list[dict]]] = {
    ".py": _extract_python,
    ".ts": _extract_typescript,
    ".tsx": _extract_typescript,
    ".js": _extract_javascript,
    ".jsx": _extract_javascript,
    ".php": _extract_php,
    ".java": _extract_java,
    ".go": _extract_go,
    ".cs": _extract_csharp,
    ".yaml": _extract_yaml,
    ".yml": _extract_yaml,
    ".json": _extract_json,
    ".md": _extract_markdown,
    ".mdx": _extract_markdown,
}
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
# ── Public API ─────────────────────────────────────────────────────────
|
|
440
|
+
|
|
441
|
+
def extract_symbols(file_path: Path, content: str) -> list[dict]:
    """
    Run the extractor registered for *file_path*'s extension over *content*.

    Parameters
    ----------
    file_path : Path
        Source file path; only its suffix (lower-cased) is used, to look up
        a handler in ``EXTENSION_MAP``.
    content : str
        Raw file content (may be empty).

    Returns
    -------
    list[dict]
        Whatever the selected handler returns: symbol dicts with the keys
        name, type, line, parent, namespace, signature, end_line.
        An empty list is returned when *content* is empty or
        whitespace-only, and when no handler is registered for the suffix.
    """
    # Nothing to scan: answer before looking at the extension at all.
    if content.strip() == "":
        return []

    handler = EXTENSION_MAP.get(file_path.suffix.lower())
    return [] if handler is None else handler(content)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""gitignore — manage .gitignore entries for the knowledge index directory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
OLD_ENTRY = "docs/.index/"
ENTRY = "docs/.index/knowledge-*.db"
HEADER = "# DIARY Knowledge Index"


def ensure_gitignore(root_path: Path) -> bool:
    """Ensure ``docs/.index/knowledge-*.db`` is listed in ``.gitignore`` at *root_path*.

    Behaviour, in order:

    * no ``.gitignore`` yet: one is created holding the header and the entry;
    * the entry is already present (ignoring surrounding whitespace):
      nothing is written;
    * the old entry ``docs/.index/`` is present: every such line is replaced
      with the new wildcard pattern;
    * otherwise the header and the entry are appended, preceded by a newline
      only when the existing content is non-empty and does not end with one.

    The file is read and written as UTF-8 with ``surrogateescape``, so the
    result does not depend on the platform's locale encoding and bytes that
    are not valid UTF-8 are written back unchanged.

    Parameters
    ----------
    root_path : Path
        Repository root directory that should contain a ``.gitignore``.

    Returns
    -------
    bool
        ``True`` if the file was created or modified, ``False`` if unchanged.
    """
    gitignore_path = root_path / ".gitignore"
    text_io = {"encoding": "utf-8", "errors": "surrogateescape"}

    # Read first and react to a missing file, rather than checking
    # existence and then writing.
    try:
        original = gitignore_path.read_text(**text_io)
    except FileNotFoundError:
        gitignore_path.write_text(f"{HEADER}\n{ENTRY}\n", **text_io)
        return True

    lines = original.split("\n")

    # Lines are compared stripped so trailing spaces or "\r" do not matter.
    if any(line.strip() == ENTRY for line in lines):
        return False

    if any(line.strip() == OLD_ENTRY for line in lines):
        migrated = [ENTRY if line.strip() == OLD_ENTRY else line for line in lines]
        gitignore_path.write_text("\n".join(migrated), **text_io)
        return True

    # Append — add a separating newline only when the existing content would
    # otherwise run into the header on the same line.
    separator = "\n" if original and not original.endswith("\n") else ""
    gitignore_path.write_text(
        f"{original}{separator}{HEADER}\n{ENTRY}\n", **text_io
    )
    return True
|