diary-docs 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,468 @@
1
+ """
2
+ Regex-based symbol extractors for 10 programming/markup languages.
3
+
4
+ Limitations (by design):
5
+ - No syntax validation — purely regex match on lines.
6
+ - No brace/scope depth tracking — parent assignment uses heuristics
7
+ (indentation or closest preceding declaration).
8
+ - Multi-line declarations (e.g. TypeScript generics spanning several lines)
9
+ are matched on the starting line only.
10
+ - Nested classes/record-inside-method in brace languages are not tracked
11
+ — only the first-level nesting is captured for methods inside types.
12
+
13
+ Language support:
14
+ .py — Python
15
+ .ts / .tsx — TypeScript
16
+ .js / .jsx — JavaScript
17
+ .php — PHP
18
+ .java — Java
19
+ .go — Go
20
+ .cs — C#
21
+ .yaml / .yml — YAML
22
+ .json — JSON
23
+ .md / .mdx — Markdown
24
+ """
25
+
26
+ import json
27
+ import re
28
+ from pathlib import Path
29
+
30
+ # ── Type definitions ──────────────────────────────────────────────────
31
+ # Each symbol dict:
32
+ # name – identifier name
33
+ # type – 'class' | 'function' | 'method' | 'interface' | 'struct'
33
+ # | 'enum' | 'trait' | 'record' | 'type' | 'heading' | 'key'
35
+ # line – 1-based line number
36
+ # parent – name of enclosing type (or None)
37
+ # namespace – qualified scope (future use; currently empty)
38
+ # signature – raw declaration line(s) (reserved; currently always the empty string)
39
+ # end_line – same as line (no block-size inference)
40
+
41
+ # ── Helpers ────────────────────────────────────────────────────────────
42
+
43
+ def _find_parent(lines, type_decls, method_line, method_indent):
44
+ """
45
+ Find the most recent type-declaration whose indentation is *strictly*
46
+ less than *method_indent* and whose line precedes *method_line*.
47
+ Returns the type name or None.
48
+ """
49
+ parent = None
50
+ for name, _line, _indent in reversed(type_decls):
51
+ if _line < method_line and _indent < method_indent:
52
+ parent = name
53
+ break
54
+ return parent
55
+
56
+
57
+ def _make_symbol(name, typ, line, parent=None, namespace=""):
58
+ return {
59
+ "name": name,
60
+ "type": typ,
61
+ "line": line,
62
+ "parent": parent,
63
+ "namespace": namespace,
64
+ "signature": "",
65
+ "end_line": line,
66
+ }
67
+
68
+
69
+ # ── Language handlers ──────────────────────────────────────────────────
70
+
71
def _extract_python(content: str) -> list[dict]:
    """
    Collect ``class`` and ``def`` / ``async def`` declarations from Python text.

    Each line is examined on its own: blank lines and lines whose first
    non-blank character is ``#`` are ignored. A class is recorded together
    with its indentation; a function's parent is the most recent class that
    is indented strictly less than the function (see ``_find_parent``).
    """
    class_re = re.compile(r"class\s+(\w+)")
    def_re = re.compile(r"(?:async\s+)?def\s+(\w+)")

    source_lines = content.split("\n")
    seen_classes: list[tuple[str, int, int]] = []  # (name, line, indent)
    found: list[dict] = []

    for line_no, raw in enumerate(source_lines, start=1):
        body = raw.lstrip()
        if body == "" or body[0] == "#":
            continue
        depth = len(raw) - len(body)

        class_hit = class_re.match(body)
        if class_hit is not None:
            class_name = class_hit.group(1)
            seen_classes.append((class_name, line_no, depth))
            found.append(_make_symbol(class_name, "class", line_no))
            continue

        def_hit = def_re.match(body)
        if def_hit is not None:
            owner = _find_parent(source_lines, seen_classes, line_no, depth)
            found.append(
                _make_symbol(def_hit.group(1), "function", line_no, owner)
            )

    return found
96
+
97
+
98
def _extract_typescript(content: str) -> list[dict]:
    """
    Extract type and function declarations from TypeScript source.

    Recognised per line (no multi-line matching):
      * ``class`` / ``interface`` / ``type`` / ``enum`` declarations, with
        optional ``export``, ``default`` and ``abstract`` prefixes; the
        symbol type is the matched keyword.
      * ``function`` declarations, including ``async`` and generator
        (``function*``) forms.
      * ``const|let|var name = (`` / ``= async (`` arrow-style assignments,
        only when they start the line.

    Lines starting with ``//``, ``/*`` or ``*`` are treated as comments and
    skipped. The ``*`` case covers JSDoc continuation lines, which would
    otherwise produce bogus symbols from prose such as
    ``* This class wraps ...``.

    A function's parent is the nearest preceding type declaration with
    strictly smaller indentation (see ``_find_parent``).
    """
    lines = content.split("\n")
    type_decls: list[tuple[str, int, int]] = []  # (name, line, indent)
    symbols: list[dict] = []

    type_pattern = re.compile(
        r"(?:export\s+)?(?:default\s+)?(?:abstract\s+)?"
        r"(class|interface|type|enum)\s+(\w+)"
    )
    # Accepts `function foo`, `function* foo`, `function *foo`, `function * foo`.
    func_pattern = re.compile(
        r"(?:export\s+)?(?:default\s+)?"
        r"(?:async\s+)?function(?:\s*\*\s*|\s+)(\w+)"
    )
    # Arrow-function shorthand: `const foo = (...) =>` or `let foo = (...) =>`
    arrow_pattern = re.compile(r"(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s*)?\(")

    for i, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith(("//", "/*", "*")):
            continue
        indent = len(line) - len(line.lstrip())
        line_no = i + 1

        tm = type_pattern.search(stripped)
        if tm:
            kind, name = tm.group(1), tm.group(2)
            type_decls.append((name, line_no, indent))
            symbols.append(_make_symbol(name, kind, line_no))
            continue

        # Declarations may appear anywhere in the line; arrow assignments
        # must start it.
        fm = func_pattern.search(stripped) or arrow_pattern.match(stripped)
        if fm:
            parent = _find_parent(lines, type_decls, line_no, indent)
            symbols.append(_make_symbol(fm.group(1), "function", line_no, parent))

    return symbols
143
+
144
+
145
def _extract_javascript(content: str) -> list[dict]:
    """
    Extract class and function declarations from JavaScript source.

    Recognised per line (no multi-line matching):
      * ``class Name`` declarations.
      * ``function`` declarations, including ``async`` and generator forms
        in any spacing (``function* f``, ``function *f``, ``function * f``).
      * ``const|let|var name = (`` / ``= async (`` / ``= function``
        assignments, only when they start the line.

    Lines starting with ``//``, ``/*`` or ``*`` are treated as comments and
    skipped. The ``*`` case covers JSDoc continuation lines, which would
    otherwise produce bogus symbols from prose such as
    ``* Returns the class name``.

    A function's parent is the nearest preceding class with strictly smaller
    indentation (see ``_find_parent``).
    """
    lines = content.split("\n")
    type_decls: list[tuple[str, int, int]] = []  # (name, line, indent)
    symbols: list[dict] = []

    class_pattern = re.compile(r"class\s+(\w+)")
    # Either an optional-space-delimited `*` (generator) or plain whitespace
    # must separate `function` from the name.
    func_pattern = re.compile(
        r"(?:async\s+)?function(?:\s*\*\s*|\s+)(\w+)"
    )
    arrow_pattern = re.compile(
        r"(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s*)?(?:\(|function)"
    )

    for i, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith(("//", "/*", "*")):
            continue
        indent = len(line) - len(line.lstrip())
        line_no = i + 1

        cm = class_pattern.search(stripped)
        if cm:
            name = cm.group(1)
            type_decls.append((name, line_no, indent))
            symbols.append(_make_symbol(name, "class", line_no))
            continue

        # Declarations may appear anywhere in the line; assignments must
        # start it.
        fm = func_pattern.search(stripped) or arrow_pattern.match(stripped)
        if fm:
            parent = _find_parent(lines, type_decls, line_no, indent)
            symbols.append(_make_symbol(fm.group(1), "function", line_no, parent))

    return symbols
187
+
188
+
189
def _extract_php(content: str) -> list[dict]:
    """
    Extract class/interface/trait and function declarations from PHP source.

    Lines are matched individually. Skipped lines: blank lines, ``//`` and
    ``#`` comments, ``/*`` block-comment openers, ``*`` docblock
    continuation lines, and lines starting with ``<?``. Skipping ``*`` lines
    keeps docblock prose such as ``* This class handles ...`` from being
    reported as a declaration.

    A function's parent is the nearest preceding type declaration with
    strictly smaller indentation (see ``_find_parent``).
    """
    lines = content.split("\n")
    type_decls: list[tuple[str, int, int]] = []  # (name, line, indent)
    symbols: list[dict] = []

    type_pattern = re.compile(
        r"(?:abstract\s+)?(class|interface|trait)\s+(\w+)"
    )
    func_pattern = re.compile(
        r"(?:public|private|protected)?\s*(?:static\s+)?function\s+(\w+)"
    )
    skipped_prefixes = ("//", "#", "<?", "/*", "*")

    for i, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith(skipped_prefixes):
            continue
        indent = len(line) - len(line.lstrip())
        line_no = i + 1

        tm = type_pattern.search(stripped)
        if tm:
            kind, name = tm.group(1), tm.group(2)
            type_decls.append((name, line_no, indent))
            symbols.append(_make_symbol(name, kind, line_no))
            continue

        fm = func_pattern.search(stripped)
        if fm:
            parent = _find_parent(lines, type_decls, line_no, indent)
            symbols.append(_make_symbol(fm.group(1), "function", line_no, parent))

    return symbols
224
+
225
+
226
def _extract_java(content: str) -> list[dict]:
    """
    Extract type and method declarations from Java source, line by line.

    Types (``class`` / ``interface`` / ``enum``) are recorded with their
    indentation. A method is any line containing
    ``<word> <name>(`` (optionally preceded by modifiers / generics) where
    neither ``<word>`` nor ``<name>`` is a keyword listed in ``non_method``.
    Checking the leading word as well as the name rejects statements such as
    ``return helper(x);``, ``new Foo(...)`` and ``else run();`` that have the
    same shape as a declaration.

    Skipped lines: blank lines, ``//`` / ``/*`` / ``*`` comment lines,
    ``import`` / ``package`` lines and lines starting with ``@``
    (annotations), the latter only after the type check.

    A method's parent is the nearest preceding type declaration with
    strictly smaller indentation (see ``_find_parent``). Methods are emitted
    with symbol type ``"method"``.
    """
    lines = content.split("\n")
    type_decls: list[tuple[str, int, int]] = []  # (name, line, indent)
    symbols: list[dict] = []

    type_pattern = re.compile(
        r"(?:public|private|protected)?\s*"
        r"(?:abstract|final|static\s+)?"
        r"(class|interface|enum)\s+(\w+)"
    )
    # Java methods: optional modifiers + return-type + name + (
    # group(1) is the word in return-type position, group(2) the name.
    method_pattern = re.compile(
        r"(?:public|private|protected)?\s*"
        r"(?:static|final|abstract|synchronized|native|transient|volatile)?\s*"
        r"(?:<[^>]*>\s*)?"                 # generics before return type
        r"(\w+)(?:<[^>]*>)?(?:\[\])?\s+"   # return type (simplified)
        r"(\w+)\s*\("
    )

    # Keywords that make a line look like a method definition when it isn't.
    # They are reserved words, so they can be neither a return type nor a
    # method name.
    non_method = {
        "if", "for", "while", "switch", "catch", "class", "interface",
        "enum", "return", "new", "this", "super", "throws",
        "else", "throw", "assert",
    }

    for i, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith(("//", "/*", "*")):
            continue
        if stripped.startswith(("import", "package")):
            continue
        indent = len(line) - len(line.lstrip())
        line_no = i + 1

        tm = type_pattern.search(stripped)
        if tm:
            kind, name = tm.group(1), tm.group(2)
            type_decls.append((name, line_no, indent))
            symbols.append(_make_symbol(name, kind, line_no))
            continue

        # Annotation lines carry no declaration of their own.
        if stripped.startswith("@"):
            continue

        mm = method_pattern.search(stripped)
        if mm is None:
            continue
        leading_word, name = mm.group(1), mm.group(2)
        if leading_word in non_method or name in non_method:
            continue
        parent = _find_parent(lines, type_decls, line_no, indent)
        symbols.append(_make_symbol(name, "method", line_no, parent))

    return symbols
276
+
277
+
278
def _extract_go(content: str) -> list[dict]:
    """
    Extract struct, interface, function and method declarations from Go source.

    Lines are matched individually; blank lines and lines starting with
    ``//`` or ``/*`` are skipped.

    Methods (``func (recv *Type) Name(...)``) get their parent from the
    receiver: the last whitespace-separated token inside the receiver
    parentheses, with any leading ``*`` and any ``[...]`` type-parameter
    suffix removed. Go methods are declared at the same indentation as their
    type, so the indentation heuristic in ``_find_parent`` cannot link them;
    it is used only as a fallback when no receiver type can be read, and for
    plain functions.

    Both functions and methods are emitted with symbol type ``"function"``.
    """
    lines = content.split("\n")
    type_decls: list[tuple[str, int, int]] = []  # (name, line, indent)
    symbols: list[dict] = []

    struct_pattern = re.compile(r"type\s+(\w+)\s+struct")
    iface_pattern = re.compile(r"type\s+(\w+)\s+interface")
    # Top-level func: func Name(...)
    func_pattern = re.compile(r"func\s+(\w+)\s*\(")
    # Method on type: func (recv *Type|Type) Name(...)
    # group(1) is the raw receiver text, group(2) the method name.
    method_pattern = re.compile(r"func\s+\(([\w\s\*\[\]]+)\)\s+(\w+)\s*\(")

    for i, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith(("//", "/*")):
            continue
        indent = len(line) - len(line.lstrip())
        line_no = i + 1

        sm = struct_pattern.search(stripped)
        if sm:
            name = sm.group(1)
            type_decls.append((name, line_no, indent))
            symbols.append(_make_symbol(name, "struct", line_no))
            continue

        im = iface_pattern.search(stripped)
        if im:
            name = im.group(1)
            type_decls.append((name, line_no, indent))
            symbols.append(_make_symbol(name, "interface", line_no))
            continue

        mm = method_pattern.search(stripped)
        if mm:
            # "s *Server" -> "Server"; "*List[T]" -> "List"; "Server" -> "Server"
            receiver_tokens = mm.group(1).split()
            receiver_type = ""
            if receiver_tokens:
                receiver_type = receiver_tokens[-1].lstrip("*").split("[")[0]
            parent = receiver_type or _find_parent(
                lines, type_decls, line_no, indent
            )
            symbols.append(_make_symbol(mm.group(2), "function", line_no, parent))
            continue

        fm = func_pattern.search(stripped)
        # A line that opens with a receiver is a method even if the method
        # pattern could not parse it; never report it as a plain function.
        if fm and not stripped.startswith("func ("):
            parent = _find_parent(lines, type_decls, line_no, indent)
            symbols.append(_make_symbol(fm.group(1), "function", line_no, parent))

    return symbols
328
+
329
+
330
def _extract_csharp(content: str) -> list[dict]:
    """
    Extract type and method declarations from C# source, line by line.

    Types (``class`` / ``interface`` / ``struct`` / ``enum`` / ``record``)
    are recorded with their indentation. A method is any line containing
    ``<word> <name>(`` (optionally preceded by modifiers) where neither
    ``<word>`` nor ``<name>`` is a keyword listed in ``non_method``.
    Checking the leading word as well as the name rejects statements such as
    ``return Foo(x);``, ``await FooAsync(x);``, ``new Foo(...)`` and
    ``foreach (var x in Foo())`` that have the same shape as a declaration.

    Skipped lines: blank lines, ``//`` / ``/*`` / ``*`` comment lines,
    ``using `` / ``namespace `` lines, ``#`` preprocessor lines and lines
    starting with ``[`` (attributes).

    A method's parent is the nearest preceding type declaration with
    strictly smaller indentation (see ``_find_parent``). Methods are emitted
    with symbol type ``"method"``.
    """
    lines = content.split("\n")
    type_decls: list[tuple[str, int, int]] = []  # (name, line, indent)
    symbols: list[dict] = []

    type_pattern = re.compile(
        r"(?:public|private|protected|internal)?\s*"
        r"(?:abstract|sealed|static|partial|readonly)?\s*"
        r"(class|interface|struct|enum|record)\s+(\w+)"
    )
    # group(1) is the word in return-type position, group(2) the name.
    method_pattern = re.compile(
        r"(?:public|private|protected|internal)?\s*"
        r"(?:static|virtual|override|abstract|sealed|async|unsafe|new|partial)?\s*"
        r"(\w+)(?:<[^>]*>)?(?:\[\])?\s+"   # return type (simplified)
        r"(\w+)\s*\("
    )

    # Keywords that make a line look like a method definition when it isn't.
    non_method = {
        "if", "for", "while", "switch", "catch", "class", "interface",
        "struct", "enum", "record", "return", "new", "this", "base",
        "sizeof", "typeof", "nameof", "throw", "yield", "using",
        "else", "await", "in",
    }

    for i, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith(("//", "/*", "*")):
            continue
        if stripped.startswith(("using ", "namespace ", "#")):
            continue
        indent = len(line) - len(line.lstrip())
        line_no = i + 1

        # Skip attributes
        if stripped.startswith("["):
            continue

        tm = type_pattern.search(stripped)
        if tm:
            kind, name = tm.group(1), tm.group(2)
            type_decls.append((name, line_no, indent))
            symbols.append(_make_symbol(name, kind, line_no))
            continue

        mm = method_pattern.search(stripped)
        if mm is None:
            continue
        leading_word, name = mm.group(1), mm.group(2)
        if leading_word in non_method or name in non_method:
            continue
        parent = _find_parent(lines, type_decls, line_no, indent)
        symbols.append(_make_symbol(name, "method", line_no, parent))

    return symbols
378
+
379
+
380
def _extract_yaml(content: str) -> list[dict]:
    """
    Report the top-level keys of a YAML document as ``"key"`` symbols.

    A line counts as top-level when it is non-empty and does not begin with
    a space, a tab or ``#``. The key is the leading run of word characters,
    underscores and hyphens that is followed (after optional whitespace) by
    a colon.
    """
    key_re = re.compile(r"(\w[\w_-]*)\s*:")
    found: list[dict] = []
    for line_no, raw in enumerate(content.split("\n"), start=1):
        # Indented lines, comments and empty lines cannot hold a top-level key.
        if not raw or raw.startswith((" ", "\t", "#")):
            continue
        hit = key_re.match(raw)
        if hit is not None:
            found.append(_make_symbol(hit.group(1), "key", line_no))
    return found
389
+
390
+
391
def _extract_json(content: str) -> list[dict]:
    """
    Report the top-level keys of a JSON object as ``"key"`` symbols.

    Returns an empty list when *content* is not valid JSON or when the
    top-level value is not an object.

    The text is scanned once, tracking bracket depth and skipping over
    string literals, so only keys that sit directly inside the outermost
    object are reported. A nested key that happens to share its name with a
    top-level key is therefore not reported, and text inside string values
    is never mistaken for a key. Key names are decoded with ``json.loads``
    so escape sequences are resolved. ``line`` is the line of the key's
    opening quote.
    """
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return []
    if not isinstance(data, dict):
        return []

    symbols: list[dict] = []
    size = len(content)
    depth = 0       # number of currently open '{' / '[' brackets
    line_no = 1
    pos = 0
    # The document parsed successfully above, so every string literal is
    # terminated and contains no raw newline; the inner loop relies on that.
    while pos < size:
        ch = content[pos]
        if ch == "\n":
            line_no += 1
        elif ch in "{[":
            depth += 1
        elif ch in "}]":
            depth -= 1
        elif ch == '"':
            start = pos
            pos += 1
            while content[pos] != '"':
                if content[pos] == "\\":
                    pos += 1  # skip the escaped character
                pos += 1
            if depth == 1:
                # Directly inside the top-level object: a string followed
                # by ':' is a key, anything else is a value.
                tail = pos + 1
                while tail < size and content[tail] in " \t\r\n":
                    tail += 1
                if tail < size and content[tail] == ":":
                    key = json.loads(content[start:pos + 1])
                    symbols.append(_make_symbol(key, "key", line_no))
        pos += 1
    return symbols
406
+
407
+
408
def _extract_markdown(content: str) -> list[dict]:
    """
    Report ATX headings (``#`` through ``######``) as ``"heading"`` symbols.

    The symbol name is the heading text with surrounding whitespace removed;
    the heading level is not recorded. ``line`` is the 1-based line on which
    the ``#`` marker starts.
    """
    heading_re = re.compile(r"^(#{1,6})\s+(.+)", re.MULTILINE)
    return [
        _make_symbol(
            hit.group(2).strip(),
            "heading",
            # newlines before the match start + 1 == 1-based line number
            content.count("\n", 0, hit.start()) + 1,
        )
        for hit in heading_re.finditer(content)
    ]
418
+
419
+
420
+ # ── Extension map ─────────────────────────────────────────────────────
421
# Lower-case file suffix (with leading dot) -> extractor for that language.
# Built from one (handler, suffixes) pair per language so aliases such as
# .ts/.tsx stay next to each other; insertion order follows the listing.
EXTENSION_MAP: dict[str, callable] = {
    suffix: handler
    for handler, suffixes in (
        (_extract_python, (".py",)),
        (_extract_typescript, (".ts", ".tsx")),
        (_extract_javascript, (".js", ".jsx")),
        (_extract_php, (".php",)),
        (_extract_java, (".java",)),
        (_extract_go, (".go",)),
        (_extract_csharp, (".cs",)),
        (_extract_yaml, (".yaml", ".yml")),
        (_extract_json, (".json",)),
        (_extract_markdown, (".md", ".mdx")),
    )
    for suffix in suffixes
}
437
+
438
+
439
+ # ── Public API ─────────────────────────────────────────────────────────
440
+
441
def extract_symbols(file_path: Path, content: str) -> list[dict]:
    """
    Dispatch *content* to the extractor registered for the file's suffix.

    Parameters
    ----------
    file_path : Path
        Path of the source file; only its suffix is read, case-insensitively.
    content : str
        Raw text of the file.

    Returns
    -------
    list[dict]
        Symbol records with keys name, type, line, parent, namespace,
        signature and end_line. Empty when *content* is blank or
        whitespace-only, or when no extractor is registered in
        ``EXTENSION_MAP`` for the suffix (including a missing suffix).
    """
    # Blank input is answered before the path is looked at.
    if content.strip() == "":
        return []

    extractor = EXTENSION_MAP.get(file_path.suffix.lower())
    return extractor(content) if extractor is not None else []
@@ -0,0 +1,62 @@
1
+ """gitignore — manage .gitignore entries for the knowledge index directory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+
8
# Legacy pattern that ignored the whole index directory.
OLD_ENTRY = "docs/.index/"
# Current pattern: ignore only the generated knowledge databases.
ENTRY = "docs/.index/knowledge-*.db"
# Comment line written above ENTRY when it is added to a .gitignore.
HEADER = "# DIARY Knowledge Index"


def ensure_gitignore(root_path: Path) -> bool:
    """Ensure ``docs/.index/knowledge-*.db`` is listed in ``.gitignore`` at *root_path*.

    Behaviour, in order:

    1. No ``.gitignore`` exists: create one containing the header and the
       entry.
    2. The entry is already present (compared after stripping whitespace):
       leave the file untouched.
    3. The legacy entry ``docs/.index/`` is present: replace its first
       occurrence with the new entry and drop any further occurrences, so
       the new entry appears exactly once.
    4. Otherwise append the header and the entry, inserting a newline first
       if the existing (non-empty) content does not end with one.

    The file is read and written as UTF-8 rather than the locale encoding,
    so the result does not depend on the platform.

    Parameters
    ----------
    root_path : Path
        Repository root directory that should contain a ``.gitignore``.

    Returns
    -------
    bool
        ``True`` if the file was created or modified, ``False`` if unchanged.
    """
    gitignore_path = root_path / ".gitignore"

    # Read first and react to the failure instead of exists()-then-write,
    # which leaves a window between the check and the write.
    try:
        original = gitignore_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        gitignore_path.write_text(f"{HEADER}\n{ENTRY}\n", encoding="utf-8")
        return True

    lines = original.split("\n")
    bare_lines = [line.strip() for line in lines]

    if ENTRY in bare_lines:
        return False

    if OLD_ENTRY in bare_lines:
        new_lines: list[str] = []
        replaced = False
        for line, bare in zip(lines, bare_lines):
            if bare != OLD_ENTRY:
                new_lines.append(line)
            elif not replaced:
                new_lines.append(ENTRY)
                replaced = True
            # Later copies of the legacy entry are dropped; keeping them
            # would duplicate ENTRY.
        gitignore_path.write_text("\n".join(new_lines), encoding="utf-8")
        return True

    # Append. Separate from the existing last line when it lacks a final
    # newline, but do not open an empty file with a blank line.
    separator = "\n" if original and not original.endswith("\n") else ""
    gitignore_path.write_text(
        f"{original}{separator}{HEADER}\n{ENTRY}\n", encoding="utf-8"
    )
    return True