diary-docs 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diary/git_utils.py ADDED
@@ -0,0 +1,202 @@
1
+ """git_utils — Git operations for branch detection, diff, and detached HEAD handling.
2
+
3
+ All functions use ``subprocess.run(['git', ...])`` with pure stdlib — no external
4
+ git libraries. Every public function handles errors gracefully, returning sensible
5
+ defaults instead of raising.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import subprocess
11
+ import unicodedata
12
+ from pathlib import Path
13
+
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # Public API
17
+ # ---------------------------------------------------------------------------
18
+
19
+
20
def is_git_repo(workspace_path: Path) -> bool:
    """Return ``True`` if *workspace_path* is inside a Git repository.

    The directory itself and each of its ancestors are checked for a ``.git``
    entry.  Both forms Git uses are accepted: a ``.git`` directory (regular
    clone) and a ``.git`` file (linked worktree or submodule checkout, where
    the file points at the real Git directory).

    Parameters
    ----------
    workspace_path : Path
        Repository root or any directory inside the working tree.

    Returns
    -------
    bool
        ``False`` when no ``.git`` entry is found or the path cannot be
        inspected.
    """
    start = Path(workspace_path)
    try:
        # Resolve first so relative paths such as ``Path(".")`` have ancestors
        # to walk; ``Path(".").parents`` on its own is empty.
        start = start.resolve()
    except (OSError, RuntimeError):
        # Unresolvable path (e.g. a symlink loop): fall back to the raw path.
        pass

    for candidate in (start, *start.parents):
        try:
            if (candidate / ".git").exists():
                return True
        except OSError:
            # Unreadable ancestor — keep looking further up the tree.
            continue
    return False
23
+
24
+
25
def get_current_branch(workspace_path: Path) -> str:
    """Return the current branch name, or ``"HEAD"`` if in detached HEAD state.

    ``git symbolic-ref`` is tried first because it can name the branch even
    before the first commit exists (where ``git rev-parse HEAD`` fails).  When
    HEAD is not a symbolic ref, ``git rev-parse --abbrev-ref HEAD`` is used as
    a fallback; it prints ``"HEAD"`` for a detached checkout.

    Parameters
    ----------
    workspace_path : Path
        Path to the repository root (or any directory inside it).

    Returns
    -------
    str
        Branch name, ``"HEAD"`` if detached, or empty string on failure
        (not a repository, ``git`` missing, or both commands failed).
    """
    if not is_git_repo(workspace_path):
        return ""

    commands = (
        ["git", "symbolic-ref", "--quiet", "--short", "HEAD"],
        ["git", "rev-parse", "--abbrev-ref", "HEAD"],
    )
    for command in commands:
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                # Decode explicitly: with the locale default, a branch name
                # that is not valid in that encoding would raise
                # UnicodeDecodeError, which is not an OSError.
                encoding="utf-8",
                errors="replace",
                cwd=workspace_path,
            )
        except (FileNotFoundError, OSError):
            # git is not installed or the directory is not usable.
            return ""

        branch = result.stdout.strip()
        if result.returncode == 0 and branch:
            return branch
    return ""
53
+
54
+
55
def is_detached_head(workspace_path: Path) -> bool:
    """Tell whether the repository at *workspace_path* has a detached HEAD.

    The check relies on ``get_current_branch`` reporting the literal string
    ``"HEAD"`` for a detached checkout.
    """
    branch = get_current_branch(workspace_path)
    return branch == "HEAD"
58
+
59
+
60
def get_git_diff(workspace_path: Path, base_ref: str = "HEAD") -> list[dict]:
    """Return a list of changed files compared to *base_ref*.

    Each entry is a dict with ``"status"`` (``"A"``/``"M"``/``"D"``) and
    ``"path"`` keys.  Rename/copy detection is disabled, so a renamed file is
    reported as a ``"D"`` entry for the old path plus an ``"A"`` entry for the
    new one instead of a single two-path record.

    Parameters
    ----------
    workspace_path : Path
        Repository root.
    base_ref : str
        Git ref to diff against (default ``"HEAD"``).  Values that are empty
        or start with ``-`` are rejected so they cannot be interpreted by git
        as command-line options.

    Returns
    -------
    list[dict]
        Empty list on failure or no changes.
    """
    if not base_ref or base_ref.startswith("-"):
        return []
    if not is_git_repo(workspace_path):
        return []

    try:
        result = subprocess.run(
            [
                "git",
                "diff",
                "--name-status",
                # Keep the output to one path per record.
                "--no-renames",
                # NUL-terminated fields: paths are emitted verbatim instead of
                # being C-quoted when they contain non-ASCII or special bytes.
                "-z",
                base_ref,
                # End of revisions; nothing after this is treated as a ref.
                "--",
            ],
            capture_output=True,
            text=True,
            # Explicit decoding avoids UnicodeDecodeError under locales whose
            # default encoding cannot represent a path.
            encoding="utf-8",
            errors="replace",
            cwd=workspace_path,
        )
    except (FileNotFoundError, OSError):
        return []

    if result.returncode != 0:
        return []

    # With ``-z`` the stream is ``status NUL path NUL status NUL path NUL ...``.
    fields = result.stdout.split("\0")
    return [
        {"status": status, "path": path}
        for status, path in zip(fields[0::2], fields[1::2])
        if status and path
    ]
103
+
104
+
105
def get_file_diff(workspace_path: Path, file_path: str) -> str:
    """Return the unified diff for *file_path* against ``HEAD``.

    The comparison is working tree versus ``HEAD``, so both staged and
    unstaged modifications are included.

    Parameters
    ----------
    workspace_path : Path
        Repository root.
    file_path : str
        Path to the file, relative to the repository root.

    Returns
    -------
    str
        Empty string if the file does not differ from ``HEAD`` or on error.
    """
    if not is_git_repo(workspace_path):
        return ""

    try:
        result = subprocess.run(
            ["git", "diff", "HEAD", "--", file_path],
            capture_output=True,
            text=True,
            # File contents are arbitrary bytes; decode leniently so a file
            # that is not valid UTF-8 cannot raise UnicodeDecodeError here.
            encoding="utf-8",
            errors="replace",
            cwd=workspace_path,
        )
    except (FileNotFoundError, OSError):
        return ""

    if result.returncode != 0:
        return ""
    return result.stdout
135
+
136
+
137
def sanitize_branch_name(name: str) -> str:
    """Turn a raw branch name into a fragment that is safe inside a filename.

    Steps, applied in this order:

    1. Unicode NFC normalisation.
    2. Lowercasing.
    3. ``/`` becomes ``_``.
    4. Every character that is neither alphanumeric nor one of ``_``, ``.``,
       ``-`` becomes ``_``.
    5. Leading and trailing ``_``, ``.``, ``-`` are removed.
    6. The result is cut to at most 100 characters.

    Parameters
    ----------
    name : str
        Raw branch name (e.g. ``"feature/my-branch"``).

    Returns
    -------
    str
        The sanitised fragment, or ``"default"`` when the input is empty or
        nothing survives sanitisation.
    """
    fallback = "default"
    if not name:
        return fallback

    allowed_punctuation = frozenset("_-.")
    normalised = unicodedata.normalize("NFC", name).lower().replace("/", "_")

    # Keep letters/digits and the allowed punctuation; mask everything else.
    masked = "".join(
        char if (char.isalnum() or char in allowed_punctuation) else "_"
        for char in normalised
    )

    # Trim separators from both ends first, then enforce the length cap.
    fragment = masked.strip("_.-")[:100]
    if fragment:
        return fragment
    return fallback
183
+
184
+
185
def get_index_path(workspace_path: Path, branch: str) -> Path:
    """Build the location of the knowledge index database for one branch.

    The result has the form
    ``<workspace>/docs/.index/knowledge-<sanitized-branch>.db``.  Nothing is
    created on disk; only the path is computed.

    Parameters
    ----------
    workspace_path : Path
        Repository root.
    branch : str
        Raw branch name; it is passed through ``sanitize_branch_name`` here.

    Returns
    -------
    Path
    """
    filename = f"knowledge-{sanitize_branch_name(branch)}.db"
    return workspace_path.joinpath("docs", ".index", filename)
@@ -0,0 +1,44 @@
1
+ """diary indexer — file scanner, symbol extraction, and search database.
2
+
3
+ Submodules are imported eagerly, but each import is guarded so the package does
4
+ not crash if individual submodule files do not yet exist (e.g. during initial scaffolding).
5
+ """
6
+
7
# Names re-exported by the package; each is bound by one of the guarded
# imports below.
__all__ = [
    "scan_files",
    "extract_symbols",
    "IndexDatabase",
    "generate_report",
    "ensure_gitignore",
]


# ---------------------------------------------------------------------------
# Guarded imports — each submodule is imported inside its own try/except so
# the package still imports when a submodule file hasn't been created yet.
# A name whose import fails is bound to ``None`` instead of the real object.
# NOTE(review): ``except ImportError`` also hides import failures raised from
# inside a submodule that does exist (e.g. a missing dependency) — confirm
# that this is intended.
# ---------------------------------------------------------------------------

try:
    from .scanner import scan_files  # type: ignore[import-untyped]
except ImportError:
    scan_files = None  # type: ignore[assignment]

try:
    from .extractors import extract_symbols  # type: ignore[import-untyped]
except ImportError:
    extract_symbols = None  # type: ignore[assignment]

try:
    from .database import IndexDatabase  # type: ignore[import-untyped]
except ImportError:
    IndexDatabase = None  # type: ignore[assignment]

try:
    from .reporter import generate_report  # type: ignore[import-untyped]
except ImportError:
    generate_report = None  # type: ignore[assignment]

try:
    from .gitignore import ensure_gitignore  # type: ignore[import-untyped]
except ImportError:
    ensure_gitignore = None  # type: ignore[assignment]
@@ -0,0 +1,340 @@
1
+ """SQLite database layer for the knowledge indexer.
2
+
3
+ Provides an IndexDatabase class with schema creation, CRUD operations,
4
+ WAL mode, schema versioning, batch commits, and context manager support.
5
+ Python stdlib only (sqlite3, pathlib).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import sqlite3
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
# Expected value of ``PRAGMA user_version``.  ``IndexDatabase.create_tables``
# drops and recreates every table when a database reports a different,
# non-zero version, so bump this whenever the DDL below changes.
SCHEMA_VERSION = 1

# fmt: off
# DDL for every table, executed in list order by ``create_tables``.  All
# statements use IF NOT EXISTS, so re-running them on an up-to-date database
# changes nothing.
_CREATE_TABLES = [
    # One row per file; ``path`` is unique.
    """CREATE TABLE IF NOT EXISTS files (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE NOT NULL,
        rel_path TEXT NOT NULL,
        language TEXT NOT NULL,
        sha256 TEXT NOT NULL,
        size INTEGER NOT NULL,
        modified REAL NOT NULL,
        lines INTEGER NOT NULL
    )""",
    # Symbols, each owned by a row in ``files``.
    """CREATE TABLE IF NOT EXISTS symbols (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL REFERENCES files(id),
        name TEXT NOT NULL,
        fqn TEXT,
        type TEXT NOT NULL,
        parent TEXT,
        namespace TEXT,
        start_line INTEGER,
        end_line INTEGER,
        visibility TEXT DEFAULT 'public',
        signature TEXT
    )""",
    # One row per document; ``path`` is unique.
    """CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE NOT NULL,
        title TEXT,
        headings TEXT,
        summary TEXT,
        sha256 TEXT NOT NULL
    )""",
    # Links a document to a symbol (and optionally a file) with a confidence.
    """CREATE TABLE IF NOT EXISTS relations (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        doc_id INTEGER NOT NULL REFERENCES documents(id),
        symbol_id INTEGER NOT NULL REFERENCES symbols(id),
        file_id INTEGER REFERENCES files(id),
        confidence REAL NOT NULL DEFAULT 1.0,
        reason TEXT
    )""",
    # Dependency edges from a file row to a target path (free text, no FK).
    """CREATE TABLE IF NOT EXISTS dependencies (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        source_id INTEGER NOT NULL REFERENCES files(id),
        target_path TEXT NOT NULL,
        dep_type TEXT DEFAULT 'import'
    )""",
    # Summary text attached to a file and/or a document (both FKs nullable).
    """CREATE TABLE IF NOT EXISTS summaries (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER REFERENCES files(id),
        doc_id INTEGER REFERENCES documents(id),
        content TEXT NOT NULL,
        summary_type TEXT DEFAULT 'concise'
    )""",
    # path -> sha256 pairs; no foreign keys.
    # NOTE(review): how this differs from files.sha256 is not visible here.
    """CREATE TABLE IF NOT EXISTS hashes (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE NOT NULL,
        sha256 TEXT NOT NULL
    )""",
    # Free-form key/value store; left untouched by ``clear_all``.
    """CREATE TABLE IF NOT EXISTS metadata (
        key TEXT PRIMARY KEY,
        value TEXT NOT NULL
    )""",
]
83
+
84
# Secondary indexes, created by ``create_tables`` after the tables exist.
# NOTE(review): ``files.path`` and ``documents.path`` are declared UNIQUE,
# which SQLite already backs with an automatic index, so the last two entries
# look redundant — confirm before removing them.
_INDEXES = [
    "CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)",
    "CREATE INDEX IF NOT EXISTS idx_symbols_file_id ON symbols(file_id)",
    "CREATE INDEX IF NOT EXISTS idx_relations_doc_id ON relations(doc_id)",
    "CREATE INDEX IF NOT EXISTS idx_relations_symbol_id ON relations(symbol_id)",
    "CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)",
    "CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path)",
]
92
+
93
# All data tables (everything except metadata) — used by ``clear_all`` to
# empty the database and by ``create_tables`` to drop tables on a schema
# version mismatch.
_DATA_TABLES = [
    # Delete child rows first (FK-safe order): tables that reference
    # ``files``/``documents``/``symbols`` come before the tables they point at.
    "dependencies",
    "relations",
    "summaries",
    "symbols",
    "hashes",
    "documents",
    "files",
]
# fmt: on

# Number of insert calls between automatic commits (see
# ``IndexDatabase._maybe_commit``).
_BATCH_SIZE = 100
107
+
108
+
109
class IndexDatabase:
    """SQLite-backed index database with WAL mode and schema versioning.

    Writes are committed in batches: every insert helper bumps an internal
    counter and the connection is committed each time the counter reaches a
    multiple of ``_BATCH_SIZE``.  Anything still pending is committed by
    :meth:`close`, which the context-manager protocol calls on exit.

    Parameters
    ----------
    db_path : Path
        Path to the SQLite database file.  Missing parent directories are
        created on a best-effort basis before the connection is opened.
    """

    def __init__(self, db_path: Path) -> None:
        self.db_path = db_path
        # Number of insert calls so far; drives the batch commits.
        self._count = 0

        target = str(db_path)
        if target != ":memory:":
            try:
                # sqlite3.connect() cannot create intermediate directories.
                Path(target).parent.mkdir(parents=True, exist_ok=True)
            except OSError:
                # Leave error reporting to sqlite3.connect() below so callers
                # keep seeing sqlite3.OperationalError for unusable paths.
                pass

        self.conn: sqlite3.Connection = sqlite3.connect(target)
        try:
            self.conn.execute("PRAGMA journal_mode=WAL;")
            self.conn.execute("PRAGMA foreign_keys=ON;")
        except sqlite3.Error:
            # Do not leak the handle when the connection cannot be configured.
            self.conn.close()
            raise

    def create_tables(self) -> None:
        """Create all tables, indexes, and set schema version.

        If the existing schema version (PRAGMA user_version) is non-zero and
        does not match the expected SCHEMA_VERSION, all tables are dropped and
        recreated.  A version of 0 (fresh or never-versioned database) only
        has its missing tables and indexes created.
        """
        cur = self.conn.execute("PRAGMA user_version")
        existing_version = cur.fetchone()[0]

        if existing_version != SCHEMA_VERSION:
            if existing_version != 0:
                logger.info(
                    "Schema version mismatch (existing=%d, expected=%d) — recreating",
                    existing_version,
                    SCHEMA_VERSION,
                )
                # Drop all known tables so we start fresh.  _DATA_TABLES is
                # ordered child-first, which keeps the drops FK-safe.
                for table in _DATA_TABLES:
                    self.conn.execute(f"DROP TABLE IF EXISTS {table}")
                self.conn.execute("DROP TABLE IF EXISTS metadata")

        for ddl in _CREATE_TABLES:
            self.conn.execute(ddl)

        for idx in _INDEXES:
            self.conn.execute(idx)

        # PRAGMA values cannot be bound as parameters; SCHEMA_VERSION is a
        # module constant, so formatting it into the statement is safe.
        self.conn.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
        self.conn.commit()

    # ------------------------------------------------------------------
    # Insert helpers
    # ------------------------------------------------------------------

    def _maybe_commit(self) -> None:
        """Count one write and commit on every ``_BATCH_SIZE``-th call."""
        self._count += 1
        if self._count % _BATCH_SIZE == 0:
            self.conn.commit()

    # ------------------------------------------------------------------
    # CRUD
    # ------------------------------------------------------------------

    def insert_file(
        self,
        path: str,
        rel_path: str,
        language: str,
        sha256: str,
        size: int,
        modified: float,
        lines: int,
    ) -> int:
        """Insert a row into ``files`` and return its id.

        If the insert violates a constraint and a row with the same *path*
        already exists, that row's id is returned and the stored row is left
        unchanged.

        Raises
        ------
        sqlite3.IntegrityError
            When the insert fails and no row with *path* exists.
        """
        try:
            cur = self.conn.execute(
                "INSERT INTO files (path, rel_path, language, sha256, size, modified, lines) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                (path, rel_path, language, sha256, size, modified, lines),
            )
            self._maybe_commit()
            return cur.lastrowid  # type: ignore[return-value]
        except sqlite3.IntegrityError:
            # File already indexed — return existing id
            cur = self.conn.execute("SELECT id FROM files WHERE path = ?", (path,))
            row = cur.fetchone()
            if row is not None:
                return row[0]
            raise

    def insert_symbol(
        self,
        file_id: int,
        name: str,
        fqn: Optional[str],
        sym_type: str,
        parent: Optional[str],
        namespace: str,
        start_line: int,
        end_line: int,
        visibility: str = "public",
        signature: str = "",
    ) -> Optional[int]:
        """Insert a row into ``symbols``; *sym_type* fills the ``type`` column.

        Returns
        -------
        Optional[int]
            The new row id, or ``None`` if SQLite rejected the write (the
            error is logged as a warning instead of being raised).
        """
        try:
            cur = self.conn.execute(
                "INSERT INTO symbols (file_id, name, fqn, type, parent, namespace, "
                "start_line, end_line, visibility, signature) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (file_id, name, fqn, sym_type, parent, namespace, start_line, end_line, visibility, signature),
            )
            self._maybe_commit()
            return cur.lastrowid  # type: ignore[return-value]
        except sqlite3.Error as exc:
            logger.warning("Failed to insert symbol %s: %s", name, exc)
            return None

    def insert_document(
        self,
        path: str,
        title: str,
        headings: str,
        summary: str,
        sha256: str,
    ) -> int:
        """Insert a row into ``documents`` and return its id.

        If the insert violates a constraint and a row with the same *path*
        already exists, that row's id is returned and the stored row is left
        unchanged.

        Raises
        ------
        sqlite3.IntegrityError
            When the insert fails and no row with *path* exists.
        """
        try:
            cur = self.conn.execute(
                "INSERT INTO documents (path, title, headings, summary, sha256) "
                "VALUES (?, ?, ?, ?, ?)",
                (path, title, headings, summary, sha256),
            )
            self._maybe_commit()
            return cur.lastrowid  # type: ignore[return-value]
        except sqlite3.IntegrityError:
            # Document already indexed — return existing id
            cur = self.conn.execute("SELECT id FROM documents WHERE path = ?", (path,))
            row = cur.fetchone()
            if row is not None:
                return row[0]
            raise

    def insert_relation(
        self,
        doc_id: int,
        symbol_id: int,
        file_id: Optional[int],
        confidence: float = 1.0,
        reason: str = "",
    ) -> Optional[int]:
        """Insert a row into ``relations``.

        *file_id* may be ``None``; the column is nullable in the schema.

        Returns
        -------
        Optional[int]
            The new row id, or ``None`` if SQLite rejected the write (the
            error is logged as a warning instead of being raised).
        """
        try:
            cur = self.conn.execute(
                "INSERT INTO relations (doc_id, symbol_id, file_id, confidence, reason) "
                "VALUES (?, ?, ?, ?, ?)",
                (doc_id, symbol_id, file_id, confidence, reason),
            )
            self._maybe_commit()
            return cur.lastrowid  # type: ignore[return-value]
        except sqlite3.Error as exc:
            logger.warning("Failed to insert relation: %s", exc)
            return None

    def insert_dependency(
        self,
        source_id: int,
        target_path: str,
        dep_type: str = "import",
    ) -> Optional[int]:
        """Insert a row into ``dependencies``.

        Returns
        -------
        Optional[int]
            The new row id, or ``None`` if SQLite rejected the write (the
            error is logged as a warning instead of being raised).
        """
        try:
            cur = self.conn.execute(
                "INSERT INTO dependencies (source_id, target_path, dep_type) "
                "VALUES (?, ?, ?)",
                (source_id, target_path, dep_type),
            )
            self._maybe_commit()
            return cur.lastrowid  # type: ignore[return-value]
        except sqlite3.Error as exc:
            logger.warning("Failed to insert dependency: %s", exc)
            return None

    def insert_summary(
        self,
        file_id: Optional[int],
        doc_id: Optional[int],
        content: str,
        summary_type: str = "concise",
    ) -> Optional[int]:
        """Insert a row into ``summaries``; either id may be ``None``.

        Returns
        -------
        Optional[int]
            The new row id, or ``None`` if SQLite rejected the write (the
            error is logged as a warning instead of being raised).
        """
        try:
            cur = self.conn.execute(
                "INSERT INTO summaries (file_id, doc_id, content, summary_type) "
                "VALUES (?, ?, ?, ?)",
                (file_id, doc_id, content, summary_type),
            )
            self._maybe_commit()
            return cur.lastrowid  # type: ignore[return-value]
        except sqlite3.Error as exc:
            logger.warning("Failed to insert summary: %s", exc)
            return None

    # ------------------------------------------------------------------
    # Bulk operations
    # ------------------------------------------------------------------

    def clear_all(self) -> None:
        """Delete all rows from data tables (preserves metadata).

        Also resets the AUTOINCREMENT counters by deleting from sqlite_sequence.
        The tables must already exist (see :meth:`create_tables`).
        """
        for table in _DATA_TABLES:
            self.conn.execute(f"DELETE FROM {table}")
        self.conn.execute("DELETE FROM sqlite_sequence")
        self.conn.commit()
        self._count = 0

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def close(self) -> None:
        """Commit pending changes and close the connection.

        A failed final commit is logged rather than raised, and the
        connection is closed regardless.  Calling this on an already closed
        connection is a no-op.
        """
        try:
            self.conn.commit()
        except sqlite3.ProgrammingError:
            # Connection already closed — nothing left to flush.
            pass
        except sqlite3.Error as exc:
            logger.warning("Final commit failed for %s: %s", self.db_path, exc)
        finally:
            self.conn.close()

    # ------------------------------------------------------------------
    # Context manager
    # ------------------------------------------------------------------

    def __enter__(self) -> "IndexDatabase":
        return self

    def __exit__(
        self,
        exc_type: Optional[type],
        exc_val: Optional[BaseException],
        exc_tb: Optional[object],
    ) -> None:
        # Pending writes are committed by close() even when the ``with`` body
        # raised; the exception itself is not suppressed.
        self.close()