@thecat69/cache-ctrl 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +289 -78
- package/cache_ctrl.ts +107 -25
- package/package.json +2 -1
- package/skills/cache-ctrl-caller/SKILL.md +53 -114
- package/skills/cache-ctrl-external/SKILL.md +29 -89
- package/skills/cache-ctrl-local/SKILL.md +82 -164
- package/src/analysis/graphBuilder.ts +85 -0
- package/src/analysis/pageRank.ts +164 -0
- package/src/analysis/symbolExtractor.ts +240 -0
- package/src/cache/cacheManager.ts +53 -4
- package/src/cache/externalCache.ts +72 -77
- package/src/cache/graphCache.ts +12 -0
- package/src/cache/localCache.ts +2 -0
- package/src/commands/checkFiles.ts +9 -6
- package/src/commands/flush.ts +9 -2
- package/src/commands/graph.ts +131 -0
- package/src/commands/inspect.ts +13 -181
- package/src/commands/inspectExternal.ts +79 -0
- package/src/commands/inspectLocal.ts +134 -0
- package/src/commands/install.ts +6 -0
- package/src/commands/invalidate.ts +24 -24
- package/src/commands/list.ts +11 -11
- package/src/commands/map.ts +87 -0
- package/src/commands/prune.ts +20 -8
- package/src/commands/search.ts +9 -2
- package/src/commands/touch.ts +15 -25
- package/src/commands/uninstall.ts +103 -0
- package/src/commands/update.ts +65 -0
- package/src/commands/version.ts +14 -0
- package/src/commands/watch.ts +270 -0
- package/src/commands/writeExternal.ts +51 -0
- package/src/commands/writeLocal.ts +121 -0
- package/src/files/changeDetector.ts +15 -0
- package/src/files/gitFiles.ts +15 -0
- package/src/files/openCodeInstaller.ts +21 -2
- package/src/index.ts +314 -58
- package/src/search/keywordSearch.ts +24 -0
- package/src/types/cache.ts +38 -26
- package/src/types/commands.ts +123 -22
- package/src/types/result.ts +26 -9
- package/src/utils/errors.ts +14 -0
- package/src/utils/traversal.ts +42 -0
- package/src/commands/checkFreshness.ts +0 -123
- package/src/commands/write.ts +0 -170
- package/src/http/freshnessChecker.ts +0 -116
|
@@ -5,209 +5,127 @@ description: How to use cache-ctrl to detect file changes and manage the local c
|
|
|
5
5
|
|
|
6
6
|
# cache-ctrl — Local Cache Usage
|
|
7
7
|
|
|
8
|
-
Manage `.ai/local-context-gatherer_cache/context.json` to avoid redundant full-repo scans.
|
|
9
|
-
Three tiers of access — use the best one available.
|
|
10
|
-
|
|
11
|
-
## Availability Detection (run once at startup)
|
|
12
|
-
|
|
13
|
-
1. Call `cache_ctrl_check_files` (built-in tool).
|
|
14
|
-
- Success → **use Tier 1** for all operations below.
|
|
15
|
-
- Failure (tool not found / permission denied) → continue to step 2.
|
|
16
|
-
2. Run `bash: "which cache-ctrl"`.
|
|
17
|
-
- Exit 0 → **use Tier 2** for all operations below.
|
|
18
|
-
- Not found → **use Tier 3** for all operations below.
|
|
19
|
-
|
|
20
|
-
---
|
|
21
|
-
|
|
22
8
|
## Fact-Writing Rules
|
|
23
9
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
- **Each fact string must be ≤ 300 characters** (schema hard limit: 800). If an observation needs more, split it into two facts or summarize.
|
|
27
|
-
- **Max 30 facts per file.** Choose only the most architecturally meaningful observations.
|
|
28
|
-
- **Never write**: raw import lines, function bodies, code snippets, or verbatim text from the file.
|
|
29
|
-
- **Do write**: what the file exports, what pattern it uses, what dependencies it has, what its responsibility is.
|
|
30
|
-
|
|
31
|
-
**Good fact** ✅:
|
|
32
|
-
> `"Exports writeCommand — validates subject, merges per-path facts atomically, returns Result<WriteResult>"`
|
|
33
|
-
|
|
34
|
-
**Bad fact** ❌:
|
|
35
|
-
> `"import { ExternalCacheFileSchema, LocalCacheFileSchema } from '../types/cache.js'; import { ErrorCode, Result } from '../types/result.js'; import { WriteArgs, WriteResult } from '../types/commands.js'"` ← this is raw file content
|
|
36
|
-
|
|
37
|
-
**Global facts** are for cross-cutting structural observations only (e.g. CLI entry pattern, installation steps). Max 20, each ≤ 300 chars. Only update global_facts when you re-read a structural file (AGENTS.md, install.sh, package.json, *.toml, opencode.json).
|
|
38
|
-
|
|
39
|
-
---
|
|
40
|
-
|
|
41
|
-
## Mandatory: Write Before Return
|
|
10
|
+
Per-file entries use the `FileFacts` object shape:
|
|
42
11
|
|
|
43
|
-
|
|
12
|
+
```json
|
|
13
|
+
{
|
|
14
|
+
"summary": "One-sentence description of what this file does",
|
|
15
|
+
"role": "implementation",
|
|
16
|
+
"importance": 2,
|
|
17
|
+
"facts": ["Concise observation 1", "Concise observation 2"]
|
|
18
|
+
}
|
|
19
|
+
```
|
|
44
20
|
|
|
45
|
-
|
|
21
|
+
Fields:
|
|
22
|
+
- **`summary`** — mandatory. One sentence.
|
|
23
|
+
- **`role`** — mandatory. One of: `entry-point`, `interface`, `implementation`, `test`, `config`.
|
|
24
|
+
- **`importance`** — strongly recommended. `1` = core, `2` = supporting, `3` = peripheral.
|
|
25
|
+
- **`facts`** — optional. Max 10 items, each ≤ 300 chars.
|
|
46
26
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
5. Return your summary
|
|
27
|
+
Content quality rules:
|
|
28
|
+
- **Never write** raw import lines, code snippets, or verbatim file content.
|
|
29
|
+
- **Do write** concise architectural observations: purpose, key exports, constraints, dependencies, notable patterns.
|
|
30
|
+
- Write facts as **enumerable observations** — one entry per distinct property, up to the 10-item limit.
|
|
52
31
|
|
|
53
|
-
|
|
32
|
+
Good example ✅:
|
|
33
|
+
> `"Delegates local writes to writeLocalCommand and preserves unrelated paths through per-path merge"`
|
|
54
34
|
|
|
55
|
-
|
|
35
|
+
Bad example ❌:
|
|
36
|
+
> `"import { ExternalCacheFileSchema } from '../types/cache.js'"` ← raw file content
|
|
56
37
|
|
|
57
|
-
|
|
38
|
+
**Global facts** — cross-cutting structural observations only (CLI entry pattern, installation steps, etc.). Max 20, each ≤ 300 chars. Only update `global_facts` when re-reading a structural file: `AGENTS.md`, `install.sh`, `opencode.json`, `package.json`, `*.toml`.
|
|
58
39
|
|
|
59
|
-
|
|
40
|
+
## Scan Workflow
|
|
60
41
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
42
|
+
1. Call `cache_ctrl_check_files` to identify changed and new files.
|
|
43
|
+
2. Read only the changed/new files (skip unchanged ones).
|
|
44
|
+
3. Extract `FileFacts` per file (follow Fact-Writing Rules above).
|
|
45
|
+
4. Call `cache_ctrl_write_local` — **mandatory** (see Write-Before-Return Rule below for the skip exception).
|
|
46
|
+
5. Return your summary.
|
|
66
47
|
|
|
67
|
-
|
|
68
|
-
- `status: "unchanged"` → tracked files are content-stable; skip re-scan and return cached context.
|
|
69
|
-
- `status: "changed"` → at least one tracked file changed; proceed to **delta scan** (read content of `changed_files` + `new_files` only — do not re-read unchanged files).
|
|
70
|
-
- `status: "unchanged"` with empty `tracked_files` → cold start, proceed to scan.
|
|
48
|
+
> **⚠ Cache is non-exhaustive:** `status: "unchanged"` only confirms previously-tracked files are stable — it does not mean the file set is complete. Always check `new_files` and `deleted_git_files` in the response.
|
|
71
49
|
|
|
72
|
-
|
|
73
|
-
- `new_files` — untracked non-ignored files absent from cache, plus git-tracked files absent from cache when the cache is non-empty (blank-slate caches skip git-tracked files to avoid false positives on cold start)
|
|
74
|
-
- `deleted_git_files` — git-tracked files deleted from the working tree (reported by `git ls-files --deleted`)
|
|
50
|
+
## Write-Before-Return Rule
|
|
75
51
|
|
|
76
|
-
|
|
52
|
+
**Every invocation that reads any file MUST call `cache_ctrl_write_local` before returning.**
|
|
77
53
|
|
|
78
|
-
|
|
54
|
+
The only time you may skip the write is when ALL of the following are true:
|
|
79
55
|
|
|
80
|
-
|
|
56
|
+
| Condition | Required value |
|
|
57
|
+
|---|---|
|
|
58
|
+
| `changed_files` from `check_files` | `[]` |
|
|
59
|
+
| `new_files` from `check_files` | `[]` |
|
|
60
|
+
| No files were force-requested by caller | true |
|
|
61
|
+
| Cache already exists and is non-empty | true |
|
|
62
|
+
| This invocation was NOT triggered by a cache invalidation | true |
|
|
81
63
|
|
|
82
|
-
|
|
83
|
-
**Tier 2:** `cache-ctrl invalidate local`
|
|
84
|
-
**Tier 3:** Skip — overwriting the file in step 3 is sufficient.
|
|
64
|
+
If any condition is not met, you **must** write.
|
|
85
65
|
|
|
86
|
-
|
|
66
|
+
> **⛔ Write-or-fail:** Returning without writing after reading files is a critical failure — the cache will be stale. Even if you believe facts are unchanged, if you read a file, you write.
|
|
87
67
|
|
|
88
|
-
|
|
68
|
+
## `cache_ctrl_write_local` Reference
|
|
89
69
|
|
|
90
|
-
|
|
70
|
+
Always use `cache_ctrl_write_local` — never write cache files directly.
|
|
91
71
|
|
|
92
|
-
#### Input fields
|
|
72
|
+
#### Input fields
|
|
93
73
|
|
|
94
74
|
| Field | Type | Required | Notes |
|
|
95
75
|
|---|---|---|---|
|
|
96
76
|
| `topic` | `string` | ✅ | Human description of what was scanned |
|
|
97
77
|
| `description` | `string` | ✅ | One-liner for keyword search |
|
|
98
|
-
| `tracked_files` | `Array<{ path: string }>` | ✅ |
|
|
99
|
-
| `
|
|
100
|
-
| `
|
|
101
|
-
| `cache_miss_reason` | `string` | optional | Why
|
|
102
|
-
|
|
103
|
-
> **Cold start vs incremental**: On first run (no existing cache), submit all relevant files. On subsequent runs, submit only new and changed files — the tool merges them in.
|
|
104
|
-
|
|
105
|
-
> **Auto-set by the tool — do not include**: `timestamp` (current UTC), `mtime` (filesystem `lstat()`), and `hash` (SHA-256) per `tracked_files` entry.
|
|
106
|
-
|
|
107
|
-
### Scope rule for `facts`
|
|
108
|
-
|
|
109
|
-
Submit `facts` ONLY for files you actually read in this session (i.e., files present in
|
|
110
|
-
your submitted `tracked_files`). Never reconstruct or re-submit facts for unchanged files —
|
|
111
|
-
the tool preserves them automatically via per-path merge.
|
|
112
|
-
|
|
113
|
-
Submitting a facts key for a path absent from submitted `tracked_files` is a
|
|
114
|
-
VALIDATION_ERROR and the entire write is rejected.
|
|
78
|
+
| `tracked_files` | `Array<{ path: string }>` | ✅ | `mtime` and `hash` are auto-computed |
|
|
79
|
+
| `facts` | `Record<string, FileFacts>` | optional | Per-file structured facts; per-path merge |
|
|
80
|
+
| `global_facts` | `string[]` | optional | Last-write-wins; see trigger rule above |
|
|
81
|
+
| `cache_miss_reason` | `string` | optional | Why prior cache was discarded |
|
|
115
82
|
|
|
116
|
-
|
|
83
|
+
> **Auto-set by the tool — do not include:** `timestamp`, `mtime`, `hash`.
|
|
84
|
+
> **Write is per-path merge:** Submitted paths replace existing entries for those paths. Other paths are preserved. Deleted-file entries are evicted automatically.
|
|
117
85
|
|
|
118
|
-
|
|
119
|
-
facts — not just the diff. A 2-line change does not support a complete re-description of the
|
|
120
|
-
file, and submitting partial facts for a re-read path **permanently replaces** whatever was
|
|
121
|
-
cached before.
|
|
86
|
+
#### Scope rule for `facts`
|
|
122
87
|
|
|
123
|
-
|
|
124
|
-
structure, key dependencies, patterns, constraints, entry points). Do not bundle multiple
|
|
125
|
-
distinct properties into a single string. A file should have as many fact entries as it has
|
|
126
|
-
distinct notable properties, not a prose summary compressed into one or two lines.
|
|
88
|
+
Submit `facts` ONLY for files you actually read in this session (files present in `tracked_files`). Never reconstruct or re-submit facts for unchanged files — the tool preserves them automatically.
|
|
127
89
|
|
|
128
|
-
|
|
90
|
+
Submitting a `facts` key for a path absent from `tracked_files` is a `VALIDATION_ERROR` and the entire write is rejected.
|
|
129
91
|
|
|
130
|
-
|
|
131
|
-
AGENTS.md, install.sh, opencode.json, package.json, *.toml config files.
|
|
92
|
+
#### Fact completeness
|
|
132
93
|
|
|
133
|
-
|
|
134
|
-
The existing value is preserved automatically.
|
|
94
|
+
When a file appears in `changed_files` or `new_files`, read the **whole file** before writing facts — not just the diff. Submitting partial facts for a re-read path **permanently replaces** whatever was cached.
|
|
135
95
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
Facts for files deleted from disk are evicted automatically on the next write — no agent
|
|
139
|
-
action needed. `global_facts` is never evicted.
|
|
140
|
-
|
|
141
|
-
#### Tier 1 — `cache_ctrl_write`
|
|
96
|
+
#### Example
|
|
142
97
|
|
|
143
98
|
```json
|
|
144
99
|
{
|
|
145
|
-
"
|
|
146
|
-
"
|
|
147
|
-
|
|
148
|
-
"
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
100
|
+
"topic": "src/commands scan",
|
|
101
|
+
"description": "Scan of src/commands after write refactor",
|
|
102
|
+
"tracked_files": [
|
|
103
|
+
{ "path": "src/commands/writeLocal.ts" }
|
|
104
|
+
],
|
|
105
|
+
"facts": {
|
|
106
|
+
"src/commands/writeLocal.ts": {
|
|
107
|
+
"summary": "Thin router dispatching write calls based on agent type.",
|
|
108
|
+
"role": "implementation",
|
|
109
|
+
"importance": 2,
|
|
110
|
+
"facts": [
|
|
111
|
+
"Delegates to writeLocalCommand for agent=local",
|
|
112
|
+
"Delegates to writeExternalCommand for all other agents"
|
|
113
|
+
]
|
|
114
|
+
}
|
|
153
115
|
}
|
|
154
116
|
}
|
|
155
117
|
```
|
|
156
118
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
`cache-ctrl write local --data '<json>'` — pass the same `content` object as JSON string.
|
|
160
|
-
|
|
161
|
-
#### Tier 3
|
|
162
|
-
|
|
163
|
-
Not available — there is no direct-file fallback for writes. If neither Tier 1 nor Tier 2 is accessible, request access to one of them.
|
|
164
|
-
|
|
165
|
-
### 4. Confirm cache (optional)
|
|
166
|
-
|
|
167
|
-
**Tier 1:** Call `cache_ctrl_list` with `agent: "local"` to confirm the entry was written.
|
|
168
|
-
**Tier 2:** `cache-ctrl list --agent local`
|
|
169
|
-
**Tier 3:** `read` `.ai/local-context-gatherer_cache/context.json` and verify `timestamp` is current.
|
|
170
|
-
|
|
171
|
-
Note: local entries show `is_stale: true` only when `cache_ctrl_check_files` detects actual changes (changed files, new non-ignored files, or deleted files). A freshly-written cache with no subsequent file changes will show `is_stale: false`.
|
|
172
|
-
|
|
173
|
-
---
|
|
174
|
-
|
|
175
|
-
## Tool / Command Reference
|
|
176
|
-
|
|
177
|
-
| Operation | Tier 1 (built-in) | Tier 2 (CLI) | Tier 3 (manual) |
|
|
178
|
-
|---|---|---|---|
|
|
179
|
-
| Detect file changes | `cache_ctrl_check_files` | `cache-ctrl check-files` | read `context.json`, check `timestamp` |
|
|
180
|
-
| Invalidate cache | `cache_ctrl_invalidate` | `cache-ctrl invalidate local` | overwrite file in next step |
|
|
181
|
-
| Confirm written | `cache_ctrl_list` | `cache-ctrl list --agent local` | `read` file, check `timestamp` |
|
|
182
|
-
| Read facts (filtered) | `cache_ctrl_inspect` with `filter`, `folder`, or `searchFacts` | `cache-ctrl inspect local context --filter <kw>[,<kw>...]` / `--folder <path>` / `--search-facts <kw>[,<kw>...]` | `read` file, extract `facts`/`global_facts` |
|
|
183
|
-
| Read all facts (rare) | `cache_ctrl_inspect` (no filter) | `cache-ctrl inspect local context` | `read` file directly |
|
|
184
|
-
| Write cache | `cache_ctrl_write` | `cache-ctrl write local --data '<json>'` | ❌ not available |
|
|
185
|
-
|
|
186
|
-
> **⚠ Always use at least one filter when reading facts for a specific task.** Three targeting options are available — use the most specific one that fits your task:
|
|
187
|
-
>
|
|
188
|
-
> | Flag | What it matches | Best for |
|
|
189
|
-
> |---|---|---|
|
|
190
|
-
> | `--filter <kw>` | File path contains keyword | When you know which files by name/path segment |
|
|
191
|
-
> | `--folder <path>` | File path starts with folder prefix (recursive) | When you need all files in a directory subtree |
|
|
192
|
-
> | `--search-facts <kw>` | Any fact string contains keyword | When you need files related to a concept, pattern, or API |
|
|
193
|
-
>
|
|
194
|
-
> The flags are AND-ed when combined. Omit all filters only when you genuinely need facts for the entire repository (rare — e.g. building a full index; only appropriate for ≤ ~20 tracked files). An unfiltered `inspect` on a large repo can return thousands of fact strings.
|
|
195
|
-
|
|
196
|
-
> **`tracked_files` is never returned by `inspect` for the local agent.** It is internal operational metadata consumed by `check-files`. It will not appear in any inspect response.
|
|
197
|
-
|
|
198
|
-
## server_time in Responses
|
|
199
|
-
|
|
200
|
-
Every `cache_ctrl_*` tool call returns a `server_time` field at the outer JSON level:
|
|
201
|
-
|
|
202
|
-
```json
|
|
203
|
-
{ "ok": true, "value": { ... }, "server_time": "2026-04-05T12:34:56.789Z" }
|
|
204
|
-
```
|
|
205
|
-
|
|
206
|
-
Use this to assess how stale stored timestamps are — you do not need `bash` or system access to know the current time.
|
|
119
|
+
## Eviction
|
|
207
120
|
|
|
208
|
-
|
|
121
|
+
Facts for files deleted from disk are evicted automatically on the next write — no agent action needed. `global_facts` is never evicted.
|
|
209
122
|
|
|
210
|
-
|
|
123
|
+
## Tool Reference
|
|
211
124
|
|
|
212
|
-
|
|
213
|
-
|
|
125
|
+
| Operation | Tool |
|
|
126
|
+
|---|---|
|
|
127
|
+
| Detect file changes | `cache_ctrl_check_files` |
|
|
128
|
+
| Invalidate cache | `cache_ctrl_invalidate` (agent: "local") |
|
|
129
|
+
| Write cache | `cache_ctrl_write_local` |
|
|
130
|
+
| Read facts (filtered) | `cache_ctrl_inspect` (agent: "local", filter / folder / search_facts) |
|
|
131
|
+
| Confirm written | `cache_ctrl_list` (agent: "local") |
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
|
|
3
|
+
import { extractSymbols } from "./symbolExtractor.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Dependency metadata tracked for one source file node in the graph.
 */
export interface GraphNode {
  /**
   * Files this node depends on. `buildGraph` stores here only dependencies that
   * resolved to another file of the graph, without duplicates.
   */
  deps: string[];
  /**
   * Definitions reported for this file by `extractSymbols`.
   * NOTE(review): presumably the names of symbols declared in the file — confirm
   * against symbolExtractor.
   */
  defs: string[];
}
|
|
10
|
+
|
|
11
|
+
/**
 * Directed dependency graph keyed by absolute source file path.
 *
 * Each entry's `deps` lists the edge targets: an edge points from a file to a
 * file it depends on.
 */
export type DependencyGraph = Map<string, GraphNode>;
|
|
13
|
+
|
|
14
|
+
/** Suffixes appended verbatim to an import path when probing for a known file ("" = exact match). */
const RESOLUTION_EXTENSIONS = ["", ".ts", ".tsx", ".js", ".jsx"];

/** Extensions probed, in priority order, for a directory's `index` file. */
const INDEX_FILE_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx"];

/**
 * Emitted-JavaScript extension → TypeScript source extensions that can produce it.
 * Lets an import written as `./foo.js` resolve to the `foo.ts` it is compiled from.
 */
const SOURCE_EXTENSION_SUBSTITUTIONS: ReadonlyArray<readonly [string, readonly string[]]> = [
  [".js", [".ts", ".tsx"]],
  [".jsx", [".tsx"]],
  [".mjs", [".mts"]],
  [".cjs", [".cts"]],
];

/**
 * Map an import target onto one of the files known to the graph.
 *
 * Resolution order (first hit wins):
 *   1. `depPath` itself, then `depPath` + `.ts` / `.tsx` / `.js` / `.jsx`;
 *   2. extension substitution: `.js` → `.ts`/`.tsx`, `.jsx` → `.tsx`,
 *      `.mjs` → `.mts`, `.cjs` → `.cts`;
 *   3. directory import: `depPath/index` + `.ts` / `.tsx` / `.js` / `.jsx`.
 *
 * @param depPath - Import target as a file-system path. Assumed to use the same
 *   (absolute, normalized) form as the entries of `knownFiles` — TODO confirm
 *   against `extractSymbols`.
 * @param knownFiles - Paths of all files that are nodes of the graph.
 * @returns The matching entry of `knownFiles`, or `null` when nothing matches.
 */
function resolveDependencyToKnownFile(depPath: string, knownFiles: Set<string>): string | null {
  for (const extension of RESOLUTION_EXTENSIONS) {
    const candidatePath = `${depPath}${extension}`;
    if (knownFiles.has(candidatePath)) {
      return candidatePath;
    }
  }

  // The suffix test is made on the basename so a directory segment cannot match.
  const basename = path.basename(depPath);
  for (const [emittedExtension, sourceExtensions] of SOURCE_EXTENSION_SUBSTITUTIONS) {
    if (!basename.endsWith(emittedExtension)) {
      continue;
    }
    const stem = depPath.slice(0, -emittedExtension.length);
    for (const sourceExtension of sourceExtensions) {
      const candidatePath = `${stem}${sourceExtension}`;
      if (knownFiles.has(candidatePath)) {
        return candidatePath;
      }
    }
  }

  // Directory import (`./lib` → `./lib/index.ts`). Probed last so a sibling
  // file such as `lib.ts` keeps priority over `lib/index.ts`.
  for (const extension of INDEX_FILE_EXTENSIONS) {
    const candidatePath = path.join(depPath, `index${extension}`);
    if (knownFiles.has(candidatePath)) {
      return candidatePath;
    }
  }

  return null;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Construct the dependency graph for a set of source files.
 *
 * Each input path is made absolute with `path.resolve`, symbols for all files
 * are extracted concurrently, and every file's dependencies are narrowed to
 * those that resolve to another file of the input list.
 *
 * @param filePaths - Source file paths that become the nodes of the graph.
 * @param repoRoot - Repository root, forwarded to `extractSymbols`.
 * @returns Graph keyed by absolute file path; each node's `deps` contains only
 *   files from `filePaths`, without duplicates.
 */
export async function buildGraph(filePaths: string[], repoRoot: string): Promise<DependencyGraph> {
  const nodePaths = filePaths.map((candidate) => path.resolve(candidate));
  const nodePathLookup = new Set(nodePaths);

  // Start every extraction before awaiting any of them so they run concurrently.
  const pendingExtractions = nodePaths.map(async (nodePath) => {
    const symbols = await extractSymbols(nodePath, repoRoot);
    return { nodePath, symbols };
  });
  const extractions = await Promise.all(pendingExtractions);

  const graph: DependencyGraph = new Map();

  for (const extraction of extractions) {
    // A Set drops repeated imports of the same file while keeping first-seen order.
    const inGraphDeps = new Set<string>();

    for (const rawDependency of extraction.symbols.deps) {
      const target = resolveDependencyToKnownFile(rawDependency, nodePathLookup);
      if (target === null) {
        continue;
      }
      inGraphDeps.add(target);
    }

    graph.set(extraction.nodePath, {
      deps: Array.from(inGraphDeps),
      defs: extraction.symbols.defs,
    });
  }

  return graph;
}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import type { DependencyGraph } from "./graphBuilder.js";
|
|
2
|
+
|
|
3
|
+
/** Tuning options for dependency-graph PageRank computation. */
export interface PageRankOptions {
  /**
   * Damping factor (default 0.85): weight given to rank arriving over edges;
   * the remaining `1 - dampingFactor` is assigned from the personalization vector.
   */
  dampingFactor?: number;
  /** Max iterations (default 100); iteration stops earlier once converged. */
  maxIterations?: number;
  /**
   * Convergence threshold (default 1e-6), compared against the sum over all
   * nodes of the absolute rank change in one iteration.
   */
  tolerance?: number;
  /**
   * Files to use as personalization seeds (boosts their rank and neighbors).
   * Entries that are not nodes of the graph are ignored; when no entry matches,
   * every node receives the same personalization weight.
   */
  seedFiles?: string[];
}
|
|
14
|
+
|
|
15
|
+
/**
 * Compute Personalized PageRank over a dependency graph.
 * Returns a map of file path → rank score (normalized, sums to 1.0).
 * Higher rank = more central / more relevant to seed files.
 *
 * Rank flows along edges from a file to the files it depends on, so files that
 * many others depend on accumulate rank. Nodes without outgoing edges
 * ("dangling" nodes) spread their rank evenly over all nodes.
 *
 * Dependencies that are not themselves nodes of `graph` are ignored: they
 * neither receive rank nor count towards a node's out-degree, so no rank is
 * lost to targets outside the graph.
 *
 * @param graph - Dependency graph to rank; an empty graph yields an empty map.
 * @param options - Optional tuning (damping factor, iteration cap, tolerance, seeds).
 * @returns Rank per graph node, in the graph's key order.
 */
export function computePageRank(
  graph: DependencyGraph,
  options?: PageRankOptions,
): Map<string, number> {
  const nodes = [...graph.keys()];
  const nodeCount = nodes.length;

  if (nodeCount === 0) {
    return new Map();
  }

  const dampingFactor = options?.dampingFactor ?? 0.85;
  const maxIterations = options?.maxIterations ?? 100;
  const tolerance = options?.tolerance ?? 1e-6;

  // Work on a copy whose edges all end at a graph node. Without this, an edge
  // to an unknown file would still count towards the out-degree (and keep the
  // node from being treated as dangling) while its share of rank went nowhere.
  const internalGraph: DependencyGraph = new Map();
  for (const [node, graphNode] of graph.entries()) {
    internalGraph.set(node, {
      ...graphNode,
      deps: graphNode.deps.filter((dependency) => graph.has(dependency)),
    });
  }

  const personalization = buildPersonalizationVector(nodes, options?.seedFiles);
  const inLinks = buildInLinks(internalGraph, nodes);

  // Start from a uniform distribution.
  let ranks = new Map<string, number>();
  const initialRank = 1 / nodeCount;
  for (const node of nodes) {
    ranks.set(node, initialRank);
  }

  for (let iteration = 0; iteration < maxIterations; iteration += 1) {
    // Rank held by nodes without outgoing edges is shared equally by all nodes.
    const danglingRank = computeDanglingRank(internalGraph, ranks);
    const danglingContribution = dampingFactor * (danglingRank / nodeCount);

    const nextRanks = new Map<string, number>();
    let totalDelta = 0;

    for (const node of nodes) {
      const incomingNodes = inLinks.get(node) ?? [];
      let incomingContribution = 0;

      for (const sourceNode of incomingNodes) {
        const sourceRank = ranks.get(sourceNode);
        if (sourceRank === undefined) {
          continue;
        }

        // Each source splits its rank evenly across its outgoing edges.
        const outDegree = internalGraph.get(sourceNode)?.deps.length ?? 0;
        if (outDegree > 0) {
          incomingContribution += sourceRank / outDegree;
        }
      }

      const personalWeight = personalization.get(node) ?? 0;
      const rank = (1 - dampingFactor) * personalWeight + dampingFactor * incomingContribution + danglingContribution;
      nextRanks.set(node, rank);

      const previousRank = ranks.get(node) ?? 0;
      totalDelta += Math.abs(rank - previousRank);
    }

    ranks = nextRanks;

    // Converged: the summed absolute change fell below the tolerance.
    if (totalDelta < tolerance) {
      break;
    }
  }

  return normalizeRanks(ranks);
}
|
|
84
|
+
|
|
85
|
+
/**
 * Invert the graph's edges: for every node, list the nodes that depend on it.
 *
 * @param graph - Dependency graph whose `deps` are the forward edges.
 * @param nodes - Nodes to create an entry for; each starts with an empty list.
 * @returns Map of node → nodes that list it in their `deps`. Edges whose target
 *   is not in `nodes` are dropped.
 */
function buildInLinks(graph: DependencyGraph, nodes: string[]): Map<string, string[]> {
  const reverseEdges = new Map<string, string[]>(
    nodes.map((name): [string, string[]] => [name, []]),
  );

  graph.forEach((entry, importer) => {
    for (const imported of entry.deps) {
      // Targets without an entry are not part of the node list; skip them.
      reverseEdges.get(imported)?.push(importer);
    }
  });

  return reverseEdges;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Build the personalization (teleport) weights used by PageRank.
 *
 * When at least one seed is a graph node, the weight is split evenly among the
 * matching seeds and every other node gets 0. Otherwise all nodes share the
 * weight equally.
 *
 * @param nodes - All graph nodes; the result has one entry per node.
 * @param seedFiles - Requested seeds; entries that are not in `nodes` are ignored.
 * @returns Map of node → personalization weight.
 */
function buildPersonalizationVector(nodes: string[], seedFiles: string[] | undefined): Map<string, number> {
  const requestedSeeds = new Set(seedFiles ?? []);
  const seedsInGraph = nodes.filter((name) => requestedSeeds.has(name));
  const weights = new Map<string, number>();

  if (seedsInGraph.length === 0) {
    // No usable seed: fall back to a uniform distribution.
    const evenShare = 1 / nodes.length;
    nodes.forEach((name) => weights.set(name, evenShare));
    return weights;
  }

  const seedShare = 1 / seedsInGraph.length;
  nodes.forEach((name) => weights.set(name, requestedSeeds.has(name) ? seedShare : 0));
  return weights;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Sum the current rank of all dangling nodes, i.e. nodes whose `deps` is empty.
 *
 * @param graph - Dependency graph to scan.
 * @param ranks - Current rank per node; a node without an entry counts as 0.
 * @returns Total rank held by nodes that have no outgoing edges.
 */
function computeDanglingRank(graph: DependencyGraph, ranks: Map<string, number>): number {
  let heldByDangling = 0;

  graph.forEach((entry, name) => {
    if (entry.deps.length === 0) {
      heldByDangling += ranks.get(name) ?? 0;
    }
  });

  return heldByDangling;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Scale ranks so that they sum to 1.
 *
 * @param ranks - Raw rank per node.
 * @returns A new map with the same keys in the same order. When the raw total
 *   is zero or negative, every node receives the same share instead; an empty
 *   input yields an empty map.
 */
function normalizeRanks(ranks: Map<string, number>): Map<string, number> {
  const total = Array.from(ranks.values()).reduce((sum, value) => sum + value, 0);
  // Dividing by a non-positive total is meaningless, so spread rank evenly then.
  const useUniform = total <= 0;
  const evenShare = 1 / ranks.size;

  const scaled = new Map<string, number>();
  ranks.forEach((value, name) => {
    scaled.set(name, useUniform ? evenShare : value / total);
  });
  return scaled;
}
|