sourcebook 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,25 +4,27 @@
4
4
 
5
5
  # sourcebook
6
6
 
7
- Generate AI context files from your codebase's actual conventions. Not what agents already know — what they keep missing.
7
+ **AI can read your code. It still doesn't know how your project works.**
8
+
9
+ sourcebook captures the project knowledge your team carries in its head — conventions, patterns, traps, and where things actually go — and turns it into context your coding agent can use.
8
10
 
9
11
  ```bash
10
12
  npx sourcebook init
11
13
  ```
12
14
 
13
- One command. Analyzes your codebase. Outputs a `CLAUDE.md` tuned for how your project actually works.
14
-
15
15
  <p align="center">
16
16
  <img src="demo.svg" alt="sourcebook demo" width="820" />
17
17
  </p>
18
18
 
19
+ > Tools like Repomix give AI your entire codebase. sourcebook gives it your project knowledge.
20
+
19
21
  ## Why
20
22
 
21
- AI coding agents spend most of their context window just orienting — reading files to build a mental model before doing real work. Developers manually write context files (`CLAUDE.md`, `.cursorrules`, `copilot-instructions.md`), but most are generic and go stale fast.
23
+ AI coding agents spend most of their context window orienting — reading files to build a mental model before doing real work. Most context files (`CLAUDE.md`, `.cursorrules`) are generic and go stale fast.
22
24
 
23
- Research shows auto-generated context that restates obvious information (tech stack, directory structure) actually makes agents [worse by 2-3%](https://arxiv.org/abs/2502.09601). The only context that helps is **non-discoverable information** — things agents can't figure out by reading the code alone.
25
+ Research shows auto-generated context that restates obvious information actually makes agents [worse by 2-3%](https://arxiv.org/abs/2502.09601). The only context that helps is **non-discoverable information** — the project knowledge agents can't figure out by reading code alone.
24
26
 
25
- sourcebook inverts the typical approach: instead of dumping everything, it extracts only what agents keep missing, filtered through a discoverability test.
27
+ sourcebook extracts only what agents keep missing: the conventions, hidden dependencies, fragile areas, and dominant patterns that live in your team's heads — not in the code.
26
28
 
27
29
  ## What It Finds
28
30
 
@@ -18,6 +18,10 @@ export declare function checkLicense(): Promise<LicenseInfo>;
18
18
  * Save a license key to disk.
19
19
  */
20
20
  export declare function saveLicenseKey(key: string): void;
21
+ /**
22
+ * Remove the license key from disk.
23
+ */
24
+ export declare function removeLicenseKey(): void;
21
25
  /**
22
26
  * Gate a feature behind Pro license.
23
27
  * Prints upgrade message and exits if not licensed.
@@ -44,12 +44,13 @@ export async function checkLicense() {
44
44
  catch {
45
45
  // Network error or timeout — fall back to cache or offline validation
46
46
  if (cached && cached.key === key) {
47
- return cached.info;
48
- }
49
- // Offline grace: if key looks valid (format check), allow Pro for 7 days
50
- if (isValidKeyFormat(key)) {
51
- return { valid: true, tier: "pro" };
47
+ // Only grant offline access if last validation was within 7 days
48
+ const OFFLINE_GRACE_MS = 7 * 24 * 60 * 60 * 1000;
49
+ if (Date.now() - cached.timestamp <= OFFLINE_GRACE_MS) {
50
+ return cached.info;
51
+ }
52
52
  }
53
+ // No valid cached validation within 7 days — deny access
53
54
  }
54
55
  return { valid: false, tier: "free" };
55
56
  }
@@ -58,9 +59,22 @@ export async function checkLicense() {
58
59
  */
59
60
  export function saveLicenseKey(key) {
60
61
  if (!fs.existsSync(LICENSE_DIR)) {
61
- fs.mkdirSync(LICENSE_DIR, { recursive: true });
62
+ fs.mkdirSync(LICENSE_DIR, { recursive: true, mode: 0o700 });
63
+ }
64
+ fs.writeFileSync(LICENSE_FILE, key.trim(), { encoding: "utf-8", mode: 0o600 });
65
+ }
66
+ /**
67
+ * Remove the license key from disk.
68
+ */
69
+ export function removeLicenseKey() {
70
+ try {
71
+ if (fs.existsSync(LICENSE_FILE)) {
72
+ fs.unlinkSync(LICENSE_FILE);
73
+ }
74
+ }
75
+ catch {
76
+ // ignore cleanup errors
62
77
  }
63
- fs.writeFileSync(LICENSE_FILE, key.trim(), "utf-8");
64
78
  }
65
79
  /**
66
80
  * Read the license key from disk.
@@ -93,10 +107,10 @@ function readCache() {
93
107
  }
94
108
  function writeCache(key, info) {
95
109
  if (!fs.existsSync(LICENSE_DIR)) {
96
- fs.mkdirSync(LICENSE_DIR, { recursive: true });
110
+ fs.mkdirSync(LICENSE_DIR, { recursive: true, mode: 0o700 });
97
111
  }
98
112
  const entry = { key, info, timestamp: Date.now() };
99
- fs.writeFileSync(CACHE_FILE, JSON.stringify(entry), "utf-8");
113
+ fs.writeFileSync(CACHE_FILE, JSON.stringify(entry), { encoding: "utf-8", mode: 0o600 });
100
114
  }
101
115
  function isCacheExpired(timestamp) {
102
116
  return Date.now() - timestamp > CACHE_TTL_MS;
package/dist/cli.js CHANGED
File without changes
@@ -1,5 +1,5 @@
1
1
  import chalk from "chalk";
2
- import { saveLicenseKey, checkLicense } from "../auth/license.js";
2
+ import { saveLicenseKey, removeLicenseKey, checkLicense } from "../auth/license.js";
3
3
  export async function activate(key) {
4
4
  if (!key || key.trim().length === 0) {
5
5
  console.log(chalk.red("\nNo license key provided."));
@@ -9,9 +9,9 @@ export async function activate(key) {
9
9
  }
10
10
  console.log(chalk.bold("\nsourcebook activate"));
11
11
  console.log(chalk.dim("Validating license key...\n"));
12
- // Save key first
12
+ // Validate first, only save if valid
13
+ // Temporarily save so checkLicense can read it, then remove if invalid
13
14
  saveLicenseKey(key);
14
- // Validate it
15
15
  const license = await checkLicense();
16
16
  if (license.tier === "pro" || license.tier === "team") {
17
17
  console.log(chalk.green("✓") +
@@ -30,9 +30,11 @@ export async function activate(key) {
30
30
  console.log("");
31
31
  }
32
32
  else {
33
+ // Validation failed — remove the saved key to prevent offline bypass
34
+ removeLicenseKey();
33
35
  console.log(chalk.yellow("⚠") +
34
- " License key saved but could not be validated.");
35
- console.log(chalk.dim(" This may be a network issue. The key will be re-validated on next use."));
36
+ " License key could not be validated and was not saved.");
37
+ console.log(chalk.dim(" This may be a network issue. Please try again when you have an internet connection."));
36
38
  console.log(chalk.dim(" If the problem persists, contact roy@maroond.ai\n"));
37
39
  }
38
40
  }
@@ -28,6 +28,8 @@ const SOURCEBOOK_HEADERS = new Set([
28
28
  "High-Impact Files",
29
29
  "Code Conventions",
30
30
  "Constraints",
31
+ "Quick Reference",
32
+ "Dominant Patterns",
31
33
  ]);
32
34
  /**
33
35
  * Re-analyze and regenerate context files while preserving manual edits.
@@ -1,5 +1,12 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
+ function safePath(dir, file) {
4
+ const resolved = path.resolve(path.join(dir, file));
5
+ if (!resolved.startsWith(path.resolve(dir) + path.sep) && resolved !== path.resolve(dir)) {
6
+ return null;
7
+ }
8
+ return resolved;
9
+ }
3
10
  export async function detectBuildCommands(dir) {
4
11
  const commands = {};
5
12
  // Check package.json scripts
@@ -1,5 +1,12 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
+ function safePath(dir, file) {
4
+ const resolved = path.resolve(path.join(dir, file));
5
+ if (!resolved.startsWith(path.resolve(dir) + path.sep) && resolved !== path.resolve(dir)) {
6
+ return null;
7
+ }
8
+ return resolved;
9
+ }
3
10
  export async function detectFrameworks(dir, files) {
4
11
  const detected = [];
5
12
  // Read all package.json files (root + workspaces/sub-packages)
@@ -8,7 +15,9 @@ export async function detectFrameworks(dir, files) {
8
15
  pkgFiles.push("package.json");
9
16
  const allDeps = {};
10
17
  for (const pkgFile of pkgFiles) {
11
- const pkgPath = path.join(dir, pkgFile);
18
+ const pkgPath = safePath(dir, pkgFile);
19
+ if (!pkgPath)
20
+ continue;
12
21
  if (fs.existsSync(pkgPath)) {
13
22
  try {
14
23
  const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
@@ -44,29 +53,31 @@ export async function detectFrameworks(dir, files) {
44
53
  // Check for next.config
45
54
  const nextConfig = files.find((f) => /^next\.config\.(js|mjs|ts)$/.test(f));
46
55
  if (nextConfig) {
47
- try {
48
- const configContent = fs.readFileSync(path.join(dir, nextConfig), "utf-8");
49
- if (configContent.includes("output:") && configContent.includes("standalone")) {
50
- findings.push({
51
- category: "Next.js deployment",
52
- description: "Standalone output mode is enabled. Build produces a self-contained server in .next/standalone.",
53
- confidence: "high",
54
- discoverable: false,
55
- });
56
+ const safeNextConfig = safePath(dir, nextConfig);
57
+ if (safeNextConfig)
58
+ try {
59
+ const configContent = fs.readFileSync(safeNextConfig, "utf-8");
60
+ if (configContent.includes("output:") && configContent.includes("standalone")) {
61
+ findings.push({
62
+ category: "Next.js deployment",
63
+ description: "Standalone output mode is enabled. Build produces a self-contained server in .next/standalone.",
64
+ confidence: "high",
65
+ discoverable: false,
66
+ });
67
+ }
68
+ if (configContent.includes("images") && configContent.includes("remotePatterns")) {
69
+ findings.push({
70
+ category: "Next.js images",
71
+ description: "Remote image patterns are configured. New image domains must be added to next.config before use.",
72
+ rationale: "Agents will try to use next/image with arbitrary URLs and get 400 errors without this config.",
73
+ confidence: "high",
74
+ discoverable: false,
75
+ });
76
+ }
56
77
  }
57
- if (configContent.includes("images") && configContent.includes("remotePatterns")) {
58
- findings.push({
59
- category: "Next.js images",
60
- description: "Remote image patterns are configured. New image domains must be added to next.config before use.",
61
- rationale: "Agents will try to use next/image with arbitrary URLs and get 400 errors without this config.",
62
- confidence: "high",
63
- discoverable: false,
64
- });
78
+ catch {
79
+ // can't read config
65
80
  }
66
- }
67
- catch {
68
- // can't read config
69
- }
70
81
  }
71
82
  detected.push({
72
83
  name: "Next.js",
@@ -157,7 +168,10 @@ export async function detectFrameworks(dir, files) {
157
168
  if (hasTwConfig) {
158
169
  try {
159
170
  const configPath = files.find((f) => /^tailwind\.config\.(js|ts|mjs|cjs)$/.test(f));
160
- const content = fs.readFileSync(path.join(dir, configPath), "utf-8");
171
+ const safeConfigPath = safePath(dir, configPath);
172
+ if (!safeConfigPath)
173
+ throw new Error("path escape");
174
+ const content = fs.readFileSync(safeConfigPath, "utf-8");
161
175
  if (content.includes("extend") && content.includes("colors")) {
162
176
  findings.push({
163
177
  category: "Tailwind",
@@ -204,9 +218,7 @@ export async function detectFrameworks(dir, files) {
204
218
  }
205
219
  const paths = tsconfig?.compilerOptions?.paths;
206
220
  if (paths) {
207
- const aliases = Object.keys(paths)
208
- .map((k) => k.replace("/*", ""))
209
- .join(", ");
221
+ const aliases = [...new Set(Object.keys(paths).map((k) => k.replace("/*", "")))].join(", ");
210
222
  findings.push({
211
223
  category: "TypeScript imports",
212
224
  description: `Path aliases configured: ${aliases}. Use these instead of relative imports.`,
@@ -1,4 +1,4 @@
1
- import { execSync } from "node:child_process";
1
+ import { execFileSync } from "node:child_process";
2
2
  import path from "node:path";
3
3
  /**
4
4
  * Mine git history for non-obvious context:
@@ -33,7 +33,7 @@ export async function analyzeGitHistory(dir) {
33
33
  }
34
34
  function isGitRepo(dir) {
35
35
  try {
36
- execSync("git rev-parse --is-inside-work-tree", {
36
+ execFileSync("git", ["rev-parse", "--is-inside-work-tree"], {
37
37
  cwd: dir,
38
38
  stdio: "pipe",
39
39
  });
@@ -45,7 +45,7 @@ function isGitRepo(dir) {
45
45
  }
46
46
  function git(dir, args) {
47
47
  try {
48
- return execSync(`git ${args}`, {
48
+ return execFileSync("git", args, {
49
49
  cwd: dir,
50
50
  stdio: "pipe",
51
51
  maxBuffer: 10 * 1024 * 1024,
@@ -60,16 +60,21 @@ function git(dir, args) {
60
60
  */
61
61
  function detectRevertedPatterns(dir, revertedPatterns) {
62
62
  const findings = [];
63
- const revertLog = git(dir, 'log --grep="^Revert" --oneline --since="1 year ago" -50');
63
+ const revertLog = git(dir, ["log", "--grep=^Revert", "--oneline", "--since=1 year ago", "-50"]);
64
64
  if (!revertLog.trim())
65
65
  return findings;
66
66
  const reverts = revertLog.trim().split("\n").filter(Boolean);
67
67
  if (reverts.length >= 2) {
68
68
  // Extract what was reverted
69
69
  const revertDescriptions = [];
70
+ const REVERT_NOISE = [
71
+ /\.yml$/i, /\.yaml$/i, /scorecard/i, /dependabot/i,
72
+ /^update /i, /^bump /i, /^deps/i, /^ci:/i, /^build:/i,
73
+ /^chore\(deps\)/i, /^chore\(release\)/i,
74
+ ];
70
75
  for (const line of reverts.slice(0, 10)) {
71
76
  const match = line.match(/^[a-f0-9]+ Revert "(.+)"/);
72
- if (match) {
77
+ if (match && !REVERT_NOISE.some(n => n.test(match[1]))) {
73
78
  revertDescriptions.push(match[1]);
74
79
  revertedPatterns.push(match[1]);
75
80
  }
@@ -94,7 +99,7 @@ function detectRevertedPatterns(dir, revertedPatterns) {
94
99
  function detectAntiPatterns(dir) {
95
100
  const findings = [];
96
101
  // Extract detailed info from reverted commits
97
- const revertLog = git(dir, 'log --grep="^Revert" --format="%s" --since="1 year ago" -20');
102
+ const revertLog = git(dir, ["log", "--grep=^Revert", "--format=%s", "--since=1 year ago", "-20"]);
98
103
  if (revertLog.trim()) {
99
104
  const antiPatterns = [];
100
105
  for (const line of revertLog.trim().split("\n").filter(Boolean)) {
@@ -103,8 +108,15 @@ function detectAntiPatterns(dir) {
103
108
  antiPatterns.push(match[1]);
104
109
  }
105
110
  }
106
- if (antiPatterns.length > 0) {
107
- for (const pattern of antiPatterns.slice(0, 5)) {
111
+ // Filter out noise: CI config, deps, version bumps
112
+ const REVERT_NOISE = [
113
+ /\.yml$/i, /\.yaml$/i, /scorecard/i, /dependabot/i,
114
+ /^update /i, /^bump /i, /^deps/i, /^ci:/i, /^build:/i,
115
+ /^chore\(deps\)/i, /^chore\(release\)/i,
116
+ ];
117
+ const meaningful = antiPatterns.filter(p => !REVERT_NOISE.some(n => n.test(p)));
118
+ if (meaningful.length > 0) {
119
+ for (const pattern of meaningful.slice(0, 5)) {
108
120
  findings.push({
109
121
  category: "Anti-patterns",
110
122
  description: `Tried and reverted: "${pattern}". This approach was explicitly rejected.`,
@@ -116,13 +128,13 @@ function detectAntiPatterns(dir) {
116
128
  }
117
129
  }
118
130
  // Detect files deleted in bulk (abandoned features/approaches)
119
- const deletedLog = git(dir, 'log --diff-filter=D --name-only --pretty=format:"COMMIT %s" --since="6 months ago" -50');
131
+ const deletedLog = git(dir, ["log", "--diff-filter=D", "--name-only", "--pretty=format:COMMIT %s", "--since=6 months ago", "-50"]);
120
132
  if (deletedLog.trim()) {
121
133
  const deletionBatches = [];
122
134
  let currentMessage = "";
123
135
  let currentFiles = [];
124
136
  for (const line of deletedLog.split("\n")) {
125
- const commitMatch = line.match(/^"?COMMIT (.+)"?$/);
137
+ const commitMatch = line.match(/^COMMIT (.+)$/);
126
138
  if (commitMatch) {
127
139
  if (currentFiles.length >= 3) {
128
140
  deletionBatches.push({ message: currentMessage, files: currentFiles });
@@ -137,8 +149,22 @@ function detectAntiPatterns(dir) {
137
149
  if (currentFiles.length >= 3) {
138
150
  deletionBatches.push({ message: currentMessage, files: currentFiles });
139
151
  }
152
+ // Filter out release/changeset/version commits and revert-of-revert noise
153
+ const NOISE_PATTERNS = [
154
+ /^chore\(release\)/i,
155
+ /^\[ci\] release/i,
156
+ /^version packages/i,
157
+ /^changeset/i,
158
+ /^bump/i,
159
+ /^release/i,
160
+ /^Revert "Revert/i,
161
+ /^merge/i,
162
+ /^ci:/i,
163
+ /^build:/i,
164
+ /^Revert /i,
165
+ ];
140
166
  // Only report significant deletions (3+ files in one commit = abandoned feature)
141
- for (const batch of deletionBatches.slice(0, 3)) {
167
+ for (const batch of deletionBatches.filter(b => !NOISE_PATTERNS.some(p => p.test(b.message))).slice(0, 3)) {
142
168
  if (batch.files.length >= 3) {
143
169
  const fileList = batch.files.slice(0, 3).map((f) => path.basename(f)).join(", ");
144
170
  findings.push({
@@ -159,7 +185,7 @@ function detectAntiPatterns(dir) {
159
185
  function detectActiveAreas(dir, activeAreas) {
160
186
  const findings = [];
161
187
  // Get files changed in the last 30 days, count changes per directory
162
- const recentChanges = git(dir, 'log --since="30 days ago" --name-only --pretty=format: --diff-filter=AMRC');
188
+ const recentChanges = git(dir, ["log", "--since=30 days ago", "--name-only", "--pretty=format:", "--diff-filter=AMRC"]);
163
189
  if (!recentChanges.trim())
164
190
  return findings;
165
191
  const dirCounts = new Map();
@@ -198,14 +224,14 @@ function detectActiveAreas(dir, activeAreas) {
198
224
  function detectCoChangeCoupling(dir, clusters) {
199
225
  const findings = [];
200
226
  // Get the last 200 commits with their changed files
201
- const log = git(dir, 'log --name-only --pretty=format:"COMMIT" --since="6 months ago" -200');
227
+ const log = git(dir, ["log", "--name-only", "--pretty=format:COMMIT", "--since=6 months ago", "-200"]);
202
228
  if (!log.trim())
203
229
  return findings;
204
230
  // Parse commits into file groups
205
231
  const commits = [];
206
232
  let current = [];
207
233
  for (const line of log.split("\n")) {
208
- if (line.trim() === '"COMMIT"' || line.trim() === "COMMIT") {
234
+ if (line.trim() === "COMMIT") {
209
235
  if (current.length > 0)
210
236
  commits.push(current);
211
237
  current = [];
@@ -293,7 +319,7 @@ function detectCoChangeCoupling(dir, clusters) {
293
319
  function detectRapidReEdits(dir) {
294
320
  const findings = [];
295
321
  // Get files with high commit frequency in short windows
296
- const log = git(dir, 'log --format="%H %aI" --name-only --since="3 months ago" -300');
322
+ const log = git(dir, ["log", "--format=%H %aI", "--name-only", "--since=3 months ago", "-300"]);
297
323
  if (!log.trim())
298
324
  return findings;
299
325
  // Track edits per file with timestamps
@@ -313,9 +339,17 @@ function detectRapidReEdits(dir) {
313
339
  }
314
340
  // Find files edited 5+ times within a 7-day window
315
341
  const churnyFiles = [];
342
+ // Filter out non-source files that naturally churn
343
+ const NON_SOURCE_PATTERNS = [
344
+ /\.md$/i, /\.mdx$/i, /\.rst$/i, /\.txt$/i, /\.json$/i, /\.ya?ml$/i, /\.lock$/i, /\.log$/i,
345
+ /CHANGELOG/i, /\.env/, /\.generated\./, /\.config\./,
346
+ /\.github\//, /\.claude\//, /dashboard\//, /ops\//,
347
+ ];
316
348
  for (const [file, dates] of fileEdits) {
317
349
  if (dates.length < 5)
318
350
  continue;
351
+ if (NON_SOURCE_PATTERNS.some((p) => p.test(file)))
352
+ continue;
319
353
  // Sort dates
320
354
  dates.sort((a, b) => a.getTime() - b.getTime());
321
355
  // Sliding window: find any 7-day window with 5+ edits
@@ -354,7 +388,7 @@ function detectRapidReEdits(dir) {
354
388
  */
355
389
  function detectCommitPatterns(dir) {
356
390
  const findings = [];
357
- const log = git(dir, 'log --oneline --since="6 months ago" -200');
391
+ const log = git(dir, ["log", "--oneline", "--since=6 months ago", "-200"]);
358
392
  if (!log.trim())
359
393
  return findings;
360
394
  const messages = log.trim().split("\n").filter(Boolean);
@@ -377,7 +411,7 @@ function detectCommitPatterns(dir) {
377
411
  .map(([scope]) => scope);
378
412
  findings.push({
379
413
  category: "Commit conventions",
380
- description: `Uses Conventional Commits (feat/fix/docs/etc). ${topScopes.length > 0 ? `Common scopes: ${topScopes.join(", ")}` : ""}. Follow this pattern for new commits.`,
414
+ description: `Uses Conventional Commits (feat/fix/docs/etc).${topScopes.length > 0 ? ` Common scopes: ${topScopes.join(", ")}.` : ""} Follow this pattern for new commits.`,
381
415
  confidence: "high",
382
416
  discoverable: false,
383
417
  });
@@ -1,5 +1,12 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
+ function safePath(dir, file) {
4
+ const resolved = path.resolve(path.join(dir, file));
5
+ if (!resolved.startsWith(path.resolve(dir) + path.sep) && resolved !== path.resolve(dir)) {
6
+ return null;
7
+ }
8
+ return resolved;
9
+ }
3
10
  /**
4
11
  * Build an import/dependency graph and run PageRank to identify
5
12
  * the most structurally important files. Conventions found in
@@ -16,7 +23,9 @@ export async function analyzeImportGraph(dir, files) {
16
23
  const edges = [];
17
24
  const fileSet = new Set(sourceFiles);
18
25
  for (const file of sourceFiles) {
19
- const filePath = path.join(dir, file);
26
+ const filePath = safePath(dir, file);
27
+ if (!filePath)
28
+ continue;
20
29
  let content;
21
30
  try {
22
31
  content = fs.readFileSync(filePath, "utf-8");
@@ -28,6 +28,7 @@ export async function scanProject(dir) {
28
28
  nodir: true,
29
29
  ignore: IGNORE_PATTERNS,
30
30
  dot: true,
31
+ follow: false,
31
32
  });
32
33
  // Detect languages from file extensions
33
34
  const languages = detectLanguages(files);
@@ -1,5 +1,12 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
+ function safePath(dir, file) {
4
+ const resolved = path.resolve(path.join(dir, file));
5
+ if (!resolved.startsWith(path.resolve(dir) + path.sep) && resolved !== path.resolve(dir)) {
6
+ return null;
7
+ }
8
+ return resolved;
9
+ }
3
10
  /**
4
11
  * Detect code patterns and conventions that are non-obvious.
5
12
  * This is the core intelligence layer -- finding things agents miss.
@@ -17,8 +24,11 @@ export async function detectPatterns(dir, files, frameworks) {
17
24
  const sampled = sampleFiles(sourceFiles, 50);
18
25
  const fileContents = new Map();
19
26
  for (const file of sampled) {
27
+ const safe = safePath(dir, file);
28
+ if (!safe)
29
+ continue;
20
30
  try {
21
- const content = fs.readFileSync(path.join(dir, file), "utf-8");
31
+ const content = fs.readFileSync(safe, "utf-8");
22
32
  fileContents.set(file, content);
23
33
  }
24
34
  catch {
@@ -54,8 +64,11 @@ function sampleFiles(files, maxCount) {
54
64
  f.includes("layout.") ||
55
65
  f.includes("middleware."));
56
66
  const rest = files.filter((f) => !priority.includes(f));
57
- const shuffled = rest.sort(() => Math.random() - 0.5);
58
- return [...priority, ...shuffled].slice(0, maxCount);
67
+ // Deterministic sampling: sort by path, take evenly spaced files
68
+ const sorted = rest.sort();
69
+ const step = Math.max(1, Math.floor(sorted.length / Math.max(1, maxCount - priority.length)));
70
+ const sampled = sorted.filter((_, i) => i % step === 0);
71
+ return [...priority, ...sampled].slice(0, maxCount);
59
72
  }
60
73
  function detectBarrelExports(files, contents) {
61
74
  const indexFiles = files.filter((f) => path.basename(f).startsWith("index.") && !f.includes("node_modules"));
@@ -275,8 +288,11 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
275
288
  const allContents = new Map(contents);
276
289
  for (const file of extraSample) {
277
290
  if (!allContents.has(file)) {
291
+ const safe = safePath(dir, file);
292
+ if (!safe)
293
+ continue;
278
294
  try {
279
- const content = fs.readFileSync(path.join(dir, file), "utf-8");
295
+ const content = fs.readFileSync(safe, "utf-8");
280
296
  allContents.set(file, content);
281
297
  }
282
298
  catch { /* skip */ }
@@ -305,7 +321,25 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
305
321
  }
306
322
  }
307
323
  }
308
- const dominantI18n = i18nPatterns.filter((p) => p.count >= 3).sort((a, b) => b.count - a.count);
324
+ // Filter: if only t() matched, require corroborating evidence (i18n files or packages)
325
+ const hasI18nFiles = files.some((f) => f.includes("locale") || f.includes("i18n") || f.includes("translations") || f.includes("messages/"));
326
+ let hasI18nPackage = false;
327
+ for (const [f, c] of allContents) {
328
+ if (f.endsWith("package.json") && (c.includes("i18next") || c.includes("react-intl") || c.includes("next-intl") || c.includes("@lingui"))) {
329
+ hasI18nPackage = true;
330
+ break;
331
+ }
332
+ }
333
+ const dominantI18n = i18nPatterns
334
+ .filter((p) => {
335
+ if (p.count < 3)
336
+ return false;
337
+ // t() alone is too generic — require corroborating evidence
338
+ if (p.hook === 't("key")' && !hasI18nFiles && !hasI18nPackage)
339
+ return false;
340
+ return true;
341
+ })
342
+ .sort((a, b) => b.count - a.count);
309
343
  if (dominantI18n.length > 0) {
310
344
  const primary = dominantI18n[0];
311
345
  let desc = `User-facing strings use ${primary.hook} for internationalization.`;
@@ -332,10 +366,10 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
332
366
  // 2. ROUTING / API PATTERNS
333
367
  // ========================================
334
368
  const routerPatterns = [
335
- { pattern: "trpc\\.router|createTRPCRouter|t\\.router", name: "tRPC routers", count: 0 },
369
+ { pattern: "trpc\\.router|createTRPCRouter|from ['\"]@trpc", name: "tRPC routers", count: 0 },
336
370
  { pattern: "express\\.Router|router\\.get|router\\.post", name: "Express routers", count: 0 },
337
371
  { pattern: "app\\.get\\(|app\\.post\\(|app\\.put\\(", name: "Express app routes", count: 0 },
338
- { pattern: "Hono|app\\.route\\(|c\\.json\\(", name: "Hono routes", count: 0 },
372
+ { pattern: "new Hono|from ['\"]hono['\"]", name: "Hono routes", count: 0 },
339
373
  { pattern: "FastAPI|@app\\.(get|post|put|delete)", name: "FastAPI endpoints", count: 0 },
340
374
  { pattern: "flask\\.route|@app\\.route", name: "Flask routes", count: 0 },
341
375
  { pattern: "gin\\.Engine|r\\.GET|r\\.POST", name: "Gin routes", count: 0 },
@@ -364,7 +398,7 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
364
398
  // ========================================
365
399
  const schemaPatterns = [
366
400
  { pattern: "z\\.object|z\\.string|z\\.number", name: "Zod", usage: "Use Zod schemas for validation", count: 0 },
367
- { pattern: "BaseModel|Field\\(", name: "Pydantic", usage: "Use Pydantic BaseModel for data classes", count: 0 },
401
+ { pattern: "class\\s+\\w+\\(BaseModel\\)|from pydantic", name: "Pydantic", usage: "Use Pydantic BaseModel for data classes", count: 0 },
368
402
  { pattern: "Joi\\.object|Joi\\.string", name: "Joi", usage: "Use Joi schemas for validation", count: 0 },
369
403
  { pattern: "yup\\.object|yup\\.string", name: "Yup", usage: "Use Yup schemas for validation", count: 0 },
370
404
  { pattern: "class.*Serializer.*:|serializers\\.Serializer", name: "Django serializers", usage: "Use Django REST serializers for API data", count: 0 },
@@ -420,7 +454,7 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
420
454
  // 5. TESTING PATTERNS
421
455
  // ========================================
422
456
  const testPatterns = [
423
- { pattern: "describe\\(|it\\(|test\\(", name: "Jest/Vitest", count: 0 },
457
+ { pattern: "describe\\(|it\\(|test\\(", name: "_generic_test", count: 0 },
424
458
  { pattern: "def test_|class Test|pytest", name: "pytest", count: 0 },
425
459
  { pattern: "func Test.*\\(t \\*testing\\.T\\)", name: "Go testing", count: 0 },
426
460
  { pattern: "expect\\(.*\\)\\.to", name: "Chai/expect", count: 0 },
@@ -432,8 +466,11 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
432
466
  .slice(0, 10);
433
467
  for (const file of testSampled) {
434
468
  if (!allContents.has(file)) {
469
+ const safe = safePath(dir, file);
470
+ if (!safe)
471
+ continue;
435
472
  try {
436
- const content = fs.readFileSync(path.join(dir, file), "utf-8");
473
+ const content = fs.readFileSync(safe, "utf-8");
437
474
  allContents.set(file, content);
438
475
  }
439
476
  catch { /* skip */ }
@@ -450,7 +487,46 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
450
487
  }
451
488
  const dominantTest = testPatterns.filter((p) => p.count >= 2).sort((a, b) => b.count - a.count);
452
489
  if (dominantTest.length > 0) {
453
- const primary = dominantTest[0];
490
+ let primary = dominantTest[0];
491
+ // Disambiguate generic test pattern by checking package.json devDependencies
492
+ if (primary.name === "_generic_test") {
493
+ let pkgContent = allContents.get("package.json") || "";
494
+ if (!pkgContent) {
495
+ const pkgPath = safePath(dir, "package.json");
496
+ if (pkgPath) {
497
+ try {
498
+ pkgContent = fs.readFileSync(pkgPath, "utf-8");
499
+ }
500
+ catch { /* skip */ }
501
+ }
502
+ }
503
+ if (pkgContent.includes('"vitest"')) {
504
+ primary = { ...primary, name: "Vitest" };
505
+ }
506
+ else if (pkgContent.includes('"jest"') || pkgContent.includes('"@jest/')) {
507
+ primary = { ...primary, name: "Jest" };
508
+ }
509
+ else if (pkgContent.includes('"mocha"')) {
510
+ primary = { ...primary, name: "Mocha" };
511
+ }
512
+ else if (pkgContent.includes('"jasmine"')) {
513
+ primary = { ...primary, name: "Jasmine" };
514
+ }
515
+ else {
516
+ // Check for Deno (deno.json/deno.jsonc) or Bun (bun.lockb)
517
+ const hasDeno = files.some(f => f === "deno.json" || f === "deno.jsonc" || f === "deno.lock");
518
+ const hasBun = files.some(f => f === "bun.lockb" || f === "bunfig.toml");
519
+ if (hasDeno) {
520
+ primary = { ...primary, name: "Deno test" };
521
+ }
522
+ else if (hasBun) {
523
+ primary = { ...primary, name: "Bun test" };
524
+ }
525
+ else {
526
+ primary = { ...primary, name: "Jest" }; // default for JS/TS projects
527
+ }
528
+ }
529
+ }
454
530
  // Also detect common test utilities/helpers
455
531
  const testHelperFiles = files.filter((f) => (f.includes("test-utils") || f.includes("testUtils") || f.includes("fixtures") || f.includes("helpers")) &&
456
532
  (f.includes("test") || f.includes("spec")));
@@ -511,9 +587,9 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
511
587
  // 7. STYLING CONVENTIONS
512
588
  // ========================================
513
589
  const stylePatterns = [
514
- { pattern: "className=|class=.*tw-", name: "Tailwind CSS", desc: "Styling uses Tailwind CSS utility classes", count: 0 },
515
- { pattern: "styled\\.|styled\\(|css`", name: "styled-components/Emotion", desc: "Styling uses CSS-in-JS (styled-components or Emotion)", count: 0 },
516
- { pattern: "styles\\.\\w+|from.*\\.module\\.(css|scss)", name: "CSS Modules", desc: "Styling uses CSS Modules (*.module.css)", count: 0 },
590
+ { pattern: "class=.*tw-|className=[\"'](?:flex |grid |p-|m-|text-|bg-|border-|rounded-|shadow-|w-|h-)", name: "Tailwind CSS", desc: "Styling uses Tailwind CSS utility classes", count: 0 },
591
+ { pattern: "from ['\"]styled-components|from ['\"]@emotion|styled\\.|styled\\(", name: "styled-components/Emotion", desc: "Styling uses CSS-in-JS (styled-components or Emotion)", count: 0 },
592
+ { pattern: "from.*\\.module\\.(css|scss)", name: "CSS Modules", desc: "Styling uses CSS Modules (*.module.css)", count: 0 },
517
593
  ];
518
594
  for (const [f, content] of allContents) {
519
595
  for (const p of stylePatterns) {
@@ -530,13 +606,15 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
530
606
  if (primary.name === "Tailwind CSS") {
531
607
  const twConfig = files.find((f) => f.includes("tailwind.config"));
532
608
  if (twConfig) {
533
- try {
534
- const configContent = fs.readFileSync(path.join(dir, twConfig), "utf-8");
535
- if (configContent.includes("colors") || configContent.includes("extend")) {
536
- desc += ` Custom design tokens defined in ${twConfig} — use these instead of arbitrary values.`;
609
+ const safeTw = safePath(dir, twConfig);
610
+ if (safeTw)
611
+ try {
612
+ const configContent = fs.readFileSync(safeTw, "utf-8");
613
+ if (configContent.includes("colors") || configContent.includes("extend")) {
614
+ desc += ` Custom design tokens defined in ${twConfig} — use these instead of arbitrary values.`;
615
+ }
537
616
  }
538
- }
539
- catch { /* skip */ }
617
+ catch { /* skip */ }
540
618
  }
541
619
  }
542
620
  findings.push({
@@ -627,13 +705,15 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
627
705
  if (dominantRouter.length > 0) {
628
706
  const routeDirs = files
629
707
  .filter((f) => (f.includes("routes") || f.includes("routers") || f.includes("api/") || f.includes("app/api/")) &&
630
- !f.includes("node_modules") && !f.includes(".test.") &&
708
+ !f.includes("node_modules") && !f.includes(".test.") && !f.includes(".spec.") &&
709
+ !f.includes("test/") && !f.includes("tests/") && !f.includes("__test") &&
710
+ !f.includes("fixture") && !f.includes("mock") &&
631
711
  (f.endsWith(".ts") || f.endsWith(".js") || f.endsWith(".py") || f.endsWith(".go")))
632
712
  .map((f) => {
633
713
  const parts = f.split("/");
634
- // Get the directory containing route files
635
714
  return parts.slice(0, -1).join("/");
636
715
  })
716
+ .filter((v) => v && v !== "." && v.length > 0) // filter empty/root paths
637
717
  .filter((v, i, a) => a.indexOf(v) === i)
638
718
  .slice(0, 3);
639
719
  if (routeDirs.length > 0) {
@@ -2,9 +2,13 @@ import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  export async function writeOutput(dir, filename, content) {
4
4
  const filePath = path.join(dir, filename);
5
- const parentDir = path.dirname(filePath);
5
+ const resolved = path.resolve(filePath);
6
+ if (!resolved.startsWith(path.resolve(dir) + path.sep)) {
7
+ throw new Error(`Output path escapes target directory: ${filename}`);
8
+ }
9
+ const parentDir = path.dirname(resolved);
6
10
  if (!fs.existsSync(parentDir)) {
7
11
  fs.mkdirSync(parentDir, { recursive: true });
8
12
  }
9
- fs.writeFileSync(filePath, content, "utf-8");
13
+ fs.writeFileSync(resolved, content, "utf-8");
10
14
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sourcebook",
3
- "version": "0.5.0",
3
+ "version": "0.5.2",
4
4
  "description": "Extract the conventions, constraints, and architectural truths your AI coding agents keep missing.",
5
5
  "type": "module",
6
6
  "bin": {