sourcebook 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,25 +4,27 @@
4
4
 
5
5
  # sourcebook
6
6
 
7
- Generate AI context files from your codebase's actual conventions. Not what agents already know — what they keep missing.
7
+ **AI can read your code. It still doesn't know how your project works.**
8
+
9
+ sourcebook captures the project knowledge your team carries in its head — conventions, patterns, traps, and where things actually go — and turns it into context your coding agent can use.
8
10
 
9
11
  ```bash
10
12
  npx sourcebook init
11
13
  ```
12
14
 
13
- One command. Analyzes your codebase. Outputs a `CLAUDE.md` tuned for how your project actually works.
14
-
15
15
  <p align="center">
16
16
  <img src="demo.svg" alt="sourcebook demo" width="820" />
17
17
  </p>
18
18
 
19
+ > Tools like Repomix give AI your entire codebase. sourcebook gives it your project knowledge.
20
+
19
21
  ## Why
20
22
 
21
- AI coding agents spend most of their context window just orienting — reading files to build a mental model before doing real work. Developers manually write context files (`CLAUDE.md`, `.cursorrules`, `copilot-instructions.md`), but most are generic and go stale fast.
23
+ AI coding agents spend most of their context window orienting — reading files to build a mental model before doing real work. Most context files (`CLAUDE.md`, `.cursorrules`) are generic and go stale fast.
22
24
 
23
- Research shows auto-generated context that restates obvious information (tech stack, directory structure) actually makes agents [worse by 2-3%](https://arxiv.org/abs/2502.09601). The only context that helps is **non-discoverable information** — things agents can't figure out by reading the code alone.
25
+ Research shows auto-generated context that restates obvious information actually makes agents [worse by 2-3%](https://arxiv.org/abs/2502.09601). The only context that helps is **non-discoverable information** — the project knowledge agents can't figure out by reading code alone.
24
26
 
25
- sourcebook inverts the typical approach: instead of dumping everything, it extracts only what agents keep missing, filtered through a discoverability test.
27
+ sourcebook extracts only what agents keep missing: the conventions, hidden dependencies, fragile areas, and dominant patterns that live in your team's heads — not in the code.
26
28
 
27
29
  ## What It Finds
28
30
 
package/dist/cli.js CHANGED
File without changes
@@ -28,6 +28,8 @@ const SOURCEBOOK_HEADERS = new Set([
28
28
  "High-Impact Files",
29
29
  "Code Conventions",
30
30
  "Constraints",
31
+ "Quick Reference",
32
+ "Dominant Patterns",
31
33
  ]);
32
34
  /**
33
35
  * Re-analyze and regenerate context files while preserving manual edits.
@@ -218,9 +218,7 @@ export async function detectFrameworks(dir, files) {
218
218
  }
219
219
  const paths = tsconfig?.compilerOptions?.paths;
220
220
  if (paths) {
221
- const aliases = Object.keys(paths)
222
- .map((k) => k.replace("/*", ""))
223
- .join(", ");
221
+ const aliases = [...new Set(Object.keys(paths).map((k) => k.replace("/*", "")))].join(", ");
224
222
  findings.push({
225
223
  category: "TypeScript imports",
226
224
  description: `Path aliases configured: ${aliases}. Use these instead of relative imports.`,
@@ -67,9 +67,14 @@ function detectRevertedPatterns(dir, revertedPatterns) {
67
67
  if (reverts.length >= 2) {
68
68
  // Extract what was reverted
69
69
  const revertDescriptions = [];
70
+ const REVERT_NOISE = [
71
+ /\.yml$/i, /\.yaml$/i, /scorecard/i, /dependabot/i,
72
+ /^update /i, /^bump /i, /^deps/i, /^ci:/i, /^build:/i,
73
+ /^chore\(deps\)/i, /^chore\(release\)/i,
74
+ ];
70
75
  for (const line of reverts.slice(0, 10)) {
71
76
  const match = line.match(/^[a-f0-9]+ Revert "(.+)"/);
72
- if (match) {
77
+ if (match && !REVERT_NOISE.some(n => n.test(match[1]))) {
73
78
  revertDescriptions.push(match[1]);
74
79
  revertedPatterns.push(match[1]);
75
80
  }
@@ -103,8 +108,15 @@ function detectAntiPatterns(dir) {
103
108
  antiPatterns.push(match[1]);
104
109
  }
105
110
  }
106
- if (antiPatterns.length > 0) {
107
- for (const pattern of antiPatterns.slice(0, 5)) {
111
+ // Filter out noise: CI config, deps, version bumps
112
+ const REVERT_NOISE = [
113
+ /\.yml$/i, /\.yaml$/i, /scorecard/i, /dependabot/i,
114
+ /^update /i, /^bump /i, /^deps/i, /^ci:/i, /^build:/i,
115
+ /^chore\(deps\)/i, /^chore\(release\)/i,
116
+ ];
117
+ const meaningful = antiPatterns.filter(p => !REVERT_NOISE.some(n => n.test(p)));
118
+ if (meaningful.length > 0) {
119
+ for (const pattern of meaningful.slice(0, 5)) {
108
120
  findings.push({
109
121
  category: "Anti-patterns",
110
122
  description: `Tried and reverted: "${pattern}". This approach was explicitly rejected.`,
@@ -137,8 +149,22 @@ function detectAntiPatterns(dir) {
137
149
  if (currentFiles.length >= 3) {
138
150
  deletionBatches.push({ message: currentMessage, files: currentFiles });
139
151
  }
152
+ // Filter out commits whose deletions are not abandoned features: release/changeset/version bumps, merges, ci:/build: commits, and all reverts (including revert-of-revert)
153
+ const NOISE_PATTERNS = [
154
+ /^chore\(release\)/i,
155
+ /^\[ci\] release/i,
156
+ /^version packages/i,
157
+ /^changeset/i,
158
+ /^bump/i,
159
+ /^release/i,
160
+ /^Revert "Revert/i,
161
+ /^merge/i,
162
+ /^ci:/i,
163
+ /^build:/i,
164
+ /^Revert /i,
165
+ ];
140
166
  // Only report significant deletions (3+ files in one commit = abandoned feature)
141
- for (const batch of deletionBatches.slice(0, 3)) {
167
+ for (const batch of deletionBatches.filter(b => !NOISE_PATTERNS.some(p => p.test(b.message))).slice(0, 3)) {
142
168
  if (batch.files.length >= 3) {
143
169
  const fileList = batch.files.slice(0, 3).map((f) => path.basename(f)).join(", ");
144
170
  findings.push({
@@ -313,9 +339,17 @@ function detectRapidReEdits(dir) {
313
339
  }
314
340
  // Find files edited 5+ times within a 7-day window
315
341
  const churnyFiles = [];
342
+ // Filter out non-source files that naturally churn
343
+ const NON_SOURCE_PATTERNS = [
344
+ /\.md$/i, /\.mdx$/i, /\.rst$/i, /\.txt$/i, /\.json$/i, /\.ya?ml$/i, /\.lock$/i, /\.log$/i,
345
+ /CHANGELOG/i, /\.env/, /\.generated\./, /\.config\./,
346
+ /\.github\//, /\.claude\//, /dashboard\//, /ops\//,
347
+ ];
316
348
  for (const [file, dates] of fileEdits) {
317
349
  if (dates.length < 5)
318
350
  continue;
351
+ if (NON_SOURCE_PATTERNS.some((p) => p.test(file)))
352
+ continue;
319
353
  // Sort dates
320
354
  dates.sort((a, b) => a.getTime() - b.getTime());
321
355
  // Sliding window: find any 7-day window with 5+ edits
@@ -377,7 +411,7 @@ function detectCommitPatterns(dir) {
377
411
  .map(([scope]) => scope);
378
412
  findings.push({
379
413
  category: "Commit conventions",
380
- description: `Uses Conventional Commits (feat/fix/docs/etc). ${topScopes.length > 0 ? `Common scopes: ${topScopes.join(", ")}` : ""}. Follow this pattern for new commits.`,
414
+ description: `Uses Conventional Commits (feat/fix/docs/etc).${topScopes.length > 0 ? ` Common scopes: ${topScopes.join(", ")}.` : ""} Follow this pattern for new commits.`,
381
415
  confidence: "high",
382
416
  discoverable: false,
383
417
  });
@@ -64,8 +64,11 @@ function sampleFiles(files, maxCount) {
64
64
  f.includes("layout.") ||
65
65
  f.includes("middleware."));
66
66
  const rest = files.filter((f) => !priority.includes(f));
67
- const shuffled = rest.sort(() => Math.random() - 0.5);
68
- return [...priority, ...shuffled].slice(0, maxCount);
67
+ // Deterministic sampling: sort by path, take evenly spaced files
68
+ const sorted = rest.sort();
69
+ const step = Math.max(1, Math.floor(sorted.length / Math.max(1, maxCount - priority.length)));
70
+ const sampled = sorted.filter((_, i) => i % step === 0);
71
+ return [...priority, ...sampled].slice(0, maxCount);
69
72
  }
70
73
  function detectBarrelExports(files, contents) {
71
74
  const indexFiles = files.filter((f) => path.basename(f).startsWith("index.") && !f.includes("node_modules"));
@@ -318,7 +321,25 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
318
321
  }
319
322
  }
320
323
  }
321
- const dominantI18n = i18nPatterns.filter((p) => p.count >= 3).sort((a, b) => b.count - a.count);
324
+ // Filter: if only t() matched, require corroborating evidence (i18n files or packages)
325
+ const hasI18nFiles = files.some((f) => f.includes("locale") || f.includes("i18n") || f.includes("translations") || f.includes("messages/"));
326
+ let hasI18nPackage = false;
327
+ for (const [f, c] of allContents) {
328
+ if (f.endsWith("package.json") && (c.includes("i18next") || c.includes("react-intl") || c.includes("next-intl") || c.includes("@lingui"))) {
329
+ hasI18nPackage = true;
330
+ break;
331
+ }
332
+ }
333
+ const dominantI18n = i18nPatterns
334
+ .filter((p) => {
335
+ if (p.count < 3)
336
+ return false;
337
+ // t() alone is too generic — require corroborating evidence
338
+ if (p.hook === 't("key")' && !hasI18nFiles && !hasI18nPackage)
339
+ return false;
340
+ return true;
341
+ })
342
+ .sort((a, b) => b.count - a.count);
322
343
  if (dominantI18n.length > 0) {
323
344
  const primary = dominantI18n[0];
324
345
  let desc = `User-facing strings use ${primary.hook} for internationalization.`;
@@ -345,10 +366,10 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
345
366
  // 2. ROUTING / API PATTERNS
346
367
  // ========================================
347
368
  const routerPatterns = [
348
- { pattern: "trpc\\.router|createTRPCRouter|t\\.router", name: "tRPC routers", count: 0 },
369
+ { pattern: "trpc\\.router|createTRPCRouter|from ['\"]@trpc", name: "tRPC routers", count: 0 },
349
370
  { pattern: "express\\.Router|router\\.get|router\\.post", name: "Express routers", count: 0 },
350
371
  { pattern: "app\\.get\\(|app\\.post\\(|app\\.put\\(", name: "Express app routes", count: 0 },
351
- { pattern: "Hono|app\\.route\\(|c\\.json\\(", name: "Hono routes", count: 0 },
372
+ { pattern: "new Hono|from ['\"]hono['\"]", name: "Hono routes", count: 0 },
352
373
  { pattern: "FastAPI|@app\\.(get|post|put|delete)", name: "FastAPI endpoints", count: 0 },
353
374
  { pattern: "flask\\.route|@app\\.route", name: "Flask routes", count: 0 },
354
375
  { pattern: "gin\\.Engine|r\\.GET|r\\.POST", name: "Gin routes", count: 0 },
@@ -377,7 +398,7 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
377
398
  // ========================================
378
399
  const schemaPatterns = [
379
400
  { pattern: "z\\.object|z\\.string|z\\.number", name: "Zod", usage: "Use Zod schemas for validation", count: 0 },
380
- { pattern: "BaseModel|Field\\(", name: "Pydantic", usage: "Use Pydantic BaseModel for data classes", count: 0 },
401
+ { pattern: "class\\s+\\w+\\(BaseModel\\)|from pydantic", name: "Pydantic", usage: "Use Pydantic BaseModel for data classes", count: 0 },
381
402
  { pattern: "Joi\\.object|Joi\\.string", name: "Joi", usage: "Use Joi schemas for validation", count: 0 },
382
403
  { pattern: "yup\\.object|yup\\.string", name: "Yup", usage: "Use Yup schemas for validation", count: 0 },
383
404
  { pattern: "class.*Serializer.*:|serializers\\.Serializer", name: "Django serializers", usage: "Use Django REST serializers for API data", count: 0 },
@@ -433,7 +454,7 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
433
454
  // 5. TESTING PATTERNS
434
455
  // ========================================
435
456
  const testPatterns = [
436
- { pattern: "describe\\(|it\\(|test\\(", name: "Jest/Vitest", count: 0 },
457
+ { pattern: "describe\\(|it\\(|test\\(", name: "_generic_test", count: 0 },
437
458
  { pattern: "def test_|class Test|pytest", name: "pytest", count: 0 },
438
459
  { pattern: "func Test.*\\(t \\*testing\\.T\\)", name: "Go testing", count: 0 },
439
460
  { pattern: "expect\\(.*\\)\\.to", name: "Chai/expect", count: 0 },
@@ -466,7 +487,46 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
466
487
  }
467
488
  const dominantTest = testPatterns.filter((p) => p.count >= 2).sort((a, b) => b.count - a.count);
468
489
  if (dominantTest.length > 0) {
469
- const primary = dominantTest[0];
490
+ let primary = dominantTest[0];
491
+ // Disambiguate generic test pattern by checking package.json devDependencies
492
+ if (primary.name === "_generic_test") {
493
+ let pkgContent = allContents.get("package.json") || "";
494
+ if (!pkgContent) {
495
+ const pkgPath = safePath(dir, "package.json");
496
+ if (pkgPath) {
497
+ try {
498
+ pkgContent = fs.readFileSync(pkgPath, "utf-8");
499
+ }
500
+ catch { /* skip */ }
501
+ }
502
+ }
503
+ if (pkgContent.includes('"vitest"')) {
504
+ primary = { ...primary, name: "Vitest" };
505
+ }
506
+ else if (pkgContent.includes('"jest"') || pkgContent.includes('"@jest/')) {
507
+ primary = { ...primary, name: "Jest" };
508
+ }
509
+ else if (pkgContent.includes('"mocha"')) {
510
+ primary = { ...primary, name: "Mocha" };
511
+ }
512
+ else if (pkgContent.includes('"jasmine"')) {
513
+ primary = { ...primary, name: "Jasmine" };
514
+ }
515
+ else {
516
+ // Check for Deno (deno.json/deno.jsonc/deno.lock) or Bun (bun.lockb/bunfig.toml)
517
+ const hasDeno = files.some(f => f === "deno.json" || f === "deno.jsonc" || f === "deno.lock");
518
+ const hasBun = files.some(f => f === "bun.lockb" || f === "bunfig.toml");
519
+ if (hasDeno) {
520
+ primary = { ...primary, name: "Deno test" };
521
+ }
522
+ else if (hasBun) {
523
+ primary = { ...primary, name: "Bun test" };
524
+ }
525
+ else {
526
+ primary = { ...primary, name: "Jest" }; // default for JS/TS projects
527
+ }
528
+ }
529
+ }
470
530
  // Also detect common test utilities/helpers
471
531
  const testHelperFiles = files.filter((f) => (f.includes("test-utils") || f.includes("testUtils") || f.includes("fixtures") || f.includes("helpers")) &&
472
532
  (f.includes("test") || f.includes("spec")));
@@ -527,9 +587,9 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
527
587
  // 7. STYLING CONVENTIONS
528
588
  // ========================================
529
589
  const stylePatterns = [
530
- { pattern: "className=|class=.*tw-", name: "Tailwind CSS", desc: "Styling uses Tailwind CSS utility classes", count: 0 },
531
- { pattern: "styled\\.|styled\\(|css`", name: "styled-components/Emotion", desc: "Styling uses CSS-in-JS (styled-components or Emotion)", count: 0 },
532
- { pattern: "styles\\.\\w+|from.*\\.module\\.(css|scss)", name: "CSS Modules", desc: "Styling uses CSS Modules (*.module.css)", count: 0 },
590
+ { pattern: "class=.*tw-|className=[\"'](?:flex |grid |p-|m-|text-|bg-|border-|rounded-|shadow-|w-|h-)", name: "Tailwind CSS", desc: "Styling uses Tailwind CSS utility classes", count: 0 },
591
+ { pattern: "from ['\"]styled-components|from ['\"]@emotion|styled\\.|styled\\(", name: "styled-components/Emotion", desc: "Styling uses CSS-in-JS (styled-components or Emotion)", count: 0 },
592
+ { pattern: "from.*\\.module\\.(css|scss)", name: "CSS Modules", desc: "Styling uses CSS Modules (*.module.css)", count: 0 },
533
593
  ];
534
594
  for (const [f, content] of allContents) {
535
595
  for (const p of stylePatterns) {
@@ -645,13 +705,15 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
645
705
  if (dominantRouter.length > 0) {
646
706
  const routeDirs = files
647
707
  .filter((f) => (f.includes("routes") || f.includes("routers") || f.includes("api/") || f.includes("app/api/")) &&
648
- !f.includes("node_modules") && !f.includes(".test.") &&
708
+ !f.includes("node_modules") && !f.includes(".test.") && !f.includes(".spec.") &&
709
+ !f.includes("test/") && !f.includes("tests/") && !f.includes("__test") &&
710
+ !f.includes("fixture") && !f.includes("mock") &&
649
711
  (f.endsWith(".ts") || f.endsWith(".js") || f.endsWith(".py") || f.endsWith(".go")))
650
712
  .map((f) => {
651
713
  const parts = f.split("/");
652
- // Get the directory containing route files
653
714
  return parts.slice(0, -1).join("/");
654
715
  })
716
+ .filter((v) => v && v !== "." && v.length > 0) // filter empty/root paths
655
717
  .filter((v, i, a) => a.indexOf(v) === i)
656
718
  .slice(0, 3);
657
719
  if (routeDirs.length > 0) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sourcebook",
3
- "version": "0.5.1",
3
+ "version": "0.5.2",
4
4
  "description": "Extract the conventions, constraints, and architectural truths your AI coding agents keep missing.",
5
5
  "type": "module",
6
6
  "bin": {