sourcebook 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -6
- package/dist/cli.js +0 -0
- package/dist/commands/update.js +2 -0
- package/dist/scanner/frameworks.js +1 -3
- package/dist/scanner/git.js +39 -5
- package/dist/scanner/patterns.js +75 -13
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,25 +4,27 @@
|
|
|
4
4
|
|
|
5
5
|
# sourcebook
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
**AI can read your code. It still doesn't know how your project works.**
|
|
8
|
+
|
|
9
|
+
sourcebook captures the project knowledge your team carries in its head — conventions, patterns, traps, and where things actually go — and turns it into context your coding agent can use.
|
|
8
10
|
|
|
9
11
|
```bash
|
|
10
12
|
npx sourcebook init
|
|
11
13
|
```
|
|
12
14
|
|
|
13
|
-
One command. Analyzes your codebase. Outputs a `CLAUDE.md` tuned for how your project actually works.
|
|
14
|
-
|
|
15
15
|
<p align="center">
|
|
16
16
|
<img src="demo.svg" alt="sourcebook demo" width="820" />
|
|
17
17
|
</p>
|
|
18
18
|
|
|
19
|
+
> Tools like Repomix give AI your entire codebase. sourcebook gives it your project knowledge.
|
|
20
|
+
|
|
19
21
|
## Why
|
|
20
22
|
|
|
21
|
-
AI coding agents spend most of their context window
|
|
23
|
+
AI coding agents spend most of their context window orienting — reading files to build a mental model before doing real work. Most context files (`CLAUDE.md`, `.cursorrules`) are generic and go stale fast.
|
|
22
24
|
|
|
23
|
-
Research shows auto-generated context that restates obvious information
|
|
25
|
+
Research shows auto-generated context that restates obvious information actually makes agents [worse by 2-3%](https://arxiv.org/abs/2502.09601). The only context that helps is **non-discoverable information** — the project knowledge agents can't figure out by reading code alone.
|
|
24
26
|
|
|
25
|
-
sourcebook
|
|
27
|
+
sourcebook extracts only what agents keep missing: the conventions, hidden dependencies, fragile areas, and dominant patterns that live in your team's heads — not in the code.
|
|
26
28
|
|
|
27
29
|
## What It Finds
|
|
28
30
|
|
package/dist/cli.js
CHANGED
|
File without changes
|
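package/dist/commands/update.js
CHANGED
|
(hunk not shown in this rendering; the file list reports +2 -0)
|
package/dist/scanner/frameworks.js
CHANGED
|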
@@ -218,9 +218,7 @@ export async function detectFrameworks(dir, files) {
|
|
|
218
218
|
}
|
|
219
219
|
const paths = tsconfig?.compilerOptions?.paths;
|
|
220
220
|
if (paths) {
|
|
221
|
-
const aliases = Object.keys(paths)
|
|
222
|
-
.map((k) => k.replace("/*", ""))
|
|
223
|
-
.join(", ");
|
|
221
|
+
const aliases = [...new Set(Object.keys(paths).map((k) => k.replace("/*", "")))].join(", ");
|
|
224
222
|
findings.push({
|
|
225
223
|
category: "TypeScript imports",
|
|
226
224
|
description: `Path aliases configured: ${aliases}. Use these instead of relative imports.`,
|
package/dist/scanner/git.js
CHANGED
|
@@ -67,9 +67,14 @@ function detectRevertedPatterns(dir, revertedPatterns) {
|
|
|
67
67
|
if (reverts.length >= 2) {
|
|
68
68
|
// Extract what was reverted
|
|
69
69
|
const revertDescriptions = [];
|
|
70
|
+
const REVERT_NOISE = [
|
|
71
|
+
/\.yml$/i, /\.yaml$/i, /scorecard/i, /dependabot/i,
|
|
72
|
+
/^update /i, /^bump /i, /^deps/i, /^ci:/i, /^build:/i,
|
|
73
|
+
/^chore\(deps\)/i, /^chore\(release\)/i,
|
|
74
|
+
];
|
|
70
75
|
for (const line of reverts.slice(0, 10)) {
|
|
71
76
|
const match = line.match(/^[a-f0-9]+ Revert "(.+)"/);
|
|
72
|
-
if (match) {
|
|
77
|
+
if (match && !REVERT_NOISE.some(n => n.test(match[1]))) {
|
|
73
78
|
revertDescriptions.push(match[1]);
|
|
74
79
|
revertedPatterns.push(match[1]);
|
|
75
80
|
}
|
|
@@ -103,8 +108,15 @@ function detectAntiPatterns(dir) {
|
|
|
103
108
|
antiPatterns.push(match[1]);
|
|
104
109
|
}
|
|
105
110
|
}
|
|
106
|
-
|
|
107
|
-
|
|
111
|
+
// Filter out noise: CI config, deps, version bumps
|
|
112
|
+
const REVERT_NOISE = [
|
|
113
|
+
/\.yml$/i, /\.yaml$/i, /scorecard/i, /dependabot/i,
|
|
114
|
+
/^update /i, /^bump /i, /^deps/i, /^ci:/i, /^build:/i,
|
|
115
|
+
/^chore\(deps\)/i, /^chore\(release\)/i,
|
|
116
|
+
];
|
|
117
|
+
const meaningful = antiPatterns.filter(p => !REVERT_NOISE.some(n => n.test(p)));
|
|
118
|
+
if (meaningful.length > 0) {
|
|
119
|
+
for (const pattern of meaningful.slice(0, 5)) {
|
|
108
120
|
findings.push({
|
|
109
121
|
category: "Anti-patterns",
|
|
110
122
|
description: `Tried and reverted: "${pattern}". This approach was explicitly rejected.`,
|
|
@@ -137,8 +149,22 @@ function detectAntiPatterns(dir) {
|
|
|
137
149
|
if (currentFiles.length >= 3) {
|
|
138
150
|
deletionBatches.push({ message: currentMessage, files: currentFiles });
|
|
139
151
|
}
|
|
152
|
+
// Filter out release/changeset/version commits and revert-of-revert noise
|
|
153
|
+
const NOISE_PATTERNS = [
|
|
154
|
+
/^chore\(release\)/i,
|
|
155
|
+
/^\[ci\] release/i,
|
|
156
|
+
/^version packages/i,
|
|
157
|
+
/^changeset/i,
|
|
158
|
+
/^bump/i,
|
|
159
|
+
/^release/i,
|
|
160
|
+
/^Revert "Revert/i,
|
|
161
|
+
/^merge/i,
|
|
162
|
+
/^ci:/i,
|
|
163
|
+
/^build:/i,
|
|
164
|
+
/^Revert /i,
|
|
165
|
+
];
|
|
140
166
|
// Only report significant deletions (3+ files in one commit = abandoned feature)
|
|
141
|
-
for (const batch of deletionBatches.slice(0, 3)) {
|
|
167
|
+
for (const batch of deletionBatches.filter(b => !NOISE_PATTERNS.some(p => p.test(b.message))).slice(0, 3)) {
|
|
142
168
|
if (batch.files.length >= 3) {
|
|
143
169
|
const fileList = batch.files.slice(0, 3).map((f) => path.basename(f)).join(", ");
|
|
144
170
|
findings.push({
|
|
@@ -313,9 +339,17 @@ function detectRapidReEdits(dir) {
|
|
|
313
339
|
}
|
|
314
340
|
// Find files edited 5+ times within a 7-day window
|
|
315
341
|
const churnyFiles = [];
|
|
342
|
+
// Filter out non-source files that naturally churn
|
|
343
|
+
const NON_SOURCE_PATTERNS = [
|
|
344
|
+
/\.md$/i, /\.mdx$/i, /\.rst$/i, /\.txt$/i, /\.json$/i, /\.ya?ml$/i, /\.lock$/i, /\.log$/i,
|
|
345
|
+
/CHANGELOG/i, /\.env/, /\.generated\./, /\.config\./,
|
|
346
|
+
/\.github\//, /\.claude\//, /dashboard\//, /ops\//,
|
|
347
|
+
];
|
|
316
348
|
for (const [file, dates] of fileEdits) {
|
|
317
349
|
if (dates.length < 5)
|
|
318
350
|
continue;
|
|
351
|
+
if (NON_SOURCE_PATTERNS.some((p) => p.test(file)))
|
|
352
|
+
continue;
|
|
319
353
|
// Sort dates
|
|
320
354
|
dates.sort((a, b) => a.getTime() - b.getTime());
|
|
321
355
|
// Sliding window: find any 7-day window with 5+ edits
|
|
@@ -377,7 +411,7 @@ function detectCommitPatterns(dir) {
|
|
|
377
411
|
.map(([scope]) => scope);
|
|
378
412
|
findings.push({
|
|
379
413
|
category: "Commit conventions",
|
|
380
|
-
description: `Uses Conventional Commits (feat/fix/docs/etc)
|
|
414
|
+
description: `Uses Conventional Commits (feat/fix/docs/etc).${topScopes.length > 0 ? ` Common scopes: ${topScopes.join(", ")}.` : ""} Follow this pattern for new commits.`,
|
|
381
415
|
confidence: "high",
|
|
382
416
|
discoverable: false,
|
|
383
417
|
});
|
package/dist/scanner/patterns.js
CHANGED
|
@@ -64,8 +64,11 @@ function sampleFiles(files, maxCount) {
|
|
|
64
64
|
f.includes("layout.") ||
|
|
65
65
|
f.includes("middleware."));
|
|
66
66
|
const rest = files.filter((f) => !priority.includes(f));
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
// Deterministic sampling: sort by path, take evenly spaced files
|
|
68
|
+
const sorted = rest.sort();
|
|
69
|
+
const step = Math.max(1, Math.floor(sorted.length / Math.max(1, maxCount - priority.length)));
|
|
70
|
+
const sampled = sorted.filter((_, i) => i % step === 0);
|
|
71
|
+
return [...priority, ...sampled].slice(0, maxCount);
|
|
69
72
|
}
|
|
70
73
|
function detectBarrelExports(files, contents) {
|
|
71
74
|
const indexFiles = files.filter((f) => path.basename(f).startsWith("index.") && !f.includes("node_modules"));
|
|
@@ -318,7 +321,25 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
|
|
|
318
321
|
}
|
|
319
322
|
}
|
|
320
323
|
}
|
|
321
|
-
|
|
324
|
+
// Filter: if only t() matched, require corroborating evidence (i18n files or packages)
|
|
325
|
+
const hasI18nFiles = files.some((f) => f.includes("locale") || f.includes("i18n") || f.includes("translations") || f.includes("messages/"));
|
|
326
|
+
let hasI18nPackage = false;
|
|
327
|
+
for (const [f, c] of allContents) {
|
|
328
|
+
if (f.endsWith("package.json") && (c.includes("i18next") || c.includes("react-intl") || c.includes("next-intl") || c.includes("@lingui"))) {
|
|
329
|
+
hasI18nPackage = true;
|
|
330
|
+
break;
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
const dominantI18n = i18nPatterns
|
|
334
|
+
.filter((p) => {
|
|
335
|
+
if (p.count < 3)
|
|
336
|
+
return false;
|
|
337
|
+
// t() alone is too generic — require corroborating evidence
|
|
338
|
+
if (p.hook === 't("key")' && !hasI18nFiles && !hasI18nPackage)
|
|
339
|
+
return false;
|
|
340
|
+
return true;
|
|
341
|
+
})
|
|
342
|
+
.sort((a, b) => b.count - a.count);
|
|
322
343
|
if (dominantI18n.length > 0) {
|
|
323
344
|
const primary = dominantI18n[0];
|
|
324
345
|
let desc = `User-facing strings use ${primary.hook} for internationalization.`;
|
|
@@ -345,10 +366,10 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
|
|
|
345
366
|
// 2. ROUTING / API PATTERNS
|
|
346
367
|
// ========================================
|
|
347
368
|
const routerPatterns = [
|
|
348
|
-
{ pattern: "trpc\\.router|createTRPCRouter|
|
|
369
|
+
{ pattern: "trpc\\.router|createTRPCRouter|from ['\"]@trpc", name: "tRPC routers", count: 0 },
|
|
349
370
|
{ pattern: "express\\.Router|router\\.get|router\\.post", name: "Express routers", count: 0 },
|
|
350
371
|
{ pattern: "app\\.get\\(|app\\.post\\(|app\\.put\\(", name: "Express app routes", count: 0 },
|
|
351
|
-
{ pattern: "Hono|
|
|
372
|
+
{ pattern: "new Hono|from ['\"]hono['\"]", name: "Hono routes", count: 0 },
|
|
352
373
|
{ pattern: "FastAPI|@app\\.(get|post|put|delete)", name: "FastAPI endpoints", count: 0 },
|
|
353
374
|
{ pattern: "flask\\.route|@app\\.route", name: "Flask routes", count: 0 },
|
|
354
375
|
{ pattern: "gin\\.Engine|r\\.GET|r\\.POST", name: "Gin routes", count: 0 },
|
|
@@ -377,7 +398,7 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
|
|
|
377
398
|
// ========================================
|
|
378
399
|
const schemaPatterns = [
|
|
379
400
|
{ pattern: "z\\.object|z\\.string|z\\.number", name: "Zod", usage: "Use Zod schemas for validation", count: 0 },
|
|
380
|
-
{ pattern: "BaseModel|
|
|
401
|
+
{ pattern: "class\\s+\\w+\\(BaseModel\\)|from pydantic", name: "Pydantic", usage: "Use Pydantic BaseModel for data classes", count: 0 },
|
|
381
402
|
{ pattern: "Joi\\.object|Joi\\.string", name: "Joi", usage: "Use Joi schemas for validation", count: 0 },
|
|
382
403
|
{ pattern: "yup\\.object|yup\\.string", name: "Yup", usage: "Use Yup schemas for validation", count: 0 },
|
|
383
404
|
{ pattern: "class.*Serializer.*:|serializers\\.Serializer", name: "Django serializers", usage: "Use Django REST serializers for API data", count: 0 },
|
|
@@ -433,7 +454,7 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
|
|
|
433
454
|
// 5. TESTING PATTERNS
|
|
434
455
|
// ========================================
|
|
435
456
|
const testPatterns = [
|
|
436
|
-
{ pattern: "describe\\(|it\\(|test\\(", name: "
|
|
457
|
+
{ pattern: "describe\\(|it\\(|test\\(", name: "_generic_test", count: 0 },
|
|
437
458
|
{ pattern: "def test_|class Test|pytest", name: "pytest", count: 0 },
|
|
438
459
|
{ pattern: "func Test.*\\(t \\*testing\\.T\\)", name: "Go testing", count: 0 },
|
|
439
460
|
{ pattern: "expect\\(.*\\)\\.to", name: "Chai/expect", count: 0 },
|
|
@@ -466,7 +487,46 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
|
|
|
466
487
|
}
|
|
467
488
|
const dominantTest = testPatterns.filter((p) => p.count >= 2).sort((a, b) => b.count - a.count);
|
|
468
489
|
if (dominantTest.length > 0) {
|
|
469
|
-
|
|
490
|
+
let primary = dominantTest[0];
|
|
491
|
+
// Disambiguate generic test pattern by checking package.json devDependencies
|
|
492
|
+
if (primary.name === "_generic_test") {
|
|
493
|
+
let pkgContent = allContents.get("package.json") || "";
|
|
494
|
+
if (!pkgContent) {
|
|
495
|
+
const pkgPath = safePath(dir, "package.json");
|
|
496
|
+
if (pkgPath) {
|
|
497
|
+
try {
|
|
498
|
+
pkgContent = fs.readFileSync(pkgPath, "utf-8");
|
|
499
|
+
}
|
|
500
|
+
catch { /* skip */ }
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
if (pkgContent.includes('"vitest"')) {
|
|
504
|
+
primary = { ...primary, name: "Vitest" };
|
|
505
|
+
}
|
|
506
|
+
else if (pkgContent.includes('"jest"') || pkgContent.includes('"@jest/')) {
|
|
507
|
+
primary = { ...primary, name: "Jest" };
|
|
508
|
+
}
|
|
509
|
+
else if (pkgContent.includes('"mocha"')) {
|
|
510
|
+
primary = { ...primary, name: "Mocha" };
|
|
511
|
+
}
|
|
512
|
+
else if (pkgContent.includes('"jasmine"')) {
|
|
513
|
+
primary = { ...primary, name: "Jasmine" };
|
|
514
|
+
}
|
|
515
|
+
else {
|
|
516
|
+
// Check for Deno (deno.json/deno.jsonc) or Bun (bun.lockb)
|
|
517
|
+
const hasDeno = files.some(f => f === "deno.json" || f === "deno.jsonc" || f === "deno.lock");
|
|
518
|
+
const hasBun = files.some(f => f === "bun.lockb" || f === "bunfig.toml");
|
|
519
|
+
if (hasDeno) {
|
|
520
|
+
primary = { ...primary, name: "Deno test" };
|
|
521
|
+
}
|
|
522
|
+
else if (hasBun) {
|
|
523
|
+
primary = { ...primary, name: "Bun test" };
|
|
524
|
+
}
|
|
525
|
+
else {
|
|
526
|
+
primary = { ...primary, name: "Jest" }; // default for JS/TS projects
|
|
527
|
+
}
|
|
528
|
+
}
|
|
529
|
+
}
|
|
470
530
|
// Also detect common test utilities/helpers
|
|
471
531
|
const testHelperFiles = files.filter((f) => (f.includes("test-utils") || f.includes("testUtils") || f.includes("fixtures") || f.includes("helpers")) &&
|
|
472
532
|
(f.includes("test") || f.includes("spec")));
|
|
@@ -527,9 +587,9 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
|
|
|
527
587
|
// 7. STYLING CONVENTIONS
|
|
528
588
|
// ========================================
|
|
529
589
|
const stylePatterns = [
|
|
530
|
-
{ pattern: "
|
|
531
|
-
{ pattern: "styled\\.|styled\\(
|
|
532
|
-
{ pattern: "
|
|
590
|
+
{ pattern: "class=.*tw-|className=[\"'](?:flex |grid |p-|m-|text-|bg-|border-|rounded-|shadow-|w-|h-)", name: "Tailwind CSS", desc: "Styling uses Tailwind CSS utility classes", count: 0 },
|
|
591
|
+
{ pattern: "from ['\"]styled-components|from ['\"]@emotion|styled\\.|styled\\(", name: "styled-components/Emotion", desc: "Styling uses CSS-in-JS (styled-components or Emotion)", count: 0 },
|
|
592
|
+
{ pattern: "from.*\\.module\\.(css|scss)", name: "CSS Modules", desc: "Styling uses CSS Modules (*.module.css)", count: 0 },
|
|
533
593
|
];
|
|
534
594
|
for (const [f, content] of allContents) {
|
|
535
595
|
for (const p of stylePatterns) {
|
|
@@ -645,13 +705,15 @@ function detectDominantPatterns(dir, files, contents, frameworks) {
|
|
|
645
705
|
if (dominantRouter.length > 0) {
|
|
646
706
|
const routeDirs = files
|
|
647
707
|
.filter((f) => (f.includes("routes") || f.includes("routers") || f.includes("api/") || f.includes("app/api/")) &&
|
|
648
|
-
!f.includes("node_modules") && !f.includes(".test.") &&
|
|
708
|
+
!f.includes("node_modules") && !f.includes(".test.") && !f.includes(".spec.") &&
|
|
709
|
+
!f.includes("test/") && !f.includes("tests/") && !f.includes("__test") &&
|
|
710
|
+
!f.includes("fixture") && !f.includes("mock") &&
|
|
649
711
|
(f.endsWith(".ts") || f.endsWith(".js") || f.endsWith(".py") || f.endsWith(".go")))
|
|
650
712
|
.map((f) => {
|
|
651
713
|
const parts = f.split("/");
|
|
652
|
-
// Get the directory containing route files
|
|
653
714
|
return parts.slice(0, -1).join("/");
|
|
654
715
|
})
|
|
716
|
+
.filter((v) => v && v !== "." && v.length > 0) // filter empty/root paths
|
|
655
717
|
.filter((v, i, a) => a.indexOf(v) === i)
|
|
656
718
|
.slice(0, 3);
|
|
657
719
|
if (routeDirs.length > 0) {
|