mulch-cli 0.4.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/README.md +24 -4
  2. package/package.json +11 -16
  3. package/src/api.ts +310 -0
  4. package/src/cli.ts +54 -0
  5. package/src/commands/add.ts +61 -0
  6. package/src/commands/compact.ts +924 -0
  7. package/src/commands/delete.ts +103 -0
  8. package/src/commands/diff.ts +209 -0
  9. package/src/commands/doctor.ts +586 -0
  10. package/src/commands/edit.ts +253 -0
  11. package/src/commands/init.ts +33 -0
  12. package/src/commands/learn.ts +170 -0
  13. package/src/commands/onboard.ts +362 -0
  14. package/src/commands/prime.ts +327 -0
  15. package/src/commands/prune.ts +128 -0
  16. package/src/commands/query.ts +177 -0
  17. package/src/commands/ready.ts +194 -0
  18. package/src/commands/record.ts +959 -0
  19. package/src/commands/search.ts +234 -0
  20. package/src/commands/setup.ts +823 -0
  21. package/src/commands/status.ts +83 -0
  22. package/src/commands/sync.ts +224 -0
  23. package/src/commands/update.ts +112 -0
  24. package/src/commands/validate.ts +107 -0
  25. package/src/index.ts +50 -0
  26. package/src/schemas/config.ts +31 -0
  27. package/src/schemas/index.ts +18 -0
  28. package/src/schemas/record-schema.ts +177 -0
  29. package/src/schemas/record.ts +83 -0
  30. package/src/utils/bm25.ts +243 -0
  31. package/src/utils/budget.ts +157 -0
  32. package/src/utils/config.ts +117 -0
  33. package/src/utils/expertise.ts +379 -0
  34. package/src/utils/format.ts +767 -0
  35. package/src/utils/git.ts +89 -0
  36. package/src/utils/index.ts +54 -0
  37. package/src/utils/json-output.ts +13 -0
  38. package/src/utils/lock.ts +82 -0
  39. package/src/utils/markers.ts +51 -0
  40. package/src/utils/scoring.ts +101 -0
  41. package/src/utils/version.ts +46 -0
  42. package/dist/cli.d.ts +0 -3
  43. package/dist/cli.d.ts.map +0 -1
  44. package/dist/cli.js +0 -50
  45. package/dist/cli.js.map +0 -1
  46. package/dist/commands/add.d.ts +0 -3
  47. package/dist/commands/add.d.ts.map +0 -1
  48. package/dist/commands/add.js +0 -47
  49. package/dist/commands/add.js.map +0 -1
  50. package/dist/commands/compact.d.ts +0 -5
  51. package/dist/commands/compact.d.ts.map +0 -1
  52. package/dist/commands/compact.js +0 -709
  53. package/dist/commands/compact.js.map +0 -1
  54. package/dist/commands/delete.d.ts +0 -3
  55. package/dist/commands/delete.d.ts.map +0 -1
  56. package/dist/commands/delete.js +0 -82
  57. package/dist/commands/delete.js.map +0 -1
  58. package/dist/commands/diff.d.ts +0 -11
  59. package/dist/commands/diff.d.ts.map +0 -1
  60. package/dist/commands/diff.js +0 -170
  61. package/dist/commands/diff.js.map +0 -1
  62. package/dist/commands/doctor.d.ts +0 -3
  63. package/dist/commands/doctor.d.ts.map +0 -1
  64. package/dist/commands/doctor.js +0 -391
  65. package/dist/commands/doctor.js.map +0 -1
  66. package/dist/commands/edit.d.ts +0 -3
  67. package/dist/commands/edit.d.ts.map +0 -1
  68. package/dist/commands/edit.js +0 -210
  69. package/dist/commands/edit.js.map +0 -1
  70. package/dist/commands/init.d.ts +0 -3
  71. package/dist/commands/init.d.ts.map +0 -1
  72. package/dist/commands/init.js +0 -30
  73. package/dist/commands/init.js.map +0 -1
  74. package/dist/commands/learn.d.ts +0 -12
  75. package/dist/commands/learn.d.ts.map +0 -1
  76. package/dist/commands/learn.js +0 -130
  77. package/dist/commands/learn.js.map +0 -1
  78. package/dist/commands/onboard.d.ts +0 -10
  79. package/dist/commands/onboard.d.ts.map +0 -1
  80. package/dist/commands/onboard.js +0 -286
  81. package/dist/commands/onboard.js.map +0 -1
  82. package/dist/commands/prime.d.ts +0 -3
  83. package/dist/commands/prime.d.ts.map +0 -1
  84. package/dist/commands/prime.js +0 -242
  85. package/dist/commands/prime.js.map +0 -1
  86. package/dist/commands/prune.d.ts +0 -8
  87. package/dist/commands/prune.d.ts.map +0 -1
  88. package/dist/commands/prune.js +0 -90
  89. package/dist/commands/prune.js.map +0 -1
  90. package/dist/commands/query.d.ts +0 -3
  91. package/dist/commands/query.d.ts.map +0 -1
  92. package/dist/commands/query.js +0 -118
  93. package/dist/commands/query.js.map +0 -1
  94. package/dist/commands/ready.d.ts +0 -3
  95. package/dist/commands/ready.d.ts.map +0 -1
  96. package/dist/commands/ready.js +0 -160
  97. package/dist/commands/ready.js.map +0 -1
  98. package/dist/commands/record.d.ts +0 -13
  99. package/dist/commands/record.d.ts.map +0 -1
  100. package/dist/commands/record.js +0 -688
  101. package/dist/commands/record.js.map +0 -1
  102. package/dist/commands/search.d.ts +0 -3
  103. package/dist/commands/search.d.ts.map +0 -1
  104. package/dist/commands/search.js +0 -163
  105. package/dist/commands/search.js.map +0 -1
  106. package/dist/commands/setup.d.ts +0 -29
  107. package/dist/commands/setup.d.ts.map +0 -1
  108. package/dist/commands/setup.js +0 -548
  109. package/dist/commands/setup.js.map +0 -1
  110. package/dist/commands/status.d.ts +0 -3
  111. package/dist/commands/status.d.ts.map +0 -1
  112. package/dist/commands/status.js +0 -61
  113. package/dist/commands/status.js.map +0 -1
  114. package/dist/commands/sync.d.ts +0 -3
  115. package/dist/commands/sync.d.ts.map +0 -1
  116. package/dist/commands/sync.js +0 -176
  117. package/dist/commands/sync.js.map +0 -1
  118. package/dist/commands/update.d.ts +0 -3
  119. package/dist/commands/update.d.ts.map +0 -1
  120. package/dist/commands/update.js +0 -72
  121. package/dist/commands/update.js.map +0 -1
  122. package/dist/commands/validate.d.ts +0 -3
  123. package/dist/commands/validate.d.ts.map +0 -1
  124. package/dist/commands/validate.js +0 -86
  125. package/dist/commands/validate.js.map +0 -1
  126. package/dist/index.d.ts +0 -7
  127. package/dist/index.d.ts.map +0 -1
  128. package/dist/index.js +0 -8
  129. package/dist/index.js.map +0 -1
  130. package/dist/schemas/config.d.ts +0 -17
  131. package/dist/schemas/config.d.ts.map +0 -1
  132. package/dist/schemas/config.js +0 -16
  133. package/dist/schemas/config.js.map +0 -1
  134. package/dist/schemas/index.d.ts +0 -5
  135. package/dist/schemas/index.d.ts.map +0 -1
  136. package/dist/schemas/index.js +0 -3
  137. package/dist/schemas/index.js.map +0 -1
  138. package/dist/schemas/record-schema.d.ts +0 -379
  139. package/dist/schemas/record-schema.d.ts.map +0 -1
  140. package/dist/schemas/record-schema.js +0 -148
  141. package/dist/schemas/record-schema.js.map +0 -1
  142. package/dist/schemas/record.d.ts +0 -60
  143. package/dist/schemas/record.d.ts.map +0 -1
  144. package/dist/schemas/record.js +0 -2
  145. package/dist/schemas/record.js.map +0 -1
  146. package/dist/utils/bm25.d.ts +0 -39
  147. package/dist/utils/bm25.d.ts.map +0 -1
  148. package/dist/utils/bm25.js +0 -171
  149. package/dist/utils/bm25.js.map +0 -1
  150. package/dist/utils/budget.d.ts +0 -35
  151. package/dist/utils/budget.d.ts.map +0 -1
  152. package/dist/utils/budget.js +0 -114
  153. package/dist/utils/budget.js.map +0 -1
  154. package/dist/utils/config.d.ts +0 -12
  155. package/dist/utils/config.d.ts.map +0 -1
  156. package/dist/utils/config.js +0 -89
  157. package/dist/utils/config.js.map +0 -1
  158. package/dist/utils/expertise.d.ts +0 -57
  159. package/dist/utils/expertise.d.ts.map +0 -1
  160. package/dist/utils/expertise.js +0 -264
  161. package/dist/utils/expertise.js.map +0 -1
  162. package/dist/utils/format.d.ts +0 -31
  163. package/dist/utils/format.d.ts.map +0 -1
  164. package/dist/utils/format.js +0 -556
  165. package/dist/utils/format.js.map +0 -1
  166. package/dist/utils/git.d.ts +0 -6
  167. package/dist/utils/git.d.ts.map +0 -1
  168. package/dist/utils/git.js +0 -81
  169. package/dist/utils/git.js.map +0 -1
  170. package/dist/utils/index.d.ts +0 -8
  171. package/dist/utils/index.d.ts.map +0 -1
  172. package/dist/utils/index.js +0 -8
  173. package/dist/utils/index.js.map +0 -1
  174. package/dist/utils/json-output.d.ts +0 -8
  175. package/dist/utils/json-output.d.ts.map +0 -1
  176. package/dist/utils/json-output.js +0 -7
  177. package/dist/utils/json-output.js.map +0 -1
  178. package/dist/utils/lock.d.ts +0 -6
  179. package/dist/utils/lock.d.ts.map +0 -1
  180. package/dist/utils/lock.js +0 -70
  181. package/dist/utils/lock.js.map +0 -1
  182. package/dist/utils/markers.d.ts +0 -22
  183. package/dist/utils/markers.d.ts.map +0 -1
  184. package/dist/utils/markers.js +0 -42
  185. package/dist/utils/markers.js.map +0 -1
  186. package/dist/utils/scoring.d.ts +0 -73
  187. package/dist/utils/scoring.d.ts.map +0 -1
  188. package/dist/utils/scoring.js +0 -80
  189. package/dist/utils/scoring.js.map +0 -1
  190. package/dist/utils/version.d.ts +0 -15
  191. package/dist/utils/version.d.ts.map +0 -1
  192. package/dist/utils/version.js +0 -48
  193. package/dist/utils/version.js.map +0 -1
@@ -0,0 +1,177 @@
1
/**
 * JSON Schema fragment for a list of record links. Each entry must be a record
 * id (`mx-` plus 4-8 lowercase hex digits), optionally preceded by a
 * `[a-z0-9-]+:` prefix.
 */
const linkArray = {
  type: "array",
  items: {
    type: "string",
    pattern: "^([a-z0-9-]+:)?mx-[0-9a-f]{4,8}$",
  },
} as const;
5
+
6
/** Optional record identifier: `mx-` followed by 4-8 lowercase hex digits. */
const recordIdProperty = {
  type: "string",
  pattern: "^mx-[0-9a-f]{4,8}$",
} as const;

/** Unconstrained string property. */
const stringProperty = { type: "string" } as const;

/** Array-of-strings property (used by `tags` and `files`). */
const stringArrayProperty = {
  type: "array",
  items: { type: "string" },
} as const;

/**
 * Properties that every record variant lists after its type-specific fields.
 * Spread last in each variant so the property order of the schema is preserved.
 */
const commonRecordProperties = {
  classification: { $ref: "#/definitions/classification" },
  recorded_at: stringProperty,
  evidence: { $ref: "#/definitions/evidence" },
  tags: stringArrayProperty,
  relates_to: linkArray,
  supersedes: linkArray,
  outcomes: { type: "array", items: { $ref: "#/definitions/outcome" } },
} as const;

/**
 * Draft-07 JSON Schema for a single expertise record.
 *
 * A record must match exactly one of six variants, distinguished by the
 * constant `type` property: convention, pattern, failure, decision, reference
 * or guide. Every variant forbids unknown properties and requires `type`,
 * `classification` and `recorded_at` in addition to its own fields.
 */
export const recordSchema = {
  $schema: "http://json-schema.org/draft-07/schema#",
  title: "Mulch Expertise Record",
  description: "A single expertise record in a Mulch domain file",
  type: "object",
  definitions: {
    classification: {
      type: "string",
      enum: ["foundational", "tactical", "observational"],
    },
    evidence: {
      type: "object",
      properties: {
        commit: stringProperty,
        date: stringProperty,
        issue: stringProperty,
        file: stringProperty,
        bead: stringProperty,
      },
      additionalProperties: false,
    },
    outcome: {
      type: "object",
      properties: {
        status: { type: "string", enum: ["success", "failure", "partial"] },
        duration: { type: "number" },
        test_results: stringProperty,
        agent: stringProperty,
        notes: stringProperty,
        recorded_at: stringProperty,
      },
      required: ["status"],
      additionalProperties: false,
    },
  },
  oneOf: [
    // convention
    {
      type: "object",
      properties: {
        id: recordIdProperty,
        type: { type: "string", const: "convention" },
        content: stringProperty,
        ...commonRecordProperties,
      },
      required: ["type", "content", "classification", "recorded_at"],
      additionalProperties: false,
    },
    // pattern
    {
      type: "object",
      properties: {
        id: recordIdProperty,
        type: { type: "string", const: "pattern" },
        name: stringProperty,
        description: stringProperty,
        files: stringArrayProperty,
        ...commonRecordProperties,
      },
      required: ["type", "name", "description", "classification", "recorded_at"],
      additionalProperties: false,
    },
    // failure
    {
      type: "object",
      properties: {
        id: recordIdProperty,
        type: { type: "string", const: "failure" },
        description: stringProperty,
        resolution: stringProperty,
        ...commonRecordProperties,
      },
      required: [
        "type",
        "description",
        "resolution",
        "classification",
        "recorded_at",
      ],
      additionalProperties: false,
    },
    // decision
    {
      type: "object",
      properties: {
        id: recordIdProperty,
        type: { type: "string", const: "decision" },
        title: stringProperty,
        rationale: stringProperty,
        date: stringProperty,
        ...commonRecordProperties,
      },
      required: ["type", "title", "rationale", "classification", "recorded_at"],
      additionalProperties: false,
    },
    // reference
    {
      type: "object",
      properties: {
        id: recordIdProperty,
        type: { type: "string", const: "reference" },
        name: stringProperty,
        description: stringProperty,
        files: stringArrayProperty,
        ...commonRecordProperties,
      },
      required: ["type", "name", "description", "classification", "recorded_at"],
      additionalProperties: false,
    },
    // guide
    {
      type: "object",
      properties: {
        id: recordIdProperty,
        type: { type: "string", const: "guide" },
        name: stringProperty,
        description: stringProperty,
        ...commonRecordProperties,
      },
      required: ["type", "name", "description", "classification", "recorded_at"],
      additionalProperties: false,
    },
  ],
} as const;
@@ -0,0 +1,83 @@
1
/** Discriminator values for the supported kinds of expertise record. */
export type RecordType =
  | "convention"
  | "pattern"
  | "failure"
  | "decision"
  | "reference"
  | "guide";
8
+
9
/** Allowed values of a record's `classification` field. */
export type Classification =
  | "foundational"
  | "tactical"
  | "observational";
10
+
11
/**
 * Optional supporting references attached to a record.
 * Every field is an optional free-form string.
 */
export interface Evidence {
  commit?: string;
  date?: string;
  issue?: string;
  file?: string;
  bead?: string;
}
18
+
19
/**
 * One entry of a record's `outcomes` list.
 * Only `status` is required; the remaining fields are optional details.
 */
export interface Outcome {
  status: "success" | "failure" | "partial";
  duration?: number;
  test_results?: string;
  agent?: string;
  notes?: string;
  recorded_at?: string;
}
27
+
28
/**
 * Fields shared by every record variant. Only `classification` and
 * `recorded_at` are mandatory; everything else is optional metadata.
 */
interface BaseRecord {
  id?: string;
  classification: Classification;
  recorded_at: string;
  evidence?: Evidence;
  tags?: string[];
  relates_to?: string[];
  supersedes?: string[];
  outcomes?: Outcome[];
}
38
+
39
/** Record variant tagged `"convention"`; carries a single `content` string. */
export interface ConventionRecord extends BaseRecord {
  type: "convention";
  content: string;
}
43
+
44
/**
 * Record variant tagged `"pattern"`: a named, described entry with an
 * optional list of file strings.
 */
export interface PatternRecord extends BaseRecord {
  type: "pattern";
  name: string;
  description: string;
  files?: string[];
}
50
+
51
/** Record variant tagged `"failure"`: a description paired with its resolution. */
export interface FailureRecord extends BaseRecord {
  type: "failure";
  description: string;
  resolution: string;
}
56
+
57
/**
 * Record variant tagged `"decision"`: a title with its rationale and an
 * optional `date` string.
 */
export interface DecisionRecord extends BaseRecord {
  type: "decision";
  title: string;
  rationale: string;
  date?: string;
}
63
+
64
/**
 * Record variant tagged `"reference"`: a named, described entry with an
 * optional list of file strings.
 */
export interface ReferenceRecord extends BaseRecord {
  type: "reference";
  name: string;
  description: string;
  files?: string[];
}
70
+
71
/** Record variant tagged `"guide"`: a named entry with a description. */
export interface GuideRecord extends BaseRecord {
  type: "guide";
  name: string;
  description: string;
}
76
+
77
/**
 * Any expertise record. This is a discriminated union: switch on `type` to
 * narrow to a specific variant.
 */
export type ExpertiseRecord =
  | ConventionRecord
  | PatternRecord
  | FailureRecord
  | DecisionRecord
  | ReferenceRecord
  | GuideRecord;
@@ -0,0 +1,243 @@
1
+ import type { ExpertiseRecord } from "../schemas/record.ts";
2
+
3
/**
 * Tunable constants of the BM25 ranking formula.
 */
export interface BM25Params {
  /** Term-frequency saturation constant (typical values: 1.2-2.0). */
  k1: number;
  /** Document-length normalization weight: 0 disables it, 1 applies it fully. */
  b: number;
}
12
+
13
/**
 * BM25 parameters used when a caller does not supply its own
 * (k1 = 1.5, b = 0.75).
 */
export const DEFAULT_BM25_PARAMS: BM25Params = { k1: 1.5, b: 0.75 };
20
+
21
/**
 * One ranked hit produced by a BM25 search.
 */
export interface BM25Result {
  /** The record that matched. */
  record: ExpertiseRecord;
  /** BM25 relevance score of the record for the query. */
  score: number;
  /** Names of the record fields containing at least one query token. */
  matchedFields: string[];
}
30
+
31
/**
 * Split text into lowercase search tokens.
 *
 * Letters, combining marks, digits, underscores and hyphens are kept as part
 * of a token; every other non-whitespace character is treated as a separator.
 * The character classes are Unicode-aware, so accented and non-Latin words
 * stay intact instead of being split or discarded (the ASCII-only `\w` class
 * would turn "café" into "caf"). ASCII input tokenizes exactly as it would
 * with `\w`.
 *
 * @param text - Raw text to tokenize.
 * @returns Non-empty tokens in their order of appearance.
 */
export function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    // Replace punctuation with spaces; hyphens are kept so compound words survive.
    .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, " ")
    .split(/\s+/)
    .filter((token) => token.length > 0);
}
41
+
42
/**
 * Collect the searchable text of a record.
 *
 * The type-specific fields are gathered first, followed by `tags`. String
 * values are used as-is; array values contribute their string items joined by
 * single spaces. Values that are missing or whitespace-only are skipped.
 *
 * @param record - Record to extract text from.
 * @returns `fieldTexts`, mapping each contributing field name to its text, and
 *   `allText`, the same texts joined by single spaces in field order.
 */
export function extractRecordText(record: ExpertiseRecord): {
  allText: string;
  fieldTexts: Record<string, string>;
} {
  // Ordered [field name, raw value] pairs to consider for this record.
  const candidates: Array<[string, unknown]> = [];

  switch (record.type) {
    case "pattern":
    case "reference":
      candidates.push(
        ["name", record.name],
        ["description", record.description],
        ["files", record.files],
      );
      break;
    case "convention":
      candidates.push(["content", record.content]);
      break;
    case "failure":
      candidates.push(
        ["description", record.description],
        ["resolution", record.resolution],
      );
      break;
    case "decision":
      candidates.push(["title", record.title], ["rationale", record.rationale]);
      break;
    case "guide":
      candidates.push(
        ["name", record.name],
        ["description", record.description],
      );
      break;
  }

  // Tags are searchable on every record type.
  candidates.push(["tags", record.tags]);

  const fieldTexts: Record<string, string> = {};
  const parts: string[] = [];

  for (const [fieldName, rawValue] of candidates) {
    let text: string;
    if (typeof rawValue === "string") {
      text = rawValue;
    } else if (Array.isArray(rawValue)) {
      text = rawValue.filter((item) => typeof item === "string").join(" ");
    } else {
      continue;
    }

    if (text.trim().length === 0) {
      continue;
    }
    fieldTexts[fieldName] = text;
    parts.push(text);
  }

  return { allText: parts.join(" "), fieldTexts };
}
105
+
106
/**
 * Count how many times each token occurs.
 *
 * @param tokens - Tokens of a single document.
 * @returns Map from token to occurrence count, in first-seen order.
 */
function calculateTermFrequency(tokens: string[]): Map<string, number> {
  return tokens.reduce(
    (counts, token) => counts.set(token, (counts.get(token) ?? 0) + 1),
    new Map<string, number>(),
  );
}
116
+
117
/**
 * Compute the inverse document frequency of every term in a corpus.
 *
 * Uses `log((N - df + 0.5) / (df + 0.5) + 1)`, where `N` is the number of
 * documents and `df` the number of documents containing the term. The `+ 1`
 * inside the logarithm keeps the value positive even for very common terms.
 *
 * @param corpus - Documents, each exposing its token list.
 * @returns Map from term to IDF value.
 */
function calculateIDF(
  corpus: Array<{ tokens: string[] }>,
): Map<string, number> {
  const totalDocs = corpus.length;

  // Document frequency: a term counts once per document, however often it repeats.
  const documentFrequency = new Map<string, number>();
  corpus.forEach(({ tokens }) => {
    new Set(tokens).forEach((term) => {
      documentFrequency.set(term, (documentFrequency.get(term) ?? 0) + 1);
    });
  });

  const idf = new Map<string, number>();
  documentFrequency.forEach((df, term) => {
    idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
  });
  return idf;
}
144
+
145
/**
 * Compute the BM25 score of one document for a query.
 *
 * Query terms that do not occur in the document are skipped. They contribute
 * nothing to the sum, and skipping them avoids a `0 / 0` division (NaN) when
 * the length-normalization term is zero or undefined, e.g. with `k1 = 0` or
 * an `avgDocLength` of 0.
 *
 * @param queryTokens - Tokenized query; a repeated token is counted each time.
 * @param docTokens - Tokens of the document being scored.
 * @param docLength - Length of the document in tokens.
 * @param avgDocLength - Average document length across the corpus.
 * @param idf - Inverse document frequency per term; missing terms count as 0.
 * @param params - BM25 `k1` and `b` constants.
 * @returns Sum of the per-term BM25 contributions.
 */
function calculateBM25Score(
  queryTokens: string[],
  docTokens: string[],
  docLength: number,
  avgDocLength: number,
  idf: Map<string, number>,
  params: BM25Params,
): number {
  const tf = calculateTermFrequency(docTokens);

  // Length-normalization part of the denominator; identical for every term.
  const lengthNorm =
    params.k1 * (1 - params.b + params.b * (docLength / avgDocLength));

  let score = 0;
  for (const queryTerm of queryTokens) {
    const termFreq = tf.get(queryTerm) ?? 0;
    if (termFreq === 0) {
      continue;
    }
    const termIDF = idf.get(queryTerm) ?? 0;

    // BM25 formula
    const numerator = termFreq * (params.k1 + 1);
    const denominator = termFreq + lengthNorm;

    score += termIDF * (numerator / denominator);
  }

  return score;
}
174
+
175
/**
 * Rank records against a free-text query using BM25.
 *
 * @param records - Records to search.
 * @param query - Free-text query; it is tokenized the same way as the records.
 * @param params - BM25 constants (defaults to `DEFAULT_BM25_PARAMS`).
 * @returns Records with a positive score, highest score first, each with the
 *   names of the fields that contain at least one query token. Empty when
 *   there are no records or the query yields no tokens.
 */
export function searchBM25(
  records: ExpertiseRecord[],
  query: string,
  params: BM25Params = DEFAULT_BM25_PARAMS,
): BM25Result[] {
  const hasInput = records.length > 0 && query.trim().length > 0;
  const queryTokens = hasInput ? tokenize(query) : [];
  if (queryTokens.length === 0) {
    return [];
  }

  // Tokenize every record once up front.
  const docs = records.map((record) => {
    const { allText, fieldTexts } = extractRecordText(record);
    return { record, tokens: tokenize(allText), fieldTexts };
  });

  let totalLength = 0;
  for (const doc of docs) {
    totalLength += doc.tokens.length;
  }
  const avgDocLength = totalLength / docs.length;
  const idf = calculateIDF(docs);

  const results: BM25Result[] = [];
  docs.forEach(({ record, tokens, fieldTexts }) => {
    const score = calculateBM25Score(
      queryTokens,
      tokens,
      tokens.length,
      avgDocLength,
      idf,
      params,
    );
    // Records without a positive score are not reported.
    if (!(score > 0)) {
      return;
    }

    // A field matches when it shares at least one token with the query.
    const matchedFields = Object.entries(fieldTexts)
      .filter(([, fieldText]) => {
        const fieldTokens = tokenize(fieldText);
        return queryTokens.some((qt) => fieldTokens.includes(qt));
      })
      .map(([fieldName]) => fieldName);

    results.push({ record, score, matchedFields });
  });

  // Best match first.
  return results.sort((a, b) => b.score - a.score);
}
@@ -0,0 +1,157 @@
1
+ import type {
2
+ Classification,
3
+ ExpertiseRecord,
4
+ RecordType,
5
+ } from "../schemas/record.ts";
6
+ import { type ScoredRecord, computeConfirmationScore } from "./scoring.ts";
7
+
8
/**
 * Default budget value (4000).
 * NOTE(review): presumably the token budget used when the caller gives none —
 * confirm against the call sites.
 */
export const DEFAULT_BUDGET = 4_000;
9
+
10
/**
 * Record types ranked from highest to lowest priority; a type's position in
 * this array is its sort rank.
 */
const TYPE_PRIORITY: RecordType[] = [
  "convention", "decision", "pattern",
  "guide", "failure", "reference",
];
19
+
20
/**
 * Classifications ranked from highest to lowest priority; a classification's
 * position in this array is its sort rank.
 */
const CLASSIFICATION_PRIORITY: Classification[] = ["foundational", "tactical", "observational"];
26
+
27
/** The records belonging to one named domain. */
export interface DomainRecords {
  /** Domain name. */
  domain: string;
  /** Records of that domain. */
  records: ScoredRecord[];
}
31
+
32
/** Outcome of applying a token budget to a set of domains. */
export interface BudgetResult {
  /** Surviving records, grouped by domain in the original domain order. */
  kept: DomainRecords[];
  /** How many records were dropped in total. */
  droppedCount: number;
  /** How many domains lost at least one record. */
  droppedDomainCount: number;
}
40
+
41
/**
 * Build the priority sort key of a record.
 *
 * Keys compare component by component, smaller first: type rank, then
 * classification rank, then confirmation score (negated so higher scores sort
 * first), then recording time (negated so newer records sort first).
 *
 * Every component is a real number so the comparator built on this key stays
 * consistent:
 * - a type or classification missing from its priority list ranks after all
 *   known ones instead of before them (`indexOf` would yield -1);
 * - a missing or unparsable `recorded_at` counts as time 0 instead of
 *   producing NaN.
 *
 * @param r - Record to rank.
 * @returns `[typeRank, classificationRank, -confirmationScore, -time]`.
 */
function recordSortKey(r: ScoredRecord): [number, number, number, number] {
  const typePos = TYPE_PRIORITY.indexOf(r.type);
  const typeIdx = typePos === -1 ? TYPE_PRIORITY.length : typePos;

  const classPos = CLASSIFICATION_PRIORITY.indexOf(r.classification);
  const classIdx = classPos === -1 ? CLASSIFICATION_PRIORITY.length : classPos;

  const confirmationScore = computeConfirmationScore(r);

  const parsedTime = r.recorded_at ? new Date(r.recorded_at).getTime() : 0;
  // An invalid date string parses to NaN, which would poison every comparison.
  const time = Number.isNaN(parsedTime) ? 0 : parsedTime;

  return [typeIdx, classIdx, -confirmationScore, -time];
}
52
+
53
/**
 * Comparator ordering records by their priority sort key: the first key
 * component that differs decides, and records with identical keys compare
 * equal.
 */
function compareRecords(a: ScoredRecord, b: ScoredRecord): number {
  const left = recordSortKey(a);
  const right = recordSortKey(b);
  const firstDiff = left.findIndex((part, i) => part !== right[i]);
  return firstDiff === -1 ? 0 : left[firstDiff] - right[firstDiff];
}
61
+
62
/**
 * Rough token estimate for a piece of text: one token per four characters
 * (`text.length` units), rounded up.
 */
export function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 4;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
68
+
69
/**
 * Apply a token budget to records across multiple domains.
 *
 * All records are ranked together with `compareRecords` (type, then
 * classification, then confirmation score, then recency). They are then taken
 * greedily in that order: a record is kept when its estimated cost still fits
 * in the remaining budget, otherwise it is skipped and later (possibly
 * cheaper) records are still considered.
 *
 * Kept state is tracked per input position rather than looked up by object
 * identity or domain name, so the rebuild step is linear and stays correct
 * when the same record object appears twice or two input entries share a
 * domain name.
 *
 * @param domains - Records grouped by domain, in display order.
 * @param budget - Maximum total estimated tokens to keep.
 * @param formatRecord - Renders a record for its domain; the rendered text is
 *   used to estimate the record's token cost.
 * @returns The kept records grouped by domain (original domain and record
 *   order; domains with nothing kept are omitted), the number of dropped
 *   records, and the number of distinct domain names that lost a record.
 */
export function applyBudget(
  domains: DomainRecords[],
  budget: number,
  formatRecord: (record: ExpertiseRecord, domain: string) => string,
): BudgetResult {
  interface Entry {
    domain: string;
    record: ScoredRecord;
    kept: boolean;
  }

  // One entry per input record, grouped exactly like the input.
  const groups = domains.map((d) => ({
    domain: d.domain,
    entries: d.records.map(
      (record): Entry => ({ domain: d.domain, record, kept: false }),
    ),
  }));

  // Flatten and rank by priority; the entries are shared with `groups`.
  const ranked: Entry[] = [];
  for (const group of groups) {
    for (const entry of group.entries) {
      ranked.push(entry);
    }
  }
  ranked.sort((a, b) => compareRecords(a.record, b.record));

  let usedTokens = 0;
  let keptCount = 0;
  for (const entry of ranked) {
    const cost = estimateTokens(formatRecord(entry.record, entry.domain));
    if (usedTokens + cost <= budget) {
      usedTokens += cost;
      entry.kept = true;
      keptCount++;
    }
  }

  // Rebuild domain groups preserving original domain order and record order.
  const kept: DomainRecords[] = [];
  const droppedDomains = new Set<string>();
  for (const group of groups) {
    const keptRecords = group.entries
      .filter((entry) => entry.kept)
      .map((entry) => entry.record);

    if (keptRecords.length < group.entries.length) {
      droppedDomains.add(group.domain);
    }
    if (keptRecords.length > 0) {
      kept.push({ domain: group.domain, records: keptRecords });
    }
  }

  return {
    kept,
    droppedCount: ranked.length - keptCount,
    droppedDomainCount: droppedDomains.size,
  };
}
144
+
145
/**
 * Build the one-line notice shown when the budget caused records to be
 * dropped, e.g. "... and 3 more records across 2 domains (use --budget <n> to
 * show more)". The domain part is omitted when `droppedDomainCount` is 0.
 *
 * @param droppedCount - Number of records that were dropped.
 * @param droppedDomainCount - Number of domains that had records dropped.
 */
export function formatBudgetSummary(
  droppedCount: number,
  droppedDomainCount: number,
): string {
  const pluralSuffix = (count: number): string => (count === 1 ? "" : "s");

  let domainPart = "";
  if (droppedDomainCount > 0) {
    domainPart = ` across ${droppedDomainCount} domain${pluralSuffix(droppedDomainCount)}`;
  }

  return `... and ${droppedCount} more record${pluralSuffix(droppedCount)}${domainPart} (use --budget <n> to show more)`;
}