mulch-cli 0.4.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -4
- package/package.json +11 -16
- package/src/api.ts +310 -0
- package/src/cli.ts +54 -0
- package/src/commands/add.ts +61 -0
- package/src/commands/compact.ts +924 -0
- package/src/commands/delete.ts +103 -0
- package/src/commands/diff.ts +209 -0
- package/src/commands/doctor.ts +586 -0
- package/src/commands/edit.ts +253 -0
- package/src/commands/init.ts +33 -0
- package/src/commands/learn.ts +170 -0
- package/src/commands/onboard.ts +362 -0
- package/src/commands/prime.ts +327 -0
- package/src/commands/prune.ts +128 -0
- package/src/commands/query.ts +177 -0
- package/src/commands/ready.ts +194 -0
- package/src/commands/record.ts +959 -0
- package/src/commands/search.ts +234 -0
- package/src/commands/setup.ts +823 -0
- package/src/commands/status.ts +83 -0
- package/src/commands/sync.ts +224 -0
- package/src/commands/update.ts +112 -0
- package/src/commands/validate.ts +107 -0
- package/src/index.ts +50 -0
- package/src/schemas/config.ts +31 -0
- package/src/schemas/index.ts +18 -0
- package/src/schemas/record-schema.ts +177 -0
- package/src/schemas/record.ts +83 -0
- package/src/utils/bm25.ts +243 -0
- package/src/utils/budget.ts +157 -0
- package/src/utils/config.ts +117 -0
- package/src/utils/expertise.ts +379 -0
- package/src/utils/format.ts +767 -0
- package/src/utils/git.ts +89 -0
- package/src/utils/index.ts +54 -0
- package/src/utils/json-output.ts +13 -0
- package/src/utils/lock.ts +82 -0
- package/src/utils/markers.ts +51 -0
- package/src/utils/scoring.ts +101 -0
- package/src/utils/version.ts +46 -0
- package/dist/cli.d.ts +0 -3
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -50
- package/dist/cli.js.map +0 -1
- package/dist/commands/add.d.ts +0 -3
- package/dist/commands/add.d.ts.map +0 -1
- package/dist/commands/add.js +0 -47
- package/dist/commands/add.js.map +0 -1
- package/dist/commands/compact.d.ts +0 -5
- package/dist/commands/compact.d.ts.map +0 -1
- package/dist/commands/compact.js +0 -709
- package/dist/commands/compact.js.map +0 -1
- package/dist/commands/delete.d.ts +0 -3
- package/dist/commands/delete.d.ts.map +0 -1
- package/dist/commands/delete.js +0 -82
- package/dist/commands/delete.js.map +0 -1
- package/dist/commands/diff.d.ts +0 -11
- package/dist/commands/diff.d.ts.map +0 -1
- package/dist/commands/diff.js +0 -170
- package/dist/commands/diff.js.map +0 -1
- package/dist/commands/doctor.d.ts +0 -3
- package/dist/commands/doctor.d.ts.map +0 -1
- package/dist/commands/doctor.js +0 -391
- package/dist/commands/doctor.js.map +0 -1
- package/dist/commands/edit.d.ts +0 -3
- package/dist/commands/edit.d.ts.map +0 -1
- package/dist/commands/edit.js +0 -210
- package/dist/commands/edit.js.map +0 -1
- package/dist/commands/init.d.ts +0 -3
- package/dist/commands/init.d.ts.map +0 -1
- package/dist/commands/init.js +0 -30
- package/dist/commands/init.js.map +0 -1
- package/dist/commands/learn.d.ts +0 -12
- package/dist/commands/learn.d.ts.map +0 -1
- package/dist/commands/learn.js +0 -130
- package/dist/commands/learn.js.map +0 -1
- package/dist/commands/onboard.d.ts +0 -10
- package/dist/commands/onboard.d.ts.map +0 -1
- package/dist/commands/onboard.js +0 -286
- package/dist/commands/onboard.js.map +0 -1
- package/dist/commands/prime.d.ts +0 -3
- package/dist/commands/prime.d.ts.map +0 -1
- package/dist/commands/prime.js +0 -242
- package/dist/commands/prime.js.map +0 -1
- package/dist/commands/prune.d.ts +0 -8
- package/dist/commands/prune.d.ts.map +0 -1
- package/dist/commands/prune.js +0 -90
- package/dist/commands/prune.js.map +0 -1
- package/dist/commands/query.d.ts +0 -3
- package/dist/commands/query.d.ts.map +0 -1
- package/dist/commands/query.js +0 -118
- package/dist/commands/query.js.map +0 -1
- package/dist/commands/ready.d.ts +0 -3
- package/dist/commands/ready.d.ts.map +0 -1
- package/dist/commands/ready.js +0 -160
- package/dist/commands/ready.js.map +0 -1
- package/dist/commands/record.d.ts +0 -13
- package/dist/commands/record.d.ts.map +0 -1
- package/dist/commands/record.js +0 -688
- package/dist/commands/record.js.map +0 -1
- package/dist/commands/search.d.ts +0 -3
- package/dist/commands/search.d.ts.map +0 -1
- package/dist/commands/search.js +0 -163
- package/dist/commands/search.js.map +0 -1
- package/dist/commands/setup.d.ts +0 -29
- package/dist/commands/setup.d.ts.map +0 -1
- package/dist/commands/setup.js +0 -548
- package/dist/commands/setup.js.map +0 -1
- package/dist/commands/status.d.ts +0 -3
- package/dist/commands/status.d.ts.map +0 -1
- package/dist/commands/status.js +0 -61
- package/dist/commands/status.js.map +0 -1
- package/dist/commands/sync.d.ts +0 -3
- package/dist/commands/sync.d.ts.map +0 -1
- package/dist/commands/sync.js +0 -176
- package/dist/commands/sync.js.map +0 -1
- package/dist/commands/update.d.ts +0 -3
- package/dist/commands/update.d.ts.map +0 -1
- package/dist/commands/update.js +0 -72
- package/dist/commands/update.js.map +0 -1
- package/dist/commands/validate.d.ts +0 -3
- package/dist/commands/validate.d.ts.map +0 -1
- package/dist/commands/validate.js +0 -86
- package/dist/commands/validate.js.map +0 -1
- package/dist/index.d.ts +0 -7
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -8
- package/dist/index.js.map +0 -1
- package/dist/schemas/config.d.ts +0 -17
- package/dist/schemas/config.d.ts.map +0 -1
- package/dist/schemas/config.js +0 -16
- package/dist/schemas/config.js.map +0 -1
- package/dist/schemas/index.d.ts +0 -5
- package/dist/schemas/index.d.ts.map +0 -1
- package/dist/schemas/index.js +0 -3
- package/dist/schemas/index.js.map +0 -1
- package/dist/schemas/record-schema.d.ts +0 -379
- package/dist/schemas/record-schema.d.ts.map +0 -1
- package/dist/schemas/record-schema.js +0 -148
- package/dist/schemas/record-schema.js.map +0 -1
- package/dist/schemas/record.d.ts +0 -60
- package/dist/schemas/record.d.ts.map +0 -1
- package/dist/schemas/record.js +0 -2
- package/dist/schemas/record.js.map +0 -1
- package/dist/utils/bm25.d.ts +0 -39
- package/dist/utils/bm25.d.ts.map +0 -1
- package/dist/utils/bm25.js +0 -171
- package/dist/utils/bm25.js.map +0 -1
- package/dist/utils/budget.d.ts +0 -35
- package/dist/utils/budget.d.ts.map +0 -1
- package/dist/utils/budget.js +0 -114
- package/dist/utils/budget.js.map +0 -1
- package/dist/utils/config.d.ts +0 -12
- package/dist/utils/config.d.ts.map +0 -1
- package/dist/utils/config.js +0 -89
- package/dist/utils/config.js.map +0 -1
- package/dist/utils/expertise.d.ts +0 -57
- package/dist/utils/expertise.d.ts.map +0 -1
- package/dist/utils/expertise.js +0 -264
- package/dist/utils/expertise.js.map +0 -1
- package/dist/utils/format.d.ts +0 -31
- package/dist/utils/format.d.ts.map +0 -1
- package/dist/utils/format.js +0 -556
- package/dist/utils/format.js.map +0 -1
- package/dist/utils/git.d.ts +0 -6
- package/dist/utils/git.d.ts.map +0 -1
- package/dist/utils/git.js +0 -81
- package/dist/utils/git.js.map +0 -1
- package/dist/utils/index.d.ts +0 -8
- package/dist/utils/index.d.ts.map +0 -1
- package/dist/utils/index.js +0 -8
- package/dist/utils/index.js.map +0 -1
- package/dist/utils/json-output.d.ts +0 -8
- package/dist/utils/json-output.d.ts.map +0 -1
- package/dist/utils/json-output.js +0 -7
- package/dist/utils/json-output.js.map +0 -1
- package/dist/utils/lock.d.ts +0 -6
- package/dist/utils/lock.d.ts.map +0 -1
- package/dist/utils/lock.js +0 -70
- package/dist/utils/lock.js.map +0 -1
- package/dist/utils/markers.d.ts +0 -22
- package/dist/utils/markers.d.ts.map +0 -1
- package/dist/utils/markers.js +0 -42
- package/dist/utils/markers.js.map +0 -1
- package/dist/utils/scoring.d.ts +0 -73
- package/dist/utils/scoring.d.ts.map +0 -1
- package/dist/utils/scoring.js +0 -80
- package/dist/utils/scoring.js.map +0 -1
- package/dist/utils/version.d.ts +0 -15
- package/dist/utils/version.d.ts.map +0 -1
- package/dist/utils/version.js +0 -48
- package/dist/utils/version.js.map +0 -1
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
 * JSON Schema fragment shared by the `relates_to` and `supersedes` properties
 * of every record variant: an array of strings, each of which must be a record
 * id (`mx-` followed by 4–8 lowercase hex digits), optionally preceded by a
 * `<prefix>:` qualifier built from lowercase letters, digits and hyphens.
 */
const linkArray = {
  type: "array",
  items: {
    type: "string",
    pattern: "^([a-z0-9-]+:)?mx-[0-9a-f]{4,8}$",
  },
} as const;
|
|
5
|
+
|
|
6
|
+
/**
 * JSON Schema (draft-07) for a single expertise record.
 *
 * A record must match exactly one of the six `oneOf` branches; the branches
 * are told apart by the constant value of their `type` property. Every branch
 * forbids unknown properties and requires `type`, `classification` and
 * `recorded_at` in addition to its own content fields. `id` is accepted but
 * never required.
 */
export const recordSchema = {
  $schema: "http://json-schema.org/draft-07/schema#",
  title: "Mulch Expertise Record",
  description: "A single expertise record in a Mulch domain file",
  type: "object",
  definitions: {
    // Shared enum referenced by every branch's `classification` property.
    classification: {
      type: "string",
      enum: ["foundational", "tactical", "observational"],
    },
    // Optional supporting references; every field is an optional string.
    evidence: {
      type: "object",
      properties: {
        commit: { type: "string" },
        date: { type: "string" },
        issue: { type: "string" },
        file: { type: "string" },
        bead: { type: "string" },
      },
      additionalProperties: false,
    },
    // One entry of a record's `outcomes` list; only `status` is mandatory.
    outcome: {
      type: "object",
      properties: {
        status: { type: "string", enum: ["success", "failure", "partial"] },
        duration: { type: "number" },
        test_results: { type: "string" },
        agent: { type: "string" },
        notes: { type: "string" },
        recorded_at: { type: "string" },
      },
      required: ["status"],
      additionalProperties: false,
    },
  },
  oneOf: [
    // type: "convention"
    {
      type: "object",
      properties: {
        id: { type: "string", pattern: "^mx-[0-9a-f]{4,8}$" },
        type: { type: "string", const: "convention" },
        content: { type: "string" },
        classification: { $ref: "#/definitions/classification" },
        recorded_at: { type: "string" },
        evidence: { $ref: "#/definitions/evidence" },
        tags: { type: "array", items: { type: "string" } },
        relates_to: linkArray,
        supersedes: linkArray,
        outcomes: { type: "array", items: { $ref: "#/definitions/outcome" } },
      },
      required: ["type", "content", "classification", "recorded_at"],
      additionalProperties: false,
    },
    // type: "pattern"
    {
      type: "object",
      properties: {
        id: { type: "string", pattern: "^mx-[0-9a-f]{4,8}$" },
        type: { type: "string", const: "pattern" },
        name: { type: "string" },
        description: { type: "string" },
        files: { type: "array", items: { type: "string" } },
        classification: { $ref: "#/definitions/classification" },
        recorded_at: { type: "string" },
        evidence: { $ref: "#/definitions/evidence" },
        tags: { type: "array", items: { type: "string" } },
        relates_to: linkArray,
        supersedes: linkArray,
        outcomes: { type: "array", items: { $ref: "#/definitions/outcome" } },
      },
      required: ["type", "name", "description", "classification", "recorded_at"],
      additionalProperties: false,
    },
    // type: "failure"
    {
      type: "object",
      properties: {
        id: { type: "string", pattern: "^mx-[0-9a-f]{4,8}$" },
        type: { type: "string", const: "failure" },
        description: { type: "string" },
        resolution: { type: "string" },
        classification: { $ref: "#/definitions/classification" },
        recorded_at: { type: "string" },
        evidence: { $ref: "#/definitions/evidence" },
        tags: { type: "array", items: { type: "string" } },
        relates_to: linkArray,
        supersedes: linkArray,
        outcomes: { type: "array", items: { $ref: "#/definitions/outcome" } },
      },
      required: [
        "type",
        "description",
        "resolution",
        "classification",
        "recorded_at",
      ],
      additionalProperties: false,
    },
    // type: "decision"
    {
      type: "object",
      properties: {
        id: { type: "string", pattern: "^mx-[0-9a-f]{4,8}$" },
        type: { type: "string", const: "decision" },
        title: { type: "string" },
        rationale: { type: "string" },
        date: { type: "string" },
        classification: { $ref: "#/definitions/classification" },
        recorded_at: { type: "string" },
        evidence: { $ref: "#/definitions/evidence" },
        tags: { type: "array", items: { type: "string" } },
        relates_to: linkArray,
        supersedes: linkArray,
        outcomes: { type: "array", items: { $ref: "#/definitions/outcome" } },
      },
      required: ["type", "title", "rationale", "classification", "recorded_at"],
      additionalProperties: false,
    },
    // type: "reference"
    {
      type: "object",
      properties: {
        id: { type: "string", pattern: "^mx-[0-9a-f]{4,8}$" },
        type: { type: "string", const: "reference" },
        name: { type: "string" },
        description: { type: "string" },
        files: { type: "array", items: { type: "string" } },
        classification: { $ref: "#/definitions/classification" },
        recorded_at: { type: "string" },
        evidence: { $ref: "#/definitions/evidence" },
        tags: { type: "array", items: { type: "string" } },
        relates_to: linkArray,
        supersedes: linkArray,
        outcomes: { type: "array", items: { $ref: "#/definitions/outcome" } },
      },
      required: ["type", "name", "description", "classification", "recorded_at"],
      additionalProperties: false,
    },
    // type: "guide"
    {
      type: "object",
      properties: {
        id: { type: "string", pattern: "^mx-[0-9a-f]{4,8}$" },
        type: { type: "string", const: "guide" },
        name: { type: "string" },
        description: { type: "string" },
        classification: { $ref: "#/definitions/classification" },
        recorded_at: { type: "string" },
        evidence: { $ref: "#/definitions/evidence" },
        tags: { type: "array", items: { type: "string" } },
        relates_to: linkArray,
        supersedes: linkArray,
        outcomes: { type: "array", items: { $ref: "#/definitions/outcome" } },
      },
      required: ["type", "name", "description", "classification", "recorded_at"],
      additionalProperties: false,
    },
  ],
} as const;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/** Discriminator values carried in the `type` field of an expertise record. */
export type RecordType =
  | "convention"
  | "pattern"
  | "failure"
  | "decision"
  | "reference"
  | "guide";
|
|
8
|
+
|
|
9
|
+
/** The three classification labels a record may carry. */
export type Classification =
  | "foundational"
  | "tactical"
  | "observational";
|
|
10
|
+
|
|
11
|
+
/**
 * Optional supporting references attached to a record.
 * Every field is an optional free-form string; none is required.
 */
export interface Evidence {
  commit?: string;
  date?: string;
  issue?: string;
  file?: string;
  bead?: string;
}
|
|
18
|
+
|
|
19
|
+
/**
 * One entry in a record's `outcomes` list.
 * Only `status` is mandatory; the remaining fields are optional details.
 */
export interface Outcome {
  /** Result of the attempt. */
  status: "success" | "failure" | "partial";
  /** Numeric duration (unit is not specified here). */
  duration?: number;
  test_results?: string;
  agent?: string;
  notes?: string;
  recorded_at?: string;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Fields common to every record variant. Not exported: consumers work with the
 * concrete record interfaces that extend it.
 */
interface BaseRecord {
  /** Optional record identifier. */
  id?: string;
  classification: Classification;
  /** Timestamp string; also used as the recency tie-breaker when records are prioritised. */
  recorded_at: string;
  evidence?: Evidence;
  tags?: string[];
  /** Identifiers of related records. */
  relates_to?: string[];
  /** Identifiers of records this one supersedes. */
  supersedes?: string[];
  outcomes?: Outcome[];
}
|
|
38
|
+
|
|
39
|
+
/** Record variant tagged `"convention"`; its only specific field is the `content` text. */
export interface ConventionRecord extends BaseRecord {
  type: "convention";
  content: string;
}
|
|
43
|
+
|
|
44
|
+
/** Record variant tagged `"pattern"`: a named, described entry with an optional list of files. */
export interface PatternRecord extends BaseRecord {
  type: "pattern";
  name: string;
  description: string;
  files?: string[];
}
|
|
50
|
+
|
|
51
|
+
/** Record variant tagged `"failure"`: a description paired with its resolution. */
export interface FailureRecord extends BaseRecord {
  type: "failure";
  description: string;
  resolution: string;
}
|
|
56
|
+
|
|
57
|
+
/** Record variant tagged `"decision"`: a title, its rationale, and an optional date string. */
export interface DecisionRecord extends BaseRecord {
  type: "decision";
  title: string;
  rationale: string;
  date?: string;
}
|
|
63
|
+
|
|
64
|
+
/** Record variant tagged `"reference"`: a named, described entry with an optional list of files. */
export interface ReferenceRecord extends BaseRecord {
  type: "reference";
  name: string;
  description: string;
  files?: string[];
}
|
|
70
|
+
|
|
71
|
+
/** Record variant tagged `"guide"`: a named, described entry. */
export interface GuideRecord extends BaseRecord {
  type: "guide";
  name: string;
  description: string;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Any expertise record. This is a discriminated union: switching on `type`
 * narrows a value to the matching variant interface.
 */
export type ExpertiseRecord =
  | ConventionRecord
  | PatternRecord
  | FailureRecord
  | DecisionRecord
  | ReferenceRecord
  | GuideRecord;
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import type { ExpertiseRecord } from "../schemas/record.ts";
|
|
2
|
+
|
|
3
|
+
/**
 * Tunable constants of the BM25 ranking formula (tuned for short document
 * collections like expertise records).
 */
export interface BM25Params {
  /** Controls non-linear term-frequency normalization; typical values are 1.2–2.0. */
  k1: number;
  /** Controls document-length normalization: 0 means none, 1 means full normalization. */
  b: number;
}
|
|
12
|
+
|
|
13
|
+
/**
 * BM25 parameters used by `searchBM25` when the caller supplies none:
 * `k1 = 1.5`, `b = 0.75`.
 */
export const DEFAULT_BM25_PARAMS: BM25Params = { k1: 1.5, b: 0.75 };
|
|
20
|
+
|
|
21
|
+
/**
 * One ranked hit produced by `searchBM25`.
 */
export interface BM25Result {
  /** The record that matched. */
  record: ExpertiseRecord;
  /** BM25 score of the record for the query; results only include scores above zero. */
  score: number;
  /** Names of the record fields that contain at least one query token. */
  matchedFields: string[];
}
|
|
30
|
+
|
|
31
|
+
/**
 * Tokenize text into searchable terms.
 *
 * The text is lower-cased, every character that is not a letter, digit,
 * combining mark, underscore, whitespace or hyphen is replaced by a space,
 * and the result is split on runs of whitespace. Hyphens are kept so that
 * hyphenated words stay a single token.
 *
 * Letters and digits are matched with Unicode property escapes rather than
 * the ASCII-only `\w`, so non-ASCII words (e.g. "café", "naïve") stay whole
 * instead of being broken apart at every accented character.
 *
 * @param text - Raw text to tokenize.
 * @returns The non-empty tokens, in order of appearance.
 */
export function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\p{M}_\s-]/gu, " ") // Replace punctuation with spaces (keep hyphens in words)
    .split(/\s+/)
    .filter((token) => token.length > 0);
}
|
|
41
|
+
|
|
42
|
+
/**
 * Collect the searchable text of a record.
 *
 * The fields read depend on the record's `type`; `tags` is read for every
 * record. String values are used as-is, array values are reduced to their
 * string items joined by single spaces. Values that are missing or blank
 * after trimming are skipped.
 *
 * @param record - The record to extract text from.
 * @returns `fieldTexts`, mapping each contributing field name to its text, and
 *   `allText`, those texts joined by single spaces in the order they were read.
 */
export function extractRecordText(record: ExpertiseRecord): {
  allText: string;
  fieldTexts: Record<string, string>;
} {
  // Ordered (field name, raw value) pairs; this order fixes the order of `allText`.
  const candidates: Array<[string, unknown]> = [];

  switch (record.type) {
    case "convention":
      candidates.push(["content", record.content]);
      break;
    case "failure":
      candidates.push(
        ["description", record.description],
        ["resolution", record.resolution],
      );
      break;
    case "decision":
      candidates.push(["title", record.title], ["rationale", record.rationale]);
      break;
    case "pattern":
    case "reference":
      candidates.push(
        ["name", record.name],
        ["description", record.description],
        ["files", record.files],
      );
      break;
    case "guide":
      candidates.push(["name", record.name], ["description", record.description]);
      break;
  }

  // Field shared by every record type.
  candidates.push(["tags", record.tags]);

  const fieldTexts: Record<string, string> = {};
  const parts: string[] = [];

  for (const [field, raw] of candidates) {
    let text: string;
    if (typeof raw === "string") {
      text = raw;
    } else if (Array.isArray(raw)) {
      text = raw.filter((item) => typeof item === "string").join(" ");
    } else {
      continue;
    }

    // Skip blank values, but store the text untrimmed when it has content.
    if (text.trim().length === 0) {
      continue;
    }
    fieldTexts[field] = text;
    parts.push(text);
  }

  return { allText: parts.join(" "), fieldTexts };
}
|
|
105
|
+
|
|
106
|
+
/**
 * Count how many times each token occurs in a document.
 *
 * @param tokens - The document's tokens.
 * @returns A map from token to occurrence count, keyed in first-seen order.
 */
function calculateTermFrequency(tokens: string[]): Map<string, number> {
  return tokens.reduce((counts, term) => {
    const seen = counts.get(term);
    counts.set(term, seen === undefined ? 1 : seen + 1);
    return counts;
  }, new Map<string, number>());
}
|
|
116
|
+
|
|
117
|
+
/**
 * Compute the inverse document frequency of every term in the corpus.
 *
 * Uses `log((N - df + 0.5) / (df + 0.5) + 1)`, where `N` is the number of
 * documents and `df` the number of documents containing the term. The `+ 1`
 * inside the logarithm keeps the weight positive even for terms that occur in
 * most documents.
 *
 * @param corpus - Documents, each exposing its token list.
 * @returns A map from term to IDF weight.
 */
function calculateIDF(
  corpus: Array<{ tokens: string[] }>,
): Map<string, number> {
  // Document frequency: each document counts a term at most once.
  const documentFrequency = new Map<string, number>();
  corpus.forEach(({ tokens }) => {
    new Set(tokens).forEach((term) => {
      documentFrequency.set(term, (documentFrequency.get(term) || 0) + 1);
    });
  });

  const totalDocs = corpus.length;
  const weights = new Map<string, number>();
  documentFrequency.forEach((df, term) => {
    weights.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
  });

  return weights;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Calculate the BM25 score of a single document against a query.
 *
 * For every query token present in the document the contribution is
 * `idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * docLength / avgDocLength))`.
 * Query tokens that do not occur in the document contribute nothing and are
 * skipped. A token repeated in the query contributes once per repetition.
 *
 * @param queryTokens - Tokens of the search query.
 * @param docTokens - Tokens of the document being scored.
 * @param docLength - Length of the document in tokens.
 * @param avgDocLength - Average document length across the corpus.
 * @param idf - Inverse document frequency per term; unknown terms weigh 0.
 * @param params - BM25 `k1` and `b` constants.
 * @returns The summed score; 0 when no query token occurs in the document.
 */
function calculateBM25Score(
  queryTokens: string[],
  docTokens: string[],
  docLength: number,
  avgDocLength: number,
  idf: Map<string, number>,
  params: BM25Params,
): number {
  const tf = calculateTermFrequency(docTokens);

  // The length normalisation is identical for every query term, so compute it
  // once. An average length of 0 (a corpus with no tokens at all) would make
  // the ratio 0/0 = NaN; treat that case as a neutral ratio of 1 instead.
  const lengthRatio = avgDocLength > 0 ? docLength / avgDocLength : 1;
  const lengthNorm = params.k1 * (1 - params.b + params.b * lengthRatio);

  let score = 0;
  for (const queryTerm of queryTokens) {
    const termFreq = tf.get(queryTerm) ?? 0;
    if (termFreq === 0) {
      // Absent terms add exactly zero; skipping them also avoids 0/0 when k1 is 0.
      continue;
    }
    const termIDF = idf.get(queryTerm) ?? 0;

    // BM25 formula
    const numerator = termFreq * (params.k1 + 1);
    const denominator = termFreq + lengthNorm;
    score += termIDF * (numerator / denominator);
  }

  return score;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Rank records against a free-text query using BM25.
 *
 * @param records - Records to search.
 * @param query - Free-text query; it is tokenized the same way as record text.
 * @param params - BM25 constants; defaults to `DEFAULT_BM25_PARAMS`.
 * @returns Records with a score above zero, highest score first, each with
 *   the names of the fields that contain at least one query token. Empty when
 *   there are no records or the query yields no tokens.
 */
export function searchBM25(
  records: ExpertiseRecord[],
  query: string,
  params: BM25Params = DEFAULT_BM25_PARAMS,
): BM25Result[] {
  if (records.length === 0 || query.trim().length === 0) {
    return [];
  }

  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    return [];
  }

  // Build the corpus: one tokenized document per record.
  const docs = records.map((record) => {
    const extracted = extractRecordText(record);
    return {
      record,
      tokens: tokenize(extracted.allText),
      fieldTexts: extracted.fieldTexts,
    };
  });

  // Corpus-wide statistics needed by the scoring formula.
  let totalLength = 0;
  for (const doc of docs) {
    totalLength += doc.tokens.length;
  }
  const avgDocLength = totalLength / docs.length;
  const idf = calculateIDF(docs);

  const results: BM25Result[] = [];
  for (const doc of docs) {
    const score = calculateBM25Score(
      queryTokens,
      doc.tokens,
      doc.tokens.length,
      avgDocLength,
      idf,
      params,
    );

    // Keep only documents that actually scored (this also rejects NaN).
    if (!(score > 0)) {
      continue;
    }

    // A field matches when any query token appears among its own tokens.
    const matchedFields = Object.entries(doc.fieldTexts)
      .filter(([, fieldText]) => {
        const fieldTokens = tokenize(fieldText);
        return queryTokens.some((qt) => fieldTokens.includes(qt));
      })
      .map(([fieldName]) => fieldName);

    results.push({ record: doc.record, score, matchedFields });
  }

  // Best match first.
  results.sort((a, b) => b.score - a.score);
  return results;
}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
Classification,
|
|
3
|
+
ExpertiseRecord,
|
|
4
|
+
RecordType,
|
|
5
|
+
} from "../schemas/record.ts";
|
|
6
|
+
import { type ScoredRecord, computeConfirmationScore } from "./scoring.ts";
|
|
7
|
+
|
|
8
|
+
/** Default budget value (presumably the `budget` passed to `applyBudget`, in estimated tokens — confirm against callers). */
export const DEFAULT_BUDGET = 4000;
|
|
9
|
+
|
|
10
|
+
/**
 * Record types from highest to lowest priority: a type's position in this
 * list is the first component of a record's sort key.
 */
const TYPE_PRIORITY: RecordType[] = [
  "convention",
  "decision",
  "pattern",
  "guide",
  "failure",
  "reference",
];
|
|
19
|
+
|
|
20
|
+
/**
 * Classifications from highest to lowest priority: a classification's position
 * in this list is the second component of a record's sort key.
 */
const CLASSIFICATION_PRIORITY: Classification[] = [
  "foundational",
  "tactical",
  "observational",
];
|
|
26
|
+
|
|
27
|
+
/** The records of one named domain. */
export interface DomainRecords {
  /** Domain name. */
  domain: string;
  /** Records belonging to the domain. */
  records: ScoredRecord[];
}
|
|
31
|
+
|
|
32
|
+
/** Outcome of applying a token budget with `applyBudget`. */
export interface BudgetResult {
  /** Records that fit the budget, grouped by domain in the original domain order. */
  kept: DomainRecords[];
  /** Total number of records that did not fit and were dropped. */
  droppedCount: number;
  /** Number of domains that lost at least one record. */
  droppedDomainCount: number;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Build the sort key of a record. Keys compare component by component,
 * smaller first:
 *
 * 1. position of the record's type in `TYPE_PRIORITY`,
 * 2. position of its classification in `CLASSIFICATION_PRIORITY`,
 * 3. negated confirmation score (higher score = higher priority),
 * 4. negated `recorded_at` timestamp (newest first).
 *
 * A type or classification missing from its priority table ranks after every
 * listed value, and a missing or unparseable `recorded_at` counts as time 0
 * (oldest), so every component is a real number and the ordering stays
 * consistent.
 *
 * @param r - The record to rank.
 * @returns The four-component key `[type, classification, -score, -time]`.
 */
function recordSortKey(r: ScoredRecord): [number, number, number, number] {
  // indexOf reports -1 for an unlisted value, which would otherwise sort it
  // ahead of the highest-priority entry; push it past the end of the table.
  const rank = (position: number, tableSize: number): number =>
    position === -1 ? tableSize : position;

  const typeIdx = rank(TYPE_PRIORITY.indexOf(r.type), TYPE_PRIORITY.length);
  const classIdx = rank(
    CLASSIFICATION_PRIORITY.indexOf(r.classification),
    CLASSIFICATION_PRIORITY.length,
  );
  const confirmationScore = computeConfirmationScore(r);

  // An invalid date string parses to NaN; a NaN key component makes the
  // comparator return NaN, so fall back to 0 like a missing timestamp.
  const parsed = r.recorded_at ? new Date(r.recorded_at).getTime() : 0;
  const time = Number.isNaN(parsed) ? 0 : parsed;

  return [typeIdx, classIdx, -confirmationScore, -time];
}
|
|
52
|
+
|
|
53
|
+
/**
 * Comparator ordering records by their sort keys: the first key component
 * that differs decides, and records with identical keys compare as equal.
 *
 * @returns A negative number when `a` sorts before `b`, a positive number
 *   when it sorts after, 0 when the keys are identical.
 */
function compareRecords(a: ScoredRecord, b: ScoredRecord): number {
  const [typeA, classA, scoreA, timeA] = recordSortKey(a);
  const [typeB, classB, scoreB, timeB] = recordSortKey(b);

  if (typeA !== typeB) return typeA - typeB;
  if (classA !== classB) return classA - classB;
  if (scoreA !== scoreB) return scoreA - scoreB;
  if (timeA !== timeB) return timeA - timeB;
  return 0;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Estimate the token count of a text as its character count divided by four,
 * rounded up.
 *
 * @param text - Text to measure.
 * @returns The estimated number of tokens; 0 for an empty string.
 */
export function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Apply a token budget to records across multiple domains.
 *
 * Records from all domains are ranked together by type (conventions first,
 * then decisions, etc.), then by classification (foundational > tactical >
 * observational), then by confirmation score (higher = higher priority), then
 * by recency (newest first). They are then admitted greedily in that order: a
 * record is kept when its estimated cost still fits in the remaining budget,
 * otherwise it is dropped and the next record is tried.
 *
 * Each entry of `domains` is handled on its own, so the result stays correct
 * even if a domain name occurs more than once or the same record object is
 * listed twice.
 *
 * @param domains - Records grouped by domain.
 * @param budget - Maximum total of estimated tokens to keep.
 * @param formatRecord - Renders a record; the length of its output is what
 *   the per-record token cost is estimated from.
 * @returns The kept records grouped by domain (original domain and record
 *   order preserved, domains with nothing kept omitted), the number of dropped
 *   records, and the number of distinct domain names that lost a record.
 */
export function applyBudget(
  domains: DomainRecords[],
  budget: number,
  formatRecord: (record: ExpertiseRecord, domain: string) => string,
): BudgetResult {
  // One slot per record, grouped like the input. The same slot objects are
  // ranked below, so the keep/drop decision is available when regrouping
  // without searching for each record again.
  const groups = domains.map((d) => ({
    domain: d.domain,
    slots: d.records.map((record) => ({
      domain: d.domain,
      record,
      kept: false,
    })),
  }));

  // Flatten and rank by priority (the sort is stable, so ties keep input order).
  const ranked: Array<{ domain: string; record: ScoredRecord; kept: boolean }> =
    [];
  for (const group of groups) {
    for (const slot of group.slots) {
      ranked.push(slot);
    }
  }
  ranked.sort((a, b) => compareRecords(a.record, b.record));

  // Greedy admission in priority order.
  let usedTokens = 0;
  for (const slot of ranked) {
    const cost = estimateTokens(formatRecord(slot.record, slot.domain));
    if (usedTokens + cost <= budget) {
      usedTokens += cost;
      slot.kept = true;
    }
  }

  // Rebuild domain groups preserving original domain order and record order.
  const kept: DomainRecords[] = [];
  const droppedDomains = new Set<string>();
  let droppedCount = 0;

  for (const group of groups) {
    const keptRecords = group.slots
      .filter((slot) => slot.kept)
      .map((slot) => slot.record);
    const dropped = group.slots.length - keptRecords.length;

    if (dropped > 0) {
      droppedCount += dropped;
      droppedDomains.add(group.domain);
    }
    if (keptRecords.length > 0) {
      kept.push({ domain: group.domain, records: keptRecords });
    }
  }

  return {
    kept,
    droppedCount,
    droppedDomainCount: droppedDomains.size,
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Build the summary line shown when records were dropped by the budget.
 *
 * @param droppedCount - Number of records that were dropped.
 * @param droppedDomainCount - Number of domains that lost records; when it is
 *   0 (or less) the "across N domain(s)" part is left out.
 * @returns The summary line, with "record" and "domain" pluralised to match
 *   their counts.
 */
export function formatBudgetSummary(
  droppedCount: number,
  droppedDomainCount: number,
): string {
  const pluralSuffix = (count: number): string => (count === 1 ? "" : "s");

  let domainPart = "";
  if (droppedDomainCount > 0) {
    domainPart = ` across ${droppedDomainCount} domain${pluralSuffix(droppedDomainCount)}`;
  }

  return `... and ${droppedCount} more record${pluralSuffix(droppedCount)}${domainPart} (use --budget <n> to show more)`;
}
|