fullstackgtm 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +69 -0
- package/INSTALL_FOR_AGENTS.md +10 -5
- package/README.md +17 -0
- package/dist/cli.js +141 -12
- package/dist/index.d.ts +3 -1
- package/dist/index.js +3 -1
- package/dist/llm.d.ts +7 -0
- package/dist/llm.js +7 -1
- package/dist/market.d.ts +35 -0
- package/dist/market.js +100 -0
- package/dist/marketAxes.d.ts +77 -0
- package/dist/marketAxes.js +199 -0
- package/dist/marketClassify.d.ts +49 -0
- package/dist/marketClassify.js +201 -0
- package/dist/marketReport.js +114 -1
- package/dist/mcp.js +45 -0
- package/docs/api.md +29 -2
- package/llms.txt +16 -0
- package/package.json +1 -1
- package/src/cli.ts +150 -12
- package/src/index.ts +24 -0
- package/src/llm.ts +7 -1
- package/src/market.ts +130 -0
- package/src/marketAxes.ts +268 -0
- package/src/marketClassify.ts +286 -0
- package/src/marketReport.ts +134 -1
- package/src/mcp.ts +65 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import type { MarketAxis, MarketConfig, MarketObservation, ObservationSet } from "./market.ts";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Axis discovery for a market map — the method that earns a strategic 2x2
|
|
5
|
+
* instead of asserting one. Axes are claim-scoring rubrics in the config
|
|
6
|
+
* (reviewable, versioned); a vendor's position on an axis is the
|
|
7
|
+
* intensity-weighted mean of the scores of claims it voices. Two checks keep
|
|
8
|
+
* axes honest, both computed deterministically from the stored observations:
|
|
9
|
+
*
|
|
10
|
+
* 1. Triangulation — PCA over the vendor × claim intensity matrix gives the
|
|
11
|
+
* category's own top variance directions; a real axis correlates with a
|
|
12
|
+
* principal component (it is derivable from the data, not just felt).
|
|
13
|
+
* 2. Orthogonality — two configured axes that correlate ≥ ~0.75 at the
|
|
14
|
+
* vendor level are one axis twice. Sometimes that redundancy is the
|
|
15
|
+
* finding: the category couples the two ideas, and the empty quadrant is
|
|
16
|
+
* the strategic white space.
|
|
17
|
+
*
|
|
18
|
+
* Everything here is pure math over the store: same observations, same map.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/**
 * Weight of each voiced intensity. The lookups in this file fall back to 0
 * (`?? 0`) for any intensity without an entry here.
 */
export const VOICE_WEIGHT: Record<string, number> = { loud: 1.0, quiet: 0.5 };
|
|
22
|
+
|
|
23
|
+
/**
 * Places one vendor on an axis: the weighted average of the axis scores of
 * the claims that vendor voices, each claim weighted by `VOICE_WEIGHT` for
 * the observed intensity.
 *
 * Observations are skipped when they belong to another vendor, when the claim
 * has no score (null or missing) on this axis, or when the intensity carries
 * no positive weight.
 *
 * @param vendorId Vendor to place.
 * @param claimScores claimId → score on this axis; null means "not scored".
 * @param observations Observations to scan (all vendors; filtered here).
 * @returns The weighted mean, or null when nothing the vendor voices is scoreable.
 */
export function axisPosition(
  vendorId: string,
  claimScores: Record<string, number | null>,
  observations: MarketObservation[],
): number | null {
  let weightedSum = 0;
  let totalWeight = 0;

  for (const { vendorId: owner, claimId, intensity } of observations) {
    if (owner !== vendorId) continue;

    const score = claimScores[claimId];
    if (score == null) continue;

    const weight = VOICE_WEIGHT[intensity] ?? 0;
    // Written as a negated `>` so a NaN weight is skipped as well.
    if (!(weight > 0)) continue;

    weightedSum += score * weight;
    totalWeight += weight;
  }

  if (totalWeight > 0) return weightedSum / totalWeight;
  return null;
}
|
|
47
|
+
|
|
48
|
+
/**
 * How much of the observable claim space a vendor voices.
 *
 * Only this vendor's observations whose intensity is not "unobservable" are
 * counted. Each contributes its `VOICE_WEIGHT` (0 when the intensity has no
 * entry) to the numerator and 1 to the denominator.
 *
 * @returns `breadth` — voiced weight divided by the number of observable
 *   observations, or null when there are none; `loudCount` — how many of them
 *   are "loud".
 */
export function messageBreadth(
  vendorId: string,
  observations: MarketObservation[],
): { breadth: number | null; loudCount: number } {
  const relevant = observations.filter(
    (obs) => obs.vendorId === vendorId && obs.intensity !== "unobservable",
  );

  let voicedWeight = 0;
  let loudCount = 0;
  for (const { intensity } of relevant) {
    voicedWeight += VOICE_WEIGHT[intensity] ?? 0;
    if (intensity === "loud") loudCount += 1;
  }

  const breadth = relevant.length > 0 ? voicedWeight / relevant.length : null;
  return { breadth, loudCount };
}
|
|
65
|
+
|
|
66
|
+
/**
 * Pearson correlation coefficient of two equally long series.
 *
 * Degenerate inputs yield 0 ("no measurable correlation") rather than NaN:
 * fewer than three points, or a series with zero (or non-finite) spread.
 *
 * @param xs First series.
 * @param ys Second series; must have the same length as `xs`.
 * @returns r in [-1, 1].
 * @throws RangeError when the series lengths differ — pairing values by index
 *   would otherwise silently produce NaN or a mean over the wrong count.
 */
export function pearson(xs: number[], ys: number[]): number {
  const n = xs.length;
  if (ys.length !== n) {
    throw new RangeError(`pearson: series lengths differ (${n} vs ${ys.length})`);
  }
  if (n < 3) return 0;
  const mx = xs.reduce((sum, x) => sum + x, 0) / n;
  const my = ys.reduce((sum, y) => sum + y, 0) / n;
  const sx = Math.sqrt(xs.reduce((sum, x) => sum + (x - mx) ** 2, 0));
  const sy = Math.sqrt(ys.reduce((sum, y) => sum + (y - my) ** 2, 0));
  // `!` also catches NaN, so non-finite inputs fall through to 0 here.
  if (!sx || !sy) return 0;
  const r = xs.reduce((sum, x, i) => sum + (x - mx) * (ys[i] - my), 0) / (sx * sy);
  // Floating-point round-off can push a perfect correlation a hair past ±1.
  return Math.max(-1, Math.min(1, r));
}
|
|
76
|
+
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
// PCA — power iteration over the column-centered vendor × claim weight
|
|
79
|
+
// matrix. Pure and dependency-free; two components are all the canvas needs
|
|
80
|
+
// (PC1 should recover the category's primary axis; PC2 is the
|
|
81
|
+
// maximum-differentiation direction orthogonal to it).
|
|
82
|
+
|
|
83
|
+
/** One principal component, with its vector and projections keyed back to ids. */
export type PrincipalComponent = {
  /** claimId → loading. Sign is arbitrary; read poles from the extremes. */
  loadings: Array<{ claimId: string; loading: number }>;
  /** vendorId → score on this component. */
  scores: Array<{ vendorId: string; score: number }>;
};
|
|
89
|
+
|
|
90
|
+
/**
 * Top two principal components of the vendor × claim voice-weight matrix.
 *
 * Rows are vendors with at least one observation that is not "unobservable";
 * columns are the configured claims. Each cell is the `VOICE_WEIGHT` of the
 * stored intensity (0 when there is no observation or no weight for it).
 * Columns are mean-centered, then each component is found by a fixed number
 * of power-iteration steps from a uniform start vector, so the same inputs
 * always give the same output. The second component projects the first one
 * out of the working vector at the start of every step.
 *
 * @param config Supplies the claim and vendor ids (and their order).
 * @param set Observations to read intensities from.
 * @returns The vendor ids used as rows, plus PC1 and PC2.
 */
export function pcaTop2(
  config: MarketConfig,
  set: ObservationSet,
): { vendors: string[]; pc1: PrincipalComponent; pc2: PrincipalComponent } {
  const POWER_ITERATIONS = 300;
  const claimIds = config.claims.map((claim) => claim.id);
  const cellKey = (vendorId: string, claimId: string): string => `${vendorId}|${claimId}`;
  const byCell = new Map(set.observations.map((obs) => [cellKey(obs.vendorId, obs.claimId), obs]));

  // Exclude fully-unobservable vendors: they carry no information, only zeros.
  const hasObservableCell = (vendorId: string): boolean =>
    claimIds.some((claimId) => {
      const obs = byCell.get(cellKey(vendorId, claimId));
      return obs !== undefined && obs.intensity !== "unobservable";
    });
  const vendors = config.vendors.map((vendor) => vendor.id).filter((vendorId) => hasObservableCell(vendorId));

  const weightOf = (vendorId: string, claimId: string): number =>
    VOICE_WEIGHT[byCell.get(cellKey(vendorId, claimId))?.intensity ?? ""] ?? 0;
  const matrix = vendors.map((vendorId) => claimIds.map((claimId) => weightOf(vendorId, claimId)));
  const means = claimIds.map((_, j) => matrix.reduce((sum, row) => sum + row[j], 0) / vendors.length);
  const centered = matrix.map((row) => row.map((value, j) => value - means[j]));

  // Projection of every (centered) vendor row onto a direction in claim space.
  const project = (direction: number[]): number[] =>
    centered.map((row) => row.reduce((sum, x, j) => sum + x * direction[j], 0));

  const powerIterate = (deflate?: number[]): { loadings: number[]; scores: number[] } => {
    let v = new Array<number>(claimIds.length).fill(1 / Math.sqrt(claimIds.length));
    let remaining = POWER_ITERATIONS;
    while (remaining > 0) {
      remaining -= 1;
      if (deflate) {
        // Remove the already-found component so the iteration cannot converge back to it.
        const dot = v.reduce((sum, x, k) => sum + x * deflate[k], 0);
        v = v.map((x, k) => x - dot * deflate[k]);
      }
      const scores = project(v);
      v = claimIds.map((_, j) => centered.reduce((sum, row, i) => sum + row[j] * scores[i], 0));
      // `|| 1` keeps an all-zero vector at zero instead of dividing by zero.
      const norm = Math.sqrt(v.reduce((sum, x) => sum + x * x, 0)) || 1;
      v = v.map((x) => x / norm);
    }
    return { loadings: v, scores: project(v) };
  };

  const toComponent = (raw: { loadings: number[]; scores: number[] }): PrincipalComponent => ({
    loadings: claimIds.map((claimId, j) => ({ claimId, loading: raw.loadings[j] })),
    scores: vendors.map((vendorId, i) => ({ vendorId, score: raw.scores[i] })),
  });

  const first = powerIterate();
  const second = powerIterate(first.loadings);
  return { vendors, pc1: toComponent(first), pc2: toComponent(second) };
}
|
|
135
|
+
|
|
136
|
+
// ---------------------------------------------------------------------------
|
|
137
|
+
// The axes report: positions, triangulation vs PCA, orthogonality screen.
|
|
138
|
+
|
|
139
|
+
/** One vendor's position on one axis; `position` is null when `axisPosition` could not place the vendor. */
export type AxisVendorPosition = { vendorId: string; position: number | null };
|
|
140
|
+
|
|
141
|
+
/** How one configured axis fares against the data. */
export type AxisAssessment = {
  /** The configured axis being assessed. */
  axis: MarketAxis;
  /** One entry per vendor that entered the PCA, including unplaceable (null) ones. */
  positions: AxisVendorPosition[];
  /** Standard deviation of placeable vendor positions — does the axis separate anyone? */
  spread: number;
  /** Pearson correlation of placeable vendor positions with their PC1 scores. */
  rVsPc1: number;
  /** Pearson correlation of placeable vendor positions with their PC2 scores. */
  rVsPc2: number;
};
|
|
149
|
+
|
|
150
|
+
/** Orthogonality check for one unordered pair of axes. */
export type AxisPairing = {
  /** Id of the first axis in the pair (a configured axis id, or "breadth"). */
  aId: string;
  /** Id of the second axis in the pair (a configured axis id, or "breadth"). */
  bId: string;
  /** Pearson correlation of the two axes over vendors placeable on both. */
  r: number;
  /** Classification of |r| as produced by `pairingVerdict`. */
  verdict: "near-orthogonal" | "correlated — weak pair" | "redundant — same axis twice";
};
|
|
156
|
+
|
|
157
|
+
/** Full result of `assessAxes`: the PCA, per-axis assessments, and the pairwise screen. */
export type AxesReport = {
  /** Vendor ids that entered the PCA (as returned by `pcaTop2`). */
  vendors: string[];
  /** First principal component. */
  pc1: PrincipalComponent;
  /** Second principal component. */
  pc2: PrincipalComponent;
  /** One assessment per configured axis, in config order. */
  assessments: AxisAssessment[];
  /** Includes the derived breadth axis in pairings. */
  pairings: AxisPairing[];
};
|
|
165
|
+
|
|
166
|
+
/**
 * Classifies a correlation between two axes by its magnitude.
 *
 * - |r| < 0.4   → "near-orthogonal"
 * - |r| < 0.75  → "correlated — weak pair"
 * - otherwise   → "redundant — same axis twice"
 *
 * A NaN correlation is treated as unmeasurable and reported as
 * "near-orthogonal", matching `pearson`, which returns 0 when a correlation
 * cannot be computed. Without this, NaN fails both `<` comparisons and would
 * be reported as redundant on no evidence.
 *
 * @param r Correlation coefficient; only its absolute value matters.
 */
export function pairingVerdict(r: number): AxisPairing["verdict"] {
  const magnitude = Math.abs(r);
  if (Number.isNaN(magnitude) || magnitude < 0.4) return "near-orthogonal";
  if (magnitude < 0.75) return "correlated — weak pair";
  return "redundant — same axis twice";
}
|
|
172
|
+
|
|
173
|
+
/**
 * Builds the axes report: the PCA of the observation set, each configured
 * axis's vendor positions with spread and correlation against PC1/PC2, and
 * the pairwise correlation screen across all configured axes plus the derived
 * "breadth" axis.
 *
 * Only vendors returned by `pcaTop2` are positioned. Correlations use only
 * vendors that have a (non-null) position on every axis involved.
 *
 * @param config Market config; `config.axes` may be absent (treated as none).
 * @param set Observation set to assess.
 */
export function assessAxes(config: MarketConfig, set: ObservationSet): AxesReport {
  const { vendors, pc1, pc2 } = pcaTop2(config, set);

  const scoreLookup = (pc: PrincipalComponent): Map<string, number> => {
    const lookup = new Map<string, number>();
    for (const { vendorId, score } of pc.scores) lookup.set(vendorId, score);
    return lookup;
  };
  const pc1ByVendor = scoreLookup(pc1);
  const pc2ByVendor = scoreLookup(pc2);

  const axes = config.axes ?? [];
  // axis id → (vendor id → position), placeable vendors only.
  const positionsById = new Map<string, Map<string, number>>();
  const assessments: AxisAssessment[] = [];

  for (const axis of axes) {
    const positions: AxisVendorPosition[] = vendors.map((vendorId) => ({
      vendorId,
      position: axisPosition(vendorId, axis.claimScores, set.observations),
    }));

    const placeable: Array<{ vendorId: string; position: number }> = [];
    for (const { vendorId, position } of positions) {
      if (position !== null) placeable.push({ vendorId, position });
    }
    positionsById.set(axis.id, new Map(placeable.map((entry) => [entry.vendorId, entry.position])));

    // Population standard deviation; the max(…, 1) avoids dividing by zero when nobody is placeable.
    const divisor = Math.max(placeable.length, 1);
    const values = placeable.map((entry) => entry.position);
    const mean = values.reduce((sum, x) => sum + x, 0) / divisor;
    const spread = Math.sqrt(values.reduce((sum, x) => sum + (x - mean) ** 2, 0) / divisor);

    const aligned = placeable.filter((entry) => pc1ByVendor.has(entry.vendorId));
    const alignedPositions = aligned.map((entry) => entry.position);
    const correlationWith = (scores: Map<string, number>): number =>
      pearson(
        alignedPositions,
        aligned.map((entry) => scores.get(entry.vendorId) as number),
      );

    assessments.push({
      axis,
      positions,
      spread,
      rVsPc1: correlationWith(pc1ByVendor),
      rVsPc2: correlationWith(pc2ByVendor),
    });
  }

  // Derived breadth axis joins the orthogonality screen (it's free and often
  // the only near-orthogonal partner early on).
  const breadthPositions = new Map<string, number>();
  for (const vendorId of vendors) {
    const { breadth } = messageBreadth(vendorId, set.observations);
    if (breadth !== null) breadthPositions.set(vendorId, breadth);
  }
  positionsById.set("breadth", breadthPositions);

  const ids = [...axes.map((axis) => axis.id), "breadth"];
  const pairings: AxisPairing[] = [];
  ids.forEach((aId, index) => {
    for (const bId of ids.slice(index + 1)) {
      const a = positionsById.get(aId) as Map<string, number>;
      const b = positionsById.get(bId) as Map<string, number>;
      const shared = vendors.filter((vendorId) => a.has(vendorId) && b.has(vendorId));
      const r = pearson(
        shared.map((vendorId) => a.get(vendorId) as number),
        shared.map((vendorId) => b.get(vendorId) as number),
      );
      pairings.push({ aId, bId, r, verdict: pairingVerdict(r) });
    }
  });

  return { vendors, pc1, pc2, assessments, pairings };
}
|
|
224
|
+
|
|
225
|
+
/**
 * Renders an axes report as plain text.
 *
 * For each principal component it prints the five lowest and five highest
 * claim loadings (separated by " ...") and all vendor scores in ascending
 * order. When a component has fewer than ten loadings the two extremes would
 * overlap, so every loading is printed exactly once and the " ..." separator
 * is omitted. Then it prints either the configured-axes table and the
 * orthogonality screen, or — when no axes are configured — a hint on how to
 * add them.
 *
 * @param report Report produced by `assessAxes`.
 * @returns The text, terminated by a newline.
 */
export function axesReportToText(report: AxesReport): string {
  const EXTREMES = 5;
  // Explicit "+" for non-negative values so columns of mixed sign line up.
  const signed = (value: number, digits: number): string => `${value >= 0 ? "+" : ""}${value.toFixed(digits)}`;

  const lines: string[] = [];
  for (const [label, pc] of [
    ["PC1", report.pc1],
    ["PC2", report.pc2],
  ] as const) {
    lines.push(`=== ${label} — claim loadings (extremes; sign is arbitrary, read the poles) ===`);
    const ordered = [...pc.loadings].sort((a, b) => a.loading - b.loading);
    const loadingLine = (entry: { claimId: string; loading: number }): string =>
      ` ${signed(entry.loading, 2)} ${entry.claimId}`;
    if (ordered.length < EXTREMES * 2) {
      // Head and tail slices would overlap: list each claim once, nothing is elided.
      for (const entry of ordered) lines.push(loadingLine(entry));
    } else {
      for (const entry of ordered.slice(0, EXTREMES)) lines.push(loadingLine(entry));
      lines.push(" ...");
      for (const entry of ordered.slice(-EXTREMES)) lines.push(loadingLine(entry));
    }
    const vendorScores = [...pc.scores]
      .sort((a, b) => a.score - b.score)
      .map((entry) => `${entry.vendorId}=${signed(entry.score, 2)}`)
      .join(" ");
    lines.push(` vendor scores: ${vendorScores}`);
    lines.push("");
  }

  if (report.assessments.length > 0) {
    lines.push("=== configured axes vs PCA (triangulation: a real axis is derivable from the data) ===");
    for (const assessment of report.assessments) {
      lines.push(
        ` ${assessment.axis.id.padEnd(20)} spread=${assessment.spread.toFixed(3)} r(PC1)=${signed(assessment.rVsPc1, 2)} r(PC2)=${signed(assessment.rVsPc2, 2)} [${assessment.axis.status ?? ""}]`,
      );
    }
    lines.push("");
    lines.push("=== orthogonality screen (|r|>0.75 = redundant pair) ===");
    for (const pairing of report.pairings) {
      const flag = pairing.verdict === "redundant — same axis twice" ? " <-- redundant" : "";
      lines.push(` ${pairing.aId.padEnd(18)} x ${pairing.bId.padEnd(18)} r=${signed(pairing.r, 2)}${flag}`);
    }
  } else {
    lines.push("No axes configured. Read the PC loadings above, name the two directions, and add them");
    lines.push("to market.config.json as axes: [{ id, label, negativePole, positivePole, rubric, claimScores }].");
  }
  return `${lines.join("\n")}\n`;
}
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import { DEFAULT_MODELS, forcedToolCall, type LlmCallOptions } from "./llm.ts";
|
|
2
|
+
import {
|
|
3
|
+
loadCaptureTexts,
|
|
4
|
+
observationId,
|
|
5
|
+
verifyEvidenceSpans,
|
|
6
|
+
type CaptureEntry,
|
|
7
|
+
type MarketClaim,
|
|
8
|
+
type MarketConfig,
|
|
9
|
+
type MarketObservation,
|
|
10
|
+
type ObservationSet,
|
|
11
|
+
type SpanVerificationFailure,
|
|
12
|
+
} from "./market.ts";
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* LLM intensity classification for the market map — the same
|
|
16
|
+
* semi-deterministic posture as call extraction, with one upgrade calls
|
|
17
|
+
* can't have: because the source pages are stored captures, every quoted
|
|
18
|
+
* span is verified mechanically against the capture it cites before the
|
|
19
|
+
* observation is accepted. A reading whose quote isn't verbatim on the page
|
|
20
|
+
* bounces back to the model once with the failures named; if it still can't
|
|
21
|
+
* quote the page, classification fails rather than storing unverifiable
|
|
22
|
+
* evidence.
|
|
23
|
+
*
|
|
24
|
+
* Deterministic parts stay deterministic: vendors with no usable captures
|
|
25
|
+
* score UNOBSERVABLE on every claim without an LLM call, and front states
|
|
26
|
+
* downstream are computed from the store, never from model output.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
// Bound cost and context: a vendor's pages are classified in one call.
|
|
30
|
+
/**
 * Character budget for one vendor's page text, divided evenly across that
 * vendor's pages by `buildDossier`. Page headers and the truncation marker are
 * added on top and are not counted against it.
 */
const MAX_DOSSIER_CHARS = 48_000;
|
|
31
|
+
|
|
32
|
+
/**
 * Fixed preamble of the classification prompt. `classifyMarket` appends the
 * category's surface rule, the claim list, any retry feedback, the vendor
 * name, and the captured pages after it.
 */
const CLASSIFY_INSTRUCTIONS = `Classify this vendor's messaging intensity for EVERY claim listed.
Rules:
- Judge ONLY from the captured pages below. Do not use outside knowledge of the vendor.
- intensity per the surface rule: "loud" = hero copy or a top-level-nav named product/program with a dedicated page; "quiet" = present on any page below that; "absent" = nowhere in the captures.
- evidence quotes MUST be verbatim spans copied exactly from the captured text (≤300 chars). Every loud or quiet reading needs at least one quote. If you cannot quote it, the reading is absent.
- An explicit disavowal ("we do not offer X", "call 988") is absent — put the disavowal quote in reason, it is informative signal.
- url must be the page the quote came from, exactly as given in the page headers below.
- reason: one reviewer-facing sentence.
- Return a reading for every claim id. Never invent claim ids.`;
|
|
41
|
+
|
|
42
|
+
/**
 * JSON schema for the forced tool call: an object with a `readings` array,
 * one reading per claim. `claimId` is restricted to the ids passed in, so the
 * schema itself tells the model which ids are legal.
 *
 * @param claimIds Claim ids the model may answer for.
 */
function classifySchema(claimIds: string[]) {
  const evidenceItem = {
    type: "object",
    required: ["quote", "url"],
    properties: {
      quote: { type: "string", description: "VERBATIM span copied exactly from the captured page text. Never paraphrase." },
      url: { type: "string", description: "The page URL the quote came from, exactly as shown in the page header." },
    },
  } as const;

  const reading = {
    type: "object",
    required: ["claimId", "intensity", "confidence", "reason", "evidence"],
    properties: {
      claimId: { type: "string", enum: claimIds },
      intensity: { type: "string", enum: ["loud", "quiet", "absent"] },
      confidence: { type: "string", enum: ["high", "medium", "low"] },
      reason: { type: "string", description: "One reviewer-facing sentence." },
      evidence: { type: "array", items: evidenceItem },
    },
  } as const;

  return {
    type: "object",
    required: ["readings"],
    properties: {
      readings: { type: "array", items: reading },
    },
  } as const;
}
|
|
73
|
+
|
|
74
|
+
/** One reading as returned by the model; mirrors the item shape declared in `classifySchema`. */
type LlmReading = {
  /** Claim this reading is for. */
  claimId: string;
  /** Model-assigned intensity; "unobservable" is never produced by the model. */
  intensity: "loud" | "quiet" | "absent";
  /** Model's self-reported confidence. */
  confidence: "high" | "medium" | "low";
  /** One reviewer-facing sentence. */
  reason: string;
  /** Quotes with the URL of the page each was taken from. */
  evidence: Array<{ quote: string; url: string }>;
};
|
|
81
|
+
|
|
82
|
+
/**
 * Concatenates a vendor's captured pages into one prompt section.
 *
 * Only entries that have a capture hash with loaded text are included. The
 * `MAX_DOSSIER_CHARS` budget is split evenly across those pages; a page over
 * its share keeps its first and last half-share with a truncation marker in
 * between. Each page is prefixed with a `=== PAGE (kind) url ===` header.
 *
 * @param entries Capture entries for one vendor.
 * @param textByHash Capture hash → page text.
 * @returns The dossier text, or "" when no entry has usable text.
 */
function buildDossier(entries: CaptureEntry[], textByHash: Map<string, string>): string {
  const pages: Array<{ entry: CaptureEntry; text: string }> = [];
  for (const entry of entries) {
    if (!entry.captureHash) continue;
    const text = textByHash.get(entry.captureHash);
    if (text !== undefined) pages.push({ entry, text });
  }
  if (pages.length === 0) return "";

  const budget = Math.floor(MAX_DOSSIER_CHARS / pages.length);
  const half = Math.floor(budget / 2);
  return pages
    .map(({ entry, text }) => {
      let body = text;
      if (text.length > budget) {
        const head = text.slice(0, half);
        // slice(-0) is slice(0) and would return the whole page, so a zero
        // half-share must yield an empty tail explicitly.
        const tail = half > 0 ? text.slice(-half) : "";
        body = `${head}\n[... middle of page truncated ...]\n${tail}`;
      }
      return `=== PAGE (${entry.kind}) ${entry.url} ===\n${body}`;
    })
    .join("\n\n");
}
|
|
98
|
+
|
|
99
|
+
/**
 * Formats the claims for the prompt: one "- id: capability" bullet per claim,
 * followed by a "How to judge" line carrying the claim's definition.
 *
 * @param claims Claims to list, in order.
 * @returns The bullets joined by newlines ("" for no claims).
 */
function claimsBlock(claims: MarketClaim[]): string {
  const bullets: string[] = [];
  for (const { id, capability, definition } of claims) {
    bullets.push(`- ${id}: ${capability}\n How to judge: ${definition}`);
  }
  return bullets.join("\n");
}
|
|
104
|
+
|
|
105
|
+
/** Options for `classifyMarket`. */
export type ClassifyMarketOptions = {
  /** LLM call options; `provider` and the optional `model` are read here, and the whole object is passed to the tool call. */
  llm: LlmCallOptions;
  /** Observation run label to produce; must be new (the store is append-only). */
  runLabel: string;
  /** Capture run to classify; defaults to the most recent run in the manifest. */
  captureRun?: string;
  /** Restrict to these vendor ids (e.g. one new vendor); defaults to all. */
  vendors?: string[];
  /** Captures directory override (tests); defaults to the profile market home. */
  capturesDir?: string;
  /** Clock override for the observation timestamp; defaults to `() => new Date()`. */
  now?: () => Date;
};
|
|
117
|
+
|
|
118
|
+
/** Result of `classifyMarket`. */
export type ClassifyMarketResult = {
  /** The observation set produced for the requested run label. */
  set: ObservationSet;
  /** Model actually used: `options.llm.model`, or the provider's default. */
  model: string;
  /**
   * Vendors whose first answer had problems (e.g. a missing reading or a quote
   * that failed mechanical verification) and were asked again once. A vendor
   * whose retry also fails makes classification throw, so ids in a returned
   * result are vendors the retry fixed.
   */
  retriedVendorIds: string[];
};
|
|
124
|
+
|
|
125
|
+
/**
 * Classifies messaging intensity for every (vendor, claim) cell of a capture
 * run and returns the resulting observation set. Nothing is stored here.
 *
 * Per vendor:
 * - With no usable captured text, every claim is recorded as "unobservable"
 *   without calling the model.
 * - Otherwise the model is asked once for a reading per claim. The answer is
 *   checked for completeness (exactly one reading per claim id) and every
 *   quoted span is verified against the captures. If anything is wrong the
 *   model is asked once more with the problems listed; if the second answer
 *   is still wrong, the whole call throws.
 *
 * Vendors are processed sequentially, in the order requested; repeated ids in
 * `options.vendors` are classified once.
 *
 * @param config Market config (category, vendors, claims, optional surface rule).
 * @param options See `ClassifyMarketOptions`.
 * @returns The observation set, the model used, and the vendors that needed a retry.
 * @throws Error when there are no captures for the category or the chosen run,
 *   when a requested vendor id is not in the config, or when a vendor's answer
 *   still fails validation after the retry.
 */
export async function classifyMarket(
  config: MarketConfig,
  options: ClassifyMarketOptions,
): Promise<ClassifyMarketResult> {
  const model = options.llm.model ?? DEFAULT_MODELS[options.llm.provider];
  const { entries, textByHash } = loadCaptureTexts(config.category, options.capturesDir);
  if (entries.length === 0) {
    throw new Error(`No captures for ${config.category} — run \`market capture\` first`);
  }
  const captureRun = options.captureRun ?? entries[entries.length - 1].runLabel;
  const runEntries = entries.filter((entry) => entry.runLabel === captureRun);
  if (runEntries.length === 0) {
    throw new Error(`No captures for run "${captureRun}" — available: ${[...new Set(entries.map((e) => e.runLabel))].join(", ")}`);
  }
  const observedAt = (options.now ?? (() => new Date()))().toISOString();
  // A vendor id listed twice would otherwise be classified twice and emit
  // two observations with the same id.
  const vendorIds = [...new Set(options.vendors ?? config.vendors.map((vendor) => vendor.id))];
  const claimIds = config.claims.map((claim) => claim.id);
  const knownClaimIds = new Set(claimIds);

  const observations: MarketObservation[] = [];
  const retriedVendorIds: string[] = [];

  for (const vendorId of vendorIds) {
    const vendor = config.vendors.find((candidate) => candidate.id === vendorId);
    if (!vendor) throw new Error(`Unknown vendor "${vendorId}"`);
    const vendorEntries = runEntries.filter((entry) => entry.vendorId === vendorId);
    const hashByUrl = new Map(
      vendorEntries.filter((entry) => entry.captureHash).map((entry) => [entry.url, entry.captureHash as string]),
    );
    const dossier = buildDossier(vendorEntries, textByHash);

    if (!dossier) {
      // Deterministic: no usable captures means UNOBSERVABLE everywhere — never
      // ask a model to judge pages that were never read.
      for (const claim of config.claims) {
        observations.push({
          id: observationId(config.category, options.runLabel, vendorId, claim.id),
          vendorId,
          claimId: claim.id,
          observedAt,
          intensity: "unobservable",
          confidence: "high",
          reason: `No usable captures for ${vendor.name} in run ${captureRun} — cannot judge.`,
          evidence: [],
        });
      }
      continue;
    }

    const prompt = (feedback: string) =>
      `${CLASSIFY_INSTRUCTIONS}\n\nSurface rule for this category:\n${config.surfaceRule ?? "(default rule above)"}\n\nClaims to classify (all of them):\n${claimsBlock(config.claims)}\n${feedback}\nVendor: ${vendor.name}\nCaptured pages:\n${dossier}`;

    // Converts a model reading into a stored observation. Evidence records the
    // capture hash of the cited URL ("" when the URL is not one of this
    // vendor's captured pages).
    const toObservation = (reading: LlmReading, vendor: string): MarketObservation => ({
      id: observationId(config.category, options.runLabel, vendor, reading.claimId),
      vendorId: vendor,
      claimId: reading.claimId,
      observedAt,
      intensity: reading.intensity,
      confidence: reading.confidence,
      reason: reading.reason,
      evidence: (reading.evidence ?? []).map((item, index) => ({
        id: `${observationId(config.category, options.runLabel, vendor, reading.claimId)}_ev${index}`,
        sourceSystem: "web" as const,
        sourceObjectType: "page",
        sourceObjectId: item.url,
        text: item.quote,
        observedAt,
        metadata: { url: item.url, captureHash: hashByUrl.get(item.url) ?? "" },
      })),
    });

    // One model call plus validation. `problems` are completeness issues,
    // `failures` are quotes that did not verify against the captures.
    const attempt = async (feedback: string): Promise<{ readings: LlmReading[]; problems: string[]; failures: SpanVerificationFailure[] }> => {
      const result = (await forcedToolCall(prompt(feedback), "classify_market_claims", classifySchema(claimIds), model, options.llm)) as
        | { readings?: unknown }
        | null
        | undefined;
      // A malformed payload is treated as "no readings" so it goes through the
      // normal retry path instead of crashing on `.filter`.
      const returned = Array.isArray(result?.readings) ? (result.readings as LlmReading[]) : [];
      const readings = returned.filter((reading) => knownClaimIds.has(reading.claimId));

      const countByClaim = new Map<string, number>();
      for (const reading of readings) {
        countByClaim.set(reading.claimId, (countByClaim.get(reading.claimId) ?? 0) + 1);
      }
      const problems = claimIds.flatMap((claimId) => {
        const count = countByClaim.get(claimId) ?? 0;
        if (count === 0) return [`missing reading for ${claimId}`];
        // The schema cannot forbid repeats; two readings for one claim would
        // be stored as two observations sharing an id.
        if (count > 1) return [`duplicate reading for ${claimId} (${count} returned; give exactly one)`];
        return [];
      });

      const candidate = readings.map((reading) => toObservation(reading, vendorId));
      const failures = verifyEvidenceSpans(candidate, textByHash);
      return { readings, problems, failures };
    };

    let outcome = await attempt("");
    if (outcome.problems.length > 0 || outcome.failures.length > 0) {
      retriedVendorIds.push(vendorId);
      const failureLines = [
        ...outcome.problems,
        ...outcome.failures.map((failure) => `${failure.claimId}: ${failure.problem} (your quote: "${failure.quote.slice(0, 80)}")`),
      ].join("\n- ");
      outcome = await attempt(
        `\nYour previous answer had problems. Fix exactly these and answer again in full:\n- ${failureLines}\nQuotes must be copied character-for-character from the captured text.\n`,
      );
    }
    if (outcome.problems.length > 0 || outcome.failures.length > 0) {
      const detail = [...outcome.problems, ...outcome.failures.map((failure) => `${failure.claimId}: ${failure.problem}`)].slice(0, 10);
      throw new Error(
        `Classification for ${vendor.name} failed mechanical verification after a retry:\n ${detail.join("\n ")}\nNothing was stored. Re-run, try another --model, or classify this vendor by hand via the worksheet.`,
      );
    }
    for (const reading of outcome.readings) observations.push(toObservation(reading, vendorId));
  }

  return {
    set: {
      id: `set_${config.category}_${options.runLabel}`,
      category: config.category,
      runLabel: options.runLabel,
      runAt: observedAt,
      extractor: `llm:${options.llm.provider}:${model}`,
      observations,
    },
    model,
    retriedVendorIds,
  };
}
|
|
240
|
+
|
|
241
|
+
/**
 * The agent-driven alternative to LLM classification: a worksheet carrying
 * everything needed to classify one vendor by hand or by an agent driving
 * the CLI/MCP — claims with judging definitions, the surface rule, and the
 * captured page texts. Submissions come back through `market observe`,
 * which runs the same validation and span verification as `classify`.
 */
export type MarketWorksheet = {
  /** Category the worksheet belongs to (from the config). */
  category: string;
  /** Capture run the pages were taken from. */
  captureRun: string;
  /** Category-specific surface rule, when the config defines one. */
  surfaceRule?: string;
  /** The vendor to classify. */
  vendor: { id: string; name: string };
  /** All configured claims, with their judging definitions. */
  claims: MarketClaim[];
  /** The vendor's captured pages for the run; `text` is "" when no text is loaded for the hash. */
  pages: Array<{ kind: CaptureEntry["kind"]; url: string; captureHash: string; text: string }>;
  /** Plain-language instructions for whoever fills in the worksheet. */
  instructions: string;
};
|
|
257
|
+
|
|
258
|
+
/**
 * Builds the classification worksheet for one vendor: the config's claims and
 * surface rule plus the vendor's captured pages (with text) from one capture
 * run.
 *
 * The run is validated the same way `classifyMarket` validates it, so a
 * mistyped `captureRun` is reported instead of silently producing a worksheet
 * with no pages. A vendor that simply has no captured pages in an existing
 * run still gets a worksheet with an empty `pages` list.
 *
 * @param config Market config.
 * @param vendorId Vendor to build the worksheet for.
 * @param options `captureRun` selects the run (default: the run of the last
 *   manifest entry); `capturesDir` overrides the captures directory.
 * @throws Error when the vendor id is unknown, when the category has no
 *   captures, or when the requested run has no captures.
 */
export function buildWorksheet(
  config: MarketConfig,
  vendorId: string,
  options: { captureRun?: string; capturesDir?: string } = {},
): MarketWorksheet {
  const vendor = config.vendors.find((candidate) => candidate.id === vendorId);
  if (!vendor) throw new Error(`Unknown vendor "${vendorId}"`);

  const { entries, textByHash } = loadCaptureTexts(config.category, options.capturesDir);
  const captureRun = options.captureRun ?? entries[entries.length - 1]?.runLabel;
  if (entries.length === 0 || !captureRun) {
    throw new Error(`No captures for ${config.category} — run \`market capture\` first`);
  }
  if (!entries.some((entry) => entry.runLabel === captureRun)) {
    throw new Error(`No captures for run "${captureRun}" — available: ${[...new Set(entries.map((e) => e.runLabel))].join(", ")}`);
  }

  const pages = entries
    .filter((entry) => entry.runLabel === captureRun && entry.vendorId === vendorId && entry.captureHash)
    .map((entry) => ({
      kind: entry.kind,
      url: entry.url,
      captureHash: entry.captureHash as string,
      text: textByHash.get(entry.captureHash as string) ?? "",
    }));

  return {
    category: config.category,
    captureRun,
    surfaceRule: config.surfaceRule,
    vendor: { id: vendor.id, name: vendor.name },
    claims: config.claims,
    pages,
    instructions:
      "Produce one observation per claim (intensity loud|quiet|absent from these pages only; unobservable only if a page you need failed to capture). Every loud/quiet reading must quote a verbatim span (≤300 chars) from a page's text, with that page's url and captureHash in evidence metadata. Submit as an ObservationSet via `market observe --from <file>` — quotes are mechanically verified against the captures.",
  };
}
|