fullstackgtm 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -0
- package/dist/cli.js +121 -12
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -1
- package/dist/llm.d.ts +7 -0
- package/dist/llm.js +7 -1
- package/dist/market.d.ts +19 -0
- package/dist/market.js +76 -0
- package/dist/marketClassify.d.ts +49 -0
- package/dist/marketClassify.js +201 -0
- package/dist/mcp.js +45 -0
- package/package.json +1 -1
- package/src/cli.ts +129 -12
- package/src/index.ts +11 -0
- package/src/llm.ts +7 -1
- package/src/market.ts +92 -0
- package/src/marketClassify.ts +286 -0
- package/src/mcp.ts +65 -0
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,42 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
|
5
5
|
and the project adheres to [Semantic Versioning](https://semver.org/).
|
|
6
6
|
The path to 1.0 is planned in [docs/roadmap-to-1.0.md](./docs/roadmap-to-1.0.md).
|
|
7
7
|
|
|
8
|
+
## [0.17.0] — 2026-06-11
|
|
9
|
+
|
|
10
|
+
Market map classification: intensity readings become a one-command step, and
|
|
11
|
+
the verbatim-quote rule becomes a mechanical gate instead of a prompt
|
|
12
|
+
instruction.
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
|
|
16
|
+
- **`fullstackgtm market classify`** — LLM intensity readings for every
|
|
17
|
+
vendor × claim cell from the stored captures, through the same
|
|
18
|
+
bring-your-own-key constrained-tool-call seam as `call parse`
|
|
19
|
+
(provenance `extractor: "llm:<provider>:<model>"`). Vendors with no
|
|
20
|
+
usable captures score UNOBSERVABLE deterministically, without an LLM
|
|
21
|
+
call. `--vendor` classifies one vendor to `--out` for hand-merging;
|
|
22
|
+
`--model` overrides the provider default.
|
|
23
|
+
- **Mechanical span verification** — because market sources are *stored*
|
|
24
|
+
captures (unlike transcripts, which pass through), every quoted evidence
|
|
25
|
+
span is checked character-for-character (whitespace-normalized) against
|
|
26
|
+
the capture it cites. Readings that fail bounce back to the model once
|
|
27
|
+
with the failures named; persistent failures abort with nothing stored.
|
|
28
|
+
The same gate now guards `market observe` (escape hatch: `--unverified`,
|
|
29
|
+
for sets whose captures genuinely live elsewhere) and the MCP submission
|
|
30
|
+
path — every proposal channel passes the same gate.
|
|
31
|
+
- **`fullstackgtm market worksheet --vendor <id>`** — the no-key channel:
|
|
32
|
+
a self-contained packet (claims with judging definitions, surface rule,
|
|
33
|
+
captured page texts) for an agent or human to classify by hand and
|
|
34
|
+
submit via `observe`.
|
|
35
|
+
- **`fullstackgtm market refresh`** — capture → classify → front drift →
|
|
36
|
+
HTML field report, one command. The weekly refresh is now a single
|
|
37
|
+
invocation (schedule it however you schedule things).
|
|
38
|
+
- **MCP**: `fullstackgtm_market_worksheet` and `fullstackgtm_market_observe`
|
|
39
|
+
(validates + verifies + appends; returns the computed front states on
|
|
40
|
+
acceptance).
|
|
41
|
+
- `forcedToolCall` exported from `llm.ts` — the one seam every LLM feature
|
|
42
|
+
in the package goes through.
|
|
43
|
+
|
|
8
44
|
## [0.16.0] — 2026-06-11
|
|
9
45
|
|
|
10
46
|
The market map: a live model of the competitive category a company sells
|
package/dist/cli.js
CHANGED
|
@@ -18,7 +18,8 @@ import { auditReportToHtml, auditReportToMarkdown } from "./report.js";
|
|
|
18
18
|
import { builtinAuditRules } from "./rules.js";
|
|
19
19
|
import { sampleSnapshot } from "./sampleData.js";
|
|
20
20
|
import { normalizeTranscript, parseCall, suggestCallDeal } from "./calls.js";
|
|
21
|
-
import { captureMarket, computeFrontStates, createFileObservationStore, diffFrontStates, loadMarketConfig, starterMarketConfig, validateObservationSet, } from "./market.js";
|
|
21
|
+
import { captureMarket, computeFrontStates, createFileObservationStore, diffFrontStates, loadCaptureTexts, loadMarketConfig, starterMarketConfig, validateObservationSet, verifyEvidenceSpans, } from "./market.js";
|
|
22
|
+
import { buildWorksheet, classifyMarket } from "./marketClassify.js";
|
|
22
23
|
import { marketMapToHtml, marketMapToMarkdown } from "./marketReport.js";
|
|
23
24
|
import { DEFAULT_RUBRIC, detectProviderFromKey, extractInsightsLlm, parseRubric, resolveLlmCredential, scoreCallLlm, validateLlmKey, } from "./llm.js";
|
|
24
25
|
import { resolveRecord } from "./resolve.js";
|
|
@@ -59,13 +60,18 @@ Usage:
|
|
|
59
60
|
found (exists/ambiguous) — call before ANY record creation
|
|
60
61
|
fullstackgtm market init --category <name> start a market map: vendors + claim taxonomy as reviewable config
|
|
61
62
|
fullstackgtm market capture [--config <path>] [--run <label>]
|
|
62
|
-
fullstackgtm market
|
|
63
|
+
fullstackgtm market classify [--run <label>] [--vendor <id>] [--model m] [--out <path>]
|
|
64
|
+
fullstackgtm market worksheet --vendor <id> [--out <path>]
|
|
65
|
+
fullstackgtm market observe --from <observations.json> [--unverified]
|
|
63
66
|
fullstackgtm market fronts [--run <label>] [--diff <prior-run>] [--json]
|
|
64
67
|
fullstackgtm market report [--run <label>] [--format md|html] [--out <path>]
|
|
68
|
+
fullstackgtm market refresh [--run <label>] [--model m]
|
|
65
69
|
the live competitive map: capture vendor pages (content-addressed),
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
classify intensity per claim (LLM bring-your-own-key, or fill the
|
|
71
|
+
worksheet with any agent) — every quoted span is verified verbatim
|
|
72
|
+
against the stored capture it cites before it's accepted — then
|
|
73
|
+
compute deterministic front states and drift, render the field
|
|
74
|
+
report. refresh = capture → classify → drift → report in one step
|
|
69
75
|
fullstackgtm suggest --plan-id <id> | --plan <path> [source options] [--json] [--out <path>]
|
|
70
76
|
derive values for requires_human_* placeholders
|
|
71
77
|
from snapshot evidence, with confidence + reasons
|
|
@@ -617,9 +623,14 @@ async function requireLlmCredential(command = "parse") {
|
|
|
617
623
|
if (resolved)
|
|
618
624
|
return resolved;
|
|
619
625
|
// Scoring is inherently LLM work — there is no keyword fallback to suggest.
|
|
620
|
-
const fallbackHint = command === "parse"
|
|
626
|
+
const fallbackHint = command === "parse"
|
|
627
|
+
? ", or pass --deterministic for the free keyword baseline"
|
|
628
|
+
: command === "score"
|
|
629
|
+
? " (call score has no non-LLM mode)"
|
|
630
|
+
: ", or classify by hand: `market worksheet --vendor <id>` then `market observe --from`";
|
|
631
|
+
const work = command === "score" ? "scoring" : command === "parse" ? "extraction" : "classification";
|
|
621
632
|
if (!process.stdin.isTTY) {
|
|
622
|
-
throw new Error(`LLM ${
|
|
633
|
+
throw new Error(`LLM ${work} needs an API key. Set ANTHROPIC_API_KEY or OPENAI_API_KEY, or run \`echo "$KEY" | fullstackgtm login anthropic\` (or \`login openai\`) once${fallbackHint}.`);
|
|
623
634
|
}
|
|
624
635
|
console.error("LLM parsing needs an API key (Anthropic or OpenAI) — yours, used directly with the provider.");
|
|
625
636
|
console.error(`Paste it once; it is validated and stored at ${credentialsPath()} (file mode 0600), like CRM logins.`);
|
|
@@ -729,9 +740,11 @@ function buildCallPlan(parsed, deal, proposed, current, extraNextSteps) {
|
|
|
729
740
|
/**
|
|
730
741
|
* The market map: claim taxonomy in a reviewable config file, page captures
|
|
731
742
|
* and append-only observations under the profile home, deterministic front
|
|
732
|
-
* states and reports computed from the store.
|
|
733
|
-
*
|
|
734
|
-
*
|
|
743
|
+
* states and reports computed from the store. Intensity readings enter as
|
|
744
|
+
* proposals through two channels — `classify` (LLM, bring-your-own-key, the
|
|
745
|
+
* call-intelligence pattern) and `worksheet`/`observe` (an agent or human
|
|
746
|
+
* fills the worksheet) — and BOTH pass the same mechanical gate: every quoted
|
|
747
|
+
* span is verified verbatim against the stored capture it cites.
|
|
735
748
|
*/
|
|
736
749
|
async function marketCommand(args) {
|
|
737
750
|
const [subcommand, ...rest] = args;
|
|
@@ -740,9 +753,18 @@ async function marketCommand(args) {
|
|
|
740
753
|
console.log(`Usage:
|
|
741
754
|
market init --category <name> [--out <path>] write a starter market.config.json
|
|
742
755
|
market capture [--config <path>] [--run <label>]
|
|
743
|
-
market
|
|
756
|
+
market classify [--run <label>] [--capture-run <label>] [--vendor <id>] [--model m] [--out <path>]
|
|
757
|
+
market worksheet --vendor <id> [--capture-run <label>] [--out <path>]
|
|
758
|
+
market observe --from <observations.json> [--unverified]
|
|
744
759
|
market fronts [--config <path>] [--run <label>] [--diff <prior-run>] [--json]
|
|
745
760
|
market report [--config <path>] [--run <label>] [--format md|html] [--out <path>]
|
|
761
|
+
market refresh [--run <label>] [--model m] capture → classify → fronts drift → HTML report
|
|
762
|
+
|
|
763
|
+
classify uses your Anthropic/OpenAI key (like call parse) to read the stored
|
|
764
|
+
captures and propose intensity readings; worksheet is the no-key path (an
|
|
765
|
+
agent or human fills it, submits via observe). Either way, every quoted span
|
|
766
|
+
is verified character-for-character against the capture it cites before the
|
|
767
|
+
observation is accepted — quotes that aren't on the page bounce.
|
|
746
768
|
|
|
747
769
|
The taxonomy (vendors + claims) is config you review and version; captures
|
|
748
770
|
and observations live under ~/.fullstackgtm/market/<category> (profile-scoped,
|
|
@@ -785,10 +807,97 @@ recomputed deterministically on every invocation — never stored.`);
|
|
|
785
807
|
process.exitCode = 1;
|
|
786
808
|
return;
|
|
787
809
|
}
|
|
810
|
+
if (!rest.includes("--unverified")) {
|
|
811
|
+
const { textByHash } = loadCaptureTexts(config.category);
|
|
812
|
+
const failures = verifyEvidenceSpans(set.observations, textByHash);
|
|
813
|
+
if (failures.length > 0) {
|
|
814
|
+
console.error(`Rejected: ${failures.length} evidence span(s) failed verification against the stored captures`);
|
|
815
|
+
for (const failure of failures.slice(0, 20)) {
|
|
816
|
+
console.error(` - ${failure.vendorId} × ${failure.claimId}: ${failure.problem}`);
|
|
817
|
+
}
|
|
818
|
+
console.error("Quotes must be copied verbatim from the captured pages. (--unverified skips this gate when the captures genuinely live elsewhere.)");
|
|
819
|
+
process.exitCode = 1;
|
|
820
|
+
return;
|
|
821
|
+
}
|
|
822
|
+
}
|
|
788
823
|
await store.append(set);
|
|
789
824
|
console.log(`Appended ${set.runLabel}: ${set.observations.length} observations (${set.extractor})`);
|
|
790
825
|
return;
|
|
791
826
|
}
|
|
827
|
+
if (subcommand === "worksheet") {
|
|
828
|
+
const vendorId = option(rest, "--vendor");
|
|
829
|
+
if (!vendorId)
|
|
830
|
+
throw new Error("market worksheet requires --vendor <id>");
|
|
831
|
+
const worksheet = buildWorksheet(config, vendorId, { captureRun: option(rest, "--capture-run") ?? undefined });
|
|
832
|
+
const outPath = option(rest, "--out");
|
|
833
|
+
const payload = `${JSON.stringify(worksheet, null, 2)}\n`;
|
|
834
|
+
if (outPath) {
|
|
835
|
+
writeFileSync(resolve(process.cwd(), outPath), payload);
|
|
836
|
+
console.log(`Wrote ${outPath} (${worksheet.pages.length} captured pages, ${worksheet.claims.length} claims)`);
|
|
837
|
+
}
|
|
838
|
+
else {
|
|
839
|
+
console.log(payload);
|
|
840
|
+
}
|
|
841
|
+
return;
|
|
842
|
+
}
|
|
843
|
+
if (subcommand === "classify") {
|
|
844
|
+
const credential = await requireLlmCredential("market classify");
|
|
845
|
+
const vendorFilter = option(rest, "--vendor");
|
|
846
|
+
const outPath = option(rest, "--out");
|
|
847
|
+
if (vendorFilter && !outPath) {
|
|
848
|
+
throw new Error("market classify --vendor produces a partial set (coverage validation would reject it) — pass --out <path> to inspect/merge it by hand");
|
|
849
|
+
}
|
|
850
|
+
const result = await classifyMarket(config, {
|
|
851
|
+
llm: { ...credential, model: option(rest, "--model") ?? undefined },
|
|
852
|
+
runLabel: option(rest, "--run") ?? option(rest, "--capture-run") ?? "run-1",
|
|
853
|
+
captureRun: option(rest, "--capture-run") ?? undefined,
|
|
854
|
+
vendors: vendorFilter ? [vendorFilter] : undefined,
|
|
855
|
+
});
|
|
856
|
+
if (result.retriedVendorIds.length > 0) {
|
|
857
|
+
console.error(`Span verification bounced ${result.retriedVendorIds.join(", ")} once; retry passed.`);
|
|
858
|
+
}
|
|
859
|
+
if (outPath) {
|
|
860
|
+
writeFileSync(resolve(process.cwd(), outPath), `${JSON.stringify(result.set, null, 2)}\n`);
|
|
861
|
+
console.log(`Wrote ${outPath}: ${result.set.observations.length} verified observations (${result.set.extractor})`);
|
|
862
|
+
return;
|
|
863
|
+
}
|
|
864
|
+
const problems = validateObservationSet(config, result.set);
|
|
865
|
+
if (problems.length > 0) {
|
|
866
|
+
throw new Error(`Classified set failed validation: ${problems.slice(0, 5).join("; ")}`);
|
|
867
|
+
}
|
|
868
|
+
await store.append(result.set);
|
|
869
|
+
console.log(`Appended ${result.set.runLabel}: ${result.set.observations.length} observations, every span verified (${result.set.extractor})`);
|
|
870
|
+
return;
|
|
871
|
+
}
|
|
872
|
+
if (subcommand === "refresh") {
|
|
873
|
+
const credential = await requireLlmCredential("market classify");
|
|
874
|
+
const runLabel = option(rest, "--run") ?? `run-${new Date().toISOString().slice(0, 10)}`;
|
|
875
|
+
const prior = await store.latest();
|
|
876
|
+
console.log(`Capturing ${config.vendors.length} vendors as ${runLabel}…`);
|
|
877
|
+
const captured = await captureMarket(config, { runLabel });
|
|
878
|
+
const failed = captured.entries.filter((entry) => !entry.captureHash);
|
|
879
|
+
if (failed.length > 0)
|
|
880
|
+
console.log(`${failed.length} page(s) failed/empty — affected cells will verify against remaining pages or read unobservable.`);
|
|
881
|
+
console.log(`Classifying with ${credential.provider}…`);
|
|
882
|
+
const result = await classifyMarket(config, {
|
|
883
|
+
llm: { ...credential, model: option(rest, "--model") ?? undefined },
|
|
884
|
+
runLabel,
|
|
885
|
+
captureRun: runLabel,
|
|
886
|
+
});
|
|
887
|
+
await store.append(result.set);
|
|
888
|
+
const fronts = computeFrontStates(config, result.set);
|
|
889
|
+
if (prior) {
|
|
890
|
+
const drift = diffFrontStates(computeFrontStates(config, prior), fronts);
|
|
891
|
+
if (drift.length === 0)
|
|
892
|
+
console.log(`No front changes since ${prior.runLabel}.`);
|
|
893
|
+
for (const change of drift)
|
|
894
|
+
console.log(`CHANGED ${change.claimId}: ${change.before} → ${change.after}`);
|
|
895
|
+
}
|
|
896
|
+
const outPath = option(rest, "--out") ?? `${config.category}-${runLabel}.html`;
|
|
897
|
+
writeFileSync(resolve(process.cwd(), outPath), marketMapToHtml(config, result.set));
|
|
898
|
+
console.log(`Wrote ${outPath}`);
|
|
899
|
+
return;
|
|
900
|
+
}
|
|
792
901
|
const loadSet = async () => {
|
|
793
902
|
const runLabel = option(rest, "--run");
|
|
794
903
|
const set = runLabel ? await store.get(runLabel) : await store.latest();
|
|
@@ -838,7 +947,7 @@ recomputed deterministically on every invocation — never stored.`);
|
|
|
838
947
|
}
|
|
839
948
|
return;
|
|
840
949
|
}
|
|
841
|
-
throw new Error(`Unknown market subcommand: ${subcommand} (try: init, capture, observe, fronts, report)`);
|
|
950
|
+
throw new Error(`Unknown market subcommand: ${subcommand} (try: init, capture, classify, worksheet, observe, fronts, report, refresh)`);
|
|
842
951
|
}
|
|
843
952
|
/**
|
|
844
953
|
* The resolve gate: exit 0 = safe to create, exit 2 = match found (exists or
|
package/dist/index.d.ts
CHANGED
|
@@ -19,7 +19,8 @@ export { extractCallInsights, normalizeTranscript, parseCall, parseTranscript, s
|
|
|
19
19
|
export { sampleSnapshot } from "./sampleData.ts";
|
|
20
20
|
export { DEFAULT_MODELS, DEFAULT_RUBRIC, detectProviderFromKey, extractInsightsLlm, parseRubric, resolveLlmCredential, scoreCallLlm, validateLlmKey, type CallScorecard, type LlmCredential, type LlmExtractedInsight, type LlmProvider, type Rubric, type ScoredDimension, } from "./llm.ts";
|
|
21
21
|
export { resolveRecord, type ResolveCandidate, type ResolveMatch, type ResolveResult } from "./resolve.ts";
|
|
22
|
-
export { captureMarket, computeFrontStates, createFileObservationStore, diffFrontStates, extractReadableText, loadMarketConfig, marketHome, observationId, parseMarketConfig, starterMarketConfig, validateObservationSet, type CaptureEntry, type CaptureOptions, type ClaimFront, type ClaimIntensity, type FrontDrift, type FrontState, type MarketClaim, type MarketConfig, type MarketObservation, type MarketVendor, type ObservationConfidence, type ObservationSet, type ObservationStore, } from "./market.ts";
|
|
22
|
+
export { captureMarket, computeFrontStates, createFileObservationStore, diffFrontStates, extractReadableText, loadCaptureTexts, loadMarketConfig, marketHome, normalizeForMatch, observationId, parseMarketConfig, starterMarketConfig, validateObservationSet, verifyEvidenceSpans, type CaptureEntry, type CaptureOptions, type ClaimFront, type ClaimIntensity, type FrontDrift, type FrontState, type MarketClaim, type MarketConfig, type MarketObservation, type MarketVendor, type ObservationConfidence, type ObservationSet, type ObservationStore, type SpanVerificationFailure, } from "./market.ts";
|
|
23
|
+
export { buildWorksheet, classifyMarket, type ClassifyMarketOptions, type ClassifyMarketResult, type MarketWorksheet, } from "./marketClassify.ts";
|
|
23
24
|
export { marketMapToHtml, marketMapToMarkdown } from "./marketReport.ts";
|
|
24
25
|
export { suggestValues, type SuggestionConfidence, type ValueSuggestion } from "./suggest.ts";
|
|
25
26
|
export type { ApprovalStatus, AuditFinding, AuditFindingSeverity, CanonicalAccount, CanonicalActivity, CanonicalContact, CanonicalDeal, CanonicalGtmSnapshot, CanonicalUser, CrmProvider, GtmAuditRule, GtmConnector, GtmEvidence, GtmEvidenceSourceSystem, GtmObjectType, GtmPolicy, GtmRuleContext, GtmRuleResult, GtmSnapshotIndex, PatchOperation, PatchOperationResult, PatchOperationType, PatchPlan, PatchPlanRun, PatchPlanRunStatus, PatchVerification, PipelineFinding, PipelineFindingStatus, PipelineFindingType, ProviderIdentity, RiskLevel, SourceFreshness, } from "./types.ts";
|
package/dist/index.js
CHANGED
|
@@ -19,6 +19,7 @@ export { extractCallInsights, normalizeTranscript, parseCall, parseTranscript, s
|
|
|
19
19
|
export { sampleSnapshot } from "./sampleData.js";
|
|
20
20
|
export { DEFAULT_MODELS, DEFAULT_RUBRIC, detectProviderFromKey, extractInsightsLlm, parseRubric, resolveLlmCredential, scoreCallLlm, validateLlmKey, } from "./llm.js";
|
|
21
21
|
export { resolveRecord } from "./resolve.js";
|
|
22
|
-
export { captureMarket, computeFrontStates, createFileObservationStore, diffFrontStates, extractReadableText, loadMarketConfig, marketHome, observationId, parseMarketConfig, starterMarketConfig, validateObservationSet, } from "./market.js";
|
|
22
|
+
export { captureMarket, computeFrontStates, createFileObservationStore, diffFrontStates, extractReadableText, loadCaptureTexts, loadMarketConfig, marketHome, normalizeForMatch, observationId, parseMarketConfig, starterMarketConfig, validateObservationSet, verifyEvidenceSpans, } from "./market.js";
|
|
23
|
+
export { buildWorksheet, classifyMarket, } from "./marketClassify.js";
|
|
23
24
|
export { marketMapToHtml, marketMapToMarkdown } from "./marketReport.js";
|
|
24
25
|
export { suggestValues } from "./suggest.js";
|
package/dist/llm.d.ts
CHANGED
|
@@ -64,6 +64,13 @@ export declare function scoreCallLlm(transcript: string, rubric: Rubric, options
|
|
|
64
64
|
title?: string;
|
|
65
65
|
}): Promise<CallScorecard>;
|
|
66
66
|
export declare function parseRubric(json: string): Rubric;
|
|
67
|
+
/**
|
|
68
|
+
* Shared constrained-tool-call plumbing: force the model to answer through a
|
|
69
|
+
* single tool whose input_schema is the output contract. Exported for other
|
|
70
|
+
* semi-deterministic features (market classification) — every LLM feature in
|
|
71
|
+
* the package goes through this one seam.
|
|
72
|
+
*/
|
|
73
|
+
export declare function forcedToolCall(prompt: string, toolName: string, schema: object, model: string, options: LlmCallOptions): Promise<unknown>;
|
|
67
74
|
/** Cheap key validation against the provider's model-list endpoint. Status line only. */
|
|
68
75
|
export declare function validateLlmKey(provider: LlmProvider, apiKey: string, fetchImpl?: typeof fetch): Promise<{
|
|
69
76
|
ok: boolean;
|
package/dist/llm.js
CHANGED
|
@@ -158,7 +158,13 @@ export function parseRubric(json) {
|
|
|
158
158
|
};
|
|
159
159
|
}
|
|
160
160
|
// ── Provider plumbing (raw fetch, forced tool calls) ───────────────────────
|
|
161
|
-
|
|
161
|
+
/**
|
|
162
|
+
* Shared constrained-tool-call plumbing: force the model to answer through a
|
|
163
|
+
* single tool whose input_schema is the output contract. Exported for other
|
|
164
|
+
* semi-deterministic features (market classification) — every LLM feature in
|
|
165
|
+
* the package goes through this one seam.
|
|
166
|
+
*/
|
|
167
|
+
export async function forcedToolCall(prompt, toolName, schema, model, options) {
|
|
162
168
|
const fetchImpl = options.fetchImpl ?? fetch;
|
|
163
169
|
if (options.provider === "anthropic") {
|
|
164
170
|
const response = await llmFetch(fetchImpl, ANTHROPIC_URL, {
|
package/dist/market.d.ts
CHANGED
|
@@ -126,6 +126,25 @@ export declare function createFileObservationStore(category: string, directory?:
|
|
|
126
126
|
* Returns problems; an empty array means accept.
|
|
127
127
|
*/
|
|
128
128
|
export declare function validateObservationSet(config: MarketConfig, set: ObservationSet): string[];
|
|
129
|
+
export declare function loadCaptureTexts(category: string, directory?: string): {
|
|
130
|
+
entries: CaptureEntry[];
|
|
131
|
+
textByHash: Map<string, string>;
|
|
132
|
+
};
|
|
133
|
+
/**
|
|
134
|
+
* Whitespace-only normalization for span matching, plus one extraction
|
|
135
|
+
* artifact: the HTML-to-text step can emit a line break before punctuation
|
|
136
|
+
* that follows an inline tag ("placements\n. Districts"), which no honest
|
|
137
|
+
* quoter would reproduce — so whitespace *before* punctuation is dropped
|
|
138
|
+
* too. Words, casing, and characters must still match the page exactly.
|
|
139
|
+
*/
|
|
140
|
+
export declare function normalizeForMatch(value: string): string;
|
|
141
|
+
export type SpanVerificationFailure = {
|
|
142
|
+
vendorId: string;
|
|
143
|
+
claimId: string;
|
|
144
|
+
quote: string;
|
|
145
|
+
problem: string;
|
|
146
|
+
};
|
|
147
|
+
export declare function verifyEvidenceSpans(observations: MarketObservation[], textByHash: Map<string, string>): SpanVerificationFailure[];
|
|
129
148
|
export type ClaimFront = {
|
|
130
149
|
claimId: string;
|
|
131
150
|
state: FrontState;
|
package/dist/market.js
CHANGED
|
@@ -270,6 +270,82 @@ export function validateObservationSet(config, set) {
|
|
|
270
270
|
}
|
|
271
271
|
return problems;
|
|
272
272
|
}
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
// Evidence span verification — the deterministic gate that makes the
|
|
275
|
+
// verbatim-quote rule mechanical instead of a prompt instruction. Because the
|
|
276
|
+
// source documents are *stored* (unlike call transcripts, which pass through),
|
|
277
|
+
// every quoted span can be checked against the capture it cites before the
|
|
278
|
+
// observation is accepted. Comparison is whitespace-normalized only: case and
|
|
279
|
+
// wording must match the page exactly.
|
|
280
|
+
/**
 * Load the capture manifest for a category together with the stored text of
 * every capture it references.
 *
 * @param {string} category - Market category; used to locate the default
 *   captures directory when `directory` is not given.
 * @param {string} [directory] - Captures directory override.
 * @returns {{ entries: Array<object>, textByHash: Map<string, string> }}
 *   The manifest entries as stored, and capture text keyed by captureHash.
 *   A missing manifest yields no entries and an empty map.
 * @throws {Error} When the manifest exists but does not contain a JSON array.
 */
export function loadCaptureTexts(category, directory) {
    const dir = directory ?? join(marketHome(category), "captures");
    const manifestPath = join(dir, "manifest.json");
    const entries = existsSync(manifestPath)
        ? JSON.parse(readFileSync(manifestPath, "utf8"))
        : [];
    // A manifest that parses to a string would be iterated character by
    // character and returned as "entries"; an object would fail with an opaque
    // "not iterable" TypeError. Reject both with a message naming the file.
    if (!Array.isArray(entries)) {
        throw new Error(`Capture manifest at ${manifestPath} is not a JSON array`);
    }
    const textByHash = new Map();
    for (const entry of entries) {
        // Entries without a hash have no stored text; repeated hashes are
        // read from disk only once.
        if (entry.captureHash && !textByHash.has(entry.captureHash)) {
            try {
                textByHash.set(entry.captureHash, readFileSync(join(dir, `${entry.captureHash}.txt`), "utf8"));
            }
            catch {
                // Missing capture file: verification of anything citing it will fail loudly.
            }
        }
    }
    return { entries, textByHash };
}
|
|
299
|
+
/**
 * Whitespace-only normalization for span matching, plus one extraction
 * artifact: the HTML-to-text step can emit a line break before punctuation
 * that follows an inline tag ("placements\n. Districts"), which no honest
 * quoter would reproduce — so whitespace *before* punctuation is dropped
 * too. Words, casing, and characters must still match the page exactly.
 *
 * @param {string} value - Raw capture text or quoted span.
 * @returns {string} The value with whitespace before `. , ; : ! ?` removed,
 *   remaining whitespace runs collapsed to one space, and ends trimmed.
 */
export function normalizeForMatch(value) {
    return value
        .replace(/\s+([.,;:!?])/g, "$1")
        .replace(/\s+/g, " ")
        .trim();
}
/**
 * Check every evidence span of every observation against the capture it
 * cites. A span passes only when it names a captureHash, that capture is
 * present in `textByHash`, the quote is non-empty after normalization, and
 * the normalized quote occurs in the normalized capture text.
 *
 * Observations with no evidence entries contribute no failures.
 *
 * @param {Array<object>} observations - Observations carrying `vendorId`,
 *   `claimId` and an `evidence` array of `{ text, metadata: { captureHash } }`.
 * @param {Map<string, string>} textByHash - Capture text keyed by captureHash.
 * @returns {Array<{ vendorId: string, claimId: string, quote: string, problem: string }>}
 *   One entry per failing span; an empty array means every span verified.
 */
export function verifyEvidenceSpans(observations, textByHash) {
    const failures = [];
    // Several spans usually cite the same capture; normalize each page once.
    const normalizedCaptures = new Map();
    const normalizedCapture = (hash, captureText) => {
        let normalized = normalizedCaptures.get(hash);
        if (normalized === undefined) {
            normalized = normalizeForMatch(captureText);
            normalizedCaptures.set(hash, normalized);
        }
        return normalized;
    };
    for (const obs of observations) {
        const fail = (quote, problem) => {
            failures.push({ vendorId: obs.vendorId, claimId: obs.claimId, quote, problem });
        };
        for (const evidence of obs.evidence ?? []) {
            // A non-string quote cannot be matched; treat it like a missing one.
            const quote = typeof evidence?.text === "string" ? evidence.text : "";
            const hash = String(evidence?.metadata?.captureHash ?? "");
            if (!hash) {
                fail(quote, "evidence has no captureHash — spans must cite a stored capture");
                continue;
            }
            const captureText = textByHash.get(hash);
            if (captureText === undefined) {
                fail(quote, `capture ${hash.slice(0, 12)} not found — evidence must stay resolvable`);
                continue;
            }
            const needle = normalizeForMatch(quote);
            if (needle === "") {
                // An empty needle is contained in every string, so without this
                // check a quote-less span would pass the verbatim gate.
                fail(quote, `evidence quote is empty — spans must quote text from capture ${hash.slice(0, 12)}`);
                continue;
            }
            if (!normalizedCapture(hash, captureText).includes(needle)) {
                fail(quote, `quote not found verbatim in capture ${hash.slice(0, 12)}`);
            }
        }
    }
    return failures;
}
|
|
273
349
|
/**
|
|
274
350
|
* Front rule v1: 0 loud → open (if anyone is quiet) or vacant; 1 loud →
|
|
275
351
|
* owned; 2–3 loud → contested; ≥4 loud → saturated. Unobservable cells are
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { type LlmCallOptions } from "./llm.ts";
|
|
2
|
+
import { type CaptureEntry, type MarketClaim, type MarketConfig, type ObservationSet } from "./market.ts";
|
|
3
|
+
export type ClassifyMarketOptions = {
|
|
4
|
+
llm: LlmCallOptions;
|
|
5
|
+
/** Observation run label to produce; must be new (the store is append-only). */
|
|
6
|
+
runLabel: string;
|
|
7
|
+
/** Capture run to classify; defaults to the most recent run in the manifest. */
|
|
8
|
+
captureRun?: string;
|
|
9
|
+
/** Restrict to these vendor ids (e.g. one new vendor); defaults to all. */
|
|
10
|
+
vendors?: string[];
|
|
11
|
+
/** Captures directory override (tests); defaults to the profile market home. */
|
|
12
|
+
capturesDir?: string;
|
|
13
|
+
now?: () => Date;
|
|
14
|
+
};
|
|
15
|
+
export type ClassifyMarketResult = {
|
|
16
|
+
set: ObservationSet;
|
|
17
|
+
model: string;
|
|
18
|
+
/** Ids of vendors whose first reading failed mechanical span verification and whose single retry passed. */
|
|
19
|
+
retriedVendorIds: string[];
|
|
20
|
+
};
|
|
21
|
+
export declare function classifyMarket(config: MarketConfig, options: ClassifyMarketOptions): Promise<ClassifyMarketResult>;
|
|
22
|
+
/**
|
|
23
|
+
* The agent-driven alternative to LLM classification: a worksheet carrying
|
|
24
|
+
* everything needed to classify one vendor by hand or by an agent driving
|
|
25
|
+
* the CLI/MCP — claims with judging definitions, the surface rule, and the
|
|
26
|
+
* captured page texts. Submissions come back through `market observe`,
|
|
27
|
+
* which runs the same validation and span verification as `classify`.
|
|
28
|
+
*/
|
|
29
|
+
export type MarketWorksheet = {
|
|
30
|
+
category: string;
|
|
31
|
+
captureRun: string;
|
|
32
|
+
surfaceRule?: string;
|
|
33
|
+
vendor: {
|
|
34
|
+
id: string;
|
|
35
|
+
name: string;
|
|
36
|
+
};
|
|
37
|
+
claims: MarketClaim[];
|
|
38
|
+
pages: Array<{
|
|
39
|
+
kind: CaptureEntry["kind"];
|
|
40
|
+
url: string;
|
|
41
|
+
captureHash: string;
|
|
42
|
+
text: string;
|
|
43
|
+
}>;
|
|
44
|
+
instructions: string;
|
|
45
|
+
};
|
|
46
|
+
export declare function buildWorksheet(config: MarketConfig, vendorId: string, options?: {
|
|
47
|
+
captureRun?: string;
|
|
48
|
+
capturesDir?: string;
|
|
49
|
+
}): MarketWorksheet;
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { DEFAULT_MODELS, forcedToolCall } from "./llm.js";
|
|
2
|
+
import { loadCaptureTexts, observationId, verifyEvidenceSpans, } from "./market.js";
|
|
3
|
+
/**
|
|
4
|
+
* LLM intensity classification for the market map — the same
|
|
5
|
+
* semi-deterministic posture as call extraction, with one upgrade calls
|
|
6
|
+
* can't have: because the source pages are stored captures, every quoted
|
|
7
|
+
* span is verified mechanically against the capture it cites before the
|
|
8
|
+
* observation is accepted. A reading whose quote isn't verbatim on the page
|
|
9
|
+
* bounces back to the model once with the failures named; if it still can't
|
|
10
|
+
* quote the page, classification fails rather than storing unverifiable
|
|
11
|
+
* evidence.
|
|
12
|
+
*
|
|
13
|
+
* Deterministic parts stay deterministic: vendors with no usable captures
|
|
14
|
+
* score UNOBSERVABLE on every claim without an LLM call, and front states
|
|
15
|
+
* downstream are computed from the store, never from model output.
|
|
16
|
+
*/
|
|
17
|
+
// Bound cost and context: a vendor's pages are classified in one call.
|
|
18
|
+
const MAX_DOSSIER_CHARS = 48_000;
|
|
19
|
+
const CLASSIFY_INSTRUCTIONS = `Classify this vendor's messaging intensity for EVERY claim listed.
|
|
20
|
+
Rules:
|
|
21
|
+
- Judge ONLY from the captured pages below. Do not use outside knowledge of the vendor.
|
|
22
|
+
- intensity per the surface rule: "loud" = hero copy or a top-level-nav named product/program with a dedicated page; "quiet" = present on any page below that; "absent" = nowhere in the captures.
|
|
23
|
+
- evidence quotes MUST be verbatim spans copied exactly from the captured text (≤300 chars). Every loud or quiet reading needs at least one quote. If you cannot quote it, the reading is absent.
|
|
24
|
+
- An explicit disavowal ("we do not offer X", "call 988") is absent — put the disavowal quote in reason, it is informative signal.
|
|
25
|
+
- url must be the page the quote came from, exactly as given in the page headers below.
|
|
26
|
+
- reason: one reviewer-facing sentence.
|
|
27
|
+
- Return a reading for every claim id. Never invent claim ids.`;
|
|
28
|
+
/**
 * Build the JSON Schema for the `classify_market_claims` tool call.
 *
 * The answer is an object with a `readings` array; each reading names a claim
 * (restricted to `claimIds`), an intensity, a confidence, a reason and a list
 * of quote/url evidence items.
 *
 * @param {string[]} claimIds - Claim ids the model may answer for; used as-is
 *   as the `enum` of `claimId`.
 * @returns {object} The schema object.
 */
const classifySchema = (claimIds) => {
    // One quoted span plus the page it was copied from.
    const evidenceItemSchema = {
        type: "object",
        required: ["quote", "url"],
        properties: {
            quote: { type: "string", description: "VERBATIM span copied exactly from the captured page text. Never paraphrase." },
            url: { type: "string", description: "The page URL the quote came from, exactly as shown in the page header." },
        },
    };
    // One vendor × claim reading.
    const readingSchema = {
        type: "object",
        required: ["claimId", "intensity", "confidence", "reason", "evidence"],
        properties: {
            claimId: { type: "string", enum: claimIds },
            intensity: { type: "string", enum: ["loud", "quiet", "absent"] },
            confidence: { type: "string", enum: ["high", "medium", "low"] },
            reason: { type: "string", description: "One reviewer-facing sentence." },
            evidence: {
                type: "array",
                items: evidenceItemSchema,
            },
        },
    };
    return {
        type: "object",
        required: ["readings"],
        properties: {
            readings: {
                type: "array",
                items: readingSchema,
            },
        },
    };
};
|
|
58
|
+
/**
 * Render a vendor's captured pages as one text block for the prompt.
 *
 * Only entries that have a `captureHash` with stored text in `textByHash` are
 * included. The character budget is split evenly across those pages; a page
 * longer than its share keeps its head and tail with a truncation marker in
 * between. Page headers and the marker are not counted against the budget.
 *
 * @param {Array<{kind: string, url: string, captureHash?: string}>} entries
 *   Capture entries for one vendor.
 * @param {Map<string, string>} textByHash - Captured page text keyed by hash.
 * @param {number} [maxChars=MAX_DOSSIER_CHARS] - Total page-text budget.
 * @returns {string} The dossier, or "" when no entry has usable text.
 */
function buildDossier(entries, textByHash, maxChars = MAX_DOSSIER_CHARS) {
    const pages = entries
        .filter((entry) => entry.captureHash && textByHash.has(entry.captureHash))
        .map((entry) => ({ entry, text: textByHash.get(entry.captureHash) }));
    if (pages.length === 0)
        return "";
    const budget = Math.floor(maxChars / pages.length);
    // Integer half so head and tail sizes are explicit rather than relying on
    // slice() truncating a fractional index.
    const half = Math.floor(budget / 2);
    return pages
        .map(({ entry, text }) => {
            let body = text;
            if (text.length > budget) {
                const head = text.slice(0, half);
                // slice(-0) is slice(0) and would return the WHOLE page, so a
                // zero-sized tail has to be handled explicitly.
                const tail = half > 0 ? text.slice(-half) : "";
                body = `${head}\n[... middle of page truncated ...]\n${tail}`;
            }
            return `=== PAGE (${entry.kind}) ${entry.url} ===\n${body}`;
        })
        .join("\n\n");
}
|
|
74
|
+
/**
 * Format the claims as a bullet list for the prompt: one `- id: capability`
 * bullet per claim, followed by its "How to judge" definition line.
 *
 * @param {Array<{id: string, capability: string, definition: string}>} claims
 * @returns {string} Newline-joined bullets; "" for an empty list.
 */
function claimsBlock(claims) {
    const bullets = [];
    for (const claim of claims) {
        const heading = `- ${claim.id}: ${claim.capability}`;
        const guidance = ` How to judge: ${claim.definition}`;
        bullets.push(`${heading}\n${guidance}`);
    }
    return bullets.join("\n");
}
|
|
79
|
+
/**
 * Classify every requested vendor × claim cell from one capture run.
 *
 * Per vendor: the vendor's pages are rendered with `buildDossier` and sent in
 * one forced tool call. The answer is accepted only if it has exactly one
 * reading per claim and `verifyEvidenceSpans` reports no failures. Otherwise
 * the call is repeated once with the problems listed; if that still fails, an
 * error is thrown and nothing is returned. A vendor with no usable captured
 * text gets an "unobservable" observation for every claim without a model call.
 *
 * @param {object} config - Reads `category`, `vendors` (`id`, `name`),
 *   `claims` (`id`, `capability`, `definition`) and optional `surfaceRule`.
 * @param {object} options - Reads `llm` (`provider`, optional `model`),
 *   `runLabel`, and optional `capturesDir`, `captureRun` (defaults to the run
 *   label of the last capture entry), `vendors` (ids; defaults to all vendors
 *   in `config`) and `now` (clock returning a Date).
 * @returns {Promise<{set: object, model: string, retriedVendorIds: string[]}>}
 *   The observation set, the model used, and the vendors that needed a retry.
 * @throws {Error} When there are no captures at all or none for the chosen
 *   run, when a vendor id is not in `config.vendors`, or when a vendor's
 *   classification still fails verification after the retry.
 */
export async function classifyMarket(config, options) {
    const model = options.llm.model ?? DEFAULT_MODELS[options.llm.provider];
    const { entries, textByHash } = loadCaptureTexts(config.category, options.capturesDir);
    if (entries.length === 0) {
        throw new Error(`No captures for ${config.category} — run \`market capture\` first`);
    }
    const captureRun = options.captureRun ?? entries[entries.length - 1].runLabel;
    const runEntries = entries.filter((entry) => entry.runLabel === captureRun);
    if (runEntries.length === 0) {
        throw new Error(`No captures for run "${captureRun}" — available: ${[...new Set(entries.map((e) => e.runLabel))].join(", ")}`);
    }
    const observedAt = (options.now ?? (() => new Date()))().toISOString();
    const vendorIds = options.vendors ?? config.vendors.map((vendor) => vendor.id);
    // Resolve every vendor before the first model call, so a typo in the
    // vendor list fails immediately instead of after paying for the vendors
    // listed ahead of it.
    const vendors = vendorIds.map((vendorId) => {
        const vendor = config.vendors.find((candidate) => candidate.id === vendorId);
        if (!vendor)
            throw new Error(`Unknown vendor "${vendorId}"`);
        return vendor;
    });
    const claimIds = config.claims.map((claim) => claim.id);
    const knownClaimIds = new Set(claimIds);
    const observations = [];
    const retriedVendorIds = [];
    const hasIssues = (outcome) => outcome.problems.length > 0 || outcome.failures.length > 0;
    for (const vendor of vendors) {
        const vendorId = vendor.id;
        const vendorEntries = runEntries.filter((entry) => entry.vendorId === vendorId);
        const hashByUrl = new Map(vendorEntries.filter((entry) => entry.captureHash).map((entry) => [entry.url, entry.captureHash]));
        const dossier = buildDossier(vendorEntries, textByHash);
        if (!dossier) {
            // Deterministic: no usable captures means UNOBSERVABLE everywhere — never
            // ask a model to judge pages that were never read.
            for (const claim of config.claims) {
                observations.push({
                    id: observationId(config.category, options.runLabel, vendorId, claim.id),
                    vendorId,
                    claimId: claim.id,
                    observedAt,
                    intensity: "unobservable",
                    confidence: "high",
                    reason: `No usable captures for ${vendor.name} in run ${captureRun} — cannot judge.`,
                    evidence: [],
                });
            }
            continue;
        }
        // Map one model reading onto the stored observation shape.
        const toObservation = (reading) => {
            const id = observationId(config.category, options.runLabel, vendorId, reading.claimId);
            return {
                id,
                vendorId,
                claimId: reading.claimId,
                observedAt,
                intensity: reading.intensity,
                confidence: reading.confidence,
                reason: reading.reason,
                evidence: (reading.evidence ?? []).map((item, index) => ({
                    id: `${id}_ev${index}`,
                    sourceSystem: "web",
                    sourceObjectType: "page",
                    sourceObjectId: item.url,
                    text: item.quote,
                    observedAt,
                    // A url that is not one of this vendor's captured pages gets
                    // an empty hash.
                    metadata: { url: item.url, captureHash: hashByUrl.get(item.url) ?? "" },
                })),
            };
        };
        const prompt = (feedback) => `${CLASSIFY_INSTRUCTIONS}\n\nSurface rule for this category:\n${config.surfaceRule ?? "(default rule above)"}\n\nClaims to classify (all of them):\n${claimsBlock(config.claims)}\n${feedback}\nVendor: ${vendor.name}\nCaptured pages:\n${dossier}`;
        // One model round trip plus the mechanical checks on its answer.
        const attempt = async (feedback) => {
            const result = (await forcedToolCall(prompt(feedback), "classify_market_claims", classifySchema(claimIds), model, options.llm));
            // A missing result counts as "no readings" and is reported below as
            // missing claims rather than crashing on property access.
            const readings = (result?.readings ?? []).filter((reading) => knownClaimIds.has(reading.claimId));
            const seen = new Set();
            const repeated = new Set();
            for (const reading of readings) {
                if (seen.has(reading.claimId))
                    repeated.add(reading.claimId);
                seen.add(reading.claimId);
            }
            // Two readings for one claim would be stored as two observations
            // with the same id, so a duplicate is rejected like a missing one.
            const problems = [
                ...claimIds.filter((claimId) => !seen.has(claimId)).map((claimId) => `missing reading for ${claimId}`),
                ...[...repeated].map((claimId) => `duplicate reading for ${claimId}`),
            ];
            const candidate = readings.map(toObservation);
            const failures = verifyEvidenceSpans(candidate, textByHash);
            return { observations: candidate, problems, failures };
        };
        let outcome = await attempt("");
        if (hasIssues(outcome)) {
            retriedVendorIds.push(vendorId);
            const failureLines = [
                ...outcome.problems,
                ...outcome.failures.map((failure) => `${failure.claimId}: ${failure.problem} (your quote: "${(failure.quote ?? "").slice(0, 80)}")`),
            ].join("\n- ");
            outcome = await attempt(`\nYour previous answer had problems. Fix exactly these and answer again in full:\n- ${failureLines}\nQuotes must be copied character-for-character from the captured text.\n`);
        }
        if (hasIssues(outcome)) {
            const detail = [...outcome.problems, ...outcome.failures.map((failure) => `${failure.claimId}: ${failure.problem}`)].slice(0, 10);
            throw new Error(`Classification for ${vendor.name} failed mechanical verification after a retry:\n ${detail.join("\n ")}\nNothing was stored. Re-run, try another --model, or classify this vendor by hand via the worksheet.`);
        }
        // Store exactly the observations that passed verification.
        observations.push(...outcome.observations);
    }
    return {
        set: {
            id: `set_${config.category}_${options.runLabel}`,
            category: config.category,
            runLabel: options.runLabel,
            runAt: observedAt,
            extractor: `llm:${options.llm.provider}:${model}`,
            observations,
        },
        model,
        retriedVendorIds,
    };
}
|
|
176
|
+
/**
 * Assemble the hand-classification worksheet for one vendor: its captured
 * pages from one run, the claims to judge, and instructions for the reviewer.
 *
 * @param {object} config - Reads `category`, `vendors`, `claims`, `surfaceRule`.
 * @param {string} vendorId - Id of a vendor in `config.vendors`.
 * @param {object} [options] - Optional `capturesDir` and `captureRun`
 *   (defaults to the run label of the last capture entry).
 * @returns {object} `{ category, captureRun, surfaceRule, vendor, claims,
 *   pages, instructions }`; each page carries `kind`, `url`, `captureHash` and
 *   `text` ("" when no text is stored for the hash).
 * @throws {Error} When the vendor is unknown or no capture run can be determined.
 */
export function buildWorksheet(config, vendorId, options = {}) {
    const vendor = config.vendors.find((candidate) => candidate.id === vendorId);
    if (!vendor) {
        throw new Error(`Unknown vendor "${vendorId}"`);
    }
    const { entries, textByHash } = loadCaptureTexts(config.category, options.capturesDir);
    const latestEntry = entries[entries.length - 1];
    const captureRun = options.captureRun ?? latestEntry?.runLabel;
    if (!captureRun) {
        throw new Error(`No captures for ${config.category} — run \`market capture\` first`);
    }
    // Keep this vendor's entries from the chosen run that have a capture hash.
    const pages = [];
    for (const entry of entries) {
        if (entry.runLabel !== captureRun || entry.vendorId !== vendorId) {
            continue;
        }
        if (!entry.captureHash) {
            continue;
        }
        pages.push({
            kind: entry.kind,
            url: entry.url,
            captureHash: entry.captureHash,
            text: textByHash.get(entry.captureHash) ?? "",
        });
    }
    const instructions = "Produce one observation per claim (intensity loud|quiet|absent from these pages only; unobservable only if a page you need failed to capture). Every loud/quiet reading must quote a verbatim span (≤300 chars) from a page's text, with that page's url and captureHash in evidence metadata. Submit as an ObservationSet via `market observe --from <file>` — quotes are mechanically verified against the captures.";
    return {
        category: config.category,
        captureRun,
        surfaceRule: config.surfaceRule,
        vendor: { id: vendor.id, name: vendor.name },
        claims: config.claims,
        pages,
        instructions,
    };
}
|