npm - fullstackgtm - Versions diffs - 0.20.0 → 0.21.0 - Mend

fullstackgtm 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,37 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and the project adheres to [Semantic Versioning](https://semver.org/).
 The path to 1.0 is planned in [docs/roadmap-to-1.0.md](./docs/roadmap-to-1.0.md).
+## [0.21.0] — 2026-06-12
+Scale estimation v2 — dimensional, calibrated, SMB-bias-robust.
+### Changed
+- **`market scale` estimates revenue, not a normalized blend.** v1 averaged
+  [0,1]-normalized signals, which quietly mixed dimensions: review/customer
+  counts proxy CUSTOMER COUNT while employees/revenue proxy REVENUE, so
+  many-small-customer vendors outranked fewer-bigger-customer ones (observed
+  live: a ~$20M SMB dialer outranked a ~$33M mid-market platform). v2
+  converts every signal into revenue space first: revenue signals are used
+  directly; headcount × a revenue-per-employee ratio; customer counts × a
+  revenue-per-customer ratio **calibrated within the set (median over
+  vendors that have both) and stratified by the vendor's new `acvBand`** —
+  revenue-per-review spans ~75× between SMB tools and enterprise suites,
+  which is the bias, killed at the source. Per-vendor output is an
+  estimated revenue (weighted geometric mean: revenue 3 / headcount 2 /
+  customers 1) with a disclosed max/min **uncertainty spread**, an index =
+  share of the set's summed estimates, and the full calibration table.
+  Uncalibratable metrics (no revenue pair anywhere) are skipped and named.
+- Report bubbles: dot area ∝ estimated revenue share (normalized to the
+  set's max for visual range — ratios preserved); caption now says
+  "estimated revenue share … citable but NOT audited" and points at
+  `market scale` for the per-vendor estimates and spreads.
+- New config fields: `MarketVendor.acvBand` ("smb" | "mid" | "enterprise"
+  by convention — usually obvious from the pricing page the map already
+  captures) and `ScaleSignal.dimension` override.
+- SMB-bias regression test: many cheap-product reviews must not outrank
+  fewer expensive-product reviews when band calibration says otherwise.
 ## [0.20.0] — 2026-06-12
 The directive layer: the market map joined to your own CRM ground truth.

package/dist/index.d.ts CHANGED Viewed

@@ -24,7 +24,7 @@ export { captureMarket, computeFrontStates, createFileObservationStore, diffFron
 export { assessAxes, axesReportToText, axisPosition, messageBreadth, pcaTop2, pearson, type AxesReport, type AxisAssessment, type AxisPairing, type PrincipalComponent, } from "./marketAxes.ts";
 export { buildWorksheet, classifyMarket, type ClassifyMarketOptions, type ClassifyMarketResult, type MarketWorksheet, } from "./marketClassify.ts";
 export { computeDirectives, computeOverlayStats, directivesToPlan, overlayToMarkdown, type CallDocument, type ClaimMentionStats, type DirectiveStat, type DirectiveType, type MarketDirective, type OverlayOptions, type OverlayStats, type VendorMentionStats, } from "./marketOverlay.ts";
-export { computeScaleIndex, scaleReportToText, type ScaleReport, type VendorScale } from "./marketScale.ts";
+export { computeScaleIndex, dimensionForMetric, scaleReportToText, type ScaleDimension, type ScaleReport, type SignalEstimate, type VendorScale, } from "./marketScale.ts";
 export { marketMapToHtml, marketMapToMarkdown } from "./marketReport.ts";
 export { suggestValues, type SuggestionConfidence, type ValueSuggestion } from "./suggest.ts";
 export type { ApprovalStatus, AuditFinding, AuditFindingSeverity, CanonicalAccount, CanonicalActivity, CanonicalContact, CanonicalDeal, CanonicalGtmSnapshot, CanonicalUser, CrmProvider, GtmAuditRule, GtmConnector, GtmEvidence, GtmEvidenceSourceSystem, GtmObjectType, GtmPolicy, GtmRuleContext, GtmRuleResult, GtmSnapshotIndex, PatchOperation, PatchOperationResult, PatchOperationType, PatchPlan, PatchPlanRun, PatchPlanRunStatus, PatchVerification, PipelineFinding, PipelineFindingStatus, PipelineFindingType, ProviderIdentity, RiskLevel, SourceFreshness, } from "./types.ts";

package/dist/index.js CHANGED Viewed

@@ -24,6 +24,6 @@ export { captureMarket, computeFrontStates, createFileObservationStore, diffFron
 export { assessAxes, axesReportToText, axisPosition, messageBreadth, pcaTop2, pearson, } from "./marketAxes.js";
 export { buildWorksheet, classifyMarket, } from "./marketClassify.js";
 export { computeDirectives, computeOverlayStats, directivesToPlan, overlayToMarkdown, } from "./marketOverlay.js";
-export { computeScaleIndex, scaleReportToText } from "./marketScale.js";
+export { computeScaleIndex, dimensionForMetric, scaleReportToText, } from "./marketScale.js";
 export { marketMapToHtml, marketMapToMarkdown } from "./marketReport.js";
 export { suggestValues } from "./suggest.js";

package/dist/market.d.ts CHANGED Viewed

@@ -53,6 +53,12 @@ export type ScaleSignal = {
     quote: string;
     asOf: string;
     caveat?: string;
+    /**
+     * What the signal proxies: revenue (used directly), headcount, or
+     * customers (count-of-customers proxies like reviews). Inferred from the
+     * metric name when omitted; set explicitly for unusual metrics.
+     */
+    dimension?: "revenue" | "headcount" | "customers";
 };
 export type MarketVendor = {
     id: string;
@@ -67,6 +73,14 @@ export type MarketVendor = {
     aliases?: string[];
     /** Public scale signals; see ScaleSignal. */
     scaleSignals?: ScaleSignal[];
+    /**
+     * ACV stratum ("smb" | "mid" | "enterprise" by convention) used to
+     * calibrate customer-count → revenue conversion in the scale index.
+     * Revenue-per-customer differs ~75× between SMB tools and enterprise
+     * suites; stratifying kills the many-small-customers bias. Usually
+     * obvious from the vendor's own pricing page (which the map captures).
+     */
+    acvBand?: string;
     notes?: string;
 };
 export type MarketAxis = {

package/dist/marketReport.js CHANGED Viewed

@@ -131,9 +131,12 @@ function axisSectionsHtml(config, set) {
     const useScale = report.vendors.length > 0 && report.vendors.every((vendorId) => scaleIndex.get(vendorId) !== null && scaleIndex.get(vendorId) !== undefined);
     const loudCounts = new Map(report.vendors.map((vendorId) => [vendorId, messageBreadth(vendorId, set.observations).loudCount]));
     const maxLoud = Math.max(1, ...loudCounts.values());
-    const sizeOf = (vendorId) => useScale ? scaleIndex.get(vendorId) : (loudCounts.get(vendorId) ?? 0) / maxLoud;
+    // Bubble areas stay proportional to the metric; dividing by the max just
+    // spends the full visual range without distorting any ratio.
+    const maxShare = Math.max(1e-9, ...report.vendors.map((vendorId) => scaleIndex.get(vendorId) ?? 0));
+    const sizeOf = (vendorId) => useScale ? scaleIndex.get(vendorId) / maxShare : (loudCounts.get(vendorId) ?? 0) / maxLoud;
     const sizeCaption = useScale
-        ? `Dot area &#8733; relative scale index (within this vendor set, from: ${e(scale.metricsUsed.join(", "))} — citable signals, not true market share)`
+        ? `Dot area &#8733; estimated revenue share of this vendor set (signals: ${e(scale.metricsUsed.join(", "))}; calibrated within-set, ACV-band stratified, citable but NOT audited — see \`market scale\` for estimates and spreads)`
         : "Dot area &#8733; LOUD count";
     const breadthAxis = {
         id: "breadth",

package/dist/marketScale.d.ts CHANGED Viewed

@@ -1,41 +1,70 @@
 import type { MarketConfig, ScaleSignal } from "./market.ts";
 /**
- * Relative scale index over the mapped vendor set — the honest version of
- * "bubble size = market share". True segment market share is unknowable from
- * public data for mostly-private vendor sets, so this computes a composite
- * index from whatever citable signals exist per vendor (review counts,
- * headcount, disclosed revenue, self-reported customers), each of which is
- * biased in a different direction; the composite triangulates.
+ * Relative scale estimation over the mapped vendor set — v2, dimensional.
  *
- * Method, deterministic and auditable:
- *   1. Per metric, log10(value + 1) — these signals span orders of magnitude.
- *   2. Normalize each metric to [0, 1] across the vendors that HAVE it
- *      (min–max within the set; a metric only one vendor has is skipped —
- *      it cannot rank anyone).
- *   3. A vendor's index = arithmetic mean of its normalized metric scores
- *      (mean-of-normalized rather than geometric-of-raw so missing signals
- *      neither punish nor reward), reported with coverage (which metrics).
+ * v1 normalized every signal onto [0,1] and averaged, which quietly mixed
+ * dimensions: review/customer counts proxy CUSTOMER COUNT (N), while
+ * employees and revenue proxy REVENUE (N × ACV). Averaging the two inflates
+ * many-small-customer vendors against few-big-customer ones — the SMB bias.
  *
- * Vendors with zero signals get index null — the report falls back to its
- * LOUD-count sizing for the whole map rather than mixing semantics.
+ * v2 converts every signal into REVENUE SPACE before combining:
+ *
+ *   1. Signals are classed by dimension: revenue (used directly),
+ *      headcount (× revenue-per-employee), customers (× revenue-per-customer).
+ *   2. Conversion ratios are CALIBRATED within the set, per metric, as the
+ *      median ratio over vendors that have both the metric and a revenue
+ *      signal — and customer-dimension ratios are stratified by each
+ *      vendor's `acvBand` (smb / mid / enterprise), because revenue-per-
+ *      review spans ~75× between SMB tools and enterprise suites. A band
+ *      without calibration pairs falls back to the global median; a metric
+ *      with no pairs anywhere is unusable and reported as skipped.
+ *   3. A vendor's estimated revenue is the weighted geometric mean of its
+ *      per-signal estimates (revenue weight 3, headcount 2, customers 1 —
+ *      reliability order), with an uncertainty band = max/min estimate
+ *      ratio, reported, never hidden.
+ *   4. index = share of the set's summed estimated revenue; bubbles render
+ *      area-proportional to it. Labeled "estimated revenue share" with the
+ *      calibration disclosed — still never "market share" unqualified:
+ *      it is revenue share OF THE MAPPED SET, from citable-but-unaudited
+ *      signals.
+ *
+ * Deterministic and auditable end to end: same config, same estimates.
  */
+export type ScaleDimension = "revenue" | "headcount" | "customers";
+export declare function dimensionForMetric(metric: string): ScaleDimension;
+export type SignalEstimate = {
+    metric: string;
+    dimension: ScaleDimension;
+    rawValue: number;
+    /** Revenue-per-unit ratio applied (1 for revenue signals); null = unusable. */
+    ratio: number | null;
+    estimatedRevenue: number | null;
+    /** What calibrated the ratio: "direct", "band:<name>", "global", "fallback", or "uncalibratable" (no ratio available; ratio and estimatedRevenue are null). */
+    calibration: string;
+};
 export type VendorScale = {
     vendorId: string;
-    /** [0, 1] within the mapped set; null when the vendor has no usable signals. */
+    acvBand?: string;
+    estimates: SignalEstimate[];
+    /** Weighted geometric mean of usable estimates; null with no usable signals. */
+    estimatedRevenue: number | null;
+    /** max/min across usable estimates — 1 = perfect agreement among signals. */
+    uncertainty: number | null;
+    /** Share of the set's summed estimated revenue; drives bubble area. */
     index: number | null;
-    /** Metrics that contributed, with their normalized scores. */
-    coverage: Array<{
-        metric: string;
-        value: number;
-        normalized: number;
-    }>;
     signals: ScaleSignal[];
 };
 export type ScaleReport = {
     vendors: VendorScale[];
-    /** Metrics used (present for ≥2 vendors) and metrics skipped (singletons). */
     metricsUsed: string[];
     metricsSkipped: string[];
+    /** Calibrated ratios for the appendix: metric × stratum → revenue-per-unit. */
+    calibrations: Array<{
+        metric: string;
+        stratum: string;
+        revenuePerUnit: number;
+        pairs: number;
+    }>;
     complete: boolean;
 };
 export declare function computeScaleIndex(config: MarketConfig): ScaleReport;

package/dist/marketScale.js CHANGED Viewed

@@ -1,68 +1,168 @@
+const DIMENSION_WEIGHT = { revenue: 3, headcount: 2, customers: 1 };
+/** Used only when a headcount metric has zero calibration pairs in the whole set. */
+const FALLBACK_REVENUE_PER_EMPLOYEE = 200_000;
/**
 * Infer which scale dimension a metric name proxies.
 *
 * - "revenue": the name contains "revenue", or contains "arr" as a standalone
 *   word (e.g. "arr", "arr_usd", "disclosedArr", "ARR 2025").
 * - "headcount": the name contains "employee" or "headcount".
 * - "customers": everything else (reviews, customers, installs —
 *   count-of-customers proxies).
 *
 * "arr" is matched as a whole word rather than a substring so that names such
 * as "g2_starred_reviews" or "carrier_customers" are not mistaken for revenue
 * and fed into the estimate as raw dollar amounts.
 *
 * @param {string} metric Metric name as written in the config.
 * @returns {"revenue" | "headcount" | "customers"} The inferred dimension.
 */
export function dimensionForMetric(metric) {
    const name = metric.toLowerCase();
    // Break camelCase and acronym boundaries before lowercasing, then split on
    // anything that is not a letter, so "estimatedArr", "ARRUsd" and "arr2025"
    // all yield a standalone "arr" word.
    const words = metric
        .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
        .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
        .toLowerCase()
        .split(/[^a-z]+/);
    if (name.includes("revenue") || words.includes("arr"))
        return "revenue";
    if (name.includes("employee") || name.includes("headcount"))
        return "headcount";
    return "customers"; // reviews, customers, installs — count-of-customers proxies
}
/**
 * Median of a list of numbers, computed on a sorted copy so the caller's
 * array is left untouched. An even-length list yields the mean of the two
 * middle values.
 *
 * @param {number[]} values Numbers to summarise.
 * @returns {number} The median value.
 */
function median(values) {
    const ordered = Array.from(values);
    ordered.sort((left, right) => left - right);
    const count = ordered.length;
    const upper = Math.floor(count / 2);
    if (count % 2 === 1) {
        return ordered[upper];
    }
    return (ordered[upper - 1] + ordered[upper]) / 2;
}
 export function computeScaleIndex(config) {
-    const byMetric = new Map();
+    const rows = [];
     for (const vendor of config.vendors) {
         for (const signal of vendor.scaleSignals ?? []) {
-            if (!Number.isFinite(signal.value) || signal.value < 0)
+            if (!Number.isFinite(signal.value) || signal.value <= 0)
                 continue;
-            const rows = byMetric.get(signal.metric) ?? [];
-            rows.push({ vendorId: vendor.id, value: signal.value });
-            byMetric.set(signal.metric, rows);
+            rows.push({
+                vendorId: vendor.id,
+                band: vendor.acvBand ?? "unknown",
+                metric: signal.metric,
+                dimension: signal.dimension ?? dimensionForMetric(signal.metric),
+                value: signal.value,
+            });
         }
     }
-    const metricsUsed = [];
-    const metricsSkipped = [];
-    const normalized = new Map();
-    for (const [metric, rows] of byMetric) {
-        // Last write wins if a vendor lists the same metric twice.
-        const perVendor = new Map(rows.map((row) => [row.vendorId, row.value]));
-        if (perVendor.size < 2) {
-            metricsSkipped.push(metric);
-            continue;
-        }
-        metricsUsed.push(metric);
-        const logs = new Map([...perVendor].map(([vendorId, value]) => [vendorId, Math.log10(value + 1)]));
-        const values = [...logs.values()];
-        const lo = Math.min(...values);
-        const hi = Math.max(...values);
-        const span = hi - lo || 1;
-        const scores = new Map();
-        for (const [vendorId, log] of logs) {
-            scores.set(vendorId, { value: perVendor.get(vendorId), normalized: (log - lo) / span });
+    // A vendor's reference revenue for calibration: median of its revenue signals.
+    const revenueByVendor = new Map();
+    for (const vendor of config.vendors) {
+        const revenues = rows
+            .filter((row) => row.vendorId === vendor.id && row.dimension === "revenue")
+            .map((row) => row.value);
+        if (revenues.length > 0)
+            revenueByVendor.set(vendor.id, median(revenues));
+    }
+    // Per-metric calibration. Customer-dimension metrics stratify by acvBand;
+    // headcount calibrates globally (revenue-per-employee is the most stable
+    // ratio in B2B software, which is also why headcount outweighs customers).
+    const calibrations = [];
+    const ratioFor = new Map();
+    const nonRevenueMetrics = [...new Set(rows.filter((row) => row.dimension !== "revenue").map((row) => row.metric))].sort();
+    for (const metric of nonRevenueMetrics) {
+        const pairs = rows
+            .filter((row) => row.metric === metric && revenueByVendor.has(row.vendorId))
+            .map((row) => ({ band: row.band, ratio: revenueByVendor.get(row.vendorId) / row.value }));
+        const byBand = new Map();
+        if (dimensionForMetric(metric) === "customers") {
+            for (const band of [...new Set(pairs.map((pair) => pair.band))]) {
+                const bandRatios = pairs.filter((pair) => pair.band === band).map((pair) => pair.ratio);
+                if (bandRatios.length >= 1) {
+                    byBand.set(band, median(bandRatios));
+                    calibrations.push({ metric, stratum: `band:${band}`, revenuePerUnit: Math.round(median(bandRatios)), pairs: bandRatios.length });
+                }
+            }
         }
-        normalized.set(metric, scores);
+        const global = pairs.length > 0 ? median(pairs.map((pair) => pair.ratio)) : null;
+        if (global !== null)
+            calibrations.push({ metric, stratum: "global", revenuePerUnit: Math.round(global), pairs: pairs.length });
+        ratioFor.set(metric, { global, byBand });
     }
-    metricsUsed.sort();
-    metricsSkipped.sort();
+    const metricsUsed = new Set();
+    const metricsSkipped = new Set();
     const vendors = config.vendors.map((vendor) => {
-        const coverage = [];
-        for (const metric of metricsUsed) {
-            const score = normalized.get(metric)?.get(vendor.id);
-            if (score)
-                coverage.push({ metric, value: score.value, normalized: Number(score.normalized.toFixed(4)) });
+        const vendorRows = rows.filter((row) => row.vendorId === vendor.id);
+        const estimates = vendorRows.map((row) => {
+            if (row.dimension === "revenue") {
+                metricsUsed.add(row.metric);
+                return { metric: row.metric, dimension: row.dimension, rawValue: row.value, ratio: 1, estimatedRevenue: row.value, calibration: "direct" };
+            }
+            const calibration = ratioFor.get(row.metric);
+            let ratio = null;
+            let stratum = "fallback";
+            if (calibration) {
+                if (row.dimension === "customers" && calibration.byBand.has(row.band)) {
+                    ratio = calibration.byBand.get(row.band);
+                    stratum = `band:${row.band}`;
+                }
+                else if (calibration.global !== null) {
+                    ratio = calibration.global;
+                    stratum = "global";
+                }
+            }
+            if (ratio === null && row.dimension === "headcount") {
+                ratio = FALLBACK_REVENUE_PER_EMPLOYEE;
+                stratum = "fallback";
+            }
+            if (ratio === null) {
+                metricsSkipped.add(row.metric);
+                return { metric: row.metric, dimension: row.dimension, rawValue: row.value, ratio: null, estimatedRevenue: null, calibration: "uncalibratable" };
+            }
+            metricsUsed.add(row.metric);
+            return { metric: row.metric, dimension: row.dimension, rawValue: row.value, ratio, estimatedRevenue: row.value * ratio, calibration: stratum };
+        });
+        const usable = estimates.filter((estimate) => estimate.estimatedRevenue !== null);
+        let estimatedRevenue = null;
+        let uncertainty = null;
+        if (usable.length > 0) {
+            let weightSum = 0;
+            let logSum = 0;
+            for (const estimate of usable) {
+                const weight = DIMENSION_WEIGHT[estimate.dimension];
+                weightSum += weight;
+                logSum += weight * Math.log(estimate.estimatedRevenue);
+            }
+            estimatedRevenue = Math.exp(logSum / weightSum);
+            const values = usable.map((estimate) => estimate.estimatedRevenue);
+            uncertainty = Number((Math.max(...values) / Math.min(...values)).toFixed(2));
         }
-        const index = coverage.length > 0
-            ? Number((coverage.reduce((sum, entry) => sum + entry.normalized, 0) / coverage.length).toFixed(4))
-            : null;
-        return { vendorId: vendor.id, index, coverage, signals: vendor.scaleSignals ?? [] };
+        return {
+            vendorId: vendor.id,
+            acvBand: vendor.acvBand,
+            estimates,
+            estimatedRevenue,
+            uncertainty,
+            index: null,
+            signals: vendor.scaleSignals ?? [],
+        };
     });
+    const total = vendors.reduce((sum, vendor) => sum + (vendor.estimatedRevenue ?? 0), 0);
+    for (const vendor of vendors) {
+        vendor.index = vendor.estimatedRevenue !== null && total > 0 ? Number((vendor.estimatedRevenue / total).toFixed(4)) : null;
+    }
     return {
         vendors,
-        metricsUsed,
-        metricsSkipped,
+        metricsUsed: [...metricsUsed].sort(),
+        metricsSkipped: [...metricsSkipped].filter((metric) => !metricsUsed.has(metric)).sort(),
+        calibrations,
         complete: vendors.every((vendor) => vendor.index !== null),
     };
 }
/**
 * Format a dollar amount compactly for the text report.
 *
 * - >= 1e9: billions with one decimal, e.g. "$2.5B"
 * - >= 1e6: millions with one decimal, e.g. "$33.0M"
 * - >= 1e3: whole thousands, e.g. "$20K"
 * - below 1e3: whole dollars, e.g. "$450"
 *
 * The sub-thousand branch matters for the calibration table: revenue-per-unit
 * ratios can be far below $1,000 and would otherwise print as "$0K" or "$1K".
 *
 * @param {number} value Amount in dollars.
 * @returns {string} Compact display string.
 */
function money(value) {
    if (value >= 1e9)
        return `$${(value / 1e9).toFixed(1)}B`;
    if (value >= 1e6)
        return `$${(value / 1e6).toFixed(1)}M`;
    if (value >= 1e3)
        return `$${Math.round(value / 1e3)}K`;
    return `$${Math.round(value)}`;
}
 export function scaleReportToText(config, report) {
     const names = new Map(config.vendors.map((vendor) => [vendor.id, vendor.name]));
     const lines = [];
-    lines.push(`Scale index (relative, within this ${config.vendors.length}-vendor set — not market share):`);
-    lines.push(`metrics used: ${report.metricsUsed.join(", ") || "none"}${report.metricsSkipped.length ? ` · skipped (single-vendor): ${report.metricsSkipped.join(", ")}` : ""}`);
+    lines.push(`Estimated revenue share (of this ${config.vendors.length}-vendor set; calibrated from citable signals, NOT audited):`);
+    lines.push(`metrics: ${report.metricsUsed.join(", ") || "none"}${report.metricsSkipped.length ? ` · uncalibratable: ${report.metricsSkipped.join(", ")}` : ""}`);
     lines.push("");
-    const ranked = [...report.vendors].sort((a, b) => (b.index ?? -1) - (a.index ?? -1));
+    const ranked = [...report.vendors].sort((a, b) => (b.estimatedRevenue ?? -1) - (a.estimatedRevenue ?? -1));
     for (const vendor of ranked) {
-        const idx = vendor.index === null ? "  n/a" : vendor.index.toFixed(2);
-        const cov = vendor.coverage.map((entry) => `${entry.metric}=${entry.value}`).join(", ") || "no signals";
-        lines.push(`  ${idx}  ${(names.get(vendor.vendorId) ?? vendor.vendorId).padEnd(22)} ${cov}`);
+        if (vendor.estimatedRevenue === null) {
+            lines.push(`   n/a  ${(names.get(vendor.vendorId) ?? vendor.vendorId).padEnd(22)} no usable signals`);
+            continue;
+        }
+        const share = `${((vendor.index ?? 0) * 100).toFixed(1)}%`.padStart(6);
+        const spread = vendor.uncertainty !== null && vendor.uncertainty > 1 ? ` (×${vendor.uncertainty.toFixed(1)} signal spread)` : "";
+        lines.push(`${share}  ${(names.get(vendor.vendorId) ?? vendor.vendorId).padEnd(22)} ~${money(vendor.estimatedRevenue)}${spread}  [${vendor.estimates
+            .filter((estimate) => estimate.estimatedRevenue !== null)
+            .map((estimate) => `${estimate.metric}→${money(estimate.estimatedRevenue)}`)
+            .join(", ")}]`);
+    }
+    lines.push("");
+    lines.push("calibrations (median revenue-per-unit):");
+    for (const calibration of report.calibrations) {
+        lines.push(`  ${calibration.metric.padEnd(26)} ${calibration.stratum.padEnd(16)} ${money(calibration.revenuePerUnit)}/unit (${calibration.pairs} pair${calibration.pairs === 1 ? "" : "s"})`);
     }
     return `${lines.join("\n")}\n`;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "fullstackgtm",
-  "version": "0.20.0",
+  "version": "0.21.0",
   "description": "Open-source agentic GTM ops framework: canonical GTM data model, pluggable deterministic audits, reviewable dry-run patch plans, approval-gated write-back with conflict detection, and cross-system entity resolution. HubSpot, Salesforce, and Stripe connectors included.",
   "license": "Apache-2.0",
   "author": "Full Stack GTM",

package/src/index.ts CHANGED Viewed

@@ -197,7 +197,15 @@ export {
   type OverlayStats,
   type VendorMentionStats,
 } from "./marketOverlay.ts";
-export { computeScaleIndex, scaleReportToText, type ScaleReport, type VendorScale } from "./marketScale.ts";
+export {
+  computeScaleIndex,
+  dimensionForMetric,
+  scaleReportToText,
+  type ScaleDimension,
+  type ScaleReport,
+  type SignalEstimate,
+  type VendorScale,
+} from "./marketScale.ts";
 export { marketMapToHtml, marketMapToMarkdown } from "./marketReport.ts";
 export { suggestValues, type SuggestionConfidence, type ValueSuggestion } from "./suggest.ts";
 export type {

package/src/market.ts CHANGED Viewed

@@ -63,6 +63,12 @@ export type ScaleSignal = {
   quote: string;
   asOf: string;
   caveat?: string;
+  /**
+   * What the signal proxies: revenue (used directly), headcount, or
+   * customers (count-of-customers proxies like reviews). Inferred from the
+   * metric name when omitted; set explicitly for unusual metrics.
+   */
+  dimension?: "revenue" | "headcount" | "customers";
 };
 export type MarketVendor = {
@@ -78,6 +84,14 @@ export type MarketVendor = {
   aliases?: string[];
   /** Public scale signals; see ScaleSignal. */
   scaleSignals?: ScaleSignal[];
+  /**
+   * ACV stratum ("smb" | "mid" | "enterprise" by convention) used to
+   * calibrate customer-count → revenue conversion in the scale index.
+   * Revenue-per-customer differs ~75× between SMB tools and enterprise
+   * suites; stratifying kills the many-small-customers bias. Usually
+   * obvious from the vendor's own pricing page (which the map captures).
+   */
+  acvBand?: string;
   notes?: string;
 };

package/src/marketReport.ts CHANGED Viewed

@@ -172,10 +172,13 @@ function axisSectionsHtml(
   const useScale = report.vendors.length > 0 && report.vendors.every((vendorId) => scaleIndex.get(vendorId) !== null && scaleIndex.get(vendorId) !== undefined);
   const loudCounts = new Map(report.vendors.map((vendorId) => [vendorId, messageBreadth(vendorId, set.observations).loudCount]));
   const maxLoud = Math.max(1, ...loudCounts.values());
+  // Bubble areas stay proportional to the metric; dividing by the max just
+  // spends the full visual range without distorting any ratio.
+  const maxShare = Math.max(1e-9, ...report.vendors.map((vendorId) => scaleIndex.get(vendorId) ?? 0));
   const sizeOf = (vendorId: string): number =>
-    useScale ? (scaleIndex.get(vendorId) as number) : (loudCounts.get(vendorId) ?? 0) / maxLoud;
+    useScale ? (scaleIndex.get(vendorId) as number) / maxShare : (loudCounts.get(vendorId) ?? 0) / maxLoud;
   const sizeCaption = useScale
-    ? `Dot area &#8733; relative scale index (within this vendor set, from: ${e(scale.metricsUsed.join(", "))} — citable signals, not true market share)`
+    ? `Dot area &#8733; estimated revenue share of this vendor set (signals: ${e(scale.metricsUsed.join(", "))}; calibrated within-set, ACV-band stratified, citable but NOT audited — see \`market scale\` for estimates and spreads)`
     : "Dot area &#8733; LOUD count";
   const breadthAxis: ScatterAxis & { id: string } = {

package/src/marketScale.ts CHANGED Viewed

@@ -1,111 +1,251 @@
 import type { MarketConfig, ScaleSignal } from "./market.ts";
 /**
- * Relative scale index over the mapped vendor set — the honest version of
- * "bubble size = market share". True segment market share is unknowable from
- * public data for mostly-private vendor sets, so this computes a composite
- * index from whatever citable signals exist per vendor (review counts,
- * headcount, disclosed revenue, self-reported customers), each of which is
- * biased in a different direction; the composite triangulates.
+ * Relative scale estimation over the mapped vendor set — v2, dimensional.
  *
- * Method, deterministic and auditable:
- *   1. Per metric, log10(value + 1) — these signals span orders of magnitude.
- *   2. Normalize each metric to [0, 1] across the vendors that HAVE it
- *      (min–max within the set; a metric only one vendor has is skipped —
- *      it cannot rank anyone).
- *   3. A vendor's index = arithmetic mean of its normalized metric scores
- *      (mean-of-normalized rather than geometric-of-raw so missing signals
- *      neither punish nor reward), reported with coverage (which metrics).
+ * v1 normalized every signal onto [0,1] and averaged, which quietly mixed
+ * dimensions: review/customer counts proxy CUSTOMER COUNT (N), while
+ * employees and revenue proxy REVENUE (N × ACV). Averaging the two inflates
+ * many-small-customer vendors against few-big-customer ones — the SMB bias.
  *
- * Vendors with zero signals get index null — the report falls back to its
- * LOUD-count sizing for the whole map rather than mixing semantics.
+ * v2 converts every signal into REVENUE SPACE before combining:
+ *
+ *   1. Signals are classed by dimension: revenue (used directly),
+ *      headcount (× revenue-per-employee), customers (× revenue-per-customer).
+ *   2. Conversion ratios are CALIBRATED within the set, per metric, as the
+ *      median ratio over vendors that have both the metric and a revenue
+ *      signal — and customer-dimension ratios are stratified by each
+ *      vendor's `acvBand` (smb / mid / enterprise), because revenue-per-
+ *      review spans ~75× between SMB tools and enterprise suites. A band
+ *      without calibration pairs falls back to the global median; a customer
+ *      metric with no pairs anywhere is unusable and reported as skipped (headcount instead uses a fixed fallback ratio).
+ *   3. A vendor's estimated revenue is the weighted geometric mean of its
+ *      per-signal estimates (revenue weight 3, headcount 2, customers 1 —
+ *      reliability order), with an uncertainty band = max/min estimate
+ *      ratio, reported, never hidden.
+ *   4. index = share of the set's summed estimated revenue; bubbles render
+ *      area-proportional to it. Labeled "estimated revenue share" with the
+ *      calibration disclosed — still never "market share" unqualified:
+ *      it is revenue share OF THE MAPPED SET, from citable-but-unaudited
+ *      signals.
+ *
+ * Deterministic and auditable end to end: same config, same estimates.
  */
+export type ScaleDimension = "revenue" | "headcount" | "customers"; // what a signal measures; decides how it is converted into revenue space
+const DIMENSION_WEIGHT: Record<ScaleDimension, number> = { revenue: 3, headcount: 2, customers: 1 }; // weights of the per-vendor weighted geometric mean
+/** Revenue-per-employee ratio used only when a headcount metric has zero calibration pairs in the whole set. */
+const FALLBACK_REVENUE_PER_EMPLOYEE = 200_000;
+export function dimensionForMetric(metric: string): ScaleDimension { // classifies a metric by case-insensitive substring match on its name
+  const name = metric.toLowerCase();
+  if (name.includes("revenue") || name.includes("arr")) return "revenue"; // NOTE(review): "arr" matches anywhere in the name (e.g. "carrier") — confirm metric naming avoids false hits
+  if (name.includes("employee") || name.includes("headcount")) return "headcount";
+  return "customers"; // reviews, customers, installs — count-of-customers proxies; also the default for any unrecognized name
+}
+export type SignalEstimate = { // one scale signal of a vendor, converted into revenue space
+  metric: string;
+  dimension: ScaleDimension;
+  rawValue: number; // the signal's value as supplied, before conversion
+  /** Revenue-per-unit ratio applied (1 for revenue signals); null = unusable. */
+  ratio: number | null;
+  estimatedRevenue: number | null; // rawValue × ratio; null when no ratio could be calibrated
+  /** What calibrated the ratio: "direct", "band:<name>", "global", "fallback", or "uncalibratable" when ratio is null. */
+  calibration: string;
+};
 export type VendorScale = {
   vendorId: string;
-  /** [0, 1] within the mapped set; null when the vendor has no usable signals. */
+  acvBand?: string; // copied through from the vendor config; undefined when the vendor declares none
+  estimates: SignalEstimate[]; // one entry per finite, positive signal — including ones that could not be calibrated
+  /** Weighted geometric mean of usable estimates; null with no usable signals. */
+  estimatedRevenue: number | null;
+  /** max/min across usable estimates, rounded to 2 decimals — 1 = perfect agreement among signals; null with no usable signals. */
+  uncertainty: number | null;
+  /** Share of the set's summed estimated revenue (rounded to 4 decimals); drives bubble area. Null when the vendor has no estimate. */
   index: number | null;
-  /** Metrics that contributed, with their normalized scores. */
-  coverage: Array<{ metric: string; value: number; normalized: number }>;
   signals: ScaleSignal[];
 };
 export type ScaleReport = {
   vendors: VendorScale[];
-  /** Metrics used (present for ≥2 vendors) and metrics skipped (singletons). */
   metricsUsed: string[];
   metricsSkipped: string[];
+  /** Calibrated ratios for the appendix: metric × stratum ("band:<name>" or "global") → rounded revenue-per-unit, with the number of calibration pairs behind each. */
+  calibrations: Array<{ metric: string; stratum: string; revenuePerUnit: number; pairs: number }>;
   complete: boolean;
 };
+function median(values: number[]): number { // median of `values`; an empty array yields NaN, so callers check for a non-empty list first
+  const sorted = [...values].sort((a, b) => a - b); // numeric sort on a copy, leaving the caller's array untouched
+  const mid = Math.floor(sorted.length / 2);
+  return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; // odd length: middle value; even length: mean of the two middle values
+}
 export function computeScaleIndex(config: MarketConfig): ScaleReport {
-  const byMetric = new Map<string, Array<{ vendorId: string; value: number }>>();
+  type Row = { vendorId: string; band: string; metric: string; dimension: ScaleDimension; value: number }; // one usable signal, flattened with its vendor's id and ACV band
+  const rows: Row[] = [];
   for (const vendor of config.vendors) {
     for (const signal of vendor.scaleSignals ?? []) {
-      if (!Number.isFinite(signal.value) || signal.value < 0) continue;
-      const rows = byMetric.get(signal.metric) ?? [];
-      rows.push({ vendorId: vendor.id, value: signal.value });
-      byMetric.set(signal.metric, rows);
+      if (!Number.isFinite(signal.value) || signal.value <= 0) continue; // zero is dropped too: values are later used as divisors and inside Math.log
+      rows.push({
+        vendorId: vendor.id,
+        band: vendor.acvBand ?? "unknown", // vendors without an acvBand share one "unknown" stratum
+        metric: signal.metric,
+        dimension: signal.dimension ?? dimensionForMetric(signal.metric), // an explicit signal.dimension wins over the name-based guess
+        value: signal.value,
+      });
     }
   }
-  const metricsUsed: string[] = [];
-  const metricsSkipped: string[] = [];
-  const normalized = new Map<string, Map<string, { value: number; normalized: number }>>();
-  for (const [metric, rows] of byMetric) {
-    // Last write wins if a vendor lists the same metric twice.
-    const perVendor = new Map(rows.map((row) => [row.vendorId, row.value]));
-    if (perVendor.size < 2) {
-      metricsSkipped.push(metric);
-      continue;
-    }
-    metricsUsed.push(metric);
-    const logs = new Map([...perVendor].map(([vendorId, value]) => [vendorId, Math.log10(value + 1)]));
-    const values = [...logs.values()];
-    const lo = Math.min(...values);
-    const hi = Math.max(...values);
-    const span = hi - lo || 1;
-    const scores = new Map<string, { value: number; normalized: number }>();
-    for (const [vendorId, log] of logs) {
-      scores.set(vendorId, { value: perVendor.get(vendorId) as number, normalized: (log - lo) / span });
+  // A vendor's reference revenue for calibration: median of its revenue signals.
+  const revenueByVendor = new Map<string, number>();
+  for (const vendor of config.vendors) {
+    const revenues = rows
+      .filter((row) => row.vendorId === vendor.id && row.dimension === "revenue")
+      .map((row) => row.value);
+    if (revenues.length > 0) revenueByVendor.set(vendor.id, median(revenues)); // vendors with no revenue signal contribute no calibration pairs
+  }
+  // Per-metric calibration. Customer-dimension metrics stratify by acvBand;
+  // headcount calibrates globally (revenue-per-employee is the most stable
+  // ratio in B2B software, which is also why headcount outweighs customers).
+  const calibrations: ScaleReport["calibrations"] = [];
+  const ratioFor = new Map<string, { global: number | null; byBand: Map<string, number> }>();
+  const nonRevenueMetrics = [...new Set(rows.filter((row) => row.dimension !== "revenue").map((row) => row.metric))].sort(); // sorted so the calibration table order is deterministic
+  for (const metric of nonRevenueMetrics) {
+    const pairs = rows
+      .filter((row) => row.metric === metric && revenueByVendor.has(row.vendorId))
+      .map((row) => ({ band: row.band, ratio: (revenueByVendor.get(row.vendorId) as number) / row.value })); // one revenue-per-unit observation per row; a metric listed twice by a vendor yields two pairs
+    const byBand = new Map<string, number>();
+    if (dimensionForMetric(metric) === "customers") { // NOTE(review): name-based, so an explicit signal.dimension override on the rows is not honoured here — confirm
+      for (const band of [...new Set(pairs.map((pair) => pair.band))]) {
+        const bandRatios = pairs.filter((pair) => pair.band === band).map((pair) => pair.ratio);
+        if (bandRatios.length >= 1) { // always true here: every band comes from at least one pair
+          byBand.set(band, median(bandRatios));
+          calibrations.push({ metric, stratum: `band:${band}`, revenuePerUnit: Math.round(median(bandRatios)), pairs: bandRatios.length }); // the table holds the rounded ratio; estimates use the unrounded one
+        }
+      }
     }
-    normalized.set(metric, scores);
+    const global = pairs.length > 0 ? median(pairs.map((pair) => pair.ratio)) : null; // median over all bands; null when no vendor has both this metric and a revenue signal
+    if (global !== null) calibrations.push({ metric, stratum: "global", revenuePerUnit: Math.round(global), pairs: pairs.length });
+    ratioFor.set(metric, { global, byBand });
   }
-  metricsUsed.sort();
-  metricsSkipped.sort();
+  const metricsUsed = new Set<string>();
+  const metricsSkipped = new Set<string>();
   const vendors: VendorScale[] = config.vendors.map((vendor) => {
-    const coverage: VendorScale["coverage"] = [];
-    for (const metric of metricsUsed) {
-      const score = normalized.get(metric)?.get(vendor.id);
-      if (score) coverage.push({ metric, value: score.value, normalized: Number(score.normalized.toFixed(4)) });
+    const vendorRows = rows.filter((row) => row.vendorId === vendor.id);
+    const estimates: SignalEstimate[] = vendorRows.map((row) => {
+      if (row.dimension === "revenue") {
+        metricsUsed.add(row.metric);
+        return { metric: row.metric, dimension: row.dimension, rawValue: row.value, ratio: 1, estimatedRevenue: row.value, calibration: "direct" }; // revenue signals pass through unchanged
+      }
+      const calibration = ratioFor.get(row.metric);
+      let ratio: number | null = null;
+      let stratum = "fallback";
+      if (calibration) {
+        if (row.dimension === "customers" && calibration.byBand.has(row.band)) { // prefer the ratio calibrated within the vendor's own band
+          ratio = calibration.byBand.get(row.band) as number;
+          stratum = `band:${row.band}`;
+        } else if (calibration.global !== null) { // otherwise the set-wide median for this metric
+          ratio = calibration.global;
+          stratum = "global";
+        }
+      }
+      if (ratio === null && row.dimension === "headcount") { // headcount is never skipped: it falls back to the fixed ratio
+        ratio = FALLBACK_REVENUE_PER_EMPLOYEE;
+        stratum = "fallback";
+      }
+      if (ratio === null) { // only customer-dimension signals can end up here
+        metricsSkipped.add(row.metric);
+        return { metric: row.metric, dimension: row.dimension, rawValue: row.value, ratio: null, estimatedRevenue: null, calibration: "uncalibratable" };
+      }
+      metricsUsed.add(row.metric);
+      return { metric: row.metric, dimension: row.dimension, rawValue: row.value, ratio, estimatedRevenue: row.value * ratio, calibration: stratum };
+    });
+    const usable = estimates.filter(
+      (estimate): estimate is SignalEstimate & { estimatedRevenue: number } => estimate.estimatedRevenue !== null,
+    );
+    let estimatedRevenue: number | null = null;
+    let uncertainty: number | null = null;
+    if (usable.length > 0) {
+      let weightSum = 0;
+      let logSum = 0;
+      for (const estimate of usable) {
+        const weight = DIMENSION_WEIGHT[estimate.dimension]; // each signal is weighted by its dimension, so several signals of one dimension each count in full
+        weightSum += weight;
+        logSum += weight * Math.log(estimate.estimatedRevenue);
+      }
+      estimatedRevenue = Math.exp(logSum / weightSum); // weighted geometric mean, computed in log space
+      const values = usable.map((estimate) => estimate.estimatedRevenue);
+      uncertainty = Number((Math.max(...values) / Math.min(...values)).toFixed(2)); // max/min spread; exactly 1 when there is a single usable estimate
     }
-    const index =
-      coverage.length > 0
-        ? Number((coverage.reduce((sum, entry) => sum + entry.normalized, 0) / coverage.length).toFixed(4))
-        : null;
-    return { vendorId: vendor.id, index, coverage, signals: vendor.scaleSignals ?? [] };
+    return {
+      vendorId: vendor.id,
+      acvBand: vendor.acvBand,
+      estimates,
+      estimatedRevenue,
+      uncertainty,
+      index: null, // filled in below, once the set-wide total is known
+      signals: vendor.scaleSignals ?? [],
+    };
   });
+  const total = vendors.reduce((sum, vendor) => sum + (vendor.estimatedRevenue ?? 0), 0); // vendors without an estimate add nothing to the total
+  for (const vendor of vendors) {
+    vendor.index = vendor.estimatedRevenue !== null && total > 0 ? Number((vendor.estimatedRevenue / total).toFixed(4)) : null; // share of the set total, rounded to 4 decimals
+  }
   return {
     vendors,
-    metricsUsed,
-    metricsSkipped,
+    metricsUsed: [...metricsUsed].sort(),
+    metricsSkipped: [...metricsSkipped].filter((metric) => !metricsUsed.has(metric)).sort(), // a metric usable for at least one vendor is reported as used only
+    calibrations,
     complete: vendors.every((vendor) => vendor.index !== null),
   };
 }
+function money(value: number): string { // compact dollar label: one-decimal billions or millions, otherwise whole thousands
+  if (value >= 1e9) return `$${(value / 1e9).toFixed(1)}B`;
+  if (value >= 1e6) return `$${(value / 1e6).toFixed(1)}M`;
+  return `$${Math.round(value / 1e3)}K`; // NOTE(review): anything under 500 prints "$0K" — confirm sub-$1K revenue-per-unit calibrations cannot occur, or add a plain-dollar tier
+}
 export function scaleReportToText(config: MarketConfig, report: ScaleReport): string {
   const names = new Map(config.vendors.map((vendor) => [vendor.id, vendor.name]));
   const lines: string[] = [];
-  lines.push(`Scale index (relative, within this ${config.vendors.length}-vendor set — not market share):`);
-  lines.push(`metrics used: ${report.metricsUsed.join(", ") || "none"}${report.metricsSkipped.length ? ` · skipped (single-vendor): ${report.metricsSkipped.join(", ")}` : ""}`);
+  lines.push(`Estimated revenue share (of this ${config.vendors.length}-vendor set; calibrated from citable signals, NOT audited):`); // header line; the vendor count comes from the config, not the report
+  lines.push(
+    `metrics: ${report.metricsUsed.join(", ") || "none"}${report.metricsSkipped.length ? ` · uncalibratable: ${report.metricsSkipped.join(", ")}` : ""}`,
+  );
   lines.push("");
-  const ranked = [...report.vendors].sort((a, b) => (b.index ?? -1) - (a.index ?? -1));
+  const ranked = [...report.vendors].sort((a, b) => (b.estimatedRevenue ?? -1) - (a.estimatedRevenue ?? -1)); // largest estimate first; vendors without one sort last (treated as -1)
   for (const vendor of ranked) {
-    const idx = vendor.index === null ? "  n/a" : vendor.index.toFixed(2);
-    const cov = vendor.coverage.map((entry) => `${entry.metric}=${entry.value}`).join(", ") || "no signals";
-    lines.push(`  ${idx}  ${(names.get(vendor.vendorId) ?? vendor.vendorId).padEnd(22)} ${cov}`);
+    if (vendor.estimatedRevenue === null) { // no usable signal: print a placeholder row and move on
+      lines.push(`   n/a  ${(names.get(vendor.vendorId) ?? vendor.vendorId).padEnd(22)} no usable signals`);
+      continue;
+    }
+    const share = `${((vendor.index ?? 0) * 100).toFixed(1)}%`.padStart(6); // index is a fraction of the set total; shown as a percentage
+    const spread = vendor.uncertainty !== null && vendor.uncertainty > 1 ? ` (×${vendor.uncertainty.toFixed(1)} signal spread)` : ""; // omitted when the signals agree exactly or there is only one
+    lines.push(
+      `${share}  ${(names.get(vendor.vendorId) ?? vendor.vendorId).padEnd(22)} ~${money(vendor.estimatedRevenue)}${spread}  [${vendor.estimates
+        .filter((estimate) => estimate.estimatedRevenue !== null)
+        .map((estimate) => `${estimate.metric}→${money(estimate.estimatedRevenue as number)}`)
+        .join(", ")}]`,
+    );
+  }
+  lines.push("");
+  lines.push("calibrations (median revenue-per-unit):"); // appendix: one row per calibrated metric × stratum
+  for (const calibration of report.calibrations) {
+    lines.push(
+      `  ${calibration.metric.padEnd(26)} ${calibration.stratum.padEnd(16)} ${money(calibration.revenuePerUnit)}/unit (${calibration.pairs} pair${calibration.pairs === 1 ? "" : "s"})`,
+    );
   }
   return `${lines.join("\n")}\n`;
 }