@ainyc/canonry 4.85.0 → 4.87.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/agent-workspace/skills/canonry/references/canonry-cli.md +3 -1
- package/assets/assets/{BacklinksPage-CDAv0ggn.js → BacklinksPage-BPvsw_Bi.js} +1 -1
- package/assets/assets/{ChartPrimitives-CnAmsyt7.js → ChartPrimitives-BdlKCq7y.js} +1 -1
- package/assets/assets/ProjectPage-1Q8YC9Vd.js +6 -0
- package/assets/assets/{RunRow-CVZ5o8fg.js → RunRow-DcSsnE5c.js} +1 -1
- package/assets/assets/{RunsPage-Bzy5c0MZ.js → RunsPage-DKoIMkQL.js} +1 -1
- package/assets/assets/{SettingsPage-B1ocxPBe.js → SettingsPage-bH3PdNKb.js} +1 -1
- package/assets/assets/{TrafficPage-D2zepQOC.js → TrafficPage-IW_DX-0V.js} +1 -1
- package/assets/assets/{TrafficSourceDetailPage-C7JuAkaK.js → TrafficSourceDetailPage-DRHOGn9B.js} +1 -1
- package/assets/assets/{arrow-left-Bv3CWylm.js → arrow-left-B5Du72nk.js} +1 -1
- package/assets/assets/{extract-error-message-BtVid5TP.js → extract-error-message-C7Vhd5zH.js} +1 -1
- package/assets/assets/index-C_ZzKZfM.js +210 -0
- package/assets/assets/index-ClkRAeHL.css +1 -0
- package/assets/assets/{trash-2-BoimCsYz.js → trash-2-DWcofmpv.js} +1 -1
- package/assets/index.html +2 -2
- package/dist/{chunk-3K3QRSYE.js → chunk-5LW7CJAO.js} +276 -53
- package/dist/{chunk-62YB3ML7.js → chunk-6XMXBAEW.js} +47 -2
- package/dist/{chunk-7BMSWI2K.js → chunk-DUDFNP5Y.js} +19 -4
- package/dist/{chunk-I2BJC3DT.js → chunk-MDRDX5R2.js} +634 -205
- package/dist/cli.js +73 -9
- package/dist/index.js +4 -4
- package/dist/{intelligence-service-AHHBQKRD.js → intelligence-service-XUKYOHKL.js} +2 -2
- package/dist/mcp.js +2 -2
- package/package.json +7 -7
- package/assets/assets/ProjectPage-C9KEgRxD.js +0 -6
- package/assets/assets/index-BgWgJE7S.css +0 -1
- package/assets/assets/index-DmNti_xn.js +0 -210
|
@@ -1832,6 +1832,210 @@ import { z as z18 } from "zod";
|
|
|
1832
1832
|
|
|
1833
1833
|
// ../contracts/src/discovery.ts
|
|
1834
1834
|
import { z as z17 } from "zod";
|
|
1835
|
+
|
|
1836
|
+
// ../contracts/src/embeddings.ts
|
|
1837
|
+
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector; must be non-empty.
 * @param {ArrayLike<number>} b - Second vector; must have the same length as `a`.
 * @returns {number} `dot(a, b) / (|a| * |b|)`, or 0 when either vector has zero magnitude.
 * @throws {Error} If either vector is empty or the lengths differ.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size === 0 || b.length === 0) {
    throw new Error("cosineSimilarity: vectors must be non-empty");
  }
  if (size !== b.length) {
    throw new Error(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
  }
  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let idx = 0;
  while (idx < size) {
    const x = a[idx];
    const y = b[idx];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    idx += 1;
  }
  // A zero vector has no direction: report "no similarity" rather than divide by zero.
  const hasZeroVector = sumSquaresA === 0 || sumSquaresB === 0;
  return hasZeroVector ? 0 : dotProduct / (Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB));
}
|
|
1855
|
+
/**
 * Groups items whose vectors are transitively connected by cosine similarity
 * at or above `threshold` (single-linkage clustering via union-find).
 *
 * @param {Array<*>} items - Items to cluster; `items[i]` is described by `vectors[i]`.
 * @param {Array<ArrayLike<number>>} vectors - One vector per item.
 * @param {number} threshold - Similarity cutoff in [0, 1]; pairs scoring >= this are linked.
 * @returns {Array<Array<*>>} Clusters of items, each in original item order, with clusters
 *   ordered by their first member's index. Empty when `items` is empty.
 * @throws {Error} If `threshold` is not a number in [0, 1] (including NaN), or if
 *   `items` and `vectors` differ in length.
 */
function clusterByCosine(items, vectors, threshold) {
  // Positive range test: NaN/undefined fail every comparison, so they are rejected here
  // instead of silently producing all-singleton clusters.
  if (!(threshold >= 0 && threshold <= 1)) {
    throw new Error(`clusterByCosine: threshold must be in [0, 1], got ${threshold}`);
  }
  if (items.length !== vectors.length) {
    throw new Error(`clusterByCosine: items/vectors length mismatch (${items.length} vs ${vectors.length})`);
  }
  if (items.length === 0) return [];

  // parent[i] is the union-find parent of index i; a root is its own parent.
  const parent = items.map((_, i) => i);

  const find = (x) => {
    let root = x;
    while (parent[root] !== root) root = parent[root];
    // Path compression: point every node on the walked path straight at the root.
    let cur = x;
    while (parent[cur] !== root) {
      const next = parent[cur];
      parent[cur] = root;
      cur = next;
    }
    return root;
  };

  const union = (a, b) => {
    const ra = find(a);
    const rb = find(b);
    if (ra !== rb) parent[ra] = rb;
  };

  // Compare every unordered pair once.
  for (let i = 0; i < items.length; i++) {
    for (let j = i + 1; j < items.length; j++) {
      if (cosineSimilarity(vectors[i], vectors[j]) >= threshold) {
        union(i, j);
      }
    }
  }

  // Bucket indices by root; Map insertion order keeps clusters ordered by first member.
  const byRoot = /* @__PURE__ */ new Map();
  for (let i = 0; i < items.length; i++) {
    const root = find(i);
    const members = byRoot.get(root);
    if (members) members.push(i);
    else byRoot.set(root, [i]);
  }
  return Array.from(byRoot.values(), (indices) => indices.map((idx) => items[idx]));
}
|
|
1896
|
+
/**
 * Chooses the representative of a cluster: the member with the smallest `length`.
 * Ties keep the earliest member.
 *
 * @param {Array<{length: number}>} cluster - Non-empty cluster (e.g. strings).
 * @returns {*} The shortest member.
 * @throws {Error} If the cluster is empty.
 */
function pickClusterRepresentative(cluster) {
  if (cluster.length === 0) {
    throw new Error("pickClusterRepresentative: cluster is empty");
  }
  const [first, ...others] = cluster;
  // Strict "<" means an equally short later member never displaces the earlier one.
  return others.reduce(
    (shortest, member) => (member.length < shortest.length ? member : shortest),
    first
  );
}
|
|
1904
|
+
|
|
1905
|
+
// ../contracts/src/url-normalize.ts
|
|
1906
|
+
// Query-parameter names removed during URL normalization (exact, case-sensitive match).
// `utm_*` parameters are handled separately by prefix in `shouldStrip`.
var STRIP_KEYS = /* @__PURE__ */ new Set([
  // Click identifiers
  "fbclid", "gclid", "msclkid", "ttclid", "li_fat_id", "igshid",
  "yclid", "dclid", "gbraid", "wbraid", "bingid",
  // Mailchimp
  "mc_cid", "mc_eid",
  // Google Analytics linkers
  "_ga", "_gl",
  // Google Tag Manager debug
  "gtm_latency", "gtm_debug",
  // WordPress internal noise
  "preview", "preview_id", "preview_nonce", "_thumbnail_id",
  // Common cache-busters/versioning
  "v", "ver", "version"
]);
|
|
1938
|
+
/**
 * Decides whether a query parameter is tracking/noise and should be dropped.
 *
 * @param {string} key - Raw query-parameter name (compared case-sensitively).
 * @returns {boolean} True when the key is listed in `STRIP_KEYS` or starts with `utm_`.
 */
function shouldStrip(key) {
  return STRIP_KEYS.has(key) || key.startsWith("utm_");
}
|
|
1943
|
+
/**
 * Splits a raw query string (without the leading "?") into key/value pairs.
 * No percent-decoding is performed; text is kept exactly as given.
 *
 * @param {string} query - Query string such as "a=1&b".
 * @returns {Array<{key: string, value: (string|null)}>} One entry per "&"-separated segment;
 *   `value` is null when the segment has no "=", and everything after the first "=" otherwise.
 */
function parseQuery(query) {
  const pairs = [];
  if (query === "") return pairs;
  for (const segment of query.split("&")) {
    const splitAt = segment.indexOf("=");
    if (splitAt === -1) {
      pairs.push({ key: segment, value: null });
    } else {
      pairs.push({ key: segment.slice(0, splitAt), value: segment.slice(splitAt + 1) });
    }
  }
  return pairs;
}
|
|
1951
|
+
/**
 * Serializes key/value pairs back into a query string (inverse of `parseQuery`).
 *
 * @param {Array<{key: string, value: (string|null)}>} pairs - Pairs to serialize.
 * @returns {string} Segments joined with "&"; a null value emits the bare key with no "=".
 */
function encodeQuery(pairs) {
  const segments = [];
  for (const { key, value } of pairs) {
    segments.push(value === null ? key : `${key}=${value}`);
  }
  return segments.join("&");
}
|
|
1954
|
+
/**
 * Maps the root index documents ("/index.html", "/index.php") to "/".
 * Only exact, root-level matches are collapsed; every other path is returned unchanged.
 *
 * @param {string} path - URL path.
 * @returns {string} "/" for a root index document, otherwise `path`.
 */
function collapseRootIndex(path) {
  const isRootIndex = path === "/index.html" || path === "/index.php";
  return isRootIndex ? "/" : path;
}
|
|
1958
|
+
/**
 * Removes trailing slashes from a path while keeping the root path intact.
 *
 * @param {string} path - URL path.
 * @returns {string} `path` without trailing "/" characters. "/" stays "/", and a
 *   slash-only path such as "//" collapses to "/" rather than to an empty string.
 */
function dropTrailingSlash(path) {
  if (path.length <= 1 || !path.endsWith("/")) return path;
  const stripped = path.replace(/\/+$/, "");
  // A path made only of slashes would strip down to ""; it still denotes the root.
  return stripped === "" ? "/" : stripped;
}
|
|
1964
|
+
/**
 * Turns a possibly relative URL into an absolute https URL on the project's domain.
 *
 * @param {?string} url - Absolute, protocol-relative, root-relative or bare relative URL.
 * @param {string} canonicalDomain - Project domain; an http(s) scheme prefix and trailing
 *   slashes are ignored.
 * @returns {string} "" for empty/blank input; the trimmed input unchanged when it is already
 *   http(s) or when no host can be derived; "https:" + input for protocol-relative input;
 *   otherwise "https://<host>/<path>".
 */
function absolutizeProjectUrl(url, canonicalDomain) {
  const target = url ? url.trim() : "";
  if (!target) return "";

  const alreadyAbsolute = /^https?:\/\//i.test(target);
  if (alreadyAbsolute) return target;
  if (target.startsWith("//")) return `https:${target}`;

  const host = canonicalDomain.trim().replace(/^https?:\/\//i, "").replace(/\/+$/, "");
  if (!host) return target;

  // Insert a separator only when the relative part does not bring its own leading slash.
  const separator = target.startsWith("/") ? "" : "/";
  return `https://${host}${separator}${target}`;
}
|
|
1975
|
+
/**
 * Extracts a comparable hostname from a URL or bare domain.
 *
 * @param {?string} value - Full URL ("https://www.example.com/x") or bare host ("example.com").
 * @returns {?string} Lower-cased hostname without a leading "www.", or null when the
 *   value is null/blank or cannot be parsed as a URL.
 */
function hostOf(value) {
  if (value == null) return null;
  const candidate = value.trim();
  if (!candidate) return null;

  // Bare hosts have no scheme; give them one so the URL parser accepts them.
  const href = candidate.includes("://") ? candidate : `https://${candidate}`;
  let parsed;
  try {
    parsed = new URL(href);
  } catch {
    return null;
  }
  return parsed.hostname.replace(/^www\./, "").toLowerCase();
}
|
|
1986
|
+
/**
 * Normalizes a URL or path into a canonical "path[?query]" key.
 *
 * Steps: collapse whitespace, strip trailing sentence punctuation, keep only the text
 * before the first space, drop host and fragment, collapse root index documents, remove
 * trailing slashes, drop tracking parameters (`shouldStrip`) and sort the remaining
 * parameters by key.
 *
 * @param {?string} input - Absolute http(s) URL or a path, possibly with query/fragment.
 * @returns {?string} Normalized path (always non-empty, "/" for the root), or null when the
 *   input is null, blank, the "(not set)" placeholder, or an unparseable absolute URL.
 */
function normalizeUrlPath(input) {
  if (input == null) return null;
  let cleaned = input.trim();
  if (cleaned === "") return null;

  // `\s` also matches U+00A0, so a single pass folds non-breaking spaces and
  // whitespace runs into one plain space.
  cleaned = cleaned.replace(/\s+/g, " ").trim();
  if (cleaned === "" || cleaned === "/") return "/";
  if (cleaned === "(not set)") return null;

  // Strip punctuation glued to the end of a pasted URL, e.g. "/pricing)." -> "/pricing".
  cleaned = cleaned.replace(/([a-z0-9])[).]+$/i, "$1");
  if (cleaned.startsWith("/)") || cleaned.startsWith("/ ")) {
    cleaned = "/";
  }
  // Anything after the first space is treated as trailing noise.
  if (cleaned.includes(" ")) {
    cleaned = cleaned.split(" ")[0];
  }
  if (cleaned === "" || cleaned === "/") return "/";

  let pathPart;
  let queryPart;
  if (/^https?:\/\//i.test(cleaned)) {
    let url;
    try {
      url = new URL(cleaned);
    } catch {
      return null;
    }
    pathPart = url.pathname || "/";
    queryPart = url.search.startsWith("?") ? url.search.slice(1) : url.search;
  } else {
    // Plain path: cut the fragment first, then split path from query at the first "?".
    const hashIdx = cleaned.indexOf("#");
    const withoutHash = hashIdx === -1 ? cleaned : cleaned.slice(0, hashIdx);
    const qIdx = withoutHash.indexOf("?");
    pathPart = qIdx === -1 ? withoutHash : withoutHash.slice(0, qIdx);
    queryPart = qIdx === -1 ? "" : withoutHash.slice(qIdx + 1);
  }

  if (pathPart === "") pathPart = "/";
  pathPart = dropTrailingSlash(collapseRootIndex(pathPart));
  // Guard: a slash-only path such as "//" must normalize to the root, never to "".
  if (pathPart === "") pathPart = "/";

  const kept = parseQuery(queryPart).filter((pair) => !shouldStrip(pair.key));
  // Sort by key so parameter order does not create distinct keys for the same page.
  kept.sort((a, b) => {
    if (a.key < b.key) return -1;
    if (a.key > b.key) return 1;
    return 0;
  });
  return kept.length === 0 ? pathPart : `${pathPart}?${encodeQuery(kept)}`;
}
|
|
2037
|
+
|
|
2038
|
+
// ../contracts/src/discovery.ts
|
|
1835
2039
|
// Zod enum of the three discovery buckets: "cited", "aspirational", "wasted-surface".
var discoveryBucketSchema = z17.enum(["cited", "aspirational", "wasted-surface"]);
// The schema's `.enum` accessor, so bucket values can be referenced by name.
var DiscoveryBuckets = discoveryBucketSchema.enum;
|
|
1837
2041
|
var DEFAULT_DISCOVERY_PROMOTE_BUCKETS = [
|
|
@@ -1963,6 +2167,231 @@ var queryProvenanceSchema = z17.union([
|
|
|
1963
2167
|
z17.literal("cli"),
|
|
1964
2168
|
z17.string().regex(/^discovery:.+$/)
|
|
1965
2169
|
]);
|
|
2170
|
+
// Length gate: candidates with more than this many space-separated words are rejected.
var DISCOVERY_HARVEST_MAX_WORDS = 12;
// Length gate: candidates whose normalized text is shorter than this many characters are rejected.
var DISCOVERY_HARVEST_MIN_CHARS = 3;
// Default cosine cutoff for the semantic novelty pass; aliases the dedup threshold defined elsewhere.
var DISCOVERY_HARVEST_NOVELTY_THRESHOLD = DISCOVERY_DEFAULT_DEDUP_THRESHOLD;
// The subject-anchor filter only runs when at least this many anchor terms are supplied.
var DISCOVERY_HARVEST_MIN_ANCHOR_TERMS = 1;
// A run of at least this many consecutive digits marks a query as navigational
// (presumably a phone number, per the constant's name).
var DISCOVERY_HARVEST_PHONE_DIGITS = 7;
// Tokens shorter than this many characters are never treated as significant.
var HARVEST_SIGNIFICANT_TOKEN_MIN = 4;
|
|
2176
|
+
// Lower-case filler words that never count as significant harvest tokens.
var HARVEST_STOPWORDS = /* @__PURE__ */ new Set([
  "the", "and", "for", "with", "near",
  "best", "top", "your", "you", "are",
  "how", "what", "does", "this", "that",
  "from", "into", "about", "who", "why"
]);
|
|
2198
|
+
// Words and phrases that mark a query as navigational. Single words are matched as whole
// tokens; entries containing a space are matched as whole phrases.
var HARVEST_NAV_MARKERS = [
  "address", "directions", "hours",
  "login", "log in", "sign in", "signin",
  "phone number", "zip code", "postal code", "email address"
];
|
|
2211
|
+
// One harvested query candidate: non-empty query text plus the positive integer number
// of probes it appeared in.
var discoveryHarvestCandidateSchema = z17.object({
  query: z17.string().min(1),
  probeHits: z17.number().int().positive()
});
|
|
2215
|
+
// Counters describing one harvest gating run; every field is a non-negative integer.
var discoveryHarvestStatsSchema = z17.object({
  /** Distinct candidates extracted before gating. */
  rawCandidates: z17.number().int().nonnegative(),
  /** Candidates that passed every gate. */
  admitted: z17.number().int().nonnegative(),
  /** Per-reason rejection tally (each rejected candidate counted exactly once,
   * at the first gate it failed). Lexical-gate order: belowFloor → length →
   * navigational → duplicate (EXACT already-tracked) → offAnchor; then
   * `semanticDuplicate` for candidates dropped by the cosine novelty pass.
   * Invariant: `admitted + Σ(rejected) === rawCandidates`. */
  rejected: z17.object({
    belowFloor: z17.number().int().nonnegative(),
    length: z17.number().int().nonnegative(),
    navigational: z17.number().int().nonnegative(),
    /** Dropped by the cheap exact-match check against the tracked basket. */
    duplicate: z17.number().int().nonnegative(),
    offAnchor: z17.number().int().nonnegative(),
    /** Dropped by the embedding cosine novelty pass (a paraphrase / synonym /
     * stem variant of a tracked query that exact match can't see). 0 when the
     * semantic pass did not run — see `semanticNoveltyApplied`. */
    semanticDuplicate: z17.number().int().nonnegative()
  })
});
|
|
2238
|
+
// Response shape for a discovery harvest: session identity, the gate settings that were
// applied, the admitted candidates and the gating statistics.
var discoveryHarvestDtoSchema = z17.object({
  sessionId: z17.string(),
  projectId: z17.string(),
  /** The provider whose probes were harvested (the session's seed provider).
   * Discovery is Gemini-only today; carried so a future multi-provider
   * discovery can attribute candidates. */
  provider: z17.string(),
  status: discoverySessionStatusSchema,
  /** Recurrence floor applied: a candidate must have appeared in ≥ this many
   * distinct probes to be admitted. */
  minProbeHits: z17.number().int().positive(),
  /** Whether the subject-anchor filter actually ran (requested AND the corpus
   * had ≥ `DISCOVERY_HARVEST_MIN_ANCHOR_TERMS` significant terms). */
  anchorApplied: z17.boolean(),
  /** Whether the embedding cosine novelty pass ran. False when embeddings were
   * unavailable (no Gemini key / no tracked queries / no candidates), in which
   * case novelty fell back to the cheap exact-match check only. */
  semanticNoveltyApplied: z17.boolean(),
  // Candidates that survived gating, and the per-gate tallies for the same run.
  candidates: z17.array(discoveryHarvestCandidateSchema),
  stats: discoveryHarvestStatsSchema
});
|
|
2259
|
+
/**
 * Canonical form of a harvested query used for comparison and de-duplication.
 *
 * @param {string} query - Raw query text.
 * @returns {string} The query trimmed, with whitespace runs collapsed to one space, lower-cased.
 */
function normalizeHarvestQuery(query) {
  const trimmed = query.trim();
  const singleSpaced = trimmed.replace(/\s+/g, " ");
  return singleSpaced.toLowerCase();
}
|
|
2262
|
+
/**
 * Splits a query into lower-case alphanumeric tokens.
 *
 * @param {string} query - Raw or already-normalized query text.
 * @returns {string[]} Non-empty runs of [a-z0-9] from the normalized query, in order.
 */
function harvestTokens(query) {
  const pieces = normalizeHarvestQuery(query).split(/[^a-z0-9]+/);
  // Leading/trailing separators produce empty strings; drop them.
  return pieces.filter((piece) => piece.length > 0);
}
|
|
2265
|
+
/**
 * Tokens of a query that carry subject meaning.
 *
 * @param {string} query - Query text.
 * @returns {string[]} Tokens at least `HARVEST_SIGNIFICANT_TOKEN_MIN` characters long that
 *   are not in `HARVEST_STOPWORDS`, in original order (duplicates kept).
 */
function significantHarvestTokens(query) {
  const significant = [];
  for (const token of harvestTokens(query)) {
    const longEnough = token.length >= HARVEST_SIGNIFICANT_TOKEN_MIN;
    if (longEnough && !HARVEST_STOPWORDS.has(token)) {
      significant.push(token);
    }
  }
  return significant;
}
|
|
2270
|
+
/**
 * Length of the longest run of consecutive ASCII digits in a string.
 *
 * @param {string} query - Text to scan.
 * @returns {number} Longest digit-run length, or 0 when there are no digits.
 */
function longestDigitRun(query) {
  const digitRuns = query.match(/[0-9]+/g) ?? [];
  return digitRuns.reduce((longest, run) => Math.max(longest, run.length), 0);
}
|
|
2283
|
+
/**
 * Detects navigational queries (contact details, logins, directions and the like).
 *
 * A query is navigational when it contains a digit run of at least
 * `DISCOVERY_HARVEST_PHONE_DIGITS` digits, or any entry of `HARVEST_NAV_MARKERS`.
 *
 * @param {string} query - Raw or normalized query text.
 * @returns {boolean} True when the query is navigational.
 */
function isNavigationalHarvestQuery(query) {
  const normalized = normalizeHarvestQuery(query);
  if (longestDigitRun(normalized) >= DISCOVERY_HARVEST_PHONE_DIGITS) {
    return true;
  }
  const tokenSet = new Set(harvestTokens(normalized));
  return HARVEST_NAV_MARKERS.some((marker) => {
    // Single-word markers must equal a whole token ("login", not "loginpage").
    if (!marker.includes(" ")) return tokenSet.has(marker);
    // Phrases must appear whole, not embedded inside longer alphanumeric words.
    const phrase = marker.replace(/ /g, "\\s+");
    return new RegExp(`(?<![a-z0-9])${phrase}(?![a-z0-9])`).test(normalized);
  });
}
|
|
2297
|
+
/**
 * Builds the set of subject-anchor terms from free text and project domains.
 *
 * @param {Iterable<string>} corpus - Texts whose significant tokens become anchors.
 * @param {Iterable<string>} [domains=[]] - Domains/URLs; each host's final dot-suffix is
 *   dropped and the rest is tokenized. Entries `hostOf` cannot resolve are skipped.
 * @returns {string[]} Distinct anchor terms in first-seen order.
 */
function buildHarvestAnchorTerms(corpus, domains = []) {
  const terms = /* @__PURE__ */ new Set();
  const collect = (text) => {
    for (const token of significantHarvestTokens(text)) terms.add(token);
  };

  for (const text of corpus) collect(text);
  for (const domain of domains) {
    const host = hostOf(domain);
    // Strip the last ".suffix" (e.g. ".com") so only the name labels are tokenized.
    if (host) collect(host.replace(/\.[a-z0-9]+$/, ""));
  }
  return Array.from(terms);
}
|
|
2310
|
+
/**
 * Runs harvested candidates through the lexical gates and tallies the outcome.
 *
 * Gates run in order and a candidate is counted at the first one it fails:
 * belowFloor -> length -> navigational -> duplicate (exact match against tracked
 * queries) -> offAnchor (only when the anchor filter is active).
 *
 * @param {object} input
 * @param {Array<{query: string, probeHits: number}>} input.candidates - Candidates to gate.
 * @param {string[]} input.trackedQueries - Already-tracked queries (compared after normalization).
 * @param {Iterable<string>} [input.anchorTerms] - Subject-anchor terms.
 * @param {boolean} [input.applyAnchor=true] - Request the anchor filter; it only runs when
 *   at least `DISCOVERY_HARVEST_MIN_ANCHOR_TERMS` anchor terms are supplied.
 * @param {number} [input.minProbeHits=1] - Recurrence floor (floored, minimum 1).
 * @returns {{admitted: Array<{query: string, probeHits: number}>, anchorApplied: boolean, stats: object}}
 *   Admitted candidates (normalized, sorted by probeHits descending then query), whether
 *   the anchor filter ran, and the per-gate statistics.
 */
function gateHarvestedSearchQueries(input) {
  const probeFloor = Math.max(1, Math.floor(input.minProbeHits ?? 1));
  const anchors = new Set(input.anchorTerms ?? []);
  const anchorActive = (input.applyAnchor ?? true) && anchors.size >= DISCOVERY_HARVEST_MIN_ANCHOR_TERMS;
  const tracked = new Set(
    input.trackedQueries.map(normalizeHarvestQuery).filter(Boolean)
  );
  const rejected = { belowFloor: 0, length: 0, navigational: 0, duplicate: 0, offAnchor: 0, semanticDuplicate: 0 };
  const admitted = [];

  // Returns the first failed gate for a candidate, or its normalized query when it passes.
  const screen = (candidate) => {
    if (candidate.probeHits < probeFloor) return { reason: "belowFloor" };
    const norm = normalizeHarvestQuery(candidate.query);
    const wordCount = norm ? norm.split(" ").length : 0;
    if (norm.length < DISCOVERY_HARVEST_MIN_CHARS || wordCount > DISCOVERY_HARVEST_MAX_WORDS) {
      return { reason: "length" };
    }
    if (isNavigationalHarvestQuery(norm)) return { reason: "navigational" };
    if (tracked.has(norm)) return { reason: "duplicate" };
    if (anchorActive && !significantHarvestTokens(norm).some((t) => anchors.has(t))) {
      return { reason: "offAnchor" };
    }
    return { reason: null, norm };
  };

  for (const candidate of input.candidates) {
    const verdict = screen(candidate);
    if (verdict.reason !== null) {
      rejected[verdict.reason] += 1;
      continue;
    }
    admitted.push({ query: verdict.norm, probeHits: candidate.probeHits });
  }

  // Most-recurring first; ties broken alphabetically for a stable presentation order.
  admitted.sort((a, b) => b.probeHits - a.probeHits || a.query.localeCompare(b.query));
  return {
    admitted,
    anchorApplied: anchorActive,
    stats: {
      rawCandidates: input.candidates.length,
      admitted: admitted.length,
      rejected
    }
  };
}
|
|
2355
|
+
/**
 * Second-stage novelty pass: drops admitted candidates whose embedding is too close
 * to any tracked query's embedding.
 *
 * @param {object} input
 * @param {{admitted: Array, anchorApplied: boolean, stats: object}} input.result - Output of
 *   the lexical gate.
 * @param {Array<ArrayLike<number>>} input.candidateVectors - One vector per admitted candidate,
 *   in the same order.
 * @param {Array<ArrayLike<number>>} input.trackedVectors - Vectors of tracked queries.
 * @param {number} [input.threshold=DISCOVERY_HARVEST_NOVELTY_THRESHOLD] - Similarity at or
 *   above which a candidate counts as a duplicate.
 * @returns {object} A new result with duplicates removed and `stats.admitted` /
 *   `stats.rejected.semanticDuplicate` updated. The original `result` object is returned
 *   as-is when the vectors do not line up with the admitted list or no tracked vectors exist.
 */
function applyHarvestSemanticNovelty(input) {
  const { result, candidateVectors, trackedVectors } = input;
  const cutoff = input.threshold ?? DISCOVERY_HARVEST_NOVELTY_THRESHOLD;

  // Without a one-to-one vector mapping (or anything to compare against) the pass is skipped.
  if (candidateVectors.length !== result.admitted.length || trackedVectors.length === 0) {
    return result;
  }

  const novel = [];
  for (const [idx, candidate] of result.admitted.entries()) {
    const vector = candidateVectors[idx];
    const tooClose = trackedVectors.some((tracked) => cosineSimilarity(vector, tracked) >= cutoff);
    if (!tooClose) novel.push(candidate);
  }

  return {
    admitted: novel,
    anchorApplied: result.anchorApplied,
    stats: {
      ...result.stats,
      admitted: novel.length,
      rejected: {
        ...result.stats.rejected,
        semanticDuplicate: result.admitted.length - novel.length
      }
    }
  };
}
|
|
2379
|
+
/**
 * Counts, for every distinct normalized query, how many probes it appeared in.
 *
 * @param {Iterable<{searchQueries: Iterable<*>}>} probes - Probes with their raw search queries.
 * @returns {Array<{query: string, probeHits: number}>} One entry per distinct normalized
 *   query, in first-seen order. A query repeated inside one probe counts once for that
 *   probe; non-string and blank entries are ignored.
 */
function aggregateHarvestedQueries(probes) {
  const hitsByQuery = /* @__PURE__ */ new Map();
  for (const probe of probes) {
    // Collect this probe's distinct queries first so repeats inside one probe count once.
    const distinct = /* @__PURE__ */ new Set();
    for (const raw of probe.searchQueries) {
      if (typeof raw !== "string") continue;
      const norm = normalizeHarvestQuery(raw);
      if (norm) distinct.add(norm);
    }
    for (const norm of distinct) {
      hitsByQuery.set(norm, (hitsByQuery.get(norm) ?? 0) + 1);
    }
  }
  return Array.from(hitsByQuery, ([query, probeHits]) => ({ query, probeHits }));
}
|
|
1966
2395
|
|
|
1967
2396
|
// ../contracts/src/surface-class.ts
|
|
1968
2397
|
var surfaceClassSchema = z18.enum([
|
|
@@ -2734,6 +3163,196 @@ var ccCachedReleaseSchema = z23.object({
|
|
|
2734
3163
|
|
|
2735
3164
|
// ../contracts/src/composites.ts
|
|
2736
3165
|
import { z as z24 } from "zod";
|
|
3166
|
+
// Tone attached to a metric: "positive", "caution", "negative" or "neutral".
var metricToneSchema = z24.enum(["positive", "caution", "negative", "neutral"]);
// One score summary: display strings (label/value/delta/description), a tone, a numeric
// trend series, and optional tooltip, progress and provider-coverage fields.
var scoreSummarySchema = z24.object({
  label: z24.string(),
  value: z24.string(),
  delta: z24.string(),
  tone: metricToneSchema,
  description: z24.string(),
  tooltip: z24.string().optional(),
  trend: z24.array(z24.number()),
  progress: z24.number().optional(),
  providerCoverage: z24.string().optional()
});
|
|
3178
|
+
// Score summary extended with a `breakdown` of mention-snapshot counts for the project
// and its competitors. All counts are non-negative integers.
var mentionShareSchema = scoreSummarySchema.extend({
  breakdown: z24.object({
    projectMentionSnapshots: z24.number().int().nonnegative(),
    competitorMentionSnapshots: z24.number().int().nonnegative(),
    // Per-competitor rows: domain, its mention-snapshot count and its share value.
    perCompetitor: z24.array(z24.object({
      domain: z24.string(),
      mentionSnapshots: z24.number().int().nonnegative(),
      shareOfCompetitiveTotal: z24.number()
    })),
    snapshotsWithAnswerText: z24.number().int().nonnegative(),
    snapshotsTotal: z24.number().int().nonnegative()
  })
});
|
|
3191
|
+
// Gained/lost movement counts with a tone and a flag for whether a previous run exists.
// The query lists behind the counts are optional.
var movementSummarySchema = z24.object({
  gained: z24.number().int().nonnegative(),
  lost: z24.number().int().nonnegative(),
  tone: metricToneSchema,
  hasPreviousRun: z24.boolean(),
  gainedQueries: z24.array(z24.string()).optional(),
  lostQueries: z24.array(z24.string()).optional()
});
|
|
3199
|
+
// Comparison metadata between the current and previous run: comparability flags, the
// previous run timestamp (nullable), query counts, and the added/removed query lists.
var movementComparisonSchema = z24.object({
  hasPreviousRun: z24.boolean(),
  comparable: z24.boolean(),
  querySetChanged: z24.boolean(),
  previousRunAt: z24.string().nullable(),
  currentQueryCount: z24.number().int().nonnegative(),
  previousQueryCount: z24.number().int().nonnegative(),
  comparableQueryCount: z24.number().int().nonnegative(),
  addedQueryCount: z24.number().int().nonnegative(),
  removedQueryCount: z24.number().int().nonnegative(),
  addedQueries: z24.array(z24.string()),
  removedQueries: z24.array(z24.string())
});
|
|
3212
|
+
// One insight shown on the project overview. `runId` is nullable; `recommendation` and
// `cause` are optional detail objects.
var projectOverviewInsightSchema = z24.object({
  id: z24.string(),
  projectId: z24.string(),
  runId: z24.string().nullable(),
  // Closed set of insight kinds; the `gbp-*` members share a common prefix
  // (presumably Google Business Profile — confirm against the producer).
  type: z24.enum([
    "regression",
    "gain",
    "opportunity",
    "first-citation",
    "provider-pickup",
    "persistent-gap",
    "competitor-gained",
    "competitor-lost",
    "gbp-lodging-gap",
    "gbp-listing-discrepancy",
    "gbp-cta-gap",
    "gbp-metric-drop",
    "gbp-keyword-drop"
  ]),
  severity: z24.enum(["critical", "high", "medium", "low"]),
  title: z24.string(),
  query: z24.string(),
  provider: z24.string(),
  // Optional suggested action; `target` may be omitted.
  recommendation: z24.object({
    action: z24.string(),
    target: z24.string().optional(),
    reason: z24.string()
  }).optional(),
  // Optional cause attribution; only the inner `cause` string is required.
  cause: z24.object({
    cause: z24.string(),
    competitorDomain: z24.string().optional(),
    details: z24.string().optional()
  }).optional(),
  dismissed: z24.boolean(),
  createdAt: z24.string()
});
|
|
3248
|
+
// Health snapshot for the project overview: overall cited/mention rates, pair counts and
// a per-provider breakdown keyed by provider name.
var projectOverviewHealthSchema = z24.object({
  id: z24.string(),
  projectId: z24.string(),
  runId: z24.string().nullable(),
  overallCitedRate: z24.number(),
  overallMentionRate: z24.number(),
  totalPairs: z24.number().int().nonnegative(),
  citedPairs: z24.number().int().nonnegative(),
  mentionedPairs: z24.number().int().nonnegative(),
  // Record keyed by a string (presumably the provider name) with that key's rates and counts.
  providerBreakdown: z24.record(z24.string(), z24.object({
    citedRate: z24.number(),
    mentionRate: z24.number(),
    cited: z24.number().int().nonnegative(),
    mentioned: z24.number().int().nonnegative(),
    total: z24.number().int().nonnegative()
  })),
  createdAt: z24.string(),
  // "ready" or "no-data"; the optional `reason` only admits "no-runs-yet".
  status: z24.enum(["ready", "no-data"]),
  reason: z24.literal("no-runs-yet").optional()
});
|
|
3268
|
+
// Composite payload for the project overview page. It bundles the project, its latest
// run, health, insights, scores, movement, competitors, run history and suggested queries.
// `projectDtoSchema` and `latestProjectRunDtoSchema` are defined elsewhere in the bundle.
var projectOverviewDtoSchema = z24.object({
  project: projectDtoSchema,
  latestRun: latestProjectRunDtoSchema,
  health: projectOverviewHealthSchema.nullable(),
  topInsights: z24.array(projectOverviewInsightSchema),
  // Query-level counts and rates for citations and mentions.
  queryCounts: z24.object({
    totalQueries: z24.number().int().nonnegative(),
    citedQueries: z24.number().int().nonnegative(),
    notCitedQueries: z24.number().int().nonnegative(),
    citedRate: z24.number(),
    mentionedQueries: z24.number().int().nonnegative(),
    notMentionedQueries: z24.number().int().nonnegative(),
    mentionRate: z24.number()
  }),
  // Per-provider citation rate and counts.
  providers: z24.array(z24.object({
    provider: z24.string(),
    citedRate: z24.number(),
    cited: z24.number().int().nonnegative(),
    total: z24.number().int().nonnegative()
  })),
  // Transition counts; `since` is a nullable string.
  transitions: z24.object({
    since: z24.string().nullable(),
    gained: z24.number().int().nonnegative(),
    lost: z24.number().int().nonnegative(),
    emerging: z24.number().int().nonnegative()
  }),
  // Named score summaries; `mentionShare` additionally carries a breakdown.
  scores: z24.object({
    mention: scoreSummarySchema,
    visibility: scoreSummarySchema,
    mentionShare: mentionShareSchema,
    gapQueries: scoreSummarySchema,
    mentionGaps: scoreSummarySchema,
    indexCoverage: scoreSummarySchema,
    competitorPressure: scoreSummarySchema,
    runStatus: scoreSummarySchema
  }),
  // Movement summaries plus the comparison metadata for the runs being compared.
  movementSummary: movementSummarySchema,
  citationMovement: movementSummarySchema,
  mentionMovement: movementSummarySchema,
  movementComparison: movementComparisonSchema,
  // Per-competitor citation counts, pressure label and the cited queries.
  competitors: z24.array(z24.object({
    id: z24.string(),
    domain: z24.string(),
    citationCount: z24.number().int().nonnegative(),
    totalQueries: z24.number().int().nonnegative(),
    pressureLabel: z24.enum(["None", "Low", "Moderate", "High"]),
    citedQueries: z24.array(z24.string())
  })),
  // Per-provider score rows; `model` is nullable and `trend` optional.
  providerScores: z24.array(z24.object({
    provider: z24.string(),
    model: z24.string().nullable(),
    score: z24.number(),
    cited: z24.number().int().nonnegative(),
    total: z24.number().int().nonnegative(),
    trend: z24.array(z24.number()).optional()
  })),
  // Attention items: tone, display text, an action label and a link target.
  attentionItems: z24.array(z24.object({
    id: z24.string(),
    tone: metricToneSchema,
    title: z24.string(),
    detail: z24.string(),
    actionLabel: z24.string(),
    href: z24.string()
  })),
  // One row per historical run with citation/mention counts and rates.
  runHistory: z24.array(z24.object({
    runId: z24.string(),
    createdAt: z24.string(),
    citedCount: z24.number().int().nonnegative(),
    totalCount: z24.number().int().nonnegative(),
    citationRate: z24.number(),
    mentionedCount: z24.number().int().nonnegative(),
    mentionRate: z24.number(),
    status: z24.string()
  })),
  // Suggested queries with their metrics, plus candidate and skipped totals.
  suggestedQueries: z24.object({
    rows: z24.array(z24.object({
      query: z24.string(),
      impressions: z24.number(),
      clicks: z24.number(),
      avgPosition: z24.number(),
      reason: z24.string()
    })),
    totalCandidates: z24.number().int().nonnegative(),
    skippedAlreadyTracked: z24.number().int().nonnegative()
  }),
  dateRangeLabel: z24.string(),
  contextLabel: z24.string()
});
|
|
2737
3356
|
// Kinds of search hit: "snapshot" or "insight".
var searchHitKindSchema = z24.enum(["snapshot", "insight"]);
|
|
2738
3357
|
var projectSearchSnapshotHitSchema = z24.object({
|
|
2739
3358
|
kind: z24.literal("snapshot"),
|
|
@@ -3104,137 +3723,9 @@ function summarizeCheckResults(results) {
|
|
|
3104
3723
|
return summary;
|
|
3105
3724
|
}
|
|
3106
3725
|
|
|
3107
|
-
// ../contracts/src/url-normalize.ts
// Query-parameter names removed during URL normalization (exact, case-sensitive match).
// `utm_*` parameters are handled separately by prefix in `shouldStrip`.
var STRIP_KEYS = /* @__PURE__ */ new Set([
  // Click identifiers
  "fbclid",
  "gclid",
  "msclkid",
  "ttclid",
  "li_fat_id",
  "igshid",
  "yclid",
  "dclid",
  "gbraid",
  "wbraid",
  "bingid",
  // Mailchimp
  "mc_cid",
  "mc_eid",
  // Google Analytics linkers
  "_ga",
  "_gl",
  // Google Tag Manager debug
  "gtm_latency",
  "gtm_debug",
  // WordPress internal noise
  "preview",
  "preview_id",
  "preview_nonce",
  "_thumbnail_id",
  // Common cache-busters/versioning
  "v",
  "ver",
  "version"
]);
|
|
3140
|
-
/**
 * Decides whether a query parameter is tracking/noise and should be dropped.
 *
 * @param {string} key - Raw query-parameter name (compared case-sensitively).
 * @returns {boolean} True when the key is listed in `STRIP_KEYS` or starts with `utm_`.
 */
function shouldStrip(key) {
  return STRIP_KEYS.has(key) || key.startsWith("utm_");
}
|
|
3145
|
-
/**
 * Splits a raw query string (without the leading "?") into key/value pairs.
 * No percent-decoding is performed; text is kept exactly as given.
 *
 * @param {string} query - Query string such as "a=1&b".
 * @returns {Array<{key: string, value: (string|null)}>} One entry per "&"-separated segment;
 *   `value` is null when the segment has no "=", and everything after the first "=" otherwise.
 */
function parseQuery(query) {
  const pairs = [];
  if (query === "") return pairs;
  for (const segment of query.split("&")) {
    const splitAt = segment.indexOf("=");
    if (splitAt === -1) {
      pairs.push({ key: segment, value: null });
    } else {
      pairs.push({ key: segment.slice(0, splitAt), value: segment.slice(splitAt + 1) });
    }
  }
  return pairs;
}
|
|
3153
|
-
/**
 * Serializes key/value pairs back into a query string (inverse of `parseQuery`).
 *
 * @param {Array<{key: string, value: (string|null)}>} pairs - Pairs to serialize.
 * @returns {string} Segments joined with "&"; a null value emits the bare key with no "=".
 */
function encodeQuery(pairs) {
  const segments = [];
  for (const { key, value } of pairs) {
    segments.push(value === null ? key : `${key}=${value}`);
  }
  return segments.join("&");
}
|
|
3156
|
-
/**
 * Maps the root index documents ("/index.html", "/index.php") to "/".
 * Only exact, root-level matches are collapsed; every other path is returned unchanged.
 *
 * @param {string} path - URL path.
 * @returns {string} "/" for a root index document, otherwise `path`.
 */
function collapseRootIndex(path) {
  const isRootIndex = path === "/index.html" || path === "/index.php";
  return isRootIndex ? "/" : path;
}
|
|
3160
|
-
/**
 * Removes trailing slashes from a path while keeping the root path intact.
 *
 * @param {string} path - URL path.
 * @returns {string} `path` without trailing "/" characters. "/" stays "/", and a
 *   slash-only path such as "//" collapses to "/" rather than to an empty string.
 */
function dropTrailingSlash(path) {
  if (path.length <= 1 || !path.endsWith("/")) return path;
  const stripped = path.replace(/\/+$/, "");
  // A path made only of slashes would strip down to ""; it still denotes the root.
  return stripped === "" ? "/" : stripped;
}
|
|
3166
|
-
/**
 * Turns a possibly relative URL into an absolute https URL on the project's domain.
 *
 * @param {?string} url - Absolute, protocol-relative, root-relative or bare relative URL.
 * @param {string} canonicalDomain - Project domain; an http(s) scheme prefix and trailing
 *   slashes are ignored.
 * @returns {string} "" for empty/blank input; the trimmed input unchanged when it is already
 *   http(s) or when no host can be derived; "https:" + input for protocol-relative input;
 *   otherwise "https://<host>/<path>".
 */
function absolutizeProjectUrl(url, canonicalDomain) {
  const target = url ? url.trim() : "";
  if (!target) return "";

  const alreadyAbsolute = /^https?:\/\//i.test(target);
  if (alreadyAbsolute) return target;
  if (target.startsWith("//")) return `https:${target}`;

  const host = canonicalDomain.trim().replace(/^https?:\/\//i, "").replace(/\/+$/, "");
  if (!host) return target;

  // Insert a separator only when the relative part does not bring its own leading slash.
  const separator = target.startsWith("/") ? "" : "/";
  return `https://${host}${separator}${target}`;
}
|
|
3177
|
-
/**
 * Extract a bare hostname from a URL or a scheme-less domain string.
 *
 * Input without "://" is parsed as if it were prefixed with "https://".
 * A leading "www." is removed from the hostname and the result is
 * lower-cased.
 *
 * @param {string | null | undefined} value - URL or domain.
 * @returns {string | null} The hostname, or null when the input is nullish,
 *   blank, or cannot be parsed by the URL constructor.
 */
function hostOf(value) {
  if (value == null) return null;
  const candidate = value.trim();
  if (candidate === "") return null;
  const href = candidate.includes("://") ? candidate : `https://${candidate}`;
  let parsed;
  try {
    parsed = new URL(href);
  } catch {
    // Unparseable input is reported as "no host" rather than as an error.
    return null;
  }
  return parsed.hostname.replace(/^www\./, "").toLowerCase();
}
|
|
3188
|
-
/**
 * Reduce a page URL or path to a canonical "path[?query]" string.
 *
 * Steps, in order:
 *  1. Nullish or blank input yields null; the literal "(not set)" yields null.
 *  2. Whitespace runs (including non-breaking spaces) collapse to one space.
 *  3. Trailing ")" / "." characters that directly follow an alphanumeric are
 *     removed, and anything after the first space is discarded.
 *  4. Absolute http(s) URLs are parsed with the URL constructor (null when
 *     parsing fails); other input is split manually on "#" and "?".
 *  5. The path has root index documents collapsed and trailing slashes
 *     removed; an empty path is treated as "/".
 *  6. Query pairs rejected by shouldStrip are dropped and the rest are sorted
 *     by key (code-unit order) before being re-encoded.
 *
 * @param {string | null | undefined} input - URL or path to normalize.
 * @returns {string | null} Normalized path with optional query, or null.
 */
function normalizeUrlPath(input) {
  if (input == null) return null;
  let trimmed = input.trim();
  if (trimmed === "") return null;
  // The non-breaking space is written as an escape so the pattern cannot be
  // silently turned into a plain space by tooling that normalizes whitespace.
  trimmed = trimmed.replace(/\u00a0/g, " ").replace(/\s+/g, " ").trim();
  if (trimmed === "" || trimmed === "/") return "/";
  // NOTE(review): "(not set)" looks like an analytics placeholder value; confirm with callers.
  if (trimmed === "(not set)") return null;
  trimmed = trimmed.replace(/([a-z0-9])[).]+$/i, "$1");
  if (trimmed.startsWith("/)") || trimmed.startsWith("/ ")) {
    trimmed = "/";
  }
  if (trimmed.includes(" ")) {
    trimmed = trimmed.split(" ")[0];
  }
  if (trimmed === "" || trimmed === "/") return "/";
  let pathPart;
  let queryPart;
  if (/^https?:\/\//i.test(trimmed)) {
    let url;
    try {
      url = new URL(trimmed);
    } catch {
      return null;
    }
    pathPart = url.pathname || "/";
    queryPart = url.search.startsWith("?") ? url.search.slice(1) : url.search;
  } else {
    let raw = trimmed;
    // Drop the fragment first so a "?" inside it is not mistaken for a query.
    const hashIdx = raw.indexOf("#");
    if (hashIdx !== -1) raw = raw.slice(0, hashIdx);
    const qIdx = raw.indexOf("?");
    if (qIdx === -1) {
      pathPart = raw;
      queryPart = "";
    } else {
      pathPart = raw.slice(0, qIdx);
      queryPart = raw.slice(qIdx + 1);
    }
  }
  if (pathPart === "") pathPart = "/";
  pathPart = collapseRootIndex(pathPart);
  pathPart = dropTrailingSlash(pathPart);
  // A path made only of slashes must still normalize to the root, not to "".
  if (pathPart === "") pathPart = "/";
  const pairs = parseQuery(queryPart).filter((p) => !shouldStrip(p.key));
  pairs.sort((a, b) => {
    if (a.key < b.key) return -1;
    if (a.key > b.key) return 1;
    return 0;
  });
  if (pairs.length === 0) return pathPart;
  return `${pathPart}?${encodeQuery(pairs)}`;
}

// ../contracts/src/query-normalize.ts
/**
 * Normalize free-text query input for comparison: surrounding whitespace is
 * trimmed and the text is lower-cased.
 *
 * @param {string} value - Raw query text.
 * @returns {string} Trimmed, lower-cased text.
 */
function normalizeQueryText(value) {
  return value.trim().toLowerCase();
}
|
|
3239
3730
|
|
|
3240
3731
|
// ../contracts/src/citations.ts
|
|
@@ -4293,75 +4784,6 @@ var trafficEventsResponseSchema = z30.object({
|
|
|
4293
4784
|
events: z30.array(trafficEventEntrySchema)
|
|
4294
4785
|
});
|
|
4295
4786
|
|
|
4296
|
-
// ../contracts/src/embeddings.ts
|
|
4297
|
-
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has
 *   zero magnitude.
 * @throws {Error} When either vector is empty or the lengths differ.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size === 0 || b.length === 0) {
    throw new Error("cosineSimilarity: vectors must be non-empty");
  }
  if (size !== b.length) {
    throw new Error(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
  }
  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let index = 0;
  while (index < size) {
    const x = a[index];
    const y = b[index];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    index += 1;
  }
  // A zero vector has no direction; report "no similarity" instead of NaN.
  const hasZeroVector = sumSquaresA === 0 || sumSquaresB === 0;
  return hasZeroVector ? 0 : dotProduct / (Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB));
}
|
|
4315
|
-
/**
 * Group items whose vectors are transitively similar.
 *
 * Every pair (i, j) with i < j is compared via cosineSimilarity; pairs at or
 * above `threshold` are merged with a union-find structure, so two items end
 * up together whenever a chain of similar pairs links them.
 *
 * Clusters are returned in order of their lowest member index, and members
 * keep their original relative order.
 *
 * @param {Array} items - Items to cluster.
 * @param {Array} vectors - One vector per item, at the same index.
 * @param {number} threshold - Minimum similarity for a merge, within [0, 1].
 * @returns {Array[]} Clusters of items; [] when `items` is empty.
 * @throws {Error} When the threshold is below 0 or above 1, or when the
 *   items and vectors lengths differ.
 */
function clusterByCosine(items, vectors, threshold) {
  if (threshold < 0 || threshold > 1) {
    throw new Error(`clusterByCosine: threshold must be in [0, 1], got ${threshold}`);
  }
  if (items.length !== vectors.length) {
    throw new Error(`clusterByCosine: items/vectors length mismatch (${items.length} vs ${vectors.length})`);
  }
  const count = items.length;
  if (count === 0) return [];

  // Union-find forest: parent[i] === i marks i as the root of its set.
  const parent = [];
  for (let i = 0; i < count; i++) parent.push(i);

  const find = (start) => {
    let root = start;
    while (parent[root] !== root) root = parent[root];
    // Path compression: point every node on the walked path at the root.
    let node = start;
    while (parent[node] !== root) {
      const up = parent[node];
      parent[node] = root;
      node = up;
    }
    return root;
  };

  for (let i = 0; i < count; i++) {
    for (let j = i + 1; j < count; j++) {
      if (cosineSimilarity(vectors[i], vectors[j]) >= threshold) {
        const rootI = find(i);
        const rootJ = find(j);
        if (rootI !== rootJ) parent[rootI] = rootJ;
      }
    }
  }

  // Map insertion order follows ascending index, which fixes cluster order.
  const groups = new Map();
  for (let i = 0; i < count; i++) {
    const root = find(i);
    if (!groups.has(root)) groups.set(root, []);
    groups.get(root).push(items[i]);
  }
  return [...groups.values()];
}
|
|
4356
|
-
/**
 * Choose the member with the smallest `length` as the cluster's
 * representative. On ties the earliest member wins.
 *
 * @param {ArrayLike[]} cluster - Non-empty list of members exposing `length`.
 * @returns {*} The shortest member.
 * @throws {Error} When the cluster is empty.
 */
function pickClusterRepresentative(cluster) {
  if (cluster.length === 0) throw new Error("pickClusterRepresentative: cluster is empty");
  return cluster.reduce(
    // Strict "<" keeps the earlier member when lengths are equal.
    (shortest, candidate) => (candidate.length < shortest.length ? candidate : shortest),
    cluster[0]
  );
}
|
|
4364
|
-
|
|
4365
4787
|
// ../contracts/src/formatting.ts
|
|
4366
4788
|
function formatRatio(value) {
|
|
4367
4789
|
if (!Number.isFinite(value) || value === 0) return "0%";
|
|
@@ -4736,6 +5158,11 @@ export {
|
|
|
4736
5158
|
categorizeSource,
|
|
4737
5159
|
categorizeSourceWithCompetitors,
|
|
4738
5160
|
categoryLabel,
|
|
5161
|
+
clusterByCosine,
|
|
5162
|
+
pickClusterRepresentative,
|
|
5163
|
+
absolutizeProjectUrl,
|
|
5164
|
+
hostOf,
|
|
5165
|
+
normalizeUrlPath,
|
|
4739
5166
|
discoveryBucketSchema,
|
|
4740
5167
|
DiscoveryBuckets,
|
|
4741
5168
|
DEFAULT_DISCOVERY_PROMOTE_BUCKETS,
|
|
@@ -4754,6 +5181,11 @@ export {
|
|
|
4754
5181
|
discoveryPromoteRequestSchema,
|
|
4755
5182
|
discoveryPromotePreviewSchema,
|
|
4756
5183
|
discoveryPromoteResultSchema,
|
|
5184
|
+
discoveryHarvestDtoSchema,
|
|
5185
|
+
buildHarvestAnchorTerms,
|
|
5186
|
+
gateHarvestedSearchQueries,
|
|
5187
|
+
applyHarvestSemanticNovelty,
|
|
5188
|
+
aggregateHarvestedQueries,
|
|
4757
5189
|
surfaceClassLabel,
|
|
4758
5190
|
surfaceClassFromCompetitorType,
|
|
4759
5191
|
classifySurfaceFromCategory,
|
|
@@ -4792,6 +5224,7 @@ export {
|
|
|
4792
5224
|
backlinksInstallResultDtoSchema,
|
|
4793
5225
|
ccAvailableReleaseSchema,
|
|
4794
5226
|
ccCachedReleaseSchema,
|
|
5227
|
+
projectOverviewDtoSchema,
|
|
4795
5228
|
ContentActions,
|
|
4796
5229
|
contentActionLabel,
|
|
4797
5230
|
actionConfidenceLabel,
|
|
@@ -4815,9 +5248,7 @@ export {
|
|
|
4815
5248
|
CheckCategories,
|
|
4816
5249
|
doctorReportSchema,
|
|
4817
5250
|
summarizeCheckResults,
|
|
4818
|
-
|
|
4819
|
-
hostOf,
|
|
4820
|
-
normalizeUrlPath,
|
|
5251
|
+
normalizeQueryText,
|
|
4821
5252
|
citationVisibilityResponseSchema,
|
|
4822
5253
|
emptyCitationVisibility,
|
|
4823
5254
|
citationStateToCited,
|
|
@@ -4854,8 +5285,6 @@ export {
|
|
|
4854
5285
|
trafficEventKindSchema,
|
|
4855
5286
|
TrafficEventKinds,
|
|
4856
5287
|
trafficEventsResponseSchema,
|
|
4857
|
-
clusterByCosine,
|
|
4858
|
-
pickClusterRepresentative,
|
|
4859
5288
|
formatRatio,
|
|
4860
5289
|
formatNumber,
|
|
4861
5290
|
formatDate,
|