@ainyc/canonry 4.85.0 → 4.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. package/assets/agent-workspace/skills/canonry/references/canonry-cli.md +3 -1
  2. package/assets/assets/{BacklinksPage-CDAv0ggn.js → BacklinksPage-BNrvc-gV.js} +1 -1
  3. package/assets/assets/{ChartPrimitives-CnAmsyt7.js → ChartPrimitives-BlIkdUdy.js} +1 -1
  4. package/assets/assets/{ProjectPage-C9KEgRxD.js → ProjectPage-CAyx_xNr.js} +2 -2
  5. package/assets/assets/{RunRow-CVZ5o8fg.js → RunRow-CAPnKzi7.js} +1 -1
  6. package/assets/assets/{RunsPage-Bzy5c0MZ.js → RunsPage-idnuzKBn.js} +1 -1
  7. package/assets/assets/{SettingsPage-B1ocxPBe.js → SettingsPage-Bka67uJq.js} +1 -1
  8. package/assets/assets/{TrafficPage-D2zepQOC.js → TrafficPage-C_o-rA5o.js} +1 -1
  9. package/assets/assets/{TrafficSourceDetailPage-C7JuAkaK.js → TrafficSourceDetailPage-D_jvoSTV.js} +1 -1
  10. package/assets/assets/{arrow-left-Bv3CWylm.js → arrow-left-B-JfzARi.js} +1 -1
  11. package/assets/assets/{extract-error-message-BtVid5TP.js → extract-error-message-BhPbjIX6.js} +1 -1
  12. package/assets/assets/{index-DmNti_xn.js → index-uPSrDA8e.js} +61 -61
  13. package/assets/assets/{trash-2-BoimCsYz.js → trash-2-BbRvn40h.js} +1 -1
  14. package/assets/index.html +1 -1
  15. package/dist/{chunk-I2BJC3DT.js → chunk-23HGQV22.js} +439 -207
  16. package/dist/{chunk-3K3QRSYE.js → chunk-DLBQU3VG.js} +93 -2
  17. package/dist/{chunk-62YB3ML7.js → chunk-LLJPZKHG.js} +46 -1
  18. package/dist/{chunk-7BMSWI2K.js → chunk-SELXBOAP.js} +19 -4
  19. package/dist/cli.js +63 -4
  20. package/dist/index.js +4 -4
  21. package/dist/{intelligence-service-AHHBQKRD.js → intelligence-service-ZHUJKZRO.js} +2 -2
  22. package/dist/mcp.js +2 -2
  23. package/package.json +7 -7
@@ -1832,6 +1832,210 @@ import { z as z18 } from "zod";
1832
1832
 
1833
1833
  // ../contracts/src/discovery.ts
1834
1834
  import { z as z17 } from "zod";
1835
+
1836
+ // ../contracts/src/embeddings.ts
1837
+ function cosineSimilarity(a, b) {
1838
+ if (a.length === 0 || b.length === 0) {
1839
+ throw new Error("cosineSimilarity: vectors must be non-empty");
1840
+ }
1841
+ if (a.length !== b.length) {
1842
+ throw new Error(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
1843
+ }
1844
+ let dot = 0;
1845
+ let magA = 0;
1846
+ let magB = 0;
1847
+ for (let i = 0; i < a.length; i++) {
1848
+ dot += a[i] * b[i];
1849
+ magA += a[i] * a[i];
1850
+ magB += b[i] * b[i];
1851
+ }
1852
+ if (magA === 0 || magB === 0) return 0;
1853
+ return dot / (Math.sqrt(magA) * Math.sqrt(magB));
1854
+ }
1855
+ function clusterByCosine(items, vectors, threshold) {
1856
+ if (threshold < 0 || threshold > 1) {
1857
+ throw new Error(`clusterByCosine: threshold must be in [0, 1], got ${threshold}`);
1858
+ }
1859
+ if (items.length !== vectors.length) {
1860
+ throw new Error(`clusterByCosine: items/vectors length mismatch (${items.length} vs ${vectors.length})`);
1861
+ }
1862
+ if (items.length === 0) return [];
1863
+ const parent = items.map((_, i) => i);
1864
+ const find = (x) => {
1865
+ let root = x;
1866
+ while (parent[root] !== root) root = parent[root];
1867
+ let cur = x;
1868
+ while (parent[cur] !== root) {
1869
+ const next = parent[cur];
1870
+ parent[cur] = root;
1871
+ cur = next;
1872
+ }
1873
+ return root;
1874
+ };
1875
+ const union = (a, b) => {
1876
+ const ra = find(a);
1877
+ const rb = find(b);
1878
+ if (ra !== rb) parent[ra] = rb;
1879
+ };
1880
+ for (let i = 0; i < items.length; i++) {
1881
+ for (let j = i + 1; j < items.length; j++) {
1882
+ if (cosineSimilarity(vectors[i], vectors[j]) >= threshold) {
1883
+ union(i, j);
1884
+ }
1885
+ }
1886
+ }
1887
+ const byRoot = /* @__PURE__ */ new Map();
1888
+ for (let i = 0; i < items.length; i++) {
1889
+ const root = find(i);
1890
+ const existing = byRoot.get(root);
1891
+ if (existing) existing.push(i);
1892
+ else byRoot.set(root, [i]);
1893
+ }
1894
+ return Array.from(byRoot.values()).map((indices) => indices.map((idx) => items[idx]));
1895
+ }
1896
+ function pickClusterRepresentative(cluster) {
1897
+ if (cluster.length === 0) throw new Error("pickClusterRepresentative: cluster is empty");
1898
+ let best = cluster[0];
1899
+ for (let i = 1; i < cluster.length; i++) {
1900
+ if (cluster[i].length < best.length) best = cluster[i];
1901
+ }
1902
+ return best;
1903
+ }
1904
+
1905
+ // ../contracts/src/url-normalize.ts
1906
+ var STRIP_KEYS = /* @__PURE__ */ new Set([
1907
+ // Click identifiers
1908
+ "fbclid",
1909
+ "gclid",
1910
+ "msclkid",
1911
+ "ttclid",
1912
+ "li_fat_id",
1913
+ "igshid",
1914
+ "yclid",
1915
+ "dclid",
1916
+ "gbraid",
1917
+ "wbraid",
1918
+ "bingid",
1919
+ // Mailchimp
1920
+ "mc_cid",
1921
+ "mc_eid",
1922
+ // Google Analytics linkers
1923
+ "_ga",
1924
+ "_gl",
1925
+ // Google Tag Manager debug
1926
+ "gtm_latency",
1927
+ "gtm_debug",
1928
+ // WordPress internal noise
1929
+ "preview",
1930
+ "preview_id",
1931
+ "preview_nonce",
1932
+ "_thumbnail_id",
1933
+ // Common cache-busters/versioning
1934
+ "v",
1935
+ "ver",
1936
+ "version"
1937
+ ]);
1938
+ function shouldStrip(key) {
1939
+ if (STRIP_KEYS.has(key)) return true;
1940
+ if (key.startsWith("utm_")) return true;
1941
+ return false;
1942
+ }
1943
+ function parseQuery(query) {
1944
+ if (query === "") return [];
1945
+ return query.split("&").map((pair) => {
1946
+ const eq = pair.indexOf("=");
1947
+ if (eq === -1) return { key: pair, value: null };
1948
+ return { key: pair.slice(0, eq), value: pair.slice(eq + 1) };
1949
+ });
1950
+ }
1951
+ function encodeQuery(pairs) {
1952
+ return pairs.map((p) => p.value === null ? p.key : `${p.key}=${p.value}`).join("&");
1953
+ }
1954
+ function collapseRootIndex(path) {
1955
+ if (path === "/index.html" || path === "/index.php") return "/";
1956
+ return path;
1957
+ }
1958
+ function dropTrailingSlash(path) {
1959
+ if (path.length > 1 && path.endsWith("/")) {
1960
+ return path.replace(/\/+$/, "");
1961
+ }
1962
+ return path;
1963
+ }
1964
+ function absolutizeProjectUrl(url, canonicalDomain) {
1965
+ if (!url) return "";
1966
+ const trimmed = url.trim();
1967
+ if (!trimmed) return "";
1968
+ if (/^https?:\/\//i.test(trimmed)) return trimmed;
1969
+ if (trimmed.startsWith("//")) return `https:${trimmed}`;
1970
+ const host = canonicalDomain.trim().replace(/^https?:\/\//i, "").replace(/\/+$/, "");
1971
+ if (!host) return trimmed;
1972
+ if (trimmed.startsWith("/")) return `https://${host}${trimmed}`;
1973
+ return `https://${host}/${trimmed}`;
1974
+ }
1975
+ function hostOf(value) {
1976
+ if (value == null) return null;
1977
+ const trimmed = value.trim();
1978
+ if (!trimmed) return null;
1979
+ try {
1980
+ const url = trimmed.includes("://") ? new URL(trimmed) : new URL(`https://${trimmed}`);
1981
+ return url.hostname.replace(/^www\./, "").toLowerCase();
1982
+ } catch {
1983
+ return null;
1984
+ }
1985
+ }
1986
+ function normalizeUrlPath(input) {
1987
+ if (input == null) return null;
1988
+ let trimmed = input.trim();
1989
+ if (trimmed === "") return null;
1990
+ trimmed = trimmed.replace(/&nbsp;/g, " ").replace(/\s+/g, " ").trim();
1991
+ if (trimmed === "" || trimmed === "/") return "/";
1992
+ if (trimmed === "(not set)") return null;
1993
+ trimmed = trimmed.replace(/([a-z0-9])[).]+$/i, "$1");
1994
+ if (trimmed.startsWith("/)") || trimmed.startsWith("/ ")) {
1995
+ trimmed = "/";
1996
+ }
1997
+ if (trimmed.includes(" ")) {
1998
+ trimmed = trimmed.split(" ")[0];
1999
+ }
2000
+ if (trimmed === "" || trimmed === "/") return "/";
2001
+ let pathPart;
2002
+ let queryPart;
2003
+ if (/^https?:\/\//i.test(trimmed)) {
2004
+ let url;
2005
+ try {
2006
+ url = new URL(trimmed);
2007
+ } catch {
2008
+ return null;
2009
+ }
2010
+ pathPart = url.pathname || "/";
2011
+ queryPart = url.search.startsWith("?") ? url.search.slice(1) : url.search;
2012
+ } else {
2013
+ let raw = trimmed;
2014
+ const hashIdx = raw.indexOf("#");
2015
+ if (hashIdx !== -1) raw = raw.slice(0, hashIdx);
2016
+ const qIdx = raw.indexOf("?");
2017
+ if (qIdx === -1) {
2018
+ pathPart = raw;
2019
+ queryPart = "";
2020
+ } else {
2021
+ pathPart = raw.slice(0, qIdx);
2022
+ queryPart = raw.slice(qIdx + 1);
2023
+ }
2024
+ }
2025
+ if (pathPart === "") pathPart = "/";
2026
+ pathPart = collapseRootIndex(pathPart);
2027
+ pathPart = dropTrailingSlash(pathPart);
2028
+ const pairs = parseQuery(queryPart).filter((p) => !shouldStrip(p.key));
2029
+ pairs.sort((a, b) => {
2030
+ if (a.key < b.key) return -1;
2031
+ if (a.key > b.key) return 1;
2032
+ return 0;
2033
+ });
2034
+ if (pairs.length === 0) return pathPart;
2035
+ return `${pathPart}?${encodeQuery(pairs)}`;
2036
+ }
2037
+
2038
+ // ../contracts/src/discovery.ts
1835
2039
  var discoveryBucketSchema = z17.enum(["cited", "aspirational", "wasted-surface"]);
1836
2040
  var DiscoveryBuckets = discoveryBucketSchema.enum;
1837
2041
  var DEFAULT_DISCOVERY_PROMOTE_BUCKETS = [
@@ -1963,6 +2167,231 @@ var queryProvenanceSchema = z17.union([
1963
2167
  z17.literal("cli"),
1964
2168
  z17.string().regex(/^discovery:.+$/)
1965
2169
  ]);
2170
+ var DISCOVERY_HARVEST_MAX_WORDS = 12;
2171
+ var DISCOVERY_HARVEST_MIN_CHARS = 3;
2172
+ var DISCOVERY_HARVEST_NOVELTY_THRESHOLD = DISCOVERY_DEFAULT_DEDUP_THRESHOLD;
2173
+ var DISCOVERY_HARVEST_MIN_ANCHOR_TERMS = 1;
2174
+ var DISCOVERY_HARVEST_PHONE_DIGITS = 7;
2175
+ var HARVEST_SIGNIFICANT_TOKEN_MIN = 4;
2176
+ var HARVEST_STOPWORDS = /* @__PURE__ */ new Set([
2177
+ "the",
2178
+ "and",
2179
+ "for",
2180
+ "with",
2181
+ "near",
2182
+ "best",
2183
+ "top",
2184
+ "your",
2185
+ "you",
2186
+ "are",
2187
+ "how",
2188
+ "what",
2189
+ "does",
2190
+ "this",
2191
+ "that",
2192
+ "from",
2193
+ "into",
2194
+ "about",
2195
+ "who",
2196
+ "why"
2197
+ ]);
2198
+ var HARVEST_NAV_MARKERS = [
2199
+ "address",
2200
+ "directions",
2201
+ "hours",
2202
+ "login",
2203
+ "log in",
2204
+ "sign in",
2205
+ "signin",
2206
+ "phone number",
2207
+ "zip code",
2208
+ "postal code",
2209
+ "email address"
2210
+ ];
2211
+ var discoveryHarvestCandidateSchema = z17.object({
2212
+ query: z17.string().min(1),
2213
+ probeHits: z17.number().int().positive()
2214
+ });
2215
+ var discoveryHarvestStatsSchema = z17.object({
2216
+ /** Distinct candidates extracted before gating. */
2217
+ rawCandidates: z17.number().int().nonnegative(),
2218
+ /** Candidates that passed every gate. */
2219
+ admitted: z17.number().int().nonnegative(),
2220
+ /** Per-reason rejection tally (each rejected candidate counted exactly once,
2221
+ * at the first gate it failed). Lexical-gate order: belowFloor → length →
2222
+ * navigational → duplicate (EXACT already-tracked) → offAnchor; then
2223
+ * `semanticDuplicate` for candidates dropped by the cosine novelty pass.
2224
+ * Invariant: `admitted + Σ(rejected) === rawCandidates`. */
2225
+ rejected: z17.object({
2226
+ belowFloor: z17.number().int().nonnegative(),
2227
+ length: z17.number().int().nonnegative(),
2228
+ navigational: z17.number().int().nonnegative(),
2229
+ /** Dropped by the cheap exact-match check against the tracked basket. */
2230
+ duplicate: z17.number().int().nonnegative(),
2231
+ offAnchor: z17.number().int().nonnegative(),
2232
+ /** Dropped by the embedding cosine novelty pass (a paraphrase / synonym /
2233
+ * stem variant of a tracked query that exact match can't see). 0 when the
2234
+ * semantic pass did not run — see `semanticNoveltyApplied`. */
2235
+ semanticDuplicate: z17.number().int().nonnegative()
2236
+ })
2237
+ });
2238
+ var discoveryHarvestDtoSchema = z17.object({
2239
+ sessionId: z17.string(),
2240
+ projectId: z17.string(),
2241
+ /** The provider whose probes were harvested (the session's seed provider).
2242
+ * Discovery is Gemini-only today; carried so a future multi-provider
2243
+ * discovery can attribute candidates. */
2244
+ provider: z17.string(),
2245
+ status: discoverySessionStatusSchema,
2246
+ /** Recurrence floor applied: a candidate must have appeared in ≥ this many
2247
+ * distinct probes to be admitted. */
2248
+ minProbeHits: z17.number().int().positive(),
2249
+ /** Whether the subject-anchor filter actually ran (requested AND the corpus
2250
+ * had ≥ `DISCOVERY_HARVEST_MIN_ANCHOR_TERMS` significant terms). */
2251
+ anchorApplied: z17.boolean(),
2252
+ /** Whether the embedding cosine novelty pass ran. False when embeddings were
2253
+ * unavailable (no Gemini key / no tracked queries / no candidates), in which
2254
+ * case novelty fell back to the cheap exact-match check only. */
2255
+ semanticNoveltyApplied: z17.boolean(),
2256
+ candidates: z17.array(discoveryHarvestCandidateSchema),
2257
+ stats: discoveryHarvestStatsSchema
2258
+ });
2259
+ function normalizeHarvestQuery(query) {
2260
+ return query.trim().replace(/\s+/g, " ").toLowerCase();
2261
+ }
2262
+ function harvestTokens(query) {
2263
+ return normalizeHarvestQuery(query).split(/[^a-z0-9]+/).filter(Boolean);
2264
+ }
2265
+ function significantHarvestTokens(query) {
2266
+ return harvestTokens(query).filter(
2267
+ (t) => t.length >= HARVEST_SIGNIFICANT_TOKEN_MIN && !HARVEST_STOPWORDS.has(t)
2268
+ );
2269
+ }
2270
+ function longestDigitRun(query) {
2271
+ let max = 0;
2272
+ let run = 0;
2273
+ for (const ch of query) {
2274
+ if (ch >= "0" && ch <= "9") {
2275
+ run++;
2276
+ if (run > max) max = run;
2277
+ } else {
2278
+ run = 0;
2279
+ }
2280
+ }
2281
+ return max;
2282
+ }
2283
+ function isNavigationalHarvestQuery(query) {
2284
+ const norm = normalizeHarvestQuery(query);
2285
+ if (longestDigitRun(norm) >= DISCOVERY_HARVEST_PHONE_DIGITS) return true;
2286
+ const tokens = new Set(harvestTokens(norm));
2287
+ for (const marker of HARVEST_NAV_MARKERS) {
2288
+ if (marker.includes(" ")) {
2289
+ const re = new RegExp(`(?<![a-z0-9])${marker.replace(/ /g, "\\s+")}(?![a-z0-9])`);
2290
+ if (re.test(norm)) return true;
2291
+ } else if (tokens.has(marker)) {
2292
+ return true;
2293
+ }
2294
+ }
2295
+ return false;
2296
+ }
2297
+ function buildHarvestAnchorTerms(corpus, domains = []) {
2298
+ const set = /* @__PURE__ */ new Set();
2299
+ for (const text of corpus) {
2300
+ for (const token of significantHarvestTokens(text)) set.add(token);
2301
+ }
2302
+ for (const domain of domains) {
2303
+ const host = hostOf(domain);
2304
+ if (!host) continue;
2305
+ const label = host.replace(/\.[a-z0-9]+$/, "");
2306
+ for (const token of significantHarvestTokens(label)) set.add(token);
2307
+ }
2308
+ return [...set];
2309
+ }
2310
+ function gateHarvestedSearchQueries(input) {
2311
+ const minProbeHits = Math.max(1, Math.floor(input.minProbeHits ?? 1));
2312
+ const anchorTermSet = new Set(input.anchorTerms ?? []);
2313
+ const applyAnchor = (input.applyAnchor ?? true) && anchorTermSet.size >= DISCOVERY_HARVEST_MIN_ANCHOR_TERMS;
2314
+ const trackedNorm = new Set(
2315
+ input.trackedQueries.map(normalizeHarvestQuery).filter(Boolean)
2316
+ );
2317
+ const stats = {
2318
+ rawCandidates: input.candidates.length,
2319
+ admitted: 0,
2320
+ rejected: { belowFloor: 0, length: 0, navigational: 0, duplicate: 0, offAnchor: 0, semanticDuplicate: 0 }
2321
+ };
2322
+ const admitted = [];
2323
+ for (const candidate of input.candidates) {
2324
+ if (candidate.probeHits < minProbeHits) {
2325
+ stats.rejected.belowFloor++;
2326
+ continue;
2327
+ }
2328
+ const norm = normalizeHarvestQuery(candidate.query);
2329
+ const words = norm ? norm.split(" ").length : 0;
2330
+ if (norm.length < DISCOVERY_HARVEST_MIN_CHARS || words > DISCOVERY_HARVEST_MAX_WORDS) {
2331
+ stats.rejected.length++;
2332
+ continue;
2333
+ }
2334
+ if (isNavigationalHarvestQuery(norm)) {
2335
+ stats.rejected.navigational++;
2336
+ continue;
2337
+ }
2338
+ if (trackedNorm.has(norm)) {
2339
+ stats.rejected.duplicate++;
2340
+ continue;
2341
+ }
2342
+ if (applyAnchor) {
2343
+ const sig = significantHarvestTokens(norm);
2344
+ if (!sig.some((t) => anchorTermSet.has(t))) {
2345
+ stats.rejected.offAnchor++;
2346
+ continue;
2347
+ }
2348
+ }
2349
+ admitted.push({ query: norm, probeHits: candidate.probeHits });
2350
+ stats.admitted++;
2351
+ }
2352
+ admitted.sort((a, b) => b.probeHits - a.probeHits || a.query.localeCompare(b.query));
2353
+ return { admitted, anchorApplied: applyAnchor, stats };
2354
+ }
2355
+ function applyHarvestSemanticNovelty(input) {
2356
+ const { result, candidateVectors, trackedVectors } = input;
2357
+ const threshold = input.threshold ?? DISCOVERY_HARVEST_NOVELTY_THRESHOLD;
2358
+ if (candidateVectors.length !== result.admitted.length || trackedVectors.length === 0) {
2359
+ return result;
2360
+ }
2361
+ const admitted = [];
2362
+ let semanticDuplicate = 0;
2363
+ for (let i = 0; i < result.admitted.length; i++) {
2364
+ const vec = candidateVectors[i];
2365
+ const isDup = trackedVectors.some((t) => cosineSimilarity(vec, t) >= threshold);
2366
+ if (isDup) semanticDuplicate++;
2367
+ else admitted.push(result.admitted[i]);
2368
+ }
2369
+ return {
2370
+ admitted,
2371
+ anchorApplied: result.anchorApplied,
2372
+ stats: {
2373
+ ...result.stats,
2374
+ admitted: admitted.length,
2375
+ rejected: { ...result.stats.rejected, semanticDuplicate }
2376
+ }
2377
+ };
2378
+ }
2379
+ function aggregateHarvestedQueries(probes) {
2380
+ const counts = /* @__PURE__ */ new Map();
2381
+ for (const probe of probes) {
2382
+ const seenInProbe = /* @__PURE__ */ new Set();
2383
+ for (const raw of probe.searchQueries) {
2384
+ if (typeof raw !== "string") continue;
2385
+ const norm = normalizeHarvestQuery(raw);
2386
+ if (!norm || seenInProbe.has(norm)) continue;
2387
+ seenInProbe.add(norm);
2388
+ const existing = counts.get(norm);
2389
+ if (existing) existing.probeHits++;
2390
+ else counts.set(norm, { query: norm, probeHits: 1 });
2391
+ }
2392
+ }
2393
+ return [...counts.values()];
2394
+ }
1966
2395
 
1967
2396
  // ../contracts/src/surface-class.ts
1968
2397
  var surfaceClassSchema = z18.enum([
@@ -3104,139 +3533,6 @@ function summarizeCheckResults(results) {
3104
3533
  return summary;
3105
3534
  }
3106
3535
 
3107
- // ../contracts/src/url-normalize.ts
3108
- var STRIP_KEYS = /* @__PURE__ */ new Set([
3109
- // Click identifiers
3110
- "fbclid",
3111
- "gclid",
3112
- "msclkid",
3113
- "ttclid",
3114
- "li_fat_id",
3115
- "igshid",
3116
- "yclid",
3117
- "dclid",
3118
- "gbraid",
3119
- "wbraid",
3120
- "bingid",
3121
- // Mailchimp
3122
- "mc_cid",
3123
- "mc_eid",
3124
- // Google Analytics linkers
3125
- "_ga",
3126
- "_gl",
3127
- // Google Tag Manager debug
3128
- "gtm_latency",
3129
- "gtm_debug",
3130
- // WordPress internal noise
3131
- "preview",
3132
- "preview_id",
3133
- "preview_nonce",
3134
- "_thumbnail_id",
3135
- // Common cache-busters/versioning
3136
- "v",
3137
- "ver",
3138
- "version"
3139
- ]);
3140
- function shouldStrip(key) {
3141
- if (STRIP_KEYS.has(key)) return true;
3142
- if (key.startsWith("utm_")) return true;
3143
- return false;
3144
- }
3145
- function parseQuery(query) {
3146
- if (query === "") return [];
3147
- return query.split("&").map((pair) => {
3148
- const eq = pair.indexOf("=");
3149
- if (eq === -1) return { key: pair, value: null };
3150
- return { key: pair.slice(0, eq), value: pair.slice(eq + 1) };
3151
- });
3152
- }
3153
- function encodeQuery(pairs) {
3154
- return pairs.map((p) => p.value === null ? p.key : `${p.key}=${p.value}`).join("&");
3155
- }
3156
- function collapseRootIndex(path) {
3157
- if (path === "/index.html" || path === "/index.php") return "/";
3158
- return path;
3159
- }
3160
- function dropTrailingSlash(path) {
3161
- if (path.length > 1 && path.endsWith("/")) {
3162
- return path.replace(/\/+$/, "");
3163
- }
3164
- return path;
3165
- }
3166
- function absolutizeProjectUrl(url, canonicalDomain) {
3167
- if (!url) return "";
3168
- const trimmed = url.trim();
3169
- if (!trimmed) return "";
3170
- if (/^https?:\/\//i.test(trimmed)) return trimmed;
3171
- if (trimmed.startsWith("//")) return `https:${trimmed}`;
3172
- const host = canonicalDomain.trim().replace(/^https?:\/\//i, "").replace(/\/+$/, "");
3173
- if (!host) return trimmed;
3174
- if (trimmed.startsWith("/")) return `https://${host}${trimmed}`;
3175
- return `https://${host}/${trimmed}`;
3176
- }
3177
- function hostOf(value) {
3178
- if (value == null) return null;
3179
- const trimmed = value.trim();
3180
- if (!trimmed) return null;
3181
- try {
3182
- const url = trimmed.includes("://") ? new URL(trimmed) : new URL(`https://${trimmed}`);
3183
- return url.hostname.replace(/^www\./, "").toLowerCase();
3184
- } catch {
3185
- return null;
3186
- }
3187
- }
3188
- function normalizeUrlPath(input) {
3189
- if (input == null) return null;
3190
- let trimmed = input.trim();
3191
- if (trimmed === "") return null;
3192
- trimmed = trimmed.replace(/&nbsp;/g, " ").replace(/\s+/g, " ").trim();
3193
- if (trimmed === "" || trimmed === "/") return "/";
3194
- if (trimmed === "(not set)") return null;
3195
- trimmed = trimmed.replace(/([a-z0-9])[).]+$/i, "$1");
3196
- if (trimmed.startsWith("/)") || trimmed.startsWith("/ ")) {
3197
- trimmed = "/";
3198
- }
3199
- if (trimmed.includes(" ")) {
3200
- trimmed = trimmed.split(" ")[0];
3201
- }
3202
- if (trimmed === "" || trimmed === "/") return "/";
3203
- let pathPart;
3204
- let queryPart;
3205
- if (/^https?:\/\//i.test(trimmed)) {
3206
- let url;
3207
- try {
3208
- url = new URL(trimmed);
3209
- } catch {
3210
- return null;
3211
- }
3212
- pathPart = url.pathname || "/";
3213
- queryPart = url.search.startsWith("?") ? url.search.slice(1) : url.search;
3214
- } else {
3215
- let raw = trimmed;
3216
- const hashIdx = raw.indexOf("#");
3217
- if (hashIdx !== -1) raw = raw.slice(0, hashIdx);
3218
- const qIdx = raw.indexOf("?");
3219
- if (qIdx === -1) {
3220
- pathPart = raw;
3221
- queryPart = "";
3222
- } else {
3223
- pathPart = raw.slice(0, qIdx);
3224
- queryPart = raw.slice(qIdx + 1);
3225
- }
3226
- }
3227
- if (pathPart === "") pathPart = "/";
3228
- pathPart = collapseRootIndex(pathPart);
3229
- pathPart = dropTrailingSlash(pathPart);
3230
- const pairs = parseQuery(queryPart).filter((p) => !shouldStrip(p.key));
3231
- pairs.sort((a, b) => {
3232
- if (a.key < b.key) return -1;
3233
- if (a.key > b.key) return 1;
3234
- return 0;
3235
- });
3236
- if (pairs.length === 0) return pathPart;
3237
- return `${pathPart}?${encodeQuery(pairs)}`;
3238
- }
3239
-
3240
3536
  // ../contracts/src/citations.ts
3241
3537
  import { z as z27 } from "zod";
3242
3538
  var citationCoverageProviderSchema = z27.object({
@@ -4293,75 +4589,6 @@ var trafficEventsResponseSchema = z30.object({
4293
4589
  events: z30.array(trafficEventEntrySchema)
4294
4590
  });
4295
4591
 
4296
- // ../contracts/src/embeddings.ts
4297
- function cosineSimilarity(a, b) {
4298
- if (a.length === 0 || b.length === 0) {
4299
- throw new Error("cosineSimilarity: vectors must be non-empty");
4300
- }
4301
- if (a.length !== b.length) {
4302
- throw new Error(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
4303
- }
4304
- let dot = 0;
4305
- let magA = 0;
4306
- let magB = 0;
4307
- for (let i = 0; i < a.length; i++) {
4308
- dot += a[i] * b[i];
4309
- magA += a[i] * a[i];
4310
- magB += b[i] * b[i];
4311
- }
4312
- if (magA === 0 || magB === 0) return 0;
4313
- return dot / (Math.sqrt(magA) * Math.sqrt(magB));
4314
- }
4315
- function clusterByCosine(items, vectors, threshold) {
4316
- if (threshold < 0 || threshold > 1) {
4317
- throw new Error(`clusterByCosine: threshold must be in [0, 1], got ${threshold}`);
4318
- }
4319
- if (items.length !== vectors.length) {
4320
- throw new Error(`clusterByCosine: items/vectors length mismatch (${items.length} vs ${vectors.length})`);
4321
- }
4322
- if (items.length === 0) return [];
4323
- const parent = items.map((_, i) => i);
4324
- const find = (x) => {
4325
- let root = x;
4326
- while (parent[root] !== root) root = parent[root];
4327
- let cur = x;
4328
- while (parent[cur] !== root) {
4329
- const next = parent[cur];
4330
- parent[cur] = root;
4331
- cur = next;
4332
- }
4333
- return root;
4334
- };
4335
- const union = (a, b) => {
4336
- const ra = find(a);
4337
- const rb = find(b);
4338
- if (ra !== rb) parent[ra] = rb;
4339
- };
4340
- for (let i = 0; i < items.length; i++) {
4341
- for (let j = i + 1; j < items.length; j++) {
4342
- if (cosineSimilarity(vectors[i], vectors[j]) >= threshold) {
4343
- union(i, j);
4344
- }
4345
- }
4346
- }
4347
- const byRoot = /* @__PURE__ */ new Map();
4348
- for (let i = 0; i < items.length; i++) {
4349
- const root = find(i);
4350
- const existing = byRoot.get(root);
4351
- if (existing) existing.push(i);
4352
- else byRoot.set(root, [i]);
4353
- }
4354
- return Array.from(byRoot.values()).map((indices) => indices.map((idx) => items[idx]));
4355
- }
4356
- function pickClusterRepresentative(cluster) {
4357
- if (cluster.length === 0) throw new Error("pickClusterRepresentative: cluster is empty");
4358
- let best = cluster[0];
4359
- for (let i = 1; i < cluster.length; i++) {
4360
- if (cluster[i].length < best.length) best = cluster[i];
4361
- }
4362
- return best;
4363
- }
4364
-
4365
4592
  // ../contracts/src/formatting.ts
4366
4593
  function formatRatio(value) {
4367
4594
  if (!Number.isFinite(value) || value === 0) return "0%";
@@ -4736,6 +4963,11 @@ export {
4736
4963
  categorizeSource,
4737
4964
  categorizeSourceWithCompetitors,
4738
4965
  categoryLabel,
4966
+ clusterByCosine,
4967
+ pickClusterRepresentative,
4968
+ absolutizeProjectUrl,
4969
+ hostOf,
4970
+ normalizeUrlPath,
4739
4971
  discoveryBucketSchema,
4740
4972
  DiscoveryBuckets,
4741
4973
  DEFAULT_DISCOVERY_PROMOTE_BUCKETS,
@@ -4754,6 +4986,11 @@ export {
4754
4986
  discoveryPromoteRequestSchema,
4755
4987
  discoveryPromotePreviewSchema,
4756
4988
  discoveryPromoteResultSchema,
4989
+ discoveryHarvestDtoSchema,
4990
+ buildHarvestAnchorTerms,
4991
+ gateHarvestedSearchQueries,
4992
+ applyHarvestSemanticNovelty,
4993
+ aggregateHarvestedQueries,
4757
4994
  surfaceClassLabel,
4758
4995
  surfaceClassFromCompetitorType,
4759
4996
  classifySurfaceFromCategory,
@@ -4815,9 +5052,6 @@ export {
4815
5052
  CheckCategories,
4816
5053
  doctorReportSchema,
4817
5054
  summarizeCheckResults,
4818
- absolutizeProjectUrl,
4819
- hostOf,
4820
- normalizeUrlPath,
4821
5055
  citationVisibilityResponseSchema,
4822
5056
  emptyCitationVisibility,
4823
5057
  citationStateToCited,
@@ -4854,8 +5088,6 @@ export {
4854
5088
  trafficEventKindSchema,
4855
5089
  TrafficEventKinds,
4856
5090
  trafficEventsResponseSchema,
4857
- clusterByCosine,
4858
- pickClusterRepresentative,
4859
5091
  formatRatio,
4860
5092
  formatNumber,
4861
5093
  formatDate,