@mneme-ai/core 1.67.1 → 1.69.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ascension/ascension.test.d.ts +13 -0
- package/dist/ascension/ascension.test.d.ts.map +1 -0
- package/dist/ascension/ascension.test.js +313 -0
- package/dist/ascension/ascension.test.js.map +1 -0
- package/dist/ascension/circadian_heartbeat.d.ts +70 -0
- package/dist/ascension/circadian_heartbeat.d.ts.map +1 -0
- package/dist/ascension/circadian_heartbeat.js +176 -0
- package/dist/ascension/circadian_heartbeat.js.map +1 -0
- package/dist/ascension/conformal_apoptosis.d.ts +98 -0
- package/dist/ascension/conformal_apoptosis.d.ts.map +1 -0
- package/dist/ascension/conformal_apoptosis.js +175 -0
- package/dist/ascension/conformal_apoptosis.js.map +1 -0
- package/dist/ascension/inbox_tier.d.ts +74 -0
- package/dist/ascension/inbox_tier.d.ts.map +1 -0
- package/dist/ascension/inbox_tier.js +119 -0
- package/dist/ascension/inbox_tier.js.map +1 -0
- package/dist/ascension/index.d.ts +73 -0
- package/dist/ascension/index.d.ts.map +1 -0
- package/dist/ascension/index.js +126 -0
- package/dist/ascension/index.js.map +1 -0
- package/dist/ascension/prophetic_embedder.d.ts +38 -0
- package/dist/ascension/prophetic_embedder.d.ts.map +1 -0
- package/dist/ascension/prophetic_embedder.js +85 -0
- package/dist/ascension/prophetic_embedder.js.map +1 -0
- package/dist/ascension/sovereign_mode.d.ts +46 -0
- package/dist/ascension/sovereign_mode.d.ts.map +1 -0
- package/dist/ascension/sovereign_mode.js +94 -0
- package/dist/ascension/sovereign_mode.js.map +1 -0
- package/dist/ascension/superposed_antivirus.d.ts +62 -0
- package/dist/ascension/superposed_antivirus.d.ts.map +1 -0
- package/dist/ascension/superposed_antivirus.js +198 -0
- package/dist/ascension/superposed_antivirus.js.map +1 -0
- package/dist/hyperscan/bench.d.ts +32 -0
- package/dist/hyperscan/bench.d.ts.map +1 -0
- package/dist/hyperscan/bench.js +81 -0
- package/dist/hyperscan/bench.js.map +1 -0
- package/dist/hyperscan/cross_citation.d.ts +51 -0
- package/dist/hyperscan/cross_citation.d.ts.map +1 -0
- package/dist/hyperscan/cross_citation.js +140 -0
- package/dist/hyperscan/cross_citation.js.map +1 -0
- package/dist/hyperscan/cross_source_qa.d.ts +49 -0
- package/dist/hyperscan/cross_source_qa.d.ts.map +1 -0
- package/dist/hyperscan/cross_source_qa.js +219 -0
- package/dist/hyperscan/cross_source_qa.js.map +1 -0
- package/dist/hyperscan/hyperscan.test.d.ts +5 -0
- package/dist/hyperscan/hyperscan.test.d.ts.map +1 -0
- package/dist/hyperscan/hyperscan.test.js +191 -0
- package/dist/hyperscan/hyperscan.test.js.map +1 -0
- package/dist/hyperscan/hyperscan_molecule.d.ts +76 -0
- package/dist/hyperscan/hyperscan_molecule.d.ts.map +1 -0
- package/dist/hyperscan/hyperscan_molecule.js +144 -0
- package/dist/hyperscan/hyperscan_molecule.js.map +1 -0
- package/dist/hyperscan/index.d.ts +26 -0
- package/dist/hyperscan/index.d.ts.map +1 -0
- package/dist/hyperscan/index.js +26 -0
- package/dist/hyperscan/index.js.map +1 -0
- package/dist/hyperscan/nucleus_dust_htc.d.ts +71 -0
- package/dist/hyperscan/nucleus_dust_htc.d.ts.map +1 -0
- package/dist/hyperscan/nucleus_dust_htc.js +242 -0
- package/dist/hyperscan/nucleus_dust_htc.js.map +1 -0
- package/dist/hyperscan/prose_shadow.d.ts +50 -0
- package/dist/hyperscan/prose_shadow.d.ts.map +1 -0
- package/dist/hyperscan/prose_shadow.js +225 -0
- package/dist/hyperscan/prose_shadow.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +19 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v1.69.0 -- HYPERSCAN MOLECULE (shape-shifting data structure).
|
|
3
|
+
*
|
|
4
|
+
* The wild bit: a single MOLECULE holds FOUR orthogonal forms of one
|
|
5
|
+
* piece of knowledge. The caller asks for whichever FORM best
|
|
6
|
+
* matches their query class -- the molecule "shape-shifts" without
|
|
7
|
+
* recomputation.
|
|
8
|
+
*
|
|
9
|
+
* textForm plain-English summary (~30 tokens)
|
|
10
|
+
* vectorForm bag-of-words vector for cosine retrieval
|
|
11
|
+
* structuralForm AST-shape signature (functions / paths / symbols)
|
|
12
|
+
* temporalForm commit-chain neighborhood (predecessor / successor SHAs)
|
|
13
|
+
*
|
|
14
|
+
* "Mix algorithm สุดโต่ง": at query time the caller picks ONE of N
|
|
15
|
+
* retrieval algorithms (Jaccard / cosine / structural-match /
|
|
16
|
+
* temporal-distance / hybrid-weighted) and the molecule emits the
|
|
17
|
+
* matching form. The forms are NEVER recomputed -- they are pre-built.
|
|
18
|
+
*
|
|
19
|
+
* This is the data structure the user asked for: "โมเลกุลใหม่
|
|
20
|
+
* มารองรับ" + "การสลับร่างแปลงร่าง" + "mix algorithm บ้าๆสุดโต่ง".
|
|
21
|
+
*/
|
|
22
|
+
import { createHash } from "node:crypto";
|
|
23
|
+
/** Tokens dropped from every bag-of-words vector: common English words and JS keywords. */
const STOPWORDS = new Set([
    "the", "and", "for", "with", "this", "that", "from", "into", "when",
    "function", "const", "var", "let", "true", "false", "null", "return", "import", "export",
]);

/**
 * Build a term-frequency map for `text`.
 *
 * The text is lower-cased, every character outside `[a-z0-9_]` and whitespace
 * becomes a space, and the result is split on whitespace. Tokens shorter than
 * three characters and tokens listed in STOPWORDS are discarded.
 *
 * @param text Raw text to tokenize.
 * @returns Map from token to occurrence count, in first-seen order.
 */
function bagOfWords(text) {
    const normalized = text.toLowerCase().replace(/[^a-z0-9_\s]/g, " ");
    const counts = new Map();
    normalized.split(/\s+/).forEach((token) => {
        const keep = token.length >= 3 && !STOPWORDS.has(token);
        if (keep) {
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
    });
    return counts;
}
|
|
36
|
+
/**
 * Extract a regex-based structural signature from `text`.
 *
 * Four categories are collected, each de-duplicated in first-seen order and
 * capped at ten entries:
 *  - functions: identifiers of 3+ word characters directly followed by "("
 *    (an optional leading `function` keyword is not part of the capture)
 *  - classes:   identifiers following the `class` keyword
 *  - paths:     path-like tokens ending in one of the listed file extensions
 *  - symbols:   upper-case words (letters and underscores) of 3+ characters
 *
 * @param text Text or code excerpt to scan.
 * @returns Object with `functions`, `classes`, `paths` and `symbols` arrays.
 */
function extractStructural(text) {
    // Distinct values in first-seen order, at most ten of them.
    const firstTen = (values) => Array.from(new Set(values)).slice(0, 10);
    // All matches of `pattern`, projected to the requested capture group.
    const capture = (pattern, group) => Array.from(text.matchAll(pattern), (hit) => hit[group]);
    const functionNames = capture(/\b(?:function\s+)?(\w{3,})\s*\(/g, 1).filter((name) => !!name);
    const classNames = capture(/\bclass\s+(\w{2,})/g, 1);
    const filePaths = capture(/[\w./_-]+\.(?:ts|tsx|js|mjs|cjs|jsx|json|md|sql|yml|yaml|py|rs|go)/g, 0);
    const upperSymbols = capture(/\b[A-Z][A-Z_]{2,}\b/g, 0);
    return {
        functions: firstTen(functionNames),
        classes: firstTen(classNames),
        paths: firstTen(filePaths),
        symbols: firstTen(upperSymbols),
    };
}
|
|
48
|
+
/**
 * Build a molecule from a text excerpt plus provenance.
 *
 * All four forms are computed once here:
 *  - textForm:       the first 280 characters of the text
 *  - vectorForm:     bag-of-words term counts
 *  - structuralForm: regex-extracted functions / classes / paths / symbols
 *  - temporalForm:   predecessors, successors and epoch copied from the input
 *                    (defaulting to `[]`, `[]` and `""`)
 *
 * The id is the first 16 hex characters of the SHA-256 of
 * `source.kind|source.ref|text`.
 *
 * @param input Object with `text`, `source` ({ kind, ref }) and optional
 *              `predecessors`, `successors`, `epoch`.
 * @returns The assembled molecule.
 */
export function buildMolecule(input) {
    const { source, text } = input;
    const fingerprint = createHash("sha256");
    fingerprint.update(`${source.kind}|${source.ref}|${text}`);
    const temporalForm = {
        predecessors: input.predecessors ?? [],
        successors: input.successors ?? [],
        epoch: input.epoch ?? "",
    };
    return {
        id: fingerprint.digest("hex").slice(0, 16),
        textForm: text.slice(0, 280),
        vectorForm: bagOfWords(text),
        structuralForm: extractStructural(text),
        temporalForm,
        source,
    };
}
|
|
64
|
+
// ─── Retrieval algorithms (the "mix") ────────────────────────────────
|
|
65
|
+
/**
 * Cosine similarity between two term-count maps.
 *
 * @param a Map from term to count.
 * @param b Map from term to count.
 * @returns dot(a, b) / (|a| * |b|), or 0 when either vector has zero norm.
 */
function cosine(a, b) {
    let dot = 0;
    let normA = 0;
    a.forEach((count, term) => {
        normA += count * count;
        if (b.has(term)) {
            dot += count * b.get(term);
        }
    });
    let normB = 0;
    b.forEach((count) => {
        normB += count * count;
    });
    if (normA === 0 || normB === 0) {
        return 0;
    }
    return dot / Math.sqrt(normA * normB);
}
|
|
76
|
+
/**
 * Jaccard similarity of two sets: |a ∩ b| / |a ∪ b|.
 *
 * @param a First set.
 * @param b Second set.
 * @returns A value in [0, 1]; 0 when either set is empty.
 */
function jaccard(a, b) {
    if (a.size === 0 || b.size === 0) {
        return 0;
    }
    const shared = [...a].filter((item) => b.has(item)).length;
    // |a ∪ b| = |a| + |b| - |a ∩ b|
    return shared / (a.size + b.size - shared);
}
|
|
85
|
+
/**
 * Overlap score between two structural signatures.
 *
 * For each of the four categories (functions, classes, paths, symbols) the
 * number of query entries also present in the molecule is counted, and the
 * larger of the two distinct-entry counts is added to the denominator.
 *
 * @param query Structural signature of the query.
 * @param mol   Structural signature of the molecule.
 * @returns matched / total across all categories, or 0 when both are empty.
 */
function structuralMatch(query, mol) {
    const tally = { total: 0, matched: 0 };
    ["functions", "classes", "paths", "symbols"].forEach((category) => {
        const wanted = new Set(query[category]);
        const present = new Set(mol[category]);
        tally.total += Math.max(wanted.size, present.size);
        wanted.forEach((entry) => {
            if (present.has(entry)) {
                tally.matched += 1;
            }
        });
    });
    if (tally.total === 0) {
        return 0;
    }
    return tally.matched / tally.total;
}
|
|
95
|
+
/**
 * Temporal proximity score between a query epoch and a molecule's epoch.
 *
 * Both epochs are parsed with `Date.parse`. The score is 1 / (1 + days / 30),
 * so identical timestamps score 1 and a 30-day gap scores 0.5.
 *
 * @param queryEpoch Date string for the query; an empty value yields 0.
 * @param mol        Molecule whose `temporalForm.epoch` is compared.
 * @returns A score in (0, 1], or 0 when either epoch is missing or unparseable.
 */
function temporalDistance(queryEpoch, mol) {
    const MS_PER_DAY = 86400000;
    if (!(queryEpoch && mol.temporalForm.epoch)) {
        return 0;
    }
    const gapMs = Math.abs(Date.parse(queryEpoch) - Date.parse(mol.temporalForm.epoch));
    // Date.parse returns NaN for unparseable input, which propagates into gapMs.
    return Number.isFinite(gapMs) ? 1 / (1 + gapMs / MS_PER_DAY / 30) : 0;
}
|
|
105
|
+
/**
 * Weights used when the "hybrid" algorithm fuses the four individual scores.
 * The cosine, jaccard, structural and temporal weights sum to 1.
 */
const ALGO_WEIGHTS = {
    cosine: 0.35,
    jaccard: 0.25,
    structural: 0.25,
    temporal: 0.15,
    // Placeholder only: the hybrid fusion loop reads the four entries above.
    hybrid: 0,
};
|
|
112
|
+
/**
 * Rank a corpus of molecules against a query using one retrieval algorithm.
 *
 * Supported `algo` values are "cosine", "jaccard", "structural", "temporal"
 * and "hybrid". "hybrid" computes all four scores and fuses them with the
 * ALGO_WEIGHTS mix; any other value uses that single score (0 if it was not
 * computed).
 *
 * @param molecules Molecules to score; each must carry `vectorForm`,
 *                  `structuralForm` and `temporalForm`.
 * @param algo      Name of the retrieval algorithm.
 * @param q         Query with optional `text` and optional `epoch`.
 * @returns One `{ molecule, scores, finalScore }` entry per molecule, sorted
 *          by `finalScore` descending.
 */
export function query(molecules, algo, q) {
    const hasText = Boolean(q.text);
    const queryVec = hasText ? bagOfWords(q.text) : new Map();
    const queryTokens = new Set(queryVec.keys());
    const queryStructural = hasText
        ? extractStructural(q.text)
        : { functions: [], classes: [], paths: [], symbols: [] };
    const isHybrid = algo === "hybrid";
    // A score is computed when it is the selected algorithm or when fusing all four.
    const wants = (name) => isHybrid || algo === name;
    const ranked = molecules.map((mol) => {
        const molTokens = new Set(mol.vectorForm.keys());
        const scores = {};
        if (wants("cosine")) {
            scores.cosine = cosine(queryVec, mol.vectorForm);
        }
        if (wants("jaccard")) {
            scores.jaccard = jaccard(queryTokens, molTokens);
        }
        if (wants("structural")) {
            scores.structural = structuralMatch(queryStructural, mol.structuralForm);
        }
        if (wants("temporal")) {
            scores.temporal = temporalDistance(q.epoch ?? "", mol);
        }
        const finalScore = isHybrid
            ? ["cosine", "jaccard", "structural", "temporal"].reduce((sum, name) => sum + (scores[name] ?? 0) * ALGO_WEIGHTS[name], 0)
            : scores[algo] ?? 0;
        return { molecule: mol, scores, finalScore };
    });
    ranked.sort((left, right) => right.finalScore - left.finalScore);
    return ranked;
}
|
|
144
|
+
//# sourceMappingURL=hyperscan_molecule.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hyperscan_molecule.js","sourceRoot":"","sources":["../../src/hyperscan/hyperscan_molecule.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AA2BzC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IACnE,UAAU,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ;CACzF,CAAC,CAAC;AAEH,SAAS,UAAU,CAAC,IAAY;IAC9B,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9E,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,SAAS;QAC/C,GAAG,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,SAAS,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,kCAAkC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3H,MAAM,OAAO,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,qBAAqB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;IAC5E,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,qEAAqE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACzH,MAAM,OAAO,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5E,OAAO;QACL,SAAS,EAAE,CAAC,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;QAC/C,OAAO,EAAE,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;QAC3C,KAAK,EAAE,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;QACvC,OAAO,EAAE,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;KAC5C,CAAC;AACJ,CAAC;AAUD,yDAAyD;AACzD,MAAM,UAAU,aAAa,CAAC,KAAyB;IACrD,MAAM,EAAE,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,G
AAG,KAAK,CAAC,MAAM,CAAC,IAAI,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC5H,OAAO;QACL,EAAE;QACF,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;QAClC,UAAU,EAAE,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC;QAClC,cAAc,EAAE,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC;QAC7C,YAAY,EAAE;YACZ,YAAY,EAAE,KAAK,CAAC,YAAY,IAAI,EAAE;YACtC,UAAU,EAAE,KAAK,CAAC,UAAU,IAAI,EAAE;YAClC,KAAK,EAAE,KAAK,CAAC,KAAK,IAAI,EAAE;SACzB;QACD,MAAM,EAAE,KAAK,CAAC,MAAM;KACrB,CAAC;AACJ,CAAC;AAED,wEAAwE;AAExE,SAAS,MAAM,CAAC,CAAsB,EAAE,CAAsB;IAC5D,IAAI,GAAG,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAC5B,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;QACvB,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QACZ,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC;IACrC,CAAC;IACD,KAAK,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC;QAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;IACnC,OAAO,CAAC,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;AAC/D,CAAC;AAED,SAAS,OAAO,CAAC,CAAc,EAAE,CAAc;IAC7C,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,CAAC;QAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;IAC5C,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,eAAe,CAAC,KAA0C,EAAE,GAAwC;IAC3G,IAAI,KAAK,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC;IAC3B,KAAK,MAAM,GAAG,IAAI,CAAC,WAAW,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,CAAU,EAAE,CAAC;QACxE,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5B,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACnD,CAAC;IACD,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CA
AC,CAAC,CAAC,OAAO,GAAG,KAAK,CAAC;AAC3C,CAAC;AAED,SAAS,gBAAgB,CAAC,UAAkB,EAAE,GAAsB;IAClE,IAAI,CAAC,UAAU,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC;IACrD,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC;IACjF,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QAAE,OAAO,CAAC,CAAC;IACnC,mCAAmC;IACnC,MAAM,IAAI,GAAG,EAAE,GAAG,QAAQ,CAAC;IAC3B,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,qBAAqB;AACnD,CAAC;AAiBD,MAAM,YAAY,GAAkC;IAClD,MAAM,EAAE,IAAI;IACZ,OAAO,EAAE,IAAI;IACb,UAAU,EAAE,IAAI;IAChB,QAAQ,EAAE,IAAI;IACd,MAAM,EAAE,CAAC,EAAE,wBAAwB;CACpC,CAAC;AAEF;;oEAEoE;AACpE,MAAM,UAAU,KAAK,CACnB,SAA8B,EAC9B,IAAmB,EACnB,CAAa;IAEb,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC;IACzD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,MAAM,eAAe,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAEpH,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACpC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC;QACjD,MAAM,MAAM,GAA4B,EAAE,CAAC;QAC3C,IAAI,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,QAAQ;YAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,QAAQ,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC;QAC7F,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,QAAQ;YAAE,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QAC9F,IAAI,IAAI,KAAK,YAAY,IAAI,IAAI,KAAK,QAAQ;YAAE,MAAM,CAAC,UAAU,GAAG,eAAe,CAAC,eAAe,EAAE,GAAG,CAAC,cAAc,CAAC,CAAC;QACzH,IAAI,IAAI,KAAK,UAAU,IAAI,IAAI,KAAK,QAAQ;YAAE,MAAM,CAAC,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC;QAErG,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtB,KAAK,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,UAAU,CAAoB,EAAE,CAAC;gBACnF,UAAU,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YACnD,CAAC;QACH,CAAC;aAAM,CAAC;YACN,UAAU,GAAG
,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC;QAED,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;IAC/C,CAAC,CAAC,CAAC;IACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;IACpD,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v1.69.0 -- HYPERSCAN PROTOCOL.
|
|
3
|
+
*
|
|
4
|
+
* Four wild moves to close the prose-scan gap + Q&A trust gap +
|
|
5
|
+
* HTC coverage gap. Plus a shape-shifting molecule data structure
|
|
6
|
+
* that supports 5 mixed retrieval algorithms.
|
|
7
|
+
*
|
|
8
|
+
* H1 prose shadow scan entity extraction from prose claims
|
|
9
|
+
* H2 cross-citation every named entity needs codebase evidence
|
|
10
|
+
* H3 cross-source Q&A fuse retrieval across 5 source kinds
|
|
11
|
+
* H4 nucleus dust HTC auto-populate HTC coverage 0% -> ~100%
|
|
12
|
+
* MOLECULE text/vector/structural/temporal forms
|
|
13
|
+
*/
|
|
14
|
+
export * as proseShadow from "./prose_shadow.js";
|
|
15
|
+
export * as crossCitation from "./cross_citation.js";
|
|
16
|
+
export * as crossSourceQa from "./cross_source_qa.js";
|
|
17
|
+
export * as nucleusDustHtc from "./nucleus_dust_htc.js";
|
|
18
|
+
export * as hyperscanMolecule from "./hyperscan_molecule.js";
|
|
19
|
+
export * as bench from "./bench.js";
|
|
20
|
+
export { proseScan, extractEntities } from "./prose_shadow.js";
|
|
21
|
+
export { crossCitationGround, parseTriples } from "./cross_citation.js";
|
|
22
|
+
export { crossSourceAsk } from "./cross_source_qa.js";
|
|
23
|
+
export { generateDust, computeCoverage, clusterDust, readAbstracts } from "./nucleus_dust_htc.js";
|
|
24
|
+
export { buildMolecule, query, type HyperscanMolecule, type RetrievalAlgo, type MoleculeMatch } from "./hyperscan_molecule.js";
|
|
25
|
+
export { runHyperscanBench, renderBench } from "./bench.js";
|
|
26
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/hyperscan/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,WAAW,MAAM,mBAAmB,CAAC;AACjD,OAAO,KAAK,aAAa,MAAM,qBAAqB,CAAC;AACrD,OAAO,KAAK,aAAa,MAAM,sBAAsB,CAAC;AACtD,OAAO,KAAK,cAAc,MAAM,uBAAuB,CAAC;AACxD,OAAO,KAAK,iBAAiB,MAAM,yBAAyB,CAAC;AAC7D,OAAO,KAAK,KAAK,MAAM,YAAY,CAAC;AAEpC,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAClG,OAAO,EAAE,aAAa,EAAE,KAAK,EAAE,KAAK,iBAAiB,EAAE,KAAK,aAAa,EAAE,KAAK,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAC/H,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v1.69.0 -- HYPERSCAN PROTOCOL.
|
|
3
|
+
*
|
|
4
|
+
* Four wild moves to close the prose-scan gap + Q&A trust gap +
|
|
5
|
+
* HTC coverage gap. Plus a shape-shifting molecule data structure
|
|
6
|
+
* that supports 5 mixed retrieval algorithms.
|
|
7
|
+
*
|
|
8
|
+
* H1 prose shadow scan entity extraction from prose claims
|
|
9
|
+
* H2 cross-citation every named entity needs codebase evidence
|
|
10
|
+
* H3 cross-source Q&A fuse retrieval across 5 source kinds
|
|
11
|
+
* H4 nucleus dust HTC auto-populate HTC coverage 0% -> ~100%
|
|
12
|
+
* MOLECULE text/vector/structural/temporal forms
|
|
13
|
+
*/
|
|
14
|
+
export * as proseShadow from "./prose_shadow.js";
|
|
15
|
+
export * as crossCitation from "./cross_citation.js";
|
|
16
|
+
export * as crossSourceQa from "./cross_source_qa.js";
|
|
17
|
+
export * as nucleusDustHtc from "./nucleus_dust_htc.js";
|
|
18
|
+
export * as hyperscanMolecule from "./hyperscan_molecule.js";
|
|
19
|
+
export * as bench from "./bench.js";
|
|
20
|
+
export { proseScan, extractEntities } from "./prose_shadow.js";
|
|
21
|
+
export { crossCitationGround, parseTriples } from "./cross_citation.js";
|
|
22
|
+
export { crossSourceAsk } from "./cross_source_qa.js";
|
|
23
|
+
export { generateDust, computeCoverage, clusterDust, readAbstracts } from "./nucleus_dust_htc.js";
|
|
24
|
+
export { buildMolecule, query } from "./hyperscan_molecule.js";
|
|
25
|
+
export { runHyperscanBench, renderBench } from "./bench.js";
|
|
26
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/hyperscan/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,WAAW,MAAM,mBAAmB,CAAC;AACjD,OAAO,KAAK,aAAa,MAAM,qBAAqB,CAAC;AACrD,OAAO,KAAK,aAAa,MAAM,sBAAsB,CAAC;AACtD,OAAO,KAAK,cAAc,MAAM,uBAAuB,CAAC;AACxD,OAAO,KAAK,iBAAiB,MAAM,yBAAyB,CAAC;AAC7D,OAAO,KAAK,KAAK,MAAM,YAAY,CAAC;AAEpC,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAClG,OAAO,EAAE,aAAa,EAAE,KAAK,EAAkE,MAAM,yBAAyB,CAAC;AAC/H,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v1.69.0 -- HYPERSCAN H4: NUCLEUS DUST HTC.
|
|
3
|
+
*
|
|
4
|
+
* Wild idea: HTC's coverage is 0% on fresh repos because the user
|
|
5
|
+
* must manually run `htc-build populate Layer 1`. We make it automatic.
|
|
6
|
+
*
|
|
7
|
+
* "Nucleus dust" -- every observable artifact gets compressed into a
|
|
8
|
+
* tiny HTC molecule on the fly, NO manual step. The DUST accumulates
|
|
9
|
+
* silently as Mneme runs.
|
|
10
|
+
*
|
|
11
|
+
* - Every commit subject -> Layer 1 abstract (heuristic, no LLM)
|
|
12
|
+
* - Every file's top docstring -> Layer 1 abstract for the file
|
|
13
|
+
* - Auto-cluster by token-similarity into Layer 2
|
|
14
|
+
* - Concatenate Layer 2 into Layer 3 (memoir) on demand
|
|
15
|
+
*
|
|
16
|
+
* Layer 1 abstracts are HEURISTIC (no LLM call) -- "feat(area): summary"
|
|
17
|
+
* commits already carry their condensation. For prose commits we
|
|
18
|
+
* truncate to 30 tokens. This is a "free path" alternative to the
|
|
19
|
+
* LLM-driven layer.
|
|
20
|
+
*
|
|
21
|
+
* Coverage tracking: total abstracts / (total commits + total source
|
|
22
|
+
* files). Targets >= 80% on any healthy repo.
|
|
23
|
+
*/
|
|
24
|
+
/** One heuristic Layer-1 abstract produced from a commit or a file docstring. */
export interface DustAbstract {
    /** Stable id: hash of source. */
    id: string;
    /** What got compressed: commit hash, file path, etc. */
    source: { kind: "commit" | "file-docstring"; ref: string };
    /** The abstract text (<=30 tokens). */
    abstract: string;
    /** Estimated token count. */
    tokenCount: number;
    /** ISO timestamp of when the abstract was generated. */
    generatedAt: string;
}
|
|
39
|
+
/** A Layer-2 group of abstracts. */
export interface DustCluster {
    /** Identifier of this cluster. */
    clusterId: string;
    /** Topic label inferred from member abstracts. */
    label: string;
    /** Members (presumably `DustAbstract.id` values -- confirm against clusterDust). */
    memberIds: Array<string>;
    /** Token count of the cluster summary. */
    tokenCount: number;
}
|
|
48
|
+
/** Snapshot of how much of the repository is covered by dust abstracts. */
export interface CoverageStats {
    /** Number of commits counted in the denominator. */
    totalCommits: number;
    /** Number of source files counted in the denominator. */
    totalFiles: number;
    /** Number of abstracts generated so far. */
    abstractsGenerated: number;
    /** Coverage as a percentage: abstracts / (commits + files) * 100. */
    coveragePct: number;
    /** When these stats were computed (presumably an ISO timestamp -- confirm). */
    updatedAt: string;
}
|
|
55
|
+
/**
 * Idempotent: generates abstracts for everything not yet covered.
 *
 * @param repoRoot Root directory of the repository to scan.
 * @param opts     Optional caps on how many commits (`maxCommits`) and source
 *                 files (`maxFiles`) are considered.
 * @returns `added`, the number of abstracts created by this call, and
 *          `abstracts`, the newly created abstracts themselves.
 */
export declare function generateDust(
    repoRoot: string,
    opts?: { maxCommits?: number; maxFiles?: number },
): { added: number; abstracts: DustAbstract[] };
|
|
63
|
+
/**
 * Read the abstracts previously stored for a repository.
 *
 * @param repoRoot Root directory of the repository.
 * @returns The stored abstracts; an empty array when none have been written.
 */
export declare function readAbstracts(repoRoot: string): DustAbstract[];
|
|
64
|
+
/** Compute coverage as a percentage: abstracts / (commits + source files) * 100. */
|
|
65
|
+
export declare function computeCoverage(repoRoot: string): CoverageStats;
|
|
66
|
+
/**
 * Auto-cluster abstracts by token overlap. Lightweight; no LLM.
 *
 * @param repoRoot Root directory of the repository.
 * @param opts     Optional tuning. `jaccardThreshold` is presumably the minimum
 *                 token-set similarity for two abstracts to share a cluster and
 *                 `maxClusters` an upper bound on the result size -- confirm
 *                 against the implementation.
 * @returns The clusters that were formed.
 */
export declare function clusterDust(
    repoRoot: string,
    opts?: { jaccardThreshold?: number; maxClusters?: number },
): DustCluster[];
|
|
71
|
+
//# sourceMappingURL=nucleus_dust_htc.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"nucleus_dust_htc.d.ts","sourceRoot":"","sources":["../../src/hyperscan/nucleus_dust_htc.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAYH,MAAM,WAAW,YAAY;IAC3B,iCAAiC;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,wDAAwD;IACxD,MAAM,EAAE;QAAE,IAAI,EAAE,QAAQ,GAAG,gBAAgB,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC;IAC3D,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,6BAA6B;IAC7B,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc;IACd,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,WAAW;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd,eAAe;IACf,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,aAAa;IAC5B,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAqED,sEAAsE;AACtE,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE;IAAE,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,YAAY,EAAE,CAAA;CAAE,CAsC9I;AAED,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,YAAY,EAAE,CAW9D;AAED,+DAA+D;AAC/D,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAoB/D;AAED,oEAAoE;AACpE,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE;IAAE,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,WAAW,EAAE,CA4CvH"}
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v1.69.0 -- HYPERSCAN H4: NUCLEUS DUST HTC.
|
|
3
|
+
*
|
|
4
|
+
* Wild idea: HTC's coverage is 0% on fresh repos because the user
|
|
5
|
+
* must manually run `htc-build populate Layer 1`. We make it automatic.
|
|
6
|
+
*
|
|
7
|
+
* "Nucleus dust" -- every observable artifact gets compressed into a
|
|
8
|
+
* tiny HTC molecule on the fly, NO manual step. The DUST accumulates
|
|
9
|
+
* silently as Mneme runs.
|
|
10
|
+
*
|
|
11
|
+
* - Every commit subject -> Layer 1 abstract (heuristic, no LLM)
|
|
12
|
+
* - Every file's top docstring -> Layer 1 abstract for the file
|
|
13
|
+
* - Auto-cluster by token-similarity into Layer 2
|
|
14
|
+
* - Concatenate Layer 2 into Layer 3 (memoir) on demand
|
|
15
|
+
*
|
|
16
|
+
* Layer 1 abstracts are HEURISTIC (no LLM call) -- "feat(area): summary"
|
|
17
|
+
* commits already carry their condensation. For prose commits we
|
|
18
|
+
* truncate to 30 tokens. This is a "free path" alternative to the
|
|
19
|
+
* LLM-driven layer.
|
|
20
|
+
*
|
|
21
|
+
* Coverage tracking: total abstracts / (total commits + total source
|
|
22
|
+
* files). Targets >= 80% on any healthy repo.
|
|
23
|
+
*/
|
|
24
|
+
import { existsSync, readFileSync, readdirSync, statSync, writeFileSync, mkdirSync } from "node:fs";
|
|
25
|
+
import { join } from "node:path";
|
|
26
|
+
import { execSync } from "node:child_process";
|
|
27
|
+
import { createHash } from "node:crypto";
|
|
28
|
+
// Storage layout for nucleus-dust artifacts. Every path is relative to the repo root.
const DUST_DIR = ".mneme/hyperscan/htc-dust";
// One JSON-encoded abstract per line.
const ABSTRACTS_FILE = `${DUST_DIR}/abstracts.jsonl`;
const CLUSTERS_FILE = `${DUST_DIR}/clusters.json`;
const COVERAGE_FILE = `${DUST_DIR}/coverage.json`;
|
|
32
|
+
/**
 * Create the dust storage directory under `repoRoot` when it does not exist yet.
 *
 * @param repoRoot Root directory of the repository.
 */
function ensureDir(repoRoot) {
    const dustPath = join(repoRoot, DUST_DIR);
    if (existsSync(dustPath)) {
        return;
    }
    // recursive: also creates the intermediate .mneme/hyperscan directories.
    mkdirSync(dustPath, { recursive: true });
}
|
|
37
|
+
/**
 * Rough token estimate: 1.3 tokens per whitespace-separated chunk, rounded,
 * never less than 1.
 *
 * @param s Text to estimate.
 * @returns Estimated token count (>= 1).
 */
function estTokenCount(s) {
    const chunks = s.split(/\s+/).length;
    const estimate = Math.round(chunks * 1.3);
    return estimate < 1 ? 1 : estimate;
}
|
|
40
|
+
/**
 * Heuristic Layer-1 abstract for a commit.
 *
 * - Conventional-commit subjects ("type:" or "type(scope):") are already
 *   abstract-shaped: the first line is returned as-is.
 * - Otherwise the first sentence of the subject is used, followed by the first
 *   non-blank body line (up to 80 characters) in parentheses when there is one.
 *
 * The result is always capped at 140 characters.
 *
 * @param subject Commit subject line.
 * @param body    Commit body; may be empty or missing.
 * @returns The abstract text.
 */
function abstractCommit(subject, body) {
    // Conventional commit lines are already abstract-shaped.
    if (/^[a-z]+(\([\w-]+\))?:/.test(subject)) {
        return subject.split("\n")[0].slice(0, 140);
    }
    // Otherwise: take first sentence of subject + first line of body.
    const firstSent = subject.split(/(?<=[.!?])\s+/)[0];
    const firstBodyLine = body ? body.split("\n").find((l) => l.trim().length > 0) : undefined;
    // A body made only of blank lines has nothing to append; skipping it avoids
    // emitting a dangling "subject ()".
    if (firstBodyLine) {
        return `${firstSent} (${firstBodyLine.slice(0, 80)})`.slice(0, 140);
    }
    return firstSent.slice(0, 140);
}
|
|
54
|
+
/**
 * Heuristic Layer-1 abstract for a source-file docstring.
 *
 * The docstring body is split into lines; each line is trimmed and its leading
 * "*" comment gutter removed. The first non-empty line that is not a version
 * header (starting with "v" + digit) becomes the abstract. If every line is a
 * version header the first of them is used; an empty docstring yields an empty
 * summary.
 *
 * @param path Path of the source file; only its last segment is used.
 * @param doc  Text found between the opening and closing comment markers.
 * @returns "<file name>: <summary>", capped at 140 characters.
 */
function abstractDocstring(path, doc) {
    const fname = path.split(/[\\/]/).pop();
    // Strip the " * " gutter first: without this, every line of a conventional
    // multi-line doc block starts with "*" and would be rejected.
    const lines = doc
        .split("\n")
        .map((l) => l.trim().replace(/^\*+\s*/, ""))
        .filter((l) => l.length > 0);
    const firstLine = lines.find((l) => !/^v\d/.test(l)) ?? lines[0] ?? "";
    return `${fname}: ${firstLine.slice(0, 100)}`.slice(0, 140);
}
|
|
61
|
+
/**
 * Recursively collect `.ts` files under `repoRoot`, reading each one as UTF-8.
 *
 * Entries named node_modules, .git, dist, build, .mneme or coverage are
 * skipped. Directories and files that cannot be listed, stat'ed or read are
 * silently ignored (best-effort scan).
 *
 * @param repoRoot Directory to start from.
 * @param max      Hard cap on the number of files returned (default 500).
 * @returns Array of `{ path, content }`, at most `max` entries long.
 */
function walkSourceFiles(repoRoot, max = 500) {
    const out = [];
    const skip = new Set(["node_modules", ".git", "dist", "build", ".mneme", "coverage"]);
    const walk = (dir) => {
        if (out.length >= max)
            return;
        let entries = [];
        try {
            entries = readdirSync(dir);
        }
        catch {
            return;
        }
        for (const e of entries) {
            // Re-check the cap per entry: checking only on directory entry lets a
            // single large directory push the result past `max`.
            if (out.length >= max)
                return;
            if (skip.has(e))
                continue;
            const p = join(dir, e);
            try {
                const s = statSync(p);
                if (s.isDirectory())
                    walk(p);
                else if (/\.ts$/.test(e))
                    out.push({ path: p, content: readFileSync(p, "utf8") });
            }
            catch { /* unreadable entry: skip it and keep scanning */ }
        }
    };
    walk(repoRoot);
    return out;
}
|
|
95
|
+
/**
 * Read up to `max` commits from the repository at `repoRoot` via `git log`.
 *
 * Each record is emitted as "<hash>\t<subject>\t<body>" followed by a
 * "---COMMIT---" marker line. Any failure (git missing, not a repository,
 * timeout after 5 s) yields an empty array.
 *
 * NOTE(review): `repoRoot` is interpolated into a shell command line, so a
 * path containing a double quote or shell metacharacters would break or alter
 * the command. Confirm callers only pass trusted paths.
 *
 * @param repoRoot Repository root passed to `git -C`.
 * @param max      Maximum number of commits to read (default 500).
 * @returns Array of `{ hash, subject, body }`.
 */
function readCommits(repoRoot, max = 500) {
    try {
        const r = execSync(`git -C "${repoRoot}" log --max-count=${max} --pretty=format:%H%x09%s%x09%b%n---COMMIT---`, { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"], timeout: 5000 });
        // With "format:" git puts a newline BETWEEN records but not after the last
        // one, so the final marker has no trailing newline. Accept end-of-output
        // after the marker too; otherwise the last body keeps the marker text.
        return r.split(/\n---COMMIT---(?:\n|$)/).map((row) => {
            const [hash, subject, ...rest] = row.split("\t");
            if (!hash || !subject)
                return null;
            // The body may itself contain tabs; stitch it back together.
            return { hash, subject, body: rest.join("\t") };
        }).filter((x) => x !== null);
    }
    catch {
        return [];
    }
}
|
|
109
|
+
/**
 * Idempotent: generates abstracts for everything not yet covered.
 *
 * Commits (via `git log`) and `.ts` files that begin with a doc comment are
 * each turned into a heuristic abstract. Anything whose id is already present
 * in the abstracts file is skipped; new abstracts are appended to that file,
 * one JSON object per line.
 *
 * Ids are the first 16 hex characters of SHA-256 over "commit|<hash>" or
 * "file|<path as returned by the directory walk>".
 *
 * @param repoRoot Root directory of the repository.
 * @param opts     Optional `maxCommits` / `maxFiles` caps (default 500 each).
 * @returns `{ added, abstracts }` describing only what this call created.
 */
export function generateDust(repoRoot, opts) {
    ensureDir(repoRoot);
    const existing = readAbstracts(repoRoot);
    const existingIds = new Set(existing.map((a) => a.id));
    const newAbstracts = [];
    // One timestamp for the whole run so every abstract of a batch agrees.
    const ts = new Date().toISOString();
    for (const c of readCommits(repoRoot, opts?.maxCommits ?? 500)) {
        const id = createHash("sha256").update(`commit|${c.hash}`).digest("hex").slice(0, 16);
        if (existingIds.has(id))
            continue;
        const abs = abstractCommit(c.subject, c.body);
        newAbstracts.push({
            id,
            source: { kind: "commit", ref: c.hash },
            abstract: abs,
            tokenCount: estTokenCount(abs),
            generatedAt: ts,
        });
    }
    for (const f of walkSourceFiles(repoRoot, opts?.maxFiles ?? 500)) {
        // Only files whose first non-whitespace content is a doc block qualify.
        const m = f.content.match(/^\s*\/\*\*([\s\S]*?)\*\//);
        if (!m)
            continue;
        const id = createHash("sha256").update(`file|${f.path}`).digest("hex").slice(0, 16);
        if (existingIds.has(id))
            continue;
        const abs = abstractDocstring(f.path, m[1]);
        newAbstracts.push({
            id,
            // Store the path relative to the repo root.
            source: { kind: "file-docstring", ref: f.path.replace(repoRoot, "").replace(/^[/\\]/, "") },
            abstract: abs,
            tokenCount: estTokenCount(abs),
            generatedAt: ts,
        });
    }
    if (newAbstracts.length > 0) {
        const payload = newAbstracts.map((a) => JSON.stringify(a)).join("\n") + "\n";
        // This file is an ES module, where `require` is not defined; append with
        // the already-imported writeFileSync in append mode instead.
        writeFileSync(join(repoRoot, ABSTRACTS_FILE), payload, { encoding: "utf8", flag: "a" });
    }
    return { added: newAbstracts.length, abstracts: newAbstracts };
}
|
|
151
|
+
/**
 * Load every abstract stored in the JSONL abstracts file under `repoRoot`.
 *
 * Blank lines and lines that are not valid JSON are skipped. A missing or
 * unreadable file produces an empty array instead of an error.
 *
 * @param repoRoot Repository root containing the abstracts file.
 * @returns The parsed entries, in file order.
 */
export function readAbstracts(repoRoot) {
    const file = join(repoRoot, ABSTRACTS_FILE);
    const parsed = [];
    if (existsSync(file)) {
        let text;
        try {
            text = readFileSync(file, "utf8");
        }
        catch {
            // Unreadable file: behave as if nothing was stored.
            return parsed;
        }
        for (const raw of text.split("\n")) {
            if (raw.trim() === "")
                continue;
            try {
                parsed.push(JSON.parse(raw));
            }
            catch {
                // Corrupt line: ignore it and keep the rest.
            }
        }
    }
    return parsed;
}
|
|
169
|
+
/**
 * Compute coverage = abstracts / (commits + documented source files), as a
 * percentage, and persist the figures to the coverage file (best effort).
 *
 * A file counts as documented only when it opens with a block comment
 * (`/**` through the closing delimiter), the same rule generateDust uses to
 * decide whether a file gets an abstract, so fully generated repositories
 * can actually reach 100%.
 *
 * @param repoRoot Repository root to inspect.
 * @returns `{ totalCommits, totalFiles, abstractsGenerated, coveragePct, updatedAt }`.
 *          `coveragePct` is capped at 100 because stored abstracts may outnumber
 *          the commits/files visible in the current scan window.
 */
export function computeCoverage(repoRoot) {
    const totalCommits = readCommits(repoRoot).length;
    const totalFiles = walkSourceFiles(repoRoot)
        .filter((f) => /^\s*\/\*\*[\s\S]*?\*\//.test(f.content))
        .length;
    const abstractsGenerated = readAbstracts(repoRoot).length;
    // Guard against division by zero for an empty repository.
    const denominator = Math.max(1, totalCommits + totalFiles);
    const coveragePct = Math.min(100, (abstractsGenerated / denominator) * 100);
    const stats = {
        totalCommits,
        totalFiles,
        abstractsGenerated,
        coveragePct,
        updatedAt: new Date().toISOString(),
    };
    try {
        ensureDir(repoRoot);
        writeFileSync(join(repoRoot, COVERAGE_FILE), JSON.stringify(stats, null, 2) + "\n", "utf8");
    }
    catch {
        // Persisting is best effort; the computed stats are still returned.
    }
    return stats;
}
|
|
192
|
+
/**
 * Group the stored abstracts into clusters by token overlap (no LLM involved).
 *
 * Greedy single pass: each still-unassigned abstract becomes a seed and pulls
 * in every other unassigned abstract whose Jaccard similarity with the seed's
 * token set reaches the threshold. The result is also written to the clusters
 * file on a best-effort basis.
 *
 * @param repoRoot Repository root holding the abstracts.
 * @param opts Optional `jaccardThreshold` (default 0.3) and `maxClusters` (default 50).
 * @returns Array of `{ clusterId, label, memberIds, tokenCount }`.
 */
export function clusterDust(repoRoot, opts) {
    const minJaccard = opts?.jaccardThreshold ?? 0.3;
    const clusterCap = opts?.maxClusters ?? 50;
    const items = readAbstracts(repoRoot);
    // Lower-cased word-like tokens of length >= 4, keyed by abstract id.
    const tokensById = new Map();
    const unassigned = new Set();
    for (const item of items) {
        unassigned.add(item.id);
        const words = item.abstract.toLowerCase().match(/[a-z][a-z0-9_-]+/g) ?? [];
        tokensById.set(item.id, new Set(words.filter((w) => w.length >= 4)));
    }
    // Jaccard index of two token sets; two empty sets score 0.
    const similarity = (left, right) => {
        let shared = 0;
        for (const tok of left) {
            if (right.has(tok))
                shared += 1;
        }
        const combined = left.size + right.size - shared;
        return combined === 0 ? 0 : shared / combined;
    };
    const result = [];
    for (const seed of items) {
        // Once the cap is hit no further cluster can be opened.
        if (result.length >= clusterCap)
            break;
        if (!unassigned.has(seed.id))
            continue;
        unassigned.delete(seed.id);
        const seedTokens = tokensById.get(seed.id);
        const memberIds = [seed.id];
        for (const candidate of items) {
            if (unassigned.has(candidate.id) && similarity(seedTokens, tokensById.get(candidate.id)) >= minJaccard) {
                memberIds.push(candidate.id);
                unassigned.delete(candidate.id);
            }
        }
        // Label = token present in the most members; the earliest-seen token wins ties.
        const counts = new Map();
        for (const memberId of memberIds) {
            for (const tok of tokensById.get(memberId) ?? []) {
                counts.set(tok, (counts.get(tok) ?? 0) + 1);
            }
        }
        let label = "general";
        let best = 0;
        for (const [tok, hits] of counts) {
            if (hits > best) {
                best = hits;
                label = tok;
            }
        }
        result.push({
            clusterId: `cluster-${result.length}`,
            label,
            memberIds,
            tokenCount: memberIds.length * 5,
        });
    }
    try {
        ensureDir(repoRoot);
        writeFileSync(join(repoRoot, CLUSTERS_FILE), JSON.stringify(result, null, 2) + "\n", "utf8");
    }
    catch {
        // Persisting is best effort; the clusters are still returned.
    }
    return result;
}
|
|
242
|
+
//# sourceMappingURL=nucleus_dust_htc.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"nucleus_dust_htc.js","sourceRoot":"","sources":["../../src/hyperscan/nucleus_dust_htc.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,WAAW,EAAE,QAAQ,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpG,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,MAAM,QAAQ,GAAG,2BAA2B,CAAC;AAC7C,MAAM,cAAc,GAAG,2CAA2C,CAAC;AACnE,MAAM,aAAa,GAAG,yCAAyC,CAAC;AAChE,MAAM,aAAa,GAAG,yCAAyC,CAAC;AAiChE,SAAS,SAAS,CAAC,QAAgB;IACjC,MAAM,CAAC,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACnC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QAAE,SAAS,CAAC,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,aAAa,CAAC,CAAS;IAC9B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC;AAC9D,CAAC;AAED,uDAAuD;AACvD,SAAS,cAAc,CAAC,OAAe,EAAE,IAAY;IACnD,yDAAyD;IACzD,IAAI,uBAAuB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,OAAO,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC/C,CAAC;IACD,kEAAkE;IAClE,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC,CAAE,CAAC;IACrD,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAC9E,OAAO,GAAG,SAAS,KAAK,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACtE,CAAC;IACD,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACjC,CAAC;AAED,8DAA8D;AAC9D,SAAS,iBAAiB,CAAC,IAAY,EAAE,GAAW;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,EAAG,CAAC;IACzC,4CAA4C;IAC5C,MAAM,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACnI,OAAO,GAAG,KAAK,KAAK,SAAS,CAAC
,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAC9D,CAAC;AAED,SAAS,eAAe,CAAC,QAAgB,EAAE,GAAG,GAAG,GAAG;IAClD,MAAM,GAAG,GAA6C,EAAE,CAAC;IACzD,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,cAAc,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC,CAAC;IACtF,MAAM,IAAI,GAAG,CAAC,GAAW,EAAE,EAAE;QAC3B,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG;YAAE,OAAO;QAC9B,IAAI,OAAO,GAAa,EAAE,CAAC;QAC3B,IAAI,CAAC;YAAC,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO;QAAC,CAAC;QACrD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,SAAS;YAC1B,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YACvB,IAAI,CAAC;gBACH,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;gBACtB,IAAI,CAAC,CAAC,WAAW,EAAE;oBAAE,IAAI,CAAC,CAAC,CAAC,CAAC;qBACxB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;oBACzB,IAAI,CAAC;wBAAC,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;oBAAC,CAAC;oBAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC;gBAClF,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;IACH,CAAC,CAAC;IACF,IAAI,CAAC,QAAQ,CAAC,CAAC;IACf,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,WAAW,CAAC,QAAgB,EAAE,GAAG,GAAG,GAAG;IAC9C,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,QAAQ,CAAC,WAAW,QAAQ,qBAAqB,GAAG,+CAA+C,EAC3G,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5E,OAAO,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YAC7C,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACjD,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO;gBAAE,OAAO,IAAI,CAAC;YACnC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAClD,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAwD,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IACrF,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,EAAE,CAAC;IAAC,CAAC;AACxB,CAAC;AAED,sEAAsE;AACtE,MAAM,UAAU,YAAY,CAAC,QAAgB,EAAE,IAAiD;IAC9F,SAAS,CAAC,QAAQ,CAAC,CAAC;IACpB,MAAM,QAAQ,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,
CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACvD,MAAM,YAAY,GAAmB,EAAE,CAAC;IACxC,MAAM,EAAE,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAEpC,KAAK,MAAM,CAAC,IAAI,WAAW,CAAC,QAAQ,EAAE,IAAI,EAAE,UAAU,IAAI,GAAG,CAAC,EAAE,CAAC;QAC/D,MAAM,EAAE,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtF,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,SAAS;QAClC,MAAM,GAAG,GAAG,cAAc,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;QAC9C,YAAY,CAAC,IAAI,CAAC;YAChB,EAAE;YACF,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,EAAE;YACvC,QAAQ,EAAE,GAAG;YACb,UAAU,EAAE,aAAa,CAAC,GAAG,CAAC;YAC9B,WAAW,EAAE,EAAE;SAChB,CAAC,CAAC;IACL,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,eAAe,CAAC,QAAQ,EAAE,IAAI,EAAE,QAAQ,IAAI,GAAG,CAAC,EAAE,CAAC;QACjE,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QACtD,IAAI,CAAC,CAAC;YAAE,SAAS;QACjB,MAAM,EAAE,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACpF,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,SAAS;QAClC,MAAM,GAAG,GAAG,iBAAiB,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;QAC7C,YAAY,CAAC,IAAI,CAAC;YAChB,EAAE;YACF,MAAM,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE;YAC3F,QAAQ,EAAE,GAAG;YACb,UAAU,EAAE,aAAa,CAAC,GAAG,CAAC;YAC9B,WAAW,EAAE,EAAE;SAChB,CAAC,CAAC;IACL,CAAC;IACD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,EAAE,GAAG,OAAO,CAAC,SAAS,CAA6B,CAAC;QAC1D,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,CAAC,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,MAAM,CAAC,CAAC;IAC1H,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,YAAY,CAAC,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,QAAgB;IAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,QAAQ,EAAE,cAAc,CAAC,CAAC;IACzC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QAAE,OAAO,EAAE
,CAAC;IAC9B,MAAM,GAAG,GAAmB,EAAE,CAAC;IAC/B,IAAI,CAAC;QACH,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACvD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;gBAAE,SAAS;YAC3B,IAAI,CAAC;gBAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAiB,CAAC,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC;IACjB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,+DAA+D;AAC/D,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC9C,MAAM,OAAO,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC;IACpC,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACtF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,GAAG,UAAU,CAAC,CAAC;IAC3D,MAAM,WAAW,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC;IAC3D,MAAM,KAAK,GAAkB;QAC3B,YAAY;QACZ,UAAU;QACV,kBAAkB,EAAE,SAAS,CAAC,MAAM;QACpC,WAAW;QACX,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;IACF,IAAI,CAAC;QACH,SAAS,CAAC,QAAQ,CAAC,CAAC;QACpB,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,MAAM,CAAC,CAAC;IAC9F,CAAC;IAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC;IACjB,OAAO,KAAK,CAAC;AACf,CAAC;AAED,oEAAoE;AACpE,MAAM,UAAU,WAAW,CAAC,QAAgB,EAAE,IAA0D;IACtG,MAAM,SAAS,GAAG,IAAI,EAAE,gBAAgB,IAAI,GAAG,CAAC;IAChD,MAAM,WAAW,GAAG,IAAI,EAAE,WAAW,IAAI,EAAE,CAAC;IAC5C,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACtD,MAAM,UAAU,GAAG,IAAI,GAAG,EAAuB,CAAC;IAClD,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,mBAAmB,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1H,CAAC;IACD,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACn
C,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,QAAQ,CAAC,MAAM,IAAI,WAAW;YAAE,SAAS;QACxE,MAAM,UAAU,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAE,CAAC;QAC5C,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC1B,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC1B,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;YAC9B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;gBAAE,SAAS;YACvC,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAE,CAAC;YAC9C,MAAM,KAAK,GAAG,CAAC,GAAG,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACvE,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,GAAG,WAAW,CAAC,IAAI,GAAG,KAAK,CAAC;YACzD,MAAM,GAAG,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC;YAC5C,IAAI,GAAG,IAAI,SAAS,EAAE,CAAC;gBACrB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;gBACvB,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QACD,gEAAgE;QAChE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC5C,KAAK,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;YACzB,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE;gBAAE,SAAS,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC1F,CAAC;QACD,MAAM,KAAK,GAAG,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;QACxF,QAAQ,CAAC,IAAI,CAAC;YACZ,SAAS,EAAE,WAAW,QAAQ,CAAC,MAAM,EAAE;YACvC,KAAK;YACL,SAAS,EAAE,OAAO;YAClB,UAAU,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC;SAC/B,CAAC,CAAC;IACL,CAAC;IACD,IAAI,CAAC;QACH,SAAS,CAAC,QAAQ,CAAC,CAAC;QACpB,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,MAAM,CAAC,CAAC;IACjG,CAAC;IAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC;IACjB,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v1.69.0 -- HYPERSCAN H1: PROSE SHADOW SCAN.
|
|
3
|
+
*
|
|
4
|
+
* Wild idea: the v1.65 antivirus catches claims with syntax markers
|
|
5
|
+
* (parens, file-extensions, version strings). It MISSES prose like:
|
|
6
|
+
*
|
|
7
|
+
* "wraith-utils-2099 is integrated for caching" -- fake npm package
|
|
8
|
+
* "Sentry catches our errors" -- general service name
|
|
9
|
+
* "we use Datadog for APM" -- general service name
|
|
10
|
+
* "RustCrypto's libcrypto powers our hash" -- fake library claim
|
|
11
|
+
*
|
|
12
|
+
* Prose Shadow extracts ENTITY CANDIDATES from any text using three
|
|
13
|
+
* mixed algorithms (title-case detector + package-shape pattern +
|
|
14
|
+
* acronym matcher) and verifies each against:
|
|
15
|
+
*
|
|
16
|
+
* 1. Local citations -- does this name appear in package.json,
|
|
17
|
+
* imports, env vars, or source files?
|
|
18
|
+
* 2. Domain authority -- is it a known real service? (curated bank)
|
|
19
|
+
* 3. Negation triggers -- does the prose attribute behavior to a
|
|
20
|
+
* name that fails grounding?
|
|
21
|
+
*
|
|
22
|
+
* Output: list of suspect ENTITIES (not just regex matches) with a
|
|
23
|
+
* confidence band. This is the missing class the user identified.
|
|
24
|
+
*/
|
|
25
|
+
/**
 * A single entity-like span found in a piece of text.
 */
export interface EntityCandidate {
    /**
     * Text of the candidate.
     * NOTE(review): presumably verbatim as it appears in the input; confirm against the extractor.
     */
    surface: string;
    /** Category the candidate was classified under. */
    kind:
        | "title-cased"
        | "package-shape"
        | "acronym"
        | "domain-suffixed";
    /** Position in original text. */
    offset: number;
}
|
|
31
|
+
/**
 * An entity flagged as suspect by the prose scan.
 */
export interface ProseSuspect {
    /** Name of the flagged entity. */
    entity: string;
    /** Category of the flagged entity; same value set as `EntityCandidate.kind`. */
    kind: EntityCandidate["kind"];
    /**
     * Why this entity is suspect
     * (none-of-our-evidence-supports-it / not-in-deps / etc.).
     */
    reason: string;
    /** Suspicion score in the range 0..1; higher means stronger suspicion. */
    confidence: number;
    /** Citation hits found (helps the caller decide). */
    citationsFound: string[];
}
|
|
41
|
+
/**
 * Result object returned by `proseScan`.
 */
export interface ProseScanReport {
    /**
     * Count of extracted entities.
     * NOTE(review): presumably the number of candidates found in the claim; confirm in the implementation.
     */
    entitiesExtracted: number;
    /** Entities the scan flagged as suspect. */
    suspects: ProseSuspect[];
    /** Whitelisted "known-real" entities the prose mentions. */
    recognized: string[];
    /**
     * NOTE(review): presumably the scan duration in milliseconds; confirm in the implementation.
     */
    ms: number;
}
|
|
48
|
+
/**
 * Extract entity candidates from `text`.
 *
 * The module header describes the extraction as a mix of a title-case
 * detector, a package-shape pattern and an acronym matcher; the
 * implementation is not part of this declaration file.
 *
 * @param text Text to scan.
 * @returns The candidates found, each with its surface form, kind and offset.
 */
export declare function extractEntities(text: string): EntityCandidate[];
|
|
49
|
+
/**
 * Scan a prose `claim` for entities that cannot be grounded.
 *
 * Per the module header, candidates are checked against local citations
 * (package.json, imports, env vars, source files) and a curated bank of
 * known real services; the implementation is not part of this declaration
 * file.
 *
 * @param repoRoot Repository root. NOTE(review): presumably where local citations are looked up; confirm in the implementation.
 * @param claim Prose text to scan.
 * @returns A report listing suspect entities, recognized entities and timing.
 */
export declare function proseScan(repoRoot: string, claim: string): ProseScanReport;
|
|
50
|
+
//# sourceMappingURL=prose_shadow.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prose_shadow.d.ts","sourceRoot":"","sources":["../../src/hyperscan/prose_shadow.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAKH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,aAAa,GAAG,eAAe,GAAG,SAAS,GAAG,iBAAiB,CAAC;IACtE,iCAAiC;IACjC,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,eAAe,CAAC,MAAM,CAAC,CAAC;IAC9B,0FAA0F;IAC1F,MAAM,EAAE,MAAM,CAAC;IACf,yCAAyC;IACzC,UAAU,EAAE,MAAM,CAAC;IACnB,iDAAiD;IACjD,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,eAAe;IAC9B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,4DAA4D;IAC5D,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,EAAE,EAAE,MAAM,CAAC;CACZ;AA6CD,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,eAAe,EAAE,CAiB/D;AA4ED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,eAAe,CAuC1E"}
|