@futdevpro/fdp-agent-memory 0.1.0 → 1.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +7 -7
- package/build/package.json +6 -5
- package/build/src/_cli/_collections/fam-arg.util.js +48 -0
- package/build/src/_cli/_collections/fam-cli.const.js +40 -0
- package/build/src/_cli/_collections/fam-output.util.js +86 -0
- package/build/src/_cli/_collections/fam-project-discovery.util.js +98 -0
- package/build/src/_cli/_commands/capture.command.js +73 -0
- package/build/src/_cli/_commands/config.command.js +93 -0
- package/build/src/_cli/_commands/doctor.command.js +124 -0
- package/build/src/_cli/_commands/errors.command.js +66 -0
- package/build/src/_cli/_commands/export.command.js +65 -0
- package/build/src/_cli/_commands/find-duplicates.command.js +97 -0
- package/build/src/_cli/_commands/import.command.js +136 -0
- package/build/src/_cli/_commands/init.command.js +147 -0
- package/build/src/_cli/_commands/read.command.js +109 -0
- package/build/src/_cli/_commands/scan-projects.command.js +138 -0
- package/build/src/_cli/_commands/scan.command.js +98 -0
- package/build/src/_cli/_commands/seed.command.js +40 -0
- package/build/src/_cli/_commands/serve.command.js +373 -0
- package/build/src/_cli/_commands/start.command.js +134 -0
- package/build/src/_cli/_commands/stats.command.js +54 -0
- package/build/src/_cli/_commands/write.command.js +103 -0
- package/build/src/_cli/_models/interfaces/fam-cli-global-options.interface.js +2 -0
- package/build/src/_cli/_models/interfaces/fam-cli-output.interface.js +9 -0
- package/build/src/_cli/_models/interfaces/fam-client-result.interface.js +2 -0
- package/build/src/_cli/_services/fam-client.service.js +140 -0
- package/build/src/_cli/register-commands.js +86 -0
- package/build/src/_collections/config-catalog.const.js +67 -1
- package/build/src/_collections/fam-console.util.js +367 -0
- package/build/src/_collections/fam-entry-bootstrap.util.js +158 -4
- package/build/src/_collections/fam-error-factory.util.js +0 -9
- package/build/src/_collections/fam-mcp-bridge.util.js +49 -0
- package/build/src/_collections/fam-reference-code.util.js +105 -0
- package/build/src/_collections/fam-version.const.js +10 -0
- package/build/src/_models/data-models/fam-entry-base-properties.const.js +1 -0
- package/build/src/_models/data-models/fam-entry.data-model.js +6 -0
- package/build/src/_models/data-models/fam-ingest-run.data-model.js +3 -1
- package/build/src/_models/data-models/fam-reference.data-model.js +7 -0
- package/build/src/_modules/capture/_collections/fam-capture.const.js +11 -0
- package/build/src/_modules/capture/_services/fam-auto-capture.control-service.js +87 -0
- package/build/src/_modules/capture/index.js +8 -0
- package/build/src/_modules/embedding/_collections/fam-embedding-prefix.util.js +77 -0
- package/build/src/_modules/embedding/_services/fam-duplicate-scan.control-service.js +202 -0
- package/build/src/_modules/embedding/_services/fam-embedding-pipeline.control-service.js +33 -9
- package/build/src/_modules/embedding/_services/fam-embedding.control-service.js +21 -2
- package/build/src/_modules/embedding/_services/fam-entry.data-service.js +135 -0
- package/build/src/_modules/embedding/_services/fam-vector-search.control-service.js +42 -32
- package/build/src/_modules/embedding/index.js +4 -1
- package/build/src/_modules/export/_collections/fam-export.const.js +22 -0
- package/build/src/_modules/export/_services/fam-export.control-service.js +64 -0
- package/build/src/_modules/export/index.js +8 -0
- package/build/src/_modules/ingest/_collections/fam-famignore.util.js +83 -0
- package/build/src/_modules/ingest/_collections/fam-file-routing.util.js +59 -48
- package/build/src/_modules/ingest/_collections/fam-git-repo.util.js +193 -0
- package/build/src/_modules/ingest/_collections/fam-project-identity.util.js +134 -0
- package/build/src/_modules/ingest/_collections/fam-scan-progress.util.js +57 -0
- package/build/src/_modules/ingest/_collections/fam-scan-summary.util.js +60 -0
- package/build/src/_modules/ingest/_collections/fam-scan-weight.util.js +53 -0
- package/build/src/_modules/ingest/_collections/fam-secret-exclude.util.js +37 -14
- package/build/src/_modules/ingest/_collections/fam-sliding-chunker.util.js +34 -0
- package/build/src/_modules/ingest/_collections/fam-ts-chunker.util.js +200 -14
- package/build/src/_modules/ingest/_services/fam-delta-compare.util.js +4 -1
- package/build/src/_modules/ingest/_services/fam-ingest-run.data-service.js +7 -4
- package/build/src/_modules/ingest/_services/fam-ingest.control-service.js +349 -17
- package/build/src/_modules/ingest/_services/fam-scan.control-service.js +25 -2
- package/build/src/_modules/ingest/index.js +3 -1
- package/build/src/_modules/mcp/_collections/fam-active-rules.util.js +56 -0
- package/build/src/_modules/mcp/_collections/fam-core-tools.const.js +47 -6
- package/build/src/_modules/mcp/_services/fam-capabilities-tool.service.js +4 -4
- package/build/src/_modules/mcp/_services/fam-capability-registry.service.js +224 -18
- package/build/src/_modules/mcp/_services/fam-mcp-adapter.service.js +4 -4
- package/build/src/_modules/mcp/_services/fam-mcp-server.service.js +4 -4
- package/build/src/_modules/mcp/_services/fam-read-tool.service.js +53 -1
- package/build/src/_modules/mcp/_services/fam-write-tool.service.js +104 -8
- package/build/src/_modules/mcp/index.js +4 -4
- package/build/src/_modules/migration/_collections/fam-claude-mem-normalize.util.js +66 -3
- package/build/src/_modules/migration/_collections/fam-prompt-aggregate.util.js +143 -0
- package/build/src/_modules/migration/_collections/fam-target-mapping.util.js +19 -0
- package/build/src/_modules/migration/_enums/fam-claude-mem-source.type-enum.js +6 -0
- package/build/src/_modules/migration/_models/interfaces/fam-claude-mem.interface.js +5 -0
- package/build/src/_modules/migration/_services/fam-agent-memory-reader.service.js +125 -0
- package/build/src/_modules/migration/_services/fam-claude-mem-import.control-service.js +101 -18
- package/build/src/_modules/migration/_services/fam-import-dedup.data-service.js +53 -0
- package/build/src/_modules/migration/index.js +3 -1
- package/build/src/_modules/retrieval/_services/fam-retrieval-candidate.data-service.js +78 -4
- package/build/src/_modules/retrieval/_services/fam-retrieval.control-service.js +293 -50
- package/build/src/_modules/scope-reference/_collections/fam-scope-normalize.util.js +6 -3
- package/build/src/_modules/scope-reference/_services/fam-reference.data-service.js +18 -0
- package/build/src/_modules/scope-reference/_services/fam-scope-resolver.control-service.js +79 -20
- package/build/src/_routes/server/api/api.controller.js +34 -2
- package/build/src/_routes/server/client-app/client-app.control-service.js +1 -1
- package/build/src/_routes/server/server-status/server-status.controller.js +2 -1
- package/build/src/app.server.js +13 -1
- package/build/src/environments/environment.js +1 -1
- package/build/src/index.js +1 -1
- package/client-dist/{chunk-GHKRM4SM.js → chunk-I77GXVAQ.js} +1 -1
- package/client-dist/{chunk-LMTL7GA3.js → chunk-YXHWCJ5O.js} +1 -1
- package/client-dist/index.html +1 -1
- package/client-dist/{main-2KWB3QYK.js → main-PJPEDVJT.js} +1 -1
- package/package.json +6 -5
|
@@ -3,9 +3,17 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.FAM_SecretExclude_Util = void 0;
|
|
4
4
|
/**
|
|
5
5
|
* `FAM_SecretExclude_Util` (SP-4.1, dsgn-004 §6.3) — a scan-réteg **fájlnév-/útvonal-alapú**
|
|
6
|
-
* secret-kizárása. A felsorolt minták (`.env*`, `*.key`, `*.pem`, `*.p12`, `id_rsa*`,
|
|
7
|
-
*
|
|
8
|
-
* routing **ELŐTT** fut, és a kizárt fájlok a `filesSkipped`-be számolódnak
|
|
6
|
+
* secret-kizárása. A felsorolt minták (`.env*`, `*.key`, `*.pem`, `*.p12`, `id_rsa*`, `*.crt`,
|
|
7
|
+
* `secrets/` mappa + a `secret`/`credential` névrész NEM-forrásfájlokon) **SOHA** nem ingestálódnak;
|
|
8
|
+
* a kizárás a file-típus-routing **ELŐTT** fut, és a kizárt fájlok a `filesSkipped`-be számolódnak
|
|
9
|
+
* (audit, NEM néma).
|
|
10
|
+
*
|
|
11
|
+
* **FALSE-POSITIVE FIX (2026-06-20 audit):** a korábbi `*secret*` / `*credential*` substring-minta
|
|
12
|
+
* **bármely** forrásfájlt is kizárt, aminek a neve tartalmazta ezeket (pl. maga a
|
|
13
|
+
* `fam-secret-exclude.util.ts`, vagy egy `secret-manager.service.ts`) → kód-RAG-lefedettség-rés. Ezért
|
|
14
|
+
* a `secret`/`credential` névrész-minta MOST CSAK **nem-forrás/doc-kiterjesztésekre** alkalmazódik
|
|
15
|
+
* (egy `.ts`/`.md` fájl, aminek a neve „secret", az KÓD a titokról, NEM titok-fájl). A valódi titok-
|
|
16
|
+
* fájlok (`.env`/`.key`/`.pem`/…) kiterjesztés-/névalapon továbbra is mindig kizártak.
|
|
9
17
|
*
|
|
10
18
|
* **HATÁR (dsgn-004 §6 megjegyzés + `MP-4-detailed` kockázat) — KIFEJEZETTEN BACKLOG:** ez CSAK
|
|
11
19
|
* fájlnév-/útvonal-alapú kizárás. A **tartalmi** PII/secret-detektálás (a fájlon BELÜLI titkok
|
|
@@ -15,12 +23,10 @@ exports.FAM_SecretExclude_Util = void 0;
|
|
|
15
23
|
*/
|
|
16
24
|
class FAM_SecretExclude_Util {
|
|
17
25
|
/**
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
* ettől FÜGGETLENÜL is alkalmazza őket (a config-override NEM kapcsolhatja ki a secret-védelmet).
|
|
21
|
-
* A vizsgálat a fájlnévre (regex) + az útvonal-szegmensekre (`secrets/`) megy.
|
|
26
|
+
* HARD secret-fájlnév-minták (dsgn-004 §6.3) — **MINDIG** kizárnak, kiterjesztéstől függetlenül:
|
|
27
|
+
* ezek a fájlok maguk a titkok (kulcs/cert/env). A config-override NEM kapcsolhatja ki őket.
|
|
22
28
|
*/
|
|
23
|
-
static
|
|
29
|
+
static HARD_SECRET_PATTERNS = [
|
|
24
30
|
/^\.env(\..*)?$/i, // .env, .env.local, .env.production, ...
|
|
25
31
|
/\.key$/i, // *.key
|
|
26
32
|
/\.pem$/i, // *.pem
|
|
@@ -29,14 +35,24 @@ class FAM_SecretExclude_Util {
|
|
|
29
35
|
/\.pfx$/i, // *.pfx (cert)
|
|
30
36
|
/^id_rsa.*$/i, // id_rsa, id_rsa.pub, ...
|
|
31
37
|
/^id_ed25519.*$/i, // id_ed25519, id_ed25519.pub
|
|
32
|
-
/secret/i, // *secret* (bárhol a fájlnévben)
|
|
33
|
-
/credential/i, // *credential* (bárhol a fájlnévben)
|
|
34
38
|
];
|
|
39
|
+
/**
|
|
40
|
+
* SOFT névrész-minták (`secret`/`credential` bárhol a fájlnévben) — CSAK NEM-forrás/doc-fájlokra
|
|
41
|
+
* (pl. `secrets.json`, `my-credential.txt`). Egy `.ts`/`.md` fájl, aminek a neve „secret", az KÓD/
|
|
42
|
+
* DOC a titokról (legit forrás), NEM titok → ingestálódik (false-positive fix, lásd osztály-jsdoc).
|
|
43
|
+
*/
|
|
44
|
+
static SOFT_SECRET_NAME_PATTERNS = [
|
|
45
|
+
/secret/i, // *secret* (csak nem-forrás kiterjesztésen)
|
|
46
|
+
/credential/i, // *credential* (csak nem-forrás kiterjesztésen)
|
|
47
|
+
];
|
|
48
|
+
/** Ismert FORRÁS-/DOC-kiterjesztések — ezeken a SOFT névrész-minta NEM zár ki (legit kód/doc). */
|
|
49
|
+
static SOURCE_DOC_EXTENSION = /\.(ts|tsx|mts|cts|js|jsx|mjs|cjs|md|markdown)$/i;
|
|
35
50
|
/** A secret-mappák (az útvonal BÁRMELY szegmense — pl. `secrets/`, `.ssh/`). */
|
|
36
51
|
static SECRET_DIR_SEGMENTS = ['secrets', '.ssh', '.gnupg'];
|
|
37
52
|
/**
|
|
38
|
-
* Secret-e a (projekt-relatív, `/`-normalizált) útvonal —
|
|
39
|
-
*
|
|
53
|
+
* Secret-e a (projekt-relatív, `/`-normalizált) útvonal — secret-mappa-szegmens VAGY HARD-minta
|
|
54
|
+
* VAGY (nem-forrásfájlon) SOFT névrész-minta alapján (dsgn-004 §6.3). `true` → SOHA nem ingestálódik
|
|
55
|
+
* (`filesSkipped`, audit).
|
|
40
56
|
*/
|
|
41
57
|
static isSecret(relativePath) {
|
|
42
58
|
const normalized = relativePath.replace(/\\/g, '/');
|
|
@@ -47,8 +63,15 @@ class FAM_SecretExclude_Util {
|
|
|
47
63
|
if (dirSegments.some((segment) => FAM_SecretExclude_Util.SECRET_DIR_SEGMENTS.includes(segment))) {
|
|
48
64
|
return true;
|
|
49
65
|
}
|
|
50
|
-
// (2)
|
|
51
|
-
|
|
66
|
+
// (2) HARD secret-fájlnév-minta (mindig).
|
|
67
|
+
if (FAM_SecretExclude_Util.HARD_SECRET_PATTERNS.some((pattern) => pattern.test(fileName))) {
|
|
68
|
+
return true;
|
|
69
|
+
}
|
|
70
|
+
// (3) SOFT névrész-minta — CSAK ha NEM ismert forrás-/doc-kiterjesztés (a "secret"-nevű .ts legit kód).
|
|
71
|
+
if (FAM_SecretExclude_Util.SOURCE_DOC_EXTENSION.test(fileName)) {
|
|
72
|
+
return false;
|
|
73
|
+
}
|
|
74
|
+
return FAM_SecretExclude_Util.SOFT_SECRET_NAME_PATTERNS.some((pattern) => pattern.test(fileName));
|
|
52
75
|
}
|
|
53
76
|
}
|
|
54
77
|
exports.FAM_SecretExclude_Util = FAM_SecretExclude_Util;
|
|
@@ -32,6 +32,19 @@ class FAM_SlidingChunker_Util {
|
|
|
32
32
|
if (paragraphStart >= 0) {
|
|
33
33
|
charCursor = paragraphStart;
|
|
34
34
|
}
|
|
35
|
+
// FAM-REV-065: egy paragrafus ÖNMAGÁBAN a maxSize felett (pl. sortörés-nélküli nagy JSON/blob) →
|
|
36
|
+
// flush + HARD-SPLIT (char-vágás maxSize-onként). KÜLÖNBEN egyetlen óriás-chunk lenne, ami az
|
|
37
|
+
// embedding-modell token-limitjét túllépi → `error` státusz → a tartalom NEM kereshető (info-veszteség).
|
|
38
|
+
if (paragraph.length > sizing.maxSize) {
|
|
39
|
+
if (currentText.trim().length) {
|
|
40
|
+
FAM_SlidingChunker_Util.emit(chunks, currentText.trim(), currentCharStart, sizing.minSize);
|
|
41
|
+
currentText = '';
|
|
42
|
+
}
|
|
43
|
+
FAM_SlidingChunker_Util.hardSplit(chunks, paragraph, charCursor, sizing);
|
|
44
|
+
charCursor = charCursor + paragraph.length;
|
|
45
|
+
currentCharStart = charCursor;
|
|
46
|
+
continue;
|
|
47
|
+
}
|
|
35
48
|
if (currentText.length && currentText.length + paragraph.length + 2 > sizing.maxSize) {
|
|
36
49
|
FAM_SlidingChunker_Util.emit(chunks, currentText.trim(), currentCharStart, sizing.minSize);
|
|
37
50
|
if (currentText.length > sizing.overlap) {
|
|
@@ -60,6 +73,27 @@ class FAM_SlidingChunker_Util {
|
|
|
60
73
|
}
|
|
61
74
|
return chunks;
|
|
62
75
|
}
|
|
76
|
+
/**
|
|
77
|
+
* Egy maxSize feletti, természetes törés-pont NÉLKÜLI szöveg HARD-SPLIT-je (FAM-REV-065): char-vágás
|
|
78
|
+
* `maxSize`-onként, `overlap`-pal lépve. A darabok a `minSize`-szűrés ALÓL MENTESEK (a tail-darab is kell
|
|
79
|
+
* — semmi ne vesszen el), és a `position` a forrás-offszetre mutat. Így a nagy JSON/blob is embeddelhető
|
|
80
|
+
* (modell-token-limit alatti darabok), nem ragad `error`-ba.
|
|
81
|
+
*/
|
|
82
|
+
static hardSplit(chunks, text, charStart, sizing) {
|
|
83
|
+
const step = Math.max(1, sizing.maxSize - sizing.overlap);
|
|
84
|
+
for (let i = 0; i < text.length; i += step) {
|
|
85
|
+
const piece = text.slice(i, i + sizing.maxSize);
|
|
86
|
+
chunks.push({
|
|
87
|
+
content: piece,
|
|
88
|
+
position: { charStart: charStart + i, charEnd: charStart + i + piece.length },
|
|
89
|
+
chunkType: 'generic',
|
|
90
|
+
headingPath: [],
|
|
91
|
+
});
|
|
92
|
+
if (i + sizing.maxSize >= text.length) {
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
63
97
|
/** Egy chunk hozzáadása `chunkType='generic'`-kel (zaj-szűrés a `minSize`-ra). */
|
|
64
98
|
static emit(chunks, content, charStart, minSize) {
|
|
65
99
|
if (content.length < minSize) {
|
|
@@ -5,9 +5,11 @@ exports.FAM_TsChunker_Util = void 0;
|
|
|
5
5
|
* `FAM_TsChunker_Util` (SP-4.2, dsgn-004 §3.1, **BFR-AM-005 workaround**) — TypeScript-szemantikus
|
|
6
6
|
* chunker `.ts/.tsx/.mts/.js/.jsx`-re. A CCAP `PS_TypeScriptChunker_Util` regex/brace-depth
|
|
7
7
|
* megközelítésének projekt-lokális portja: import-csoport → top-level deklarációk (class/interface/
|
|
8
|
-
* enum/type/function/const-export) →
|
|
9
|
-
*
|
|
10
|
-
*
|
|
8
|
+
* enum/type/function/const-export) → JSDoc + `@Decorator` a következő kód-chunkkal együtt. A max-size
|
|
9
|
+
* FELETTI blokk darabolása: **class → metódus-tudatos** (a class-body member-határai mentén; minden adekvát
|
|
10
|
+
* metódus a saját chunkja `['class:Foo','method:bar']` symbol-lánccal + `chunkType:'method'`, a class-fej +
|
|
11
|
+
* mezők egy `'class'` chunk), **egyéb nagy blokk** (hosszú függvény/interface/enum) → soronkénti
|
|
12
|
+
* sliding-window (örökölt chunkType + symbol-lánc). A méret-konstansok a config-ból (`chunk.ts.*`), default `1500/100/50`.
|
|
11
13
|
*
|
|
12
14
|
* **BFR-AM-005 (dsgn-004 §3.1 megjegyzés):** a regex/brace-depth téved string-literálban lévő `{`/`}`
|
|
13
15
|
* vagy template-literál esetén — a `countBraces` konzervatívan kihagyja a string-/line-comment-
|
|
@@ -27,6 +29,8 @@ class FAM_TsChunker_Util {
|
|
|
27
29
|
static TYPE_ALIAS_START = /^(?:export\s+)?type\s+(\w+)\s*=/;
|
|
28
30
|
static STANDALONE_FUNCTION = /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)/;
|
|
29
31
|
static CONST_EXPORT = /^(?:export\s+)?const\s+(\w+)/;
|
|
32
|
+
/** Class-member-signature → metódus-név (modifierek után `név(`); getter/setter/constructor is). */
|
|
33
|
+
static METHOD_SIG = /^(?:public|private|protected|static|async|readonly|abstract|override|get|set|\s)*\*?\s*([\w$]+)\s*(?:<[^>]*>)?\s*\(/;
|
|
30
34
|
static DECORATOR_LINE = /^\s*@\w+/;
|
|
31
35
|
static JSDOC_OPEN = /^\s*\/\*\*/;
|
|
32
36
|
static JSDOC_CLOSE = /\*\//;
|
|
@@ -271,40 +275,222 @@ class FAM_TsChunker_Util {
|
|
|
271
275
|
}
|
|
272
276
|
return depth;
|
|
273
277
|
}
|
|
274
|
-
/**
|
|
278
|
+
/**
|
|
279
|
+
* Nagy (max-size feletti) blokk darabolása. `class` (entityName-mel) → **metódus-tudatos** member-bontás
|
|
280
|
+
* (`splitClassByMembers`); egyéb nagy blokk (hosszú függvény/interface/enum) VAGY sikertelen class-parse →
|
|
281
|
+
* soronkénti sliding-window (`slidingWindowLines`).
|
|
282
|
+
*/
|
|
275
283
|
static splitLargeBlock(block, sizing, headingPath) {
|
|
284
|
+
if (block.blockType === 'class' && block.entityName) {
|
|
285
|
+
const classChunks = FAM_TsChunker_Util.splitClassByMembers(block, sizing, headingPath);
|
|
286
|
+
if (classChunks.length) {
|
|
287
|
+
return classChunks;
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
return FAM_TsChunker_Util.slidingWindowLines({
|
|
291
|
+
lines: block.lines, charStart: block.charStart, lineStart: block.lineStart,
|
|
292
|
+
sizing: sizing, headingPath: headingPath, chunkType: block.blockType,
|
|
293
|
+
});
|
|
294
|
+
}
|
|
295
|
+
/**
|
|
296
|
+
* Soronkénti sliding-window egy sor-tömbre (a nem-class nagy blokkok + a túl nagy egyetlen metódus
|
|
297
|
+
* fallbackja). Örökli a kapott `chunkType`-ot + `headingPath`-ot; a sub-chunkok a maxSize körül törnek,
|
|
298
|
+
* `overlap/80` sornyi átfedéssel lépnek.
|
|
299
|
+
*
|
|
300
|
+
* **FAM-REV-061:** minden ablak a SAJÁT abszolút `charStart`-ját kapja (a blokk-kezdet + az ablak első
|
|
301
|
+
* soráig tartó char-offset) — korábban MINDEN ablak `set.charStart`-ot kapott (a blokk kezdete), így a nagy
|
|
302
|
+
* blokk összes sub-chunkja azonos charStart-ra ütközött (a location-info „hányadik char/sor" félrevezetett).
|
|
303
|
+
*/
|
|
304
|
+
static slidingWindowLines(set) {
|
|
276
305
|
const chunks = [];
|
|
277
|
-
const lines =
|
|
278
|
-
const overlapLines = Math.max(1, Math.floor(sizing.overlap / 80));
|
|
306
|
+
const lines = set.lines;
|
|
307
|
+
const overlapLines = Math.max(1, Math.floor(set.sizing.overlap / 80));
|
|
308
|
+
// Sor-kezdő char-offsetek a blokkon belül (a per-ablak abszolút charStart-hoz; +1 a `\n`-ért).
|
|
309
|
+
const lineCharOffset = [];
|
|
310
|
+
let acc = 0;
|
|
311
|
+
for (let i = 0; i < lines.length; i++) {
|
|
312
|
+
lineCharOffset[i] = acc;
|
|
313
|
+
acc += lines[i].length + 1;
|
|
314
|
+
}
|
|
279
315
|
let startIdx = 0;
|
|
280
316
|
while (startIdx < lines.length) {
|
|
281
317
|
let currentText = '';
|
|
282
318
|
let endIdx = startIdx;
|
|
283
319
|
while (endIdx < lines.length) {
|
|
284
320
|
const candidate = currentText + (currentText ? '\n' : '') + lines[endIdx];
|
|
285
|
-
if (candidate.length > sizing.maxSize && currentText.length) {
|
|
321
|
+
if (candidate.length > set.sizing.maxSize && currentText.length) {
|
|
286
322
|
break;
|
|
287
323
|
}
|
|
288
324
|
currentText = candidate;
|
|
289
325
|
endIdx++;
|
|
290
326
|
}
|
|
291
|
-
if (currentText.trim().length >= sizing.minSize) {
|
|
327
|
+
if (currentText.trim().length >= set.sizing.minSize) {
|
|
328
|
+
const windowCharStart = set.charStart + (lineCharOffset[startIdx] ?? 0);
|
|
292
329
|
chunks.push({
|
|
293
330
|
content: currentText,
|
|
294
331
|
position: {
|
|
295
|
-
charStart:
|
|
296
|
-
charEnd:
|
|
297
|
-
lineStart:
|
|
298
|
-
lineEnd:
|
|
332
|
+
charStart: windowCharStart,
|
|
333
|
+
charEnd: windowCharStart + currentText.length,
|
|
334
|
+
lineStart: set.lineStart + startIdx,
|
|
335
|
+
lineEnd: set.lineStart + endIdx - 1,
|
|
299
336
|
},
|
|
300
|
-
chunkType:
|
|
301
|
-
headingPath: headingPath,
|
|
337
|
+
chunkType: set.chunkType,
|
|
338
|
+
headingPath: set.headingPath,
|
|
302
339
|
});
|
|
303
340
|
}
|
|
304
341
|
startIdx = Math.max(startIdx + 1, endIdx - overlapLines);
|
|
305
342
|
}
|
|
306
343
|
return chunks;
|
|
307
344
|
}
|
|
345
|
+
/**
|
|
346
|
+
* Metódus-tudatos class-darabolás (FAM-REV-049): a class-body MEMBER-határait (brace-depth) követve a
|
|
347
|
+
* class-fejet + a metódusokat külön chunkokra bontja — minden adekvát (>= minSize) metódus a saját
|
|
348
|
+
* chunkja a beágyazott symbol-lánccal (`['class:Foo','method:bar']`, `chunkType:'method'`). A
|
|
349
|
+
* `parseClassUnits` 1-nél több unitot ad (class-fej + 1+ member) → `packClassUnits`; különben üres
|
|
350
|
+
* (a hívó soros slidingjára esik).
|
|
351
|
+
*/
|
|
352
|
+
static splitClassByMembers(block, sizing, classHeadingPath) {
|
|
353
|
+
const units = FAM_TsChunker_Util.parseClassUnits(block);
|
|
354
|
+
if (units.length <= 1) {
|
|
355
|
+
return [];
|
|
356
|
+
}
|
|
357
|
+
return FAM_TsChunker_Util.packClassUnits(units, block, sizing, classHeadingPath);
|
|
358
|
+
}
|
|
359
|
+
/**
|
|
360
|
+
* A class-blokk member-egységekre bontása (FAM-REV-049): az első unit a class-FEJ (JSDoc + signature a
|
|
361
|
+
* body-nyitásig), majd a class-body memberei a brace-depth member-határai mentén (egy member zár, ha a
|
|
362
|
+
* body-szintre [depth 1] tér vissza, VAGY egy `;`-re végződő egy-soros field). Brace-depth (NEM AST).
|
|
363
|
+
*/
|
|
364
|
+
static parseClassUnits(block) {
|
|
365
|
+
const lines = block.lines;
|
|
366
|
+
// (1) class-body nyitás: az első sor, ahol a kumulált brace-depth >= 1.
|
|
367
|
+
let depth = 0;
|
|
368
|
+
let openIdx = -1;
|
|
369
|
+
for (let i = 0; i < lines.length; i++) {
|
|
370
|
+
depth += FAM_TsChunker_Util.countBraces(lines[i]);
|
|
371
|
+
if (depth >= 1) {
|
|
372
|
+
openIdx = i;
|
|
373
|
+
break;
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
if (openIdx < 0) {
|
|
377
|
+
return [];
|
|
378
|
+
}
|
|
379
|
+
const units = [];
|
|
380
|
+
units.push({ lines: lines.slice(0, openIdx + 1), lineOffset: 0, isMethod: false });
|
|
381
|
+
// (2) a body memberei (depth 1-ről indulva).
|
|
382
|
+
let current = [];
|
|
383
|
+
let currentStart = openIdx + 1;
|
|
384
|
+
let bodyDepth = 1;
|
|
385
|
+
for (let i = openIdx + 1; i < lines.length; i++) {
|
|
386
|
+
const line = lines[i];
|
|
387
|
+
const before = bodyDepth;
|
|
388
|
+
bodyDepth += FAM_TsChunker_Util.countBraces(line);
|
|
389
|
+
if (bodyDepth <= 0) {
|
|
390
|
+
// class-záró `}` → a maradékot flush-öljük (a záró brace-sort nem külön chunkoljuk).
|
|
391
|
+
if (current.length) {
|
|
392
|
+
units.push(FAM_TsChunker_Util.makeMemberUnit(current, currentStart));
|
|
393
|
+
}
|
|
394
|
+
break;
|
|
395
|
+
}
|
|
396
|
+
if (!current.length) {
|
|
397
|
+
currentStart = i;
|
|
398
|
+
}
|
|
399
|
+
current.push(line);
|
|
400
|
+
const closedBackToBody = before > 1 && bodyDepth === 1; // braced member (metódus) lezárult
|
|
401
|
+
const oneLineField = before === 1 && bodyDepth === 1 && line.trim().endsWith(';');
|
|
402
|
+
if (closedBackToBody || oneLineField) {
|
|
403
|
+
units.push(FAM_TsChunker_Util.makeMemberUnit(current, currentStart));
|
|
404
|
+
current = [];
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
return units;
|
|
408
|
+
}
|
|
409
|
+
/** Egy member-sortömb → `FAM_ClassUnit` (a signature-sorból kinyert metódus-névvel). */
|
|
410
|
+
static makeMemberUnit(lines, lineOffset) {
|
|
411
|
+
const sigLine = lines.find((line) => {
|
|
412
|
+
const trimmed = line.trim();
|
|
413
|
+
return trimmed.length > 0
|
|
414
|
+
&& !trimmed.startsWith('/*') && !trimmed.startsWith('*') && !trimmed.startsWith('//')
|
|
415
|
+
&& !trimmed.startsWith('@');
|
|
416
|
+
}) ?? '';
|
|
417
|
+
const match = sigLine.trim().match(FAM_TsChunker_Util.METHOD_SIG);
|
|
418
|
+
const isMethod = !!match && sigLine.includes('(');
|
|
419
|
+
return { lines: lines, lineOffset: lineOffset, name: match ? match[1] : undefined, isMethod: isMethod };
|
|
420
|
+
}
|
|
421
|
+
/**
|
|
422
|
+
* A member-egységek chunkokra csomagolása (FAM-REV-049): minSize-ig akkumulál (semmi ne essen ki a
|
|
423
|
+
* zaj-szűrőn), maxSize előtt flush-öl; az adekvát egy-metódusú chunk `method:`-symbol-láncot + `'method'`
|
|
424
|
+
* típust kap, a többi (class-fej / field-csoport) a `'class'` típust + a class-symbol-láncot. A maxSize
|
|
425
|
+
* feletti egyetlen member soros sliding-window-ra bomlik (örökölt symbol-lánc).
|
|
426
|
+
*/
|
|
427
|
+
static packClassUnits(units, block, sizing, classHeadingPath) {
|
|
428
|
+
// Sor-kezdő char-offsetek a class-blokkon belül (a pontos charStart-hoz).
|
|
429
|
+
const lineCharOffset = [];
|
|
430
|
+
let acc = 0;
|
|
431
|
+
for (let i = 0; i < block.lines.length; i++) {
|
|
432
|
+
lineCharOffset[i] = acc;
|
|
433
|
+
acc += block.lines[i].length + 1;
|
|
434
|
+
}
|
|
435
|
+
const chunks = [];
|
|
436
|
+
let group = [];
|
|
437
|
+
const groupText = () => group.map((unit) => unit.lines.join('\n')).join('\n');
|
|
438
|
+
const flush = () => {
|
|
439
|
+
if (!group.length) {
|
|
440
|
+
return;
|
|
441
|
+
}
|
|
442
|
+
const text = groupText();
|
|
443
|
+
if (text.trim().length) {
|
|
444
|
+
const first = group[0];
|
|
445
|
+
const last = group[group.length - 1];
|
|
446
|
+
const lastLineOffset = last.lineOffset + last.lines.length - 1;
|
|
447
|
+
const single = group.length === 1 && first.isMethod && !!first.name;
|
|
448
|
+
const baseChar = block.charStart + (lineCharOffset[first.lineOffset] ?? 0);
|
|
449
|
+
chunks.push({
|
|
450
|
+
content: text,
|
|
451
|
+
position: {
|
|
452
|
+
charStart: baseChar,
|
|
453
|
+
charEnd: baseChar + text.length,
|
|
454
|
+
lineStart: block.lineStart + first.lineOffset,
|
|
455
|
+
lineEnd: block.lineStart + lastLineOffset,
|
|
456
|
+
},
|
|
457
|
+
chunkType: single ? 'method' : 'class',
|
|
458
|
+
headingPath: single ? [...classHeadingPath, `method:${first.name}`] : classHeadingPath,
|
|
459
|
+
});
|
|
460
|
+
}
|
|
461
|
+
group = [];
|
|
462
|
+
};
|
|
463
|
+
for (const unit of units) {
|
|
464
|
+
const unitText = unit.lines.join('\n');
|
|
465
|
+
// Túl nagy egyetlen member → flush + soros sliding (örökölt method/class symbol-lánc).
|
|
466
|
+
if (unitText.length > sizing.maxSize) {
|
|
467
|
+
flush();
|
|
468
|
+
const headPath = unit.isMethod && unit.name
|
|
469
|
+
? [...classHeadingPath, `method:${unit.name}`]
|
|
470
|
+
: classHeadingPath;
|
|
471
|
+
chunks.push(...FAM_TsChunker_Util.slidingWindowLines({
|
|
472
|
+
lines: unit.lines,
|
|
473
|
+
charStart: block.charStart + (lineCharOffset[unit.lineOffset] ?? 0),
|
|
474
|
+
lineStart: block.lineStart + unit.lineOffset,
|
|
475
|
+
sizing: sizing,
|
|
476
|
+
headingPath: headPath,
|
|
477
|
+
chunkType: unit.isMethod ? 'method' : 'class',
|
|
478
|
+
}));
|
|
479
|
+
continue;
|
|
480
|
+
}
|
|
481
|
+
// maxSize-túllépés a csoporttal → előbb flush.
|
|
482
|
+
if (group.length && (groupText().length + 1 + unitText.length) > sizing.maxSize) {
|
|
483
|
+
flush();
|
|
484
|
+
}
|
|
485
|
+
group.push(unit);
|
|
486
|
+
// Elérte a minSize-t → flush (az adekvát metódus így a SAJÁT chunkja marad).
|
|
487
|
+
if (groupText().trim().length >= sizing.minSize) {
|
|
488
|
+
flush();
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
flush();
|
|
492
|
+
return chunks;
|
|
493
|
+
}
|
|
308
494
|
/** A symbol-`headingPath` egy blokkhoz (`['class:Foo']`); generic / névtelen → üres tömb. */
|
|
309
495
|
static buildHeadingPath(block) {
|
|
310
496
|
if (block.entityName && block.blockType !== 'generic' && block.blockType !== 'imports') {
|
|
@@ -51,7 +51,10 @@ class FAM_DeltaCompare_Util {
|
|
|
51
51
|
}
|
|
52
52
|
matchedIndices.add(chunk.chunkIndex);
|
|
53
53
|
if (existingEntry.contentHash === contentHash) {
|
|
54
|
-
items.push({
|
|
54
|
+
items.push({
|
|
55
|
+
verdict: 'equal', chunk: chunk, existingId: existingEntry._id, contentHash: contentHash,
|
|
56
|
+
existingEmbeddingStatus: existingEntry.embeddingStatus, // FAM-REV-066: a re-embed-if-not-completed-hez
|
|
57
|
+
});
|
|
55
58
|
}
|
|
56
59
|
else {
|
|
57
60
|
items.push({ verdict: 'modified', chunk: chunk, existingId: existingEntry._id, contentHash: contentHash });
|
|
@@ -31,13 +31,16 @@ class FAM_IngestRun_DataService extends nts_dynamo_1.DyNTS_DataService {
|
|
|
31
31
|
return new nts_dynamo_1.DyNTS_ArchiveDataService(new fam_ingest_run_data_model_1.FAM_IngestRun_DataModel(), this.dataParams, this.issuer);
|
|
32
32
|
}
|
|
33
33
|
/**
|
|
34
|
-
* A run megnyitása a scan ELEJÉN (dsgn-004 §5). A `_id
|
|
35
|
-
*
|
|
36
|
-
*
|
|
34
|
+
* A run megnyitása a scan ELEJÉN (dsgn-004 §5). A `_id`-t a Dynamo `saveData` GENERÁLJA (Mongo
|
|
35
|
+
* ObjectId, stringként) — a `FAM_Scope` mintáját követve (`findOrCreateScope`). **NEM** állítunk
|
|
36
|
+
* client-generált UUID `_id`-t: a Dynamo db-rétege a get-by-id-t `findById`-dal végzi, ami a `_id`-t
|
|
37
|
+
* ObjectId-ra cast-olja → egy UUID-string cast-ja elhasal (`DyNTS-DBS-GI0`). A hívó a VISSZAADOTT
|
|
38
|
+
* rekord `_id`-jét használja runId-ként (a chunkok ezzel az `ingestRunId`-vel hivatkoznak rá MÉG a
|
|
39
|
+
* run-zárás előtt). Friss `saveData` (új rekord — nincs Mixed-mutáció); a számlálók/verdiktek a
|
|
40
|
+
* `close`-ban frissülnek.
|
|
37
41
|
*/
|
|
38
42
|
async open(set) {
|
|
39
43
|
const record = new fam_ingest_run_data_model_1.FAM_IngestRun_DataModel({
|
|
40
|
-
_id: set.runId,
|
|
41
44
|
trigger: set.trigger,
|
|
42
45
|
table: set.table,
|
|
43
46
|
scopePath: set.scopePath,
|