@framers/agentos 0.1.125 → 0.1.126
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/provenance/crypto/AgentKeyManager.d.ts.map +1 -1
- package/dist/core/provenance/crypto/AgentKeyManager.js +9 -3
- package/dist/core/provenance/crypto/AgentKeyManager.js.map +1 -1
- package/dist/core/text-processing/ITextProcessor.d.ts +43 -0
- package/dist/core/text-processing/ITextProcessor.d.ts.map +1 -0
- package/dist/core/text-processing/ITextProcessor.js +6 -0
- package/dist/core/text-processing/ITextProcessor.js.map +1 -0
- package/dist/core/text-processing/TextProcessingPipeline.d.ts +49 -0
- package/dist/core/text-processing/TextProcessingPipeline.d.ts.map +1 -0
- package/dist/core/text-processing/TextProcessingPipeline.js +61 -0
- package/dist/core/text-processing/TextProcessingPipeline.js.map +1 -0
- package/dist/core/text-processing/filters/StopWordFilter.d.ts +30 -0
- package/dist/core/text-processing/filters/StopWordFilter.d.ts.map +1 -0
- package/dist/core/text-processing/filters/StopWordFilter.js +76 -0
- package/dist/core/text-processing/filters/StopWordFilter.js.map +1 -0
- package/dist/core/text-processing/index.d.ts +20 -0
- package/dist/core/text-processing/index.d.ts.map +1 -0
- package/dist/core/text-processing/index.js +24 -0
- package/dist/core/text-processing/index.js.map +1 -0
- package/dist/core/text-processing/lemmatizers/NoOpLemmatizer.d.ts +11 -0
- package/dist/core/text-processing/lemmatizers/NoOpLemmatizer.d.ts.map +1 -0
- package/dist/core/text-processing/lemmatizers/NoOpLemmatizer.js +13 -0
- package/dist/core/text-processing/lemmatizers/NoOpLemmatizer.js.map +1 -0
- package/dist/core/text-processing/lemmatizers/WordNetLemmatizer.d.ts +25 -0
- package/dist/core/text-processing/lemmatizers/WordNetLemmatizer.d.ts.map +1 -0
- package/dist/core/text-processing/lemmatizers/WordNetLemmatizer.js +64 -0
- package/dist/core/text-processing/lemmatizers/WordNetLemmatizer.js.map +1 -0
- package/dist/core/text-processing/normalizers/AccentStripper.d.ts +17 -0
- package/dist/core/text-processing/normalizers/AccentStripper.d.ts.map +1 -0
- package/dist/core/text-processing/normalizers/AccentStripper.js +22 -0
- package/dist/core/text-processing/normalizers/AccentStripper.js.map +1 -0
- package/dist/core/text-processing/normalizers/LowercaseNormalizer.d.ts +11 -0
- package/dist/core/text-processing/normalizers/LowercaseNormalizer.d.ts.map +1 -0
- package/dist/core/text-processing/normalizers/LowercaseNormalizer.js +13 -0
- package/dist/core/text-processing/normalizers/LowercaseNormalizer.js.map +1 -0
- package/dist/core/text-processing/presets.d.ts +22 -0
- package/dist/core/text-processing/presets.d.ts.map +1 -0
- package/dist/core/text-processing/presets.js +45 -0
- package/dist/core/text-processing/presets.js.map +1 -0
- package/dist/core/text-processing/stemmers/NoOpStemmer.d.ts +14 -0
- package/dist/core/text-processing/stemmers/NoOpStemmer.d.ts.map +1 -0
- package/dist/core/text-processing/stemmers/NoOpStemmer.js +16 -0
- package/dist/core/text-processing/stemmers/NoOpStemmer.js.map +1 -0
- package/dist/core/text-processing/stemmers/PorterStemmer.d.ts +31 -0
- package/dist/core/text-processing/stemmers/PorterStemmer.d.ts.map +1 -0
- package/dist/core/text-processing/stemmers/PorterStemmer.js +62 -0
- package/dist/core/text-processing/stemmers/PorterStemmer.js.map +1 -0
- package/dist/core/text-processing/tokenizers/CodeTokenizer.d.ts +25 -0
- package/dist/core/text-processing/tokenizers/CodeTokenizer.d.ts.map +1 -0
- package/dist/core/text-processing/tokenizers/CodeTokenizer.js +75 -0
- package/dist/core/text-processing/tokenizers/CodeTokenizer.js.map +1 -0
- package/dist/core/text-processing/tokenizers/StandardTokenizer.d.ts +22 -0
- package/dist/core/text-processing/tokenizers/StandardTokenizer.d.ts.map +1 -0
- package/dist/core/text-processing/tokenizers/StandardTokenizer.js +37 -0
- package/dist/core/text-processing/tokenizers/StandardTokenizer.js.map +1 -0
- package/dist/core/text-processing/types.d.ts +18 -0
- package/dist/core/text-processing/types.d.ts.map +1 -0
- package/dist/core/text-processing/types.js +6 -0
- package/dist/core/text-processing/types.js.map +1 -0
- package/dist/core/vector-search/HnswIndexSidecar.d.ts +91 -0
- package/dist/core/vector-search/HnswIndexSidecar.d.ts.map +1 -0
- package/dist/core/vector-search/HnswIndexSidecar.js +270 -0
- package/dist/core/vector-search/HnswIndexSidecar.js.map +1 -0
- package/dist/core/vector-search/index.d.ts +7 -0
- package/dist/core/vector-search/index.d.ts.map +1 -0
- package/dist/core/vector-search/index.js +6 -0
- package/dist/core/vector-search/index.js.map +1 -0
- package/dist/core/vector-search/types.d.ts +42 -0
- package/dist/core/vector-search/types.d.ts.map +1 -0
- package/dist/core/vector-search/types.js +6 -0
- package/dist/core/vector-search/types.js.map +1 -0
- package/dist/memory/CognitiveMemoryManager.js +3 -3
- package/dist/memory/CognitiveMemoryManager.js.map +1 -1
- package/dist/memory/consolidation/ConsolidationLoop.js +3 -3
- package/dist/memory/consolidation/ConsolidationLoop.js.map +1 -1
- package/dist/memory/facade/Memory.d.ts +21 -0
- package/dist/memory/facade/Memory.d.ts.map +1 -1
- package/dist/memory/facade/Memory.js +124 -62
- package/dist/memory/facade/Memory.js.map +1 -1
- package/dist/memory/io/ChatGptImporter.d.ts.map +1 -1
- package/dist/memory/io/ChatGptImporter.js +7 -6
- package/dist/memory/io/ChatGptImporter.js.map +1 -1
- package/dist/memory/io/CsvImporter.d.ts +15 -0
- package/dist/memory/io/CsvImporter.d.ts.map +1 -1
- package/dist/memory/io/CsvImporter.js +28 -7
- package/dist/memory/io/CsvImporter.js.map +1 -1
- package/dist/memory/io/JsonExporter.d.ts +14 -0
- package/dist/memory/io/JsonExporter.d.ts.map +1 -1
- package/dist/memory/io/JsonExporter.js +22 -3
- package/dist/memory/io/JsonExporter.js.map +1 -1
- package/dist/memory/io/JsonImporter.d.ts +15 -0
- package/dist/memory/io/JsonImporter.d.ts.map +1 -1
- package/dist/memory/io/JsonImporter.js +35 -16
- package/dist/memory/io/JsonImporter.js.map +1 -1
- package/dist/memory/io/MarkdownImporter.d.ts.map +1 -1
- package/dist/memory/io/MarkdownImporter.js +7 -5
- package/dist/memory/io/MarkdownImporter.js.map +1 -1
- package/dist/memory/io/ObsidianImporter.d.ts.map +1 -1
- package/dist/memory/io/ObsidianImporter.js +9 -23
- package/dist/memory/io/ObsidianImporter.js.map +1 -1
- package/dist/memory/io/SqliteExporter.d.ts.map +1 -1
- package/dist/memory/io/SqliteExporter.js +1 -3
- package/dist/memory/io/SqliteExporter.js.map +1 -1
- package/dist/memory/io/SqliteImporter.d.ts.map +1 -1
- package/dist/memory/io/SqliteImporter.js +10 -11
- package/dist/memory/io/SqliteImporter.js.map +1 -1
- package/dist/memory/store/HnswSidecar.d.ts +7 -0
- package/dist/memory/store/HnswSidecar.d.ts.map +1 -1
- package/dist/memory/store/HnswSidecar.js +7 -0
- package/dist/memory/store/HnswSidecar.js.map +1 -1
- package/dist/memory/store/SqliteBrain.d.ts +14 -2
- package/dist/memory/store/SqliteBrain.d.ts.map +1 -1
- package/dist/memory/store/SqliteBrain.js +37 -34
- package/dist/memory/store/SqliteBrain.js.map +1 -1
- package/dist/memory/store/SqliteKnowledgeGraph.d.ts.map +1 -1
- package/dist/memory/store/SqliteKnowledgeGraph.js +12 -55
- package/dist/memory/store/SqliteKnowledgeGraph.js.map +1 -1
- package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -1
- package/dist/memory/store/SqliteMemoryGraph.js +11 -16
- package/dist/memory/store/SqliteMemoryGraph.js.map +1 -1
- package/dist/memory/store/tracePersistence.d.ts +1 -1
- package/dist/memory/store/tracePersistence.d.ts.map +1 -1
- package/dist/memory/store/tracePersistence.js +3 -3
- package/dist/memory/store/tracePersistence.js.map +1 -1
- package/dist/memory/tools/MemoryAddTool.d.ts.map +1 -1
- package/dist/memory/tools/MemoryAddTool.js +2 -7
- package/dist/memory/tools/MemoryAddTool.js.map +1 -1
- package/dist/memory/tools/MemoryMergeTool.d.ts.map +1 -1
- package/dist/memory/tools/MemoryMergeTool.js +2 -3
- package/dist/memory/tools/MemoryMergeTool.js.map +1 -1
- package/dist/memory/tools/MemorySearchTool.d.ts.map +1 -1
- package/dist/memory/tools/MemorySearchTool.js +6 -5
- package/dist/memory/tools/MemorySearchTool.js.map +1 -1
- package/dist/memory/tools/MemoryUpdateTool.d.ts.map +1 -1
- package/dist/memory/tools/MemoryUpdateTool.js +2 -3
- package/dist/memory/tools/MemoryUpdateTool.js.map +1 -1
- package/dist/memory/util/crossPlatformCrypto.d.ts +31 -0
- package/dist/memory/util/crossPlatformCrypto.d.ts.map +1 -0
- package/dist/memory/util/crossPlatformCrypto.js +60 -0
- package/dist/memory/util/crossPlatformCrypto.js.map +1 -0
- package/dist/rag/VectorStoreManager.d.ts.map +1 -1
- package/dist/rag/VectorStoreManager.js +4 -2
- package/dist/rag/VectorStoreManager.js.map +1 -1
- package/dist/rag/implementations/vector_stores/HnswlibVectorStore.d.ts +6 -0
- package/dist/rag/implementations/vector_stores/HnswlibVectorStore.d.ts.map +1 -1
- package/dist/rag/implementations/vector_stores/HnswlibVectorStore.js +9 -0
- package/dist/rag/implementations/vector_stores/HnswlibVectorStore.js.map +1 -1
- package/dist/rag/implementations/vector_stores/SqlVectorStore.d.ts +23 -0
- package/dist/rag/implementations/vector_stores/SqlVectorStore.d.ts.map +1 -1
- package/dist/rag/implementations/vector_stores/SqlVectorStore.js +109 -5
- package/dist/rag/implementations/vector_stores/SqlVectorStore.js.map +1 -1
- package/dist/rag/search/BM25Index.d.ts +13 -0
- package/dist/rag/search/BM25Index.d.ts.map +1 -1
- package/dist/rag/search/BM25Index.js +14 -20
- package/dist/rag/search/BM25Index.js.map +1 -1
- package/package.json +3 -2
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LowercaseNormalizer.js","sourceRoot":"","sources":["../../../../src/core/text-processing/normalizers/LowercaseNormalizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,OAAO,mBAAmB;IAAhC;QACW,SAAI,GAAG,qBAAqB,CAAC;IAKxC,CAAC;IAHC,OAAO,CAAC,MAAe;QACrB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC;IACjE,CAAC;CACF"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Pre-built pipeline configurations for common use cases.
|
|
3
|
+
* @module agentos/core/text-processing/presets
|
|
4
|
+
*/
|
|
5
|
+
import { TextProcessingPipeline } from './TextProcessingPipeline';
|
|
6
|
+
/**
 * Creates the preset pipeline for English prose text.
 *
 * Stages, in order: standard tokenizer → lowercase → strip accents →
 * remove stop words → Porter stem.
 *
 * @returns A pipeline configured with the stages listed above.
 */
|
|
10
|
+
export declare function createProsePipeline(): TextProcessingPipeline;
|
|
11
|
+
/**
 * Creates the preset pipeline for source code and technical identifiers.
 *
 * Stages, in order: code tokenizer (camelCase / snake_case / dotted-name
 * splitting) → lowercase → code stop words → pass-through stemmer
 * (i.e. no stemming is applied).
 *
 * @returns A pipeline configured with the stages listed above.
 */
|
|
15
|
+
export declare function createCodePipeline(): TextProcessingPipeline;
|
|
16
|
+
/**
 * Creates the default pipeline for RAG / hybrid search.
 *
 * Stages, in order: standard tokenizer → lowercase → remove stop words →
 * Porter stem. Unlike the prose preset, accents are not stripped.
 * Intended as a balance of recall and precision for mixed-content corpora.
 *
 * @returns A pipeline configured with the stages listed above.
 */
|
|
21
|
+
export declare function createRagPipeline(): TextProcessingPipeline;
|
|
22
|
+
//# sourceMappingURL=presets.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"presets.d.ts","sourceRoot":"","sources":["../../../src/core/text-processing/presets.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AASlE;;;GAGG;AACH,wBAAgB,mBAAmB,IAAI,sBAAsB,CAM5D;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,IAAI,sBAAsB,CAK3D;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,IAAI,sBAAsB,CAK1D"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Pre-built pipeline configurations for common use cases.
|
|
3
|
+
* @module agentos/core/text-processing/presets
|
|
4
|
+
*/
|
|
5
|
+
import { TextProcessingPipeline } from './TextProcessingPipeline.js';
|
|
6
|
+
import { StandardTokenizer } from './tokenizers/StandardTokenizer.js';
|
|
7
|
+
import { CodeTokenizer } from './tokenizers/CodeTokenizer.js';
|
|
8
|
+
import { LowercaseNormalizer } from './normalizers/LowercaseNormalizer.js';
|
|
9
|
+
import { AccentStripper } from './normalizers/AccentStripper.js';
|
|
10
|
+
import { StopWordFilter, CODE_STOP_WORDS } from './filters/StopWordFilter.js';
|
|
11
|
+
import { PorterStemmer } from './stemmers/PorterStemmer.js';
|
|
12
|
+
import { NoOpStemmer } from './stemmers/NoOpStemmer.js';
|
|
13
|
+
/**
 * Builds the preset pipeline for English prose.
 *
 * Stages are added in this order:
 * StandardTokenizer → LowercaseNormalizer → AccentStripper →
 * StopWordFilter (constructed without arguments, so it uses its own
 * default word list) → PorterStemmer.
 *
 * @returns The pipeline produced by the final `add()` call.
 */
export function createProsePipeline() {
    /* Re-bind after every add() so the result of each call feeds the next one. */
    let pipeline = new TextProcessingPipeline(new StandardTokenizer());
    pipeline = pipeline.add(new LowercaseNormalizer());
    pipeline = pipeline.add(new AccentStripper());
    pipeline = pipeline.add(new StopWordFilter());
    pipeline = pipeline.add(new PorterStemmer());
    return pipeline;
}
|
|
24
|
+
/**
 * Builds the preset pipeline for source code and technical identifiers.
 *
 * Stages are added in this order:
 * CodeTokenizer (splits camelCase / snake_case / dotted names) →
 * LowercaseNormalizer → StopWordFilter(CODE_STOP_WORDS) → NoOpStemmer
 * (tokens are deliberately left unstemmed).
 *
 * @returns The pipeline produced by the final `add()` call.
 */
export function createCodePipeline() {
    /* Re-bind after every add() so the result of each call feeds the next one. */
    let pipeline = new TextProcessingPipeline(new CodeTokenizer());
    pipeline = pipeline.add(new LowercaseNormalizer());
    pipeline = pipeline.add(new StopWordFilter(CODE_STOP_WORDS));
    pipeline = pipeline.add(new NoOpStemmer());
    return pipeline;
}
|
|
34
|
+
/**
 * Builds the default pipeline for RAG / hybrid search.
 *
 * Stages are added in this order:
 * StandardTokenizer → LowercaseNormalizer → StopWordFilter (constructed
 * without arguments, so it uses its own default word list) → PorterStemmer.
 * Same as the prose preset minus the accent-stripping stage.
 *
 * @returns The pipeline produced by the final `add()` call.
 */
export function createRagPipeline() {
    /* Re-bind after every add() so the result of each call feeds the next one. */
    let pipeline = new TextProcessingPipeline(new StandardTokenizer());
    pipeline = pipeline.add(new LowercaseNormalizer());
    pipeline = pipeline.add(new StopWordFilter());
    pipeline = pipeline.add(new PorterStemmer());
    return pipeline;
}
|
|
45
|
+
//# sourceMappingURL=presets.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"presets.js","sourceRoot":"","sources":["../../../src/core/text-processing/presets.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAClE,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,mBAAmB,EAAE,MAAM,mCAAmC,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3E,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAErD;;;GAGG;AACH,MAAM,UAAU,mBAAmB;IACjC,OAAO,IAAI,sBAAsB,CAAC,IAAI,iBAAiB,EAAE,CAAC;SACvD,GAAG,CAAC,IAAI,mBAAmB,EAAE,CAAC;SAC9B,GAAG,CAAC,IAAI,cAAc,EAAE,CAAC;SACzB,GAAG,CAAC,IAAI,cAAc,EAAE,CAAC,CAAC,uEAAuE;SACjG,GAAG,CAAC,IAAI,aAAa,EAAE,CAAC,CAAC;AAC9B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB;IAChC,OAAO,IAAI,sBAAsB,CAAC,IAAI,aAAa,EAAE,CAAC;SACnD,GAAG,CAAC,IAAI,mBAAmB,EAAE,CAAC;SAC9B,GAAG,CAAC,IAAI,cAAc,CAAC,eAAe,CAAC,CAAC;SACxC,GAAG,CAAC,IAAI,WAAW,EAAE,CAAC,CAAC;AAC5B,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iBAAiB;IAC/B,OAAO,IAAI,sBAAsB,CAAC,IAAI,iBAAiB,EAAE,CAAC;SACvD,GAAG,CAAC,IAAI,mBAAmB,EAAE,CAAC;SAC9B,GAAG,CAAC,IAAI,cAAc,EAAE,CAAC,CAAC,uEAAuE;SACjG,GAAG,CAAC,IAAI,aAAa,EAAE,CAAC,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Pass-through stemmer that does nothing.
|
|
3
|
+
* Use for code identifiers where stemming would be harmful
|
|
4
|
+
* (e.g. `kubernetes` → `kubernet` is wrong).
|
|
5
|
+
*
|
|
6
|
+
* @module agentos/core/text-processing/stemmers/NoOpStemmer
|
|
7
|
+
*/
|
|
8
|
+
import type { Token } from '../types';
|
|
9
|
+
import type { ITextProcessor } from '../ITextProcessor';
|
|
10
|
+
/**
 * Pass-through processor that occupies the stemming slot of a pipeline
 * without changing any token. Used where stemming would damage the text,
 * such as code identifiers.
 */
export declare class NoOpStemmer implements ITextProcessor {
|
|
11
|
+
/** Processor identifier; always the literal "NoOpStemmer". */
readonly name = "NoOpStemmer";
|
|
12
|
+
/**
 * Returns the given token array as-is (same array reference, no copy).
 *
 * @param tokens Tokens produced by earlier pipeline stages.
 * @returns The `tokens` argument, untouched.
 */
process(tokens: Token[]): Token[];
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=NoOpStemmer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"NoOpStemmer.d.ts","sourceRoot":"","sources":["../../../../src/core/text-processing/stemmers/NoOpStemmer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAExD,qBAAa,WAAY,YAAW,cAAc;IAChD,QAAQ,CAAC,IAAI,iBAAiB;IAE9B,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,KAAK,EAAE;CAGlC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Pass-through stemmer that does nothing.
|
|
3
|
+
* Use for code identifiers where stemming would be harmful
|
|
4
|
+
* (e.g. `kubernetes` → `kubernet` is wrong).
|
|
5
|
+
*
|
|
6
|
+
* @module agentos/core/text-processing/stemmers/NoOpStemmer
|
|
7
|
+
*/
|
|
8
|
+
export class NoOpStemmer {
|
|
9
|
+
constructor() {
|
|
10
|
+
this.name = 'NoOpStemmer';
|
|
11
|
+
}
|
|
12
|
+
process(tokens) {
|
|
13
|
+
return tokens;
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=NoOpStemmer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"NoOpStemmer.js","sourceRoot":"","sources":["../../../../src/core/text-processing/stemmers/NoOpStemmer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAKH,MAAM,OAAO,WAAW;IAAxB;QACW,SAAI,GAAG,aAAa,CAAC;IAKhC,CAAC;IAHC,OAAO,CAAC,MAAe;QACrB,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Porter stemmer wrapping the `natural` package.
|
|
3
|
+
* Falls back to no-op if `natural` is not installed.
|
|
4
|
+
*
|
|
5
|
+
* @module agentos/core/text-processing/stemmers/PorterStemmer
|
|
6
|
+
*/
|
|
7
|
+
import type { Token } from '../types';
|
|
8
|
+
import type { ITextProcessor } from '../ITextProcessor';
|
|
9
|
+
/**
 * Porter stemmer — reduces words to their morphological root.
 * `running` → `run`, `foxes` → `fox`, `connected` → `connect`.
 *
 * Stemming is delegated to the `natural` npm package, which is imported
 * asynchronously by `initialize()`. Until that import has succeeded (or if
 * it fails), `process()` behaves as a no-op and returns its input unchanged.
 *
 * When stemming is active, each output token has both `text` and `stem`
 * set to the stemmed form, so downstream processors see stemmed text.
 */
|
|
19
|
+
export declare class PorterStemmer implements ITextProcessor {
|
|
20
|
+
/** Processor identifier; always the literal "PorterStemmer". */
readonly name = "PorterStemmer";
|
|
21
|
+
/** Set once this instance has awaited the shared `natural` loader. */
private initialized;
|
|
22
|
+
/** Awaits the module-level loader the first time it is called on this instance. */
private ensureLoaded;
|
|
23
|
+
/**
 * Synchronously stems every token if the stem function is already loaded.
 * This method never triggers loading itself: if `natural` has not been
 * loaded yet, the input array is returned unchanged.
 *
 * @param tokens Tokens from the previous stage.
 * @returns New token objects with `text`/`stem` replaced by the stem, or
 *          the original `tokens` array when no stem function is available.
 */
process(tokens: Token[]): Token[];
|
|
24
|
+
/**
 * Async initialization — loads `natural` so that `process()` can stem.
 * Must have completed before `process()` is expected to stem anything;
 * otherwise `process()` silently passes tokens through.
 * NOTE(review): the original doc says the pipeline calls this automatically;
 * that is not visible from this file — confirm against TextProcessingPipeline.
 *
 * @returns A promise that resolves once the load attempt has finished
 *          (it resolves even when `natural` cannot be imported).
 */
|
|
29
|
+
initialize(): Promise<void>;
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=PorterStemmer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PorterStemmer.d.ts","sourceRoot":"","sources":["../../../../src/core/text-processing/stemmers/PorterStemmer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAkBxD;;;;;;;;;GASG;AACH,qBAAa,aAAc,YAAW,cAAc;IAClD,QAAQ,CAAC,IAAI,mBAAmB;IAEhC,OAAO,CAAC,WAAW,CAAS;YAEd,YAAY;IAO1B,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,KAAK,EAAE;IAUjC;;;;OAIG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAGlC"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Porter stemmer wrapping the `natural` package.
|
|
3
|
+
* Falls back to no-op if `natural` is not installed.
|
|
4
|
+
*
|
|
5
|
+
* @module agentos/core/text-processing/stemmers/PorterStemmer
|
|
6
|
+
*/
|
|
7
|
+
/** Lazy-loaded stem function from the `natural` package. */
|
|
8
|
+
let stemFn = null;
|
|
9
|
+
let loadAttempted = false;
|
|
10
|
+
async function loadStemmer() {
|
|
11
|
+
if (loadAttempted)
|
|
12
|
+
return;
|
|
13
|
+
loadAttempted = true;
|
|
14
|
+
try {
|
|
15
|
+
const natural = await import('natural');
|
|
16
|
+
stemFn = (word) => natural.PorterStemmer.stem(word);
|
|
17
|
+
}
|
|
18
|
+
catch {
|
|
19
|
+
/* natural not installed — stemmer will be a no-op */
|
|
20
|
+
stemFn = null;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Porter stemmer — reduces words to their morphological root.
|
|
25
|
+
* `running` → `run`, `foxes` → `fox`, `connected` → `connect`.
|
|
26
|
+
*
|
|
27
|
+
* Uses the `natural` npm package (already in agentos dependencies).
|
|
28
|
+
* Falls back to no-op if `natural` can't be imported.
|
|
29
|
+
*
|
|
30
|
+
* Sets `token.stem` with the stemmed form. Also updates `token.text`
|
|
31
|
+
* so downstream processors work with stemmed tokens.
|
|
32
|
+
*/
|
|
33
|
+
export class PorterStemmer {
|
|
34
|
+
constructor() {
|
|
35
|
+
this.name = 'PorterStemmer';
|
|
36
|
+
this.initialized = false;
|
|
37
|
+
}
|
|
38
|
+
async ensureLoaded() {
|
|
39
|
+
if (!this.initialized) {
|
|
40
|
+
await loadStemmer();
|
|
41
|
+
this.initialized = true;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
process(tokens) {
|
|
45
|
+
/* Synchronous path if already loaded */
|
|
46
|
+
if (!stemFn)
|
|
47
|
+
return tokens;
|
|
48
|
+
return tokens.map(t => {
|
|
49
|
+
const stemmed = stemFn(t.text);
|
|
50
|
+
return { ...t, text: stemmed, stem: stemmed };
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Async initialization — call once before first use to load `natural`.
|
|
55
|
+
* The pipeline calls this automatically, but you can call it early
|
|
56
|
+
* to avoid the lazy-load delay on first process() call.
|
|
57
|
+
*/
|
|
58
|
+
async initialize() {
|
|
59
|
+
await this.ensureLoaded();
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
//# sourceMappingURL=PorterStemmer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PorterStemmer.js","sourceRoot":"","sources":["../../../../src/core/text-processing/stemmers/PorterStemmer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,4DAA4D;AAC5D,IAAI,MAAM,GAAsC,IAAI,CAAC;AACrD,IAAI,aAAa,GAAG,KAAK,CAAC;AAE1B,KAAK,UAAU,WAAW;IACxB,IAAI,aAAa;QAAE,OAAO;IAC1B,aAAa,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,GAAG,CAAC,IAAY,EAAE,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,qDAAqD;QACrD,MAAM,GAAG,IAAI,CAAC;IAChB,CAAC;AACH,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,OAAO,aAAa;IAA1B;QACW,SAAI,GAAG,eAAe,CAAC;QAExB,gBAAW,GAAG,KAAK,CAAC;IA2B9B,CAAC;IAzBS,KAAK,CAAC,YAAY;QACxB,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,WAAW,EAAE,CAAC;YACpB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,CAAC,MAAe;QACrB,wCAAwC;QACxC,IAAI,CAAC,MAAM;YAAE,OAAO,MAAM,CAAC;QAE3B,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YACpB,MAAM,OAAO,GAAG,MAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAChC,OAAO,EAAE,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;QAChD,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,UAAU;QACd,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;IAC5B,CAAC;CACF"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Code-aware tokenizer that splits camelCase, snake_case,
|
|
3
|
+
* SCREAMING_SNAKE, and dot-separated identifiers into individual words.
|
|
4
|
+
*
|
|
5
|
+
* @module agentos/core/text-processing/tokenizers/CodeTokenizer
|
|
6
|
+
*/
|
|
7
|
+
import type { Token } from '../types';
|
|
8
|
+
import type { ITokenizer } from '../ITextProcessor';
|
|
9
|
+
/**
 * Code-aware tokenizer.
 *
 * Splits identifiers that programmers write into their component words.
 * Letter case is preserved by this class; the lowercase forms shown in
 * pipeline output come from a later lowercase stage:
 * - `getUserName` → `get`, `User`, `Name`
 * - `get_user_name` → `get`, `user`, `name`
 * - `MAX_RETRY_COUNT` → `MAX`, `RETRY`, `COUNT`
 * - `XMLParser` → `XML`, `Parser`
 * - `path.to.module` → `path`, `to`, `module`
 */
|
|
19
|
+
export declare class CodeTokenizer implements ITokenizer {
|
|
20
|
+
/** Tokenizer identifier; always the literal "CodeTokenizer". */
readonly name = "CodeTokenizer";
|
|
21
|
+
/** Fragments shorter than this many characters are not emitted. */
private minLength;
|
|
22
|
+
/**
 * @param minLength Minimum fragment length to emit (defaults to 2 in the implementation).
 */
constructor(minLength?: number);
|
|
23
|
+
/**
 * Splits `text` into identifier fragments.
 *
 * Each emitted token has `text` set to the fragment, `original` set to the
 * whole matched run (including any dots/underscores), and `position` set to
 * the fragment's character offset within `text`.
 *
 * @param text Source text to tokenize.
 * @returns Tokens in order of appearance.
 */
tokenize(text: string): Token[];
|
|
24
|
+
}
|
|
25
|
+
//# sourceMappingURL=CodeTokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CodeTokenizer.d.ts","sourceRoot":"","sources":["../../../../src/core/text-processing/tokenizers/CodeTokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAWpD;;;;;;;;;GASG;AACH,qBAAa,aAAc,YAAW,UAAU;IAC9C,QAAQ,CAAC,IAAI,mBAAmB;IAEhC,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,GAAE,MAAU;IAIjC,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,KAAK,EAAE;CAgDhC"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Code-aware tokenizer that splits camelCase, snake_case,
|
|
3
|
+
* SCREAMING_SNAKE, and dot-separated identifiers into individual words.
|
|
4
|
+
*
|
|
5
|
+
* @module agentos/core/text-processing/tokenizers/CodeTokenizer
|
|
6
|
+
*/
|
|
7
|
+
/** Matches word-like sequences including dots for qualified names. */
|
|
8
|
+
const CODE_WORD_REGEX = /[\p{L}\p{N}_.]+/gu;
|
|
9
|
+
/** Split camelCase: insert boundary before uppercase letters that follow lowercase. */
|
|
10
|
+
const CAMEL_SPLIT = /([a-z\d])([A-Z])/g;
|
|
11
|
+
/** Split ALLCAPS followed by lowercase: XMLParser → XML + Parser. */
|
|
12
|
+
const CAPS_SPLIT = /([A-Z]+)([A-Z][a-z])/g;
|
|
13
|
+
/**
|
|
14
|
+
* Code-aware tokenizer.
|
|
15
|
+
*
|
|
16
|
+
* Splits identifiers that programmers write:
|
|
17
|
+
* - `getUserName` → `get`, `user`, `name`
|
|
18
|
+
* - `get_user_name` → `get`, `user`, `name`
|
|
19
|
+
* - `MAX_RETRY_COUNT` → `max`, `retry`, `count`
|
|
20
|
+
* - `XMLParser` → `xml`, `parser`
|
|
21
|
+
* - `path.to.module` → `path`, `to`, `module`
|
|
22
|
+
*/
|
|
23
|
+
export class CodeTokenizer {
|
|
24
|
+
constructor(minLength = 2) {
|
|
25
|
+
this.name = 'CodeTokenizer';
|
|
26
|
+
this.minLength = minLength;
|
|
27
|
+
}
|
|
28
|
+
tokenize(text) {
|
|
29
|
+
const tokens = [];
|
|
30
|
+
let match;
|
|
31
|
+
CODE_WORD_REGEX.lastIndex = 0;
|
|
32
|
+
while ((match = CODE_WORD_REGEX.exec(text)) !== null) {
|
|
33
|
+
const word = match[0];
|
|
34
|
+
const position = match.index;
|
|
35
|
+
/* Split on dots first (path.to.module) */
|
|
36
|
+
const dotParts = word.split('.');
|
|
37
|
+
let offset = 0;
|
|
38
|
+
for (const part of dotParts) {
|
|
39
|
+
if (part.length === 0) {
|
|
40
|
+
offset += 1;
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
43
|
+
/* Split on underscores (snake_case) */
|
|
44
|
+
const underscoreParts = part.split('_');
|
|
45
|
+
let subOffset = 0;
|
|
46
|
+
for (const sub of underscoreParts) {
|
|
47
|
+
if (sub.length === 0) {
|
|
48
|
+
subOffset += 1;
|
|
49
|
+
continue;
|
|
50
|
+
}
|
|
51
|
+
/* Split camelCase */
|
|
52
|
+
const camelSplit = sub
|
|
53
|
+
.replace(CAMEL_SPLIT, '$1\0$2')
|
|
54
|
+
.replace(CAPS_SPLIT, '$1\0$2')
|
|
55
|
+
.split('\0');
|
|
56
|
+
let camelOffset = 0;
|
|
57
|
+
for (const fragment of camelSplit) {
|
|
58
|
+
if (fragment.length >= this.minLength) {
|
|
59
|
+
tokens.push({
|
|
60
|
+
text: fragment,
|
|
61
|
+
original: word,
|
|
62
|
+
position: position + offset + subOffset + camelOffset,
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
camelOffset += fragment.length;
|
|
66
|
+
}
|
|
67
|
+
subOffset += sub.length + 1; /* +1 for the underscore */
|
|
68
|
+
}
|
|
69
|
+
offset += part.length + 1; /* +1 for the dot */
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
return tokens;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
//# sourceMappingURL=CodeTokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CodeTokenizer.js","sourceRoot":"","sources":["../../../../src/core/text-processing/tokenizers/CodeTokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,sEAAsE;AACtE,MAAM,eAAe,GAAG,mBAAmB,CAAC;AAE5C,uFAAuF;AACvF,MAAM,WAAW,GAAG,mBAAmB,CAAC;AAExC,qEAAqE;AACrE,MAAM,UAAU,GAAG,uBAAuB,CAAC;AAE3C;;;;;;;;;GASG;AACH,MAAM,OAAO,aAAa;IAKxB,YAAY,YAAoB,CAAC;QAJxB,SAAI,GAAG,eAAe,CAAC;QAK9B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,QAAQ,CAAC,IAAY;QACnB,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,KAA6B,CAAC;QAElC,eAAe,CAAC,SAAS,GAAG,CAAC,CAAC;QAC9B,OAAO,CAAC,KAAK,GAAG,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACrD,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC;YAE7B,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAEjC,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;gBAC5B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAAC,MAAM,IAAI,CAAC,CAAC;oBAAC,SAAS;gBAAC,CAAC;gBAEjD,uCAAuC;gBACvC,MAAM,eAAe,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAExC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,KAAK,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;oBAClC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;wBAAC,SAAS,IAAI,CAAC,CAAC;wBAAC,SAAS;oBAAC,CAAC;oBAEnD,qBAAqB;oBACrB,MAAM,UAAU,GAAG,GAAG;yBACnB,OAAO,CAAC,WAAW,EAAE,QAAQ,CAAC;yBAC9B,OAAO,CAAC,UAAU,EAAE,QAAQ,CAAC;yBAC7B,KAAK,CAAC,IAAI,CAAC,CAAC;oBAEf,IAAI,WAAW,GAAG,CAAC,CAAC;oBACpB,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;wBAClC,IAAI,QAAQ,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;4BACtC,MAAM,CAAC,IAAI,CAAC;gCACV,IAAI,EAAE,QAAQ;gCACd,QAAQ,EAAE,IAAI;gCACd,QAAQ,EAAE,QAAQ,GAAG,MAAM,GAAG,SAAS,GAAG,WAAW;6BACtD,CAAC,CAAC;wBACL,CAAC;wBACD,WAAW,IAAI,QAAQ,CAAC,MAAM,CAAC;oBACjC,CAAC;oBACD,SAAS,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,2BAA2B;gBAC1D,CAAC;gBACD,MAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,oBAAoB;YACjD,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Unicode-aware word tokenizer.
|
|
3
|
+
* Splits on whitespace, punctuation, and special characters.
|
|
4
|
+
* Produces tokens with position tracking.
|
|
5
|
+
*
|
|
6
|
+
* @module agentos/core/text-processing/tokenizers/StandardTokenizer
|
|
7
|
+
*/
|
|
8
|
+
import type { Token } from '../types';
|
|
9
|
+
import type { ITokenizer } from '../ITextProcessor';
|
|
10
|
+
/**
 * Standard word tokenizer.
 *
 * Emits each maximal run of Unicode letters, Unicode digits, and underscores
 * as one token; everything else (whitespace, punctuation, hyphens, other
 * symbols) acts as a separator. Each token records the offset at which it
 * was found in the source text.
 */
|
|
15
|
+
export declare class StandardTokenizer implements ITokenizer {
|
|
16
|
+
/** Tokenizer identifier; always the literal "StandardTokenizer". */
readonly name = "StandardTokenizer";
|
|
17
|
+
/** Minimum token length to emit (default 2); shorter words are dropped. */
|
|
18
|
+
private minLength;
|
|
19
|
+
/**
 * @param minLength Minimum word length to emit (defaults to 2 in the implementation).
 */
constructor(minLength?: number);
|
|
20
|
+
/**
 * Splits `text` into word tokens.
 *
 * For every emitted token, `text` and `original` are both the matched word
 * and `position` is the match's offset within `text`.
 *
 * @param text Source text to tokenize.
 * @returns Tokens in order of appearance.
 */
tokenize(text: string): Token[];
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=StandardTokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"StandardTokenizer.d.ts","sourceRoot":"","sources":["../../../../src/core/text-processing/tokenizers/StandardTokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAKpD;;;;GAIG;AACH,qBAAa,iBAAkB,YAAW,UAAU;IAClD,QAAQ,CAAC,IAAI,uBAAuB;IAEpC,gDAAgD;IAChD,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,GAAE,MAAU;IAIjC,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,KAAK,EAAE;CAkBhC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Unicode-aware word tokenizer.
|
|
3
|
+
* Splits on whitespace, punctuation, and special characters.
|
|
4
|
+
* Produces tokens with position tracking.
|
|
5
|
+
*
|
|
6
|
+
* @module agentos/core/text-processing/tokenizers/StandardTokenizer
|
|
7
|
+
*/
|
|
8
|
+
/** Matches word-like sequences: letters, digits, underscores. */
|
|
9
|
+
const WORD_REGEX = /[\p{L}\p{N}_]+/gu;
|
|
10
|
+
/**
|
|
11
|
+
* Standard tokenizer that splits text on Unicode word boundaries.
|
|
12
|
+
* Handles punctuation, whitespace, hyphens, and special characters.
|
|
13
|
+
* Preserves position offsets for each token.
|
|
14
|
+
*/
|
|
15
|
+
export class StandardTokenizer {
|
|
16
|
+
constructor(minLength = 2) {
|
|
17
|
+
this.name = 'StandardTokenizer';
|
|
18
|
+
this.minLength = minLength;
|
|
19
|
+
}
|
|
20
|
+
tokenize(text) {
|
|
21
|
+
const tokens = [];
|
|
22
|
+
let match;
|
|
23
|
+
WORD_REGEX.lastIndex = 0;
|
|
24
|
+
while ((match = WORD_REGEX.exec(text)) !== null) {
|
|
25
|
+
const word = match[0];
|
|
26
|
+
if (word.length >= this.minLength) {
|
|
27
|
+
tokens.push({
|
|
28
|
+
text: word,
|
|
29
|
+
original: word,
|
|
30
|
+
position: match.index,
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
return tokens;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
//# sourceMappingURL=StandardTokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"StandardTokenizer.js","sourceRoot":"","sources":["../../../../src/core/text-processing/tokenizers/StandardTokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAKH,iEAAiE;AACjE,MAAM,UAAU,GAAG,kBAAkB,CAAC;AAEtC;;;;GAIG;AACH,MAAM,OAAO,iBAAiB;IAM5B,YAAY,YAAoB,CAAC;QALxB,SAAI,GAAG,mBAAmB,CAAC;QAMlC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,QAAQ,CAAC,IAAY;QACnB,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,KAA6B,CAAC;QAElC,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC;QACzB,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAChD,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBAClC,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,IAAI;oBACV,QAAQ,EAAE,IAAI;oBACd,QAAQ,EAAE,KAAK,CAAC,KAAK;iBACtB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Core types for the text processing pipeline.
|
|
3
|
+
* @module agentos/core/text-processing/types
|
|
4
|
+
*/
|
|
5
|
+
/**
 * One token produced by the text processing pipeline, carrying its source
 * position and optional linguistic annotations.
 */
export interface Token {
    /** Token text after processing (normalization, stemming, etc.). */
    text: string;
    /** Token text as it was before any processing. */
    original: string;
    /** Character offset of the token in the source text. */
    position: number;
    /** Stemmed form; set by stemmer processors. */
    stem?: string;
    /** Lemmatized form; set by lemmatizer processors. */
    lemma?: string;
}
|
|
18
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/core/text-processing/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,kFAAkF;AAClF,MAAM,WAAW,KAAK;IACpB,sEAAsE;IACtE,IAAI,EAAE,MAAM,CAAC;IACb,+CAA+C;IAC/C,QAAQ,EAAE,MAAM,CAAC;IACjB,2CAA2C;IAC3C,QAAQ,EAAE,MAAM,CAAC;IACjB,gDAAgD;IAChD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,sDAAsD;IACtD,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/core/text-processing/types.ts"],"names":[],"mappings":"AAAA;;;GAGG"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Shared HNSW index sidecar that sits alongside a SQLite database.
|
|
3
|
+
* Provides O(log n) approximate nearest neighbor search via hnswlib-node,
|
|
4
|
+
* with automatic activation at a configurable document count threshold
|
|
5
|
+
* and graceful fallback when the native addon is unavailable.
|
|
6
|
+
*
|
|
7
|
+
* Used by both the RAG system (SqlVectorStore) and the Memory system (SqliteBrain).
|
|
8
|
+
*
|
|
9
|
+
* @module agentos/core/vector-search/HnswIndexSidecar
|
|
10
|
+
*/
|
|
11
|
+
import type { HnswSidecarConfig, HnswSidecarStats, HnswSearchResult } from './types';
|
|
12
|
+
/**
 * Sidecar that maintains an hnswlib-node index file next to a primary
 * data store (SQLite, etc.).
 *
 * The primary store stays the source of truth; the HNSW index can be rebuilt
 * from it at any time. Responsibilities of the sidecar:
 *  - dynamically importing hnswlib-node (tolerating its absence)
 *  - activating automatically at a document count threshold
 *  - resizing automatically when capacity is reached
 *  - persisting to disk (.hnsw + .hnsw.map.json)
 *  - tracking unsaved changes, flushed by an explicit save()
 */
export declare class HnswIndexSidecar {
    private config;
    private index;
    private hnswlib;
    /** Lookup from HNSW integer label to document string ID. */
    private labelToId;
    /** Lookup from document string ID to HNSW integer label. */
    private idToLabel;
    /** The label that will be assigned next. */
    private nextLabel;
    /** Capacity of the index at present. */
    private capacity;
    /** Set while the index has changes not yet saved. */
    private dirty;
    /** Set when hnswlib-node was imported successfully. */
    private hnswAvailable;
    /**
     * Initializes the sidecar by attempting a dynamic import of hnswlib-node.
     * On import failure the sidecar remains inactive (brute-force fallback).
     * An existing index file, if found, is loaded from disk.
     *
     * @param config Sidecar configuration.
     */
    initialize(config: HnswSidecarConfig): Promise<void>;
    /** Persists the index and releases it. */
    shutdown(): Promise<void>;
    /**
     * @returns True when the index is loaded AND holds vectors
     *   (above threshold or loaded from disk).
     */
    isActive(): boolean;
    /** @returns True when hnswlib-node was imported successfully. */
    isAvailable(): boolean;
    /** @returns Statistics describing the sidecar. */
    getStats(): HnswSidecarStats;
    /**
     * Adds one vector, resizing automatically when capacity is reached.
     * No-op when hnswlib is unavailable or the item is already indexed.
     *
     * @param id Document string ID.
     * @param embedding Vector to index for that document.
     */
    add(id: string, embedding: number[]): Promise<void>;
    /**
     * Adds several vectors in one call; more efficient than looping over add().
     *
     * @param items ID/embedding pairs to index.
     */
    addBatch(items: {
        id: string;
        embedding: number[];
    }[]): Promise<void>;
    /**
     * Soft-deletes a vector: its label is marked as deleted in the HNSW graph.
     *
     * @param id Document string ID to remove.
     */
    remove(id: string): Promise<void>;
    /**
     * Searches for the top-K nearest neighbors.
     * Yields an empty result when the sidecar is inactive.
     *
     * @param query Query vector.
     * @param topK Number of neighbors requested.
     */
    search(query: number[], topK: number): Promise<HnswSearchResult[]>;
    /**
     * Rebuilds the whole HNSW index from source-of-truth data.
     * Invoked when the activation threshold is crossed or for a manual rebuild.
     *
     * @param items ID/embedding pairs to build the index from.
     */
    rebuildFromData(items: {
        id: string;
        embedding: number[];
    }[]): Promise<void>;
    /** Writes the HNSW index and the label map to disk. */
    save(): Promise<void>;
    /**
     * Loads an existing HNSW index from disk.
     *
     * @returns True on a successful load, false when no index exists.
     */
    load(): Promise<boolean>;
}
|
|
91
|
+
//# sourceMappingURL=HnswIndexSidecar.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HnswIndexSidecar.d.ts","sourceRoot":"","sources":["../../../src/core/vector-search/HnswIndexSidecar.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,KAAK,EACV,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AAkBjB;;;;;;;;;;;GAWG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAqB;IACnC,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,OAAO,CAAa;IAE5B,sDAAsD;IACtD,OAAO,CAAC,SAAS,CAAkC;IACnD,sDAAsD;IACtD,OAAO,CAAC,SAAS,CAAkC;IACnD,4BAA4B;IAC5B,OAAO,CAAC,SAAS,CAAa;IAC9B,8BAA8B;IAC9B,OAAO,CAAC,QAAQ,CAAa;IAC7B,yCAAyC;IACzC,OAAO,CAAC,KAAK,CAAkB;IAC/B,sDAAsD;IACtD,OAAO,CAAC,aAAa,CAAkB;IAIvC;;;;OAIG;IACG,UAAU,CAAC,MAAM,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAqB1D,qCAAqC;IAC/B,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAW/B,2FAA2F;IAC3F,QAAQ,IAAI,OAAO;IAInB,wDAAwD;IACxD,WAAW,IAAI,OAAO;IAItB,wCAAwC;IACxC,QAAQ,IAAI,gBAAgB;IAY5B;;;OAGG;IACG,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAsBzD;;OAEG;IACG,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAMhF;;OAEG;IACG,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAWvC;;;OAGG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAkCxE;;;OAGG;IACG,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA4BvF,iDAAiD;IAC3C,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAwB3B;;;OAGG;IACG,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;CAkC/B"}
|