@yamo/memory-mesh 2.3.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/memory_mesh.js +1 -1
- package/lib/llm/client.d.ts +111 -0
- package/lib/llm/client.js +299 -357
- package/lib/llm/client.ts +413 -0
- package/lib/llm/index.d.ts +17 -0
- package/lib/llm/index.js +15 -8
- package/lib/llm/index.ts +19 -0
- package/lib/memory/adapters/client.d.ts +183 -0
- package/lib/memory/adapters/client.js +518 -0
- package/lib/memory/adapters/client.ts +678 -0
- package/lib/memory/adapters/config.d.ts +137 -0
- package/lib/memory/adapters/config.js +189 -0
- package/lib/memory/adapters/config.ts +259 -0
- package/lib/memory/adapters/errors.d.ts +76 -0
- package/lib/memory/adapters/errors.js +128 -0
- package/lib/memory/adapters/errors.ts +166 -0
- package/lib/memory/context-manager.d.ts +44 -0
- package/lib/memory/context-manager.js +344 -0
- package/lib/memory/context-manager.ts +432 -0
- package/lib/memory/embeddings/factory.d.ts +59 -0
- package/lib/memory/embeddings/factory.js +148 -0
- package/lib/{embeddings/factory.js → memory/embeddings/factory.ts} +69 -28
- package/lib/memory/embeddings/index.d.ts +2 -0
- package/lib/memory/embeddings/index.js +2 -0
- package/lib/memory/embeddings/index.ts +2 -0
- package/lib/memory/embeddings/service.d.ts +164 -0
- package/lib/memory/embeddings/service.js +515 -0
- package/lib/{embeddings/service.js → memory/embeddings/service.ts} +223 -156
- package/lib/memory/index.d.ts +9 -0
- package/lib/memory/index.js +9 -1
- package/lib/memory/index.ts +20 -0
- package/lib/memory/memory-mesh.d.ts +274 -0
- package/lib/memory/memory-mesh.js +1469 -678
- package/lib/memory/memory-mesh.ts +1803 -0
- package/lib/memory/memory-translator.d.ts +19 -0
- package/lib/memory/memory-translator.js +125 -0
- package/lib/memory/memory-translator.ts +158 -0
- package/lib/memory/schema.d.ts +111 -0
- package/lib/memory/schema.js +183 -0
- package/lib/memory/schema.ts +267 -0
- package/lib/memory/scorer.d.ts +26 -0
- package/lib/memory/scorer.js +77 -0
- package/lib/memory/scorer.ts +95 -0
- package/lib/memory/search/index.d.ts +1 -0
- package/lib/memory/search/index.js +1 -0
- package/lib/memory/search/index.ts +1 -0
- package/lib/memory/search/keyword-search.d.ts +62 -0
- package/lib/memory/search/keyword-search.js +135 -0
- package/lib/{search/keyword-search.js → memory/search/keyword-search.ts} +66 -36
- package/lib/scrubber/config/defaults.d.ts +53 -0
- package/lib/scrubber/config/defaults.js +49 -57
- package/lib/scrubber/config/defaults.ts +117 -0
- package/lib/scrubber/index.d.ts +6 -0
- package/lib/scrubber/index.js +3 -23
- package/lib/scrubber/index.ts +7 -0
- package/lib/scrubber/scrubber.d.ts +61 -0
- package/lib/scrubber/scrubber.js +99 -121
- package/lib/scrubber/scrubber.ts +168 -0
- package/lib/scrubber/stages/chunker.d.ts +13 -0
- package/lib/scrubber/stages/metadata-annotator.d.ts +18 -0
- package/lib/scrubber/stages/normalizer.d.ts +13 -0
- package/lib/scrubber/stages/semantic-filter.d.ts +13 -0
- package/lib/scrubber/stages/structural-cleaner.d.ts +13 -0
- package/lib/scrubber/stages/validator.d.ts +18 -0
- package/lib/scrubber/telemetry.d.ts +36 -0
- package/lib/scrubber/telemetry.js +53 -58
- package/lib/scrubber/telemetry.ts +99 -0
- package/lib/utils/logger.d.ts +29 -0
- package/lib/utils/logger.js +64 -0
- package/lib/utils/logger.ts +85 -0
- package/lib/utils/skill-metadata.d.ts +32 -0
- package/lib/utils/skill-metadata.js +132 -0
- package/lib/utils/skill-metadata.ts +147 -0
- package/lib/yamo/emitter.d.ts +73 -0
- package/lib/yamo/emitter.js +78 -143
- package/lib/yamo/emitter.ts +249 -0
- package/lib/yamo/schema.d.ts +58 -0
- package/lib/yamo/schema.js +81 -108
- package/lib/yamo/schema.ts +165 -0
- package/package.json +11 -8
- package/index.d.ts +0 -111
- package/lib/embeddings/index.js +0 -2
- package/lib/index.js +0 -6
- package/lib/lancedb/client.js +0 -633
- package/lib/lancedb/config.js +0 -215
- package/lib/lancedb/errors.js +0 -144
- package/lib/lancedb/index.js +0 -4
- package/lib/lancedb/schema.js +0 -217
- package/lib/scrubber/errors/scrubber-error.js +0 -43
- package/lib/scrubber/stages/chunker.js +0 -103
- package/lib/scrubber/stages/metadata-annotator.js +0 -74
- package/lib/scrubber/stages/normalizer.js +0 -59
- package/lib/scrubber/stages/semantic-filter.js +0 -61
- package/lib/scrubber/stages/structural-cleaner.js +0 -82
- package/lib/scrubber/stages/validator.js +0 -66
- package/lib/scrubber/utils/hash.js +0 -39
- package/lib/scrubber/utils/html-parser.js +0 -45
- package/lib/scrubber/utils/pattern-matcher.js +0 -63
- package/lib/scrubber/utils/token-counter.js +0 -31
- package/lib/search/index.js +0 -1
- package/lib/utils/index.js +0 -1
- package/lib/yamo/index.js +0 -15
package/lib/scrubber/scrubber.js
CHANGED
|
@@ -2,129 +2,107 @@
|
|
|
2
2
|
* S-MORA Layer 0 Scrubber - Main Orchestrator
|
|
3
3
|
* @module smora/scrubber/scrubber
|
|
4
4
|
*/
|
|
5
|
-
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import { defaultScrubberConfig } from './config/defaults.js';
|
|
15
|
-
|
|
5
|
+
import { StructuralCleaner } from "./stages/structural-cleaner.js";
|
|
6
|
+
import { SemanticFilter } from "./stages/semantic-filter.js";
|
|
7
|
+
import { Normalizer } from "./stages/normalizer.js";
|
|
8
|
+
import { Chunker } from "./stages/chunker.js";
|
|
9
|
+
import { MetadataAnnotator } from "./stages/metadata-annotator.js";
|
|
10
|
+
import { Validator } from "./stages/validator.js";
|
|
11
|
+
import { ScrubberTelemetry, } from "./telemetry.js";
|
|
12
|
+
// import { ScrubberError } from './errors/scrubber-error'; // Disabled: errors/scrubber-error.js is no longer shipped in this release
|
|
13
|
+
import { defaultScrubberConfig } from "./config/defaults.js";
|
|
16
14
|
export class Scrubber {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
* Main entry point - process a raw document
|
|
25
|
-
* @param {Object} document - { content: string, source: string, type: 'html'|'md'|'txt' }
|
|
26
|
-
* @returns {Promise<Object>} - { chunks: Array, metadata: Object, telemetry: Object }
|
|
27
|
-
*/
|
|
28
|
-
async process(document) {
|
|
29
|
-
const startTime = Date.now();
|
|
30
|
-
const result = {
|
|
31
|
-
chunks: [],
|
|
32
|
-
metadata: {
|
|
33
|
-
source: document.source,
|
|
34
|
-
type: document.type,
|
|
35
|
-
processingTimestamp: new Date().toISOString()
|
|
36
|
-
},
|
|
37
|
-
telemetry: {}
|
|
38
|
-
};
|
|
39
|
-
|
|
40
|
-
try {
|
|
41
|
-
// If disabled, return empty chunks
|
|
42
|
-
if (!this.config.enabled) {
|
|
43
|
-
result.success = true;
|
|
44
|
-
result.telemetry.totalDuration = Date.now() - startTime;
|
|
45
|
-
return result;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
// Stage 1: Structural Cleaning
|
|
49
|
-
const cleaned = await this._executeStage('structural', () =>
|
|
50
|
-
this.stages.structural.clean(document.content)
|
|
51
|
-
);
|
|
52
|
-
result.telemetry.structural = this.telemetry.getStageStats('structural');
|
|
53
|
-
|
|
54
|
-
// Stage 2: Semantic Filtering
|
|
55
|
-
const filtered = await this._executeStage('semantic', () =>
|
|
56
|
-
this.stages.semantic.filter(cleaned)
|
|
57
|
-
);
|
|
58
|
-
result.telemetry.semantic = this.telemetry.getStageStats('semantic');
|
|
59
|
-
|
|
60
|
-
// Stage 3: Normalization
|
|
61
|
-
const normalized = await this._executeStage('normalization', () =>
|
|
62
|
-
this.stages.normalizer.normalize(filtered)
|
|
63
|
-
);
|
|
64
|
-
result.telemetry.normalization = this.telemetry.getStageStats('normalization');
|
|
65
|
-
|
|
66
|
-
// Stage 4: Chunking
|
|
67
|
-
const chunks = await this._executeStage('chunking', () =>
|
|
68
|
-
this.stages.chunker.chunk(normalized)
|
|
69
|
-
);
|
|
70
|
-
result.telemetry.chunking = this.telemetry.getStageStats('chunking');
|
|
71
|
-
|
|
72
|
-
// Stage 5: Metadata Annotation
|
|
73
|
-
const annotated = await this._executeStage('metadata', () =>
|
|
74
|
-
this.stages.metadata.annotate(chunks, document)
|
|
75
|
-
);
|
|
76
|
-
result.telemetry.metadata = this.telemetry.getStageStats('metadata');
|
|
77
|
-
|
|
78
|
-
// Stage 6: Validation
|
|
79
|
-
result.chunks = await this._executeStage('validation', () =>
|
|
80
|
-
this.stages.validator.validate(annotated)
|
|
81
|
-
);
|
|
82
|
-
result.telemetry.validation = this.telemetry.getStageStats('validation');
|
|
83
|
-
|
|
84
|
-
result.telemetry.totalDuration = Date.now() - startTime;
|
|
85
|
-
result.success = true;
|
|
86
|
-
|
|
87
|
-
return result;
|
|
88
|
-
} catch (error) {
|
|
89
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
90
|
-
result.success = false;
|
|
91
|
-
result.error = message;
|
|
92
|
-
result.telemetry.totalDuration = Date.now() - startTime;
|
|
15
|
+
config;
|
|
16
|
+
stages; // Using any for stages as they are not yet converted
|
|
17
|
+
telemetry;
|
|
18
|
+
constructor(config = {}) {
|
|
19
|
+
this.config = { ...defaultScrubberConfig, ...config };
|
|
20
|
+
this.stages = this._initializeStages();
|
|
21
|
+
this.telemetry = new ScrubberTelemetry();
|
|
93
22
|
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
23
|
+
/**
|
|
24
|
+
* Main entry point - process a raw document
|
|
25
|
+
* @param {Object} document - { content: string, source: string, type: 'html'|'md'|'txt' }
|
|
26
|
+
* @returns {Promise<Object>} - { chunks: Array, metadata: Object, telemetry: Object, success: boolean, error?: string }
|
|
27
|
+
*/
|
|
28
|
+
async process(document) {
|
|
29
|
+
const startTime = Date.now();
|
|
30
|
+
const result = {
|
|
31
|
+
chunks: [],
|
|
32
|
+
metadata: {
|
|
33
|
+
source: document.source,
|
|
34
|
+
type: document.type,
|
|
35
|
+
processingTimestamp: new Date().toISOString(),
|
|
36
|
+
},
|
|
37
|
+
telemetry: {},
|
|
38
|
+
};
|
|
39
|
+
try {
|
|
40
|
+
// If disabled, return empty chunks
|
|
41
|
+
if (!this.config.enabled) {
|
|
42
|
+
result.success = true;
|
|
43
|
+
result.telemetry.totalDuration = Date.now() - startTime;
|
|
44
|
+
return result;
|
|
45
|
+
}
|
|
46
|
+
// Stage 1: Structural Cleaning
|
|
47
|
+
const cleaned = await this._executeStage("structural", () => this.stages.structural.clean(document.content));
|
|
48
|
+
result.telemetry.structural = this.telemetry.getStageStats("structural");
|
|
49
|
+
// Stage 2: Semantic Filtering
|
|
50
|
+
const filtered = await this._executeStage("semantic", () => this.stages.semantic.filter(cleaned));
|
|
51
|
+
result.telemetry.semantic = this.telemetry.getStageStats("semantic");
|
|
52
|
+
// Stage 3: Normalization
|
|
53
|
+
const normalized = await this._executeStage("normalization", () => this.stages.normalizer.normalize(filtered));
|
|
54
|
+
result.telemetry.normalization =
|
|
55
|
+
this.telemetry.getStageStats("normalization");
|
|
56
|
+
// Stage 4: Chunking
|
|
57
|
+
const chunks = await this._executeStage("chunking", () => this.stages.chunker.chunk(normalized));
|
|
58
|
+
result.telemetry.chunking = this.telemetry.getStageStats("chunking");
|
|
59
|
+
// Stage 5: Metadata Annotation
|
|
60
|
+
const annotated = await this._executeStage("metadata", () => this.stages.metadata.annotate(chunks, document));
|
|
61
|
+
result.telemetry.metadata = this.telemetry.getStageStats("metadata");
|
|
62
|
+
// Stage 6: Validation
|
|
63
|
+
result.chunks = await this._executeStage("validation", () => this.stages.validator.validate(annotated));
|
|
64
|
+
result.telemetry.validation = this.telemetry.getStageStats("validation");
|
|
65
|
+
result.telemetry.totalDuration = Date.now() - startTime;
|
|
66
|
+
result.success = true;
|
|
67
|
+
return result;
|
|
68
|
+
}
|
|
69
|
+
catch (error) {
|
|
70
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
71
|
+
result.success = false;
|
|
72
|
+
result.error = message;
|
|
73
|
+
result.telemetry.totalDuration = Date.now() - startTime;
|
|
74
|
+
return result;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
async _executeStage(stageName, stageFn) {
|
|
78
|
+
const startTime = Date.now();
|
|
79
|
+
try {
|
|
80
|
+
const result = await stageFn();
|
|
81
|
+
const duration = Date.now() - startTime;
|
|
82
|
+
this.telemetry.recordStage(stageName, duration, true);
|
|
83
|
+
return result;
|
|
84
|
+
}
|
|
85
|
+
catch (error) {
|
|
86
|
+
const duration = Date.now() - startTime;
|
|
87
|
+
this.telemetry.recordStage(stageName, duration, false);
|
|
88
|
+
throw error;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
_initializeStages() {
|
|
92
|
+
return {
|
|
93
|
+
structural: new StructuralCleaner(this.config.structural),
|
|
94
|
+
semantic: new SemanticFilter(this.config.semantic),
|
|
95
|
+
normalizer: new Normalizer(this.config.normalization),
|
|
96
|
+
chunker: new Chunker(this.config.chunking),
|
|
97
|
+
metadata: new MetadataAnnotator(this.config.metadata),
|
|
98
|
+
validator: new Validator(this.config.validation),
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
getMetrics() {
|
|
102
|
+
return this.telemetry.getSummary();
|
|
103
|
+
}
|
|
104
|
+
healthCheck() {
|
|
105
|
+
return Promise.resolve({ status: "healthy" });
|
|
107
106
|
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
_initializeStages() {
|
|
111
|
-
return {
|
|
112
|
-
structural: new StructuralCleaner(this.config.structural),
|
|
113
|
-
semantic: new SemanticFilter(this.config.semantic),
|
|
114
|
-
normalizer: new Normalizer(this.config.normalization),
|
|
115
|
-
chunker: new Chunker(this.config.chunking),
|
|
116
|
-
metadata: new MetadataAnnotator(this.config.metadata),
|
|
117
|
-
validator: new Validator(this.config.validation)
|
|
118
|
-
};
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
getMetrics() {
|
|
122
|
-
return this.telemetry.getSummary();
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
async healthCheck() {
|
|
126
|
-
return { status: 'healthy' };
|
|
127
|
-
}
|
|
128
107
|
}
|
|
129
|
-
|
|
130
108
|
export default Scrubber;
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* S-MORA Layer 0 Scrubber - Main Orchestrator
|
|
3
|
+
* @module smora/scrubber/scrubber
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { StructuralCleaner } from "./stages/structural-cleaner.js";
|
|
7
|
+
import { SemanticFilter } from "./stages/semantic-filter.js";
|
|
8
|
+
import { Normalizer } from "./stages/normalizer.js";
|
|
9
|
+
import { Chunker } from "./stages/chunker.js";
|
|
10
|
+
import { MetadataAnnotator } from "./stages/metadata-annotator.js";
|
|
11
|
+
import { Validator } from "./stages/validator.js";
|
|
12
|
+
import {
|
|
13
|
+
ScrubberTelemetry,
|
|
14
|
+
TelemetrySummary,
|
|
15
|
+
StageSummary,
|
|
16
|
+
} from "./telemetry.js";
|
|
17
|
+
// import { ScrubberError } from './errors/scrubber-error'; // Disabled: errors/scrubber-error.js is no longer shipped in this release
|
|
18
|
+
import { defaultScrubberConfig, ScrubberConfig } from "./config/defaults.js";
|
|
19
|
+
|
|
20
|
+
// Interfaces for input/output
|
|
21
|
+
export interface ScrubberDocument {
|
|
22
|
+
content: string;
|
|
23
|
+
source: string;
|
|
24
|
+
type: string;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export interface Chunk {
|
|
28
|
+
text: string;
|
|
29
|
+
[key: string]: any;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export interface ScrubberResult {
|
|
33
|
+
chunks: Chunk[];
|
|
34
|
+
metadata: {
|
|
35
|
+
source: string;
|
|
36
|
+
type: string;
|
|
37
|
+
processingTimestamp: string;
|
|
38
|
+
[key: string]: any;
|
|
39
|
+
};
|
|
40
|
+
telemetry: Partial<Record<string, StageSummary>> & { totalDuration?: number };
|
|
41
|
+
success?: boolean;
|
|
42
|
+
error?: string;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
export class Scrubber {
|
|
46
|
+
config: ScrubberConfig;
|
|
47
|
+
stages: any; // Using any for stages as they are not yet converted
|
|
48
|
+
telemetry: ScrubberTelemetry;
|
|
49
|
+
|
|
50
|
+
constructor(config: Partial<ScrubberConfig> = {}) {
|
|
51
|
+
this.config = { ...defaultScrubberConfig, ...config };
|
|
52
|
+
this.stages = this._initializeStages();
|
|
53
|
+
this.telemetry = new ScrubberTelemetry();
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Main entry point - process a raw document
|
|
58
|
+
* @param {Object} document - { content: string, source: string, type: 'html'|'md'|'txt' }
|
|
59
|
+
* @returns {Promise<Object>} - { chunks: Array, metadata: Object, telemetry: Object, success: boolean, error?: string }
|
|
60
|
+
*/
|
|
61
|
+
async process(document: ScrubberDocument): Promise<ScrubberResult> {
|
|
62
|
+
const startTime = Date.now();
|
|
63
|
+
const result: ScrubberResult = {
|
|
64
|
+
chunks: [],
|
|
65
|
+
metadata: {
|
|
66
|
+
source: document.source,
|
|
67
|
+
type: document.type,
|
|
68
|
+
processingTimestamp: new Date().toISOString(),
|
|
69
|
+
},
|
|
70
|
+
telemetry: {},
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
try {
|
|
74
|
+
// If disabled, return empty chunks
|
|
75
|
+
if (!this.config.enabled) {
|
|
76
|
+
result.success = true;
|
|
77
|
+
result.telemetry.totalDuration = Date.now() - startTime;
|
|
78
|
+
return result;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Stage 1: Structural Cleaning
|
|
82
|
+
const cleaned = await this._executeStage("structural", () =>
|
|
83
|
+
this.stages.structural.clean(document.content),
|
|
84
|
+
);
|
|
85
|
+
result.telemetry.structural = this.telemetry.getStageStats("structural");
|
|
86
|
+
|
|
87
|
+
// Stage 2: Semantic Filtering
|
|
88
|
+
const filtered = await this._executeStage("semantic", () =>
|
|
89
|
+
this.stages.semantic.filter(cleaned),
|
|
90
|
+
);
|
|
91
|
+
result.telemetry.semantic = this.telemetry.getStageStats("semantic");
|
|
92
|
+
|
|
93
|
+
// Stage 3: Normalization
|
|
94
|
+
const normalized = await this._executeStage("normalization", () =>
|
|
95
|
+
this.stages.normalizer.normalize(filtered),
|
|
96
|
+
);
|
|
97
|
+
result.telemetry.normalization =
|
|
98
|
+
this.telemetry.getStageStats("normalization");
|
|
99
|
+
|
|
100
|
+
// Stage 4: Chunking
|
|
101
|
+
const chunks = await this._executeStage("chunking", () =>
|
|
102
|
+
this.stages.chunker.chunk(normalized),
|
|
103
|
+
);
|
|
104
|
+
result.telemetry.chunking = this.telemetry.getStageStats("chunking");
|
|
105
|
+
|
|
106
|
+
// Stage 5: Metadata Annotation
|
|
107
|
+
const annotated = await this._executeStage("metadata", () =>
|
|
108
|
+
this.stages.metadata.annotate(chunks, document),
|
|
109
|
+
);
|
|
110
|
+
result.telemetry.metadata = this.telemetry.getStageStats("metadata");
|
|
111
|
+
|
|
112
|
+
// Stage 6: Validation
|
|
113
|
+
result.chunks = await this._executeStage("validation", () =>
|
|
114
|
+
this.stages.validator.validate(annotated),
|
|
115
|
+
);
|
|
116
|
+
result.telemetry.validation = this.telemetry.getStageStats("validation");
|
|
117
|
+
|
|
118
|
+
result.telemetry.totalDuration = Date.now() - startTime;
|
|
119
|
+
result.success = true;
|
|
120
|
+
|
|
121
|
+
return result;
|
|
122
|
+
} catch (error) {
|
|
123
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
124
|
+
result.success = false;
|
|
125
|
+
result.error = message;
|
|
126
|
+
result.telemetry.totalDuration = Date.now() - startTime;
|
|
127
|
+
return result;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
async _executeStage<T>(
|
|
132
|
+
stageName: string,
|
|
133
|
+
stageFn: () => Promise<T> | T,
|
|
134
|
+
): Promise<T> {
|
|
135
|
+
const startTime = Date.now();
|
|
136
|
+
try {
|
|
137
|
+
const result = await stageFn();
|
|
138
|
+
const duration = Date.now() - startTime;
|
|
139
|
+
this.telemetry.recordStage(stageName, duration, true);
|
|
140
|
+
return result;
|
|
141
|
+
} catch (error) {
|
|
142
|
+
const duration = Date.now() - startTime;
|
|
143
|
+
this.telemetry.recordStage(stageName, duration, false);
|
|
144
|
+
throw error;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
_initializeStages() {
|
|
149
|
+
return {
|
|
150
|
+
structural: new StructuralCleaner(this.config.structural),
|
|
151
|
+
semantic: new SemanticFilter(this.config.semantic),
|
|
152
|
+
normalizer: new Normalizer(this.config.normalization),
|
|
153
|
+
chunker: new Chunker(this.config.chunking),
|
|
154
|
+
metadata: new MetadataAnnotator(this.config.metadata),
|
|
155
|
+
validator: new Validator(this.config.validation),
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
getMetrics(): TelemetrySummary {
|
|
160
|
+
return this.telemetry.getSummary();
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
healthCheck(): Promise<{ status: string }> {
|
|
164
|
+
return Promise.resolve({ status: "healthy" });
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
export default Scrubber;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for metadata-annotator.js
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export interface AnnotatorConfig {
|
|
6
|
+
includeTimestamp?: boolean;
|
|
7
|
+
[key: string]: any;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export interface AnnotatedData {
|
|
11
|
+
content: string;
|
|
12
|
+
metadata: Record<string, any>;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export class MetadataAnnotator {
|
|
16
|
+
constructor(config?: AnnotatorConfig);
|
|
17
|
+
annotate(content: string): Promise<AnnotatedData>;
|
|
18
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for normalizer.js
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export interface NormalizerConfig {
|
|
6
|
+
lowercase?: boolean;
|
|
7
|
+
[key: string]: any;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export class Normalizer {
|
|
11
|
+
constructor(config?: NormalizerConfig);
|
|
12
|
+
normalize(content: string): Promise<string>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for semantic-filter.js
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export interface FilterConfig {
|
|
6
|
+
threshold?: number;
|
|
7
|
+
[key: string]: any;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export class SemanticFilter {
|
|
11
|
+
constructor(config?: FilterConfig);
|
|
12
|
+
filter(content: string): Promise<string>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for structural-cleaner.js
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export interface CleanConfig {
|
|
6
|
+
preserveStructure?: boolean;
|
|
7
|
+
[key: string]: any;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export class StructuralCleaner {
|
|
11
|
+
constructor(config?: CleanConfig);
|
|
12
|
+
clean(content: string): Promise<string>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for validator.js
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
export interface ValidatorConfig {
|
|
6
|
+
strict?: boolean;
|
|
7
|
+
[key: string]: any;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export interface ValidationResult {
|
|
11
|
+
valid: boolean;
|
|
12
|
+
errors?: string[];
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export class Validator {
|
|
16
|
+
constructor(config?: ValidatorConfig);
|
|
17
|
+
validate(content: string): Promise<ValidationResult>;
|
|
18
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* S-MORA Layer 0 Scrubber Telemetry Collection
|
|
3
|
+
* @module smora/scrubber/telemetry
|
|
4
|
+
*/
|
|
5
|
+
export interface StageStats {
|
|
6
|
+
count: number;
|
|
7
|
+
totalTime: number;
|
|
8
|
+
errors: number;
|
|
9
|
+
}
|
|
10
|
+
export interface StageSummary {
|
|
11
|
+
count: number;
|
|
12
|
+
avgTime: number;
|
|
13
|
+
totalTime: number;
|
|
14
|
+
errors: number;
|
|
15
|
+
}
|
|
16
|
+
export interface TelemetrySummary {
|
|
17
|
+
stages: Record<string, StageStats>;
|
|
18
|
+
performance: {
|
|
19
|
+
structural: number;
|
|
20
|
+
semantic: number;
|
|
21
|
+
normalization: number;
|
|
22
|
+
chunking: number;
|
|
23
|
+
metadata: number;
|
|
24
|
+
validation: number;
|
|
25
|
+
total: number;
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
export declare class ScrubberTelemetry {
|
|
29
|
+
stats: Record<string, StageStats>;
|
|
30
|
+
constructor();
|
|
31
|
+
recordStage(stage: string, duration: number, success?: boolean): void;
|
|
32
|
+
getStageStats(stage: string): StageSummary;
|
|
33
|
+
getSummary(): TelemetrySummary;
|
|
34
|
+
reset(): void;
|
|
35
|
+
assertPerformanceBudget(budget?: number): void;
|
|
36
|
+
}
|
|
@@ -2,65 +2,60 @@
|
|
|
2
2
|
* S-MORA Layer 0 Scrubber Telemetry Collection
|
|
3
3
|
* @module smora/scrubber/telemetry
|
|
4
4
|
*/
|
|
5
|
-
|
|
6
5
|
export class ScrubberTelemetry {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
recordStage(stage, duration, success = true) {
|
|
19
|
-
if (!this.stats[stage]) {
|
|
20
|
-
this.stats[stage] = { count: 0, totalTime: 0, errors: 0 };
|
|
6
|
+
stats;
|
|
7
|
+
constructor() {
|
|
8
|
+
this.stats = {
|
|
9
|
+
structural: { count: 0, totalTime: 0, errors: 0 },
|
|
10
|
+
semantic: { count: 0, totalTime: 0, errors: 0 },
|
|
11
|
+
normalization: { count: 0, totalTime: 0, errors: 0 },
|
|
12
|
+
chunking: { count: 0, totalTime: 0, errors: 0 },
|
|
13
|
+
metadata: { count: 0, totalTime: 0, errors: 0 },
|
|
14
|
+
validation: { count: 0, totalTime: 0, errors: 0 },
|
|
15
|
+
};
|
|
21
16
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
17
|
+
recordStage(stage, duration, success = true) {
|
|
18
|
+
if (!this.stats[stage]) {
|
|
19
|
+
this.stats[stage] = { count: 0, totalTime: 0, errors: 0 };
|
|
20
|
+
}
|
|
21
|
+
this.stats[stage].count++;
|
|
22
|
+
this.stats[stage].totalTime += duration;
|
|
23
|
+
if (!success) {
|
|
24
|
+
this.stats[stage].errors++;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
getStageStats(stage) {
|
|
28
|
+
const stats = this.stats[stage] || { count: 0, totalTime: 0, errors: 0 };
|
|
29
|
+
return {
|
|
30
|
+
count: stats.count,
|
|
31
|
+
avgTime: stats.count > 0 ? stats.totalTime / stats.count : 0,
|
|
32
|
+
totalTime: stats.totalTime,
|
|
33
|
+
errors: stats.errors,
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
getSummary() {
|
|
37
|
+
return {
|
|
38
|
+
stages: this.stats,
|
|
39
|
+
performance: {
|
|
40
|
+
structural: this.stats.structural.totalTime,
|
|
41
|
+
semantic: this.stats.semantic.totalTime,
|
|
42
|
+
normalization: this.stats.normalization.totalTime,
|
|
43
|
+
chunking: this.stats.chunking.totalTime,
|
|
44
|
+
metadata: this.stats.metadata.totalTime,
|
|
45
|
+
validation: this.stats.validation.totalTime,
|
|
46
|
+
total: Object.values(this.stats).reduce((sum, s) => sum + s.totalTime, 0),
|
|
47
|
+
},
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
reset() {
|
|
51
|
+
Object.keys(this.stats).forEach((key) => {
|
|
52
|
+
this.stats[key] = { count: 0, totalTime: 0, errors: 0 };
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
assertPerformanceBudget(budget = 10) {
|
|
56
|
+
const summary = this.getSummary();
|
|
57
|
+
if (summary.performance.total > budget) {
|
|
58
|
+
throw new Error(`Performance budget exceeded: ${summary.performance.total}ms > ${budget}ms`);
|
|
59
|
+
}
|
|
64
60
|
}
|
|
65
|
-
}
|
|
66
61
|
}
|