@yamo/memory-mesh 2.3.2 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/bin/memory_mesh.js +1 -1
- package/lib/llm/client.d.ts +86 -0
- package/lib/llm/client.js +300 -357
- package/lib/llm/client.ts +334 -0
- package/lib/llm/index.d.ts +17 -0
- package/lib/llm/index.js +16 -8
- package/lib/llm/index.ts +18 -0
- package/lib/memory/adapters/client.d.ts +120 -0
- package/lib/memory/adapters/client.js +519 -0
- package/lib/memory/adapters/client.ts +519 -0
- package/lib/memory/adapters/config.d.ts +130 -0
- package/lib/memory/adapters/config.js +190 -0
- package/lib/memory/adapters/config.ts +190 -0
- package/lib/memory/adapters/errors.d.ts +84 -0
- package/lib/memory/adapters/errors.js +129 -0
- package/lib/memory/adapters/errors.ts +129 -0
- package/lib/memory/context-manager.d.ts +41 -0
- package/lib/memory/context-manager.js +345 -0
- package/lib/memory/context-manager.ts +345 -0
- package/lib/memory/embeddings/factory.d.ts +57 -0
- package/lib/memory/embeddings/factory.js +149 -0
- package/lib/memory/embeddings/factory.ts +149 -0
- package/lib/memory/embeddings/index.d.ts +2 -0
- package/lib/memory/embeddings/index.js +3 -0
- package/lib/memory/embeddings/index.ts +3 -0
- package/lib/memory/embeddings/service.d.ts +134 -0
- package/lib/memory/embeddings/service.js +516 -0
- package/lib/memory/embeddings/service.ts +516 -0
- package/lib/memory/index.d.ts +9 -0
- package/lib/memory/index.js +10 -1
- package/lib/memory/index.ts +10 -0
- package/lib/memory/memory-mesh.d.ts +332 -0
- package/lib/memory/memory-mesh.js +1470 -678
- package/lib/memory/memory-mesh.ts +1517 -0
- package/lib/memory/memory-translator.d.ts +14 -0
- package/lib/memory/memory-translator.js +126 -0
- package/lib/memory/memory-translator.ts +126 -0
- package/lib/memory/schema.d.ts +130 -0
- package/lib/memory/schema.js +184 -0
- package/lib/memory/schema.ts +184 -0
- package/lib/memory/scorer.d.ts +25 -0
- package/lib/memory/scorer.js +78 -0
- package/lib/memory/scorer.ts +78 -0
- package/lib/memory/search/index.d.ts +1 -0
- package/lib/memory/search/index.js +2 -0
- package/lib/memory/search/index.ts +2 -0
- package/lib/memory/search/keyword-search.d.ts +46 -0
- package/lib/memory/search/keyword-search.js +136 -0
- package/lib/memory/search/keyword-search.ts +136 -0
- package/lib/scrubber/config/defaults.d.ts +46 -0
- package/lib/scrubber/config/defaults.js +50 -57
- package/lib/scrubber/config/defaults.ts +55 -0
- package/lib/scrubber/errors/scrubber-error.d.ts +22 -0
- package/lib/scrubber/errors/scrubber-error.js +28 -32
- package/lib/scrubber/errors/scrubber-error.ts +44 -0
- package/lib/scrubber/index.d.ts +5 -0
- package/lib/scrubber/index.js +4 -23
- package/lib/scrubber/index.ts +6 -0
- package/lib/scrubber/scrubber.d.ts +44 -0
- package/lib/scrubber/scrubber.js +100 -121
- package/lib/scrubber/scrubber.ts +109 -0
- package/lib/scrubber/stages/chunker.d.ts +25 -0
- package/lib/scrubber/stages/chunker.js +74 -91
- package/lib/scrubber/stages/chunker.ts +104 -0
- package/lib/scrubber/stages/metadata-annotator.d.ts +17 -0
- package/lib/scrubber/stages/metadata-annotator.js +55 -65
- package/lib/scrubber/stages/metadata-annotator.ts +75 -0
- package/lib/scrubber/stages/normalizer.d.ts +16 -0
- package/lib/scrubber/stages/normalizer.js +42 -50
- package/lib/scrubber/stages/normalizer.ts +60 -0
- package/lib/scrubber/stages/semantic-filter.d.ts +16 -0
- package/lib/scrubber/stages/semantic-filter.js +42 -52
- package/lib/scrubber/stages/semantic-filter.ts +62 -0
- package/lib/scrubber/stages/structural-cleaner.d.ts +18 -0
- package/lib/scrubber/stages/structural-cleaner.js +66 -75
- package/lib/scrubber/stages/structural-cleaner.ts +83 -0
- package/lib/scrubber/stages/validator.d.ts +17 -0
- package/lib/scrubber/stages/validator.js +46 -56
- package/lib/scrubber/stages/validator.ts +67 -0
- package/lib/scrubber/telemetry.d.ts +29 -0
- package/lib/scrubber/telemetry.js +54 -58
- package/lib/scrubber/telemetry.ts +62 -0
- package/lib/scrubber/utils/hash.d.ts +14 -0
- package/lib/scrubber/utils/hash.js +30 -32
- package/lib/scrubber/utils/hash.ts +40 -0
- package/lib/scrubber/utils/html-parser.d.ts +14 -0
- package/lib/scrubber/utils/html-parser.js +32 -39
- package/lib/scrubber/utils/html-parser.ts +46 -0
- package/lib/scrubber/utils/pattern-matcher.d.ts +12 -0
- package/lib/scrubber/utils/pattern-matcher.js +48 -57
- package/lib/scrubber/utils/pattern-matcher.ts +64 -0
- package/lib/scrubber/utils/token-counter.d.ts +18 -0
- package/lib/scrubber/utils/token-counter.js +24 -25
- package/lib/scrubber/utils/token-counter.ts +32 -0
- package/lib/utils/logger.d.ts +19 -0
- package/lib/utils/logger.js +65 -0
- package/lib/utils/logger.ts +65 -0
- package/lib/utils/skill-metadata.d.ts +24 -0
- package/lib/utils/skill-metadata.js +133 -0
- package/lib/utils/skill-metadata.ts +133 -0
- package/lib/yamo/emitter.d.ts +46 -0
- package/lib/yamo/emitter.js +79 -143
- package/lib/yamo/emitter.ts +171 -0
- package/lib/yamo/index.d.ts +14 -0
- package/lib/yamo/index.js +6 -7
- package/lib/yamo/index.ts +16 -0
- package/lib/yamo/schema.d.ts +56 -0
- package/lib/yamo/schema.js +82 -108
- package/lib/yamo/schema.ts +133 -0
- package/package.json +13 -8
- package/index.d.ts +0 -111
- package/lib/embeddings/factory.js +0 -151
- package/lib/embeddings/index.js +0 -2
- package/lib/embeddings/service.js +0 -586
- package/lib/index.js +0 -6
- package/lib/lancedb/client.js +0 -633
- package/lib/lancedb/config.js +0 -215
- package/lib/lancedb/errors.js +0 -144
- package/lib/lancedb/index.js +0 -4
- package/lib/lancedb/schema.js +0 -217
- package/lib/search/index.js +0 -1
- package/lib/search/keyword-search.js +0 -144
- package/lib/utils/index.js +0 -1
|
@@ -1,66 +1,56 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
1
2
|
/**
|
|
2
3
|
* S-MORA Layer 0 Scrubber - Stage 6: Validation
|
|
3
4
|
* @module smora/scrubber/stages/validator
|
|
4
5
|
*/
|
|
5
|
-
|
|
6
6
|
import { TokenCounter } from '../utils/token-counter.js';
|
|
7
|
-
import { ValidationError } from '../errors/scrubber-error.js';
|
|
8
|
-
|
|
9
7
|
export class Validator {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
/**
|
|
16
|
-
* Validate chunks
|
|
17
|
-
* @param {Array} chunks - Array of chunks
|
|
18
|
-
* @returns {Promise<Array>} - Validated chunks
|
|
19
|
-
*/
|
|
20
|
-
async validate(chunks) {
|
|
21
|
-
const valid = [];
|
|
22
|
-
const errors = [];
|
|
23
|
-
|
|
24
|
-
for (const chunk of chunks) {
|
|
25
|
-
const validation = this._validateChunk(chunk);
|
|
26
|
-
|
|
27
|
-
if (validation.valid) {
|
|
28
|
-
valid.push(chunk);
|
|
29
|
-
} else {
|
|
30
|
-
errors.push({
|
|
31
|
-
chunkIndex: chunk.index,
|
|
32
|
-
errors: validation.errors
|
|
33
|
-
});
|
|
34
|
-
}
|
|
8
|
+
constructor(config) {
|
|
9
|
+
this.config = config;
|
|
10
|
+
this.tokenCounter = new TokenCounter();
|
|
35
11
|
}
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
12
|
+
/**
|
|
13
|
+
* Validate chunks
|
|
14
|
+
* @param {Array} chunks - Array of chunks
|
|
15
|
+
* @returns {Promise<Array>} - Validated chunks
|
|
16
|
+
*/
|
|
17
|
+
async validate(chunks) {
|
|
18
|
+
const valid = [];
|
|
19
|
+
const errors = [];
|
|
20
|
+
for (const chunk of chunks) {
|
|
21
|
+
const validation = this._validateChunk(chunk);
|
|
22
|
+
if (validation.valid) {
|
|
23
|
+
valid.push(chunk);
|
|
24
|
+
}
|
|
25
|
+
else {
|
|
26
|
+
errors.push({
|
|
27
|
+
chunkIndex: chunk.index,
|
|
28
|
+
errors: validation.errors
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
return valid;
|
|
45
33
|
}
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
34
|
+
_validateChunk(chunk) {
|
|
35
|
+
const errors = [];
|
|
36
|
+
if (this.config.rejectEmptyChunks && !chunk.text.trim()) {
|
|
37
|
+
errors.push('empty_chunk');
|
|
38
|
+
}
|
|
39
|
+
if (this.config.enforceMinLength) {
|
|
40
|
+
const tokens = this.tokenCounter.count(chunk.text);
|
|
41
|
+
if (tokens < this.config.minTokens) {
|
|
42
|
+
errors.push(`chunk_too_short: ${tokens} < ${this.config.minTokens}`);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
if (this.config.enforceMaxLength) {
|
|
46
|
+
const tokens = this.tokenCounter.count(chunk.text);
|
|
47
|
+
if (tokens > this.config.hardMaxTokens) {
|
|
48
|
+
errors.push(`chunk_too_long: ${tokens} > ${this.config.hardMaxTokens}`);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
return {
|
|
52
|
+
valid: errors.length === 0,
|
|
53
|
+
errors
|
|
54
|
+
};
|
|
52
55
|
}
|
|
53
|
-
|
|
54
|
-
if (this.config.enforceMaxLength) {
|
|
55
|
-
const tokens = this.tokenCounter.count(chunk.text);
|
|
56
|
-
if (tokens > this.config.hardMaxTokens) {
|
|
57
|
-
errors.push(`chunk_too_long: ${tokens} > ${this.config.hardMaxTokens}`);
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
return {
|
|
62
|
-
valid: errors.length === 0,
|
|
63
|
-
errors
|
|
64
|
-
};
|
|
65
|
-
}
|
|
66
56
|
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
/**
|
|
3
|
+
* S-MORA Layer 0 Scrubber - Stage 6: Validation
|
|
4
|
+
* @module smora/scrubber/stages/validator
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { TokenCounter } from '../utils/token-counter.js';
|
|
8
|
+
import { ValidationError } from '../errors/scrubber-error.js';
|
|
9
|
+
|
|
10
|
+
export class Validator {
|
|
11
|
+
constructor(config) {
|
|
12
|
+
this.config = config;
|
|
13
|
+
this.tokenCounter = new TokenCounter();
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Validate chunks
|
|
18
|
+
* @param {Array} chunks - Array of chunks
|
|
19
|
+
* @returns {Promise<Array>} - Validated chunks
|
|
20
|
+
*/
|
|
21
|
+
async validate(chunks) {
|
|
22
|
+
const valid = [];
|
|
23
|
+
const errors = [];
|
|
24
|
+
|
|
25
|
+
for (const chunk of chunks) {
|
|
26
|
+
const validation = this._validateChunk(chunk);
|
|
27
|
+
|
|
28
|
+
if (validation.valid) {
|
|
29
|
+
valid.push(chunk);
|
|
30
|
+
} else {
|
|
31
|
+
errors.push({
|
|
32
|
+
chunkIndex: chunk.index,
|
|
33
|
+
errors: validation.errors
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
return valid;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
_validateChunk(chunk) {
|
|
42
|
+
const errors = [];
|
|
43
|
+
|
|
44
|
+
if (this.config.rejectEmptyChunks && !chunk.text.trim()) {
|
|
45
|
+
errors.push('empty_chunk');
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
if (this.config.enforceMinLength) {
|
|
49
|
+
const tokens = this.tokenCounter.count(chunk.text);
|
|
50
|
+
if (tokens < this.config.minTokens) {
|
|
51
|
+
errors.push(`chunk_too_short: ${tokens} < ${this.config.minTokens}`);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
if (this.config.enforceMaxLength) {
|
|
56
|
+
const tokens = this.tokenCounter.count(chunk.text);
|
|
57
|
+
if (tokens > this.config.hardMaxTokens) {
|
|
58
|
+
errors.push(`chunk_too_long: ${tokens} > ${this.config.hardMaxTokens}`);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return {
|
|
63
|
+
valid: errors.length === 0,
|
|
64
|
+
errors
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* S-MORA Layer 0 Scrubber Telemetry Collection
|
|
3
|
+
* @module smora/scrubber/telemetry
|
|
4
|
+
*/
|
|
5
|
+
export declare class ScrubberTelemetry {
|
|
6
|
+
stats: any;
|
|
7
|
+
constructor();
|
|
8
|
+
recordStage(stage: any, duration: any, success?: boolean): void;
|
|
9
|
+
getStageStats(stage: any): {
|
|
10
|
+
count: any;
|
|
11
|
+
avgTime: number;
|
|
12
|
+
totalTime: any;
|
|
13
|
+
errors: any;
|
|
14
|
+
};
|
|
15
|
+
getSummary(): {
|
|
16
|
+
stages: any;
|
|
17
|
+
performance: {
|
|
18
|
+
structural: any;
|
|
19
|
+
semantic: any;
|
|
20
|
+
normalization: any;
|
|
21
|
+
chunking: any;
|
|
22
|
+
metadata: any;
|
|
23
|
+
validation: any;
|
|
24
|
+
total: unknown;
|
|
25
|
+
};
|
|
26
|
+
};
|
|
27
|
+
reset(): void;
|
|
28
|
+
assertPerformanceBudget(budget?: number): void;
|
|
29
|
+
}
|
|
@@ -1,66 +1,62 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
1
2
|
/**
|
|
2
3
|
* S-MORA Layer 0 Scrubber Telemetry Collection
|
|
3
4
|
* @module smora/scrubber/telemetry
|
|
4
5
|
*/
|
|
5
|
-
|
|
6
6
|
export class ScrubberTelemetry {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
recordStage(stage, duration, success = true) {
|
|
19
|
-
if (!this.stats[stage]) {
|
|
20
|
-
this.stats[stage] = { count: 0, totalTime: 0, errors: 0 };
|
|
7
|
+
stats;
|
|
8
|
+
constructor() {
|
|
9
|
+
this.stats = {
|
|
10
|
+
structural: { count: 0, totalTime: 0, errors: 0 },
|
|
11
|
+
semantic: { count: 0, totalTime: 0, errors: 0 },
|
|
12
|
+
normalization: { count: 0, totalTime: 0, errors: 0 },
|
|
13
|
+
chunking: { count: 0, totalTime: 0, errors: 0 },
|
|
14
|
+
metadata: { count: 0, totalTime: 0, errors: 0 },
|
|
15
|
+
validation: { count: 0, totalTime: 0, errors: 0 },
|
|
16
|
+
};
|
|
21
17
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
18
|
+
recordStage(stage, duration, success = true) {
|
|
19
|
+
if (!this.stats[stage]) {
|
|
20
|
+
this.stats[stage] = { count: 0, totalTime: 0, errors: 0 };
|
|
21
|
+
}
|
|
22
|
+
this.stats[stage].count++;
|
|
23
|
+
this.stats[stage].totalTime += duration;
|
|
24
|
+
if (!success) {
|
|
25
|
+
this.stats[stage].errors++;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
getStageStats(stage) {
|
|
29
|
+
const stats = this.stats[stage] || { count: 0, totalTime: 0, errors: 0 };
|
|
30
|
+
return {
|
|
31
|
+
count: stats.count,
|
|
32
|
+
avgTime: stats.count > 0 ? stats.totalTime / stats.count : 0,
|
|
33
|
+
totalTime: stats.totalTime,
|
|
34
|
+
errors: stats.errors,
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
getSummary() {
|
|
38
|
+
return {
|
|
39
|
+
stages: this.stats,
|
|
40
|
+
performance: {
|
|
41
|
+
structural: this.stats.structural.totalTime,
|
|
42
|
+
semantic: this.stats.semantic.totalTime,
|
|
43
|
+
normalization: this.stats.normalization.totalTime,
|
|
44
|
+
chunking: this.stats.chunking.totalTime,
|
|
45
|
+
metadata: this.stats.metadata.totalTime,
|
|
46
|
+
validation: this.stats.validation.totalTime,
|
|
47
|
+
total: Object.values(this.stats).reduce((sum, s) => sum + s.totalTime, 0),
|
|
48
|
+
},
|
|
49
|
+
};
|
|
50
|
+
}
|
|
51
|
+
reset() {
|
|
52
|
+
Object.keys(this.stats).forEach((key) => {
|
|
53
|
+
this.stats[key] = { count: 0, totalTime: 0, errors: 0 };
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
assertPerformanceBudget(budget = 10) {
|
|
57
|
+
const summary = this.getSummary();
|
|
58
|
+
if (summary.performance.total > budget) {
|
|
59
|
+
throw new Error(`Performance budget exceeded: ${summary.performance.total}ms > ${budget}ms`);
|
|
60
|
+
}
|
|
64
61
|
}
|
|
65
|
-
}
|
|
66
62
|
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
/**
|
|
3
|
+
* S-MORA Layer 0 Scrubber Telemetry Collection
|
|
4
|
+
* @module smora/scrubber/telemetry
|
|
5
|
+
*/
|
|
6
|
+
export class ScrubberTelemetry {
|
|
7
|
+
stats;
|
|
8
|
+
constructor() {
|
|
9
|
+
this.stats = {
|
|
10
|
+
structural: { count: 0, totalTime: 0, errors: 0 },
|
|
11
|
+
semantic: { count: 0, totalTime: 0, errors: 0 },
|
|
12
|
+
normalization: { count: 0, totalTime: 0, errors: 0 },
|
|
13
|
+
chunking: { count: 0, totalTime: 0, errors: 0 },
|
|
14
|
+
metadata: { count: 0, totalTime: 0, errors: 0 },
|
|
15
|
+
validation: { count: 0, totalTime: 0, errors: 0 },
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
recordStage(stage, duration, success = true) {
|
|
19
|
+
if (!this.stats[stage]) {
|
|
20
|
+
this.stats[stage] = { count: 0, totalTime: 0, errors: 0 };
|
|
21
|
+
}
|
|
22
|
+
this.stats[stage].count++;
|
|
23
|
+
this.stats[stage].totalTime += duration;
|
|
24
|
+
if (!success) {
|
|
25
|
+
this.stats[stage].errors++;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
getStageStats(stage) {
|
|
29
|
+
const stats = this.stats[stage] || { count: 0, totalTime: 0, errors: 0 };
|
|
30
|
+
return {
|
|
31
|
+
count: stats.count,
|
|
32
|
+
avgTime: stats.count > 0 ? stats.totalTime / stats.count : 0,
|
|
33
|
+
totalTime: stats.totalTime,
|
|
34
|
+
errors: stats.errors,
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
getSummary() {
|
|
38
|
+
return {
|
|
39
|
+
stages: this.stats,
|
|
40
|
+
performance: {
|
|
41
|
+
structural: this.stats.structural.totalTime,
|
|
42
|
+
semantic: this.stats.semantic.totalTime,
|
|
43
|
+
normalization: this.stats.normalization.totalTime,
|
|
44
|
+
chunking: this.stats.chunking.totalTime,
|
|
45
|
+
metadata: this.stats.metadata.totalTime,
|
|
46
|
+
validation: this.stats.validation.totalTime,
|
|
47
|
+
total: Object.values(this.stats).reduce((sum, s) => sum + s.totalTime, 0),
|
|
48
|
+
},
|
|
49
|
+
};
|
|
50
|
+
}
|
|
51
|
+
reset() {
|
|
52
|
+
Object.keys(this.stats).forEach((key) => {
|
|
53
|
+
this.stats[key] = { count: 0, totalTime: 0, errors: 0 };
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
assertPerformanceBudget(budget = 10) {
|
|
57
|
+
const summary = this.getSummary();
|
|
58
|
+
if (summary.performance.total > budget) {
|
|
59
|
+
throw new Error(`Performance budget exceeded: ${summary.performance.total}ms > ${budget}ms`);
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export declare class HashUtil {
|
|
2
|
+
/**
|
|
3
|
+
* Hash content for deduplication
|
|
4
|
+
* @param {string} content - Content to hash
|
|
5
|
+
* @returns {string} - SHA256 hash
|
|
6
|
+
*/
|
|
7
|
+
hash(content: any): string;
|
|
8
|
+
/**
|
|
9
|
+
* Fast hash for caching (non-cryptographic)
|
|
10
|
+
* @param {string} content - Content to hash
|
|
11
|
+
* @returns {string} - Simple hash
|
|
12
|
+
*/
|
|
13
|
+
fastHash(content: any): string;
|
|
14
|
+
}
|
|
@@ -1,39 +1,37 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
1
2
|
/**
|
|
2
3
|
* Content Hashing Utilities
|
|
3
4
|
* @module smora/scrubber/utils/hash
|
|
4
5
|
*/
|
|
5
6
|
import crypto from 'crypto';
|
|
6
|
-
|
|
7
7
|
export class HashUtil {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
.digest('hex');
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* Fast hash for caching (non-cryptographic)
|
|
27
|
-
* @param {string} content - Content to hash
|
|
28
|
-
* @returns {string} - Simple hash
|
|
29
|
-
*/
|
|
30
|
-
fastHash(content) {
|
|
31
|
-
let hash = 0;
|
|
32
|
-
for (let i = 0; i < content.length; i++) {
|
|
33
|
-
const char = content.charCodeAt(i);
|
|
34
|
-
hash = ((hash << 5) - hash) + char;
|
|
35
|
-
hash = hash & hash;
|
|
8
|
+
/**
|
|
9
|
+
* Hash content for deduplication
|
|
10
|
+
* @param {string} content - Content to hash
|
|
11
|
+
* @returns {string} - SHA256 hash
|
|
12
|
+
*/
|
|
13
|
+
hash(content) {
|
|
14
|
+
const normalized = content
|
|
15
|
+
.toLowerCase()
|
|
16
|
+
.trim()
|
|
17
|
+
.replace(/\s+/g, ' ');
|
|
18
|
+
return crypto
|
|
19
|
+
.createHash('sha256')
|
|
20
|
+
.update(normalized)
|
|
21
|
+
.digest('hex');
|
|
36
22
|
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
}
|
|
23
|
+
/**
|
|
24
|
+
* Fast hash for caching (non-cryptographic)
|
|
25
|
+
* @param {string} content - Content to hash
|
|
26
|
+
* @returns {string} - Simple hash
|
|
27
|
+
*/
|
|
28
|
+
fastHash(content) {
|
|
29
|
+
let hash = 0;
|
|
30
|
+
for (let i = 0; i < content.length; i++) {
|
|
31
|
+
const char = content.charCodeAt(i);
|
|
32
|
+
hash = ((hash << 5) - hash) + char;
|
|
33
|
+
hash = hash & hash;
|
|
34
|
+
}
|
|
35
|
+
return hash.toString(36);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
/**
|
|
3
|
+
* Content Hashing Utilities
|
|
4
|
+
* @module smora/scrubber/utils/hash
|
|
5
|
+
*/
|
|
6
|
+
import crypto from 'crypto';
|
|
7
|
+
|
|
8
|
+
export class HashUtil {
|
|
9
|
+
/**
|
|
10
|
+
* Hash content for deduplication
|
|
11
|
+
* @param {string} content - Content to hash
|
|
12
|
+
* @returns {string} - SHA256 hash
|
|
13
|
+
*/
|
|
14
|
+
hash(content) {
|
|
15
|
+
const normalized = content
|
|
16
|
+
.toLowerCase()
|
|
17
|
+
.trim()
|
|
18
|
+
.replace(/\s+/g, ' ');
|
|
19
|
+
|
|
20
|
+
return crypto
|
|
21
|
+
.createHash('sha256')
|
|
22
|
+
.update(normalized)
|
|
23
|
+
.digest('hex');
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Fast hash for caching (non-cryptographic)
|
|
28
|
+
* @param {string} content - Content to hash
|
|
29
|
+
* @returns {string} - Simple hash
|
|
30
|
+
*/
|
|
31
|
+
fastHash(content) {
|
|
32
|
+
let hash = 0;
|
|
33
|
+
for (let i = 0; i < content.length; i++) {
|
|
34
|
+
const char = content.charCodeAt(i);
|
|
35
|
+
hash = ((hash << 5) - hash) + char;
|
|
36
|
+
hash = hash & hash;
|
|
37
|
+
}
|
|
38
|
+
return hash.toString(36);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Parsing Utilities
|
|
3
|
+
* @module smora/scrubber/utils/html-parser
|
|
4
|
+
*/
|
|
5
|
+
export declare class HTMLParser {
|
|
6
|
+
/**
|
|
7
|
+
* Extract text content from HTML
|
|
8
|
+
* @param {string} html - HTML content
|
|
9
|
+
* @returns {string} - Extracted text
|
|
10
|
+
*/
|
|
11
|
+
parse(html: any): any;
|
|
12
|
+
_extractText(html: any): any;
|
|
13
|
+
_stripTags(html: any): any;
|
|
14
|
+
}
|
|
@@ -1,45 +1,38 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
1
2
|
/**
|
|
2
3
|
* HTML Parsing Utilities
|
|
3
4
|
* @module smora/scrubber/utils/html-parser
|
|
4
5
|
*/
|
|
5
|
-
|
|
6
6
|
export class HTMLParser {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
return text;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
_stripTags(html) {
|
|
43
|
-
return html.replace(/<[^>]+>/g, '');
|
|
44
|
-
}
|
|
7
|
+
/**
|
|
8
|
+
* Extract text content from HTML
|
|
9
|
+
* @param {string} html - HTML content
|
|
10
|
+
* @returns {string} - Extracted text
|
|
11
|
+
*/
|
|
12
|
+
parse(html) {
|
|
13
|
+
return this._extractText(html);
|
|
14
|
+
}
|
|
15
|
+
_extractText(html) {
|
|
16
|
+
// Remove scripts, styles, and comments
|
|
17
|
+
let text = html;
|
|
18
|
+
text = text.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
|
|
19
|
+
text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
|
|
20
|
+
text = text.replace(/<!--[\s\S]*?-->/g, '');
|
|
21
|
+
// Convert headings to markdown
|
|
22
|
+
text = text.replace(/<h([1-6])([^>]*)>(.*?)<\/h\1>/gi, (match, level, attrs, content) => {
|
|
23
|
+
const headingLevel = parseInt(level);
|
|
24
|
+
const hashes = '#'.repeat(headingLevel);
|
|
25
|
+
return `${hashes} ${this._stripTags(content)}\n\n`;
|
|
26
|
+
});
|
|
27
|
+
// Convert paragraphs
|
|
28
|
+
text = text.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n');
|
|
29
|
+
// Convert lists
|
|
30
|
+
text = text.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
|
|
31
|
+
// Remove remaining tags
|
|
32
|
+
text = text.replace(/<[^>]+>/g, '');
|
|
33
|
+
return text;
|
|
34
|
+
}
|
|
35
|
+
_stripTags(html) {
|
|
36
|
+
return html.replace(/<[^>]+>/g, '');
|
|
37
|
+
}
|
|
45
38
|
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
/**
|
|
3
|
+
* HTML Parsing Utilities
|
|
4
|
+
* @module smora/scrubber/utils/html-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
export class HTMLParser {
|
|
8
|
+
/**
|
|
9
|
+
* Extract text content from HTML
|
|
10
|
+
* @param {string} html - HTML content
|
|
11
|
+
* @returns {string} - Extracted text
|
|
12
|
+
*/
|
|
13
|
+
parse(html) {
|
|
14
|
+
return this._extractText(html);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
_extractText(html) {
|
|
18
|
+
// Remove scripts, styles, and comments
|
|
19
|
+
let text = html;
|
|
20
|
+
text = text.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
|
|
21
|
+
text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
|
|
22
|
+
text = text.replace(/<!--[\s\S]*?-->/g, '');
|
|
23
|
+
|
|
24
|
+
// Convert headings to markdown
|
|
25
|
+
text = text.replace(/<h([1-6])([^>]*)>(.*?)<\/h\1>/gi, (match, level, attrs, content) => {
|
|
26
|
+
const headingLevel = parseInt(level);
|
|
27
|
+
const hashes = '#'.repeat(headingLevel);
|
|
28
|
+
return `${hashes} ${this._stripTags(content)}\n\n`;
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
// Convert paragraphs
|
|
32
|
+
text = text.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n');
|
|
33
|
+
|
|
34
|
+
// Convert lists
|
|
35
|
+
text = text.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
|
|
36
|
+
|
|
37
|
+
// Remove remaining tags
|
|
38
|
+
text = text.replace(/<[^>]+>/g, '');
|
|
39
|
+
|
|
40
|
+
return text;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
_stripTags(html) {
|
|
44
|
+
return html.replace(/<[^>]+>/g, '');
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Boilerplate Pattern Matching Utilities
|
|
3
|
+
* @module smora/scrubber/utils/pattern-matcher
|
|
4
|
+
*/
|
|
5
|
+
export declare class PatternMatcher {
|
|
6
|
+
constructor();
|
|
7
|
+
_loadDefaultPatterns(): (string | RegExp)[];
|
|
8
|
+
getBoilerplatePatterns(): any;
|
|
9
|
+
addPattern(pattern: any): void;
|
|
10
|
+
removePattern(index: any): void;
|
|
11
|
+
isBoilerplate(text: any): any;
|
|
12
|
+
}
|