@yamo/memory-mesh 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +80 -0
  3. package/bin/memory_mesh.js +69 -0
  4. package/bin/scrubber.js +81 -0
  5. package/index.d.ts +111 -0
  6. package/lib/adapters/index.js +3 -0
  7. package/lib/embeddings/factory.js +150 -0
  8. package/lib/embeddings/index.js +2 -0
  9. package/lib/embeddings/service.js +586 -0
  10. package/lib/index.js +18 -0
  11. package/lib/lancedb/client.js +631 -0
  12. package/lib/lancedb/config.js +215 -0
  13. package/lib/lancedb/errors.js +144 -0
  14. package/lib/lancedb/index.js +4 -0
  15. package/lib/lancedb/schema.js +197 -0
  16. package/lib/memory/index.js +3 -0
  17. package/lib/memory/memory-context-manager.js +388 -0
  18. package/lib/memory/memory-mesh.js +910 -0
  19. package/lib/memory/memory-translator.js +130 -0
  20. package/lib/memory/migrate-memory.js +227 -0
  21. package/lib/memory/migrate-to-v2.js +120 -0
  22. package/lib/memory/scorer.js +85 -0
  23. package/lib/memory/vector-memory.js +364 -0
  24. package/lib/privacy/audit-logger.js +176 -0
  25. package/lib/privacy/dlp-redactor.js +72 -0
  26. package/lib/privacy/index.js +10 -0
  27. package/lib/reporting/skill-report-generator.js +283 -0
  28. package/lib/scrubber/.gitkeep +1 -0
  29. package/lib/scrubber/config/defaults.js +62 -0
  30. package/lib/scrubber/errors/scrubber-error.js +43 -0
  31. package/lib/scrubber/index.js +25 -0
  32. package/lib/scrubber/scrubber.js +130 -0
  33. package/lib/scrubber/stages/chunker.js +103 -0
  34. package/lib/scrubber/stages/metadata-annotator.js +74 -0
  35. package/lib/scrubber/stages/normalizer.js +59 -0
  36. package/lib/scrubber/stages/semantic-filter.js +61 -0
  37. package/lib/scrubber/stages/structural-cleaner.js +82 -0
  38. package/lib/scrubber/stages/validator.js +66 -0
  39. package/lib/scrubber/telemetry.js +66 -0
  40. package/lib/scrubber/utils/hash.js +39 -0
  41. package/lib/scrubber/utils/html-parser.js +45 -0
  42. package/lib/scrubber/utils/pattern-matcher.js +63 -0
  43. package/lib/scrubber/utils/token-counter.js +31 -0
  44. package/lib/search/filter.js +275 -0
  45. package/lib/search/hybrid.js +137 -0
  46. package/lib/search/index.js +3 -0
  47. package/lib/search/pattern-miner.js +160 -0
  48. package/lib/utils/error-sanitizer.js +84 -0
  49. package/lib/utils/handoff-validator.js +85 -0
  50. package/lib/utils/index.js +4 -0
  51. package/lib/utils/spinner.js +190 -0
  52. package/lib/utils/streaming-client.js +128 -0
  53. package/package.json +39 -0
  54. package/skills/SKILL.md +462 -0
  55. package/skills/skill-scrubber.yamo +41 -0
@@ -0,0 +1,74 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber - Stage 5: Metadata Annotation
3
+ * @module smora/scrubber/stages/metadata-annotator
4
+ */
5
+
6
+ import { HashUtil } from '../utils/hash.js';
7
+
8
/**
 * Annotates chunks with provenance metadata (source, type, section,
 * heading path, ingestion timestamp, content hash) according to the
 * configuration flags.
 */
export class MetadataAnnotator {
  /**
   * @param {Object} config - Annotation flags: addSource, addSection,
   *   addHeadingPath, addTimestamp, addHash.
   */
  constructor(config) {
    this.config = config;
    this.hashUtil = new HashUtil();
  }

  /**
   * Add metadata to chunks.
   * @param {Array} chunks - Array of chunks
   * @param {Object} document - Original document metadata
   * @returns {Promise<Array>} - Annotated chunks
   */
  async annotate(chunks, document) {
    // Heading-path state is threaded across chunks in document order.
    const headingPath = [];

    return chunks.map((chunk) => {
      // Flags that are off write `undefined`, which both suppresses the
      // field and erases any same-named key carried in via chunk.metadata
      // (matches the original overwrite-then-filter behavior).
      // NOTE(review): doc_type is gated by addSource (there is no separate
      // addType flag) — presumably intentional, source and type travel together.
      const overlay = {
        source: this.config.addSource ? document.source : undefined,
        doc_type: this.config.addSource ? document.type : undefined,
        section: this.config.addSection ? this._extractSection(chunk) : undefined,
        heading_path: this.config.addHeadingPath
          ? this._buildHeadingPath(chunk, headingPath)
          : undefined,
        ingestion_timestamp: this.config.addTimestamp
          ? new Date().toISOString()
          : undefined,
        hash: this.config.addHash ? this.hashUtil.hash(chunk.text) : undefined
      };

      // Merge and drop every undefined-valued entry.
      const metadata = {};
      for (const [key, value] of Object.entries({ ...chunk.metadata, ...overlay })) {
        if (value !== undefined) metadata[key] = value;
      }

      return { ...chunk, metadata };
    });
  }

  // Section label: the chunk's own heading, or a fixed placeholder.
  _extractSection(chunk) {
    return chunk.metadata.heading || 'unnamed-section';
  }

  /**
   * Update the running heading path with this chunk's heading and return
   * a snapshot. Mutates `currentPath` (shared across the annotate pass).
   */
  _buildHeadingPath(chunk, currentPath) {
    const heading = chunk.metadata.heading;
    const tail = currentPath[currentPath.length - 1];

    if (heading && heading !== tail) {
      // Anything that is not a sub-heading of the current tail starts
      // a brand-new path.
      if (currentPath.length > 0 && !this._isSubHeading(heading, tail)) {
        currentPath.length = 0;
      }
      currentPath.push(heading);
    }

    return currentPath.slice();
  }

  // Heuristic: longer heading text is treated as deeper nesting.
  _isSubHeading(candidate, parent) {
    return candidate.length > parent.length;
  }
}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber - Stage 3: Normalization
3
+ * @module smora/scrubber/stages/normalizer
4
+ */
5
+
6
/**
 * Normalizes markdown structure: heading markers, list markers and
 * punctuation, each gated by a configuration flag.
 */
export class Normalizer {
  /**
   * @param {Object} config - Flags: normalizeHeadings, normalizeLists,
   *   normalizePunctuation.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Normalize content structure.
   * @param {string} content - Filtered content
   * @returns {Promise<string>} - Normalized content
   */
  async normalize(content) {
    let normalized = content;

    if (this.config.normalizeHeadings) {
      normalized = this._normalizeHeadings(normalized);
    }

    if (this.config.normalizeLists) {
      normalized = this._normalizeLists(normalized);
    }

    if (this.config.normalizePunctuation) {
      normalized = this._normalizePunctuation(normalized);
    }

    return normalized;
  }

  _normalizeHeadings(content) {
    // Strip leading spaces/tabs before heading markers first, so the
    // line-anchored fixes below can match. ([ \t]* rather than \s*: the
    // old \s* also swallowed blank lines preceding a heading.)
    let normalized = content.replace(/^[ \t]*(#{1,6})/gm, '$1');
    // Cap heading depth at six before inserting the space.
    normalized = normalized.replace(/^#{7,}/gm, '######');
    // Ensure a space between marker and text — anchored to line starts so
    // inline "#" (e.g. "issue #5") is no longer mangled into "issue # 5".
    normalized = normalized.replace(/^(#{1,6})([^\s#])/gm, '$1 $2');
    return normalized;
  }

  _normalizeLists(content) {
    // All list fixes are line-anchored: the previous unanchored regexes
    // corrupted inline text ("well-known" -> "well- known",
    // "in 2023." -> "in 2023. .").
    let normalized = content.replace(/^([ \t]*)([-*+])(\S)/gm, '$1$2 $3');
    // "1item" -> "1. item" (missing dot and space).
    normalized = normalized.replace(/^([ \t]*)(\d+)([^\s.])/gm, '$1$2. $3');
    // "1.item" -> "1. item" (missing space after the dot).
    normalized = normalized.replace(/^([ \t]*)(\d+)\.(\S)/gm, '$1$2. $3');
    return normalized;
  }

  _normalizePunctuation(content) {
    // Remove quotes (both straight and curly), collapse runs of spaces,
    // and cap ellipses at three dots.
    let normalized = content.replace(/["'""''`]/g, '');
    normalized = normalized.replace(/ +/g, ' ');
    normalized = normalized.replace(/\.{4,}/g, '...');
    return normalized;
  }
}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber - Stage 2: Semantic Filtering
3
+ * @module smora/scrubber/stages/semantic-filter
4
+ */
5
+
6
+ import { PatternMatcher } from '../utils/pattern-matcher.js';
7
+ import { HashUtil } from '../utils/hash.js';
8
+
9
/**
 * Drops semantically empty content: boilerplate paragraphs, exact
 * duplicates (optional), and paragraphs with too little alphanumeric
 * signal.
 */
export class SemanticFilter {
  /**
   * @param {Object} config - Flags/values: removeDuplicates, minSignalRatio.
   */
  constructor(config) {
    this.config = config;
    this.patternMatcher = new PatternMatcher();
    this.hashUtil = new HashUtil();
  }

  /**
   * Filter semantically empty content.
   * @param {string} content - Cleaned content
   * @returns {Promise<string>} - Filtered content (paragraphs re-joined
   *   with blank lines)
   */
  async filter(content) {
    const paragraphs = content.split(/\n\n+/);

    let filtered = paragraphs.filter(p => !this._isBoilerplate(p));
    filtered = await this._removeDuplicates(filtered);
    filtered = filtered.filter(p => this._hasSignal(p));

    return filtered.join('\n\n');
  }

  _isBoilerplate(paragraph) {
    return this.patternMatcher.isBoilerplate(paragraph);
  }

  /**
   * Remove exact-duplicate paragraphs by content hash, keeping the first
   * occurrence. No-op unless config.removeDuplicates is set.
   * @param {string[]} paragraphs
   * @returns {Promise<string[]>}
   */
  async _removeDuplicates(paragraphs) {
    if (!this.config.removeDuplicates) return paragraphs;

    const seen = new Set();
    const unique = [];

    for (const para of paragraphs) {
      const hash = this.hashUtil.hash(para);
      if (!seen.has(hash)) {
        seen.add(hash);
        unique.push(para);
      }
    }

    return unique;
  }

  /**
   * A paragraph has signal when it is at least 10 characters long and its
   * alphanumeric ratio meets the configured threshold.
   */
  _hasSignal(paragraph) {
    const text = paragraph.trim();
    if (text.length < 10) return false;

    const signalChars = text.replace(/[^a-zA-Z0-9]/g, '').length;
    const ratio = signalChars / text.length;

    // FIX: ?? instead of || so an explicitly configured threshold of 0
    // is honored rather than silently replaced by the 0.3 default.
    return ratio >= (this.config.minSignalRatio ?? 0.3);
  }
}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber - Stage 1: Structural Cleaning
3
+ * @module smora/scrubber/stages/structural-cleaner
4
+ */
5
+
6
+ import { HTMLParser } from '../utils/html-parser.js';
7
+ import { StructuralCleaningError, ScrubberError } from '../errors/scrubber-error.js';
8
+
9
/**
 * Stage 1: strips structural noise (HTML tags, broken markdown markers,
 * redundant whitespace) from raw document content.
 */
export class StructuralCleaner {
  constructor(config) {
    this.config = config;
    this.htmlParser = new HTMLParser();
  }

  /**
   * Clean document structure.
   * @param {string} content - Raw document content
   * @returns {Promise<string>} - Cleaned content
   * @throws {ScrubberError} wrapping any failure, tagged with this stage
   */
  async clean(content) {
    try {
      const type = this._detectType(content);
      let cleaned = content;

      if (type === 'html') {
        cleaned = await this._cleanHTML(cleaned);
        // HTML may have markdown headings, normalize them
        cleaned = await this._cleanMarkdown(cleaned);
      } else if (type === 'markdown') {
        cleaned = await this._cleanMarkdown(cleaned);
      }

      // FIX: normalize line breaks BEFORE collapsing — the old order ran
      // \n{3,} collapsing while the text could still contain \r\n pairs,
      // so Windows-style blank-line runs were never collapsed.
      cleaned = this._normalizeLineBreaks(cleaned);
      cleaned = this._collapseWhitespace(cleaned);

      return cleaned;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      throw new ScrubberError(
        `Failed to clean content: ${message}`,
        { stage: 'structural-cleaner', originalError: error }
      );
    }
  }

  // Crude sniffing: leading "<" means HTML; a leading heading marker
  // means markdown; everything else is plain text.
  _detectType(content) {
    if (content.trim().startsWith('<')) return 'html';
    if (/^#{1,6}\s/.test(content) || /^#{1,6}[A-Za-z]/.test(content)) return 'markdown';
    return 'text';
  }

  async _cleanHTML(content) {
    return this.htmlParser.parse(content);
  }

  async _cleanMarkdown(content) {
    let cleaned = content;
    // Add a space after heading markers when missing — line-anchored so
    // inline "#" (e.g. "issue #5") is no longer turned into a heading.
    cleaned = cleaned.replace(/^(#{1,6})([^\s#])/gm, '$1 $2');
    // Add a space after bullet markers when missing; the old unanchored
    // regex corrupted hyphenated words ("well-known" -> "well- known").
    cleaned = cleaned.replace(/^([ \t]*)([-*+])(\S)/gm, '$1$2 $3');
    // Numbered lists, line-anchored: "1item" -> "1. item" and
    // "1.item" -> "1. item". The old unanchored version mangled inline
    // numbers ("in 2023." -> "in 2023. .").
    cleaned = cleaned.replace(/^([ \t]*)(\d+)([^\s.])/gm, '$1$2. $3');
    cleaned = cleaned.replace(/^([ \t]*)(\d+)\.(\S)/gm, '$1$2. $3');
    return cleaned;
  }

  _collapseWhitespace(content) {
    // Runs of spaces/tabs -> single space; 3+ newlines -> one blank line.
    let cleaned = content.replace(/[ \t]+/g, ' ');
    cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
    return cleaned;
  }

  _normalizeLineBreaks(content) {
    return content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  }
}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber - Stage 6: Validation
3
+ * @module smora/scrubber/stages/validator
4
+ */
5
+
6
+ import { TokenCounter } from '../utils/token-counter.js';
7
+ import { ValidationError } from '../errors/scrubber-error.js';
8
+
9
/**
 * Stage 6: validates chunks against configured size/emptiness rules and
 * drops those that fail.
 */
export class Validator {
  /**
   * @param {Object} config - Flags/limits: rejectEmptyChunks,
   *   enforceMinLength, minTokens, enforceMaxLength, hardMaxTokens.
   */
  constructor(config) {
    this.config = config;
    this.tokenCounter = new TokenCounter();
  }

  /**
   * Validate chunks, keeping only those that pass every enabled check.
   * Invalid chunks are silently dropped, matching the original contract.
   * (The old implementation also accumulated an `errors` array that was
   * never used; per-chunk details remain available via _validateChunk.)
   * @param {Array} chunks - Array of chunks
   * @returns {Promise<Array>} - Validated chunks
   */
  async validate(chunks) {
    return chunks.filter((chunk) => this._validateChunk(chunk).valid);
  }

  /**
   * Check one chunk against all enabled rules.
   * @returns {{valid: boolean, errors: string[]}}
   */
  _validateChunk(chunk) {
    const errors = [];

    if (this.config.rejectEmptyChunks && !chunk.text.trim()) {
      errors.push('empty_chunk');
    }

    // Count tokens once; the old code counted twice when both length
    // checks were enabled.
    if (this.config.enforceMinLength || this.config.enforceMaxLength) {
      const tokens = this.tokenCounter.count(chunk.text);

      if (this.config.enforceMinLength && tokens < this.config.minTokens) {
        errors.push(`chunk_too_short: ${tokens} < ${this.config.minTokens}`);
      }

      if (this.config.enforceMaxLength && tokens > this.config.hardMaxTokens) {
        errors.push(`chunk_too_long: ${tokens} > ${this.config.hardMaxTokens}`);
      }
    }

    return {
      valid: errors.length === 0,
      errors
    };
  }
}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber Telemetry Collection
3
+ * @module smora/scrubber/telemetry
4
+ */
5
+
6
/**
 * Collects per-stage timing and error counters for the scrubber pipeline.
 */
export class ScrubberTelemetry {
  constructor() {
    // The six standard pipeline stages; custom stages are added lazily
    // by recordStage.
    this.stats = {
      structural: { count: 0, totalTime: 0, errors: 0 },
      semantic: { count: 0, totalTime: 0, errors: 0 },
      normalization: { count: 0, totalTime: 0, errors: 0 },
      chunking: { count: 0, totalTime: 0, errors: 0 },
      metadata: { count: 0, totalTime: 0, errors: 0 },
      validation: { count: 0, totalTime: 0, errors: 0 }
    };
  }

  /**
   * Record one run of a pipeline stage.
   * @param {string} stage - Stage name (unknown names are created lazily)
   * @param {number} duration - Elapsed time in milliseconds
   * @param {boolean} [success=true] - Whether the run succeeded
   */
  recordStage(stage, duration, success = true) {
    if (!this.stats[stage]) {
      this.stats[stage] = { count: 0, totalTime: 0, errors: 0 };
    }
    const entry = this.stats[stage];
    entry.count++;
    entry.totalTime += duration;
    if (!success) entry.errors++;
  }

  /**
   * Aggregated statistics for a single stage.
   * @param {string} stage
   * @returns {{count: number, avgTime: number, totalTime: number, errors: number}}
   */
  getStageStats(stage) {
    // FIX: an unknown stage name used to throw a TypeError here —
    // recordStage guards against missing stages but this method did not.
    const stats = this.stats[stage] || { count: 0, totalTime: 0, errors: 0 };
    return {
      count: stats.count,
      avgTime: stats.count > 0 ? stats.totalTime / stats.count : 0,
      totalTime: stats.totalTime,
      errors: stats.errors
    };
  }

  /**
   * Full snapshot: raw per-stage stats plus a time breakdown.
   * @returns {Object}
   */
  getSummary() {
    return {
      stages: this.stats,
      performance: {
        structural: this.stats.structural.totalTime,
        semantic: this.stats.semantic.totalTime,
        normalization: this.stats.normalization.totalTime,
        chunking: this.stats.chunking.totalTime,
        metadata: this.stats.metadata.totalTime,
        validation: this.stats.validation.totalTime,
        // Total covers every stage, including lazily-added custom ones.
        total: Object.values(this.stats).reduce((sum, s) => sum + s.totalTime, 0)
      }
    };
  }

  /** Zero out every counter (lazily-added stages are kept but reset). */
  reset() {
    Object.keys(this.stats).forEach(key => {
      this.stats[key] = { count: 0, totalTime: 0, errors: 0 };
    });
  }

  /**
   * Throw when total recorded time exceeds the budget.
   * @param {number} [budget=10] - Budget in milliseconds
   * @throws {Error} when exceeded
   */
  assertPerformanceBudget(budget = 10) {
    const summary = this.getSummary();
    if (summary.performance.total > budget) {
      throw new Error(
        `Performance budget exceeded: ${summary.performance.total}ms > ${budget}ms`
      );
    }
  }
}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Content Hashing Utilities
3
+ * @module smora/scrubber/utils/hash
4
+ */
5
+ import crypto from 'crypto';
6
+
7
/**
 * Content hashing helpers for deduplication and caching.
 */
export class HashUtil {
  /**
   * Hash content for deduplication. Case, surrounding whitespace and
   * internal whitespace runs are normalized away first, so hashes are
   * insensitive to cosmetic differences.
   * @param {string} content - Content to hash
   * @returns {string} - SHA256 hex digest
   */
  hash(content) {
    const canonical = content.toLowerCase().trim().replace(/\s+/g, ' ');
    const digest = crypto.createHash('sha256');
    digest.update(canonical);
    return digest.digest('hex');
  }

  /**
   * Fast non-cryptographic hash for caching (djb2-style, clamped to a
   * 32-bit signed integer per character, so the result may be negative).
   * @param {string} content - Content to hash
   * @returns {string} - Base-36 string
   */
  fastHash(content) {
    let acc = 0;
    for (let i = 0; i < content.length; i += 1) {
      acc = (acc << 5) - acc + content.charCodeAt(i);
      acc |= 0; // clamp to 32 bits (same as the x & x trick)
    }
    return acc.toString(36);
  }
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * HTML Parsing Utilities
3
+ * @module smora/scrubber/utils/html-parser
4
+ */
5
+
6
/**
 * Minimal regex-based HTML-to-text converter: drops non-content markup,
 * rewrites headings/paragraphs/list items to markdown-ish text, then
 * strips whatever tags remain.
 */
export class HTMLParser {
  /**
   * Extract text content from HTML.
   * @param {string} html - HTML content
   * @returns {string} - Extracted text
   */
  parse(html) {
    return this._extractText(html);
  }

  _extractText(html) {
    const withoutNoise = this._dropNonContent(html);
    const markdownish = this._toMarkdownish(withoutNoise);
    // Strip whatever tags remain after the conversions above.
    return markdownish.replace(/<[^>]+>/g, '');
  }

  // Scripts, styles and comments carry no user-visible text.
  _dropNonContent(html) {
    return html
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
      .replace(/<!--[\s\S]*?-->/g, '');
  }

  _toMarkdownish(html) {
    // <h1>..</h1> .. <h6>..</h6> -> "# .." .. "###### ..". Tags nested
    // inside the heading are stripped so its text stays on one line.
    let text = html.replace(/<h([1-6])([^>]*)>(.*?)<\/h\1>/gi, (match, level, attrs, body) =>
      `${'#'.repeat(Number.parseInt(level, 10))} ${this._stripTags(body)}\n\n`
    );
    // Paragraphs become blank-line-separated text; list items get "- ".
    text = text.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n');
    text = text.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
    return text;
  }

  _stripTags(html) {
    return html.replace(/<[^>]+>/g, '');
  }
}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Boilerplate Pattern Matching Utilities
3
+ * @module smora/scrubber/utils/pattern-matcher
4
+ */
5
+
6
/**
 * Matches text against a mutable list of boilerplate indicators. Entries
 * are either RegExps (tested) or lowercase literal substrings (searched).
 */
export class PatternMatcher {
  constructor() {
    this.boilerplatePatterns = this._loadDefaultPatterns();
  }

  // Built-in indicators, grouped by origin of the noise.
  _loadDefaultPatterns() {
    return [
      // Legal/Footer
      /©\s*\d{4}/i,
      /all rights reserved/i,
      /copyright\s+\d{4}/i,

      // Navigation
      /^home\s*\|/i,
      /^navigation\s*:|menu\s*:/i,
      /sidebar/i,

      // Meta
      /^last\s+updated?\s*:/i,
      /cookie\s+policy/i,
      /privacy\s+policy/i,

      // Auto-generated
      /^table\s+of\s+contents?$/i,
      /^contents\s*$/i,
      /jump\s+to\s+(section|navigation)/i,

      // Strings
      'home | docs | contact',
      'skip to main content',
      'this site uses cookies'
    ];
  }

  /** @returns {Array<RegExp|string>} the live pattern list (not a copy). */
  getBoilerplatePatterns() {
    return this.boilerplatePatterns;
  }

  /** Append a RegExp or lowercase substring to the pattern list. */
  addPattern(pattern) {
    this.boilerplatePatterns.push(pattern);
  }

  /** Remove the pattern at `index`; out-of-range indexes are ignored. */
  removePattern(index) {
    const inRange = index >= 0 && index < this.boilerplatePatterns.length;
    if (inRange) {
      this.boilerplatePatterns.splice(index, 1);
    }
  }

  /**
   * True when the (lowercased, trimmed) text matches any pattern.
   * @param {string} text
   * @returns {boolean}
   */
  isBoilerplate(text) {
    const needle = text.toLowerCase().trim();
    for (const pattern of this.boilerplatePatterns) {
      const hit = pattern instanceof RegExp
        ? pattern.test(needle)
        : needle.includes(pattern);
      if (hit) {
        return true;
      }
    }
    return false;
  }
}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Token Counting Utilities
3
+ * @module smora/scrubber/utils/token-counter
4
+ */
5
+
6
/**
 * Cheap token-count estimators (no tokenizer dependency).
 */
export class TokenCounter {
  /**
   * Rough token estimate using the common ~4-characters-per-token rule.
   * @param {string} text - Text to count
   * @returns {number} - Estimated token count
   */
  count(text) {
    return Math.ceil(text.length / 4);
  }

  /**
   * Slightly better estimate: one token per whitespace-separated word
   * plus one per sentence-level punctuation mark.
   * @param {string} text - Text to count
   * @returns {number} - More accurate token count
   */
  countAccurate(text) {
    const wordCount = text.split(/\s+/).filter((w) => w.length > 0).length;
    const punctuation = text.match(/[.,!?;:]/g) ?? [];
    return wordCount + punctuation.length;
  }
}