@yamo/memory-mesh 2.3.2 → 3.0.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (102)
  1. package/bin/memory_mesh.js +1 -1
  2. package/lib/llm/client.d.ts +111 -0
  3. package/lib/llm/client.js +299 -357
  4. package/lib/llm/client.ts +413 -0
  5. package/lib/llm/index.d.ts +17 -0
  6. package/lib/llm/index.js +15 -8
  7. package/lib/llm/index.ts +19 -0
  8. package/lib/memory/adapters/client.d.ts +183 -0
  9. package/lib/memory/adapters/client.js +518 -0
  10. package/lib/memory/adapters/client.ts +678 -0
  11. package/lib/memory/adapters/config.d.ts +137 -0
  12. package/lib/memory/adapters/config.js +189 -0
  13. package/lib/memory/adapters/config.ts +259 -0
  14. package/lib/memory/adapters/errors.d.ts +76 -0
  15. package/lib/memory/adapters/errors.js +128 -0
  16. package/lib/memory/adapters/errors.ts +166 -0
  17. package/lib/memory/context-manager.d.ts +44 -0
  18. package/lib/memory/context-manager.js +344 -0
  19. package/lib/memory/context-manager.ts +432 -0
  20. package/lib/memory/embeddings/factory.d.ts +59 -0
  21. package/lib/memory/embeddings/factory.js +148 -0
  22. package/lib/{embeddings/factory.js → memory/embeddings/factory.ts} +69 -28
  23. package/lib/memory/embeddings/index.d.ts +2 -0
  24. package/lib/memory/embeddings/index.js +2 -0
  25. package/lib/memory/embeddings/index.ts +2 -0
  26. package/lib/memory/embeddings/service.d.ts +164 -0
  27. package/lib/memory/embeddings/service.js +515 -0
  28. package/lib/{embeddings/service.js → memory/embeddings/service.ts} +223 -156
  29. package/lib/memory/index.d.ts +9 -0
  30. package/lib/memory/index.js +9 -1
  31. package/lib/memory/index.ts +20 -0
  32. package/lib/memory/memory-mesh.d.ts +274 -0
  33. package/lib/memory/memory-mesh.js +1469 -678
  34. package/lib/memory/memory-mesh.ts +1803 -0
  35. package/lib/memory/memory-translator.d.ts +19 -0
  36. package/lib/memory/memory-translator.js +125 -0
  37. package/lib/memory/memory-translator.ts +158 -0
  38. package/lib/memory/schema.d.ts +111 -0
  39. package/lib/memory/schema.js +183 -0
  40. package/lib/memory/schema.ts +267 -0
  41. package/lib/memory/scorer.d.ts +26 -0
  42. package/lib/memory/scorer.js +77 -0
  43. package/lib/memory/scorer.ts +95 -0
  44. package/lib/memory/search/index.d.ts +1 -0
  45. package/lib/memory/search/index.js +1 -0
  46. package/lib/memory/search/index.ts +1 -0
  47. package/lib/memory/search/keyword-search.d.ts +62 -0
  48. package/lib/memory/search/keyword-search.js +135 -0
  49. package/lib/{search/keyword-search.js → memory/search/keyword-search.ts} +66 -36
  50. package/lib/scrubber/config/defaults.d.ts +53 -0
  51. package/lib/scrubber/config/defaults.js +49 -57
  52. package/lib/scrubber/config/defaults.ts +117 -0
  53. package/lib/scrubber/index.d.ts +6 -0
  54. package/lib/scrubber/index.js +3 -23
  55. package/lib/scrubber/index.ts +7 -0
  56. package/lib/scrubber/scrubber.d.ts +61 -0
  57. package/lib/scrubber/scrubber.js +99 -121
  58. package/lib/scrubber/scrubber.ts +168 -0
  59. package/lib/scrubber/stages/chunker.d.ts +13 -0
  60. package/lib/scrubber/stages/metadata-annotator.d.ts +18 -0
  61. package/lib/scrubber/stages/normalizer.d.ts +13 -0
  62. package/lib/scrubber/stages/semantic-filter.d.ts +13 -0
  63. package/lib/scrubber/stages/structural-cleaner.d.ts +13 -0
  64. package/lib/scrubber/stages/validator.d.ts +18 -0
  65. package/lib/scrubber/telemetry.d.ts +36 -0
  66. package/lib/scrubber/telemetry.js +53 -58
  67. package/lib/scrubber/telemetry.ts +99 -0
  68. package/lib/utils/logger.d.ts +29 -0
  69. package/lib/utils/logger.js +64 -0
  70. package/lib/utils/logger.ts +85 -0
  71. package/lib/utils/skill-metadata.d.ts +32 -0
  72. package/lib/utils/skill-metadata.js +132 -0
  73. package/lib/utils/skill-metadata.ts +147 -0
  74. package/lib/yamo/emitter.d.ts +73 -0
  75. package/lib/yamo/emitter.js +78 -143
  76. package/lib/yamo/emitter.ts +249 -0
  77. package/lib/yamo/schema.d.ts +58 -0
  78. package/lib/yamo/schema.js +81 -108
  79. package/lib/yamo/schema.ts +165 -0
  80. package/package.json +11 -8
  81. package/index.d.ts +0 -111
  82. package/lib/embeddings/index.js +0 -2
  83. package/lib/index.js +0 -6
  84. package/lib/lancedb/client.js +0 -633
  85. package/lib/lancedb/config.js +0 -215
  86. package/lib/lancedb/errors.js +0 -144
  87. package/lib/lancedb/index.js +0 -4
  88. package/lib/lancedb/schema.js +0 -217
  89. package/lib/scrubber/errors/scrubber-error.js +0 -43
  90. package/lib/scrubber/stages/chunker.js +0 -103
  91. package/lib/scrubber/stages/metadata-annotator.js +0 -74
  92. package/lib/scrubber/stages/normalizer.js +0 -59
  93. package/lib/scrubber/stages/semantic-filter.js +0 -61
  94. package/lib/scrubber/stages/structural-cleaner.js +0 -82
  95. package/lib/scrubber/stages/validator.js +0 -66
  96. package/lib/scrubber/utils/hash.js +0 -39
  97. package/lib/scrubber/utils/html-parser.js +0 -45
  98. package/lib/scrubber/utils/pattern-matcher.js +0 -63
  99. package/lib/scrubber/utils/token-counter.js +0 -31
  100. package/lib/search/index.js +0 -1
  101. package/lib/utils/index.js +0 -1
  102. package/lib/yamo/index.js +0 -15
@@ -3,40 +3,64 @@
3
3
  * Provides basic TF-IDF style retrieval to complement vector search
4
4
  */
5
5
 
6
+ export interface KeywordDoc {
7
+ content: string;
8
+ metadata?: any;
9
+ }
10
+
11
+ export interface KeywordSearchResult extends KeywordDoc {
12
+ id: string;
13
+ score: number;
14
+ matches: string[];
15
+ }
16
+
17
+ export interface SearchOptions {
18
+ limit?: number;
19
+ }
20
+
6
21
  export class KeywordSearch {
22
+ index: Map<string, Map<string, number>>; // token -> Map<docId, tf>
23
+ docLengths: Map<string, number>; // docId -> length
24
+ idf: Map<string, number>; // token -> idf value
25
+ docs: Map<string, KeywordDoc>; // docId -> content (optional, for snippet)
26
+ isDirty: boolean;
27
+
7
28
  constructor() {
8
- this.index = new Map(); // token -> Map<docId, tf>
9
- this.docLengths = new Map(); // docId -> length
10
- this.idf = new Map(); // token -> idf value
11
- this.docs = new Map(); // docId -> content (optional, for snippet)
29
+ this.index = new Map();
30
+ this.docLengths = new Map();
31
+ this.idf = new Map();
32
+ this.docs = new Map();
12
33
  this.isDirty = false;
13
34
  }
14
35
 
15
36
  /**
16
37
  * Tokenize text into normalized terms
17
- * @param {string} text
38
+ * @param {string} text
18
39
  * @returns {string[]} tokens
19
40
  */
20
- tokenize(text) {
21
- if (!text) return [];
22
- return text.toLowerCase()
23
- .replace(/[^\w\s]/g, '') // Remove punctuation
41
+ tokenize(text: string): string[] {
42
+ if (!text) {
43
+ return [];
44
+ }
45
+ return text
46
+ .toLowerCase()
47
+ .replace(/[^\w\s]/g, "") // Remove punctuation
24
48
  .split(/\s+/)
25
- .filter(t => t.length > 2) // Filter stopwords/short
26
- .map(t => t.substring(0, 20)); // Truncate
49
+ .filter((t) => t.length > 2) // Filter stopwords/short
50
+ .map((t) => t.substring(0, 20)); // Truncate
27
51
  }
28
52
 
29
53
  /**
30
54
  * Add a document to the index
31
- * @param {string} id
32
- * @param {string} content
55
+ * @param {string} id
56
+ * @param {string} content
33
57
  * @param {Object} [metadata]
34
58
  */
35
- add(id, content, metadata = {}) {
59
+ add(id: string, content: string, metadata: any = {}): void {
36
60
  const tokens = this.tokenize(content);
37
- const termFreqs = new Map();
61
+ const termFreqs = new Map<string, number>();
38
62
 
39
- tokens.forEach(t => {
63
+ tokens.forEach((t) => {
40
64
  termFreqs.set(t, (termFreqs.get(t) || 0) + 1);
41
65
  });
42
66
 
@@ -48,7 +72,7 @@ export class KeywordSearch {
48
72
  if (!this.index.has(token)) {
49
73
  this.index.set(token, new Map());
50
74
  }
51
- this.index.get(token).set(id, freq);
75
+ this.index.get(token)!.set(id, freq);
52
76
  }
53
77
 
54
78
  this.isDirty = true;
@@ -56,12 +80,12 @@ export class KeywordSearch {
56
80
 
57
81
  /**
58
82
  * Remove a document
59
- * @param {string} id
83
+ * @param {string} id
60
84
  */
61
- remove(id) {
85
+ remove(id: string): void {
62
86
  this.docLengths.delete(id);
63
87
  this.docs.delete(id);
64
-
88
+
65
89
  // This is expensive O(Vocab), but okay for small scale
66
90
  for (const docMap of this.index.values()) {
67
91
  docMap.delete(id);
@@ -72,9 +96,11 @@ export class KeywordSearch {
72
96
  /**
73
97
  * Recalculate IDF scores
74
98
  */
75
- _computeStats() {
76
- if (!this.isDirty) return;
77
-
99
+ _computeStats(): void {
100
+ if (!this.isDirty) {
101
+ return;
102
+ }
103
+
78
104
  const N = this.docLengths.size;
79
105
  this.idf.clear();
80
106
 
@@ -94,18 +120,20 @@ export class KeywordSearch {
94
120
  * @param {Object} options
95
121
  * @returns {Array<{id: string, score: number, matches: string[], content: string, metadata: Object}>}
96
122
  */
97
- search(query, options = {}) {
123
+ search(query: string, options: SearchOptions = {}): KeywordSearchResult[] {
98
124
  this._computeStats();
99
-
125
+
100
126
  const tokens = this.tokenize(query);
101
- const scores = new Map(); // docId -> score
102
- const matches = new Map(); // docId -> matched tokens
127
+ const scores = new Map<string, number>(); // docId -> score
128
+ const matches = new Map<string, string[]>(); // docId -> matched tokens
103
129
 
104
130
  const limit = options.limit || 10;
105
131
 
106
132
  for (const token of tokens) {
107
133
  const docMap = this.index.get(token);
108
- if (!docMap) continue;
134
+ if (!docMap) {
135
+ continue;
136
+ }
109
137
 
110
138
  const idf = this.idf.get(token) || 0;
111
139
 
@@ -114,11 +142,13 @@ export class KeywordSearch {
114
142
  // Score = tf * idf * (normalization?)
115
143
  // Simple variant:
116
144
  const score = tf * idf;
117
-
145
+
118
146
  scores.set(docId, (scores.get(docId) || 0) + score);
119
-
120
- if (!matches.has(docId)) matches.set(docId, []);
121
- matches.get(docId).push(token);
147
+
148
+ if (!matches.has(docId)) {
149
+ matches.set(docId, []);
150
+ }
151
+ matches.get(docId)!.push(token);
122
152
  }
123
153
  }
124
154
 
@@ -128,7 +158,7 @@ export class KeywordSearch {
128
158
  id,
129
159
  score,
130
160
  matches: matches.get(id) || [],
131
- ...this.docs.get(id)
161
+ ...this.docs.get(id)!,
132
162
  }))
133
163
  .sort((a, b) => b.score - a.score)
134
164
  .slice(0, limit);
@@ -136,9 +166,9 @@ export class KeywordSearch {
136
166
 
137
167
  /**
138
168
  * Bulk load records
139
- * @param {Array} records
169
+ * @param {Array} records
140
170
  */
141
- load(records) {
142
- records.forEach(r => this.add(r.id, r.content, r.metadata));
171
+ load(records: { id: string; content: string; metadata?: any }[]): void {
172
+ records.forEach((r) => this.add(r.id, r.content, r.metadata));
143
173
  }
144
174
  }
@@ -0,0 +1,53 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber Default Configuration
3
+ * @module smora/scrubber/config/defaults
4
+ */
5
+ export interface StructuralConfig {
6
+ stripHTML: boolean;
7
+ normalizeMarkdown: boolean;
8
+ collapseWhitespace: boolean;
9
+ removeScripts: boolean;
10
+ removeStyles: boolean;
11
+ }
12
+ export interface SemanticConfig {
13
+ removeDuplicates: boolean;
14
+ removeBoilerplate: boolean;
15
+ minSignalRatio: number;
16
+ boilerplatePatterns: string;
17
+ }
18
+ export interface NormalizationConfig {
19
+ normalizeHeadings: boolean;
20
+ normalizeLists: boolean;
21
+ normalizePunctuation: boolean;
22
+ }
23
+ export interface ChunkingConfig {
24
+ maxTokens: number;
25
+ minTokens: number;
26
+ hardMaxTokens: number;
27
+ splitOnHeadings: boolean;
28
+ preserveContext: boolean;
29
+ }
30
+ export interface MetadataConfig {
31
+ addSource: boolean;
32
+ addSection: boolean;
33
+ addHeadingPath: boolean;
34
+ addTimestamp: boolean;
35
+ addHash: boolean;
36
+ }
37
+ export interface ValidationConfig {
38
+ enforceMinLength: boolean;
39
+ enforceMaxLength: boolean;
40
+ rejectEmptyChunks: boolean;
41
+ }
42
+ export interface ScrubberConfig {
43
+ enabled: boolean;
44
+ structural: StructuralConfig;
45
+ semantic: SemanticConfig;
46
+ normalization: NormalizationConfig;
47
+ chunking: ChunkingConfig;
48
+ metadata: MetadataConfig;
49
+ validation: ValidationConfig;
50
+ logTransformations: boolean;
51
+ cachePatterns: boolean;
52
+ }
53
+ export declare const defaultScrubberConfig: ScrubberConfig;
@@ -2,61 +2,53 @@
2
2
  * S-MORA Layer 0 Scrubber Default Configuration
3
3
  * @module smora/scrubber/config/defaults
4
4
  */
5
-
6
5
  export const defaultScrubberConfig = {
7
- // Master switch
8
- enabled: false,
9
-
10
- // Stage 1: Structural Cleaning
11
- structural: {
12
- stripHTML: true,
13
- normalizeMarkdown: true,
14
- collapseWhitespace: true,
15
- removeScripts: true,
16
- removeStyles: true
17
- },
18
-
19
- // Stage 2: Semantic Filtering
20
- semantic: {
21
- removeDuplicates: true,
22
- removeBoilerplate: true,
23
- minSignalRatio: 0.3,
24
- boilerplatePatterns: 'default'
25
- },
26
-
27
- // Stage 3: Normalization
28
- normalization: {
29
- normalizeHeadings: true,
30
- normalizeLists: true,
31
- normalizePunctuation: true
32
- },
33
-
34
- // Stage 4: Chunking
35
- chunking: {
36
- maxTokens: 500,
37
- minTokens: 10,
38
- hardMaxTokens: 2000,
39
- splitOnHeadings: true,
40
- preserveContext: true
41
- },
42
-
43
- // Stage 5: Metadata Annotation
44
- metadata: {
45
- addSource: true,
46
- addSection: true,
47
- addHeadingPath: true,
48
- addTimestamp: true,
49
- addHash: true
50
- },
51
-
52
- // Stage 6: Validation
53
- validation: {
54
- enforceMinLength: true,
55
- enforceMaxLength: true,
56
- rejectEmptyChunks: true
57
- },
58
-
59
- // Performance
60
- logTransformations: false,
61
- cachePatterns: true
62
- };
6
+ // Master switch - enabled by default for security (PII/sensitive data protection)
7
+ enabled: true,
8
+ // Stage 1: Structural Cleaning
9
+ structural: {
10
+ stripHTML: true,
11
+ normalizeMarkdown: true,
12
+ collapseWhitespace: true,
13
+ removeScripts: true,
14
+ removeStyles: true,
15
+ },
16
+ // Stage 2: Semantic Filtering
17
+ semantic: {
18
+ removeDuplicates: true,
19
+ removeBoilerplate: true,
20
+ minSignalRatio: 0.3,
21
+ boilerplatePatterns: "default",
22
+ },
23
+ // Stage 3: Normalization
24
+ normalization: {
25
+ normalizeHeadings: true,
26
+ normalizeLists: true,
27
+ normalizePunctuation: true,
28
+ },
29
+ // Stage 4: Chunking
30
+ chunking: {
31
+ maxTokens: 500,
32
+ minTokens: 10,
33
+ hardMaxTokens: 2000,
34
+ splitOnHeadings: true,
35
+ preserveContext: true,
36
+ },
37
+ // Stage 5: Metadata Annotation
38
+ metadata: {
39
+ addSource: true,
40
+ addSection: true,
41
+ addHeadingPath: true,
42
+ addTimestamp: true,
43
+ addHash: true,
44
+ },
45
+ // Stage 6: Validation
46
+ validation: {
47
+ enforceMinLength: true,
48
+ enforceMaxLength: true,
49
+ rejectEmptyChunks: true,
50
+ },
51
+ // Performance
52
+ logTransformations: false,
53
+ cachePatterns: true,
54
+ };
@@ -0,0 +1,117 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber Default Configuration
3
+ * @module smora/scrubber/config/defaults
4
+ */
5
+
6
+ export interface StructuralConfig {
7
+ stripHTML: boolean;
8
+ normalizeMarkdown: boolean;
9
+ collapseWhitespace: boolean;
10
+ removeScripts: boolean;
11
+ removeStyles: boolean;
12
+ }
13
+
14
+ export interface SemanticConfig {
15
+ removeDuplicates: boolean;
16
+ removeBoilerplate: boolean;
17
+ minSignalRatio: number;
18
+ boilerplatePatterns: string;
19
+ }
20
+
21
+ export interface NormalizationConfig {
22
+ normalizeHeadings: boolean;
23
+ normalizeLists: boolean;
24
+ normalizePunctuation: boolean;
25
+ }
26
+
27
+ export interface ChunkingConfig {
28
+ maxTokens: number;
29
+ minTokens: number;
30
+ hardMaxTokens: number;
31
+ splitOnHeadings: boolean;
32
+ preserveContext: boolean;
33
+ }
34
+
35
+ export interface MetadataConfig {
36
+ addSource: boolean;
37
+ addSection: boolean;
38
+ addHeadingPath: boolean;
39
+ addTimestamp: boolean;
40
+ addHash: boolean;
41
+ }
42
+
43
+ export interface ValidationConfig {
44
+ enforceMinLength: boolean;
45
+ enforceMaxLength: boolean;
46
+ rejectEmptyChunks: boolean;
47
+ }
48
+
49
+ export interface ScrubberConfig {
50
+ enabled: boolean;
51
+ structural: StructuralConfig;
52
+ semantic: SemanticConfig;
53
+ normalization: NormalizationConfig;
54
+ chunking: ChunkingConfig;
55
+ metadata: MetadataConfig;
56
+ validation: ValidationConfig;
57
+ logTransformations: boolean;
58
+ cachePatterns: boolean;
59
+ }
60
+
61
+ export const defaultScrubberConfig: ScrubberConfig = {
62
+ // Master switch - enabled by default for security (PII/sensitive data protection)
63
+ enabled: true,
64
+
65
+ // Stage 1: Structural Cleaning
66
+ structural: {
67
+ stripHTML: true,
68
+ normalizeMarkdown: true,
69
+ collapseWhitespace: true,
70
+ removeScripts: true,
71
+ removeStyles: true,
72
+ },
73
+
74
+ // Stage 2: Semantic Filtering
75
+ semantic: {
76
+ removeDuplicates: true,
77
+ removeBoilerplate: true,
78
+ minSignalRatio: 0.3,
79
+ boilerplatePatterns: "default",
80
+ },
81
+
82
+ // Stage 3: Normalization
83
+ normalization: {
84
+ normalizeHeadings: true,
85
+ normalizeLists: true,
86
+ normalizePunctuation: true,
87
+ },
88
+
89
+ // Stage 4: Chunking
90
+ chunking: {
91
+ maxTokens: 500,
92
+ minTokens: 10,
93
+ hardMaxTokens: 2000,
94
+ splitOnHeadings: true,
95
+ preserveContext: true,
96
+ },
97
+
98
+ // Stage 5: Metadata Annotation
99
+ metadata: {
100
+ addSource: true,
101
+ addSection: true,
102
+ addHeadingPath: true,
103
+ addTimestamp: true,
104
+ addHash: true,
105
+ },
106
+
107
+ // Stage 6: Validation
108
+ validation: {
109
+ enforceMinLength: true,
110
+ enforceMaxLength: true,
111
+ rejectEmptyChunks: true,
112
+ },
113
+
114
+ // Performance
115
+ logTransformations: false,
116
+ cachePatterns: true,
117
+ };
@@ -0,0 +1,6 @@
1
+ /**
2
+ * YAMO Scrubber Module
3
+ * PII and sensitive data sanitization
4
+ */
5
+ export { Scrubber } from "./scrubber.js";
6
+ export { ScrubberConfig } from "./config/defaults.js";
@@ -1,25 +1,5 @@
1
1
  /**
2
- * S-MORA Layer 0 Scrubber
3
- * Deterministic ingestion-time preprocessing layer
4
- * @module smora/scrubber
2
+ * YAMO Scrubber Module
3
+ * PII and sensitive data sanitization
5
4
  */
6
-
7
- export { defaultScrubberConfig } from './config/defaults.js';
8
- export {
9
- ScrubberError,
10
- StructuralCleaningError,
11
- ChunkingError,
12
- ValidationError
13
- } from './errors/scrubber-error.js';
14
- export { ScrubberTelemetry } from './telemetry.js';
15
- export { Scrubber } from './scrubber.js';
16
- export { HashUtil } from './utils/hash.js';
17
- export { TokenCounter } from './utils/token-counter.js';
18
- export { PatternMatcher } from './utils/pattern-matcher.js';
19
- export { HTMLParser } from './utils/html-parser.js';
20
- export { StructuralCleaner } from './stages/structural-cleaner.js';
21
- export { SemanticFilter } from './stages/semantic-filter.js';
22
- export { Normalizer } from './stages/normalizer.js';
23
- export { Chunker } from './stages/chunker.js';
24
- export { MetadataAnnotator } from './stages/metadata-annotator.js';
25
- export { Validator } from './stages/validator.js';
5
+ export { Scrubber } from "./scrubber.js";
@@ -0,0 +1,7 @@
1
+ /**
2
+ * YAMO Scrubber Module
3
+ * PII and sensitive data sanitization
4
+ */
5
+
6
+ export { Scrubber } from "./scrubber.js";
7
+ export { ScrubberConfig } from "./config/defaults.js";
@@ -0,0 +1,61 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber - Main Orchestrator
3
+ * @module smora/scrubber/scrubber
4
+ */
5
+ import { StructuralCleaner } from "./stages/structural-cleaner.js";
6
+ import { SemanticFilter } from "./stages/semantic-filter.js";
7
+ import { Normalizer } from "./stages/normalizer.js";
8
+ import { Chunker } from "./stages/chunker.js";
9
+ import { MetadataAnnotator } from "./stages/metadata-annotator.js";
10
+ import { Validator } from "./stages/validator.js";
11
+ import { ScrubberTelemetry, TelemetrySummary, StageSummary } from "./telemetry.js";
12
+ import { ScrubberConfig } from "./config/defaults.js";
13
+ export interface ScrubberDocument {
14
+ content: string;
15
+ source: string;
16
+ type: string;
17
+ }
18
+ export interface Chunk {
19
+ text: string;
20
+ [key: string]: any;
21
+ }
22
+ export interface ScrubberResult {
23
+ chunks: Chunk[];
24
+ metadata: {
25
+ source: string;
26
+ type: string;
27
+ processingTimestamp: string;
28
+ [key: string]: any;
29
+ };
30
+ telemetry: Partial<Record<string, StageSummary>> & {
31
+ totalDuration?: number;
32
+ };
33
+ success?: boolean;
34
+ error?: string;
35
+ }
36
+ export declare class Scrubber {
37
+ config: ScrubberConfig;
38
+ stages: any;
39
+ telemetry: ScrubberTelemetry;
40
+ constructor(config?: Partial<ScrubberConfig>);
41
+ /**
42
+ * Main entry point - process a raw document
43
+ * @param {Object} document - { content: string, source: string, type: 'html'|'md'|'txt' }
44
+ * @returns {Promise<Object>} - { chunks: Array, metadata: Object, telemetry: Object }
45
+ */
46
+ process(document: ScrubberDocument): Promise<ScrubberResult>;
47
+ _executeStage<T>(stageName: string, stageFn: () => Promise<T> | T): Promise<T>;
48
+ _initializeStages(): {
49
+ structural: StructuralCleaner;
50
+ semantic: SemanticFilter;
51
+ normalizer: Normalizer;
52
+ chunker: Chunker;
53
+ metadata: MetadataAnnotator;
54
+ validator: Validator;
55
+ };
56
+ getMetrics(): TelemetrySummary;
57
+ healthCheck(): Promise<{
58
+ status: string;
59
+ }>;
60
+ }
61
+ export default Scrubber;