@yamo/memory-mesh 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +80 -0
  3. package/bin/memory_mesh.js +69 -0
  4. package/bin/scrubber.js +81 -0
  5. package/index.d.ts +111 -0
  6. package/lib/adapters/index.js +3 -0
  7. package/lib/embeddings/factory.js +150 -0
  8. package/lib/embeddings/index.js +2 -0
  9. package/lib/embeddings/service.js +586 -0
  10. package/lib/index.js +18 -0
  11. package/lib/lancedb/client.js +631 -0
  12. package/lib/lancedb/config.js +215 -0
  13. package/lib/lancedb/errors.js +144 -0
  14. package/lib/lancedb/index.js +4 -0
  15. package/lib/lancedb/schema.js +197 -0
  16. package/lib/memory/index.js +3 -0
  17. package/lib/memory/memory-context-manager.js +388 -0
  18. package/lib/memory/memory-mesh.js +910 -0
  19. package/lib/memory/memory-translator.js +130 -0
  20. package/lib/memory/migrate-memory.js +227 -0
  21. package/lib/memory/migrate-to-v2.js +120 -0
  22. package/lib/memory/scorer.js +85 -0
  23. package/lib/memory/vector-memory.js +364 -0
  24. package/lib/privacy/audit-logger.js +176 -0
  25. package/lib/privacy/dlp-redactor.js +72 -0
  26. package/lib/privacy/index.js +10 -0
  27. package/lib/reporting/skill-report-generator.js +283 -0
  28. package/lib/scrubber/.gitkeep +1 -0
  29. package/lib/scrubber/config/defaults.js +62 -0
  30. package/lib/scrubber/errors/scrubber-error.js +43 -0
  31. package/lib/scrubber/index.js +25 -0
  32. package/lib/scrubber/scrubber.js +130 -0
  33. package/lib/scrubber/stages/chunker.js +103 -0
  34. package/lib/scrubber/stages/metadata-annotator.js +74 -0
  35. package/lib/scrubber/stages/normalizer.js +59 -0
  36. package/lib/scrubber/stages/semantic-filter.js +61 -0
  37. package/lib/scrubber/stages/structural-cleaner.js +82 -0
  38. package/lib/scrubber/stages/validator.js +66 -0
  39. package/lib/scrubber/telemetry.js +66 -0
  40. package/lib/scrubber/utils/hash.js +39 -0
  41. package/lib/scrubber/utils/html-parser.js +45 -0
  42. package/lib/scrubber/utils/pattern-matcher.js +63 -0
  43. package/lib/scrubber/utils/token-counter.js +31 -0
  44. package/lib/search/filter.js +275 -0
  45. package/lib/search/hybrid.js +137 -0
  46. package/lib/search/index.js +3 -0
  47. package/lib/search/pattern-miner.js +160 -0
  48. package/lib/utils/error-sanitizer.js +84 -0
  49. package/lib/utils/handoff-validator.js +85 -0
  50. package/lib/utils/index.js +4 -0
  51. package/lib/utils/spinner.js +190 -0
  52. package/lib/utils/streaming-client.js +128 -0
  53. package/package.json +39 -0
  54. package/skills/SKILL.md +462 -0
  55. package/skills/skill-scrubber.yamo +41 -0
@@ -0,0 +1,283 @@
1
+ import { promises as fs } from 'fs';
2
+ import path from 'path';
3
+
4
+ /**
5
+ * Skill Execution Report Generator
6
+ *
7
+ * Generates JSON reports for skill executions, capturing:
8
+ * - Skill metadata (name, version, type)
9
+ * - Execution details (duration, status, provider)
10
+ * - Input/output metrics
11
+ * - Quality indicators
12
+ */
13
export class SkillReportGenerator {
  /**
   * @param {Object} [options]
   * @param {string} [options.reportsDir] - Directory reports are written to.
   *   Falls back to `<home>/.yamo/reports` (see `_getReportsDir`).
   */
  constructor(options = {}) {
    this.reportsDir = options.reportsDir || this._getReportsDir();
    this.version = '1.0.0';
  }

  /**
   * Resolve the default reports directory from the environment.
   * @returns {string} `<HOME|USERPROFILE|cwd>/.yamo/reports`
   */
  _getReportsDir() {
    // @ts-ignore
    const home = process.env.HOME || process.env.USERPROFILE || process.cwd();
    return path.join(home, '.yamo', 'reports');
  }

  /**
   * Generate a report ID from the current time and the session identifier.
   * @param {string} sessionId - Session identifier
   * @returns {string} Report ID (`skill_execution_<epoch ms>_<first 8 chars of session>`)
   */
  _generateReportId(sessionId) {
    const timestamp = Date.now();
    // String() keeps a numeric (or otherwise non-string) session id from throwing.
    const shortSession = sessionId ? String(sessionId).substring(0, 8) : 'unknown';
    return `skill_execution_${timestamp}_${shortSession}`;
  }

  /**
   * Find the first `.yamo` entry among the context files.
   * @param {string[]} contextFiles - Context files used
   * @returns {string|undefined} The first `.yamo` path, if any
   */
  _findSkillFile(contextFiles) {
    if (!Array.isArray(contextFiles)) return undefined;
    return contextFiles.find(f => typeof f === 'string' && f.endsWith('.yamo'));
  }

  /**
   * Extract skill type from file path or name
   * @param {string} skillName - Name of the skill
   * @param {string[]} contextFiles - Context files used
   * @returns {string} Skill type: 'direct', 'utility', 'generator', 'protocol',
   *   'system', 'custom', or 'unknown' when no `.yamo` file is present
   */
  _getSkillType(skillName, contextFiles = []) {
    if (skillName === 'LLMClient') return 'direct';

    const skillFile = this._findSkillFile(contextFiles);
    if (!skillFile) return 'unknown';

    // Normalise separators so Windows-style paths are classified too.
    const normalized = skillFile.replace(/\\/g, '/');
    if (normalized.includes('utility/')) return 'utility';
    if (normalized.includes('generator/')) return 'generator';
    if (normalized.includes('protocol/')) return 'protocol';
    if (normalized.includes('system-skills/')) return 'system';

    return 'custom';
  }

  /**
   * Parse skill metadata from .yamo file path
   * @param {string[]} contextFiles - Context files
   * @returns {{version: (string|null), description: null, file: (string|null)}}
   *   Skill metadata; the same keys are present whether or not a file was found
   */
  _parseSkillMetadata(contextFiles = []) {
    const skillFile = this._findSkillFile(contextFiles);
    if (!skillFile) {
      return { version: null, description: null, file: null };
    }

    // Return basic info - full parsing would require reading the file
    return {
      version: '1.0.0', // Default version
      description: null,
      file: skillFile
    };
  }

  /**
   * Create a report object from execution data
   * @param {Object} executionData - Data from skill execution
   * @returns {Object} Report object
   */
  createReport(executionData = {}) {
    const {
      skill,
      sessionId,
      duration,
      provider,
      model,
      promptLength,
      responseLength,
      contextFiles = [],
      parameters = {},
      status = 'success',
      error = null,
      artifactsCreated = [],
      memoryCaptured = false
    } = executionData;

    const skillMeta = this._parseSkillMetadata(contextFiles);
    // A missing duration would round to NaN; report it as null instead
    // (JSON.stringify would have written null for NaN anyway).
    const roundedDuration = Math.round(duration);

    return {
      report_id: this._generateReportId(sessionId),
      timestamp: new Date().toISOString(),
      skill: {
        name: skill,
        version: skillMeta.version,
        type: this._getSkillType(skill, contextFiles),
        file: skillMeta.file ?? null
      },
      execution: {
        session_id: sessionId,
        duration_ms: Number.isFinite(roundedDuration) ? roundedDuration : null,
        status,
        error: error ? String(error) : null,
        provider,
        model
      },
      input: {
        prompt_length: promptLength,
        context_files: contextFiles,
        parameters
      },
      output: {
        response_length: responseLength,
        artifacts_created: artifactsCreated,
        tokens_used: null // Could be populated if provider returns token count
      },
      quality: {
        memory_captured: memoryCaptured,
        artifacts_saved: artifactsCreated.length > 0
      },
      meta: {
        generator: 'yamo-skills',
        version: this.version
      }
    };
  }

  /**
   * Generate filename for a report
   * @param {Object} report - Report object (expects an ISO-8601 `timestamp`)
   * @returns {string} Filename
   */
  getReportFilename(report) {
    // Format: skill-{name}_{YYYY-MM-DD}_{HH-MM-SS}-{ms}.json
    // Include milliseconds for uniqueness when multiple reports per second
    const rawName = report.skill?.name ?? 'unknown';
    const safeName = String(rawName).toLowerCase().replace(/[^a-z0-9]/g, '-');
    const timestamp = report.timestamp.replace(/[:.]/g, '-').replace('T', '_').slice(0, 19);
    const ms = report.timestamp.slice(20, 23) || '000';
    return `skill-${safeName}_${timestamp}-${ms}.json`;
  }

  /**
   * Order report filenames newest first using the timestamp embedded by
   * `getReportFilename`. Filenames begin with the skill name, so a plain
   * name sort would group by skill rather than by time. Names without a
   * recognisable timestamp sort last; ties fall back to descending name order.
   * @param {string[]} filenames - Report filenames
   * @returns {string[]} New array, newest first
   */
  _sortNewestFirst(filenames) {
    const stampPattern = /_(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-\d{3})\.json$/;
    const stampOf = name => {
      const match = stampPattern.exec(name);
      return match ? match[1] : '';
    };

    return [...filenames].sort((a, b) => {
      const stampA = stampOf(a);
      const stampB = stampOf(b);
      if (stampA !== stampB) return stampA < stampB ? 1 : -1;
      return b.localeCompare(a);
    });
  }

  /**
   * Ensure reports directory exists
   */
  async _ensureReportsDir() {
    try {
      await fs.mkdir(this.reportsDir, { recursive: true });
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      // @ts-ignore
      if (e.code !== 'EEXIST') {
        throw e;
      }
    }
  }

  /**
   * Save a report to disk
   * @param {Object} report - Report object to save
   * @returns {Promise<string>} Path to saved report
   */
  async saveReport(report) {
    await this._ensureReportsDir();

    const filepath = path.join(this.reportsDir, this.getReportFilename(report));
    await fs.writeFile(filepath, JSON.stringify(report, null, 2), 'utf8');

    return filepath;
  }

  /**
   * Generate and save a report in one call
   * @param {Object} executionData - Data from skill execution
   * @returns {Promise<Object>} Report object with an extra `_filepath` key
   */
  async generateAndSave(executionData) {
    const report = this.createReport(executionData);
    const filepath = await this.saveReport(report);

    return {
      ...report,
      _filepath: filepath
    };
  }

  /**
   * List recent reports
   * @param {number} limit - Maximum number of reports to return
   * @returns {Promise<string[]>} Array of report filenames, newest first;
   *   empty when the directory cannot be created or read
   */
  async listReports(limit = 10) {
    try {
      await this._ensureReportsDir();
      const files = await fs.readdir(this.reportsDir);
      const jsonFiles = files.filter(f => f.endsWith('.json'));

      return this._sortNewestFirst(jsonFiles).slice(0, limit);
    } catch (error) {
      // Listing is best-effort: any filesystem failure yields an empty list.
      return [];
    }
  }

  /**
   * Read a specific report
   * @param {string} filename - Report filename, joined onto `reportsDir` as given
   * @returns {Promise<Object|null>} Report object, or null when the file is
   *   missing, unreadable, or not valid JSON
   */
  async readReport(filename) {
    try {
      const filepath = path.join(this.reportsDir, filename);
      const content = await fs.readFile(filepath, 'utf8');
      return JSON.parse(content);
    } catch (error) {
      return null;
    }
  }

  /**
   * Get aggregate statistics from recent reports
   * @param {number} limit - Number of reports to analyze
   * @returns {Promise<Object>} Statistics object. `total_reports` counts the
   *   files listed; the counters, duration sum, and average only cover reports
   *   that could actually be read.
   */
  async getStats(limit = 100) {
    const reportFiles = await this.listReports(limit);
    const stats = {
      total_reports: reportFiles.length,
      skills_used: {},
      providers_used: {},
      success_count: 0,
      error_count: 0,
      total_duration_ms: 0,
      avg_duration_ms: 0
    };

    let analyzed = 0;
    for (const filename of reportFiles) {
      const report = await this.readReport(filename);
      if (!report) continue;
      analyzed += 1;

      // Count skills
      const skillName = report.skill?.name || 'unknown';
      stats.skills_used[skillName] = (stats.skills_used[skillName] || 0) + 1;

      // Count providers
      const provider = report.execution?.provider || 'unknown';
      stats.providers_used[provider] = (stats.providers_used[provider] || 0) + 1;

      // Count success/error
      if (report.execution?.status === 'success') {
        stats.success_count++;
      } else {
        stats.error_count++;
      }

      // Sum duration
      stats.total_duration_ms += report.execution?.duration_ms || 0;
    }

    // Average over the reports that contributed a duration, so unreadable
    // files do not drag the mean down.
    if (analyzed > 0) {
      stats.avg_duration_ms = Math.round(stats.total_duration_ms / analyzed);
    }

    return stats;
  }
}
282
+
283
+ export default SkillReportGenerator;
@@ -0,0 +1 @@
1
+ # This directory contains S-MORA Layer 0 Scrubber components
@@ -0,0 +1,62 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber Default Configuration
3
+ * @module smora/scrubber/config/defaults
4
+ */
5
+
6
/**
 * Default settings for the scrubber, grouped by pipeline stage.
 * The scrubber is switched off unless `enabled` is overridden.
 */
export const defaultScrubberConfig = {
  // Global on/off switch for the whole pipeline
  enabled: false,

  // Stage 1 - structural cleaning options
  structural: {
    stripHTML: true,
    normalizeMarkdown: true,
    collapseWhitespace: true,
    removeScripts: true,
    removeStyles: true,
  },

  // Stage 2 - semantic filtering options
  semantic: {
    removeDuplicates: true,
    removeBoilerplate: true,
    minSignalRatio: 0.3,
    boilerplatePatterns: 'default',
  },

  // Stage 3 - normalization options
  normalization: {
    normalizeHeadings: true,
    normalizeLists: true,
    normalizePunctuation: true,
  },

  // Stage 4 - chunking limits (token counts) and split behaviour
  chunking: {
    maxTokens: 500,
    minTokens: 10,
    hardMaxTokens: 2000,
    splitOnHeadings: true,
    preserveContext: true,
  },

  // Stage 5 - which metadata fields to attach to chunks
  metadata: {
    addSource: true,
    addSection: true,
    addHeadingPath: true,
    addTimestamp: true,
    addHash: true,
  },

  // Stage 6 - validation rules
  validation: {
    enforceMinLength: true,
    enforceMaxLength: true,
    rejectEmptyChunks: true,
  },

  // Performance-related flags
  logTransformations: false,
  cachePatterns: true,
};
@@ -0,0 +1,43 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber Error Classes
3
+ * @module smora/scrubber/errors/scrubber-error
4
+ */
5
+
6
/**
 * Base error for the scrubber. Carries a free-form `details` object and the
 * ISO timestamp of when the error instance was constructed.
 */
export class ScrubberError extends Error {
  /**
   * @param {string} message - Human-readable description
   * @param {Object} [details] - Extra context stored on the error as-is
   */
  constructor(message, details = {}) {
    super(message);
    Object.assign(this, {
      name: 'ScrubberError',
      details,
      timestamp: new Date().toISOString()
    });
  }

  /**
   * Plain-object view used by JSON.stringify.
   * @returns {{name: string, message: string, details: Object, timestamp: string}}
   */
  toJSON() {
    const { name, message, details, timestamp } = this;
    return { name, message, details, timestamp };
  }
}
23
+
24
/**
 * ScrubberError variant whose `name` is 'StructuralCleaningError'.
 * Presumably raised by the structural-cleaning stage; throw sites are not
 * visible in this file.
 */
export class StructuralCleaningError extends ScrubberError {
  /**
   * @param {string} message - Human-readable description
   * @param {Object} [details] - Extra context forwarded to ScrubberError
   */
  constructor(message, details = {}) {
    super(message, details);
    Object.assign(this, { name: 'StructuralCleaningError' });
  }
}
30
+
31
/**
 * ScrubberError variant whose `name` is 'ChunkingError'.
 * Presumably intended for the chunking stage; throw sites are not visible in
 * this file.
 */
export class ChunkingError extends ScrubberError {
  /**
   * @param {string} message - Human-readable description
   * @param {Object} [details] - Extra context forwarded to ScrubberError
   */
  constructor(message, details = {}) {
    super(message, details);
    Object.assign(this, { name: 'ChunkingError' });
  }
}
37
+
38
/**
 * ScrubberError variant whose `name` is 'ValidationError'.
 * Presumably raised by the validation stage; throw sites are not visible in
 * this file.
 */
export class ValidationError extends ScrubberError {
  /**
   * @param {string} message - Human-readable description
   * @param {Object} [details] - Extra context forwarded to ScrubberError
   */
  constructor(message, details = {}) {
    super(message, details);
    Object.assign(this, { name: 'ValidationError' });
  }
}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber
3
+ * Deterministic ingestion-time preprocessing layer
4
+ * @module smora/scrubber
5
+ */
6
+
7
+ export { defaultScrubberConfig } from './config/defaults.js';
8
+ export {
9
+ ScrubberError,
10
+ StructuralCleaningError,
11
+ ChunkingError,
12
+ ValidationError
13
+ } from './errors/scrubber-error.js';
14
+ export { ScrubberTelemetry } from './telemetry.js';
15
+ export { Scrubber } from './scrubber.js';
16
+ export { HashUtil } from './utils/hash.js';
17
+ export { TokenCounter } from './utils/token-counter.js';
18
+ export { PatternMatcher } from './utils/pattern-matcher.js';
19
+ export { HTMLParser } from './utils/html-parser.js';
20
+ export { StructuralCleaner } from './stages/structural-cleaner.js';
21
+ export { SemanticFilter } from './stages/semantic-filter.js';
22
+ export { Normalizer } from './stages/normalizer.js';
23
+ export { Chunker } from './stages/chunker.js';
24
+ export { MetadataAnnotator } from './stages/metadata-annotator.js';
25
+ export { Validator } from './stages/validator.js';
@@ -0,0 +1,130 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber - Main Orchestrator
3
+ * @module smora/scrubber/scrubber
4
+ */
5
+
6
+ import { StructuralCleaner } from './stages/structural-cleaner.js';
7
+ import { SemanticFilter } from './stages/semantic-filter.js';
8
+ import { Normalizer } from './stages/normalizer.js';
9
+ import { Chunker } from './stages/chunker.js';
10
+ import { MetadataAnnotator } from './stages/metadata-annotator.js';
11
+ import { Validator } from './stages/validator.js';
12
+ import { ScrubberTelemetry } from './telemetry.js';
13
+ import { ScrubberError } from './errors/scrubber-error.js';
14
+ import { defaultScrubberConfig } from './config/defaults.js';
15
+
16
export class Scrubber {
  /**
   * @param {Object} [config] - Overrides for `defaultScrubberConfig`. Stage
   *   sections (plain objects) are merged key-by-key with their defaults, so a
   *   partial section such as `{ chunking: { maxTokens: 100 } }` keeps the
   *   remaining default keys of that section.
   */
  constructor(config = {}) {
    this.config = this._mergeConfig(defaultScrubberConfig, config);
    this.stages = this._initializeStages();
    this.telemetry = new ScrubberTelemetry();
  }

  /**
   * Merge user overrides onto defaults, one level deep.
   * Top-level scalars are replaced; plain-object sections are merged so that
   * omitted keys fall back to the default. Sections are copied, never shared
   * with the `defaults` object.
   * @param {Object} defaults - Baseline configuration
   * @param {Object} overrides - User-supplied configuration
   * @returns {Object} Merged configuration
   */
  _mergeConfig(defaults, overrides) {
    const isPlainObject = value =>
      value !== null && typeof value === 'object' && !Array.isArray(value);

    const merged = { ...defaults, ...overrides };
    for (const [key, base] of Object.entries(defaults)) {
      if (!isPlainObject(base)) continue;
      const custom = overrides?.[key];
      if (custom === undefined) {
        merged[key] = { ...base };
      } else if (isPlainObject(custom)) {
        merged[key] = { ...base, ...custom };
      }
      // Any other override value (null, scalar, array) replaces the section as given.
    }
    return merged;
  }

  /**
   * Main entry point - process a raw document
   * @param {Object} document - { content: string, source: string, type: 'html'|'md'|'txt' }
   * @returns {Promise<Object>} - { chunks: Array, metadata: Object, telemetry: Object, success: boolean }
   *   plus `error` (message string) when a stage failed. The promise always
   *   resolves to this result object; stage failures are reported through
   *   `success: false` rather than a rejection.
   */
  async process(document) {
    const startTime = Date.now();
    const result = {
      chunks: [],
      metadata: {
        // Optional chaining so a missing document is reported via the result
        // (success: false) instead of throwing before the try block.
        source: document?.source,
        type: document?.type,
        processingTimestamp: new Date().toISOString()
      },
      telemetry: {}
    };

    // Run one stage and copy its stats into the result, even when it throws,
    // so the failing stage is visible in the returned telemetry.
    const runStage = async (stageName, stageFn) => {
      try {
        return await this._executeStage(stageName, stageFn);
      } finally {
        result.telemetry[stageName] = this.telemetry.getStageStats(stageName);
      }
    };

    try {
      // If disabled, return empty chunks
      if (!this.config.enabled) {
        result.success = true;
        result.telemetry.totalDuration = Date.now() - startTime;
        return result;
      }

      // Stage 1: Structural Cleaning
      const cleaned = await runStage('structural', () =>
        this.stages.structural.clean(document.content)
      );

      // Stage 2: Semantic Filtering
      const filtered = await runStage('semantic', () =>
        this.stages.semantic.filter(cleaned)
      );

      // Stage 3: Normalization
      const normalized = await runStage('normalization', () =>
        this.stages.normalizer.normalize(filtered)
      );

      // Stage 4: Chunking
      const chunks = await runStage('chunking', () =>
        this.stages.chunker.chunk(normalized)
      );

      // Stage 5: Metadata Annotation
      const annotated = await runStage('metadata', () =>
        this.stages.metadata.annotate(chunks, document)
      );

      // Stage 6: Validation
      result.chunks = await runStage('validation', () =>
        this.stages.validator.validate(annotated)
      );

      result.success = true;
    } catch (error) {
      result.success = false;
      result.error = error instanceof Error ? error.message : String(error);
    }

    result.telemetry.totalDuration = Date.now() - startTime;
    // Returned on both paths: previously the failure path fell through and
    // resolved to undefined, losing `success`/`error`.
    return result;
  }

  /**
   * Run a stage function, recording its duration and outcome in telemetry.
   * @param {string} stageName - Telemetry key for the stage
   * @param {Function} stageFn - Zero-argument function performing the stage
   * @returns {Promise<*>} Whatever `stageFn` resolves to
   * @throws Re-throws any error from `stageFn` after recording the failure
   */
  async _executeStage(stageName, stageFn) {
    const startTime = Date.now();
    try {
      const result = await stageFn();
      this.telemetry.recordStage(stageName, Date.now() - startTime, true);
      return result;
    } catch (error) {
      this.telemetry.recordStage(stageName, Date.now() - startTime, false);
      throw error;
    }
  }

  /**
   * Build one instance of every stage from its config section.
   * @returns {Object} Stage instances keyed by the names `process` uses
   */
  _initializeStages() {
    return {
      structural: new StructuralCleaner(this.config.structural),
      semantic: new SemanticFilter(this.config.semantic),
      normalizer: new Normalizer(this.config.normalization),
      chunker: new Chunker(this.config.chunking),
      metadata: new MetadataAnnotator(this.config.metadata),
      validator: new Validator(this.config.validation)
    };
  }

  /**
   * @returns {*} The telemetry summary (`ScrubberTelemetry#getSummary`)
   */
  getMetrics() {
    return this.telemetry.getSummary();
  }

  /**
   * @returns {Promise<{status: string}>} Always `{ status: 'healthy' }`
   */
  async healthCheck() {
    return { status: 'healthy' };
  }
}
129
+
130
+ export default Scrubber;
@@ -0,0 +1,103 @@
1
+ /**
2
+ * S-MORA Layer 0 Scrubber - Stage 4: Chunking
3
+ * @module smora/scrubber/stages/chunker
4
+ */
5
+
6
+ import { TokenCounter } from '../utils/token-counter.js';
7
+ import { ChunkingError, ScrubberError } from '../errors/scrubber-error.js';
8
+
9
export class Chunker {
  /**
   * @param {Object} config - Chunking settings; this class reads `maxTokens`,
   *   `minTokens`, `hardMaxTokens` and `splitOnHeadings`.
   */
  constructor(config) {
    this.config = config;
    this.tokenCounter = new TokenCounter();
  }

  /**
   * Split content into chunks.
   *
   * Content is split on blank lines into paragraphs, which are accumulated
   * until a heading (when `splitOnHeadings` is set) or the `maxTokens` budget
   * starts a new chunk. A chunk is emitted immediately once it exceeds
   * `hardMaxTokens`. Chunks below `minTokens` are discarded at split points
   * and at the end of the input.
   *
   * @param {string} content - Normalized content
   * @returns {Promise<Array>} - Array of `{ index, text, metadata: { tokens, heading, position } }`
   * @throws {ChunkingError} When chunking fails; the original error is kept
   *   in `details.originalError`. ChunkingError extends ScrubberError, so
   *   handlers catching ScrubberError keep working.
   */
  async chunk(content) {
    try {
      const chunks = [];
      const paragraphs = content.split(/\n\n+/);

      let currentChunk = {
        text: '',
        tokens: 0,
        heading: this._extractInitialHeading(content)
      };

      for (const para of paragraphs) {
        const isHeading = this._isHeading(para);
        const paraTokens = this.tokenCounter.count(para);

        if (this._shouldStartNewChunk(currentChunk, para, paraTokens, isHeading)) {
          if (currentChunk.tokens >= this.config.minTokens) {
            chunks.push({ ...currentChunk });
          }
          currentChunk = {
            text: '',
            tokens: 0,
            heading: isHeading ? this._extractHeadingText(para) : currentChunk.heading
          };
        } else if (isHeading && currentChunk.tokens === 0) {
          // A heading opening an empty chunk (e.g. right after a hard-max
          // flush) never triggers a split, so adopt its text here.
          currentChunk.heading = this._extractHeadingText(para) ?? currentChunk.heading;
        }

        currentChunk.text += (currentChunk.text ? '\n\n' : '') + para;
        currentChunk.tokens += paraTokens;

        if (currentChunk.tokens > this.config.hardMaxTokens) {
          chunks.push({ ...currentChunk });
          // Carry the heading over, as the regular split path does, so the
          // remainder of an oversized section keeps its heading.
          currentChunk = { text: '', tokens: 0, heading: currentChunk.heading };
        }
      }

      if (currentChunk.tokens >= this.config.minTokens) {
        chunks.push(currentChunk);
      }

      return chunks.map((chunk, index) => ({
        index,
        text: chunk.text.trim(),
        metadata: {
          tokens: chunk.tokens,
          heading: chunk.heading,
          position: index
        }
      }));
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      throw new ChunkingError(
        `Failed to chunk content: ${message}`,
        { stage: 'chunker', originalError: error }
      );
    }
  }

  /**
   * @param {string} line - Paragraph text
   * @returns {boolean} True when the text starts with a Markdown ATX heading marker
   */
  _isHeading(line) {
    return /^#{1,6}\s/.test(line);
  }

  /**
   * Decide whether `para` should open a new chunk.
   * @param {Object} currentChunk - Chunk being accumulated
   * @param {string} para - Paragraph about to be added (unused; kept for interface stability)
   * @param {number} paraTokens - Token count of `para`
   * @param {boolean} isHeading - Whether `para` is a heading
   * @returns {boolean} True when a non-empty chunk should be closed first
   */
  _shouldStartNewChunk(currentChunk, para, paraTokens, isHeading) {
    // An empty chunk never splits: there is nothing to close yet.
    if (currentChunk.tokens <= 0) {
      return false;
    }

    if (this.config.splitOnHeadings && isHeading) {
      return true;
    }

    return (currentChunk.tokens + paraTokens) > this.config.maxTokens;
  }

  /**
   * @param {string} content - Whole document
   * @returns {string|null} Text of the first heading line anywhere in the
   *   content, or null when there is none
   */
  _extractInitialHeading(content) {
    const match = content.match(/^#{1,6}\s+(.+)$/m);
    return match ? match[1] : null;
  }

  /**
   * @param {string} headingLine - Paragraph that starts with a heading
   * @returns {string|null} Heading text without the leading `#` markers
   */
  _extractHeadingText(headingLine) {
    // Only the first line is the heading; a paragraph may continue with body
    // text after a single newline, which previously made the match fail.
    const [firstLine] = headingLine.split('\n', 1);
    const match = firstLine.match(/^#{1,6}\s+(.+)$/);
    return match ? match[1] : null;
  }
}