catport 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/ARCHITECTURE.md +94 -0
  2. package/CONTRIBUTING.md +133 -0
  3. package/LICENSE +21 -0
  4. package/README.md +414 -0
  5. package/bin/catport +8 -0
  6. package/package.json +48 -0
  7. package/src/cli/args.js +133 -0
  8. package/src/cli/main.js +78 -0
  9. package/src/cli/parser.js +152 -0
  10. package/src/cli/ui.js +78 -0
  11. package/src/config/constants.js +62 -0
  12. package/src/config/ignores.js +119 -0
  13. package/src/config/loader.js +15 -0
  14. package/src/config/options.js +181 -0
  15. package/src/core/analyzer.js +23 -0
  16. package/src/core/bundler.js +165 -0
  17. package/src/core/extractor.js +76 -0
  18. package/src/core/ignore.js +65 -0
  19. package/src/core/processor.js +59 -0
  20. package/src/core/scanner.js +184 -0
  21. package/src/formatters/index.js +78 -0
  22. package/src/formatters/json.js +284 -0
  23. package/src/formatters/markdown.js +164 -0
  24. package/src/formatters/multipart.js +127 -0
  25. package/src/formatters/xml.js +221 -0
  26. package/src/formatters/yaml.js +147 -0
  27. package/src/index.js +11 -0
  28. package/src/optimizers/definitions.js +79 -0
  29. package/src/optimizers/index.js +96 -0
  30. package/src/optimizers/langs/batch.js +3 -0
  31. package/src/optimizers/langs/c_family.js +3 -0
  32. package/src/optimizers/langs/clojure.js +3 -0
  33. package/src/optimizers/langs/css.js +3 -0
  34. package/src/optimizers/langs/go.js +5 -0
  35. package/src/optimizers/langs/haskell.js +4 -0
  36. package/src/optimizers/langs/html.js +4 -0
  37. package/src/optimizers/langs/ini.js +4 -0
  38. package/src/optimizers/langs/javascript.js +11 -0
  39. package/src/optimizers/langs/lua.js +4 -0
  40. package/src/optimizers/langs/markdown.js +3 -0
  41. package/src/optimizers/langs/perl.js +3 -0
  42. package/src/optimizers/langs/php.js +4 -0
  43. package/src/optimizers/langs/powershell.js +5 -0
  44. package/src/optimizers/langs/python.js +5 -0
  45. package/src/optimizers/langs/ruby.js +4 -0
  46. package/src/optimizers/langs/rust.js +3 -0
  47. package/src/optimizers/langs/shell.js +4 -0
  48. package/src/optimizers/langs/sql.js +4 -0
  49. package/src/optimizers/langs/xml.js +3 -0
  50. package/src/optimizers/langs/yaml.js +3 -0
  51. package/src/optimizers/tokenizer.js +444 -0
  52. package/src/utils/git.js +35 -0
  53. package/src/utils/io.js +79 -0
  54. package/src/utils/logger.js +25 -0
  55. package/src/utils/path.js +59 -0
  56. package/src/utils/style.js +59 -0
@@ -0,0 +1,184 @@
1
+ import { join, relative, resolve, isAbsolute, dirname } from 'node:path';
2
+ import { Ignore } from './ignore.js';
3
+
4
/**
 * True when `rel` (a result of `path.relative`) points outside its base:
 * it is exactly `..`, starts with a `..` path segment, or is absolute
 * (which `relative()` returns for a different drive on Win32).
 * A plain `startsWith('..')` is not enough: `..hidden.js` is a valid name
 * of a file that lives inside the base directory.
 */
const isOutsideRoot = (rel) => /^\.\.(?:[\\/]|$)/.test(rel) || isAbsolute(rel);

/** Converts platform separators to forward slashes. */
const toForwardSlashes = (p) => p.replace(/\\/g, '/');

/**
 * Applies the optional extension allow-list (`config.extSet`).
 * The "extension" is whatever follows the last dot of `name`, lower-cased.
 */
const passesExtensionFilter = (config, name) => {
  if (!config.extSet || config.extSet.size === 0) {
    return true;
  }
  return config.extSet.has(name.split('.').pop().toLowerCase());
};

/**
 * File-system scanner. Every generator yields `{ path, rel, isDir }` records
 * where `path` is the full path and `rel` is a forward-slash relative path.
 *
 * `io` must provide `stat(path)`, and for directory walks `readdir(dir)`
 * (entries exposing `name`, `isDirectory()`, `isSymbolicLink()`) and
 * `readText(path)`; `io.cwd()` is optional.
 */
export const Scanner = {
  /**
   * Entry point: scans `config.gitFiles` when present, otherwise walks
   * `config.paths` (default `['.']`). Roots that cannot be stat'ed are skipped.
   *
   * @param {object} config - reads `ignore`, `gitFiles`, `paths`, `extSet`, `noIgnore`.
   * @param {object} io - I/O adapter (see above).
   */
  async *scan(config, io) {
    const cwd = io.cwd ? io.cwd() : process.cwd();
    const baseIgnore = Ignore.create(config.ignore || []);

    if (config.gitFiles) {
      yield* Scanner._scanDirect(config.gitFiles, config, io, cwd, baseIgnore);
      return;
    }

    const roots = (config.paths && config.paths.length) ? config.paths : ['.'];
    const visited = new Set();

    for (const root of roots) {
      const full = resolve(cwd, root);
      let stats;
      try {
        stats = await io.stat(full);
      } catch {
        continue;
      }

      if (stats.isDirectory()) {
        yield* Scanner._walk(full, baseIgnore, config, io, full, cwd, visited);
        continue;
      }

      // A root that is a single file is reported by its base name.
      const rel = toForwardSlashes(relative(dirname(full), full));
      if (baseIgnore.test(rel) || !passesExtensionFilter(config, full)) {
        continue;
      }

      yield { path: full, rel, isDir: false };
    }
  },

  /**
   * Yields the entries of an explicit file list (e.g. files reported by git),
   * restricted to the requested path scopes ("catport src/ -g").
   *
   * @param {Iterable<string>} files - candidate file paths.
   * @param {object} config - reads `paths` and `extSet`.
   * @param {object} io - only `stat` is used.
   * @param {string} cwd - base directory for resolving `config.paths`.
   * @param {object} baseIgnore - matcher exposing `test(rel)`.
   */
  async *_scanDirect(files, config, io, cwd, baseIgnore) {
    const roots = (config.paths && config.paths.length)
      ? config.paths.map((p) => resolve(cwd, p))
      : [cwd];

    for (const full of files) {
      // The first scope containing the file decides both inclusion and `rel`.
      const fileRoot = roots.find((root) => !isOutsideRoot(relative(root, full)));
      if (fileRoot === undefined) {
        continue;
      }

      // Git reports deleted files, we must skip them.
      try {
        const stats = await io.stat(full);
        if (!stats.isFile()) {
          continue;
        }
      } catch {
        continue;
      }

      // When the scope IS the file, relative() is '' — fall back to the base
      // name so the record matches what scan() yields for a single-file root.
      const rel = toForwardSlashes(
        relative(fileRoot, full) || relative(dirname(full), full)
      );

      if (baseIgnore.test(rel) || !passesExtensionFilter(config, full)) {
        continue;
      }

      yield { path: full, rel, isDir: false };
    }
  },

  /**
   * Recursively walks `dir`, yielding directories before their contents.
   *
   * @param {string} dir - directory to list.
   * @param {object} ignore - matcher inherited from the parent (`test`, `extend`).
   * @param {object} config - reads `noIgnore` and `extSet`.
   * @param {object} io - uses `stat`, `readText`, `readdir`.
   * @param {string} root - walk root; `rel` values are relative to it.
   * @param {string} cwd - passed through to `Ignore.parse`.
   * @param {Set<string>} visited - `dev:ino` keys of directories already walked.
   */
  async *_walk(dir, ignore, config, io, root, cwd, visited) {
    // Cycle detection (symlink loops): never enter the same directory twice.
    try {
      const stats = await io.stat(dir);
      if (stats.dev !== undefined && stats.ino !== undefined) {
        const key = `${stats.dev}:${stats.ino}`;
        if (visited.has(key)) {
          return;
        }
        visited.add(key);
      }
    } catch {
      return;
    }

    let currentIgnore = ignore;
    if (!config.noIgnore) {
      try {
        const gitignore = await io.readText(join(dir, '.gitignore'));
        currentIgnore = ignore.extend(Ignore.parse(gitignore, dir, cwd));
      } catch {
        // No .gitignore found, proceed
      }
    }

    let entries = [];
    try {
      entries = await io.readdir(dir);
    } catch {
      return;
    }

    for (const entry of entries) {
      const full = join(dir, entry.name);
      const rel = toForwardSlashes(relative(root, full));

      if (entry.name === '.git' || currentIgnore.test(rel)) {
        continue;
      }

      let isDir = entry.isDirectory();
      if (entry.isSymbolicLink()) {
        try {
          isDir = (await io.stat(full)).isDirectory();
        } catch {
          // Broken link or permission error
          isDir = false;
        }
      }

      if (isDir) {
        yield { path: full, rel, isDir: true };
        yield* Scanner._walk(full, currentIgnore, config, io, root, cwd, visited);
      } else if (passesExtensionFilter(config, entry.name)) {
        yield { path: full, rel, isDir: false };
      }
    }
  }
};
@@ -0,0 +1,78 @@
1
+ import { Markdown } from './markdown.js';
2
+ import { Xml } from './xml.js';
3
+ import { Json } from './json.js';
4
+ import { Yaml } from './yaml.js';
5
+ import { Multipart } from './multipart.js';
6
+ import { FORMAT } from '../config/constants.js';
7
+
8
// Built-in formatter implementations, keyed by their FORMAT identifier.
// Formatter.register() may add further entries at runtime.
const REGISTRY = Object.fromEntries([
  [FORMAT.MD, Markdown],
  [FORMAT.XML, Xml],
  [FORMAT.JSON, Json],
  [FORMAT.YAML, Yaml],
  [FORMAT.MULTIPART, Multipart]
]);
15
+
16
/**
 * Formatter lookup and content sniffing.
 */
export const Formatter = {
  /**
   * Registers (or replaces) the implementation used for `key`.
   */
  register: (key, impl) => {
    REGISTRY[key] = impl;
  },

  /**
   * Returns the formatter registered for `type`, or Markdown when none is.
   * Only own entries count, so names inherited from Object.prototype
   * ("toString", "constructor", ...) fall back to Markdown as well.
   */
  get: (type) => {
    return (Object.hasOwn(REGISTRY, type) && REGISTRY[type]) || Markdown;
  },

  /**
   * Guesses which formatter produced `content`.
   *
   * Checks, in order: the first characters of the trimmed text, the first
   * fenced code block (its language tag, then its first characters), and
   * finally telltale substrings within the first 1024 characters.
   * Anything unrecognised — including non-string input — is Markdown.
   */
  detect: (content) => {
    if (typeof content !== 'string') {
      return Markdown;
    }

    // Shared prefix sniffing for both the whole text and a fenced body.
    const byPrefix = (s) => {
      if (s.startsWith('{')) {
        return Json;
      }
      if (s.startsWith('<')) {
        return Xml;
      }
      if (s.startsWith('meta:')) {
        return Yaml;
      }
      return null;
    };

    const t = content.trim();

    const direct = byPrefix(t);
    if (direct) {
      return direct;
    }
    if (t.startsWith('MIME-Version: 1.0')) {
      return Multipart;
    }

    const codeBlockMatch = t.match(/```(\w*)\n([\s\S]*?)```/);
    if (codeBlockMatch) {
      const lang = codeBlockMatch[1].toLowerCase();
      const body = codeBlockMatch[2].trim();

      if (lang === 'json') {
        return Json;
      }
      if (lang === 'xml') {
        return Xml;
      }
      if (lang === 'yaml' || lang === 'yml') {
        return Yaml;
      }

      const fenced = byPrefix(body);
      if (fenced) {
        return fenced;
      }
      if (body.startsWith('files:')) {
        return Yaml;
      }
    }

    const sample = t.slice(0, 1024);
    if (sample.includes('<?xml') || sample.includes('<project name=')) {
      return Xml;
    }
    if (sample.includes('"files": [')) {
      return Json;
    }

    return Markdown;
  }
};
@@ -0,0 +1,284 @@
1
// JSON escape sequences for the characters that must not appear raw inside
// a double-quoted JSON string.
const ESCAPE_MAP = Object.freeze({
  '"': '\\"', '\\': '\\\\', '\b': '\\b', '\f': '\\f',
  '\n': '\\n', '\r': '\\r', '\t': '\\t'
});

/**
 * Best-effort conversion of "JSON-like" text into strict JSON:
 *  - single-quoted strings become double-quoted strings,
 *  - line and block comments are removed,
 *  - bare object keys are quoted,
 *  - trailing commas before `}` / `]` are dropped.
 * Double-quoted strings are never touched. Non-string or empty input is
 * returned unchanged.
 *
 * @param {*} src - text to normalise.
 * @returns {*} normalised text (or `src` itself when it is not a non-empty string).
 */
const normalizeJsonLike = (() => {
  const R_DQ = /"(?:\\[\s\S]|[^\\"])*"/;
  const R_SQ = /'(?:\\[\s\S]|[^\\'])*'/;
  const R_CMT = /\/\/[^\n]*|\/\*[\s\S]*?\*\//;

  // Bare key: excludes whitespace, quotes, and JSON structure chars.
  // It deliberately does NOT swallow leading whitespace: the match must
  // start on the key itself so that a comment such as " //b: 2" is tried
  // as a comment (earlier alternative) instead of being quoted as a key.
  const R_KEY = /([^\s"':,{}[\]]+)\s*:/;

  // Trailing comma
  const R_TRL = /,\s*([}\]])/;

  // Order is critical: strings -> comments -> keys -> trailing commas
  const MASTER = new RegExp(
    `(${R_DQ.source})|(${R_SQ.source})|(${R_CMT.source})|` +
    `${R_KEY.source}|${R_TRL.source}`,
    'g'
  );

  // Second pass: commas that only became "trailing" once a comment between
  // the comma and the bracket was removed. Strings are matched first so
  // their contents stay untouched.
  const TRAILING_ONLY = new RegExp(`(${R_DQ.source})|${R_TRL.source}`, 'g');

  // Either an existing backslash escape, or a raw char that needs escaping.
  const R_SQ_FIXUP = /\\([\s\S])|["\b\f\n\r\t]/g;
  const RAW_CONTROLS = '\b\f\n\r\t';

  // Re-quotes the body of a single-quoted literal for a double-quoted one.
  const convertSingleQuoted = (literal) => {
    const inner = literal.slice(1, -1).replace(R_SQ_FIXUP, (m, escaped) => {
      if (escaped === undefined) {
        return ESCAPE_MAP[m]; // raw double quote or control character
      }
      if (escaped === "'") {
        return "'"; // \' needs no escape inside double quotes
      }
      if (RAW_CONTROLS.includes(escaped)) {
        return '\\' + ESCAPE_MAP[escaped]; // backslash followed by a raw control char
      }
      return m; // already an escape sequence (e.g. \" or \\) — keep as is
    });
    return `"${inner}"`;
  };

  return (src) => {
    if (!src || typeof src !== 'string') {
      return src;
    }

    const firstPass = src.replace(
      MASTER,
      (match, doubleQuoted, singleQuoted, comment, keyName, closingBracket) => {
        if (doubleQuoted !== undefined) {
          return doubleQuoted;
        }
        if (comment !== undefined) {
          return '';
        }
        if (singleQuoted !== undefined) {
          return convertSingleQuoted(singleQuoted);
        }
        if (keyName !== undefined) {
          return `"${keyName}":`;
        }
        if (closingBracket !== undefined) {
          return closingBracket;
        }
        return match;
      }
    );

    return firstPass.replace(TRAILING_ONLY, (match, doubleQuoted, closingBracket) => {
      return doubleQuoted !== undefined ? doubleQuoted : closingBracket;
    });
  };
})();
66
+
67
/**
 * Extracts top-level balanced `{ ... }` blocks from a string.
 *
 * Braces inside quoted strings are ignored. Double quotes are honoured
 * everywhere; single quotes only open a string while inside an object, so
 * an apostrophe in surrounding prose ("Here's the result: {...}") does not
 * hide the object that follows it.
 *
 * @param {string} text - text to scan; non-string input yields `[]`.
 * @returns {string[]} the balanced blocks, in order of appearance.
 */
const extractBalancedObjects = (text) => {
  if (typeof text !== 'string') {
    return [];
  }

  const results = [];
  let depth = 0;
  let start = -1;
  let quoteChar = ''; // non-empty while inside a string
  let escaped = false;

  for (let i = 0; i < text.length; i++) {
    const c = text[i];

    if (quoteChar) {
      if (escaped) {
        escaped = false;
      } else if (c === '\\') {
        escaped = true;
      } else if (c === quoteChar) {
        quoteChar = '';
      }
      continue;
    }

    if (c === '"' || (c === "'" && depth > 0)) {
      quoteChar = c;
    } else if (c === '{') {
      if (depth === 0) {
        start = i;
      }
      depth++;
    } else if (c === '}' && depth > 0) {
      depth--;
      if (depth === 0) {
        results.push(text.slice(start, i + 1));
      }
    }
  }

  return results;
};
110
+
111
/**
 * JSON formatter: emits a bundle as `{ "meta": ..., "files": [...] }` and
 * parses a (possibly sloppy) JSON reply back into `{ path, content }` items.
 */
export const Json = {
  /** Prompt text describing the reply format expected from the model. */
  getInstruction: () => `**CRITICAL:** Rules for every file (follow strictly and EXACTLY):
- Return valid JSON.
- The root MUST be an object containing a "files" array.
- Each item in the array MUST have "path" and "content" fields.
- Properly escape strings (e.g., quotes, newlines).

## Expected Schema

\`\`\`json
{
"type": "object",
"required": ["files"],
"properties": {
"files": {
"type": "array",
"items": {
"type": "object",
"required": ["path", "content"],
"properties": {
"path": { "type": "string", "description": "Relative file path" },
"content": { "type": "string", "description": "File content" }
}
}
}
}
}
\`\`\`

## Examples

## Correct:

{
"files": [
{
"path": "src/main.js",
"content": "console.log(\\"Hello\\");"
},
{
"path": "config.json",
"content": "{\\n \\"debug\\": true\\n}"
}
]
}

## Wrong:

\`\`\`json
[ { "path": ... } ] ✗ (Root must be object with "files" key)
\`\`\`

{ "path": ... } ✗ (Must be inside "files" array)

Only output valid JSON. No explanations, no markdown, no extra text.`,

  /** Opens the document: the meta object and the start of the files array. */
  header: (m) => {
    const meta = {
      name: m.name,
      context: m.context,
      ...(m.tree ? { tree: m.tree } : {})
    };
    return `{\n "meta": ${JSON.stringify(meta)},\n "files": [\n`;
  },

  /** Serialises one file entry (no separator is added here). */
  file: (f) => {
    const entry = { path: f.rel, content: f.content };
    return ` ${JSON.stringify(entry)}`;
  },

  /** Closes the files array and appends the optional task / instruction. */
  footer: (m) => {
    const parts = ['\n ]'];
    if (m.task) {
      parts.push(`,\n "task": ${JSON.stringify(m.task)}`);
    }
    if (m.instructionText) {
      parts.push(`,\n "instruction": ${JSON.stringify(m.instructionText)}`);
    }
    parts.push('\n}');
    return parts.join('');
  },

  /**
   * Extracts the `files` array from `txt`.
   *
   * Candidates are tried in this order, each followed by its normalised
   * form when normalisation changes it: the whole text, every fenced code
   * block, then every embedded `{...}` block longer than 20 characters
   * (longest first). The first candidate that parses into an object whose
   * `files` is an array of `{ path: non-blank string, content: string }`
   * wins. Returns `[]` (and warns via `logger`, if given) when none does.
   */
  parse: (txt, logger) => {
    if (!txt) {
      return [];
    }
    const clean = txt.trim();
    if (!clean) {
      return [];
    }

    const isFileEntry = (f) =>
      Boolean(f) &&
      typeof f === 'object' &&
      typeof f.path === 'string' &&
      f.path.trim() !== '' &&
      Object.hasOwn(f, 'content') &&
      typeof f.content === 'string';

    // Returns the validated files array, or null when `src` does not qualify.
    const tryParse = (src) => {
      let obj;
      try {
        obj = JSON.parse(src);
      } catch {
        return null;
      }
      if (!obj || typeof obj !== 'object' || !Array.isArray(obj.files)) {
        return null;
      }
      return obj.files.every(isFileEntry) ? obj.files : null;
    };

    // A candidate plus its normalised variant (only when it differs).
    const withNormalized = (src) => {
      const normalized = normalizeJsonLike(src);
      return normalized !== src ? [src, normalized] : [src];
    };

    const fencedBlocks = Array.from(
      clean.matchAll(/(?:^|\n)[ \t]*```(?:json)?\s*\n?([\s\S]*?)\n?[ \t]*```/g),
      (m) => m[1].trim()
    ).filter((block) => block !== '');

    const NOISE_THRESHOLD = 20; // arbitrary min length to avoid noise
    const embeddedObjects = extractBalancedObjects(clean)
      // Longest first, to prefer larger wrapping objects
      .sort((a, b) => b.length - a.length)
      .filter((objStr) => objStr.length > NOISE_THRESHOLD);

    const candidates = [clean, ...fencedBlocks, ...embeddedObjects].flatMap(withNormalized);

    for (const candidate of candidates) {
      const files = tryParse(candidate);
      if (files !== null) {
        return files;
      }
    }

    if (logger) {
      logger.warn('Failed to extract a files array from the JSON');
    }
    return [];
  }
};
@@ -0,0 +1,164 @@
1
+ import { Path } from '../utils/path.js';
2
+
3
// Line prefix announcing a file, and the marker separating files from the
// trailing instructions.
export const FILE_MARKER = '### ◼◼◼ FILE:';
export const INSTRUCTION_MARKER = '### ◼◼◼ END OF FILES - INSTRUCTIONS FOLLOW';
const ESCAPED_MARKER = FILE_MARKER.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
// Global regex: `lastIndex` is always set explicitly right before each exec().
const FILE_MARKER_RE = new RegExp(`^\\s*${ESCAPED_MARKER}\\s*(.+)$`, 'gim');

/**
 * Markdown formatter: each file is a FILE_MARKER line followed by a fenced
 * code block.
 */
export const Markdown = {
  /** Prompt text describing the reply format expected from the model. */
  getInstruction: () => `**CRITICAL:** Rules for every file (follow strictly and EXACTLY):
- Output only blocks starting with "${FILE_MARKER} <path>" followed by a fenced code block.
- Code block with language tag must follow immediately. Nothing else.
- No bold, no "(updated / fixed)", no explanations, no extra text.

## Examples

## Correct:

${FILE_MARKER} src/components/Button.tsx
\`\`\`tsx
import React from 'react';
export const Button = () => <button>Click</button>;
\`\`\`

${FILE_MARKER} utils/helpers.ts
\`\`\`ts
export const format = (n: number) => n.toFixed(2);
\`\`\`

## Wrong:

\`\`\`
**src/app.ts** ✗
File: config.json ✗
Here is the update: ✗
\`\`\`

Only output "${FILE_MARKER} <path>" followed by a fenced code block. Nothing else.`,

  /** Document header: title, optional context quote and structure tree. */
  header: (m) => {
    let out = `# ${m.name}\n`;
    if (m.context) {
      out += `> **Context**: ${m.context}\n`;
    }
    if (m.tree) {
      out += `\n## Structure\n\`\`\`text\n${m.tree}\n\`\`\`\n\n`;
    }
    return out + '---\n\n';
  },

  /**
   * Renders one file. The fence is one character longer than the longest
   * fence found at the start of a line in the content, so nested code blocks
   * cannot close it. The language tag is the extension of the file's base
   * name, or `txt` when the name has none.
   */
  file: (f) => {
    const content = f.content || '';

    // Use the base name only: a dot in a directory name is not an extension.
    const baseName = f.rel.split(/[\\/]/).pop();
    const dot = baseName.lastIndexOf('.');
    const ext = dot !== -1 && dot < baseName.length - 1
      ? baseName.slice(dot + 1)
      : 'txt';

    const existingFences = content.match(/^(`{3,}|~{3,})/gm) ?? [];
    const longest = existingFences.reduce((len, m) => Math.max(len, m.length), 0);
    const fence = '`'.repeat(Math.max(3, longest + 1));

    return `${FILE_MARKER} ${f.rel}\n${fence}${ext}\n${content}\n${fence}\n\n`;
  },

  /** Optional task quote and instruction section. */
  footer: (m) => {
    let out = m.task ? `\n---\n> **Task**: ${m.task}\n` : '';
    if (m.instructionText) {
      out += `\n${INSTRUCTION_MARKER}\n${m.instructionText}\n`;
    }
    return out;
  },

  /**
   * Parses marker + fenced-block pairs back into `{ path, content }` items.
   *
   * Text after the last INSTRUCTION_MARKER is ignored. A marker is skipped
   * (with a warning on `logger`, if given) when its path is rejected by
   * `Path.clean`, when no fence follows before the next marker, when the
   * fence is never closed, or when the block is empty.
   *
   * @param {string} text - document to parse; non-string or empty input yields `[]`.
   * @param {object} [logger] - optional, must expose `warn(message)`.
   * @returns {{path: string, content: string}[]}
   */
  parse: (text, logger) => {
    if (typeof text !== 'string' || text === '') {
      return [];
    }

    // Find the last occurrence of INSTRUCTION_MARKER to avoid splitting on markers inside file content
    const lastInstructionIdx = text.lastIndexOf(INSTRUCTION_MARKER);
    const processText = lastInstructionIdx !== -1 ? text.slice(0, lastInstructionIdx) : text;

    const files = [];
    let pos = 0;

    while (true) {
      FILE_MARKER_RE.lastIndex = pos;
      const markerMatch = FILE_MARKER_RE.exec(processText);
      if (!markerMatch) {
        break;
      }

      const afterMarkerPos = markerMatch.index + markerMatch[0].length;

      const rawPath = markerMatch[1].trim()
        .replace(/[:'"`]*$/, '') // trailing : or quotes
        .replace(/\s*\(.*\)$/, '') // (updated), (copy), …
        .replace(/\s*\[.*\]$/, '') // [modified], …
        .trim();

      const path = Path.clean(rawPath);
      if (!path) {
        pos = afterMarkerPos;
        if (logger) {
          logger.warn(`Skipping invalid path in marker at index ${markerMatch.index}: "${rawPath}"`);
        }
        continue;
      }

      const rest = processText.slice(afterMarkerPos);
      const openFenceMatch = rest.match(/^([ \t]*)(`{3,}|~{3,})/m);

      // The fence must belong to THIS marker: if another marker comes first,
      // the block is that file's content and must not be attributed to this
      // path (which would also swallow the later marker).
      FILE_MARKER_RE.lastIndex = afterMarkerPos;
      const nextMarker = FILE_MARKER_RE.exec(processText);
      const fenceBelongsToLaterFile = openFenceMatch !== null &&
        nextMarker !== null &&
        nextMarker.index < afterMarkerPos + openFenceMatch.index;

      if (!openFenceMatch || fenceBelongsToLaterFile) {
        pos = afterMarkerPos;
        if (logger) {
          logger.warn(`Skipping file "${path}": Marker found but no fenced code block follows immediately.`);
        }
        continue;
      }

      const indent = openFenceMatch[1];
      const fencePart = openFenceMatch[2];
      const fenceChar = fencePart[0];
      const fenceLen = fencePart.length;
      // Closing fence: same indent and character, at least as long as the opener.
      const fenceRE = new RegExp(`^${indent}${fenceChar}{${fenceLen},}(?:[ \t]*|[ \t]+\\S.*)$`, 'm');

      const fenceEndInRest = openFenceMatch.index + openFenceMatch[0].length;
      const fenceStartPos = afterMarkerPos + fenceEndInRest;
      // Skip the remainder of the opening line (the language tag).
      const langLineMatch = rest.slice(fenceEndInRest).match(/^(.*\r?\n)/);
      const codeBlockStart = fenceStartPos + (langLineMatch?.[0].length ?? 0);

      const closeMatch = fenceRE.exec(processText.slice(codeBlockStart));
      if (!closeMatch) {
        if (logger) {
          logger.warn(`Skipping file "${path}": Code block not closed (reached end of input).`);
        }
        // Skip this file and continue with the next one
        pos = afterMarkerPos;
        continue;
      }

      const closePosAbs = codeBlockStart + closeMatch.index + closeMatch[0].length;
      const rawBlock = processText.slice(codeBlockStart, codeBlockStart + closeMatch.index);

      let content = rawBlock;
      if (indent) {
        // An indented fence means every content line carries the same indent.
        content = content.replace(new RegExp(`^${indent}`, 'gm'), '');
      }
      content = content
        .replace(/\r\n/g, '\n')
        .replace(/\r/g, '\n')
        .trimEnd();

      if (content.length > 0 || rawBlock.trim().length > 0) {
        files.push({ path, content });
      } else if (logger) {
        logger.warn(`Skipping file "${path}": Empty code block extracted.`);
      }

      pos = closePosAbs;

      const trailing = processText.slice(pos).match(/^\r?\n/);
      if (trailing) {
        pos += trailing[0].length;
      }
    }

    return files;
  }
};