catport 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +94 -0
- package/CONTRIBUTING.md +133 -0
- package/LICENSE +21 -0
- package/README.md +414 -0
- package/bin/catport +8 -0
- package/package.json +48 -0
- package/src/cli/args.js +133 -0
- package/src/cli/main.js +78 -0
- package/src/cli/parser.js +152 -0
- package/src/cli/ui.js +78 -0
- package/src/config/constants.js +62 -0
- package/src/config/ignores.js +119 -0
- package/src/config/loader.js +15 -0
- package/src/config/options.js +181 -0
- package/src/core/analyzer.js +23 -0
- package/src/core/bundler.js +165 -0
- package/src/core/extractor.js +76 -0
- package/src/core/ignore.js +65 -0
- package/src/core/processor.js +59 -0
- package/src/core/scanner.js +184 -0
- package/src/formatters/index.js +78 -0
- package/src/formatters/json.js +284 -0
- package/src/formatters/markdown.js +164 -0
- package/src/formatters/multipart.js +127 -0
- package/src/formatters/xml.js +221 -0
- package/src/formatters/yaml.js +147 -0
- package/src/index.js +11 -0
- package/src/optimizers/definitions.js +79 -0
- package/src/optimizers/index.js +96 -0
- package/src/optimizers/langs/batch.js +3 -0
- package/src/optimizers/langs/c_family.js +3 -0
- package/src/optimizers/langs/clojure.js +3 -0
- package/src/optimizers/langs/css.js +3 -0
- package/src/optimizers/langs/go.js +5 -0
- package/src/optimizers/langs/haskell.js +4 -0
- package/src/optimizers/langs/html.js +4 -0
- package/src/optimizers/langs/ini.js +4 -0
- package/src/optimizers/langs/javascript.js +11 -0
- package/src/optimizers/langs/lua.js +4 -0
- package/src/optimizers/langs/markdown.js +3 -0
- package/src/optimizers/langs/perl.js +3 -0
- package/src/optimizers/langs/php.js +4 -0
- package/src/optimizers/langs/powershell.js +5 -0
- package/src/optimizers/langs/python.js +5 -0
- package/src/optimizers/langs/ruby.js +4 -0
- package/src/optimizers/langs/rust.js +3 -0
- package/src/optimizers/langs/shell.js +4 -0
- package/src/optimizers/langs/sql.js +4 -0
- package/src/optimizers/langs/xml.js +3 -0
- package/src/optimizers/langs/yaml.js +3 -0
- package/src/optimizers/tokenizer.js +444 -0
- package/src/utils/git.js +35 -0
- package/src/utils/io.js +79 -0
- package/src/utils/logger.js +25 -0
- package/src/utils/path.js +59 -0
- package/src/utils/style.js +59 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import { join, relative, resolve, isAbsolute, dirname } from 'node:path';
|
|
2
|
+
import { Ignore } from './ignore.js';
|
|
3
|
+
|
|
4
|
+
export const Scanner = {
  /**
   * Async generator that yields `{ path, rel, isDir }` entries selected by `config`.
   *
   * When `config.gitFiles` is set, only that list is considered (see `_scanDirect`).
   * Otherwise every entry of `config.paths` (default `['.']`) is resolved against the
   * working directory; directories are walked recursively and plain files are yielded
   * directly, with `rel` being the file's base name.
   *
   * @param {object} config - Reads `ignore`, `gitFiles`, `paths`, `extSet` and `noIgnore`.
   * @param {object} io - Must provide `stat`; `cwd`, `readdir` and `readText` are used when needed.
   */
  async *scan(config, io) {
    const cwd = io.cwd ? io.cwd() : process.cwd();
    const baseIgnore = Ignore.create(config.ignore || []);

    if (config.gitFiles) {
      yield* Scanner._scanDirect(config.gitFiles, config, io, cwd, baseIgnore);
      return;
    }

    const roots = (config.paths && config.paths.length) ? config.paths : ['.'];
    // Shared across roots so that overlapping roots and symlink loops are visited once.
    const visited = new Set();

    for (const root of roots) {
      const full = resolve(cwd, root);
      let stats;
      try {
        stats = await io.stat(full);
      } catch {
        // Unreadable or missing root: skip it, keep scanning the others.
        continue;
      }

      if (stats.isDirectory()) {
        yield* Scanner._walk(full, baseIgnore, config, io, full, cwd, visited);
        continue;
      }

      const name = Scanner._baseName(full);
      const rel = name.replace(/\\/g, '/');

      if (baseIgnore.test(rel)) {
        continue;
      }
      if (!Scanner._matchesExt(name, config)) {
        continue;
      }

      yield {
        path: full,
        rel: rel,
        isDir: false
      };
    }
  },

  /**
   * Yields the entries of an explicit file list (`config.gitFiles`).
   *
   * If specific paths were requested (e.g. "catport src/ -g"), the list is intersected
   * with those path scopes; otherwise the scope is `cwd`. Entries that cannot be
   * stat'ed or are not regular files are skipped (git also reports deleted files).
   *
   * @param {string[]} files - Candidate file paths.
   * @param {object} config - Reads `paths` and `extSet`.
   * @param {object} io - Must provide `stat`.
   * @param {string} cwd - Base directory used to resolve `config.paths`.
   * @param {object} baseIgnore - Object exposing `test(rel)`; a truthy result skips the file.
   */
  async *_scanDirect(files, config, io, cwd, baseIgnore) {
    const roots = (config.paths && config.paths.length)
      ? config.paths.map((p) => resolve(cwd, p))
      : [cwd];

    for (const full of files) {
      // The first root containing the file both proves it is in scope and anchors `rel`.
      const fileRoot = roots.find((root) => Scanner._isInside(root, full));
      if (fileRoot === undefined) {
        continue;
      }

      try {
        const stats = await io.stat(full);
        if (!stats.isFile()) {
          continue;
        }
      } catch {
        continue;
      }

      const name = Scanner._baseName(full);
      // relative() is '' when the requested path IS the file; fall back to its base
      // name so `rel` is never empty (same result scan() produces for file roots).
      const rel = (relative(fileRoot, full) || name).replace(/\\/g, '/');

      if (baseIgnore.test(rel)) {
        continue;
      }
      if (!Scanner._matchesExt(name, config)) {
        continue;
      }

      yield {
        path: full,
        rel: rel,
        isDir: false
      };
    }
  },

  /**
   * Recursively walks `dir`, yielding directories (before their contents) and files.
   *
   * @param {string} dir - Directory to list.
   * @param {object} ignore - Ignore matcher inherited from the parent directory.
   * @param {object} config - Reads `noIgnore` and `extSet`.
   * @param {object} io - Must provide `stat`, `readdir` and (unless `noIgnore`) `readText`.
   * @param {string} root - Directory that `rel` paths are computed against.
   * @param {string} cwd - Passed through to `Ignore.parse`.
   * @param {Set<string>} visited - `dev:ino` keys of directories already walked.
   */
  async *_walk(dir, ignore, config, io, root, cwd, visited) {
    // Cycle detection: a directory identified by the same device/inode pair is walked once.
    try {
      const stats = await io.stat(dir);
      if (stats.dev !== undefined && stats.ino !== undefined) {
        const key = `${stats.dev}:${stats.ino}`;
        if (visited.has(key)) {
          return;
        }
        visited.add(key);
      }
    } catch {
      return;
    }

    let currentIgnore = ignore;

    if (!config.noIgnore) {
      try {
        const gitignore = await io.readText(join(dir, '.gitignore'));
        const scopedPatterns = Ignore.parse(gitignore, dir, cwd);
        currentIgnore = ignore.extend(scopedPatterns);
      } catch {
        // No readable .gitignore here (or it could not be applied): keep the inherited matcher.
      }
    }

    let entries = [];
    try {
      entries = await io.readdir(dir);
    } catch {
      return;
    }

    for (const e of entries) {
      const full = join(dir, e.name);
      const rel = relative(root, full).replace(/\\/g, '/');

      if (e.name === '.git') {
        continue;
      }

      if (currentIgnore.test(rel)) {
        continue;
      }

      let isDir = e.isDirectory();
      if (e.isSymbolicLink()) {
        try {
          // stat() is used to learn whether the link target is a directory.
          const stats = await io.stat(full);
          isDir = stats.isDirectory();
        } catch {
          // Broken link or permission error
          isDir = false;
        }
      }

      if (isDir) {
        yield {
          path: full,
          rel: rel,
          isDir: true
        };
        yield* Scanner._walk(full, currentIgnore, config, io, root, cwd, visited);
        continue;
      }

      if (!Scanner._matchesExt(e.name, config)) {
        continue;
      }

      yield {
        path: full,
        rel: rel,
        isDir: false
      };
    }
  },

  /**
   * True when `target` is `root` itself or lies below it.
   *
   * relative() yields a path whose FIRST SEGMENT is '..' when the target is outside,
   * or an absolute path on a different drive (Win32). Only the first segment is
   * compared so that a file literally named "..foo" inside the root stays in scope.
   */
  _isInside(root, target) {
    const rel = relative(root, target);
    if (isAbsolute(rel)) {
      return false;
    }
    return rel.split(/[\\/]/)[0] !== '..';
  },

  /** Last path segment of `full`, computed with the platform's path rules. */
  _baseName(full) {
    return relative(dirname(full), full);
  },

  /**
   * Applies the optional extension filter to a file NAME (not a full path, so dots in
   * directory names cannot leak into the extension). Always true when no filter is set.
   */
  _matchesExt(name, config) {
    if (!(config.extSet && config.extSet.size > 0)) {
      return true;
    }
    const ext = name.split('.').pop().toLowerCase();
    return config.extSet.has(ext);
  }
};
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { Markdown } from './markdown.js';
|
|
2
|
+
import { Xml } from './xml.js';
|
|
3
|
+
import { Json } from './json.js';
|
|
4
|
+
import { Yaml } from './yaml.js';
|
|
5
|
+
import { Multipart } from './multipart.js';
|
|
6
|
+
import { FORMAT } from '../config/constants.js';
|
|
7
|
+
|
|
8
|
+
// Built-in formatters keyed by FORMAT identifier. Formatter.register() may add or
// replace entries at runtime.
const REGISTRY = {
  [FORMAT.MD]: Markdown,
  [FORMAT.XML]: Xml,
  [FORMAT.JSON]: Json,
  [FORMAT.YAML]: Yaml,
  [FORMAT.MULTIPART]: Multipart
};

export const Formatter = {
  /**
   * Registers (or replaces) the formatter stored under `key`.
   */
  register: (key, impl) => {
    REGISTRY[key] = impl;
  },

  /**
   * Returns the formatter registered under `type`, or Markdown when there is none.
   * Only own keys are considered, so names inherited from Object.prototype
   * ("constructor", "toString", ...) fall back to Markdown instead of leaking through.
   */
  get: (type) => {
    const impl = Object.hasOwn(REGISTRY, type) ? REGISTRY[type] : undefined;
    return impl || Markdown;
  },

  /**
   * Guesses which formatter produced `content` by inspecting its text.
   *
   * Checks, in order: the first characters of the trimmed text, the first fenced
   * code block (its language tag, then the first characters of its body), and
   * finally a few telltale substrings within the first 1024 characters.
   * Anything unrecognised, including non-string input, is treated as Markdown.
   */
  detect: (content) => {
    if (typeof content !== 'string') {
      return Markdown;
    }
    const t = content.trim();

    if (t.startsWith('{')) {
      return Json;
    }
    if (t.startsWith('<')) {
      return Xml;
    }
    if (t.startsWith('meta:')) {
      return Yaml;
    }
    if (t.startsWith('MIME-Version: 1.0')) {
      return Multipart;
    }

    // `\r?\n` so that fenced blocks in CRLF text are recognised as well.
    const codeBlockMatch = t.match(/```(\w*)\r?\n([\s\S]*?)```/);
    if (codeBlockMatch) {
      const lang = codeBlockMatch[1].toLowerCase();
      const body = codeBlockMatch[2].trim();

      // An explicit language tag wins over sniffing the block body.
      if (lang === 'json') {
        return Json;
      }
      if (lang === 'xml') {
        return Xml;
      }
      if (lang === 'yaml' || lang === 'yml') {
        return Yaml;
      }

      if (body.startsWith('{')) {
        return Json;
      }
      if (body.startsWith('<')) {
        return Xml;
      }
      if (body.startsWith('meta:')) {
        return Yaml;
      }
      if (body.startsWith('files:')) {
        return Yaml;
      }
    }

    const sample = t.slice(0, 1024);
    if (sample.includes('<?xml') || sample.includes('<project name=')) {
      return Xml;
    }
    if (sample.includes('"files": [')) {
      return Json;
    }

    return Markdown;
  }
};
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
// JSON escape sequence for each character that must not appear raw inside a JSON string.
const ESCAPE_MAP = Object.freeze({
  '"': '\\"', '\\': '\\\\', '\b': '\\b', '\f': '\\f',
  '\n': '\\n', '\r': '\\r', '\t': '\\t'
});

/**
 * Repairs common "almost JSON" deviations so that JSON.parse has a chance to succeed:
 * strips `//` and block comments, converts single-quoted strings to double-quoted
 * ones, quotes bare object keys and removes trailing commas. Double-quoted strings
 * are passed through untouched. Falsy or non-string input is returned unchanged.
 *
 * @param {string} src - JSON-like text.
 * @returns {string} The repaired text (not guaranteed to be valid JSON).
 */
const normalizeJsonLike = (() => {
  const R_DQ = /"(?:\\[\s\S]|[^\\"])*"/;
  const R_SQ = /'(?:\\[\s\S]|[^\\'])*'/;
  const R_CMT = /\/\/[^\n]*|\/\*[\s\S]*?\*\//;

  // We explicitly exclude whitespace, quotes, and JSON structure chars from keys.
  // The lookahead stops a comment glued to a word ("//todo: x") from being taken
  // for a bare key when matching starts on the whitespace in front of it.
  const R_KEY_GENERIC = /(\s*)(?!\/[/*])([^\s"':,{}[\]]+)\s*(:)/;

  // Trailing comma. The gap before the closing bracket may hold whitespace and
  // comments ("[1, // last\n]"). Both comment forms are written so they cannot be
  // shortened or stretched by backtracking: a line comment must run to the end of
  // its line and a block comment body may not contain "*/". Otherwise a bracket
  // inside a comment, or real values between two comments, could be swallowed.
  const R_TRL = /(,)(?:\s|\/\/[^\n]*(?=\n|$)|\/\*(?:[^*]|\*(?!\/))*\*\/)*([}\]])/;

  // Order is critical: strings -> comments -> keys -> trailing commas
  const MASTER = new RegExp(
    `(${R_DQ.source})|(${R_SQ.source})|(${R_CMT.source})|` +
    `${R_KEY_GENERIC.source}|${R_TRL.source}`,
    'g'
  );

  // Characters that are legal raw inside '...' but need escaping inside "...".
  const SQ_ESCAPE_REGEX = /["\b\f\n\r\t]/g;
  const esc = (c) => ESCAPE_MAP[c];

  return (src) => {
    if (!src || typeof src !== 'string') {
      return src;
    }

    return src.replace(MASTER, (match, ...args) => {
      const [
        doubleQuotedString, singleQuotedString, comment,
        keyPre, keyName, keyCol,          // R_KEY_GENERIC (3 groups)
        trailingComma, trailingBracket    // R_TRL (2 groups)
      ] = args;

      if (doubleQuotedString) {
        return doubleQuotedString;
      }
      if (comment) {
        return '';
      }

      if (singleQuotedString) {
        let content = singleQuotedString.slice(1, -1);
        // \' is not a JSON escape; inside double quotes the apostrophe is literal.
        content = content.replace(/\\'/g, "'");
        content = content.replace(SQ_ESCAPE_REGEX, esc);
        return `"${content}"`;
      }

      // Quote bare keys
      if (keyName !== undefined) {
        return `${keyPre}"${keyName}"${keyCol}`;
      }

      // Drop the comma (and any comments/whitespace after it), keep the bracket.
      if (trailingComma) {
        return trailingBracket;
      }

      return match;
    });
  };
})();
|
|
66
|
+
|
|
67
|
+
/**
 * Extracts balanced top-level `{ ... }` blocks from a string.
 *
 * Braces inside string literals are ignored. Double-quoted strings are tracked
 * everywhere; single-quoted strings are tracked only INSIDE an object, because
 * outside of one an apostrophe is almost always prose ("Here's the result: {...}")
 * and treating it as a string opener would swallow every object that follows.
 *
 * @param {string} text - Arbitrary text that may embed JSON-like objects.
 * @returns {string[]} The outermost balanced blocks, in order of appearance.
 */
const extractBalancedObjects = (text) => {
  const results = [];
  let depth = 0;
  let quoteChar = ''; // active string delimiter; '' while outside any string
  let escaped = false;
  let start = -1;

  for (let i = 0; i < text.length; i++) {
    const c = text[i];

    if (quoteChar) {
      if (escaped) {
        escaped = false;
      } else if (c === '\\') {
        escaped = true;
      } else if (c === quoteChar) {
        quoteChar = '';
      }
      continue;
    }

    if (c === '"' || (c === "'" && depth > 0)) {
      quoteChar = c;
    } else if (c === '{') {
      if (depth === 0) {
        start = i;
      }
      depth++;
    } else if (c === '}' && depth > 0) {
      // A stray '}' at depth 0 is ignored rather than driving the depth negative.
      depth--;
      if (depth === 0) {
        results.push(text.slice(start, i + 1));
      }
    }
  }
  return results;
};
|
|
110
|
+
|
|
111
|
+
export const Json = {
  /**
   * Prompt text describing the JSON reply format: a root object with a "files"
   * array whose items carry "path" and "content".
   */
  getInstruction: () => `**CRITICAL:** Rules for every file (follow strictly and EXACTLY):
- Return valid JSON.
- The root MUST be an object containing a "files" array.
- Each item in the array MUST have "path" and "content" fields.
- Properly escape strings (e.g., quotes, newlines).

## Expected Schema

\`\`\`json
{
"type": "object",
"required": ["files"],
"properties": {
"files": {
"type": "array",
"items": {
"type": "object",
"required": ["path", "content"],
"properties": {
"path": { "type": "string", "description": "Relative file path" },
"content": { "type": "string", "description": "File content" }
}
}
}
}
}
\`\`\`

## Examples

## Correct:

{
"files": [
{
"path": "src/main.js",
"content": "console.log(\\"Hello\\");"
},
{
"path": "config.json",
"content": "{\\n \\"debug\\": true\\n}"
}
]
}

## Wrong:

\`\`\`json
[ { "path": ... } ] ✗ (Root must be object with "files" key)
\`\`\`

{ "path": ... } ✗ (Must be inside "files" array)

Only output valid JSON. No explanations, no markdown, no extra text.`,

  /**
   * Opens the document: the "meta" object (name, context and, when present, tree)
   * followed by the start of the "files" array.
   */
  header: (m) => {
    const meta = {
      name: m.name,
      context: m.context,
      ...(m.tree ? { tree: m.tree } : {})
    };
    return `{\n "meta": ${JSON.stringify(meta)},\n "files": [\n`;
  },

  /**
   * Serialises one file as a `{ path, content }` array item. No separator is added.
   */
  file: (f) => {
    const entry = { path: f.rel, content: f.content };
    return ` ${JSON.stringify(entry)}`;
  },

  /**
   * Closes the "files" array, appends the optional "task" and "instruction"
   * members, and closes the root object.
   */
  footer: (m) => {
    const parts = ['\n ]'];
    if (m.task) {
      parts.push(`,\n "task": ${JSON.stringify(m.task)}`);
    }
    if (m.instructionText) {
      parts.push(`,\n "instruction": ${JSON.stringify(m.instructionText)}`);
    }
    parts.push('\n}');
    return parts.join('');
  },

  /**
   * Recovers the `files` array from a reply that should contain the JSON document.
   *
   * Candidates are tried in order and the first one that parses and validates wins:
   * the whole text, each fenced code block, then each balanced `{...}` block
   * (longest first). Every candidate is tried raw and, when it differs, in its
   * normalizeJsonLike() form.
   *
   * @param {string} txt - Raw reply text.
   * @param {{warn: Function}} [logger] - Receives a warning when nothing could be extracted.
   * @returns {Array<{path: string, content: string}>} The files, or `[]` on failure.
   */
  parse: (txt, logger) => {
    const clean = txt ? txt.trim() : '';
    if (!clean) {
      return [];
    }

    // A usable entry is an object with a non-blank string path and an own string content.
    const isFileEntry = (f) =>
      f &&
      typeof f === 'object' &&
      typeof f.path === 'string' &&
      f.path.trim() !== '' &&
      Object.hasOwn(f, 'content') &&
      typeof f.content === 'string';

    // Returns the validated files array, or null when `src` is not an acceptable document.
    const tryParse = (src) => {
      let doc;
      try {
        doc = JSON.parse(src);
      } catch {
        return null;
      }
      if (!doc || typeof doc !== 'object') {
        return null;
      }
      if (!Array.isArray(doc.files) || !doc.files.every(isFileEntry)) {
        return null;
      }
      return doc.files;
    };

    const candidates = [];
    // Queue the raw text and, right after it, its repaired form if repair changed anything.
    const enqueue = (raw) => {
      candidates.push(raw);
      const repaired = normalizeJsonLike(raw);
      if (repaired !== raw) {
        candidates.push(repaired);
      }
    };

    enqueue(clean);

    for (const fenced of clean.matchAll(/(?:^|\n)[ \t]*```(?:json)?\s*\n?([\s\S]*?)\n?[ \t]*```/g)) {
      const block = fenced[1].trim();
      if (block) {
        enqueue(block);
      }
    }

    // Longest first, to prefer larger wrapping objects.
    const rawObjects = extractBalancedObjects(clean);
    rawObjects.sort((a, b) => b.length - a.length);

    const MIN_OBJECT_LENGTH = 20; // arbitrary min length to avoid noise
    for (const objStr of rawObjects) {
      if (objStr.length > MIN_OBJECT_LENGTH) {
        enqueue(objStr);
      }
    }

    for (const candidate of candidates) {
      const files = tryParse(candidate);
      if (files !== null) {
        return files;
      }
    }

    if (logger) {
      logger.warn('Failed to extract a files array from the JSON');
    }
    return [];
  }
};
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { Path } from '../utils/path.js';
|
|
2
|
+
|
|
3
|
+
// Heading line that introduces every file in the Markdown bundle.
export const FILE_MARKER = '### ◼◼◼ FILE:';
// Heading line that separates the files from the trailing instructions.
export const INSTRUCTION_MARKER = '### ◼◼◼ END OF FILES - INSTRUCTIONS FOLLOW';
// FILE_MARKER with regex metacharacters escaped, for embedding in a pattern.
const ESCAPED_MARKER = FILE_MARKER.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
// Matches a marker line and captures the rest of the line (the raw path).
// Global and therefore stateful: callers must set `lastIndex` before every exec().
const FILE_MARKER_RE = new RegExp(`^\\s*${ESCAPED_MARKER}\\s*(.+)$`, 'gim');

export const Markdown = {
  /**
   * Prompt text describing the Markdown reply format: a marker line with the path,
   * immediately followed by a fenced code block.
   */
  getInstruction: () => `**CRITICAL:** Rules for every file (follow strictly and EXACTLY):
- Output only blocks starting with "${FILE_MARKER} <path>" followed by a fenced code block.
- Code block with language tag must follow immediately. Nothing else.
- No bold, no "(updated / fixed)", no explanations, no extra text.

## Examples

## Correct:

${FILE_MARKER} src/components/Button.tsx
\`\`\`tsx
import React from 'react';
export const Button = () => <button>Click</button>;
\`\`\`

${FILE_MARKER} utils/helpers.ts
\`\`\`ts
export const format = (n: number) => n.toFixed(2);
\`\`\`

## Wrong:

\`\`\`
**src/app.ts** ✗
File: config.json ✗
Here is the update: ✗
\`\`\`

Only output "${FILE_MARKER} <path>" followed by a fenced code block. Nothing else.`,

  /**
   * Document header: title, optional context quote, optional structure tree, and a rule.
   */
  header: (m) => {
    let out = `# ${m.name}\n`;
    if (m.context) {
      out += `> **Context**: ${m.context}\n`;
    }
    if (m.tree) {
      out += `\n## Structure\n\`\`\`text\n${m.tree}\n\`\`\`\n\n`;
    }
    return out + '---\n\n';
  },

  /**
   * Renders one file as a marker line followed by a fenced code block.
   *
   * The fence is one backtick longer than the longest fence found at the start of a
   * line in the content, so embedded code blocks cannot terminate it early. The
   * language tag is the extension of the file NAME; names without a usable
   * extension are tagged `txt`.
   */
  file: (f) => {
    const content = f.content || '';

    // Look at the last path segment only, so dots in directory names are ignored
    // and an extensionless file does not end up with its whole path as the tag.
    const base = f.rel.split(/[\\/]/).pop();
    const dot = base.lastIndexOf('.');
    const ext = (dot === -1 ? '' : base.slice(dot + 1)) || 'txt';

    const existingFences = content.match(/^(`{3,}|~{3,})/gm) ?? [];
    const longest = existingFences.reduce((len, m) => Math.max(len, m.length), 0);
    const fenceLen = Math.max(3, longest + 1);
    const fence = '`'.repeat(fenceLen);

    return `${FILE_MARKER} ${f.rel}\n${fence}${ext}\n${content}\n${fence}\n\n`;
  },

  /**
   * Document footer: optional task quote and optional instruction section.
   */
  footer: (m) => {
    let out = m.task ? `\n---\n> **Task**: ${m.task}\n` : '';
    if (m.instructionText) {
      out += `\n${INSTRUCTION_MARKER}\n${m.instructionText}\n`;
    }
    return out;
  },

  /**
   * Extracts `{ path, content }` pairs from a Markdown bundle.
   *
   * Each marker line must be followed by a fenced code block BEFORE the next marker
   * line; markers without one are skipped with a warning. Paths are passed through
   * `Path.clean` and rejected when it returns a falsy value.
   *
   * @param {string} text - Bundle text.
   * @param {{warn: Function}} [logger] - Receives a warning for every skipped file.
   * @returns {Array<{path: string, content: string}>}
   */
  parse: (text, logger) => {
    // Find the last occurrence of INSTRUCTION_MARKER to avoid splitting on markers inside file content
    const lastInstructionIdx = text.lastIndexOf(INSTRUCTION_MARKER);
    const processText = lastInstructionIdx !== -1 ? text.slice(0, lastInstructionIdx) : text;

    const files = [];
    let pos = 0;

    while (true) {
      FILE_MARKER_RE.lastIndex = pos;
      const markerMatch = FILE_MARKER_RE.exec(processText);
      if (!markerMatch) {
        break;
      }

      const rawPath = markerMatch[1].trim()
        .replace(/[:'"`]*$/, '')      // trailing : or quotes
        .replace(/\s*\(.*\)$/, '')    // (updated), (copy), …
        .replace(/\s*\[.*\]$/, '')    // [modified], …
        .trim();

      const afterMarkerPos = markerMatch.index + markerMatch[0].length;

      const path = Path.clean(rawPath);
      if (!path) {
        pos = afterMarkerPos;
        if (logger) {
          logger.warn(`Skipping invalid path in marker at index ${markerMatch.index}: "${rawPath}"`);
        }
        continue;
      }

      const rest = processText.slice(afterMarkerPos);
      const openFenceMatch = rest.match(/^([ \t]*)(`{3,}|~{3,})/m);

      // A fence that only appears after the NEXT marker line belongs to that later
      // file. Claiming it would attach the wrong content to this path and drop the
      // later file entirely, so this marker is treated as having no block.
      FILE_MARKER_RE.lastIndex = afterMarkerPos;
      const nextMarker = FILE_MARKER_RE.exec(processText);
      const fenceIsForLaterFile = Boolean(openFenceMatch) && nextMarker !== null &&
        nextMarker.index < afterMarkerPos + openFenceMatch.index;

      if (!openFenceMatch || fenceIsForLaterFile) {
        pos = afterMarkerPos;
        if (logger) {
          logger.warn(`Skipping file "${path}": Marker found but no fenced code block follows immediately.`);
        }
        continue;
      }

      const indent = openFenceMatch[1];
      const fencePart = openFenceMatch[2];
      const fenceChar = fencePart[0];
      const fenceLen = fencePart.length;
      // Closing fence: same indent and character, at least as long as the opening one.
      const fenceRE = new RegExp(`^${indent}${fenceChar}{${fenceLen},}(?:[ \t]*|[ \t]+\\S.*)$`, 'm');

      const fenceStartPos = afterMarkerPos + openFenceMatch.index + openFenceMatch[0].length;
      const afterFence = rest.slice(openFenceMatch.index + openFenceMatch[0].length);
      // Skip the remainder of the opening fence line (the language tag).
      const langLineMatch = afterFence.match(/^(.*\r?\n)/);
      const codeBlockStart = fenceStartPos + (langLineMatch?.[0].length ?? 0);

      const closeMatch = fenceRE.exec(processText.slice(codeBlockStart));
      if (!closeMatch) {
        if (logger) {
          logger.warn(`Skipping file "${path}": Code block not closed (reached end of input).`);
        }
        // Skip this file and continue with the next one
        pos = afterMarkerPos;
        continue;
      }

      const closePosAbs = codeBlockStart + closeMatch.index + closeMatch[0].length;
      const rawContent = processText.slice(codeBlockStart, codeBlockStart + closeMatch.index);

      let content = rawContent;

      if (indent) {
        // The block was indented as a whole; remove that indent from every line.
        const indentRE = new RegExp(`^${indent}`, 'gm');
        content = content.replace(indentRE, '');
      }

      content = content
        .replace(/\r\n/g, '\n')
        .replace(/\r/g, '\n')
        .trimEnd();

      if (content.length > 0 || rawContent.trim().length > 0) {
        files.push({ path, content });
      } else {
        if (logger) {
          logger.warn(`Skipping file "${path}": Empty code block extracted.`);
        }
      }

      pos = closePosAbs;

      const trailing = processText.slice(pos).match(/^\r?\n/);
      if (trailing) {
        pos += trailing[0].length;
      }
    }

    return files;
  }
};
|