@xbrowser/cli 1.6.0 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +37 -2
- package/dist/daemon-main.js +36 -1
- package/dist/index.js +37 -2
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -2178,10 +2178,45 @@ function createTurndown() {
|
|
|
2178
2178
|
},
|
|
2179
2179
|
replacement: () => ""
|
|
2180
2180
|
});
|
|
2181
|
+
turndown.addRule("complexTables", {
|
|
2182
|
+
filter: (node) => {
|
|
2183
|
+
if (typeof node !== "object" || node === null) return false;
|
|
2184
|
+
const n = node;
|
|
2185
|
+
return n.nodeName?.toLowerCase() === "table";
|
|
2186
|
+
},
|
|
2187
|
+
replacement: (_content, node) => {
|
|
2188
|
+
const html = node.outerHTML || "";
|
|
2189
|
+
if (!html) return "";
|
|
2190
|
+
const rows = [];
|
|
2191
|
+
const rowRegex = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
|
|
2192
|
+
let rowMatch;
|
|
2193
|
+
while ((rowMatch = rowRegex.exec(html)) !== null) {
|
|
2194
|
+
const cells = [];
|
|
2195
|
+
const cellRegex = /<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi;
|
|
2196
|
+
let cellMatch;
|
|
2197
|
+
while ((cellMatch = cellRegex.exec(rowMatch[1])) !== null) {
|
|
2198
|
+
cells.push(cellMatch[1].replace(/<[^>]+>/g, "").trim().replace(/\n/g, " "));
|
|
2199
|
+
}
|
|
2200
|
+
if (cells.length > 0) rows.push(cells);
|
|
2201
|
+
}
|
|
2202
|
+
if (rows.length === 0) return "";
|
|
2203
|
+
const mdRows = rows.map((cells) => {
|
|
2204
|
+
const escaped = cells.map((c) => c.replace(/\|/g, "\\|") || "");
|
|
2205
|
+
return `| ${escaped.join(" | ")} |`;
|
|
2206
|
+
});
|
|
2207
|
+
const hasHeader = /<th/i.test(html);
|
|
2208
|
+
if (hasHeader && mdRows.length > 0) {
|
|
2209
|
+
const colCount = rows[0].length;
|
|
2210
|
+
const sep = `| ${Array(colCount).fill("---").join(" | ")} |`;
|
|
2211
|
+
mdRows.splice(1, 0, sep);
|
|
2212
|
+
}
|
|
2213
|
+
return "\n\n" + mdRows.join("\n") + "\n\n";
|
|
2214
|
+
}
|
|
2215
|
+
});
|
|
2181
2216
|
return turndown;
|
|
2182
2217
|
}
|
|
2183
2218
|
function postClean(md) {
|
|
2184
|
-
md = md.replace(/<
|
|
2219
|
+
md = md.replace(/<[^>]+>/g, "");
|
|
2185
2220
|
md = md.replace(/\n{3,}/g, "\n\n");
|
|
2186
2221
|
md = md.replace(/!\[[^\]]*\]\(\s*\)/g, "");
|
|
2187
2222
|
md = md.replace(/\[([^\]]*)\]\(\s*\)/g, "$1");
|
|
@@ -12523,7 +12558,7 @@ async function handleEvalMode(argv) {
|
|
|
12523
12558
|
}
|
|
12524
12559
|
async function handleChainInput(input, argv) {
|
|
12525
12560
|
const cdpEndpoint = argv ? extractCdpFromArgv(argv) : void 0;
|
|
12526
|
-
const jsonMode = argv ? argv.includes("--json") || argv.includes("-j") : false;
|
|
12561
|
+
const jsonMode = argv ? argv.some((a) => a === "--json" || a.startsWith("--json=") || a.includes(" --json") || a.startsWith("--json")) || argv.includes("-j") : false;
|
|
12527
12562
|
const chainResult = await executeChain(input, { cdpEndpoint });
|
|
12528
12563
|
if (jsonMode) {
|
|
12529
12564
|
const output = {
|
package/dist/daemon-main.js
CHANGED
|
@@ -2139,10 +2139,45 @@ function createTurndown() {
|
|
|
2139
2139
|
},
|
|
2140
2140
|
replacement: () => ""
|
|
2141
2141
|
});
|
|
2142
|
+
turndown.addRule("complexTables", {
|
|
2143
|
+
filter: (node) => {
|
|
2144
|
+
if (typeof node !== "object" || node === null) return false;
|
|
2145
|
+
const n = node;
|
|
2146
|
+
return n.nodeName?.toLowerCase() === "table";
|
|
2147
|
+
},
|
|
2148
|
+
replacement: (_content, node) => {
|
|
2149
|
+
const html = node.outerHTML || "";
|
|
2150
|
+
if (!html) return "";
|
|
2151
|
+
const rows = [];
|
|
2152
|
+
const rowRegex = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
|
|
2153
|
+
let rowMatch;
|
|
2154
|
+
while ((rowMatch = rowRegex.exec(html)) !== null) {
|
|
2155
|
+
const cells = [];
|
|
2156
|
+
const cellRegex = /<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi;
|
|
2157
|
+
let cellMatch;
|
|
2158
|
+
while ((cellMatch = cellRegex.exec(rowMatch[1])) !== null) {
|
|
2159
|
+
cells.push(cellMatch[1].replace(/<[^>]+>/g, "").trim().replace(/\n/g, " "));
|
|
2160
|
+
}
|
|
2161
|
+
if (cells.length > 0) rows.push(cells);
|
|
2162
|
+
}
|
|
2163
|
+
if (rows.length === 0) return "";
|
|
2164
|
+
const mdRows = rows.map((cells) => {
|
|
2165
|
+
const escaped = cells.map((c) => c.replace(/\|/g, "\\|") || "");
|
|
2166
|
+
return `| ${escaped.join(" | ")} |`;
|
|
2167
|
+
});
|
|
2168
|
+
const hasHeader = /<th/i.test(html);
|
|
2169
|
+
if (hasHeader && mdRows.length > 0) {
|
|
2170
|
+
const colCount = rows[0].length;
|
|
2171
|
+
const sep = `| ${Array(colCount).fill("---").join(" | ")} |`;
|
|
2172
|
+
mdRows.splice(1, 0, sep);
|
|
2173
|
+
}
|
|
2174
|
+
return "\n\n" + mdRows.join("\n") + "\n\n";
|
|
2175
|
+
}
|
|
2176
|
+
});
|
|
2142
2177
|
return turndown;
|
|
2143
2178
|
}
|
|
2144
2179
|
function postClean(md) {
|
|
2145
|
-
md = md.replace(/<
|
|
2180
|
+
md = md.replace(/<[^>]+>/g, "");
|
|
2146
2181
|
md = md.replace(/\n{3,}/g, "\n\n");
|
|
2147
2182
|
md = md.replace(/!\[[^\]]*\]\(\s*\)/g, "");
|
|
2148
2183
|
md = md.replace(/\[([^\]]*)\]\(\s*\)/g, "$1");
|
package/dist/index.js
CHANGED
|
@@ -2218,10 +2218,45 @@ function createTurndown() {
|
|
|
2218
2218
|
},
|
|
2219
2219
|
replacement: () => ""
|
|
2220
2220
|
});
|
|
2221
|
+
turndown.addRule("complexTables", {
|
|
2222
|
+
filter: (node) => {
|
|
2223
|
+
if (typeof node !== "object" || node === null) return false;
|
|
2224
|
+
const n = node;
|
|
2225
|
+
return n.nodeName?.toLowerCase() === "table";
|
|
2226
|
+
},
|
|
2227
|
+
replacement: (_content, node) => {
|
|
2228
|
+
const html = node.outerHTML || "";
|
|
2229
|
+
if (!html) return "";
|
|
2230
|
+
const rows = [];
|
|
2231
|
+
const rowRegex = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
|
|
2232
|
+
let rowMatch;
|
|
2233
|
+
while ((rowMatch = rowRegex.exec(html)) !== null) {
|
|
2234
|
+
const cells = [];
|
|
2235
|
+
const cellRegex = /<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi;
|
|
2236
|
+
let cellMatch;
|
|
2237
|
+
while ((cellMatch = cellRegex.exec(rowMatch[1])) !== null) {
|
|
2238
|
+
cells.push(cellMatch[1].replace(/<[^>]+>/g, "").trim().replace(/\n/g, " "));
|
|
2239
|
+
}
|
|
2240
|
+
if (cells.length > 0) rows.push(cells);
|
|
2241
|
+
}
|
|
2242
|
+
if (rows.length === 0) return "";
|
|
2243
|
+
const mdRows = rows.map((cells) => {
|
|
2244
|
+
const escaped = cells.map((c) => c.replace(/\|/g, "\\|") || "");
|
|
2245
|
+
return `| ${escaped.join(" | ")} |`;
|
|
2246
|
+
});
|
|
2247
|
+
const hasHeader = /<th/i.test(html);
|
|
2248
|
+
if (hasHeader && mdRows.length > 0) {
|
|
2249
|
+
const colCount = rows[0].length;
|
|
2250
|
+
const sep = `| ${Array(colCount).fill("---").join(" | ")} |`;
|
|
2251
|
+
mdRows.splice(1, 0, sep);
|
|
2252
|
+
}
|
|
2253
|
+
return "\n\n" + mdRows.join("\n") + "\n\n";
|
|
2254
|
+
}
|
|
2255
|
+
});
|
|
2221
2256
|
return turndown;
|
|
2222
2257
|
}
|
|
2223
2258
|
function postClean(md) {
|
|
2224
|
-
md = md.replace(/<
|
|
2259
|
+
md = md.replace(/<[^>]+>/g, "");
|
|
2225
2260
|
md = md.replace(/\n{3,}/g, "\n\n");
|
|
2226
2261
|
md = md.replace(/!\[[^\]]*\]\(\s*\)/g, "");
|
|
2227
2262
|
md = md.replace(/\[([^\]]*)\]\(\s*\)/g, "$1");
|
|
@@ -12846,7 +12881,7 @@ async function handleEvalMode(argv) {
|
|
|
12846
12881
|
}
|
|
12847
12882
|
async function handleChainInput(input, argv) {
|
|
12848
12883
|
const cdpEndpoint = argv ? extractCdpFromArgv(argv) : void 0;
|
|
12849
|
-
const jsonMode = argv ? argv.includes("--json") || argv.includes("-j") : false;
|
|
12884
|
+
const jsonMode = argv ? argv.some((a) => a === "--json" || a.startsWith("--json=") || a.includes(" --json") || a.startsWith("--json")) || argv.includes("-j") : false;
|
|
12850
12885
|
const chainResult = await executeChain(input, { cdpEndpoint });
|
|
12851
12886
|
if (jsonMode) {
|
|
12852
12887
|
const output = {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xbrowser/cli",
|
|
3
|
-
"version": "1.6.0",
|
|
3
|
+
"version": "1.6.2",
|
|
4
4
|
"description": "Browser automation CLI for web scraping, headless browsing, SEO analysis, and AI agent workflows. A command-line alternative to Playwright, Puppeteer, and Selenium.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|