@xbrowser/cli 1.6.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2178,10 +2178,45 @@ function createTurndown() {
2178
2178
  },
2179
2179
  replacement: () => ""
2180
2180
  });
2181
+ turndown.addRule("complexTables", {
2182
+ filter: (node) => {
2183
+ if (typeof node !== "object" || node === null) return false;
2184
+ const n = node;
2185
+ return n.nodeName?.toLowerCase() === "table";
2186
+ },
2187
+ replacement: (_content, node) => {
2188
+ const html = node.outerHTML || "";
2189
+ if (!html) return "";
2190
+ const rows = [];
2191
+ const rowRegex = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
2192
+ let rowMatch;
2193
+ while ((rowMatch = rowRegex.exec(html)) !== null) {
2194
+ const cells = [];
2195
+ const cellRegex = /<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi;
2196
+ let cellMatch;
2197
+ while ((cellMatch = cellRegex.exec(rowMatch[1])) !== null) {
2198
+ cells.push(cellMatch[1].replace(/<[^>]+>/g, "").trim().replace(/\n/g, " "));
2199
+ }
2200
+ if (cells.length > 0) rows.push(cells);
2201
+ }
2202
+ if (rows.length === 0) return "";
2203
+ const mdRows = rows.map((cells) => {
2204
+ const escaped = cells.map((c) => c.replace(/\|/g, "\\|") || "");
2205
+ return `| ${escaped.join(" | ")} |`;
2206
+ });
2207
+ const hasHeader = /<th/i.test(html);
2208
+ if (hasHeader && mdRows.length > 0) {
2209
+ const colCount = rows[0].length;
2210
+ const sep = `| ${Array(colCount).fill("---").join(" | ")} |`;
2211
+ mdRows.splice(1, 0, sep);
2212
+ }
2213
+ return "\n\n" + mdRows.join("\n") + "\n\n";
2214
+ }
2215
+ });
2181
2216
  return turndown;
2182
2217
  }
2183
2218
  function postClean(md) {
2184
- md = md.replace(/<(?:table|div|tbody|thead|tr|td|th|span|colgroup|col)\b[^>]*(?:>[\s\S]{200,}?<\/(?:table|div|tbody|thead|tr|td|th|span|colgroup|col)>)/g, "\n[\u26A0\uFE0F HTML block removed \u2014 complex table/layout not converted to Markdown]\n");
2219
+ md = md.replace(/<[^>]+>/g, "");
2185
2220
  md = md.replace(/\n{3,}/g, "\n\n");
2186
2221
  md = md.replace(/!\[[^\]]*\]\(\s*\)/g, "");
2187
2222
  md = md.replace(/\[([^\]]*)\]\(\s*\)/g, "$1");
@@ -12523,7 +12558,7 @@ async function handleEvalMode(argv) {
12523
12558
  }
12524
12559
  async function handleChainInput(input, argv) {
12525
12560
  const cdpEndpoint = argv ? extractCdpFromArgv(argv) : void 0;
12526
- const jsonMode = argv ? argv.includes("--json") || argv.includes("-j") : false;
12561
+ const jsonMode = argv ? argv.some((a) => a === "--json" || a.startsWith("--json=") || a.includes(" --json") || a.startsWith("--json")) || argv.includes("-j") : false;
12527
12562
  const chainResult = await executeChain(input, { cdpEndpoint });
12528
12563
  if (jsonMode) {
12529
12564
  const output = {
@@ -2139,10 +2139,45 @@ function createTurndown() {
2139
2139
  },
2140
2140
  replacement: () => ""
2141
2141
  });
2142
+ turndown.addRule("complexTables", {
2143
+ filter: (node) => {
2144
+ if (typeof node !== "object" || node === null) return false;
2145
+ const n = node;
2146
+ return n.nodeName?.toLowerCase() === "table";
2147
+ },
2148
+ replacement: (_content, node) => {
2149
+ const html = node.outerHTML || "";
2150
+ if (!html) return "";
2151
+ const rows = [];
2152
+ const rowRegex = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
2153
+ let rowMatch;
2154
+ while ((rowMatch = rowRegex.exec(html)) !== null) {
2155
+ const cells = [];
2156
+ const cellRegex = /<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi;
2157
+ let cellMatch;
2158
+ while ((cellMatch = cellRegex.exec(rowMatch[1])) !== null) {
2159
+ cells.push(cellMatch[1].replace(/<[^>]+>/g, "").trim().replace(/\n/g, " "));
2160
+ }
2161
+ if (cells.length > 0) rows.push(cells);
2162
+ }
2163
+ if (rows.length === 0) return "";
2164
+ const mdRows = rows.map((cells) => {
2165
+ const escaped = cells.map((c) => c.replace(/\|/g, "\\|") || "");
2166
+ return `| ${escaped.join(" | ")} |`;
2167
+ });
2168
+ const hasHeader = /<th/i.test(html);
2169
+ if (hasHeader && mdRows.length > 0) {
2170
+ const colCount = rows[0].length;
2171
+ const sep = `| ${Array(colCount).fill("---").join(" | ")} |`;
2172
+ mdRows.splice(1, 0, sep);
2173
+ }
2174
+ return "\n\n" + mdRows.join("\n") + "\n\n";
2175
+ }
2176
+ });
2142
2177
  return turndown;
2143
2178
  }
2144
2179
  function postClean(md) {
2145
- md = md.replace(/<(?:table|div|tbody|thead|tr|td|th|span|colgroup|col)\b[^>]*(?:>[\s\S]{200,}?<\/(?:table|div|tbody|thead|tr|td|th|span|colgroup|col)>)/g, "\n[\u26A0\uFE0F HTML block removed \u2014 complex table/layout not converted to Markdown]\n");
2180
+ md = md.replace(/<[^>]+>/g, "");
2146
2181
  md = md.replace(/\n{3,}/g, "\n\n");
2147
2182
  md = md.replace(/!\[[^\]]*\]\(\s*\)/g, "");
2148
2183
  md = md.replace(/\[([^\]]*)\]\(\s*\)/g, "$1");
package/dist/index.js CHANGED
@@ -2218,10 +2218,45 @@ function createTurndown() {
2218
2218
  },
2219
2219
  replacement: () => ""
2220
2220
  });
2221
+ turndown.addRule("complexTables", {
2222
+ filter: (node) => {
2223
+ if (typeof node !== "object" || node === null) return false;
2224
+ const n = node;
2225
+ return n.nodeName?.toLowerCase() === "table";
2226
+ },
2227
+ replacement: (_content, node) => {
2228
+ const html = node.outerHTML || "";
2229
+ if (!html) return "";
2230
+ const rows = [];
2231
+ const rowRegex = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
2232
+ let rowMatch;
2233
+ while ((rowMatch = rowRegex.exec(html)) !== null) {
2234
+ const cells = [];
2235
+ const cellRegex = /<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi;
2236
+ let cellMatch;
2237
+ while ((cellMatch = cellRegex.exec(rowMatch[1])) !== null) {
2238
+ cells.push(cellMatch[1].replace(/<[^>]+>/g, "").trim().replace(/\n/g, " "));
2239
+ }
2240
+ if (cells.length > 0) rows.push(cells);
2241
+ }
2242
+ if (rows.length === 0) return "";
2243
+ const mdRows = rows.map((cells) => {
2244
+ const escaped = cells.map((c) => c.replace(/\|/g, "\\|") || "");
2245
+ return `| ${escaped.join(" | ")} |`;
2246
+ });
2247
+ const hasHeader = /<th/i.test(html);
2248
+ if (hasHeader && mdRows.length > 0) {
2249
+ const colCount = rows[0].length;
2250
+ const sep = `| ${Array(colCount).fill("---").join(" | ")} |`;
2251
+ mdRows.splice(1, 0, sep);
2252
+ }
2253
+ return "\n\n" + mdRows.join("\n") + "\n\n";
2254
+ }
2255
+ });
2221
2256
  return turndown;
2222
2257
  }
2223
2258
  function postClean(md) {
2224
- md = md.replace(/<(?:table|div|tbody|thead|tr|td|th|span|colgroup|col)\b[^>]*(?:>[\s\S]{200,}?<\/(?:table|div|tbody|thead|tr|td|th|span|colgroup|col)>)/g, "\n[\u26A0\uFE0F HTML block removed \u2014 complex table/layout not converted to Markdown]\n");
2259
+ md = md.replace(/<[^>]+>/g, "");
2225
2260
  md = md.replace(/\n{3,}/g, "\n\n");
2226
2261
  md = md.replace(/!\[[^\]]*\]\(\s*\)/g, "");
2227
2262
  md = md.replace(/\[([^\]]*)\]\(\s*\)/g, "$1");
@@ -12846,7 +12881,7 @@ async function handleEvalMode(argv) {
12846
12881
  }
12847
12882
  async function handleChainInput(input, argv) {
12848
12883
  const cdpEndpoint = argv ? extractCdpFromArgv(argv) : void 0;
12849
- const jsonMode = argv ? argv.includes("--json") || argv.includes("-j") : false;
12884
+ const jsonMode = argv ? argv.some((a) => a === "--json" || a.startsWith("--json=") || a.includes(" --json") || a.startsWith("--json")) || argv.includes("-j") : false;
12850
12885
  const chainResult = await executeChain(input, { cdpEndpoint });
12851
12886
  if (jsonMode) {
12852
12887
  const output = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xbrowser/cli",
3
- "version": "1.6.0",
3
+ "version": "1.6.2",
4
4
  "description": "Browser automation CLI for web scraping, headless browsing, SEO analysis, and AI agent workflows. A command-line alternative to Playwright, Puppeteer, and Selenium.",
5
5
  "type": "module",
6
6
  "bin": {