@xbrowser/cli 1.2.0 → 1.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2358,44 +2358,45 @@ var scrapeCommand = registerCommand({
2358
2358
  switch (p.format) {
2359
2359
  case "markdown": {
2360
2360
  const tablesMd = await page.evaluate(() => {
2361
- const tableSelectors = [
2362
- "table",
2363
- '[role="table"]',
2364
- '[role="grid"]',
2365
- '[class*="el-table"]',
2366
- // Element UI
2367
- '[class*="ant-table"]',
2368
- // Ant Design
2369
- '[class*="MuiTable"]',
2370
- // Material UI
2371
- '[class*="table"]'
2372
- // Generic table-like
2373
- ].join(",");
2374
- const tables = document.querySelectorAll(tableSelectors);
2375
- if (tables.length === 0) return "";
2361
+ document.querySelectorAll(
2362
+ '.el-table__fixed, .el-table__fixed-right, [class*="fixed-left"], [class*="fixed-right"], .ant-table-fixed-left, .ant-table-fixed-right'
2363
+ ).forEach((el) => el.remove());
2364
+ document.querySelectorAll("table").forEach((t) => {
2365
+ if (t.closest(".el-table__fixed, .el-table__fixed-right")) t.remove();
2366
+ });
2367
+ const tables = document.querySelectorAll("table");
2368
+ if (tables.length === 0) {
2369
+ const altTables = document.querySelectorAll(
2370
+ '[role="table"], [role="grid"], .el-table__body, .ant-table-tbody'
2371
+ );
2372
+ if (altTables.length === 0) return "";
2373
+ return Array.from(altTables).map((table) => {
2374
+ return extractRowsFromContainer(table);
2375
+ }).filter((md) => md).join("\n\n");
2376
+ }
2376
2377
  return Array.from(tables).map((table) => {
2377
- const rows = table.querySelectorAll('tr, [role="row"], [class*="row"]');
2378
+ return extractRowsFromContainer(table);
2379
+ }).filter((md) => md).join("\n\n");
2380
+ function extractRowsFromContainer(container) {
2381
+ const rows = container.querySelectorAll(':scope > tr, :scope > thead > tr, :scope > tbody > tr, :scope > tfoot > tr, [role="row"]');
2378
2382
  if (rows.length === 0) return "";
2379
2383
  const mdRows = Array.from(rows).map((row) => {
2380
- const cells = row.querySelectorAll('th, td, [role="columnheader"], [role="cell"], [class*="cell"], [class*="col"]');
2384
+ const cells = row.querySelectorAll(':scope > th, :scope > td, :scope > [role="columnheader"], :scope > [role="cell"]');
2385
+ if (cells.length === 0) return "";
2381
2386
  return "| " + Array.from(cells).map((c) => {
2382
2387
  const cellText = c.innerText?.trim().replace(/\n/g, " ") || "";
2383
2388
  return cellText.replace(/\|/g, "\\|") || "";
2384
2389
  }).join(" | ") + " |";
2385
- }).join("\n");
2390
+ }).filter((r) => r);
2391
+ if (mdRows.length === 0) return "";
2386
2392
  const headerRow = rows[0];
2387
- const headerCells = headerRow.querySelectorAll('th, [role="columnheader"], [class*="header"]');
2388
- const hasHeader = headerCells.length > 0;
2389
- if (hasHeader && mdRows) {
2390
- const headerCount = headerCells.length;
2391
- const sep = "| " + Array(headerCount).fill("---").join(" | ") + " |";
2392
- return mdRows.split("\n").map((line, i) => {
2393
- if (i === 0) return line + "\n" + sep;
2394
- return line;
2395
- }).join("\n");
2393
+ const headerCells = headerRow.querySelectorAll(':scope > th, :scope > [role="columnheader"]');
2394
+ if (headerCells.length > 0) {
2395
+ const sep = "| " + Array(headerCells.length).fill("---").join(" | ") + " |";
2396
+ return mdRows[0] + "\n" + sep + "\n" + mdRows.slice(1).join("\n");
2396
2397
  }
2397
- return mdRows;
2398
- }).join("\n\n");
2398
+ return mdRows.join("\n");
2399
+ }
2399
2400
  });
2400
2401
  content = htmlToMarkdown(html, { onlyMainContent: p.onlyMainContent });
2401
2402
  if (tablesMd) {
@@ -2316,44 +2316,45 @@ var scrapeCommand = registerCommand({
2316
2316
  switch (p.format) {
2317
2317
  case "markdown": {
2318
2318
  const tablesMd = await page.evaluate(() => {
2319
- const tableSelectors = [
2320
- "table",
2321
- '[role="table"]',
2322
- '[role="grid"]',
2323
- '[class*="el-table"]',
2324
- // Element UI
2325
- '[class*="ant-table"]',
2326
- // Ant Design
2327
- '[class*="MuiTable"]',
2328
- // Material UI
2329
- '[class*="table"]'
2330
- // Generic table-like
2331
- ].join(",");
2332
- const tables = document.querySelectorAll(tableSelectors);
2333
- if (tables.length === 0) return "";
2319
+ document.querySelectorAll(
2320
+ '.el-table__fixed, .el-table__fixed-right, [class*="fixed-left"], [class*="fixed-right"], .ant-table-fixed-left, .ant-table-fixed-right'
2321
+ ).forEach((el) => el.remove());
2322
+ document.querySelectorAll("table").forEach((t) => {
2323
+ if (t.closest(".el-table__fixed, .el-table__fixed-right")) t.remove();
2324
+ });
2325
+ const tables = document.querySelectorAll("table");
2326
+ if (tables.length === 0) {
2327
+ const altTables = document.querySelectorAll(
2328
+ '[role="table"], [role="grid"], .el-table__body, .ant-table-tbody'
2329
+ );
2330
+ if (altTables.length === 0) return "";
2331
+ return Array.from(altTables).map((table) => {
2332
+ return extractRowsFromContainer(table);
2333
+ }).filter((md) => md).join("\n\n");
2334
+ }
2334
2335
  return Array.from(tables).map((table) => {
2335
- const rows = table.querySelectorAll('tr, [role="row"], [class*="row"]');
2336
+ return extractRowsFromContainer(table);
2337
+ }).filter((md) => md).join("\n\n");
2338
+ function extractRowsFromContainer(container) {
2339
+ const rows = container.querySelectorAll(':scope > tr, :scope > thead > tr, :scope > tbody > tr, :scope > tfoot > tr, [role="row"]');
2336
2340
  if (rows.length === 0) return "";
2337
2341
  const mdRows = Array.from(rows).map((row) => {
2338
- const cells = row.querySelectorAll('th, td, [role="columnheader"], [role="cell"], [class*="cell"], [class*="col"]');
2342
+ const cells = row.querySelectorAll(':scope > th, :scope > td, :scope > [role="columnheader"], :scope > [role="cell"]');
2343
+ if (cells.length === 0) return "";
2339
2344
  return "| " + Array.from(cells).map((c) => {
2340
2345
  const cellText = c.innerText?.trim().replace(/\n/g, " ") || "";
2341
2346
  return cellText.replace(/\|/g, "\\|") || "";
2342
2347
  }).join(" | ") + " |";
2343
- }).join("\n");
2348
+ }).filter((r) => r);
2349
+ if (mdRows.length === 0) return "";
2344
2350
  const headerRow = rows[0];
2345
- const headerCells = headerRow.querySelectorAll('th, [role="columnheader"], [class*="header"]');
2346
- const hasHeader = headerCells.length > 0;
2347
- if (hasHeader && mdRows) {
2348
- const headerCount = headerCells.length;
2349
- const sep = "| " + Array(headerCount).fill("---").join(" | ") + " |";
2350
- return mdRows.split("\n").map((line, i) => {
2351
- if (i === 0) return line + "\n" + sep;
2352
- return line;
2353
- }).join("\n");
2351
+ const headerCells = headerRow.querySelectorAll(':scope > th, :scope > [role="columnheader"]');
2352
+ if (headerCells.length > 0) {
2353
+ const sep = "| " + Array(headerCells.length).fill("---").join(" | ") + " |";
2354
+ return mdRows[0] + "\n" + sep + "\n" + mdRows.slice(1).join("\n");
2354
2355
  }
2355
- return mdRows;
2356
- }).join("\n\n");
2356
+ return mdRows.join("\n");
2357
+ }
2357
2358
  });
2358
2359
  content = htmlToMarkdown(html, { onlyMainContent: p.onlyMainContent });
2359
2360
  if (tablesMd) {
package/dist/index.js CHANGED
@@ -2398,44 +2398,45 @@ var scrapeCommand = registerCommand({
2398
2398
  switch (p.format) {
2399
2399
  case "markdown": {
2400
2400
  const tablesMd = await page.evaluate(() => {
2401
- const tableSelectors = [
2402
- "table",
2403
- '[role="table"]',
2404
- '[role="grid"]',
2405
- '[class*="el-table"]',
2406
- // Element UI
2407
- '[class*="ant-table"]',
2408
- // Ant Design
2409
- '[class*="MuiTable"]',
2410
- // Material UI
2411
- '[class*="table"]'
2412
- // Generic table-like
2413
- ].join(",");
2414
- const tables = document.querySelectorAll(tableSelectors);
2415
- if (tables.length === 0) return "";
2401
+ document.querySelectorAll(
2402
+ '.el-table__fixed, .el-table__fixed-right, [class*="fixed-left"], [class*="fixed-right"], .ant-table-fixed-left, .ant-table-fixed-right'
2403
+ ).forEach((el) => el.remove());
2404
+ document.querySelectorAll("table").forEach((t) => {
2405
+ if (t.closest(".el-table__fixed, .el-table__fixed-right")) t.remove();
2406
+ });
2407
+ const tables = document.querySelectorAll("table");
2408
+ if (tables.length === 0) {
2409
+ const altTables = document.querySelectorAll(
2410
+ '[role="table"], [role="grid"], .el-table__body, .ant-table-tbody'
2411
+ );
2412
+ if (altTables.length === 0) return "";
2413
+ return Array.from(altTables).map((table) => {
2414
+ return extractRowsFromContainer(table);
2415
+ }).filter((md) => md).join("\n\n");
2416
+ }
2416
2417
  return Array.from(tables).map((table) => {
2417
- const rows = table.querySelectorAll('tr, [role="row"], [class*="row"]');
2418
+ return extractRowsFromContainer(table);
2419
+ }).filter((md) => md).join("\n\n");
2420
+ function extractRowsFromContainer(container) {
2421
+ const rows = container.querySelectorAll(':scope > tr, :scope > thead > tr, :scope > tbody > tr, :scope > tfoot > tr, [role="row"]');
2418
2422
  if (rows.length === 0) return "";
2419
2423
  const mdRows = Array.from(rows).map((row) => {
2420
- const cells = row.querySelectorAll('th, td, [role="columnheader"], [role="cell"], [class*="cell"], [class*="col"]');
2424
+ const cells = row.querySelectorAll(':scope > th, :scope > td, :scope > [role="columnheader"], :scope > [role="cell"]');
2425
+ if (cells.length === 0) return "";
2421
2426
  return "| " + Array.from(cells).map((c) => {
2422
2427
  const cellText = c.innerText?.trim().replace(/\n/g, " ") || "";
2423
2428
  return cellText.replace(/\|/g, "\\|") || "";
2424
2429
  }).join(" | ") + " |";
2425
- }).join("\n");
2430
+ }).filter((r) => r);
2431
+ if (mdRows.length === 0) return "";
2426
2432
  const headerRow = rows[0];
2427
- const headerCells = headerRow.querySelectorAll('th, [role="columnheader"], [class*="header"]');
2428
- const hasHeader = headerCells.length > 0;
2429
- if (hasHeader && mdRows) {
2430
- const headerCount = headerCells.length;
2431
- const sep = "| " + Array(headerCount).fill("---").join(" | ") + " |";
2432
- return mdRows.split("\n").map((line, i) => {
2433
- if (i === 0) return line + "\n" + sep;
2434
- return line;
2435
- }).join("\n");
2433
+ const headerCells = headerRow.querySelectorAll(':scope > th, :scope > [role="columnheader"]');
2434
+ if (headerCells.length > 0) {
2435
+ const sep = "| " + Array(headerCells.length).fill("---").join(" | ") + " |";
2436
+ return mdRows[0] + "\n" + sep + "\n" + mdRows.slice(1).join("\n");
2436
2437
  }
2437
- return mdRows;
2438
- }).join("\n\n");
2438
+ return mdRows.join("\n");
2439
+ }
2439
2440
  });
2440
2441
  content = htmlToMarkdown(html, { onlyMainContent: p.onlyMainContent });
2441
2442
  if (tablesMd) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xbrowser/cli",
3
- "version": "1.2.0",
3
+ "version": "1.2.1",
4
4
  "description": "Browser automation CLI for web scraping, headless browsing, SEO analysis, and AI agent workflows. A command-line alternative to Playwright, Puppeteer, and Selenium.",
5
5
  "type": "module",
6
6
  "bin": {