@xbrowser/cli 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +30 -29
- package/dist/daemon-main.js +30 -29
- package/dist/index.js +30 -29
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -2358,44 +2358,45 @@ var scrapeCommand = registerCommand({
|
|
|
2358
2358
|
switch (p.format) {
|
|
2359
2359
|
case "markdown": {
|
|
2360
2360
|
const tablesMd = await page.evaluate(() => {
|
|
2361
|
-
|
|
2362
|
-
"
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
|
|
2372
|
-
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
|
|
2361
|
+
document.querySelectorAll(
|
|
2362
|
+
'.el-table__fixed, .el-table__fixed-right, [class*="fixed-left"], [class*="fixed-right"], .ant-table-fixed-left, .ant-table-fixed-right'
|
|
2363
|
+
).forEach((el) => el.remove());
|
|
2364
|
+
document.querySelectorAll("table").forEach((t) => {
|
|
2365
|
+
if (t.closest(".el-table__fixed, .el-table__fixed-right")) t.remove();
|
|
2366
|
+
});
|
|
2367
|
+
const tables = document.querySelectorAll("table");
|
|
2368
|
+
if (tables.length === 0) {
|
|
2369
|
+
const altTables = document.querySelectorAll(
|
|
2370
|
+
'[role="table"], [role="grid"], .el-table__body, .ant-table-tbody'
|
|
2371
|
+
);
|
|
2372
|
+
if (altTables.length === 0) return "";
|
|
2373
|
+
return Array.from(altTables).map((table) => {
|
|
2374
|
+
return extractRowsFromContainer(table);
|
|
2375
|
+
}).filter((md) => md).join("\n\n");
|
|
2376
|
+
}
|
|
2376
2377
|
return Array.from(tables).map((table) => {
|
|
2377
|
-
|
|
2378
|
+
return extractRowsFromContainer(table);
|
|
2379
|
+
}).filter((md) => md).join("\n\n");
|
|
2380
|
+
function extractRowsFromContainer(container) {
|
|
2381
|
+
const rows = container.querySelectorAll(':scope > tr, :scope > thead > tr, :scope > tbody > tr, :scope > tfoot > tr, [role="row"]');
|
|
2378
2382
|
if (rows.length === 0) return "";
|
|
2379
2383
|
const mdRows = Array.from(rows).map((row) => {
|
|
2380
|
-
const cells = row.querySelectorAll('th, td, [role="columnheader"], [role="cell"]
|
|
2384
|
+
const cells = row.querySelectorAll(':scope > th, :scope > td, :scope > [role="columnheader"], :scope > [role="cell"]');
|
|
2385
|
+
if (cells.length === 0) return "";
|
|
2381
2386
|
return "| " + Array.from(cells).map((c) => {
|
|
2382
2387
|
const cellText = c.innerText?.trim().replace(/\n/g, " ") || "";
|
|
2383
2388
|
return cellText.replace(/\|/g, "\\|") || "";
|
|
2384
2389
|
}).join(" | ") + " |";
|
|
2385
|
-
}).
|
|
2390
|
+
}).filter((r) => r);
|
|
2391
|
+
if (mdRows.length === 0) return "";
|
|
2386
2392
|
const headerRow = rows[0];
|
|
2387
|
-
const headerCells = headerRow.querySelectorAll('th, [role="columnheader"]
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
const sep = "| " + Array(headerCount).fill("---").join(" | ") + " |";
|
|
2392
|
-
return mdRows.split("\n").map((line, i) => {
|
|
2393
|
-
if (i === 0) return line + "\n" + sep;
|
|
2394
|
-
return line;
|
|
2395
|
-
}).join("\n");
|
|
2393
|
+
const headerCells = headerRow.querySelectorAll(':scope > th, :scope > [role="columnheader"]');
|
|
2394
|
+
if (headerCells.length > 0) {
|
|
2395
|
+
const sep = "| " + Array(headerCells.length).fill("---").join(" | ") + " |";
|
|
2396
|
+
return mdRows[0] + "\n" + sep + "\n" + mdRows.slice(1).join("\n");
|
|
2396
2397
|
}
|
|
2397
|
-
return mdRows;
|
|
2398
|
-
}
|
|
2398
|
+
return mdRows.join("\n");
|
|
2399
|
+
}
|
|
2399
2400
|
});
|
|
2400
2401
|
content = htmlToMarkdown(html, { onlyMainContent: p.onlyMainContent });
|
|
2401
2402
|
if (tablesMd) {
|
package/dist/daemon-main.js
CHANGED
|
@@ -2316,44 +2316,45 @@ var scrapeCommand = registerCommand({
|
|
|
2316
2316
|
switch (p.format) {
|
|
2317
2317
|
case "markdown": {
|
|
2318
2318
|
const tablesMd = await page.evaluate(() => {
|
|
2319
|
-
|
|
2320
|
-
"
|
|
2321
|
-
|
|
2322
|
-
|
|
2323
|
-
|
|
2324
|
-
|
|
2325
|
-
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2319
|
+
document.querySelectorAll(
|
|
2320
|
+
'.el-table__fixed, .el-table__fixed-right, [class*="fixed-left"], [class*="fixed-right"], .ant-table-fixed-left, .ant-table-fixed-right'
|
|
2321
|
+
).forEach((el) => el.remove());
|
|
2322
|
+
document.querySelectorAll("table").forEach((t) => {
|
|
2323
|
+
if (t.closest(".el-table__fixed, .el-table__fixed-right")) t.remove();
|
|
2324
|
+
});
|
|
2325
|
+
const tables = document.querySelectorAll("table");
|
|
2326
|
+
if (tables.length === 0) {
|
|
2327
|
+
const altTables = document.querySelectorAll(
|
|
2328
|
+
'[role="table"], [role="grid"], .el-table__body, .ant-table-tbody'
|
|
2329
|
+
);
|
|
2330
|
+
if (altTables.length === 0) return "";
|
|
2331
|
+
return Array.from(altTables).map((table) => {
|
|
2332
|
+
return extractRowsFromContainer(table);
|
|
2333
|
+
}).filter((md) => md).join("\n\n");
|
|
2334
|
+
}
|
|
2334
2335
|
return Array.from(tables).map((table) => {
|
|
2335
|
-
|
|
2336
|
+
return extractRowsFromContainer(table);
|
|
2337
|
+
}).filter((md) => md).join("\n\n");
|
|
2338
|
+
function extractRowsFromContainer(container) {
|
|
2339
|
+
const rows = container.querySelectorAll(':scope > tr, :scope > thead > tr, :scope > tbody > tr, :scope > tfoot > tr, [role="row"]');
|
|
2336
2340
|
if (rows.length === 0) return "";
|
|
2337
2341
|
const mdRows = Array.from(rows).map((row) => {
|
|
2338
|
-
const cells = row.querySelectorAll('th, td, [role="columnheader"], [role="cell"]
|
|
2342
|
+
const cells = row.querySelectorAll(':scope > th, :scope > td, :scope > [role="columnheader"], :scope > [role="cell"]');
|
|
2343
|
+
if (cells.length === 0) return "";
|
|
2339
2344
|
return "| " + Array.from(cells).map((c) => {
|
|
2340
2345
|
const cellText = c.innerText?.trim().replace(/\n/g, " ") || "";
|
|
2341
2346
|
return cellText.replace(/\|/g, "\\|") || "";
|
|
2342
2347
|
}).join(" | ") + " |";
|
|
2343
|
-
}).
|
|
2348
|
+
}).filter((r) => r);
|
|
2349
|
+
if (mdRows.length === 0) return "";
|
|
2344
2350
|
const headerRow = rows[0];
|
|
2345
|
-
const headerCells = headerRow.querySelectorAll('th, [role="columnheader"]
|
|
2346
|
-
|
|
2347
|
-
|
|
2348
|
-
|
|
2349
|
-
const sep = "| " + Array(headerCount).fill("---").join(" | ") + " |";
|
|
2350
|
-
return mdRows.split("\n").map((line, i) => {
|
|
2351
|
-
if (i === 0) return line + "\n" + sep;
|
|
2352
|
-
return line;
|
|
2353
|
-
}).join("\n");
|
|
2351
|
+
const headerCells = headerRow.querySelectorAll(':scope > th, :scope > [role="columnheader"]');
|
|
2352
|
+
if (headerCells.length > 0) {
|
|
2353
|
+
const sep = "| " + Array(headerCells.length).fill("---").join(" | ") + " |";
|
|
2354
|
+
return mdRows[0] + "\n" + sep + "\n" + mdRows.slice(1).join("\n");
|
|
2354
2355
|
}
|
|
2355
|
-
return mdRows;
|
|
2356
|
-
}
|
|
2356
|
+
return mdRows.join("\n");
|
|
2357
|
+
}
|
|
2357
2358
|
});
|
|
2358
2359
|
content = htmlToMarkdown(html, { onlyMainContent: p.onlyMainContent });
|
|
2359
2360
|
if (tablesMd) {
|
package/dist/index.js
CHANGED
|
@@ -2398,44 +2398,45 @@ var scrapeCommand = registerCommand({
|
|
|
2398
2398
|
switch (p.format) {
|
|
2399
2399
|
case "markdown": {
|
|
2400
2400
|
const tablesMd = await page.evaluate(() => {
|
|
2401
|
-
|
|
2402
|
-
"
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
|
|
2409
|
-
|
|
2410
|
-
|
|
2411
|
-
|
|
2412
|
-
|
|
2413
|
-
|
|
2414
|
-
|
|
2415
|
-
|
|
2401
|
+
document.querySelectorAll(
|
|
2402
|
+
'.el-table__fixed, .el-table__fixed-right, [class*="fixed-left"], [class*="fixed-right"], .ant-table-fixed-left, .ant-table-fixed-right'
|
|
2403
|
+
).forEach((el) => el.remove());
|
|
2404
|
+
document.querySelectorAll("table").forEach((t) => {
|
|
2405
|
+
if (t.closest(".el-table__fixed, .el-table__fixed-right")) t.remove();
|
|
2406
|
+
});
|
|
2407
|
+
const tables = document.querySelectorAll("table");
|
|
2408
|
+
if (tables.length === 0) {
|
|
2409
|
+
const altTables = document.querySelectorAll(
|
|
2410
|
+
'[role="table"], [role="grid"], .el-table__body, .ant-table-tbody'
|
|
2411
|
+
);
|
|
2412
|
+
if (altTables.length === 0) return "";
|
|
2413
|
+
return Array.from(altTables).map((table) => {
|
|
2414
|
+
return extractRowsFromContainer(table);
|
|
2415
|
+
}).filter((md) => md).join("\n\n");
|
|
2416
|
+
}
|
|
2416
2417
|
return Array.from(tables).map((table) => {
|
|
2417
|
-
|
|
2418
|
+
return extractRowsFromContainer(table);
|
|
2419
|
+
}).filter((md) => md).join("\n\n");
|
|
2420
|
+
function extractRowsFromContainer(container) {
|
|
2421
|
+
const rows = container.querySelectorAll(':scope > tr, :scope > thead > tr, :scope > tbody > tr, :scope > tfoot > tr, [role="row"]');
|
|
2418
2422
|
if (rows.length === 0) return "";
|
|
2419
2423
|
const mdRows = Array.from(rows).map((row) => {
|
|
2420
|
-
const cells = row.querySelectorAll('th, td, [role="columnheader"], [role="cell"]
|
|
2424
|
+
const cells = row.querySelectorAll(':scope > th, :scope > td, :scope > [role="columnheader"], :scope > [role="cell"]');
|
|
2425
|
+
if (cells.length === 0) return "";
|
|
2421
2426
|
return "| " + Array.from(cells).map((c) => {
|
|
2422
2427
|
const cellText = c.innerText?.trim().replace(/\n/g, " ") || "";
|
|
2423
2428
|
return cellText.replace(/\|/g, "\\|") || "";
|
|
2424
2429
|
}).join(" | ") + " |";
|
|
2425
|
-
}).
|
|
2430
|
+
}).filter((r) => r);
|
|
2431
|
+
if (mdRows.length === 0) return "";
|
|
2426
2432
|
const headerRow = rows[0];
|
|
2427
|
-
const headerCells = headerRow.querySelectorAll('th, [role="columnheader"]
|
|
2428
|
-
|
|
2429
|
-
|
|
2430
|
-
|
|
2431
|
-
const sep = "| " + Array(headerCount).fill("---").join(" | ") + " |";
|
|
2432
|
-
return mdRows.split("\n").map((line, i) => {
|
|
2433
|
-
if (i === 0) return line + "\n" + sep;
|
|
2434
|
-
return line;
|
|
2435
|
-
}).join("\n");
|
|
2433
|
+
const headerCells = headerRow.querySelectorAll(':scope > th, :scope > [role="columnheader"]');
|
|
2434
|
+
if (headerCells.length > 0) {
|
|
2435
|
+
const sep = "| " + Array(headerCells.length).fill("---").join(" | ") + " |";
|
|
2436
|
+
return mdRows[0] + "\n" + sep + "\n" + mdRows.slice(1).join("\n");
|
|
2436
2437
|
}
|
|
2437
|
-
return mdRows;
|
|
2438
|
-
}
|
|
2438
|
+
return mdRows.join("\n");
|
|
2439
|
+
}
|
|
2439
2440
|
});
|
|
2440
2441
|
content = htmlToMarkdown(html, { onlyMainContent: p.onlyMainContent });
|
|
2441
2442
|
if (tablesMd) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xbrowser/cli",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"description": "Browser automation CLI for web scraping, headless browsing, SEO analysis, and AI agent workflows. A command-line alternative to Playwright, Puppeteer, and Selenium.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|