opensteer 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -6
- package/dist/{chunk-SXPIGCSD.js → chunk-XIH3WGPY.js} +99 -32
- package/dist/cli/server.cjs +100 -33
- package/dist/cli/server.js +2 -2
- package/dist/index.cjs +99 -32
- package/dist/index.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -134,12 +134,6 @@ Install the Opensteer skill pack:
|
|
|
134
134
|
opensteer skills install
|
|
135
135
|
```
|
|
136
136
|
|
|
137
|
-
Fallback (direct upstream `skills` CLI):
|
|
138
|
-
|
|
139
|
-
```bash
|
|
140
|
-
npx skills add https://github.com/steerlabs/opensteer-skills --skill opensteer
|
|
141
|
-
```
|
|
142
|
-
|
|
143
137
|
Claude Code marketplace plugin:
|
|
144
138
|
|
|
145
139
|
```text
|
|
@@ -2999,8 +2999,56 @@ function applyCleaner(mode, html) {
|
|
|
2999
2999
|
return cleanForAction(html);
|
|
3000
3000
|
}
|
|
3001
3001
|
}
|
|
3002
|
+
function canonicalizeDuplicateNodeIds($) {
|
|
3003
|
+
const occurrencesByNodeId = /* @__PURE__ */ new Map();
|
|
3004
|
+
let order = 0;
|
|
3005
|
+
$("*").each(function() {
|
|
3006
|
+
const element = this;
|
|
3007
|
+
const nodeId = $(element).attr(OS_NODE_ID_ATTR);
|
|
3008
|
+
if (!nodeId) {
|
|
3009
|
+
order += 1;
|
|
3010
|
+
return;
|
|
3011
|
+
}
|
|
3012
|
+
const list = occurrencesByNodeId.get(nodeId) || [];
|
|
3013
|
+
list.push({
|
|
3014
|
+
element,
|
|
3015
|
+
order
|
|
3016
|
+
});
|
|
3017
|
+
occurrencesByNodeId.set(nodeId, list);
|
|
3018
|
+
order += 1;
|
|
3019
|
+
});
|
|
3020
|
+
for (const occurrences of occurrencesByNodeId.values()) {
|
|
3021
|
+
if (occurrences.length <= 1) continue;
|
|
3022
|
+
const canonical = pickCanonicalNodeIdOccurrence($, occurrences);
|
|
3023
|
+
for (const occurrence of occurrences) {
|
|
3024
|
+
if (occurrence.element === canonical.element) continue;
|
|
3025
|
+
$(occurrence.element).removeAttr(OS_NODE_ID_ATTR);
|
|
3026
|
+
}
|
|
3027
|
+
}
|
|
3028
|
+
}
|
|
3029
|
+
function pickCanonicalNodeIdOccurrence($, occurrences) {
|
|
3030
|
+
let best = occurrences[0];
|
|
3031
|
+
let bestScore = scoreNodeIdOccurrence($, best.element);
|
|
3032
|
+
for (let i = 1; i < occurrences.length; i += 1) {
|
|
3033
|
+
const candidate = occurrences[i];
|
|
3034
|
+
const candidateScore = scoreNodeIdOccurrence($, candidate.element);
|
|
3035
|
+
if (candidateScore > bestScore || candidateScore === bestScore && candidate.order < best.order) {
|
|
3036
|
+
best = candidate;
|
|
3037
|
+
bestScore = candidateScore;
|
|
3038
|
+
}
|
|
3039
|
+
}
|
|
3040
|
+
return best;
|
|
3041
|
+
}
|
|
3042
|
+
function scoreNodeIdOccurrence($, element) {
|
|
3043
|
+
const el = $(element);
|
|
3044
|
+
const descendantCount = el.find("*").length;
|
|
3045
|
+
const normalizedTextLength = el.text().replace(/\s+/g, " ").trim().length;
|
|
3046
|
+
const attributeCount = Object.keys(el.attr() || {}).length;
|
|
3047
|
+
return descendantCount * 100 + normalizedTextLength * 10 + attributeCount;
|
|
3048
|
+
}
|
|
3002
3049
|
async function assignCounters(page, html, nodePaths, nodeMeta) {
|
|
3003
3050
|
const $ = cheerio3.load(html, { xmlMode: false });
|
|
3051
|
+
canonicalizeDuplicateNodeIds($);
|
|
3004
3052
|
const counterIndex = /* @__PURE__ */ new Map();
|
|
3005
3053
|
let nextCounter = 1;
|
|
3006
3054
|
const assignedByNodeId = /* @__PURE__ */ new Map();
|
|
@@ -3182,44 +3230,63 @@ function stripNodeIds(html) {
|
|
|
3182
3230
|
$(`[${OS_NODE_ID_ATTR}]`).removeAttr(OS_NODE_ID_ATTR);
|
|
3183
3231
|
return $.html();
|
|
3184
3232
|
}
|
|
3233
|
+
function isLiveCounterSyncFailure(error) {
|
|
3234
|
+
if (!(error instanceof Error)) return false;
|
|
3235
|
+
return error.message.startsWith(
|
|
3236
|
+
"Failed to synchronize snapshot counters with the live DOM:"
|
|
3237
|
+
);
|
|
3238
|
+
}
|
|
3185
3239
|
async function prepareSnapshot(page, options = {}) {
|
|
3186
3240
|
const mode = options.mode ?? "action";
|
|
3187
3241
|
const withCounters = options.withCounters ?? true;
|
|
3188
3242
|
const shouldMarkInteractive = options.markInteractive ?? true;
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
|
|
3197
|
-
|
|
3198
|
-
|
|
3199
|
-
|
|
3200
|
-
|
|
3243
|
+
const maxAttempts = withCounters ? 4 : 1;
|
|
3244
|
+
let lastCounterSyncError = null;
|
|
3245
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
|
|
3246
|
+
if (shouldMarkInteractive) {
|
|
3247
|
+
await markInteractiveElements(page);
|
|
3248
|
+
}
|
|
3249
|
+
const serialized = await serializePageHTML(page);
|
|
3250
|
+
const rawHtml = serialized.html;
|
|
3251
|
+
const processedHtml = rawHtml;
|
|
3252
|
+
const reducedHtml = applyCleaner(mode, processedHtml);
|
|
3253
|
+
let cleanedHtml = reducedHtml;
|
|
3254
|
+
let counterIndex = null;
|
|
3255
|
+
if (withCounters) {
|
|
3256
|
+
try {
|
|
3257
|
+
const counted = await assignCounters(
|
|
3258
|
+
page,
|
|
3259
|
+
reducedHtml,
|
|
3260
|
+
serialized.nodePaths,
|
|
3261
|
+
serialized.nodeMeta
|
|
3262
|
+
);
|
|
3263
|
+
cleanedHtml = counted.html;
|
|
3264
|
+
counterIndex = counted.counterIndex;
|
|
3265
|
+
} catch (error) {
|
|
3266
|
+
if (attempt < maxAttempts && isLiveCounterSyncFailure(error)) {
|
|
3267
|
+
lastCounterSyncError = error;
|
|
3268
|
+
continue;
|
|
3269
|
+
}
|
|
3270
|
+
throw error;
|
|
3271
|
+
}
|
|
3272
|
+
} else {
|
|
3273
|
+
cleanedHtml = stripNodeIds(cleanedHtml);
|
|
3274
|
+
}
|
|
3275
|
+
if (mode === "extraction") {
|
|
3276
|
+
const $unwrap = cheerio3.load(cleanedHtml, { xmlMode: false });
|
|
3277
|
+
cleanedHtml = $unwrap("body").html()?.trim() || cleanedHtml;
|
|
3278
|
+
}
|
|
3279
|
+
return {
|
|
3280
|
+
mode,
|
|
3281
|
+
url: page.url(),
|
|
3282
|
+
rawHtml,
|
|
3283
|
+
processedHtml,
|
|
3201
3284
|
reducedHtml,
|
|
3202
|
-
|
|
3203
|
-
|
|
3204
|
-
|
|
3205
|
-
cleanedHtml = counted.html;
|
|
3206
|
-
counterIndex = counted.counterIndex;
|
|
3207
|
-
} else {
|
|
3208
|
-
cleanedHtml = stripNodeIds(cleanedHtml);
|
|
3209
|
-
}
|
|
3210
|
-
if (mode === "extraction") {
|
|
3211
|
-
const $unwrap = cheerio3.load(cleanedHtml, { xmlMode: false });
|
|
3212
|
-
cleanedHtml = $unwrap("body").html()?.trim() || cleanedHtml;
|
|
3285
|
+
cleanedHtml,
|
|
3286
|
+
counterIndex
|
|
3287
|
+
};
|
|
3213
3288
|
}
|
|
3214
|
-
|
|
3215
|
-
mode,
|
|
3216
|
-
url: page.url(),
|
|
3217
|
-
rawHtml,
|
|
3218
|
-
processedHtml,
|
|
3219
|
-
reducedHtml,
|
|
3220
|
-
cleanedHtml,
|
|
3221
|
-
counterIndex
|
|
3222
|
-
};
|
|
3289
|
+
throw lastCounterSyncError || new Error("Failed to prepare snapshot after retrying counter sync.");
|
|
3223
3290
|
}
|
|
3224
3291
|
|
|
3225
3292
|
// src/element-path/errors.ts
|
package/dist/cli/server.cjs
CHANGED
|
@@ -4219,8 +4219,56 @@ function applyCleaner(mode, html) {
|
|
|
4219
4219
|
return cleanForAction(html);
|
|
4220
4220
|
}
|
|
4221
4221
|
}
|
|
4222
|
+
function canonicalizeDuplicateNodeIds($) {
|
|
4223
|
+
const occurrencesByNodeId = /* @__PURE__ */ new Map();
|
|
4224
|
+
let order = 0;
|
|
4225
|
+
$("*").each(function() {
|
|
4226
|
+
const element = this;
|
|
4227
|
+
const nodeId = $(element).attr(OS_NODE_ID_ATTR);
|
|
4228
|
+
if (!nodeId) {
|
|
4229
|
+
order += 1;
|
|
4230
|
+
return;
|
|
4231
|
+
}
|
|
4232
|
+
const list = occurrencesByNodeId.get(nodeId) || [];
|
|
4233
|
+
list.push({
|
|
4234
|
+
element,
|
|
4235
|
+
order
|
|
4236
|
+
});
|
|
4237
|
+
occurrencesByNodeId.set(nodeId, list);
|
|
4238
|
+
order += 1;
|
|
4239
|
+
});
|
|
4240
|
+
for (const occurrences of occurrencesByNodeId.values()) {
|
|
4241
|
+
if (occurrences.length <= 1) continue;
|
|
4242
|
+
const canonical = pickCanonicalNodeIdOccurrence($, occurrences);
|
|
4243
|
+
for (const occurrence of occurrences) {
|
|
4244
|
+
if (occurrence.element === canonical.element) continue;
|
|
4245
|
+
$(occurrence.element).removeAttr(OS_NODE_ID_ATTR);
|
|
4246
|
+
}
|
|
4247
|
+
}
|
|
4248
|
+
}
|
|
4249
|
+
function pickCanonicalNodeIdOccurrence($, occurrences) {
|
|
4250
|
+
let best = occurrences[0];
|
|
4251
|
+
let bestScore = scoreNodeIdOccurrence($, best.element);
|
|
4252
|
+
for (let i = 1; i < occurrences.length; i += 1) {
|
|
4253
|
+
const candidate = occurrences[i];
|
|
4254
|
+
const candidateScore = scoreNodeIdOccurrence($, candidate.element);
|
|
4255
|
+
if (candidateScore > bestScore || candidateScore === bestScore && candidate.order < best.order) {
|
|
4256
|
+
best = candidate;
|
|
4257
|
+
bestScore = candidateScore;
|
|
4258
|
+
}
|
|
4259
|
+
}
|
|
4260
|
+
return best;
|
|
4261
|
+
}
|
|
4262
|
+
function scoreNodeIdOccurrence($, element) {
|
|
4263
|
+
const el = $(element);
|
|
4264
|
+
const descendantCount = el.find("*").length;
|
|
4265
|
+
const normalizedTextLength = el.text().replace(/\s+/g, " ").trim().length;
|
|
4266
|
+
const attributeCount = Object.keys(el.attr() || {}).length;
|
|
4267
|
+
return descendantCount * 100 + normalizedTextLength * 10 + attributeCount;
|
|
4268
|
+
}
|
|
4222
4269
|
async function assignCounters(page, html, nodePaths, nodeMeta) {
|
|
4223
4270
|
const $ = cheerio3.load(html, { xmlMode: false });
|
|
4271
|
+
canonicalizeDuplicateNodeIds($);
|
|
4224
4272
|
const counterIndex = /* @__PURE__ */ new Map();
|
|
4225
4273
|
let nextCounter = 1;
|
|
4226
4274
|
const assignedByNodeId = /* @__PURE__ */ new Map();
|
|
@@ -4402,44 +4450,63 @@ function stripNodeIds(html) {
|
|
|
4402
4450
|
$(`[${OS_NODE_ID_ATTR}]`).removeAttr(OS_NODE_ID_ATTR);
|
|
4403
4451
|
return $.html();
|
|
4404
4452
|
}
|
|
4453
|
+
function isLiveCounterSyncFailure(error) {
|
|
4454
|
+
if (!(error instanceof Error)) return false;
|
|
4455
|
+
return error.message.startsWith(
|
|
4456
|
+
"Failed to synchronize snapshot counters with the live DOM:"
|
|
4457
|
+
);
|
|
4458
|
+
}
|
|
4405
4459
|
async function prepareSnapshot(page, options = {}) {
|
|
4406
4460
|
const mode = options.mode ?? "action";
|
|
4407
4461
|
const withCounters = options.withCounters ?? true;
|
|
4408
4462
|
const shouldMarkInteractive = options.markInteractive ?? true;
|
|
4409
|
-
|
|
4410
|
-
|
|
4411
|
-
|
|
4412
|
-
|
|
4413
|
-
|
|
4414
|
-
|
|
4415
|
-
|
|
4416
|
-
|
|
4417
|
-
|
|
4418
|
-
|
|
4419
|
-
|
|
4420
|
-
|
|
4463
|
+
const maxAttempts = withCounters ? 4 : 1;
|
|
4464
|
+
let lastCounterSyncError = null;
|
|
4465
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
|
|
4466
|
+
if (shouldMarkInteractive) {
|
|
4467
|
+
await markInteractiveElements(page);
|
|
4468
|
+
}
|
|
4469
|
+
const serialized = await serializePageHTML(page);
|
|
4470
|
+
const rawHtml = serialized.html;
|
|
4471
|
+
const processedHtml = rawHtml;
|
|
4472
|
+
const reducedHtml = applyCleaner(mode, processedHtml);
|
|
4473
|
+
let cleanedHtml = reducedHtml;
|
|
4474
|
+
let counterIndex = null;
|
|
4475
|
+
if (withCounters) {
|
|
4476
|
+
try {
|
|
4477
|
+
const counted = await assignCounters(
|
|
4478
|
+
page,
|
|
4479
|
+
reducedHtml,
|
|
4480
|
+
serialized.nodePaths,
|
|
4481
|
+
serialized.nodeMeta
|
|
4482
|
+
);
|
|
4483
|
+
cleanedHtml = counted.html;
|
|
4484
|
+
counterIndex = counted.counterIndex;
|
|
4485
|
+
} catch (error) {
|
|
4486
|
+
if (attempt < maxAttempts && isLiveCounterSyncFailure(error)) {
|
|
4487
|
+
lastCounterSyncError = error;
|
|
4488
|
+
continue;
|
|
4489
|
+
}
|
|
4490
|
+
throw error;
|
|
4491
|
+
}
|
|
4492
|
+
} else {
|
|
4493
|
+
cleanedHtml = stripNodeIds(cleanedHtml);
|
|
4494
|
+
}
|
|
4495
|
+
if (mode === "extraction") {
|
|
4496
|
+
const $unwrap = cheerio3.load(cleanedHtml, { xmlMode: false });
|
|
4497
|
+
cleanedHtml = $unwrap("body").html()?.trim() || cleanedHtml;
|
|
4498
|
+
}
|
|
4499
|
+
return {
|
|
4500
|
+
mode,
|
|
4501
|
+
url: page.url(),
|
|
4502
|
+
rawHtml,
|
|
4503
|
+
processedHtml,
|
|
4421
4504
|
reducedHtml,
|
|
4422
|
-
|
|
4423
|
-
|
|
4424
|
-
|
|
4425
|
-
cleanedHtml = counted.html;
|
|
4426
|
-
counterIndex = counted.counterIndex;
|
|
4427
|
-
} else {
|
|
4428
|
-
cleanedHtml = stripNodeIds(cleanedHtml);
|
|
4429
|
-
}
|
|
4430
|
-
if (mode === "extraction") {
|
|
4431
|
-
const $unwrap = cheerio3.load(cleanedHtml, { xmlMode: false });
|
|
4432
|
-
cleanedHtml = $unwrap("body").html()?.trim() || cleanedHtml;
|
|
4505
|
+
cleanedHtml,
|
|
4506
|
+
counterIndex
|
|
4507
|
+
};
|
|
4433
4508
|
}
|
|
4434
|
-
|
|
4435
|
-
mode,
|
|
4436
|
-
url: page.url(),
|
|
4437
|
-
rawHtml,
|
|
4438
|
-
processedHtml,
|
|
4439
|
-
reducedHtml,
|
|
4440
|
-
cleanedHtml,
|
|
4441
|
-
counterIndex
|
|
4442
|
-
};
|
|
4509
|
+
throw lastCounterSyncError || new Error("Failed to prepare snapshot after retrying counter sync.");
|
|
4443
4510
|
}
|
|
4444
4511
|
|
|
4445
4512
|
// src/element-path/errors.ts
|
|
@@ -12943,7 +13010,7 @@ async function handleRequest(request, socket) {
|
|
|
12943
13010
|
await launchPromise;
|
|
12944
13011
|
}
|
|
12945
13012
|
if (url) {
|
|
12946
|
-
await instance.
|
|
13013
|
+
await instance.goto(url);
|
|
12947
13014
|
}
|
|
12948
13015
|
sendResponse(socket, {
|
|
12949
13016
|
id,
|
package/dist/cli/server.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
Opensteer,
|
|
3
3
|
normalizeError
|
|
4
|
-
} from "../chunk-
|
|
4
|
+
} from "../chunk-XIH3WGPY.js";
|
|
5
5
|
import "../chunk-3H5RRIMZ.js";
|
|
6
6
|
|
|
7
7
|
// src/cli/server.ts
|
|
@@ -425,7 +425,7 @@ async function handleRequest(request, socket) {
|
|
|
425
425
|
await launchPromise;
|
|
426
426
|
}
|
|
427
427
|
if (url) {
|
|
428
|
-
await instance.
|
|
428
|
+
await instance.goto(url);
|
|
429
429
|
}
|
|
430
430
|
sendResponse(socket, {
|
|
431
431
|
id,
|
package/dist/index.cjs
CHANGED
|
@@ -4303,8 +4303,56 @@ function applyCleaner(mode, html) {
|
|
|
4303
4303
|
return cleanForAction(html);
|
|
4304
4304
|
}
|
|
4305
4305
|
}
|
|
4306
|
+
function canonicalizeDuplicateNodeIds($) {
|
|
4307
|
+
const occurrencesByNodeId = /* @__PURE__ */ new Map();
|
|
4308
|
+
let order = 0;
|
|
4309
|
+
$("*").each(function() {
|
|
4310
|
+
const element = this;
|
|
4311
|
+
const nodeId = $(element).attr(OS_NODE_ID_ATTR);
|
|
4312
|
+
if (!nodeId) {
|
|
4313
|
+
order += 1;
|
|
4314
|
+
return;
|
|
4315
|
+
}
|
|
4316
|
+
const list = occurrencesByNodeId.get(nodeId) || [];
|
|
4317
|
+
list.push({
|
|
4318
|
+
element,
|
|
4319
|
+
order
|
|
4320
|
+
});
|
|
4321
|
+
occurrencesByNodeId.set(nodeId, list);
|
|
4322
|
+
order += 1;
|
|
4323
|
+
});
|
|
4324
|
+
for (const occurrences of occurrencesByNodeId.values()) {
|
|
4325
|
+
if (occurrences.length <= 1) continue;
|
|
4326
|
+
const canonical = pickCanonicalNodeIdOccurrence($, occurrences);
|
|
4327
|
+
for (const occurrence of occurrences) {
|
|
4328
|
+
if (occurrence.element === canonical.element) continue;
|
|
4329
|
+
$(occurrence.element).removeAttr(OS_NODE_ID_ATTR);
|
|
4330
|
+
}
|
|
4331
|
+
}
|
|
4332
|
+
}
|
|
4333
|
+
function pickCanonicalNodeIdOccurrence($, occurrences) {
|
|
4334
|
+
let best = occurrences[0];
|
|
4335
|
+
let bestScore = scoreNodeIdOccurrence($, best.element);
|
|
4336
|
+
for (let i = 1; i < occurrences.length; i += 1) {
|
|
4337
|
+
const candidate = occurrences[i];
|
|
4338
|
+
const candidateScore = scoreNodeIdOccurrence($, candidate.element);
|
|
4339
|
+
if (candidateScore > bestScore || candidateScore === bestScore && candidate.order < best.order) {
|
|
4340
|
+
best = candidate;
|
|
4341
|
+
bestScore = candidateScore;
|
|
4342
|
+
}
|
|
4343
|
+
}
|
|
4344
|
+
return best;
|
|
4345
|
+
}
|
|
4346
|
+
function scoreNodeIdOccurrence($, element) {
|
|
4347
|
+
const el = $(element);
|
|
4348
|
+
const descendantCount = el.find("*").length;
|
|
4349
|
+
const normalizedTextLength = el.text().replace(/\s+/g, " ").trim().length;
|
|
4350
|
+
const attributeCount = Object.keys(el.attr() || {}).length;
|
|
4351
|
+
return descendantCount * 100 + normalizedTextLength * 10 + attributeCount;
|
|
4352
|
+
}
|
|
4306
4353
|
async function assignCounters(page, html, nodePaths, nodeMeta) {
|
|
4307
4354
|
const $ = cheerio3.load(html, { xmlMode: false });
|
|
4355
|
+
canonicalizeDuplicateNodeIds($);
|
|
4308
4356
|
const counterIndex = /* @__PURE__ */ new Map();
|
|
4309
4357
|
let nextCounter = 1;
|
|
4310
4358
|
const assignedByNodeId = /* @__PURE__ */ new Map();
|
|
@@ -4486,44 +4534,63 @@ function stripNodeIds(html) {
|
|
|
4486
4534
|
$(`[${OS_NODE_ID_ATTR}]`).removeAttr(OS_NODE_ID_ATTR);
|
|
4487
4535
|
return $.html();
|
|
4488
4536
|
}
|
|
4537
|
+
function isLiveCounterSyncFailure(error) {
|
|
4538
|
+
if (!(error instanceof Error)) return false;
|
|
4539
|
+
return error.message.startsWith(
|
|
4540
|
+
"Failed to synchronize snapshot counters with the live DOM:"
|
|
4541
|
+
);
|
|
4542
|
+
}
|
|
4489
4543
|
async function prepareSnapshot(page, options = {}) {
|
|
4490
4544
|
const mode = options.mode ?? "action";
|
|
4491
4545
|
const withCounters = options.withCounters ?? true;
|
|
4492
4546
|
const shouldMarkInteractive = options.markInteractive ?? true;
|
|
4493
|
-
|
|
4494
|
-
|
|
4495
|
-
|
|
4496
|
-
|
|
4497
|
-
|
|
4498
|
-
|
|
4499
|
-
|
|
4500
|
-
|
|
4501
|
-
|
|
4502
|
-
|
|
4503
|
-
|
|
4504
|
-
|
|
4547
|
+
const maxAttempts = withCounters ? 4 : 1;
|
|
4548
|
+
let lastCounterSyncError = null;
|
|
4549
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
|
|
4550
|
+
if (shouldMarkInteractive) {
|
|
4551
|
+
await markInteractiveElements(page);
|
|
4552
|
+
}
|
|
4553
|
+
const serialized = await serializePageHTML(page);
|
|
4554
|
+
const rawHtml = serialized.html;
|
|
4555
|
+
const processedHtml = rawHtml;
|
|
4556
|
+
const reducedHtml = applyCleaner(mode, processedHtml);
|
|
4557
|
+
let cleanedHtml = reducedHtml;
|
|
4558
|
+
let counterIndex = null;
|
|
4559
|
+
if (withCounters) {
|
|
4560
|
+
try {
|
|
4561
|
+
const counted = await assignCounters(
|
|
4562
|
+
page,
|
|
4563
|
+
reducedHtml,
|
|
4564
|
+
serialized.nodePaths,
|
|
4565
|
+
serialized.nodeMeta
|
|
4566
|
+
);
|
|
4567
|
+
cleanedHtml = counted.html;
|
|
4568
|
+
counterIndex = counted.counterIndex;
|
|
4569
|
+
} catch (error) {
|
|
4570
|
+
if (attempt < maxAttempts && isLiveCounterSyncFailure(error)) {
|
|
4571
|
+
lastCounterSyncError = error;
|
|
4572
|
+
continue;
|
|
4573
|
+
}
|
|
4574
|
+
throw error;
|
|
4575
|
+
}
|
|
4576
|
+
} else {
|
|
4577
|
+
cleanedHtml = stripNodeIds(cleanedHtml);
|
|
4578
|
+
}
|
|
4579
|
+
if (mode === "extraction") {
|
|
4580
|
+
const $unwrap = cheerio3.load(cleanedHtml, { xmlMode: false });
|
|
4581
|
+
cleanedHtml = $unwrap("body").html()?.trim() || cleanedHtml;
|
|
4582
|
+
}
|
|
4583
|
+
return {
|
|
4584
|
+
mode,
|
|
4585
|
+
url: page.url(),
|
|
4586
|
+
rawHtml,
|
|
4587
|
+
processedHtml,
|
|
4505
4588
|
reducedHtml,
|
|
4506
|
-
|
|
4507
|
-
|
|
4508
|
-
|
|
4509
|
-
cleanedHtml = counted.html;
|
|
4510
|
-
counterIndex = counted.counterIndex;
|
|
4511
|
-
} else {
|
|
4512
|
-
cleanedHtml = stripNodeIds(cleanedHtml);
|
|
4513
|
-
}
|
|
4514
|
-
if (mode === "extraction") {
|
|
4515
|
-
const $unwrap = cheerio3.load(cleanedHtml, { xmlMode: false });
|
|
4516
|
-
cleanedHtml = $unwrap("body").html()?.trim() || cleanedHtml;
|
|
4589
|
+
cleanedHtml,
|
|
4590
|
+
counterIndex
|
|
4591
|
+
};
|
|
4517
4592
|
}
|
|
4518
|
-
|
|
4519
|
-
mode,
|
|
4520
|
-
url: page.url(),
|
|
4521
|
-
rawHtml,
|
|
4522
|
-
processedHtml,
|
|
4523
|
-
reducedHtml,
|
|
4524
|
-
cleanedHtml,
|
|
4525
|
-
counterIndex
|
|
4526
|
-
};
|
|
4593
|
+
throw lastCounterSyncError || new Error("Failed to prepare snapshot after retrying counter sync.");
|
|
4527
4594
|
}
|
|
4528
4595
|
|
|
4529
4596
|
// src/element-path/errors.ts
|
package/dist/index.js
CHANGED