opensteer 0.9.4 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/bin.cjs CHANGED
@@ -6679,7 +6679,7 @@ function assertValidSemanticOperationInput(name, input) {
6679
6679
  }
6680
6680
  );
6681
6681
  }
6682
- var opensteerComputerAnnotationNames, opensteerExposedSemanticOperationNames, opensteerPackageRunnableSemanticOperationNames, snapshotModeSchema, viewportSchema, opensteerBrowserLaunchOptionsSchema, attachBrowserOptionsSchema, opensteerBrowserOptionsSchema, opensteerBrowserContextOptionsSchema, targetByElementSchema2, targetByPersistSchema2, targetBySelectorSchema2, opensteerTargetInputSchema, opensteerResolvedTargetSchema, opensteerActionResultSchema, opensteerSnapshotCounterSchema, opensteerSessionStateSchema, opensteerOpenInputSchema, opensteerPageListInputSchema, opensteerPageListOutputSchema, opensteerPageNewInputSchema, opensteerPageActivateInputSchema, opensteerPageCloseInputSchema, opensteerPageCloseOutputSchema, opensteerPageGotoInputSchema, opensteerPageEvaluateInputSchema, opensteerPageEvaluateOutputSchema, opensteerAddInitScriptInputSchema, opensteerAddInitScriptOutputSchema, opensteerCapturedScriptSchema, opensteerCaptureScriptsInputSchema, opensteerCaptureScriptsOutputSchema, opensteerPageSnapshotInputSchema, opensteerPageSnapshotOutputSchema, opensteerComputerMouseButtonSchema, opensteerComputerKeyModifierSchema, opensteerDomClickInputSchema, opensteerDomHoverInputSchema, opensteerDomInputInputSchema, opensteerDomScrollInputSchema, opensteerExtractSchemaSchema, opensteerDomExtractInputSchema, jsonValueSchema2, opensteerDomExtractOutputSchema, opensteerSessionCloseInputSchema, opensteerSessionCloseOutputSchema, opensteerComputerAnnotationSchema, opensteerComputerClickActionSchema, opensteerComputerMoveActionSchema, opensteerComputerScrollActionSchema, opensteerComputerTypeActionSchema, opensteerComputerKeyActionSchema, opensteerComputerDragActionSchema, opensteerComputerScreenshotActionSchema, opensteerComputerWaitActionSchema, opensteerComputerActionSchema, opensteerComputerScreenshotOptionsSchema, opensteerComputerExecuteInputSchema, opensteerComputerTracePointSchema, opensteerComputerTraceEnrichmentSchema, opensteerComputerExecuteTimingSchema, 
opensteerComputerDisplayScaleSchema, opensteerComputerExecuteOutputSchema, opensteerSemanticOperationSpecificationsBase, exposedSemanticOperationNameSet, opensteerSemanticOperationSpecificationsInternal, opensteerSemanticOperationSpecifications, opensteerSemanticOperationSpecificationMap, semanticRestBasePath, opensteerSemanticRestEndpoints;
6682
+ var opensteerComputerAnnotationNames, opensteerExposedSemanticOperationNames, opensteerPackageRunnableSemanticOperationNames, snapshotModeSchema, viewportSchema, opensteerBrowserLaunchOptionsSchema, attachBrowserOptionsSchema, opensteerBrowserOptionsSchema, opensteerBrowserContextOptionsSchema, targetByElementSchema2, targetByPersistSchema2, targetBySelectorSchema2, opensteerTargetInputSchema, opensteerResolvedTargetSchema, opensteerActionResultSchema, opensteerSnapshotCounterSchema, opensteerSessionStateSchema, opensteerOpenInputSchema, opensteerPageListInputSchema, opensteerPageListOutputSchema, opensteerPageNewInputSchema, opensteerPageActivateInputSchema, opensteerPageCloseInputSchema, opensteerPageCloseOutputSchema, opensteerPageGotoInputSchema, opensteerPageEvaluateInputSchema, opensteerPageEvaluateOutputSchema, opensteerAddInitScriptInputSchema, opensteerAddInitScriptOutputSchema, opensteerCapturedScriptSchema, opensteerCaptureScriptsInputSchema, opensteerCaptureScriptsOutputSchema, opensteerPageSnapshotInputSchema, opensteerPageSnapshotOutputSchema, opensteerComputerMouseButtonSchema, opensteerComputerKeyModifierSchema, opensteerDomClickInputSchema, opensteerDomHoverInputSchema, opensteerDomInputInputSchema, opensteerDomScrollInputSchema, opensteerExtractTemplateSchema, opensteerDomExtractInputSchema, jsonValueSchema2, opensteerDomExtractOutputSchema, opensteerSessionCloseInputSchema, opensteerSessionCloseOutputSchema, opensteerComputerAnnotationSchema, opensteerComputerClickActionSchema, opensteerComputerMoveActionSchema, opensteerComputerScrollActionSchema, opensteerComputerTypeActionSchema, opensteerComputerKeyActionSchema, opensteerComputerDragActionSchema, opensteerComputerScreenshotActionSchema, opensteerComputerWaitActionSchema, opensteerComputerActionSchema, opensteerComputerScreenshotOptionsSchema, opensteerComputerExecuteInputSchema, opensteerComputerTracePointSchema, opensteerComputerTraceEnrichmentSchema, opensteerComputerExecuteTimingSchema, 
opensteerComputerDisplayScaleSchema, opensteerComputerExecuteOutputSchema, opensteerSemanticOperationSpecificationsBase, exposedSemanticOperationNameSet, opensteerSemanticOperationSpecificationsInternal, opensteerSemanticOperationSpecifications, opensteerSemanticOperationSpecificationMap, semanticRestBasePath, opensteerSemanticRestEndpoints;
6683
6683
  var init_semantic = __esm({
6684
6684
  "../protocol/src/semantic.ts"() {
6685
6685
  init_json2();
@@ -7261,10 +7261,10 @@ var init_semantic = __esm({
7261
7261
  required: ["target", "direction", "amount"]
7262
7262
  }
7263
7263
  );
7264
- opensteerExtractSchemaSchema = objectSchema(
7264
+ opensteerExtractTemplateSchema = objectSchema(
7265
7265
  {},
7266
7266
  {
7267
- title: "OpensteerExtractSchema",
7267
+ title: "OpensteerExtractTemplate",
7268
7268
  additionalProperties: true
7269
7269
  }
7270
7270
  );
@@ -7272,13 +7272,13 @@ var init_semantic = __esm({
7272
7272
  ...objectSchema(
7273
7273
  {
7274
7274
  persist: stringSchema(),
7275
- schema: opensteerExtractSchemaSchema
7275
+ template: opensteerExtractTemplateSchema
7276
7276
  },
7277
7277
  {
7278
7278
  title: "OpensteerDomExtractInput"
7279
7279
  }
7280
7280
  ),
7281
- anyOf: [defineSchema({ required: ["persist"] }), defineSchema({ required: ["schema"] })]
7281
+ anyOf: [defineSchema({ required: ["persist"] }), defineSchema({ required: ["template"] })]
7282
7282
  });
7283
7283
  jsonValueSchema2 = recordSchema({}, { title: "JsonValueRecord" });
7284
7284
  opensteerDomExtractOutputSchema = objectSchema(
@@ -12220,7 +12220,7 @@ var init_package = __esm({
12220
12220
  "../runtime-core/package.json"() {
12221
12221
  package_default2 = {
12222
12222
  name: "@opensteer/runtime-core",
12223
- version: "0.2.3",
12223
+ version: "0.2.4",
12224
12224
  description: "Shared semantic runtime for Opensteer local and cloud execution.",
12225
12225
  license: "MIT",
12226
12226
  type: "module",
@@ -12468,6 +12468,7 @@ var init_defaults = __esm({
12468
12468
  return false;
12469
12469
  }
12470
12470
  try {
12471
+ const startedAt = Date.now();
12471
12472
  await input.engine.waitForPostLoadQuiet({
12472
12473
  pageRef: input.pageRef,
12473
12474
  timeoutMs: effectiveTimeout,
@@ -12475,9 +12476,13 @@ var init_defaults = __esm({
12475
12476
  captureWindowMs: Math.min(NAVIGATION_POST_LOAD_CAPTURE_WINDOW_MS, effectiveTimeout),
12476
12477
  signal: input.signal
12477
12478
  });
12479
+ const visualTimeout = Math.max(0, effectiveTimeout - (Date.now() - startedAt));
12480
+ if (visualTimeout <= 0) {
12481
+ return true;
12482
+ }
12478
12483
  await input.engine.waitForVisualStability({
12479
12484
  pageRef: input.pageRef,
12480
- timeoutMs: effectiveTimeout,
12485
+ timeoutMs: visualTimeout,
12481
12486
  settleMs: profile.settleMs,
12482
12487
  scope: profile.scope
12483
12488
  });
@@ -17809,9 +17814,9 @@ var init_extraction_consolidation = __esm({
17809
17814
  CLUSTER_FALLBACK_PREFIX = "variant";
17810
17815
  }
17811
17816
  });
/**
 * Guards the top level of an extraction template.
 *
 * @param {unknown} template - Candidate template root.
 * @throws {Error} When the value is falsy, not an object, or an array.
 */
function assertValidOpensteerExtractionTemplateRoot(template) {
  const isObjectRoot = Boolean(template) && typeof template === "object" && !Array.isArray(template);
  if (isObjectRoot) {
    return;
  }
  throw new Error("Invalid extraction template: expected a JSON object at the top level.");
}
17817
17822
  function isPersistedOpensteerExtractionValueNode2(value) {
@@ -17833,12 +17838,12 @@ function isPersistedOpensteerExtractionArrayNode2(value) {
17833
17838
  return "$array" in value;
17834
17839
  }
17835
17840
  async function compileOpensteerExtractionFieldTargets(options) {
17836
- assertValidOpensteerExtractionSchemaRoot(options.schema);
17841
+ assertValidOpensteerExtractionTemplateRoot(options.template);
17837
17842
  const fields = [];
17838
- await collectFieldTargetsFromSchemaObject({
17843
+ await collectFieldTargetsFromTemplateObject({
17839
17844
  dom: options.dom,
17840
17845
  pageRef: options.pageRef,
17841
- value: options.schema,
17846
+ value: options.template,
17842
17847
  path: "",
17843
17848
  fields,
17844
17849
  insideArray: false
@@ -17890,13 +17895,13 @@ function createOpensteerExtractionDescriptorStore(options) {
17890
17895
  }
17891
17896
  return new MemoryOpensteerExtractionDescriptorStore(namespace);
17892
17897
  }
17893
- async function collectFieldTargetsFromSchemaObject(options) {
17898
+ async function collectFieldTargetsFromTemplateObject(options) {
17894
17899
  for (const [key, childValue] of Object.entries(options.value)) {
17895
17900
  const normalizedKey = normalizeKey(key);
17896
17901
  if (!normalizedKey) {
17897
17902
  continue;
17898
17903
  }
17899
- await collectFieldTargetsFromSchemaValue({
17904
+ await collectFieldTargetsFromTemplateValue({
17900
17905
  dom: options.dom,
17901
17906
  pageRef: options.pageRef,
17902
17907
  value: childValue,
@@ -17906,8 +17911,8 @@ async function collectFieldTargetsFromSchemaObject(options) {
17906
17911
  });
17907
17912
  }
17908
17913
  }
17909
- async function collectFieldTargetsFromSchemaValue(options) {
17910
- const normalizedField = normalizeSchemaField(options.value);
17914
+ async function collectFieldTargetsFromTemplateValue(options) {
17915
+ const normalizedField = normalizeTemplateField(options.value);
17911
17916
  if (normalizedField !== null) {
17912
17917
  options.fields.push(
17913
17918
  await compileFieldTarget({
@@ -17922,12 +17927,12 @@ async function collectFieldTargetsFromSchemaValue(options) {
17922
17927
  if (Array.isArray(options.value)) {
17923
17928
  if (options.insideArray) {
17924
17929
  throw new Error(
17925
- `Nested arrays are not supported in extraction schema at "${labelForPath(options.path)}".`
17930
+ `Nested arrays are not supported in extraction template at "${labelForPath(options.path)}".`
17926
17931
  );
17927
17932
  }
17928
17933
  if (options.value.length === 0) {
17929
17934
  throw new Error(
17930
- `Extraction array "${labelForPath(options.path)}" must include at least one representative item.`
17935
+ `Extraction array "${labelForPath(options.path)}" must include at least one representative template item.`
17931
17936
  );
17932
17937
  }
17933
17938
  for (let index = 0; index < options.value.length; index += 1) {
@@ -17938,7 +17943,7 @@ async function collectFieldTargetsFromSchemaValue(options) {
17938
17943
  );
17939
17944
  }
17940
17945
  const fieldCountBeforeItem = options.fields.length;
17941
- await collectFieldTargetsFromSchemaObject({
17946
+ await collectFieldTargetsFromTemplateObject({
17942
17947
  dom: options.dom,
17943
17948
  pageRef: options.pageRef,
17944
17949
  value: itemValue,
@@ -17949,7 +17954,7 @@ async function collectFieldTargetsFromSchemaValue(options) {
17949
17954
  const itemFields = options.fields.slice(fieldCountBeforeItem);
17950
17955
  if (!itemFields.some((field) => !("source" in field))) {
17951
17956
  throw new Error(
17952
- `Extraction array "${labelForPath(options.path)}" item ${String(index)} must include at least one element- or selector-backed field.`
17957
+ `Extraction array "${labelForPath(options.path)}" item ${String(index)} must include at least one element number or selector field.`
17953
17958
  );
17954
17959
  }
17955
17960
  }
@@ -17957,10 +17962,10 @@ async function collectFieldTargetsFromSchemaValue(options) {
17957
17962
  }
17958
17963
  if (!options.value || typeof options.value !== "object") {
17959
17964
  throw new Error(
17960
- `Invalid extraction schema value at "${labelForPath(options.path)}": expected an object, array, or field descriptor.`
17965
+ `Invalid extraction template value at "${labelForPath(options.path)}": expected an object, array, or field descriptor.`
17961
17966
  );
17962
17967
  }
17963
- await collectFieldTargetsFromSchemaObject({
17968
+ await collectFieldTargetsFromTemplateObject({
17964
17969
  dom: options.dom,
17965
17970
  pageRef: options.pageRef,
17966
17971
  value: options.value,
@@ -17992,7 +17997,7 @@ async function compileFieldTarget(options) {
17992
17997
  path: await resolveSelectorFieldPath({
17993
17998
  dom: options.dom,
17994
17999
  pageRef: options.pageRef,
17995
- selector: `[c="${String(options.field.element)}"]`
18000
+ selector: `[c="${String(options.field.c)}"]`
17996
18001
  }),
17997
18002
  ...options.field.attribute === void 0 ? {} : { attribute: options.field.attribute }
17998
18003
  };
@@ -18293,24 +18298,29 @@ function countNonNullLeaves(value) {
18293
18298
  }
18294
18299
  return Object.values(value).reduce((sum, item) => sum + countNonNullLeaves(item), 0);
18295
18300
  }
18296
- function normalizeSchemaField(value) {
18301
+ function normalizeTemplateField(value) {
18302
+ if (typeof value === "number") {
18303
+ return {
18304
+ c: normalizeExtractionCounter(value)
18305
+ };
18306
+ }
18297
18307
  if (!value || typeof value !== "object" || Array.isArray(value)) {
18298
18308
  return null;
18299
18309
  }
18300
18310
  const raw = value;
18301
- const hasElement = raw.element !== void 0;
18311
+ const hasCounter = raw.c !== void 0 || raw.element !== void 0;
18302
18312
  const hasSelector = raw.selector !== void 0;
18303
18313
  const hasSource = raw.source !== void 0;
18304
- const targetCount = Number(hasElement) + Number(hasSelector) + Number(hasSource);
18314
+ const targetCount = Number(hasCounter) + Number(hasSelector) + Number(hasSource);
18305
18315
  if (targetCount === 0) {
18306
18316
  return null;
18307
18317
  }
18308
18318
  if (targetCount !== 1) {
18309
18319
  throw new Error(
18310
- "Extraction field descriptors must specify exactly one of element, selector, or source."
18320
+ "Extraction field descriptors must specify exactly one of c/element, selector, or source."
18311
18321
  );
18312
18322
  }
18313
- const attribute = raw.attribute === void 0 ? void 0 : normalizeNonEmptyString2("attribute", raw.attribute);
18323
+ const attribute = raw.attr !== void 0 ? normalizeNonEmptyString2("attr", raw.attr) : raw.attribute === void 0 ? void 0 : normalizeNonEmptyString2("attribute", raw.attribute);
18314
18324
  if (hasSource) {
18315
18325
  if (raw.source !== "current_url") {
18316
18326
  throw new Error(`Unsupported extraction source "${String(raw.source)}".`);
@@ -18325,17 +18335,20 @@ function normalizeSchemaField(value) {
18325
18335
  ...attribute === void 0 ? {} : { attribute }
18326
18336
  };
18327
18337
  }
18328
- const element = Number(raw.element);
18329
- if (!Number.isInteger(element) || element < 1) {
18330
- throw new Error(
18331
- `Extraction field element must be a positive integer, received ${String(raw.element)}.`
18332
- );
18333
- }
18334
18338
  return {
18335
- element,
18339
+ c: normalizeExtractionCounter(raw.c ?? raw.element),
18336
18340
  ...attribute === void 0 ? {} : { attribute }
18337
18341
  };
18338
18342
  }
/**
 * Validates an element number taken from an extraction template and returns
 * it as a number.
 *
 * Only numbers and non-blank strings are considered; other types (booleans,
 * arrays, null, objects) are rejected instead of being coerced, so that a
 * malformed template such as `{ c: true }` cannot silently resolve to
 * element 1. Values outside the safe-integer range are rejected as well,
 * because they no longer stringify as plain digits.
 *
 * @param {unknown} value - Raw counter from the template.
 * @returns {number} A positive safe integer.
 * @throws {Error} When the value is not a positive integer.
 */
function normalizeExtractionCounter(value) {
  const isNumericInput = typeof value === "number" || (typeof value === "string" && value.trim() !== "");
  const counter = isNumericInput ? Number(value) : Number.NaN;
  if (!Number.isSafeInteger(counter) || counter < 1) {
    throw new Error(
      `Extraction element number must be a positive integer, received ${String(value)}.`
    );
  }
  return counter;
}
18339
18352
  function normalizeNamespace(namespace) {
18340
18353
  const normalized = String(namespace ?? "default").trim();
18341
18354
  return normalized.length === 0 ? "default" : normalized;
@@ -18366,7 +18379,7 @@ function parseExtractionDescriptorRecord(record) {
18366
18379
  kind: "dom-extraction",
18367
18380
  persist: raw.persist,
18368
18381
  root,
18369
- ...typeof raw.schemaHash === "string" ? { schemaHash: raw.schemaHash } : {},
18382
+ ...typeof raw.templateHash === "string" ? { templateHash: raw.templateHash } : typeof raw.schemaHash === "string" ? { templateHash: raw.schemaHash } : {},
18370
18383
  ...typeof raw.sourceUrl === "string" ? { sourceUrl: raw.sourceUrl } : {}
18371
18384
  }
18372
18385
  };
@@ -18475,7 +18488,7 @@ var init_extraction2 = __esm({
18475
18488
  kind: "dom-extraction",
18476
18489
  persist: input.persist,
18477
18490
  root: input.root,
18478
- ...input.schemaHash === void 0 ? {} : { schemaHash: input.schemaHash },
18491
+ ...input.templateHash === void 0 ? {} : { templateHash: input.templateHash },
18479
18492
  ...input.sourceUrl === void 0 ? {} : { sourceUrl: input.sourceUrl }
18480
18493
  };
18481
18494
  const key = persistKey(this.namespace, input.persist);
@@ -18524,7 +18537,7 @@ var init_extraction2 = __esm({
18524
18537
  kind: "dom-extraction",
18525
18538
  persist: input.persist,
18526
18539
  root: input.root,
18527
- ...input.schemaHash === void 0 ? {} : { schemaHash: input.schemaHash },
18540
+ ...input.templateHash === void 0 ? {} : { templateHash: input.templateHash },
18528
18541
  ...input.sourceUrl === void 0 ? {} : { sourceUrl: input.sourceUrl }
18529
18542
  };
18530
18543
  const key = persistKey(this.namespace, input.persist);
@@ -18701,19 +18714,260 @@ function truncateValue(value, max) {
18701
18714
  }
18702
18715
  return `${head}${TRUNCATION_SUFFIX}`;
18703
18716
  }
/**
 * Returns the longest suffix of `value` whose serialized length fits `max`.
 *
 * Serialized length counts `&` as 5, `<` and `>` as 4, `"` as 6 (these match
 * the lengths of `&amp;`, `&lt;`/`&gt;` and `&quot;`) and every other UTF-16
 * code unit as 1.
 *
 * A surrogate pair is taken or dropped as a whole (costing 2), so the result
 * never starts with a lone low surrogate when the budget runs out in the
 * middle of an astral character.
 *
 * @param {string} value - Source text.
 * @param {number} max - Serialized-length budget for the suffix.
 * @returns {string} The suffix that fits the budget (possibly empty).
 */
function takeValueWithinSerializedLengthFromEnd(value, max) {
  let serializedLength = 0;
  const chars = [];
  let index = value.length - 1;
  while (index >= 0) {
    let char = value[index];
    const code = value.charCodeAt(index);
    const isLowSurrogate = code >= 0xdc00 && code <= 0xdfff;
    if (isLowSurrogate && index > 0) {
      const previous = value.charCodeAt(index - 1);
      if (previous >= 0xd800 && previous <= 0xdbff) {
        // Keep the high/low pair together as one unit.
        char = value.slice(index - 1, index + 1);
      }
    }
    let nextLength = char.length;
    if (char === "&") {
      nextLength = 5;
    } else if (char === "<" || char === ">") {
      nextLength = 4;
    } else if (char === '"') {
      nextLength = 6;
    }
    if (serializedLength + nextLength > max) {
      break;
    }
    chars.push(char);
    serializedLength += nextLength;
    index -= char.length;
  }
  return chars.reverse().join("");
}
/**
 * Shortens `value` by keeping a head and a tail and joining them with `marker`.
 *
 * The value is returned untouched when its serialized length already fits
 * `headMax + serializedLength(marker) + tailMax`. Trailing whitespace of the
 * head and leading whitespace of the tail are dropped before joining.
 *
 * @param {string} value - Text to shorten.
 * @param {number} headMax - Serialized-length budget for the kept prefix.
 * @param {number} tailMax - Serialized-length budget for the kept suffix.
 * @param {string} [marker] - Text placed where content was removed.
 * @returns {string} The original or shortened text.
 */
function truncateValueInMiddle(value, headMax, tailMax, marker = MIDDLE_TRUNCATION_MARKER) {
  const budget = headMax + getSerializedLength(marker) + tailMax;
  if (getSerializedLength(value) <= budget) {
    return value;
  }
  const leading = takeValueWithinSerializedLength(value, headMax).replace(/\s+$/u, "");
  const trailing = takeValueWithinSerializedLengthFromEnd(value, tailMax).replace(/^\s+/u, "");
  // An empty side contributes nothing, so one template covers every case.
  return `${leading}${marker}${trailing}`;
}
/**
 * Looks up the length cap that applies to an attribute name.
 *
 * @param {string} attr - Attribute name.
 * @returns {number|undefined} `SRCSET_ATTR_MAX` for `srcset`, `TEXT_ATTR_MAX`
 *   for members of `TEXT_ATTRS`, otherwise `undefined`.
 */
function getAttrLimit(attr) {
  return attr === "srcset" ? SRCSET_ATTR_MAX : TEXT_ATTRS.has(attr) ? TEXT_ATTR_MAX : void 0;
}
/**
 * Tells whether an attribute value is subject to any shortening.
 *
 * @param {string} attr - Attribute name.
 * @returns {boolean} True for members of `MIDDLE_TRUNCATED_URL_ATTRS` and for
 *   attributes that `getAttrLimit` assigns a cap.
 */
function shouldBoundAttr(attr) {
  if (MIDDLE_TRUNCATED_URL_ATTRS.has(attr)) {
    return true;
  }
  return getAttrLimit(attr) !== void 0;
}
/**
 * Writes an attribute onto `el`, shortening the value according to the
 * attribute's kind.
 *
 * - Members of `MIDDLE_TRUNCATED_URL_ATTRS` are shortened in the middle.
 * - `srcset` with a cap goes through `truncateSrcsetValue`.
 * - Other capped attributes go through `truncateValue`.
 * - Attributes without a cap are written unchanged.
 *
 * @param {{ attr: Function }} el - Element wrapper exposing `attr(name, value)`.
 * @param {string} attr - Attribute name.
 * @param {string} value - Raw attribute value.
 */
function setBoundedAttr(el, attr, value) {
  let bounded;
  if (MIDDLE_TRUNCATED_URL_ATTRS.has(attr)) {
    bounded = truncateValueInMiddle(value, MIDDLE_TRUNCATION_HEAD_MAX, MIDDLE_TRUNCATION_TAIL_MAX);
  } else {
    const limit = getAttrLimit(attr);
    if (limit === void 0) {
      bounded = value;
    } else if (attr === "srcset") {
      bounded = truncateSrcsetValue(value, limit);
    } else {
      bounded = truncateValue(value, limit);
    }
  }
  el.attr(attr, bounded);
}
/**
 * Shortens a `srcset` value so its serialized length fits `max`.
 *
 * Values that already fit are returned unchanged. Otherwise the value is
 * parsed into candidates and rebuilt with progressively tighter settings; the
 * first rebuild that fits wins. When parsing yields nothing, or no rebuild
 * fits, the raw string is shortened in the middle with the fallback budgets.
 *
 * @param {string} value - Raw `srcset` attribute value.
 * @param {number} max - Serialized-length cap.
 * @returns {string} The original or shortened value.
 */
function truncateSrcsetValue(value, max) {
  if (getSerializedLength(value) <= max) {
    return value;
  }
  const candidates = parseSrcsetCandidates2(value);
  if (candidates.length > 0) {
    const attempts = [
      { head: SRCSET_CANDIDATE_HEAD_MAX, tail: SRCSET_CANDIDATE_TAIL_MAX, best: true },
      { head: SRCSET_COMPACT_CANDIDATE_HEAD_MAX, tail: SRCSET_COMPACT_CANDIDATE_TAIL_MAX, best: true },
      { head: SRCSET_COMPACT_CANDIDATE_HEAD_MAX, tail: SRCSET_COMPACT_CANDIDATE_TAIL_MAX, best: false }
    ];
    for (const attempt of attempts) {
      const compact = buildTruncatedSrcsetValue(candidates, attempt.head, attempt.tail, attempt.best);
      if (getSerializedLength(compact) <= max) {
        return compact;
      }
    }
  }
  return truncateValueInMiddle(value, SRCSET_FALLBACK_HEAD_MAX, SRCSET_FALLBACK_TAIL_MAX);
}
/**
 * Rebuilds a `srcset` string from the preferred subset of candidates.
 *
 * Kept candidates are formatted with the given URL budgets and joined with
 * `", "`. Wherever two kept candidates were not adjacent in the original
 * list, `MIDDLE_TRUNCATION_MARKER` is inserted between them.
 *
 * @param {Array<object>} candidates - Parsed srcset candidates.
 * @param {number} headMax - URL head budget per candidate.
 * @param {number} tailMax - URL tail budget per candidate.
 * @param {boolean} includeBest - Forwarded to the candidate selection.
 * @returns {string} The rebuilt srcset value.
 */
function buildTruncatedSrcsetValue(candidates, headMax, tailMax, includeBest) {
  const kept = getPreferredSrcsetCandidateIndices(candidates, includeBest);
  return kept
    .flatMap((candidateIndex, position) => {
      const formatted = formatSrcsetCandidate(candidates[candidateIndex], headMax, tailMax);
      const skippedSome = position > 0 && candidateIndex - kept[position - 1] > 1;
      return skippedSome ? [MIDDLE_TRUNCATION_MARKER, formatted] : [formatted];
    })
    .join(", ");
}
/**
 * Chooses which candidate positions survive srcset shortening.
 *
 * The first and last positions are always kept; the "best" position (per
 * `pickBestSrcsetCandidateIndex`) is added on request. Duplicates and
 * out-of-range positions are discarded and the result is ascending.
 *
 * @param {Array<object>} candidates - Parsed srcset candidates.
 * @param {boolean} includeBest - Whether to also keep the best candidate.
 * @returns {number[]} Sorted, unique candidate indices.
 */
function getPreferredSrcsetCandidateIndices(candidates, includeBest) {
  const total = candidates.length;
  if (total === 0) {
    return [];
  }
  const wanted = [0, total - 1];
  if (includeBest) {
    wanted.push(pickBestSrcsetCandidateIndex(candidates));
  }
  const unique = [...new Set(wanted)];
  return unique.filter((position) => position >= 0 && position < total).sort((a, b) => a - b);
}
/**
 * Finds the position of the "best" srcset candidate.
 *
 * Preference order: the largest finite numeric `width`; failing that, the
 * largest finite numeric `density`; failing that, the last position. Ties go
 * to the earliest candidate, and only values greater than -1 are considered.
 *
 * @param {Array<{width?: number|null, density?: number|null}>} candidates
 * @returns {number} Index of the chosen candidate (`-1` for an empty list).
 */
function pickBestSrcsetCandidateIndex(candidates) {
  const indexOfLargest = (key) => {
    let winner = -1;
    let largest = -1;
    for (let position = 0; position < candidates.length; position += 1) {
      const metric = candidates[position][key];
      if (typeof metric === "number" && Number.isFinite(metric) && metric > largest) {
        largest = metric;
        winner = position;
      }
    }
    return winner;
  };
  const byWidth = indexOfLargest("width");
  if (byWidth >= 0) {
    return byWidth;
  }
  const byDensity = indexOfLargest("density");
  return byDensity >= 0 ? byDensity : candidates.length - 1;
}
/**
 * Renders one srcset candidate with its URL shortened in the middle.
 *
 * @param {{url: string, descriptorText: string}} candidate - Parsed candidate.
 * @param {number} headMax - URL head budget.
 * @param {number} tailMax - URL tail budget.
 * @returns {string} The shortened URL, followed by a space and the
 *   descriptors when any are present.
 */
function formatSrcsetCandidate(candidate, headMax, tailMax) {
  const shortened = truncateValueInMiddle(candidate.url, headMax, tailMax);
  if (!candidate.descriptorText) {
    return shortened;
  }
  return `${shortened} ${candidate.descriptorText}`;
}
/**
 * Splits a `srcset` attribute value into its image candidates.
 *
 * Each candidate carries the URL, the descriptors joined by single spaces,
 * and the numeric `width` (`<n>w`) / `density` (`<n>x`) when such descriptors
 * are present (otherwise `null`). When a descriptor kind repeats, the last
 * occurrence wins.
 *
 * NOTE(review): for non-`data:` URLs a comma always ends the URL, even when
 * no whitespace follows. This is more lenient than the WHATWG srcset
 * algorithm and splits URLs that legitimately contain commas; left unchanged.
 *
 * @param {string} raw - Raw attribute value.
 * @returns {Array<{url: string, descriptorText: string, width: number|null, density: number|null}>}
 */
function parseSrcsetCandidates2(raw) {
  const text = raw.trim();
  if (!text) {
    return [];
  }
  const out = [];
  let index = 0;
  while (index < text.length) {
    index = skipSrcsetSeparators(text, index);
    if (index >= text.length) {
      break;
    }
    const urlToken = readSrcsetUrlToken(text, index);
    index = urlToken.nextIndex;
    const url = urlToken.value.trim();
    if (!url) {
      continue;
    }
    index = skipSrcsetWhitespace(text, index);
    const descriptors = [];
    // Descriptors run until the comma that closes this candidate.
    while (index < text.length && text[index] !== ",") {
      const descriptorToken = readSrcsetDescriptorToken(text, index);
      if (!descriptorToken.value) {
        index = descriptorToken.nextIndex;
        continue;
      }
      descriptors.push(descriptorToken.value);
      index = descriptorToken.nextIndex;
      index = skipSrcsetWhitespace(text, index);
    }
    if (index < text.length && text[index] === ",") {
      index += 1;
    }
    let width = null;
    let density = null;
    for (const descriptor of descriptors) {
      const token = descriptor.trim().toLowerCase();
      if (!token) {
        continue;
      }
      const widthMatch = token.match(/^(\d+)w$/);
      if (widthMatch) {
        const parsed = Number.parseInt(widthMatch[1], 10);
        if (Number.isFinite(parsed)) {
          width = parsed;
        }
        continue;
      }
      const densityMatch = token.match(/^(\d*\.?\d+)x$/);
      if (densityMatch) {
        const parsed = Number.parseFloat(densityMatch[1]);
        if (Number.isFinite(parsed)) {
          density = parsed;
        }
      }
    }
    out.push({
      url,
      descriptorText: descriptors.join(" "),
      width,
      density
    });
  }
  return out;
}

/**
 * Advances past whitespace.
 *
 * @param {string} value - Text being scanned.
 * @param {number} index - Start position.
 * @returns {number} Position of the first non-whitespace character at or
 *   after `index` (or `value.length`).
 */
function skipSrcsetWhitespace(value, index) {
  let cursor = index;
  while (cursor < value.length && /\s/u.test(value[cursor])) {
    cursor += 1;
  }
  return cursor;
}

/**
 * Advances past any mix of whitespace and commas between candidates.
 *
 * @param {string} value - Text being scanned.
 * @param {number} index - Start position.
 * @returns {number} Position of the next candidate (or `value.length`).
 */
function skipSrcsetSeparators(value, index) {
  let cursor = skipSrcsetWhitespace(value, index);
  while (cursor < value.length && value[cursor] === ",") {
    cursor += 1;
    cursor = skipSrcsetWhitespace(value, cursor);
  }
  return cursor;
}

/**
 * Reads the URL part of a candidate starting at `index`.
 *
 * Ordinary URLs end at the first whitespace or comma. `data:` URLs contain
 * commas by design, so they only end at whitespace; trailing commas on such a
 * token are the candidate separator and are removed from the URL.
 *
 * When separator commas are removed, `nextIndex` points AT the first removed
 * comma rather than past it. The caller therefore still sees the comma and
 * closes the candidate, instead of reading the following candidate's URL as
 * a descriptor of the data URL.
 *
 * @param {string} value - Text being scanned.
 * @param {number} index - Position of the first URL character.
 * @returns {{value: string, nextIndex: number}} The URL and the position
 *   where scanning should resume.
 */
function readSrcsetUrlToken(value, index) {
  let cursor = index;
  let out = "";
  const isDataUrl = value.slice(index, index + 5).toLowerCase().startsWith("data:");
  while (cursor < value.length) {
    const char = value[cursor];
    if (/\s/u.test(char)) {
      break;
    }
    if (char === "," && !isDataUrl) {
      break;
    }
    out += char;
    cursor += 1;
  }
  // A data URL that runs to the end of the input keeps its commas: there is
  // nothing after it that they could be separating.
  if (isDataUrl && cursor < value.length) {
    while (out.endsWith(",")) {
      out = out.slice(0, -1);
      cursor -= 1;
    }
  }
  return {
    value: out,
    nextIndex: cursor
  };
}

/**
 * Reads one descriptor token (for example `2x` or `480w`).
 *
 * Leading whitespace is skipped; the token ends at whitespace or a comma.
 *
 * @param {string} value - Text being scanned.
 * @param {number} index - Start position.
 * @returns {{value: string, nextIndex: number}} The descriptor text and the
 *   position just after it.
 */
function readSrcsetDescriptorToken(value, index) {
  let cursor = skipSrcsetWhitespace(value, index);
  let out = "";
  while (cursor < value.length) {
    const char = value[cursor];
    if (char === "," || /\s/u.test(char)) {
      break;
    }
    out += char;
    cursor += 1;
  }
  return {
    value: out.trim(),
    nextIndex: cursor
  };
}
18717
18971
  function removeNoise($) {
18718
18972
  for (const tag of STRIP_TAGS) {
18719
18973
  $(tag).remove();
@@ -18738,38 +18992,68 @@ function markInlineSelfHiddenFallback($) {
18738
18992
  });
18739
18993
  }
/**
 * Strips content from elements carrying the self-hidden marker attribute.
 *
 * Elements are visited in the order produced by
 * `getElementsInReverseDocumentOrder`. For each marked element its direct
 * text children are removed, and the element itself is removed when it has
 * no element children left.
 *
 * @param {Function} $ - Document wrapper; presumably a cheerio instance —
 *   confirm against the caller.
 */
function pruneSelfHiddenNodes($) {
  for (const node of getElementsInReverseDocumentOrder($)) {
    const isSelfHidden = node.attribs?.[OPENSTEER_SELF_HIDDEN_ATTR] !== void 0;
    if (!isSelfHidden) {
      continue;
    }
    const wrapped = $(node);
    wrapped.contents().each(function dropSelfHiddenText() {
      if (this.type !== "text") {
        return;
      }
      $(this).remove();
    });
    if (hasElementChildren(node)) {
      continue;
    }
    wrapped.remove();
  }
}
18760
- function hasDirectText($, el) {
18761
- return el.contents().filter(function hasDirectNodeText() {
18762
- return this.type === "text" && $(this).text().trim() !== "";
18763
- }).length > 0;
/**
 * Returns a node's `children`, or an empty array when the node or its
 * `children` property is null/undefined.
 *
 * @param {object|null|undefined} node - Parsed DOM node.
 * @returns {Array} The node's children (never null/undefined).
 */
function getChildNodes(node) {
  if (node == null) {
    return [];
  }
  const { children } = node;
  return children == null ? [] : children;
}
/**
 * Tells whether a node's `type` is one of `"tag"`, `"script"` or `"style"`.
 *
 * @param {object|null|undefined} node - Parsed DOM node.
 * @returns {boolean} True for the three element-like node types.
 */
function isElementLikeNode(node) {
  switch (node?.type) {
    case "tag":
    case "script":
    case "style":
      return true;
    default:
      return false;
  }
}
/**
 * Tells whether a node has at least one direct text child whose content is
 * not just whitespace.
 *
 * @param {object|null|undefined} node - Parsed DOM node.
 * @returns {boolean} True when a non-blank direct text child exists.
 */
function hasDirectText(node) {
  if (!node) {
    return false;
  }
  const isNonBlankText = (child) => child.type === "text" && (child.data || "").trim() !== "";
  return [...getChildNodes(node)].some(isNonBlankText);
}
/**
 * Tells whether a node has at least one direct child that
 * `isElementLikeNode` accepts.
 *
 * @param {object|null|undefined} node - Parsed DOM node.
 * @returns {boolean} True when an element-like direct child exists.
 */
function hasElementChildren(node) {
  if (!node) {
    return false;
  }
  return [...getChildNodes(node)].some((child) => isElementLikeNode(child));
}
/**
 * Tells whether a node or any of its descendants is a text node with
 * non-whitespace content.
 *
 * The tree is walked with an explicit stack rather than recursion, so very
 * deep trees cannot exhaust the call stack. Text nodes are judged by their
 * own `data` only; their `children` (if any) are not visited.
 *
 * @param {object|null|undefined} node - Parsed DOM node to search from.
 * @returns {boolean} True when non-blank text exists at or below `node`.
 */
function hasTextDeepNode(node) {
  if (!node) {
    return false;
  }
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current) {
      continue;
    }
    if (current.type === "text") {
      if ((current.data || "").trim() !== "") {
        return true;
      }
      continue;
    }
    // Push one by one: spreading a huge child list into push() could itself
    // exceed the engine's argument limit.
    for (const child of getChildNodes(current)) {
      pending.push(child);
    }
  }
  return false;
}
18768
19052
  function hasActionLabel(attrs) {
18769
19053
  return typeof attrs["aria-label"] === "string" && attrs["aria-label"].trim() !== "" || typeof attrs["aria-labelledby"] === "string" && attrs["aria-labelledby"].trim() !== "" || typeof attrs["aria-describedby"] === "string" && attrs["aria-describedby"].trim() !== "" || typeof attrs.title === "string" && attrs.title.trim() !== "" || typeof attrs.placeholder === "string" && attrs.placeholder.trim() !== "" || typeof attrs.value === "string" && attrs.value.trim() !== "";
18770
19054
  }
18771
19055
  function unwrapActionNode($, el) {
18772
- if (hasTextDeep(el)) {
19056
+ if (hasTextDeepNode(el[0])) {
18773
19057
  if (el.prev().length > 0) {
18774
19058
  el.before(" ");
18775
19059
  }
@@ -18790,7 +19074,7 @@ function stripToAttrs(el, keep) {
18790
19074
  if (typeof value !== "string") {
18791
19075
  continue;
18792
19076
  }
18793
- if (getAttrLimit(attr) !== void 0) {
19077
+ if (shouldBoundAttr(attr)) {
18794
19078
  setBoundedAttr(el, attr, value);
18795
19079
  }
18796
19080
  }
@@ -18808,6 +19092,9 @@ function restoreBoundedAttr(el, attr, value) {
18808
19092
  function deduplicateImages(html) {
18809
19093
  const seen = /* @__PURE__ */ new Set();
18810
19094
  return html.replace(/<img\b([^>]*)>/gi, (full, attrContent) => {
19095
+ if (/\bc\s*=/.test(attrContent)) {
19096
+ return full;
19097
+ }
18811
19098
  const srcMatch = attrContent.match(/\bsrc\s*=\s*(["']?)(.*?)\1/);
18812
19099
  const srcsetMatch = attrContent.match(/\bsrcset\s*=\s*(["'])(.*?)\1/);
18813
19100
  let src = null;
@@ -18826,59 +19113,155 @@ function deduplicateImages(html) {
18826
19113
  return full;
18827
19114
  });
18828
19115
  }
18829
- function isPreservedImageElement($, el) {
18830
- const tag = (el[0]?.tagName || "").toLowerCase();
19116
+ function hasAttribute2(node, attr) {
19117
+ return node?.attribs?.[attr] !== void 0;
19118
+ }
19119
+ function hasPictureAncestor(node) {
19120
+ let current = node?.parent;
19121
+ while (current) {
19122
+ if (isElementLikeNode(current) && (current.tagName || "").toLowerCase() === "picture") {
19123
+ return true;
19124
+ }
19125
+ current = current.parent;
19126
+ }
19127
+ return false;
19128
+ }
19129
+ function pictureHasPreservedDescendant(node) {
19130
+ if (!node) {
19131
+ return false;
19132
+ }
19133
+ for (const child of getChildNodes(node)) {
19134
+ if (!isElementLikeNode(child)) {
19135
+ continue;
19136
+ }
19137
+ const tag = (child.tagName || "").toLowerCase();
19138
+ if (tag === "img") {
19139
+ return true;
19140
+ }
19141
+ if (tag === "source" && typeof child.attribs?.src === "string" && child.attribs.src.trim() !== "") {
19142
+ return true;
19143
+ }
19144
+ if (tag === "source" && typeof child.attribs?.srcset === "string" && child.attribs.srcset.trim() !== "") {
19145
+ return true;
19146
+ }
19147
+ if (pictureHasPreservedDescendant(child)) {
19148
+ return true;
19149
+ }
19150
+ }
19151
+ return false;
19152
+ }
19153
+ function isPreservedImageElement(node) {
19154
+ const tag = (node?.tagName || "").toLowerCase();
18831
19155
  if (tag === "img") {
18832
19156
  return true;
18833
19157
  }
18834
19158
  if (tag === "picture") {
18835
- const hasImg = el.find("img").length > 0;
18836
- const hasSource = el.find("source[src], source[srcset]").length > 0;
18837
- return hasImg || hasSource;
19159
+ return pictureHasPreservedDescendant(node);
18838
19160
  }
18839
19161
  if (tag === "source") {
18840
- const inPicture = el.parents("picture").length > 0;
18841
- const hasSrc = el.attr("src") != null && el.attr("src").trim() !== "" || el.attr("srcset") != null && el.attr("srcset").trim() !== "";
19162
+ const inPicture = hasPictureAncestor(node);
19163
+ const hasSrc = typeof node?.attribs?.src === "string" && node.attribs.src.trim() !== "" || typeof node?.attribs?.srcset === "string" && node.attribs.srcset.trim() !== "";
18842
19164
  return inPicture && hasSrc;
18843
19165
  }
18844
19166
  return false;
18845
19167
  }
19168
+ function getElementsInReverseDocumentOrder($) {
19169
+ return $.root().find("*").toArray().reverse().filter((node) => node.type === "tag");
19170
+ }
19171
+ function getNodeDepth(node) {
19172
+ let depth = 0;
19173
+ let current = node.parent;
19174
+ while (current) {
19175
+ depth++;
19176
+ current = current.parent;
19177
+ }
19178
+ return depth;
19179
+ }
19180
+ function getElementsByDepthDescending($) {
19181
+ const elements = $.root().find("*").toArray().filter((node) => node.type === "tag");
19182
+ const depths = /* @__PURE__ */ new Map();
19183
+ for (const el of elements) {
19184
+ depths.set(el, getNodeDepth(el));
19185
+ }
19186
+ return elements.sort((a, b) => (depths.get(b) ?? 0) - (depths.get(a) ?? 0));
19187
+ }
18846
19188
  function flattenExtractionTree($) {
18847
- const flatten = (root) => {
18848
- root.find("*").each(function flattenNode() {
18849
- const el = $(this);
18850
- const node = el[0];
18851
- if (!node) {
18852
- return;
18853
- }
18854
- const tag = (node.tagName || "").toLowerCase();
18855
- if (ROOT_TAGS.has(tag) || isBoundaryTag(tag)) {
18856
- return;
18857
- }
18858
- if (isPreservedImageElement($, el)) {
18859
- return;
18860
- }
18861
- if (tag === "a") {
18862
- el.children().each(function flattenAnchorChild() {
18863
- flatten($(this));
18864
- });
18865
- return;
18866
- }
18867
- const hasText = hasDirectText($, el);
18868
- if (hasText) {
18869
- return;
18870
- }
18871
- if (el.children().length === 0) {
18872
- el.remove();
18873
- return;
19189
+ for (const node of getElementsInReverseDocumentOrder($)) {
19190
+ const el = $(node);
19191
+ const tag = (node.tagName || "").toLowerCase();
19192
+ if (ROOT_TAGS.has(tag) || isBoundaryTag(tag) || isPreservedImageElement(node)) {
19193
+ continue;
19194
+ }
19195
+ if (tag === "a" || hasDirectText(node)) {
19196
+ continue;
19197
+ }
19198
+ if (!hasElementChildren(node)) {
19199
+ el.remove();
19200
+ continue;
19201
+ }
19202
+ el.replaceWith(el.contents());
19203
+ }
19204
+ }
19205
+ function hasMarkedAncestor(el, attr) {
19206
+ let current = el[0]?.parent;
19207
+ while (current) {
19208
+ if (!isElementLikeNode(current)) {
19209
+ return false;
19210
+ }
19211
+ if (current.attribs?.[attr] !== void 0) {
19212
+ return true;
19213
+ }
19214
+ current = current.parent;
19215
+ }
19216
+ return false;
19217
+ }
19218
+ function isIndicatorImage(node) {
19219
+ return (node?.tagName || "").toLowerCase() === "img" && (hasAttribute2(node, "alt") || hasAttribute2(node, "src") || hasAttribute2(node, "srcset"));
19220
+ }
19221
+ function isIndicatorPictureSource(node) {
19222
+ return (node?.tagName || "").toLowerCase() === "source" && hasPictureAncestor(node) && (hasAttribute2(node, "src") || hasAttribute2(node, "srcset"));
19223
+ }
19224
+ function isSemanticIndicator(node) {
19225
+ const tag = (node?.tagName || "").toLowerCase();
19226
+ if (tag === "svg") {
19227
+ return true;
19228
+ }
19229
+ return hasAttribute2(node, "aria-label") || hasAttribute2(node, "title") || hasAttribute2(node, "data-icon") || node?.attribs?.role === "img";
19230
+ }
19231
+ function findIndicatorDescendant(root) {
19232
+ if (!root) {
19233
+ return void 0;
19234
+ }
19235
+ let firstImage;
19236
+ let firstSource;
19237
+ let firstSemantic;
19238
+ const visit = (node) => {
19239
+ if (!isElementLikeNode(node)) {
19240
+ return false;
19241
+ }
19242
+ if (isIndicatorImage(node)) {
19243
+ firstImage = node;
19244
+ return true;
19245
+ }
19246
+ if (firstSource === void 0 && isIndicatorPictureSource(node)) {
19247
+ firstSource = node;
19248
+ }
19249
+ if (firstSemantic === void 0 && isSemanticIndicator(node)) {
19250
+ firstSemantic = node;
19251
+ }
19252
+ for (const child of getChildNodes(node)) {
19253
+ if (visit(child)) {
19254
+ return true;
18874
19255
  }
18875
- el.children().each(function flattenChild() {
18876
- flatten($(this));
18877
- });
18878
- el.replaceWith(el.contents());
18879
- });
19256
+ }
19257
+ return false;
18880
19258
  };
18881
- flatten($.root());
19259
+ for (const child of getChildNodes(root)) {
19260
+ if (visit(child)) {
19261
+ return firstImage;
19262
+ }
19263
+ }
19264
+ return firstImage ?? firstSource ?? firstSemantic;
18882
19265
  }
18883
19266
  function serializeForExtraction($, root) {
18884
19267
  const lines = [];
@@ -18947,7 +19330,7 @@ function serializeForExtraction($, root) {
18947
19330
  lines.push(`${" ".repeat(depth)}</${tagName}>`);
18948
19331
  }
18949
19332
  traverse(root, 0);
18950
- return lines.join("\n");
19333
+ return lines.map((l) => l.trim()).filter((l) => l.length > 0).join("");
18951
19334
  }
18952
19335
  function isClickable($, el, context) {
18953
19336
  if (context.hasPreMarked) {
@@ -19039,7 +19422,11 @@ function cleanForExtraction(html) {
19039
19422
  }
19040
19423
  });
19041
19424
  flattenExtractionTree($clean);
19042
- return deduplicateImages(serializeForExtraction($clean, $clean.root()[0]));
19425
+ const root = $clean.root()[0];
19426
+ if (root === void 0) {
19427
+ return "";
19428
+ }
19429
+ return deduplicateImages(serializeForExtraction($clean, root));
19043
19430
  }
19044
19431
  function cleanForAction(html) {
19045
19432
  if (!html.trim()) {
@@ -19065,22 +19452,12 @@ function cleanForAction(html) {
19065
19452
  $(`[${clickableMark}]`).each(function markIndicators() {
19066
19453
  const el = $(this);
19067
19454
  const wrapperAttrs = el.attr() || {};
19068
- if (hasTextDeep(el) || hasActionLabel(wrapperAttrs)) {
19455
+ if (hasTextDeepNode(el[0]) || hasActionLabel(wrapperAttrs)) {
19069
19456
  return;
19070
19457
  }
19071
- const imageIndicator = el.find("img[alt], img[src], img[srcset]").first();
19072
- if (imageIndicator.length) {
19073
- imageIndicator.attr(indicatorMark, "1");
19074
- return;
19075
- }
19076
- const pictureSourceIndicator = el.find("picture source[src], picture source[srcset]").first();
19077
- if (pictureSourceIndicator.length) {
19078
- pictureSourceIndicator.attr(indicatorMark, "1");
19079
- return;
19080
- }
19081
- const semanticIndicator = el.find('[aria-label], [title], [data-icon], [role="img"], svg').first();
19082
- if (semanticIndicator.length) {
19083
- semanticIndicator.attr(indicatorMark, "1");
19458
+ const indicatorNode = findIndicatorDescendant(el[0]);
19459
+ if (indicatorNode !== void 0) {
19460
+ $(indicatorNode).attr(indicatorMark, "1");
19084
19461
  }
19085
19462
  });
19086
19463
  $(`[${clickableMark}]`).each(function removeEmptyClickable() {
@@ -19090,7 +19467,7 @@ function cleanForAction(html) {
19090
19467
  if (NATIVE_INTERACTIVE_TAGS.has(tag) || tag === "a") {
19091
19468
  return;
19092
19469
  }
19093
- if (el.children().length > 0 || hasDirectText($, el)) {
19470
+ if (hasElementChildren(node) || hasDirectText(node)) {
19094
19471
  return;
19095
19472
  }
19096
19473
  const wrapperAttrs = el.attr() || {};
@@ -19116,46 +19493,31 @@ function cleanForAction(html) {
19116
19493
  current = ancestor.parent();
19117
19494
  }
19118
19495
  });
19119
- let changed = true;
19120
- while (changed) {
19121
- changed = false;
19122
- const nodes = [];
19123
- $("*").each(function collectNodes() {
19124
- nodes.push($(this));
19125
- });
19126
- nodes.sort((left, right) => right.parents().length - left.parents().length);
19127
- for (const el of nodes) {
19128
- const node = el[0];
19129
- if (!node) {
19130
- continue;
19131
- }
19132
- const tag = (node.tagName || "").toLowerCase();
19133
- if (ROOT_TAGS.has(tag) || isBoundaryTag(tag)) {
19134
- continue;
19135
- }
19136
- if (el.attr(clickableMark) !== void 0 || el.attr(indicatorMark) !== void 0) {
19137
- continue;
19138
- }
19139
- const insideClickable = el.parents(`[${clickableMark}]`).length > 0;
19140
- const preserveBranch = el.attr(branchMark) !== void 0;
19141
- const hasContent = el.children().length > 0 || hasDirectText($, el);
19142
- if (insideClickable || preserveBranch) {
19143
- if (!hasContent) {
19144
- el.remove();
19145
- } else {
19146
- unwrapActionNode($, el);
19147
- }
19148
- changed = true;
19149
- continue;
19150
- }
19496
+ for (const node of getElementsByDepthDescending($)) {
19497
+ const el = $(node);
19498
+ const tag = (node.tagName || "").toLowerCase();
19499
+ if (ROOT_TAGS.has(tag) || isBoundaryTag(tag)) {
19500
+ continue;
19501
+ }
19502
+ if (el.attr(clickableMark) !== void 0 || el.attr(indicatorMark) !== void 0) {
19503
+ continue;
19504
+ }
19505
+ const insideClickable = hasMarkedAncestor(el, clickableMark);
19506
+ const preserveBranch = el.attr(branchMark) !== void 0;
19507
+ const hasContent = hasElementChildren(node) || hasDirectText(node);
19508
+ if (insideClickable || preserveBranch) {
19151
19509
  if (!hasContent) {
19152
19510
  el.remove();
19153
- changed = true;
19154
- continue;
19511
+ } else {
19512
+ unwrapActionNode($, el);
19155
19513
  }
19156
- unwrapActionNode($, el);
19157
- changed = true;
19514
+ continue;
19515
+ }
19516
+ if (!hasContent) {
19517
+ el.remove();
19518
+ continue;
19158
19519
  }
19520
+ unwrapActionNode($, el);
19159
19521
  }
19160
19522
  $.root().find("*").contents().each(function normalizeActionTextNodes() {
19161
19523
  if (this.type !== "text") {
@@ -19235,21 +19597,7 @@ function cleanForAction(html) {
19235
19597
  OPENSTEER_SPARSE_COUNTER_ATTR
19236
19598
  ]);
19237
19599
  if (clickable) {
19238
- for (const attr of [
19239
- "href",
19240
- "role",
19241
- "type",
19242
- "title",
19243
- "placeholder",
19244
- "value",
19245
- "aria-label",
19246
- "aria-labelledby",
19247
- "aria-describedby",
19248
- "aria-expanded",
19249
- "aria-pressed",
19250
- "aria-selected",
19251
- "aria-haspopup"
19252
- ]) {
19600
+ for (const attr of ["href", "role", "type", "title", "placeholder", "value", "aria-label"]) {
19253
19601
  keep.add(attr);
19254
19602
  }
19255
19603
  }
@@ -19273,16 +19621,25 @@ function cleanForAction(html) {
19273
19621
  });
19274
19622
  return compactHtml(deduplicateImages($.html()));
19275
19623
  }
19276
- var STRIP_TAGS, TEXT_ATTR_MAX, URL_ATTR_MAX, URL_ATTRS, TEXT_ATTRS, TRUNCATION_SUFFIX, NOISE_SELECTORS, VOID_TAGS2;
19624
+ var STRIP_TAGS, TEXT_ATTR_MAX, SRCSET_ATTR_MAX, MIDDLE_TRUNCATED_URL_ATTRS, TEXT_ATTRS, TRUNCATION_SUFFIX, MIDDLE_TRUNCATION_MARKER, MIDDLE_TRUNCATION_HEAD_MAX, MIDDLE_TRUNCATION_TAIL_MAX, SRCSET_CANDIDATE_HEAD_MAX, SRCSET_CANDIDATE_TAIL_MAX, SRCSET_COMPACT_CANDIDATE_HEAD_MAX, SRCSET_COMPACT_CANDIDATE_TAIL_MAX, SRCSET_FALLBACK_HEAD_MAX, SRCSET_FALLBACK_TAIL_MAX, NOISE_SELECTORS, VOID_TAGS2;
19277
19625
  var init_cleaner = __esm({
19278
19626
  "../runtime-core/src/sdk/snapshot/cleaner.ts"() {
19279
19627
  init_constants();
19280
19628
  STRIP_TAGS = /* @__PURE__ */ new Set(["script", "style", "noscript", "meta", "link", "template"]);
19281
19629
  TEXT_ATTR_MAX = 150;
19282
- URL_ATTR_MAX = 500;
19283
- URL_ATTRS = /* @__PURE__ */ new Set(["href", "src", "srcset"]);
19630
+ SRCSET_ATTR_MAX = 160;
19631
+ MIDDLE_TRUNCATED_URL_ATTRS = /* @__PURE__ */ new Set(["href", "src"]);
19284
19632
  TEXT_ATTRS = /* @__PURE__ */ new Set(["alt", "title", "aria-label", "placeholder", "value"]);
19285
- TRUNCATION_SUFFIX = " [truncated]";
19633
+ TRUNCATION_SUFFIX = "...";
19634
+ MIDDLE_TRUNCATION_MARKER = "...";
19635
+ MIDDLE_TRUNCATION_HEAD_MAX = 40;
19636
+ MIDDLE_TRUNCATION_TAIL_MAX = 20;
19637
+ SRCSET_CANDIDATE_HEAD_MAX = 36;
19638
+ SRCSET_CANDIDATE_TAIL_MAX = 12;
19639
+ SRCSET_COMPACT_CANDIDATE_HEAD_MAX = 20;
19640
+ SRCSET_COMPACT_CANDIDATE_TAIL_MAX = 8;
19641
+ SRCSET_FALLBACK_HEAD_MAX = 56;
19642
+ SRCSET_FALLBACK_TAIL_MAX = 20;
19286
19643
  NOISE_SELECTORS = [
19287
19644
  `[${OPENSTEER_HIDDEN_ATTR}]`,
19288
19645
  "[hidden]",
@@ -19821,9 +20178,9 @@ function renderNode(snapshot, node, nodesById, snapshotsByDocumentRef, snapshotI
19821
20178
  const snapshotAttributes = normalizeNodeAttributes(node.attributes);
19822
20179
  const authoredAttributes = stripInternalSnapshotAttributes(snapshotAttributes);
19823
20180
  const attributes = [...authoredAttributes];
19824
- const subtreeHidden = hasAttribute2(snapshotAttributes, OPENSTEER_HIDDEN_ATTR) || isLikelySubtreeHidden(node);
19825
- const selfHidden = !subtreeHidden && (hasAttribute2(snapshotAttributes, OPENSTEER_SELF_HIDDEN_ATTR) || isLikelySelfHidden(node, nodesById));
19826
- const interactive = !subtreeHidden && !selfHidden && (hasAttribute2(snapshotAttributes, OPENSTEER_INTERACTIVE_ATTR) || isLikelyInteractive(tagName, node, authoredAttributes));
20181
+ const subtreeHidden = hasAttribute3(snapshotAttributes, OPENSTEER_HIDDEN_ATTR) || isLikelySubtreeHidden(node);
20182
+ const selfHidden = !subtreeHidden && (hasAttribute3(snapshotAttributes, OPENSTEER_SELF_HIDDEN_ATTR) || isLikelySelfHidden(node, nodesById));
20183
+ const interactive = !subtreeHidden && !selfHidden && (hasAttribute3(snapshotAttributes, OPENSTEER_INTERACTIVE_ATTR) || isLikelyInteractive(tagName, node, authoredAttributes));
19827
20184
  if (interactive) {
19828
20185
  attributes.push({ name: OPENSTEER_INTERACTIVE_ATTR, value: "1" });
19829
20186
  }
@@ -20141,7 +20498,7 @@ function parseOpacity(value) {
20141
20498
  const parsed = Number.parseFloat(value);
20142
20499
  return Number.isFinite(parsed) ? parsed : Number.NaN;
20143
20500
  }
20144
- function hasAttribute2(attributes, name) {
20501
+ function hasAttribute3(attributes, name) {
20145
20502
  const normalizedName = name.toLowerCase();
20146
20503
  return attributes.some((attribute) => attribute.name.toLowerCase() === normalizedName);
20147
20504
  }
@@ -23471,12 +23828,12 @@ var init_runtime3 = __esm({
23471
23828
  async (timeout) => {
23472
23829
  let descriptor2;
23473
23830
  let data;
23474
- if (input.schema !== void 0) {
23475
- assertValidOpensteerExtractionSchemaRoot(input.schema);
23831
+ if (input.template !== void 0) {
23832
+ assertValidOpensteerExtractionTemplateRoot(input.template);
23476
23833
  const fieldTargets = await timeout.runStep(
23477
23834
  () => compileOpensteerExtractionFieldTargets({
23478
23835
  pageRef,
23479
- schema: input.schema,
23836
+ template: input.template,
23480
23837
  dom: this.requireDom()
23481
23838
  })
23482
23839
  );
@@ -23508,7 +23865,7 @@ var init_runtime3 = __esm({
23508
23865
  () => descriptors.write({
23509
23866
  persist,
23510
23867
  root: payload,
23511
- schemaHash: canonicalJsonString(input.schema),
23868
+ templateHash: canonicalJsonString(input.template),
23512
23869
  sourceUrl: pageInfo.url
23513
23870
  })
23514
23871
  );
@@ -23568,7 +23925,7 @@ var init_runtime3 = __esm({
23568
23925
  artifacts,
23569
23926
  data: {
23570
23927
  ...input.persist === void 0 ? {} : { persist: input.persist },
23571
- ...descriptor?.payload.schemaHash === void 0 ? {} : { schemaHash: descriptor.payload.schemaHash },
23928
+ ...descriptor?.payload.templateHash === void 0 ? {} : { templateHash: descriptor.payload.templateHash },
23572
23929
  data: output.data
23573
23930
  },
23574
23931
  context: buildRuntimeTraceContext({
@@ -30195,9 +30552,6 @@ var init_opensteer = __esm({
30195
30552
  await delay5(pollIntervalMs);
30196
30553
  }
30197
30554
  }
30198
- async snapshot(mode = "action") {
30199
- return (await this.runtime.snapshot({ mode })).html;
30200
- }
30201
30555
  async cookies(domain) {
30202
30556
  return new SessionCookieJar(
30203
30557
  await this.runtime.getCookies(domain === void 0 ? {} : { domain })
@@ -30255,7 +30609,7 @@ var init_opensteer = __esm({
30255
30609
 
30256
30610
  // package.json
30257
30611
  var package_default = {
30258
- version: "0.9.4"};
30612
+ version: "0.9.5"};
30259
30613
 
30260
30614
  // src/cli/bin.ts
30261
30615
  init_browser_manager();
@@ -30288,11 +30642,11 @@ Navigation:
30288
30642
 
30289
30643
  DOM:
30290
30644
  snapshot [action|extraction]
30291
- click <element> [--button left|middle|right] [--persist <key>] [--capture-network <label>]
30292
- hover <element> [--persist <key>] [--capture-network <label>]
30293
- input <element> <text> [--press-enter] [--persist <key>] [--capture-network <label>]
30294
- scroll <direction> <amount> [--element <n>] [--persist <key>] [--capture-network <label>]
30295
- extract <schema> [--persist <key>]
30645
+ click <element> --persist <key> [--button left|middle|right] [--capture-network <label>]
30646
+ hover <element> --persist <key> [--capture-network <label>]
30647
+ input <element> <text> --persist <key> [--press-enter] [--capture-network <label>]
30648
+ scroll <direction> <amount> --persist <key> [--element <n>] [--capture-network <label>]
30649
+ extract <template> --persist <key>
30296
30650
  evaluate <script>
30297
30651
  init-script <script>
30298
30652
 
@@ -30842,18 +31196,18 @@ async function buildOperationInput(operation, parsed, runtime) {
30842
31196
  },
30843
31197
  direction,
30844
31198
  amount,
30845
- ...persist === void 0 ? {} : { persist },
31199
+ persist,
30846
31200
  ...captureNetwork === void 0 ? {} : { captureNetwork }
30847
31201
  };
30848
31202
  }
30849
31203
  case "dom.extract": {
30850
31204
  if (parsed.rest[0] === void 0) {
30851
- throw new Error("extract requires a schema.");
31205
+ throw new Error("extract requires a template.");
30852
31206
  }
30853
- const persist = readExtractPersistKey(parsed);
31207
+ const persist = readPersistKey(parsed, "extract");
30854
31208
  return {
30855
- schema: parseRequiredJsonObjectArgument(joinRest(parsed.rest, 0), "extract schema"),
30856
- ...persist === void 0 ? {} : { persist }
31209
+ persist,
31210
+ template: parseRequiredJsonObjectArgument(joinRest(parsed.rest, 0), "extract template")
30857
31211
  };
30858
31212
  }
30859
31213
  case "network.query": {
@@ -31063,7 +31417,7 @@ function buildElementTargetInput(parsed, verb) {
31063
31417
  kind: "element",
31064
31418
  element
31065
31419
  },
31066
- ...persist === void 0 ? {} : { persist },
31420
+ persist,
31067
31421
  ...captureNetwork === void 0 ? {} : { captureNetwork }
31068
31422
  };
31069
31423
  }
@@ -31268,23 +31622,10 @@ function readKeyModifiers(value) {
31268
31622
  function readPersistKey(parsed, verb) {
31269
31623
  const value = readSingle(parsed.rawOptions, "persist");
31270
31624
  if (value === void 0) {
31271
- return void 0;
31272
- }
31273
- if (value === "true" || value === "false") {
31274
- throw new Error(`${verb} requires "--persist <key>" when using --persist.`);
31275
- }
31276
- if (verb === "scroll" && readOptionalNumber(parsed.rawOptions, "element") === void 0) {
31277
- throw new Error('scroll requires "--element <n>" when using "--persist <key>".');
31278
- }
31279
- return value;
31280
- }
31281
- function readExtractPersistKey(parsed) {
31282
- const value = readSingle(parsed.rawOptions, "persist");
31283
- if (value === void 0) {
31284
- return void 0;
31625
+ throw new Error(`${verb} requires "--persist <key>".`);
31285
31626
  }
31286
31627
  if (value === "true" || value === "false") {
31287
- throw new Error('extract requires "--persist <key>" when using --persist.');
31628
+ throw new Error(`${verb} requires "--persist <key>".`);
31288
31629
  }
31289
31630
  return value;
31290
31631
  }