@isdk/web-fetcher 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.action.cn.md +28 -4
  2. package/README.action.md +27 -4
  3. package/README.cn.md +21 -0
  4. package/README.engine.cn.md +35 -7
  5. package/README.engine.md +30 -2
  6. package/README.md +23 -1
  7. package/dist/index.d.mts +1571 -1448
  8. package/dist/index.d.ts +1571 -1448
  9. package/dist/index.js +1 -1
  10. package/dist/index.mjs +1 -1
  11. package/docs/README.md +23 -1
  12. package/docs/_media/README.action.md +27 -4
  13. package/docs/_media/README.cn.md +21 -0
  14. package/docs/_media/README.engine.md +30 -2
  15. package/docs/classes/CheerioFetchEngine.md +169 -93
  16. package/docs/classes/ClickAction.md +29 -29
  17. package/docs/classes/EngineUpgradeError.md +335 -0
  18. package/docs/classes/EvaluateAction.md +29 -29
  19. package/docs/classes/ExtractAction.md +29 -29
  20. package/docs/classes/FetchAction.md +31 -29
  21. package/docs/classes/FetchEngine.md +159 -91
  22. package/docs/classes/FetchSession.md +14 -14
  23. package/docs/classes/FillAction.md +29 -29
  24. package/docs/classes/GetContentAction.md +29 -29
  25. package/docs/classes/GotoAction.md +29 -29
  26. package/docs/classes/KeyboardPressAction.md +29 -29
  27. package/docs/classes/KeyboardTypeAction.md +29 -29
  28. package/docs/classes/MouseClickAction.md +29 -29
  29. package/docs/classes/MouseMoveAction.md +29 -29
  30. package/docs/classes/MouseWheelAction.md +533 -0
  31. package/docs/classes/PauseAction.md +29 -29
  32. package/docs/classes/PlaywrightFetchEngine.md +252 -118
  33. package/docs/classes/ScrollIntoViewAction.md +533 -0
  34. package/docs/classes/SubmitAction.md +29 -29
  35. package/docs/classes/TrimAction.md +29 -29
  36. package/docs/classes/WaitForAction.md +29 -29
  37. package/docs/classes/WebFetcher.md +5 -5
  38. package/docs/enumerations/FetchActionResultStatus.md +4 -4
  39. package/docs/functions/fetchWeb.md +2 -2
  40. package/docs/functions/getRandomDelay.md +25 -0
  41. package/docs/globals.md +8 -1
  42. package/docs/interfaces/BaseFetchActionProperties.md +13 -13
  43. package/docs/interfaces/BaseFetchCollectorActionProperties.md +17 -17
  44. package/docs/interfaces/BaseFetcherProperties.md +44 -28
  45. package/docs/interfaces/DispatchedEngineAction.md +4 -4
  46. package/docs/interfaces/EvaluateActionOptions.md +3 -3
  47. package/docs/interfaces/ExtractActionProperties.md +13 -13
  48. package/docs/interfaces/FetchActionMeta.md +73 -0
  49. package/docs/interfaces/FetchActionProperties.md +15 -19
  50. package/docs/interfaces/FetchActionResult.md +7 -7
  51. package/docs/interfaces/FetchContext.md +65 -41
  52. package/docs/interfaces/FetchEngineContext.md +57 -33
  53. package/docs/interfaces/FetchMetadata.md +5 -5
  54. package/docs/interfaces/FetchResponse.md +14 -14
  55. package/docs/interfaces/FetchReturnTypeRegistry.md +7 -7
  56. package/docs/interfaces/FetchSite.md +55 -31
  57. package/docs/interfaces/FetcherOptions.md +55 -31
  58. package/docs/interfaces/GotoActionOptions.md +8 -8
  59. package/docs/interfaces/KeyboardPressParams.md +3 -3
  60. package/docs/interfaces/KeyboardTypeParams.md +3 -3
  61. package/docs/interfaces/MouseClickParams.md +6 -6
  62. package/docs/interfaces/MouseMoveParams.md +5 -5
  63. package/docs/interfaces/MouseWheelParams.md +69 -0
  64. package/docs/interfaces/PendingEngineRequest.md +3 -3
  65. package/docs/interfaces/ScrollIntoViewParams.md +17 -0
  66. package/docs/interfaces/StorageOptions.md +5 -5
  67. package/docs/interfaces/SubmitActionOptions.md +2 -2
  68. package/docs/interfaces/TrimActionOptions.md +3 -3
  69. package/docs/interfaces/WaitForActionOptions.md +5 -5
  70. package/docs/type-aliases/BaseFetchActionOptions.md +1 -1
  71. package/docs/type-aliases/BaseFetchCollectorOptions.md +1 -1
  72. package/docs/type-aliases/BrowserEngine.md +1 -1
  73. package/docs/type-aliases/FetchActionCapabilities.md +1 -1
  74. package/docs/type-aliases/FetchActionCapabilityMode.md +1 -1
  75. package/docs/type-aliases/FetchActionInContext.md +38 -0
  76. package/docs/type-aliases/FetchActionOptions.md +1 -1
  77. package/docs/type-aliases/FetchEngineAction.md +2 -2
  78. package/docs/type-aliases/FetchEngineType.md +1 -1
  79. package/docs/type-aliases/FetchReturnType.md +1 -1
  80. package/docs/type-aliases/FetchReturnTypeFor.md +1 -1
  81. package/docs/type-aliases/OnFetchPauseCallback.md +1 -1
  82. package/docs/type-aliases/ResourceType.md +1 -1
  83. package/docs/type-aliases/TrimPreset.md +1 -1
  84. package/docs/variables/DefaultFetcherProperties.md +1 -1
  85. package/docs/variables/FetcherOptionKeys.md +1 -1
  86. package/docs/variables/TRIM_PRESETS.md +1 -1
  87. package/package.json +7 -7
  88. package/docs/interfaces/FetchActionInContext.md +0 -190
package/dist/index.mjs CHANGED
@@ -1 +1 @@
1
- var t={engine:"auto",enableSmart:!0,useSiteRegistry:!0,antibot:!1,debug:!1,headers:{},cookies:[],throwHttpErrors:void 0,output:{cookies:!0,sessionState:!0},proxy:[],blockResources:[],storage:{purge:!0},ignoreSslErrors:!0,browser:{engine:"playwright",headless:!0,waitUntil:"domcontentloaded"},http:{method:"GET"},timeoutMs:6e4,requestHandlerTimeoutSecs:void 0,maxConcurrency:1,maxRequestsPerMinute:1e3,delayBetweenRequestsMs:0,retries:0,sites:[]},e=Object.keys(t).concat(["actions","onPause"]),i=(t=>(t[t.Failed=0]="Failed",t[t.Success=1]="Success",t[t.Skipped=2]="Skipped",t))(i||{}),s=class t{static register(t){const e=t.id;if(!e)throw new Error("FetchAction.register: actionClass.id is required");this.registry.set(e,t)}static get(t){return this.registry.get(t)}static create(e){const i="string"==typeof e?e:e.id||e.name||e.action;if(!i)throw new Error("Action must have id, name or action");const s=i instanceof t?i.constructor:this.registry.get(i);return s?new s:void 0}static has(t){return this.registry.has(t)}static list(){return Array.from(this.registry.keys())}static getCapability(t){return this.capabilities[t]??"noop"}getCapability(t){return this.constructor.getCapability(t)}get id(){return this.constructor.id}get returnType(){return this.constructor.returnType}get capabilities(){return this.constructor.capabilities}async delegateToEngine(t,e,...i){const s=t.internal.engine;if(!s)throw new Error("No engine available");if("function"!=typeof s[e])throw new Error(`Engine does not have a method named '${String(e)}'`);return await s[e](...i)}installCollectors(e,i){const s=i?.collectors;if(!s?.length)return;const n=[],c=new Set;for(const i of s){const s=r(i.activateOn),l=r(i.collectOn),u=r(i.deactivateOn),h=!(i.background??!0),f=t.create(i);if(!f)continue;let d=!1,w=!1,p=0;const y=async t=>{if(!d&&!w){d=!0;try{await(f.onBeforeExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:f.id,phase:"before",error:t})}}},m=async(t,s)=>{if(!w){d||await 
y(s);try{const n=Promise.resolve(f.onExecute?.(e,i,s)).then(s=>{var n,r;if(i.storeAs){((n=e.outputs)[r=i.storeAs]||(n[r]=[])).push(s)}return e.eventBus.emit("collector:result",{action:this.id,collector:i.id||i.name,event:t,result:s}),s}).catch(s=>{e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,event:t,phase:"exec",error:s})}).finally(()=>{p++});h&&(c.add(n),n.finally(()=>c.delete(n)))}catch(i){e.eventBus.emit("collector:error",{action:this.id,collector:f.id,event:t,phase:"exec",error:i})}}},g=async()=>{if(!w){0===p&&m("collector:after"),w=!0;try{await(f.onAfterExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,phase:"after",error:t})}finally{e.eventBus.emit("collector:end",{action:this.id,collector:i.id||i.name}),b.forEach(t=>t())}}},x=o(e,s,y),b=a(e,l,m),v=o(e,u,g);if(n.push(...x,...b,...v),!s.length&&!l.length&&!u.length){const t=()=>{g()};e.eventBus.once(`action:${this.id}.end`,t),n.push(()=>e.eventBus.off("fetcher:action:end",t))}}return n.length||c.size>0?{cleanup:()=>n.forEach(t=>t()),awaitExecPendings:async()=>{c.size>0&&await Promise.allSettled(Array.from(c))}}:void 0}async beforeExec(t,e){t.internal.actionStack||(t.internal.actionStack=[]);const i=t.internal.actionStack,s=i.length,n=i.length>0?i[i.length-1].id:void 0,r={...e,id:this.id,depth:s,parent:n};i.push(r),t.currentAction=r;const o={action:this,context:t,options:e,index:e?.index,depth:s,stack:[...i]};t.eventBus.emit(`action:${this.id}.start`,o),t.eventBus.emit("action:start",o),await(this.onBeforeExec?.(t,e));return{entry:o,collectors:this.installCollectors(t,e)}}async afterExec(t,e,i,s){const n=t.internal.actionStack,r=n.length-1,o=s?.collectors;try{if(await(o?.awaitExecPendings()),t.lastResult=i,"response"!==i?.returnType||i.error||(t.lastResponse=i.result),e?.storeAs){const s=t.outputs[e.storeAs],n=i?.result;"object"!=typeof s||null===s||"object"!=typeof 
n||null===n||Array.isArray(s)||Array.isArray(n)?t.outputs[e.storeAs]=n:t.outputs[e.storeAs]={...s,...n}}i?.error&&(t.currentAction.error=i.error),await(this.onAfterExec?.(t,e));const s={action:this,context:t,options:e,result:i,depth:r,stack:[...n]};i?.error&&(s.error=i.error);try{t.eventBus.emit(`action:${this.id}.end`,s)}catch(t){}try{t.eventBus.emit("action:end",s)}catch(t){}}finally{try{o?.cleanup()}finally{n.pop();const e=n.length;t.currentAction=e>0?n[e-1]:void 0}}}async execute(t,e){e?.args&&!e.params&&(e.params=e.args);const i=await this.beforeExec(t,e),s=e?.failOnError??!0;let n;try{return t.throwHttpErrors=s,n=await this.onExecute(t,e),n&&n.returnType||(n={status:1,returnType:this.returnType??"any",result:n}),n}catch(e){if(n={status:0,error:e,meta:{id:this.id,engineType:t.engine,capability:this.getCapability(t.engine)}},s)throw e;return n}finally{await this.afterExec(t,e,n,i)}}};s.registry=new Map,s.returnType="any",s.capabilities={http:"noop",browser:"noop"};var n=s;function r(t){return t?Array.isArray(t)?t:[t]:[]}function o(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=(...t)=>{i(t[0])};t.eventBus.once(n,e),s.push(()=>t.eventBus.off(n,e))}return s}function a(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=t=>i(n,t);t.eventBus.on(n,e),s.push(()=>t.eventBus.off(n,e))}return s}import{EventEmitter as c}from"events-ex";import{defaultsDeep as l}from"lodash-es";import{customAlphabet as u}from"nanoid";var h=u("0123456789abcdefghijklmnopqrstuvwxyz",12);function f(t,e,...i){if(!t)return;const{prefix:s,id:n,category:r}=e;if(!0===t||t===r||Array.isArray(t)&&t.includes(r)){const t=n?`:${n}`:"";console.log(`[${s}${t}:${r}]`,...i)}}import{defaultsDeep as d,merge as w}from"lodash-es";import{EventEmitter as p}from"events-ex";import{CommonError as y}from"@isdk/common-error";import{Configuration as m,KeyValueStore as g,PERSIST_STATE_KEY as x,RequestQueue as b,ProxyConfiguration as 
v}from"crawlee";import{CommonError as $}from"@isdk/common-error";var _=new Set(["string","number","boolean","html","object","array"]),k=new Set(["selector","has","exclude","required","strict","relativeTo","order","anchor","depth"]);function E(t){if("string"==typeof t)return{type:"string",selector:t,mode:"text"};if(!t||"object"!=typeof t)return{type:"string",mode:"text"};let e={...t};if(function(t){if(!t||"object"!=typeof t)return!1;if(Array.isArray(t))return!1;if("type"in t)return"string"!=typeof t.type||!_.has(t.type);const e=Object.keys(t);if(0===e.length)return!1;for(const t of e)if(!["selector","attribute","has","exclude","mode","required","strict","relativeTo","order","anchor","depth"].includes(t))return!0;return!1}(e)){const t={};for(const i of Object.keys(e))k.has(i)||(t[i]=E(e[i]),delete e[i]);e.type="object",e.properties=t}if(e.type||(e.type="string"),"object"===e.type){const t=e;t.properties||(t.properties={});for(const e in t.properties)t.properties[e]=E(t.properties[e]);delete t.mode,delete t.items,delete t.attribute}else if("array"===e.type){const t=e;t.attribute&&!t.items&&(t.items={type:"string",attribute:t.attribute,mode:"text"},delete t.attribute),t.items||(t.items={type:"string",mode:"text"}),t.items=E(t.items),"string"==typeof t.mode&&(t.mode={type:t.mode})}else{const t=e;t.mode||("html"===t.type?t.mode="html":t.mode="text")}if(e.selector&&(e.has||e.exclude)){const{selector:t,has:i,exclude:s}=e,n=t.split(",").map(t=>t.trim()).map(t=>{let e=t;return i&&(e+=`:has(${i})`),s&&(e+=`:not(${s})`),e});e.selector=n.join(", "),delete e.has,delete e.exclude}return e}async function C(t,e,i){const s=E(t);return S.call(this,s,e,i)}async function S(t,e,i){const s=t.type,n=t.selector,r=t.strict??i;if(!e)return this._logDebug("extract",`_extract: No scope for selector "${n||""}", type "${s||"value"}"`),"array"===s?[]:null;switch(s){case"object":return q.call(this,t,e,r);case"array":return R.call(this,t,e,r);default:return T.call(this,t,e,r)}}async function 
q(t,e,i){const{selector:s,properties:n,strict:r}=t,o=r??i,a=t._skipSelector;let c=e;if(s&&!a){const t=await this._querySelectorAll(e,s);c=t.length>0?t[0]:null,this._logDebug("extract",`_extractObject: selector "${s}" found ${t.length} elements`)}if(!c){if(this._logDebug("extract",`_extractObject: scope not found for selector "${s||""}"`),o&&t.required)throw new $(`Required object "${s||""}" is missing.`,"extract");return null}let l=t.depth??0;const u=l;for(;;){const{result:i,hasValue:r,missingRequired:h}=await A.call(this,t,c,o);if(0===h.length)return!s&&!r&&Object.keys(n).length>0?(this._logDebug("extract","_extractObject result: null"),null):(this._logDebug("extract","_extractObject result:",i),i);let f=!1;if(l>0)if(a)f=!0;else{const t=await this._isSameElement(c,e),i=await this._contains(e,c);f=!t&&i}if(f){const t=await this._parentElement(c);if(t){let i=!0;if(a||(i=await this._isSameElement(e,t)||await this._contains(e,t)),i){this._logDebug("extract",`_extractObject: missing required fields [${h.join(", ")}], bubbling up from depth ${u-l} to ${u-l+1}`),c=t,l--;continue}}}if(o)throw new $(`Required property "${h[0]}" is missing.`,"extract");return null}}async function A(t,e,i){const{properties:s,relativeTo:n,order:r}=t,o={},a=[];let c=!1;const l=r||Object.keys(s);let u=e;const h=new Map,f="previous"===n;for(const t of l){const n=s[t];if(!n)continue;this._logDebug("extract",`_extractObject: extracting property "${t}"`);let r,l=u;if(n.anchor){const r=await U.call(this,n.anchor,s,h,e,f,n.depth);if(!r){if(i)throw new $(`Anchor "${n.anchor}" not found or out of scope.`,"extract");o[t]=null,n.required&&a.push(t);continue}l=r.scopeForField,f&&(u=l)}let d=null;const w=n.selector,p="array"===n.type;if(w){let t=await this._querySelectorAll(l,w);if(t.length>0){void 0!==n.depth&&"object"!==n.type&&(t=await Promise.all(t.map(t=>N.call(this,t,l,n.depth)))),d=t[0];const e={...n,_skipSelector:!0};if(r=await S.call(this,e,p?t:d,i),f&&!n.anchor){const 
e=p&&Array.isArray(r)?t[t.length-1]:d;u=await F.call(this,e,u)}p&&(d=t[t.length-1])}else r=null}else r=await S.call(this,n,l,i),null!==r&&(d=Array.isArray(l)?l[0]:l);d&&h.set(t,d),null===r&&n.required&&a.push(t),null!==r&&(c=!0),o[t]=r}return{result:o,hasValue:c,missingRequired:a}}async function R(t,e,i){const{selector:s,items:n,mode:r,strict:o}=t,a=o??i,c=t._skipSelector;let l=s&&!c?await this._querySelectorAll(e,s):Array.isArray(e)?e:[e];s&&!c&&void 0!==t.depth&&(l=await Promise.all(l.map(i=>N.call(this,i,e,t.depth)))),this._logDebug("extract",`_extractArray: selector "${s||""}" found ${l.length} elements`);const u=j.call(this,r);if(void 0!==a&&void 0===u.strict&&(u.strict=a),(!r||"columnar"===u.type)&&1===l.length&&n){this._logDebug("extract","_extractArray: trying columnar extraction");const t=await M.call(this,n,l[0],u);if(t)return t}if("segmented"===u.type&&n){this._logDebug("extract",`_extractArray: trying segmented extraction for ${l.length} containers`);const t=[];let e=!1;for(const i of l){const s=await O.call(this,n,i,u);s&&(e=!0,t.push(...s))}if(e)return t}return this._logDebug("extract",`_extractArray: using nested extraction for ${l.length} elements`),P.call(this,n,l,{strict:u.strict})}async function T(t,e,i){const{selector:s}=t,n=t._skipSelector,r=t.strict??i;let o=e;if(s&&!n){const i=await this._querySelectorAll(e,s);o=i.length>0?i[0]:null,o&&void 0!==t.depth&&(o=await N.call(this,o,e,t.depth)),this._logDebug("extract",`_extractValue: selector "${s}" found ${i.length} elements`)}else Array.isArray(e)&&(o=e.length>0?e[0]:null);if(!o){if(this._logDebug("extract",`_extractValue: element not found for selector "${s||""}"`),r&&t.required)throw new $(`Required value "${s||""}" is missing.`,"extract");return null}const a=await this._extractValue(t,o);return this._logDebug("extract",`_extractValue: extracted for selector "${s||""}":`,a),a}function j(t){return t?"string"==typeof t?{type:t}:t:{type:"nested"}}async function P(t,e,i){const 
s=[],n=t.required,r=!0===i?.strict,o="object"===t.type||"array"===t.type;for(const i of e){const e=await S.call(this,t,i,r);if(null!==e)s.push(e);else{if(n&&r)throw new $("Required item is missing in array.","extract");n||o||s.push(null)}}return s}async function M(t,e,i){const s="object"===t.type,n=!0===i?.strict,r=!0===i?.inference;if(s){const i=t.properties,s=Object.keys(i);if(0===s.length)return null;const o={},a={};let c=null,l=0,u=[];for(const t of s){const s=i[t];if("array"===s.type||"object"===s.type)return this._logDebug("extract",`_extractColumnar: field "${t}" has nested structure, columnar not supported`),null;const h=s;let f=[];f=h.selector?await this._querySelectorAll(e,h.selector):[e],a[t]=f;const d=f.length;if(this._logDebug("extract",`_extractColumnar: field "${t}" with selector "${h.selector||""}" found ${d} matches`),d>l&&(l=d,u=f),h.selector)if(null===c)c=d,this._logDebug("extract",`_extractColumnar: set commonCount to ${c}`);else if(c!==d)if(this._logDebug("extract",`_extractColumnar: count mismatch for field "${t}": ${d} vs ${c}`),r&&l>1)c=-1,this._logDebug("extract","_extractColumnar: mismatch marked for inference");else if(n){if(!(1===d&&await this._isSameElement(f[0],e))){if(s.required&&d<c)throw new $(`Required field "${t}" is missing at index ${d}.`,"extract");throw new $(`Columnar extraction mismatch: field "${t}" has ${d} matches, but expected ${c}.`,"extract")}}const w=await Promise.all(f.map(t=>this._extractValue(h,t)));this._logDebug("extract",`_extractColumnar: field "${t}" values:`,w),o[t]=w}if(r&&-1===c&&l>1&&u.length>0){const i=[];for(const t of u){const s=await this._findContainerChild(t,e);s&&i.push(s)}const s=[];for(const t of i){await this._findClosestAncestor(t,s)||s.push(t)}if(s.length>1)return P.call(this,t,s,{strict:n})}if(l<=1)return null;if(-1===c&&n)return null;const h=n&&-1!==c?c:l,f={};if(h>1)for(const t of s){if(1===o[t].length){(!i[t].selector||await this._isSameElement(a[t][0],e))&&(f[t]=!0)}}const d=[];for(let 
t=0;t<h;t++){const e={};let r=!1;for(const a of s){const s=o[a],c=i[a];let l=s[t];if(f[a]&&(l=s[0]),void 0===l&&(l=null),null===l&&c.required){if(this._logDebug("extract",`_extractColumnar: skipping row ${t} because required field "${a}" is null`),n)throw new $(`Required field "${a}" is missing at index ${t}.`,"extract");r=!0;break}e[a]=l}r||d.push(e)}return d}{const i=t;if(!i.selector)return null;const s=await this._querySelectorAll(e,i.selector);if(s.length<=1)return null;const n=await Promise.all(s.map(t=>this._extractValue(i,t)));return i.required?n.filter(t=>null!==t):n}}async function O(t,e,i){if(!("object"===t.type))return null;const s=t.properties,n=Object.keys(s);if(0===n.length)return null;let r;if(i?.anchor)r=s[i.anchor]?.selector||i.anchor;else for(const t of n)if(s[t].selector){r=s[t].selector;break}if(!r)return this._logDebug("extract","_extractSegmented: no anchor selector found, falling back to nested"),null;const o=await this._querySelectorAll(e,r);if(this._logDebug("extract",`_extractSegmented: anchor selector "${r}" found ${o.length} elements`),0===o.length){if(i?.strict)throw new $(`Segmented extraction failed: no elements found for anchor selector "${r}".`,"extract");return[]}const a=[];for(let s=0;s<o.length;s++){const n=o[s],c=s>0?o[s-1]:null,l=s<o.length-1?o[s+1]:null;let u,h=n,f=null;if(c&&(f=await this._findCommonAncestor(n,c)),!f&&l)f=await this._findCommonAncestor(n,l);else if(f&&l){const t=await this._findCommonAncestor(n,l);t&&await this._contains(f,t)&&(f=t)}if(f){const t=await N.call(this,n,f,i?.depth);t&&!await this._isSameElement(t,n)&&(h=t)}else{const t=await N.call(this,n,e,i?.depth);t&&(h=t)}if(await this._isSameElement(h,n)){u=[n,...await this._nextSiblingsUntil(n,r)],this._logDebug("extract",`_extractSegmented: segment ${s} (flat) created with ${u.length} elements`)}else u=h,this._logDebug("extract",`_extractSegmented: segment ${s} (nested) identified as container element`);const 
d={...t};i?.relativeTo&&!d.relativeTo&&(d.relativeTo=i.relativeTo);const w=await S.call(this,d,u,i?.strict),p=t.required,y="object"===t.type||"array"===t.type;if(null!==w)a.push(w);else{if(p&&i?.strict)throw new $("Required item is missing in array.","extract");p||y||a.push(null)}}return a}async function N(t,e,i){const s=Array.isArray(e),n=s?e:[e],r=s?await this._findClosestAncestor(t,n):await this._findContainerChild(t,e);if(void 0===i||!r)return r;let o=t;for(let t=0;t<i&&!await this._isSameElement(o,r);t++){const t=await this._parentElement(o);if(!t||!await this._contains(r,t))break;o=t}return o}async function U(t,e,i,s,n,r){let o=null;if(e.hasOwnProperty(t))o=i.get(t)||null;else{const e=await this._querySelectorAll(s,t);e.length>0&&(o=e[0])}if(o){const t=[];let e=o,i=0;const n=void 0!==r?r:1e3;for(;e&&i<=n;){const n=await this._nextSiblingsUntil(e);t.push(...n);const r=await this._parentElement(e);if(!r)break;if(Array.isArray(s)?null!==await this._findClosestAncestor(r,s):await this._isSameElement(r,s))break;e=r,i++}if(t.length>0||void 0!==r)return{scopeForField:t}}return null}async function F(t,e){const i=await N.call(this,t,e);if(i){if(!Array.isArray(e))return this._nextSiblingsUntil(i);{let t=e.indexOf(i);if(-1===t)for(let s=0;s<e.length;s++)if(await this._isSameElement(e[s],i)){t=s;break}if(-1!==t)return e.slice(t+1)}}return Array.isArray(e)?e:[e]}function H(){let t=()=>{};const e=new Promise(e=>{t=e});return e.release=t,e}m.getGlobalConfig().set("persistStorage",!1);var L={scripts:["script"],styles:["style",'link[rel="stylesheet"]'],svgs:["svg"],images:["img","picture","canvas"],hidden:["[hidden]",'[style*="display:none"]','[style*="display: none"]']},B=class{constructor(){this.hdrs={},this._initializedSessions=new Set,this.pendingRequests=new Map,this.requestCounter=0,this.actionEmitter=new p,this.isPageActive=!1,this.isEngineDisposed=!1,this.navigationLock=function(){const t=H();return 
t.release(),t}(),this.isExecutingAction=!1,this.actionQueue=[],this.isProcessingActionLoop=!1,this.blockedTypes=new Set}static register(t){const e=t.id;if(!e)throw new Error("Engine must define static id");if(this.registry.has(e))throw new Error(`Engine id duplicated: ${e}`);this.registry.set(e,t)}static get(t){return this.registry.get(t)}static getByMode(t){for(const[e,i]of this.registry.entries())if(i.mode===t)return i}static async create(e,i){const s=d(i,e,t),n=s.engine??e.engine,r=n?this.get(n)??this.getByMode(n):null;if(r){const t=new r;return await t.initialize(e,s),t}}_logDebug(t,...e){f(this.opts?.debug,{prefix:"FetchEngine",id:this.id,category:t},...e)}_getTrimInfo(t){let{selectors:e=[],presets:i=[]}=t;"string"==typeof e&&(e=[e]),"string"==typeof i&&(i=[i]);const s=i.includes("all"),n=[...e];for(const[t,e]of Object.entries(L))(s||i.includes(t))&&n.push(...e);return{selectors:n,removeComments:s||i.includes("comments"),removeHidden:s||i.includes("hidden")}}async _extract(t,e,i){return S.call(this,t,e,i)}_normalizeArrayMode(t){return j.call(this,t)}async _extractNested(t,e,i){return P.call(this,t,e,i)}async _extractColumnar(t,e,i){return M.call(this,t,e,i)}async _extractSegmented(t,e,i){return O.call(this,t,e,i)}async buildResponse(t){const e=await this._buildResponse(t),i=e.headers["content-type"]||"";return e.contentType=i.split(";")[0].trim(),!1!==this.opts?.output?.cookies?!e.cookies&&t.session&&(e.cookies=t.session.getCookies(t.request.url)):delete e.cookies,!1!==this.opts?.output?.sessionState?this.crawler?.sessionPool&&(e.sessionState=await this.crawler.sessionPool.getState()):delete e.sessionState,this.opts?.debug&&(e.metadata={...e.metadata,mode:this.mode,engine:this.id,proxy:t.proxyInfo?.url||("string"==typeof this.opts.proxy?this.opts.proxy:Array.isArray(this.opts.proxy)?this.opts.proxy[0]:void 0)}),e}waitFor(t){return this.dispatchAction({type:"waitFor",options:t})}click(t){return this.dispatchAction({type:"click",selector:t})}mouseMove(t){return 
this.dispatchAction({type:"mouseMove",params:t})}mouseClick(t){return this.dispatchAction({type:"mouseClick",params:t})}keyboardType(t,e){return this.dispatchAction({type:"keyboardType",params:{text:t,delay:e}})}keyboardPress(t,e){return this.dispatchAction({type:"keyboardPress",params:{key:t,delay:e}})}fill(t,e){return this.dispatchAction({type:"fill",selector:t,value:e})}submit(t,e){return this.dispatchAction({type:"submit",selector:t,options:e})}trim(t){return this.dispatchAction({type:"trim",options:t})}pause(t){return this.dispatchAction({type:"pause",message:t})}evaluate(t){return this.dispatchAction({type:"evaluate",params:t})}extract(t){t&&"object"==typeof t&&t.schema&&(t=t.schema);const e=E(t);return this.dispatchAction({type:"extract",schema:e})}get id(){return this.constructor.id}async getState(){return{cookies:await this.cookies(),sessionState:await(this.crawler?.sessionPool?.getState())}}get mode(){return this.constructor.mode}get context(){return this.ctx}async initialize(t,e){if(this.ctx)return;w(t,e),this.ctx=t,this.opts=t,this.hdrs=function(t){const e={};if(t&&"object"==typeof t)for(const[i,s]of Object.entries(t))e[i.toLowerCase()]=s;return e}(t.headers),this._initialCookies=[...t.cookies??[]],t.internal||(t.internal={}),t.internal.engine=this,t.engine=this.mode,this.actionEmitter.setMaxListeners(100);const i=t.storage||{},s=i.persist??!1,n=this.config=new m({persistStorage:s,storageClientOptions:{persistStorage:s,...i.config},...i.config}),r=i.id||t.id;this.requestQueue=await b.open(r,{config:n});const o=this.opts?.proxy?"string"==typeof this.opts.proxy?[this.opts.proxy]:this.opts.proxy:void 0;o?.length&&(this.proxyConfiguration=new v({proxyUrls:o}));const a=await this._getSpecificCrawlerOptions(t),c=d({persistenceOptions:{enable:!0,storeId:r},persistStateKeyValueStoreId:r},t.sessionPoolOptions,{maxPoolSize:1,sessionOptions:{maxUsageCount:1e3,maxErrorScore:3}});t.sessionState&&t.cookies&&t.cookies.length>0&&console.warn('[FetchEngine] Warning: 
Both "sessionState" and "cookies" are provided. Explicit "cookies" will override any conflicting cookies restored from "sessionState".');const l={...d(a,{requestQueue:this.requestQueue,maxConcurrency:1,minConcurrency:1,useSessionPool:!0,persistCookiesPerSession:!0,sessionPoolOptions:c}),requestHandler:this._requestHandler.bind(this),errorHandler:this._failedRequestHandler.bind(this),failedRequestHandler:this._failedRequestHandler.bind(this)};l.preNavigationHooks||(l.preNavigationHooks=[]),l.preNavigationHooks.unshift(({crawler:t,session:e,request:i},s)=>{if(this.currentSession=e,e&&!this._initializedSessions.has(e.id)){if(this._initialCookies&&this._initialCookies.length>0){const t=this._initialCookies.map(t=>{const e={...t};return"no_restriction"===e.sameSite&&(e.sameSite="None"),e});e.setCookies(t,i.url)}this._initializedSessions.add(e.id)}});const u=this.crawler=this._createCrawler(l,n),h=this.kvStore=await g.open(r,{config:n}),f=await h.getValue(x);!t.sessionState||f&&!t.overrideSessionState||await h.setValue(x,t.sessionState),this.isCrawlerReady=!0,this.crawlerRunPromise=u.run(),this.crawlerRunPromise.finally(()=>{this.isCrawlerReady=!1}).catch(t=>{console.error("Crawler background error:",t)})}async cleanup(){await(this._cleanup?.()),await this._commonCleanup();const t=this.ctx;t&&t.internal?.engine===this&&(t.internal.engine=void 0),this.ctx=void 0,this.opts=void 0}async _processAction(t,e){switch(this._logDebug(e.type,"Executing action:",e),e.type){case"extract":return C.call(this,e.schema,this._getInitialElementScope(t));case"pause":return this._handlePause(e);case"getContent":return this.buildResponse(t);case"waitFor":return e.options?.ms&&1===Object.keys(e.options).length?void await new Promise(t=>setTimeout(t,e.options.ms)):this.executeAction(t,e);default:return this.executeAction(t,e)}}async _handlePause(t){const e=this.ctx?.onPause;e?(console.info(t.message||"Execution paused for manual intervention."),await 
e({message:t.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped.")}async _executePendingActions(t){if(this.isEngineDisposed)return;this.activeContext=t;const e=async()=>{if(!this.isProcessingActionLoop){this.isProcessingActionLoop=!0,this._logDebug("action-loop",`Action loop started. Current queue size: ${this.actionQueue.length}`);try{for(;this.actionQueue.length>0&&this.isPageActive&&!this.isEngineDisposed;){const e=this.actionQueue.shift();this._logDebug("action-loop",`Processing action: ${e.action.type}`,e.action);try{if("dispose"===e.action.type){this.actionEmitter.emit("dispose"),e.resolve();continue}this.isExecutingAction=!0;const i=await this._processAction(t,e.action);this._logDebug("action-loop",`Action completed: ${e.action.type}`),e.resolve(i)}catch(t){this._logDebug("action-loop",`Action failed: ${e.action.type}`,t),e.reject(t)}finally{this.isExecutingAction=!1,await new Promise(t=>setImmediate(t))}}}finally{this.isProcessingActionLoop=!1,this._logDebug("action-loop","Action loop paused/finished.")}}};await new Promise(t=>{const i=t=>{this.actionQueue.push(t),e()},s=()=>{this.actionEmitter.removeListener("dispatch",i),this.activeContext=void 0,t()};this.actionEmitter.on("dispatch",i),this.actionEmitter.once("dispose",s),e(),this.isEngineDisposed&&(s(),this.actionEmitter.removeListener("dispose",s))})}async _sharedRequestHandler(t){const{request:e}=t;this._logDebug("request",`Processing request: ${e.url}`);try{this.currentSession=t.session,this.isPageActive=!0;const i=this.pendingRequests.get(e.userData.requestId);if(i){const s=await this.buildResponse(t),n=!s.statusCode||s.statusCode>=400;if(this.ctx?.throwHttpErrors&&n){const t=new y(`Request for ${s.finalUrl} failed with status ${s.statusCode||"N/A"}`,"request",s.statusCode);i.reject(t)}else this.lastResponse=s,i.resolve(s);this.pendingRequests.delete(e.userData.requestId)}await 
this._executePendingActions(t)}finally{if(this.currentSession){const t=this.currentSession.getCookies(e.url);t&&(this._initialCookies=t)}this.isPageActive=!1,this.navigationLock.release()}}async _sharedFailedRequestHandler(t,e){const{request:i}=t,s=this.pendingRequests.get(i.userData.requestId);if(s&&e&&this.ctx?.throwHttpErrors){this.pendingRequests.delete(i.userData.requestId);const t=e.response,n=t?.statusCode||500,r=t?.url?t.url:i.url,o=new y(`Request${r?" for "+r:""} failed: ${e.message}`,"request",n);s.reject(o)}return this._sharedRequestHandler(t)}async dispatchAction(t){if(!this.isPageActive)throw new Error("No active page. Call goto() before performing actions.");return this.isExecutingAction&&this.activeContext?(this._logDebug(t.type,"Re-entrant action execution:",t),await this._processAction(this.activeContext,t)):new Promise((e,i)=>{this.actionEmitter.emit("dispatch",{action:t,resolve:e,reject:i})})}async _requestHandler(t){await this._sharedRequestHandler(t)}async _failedRequestHandler(t,e){await this._sharedFailedRequestHandler(t,e)}async _commonCleanup(){if(this.isEngineDisposed=!0,this._initializedSessions.clear(),this.actionEmitter.emit("dispose"),this.navigationLock?.release(),this.pendingRequests.size>0){for(const[,t]of this.pendingRequests)t.reject(new Error("Cleanup:Request cancelled"));this.pendingRequests.clear()}if(this.crawler){try{await(this.crawler.teardown?.())}catch(t){console.error("crawler teardown error:",t)}this.crawler=void 0}this.crawlerRunPromise=void 0,this.isCrawlerReady=void 0;const t=(this.opts?.storage||{}).purge??!0;this.requestQueue&&(t&&await this.requestQueue.drop().catch(t=>console.error("Error dropping requestQueue:",t)),this.requestQueue=void 0),this.kvStore&&(t&&await this.kvStore.drop().catch(t=>console.error("Error dropping kvStore:",t)),this.kvStore=void 0),this.actionEmitter.removeAllListeners(),this.pendingRequests.clear(),this.actionQueue=[],this.config=void 0}async blockResources(t,e){return 
e&&this.blockedTypes.clear(),t.forEach(t=>this.blockedTypes.add(t)),t.length}getContent(){return this.lastResponse?Promise.resolve(this.lastResponse):Promise.reject(new Error("No content fetched yet. Call goto() first."))}async headers(t,e){if(void 0===t)return{...this.hdrs};if("string"==typeof t&&void 0===e)return this.hdrs[t.toLowerCase()]||"";if(null!==t&&"object"==typeof t){const i={};for(const[e,s]of Object.entries(t))i[e.toLowerCase()]=String(s);return this.hdrs=!0===e?i:{...this.hdrs,...i},!0}return"string"==typeof t&&("string"==typeof e?this.hdrs[t.toLowerCase()]=e:null===e&&delete this.hdrs[t.toLowerCase()],!0)}async cookies(t){const e=this.lastResponse?.url||"";if(Array.isArray(t))return this.currentSession?this.currentSession.setCookies(t,e):this._initialCookies=[...t],!0;if(null===t)return this.currentSession,this._initialCookies=[],!0;if(this.currentSession){return this.currentSession.getCookies(e)}return[...this._initialCookies||[]]}async dispose(){await this.cleanup()}};async function I(t,e){let i;const s=e?.engine||t.engine;if(s&&"auto"!==s){if(i=await B.create(t,{engine:s}),!i)throw new Error(`Engine "${s}" is not available or failed to initialize.`);return i}const n=function(t,e){if(!t||!e?.length)return null;const i=new URL(t);let s=e.find(t=>t.domain===i.hostname);s||(s=e.find(t=>i.hostname.endsWith(t.domain)));if(!s)return null;if(s.pathScope?.length){if(!s.pathScope.some(t=>i.pathname.startsWith(t)))return null}return s}(e?.url||t.url,t.sites);if(n?.engine&&"auto"!==n.engine&&(i=await B.create(t,{engine:n.engine}),i))return i;if(i=await B.create(t,{engine:"http"}),!i)throw new Error("Failed to create default http engine");return i}B.registry=new Map;var V=class{constructor(t={}){this.options=t,this.closed=!1,this.id=h(),this.context=this.createContext(t)}_logDebug(t,...e){f(this.context.debug,{prefix:"FetchSession",id:this.id.slice(0,8),category:t},...e)}async execute(t,e=this.context){const 
i=t.id||t.name||t.action;this._logDebug("execute",`Executing action: ${i}`,t.params);const s=t.index??(e.internal.actionIndex||0);e.internal.actionIndex=s+1,await this.ensureEngine(t,e);const r=n.create(t);if(!r)throw new Error(`Unknown action: ${t.id||t.name}`);const o={...t,index:s};let a,c;e.currentAction={...o,startedAt:Date.now()};try{return a=await r.execute(e,o),a}catch(t){throw c=t,c}finally{e.currentAction=void 0}}async executeAll(t,e){this._logDebug("executeAll",`Total actions: ${t.length}`,t.map(t=>t.id||t.name||t.action));const i=e?{...this.context,...e,id:this.context.id,eventBus:this.context.eventBus,outputs:this.context.outputs,execute:this.context.execute,action:this.context.action}:this.context;let s=e?.index??0;try{for(;s<t.length;){const e=t[s];await this.execute({...e,index:s},i),s++}const e=await this.execute({id:"getContent",index:s},i);return{result:e?.result,outputs:this.getOutputs()}}catch(t){throw t.actionIndex=s,t}}getOutputs(){return this.context.outputs}async getState(){return this.context.internal.engine?.getState()}async dispose(){if(this.closed)return;const t=this.context.eventBus;t.emit("session:closing",{sessionId:this.id});try{await(this.context.internal.engine?.dispose())}finally{this.closed=!0}t.emit("session:closed",{sessionId:this.id})}async ensureEngine(t,e){if(this.closed)throw new Error("Session is closed");if(!e.internal.engine){const i=t?.params?.url??e.url,s=await I(e,{url:i});if(!s)throw new Error("No engine found");e.internal.engine=s}}createContext(e=this.options){const i=new c;return l({...e,id:this.id,eventBus:i,outputs:{},internal:{},execute:async t=>this.execute(t),action:async function(t,e,i){return this.execute({name:t,params:e,...i})}},t)}},D=class{constructor(t={}){this.defaults=t}async createSession(t){const e={...this.defaults,...t||{}};return new V(e)}async fetch(t,e){"string"!=typeof t&&(t=(e=t).url);const i=await this.createSession(e);try{const 
s=e?.actions||[];t&&0!==s.findIndex(e=>("goto"===e.id||"goto"===e.name)&&e.params?.url===t)&&s.unshift({id:"goto",params:{url:t}});return await i.executeAll(s)}finally{await i.dispose()}}};import{CheerioCrawler as z}from"crawlee";import*as G from"cheerio";import{newFunction as J}from"util-ex";import{CommonError as K,ErrorCode as Q,NotFoundError as W}from"@isdk/common-error";var Z="___BR___",X="___BLOCK___",Y="___P___",tt=/\s+/g,et=new RegExp(` *(${Z}|${X}|${Y}) *`,"g"),it=new RegExp(`(?:${X}|${Y})+`,"g");var st={"&amp;":"&amp;","&lt;":"&lt;","&gt;":"&gt;"},nt={"&quot;":'"',"&apos;":"'","&nbsp;":" ","&copy;":"©","&reg;":"®","&trade;":"™","&sect;":"§","&para;":"¶","&bull;":"•","&hellip;":"…","&euro;":"€","&pound;":"£","&yen;":"¥","&cent;":"¢","&curren;":"¤","&brvbar;":"¦","&uml;":"¨","&ordf;":"ª","&laquo;":"«","&raquo;":"»","&not;":"¬","&shy;":"","&macr;":"¯","&deg;":"°","&plusmn;":"±","&sup2;":"²","&sup3;":"³","&acute;":"´","&micro;":"µ","&middot;":"·","&cedil;":"¸","&sup1;":"¹","&ordm;":"º","&iquest;":"¿","&times;":"×","&divide;":"÷","&ndash;":"–","&mdash;":"—","&lsquo;":"‘","&rsquo;":"’","&sbquo;":"‚","&ldquo;":"“","&rdquo;":"”","&bdquo;":"„","&dagger;":"†","&Dagger;":"‡","&permil;":"‰","&lsaquo;":"‹","&rsaquo;":"›"};function rt(t){return t?t.replace(/&(#?[a-zA-Z0-9]+);/g,t=>{const e=t.toLowerCase();if(st[e])return t;if(nt[e])return nt[e];if(t.startsWith("&#")){const e=t.startsWith("&#x")?parseInt(t.slice(3,-1),16):parseInt(t.slice(2,-1),10);if(!isNaN(e)){if(160===e)return" ";try{return String.fromCodePoint(e)}catch(e){return t}}}return t}):t}var ot=class extends B{_ensureCheerioContext(t){if(!t.$&&t.body){let e="string"==typeof t.body?t.body:Buffer.isBuffer(t.body)?t.body.toString("utf-8"):JSON.stringify(t.body);e.trim().startsWith("<")||(e=`<html><body><pre>${e}</pre></body></html>`),t.$=G.load(e)}}async _buildResponse(t){this._ensureCheerioContext(t);const{request:e,response:i,body:s,$:n}=t,r=n?.html();let o="string"==typeof 
s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");r&&r!==o&&(o=r);let a=i?.headers;if(!a&&i?.rawHeaders){a={};const t=i.rawHeaders;for(let e=0;e<t.length;e+=2)a[t[e].toLowerCase()]=t[e+1]}const c={url:e.url,finalUrl:e.loadedUrl||e.url,statusCode:i?.statusCode??200,statusText:i?.statusMessage,headers:a||{},body:s,html:rt(o),text:o};if(this.opts?.debug&&i?.timings){const t=i.timings;c.metadata={timings:{start:t.start,total:t.phases?.total,ttfb:t.phases?.firstByte,dns:t.phases?.dns,tcp:t.phases?.tcp,download:t.phases?.download}}}return c}async _querySelectorAll(t,e){if(Array.isArray(t)){if(0===t.length)return[];const{$:i}=t[0],s=t.map(t=>t.el[0]).filter(Boolean),n=i(s);return n.find(e).add(n.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}const{$:i,el:s}=t;return":scope"===e?[{$:i,el:s}]:s.find(e).add(s.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}async _nextSiblingsUntil(t,e){const{$:i,el:s}=t;return(e?s.nextUntil(e):s.nextAll()).toArray().map(t=>({$:i,el:i(t)}))}async _parentElement(t){const{$:e,el:i}=t,s=i.parent();return 0===s.length?null:{$:e,el:s}}async _isSameElement(t,e){return t.el[0]===e.el[0]}async _findClosestAncestor(t,e){if(0===e.length)return null;const i=new Set(e.map(t=>t.el[0])),{$:s,el:n}=t;let r=n;for(;r.length>0;){if(i.has(r[0]))return{$:s,el:r};r=r.parent()}return null}async _contains(t,e){const i=t.el[0],s=e.el[0];if(i===s)return!0;const n=t.$;return"function"==typeof n.contains?n.contains(i,s):t.el.find(e.el).length>0}async _findCommonAncestor(t,e){const{$:i,el:s}=t,{el:n}=e;if(s[0]===n[0])return t;if(await this._contains(t,e))return t;if(await this._contains(e,t))return e;const r=s.parents().toArray(),o=n.parents().toArray(),a=new Set(o);for(const t of r)if(a.has(t))return{$:i,el:i(t)};return null}async _findContainerChild(t,e){const{$:i,el:s}=t,n=e.el[0];let r=s;if(r[0]===n)return t;const o=r.parents().toArray();for(let 
t=0;t<o.length;t++)if(o[t]===n){return{$:i,el:i(t>0?o[t-1]:s[0])}}if(n===i.root()[0]){return{$:i,el:i(o.length>0?o[o.length-1]:s[0])}}return null}async _extractValue(t,e){const{$:i,el:s}=e,{attribute:n,type:r="string",mode:o="text"}=t;if(this._logDebug("extract",`_extractValue: el.length=${s.length} schema=${JSON.stringify(t)}`),0===s.length)return null;let a="";if(n?a=s.attr(n)??null:"html"===r||"html"===o||"outerHTML"===o?(a="outerHTML"===o?i.html(s):s.html()??("html"===r?"":null),a&&(a=rt(a.trim()))):a="innerText"===o?function(t){const e=t.clone();e.find("script, style, noscript, template").remove(),e.find("[hidden]").remove(),e.find("br").replaceWith(Z),e.find("p").before(Y).after(Y),e.find("div, h1, h2, h3, h4, h5, h6, li, ul, ol, tr, dl, dt, dd, blockquote, pre, form, table, article, section, header, footer, nav, main, aside, hr, address, fieldset, figure, figcaption, details, summary").before(X).after(X);let i=e.text();return i=i.replace(tt," "),i=i.replace(et,"$1"),i=i.replace(it,t=>t.includes(Y)?Y:X),i=i.replaceAll(Z,"\n"),i=i.replaceAll(Y,"\n\n"),i=i.replaceAll(X,"\n"),i.trim()}(s):s.text().trim(),null===a)return null;switch(r){case"number":return parseFloat(a.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=a.toLowerCase();return"true"===t||"1"===t;default:return a}}_getInitialElementScope(t){const{$:e}=t;return e?{$:e,el:e.root()}:null}async executeAction(t,e){const{$:i}=t;switch(e.type){case"dispose":return;case"navigate":{const{url:i,opts:s}=e;this._logDebug("navigate",`Navigating to: ${i}`);const n=await this._requestWithRedirects(t,{url:i,method:"GET",headers:{...this.hdrs,...s?.headers}});return await this._updateStateAfterNavigation(t,n),this.lastResponse}case"mouseMove":case"mouseClick":case"keyboardType":case"keyboardPress":throw new K(`Action "${e.type}" is only supported in browser engine mode.`,e.type,"not_supported");case"click":{if(!i)throw new K(`Cheerio context not available for action: ${e.type}`,"click");const 
s=e.selector,n=i(s).first();let r;if(0===n.length)try{r=new URL(s,t.request.loadedUrl||t.request.url).href}catch{throw new K(`click: selector not found or invalid URL: ${s}`,"click")}else{if(!n.is("a")||!n.attr("href")){if(n.is('input[type="submit"], button[type="submit"], button, input')){const e=n.closest("form");return e.length?this.executeAction(t,{type:"submit",selector:e}):void this._logDebug("click","Button/input clicked but no form found and no JS support in http mode. Ignoring.")}throw new K(`click: unsupported element for http simulate. Selector: ${s}`,"click")}{const e=n.attr("href");r=new URL(e,t.request.loadedUrl||t.request.url).href}}const o=await t.sendRequest({url:r});return void await this._updateStateAfterNavigation(t,o)}case"fill":{if(!i)throw new K(`Cheerio context not available for action: ${e.type}`),"fill";const s=i(e.selector).first();if(0===s.length)throw new K(`fill: selector not found: ${e.selector}`);if(!s.is("input, textarea, select"))throw new K(`fill: not a form field: ${e.selector}`);return s.val(e.value),void(this.lastResponse=await this.buildResponse(t))}case"trim":{if(!i)throw new K(`Cheerio context not available for action: ${e.type}`,"trim");const{selectors:s,removeComments:n}=this._getTrimInfo(e.options);return s.forEach(t=>i(t).remove()),n&&i("*").contents().filter((t,e)=>"comment"===e.type).remove(),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":return void(e.options?.ms&&await new Promise(t=>setTimeout(t,e.options.ms)));case"submit":{if(!i)throw new K(`Cheerio context not available for action: ${e.type}`,"submit");const s="string"==typeof e.selector?i(e.selector).first():null!=e.selector?e.selector:i("form").first();if(0===s.length)throw new W(e.selector,"submit");const n=s.attr("action")||t.request.loadedUrl||t.request.url,r=(s.attr("method")||"GET").toUpperCase(),o=new URL(n,t.request.loadedUrl||t.request.url).href,a={};let c;if(s.find("input, select, textarea").each((t,e)=>{const 
s=i(e),n=s.attr("name");if(!n)return;const r=s.val();null!=r&&(a[n]=String(r))}),"GET"===r){const e=new URL(o);Object.entries(a).forEach(([t,i])=>e.searchParams.set(t,i)),c=await this._requestWithRedirects(t,{url:e.href,method:"GET"})}else{const i=e.options?.enctype||s.attr("enctype")||"application/x-www-form-urlencoded";let n;const r={};"application/json"===i?(n=JSON.stringify(a),r["Content-Type"]="application/json"):(n=new URLSearchParams(a).toString(),r["Content-Type"]="application/x-www-form-urlencoded"),this._logDebug("submit","Submitting POST to:",o,"enctype:",i),c=await this._requestWithRedirects(t,{url:o,method:"POST",body:n,headers:r})}return await this._updateStateAfterNavigation(t,c),void this._logDebug("submit","Submit finished. Current URL:",t.request.loadedUrl||t.request.url)}case"evaluate":{const{fn:s,args:n=[]}=e.params,r=t.request.loadedUrl||t.request.url;let o=null;const a=t=>t&&0!==t.length?{textContent:t.text(),innerHTML:t.html(),outerHTML:i.html(t),getAttribute:e=>t.attr(e),matches:e=>t.is(e)}:null,c=this,l={location:{_href:r,get href(){return this._href},set href(t){if(t&&t!==this._href){this._href=t;const e=new URL(t,r).href;o=c.goto(e)}},assign(t){this.href=t},replace(t){this.href=t}}},u={getElementById:t=>a(i(`#${t}`).first()),querySelector:t=>a(i(t).first()),querySelectorAll:t=>i(t).toArray().map(t=>a(i(t))),getElementsByClassName:t=>i(`.${t}`).toArray().map(t=>a(i(t))),getElementsByTagName:t=>i(t).toArray().map(t=>a(i(t))),get body(){return a(i("body").first())},get title(){return i("title").text()}};l.document=u;const h={window:l,document:u,$:i,console:console};let f;const d=J(s,h);return f="function"==typeof d?await d(n):d,o?await o:l.location.href===r&&(this.lastResponse=await this.buildResponse(t)),f}default:throw new K(`Unknown action type: ${e.type}`,"CheerioFetchEngine.executeAction",Q.NotSupported)}}async _requestWithRedirects(t,e){let{url:i,method:s,body:n,headers:r={}}=e,o=0;let a;for(;o<=5;){if(t.session){const 
e=t.session.getCookieString(i);e&&(r={...r,cookie:e})}if(a=await t.sendRequest({url:i,method:s,body:n,headers:r,followRedirect:!1}),!a)break;const e=a.statusCode,c=a.headers||a.req?.res?.headers||a.res?.headers||{};if(t.session&&c["set-cookie"]&&t.session.setCookies(c["set-cookie"],i),[301,302,303,307,308].includes(e)){const t=c.location;if(!t)break;if(i=new URL(t,i).href,o++,[301,302,303].includes(e)){this._logDebug("http",`Redirect ${e} (method conversion to GET):`,i),s="GET",n=void 0;const{"content-type":t,"Content-Type":o,"content-length":a,"Content-Length":c,...l}=r;r=l}else this._logDebug("http",`Redirect ${e} (method preserved):`,i);continue}break}return a}async _updateStateAfterNavigation(t,e){const i=e;t.response=i,t.body=i.body,t.$=void 0,i.url&&(t.request.loadedUrl=i.url),this.lastResponse=await this.buildResponse(t)}_createCrawler(t,e){return new z(t,e)}_getSpecificCrawlerOptions(t){return{additionalMimeTypes:["text/plain"],maxRequestRetries:1,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,proxyConfiguration:this.proxyConfiguration,preNavigationHooks:[({session:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors,this.opts?.timeoutMs&&(s.timeout={request:this.opts.timeoutMs})}]}}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});const i="req-"+ ++this.requestCounter,s=new Promise((t,s)=>{const n=e?.timeoutMs||this.opts?.timeoutMs||3e4,r=setTimeout(()=>{this.pendingRequests.delete(i),this.navigationLock.release(),s(new K(`goto timed out after ${n}ms.`,"gotoTimeout",Q.RequestTimeout))},n);this.pendingRequests.set(i,{resolve:e=>{clearTimeout(r),t(e)},reject:t=>{clearTimeout(r),s(t)}})});return this.requestQueue.addRequest({...e,url:t,headers:{...this.hdrs,...e?.headers},userData:{requestId:i},uniqueKey:`${t}-${i}`}).catch(t=>{const e=this.pendingRequests.get(i);e&&(this.pendingRequests.delete(i),this.navigationLock.release(),e.reject(t))}),await 
this.navigationLock,this.navigationLock=H(),s}};ot.id="cheerio",ot.mode="http",B.register(ot);import{PlaywrightCrawler as at}from"crawlee";import{firefox as ct}from"playwright";import{CommonError as lt,ErrorCode as ut,NotFoundError as ht}from"@isdk/common-error";var ft=3e4,dt=class extends B{constructor(){super(...arguments),this.currentMousePos={x:0,y:0}}async _buildResponse(t){const{page:e,response:i,request:s,session:n}=t;if(!e||e.isClosed())return{url:s.url,finalUrl:s.loadedUrl||s.url,statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:"",html:"",text:""};const r=await e.content(),o=await e.textContent("body"),a=await e.context().cookies();n&&n.setCookies(a,s.url);const c={url:e.url(),finalUrl:e.url(),statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:r,html:r,text:o||""};if(this.opts?.debug&&i){const t="function"==typeof i.request?i.request():i.request;if(t&&"function"==typeof t.timing){const e=t.timing();c.metadata={timings:{start:e.startTime,total:e.responseEnd-e.startTime,ttfb:e.responseStart-e.requestStart,dns:e.domainLookupEnd-e.domainLookupStart,tcp:e.connectEnd-e.connectStart,download:e.responseEnd-e.responseStart}}}}return!1!==this.opts?.output?.cookies&&(c.cookies=a),c}async _querySelectorAll(t,e){const i=Array.isArray(t)?t:[t],s=[];for(const t of i){const i=await t.locator(e).all();s.push(...i);try{await t.evaluate((t,e)=>t.matches(e),e)}catch(t){}}const n=[];for(const t of i){let i=!1;try{i=await t.evaluate((t,e)=>t.matches(e),e)}catch{}i&&n.push(t);const s=await t.locator(e).all();n.push(...s)}return n}async _nextSiblingsUntil(t,e){const i=await t.locator("xpath=following-sibling::*").all();if(!e)return i;const s=[];for(const t of i){if(await t.evaluate((t,e)=>t.matches(e),e))break;s.push(t)}return s}async _parentElement(t){const e=t.locator("xpath=..");return 0===await e.count()?null:e.first()}async _isSameElement(t,e){const i=await t.elementHandle(),s=await 
e.elementHandle();if(!i||!s)return!1;try{return await i.evaluate((t,e)=>t===e,s)}finally{await i.dispose(),await s.dispose()}}async _findClosestAncestor(t,e){if(0===e.length)return null;const i=await t.elementHandle();if(!i)return null;const s=await Promise.all(e.map(t=>t.elementHandle()));try{const t=await i.evaluate((t,e)=>{const i=new Set(e);let s=t;for(;s;){if(i.has(s))return e.indexOf(s);s=s.parentElement}return-1},s);return-1!==t?e[t]:null}finally{await i.dispose(),await Promise.all(s.map(t=>t?.dispose()))}}async _contains(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;try{return await i.evaluate((t,e)=>t.contains(e),s)}finally{await i.dispose(),await s.dispose()}}async _findCommonAncestor(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return null;try{const e=await i.evaluateHandle((t,e)=>{let i=null;if(t===e)i=t;else if(t.contains(e))i=t;else if(e.contains(t))i=e;else{const s=new Set;let n=e.parentElement;for(;n;)s.add(n),n=n.parentElement;for(n=t.parentElement;n;){if(s.has(n)){i=n;break}n=n.parentElement}}return i&&1===i.nodeType?function t(e){if(e.id)return`//*[@id="${e.id}"]`;if(e===document.body)return"/html/body";if(e===document.documentElement)return"/html";let i=0;const s=e.parentNode?e.parentNode.childNodes:[];for(let n=0;n<s.length;n++){const r=s[n];if(r===e)return t(e.parentNode)+"/"+e.tagName.toLowerCase()+"["+(i+1)+"]";1===r.nodeType&&r.tagName===e.tagName&&i++}return""}(i):null},s);if(!e)return null;const n=await e.jsonValue();return"string"==typeof n&&n?t.page().locator(`xpath=${n}`):null}finally{await i.dispose(),await s.dispose()}}async _findContainerChild(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return null;try{const e=await i.evaluateHandle((t,e)=>{let i=null;if(t===e)i=t;else{let s=t;for(;s;){if(s.parentElement===e){i=s;break}s=s.parentElement}}return i&&1===i.nodeType?function 
t(e){if(e.id)return`//*[@id="${e.id}"]`;if(e===document.body)return"/html/body";if(e===document.documentElement)return"/html";let i=0;const s=e.parentNode?e.parentNode.childNodes:[];for(let n=0;n<s.length;n++){const r=s[n];if(r===e)return t(e.parentNode)+"/"+e.tagName.toLowerCase()+"["+(i+1)+"]";1===r.nodeType&&r.tagName===e.tagName&&i++}return""}(i):null},s);if(!e)return null;const n=await e.jsonValue();return"string"==typeof n&&n?t.page().locator(`xpath=${n}`):null}finally{await i.dispose(),await s.dispose()}}async _extractValue(t,e){const{attribute:i,type:s="string",mode:n="text"}=t,r=await e.count();if(this._logDebug("extract",`_extractValue: count=${r} schema=${JSON.stringify(t)}`),0===r)return null;let o="";if(i?o=await e.getAttribute(i):"html"===s||"html"===n||"outerHTML"===n?(o="outerHTML"===n?await e.evaluate(t=>t.outerHTML):await e.innerHTML(),o&&(o=rt(o))):o="innerText"===n?await e.innerText():await e.textContent(),null===o)return null;switch(o=o.trim(),s){case"number":return parseFloat(o.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=o.toLowerCase();return"true"===t||"1"===t;default:return o}}_getInitialElementScope(t){const{page:e}=t;return e?e.locator(":root"):null}async _waitForNavigation(t,e,i){const{page:s}=t,n=this.opts?.timeoutMs||ft;try{await s.waitForURL(t=>t.href!==e,{waitUntil:"domcontentloaded",timeout:5e3}),this._logDebug(i,"URL changed to:",s.url())}catch(t){this._logDebug(i,"No URL change detected within 5s")}await s.waitForLoadState("networkidle",{timeout:n}),this.lastResponse=await this.buildResponse(t)}_getRandomDelay(t,e=.3){const i=t*(1-e),s=t*(1+e);return Math.floor(Math.random()*(s-i+1)+i)}_getTrajectory(t,e,i=-1){const s=[];if(-1===i){const s=Math.sqrt(Math.pow(e.x-t.x,2)+Math.pow(e.y-t.y,2)),n=10*Math.random()+5;i=Math.max(5,Math.floor(s/n))}const n=t.x+(e.x-t.x)/2,r=t.y+(e.y-t.y)/2,o=n+100*(Math.random()-.5),a=r+100*(Math.random()-.5);this._logDebug("mouseMove",`Trajectory: start(${t.x},${t.y}) -> end(${e.x},${e.y}), 
cp(${o},${a}), steps: ${i}`);for(let n=1;n<=i;n++){const r=n/i;let c=(1-r)*(1-r)*t.x+2*(1-r)*r*o+r*r*e.x,l=(1-r)*(1-r)*t.y+2*(1-r)*r*a+r*r*e.y;n<i&&(c+=1.5*(Math.random()-.5),l+=1.5*(Math.random()-.5)),s.push({x:c,y:l})}return s}async _moveToSelector(t,e,i=-1){const{page:s}=t,n=s.locator(e).first(),r=await n.boundingBox();if(!r)throw new lt(`Selector not found or not visible for mouse movement: ${e}`,"mouseMove");const o=r.x+r.width/2,a=r.y+r.height/2,c=this._getTrajectory(this.currentMousePos,{x:o,y:a},i);for(const t of c)await s.mouse.move(t.x,t.y),(i>1||-1===i)&&await s.waitForTimeout(15*Math.random()+5);return this.currentMousePos={x:o,y:a},this.currentMousePos}async executeAction(t,e){const{page:i}=t,s=this.opts?.timeoutMs||ft;switch(e.type){case"dispose":return;case"navigate":{this._logDebug("navigate",`Navigating to: ${e.url}`);const s=await i.goto(e.url,{waitUntil:e.opts?.waitUntil||"domcontentloaded",timeout:this.opts?.timeoutMs||ft});s&&(t={...t,response:s},this._logDebug("navigate",`Navigation status: ${s.status()} for ${s.url()}`));const n=await this.buildResponse(t);return this.lastResponse=n,n}case"mouseMove":{const{x:s,y:n,selector:r,steps:o=-1}=e.params;if(r)await this._moveToSelector(t,r,o);else if(void 0!==s&&void 0!==n){const t=this._getTrajectory(this.currentMousePos,{x:s,y:n},o);for(const e of t)await i.mouse.move(e.x,e.y),(o>1||-1===o)&&await i.waitForTimeout(15*Math.random()+5);this.currentMousePos={x:s,y:n}}return}case"mouseClick":{const{x:s,y:n,selector:r,button:o="left",clickCount:a=1,delay:c=0}=e.params;return r?(await this._moveToSelector(t,r,-1),await i.mouse.click(this.currentMousePos.x,this.currentMousePos.y,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)})):void 0!==s&&void 0!==n?(await i.mouse.click(s,n,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)}),this.currentMousePos={x:s,y:n}):await 
i.mouse.click(this.currentMousePos.x,this.currentMousePos.y,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)}),await i.waitForTimeout(this._getRandomDelay(100,.5)),void(this.lastResponse=await this.buildResponse(t))}case"keyboardType":{const{text:s,delay:n=150}=e.params;return await i.keyboard.type(s,{delay:this._getRandomDelay(n)}),void(this.lastResponse=await this.buildResponse(t))}case"keyboardPress":{const{key:s,delay:n=50}=e.params;return await i.keyboard.press(s,{delay:this._getRandomDelay(n)}),void(this.lastResponse=await this.buildResponse(t))}case"click":{this._logDebug("click","Clicking selector:",e.selector);const n=i.url();return await i.click(e.selector,{timeout:s}),void await this._waitForNavigation(t,n,"click")}case"fill":await i.fill(e.selector,e.value,{timeout:s});const n=await this.buildResponse(t);return void(this.lastResponse=n);case"trim":{const s=this._getTrimInfo(e.options);return await i.evaluate(t=>{const{selectors:e,removeComments:i,removeHidden:s}=t;if(e.forEach(t=>{document.querySelectorAll(t).forEach(t=>t.remove())}),s){const t=[];document.querySelectorAll("*").forEach(e=>{const i=window.getComputedStyle(e);"none"!==i.display&&"hidden"!==i.visibility||t.push(e)}),t.forEach(t=>t.remove())}if(i){const t=document.createNodeIterator(document,NodeFilter.SHOW_COMMENT),e=[];let i;for(;i=t.nextNode();)e.push(i);e.forEach(t=>t.parentElement?.removeChild(t))}},s),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":try{e.options?.selector&&await i.waitForSelector(e.options.selector,{timeout:s}),e.options?.networkIdle&&await i.waitForLoadState("networkidle",{timeout:s}),e.options?.ms&&await i.waitForTimeout(this._getRandomDelay(e.options.ms,.1))}catch(t){if(!1!==e.options?.failOnTimeout)throw t}return;case"submit":{const s=e.selector||"form",n=i.locator(s).first();if(0===await n.count())throw new ht(s,"submit");if("application/json"===(e.options?.enctype||"application/x-www-form-urlencoded")){const t=await 
n.elementHandle();if(!t)throw new lt(`submit: could not get form handle for ${s}`,"submit");const e=await t.evaluate(async t=>{const e=new FormData(t),i={};e.forEach((t,e)=>{i[e]=t.toString()});const s=await fetch(t.action,{method:t.method,headers:{"Content-Type":"application/json"},body:JSON.stringify(i)}),n=await s.text();return{status:s.status,statusText:s.statusText,headers:Object.fromEntries(s.headers.entries()),body:n,html:n,text:n,url:t.action,finalUrl:s.url}});return await t.dispose(),await i.setContent(e.html),void(this.lastResponse=e)}{this._logDebug("submit","Submitting form...");const e=i.url();return await n.evaluate(t=>t.submit()),void await this._waitForNavigation(t,e,"submit")}}case"evaluate":{const{fn:n,args:r=[]}=e.params,o=i.url();let a;if(a="function"==typeof n?await i.evaluate(n,r):await i.evaluate(([t,e])=>{const i=(0,eval)(`(${t})`);return"function"==typeof i?i(e):i},[n,r]),i.url()!==o)await i.waitForLoadState("domcontentloaded",{timeout:s}).catch(()=>{}),this.lastResponse=await this.buildResponse(t);else try{this.lastResponse=await this.buildResponse(t)}catch(e){await i.waitForLoadState("domcontentloaded",{timeout:s}).catch(()=>{}),this.lastResponse=await this.buildResponse(t)}return a}default:throw new lt(`Unknown action type: ${e.type}`,"PlaywrightFetchEngine.executeAction",ut.NotSupported)}}_createCrawler(t,e){return new at(t,e)}async _getSpecificCrawlerOptions(t){const e=t.browser?.headless??!0,i={maxRequestRetries:t.retries||3,headless:e,proxyConfiguration:this.proxyConfiguration,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,preNavigationHooks:[async({page:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors;const n=this.blockedTypes;n.size>0&&await e.route("**/*",t=>{n.has(t.request().resourceType())?t.abort():t.continue()})}]},s=t.browser?.launchOptions||{};if(this.opts?.antibot){i.browserPoolOptions={useFingerprints:!1};const{launchOptions:t}=await import("camoufox-js"),n=await 
t({headless:e,...s});i.launchContext={launcher:ct,launchOptions:n},i.postNavigationHooks=[async({page:t,handleCloudflareChallenge:e})=>{await e()}]}else Object.keys(s).length>0&&(i.launchContext={launchOptions:s});return i}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});if(!this.requestQueue)throw new lt("RequestQueue not initialized","goto");const i="req-"+ ++this.requestCounter,s=new Promise((t,e)=>{this.pendingRequests.set(i,{resolve:t,reject:e})});return await this.requestQueue.addRequest({url:t,headers:this.hdrs,userData:{requestId:i,waitUntil:e?.waitUntil||"domcontentloaded"},uniqueKey:`${t}-${i}`}),s}};dt.id="playwright",dt.mode="browser",B.register(dt);var wt=class extends n{async onExecute(t,e){const{selector:i,...s}=e?.params||{};if(!i)throw new Error("Selector is required for click action");await this.delegateToEngine(t,"click",i,s)}};wt.id="click",wt.returnType="none",wt.capabilities={http:"simulate",browser:"native"},n.register(wt);var pt=class extends n{async onExecute(t,e){const{selector:i,value:s,...n}=e?.params||{};if(!i)throw new Error("Selector is required for fill action");if(void 0===s)throw new Error("Value is required for fill action");await this.delegateToEngine(t,"fill",i,s,n)}};pt.id="fill",pt.returnType="none",pt.capabilities={http:"simulate",browser:"native"},n.register(pt);var yt=class extends n{async onExecute(t,e){return await this.delegateToEngine(t,"getContent",e?.params)}};yt.id="getContent",yt.returnType="response",yt.capabilities={http:"native",browser:"native"},n.register(yt);var mt=class extends n{async onExecute(t,e,i){const s=e?.params,n=s?.url||t.url;if(!n)throw new Error("URL is required for goto action");const r=t.internal.engine;if(!r)throw new Error("No engine available");t.url=n;return await r.goto(n,s)}};mt.id="goto",mt.returnType="response",mt.capabilities={http:"native",browser:"native"},n.register(mt);var gt=class extends n{async 
onExecute(t,e){const{selector:i,...s}=e?.params||{};await this.delegateToEngine(t,"submit",i,s)}};gt.id="submit",gt.returnType="none",gt.capabilities={http:"simulate",browser:"native"},n.register(gt);var xt=class extends n{async onExecute(t,e){const i=t.internal.engine;if(!i)throw new Error("No engine available");await i.waitFor(e?.params)}};xt.id="waitFor",xt.returnType="none",xt.capabilities={http:"native",browser:"native"},n.register(xt);var bt=class extends n{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("Schema is required for extract action");return this.delegateToEngine(t,"extract",i)}};bt.id="extract",bt.returnType="any",bt.capabilities={http:"native",browser:"native"},n.register(bt);var vt=class extends n{async onExecute(t,e){const{selector:i,message:s,attribute:n}=e?.params||{},r=t.internal.engine;if("browser"===r?.mode){if(i){if(!await(r?.extract({selector:i,attribute:n})))return}r&&"pause"in r?await r.pause(s):console.warn("[PauseAction] was called, but the current engine does not support `pause`. Skipped.")}else console.warn("[PauseAction] can only run in browser engine. 
Skipped.")}};vt.id="pause",vt.capabilities={http:"native",browser:"native"},vt.returnType="none",n.register(vt);var $t=class extends n{async onExecute(t,e){const i=e?.params||{};await this.delegateToEngine(t,"trim",i)}};$t.id="trim",$t.returnType="none",$t.capabilities={http:"simulate",browser:"native"},n.register($t);var _t=class extends n{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("evaluate action: params is required");return await this.delegateToEngine(t,"evaluate",i)}};_t.id="evaluate",_t.returnType="any",_t.capabilities={http:"simulate",browser:"native"},n.register(_t);var kt=class extends n{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseMove",i)}};kt.id="mouseMove",kt.returnType="none",kt.capabilities={http:"noop",browser:"native"};var Et=class extends n{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseClick",i)}};Et.id="mouseClick",Et.returnType="none",Et.capabilities={http:"noop",browser:"native"},n.register(kt),n.register(Et);var Ct=class extends n{async onExecute(t,e){const i=e?.params;if(!i?.text)throw new Error("text is required for keyboardType action");await this.delegateToEngine(t,"keyboardType",i.text,i.delay)}};Ct.id="keyboardType",Ct.returnType="none",Ct.capabilities={http:"noop",browser:"native"};var St=class extends n{async onExecute(t,e){const i=e?.params;if(!i?.key)throw new Error("key is required for keyboardPress action");await this.delegateToEngine(t,"keyboardPress",i.key,i.delay)}};async function qt(t,e){return(new D).fetch(t,e)}St.id="keyboardPress",St.returnType="none",St.capabilities={http:"noop",browser:"native"},n.register(Ct),n.register(St);export{ot as CheerioFetchEngine,wt as ClickAction,t as DefaultFetcherProperties,_t as EvaluateAction,bt as ExtractAction,n as FetchAction,i as FetchActionResultStatus,B as FetchEngine,V as FetchSession,e as FetcherOptionKeys,pt as FillAction,yt as GetContentAction,mt as GotoAction,St as KeyboardPressAction,Ct as 
KeyboardTypeAction,Et as MouseClickAction,kt as MouseMoveAction,vt as PauseAction,dt as PlaywrightFetchEngine,gt as SubmitAction,L as TRIM_PRESETS,$t as TrimAction,xt as WaitForAction,D as WebFetcher,qt as fetchWeb};
1
+ var t=(t=>(t[t.Failed=0]="Failed",t[t.Success=1]="Success",t[t.Skipped=2]="Skipped",t))(t||{}),e=class extends Error{constructor(t){super(`Engine upgrade requested for status ${t.statusCode}`),this.res=t,this.code="ENGINE_UPGRADE_REQUIRED",this.name="EngineUpgradeError"}},i={engine:"auto",enableSmart:!0,syncStateOnUpgrade:!1,upgradeThresholdMs:5e3,useSiteRegistry:!0,antibot:!1,debug:!1,headers:{},cookies:[],throwHttpErrors:void 0,output:{cookies:!0,sessionState:!0},proxy:[],blockResources:[],storage:{purge:!0},ignoreSslErrors:!0,browser:{engine:"playwright",headless:!0,waitUntil:"domcontentloaded"},http:{method:"GET"},timeoutMs:6e4,requestHandlerTimeoutSecs:void 0,maxConcurrency:1,maxRequestsPerMinute:1e3,delayBetweenRequestsMs:0,retries:0,sites:[]},s=Object.keys(i).concat(["actions","onPause"]);import{defaultsDeep as n,merge as r}from"lodash-es";import{EventEmitter as o}from"events-ex";import{CommonError as a,ErrorCode as c}from"@isdk/common-error";import{Configuration as l,KeyValueStore as u,PERSIST_STATE_KEY as h,RequestQueue as f,ProxyConfiguration as d}from"crawlee";import{CommonError as w}from"@isdk/common-error";var p=new Set(["string","number","boolean","html","object","array"]),y=new Set(["selector","has","exclude","required","strict","relativeTo","order","anchor","depth"]);function m(t){if("string"==typeof t)return{type:"string",selector:t,mode:"text"};if(!t||"object"!=typeof t)return{type:"string",mode:"text"};let e={...t};if(function(t){if(!t||"object"!=typeof t)return!1;if(Array.isArray(t))return!1;if("type"in t)return"string"!=typeof t.type||!p.has(t.type);const e=Object.keys(t);if(0===e.length)return!1;for(const t of e)if(!["selector","attribute","has","exclude","mode","required","strict","relativeTo","order","anchor","depth"].includes(t))return!0;return!1}(e)){const t={};for(const i of Object.keys(e))y.has(i)||(t[i]=m(e[i]),delete e[i]);e.type="object",e.properties=t}if(e.type||(e.type="string"),"object"===e.type){const 
t=e;t.properties||(t.properties={});for(const e in t.properties)t.properties[e]=m(t.properties[e]);delete t.mode,delete t.items,delete t.attribute}else if("array"===e.type){const t=e;t.attribute&&!t.items&&(t.items={type:"string",attribute:t.attribute,mode:"text"},delete t.attribute),t.items||(t.items={type:"string",mode:"text"}),t.items=m(t.items),"string"==typeof t.mode&&(t.mode={type:t.mode})}else{const t=e;t.mode||("html"===t.type?t.mode="html":t.mode="text")}if(e.selector&&(e.has||e.exclude)){const{selector:t,has:i,exclude:s}=e,n=t.split(",").map(t=>t.trim()).map(t=>{let e=t;return i&&(e+=`:has(${i})`),s&&(e+=`:not(${s})`),e});e.selector=n.join(", "),delete e.has,delete e.exclude}return e}async function g(t,e,i){const s=m(t);return x.call(this,s,e,i)}async function x(t,e,i){const s=t.type,n=t.selector,r=t.strict??i;if(!e)return this._logDebug("extract",`_extract: No scope for selector "${n||""}", type "${s||"value"}"`),"array"===s?[]:null;switch(s){case"object":return b.call(this,t,e,r);case"array":return $.call(this,t,e,r);default:return _.call(this,t,e,r)}}async function b(t,e,i){const{selector:s,properties:n,strict:r}=t,o=r??i,a=t._skipSelector;let c=e;if(s&&!a){const t=await this._querySelectorAll(e,s);c=t.length>0?t[0]:null,this._logDebug("extract",`_extractObject: selector "${s}" found ${t.length} elements`)}if(!c){if(this._logDebug("extract",`_extractObject: scope not found for selector "${s||""}"`),o&&t.required)throw new w(`Required object "${s||""}" is missing.`,"extract");return null}let l=t.depth??0;const u=l;for(;;){const{result:i,hasValue:r,missingRequired:h}=await v.call(this,t,c,o);if(0===h.length)return!s&&!r&&Object.keys(n).length>0?(this._logDebug("extract","_extractObject result: null"),null):(this._logDebug("extract","_extractObject result:",i),i);let f=!1;if(l>0)if(a)f=!0;else{const t=await this._isSameElement(c,e),i=await this._contains(e,c);f=!t&&i}if(f){const t=await this._parentElement(c);if(t){let i=!0;if(a||(i=await 
this._isSameElement(e,t)||await this._contains(e,t)),i){this._logDebug("extract",`_extractObject: missing required fields [${h.join(", ")}], bubbling up from depth ${u-l} to ${u-l+1}`),c=t,l--;continue}}}if(o)throw new w(`Required property "${h[0]}" is missing.`,"extract");return null}}async function v(t,e,i){const{properties:s,relativeTo:n,order:r}=t,o={},a=[];let c=!1;const l=r||Object.keys(s);let u=e;const h=new Map,f="previous"===n;for(const t of l){const n=s[t];if(!n)continue;this._logDebug("extract",`_extractObject: extracting property "${t}"`);let r,l=u;if(n.anchor){const r=await R.call(this,n.anchor,s,h,e,f,n.depth);if(!r){if(i)throw new w(`Anchor "${n.anchor}" not found or out of scope.`,"extract");o[t]=null,n.required&&a.push(t);continue}l=r.scopeForField,f&&(u=l)}let d=null;const p=n.selector,y="array"===n.type;if(p){let t=await this._querySelectorAll(l,p);if(t.length>0){void 0!==n.depth&&"object"!==n.type&&(t=await Promise.all(t.map(t=>C.call(this,t,l,n.depth)))),d=t[0];const e={...n,_skipSelector:!0};if(r=await x.call(this,e,y?t:d,i),f&&!n.anchor){const e=y&&Array.isArray(r)?t[t.length-1]:d;u=await A.call(this,e,u)}y&&(d=t[t.length-1])}else r=null}else r=await x.call(this,n,l,i),null!==r&&(d=Array.isArray(l)?l[0]:l);d&&h.set(t,d),null===r&&n.required&&a.push(t),null!==r&&(c=!0),o[t]=r}return{result:o,hasValue:c,missingRequired:a}}async function $(t,e,i){const{selector:s,items:n,mode:r,strict:o}=t,a=o??i,c=t._skipSelector;let l=s&&!c?await this._querySelectorAll(e,s):Array.isArray(e)?e:[e];s&&!c&&void 0!==t.depth&&(l=await Promise.all(l.map(i=>C.call(this,i,e,t.depth)))),this._logDebug("extract",`_extractArray: selector "${s||""}" found ${l.length} elements`);const u=E.call(this,r);if(void 0!==a&&void 0===u.strict&&(u.strict=a),(!r||"columnar"===u.type)&&1===l.length&&n){this._logDebug("extract","_extractArray: trying columnar extraction");const t=await S.call(this,n,l[0],u);if(t)return 
t}if("segmented"===u.type&&n){this._logDebug("extract",`_extractArray: trying segmented extraction for ${l.length} containers`);const t=[];let e=!1;for(const i of l){const s=await q.call(this,n,i,u);s&&(e=!0,t.push(...s))}if(e)return t}return this._logDebug("extract",`_extractArray: using nested extraction for ${l.length} elements`),k.call(this,n,l,{strict:u.strict})}async function _(t,e,i){const{selector:s}=t,n=t._skipSelector,r=t.strict??i;let o=e;if(s&&!n){const i=await this._querySelectorAll(e,s);o=i.length>0?i[0]:null,o&&void 0!==t.depth&&(o=await C.call(this,o,e,t.depth)),this._logDebug("extract",`_extractValue: selector "${s}" found ${i.length} elements`)}else Array.isArray(e)&&(o=e.length>0?e[0]:null);if(!o){if(this._logDebug("extract",`_extractValue: element not found for selector "${s||""}"`),r&&t.required)throw new w(`Required value "${s||""}" is missing.`,"extract");return null}const a=await this._extractValue(t,o);return this._logDebug("extract",`_extractValue: extracted for selector "${s||""}":`,a),a}function E(t){return t?"string"==typeof t?{type:t}:t:{type:"nested"}}async function k(t,e,i){const s=[],n=t.required,r=!0===i?.strict,o="object"===t.type||"array"===t.type;for(const i of e){const e=await x.call(this,t,i,r);if(null!==e)s.push(e);else{if(n&&r)throw new w("Required item is missing in array.","extract");n||o||s.push(null)}}return s}async function S(t,e,i){const s="object"===t.type,n=!0===i?.strict,r=!0===i?.inference;if(s){const i=t.properties,s=Object.keys(i);if(0===s.length)return null;const o={},a={};let c=null,l=0,u=[];for(const t of s){const s=i[t];if("array"===s.type||"object"===s.type)return this._logDebug("extract",`_extractColumnar: field "${t}" has nested structure, columnar not supported`),null;const h=s;let f=[];f=h.selector?await this._querySelectorAll(e,h.selector):[e],a[t]=f;const d=f.length;if(this._logDebug("extract",`_extractColumnar: field "${t}" with selector "${h.selector||""}" found ${d} 
matches`),d>l&&(l=d,u=f),h.selector)if(null===c)c=d,this._logDebug("extract",`_extractColumnar: set commonCount to ${c}`);else if(c!==d)if(this._logDebug("extract",`_extractColumnar: count mismatch for field "${t}": ${d} vs ${c}`),r&&l>1)c=-1,this._logDebug("extract","_extractColumnar: mismatch marked for inference");else if(n){if(!(1===d&&await this._isSameElement(f[0],e))){if(s.required&&d<c)throw new w(`Required field "${t}" is missing at index ${d}.`,"extract");throw new w(`Columnar extraction mismatch: field "${t}" has ${d} matches, but expected ${c}.`,"extract")}}const p=await Promise.all(f.map(t=>this._extractValue(h,t)));this._logDebug("extract",`_extractColumnar: field "${t}" values:`,p),o[t]=p}if(r&&-1===c&&l>1&&u.length>0){const i=[];for(const t of u){const s=await this._findContainerChild(t,e);s&&i.push(s)}const s=[];for(const t of i){await this._findClosestAncestor(t,s)||s.push(t)}if(s.length>1)return k.call(this,t,s,{strict:n})}if(l<=1)return null;if(-1===c&&n)return null;const h=n&&-1!==c?c:l,f={};if(h>1)for(const t of s){if(1===o[t].length){(!i[t].selector||await this._isSameElement(a[t][0],e))&&(f[t]=!0)}}const d=[];for(let t=0;t<h;t++){const e={};let r=!1;for(const a of s){const s=o[a],c=i[a];let l=s[t];if(f[a]&&(l=s[0]),void 0===l&&(l=null),null===l&&c.required){if(this._logDebug("extract",`_extractColumnar: skipping row ${t} because required field "${a}" is null`),n)throw new w(`Required field "${a}" is missing at index ${t}.`,"extract");r=!0;break}e[a]=l}r||d.push(e)}return d}{const i=t;if(!i.selector)return null;const s=await this._querySelectorAll(e,i.selector);if(s.length<=1)return null;const n=await Promise.all(s.map(t=>this._extractValue(i,t)));return i.required?n.filter(t=>null!==t):n}}async function q(t,e,i){if(!("object"===t.type))return null;const s=t.properties,n=Object.keys(s);if(0===n.length)return null;let r;if(i?.anchor)r=s[i.anchor]?.selector||i.anchor;else for(const t of n)if(s[t].selector){r=s[t].selector;break}if(!r)return 
this._logDebug("extract","_extractSegmented: no anchor selector found, falling back to nested"),null;const o=await this._querySelectorAll(e,r);if(this._logDebug("extract",`_extractSegmented: anchor selector "${r}" found ${o.length} elements`),0===o.length){if(i?.strict)throw new w(`Segmented extraction failed: no elements found for anchor selector "${r}".`,"extract");return[]}const a=[];for(let s=0;s<o.length;s++){const n=o[s],c=s>0?o[s-1]:null,l=s<o.length-1?o[s+1]:null;let u,h=n,f=null;if(c&&(f=await this._findCommonAncestor(n,c)),!f&&l)f=await this._findCommonAncestor(n,l);else if(f&&l){const t=await this._findCommonAncestor(n,l);t&&await this._contains(f,t)&&(f=t)}if(f){const t=await C.call(this,n,f,i?.depth);t&&!await this._isSameElement(t,n)&&(h=t)}else{const t=await C.call(this,n,e,i?.depth);t&&(h=t)}if(await this._isSameElement(h,n)){u=[n,...await this._nextSiblingsUntil(n,r)],this._logDebug("extract",`_extractSegmented: segment ${s} (flat) created with ${u.length} elements`)}else u=h,this._logDebug("extract",`_extractSegmented: segment ${s} (nested) identified as container element`);const d={...t};i?.relativeTo&&!d.relativeTo&&(d.relativeTo=i.relativeTo);const p=await x.call(this,d,u,i?.strict),y=t.required,m="object"===t.type||"array"===t.type;if(null!==p)a.push(p);else{if(y&&i?.strict)throw new w("Required item is missing in array.","extract");y||m||a.push(null)}}return a}async function C(t,e,i){const s=Array.isArray(e),n=s?e:[e],r=s?await this._findClosestAncestor(t,n):await this._findContainerChild(t,e);if(void 0===i||!r)return r;let o=t;for(let t=0;t<i&&!await this._isSameElement(o,r);t++){const t=await this._parentElement(o);if(!t||!await this._contains(r,t))break;o=t}return o}async function R(t,e,i,s,n,r){let o=null;if(e.hasOwnProperty(t))o=i.get(t)||null;else{const e=await this._querySelectorAll(s,t);e.length>0&&(o=e[0])}if(o){const t=[];let e=o,i=0;const n=void 0!==r?r:1e3;for(;e&&i<=n;){const n=await this._nextSiblingsUntil(e);t.push(...n);const 
r=await this._parentElement(e);if(!r)break;if(Array.isArray(s)?null!==await this._findClosestAncestor(r,s):await this._isSameElement(r,s))break;e=r,i++}if(t.length>0||void 0!==r)return{scopeForField:t}}return null}async function A(t,e){const i=await C.call(this,t,e);if(i){if(!Array.isArray(e))return this._nextSiblingsUntil(i);{let t=e.indexOf(i);if(-1===t)for(let s=0;s<e.length;s++)if(await this._isSameElement(e[s],i)){t=s;break}if(-1!==t)return e.slice(t+1)}}return Array.isArray(e)?e:[e]}function T(){let t=()=>{};const e=new Promise(e=>{t=e});return e.release=t,e}import{customAlphabet as M}from"nanoid";var P=M("0123456789abcdefghijklmnopqrstuvwxyz",12);function j(t){const e=t["retry-after"]||t["Retry-After"]||t["RETRY-AFTER"];if(!e)return null;const i=parseInt(e,10);if(!isNaN(i))return 1e3*i;const s=Date.parse(e);if(!isNaN(s)){const t=s-Date.now();return t>0?t:0}return null}function N(t,e,...i){if(!t)return;const{prefix:s,id:n,category:r}=e;if(!0===t||t===r||Array.isArray(t)&&t.includes(r)){const t=n?`:${n}`:"";console.log(`[${s}${t}:${r}]`,...i)}}l.getGlobalConfig().set("persistStorage",!1);var O={scripts:["script"],styles:["style",'link[rel="stylesheet"]'],svgs:["svg"],images:["img","picture","canvas"],hidden:["[hidden]",'[style*="display:none"]','[style*="display: none"]']},U=class{constructor(){this.hdrs={},this._initializedSessions=new Set,this.pendingRequests=new Map,this.requestCounter=0,this.actionEmitter=new o,this.isPageActive=!1,this.isEngineDisposed=!1,this.navigationLock=function(){const t=T();return t.release(),t}(),this.isExecutingAction=!1,this.actionQueue=[],this.isProcessingActionLoop=!1,this.blockedTypes=new Set}static register(t){const e=t.id;if(!e)throw new Error("Engine must define static id");if(this.registry.has(e))throw new Error(`Engine id duplicated: ${e}`);this.registry.set(e,t)}static get(t){return this.registry.get(t)}static getByMode(t){for(const[e,i]of this.registry.entries())if(i.mode===t)return i}static async create(t,e){const 
s=n(e,t,i),r=s.engine??t.engine,o=r?this.get(r)??this.getByMode(r):null;if(o){const e=new o;return await e.initialize(t,s),e}}_logDebug(t,...e){N(this.opts?.debug,{prefix:"FetchEngine",id:this.id,category:t},...e)}_getTrimInfo(t){let{selectors:e=[],presets:i=[]}=t;"string"==typeof e&&(e=[e]),"string"==typeof i&&(i=[i]);const s=i.includes("all"),n=[...e];for(const[t,e]of Object.entries(O))(s||i.includes(t))&&n.push(...e);return{selectors:n,removeComments:s||i.includes("comments"),removeHidden:s||i.includes("hidden")}}async _extract(t,e,i){return x.call(this,t,e,i)}_normalizeArrayMode(t){return E.call(this,t)}async _extractNested(t,e,i){return k.call(this,t,e,i)}async _extractColumnar(t,e,i){return S.call(this,t,e,i)}async _extractSegmented(t,e,i){return q.call(this,t,e,i)}async buildResponse(t){const e=await this._buildResponse(t),i=e.headers["content-type"]||"";return e.contentType=i.split(";")[0].trim(),!1!==this.opts?.output?.cookies?!e.cookies&&t.session&&(e.cookies=t.session.getCookies(t.request.url||e.url)):delete e.cookies,!1!==this.opts?.output?.sessionState?this.crawler?.sessionPool&&(e.sessionState=await this.crawler.sessionPool.getState()):delete e.sessionState,e.metadata={...e.metadata,mode:this.mode,engine:this.id},this.opts?.debug&&(e.metadata={...e.metadata,proxy:t.proxyInfo?.url||("string"==typeof this.opts.proxy?this.opts.proxy:Array.isArray(this.opts.proxy)?this.opts.proxy[0]:void 0)}),e}waitFor(t){return this.dispatchAction({type:"waitFor",options:t})}click(t){return this.dispatchAction({type:"click",selector:t})}mouseMove(t){return this.dispatchAction({type:"mouseMove",params:t})}mouseClick(t){return this.dispatchAction({type:"mouseClick",params:t})}mouseWheel(t){return this.dispatchAction({type:"mouseWheel",params:t})}scrollIntoView(t){return this.dispatchAction({type:"scrollIntoView",params:t})}keyboardType(t,e){return this.dispatchAction({type:"keyboardType",params:{text:t,delay:e}})}keyboardPress(t,e){return 
this.dispatchAction({type:"keyboardPress",params:{key:t,delay:e}})}fill(t,e){return this.dispatchAction({type:"fill",selector:t,value:e})}submit(t,e){return this.dispatchAction({type:"submit",selector:t,options:e})}trim(t){return this.dispatchAction({type:"trim",options:t})}pause(t){return this.dispatchAction({type:"pause",message:t})}evaluate(t){return this.dispatchAction({type:"evaluate",params:t})}extract(t){t&&"object"==typeof t&&t.schema&&(t=t.schema);const e=m(t);return this.dispatchAction({type:"extract",schema:e})}get id(){return this.constructor.id}async getState(){return{cookies:await this.cookies(),sessionState:await(this.crawler?.sessionPool?.getState())}}get mode(){return this.constructor.mode}get context(){return this.ctx}async initialize(t,e){if(this.ctx)return;r(t,e),this.ctx=t,this.opts=t,this.hdrs=function(t){const e={};if(t&&"object"==typeof t)for(const[i,s]of Object.entries(t))e[i.toLowerCase()]=s;return e}(t.headers),this._initialCookies=[...t.cookies??[]],t.internal||(t.internal={}),t.internal.engine=this,t.engine=this.mode,this.actionEmitter.setMaxListeners(100);const i=t.storage||{},s=i.persist??!1,o=this.config=new l({persistStorage:s,storageClientOptions:{persistStorage:s,...i.config},...i.config}),a=i.id||t.id;this.requestQueue=await f.open(a,{config:o});const c=this.opts?.proxy?"string"==typeof this.opts.proxy?[this.opts.proxy]:this.opts.proxy:void 0;c?.length&&(this.proxyConfiguration=new d({proxyUrls:c}));const w=await this._getSpecificCrawlerOptions(t),p=n({persistenceOptions:{enable:!0,storeId:a},persistStateKeyValueStoreId:a},t.sessionPoolOptions,{maxPoolSize:1,sessionOptions:{maxUsageCount:1e3,maxErrorScore:3}});t.sessionState&&t.cookies&&t.cookies.length>0&&console.warn('[FetchEngine] Warning: Both "sessionState" and "cookies" are provided. 
Explicit "cookies" will override any conflicting cookies restored from "sessionState".');const y={...n(w,{requestQueue:this.requestQueue,maxConcurrency:1,minConcurrency:1,useSessionPool:!0,persistCookiesPerSession:!0,sessionPoolOptions:p}),requestHandler:this._requestHandler.bind(this),errorHandler:this._failedRequestHandler.bind(this),failedRequestHandler:this._failedRequestHandler.bind(this)};y.preNavigationHooks||(y.preNavigationHooks=[]),y.preNavigationHooks.unshift(({crawler:t,session:e,request:i},s)=>{if(this.currentSession=e,e&&!this._initializedSessions.has(e.id)){if(this._initialCookies&&this._initialCookies.length>0){const t=this._initialCookies.map(t=>{const e={...t};return"no_restriction"===e.sameSite&&(e.sameSite="None"),e});e.setCookies(t,i.url)}this._initializedSessions.add(e.id)}});const m=this.crawler=this._createCrawler(y,o),g=this.kvStore=await u.open(a,{config:o}),x=await g.getValue(h);!t.sessionState||x&&!t.overrideSessionState||await g.setValue(h,t.sessionState),this.isCrawlerReady=!0,this.crawlerRunPromise=m.run(),this.crawlerRunPromise.finally(()=>{this.isCrawlerReady=!1}).catch(t=>{console.error("Crawler background error:",t)})}async cleanup(){await(this._cleanup?.()),await this._commonCleanup();const t=this.ctx;t&&t.internal?.engine===this&&(t.internal.engine=void 0),this.ctx=void 0,this.opts=void 0}async _processAction(t,e){switch(this._logDebug(e.type,"Executing action:",e),e.type){case"extract":return g.call(this,e.schema,this._getInitialElementScope(t));case"pause":return this._handlePause(e);case"getContent":return this.buildResponse(t);case"waitFor":return e.options?.ms&&1===Object.keys(e.options).length?void await new Promise(t=>setTimeout(t,e.options.ms)):this.executeAction(t,e);default:return this.executeAction(t,e)}}async _handlePause(t){const e=this.ctx?.onPause;e?(console.info(t.message||"Execution paused for manual intervention."),await e({message:t.message}),console.info("Resuming execution...")):console.warn("[PauseAction] 
was called, but no `onPause` handler was provided in fetchWeb options. Skipped.")}async _executePendingActions(t){if(this.isEngineDisposed)return;this.activeContext=t;const e=async()=>{if(!this.isProcessingActionLoop){this.isProcessingActionLoop=!0,this._logDebug("action-loop",`Action loop started. Current queue size: ${this.actionQueue.length}`);try{for(;this.actionQueue.length>0&&this.isPageActive&&!this.isEngineDisposed;){const e=this.actionQueue.shift();this._logDebug("action-loop",`Processing action: ${e.action.type}`,e.action);try{if("dispose"===e.action.type){this.actionEmitter.emit("dispose"),e.resolve();continue}this.isExecutingAction=!0;const i=await this._processAction(t,e.action);this._logDebug("action-loop",`Action completed: ${e.action.type}`),e.resolve(i)}catch(t){this._logDebug("action-loop",`Action failed: ${e.action.type}`,t),e.reject(t)}finally{this.isExecutingAction=!1,await new Promise(t=>setImmediate(t))}}}finally{this.isProcessingActionLoop=!1,this._logDebug("action-loop","Action loop paused/finished.")}}};await new Promise(t=>{const i=t=>{this.actionQueue.push(t),e()},s=()=>{this.actionEmitter.removeListener("dispatch",i),this.activeContext=void 0,t()};this.actionEmitter.on("dispatch",i),this.actionEmitter.once("dispose",s),e(),this.isEngineDisposed&&(s(),this.actionEmitter.removeListener("dispose",s))})}async _sharedRequestHandler(t){const{request:e}=t;this._logDebug("request",`Processing request: ${e.url}`);try{this.currentSession=t.session,this.isPageActive=!0;const i=this.pendingRequests.get(e.userData.requestId);if(i){const s=await this.buildResponse(t),n=!s.statusCode||s.statusCode>=400;if(this.ctx?.throwHttpErrors&&n){let t=`Request for ${s.finalUrl} failed with status ${s.statusCode||"N/A"}`;const e=j(s.headers);e&&(t+=`. 
Retry after ${e}ms`);const n=new a(t,"request",s.statusCode);n.response=s,i.reject(n)}else this.lastResponse=s,i.resolve(s);this.pendingRequests.delete(e.userData.requestId)}await this._executePendingActions(t)}finally{if(this.currentSession){const t=this.currentSession.getCookies(e.url);t&&(this._initialCookies=t)}this.isPageActive=!1,this.navigationLock.release()}}async _sharedFailedRequestHandler(t,e){const{request:i}=t,s=this.pendingRequests.get(i.userData.requestId);if(s&&e&&this.ctx?.throwHttpErrors){this.pendingRequests.delete(i.userData.requestId);const n=e.response;n&&(t.session?.setCookiesFromResponse(n),t.response=n,!t.body&&n.body&&(t.body=n.body));const r=n?.statusCode||(e.message.includes("timed out")?c.RequestTimeout:c.InternalError),o=n?.url?n.url:i.url;let l=r===c.RequestTimeout?o+" "+e.message:`Request${o?" for "+o:""} failed: ${e.message}`;const u=n?.headers?j(n.headers):null;u&&(l+=`. Retry after ${u}ms`);const h=new a(l,"request",r);h.response=await this.buildResponse(t),s.reject(h)}return this._sharedRequestHandler(t)}async dispatchAction(t){if(!this.isPageActive)throw new Error("No active page. 
Call goto() before performing actions.");return this.isExecutingAction&&this.activeContext?(this._logDebug(t.type,"Re-entrant action execution:",t),await this._processAction(this.activeContext,t)):new Promise((e,i)=>{this.actionEmitter.emit("dispatch",{action:t,resolve:e,reject:i})})}async _requestHandler(t){await this._sharedRequestHandler(t)}async _failedRequestHandler(t,e){await this._sharedFailedRequestHandler(t,e)}async _commonCleanup(){if(this.isEngineDisposed=!0,this._initializedSessions.clear(),this.actionEmitter.emit("dispose"),this.navigationLock?.release(),this.pendingRequests.size>0){for(const[,t]of this.pendingRequests)t.reject(new Error("Cleanup:Request cancelled"));this.pendingRequests.clear()}if(this.crawler){try{await(this.crawler.teardown?.())}catch(t){console.error("crawler teardown error:",t)}this.crawler=void 0}this.crawlerRunPromise=void 0,this.isCrawlerReady=void 0;const t=(this.opts?.storage||{}).purge??!0;this.requestQueue&&(t&&await this.requestQueue.drop().catch(t=>console.error("Error dropping requestQueue:",t)),this.requestQueue=void 0),this.kvStore&&(t&&await this.kvStore.drop().catch(t=>console.error("Error dropping kvStore:",t)),this.kvStore=void 0),this.actionEmitter.removeAllListeners(),this.pendingRequests.clear(),this.actionQueue=[],this.config=void 0}async blockResources(t,e){return e&&this.blockedTypes.clear(),t.forEach(t=>this.blockedTypes.add(t)),t.length}getContent(){return this.lastResponse?Promise.resolve(this.lastResponse):Promise.reject(new Error("No content fetched yet. 
Call goto() first."))}async headers(t,e){if(void 0===t)return{...this.hdrs};if("string"==typeof t&&void 0===e)return this.hdrs[t.toLowerCase()]||"";if(null!==t&&"object"==typeof t){const i={};for(const[e,s]of Object.entries(t))i[e.toLowerCase()]=String(s);return this.hdrs=!0===e?i:{...this.hdrs,...i},!0}return"string"==typeof t&&("string"==typeof e?this.hdrs[t.toLowerCase()]=e:null===e&&delete this.hdrs[t.toLowerCase()],!0)}async cookies(t){const e=this.lastResponse?.url||this.ctx?.url||"";if(Array.isArray(t))return this.currentSession?this.currentSession.setCookies(t,e):this._initialCookies=[...t],!0;if(null===t)return this.currentSession,this._initialCookies=[],!0;if(this.currentSession){return this.currentSession.getCookies(e)}return[...this._initialCookies||[]]}async dispose(){await this.cleanup()}};function I(t,e=.3){const i=t*(1-e),s=t*(1+e);return Math.floor(Math.random()*(s-i+1)+i)}async function F(t,e){let i;const s=e?.engine||t.engine;if(s&&"auto"!==s){if(i=await U.create(t,{engine:s}),!i)throw new Error(`Engine "${s}" is not available or failed to initialize.`);return i}const n=function(t,e){if(!t||!e?.length)return null;const i=new URL(t);let s=e.find(t=>t.domain===i.hostname);s||(s=e.find(t=>i.hostname.endsWith(t.domain)));if(!s)return null;if(s.pathScope?.length){if(!s.pathScope.some(t=>i.pathname.startsWith(t)))return null}return s}(e?.url||t.url,t.sites);if(n?.engine&&"auto"!==n.engine&&(i=await U.create(t,{engine:n.engine}),i))return i;if(i=await U.create(t,{engine:"http"}),!i)throw new Error("Failed to create default http engine");return i}async function L(t,e){if(!t.enableSmart)return!1;if("browser"===t.internal.engine?.mode)return!0;if(H(e,t.upgradeThresholdMs)){const e=t.internal.engine;e&&(t.syncStateOnUpgrade?t.cookies=await e.cookies()||[]:t.cookies=[],await e.dispose(),t.internal.engine=void 0);const i=await U.create(t,{engine:"browser"});if(i)return t.eventBus.emit("context:engine:upgraded",{to:"browser"}),i}return!1}function 
H(t,e=5e3){if(t.statusCode>=500||401===t.statusCode||403===t.statusCode)return!0;if(429===t.statusCode){const i=j(t.headers);return null===i||i>e}return!!t.contentType&&!(!t.contentType.includes("text/html")||(i=t.html,!["__NUXT__","__NEXT_DATA__",'id="__NEXT"',"data-reactroot","data-hydration","ng-version","window.__APOLLO_STATE__","webpackJsonp","vite","requirejs","System.register","Please enable JavaScript","enable-javascript","captcha","Cloudflare"].some(t=>i.includes(t))));var i}U.registry=new Map;var V=class t{static register(t){const e=t.id;if(!e)throw new Error("FetchAction.register: actionClass.id is required");this.registry.set(e,t)}static get(t){return this.registry.get(t)}static create(e){const i="string"==typeof e?e:e.id||e.name||e.action;if(!i)throw new Error("Action must have id, name or action");const s=i instanceof t?i.constructor:this.registry.get(i);return s?new s:void 0}static has(t){return this.registry.has(t)}static list(){return Array.from(this.registry.keys())}static getCapability(t){return this.capabilities[t]??"noop"}getCapability(t){return this.constructor.getCapability(t)}get id(){return this.constructor.id}get returnType(){return this.constructor.returnType}get capabilities(){return this.constructor.capabilities}async delegateToEngine(t,e,...i){const s=t.internal.engine;if(!s)throw new Error("No engine available");if("function"!=typeof s[e])throw new Error(`Engine does not have a method named '${String(e)}'`);return await s[e](...i)}installCollectors(e,i){const s=i?.collectors;if(!s?.length)return;const n=[],r=new Set;for(const i of s){const s=B(i.activateOn),o=B(i.collectOn),a=B(i.deactivateOn),c=!(i.background??!0),l=t.create(i);if(!l)continue;let u=!1,h=!1,f=0;const d=async t=>{if(!u&&!h){u=!0;try{await(l.onBeforeExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:l.id,phase:"before",error:t})}}},w=async(t,s)=>{if(!h){u||await d(s);try{const n=Promise.resolve(l.onExecute?.(e,i,s)).then(s=>{var 
n,r;if(i.storeAs){((n=e.outputs)[r=i.storeAs]||(n[r]=[])).push(s)}return e.eventBus.emit("collector:result",{action:this.id,collector:i.id||i.name,event:t,result:s}),s}).catch(s=>{e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,event:t,phase:"exec",error:s})}).finally(()=>{f++});c&&(r.add(n),n.finally(()=>r.delete(n)))}catch(i){e.eventBus.emit("collector:error",{action:this.id,collector:l.id,event:t,phase:"exec",error:i})}}},p=async()=>{if(!h){0===f&&w("collector:after"),h=!0;try{await(l.onAfterExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,phase:"after",error:t})}finally{e.eventBus.emit("collector:end",{action:this.id,collector:i.id||i.name}),m.forEach(t=>t())}}},y=G(e,s,d),m=z(e,o,w),g=G(e,a,p);if(n.push(...y,...m,...g),!s.length&&!o.length&&!a.length){const t=()=>{p()};e.eventBus.once(`action:${this.id}.end`,t),n.push(()=>e.eventBus.off("fetcher:action:end",t))}}return n.length||r.size>0?{cleanup:()=>n.forEach(t=>t()),awaitExecPendings:async()=>{r.size>0&&await Promise.allSettled(Array.from(r))}}:void 0}async beforeExec(t,e){t.internal.actionStack||(t.internal.actionStack=[]);const i=t.internal.actionStack,s=i.length,n=i.length>0?i[i.length-1].id:void 0,r={...e,id:this.id,depth:s,parent:n};i.push(r),t.currentAction=r;const o={action:this,context:t,options:e,index:e?.index,depth:s,stack:[...i]};t.eventBus.emit(`action:${this.id}.start`,o),t.eventBus.emit("action:start",o),await(this.onBeforeExec?.(t,e));return{entry:o,collectors:this.installCollectors(t,e)}}async afterExec(t,e,i,s){const n=t.internal.actionStack,r=n.length-1,o=s?.collectors;try{if(await(o?.awaitExecPendings()),t.lastResult=i,"response"!==i?.returnType||i.error||(t.lastResponse=i.result),e?.storeAs){const s=t.outputs[e.storeAs],n=i?.result;"object"!=typeof s||null===s||"object"!=typeof 
n||null===n||Array.isArray(s)||Array.isArray(n)?t.outputs[e.storeAs]=n:t.outputs[e.storeAs]={...s,...n}}i?.error&&(t.currentAction.error=i.error),await(this.onAfterExec?.(t,e));const s={action:this,context:t,options:e,result:i,depth:r,stack:[...n]};i?.error&&(s.error=i.error);try{t.eventBus.emit(`action:${this.id}.end`,s)}catch(t){}try{t.eventBus.emit("action:end",s)}catch(t){}}finally{try{o?.cleanup()}finally{n.pop();const e=n.length;t.currentAction=e>0?n[e-1]:void 0}}}async execute(t,i){i?.args&&!i.params&&(i.params=i.args);const s=await this.beforeExec(t,i),n=i?.failOnError??!0,r=i?.maxRetries??t.retries??0;let o,a=0;try{for(;;)try{t.throwHttpErrors=n;const s=await this.onExecute(t,i);if(o=s&&s.returnType?s:{status:1,returnType:this.returnType??"any",result:s},t.enableSmart&&"response"===o?.returnType&&o.result){if(H(o.result,t.upgradeThresholdMs)&&"browser"!==t.internal.engine?.mode)throw new e(o.result)}return o}catch(i){if(i instanceof e||"ENGINE_UPGRADE_REQUIRED"===i.code)throw i;const s=i.response;if(s&&t.enableSmart){if(H(s,t.upgradeThresholdMs)&&"browser"!==t.internal.engine?.mode)throw new e(s);const i=j(s.headers);if(null!==i&&i<=(t.upgradeThresholdMs||5e3)&&a<r){await new Promise(t=>setTimeout(t,i)),a++;continue}}if(o={status:0,error:i,meta:{id:this.id,retries:a,engineType:t.engine,capability:this.getCapability(t.engine)}},n)throw i;return o}}finally{await this.afterExec(t,i,o,s)}}};V.registry=new Map,V.returnType="any",V.capabilities={http:"noop",browser:"noop"};var D=V;function B(t){return t?Array.isArray(t)?t:[t]:[]}function G(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=(...t)=>{i(t[0])};t.eventBus.once(n,e),s.push(()=>t.eventBus.off(n,e))}return s}function z(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=t=>i(n,t);t.eventBus.on(n,e),s.push(()=>t.eventBus.off(n,e))}return s}import{EventEmitter as W}from"events-ex";import{defaultsDeep as J}from"lodash-es";var 
Q=class{constructor(t={}){this.options=t,this.closed=!1,this.id=P(),this.context=this.createContext(t)}_logDebug(t,...e){N(this.context.debug,{prefix:"FetchSession",id:this.id.slice(0,8),category:t},...e)}async execute(t,e=this.context){const i=t.id||t.name||t.action;this._logDebug("execute",`Executing action: ${i}`,t.params);const s=t.index??(e.internal.actionIndex||0);e.internal.actionIndex=s+1,await this.ensureEngine(t,e);const n=D.create(t);if(!n)throw new Error(`Unknown action: ${t.id||t.name}`);const r={...t,index:s};let o,a;e.currentAction={...r,startedAt:Date.now()};try{return o=await n.execute(e,r),o}catch(t){throw a=t,a}finally{e.currentAction=void 0}}async executeAll(t,i){this._logDebug("executeAll",`Total actions: ${t.length}`,t.map(t=>t.id||t.name||t.action));const s=this.context.internal,n=i?J({id:this.context.id,eventBus:this.context.eventBus,outputs:this.context.outputs,execute:this.context.execute,action:this.context.action,internal:s},i,this.context):this.context;let r=!1;for(;;){let s=i?.index??0;try{for(;s<t.length;){const e=t[s];await this.execute({...e,index:s},n),s++}const e=await this.execute({id:"getContent",index:s},n);return{result:e?.result,outputs:this.getOutputs()}}catch(t){if(!r&&(t instanceof e||"ENGINE_UPGRADE_REQUIRED"===t.code)){this._logDebug("executeAll","Engine upgrade signaled, restarting session...",t.res.statusCode),await L(n,t.res),r=!0;continue}throw t.actionIndex=s,t}}}getOutputs(){return this.context.outputs}async getState(){return this.context.internal.engine?.getState()}async dispose(){if(this.closed)return;const t=this.context.eventBus;t.emit("session:closing",{sessionId:this.id});try{await(this.context.internal.engine?.dispose())}finally{this.closed=!0}t.emit("session:closed",{sessionId:this.id})}async ensureEngine(t,e){if(this.closed)throw new Error("Session is closed");if(!e.internal.engine){const i=t?.params?.url??e.url,s=await F(e,{url:i});if(!s)throw new Error("No engine 
found");e.internal.engine=s}}createContext(t=this.options){const e=new W;return J({...t,id:this.id,eventBus:e,outputs:{},internal:{},execute:async t=>this.execute(t),action:async function(t,e,i){return this.execute({name:t,params:e,...i})}},i)}},K=class{constructor(t={}){this.defaults=t}async createSession(t){const e={...this.defaults,...t||{}};return new Q(e)}async fetch(t,e){"string"!=typeof t&&(t=(e=t).url);const i=await this.createSession(e);try{const s=e?.actions||[];t&&0!==s.findIndex(e=>("goto"===e.id||"goto"===e.name)&&e.params?.url===t)&&s.unshift({id:"goto",params:{url:t}});return await i.executeAll(s)}finally{await i.dispose()}}};import{CheerioCrawler as X}from"crawlee";import*as Y from"cheerio";import{newFunction as Z}from"util-ex";import{CommonError as tt,ErrorCode as et,NotFoundError as it}from"@isdk/common-error";var st="___BR___",nt="___BLOCK___",rt="___P___",ot=/\s+/g,at=new RegExp(` *(${st}|${nt}|${rt}) *`,"g"),ct=new RegExp(`(?:${nt}|${rt})+`,"g");var lt={"&amp;":"&amp;","&lt;":"&lt;","&gt;":"&gt;"},ut={"&quot;":'"',"&apos;":"'","&nbsp;":" ","&copy;":"©","&reg;":"®","&trade;":"™","&sect;":"§","&para;":"¶","&bull;":"•","&hellip;":"…","&euro;":"€","&pound;":"£","&yen;":"¥","&cent;":"¢","&curren;":"¤","&brvbar;":"¦","&uml;":"¨","&ordf;":"ª","&laquo;":"«","&raquo;":"»","&not;":"¬","&shy;":"","&macr;":"¯","&deg;":"°","&plusmn;":"±","&sup2;":"²","&sup3;":"³","&acute;":"´","&micro;":"µ","&middot;":"·","&cedil;":"¸","&sup1;":"¹","&ordm;":"º","&iquest;":"¿","&times;":"×","&divide;":"÷","&ndash;":"–","&mdash;":"—","&lsquo;":"‘","&rsquo;":"’","&sbquo;":"‚","&ldquo;":"“","&rdquo;":"”","&bdquo;":"„","&dagger;":"†","&Dagger;":"‡","&permil;":"‰","&lsaquo;":"‹","&rsaquo;":"›"};function ht(t){return t?t.replace(/&(#?[a-zA-Z0-9]+);/g,t=>{const e=t.toLowerCase();if(lt[e])return t;if(ut[e])return ut[e];if(t.startsWith("&#")){const e=t.startsWith("&#x")?parseInt(t.slice(3,-1),16):parseInt(t.slice(2,-1),10);if(!isNaN(e)){if(160===e)return" ";try{return 
String.fromCodePoint(e)}catch(e){return t}}}return t}):t}var ft=class extends U{_ensureCheerioContext(t){if(!t.$&&t.body){let e="string"==typeof t.body?t.body:Buffer.isBuffer(t.body)?t.body.toString("utf-8"):JSON.stringify(t.body);e.trim().startsWith("<")||(e=`<html><body><pre>${e}</pre></body></html>`),t.$=Y.load(e)}}async _buildResponse(t){this._ensureCheerioContext(t);const{request:e,response:i,body:s,$:n}=t,r=n?.html();let o="string"==typeof s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");r&&r!==o&&(o=r);let a=i?.headers;if(!a&&i?.rawHeaders){a={};const t=i.rawHeaders;for(let e=0;e<t.length;e+=2)a[t[e].toLowerCase()]=t[e+1]}const c={url:e.url,finalUrl:e.loadedUrl||e.url,statusCode:i?.statusCode??200,statusText:i?.statusMessage,headers:a||{},body:s,html:ht(o),text:o};if(this.opts?.debug&&i?.timings){const t=i.timings;c.metadata={timings:{start:t.start,total:t.phases?.total,ttfb:t.phases?.firstByte,dns:t.phases?.dns,tcp:t.phases?.tcp,download:t.phases?.download}}}return c}async _querySelectorAll(t,e){if(Array.isArray(t)){if(0===t.length)return[];const{$:i}=t[0],s=t.map(t=>t.el[0]).filter(Boolean),n=i(s);return n.find(e).add(n.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}const{$:i,el:s}=t;return":scope"===e?[{$:i,el:s}]:s.find(e).add(s.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}async _nextSiblingsUntil(t,e){const{$:i,el:s}=t;return(e?s.nextUntil(e):s.nextAll()).toArray().map(t=>({$:i,el:i(t)}))}async _parentElement(t){const{$:e,el:i}=t,s=i.parent();return 0===s.length?null:{$:e,el:s}}async _isSameElement(t,e){return t.el[0]===e.el[0]}async _findClosestAncestor(t,e){if(0===e.length)return null;const i=new Set(e.map(t=>t.el[0])),{$:s,el:n}=t;let r=n;for(;r.length>0;){if(i.has(r[0]))return{$:s,el:r};r=r.parent()}return null}async _contains(t,e){const i=t.el[0],s=e.el[0];if(i===s)return!0;const n=t.$;return"function"==typeof n.contains?n.contains(i,s):t.el.find(e.el).length>0}async _findCommonAncestor(t,e){const{$:i,el:s}=t,{el:n}=e;if(s[0]===n[0])return 
t;if(await this._contains(t,e))return t;if(await this._contains(e,t))return e;const r=s.parents().toArray(),o=n.parents().toArray(),a=new Set(o);for(const t of r)if(a.has(t))return{$:i,el:i(t)};return null}async _findContainerChild(t,e){const{$:i,el:s}=t,n=e.el[0];let r=s;if(r[0]===n)return t;const o=r.parents().toArray();for(let t=0;t<o.length;t++)if(o[t]===n){return{$:i,el:i(t>0?o[t-1]:s[0])}}if(n===i.root()[0]){return{$:i,el:i(o.length>0?o[o.length-1]:s[0])}}return null}async _extractValue(t,e){const{$:i,el:s}=e,{attribute:n,type:r="string",mode:o="text"}=t;if(this._logDebug("extract",`_extractValue: el.length=${s.length} schema=${JSON.stringify(t)}`),0===s.length)return null;let a="";if(n?a=s.attr(n)??null:"html"===r||"html"===o||"outerHTML"===o?(a="outerHTML"===o?i.html(s):s.html()??("html"===r?"":null),a&&(a=ht(a.trim()))):a="innerText"===o?function(t){const e=t.clone();e.find("script, style, noscript, template").remove(),e.find("[hidden]").remove(),e.find("br").replaceWith(st),e.find("p").before(rt).after(rt),e.find("div, h1, h2, h3, h4, h5, h6, li, ul, ol, tr, dl, dt, dd, blockquote, pre, form, table, article, section, header, footer, nav, main, aside, hr, address, fieldset, figure, figcaption, details, summary").before(nt).after(nt);let i=e.text();return i=i.replace(ot," "),i=i.replace(at,"$1"),i=i.replace(ct,t=>t.includes(rt)?rt:nt),i=i.replaceAll(st,"\n"),i=i.replaceAll(rt,"\n\n"),i=i.replaceAll(nt,"\n"),i.trim()}(s):s.text().trim(),null===a)return null;switch(r){case"number":return parseFloat(a.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=a.toLowerCase();return"true"===t||"1"===t;default:return a}}_getInitialElementScope(t){const{$:e}=t;return e?{$:e,el:e.root()}:null}async executeAction(t,e){const{$:i}=t;switch(e.type){case"dispose":return;case"navigate":{const{url:i,opts:s}=e;this._logDebug("navigate",`Navigating to: ${i}`);const n=await this._requestWithRedirects(t,{url:i,method:"GET",headers:{...this.hdrs,...s?.headers}});return await 
this._updateStateAfterNavigation(t,n),this.lastResponse}case"mouseMove":case"mouseClick":case"mouseWheel":case"keyboardType":case"keyboardPress":case"scrollIntoView":throw new tt(`Action "${e.type}" is only supported in browser engine mode.`,e.type,"not_supported");case"click":{if(!i)throw new tt(`Cheerio context not available for action: ${e.type}`,"click");const s=e.selector,n=i(s).first();let r;if(0===n.length)try{r=new URL(s,t.request.loadedUrl||t.request.url).href}catch{throw new tt(`click: selector not found or invalid URL: ${s}`,"click")}else{if(!n.is("a")||!n.attr("href")){if(n.is('input[type="submit"], button[type="submit"], button, input')){const e=n.closest("form");return e.length?this.executeAction(t,{type:"submit",selector:e}):void this._logDebug("click","Button/input clicked but no form found and no JS support in http mode. Ignoring.")}throw new tt(`click: unsupported element for http simulate. Selector: ${s}`,"click")}{const e=n.attr("href");r=new URL(e,t.request.loadedUrl||t.request.url).href}}const o=await t.sendRequest({url:r});return void await this._updateStateAfterNavigation(t,o)}case"fill":{if(!i)throw new tt(`Cheerio context not available for action: ${e.type}`),"fill";const s=i(e.selector).first();if(0===s.length)throw new tt(`fill: selector not found: ${e.selector}`);if(!s.is("input, textarea, select"))throw new tt(`fill: not a form field: ${e.selector}`);return s.val(e.value),void(this.lastResponse=await this.buildResponse(t))}case"trim":{if(!i)throw new tt(`Cheerio context not available for action: ${e.type}`,"trim");const{selectors:s,removeComments:n}=this._getTrimInfo(e.options);return s.forEach(t=>i(t).remove()),n&&i("*").contents().filter((t,e)=>"comment"===e.type).remove(),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":return void(e.options?.ms&&await new Promise(t=>setTimeout(t,e.options.ms)));case"submit":{if(!i)throw new tt(`Cheerio context not available for action: ${e.type}`,"submit");const s="string"==typeof 
e.selector?i(e.selector).first():null!=e.selector?e.selector:i("form").first();if(0===s.length)throw new it(e.selector,"submit");const n=s.attr("action")||t.request.loadedUrl||t.request.url,r=(s.attr("method")||"GET").toUpperCase(),o=new URL(n,t.request.loadedUrl||t.request.url).href,a={};let c;if(s.find("input, select, textarea").each((t,e)=>{const s=i(e),n=s.attr("name");if(!n)return;const r=s.val();null!=r&&(a[n]=String(r))}),"GET"===r){const e=new URL(o);Object.entries(a).forEach(([t,i])=>e.searchParams.set(t,i)),c=await this._requestWithRedirects(t,{url:e.href,method:"GET"})}else{const i=e.options?.enctype||s.attr("enctype")||"application/x-www-form-urlencoded";let n;const r={};"application/json"===i?(n=JSON.stringify(a),r["Content-Type"]="application/json"):(n=new URLSearchParams(a).toString(),r["Content-Type"]="application/x-www-form-urlencoded"),this._logDebug("submit","Submitting POST to:",o,"enctype:",i),c=await this._requestWithRedirects(t,{url:o,method:"POST",body:n,headers:r})}return await this._updateStateAfterNavigation(t,c),void this._logDebug("submit","Submit finished. 
Current URL:",t.request.loadedUrl||t.request.url)}case"evaluate":{const{fn:s,args:n=[]}=e.params,r=t.request.loadedUrl||t.request.url;let o=null;const a=t=>t&&0!==t.length?{textContent:t.text(),innerHTML:t.html(),outerHTML:i.html(t),getAttribute:e=>t.attr(e),matches:e=>t.is(e)}:null,c=this,l={location:{_href:r,get href(){return this._href},set href(t){if(t&&t!==this._href){this._href=t;const e=new URL(t,r).href;o=c.goto(e)}},assign(t){this.href=t},replace(t){this.href=t}}},u={getElementById:t=>a(i(`#${t}`).first()),querySelector:t=>a(i(t).first()),querySelectorAll:t=>i(t).toArray().map(t=>a(i(t))),getElementsByClassName:t=>i(`.${t}`).toArray().map(t=>a(i(t))),getElementsByTagName:t=>i(t).toArray().map(t=>a(i(t))),get body(){return a(i("body").first())},get title(){return i("title").text()}};l.document=u;const h={window:l,document:u,$:i,console:console};let f;const d=Z(s,h);return f="function"==typeof d?await d(n):d,o?await o:l.location.href===r&&(this.lastResponse=await this.buildResponse(t)),f}default:throw new tt(`Unknown action type: ${e.type}`,"CheerioFetchEngine.executeAction",et.NotSupported)}}async _requestWithRedirects(t,e){let{url:i,method:s,body:n,headers:r={}}=e,o=0;let a;for(;o<=5;){if(t.session){const e=t.session.getCookieString(i);e&&(r={...r,cookie:e})}if(a=await t.sendRequest({url:i,method:s,body:n,headers:r,followRedirect:!1}),!a)break;const e=a.statusCode,c=a.headers||a.req?.res?.headers||a.res?.headers||{};if(t.session&&c["set-cookie"]&&t.session.setCookies(c["set-cookie"],i),[301,302,303,307,308].includes(e)){const t=c.location;if(!t)break;if(i=new URL(t,i).href,o++,[301,302,303].includes(e)){this._logDebug("http",`Redirect ${e} (method conversion to GET):`,i),s="GET",n=void 0;const{"content-type":t,"Content-Type":o,"content-length":a,"Content-Length":c,...l}=r;r=l}else this._logDebug("http",`Redirect ${e} (method preserved):`,i);continue}break}return a}async _updateStateAfterNavigation(t,e){const i=e;t.response=i,t.body=i.body,t.$=void 
0,i.url&&(t.request.loadedUrl=i.url),this.lastResponse=await this.buildResponse(t)}_createCrawler(t,e){return new X(t,e)}_getSpecificCrawlerOptions(t){return{additionalMimeTypes:["text/plain"],maxRequestRetries:1,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,proxyConfiguration:this.proxyConfiguration,preNavigationHooks:[({session:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors,this.opts?.timeoutMs&&(s.timeout={request:this.opts.timeoutMs})}]}}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});const i="req-"+ ++this.requestCounter,s=new Promise((t,s)=>{const n=e?.timeoutMs||this.opts?.timeoutMs||3e4,r=setTimeout(()=>{this.pendingRequests.delete(i),this.navigationLock.release(),s(new tt(`goto timed out after ${n}ms.`,"gotoTimeout",et.RequestTimeout))},n);this.pendingRequests.set(i,{resolve:e=>{clearTimeout(r),t(e)},reject:t=>{clearTimeout(r),s(t)}})});return this.requestQueue.addRequest({...e,url:t,headers:{...this.hdrs,...e?.headers},userData:{requestId:i},uniqueKey:`${t}-${i}`}).catch(t=>{const e=this.pendingRequests.get(i);e&&(this.pendingRequests.delete(i),this.navigationLock.release(),e.reject(t))}),await this.navigationLock,this.navigationLock=T(),s}};ft.id="cheerio",ft.mode="http",U.register(ft);import{PlaywrightCrawler as dt}from"crawlee";import{firefox as wt}from"playwright";import{CommonError as pt,ErrorCode as yt,NotFoundError as mt}from"@isdk/common-error";var gt=3e4,xt=class extends U{constructor(){super(...arguments),this.currentMousePos={x:0,y:0},this.mouseInitialized=!1}async _buildResponse(t){const{page:e,response:i,request:s,session:n}=t;if(!e||e.isClosed())return{url:s.url,finalUrl:s.loadedUrl||s.url,statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:"",html:"",text:""};const r=await e.content(),o=await e.textContent("body"),a=await e.context().cookies();n&&n.setCookies(a,s.url);const 
c={url:e.url(),finalUrl:e.url(),statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:r,html:r,text:o||""};if(this.opts?.debug&&i){const t="function"==typeof i.request?i.request():i.request;if(t&&"function"==typeof t.timing){const e=t.timing();c.metadata={timings:{start:e.startTime,total:e.responseEnd-e.startTime,ttfb:e.responseStart-e.requestStart,dns:e.domainLookupEnd-e.domainLookupStart,tcp:e.connectEnd-e.connectStart,download:e.responseEnd-e.responseStart}}}}return!1!==this.opts?.output?.cookies&&(c.cookies=a),c}async _querySelectorAll(t,e){const i=Array.isArray(t)?t:[t],s=[];for(const t of i){const i=await t.locator(e).all();s.push(...i);try{await t.evaluate((t,e)=>t.matches(e),e)}catch(t){}}const n=[];for(const t of i){let i=!1;try{i=await t.evaluate((t,e)=>t.matches(e),e)}catch{}i&&n.push(t);const s=await t.locator(e).all();n.push(...s)}return n}async _nextSiblingsUntil(t,e){const i=await t.locator("xpath=following-sibling::*").all();if(!e)return i;const s=[];for(const t of i){if(await t.evaluate((t,e)=>t.matches(e),e))break;s.push(t)}return s}async _parentElement(t){const e=t.locator("xpath=..");return 0===await e.count()?null:e.first()}async _isSameElement(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;try{return await i.evaluate((t,e)=>t===e,s)}finally{await i.dispose(),await s.dispose()}}async _findClosestAncestor(t,e){if(0===e.length)return null;const i=await t.elementHandle();if(!i)return null;const s=await Promise.all(e.map(t=>t.elementHandle()));try{const t=await i.evaluate((t,e)=>{const i=new Set(e);let s=t;for(;s;){if(i.has(s))return e.indexOf(s);s=s.parentElement}return-1},s);return-1!==t?e[t]:null}finally{await i.dispose(),await Promise.all(s.map(t=>t?.dispose()))}}async _contains(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;try{return await i.evaluate((t,e)=>t.contains(e),s)}finally{await i.dispose(),await s.dispose()}}async 
_findCommonAncestor(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return null;try{const e=await i.evaluateHandle((t,e)=>{let i=null;if(t===e)i=t;else if(t.contains(e))i=t;else if(e.contains(t))i=e;else{const s=new Set;let n=e.parentElement;for(;n;)s.add(n),n=n.parentElement;for(n=t.parentElement;n;){if(s.has(n)){i=n;break}n=n.parentElement}}return i&&1===i.nodeType?function t(e){if(e.id)return`//*[@id="${e.id}"]`;if(e===document.body)return"/html/body";if(e===document.documentElement)return"/html";let i=0;const s=e.parentNode?e.parentNode.childNodes:[];for(let n=0;n<s.length;n++){const r=s[n];if(r===e)return t(e.parentNode)+"/"+e.tagName.toLowerCase()+"["+(i+1)+"]";1===r.nodeType&&r.tagName===e.tagName&&i++}return""}(i):null},s);if(!e)return null;const n=await e.jsonValue();return"string"==typeof n&&n?t.page().locator(`xpath=${n}`):null}finally{await i.dispose(),await s.dispose()}}async _findContainerChild(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return null;try{const e=await i.evaluateHandle((t,e)=>{let i=null;if(t===e)i=t;else{let s=t;for(;s;){if(s.parentElement===e){i=s;break}s=s.parentElement}}return i&&1===i.nodeType?function t(e){if(e.id)return`//*[@id="${e.id}"]`;if(e===document.body)return"/html/body";if(e===document.documentElement)return"/html";let i=0;const s=e.parentNode?e.parentNode.childNodes:[];for(let n=0;n<s.length;n++){const r=s[n];if(r===e)return t(e.parentNode)+"/"+e.tagName.toLowerCase()+"["+(i+1)+"]";1===r.nodeType&&r.tagName===e.tagName&&i++}return""}(i):null},s);if(!e)return null;const n=await e.jsonValue();return"string"==typeof n&&n?t.page().locator(`xpath=${n}`):null}finally{await i.dispose(),await s.dispose()}}async _extractValue(t,e){const{attribute:i,type:s="string",mode:n="text"}=t,r=await e.count();if(this._logDebug("extract",`_extractValue: count=${r} schema=${JSON.stringify(t)}`),0===r)return null;let o="";if(i?o=await 
e.getAttribute(i):"html"===s||"html"===n||"outerHTML"===n?(o="outerHTML"===n?await e.evaluate(t=>t.outerHTML):await e.innerHTML(),o&&(o=ht(o))):o="innerText"===n?await e.innerText():await e.textContent(),null===o)return null;switch(o=o.trim(),s){case"number":return parseFloat(o.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=o.toLowerCase();return"true"===t||"1"===t;default:return o}}_getInitialElementScope(t){const{page:e}=t;return e?e.locator(":root"):null}async _waitForNavigation(t,e,i){const{page:s}=t,n=this.opts?.timeoutMs||gt;try{await s.waitForURL(t=>t.href!==e,{waitUntil:"domcontentloaded",timeout:5e3}),this._logDebug(i,"URL changed to:",s.url())}catch(t){this._logDebug(i,"No URL change detected within 5s")}await s.waitForLoadState("networkidle",{timeout:n}),this.lastResponse=await this.buildResponse(t)}async _sharedRequestHandler(t){const{page:e}=t;return e&&!this.mouseInitialized&&await this._initializeMousePos(e),super._sharedRequestHandler(t)}async _initializeMousePos(t){if(this.mouseInitialized||0!==this.currentMousePos.x||0!==this.currentMousePos.y)return void(this.mouseInitialized=!0);let e=0,i=0;Math.random()>.5?(e=0,i=Math.floor(600*Math.random())+100):(e=Math.floor(800*Math.random())+100,i=0),this.currentMousePos={x:e,y:i};try{await t.mouse.move(e,i),this.mouseInitialized=!0}catch(t){}}_getTrajectory(t,e,i=-1){const s=[],n=Math.sqrt(Math.pow(e.x-t.x,2)+Math.pow(e.y-t.y,2));-1===i&&(i=Math.max(2,Math.min(3,Math.floor(n/400)+2)));const r=t.x+(e.x-t.x)/2,o=t.y+(e.y-t.y)/2,a=r+(Math.random()-.5)*n*.1,c=o+(Math.random()-.5)*n*.1,l=t=>t<.5?4*t*t*t:1-Math.pow(-2*t+2,3)/2;for(let n=1;n<=i;n++){const r=l(n/i),o=(1-r)*(1-r)*t.x+2*(1-r)*r*a+r*r*e.x,u=(1-r)*(1-r)*t.y+2*(1-r)*r*c+r*r*e.y;s.push({x:o,y:u})}return s}async _moveToPos(t,e,i=-1){const{page:s}=t,n={...this.currentMousePos};e.x<0&&(e.x=Math.floor(Math.random()*I(Math.abs(e.x)))+(n.x||0)),e.y<0&&(e.y=Math.floor(Math.random()*I(Math.abs(e.y)))+(n.y||0));const 
r=s.viewportSize();r&&(e.x=Math.max(0,Math.min(e.x,r.width-1)),e.y=Math.max(0,Math.min(e.y,r.height-1)));const o=this._getTrajectory(n,e,i),a=Math.sqrt(Math.pow(e.x-n.x,2)+Math.pow(e.y-n.y,2)),c=Math.max(1,Math.min(3,a/500+1));let l=0,u=n;for(const t of o){const e=Math.sqrt(Math.pow(t.x-u.x,2)+Math.pow(t.y-u.y,2));e>l&&(l=e),u=t}const h=Math.max(5,Math.floor(l/c));for(const t of o)await s.mouse.move(t.x,t.y,{steps:h});return this.currentMousePos=e,this.currentMousePos}async _ensureVisible(t,e){const{page:i}=t,s=i.locator(e).first();await s.scrollIntoViewIfNeeded();const n=await s.boundingBox();if(!n)throw new pt(`Selector not found or not visible: ${e}`,"ensureVisible");return{x:n.x+n.width/2,y:n.y+n.height/2}}async _moveToSelector(t,e,i=-1){const s=await this._ensureVisible(t,e);return this._moveToPos(t,s,i)}async executeAction(t,e){const{page:i}=t,s=this.opts?.timeoutMs||gt;switch(e.type){case"dispose":return;case"navigate":{const s=await i.goto(e.url,{waitUntil:e.opts?.waitUntil||"domcontentloaded",timeout:this.opts?.timeoutMs||gt});s&&(t={...t,response:s},this._logDebug("navigate",`Navigation status: ${s.status()} for ${s.url()}`));const n=await this.buildResponse(t);return this.lastResponse=n,n}case"mouseMove":{const{x:i,y:s,selector:n,steps:r=-1}=e.params;return void(n?(await this._moveToSelector(t,n,r),this.lastResponse=await this.buildResponse(t)):void 0!==i&&void 0!==s&&await this._moveToPos(t,{x:i,y:s},r))}case"mouseClick":{const{x:s,y:n,selector:r,button:o="left",clickCount:a=1,delay:c=0,steps:l=-1}=e.params;return r?await this._moveToSelector(t,r,l):void 0!==s&&void 0!==n&&await this._moveToPos(t,{x:s,y:n},l),await i.mouse.click(this.currentMousePos.x,this.currentMousePos.y,{button:o,clickCount:a,delay:I(c||50,.2)}),await i.waitForTimeout(I(100,.5)),void(this.lastResponse=await this.buildResponse(t))}case"mouseWheel":{const{x:s,y:n,selector:r,deltaX:o=0,deltaY:a=0,steps:c=1}=e.params;if(r){const e=await this._ensureVisible(t,r);await 
i.mouse.move(e.x,e.y),this.currentMousePos=e}else void 0!==s&&void 0!==n&&await this._moveToPos(t,{x:s,y:n});if(c>1){const t=o/c,e=a/c;for(let s=0;s<c;s++)await i.mouse.wheel(t,e)}else await i.mouse.wheel(o,a);return await i.waitForTimeout(I(100,.5)),void(this.lastResponse=await this.buildResponse(t))}case"scrollIntoView":{const{selector:i}=e.params;return await this._ensureVisible(t,i),void(this.lastResponse=await this.buildResponse(t))}case"keyboardType":{const{text:s,delay:n=150}=e.params;return await i.keyboard.type(s,{delay:I(n)}),void(this.lastResponse=await this.buildResponse(t))}case"keyboardPress":{const{key:s,delay:n=50}=e.params;return await i.keyboard.press(s,{delay:I(n)}),void(this.lastResponse=await this.buildResponse(t))}case"click":{const n=i.url();return await i.click(e.selector,{timeout:s}),void await this._waitForNavigation(t,n,"click")}case"fill":await i.fill(e.selector,e.value,{timeout:s});const n=await this.buildResponse(t);return void(this.lastResponse=n);case"trim":{const s=this._getTrimInfo(e.options);return await i.evaluate(t=>{const{selectors:e,removeComments:i,removeHidden:s}=t;if(e.forEach(t=>{document.querySelectorAll(t).forEach(t=>t.remove())}),s){const t=[];document.querySelectorAll("*").forEach(e=>{const i=window.getComputedStyle(e);"none"!==i.display&&"hidden"!==i.visibility||t.push(e)}),t.forEach(t=>t.remove())}if(i){const t=document.createNodeIterator(document,NodeFilter.SHOW_COMMENT),e=[];let i;for(;i=t.nextNode();)e.push(i);e.forEach(t=>t.parentElement?.removeChild(t))}},s),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":try{e.options?.selector&&await i.waitForSelector(e.options.selector,{timeout:s}),e.options?.networkIdle&&await i.waitForLoadState("networkidle",{timeout:s}),e.options?.ms&&await i.waitForTimeout(I(e.options.ms,.1))}catch(t){if(!1!==e.options?.failOnTimeout)throw t}return;case"submit":{const s=e.selector||"form",n=i.locator(s).first();if(0===await n.count())throw new 
mt(s,"submit");if("application/json"===(e.options?.enctype||"application/x-www-form-urlencoded")){const t=await n.elementHandle();if(!t)throw new pt(`submit: could not get form handle for ${s}`,"submit");const e=await t.evaluate(async t=>{const e=new FormData(t),i={};e.forEach((t,e)=>{i[e]=t.toString()});const s=await fetch(t.action,{method:t.method,headers:{"Content-Type":"application/json"},body:JSON.stringify(i)}),n=await s.text();return{status:s.status,statusText:s.statusText,headers:Object.fromEntries(s.headers.entries()),body:n,html:n,text:n,url:t.action,finalUrl:s.url}});return await t.dispose(),await i.setContent(e.html),void(this.lastResponse=e)}{this._logDebug("submit","Submitting form by form.submit()...");const e=i.url();return await n.evaluate(t=>t.submit()),void await this._waitForNavigation(t,e,"submit")}}case"evaluate":{const{fn:n,args:r=[]}=e.params,o=i.url();let a;if(a="function"==typeof n?await i.evaluate(n,r):await i.evaluate(([t,e])=>{const i=(0,eval)(`(${t})`);return"function"==typeof i?i(e):i},[n,r]),i.url()!==o)await i.waitForLoadState("domcontentloaded",{timeout:s}).catch(()=>{}),this.lastResponse=await this.buildResponse(t);else try{this.lastResponse=await this.buildResponse(t)}catch(e){await i.waitForLoadState("domcontentloaded",{timeout:s}).catch(()=>{}),this.lastResponse=await this.buildResponse(t)}return a}default:throw new pt(`Unknown action type: ${e.type}`,"PlaywrightFetchEngine.executeAction",yt.NotSupported)}}_createCrawler(t,e){return new dt(t,e)}async _getSpecificCrawlerOptions(t){const e=t.browser?.headless??!0,i={maxRequestRetries:t.retries||3,headless:e,proxyConfiguration:this.proxyConfiguration,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,preNavigationHooks:[async({page:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors;const n=this.blockedTypes;n.size>0&&await 
e.route("**/*",t=>{n.has(t.request().resourceType())?t.abort():t.continue()})}]},s=t.browser?.launchOptions||{};if(this.opts?.antibot){i.browserPoolOptions={useFingerprints:!1};const{launchOptions:t}=await import("camoufox-js"),n=await t({headless:e,...s});i.launchContext={launcher:wt,launchOptions:n},i.postNavigationHooks=[async({page:t,handleCloudflareChallenge:e})=>{await e()}]}else Object.keys(s).length>0&&(i.launchContext={launchOptions:s});return i}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});if(!this.requestQueue)throw new pt("RequestQueue not initialized","goto");const i="req-"+ ++this.requestCounter,s=new Promise((t,e)=>{this.pendingRequests.set(i,{resolve:t,reject:e})});return await this.requestQueue.addRequest({url:t,headers:this.hdrs,userData:{requestId:i,waitUntil:e?.waitUntil||"domcontentloaded"},uniqueKey:`${t}-${i}`}),s}};xt.id="playwright",xt.mode="browser",U.register(xt);var bt=class extends D{async onExecute(t,e){const{selector:i,...s}=e?.params||{};if(!i)throw new Error("Selector is required for click action");await this.delegateToEngine(t,"click",i,s)}};bt.id="click",bt.returnType="none",bt.capabilities={http:"simulate",browser:"native"},D.register(bt);var vt=class extends D{async onExecute(t,e){const{selector:i,value:s,...n}=e?.params||{};if(!i)throw new Error("Selector is required for fill action");if(void 0===s)throw new Error("Value is required for fill action");await this.delegateToEngine(t,"fill",i,s,n)}};vt.id="fill",vt.returnType="none",vt.capabilities={http:"simulate",browser:"native"},D.register(vt);var $t=class extends D{async onExecute(t,e){return await this.delegateToEngine(t,"getContent",e?.params)}};$t.id="getContent",$t.returnType="response",$t.capabilities={http:"native",browser:"native"},D.register($t);var _t=class extends D{async onExecute(t,e,i){const s=e?.params,n=s?.url||t.url;if(!n)throw new Error("URL is required for goto action");const r=t.internal.engine;if(!r)throw 
new Error("No engine available");t.url=n;return await r.goto(n,s)}};_t.id="goto",_t.returnType="response",_t.capabilities={http:"native",browser:"native"},D.register(_t);var Et=class extends D{async onExecute(t,e){const{selector:i,...s}=e?.params||{};await this.delegateToEngine(t,"submit",i,s)}};Et.id="submit",Et.returnType="none",Et.capabilities={http:"simulate",browser:"native"},D.register(Et);var kt=class extends D{async onExecute(t,e){const i=t.internal.engine;if(!i)throw new Error("No engine available");await i.waitFor(e?.params)}};kt.id="waitFor",kt.returnType="none",kt.capabilities={http:"native",browser:"native"},D.register(kt);var St=class extends D{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("Schema is required for extract action");return this.delegateToEngine(t,"extract",i)}};St.id="extract",St.returnType="any",St.capabilities={http:"native",browser:"native"},D.register(St);var qt=class extends D{async onExecute(t,e){const{selector:i,message:s,attribute:n}=e?.params||{},r=t.internal.engine;if("browser"===r?.mode){if(i){if(!await(r?.extract({selector:i,attribute:n})))return}r&&"pause"in r?await r.pause(s):console.warn("[PauseAction] was called, but the current engine does not support `pause`. Skipped.")}else console.warn("[PauseAction] can only run in browser engine. 
Skipped.")}};qt.id="pause",qt.capabilities={http:"native",browser:"native"},qt.returnType="none",D.register(qt);var Ct=class extends D{async onExecute(t,e){const i=e?.params||{};await this.delegateToEngine(t,"trim",i)}};Ct.id="trim",Ct.returnType="none",Ct.capabilities={http:"simulate",browser:"native"},D.register(Ct);var Rt=class extends D{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("evaluate action: params is required");return await this.delegateToEngine(t,"evaluate",i)}};Rt.id="evaluate",Rt.returnType="any",Rt.capabilities={http:"simulate",browser:"native"},D.register(Rt);var At=class extends D{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseMove",i)}};At.id="mouseMove",At.returnType="none",At.capabilities={http:"noop",browser:"native"};var Tt=class extends D{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseClick",i)}};Tt.id="mouseClick",Tt.returnType="none",Tt.capabilities={http:"noop",browser:"native"};var Mt=class extends D{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"scrollIntoView",i)}};Mt.id="scrollIntoView",Mt.returnType="none",Mt.capabilities={http:"noop",browser:"native"};var Pt=class extends D{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseWheel",i)}};Pt.id="mouseWheel",Pt.returnType="none",Pt.capabilities={http:"noop",browser:"native"},D.register(At),D.register(Tt),D.register(Mt),D.register(Pt);var jt=class extends D{async onExecute(t,e){const i=e?.params;if(!i?.text)throw new Error("text is required for keyboardType action");await this.delegateToEngine(t,"keyboardType",i.text,i.delay)}};jt.id="keyboardType",jt.returnType="none",jt.capabilities={http:"noop",browser:"native"};var Nt=class extends D{async onExecute(t,e){const i=e?.params;if(!i?.key)throw new Error("key is required for keyboardPress action");await this.delegateToEngine(t,"keyboardPress",i.key,i.delay)}};async function Ot(t,e){return(new 
K).fetch(t,e)}Nt.id="keyboardPress",Nt.returnType="none",Nt.capabilities={http:"noop",browser:"native"},D.register(jt),D.register(Nt);export{ft as CheerioFetchEngine,bt as ClickAction,i as DefaultFetcherProperties,e as EngineUpgradeError,Rt as EvaluateAction,St as ExtractAction,D as FetchAction,t as FetchActionResultStatus,U as FetchEngine,Q as FetchSession,s as FetcherOptionKeys,vt as FillAction,$t as GetContentAction,_t as GotoAction,Nt as KeyboardPressAction,jt as KeyboardTypeAction,Tt as MouseClickAction,At as MouseMoveAction,Pt as MouseWheelAction,qt as PauseAction,xt as PlaywrightFetchEngine,Mt as ScrollIntoViewAction,Et as SubmitAction,O as TRIM_PRESETS,Ct as TrimAction,kt as WaitForAction,K as WebFetcher,Ot as fetchWeb,I as getRandomDelay};
package/docs/README.md CHANGED
@@ -24,9 +24,23 @@ English | [简体中文](_media/README.cn.md)
24
24
  * **📜 Declarative Action Scripts**: Define multi-step workflows (like logging in, filling forms, and clicking buttons) in a simple, readable JSON format.
25
25
  * **📊 Powerful and Flexible Data Extraction**: Easily extract all kinds of structured data, from simple text to complex nested objects, through an intuitive and powerful declarative Schema.
26
26
  * **🧠 Smart Engine Selection**: Automatically detects dynamic sites and can upgrade the engine from `http` to `browser` on the fly.
27
+ * **🛡️ Anti-Bot Evasion**: In `browser` mode, an optional `antibot` flag helps to bypass common anti-bot measures like Cloudflare challenges.
28
+ * **🕹️ High-Fidelity Interaction Simulation**: Supports Bézier curve-based mouse trajectory movement, realistic typing delay simulation, and complex keyboard interactions to significantly improve anti-bot evasion.
27
29
  * **🧩 Extensible**: Easily create custom, high-level "composite" actions to encapsulate reusable business logic (e.g., a `login` action).
28
30
  * **🧲 Advanced Collectors**: Asynchronously collect data in the background, triggered by events during the execution of a main action.
29
- * **🛡️ Anti-Bot Evasion**: In `browser` mode, an optional `antibot` flag helps to bypass common anti-bot measures like Cloudflare challenges.
31
+
32
+ ---
33
+
34
+ ### Smart Upgrade and Retry Strategy
35
+
36
+ When `enableSmart` is enabled, the system automatically determines whether an engine upgrade is needed based on response characteristics:
37
+
38
+ - Triggers for upgrade include:
39
+ - HTTP status codes: `401 / 403 / 500 / 429`
40
+ - Page appears to be dynamically rendered (typical JS framework signatures are detected in the HTML)
41
+ - `Retry-After` exceeds `upgradeThresholdMs`
42
+ - During upgrade, you can choose whether to sync Cookies / Session state (`syncStateOnUpgrade`)
43
+ - For `429` responses, if `Retry-After` is less than the `upgradeThresholdMs` threshold, the system will prioritize retry over upgrade
30
44
 
31
45
  ---
32
46
 
@@ -153,6 +167,12 @@ This is the main entry point for the library.
153
167
  * `headless` (boolean): Run in headless mode (default: `true`).
154
168
  * `launchOptions` (object): Playwright launch options (e.g., `{ slowMo: 50, args: [...] }`).
155
169
  * `sessionPoolOptions` (SessionPoolOptions): Advanced configuration for the underlying Crawlee SessionPool.
170
+ * `enableSmart` (boolean): Enable smart detection and automatic engine upgrade (default: `true`).
171
+ * `syncStateOnUpgrade` (boolean): Whether to sync Cookies / Session state when upgrading from http to browser engine (default: `false`).
172
+ * `upgradeThresholdMs` (number): Wait-time threshold in milliseconds for triggering an engine upgrade; the engine is upgraded if the required wait exceeds this value or if the response carries no explicit retry information (default: `5000`).
173
+ * `maxRetries` (number): Maximum retry attempts for a single Action (default: `0`).
174
+ * `failOnError` (boolean): Whether to throw an exception when an Action fails (default: `true` for main flow, `false` for collector).
175
+ * `failOnTimeout` (boolean): Whether to treat timeout as failure (default: `false`).
156
176
  * ...and many other options for proxy, retries, etc.
157
177
 
158
178
  ### Built-in Actions
@@ -165,6 +185,8 @@ The library provides a set of powerful built-in actions, many of which are engin
165
185
  * `submit`: Submits a form (Engine-specific).
166
186
  * `mouseMove`: Moves the mouse cursor to a specific coordinate or element (Bézier curve supported).
167
187
  * `mouseClick`: Triggers a mouse click at the current position or specified coordinates.
188
+ * `mouseWheel`: Simulates a mouse wheel scroll event with horizontal and vertical deltas. Supports splitting into multiple steps and automatic scrolling to make the target element visible.
189
+ * `scrollIntoView`: Scrolls the page or a container to make a specific element visible in the viewport.
168
190
  * `keyboardType`: Simulates human-like typing into the currently focused element.
169
191
  * `keyboardPress`: Simulates pressing a single key or a key combination.
170
192
  * `trim`: Removes elements from the DOM to clean up the page.
@@ -253,8 +253,8 @@ Moves the mouse cursor to a specific coordinate or element. In `browser` mode, i
253
253
 
254
254
  * **`id`**: `mouseMove`
255
255
  * **`params`**:
256
- * `x` (number, optional): The absolute X coordinate.
257
- * `y` (number, optional): The absolute Y coordinate.
256
+ * `x` (number, optional): The absolute X coordinate. If negative, it's treated as a relative random offset from current position.
257
+ * `y` (number, optional): The absolute Y coordinate. If negative, it's treated as a relative random offset from current position.
258
258
  * `selector` (string, optional): A CSS selector. If provided, the mouse moves to the center of the element.
259
259
  * `steps` (number, optional): The number of intermediate steps for the trajectory (default: `-1`). Set to `-1` to calculate steps automatically based on distance (simulating natural speed).
260
260
  * **`returns`**: `none`
@@ -265,14 +265,37 @@ Triggers a mouse click at the current position or specified coordinates. If a `s
265
265
 
266
266
  * **`id`**: `mouseClick`
267
267
  * **`params`**:
268
- * `x` (number, optional): The absolute X coordinate to click.
269
- * `y` (number, optional): The absolute Y coordinate to click.
268
+ * `x` (number, optional): The absolute X coordinate to click. If negative, it's treated as a relative random offset from current position.
269
+ * `y` (number, optional): The absolute Y coordinate to click. If negative, it's treated as a relative random offset from current position.
270
270
  * `selector` (string, optional): A CSS selector. If provided, moves the mouse to the element first.
271
271
  * `button` (string, optional): The mouse button to use (`left`, `right`, or `middle`). Default is `left`.
272
272
  * `clickCount` (number, optional): The number of clicks (e.g., 2 for double-click). Default is 1.
273
273
  * `delay` (number, optional): Delay between mousedown and mouseup in milliseconds.
274
274
  * **`returns`**: `none`
275
275
 
276
+ #### `mouseWheel`
277
+
278
+ Simulates a mouse wheel scroll event. If a `selector` is provided, the element is automatically scrolled into view, and the cursor is moved to its center before scrolling. If `steps` is provided, the scroll delta is split into multiple steps for realistic simulation.
279
+
280
+ * **`id`**: `mouseWheel`
281
+ * **`params`**:
282
+ * `x` (number, optional): The absolute X coordinate to scroll at. If negative, it's treated as a relative random offset from current position.
283
+ * `y` (number, optional): The absolute Y coordinate to scroll at. If negative, it's treated as a relative random offset from current position.
284
+ * `selector` (string, optional): A CSS selector. If provided, ensures the element is visible and moves the mouse to its center first.
285
+ * `deltaX` (number, optional): The horizontal scroll amount. Default is 0.
286
+ * `deltaY` (number, optional): The vertical scroll amount. Default is 0.
287
+ * `steps` (number, optional): The number of steps to split the scroll into (default: `1`).
288
+ * **`returns`**: `none`
289
+
290
+ #### `scrollIntoView`
291
+
292
+ Scrolls the page or a scrollable container to make a specific element visible in the viewport.
293
+
294
+ * **`id`**: `scrollIntoView`
295
+ * **`params`**:
296
+ * `selector` (string): The CSS selector of the element to scroll into view.
297
+ * **`returns`**: `none`
298
+
276
299
  #### `keyboardType`
277
300
 
278
301
  Simulates a person typing text into the currently focused element.
@@ -27,6 +27,19 @@
27
27
 
28
28
  ---
29
29
 
30
+ ### 智能升级与重试策略
31
+
32
+ 当 `enableSmart` 开启时,系统会根据响应特征自动判断是否需要升级引擎:
33
+
34
+ - 触发升级的条件包括:
35
+ - HTTP 状态码:`401 / 403 / 500 / 429`
36
+ - 页面疑似动态渲染(HTML 中检测到典型 JS 框架特征)
37
+ - `Retry-After` 超过 `upgradeThresholdMs`
38
+ - 升级过程中可选择是否同步 Cookies / Session 状态(`syncStateOnUpgrade`)
39
+ - 对于 `429` 响应,若 `Retry-After` 小于 `upgradeThresholdMs` 阈值,系统会优先重试而非升级
40
+
41
+ ---
42
+
30
43
  ## 📦 安装
31
44
 
32
45
  1. **安装依赖包:**
@@ -150,6 +163,12 @@ searchGoogle('gemini');
150
163
  * `headless` (boolean): 是否以无头模式运行(默认:`true`)。
151
164
  * `launchOptions` (object): Playwright 启动选项(例如 `{ slowMo: 50, args: [...] }`)。
152
165
  * `sessionPoolOptions` (SessionPoolOptions): 底层 Crawlee SessionPool 的高级配置。
166
+ * `enableSmart` (boolean): 是否启用智能探测与自动引擎升级(默认:`true`)。
167
+ * `syncStateOnUpgrade` (boolean): 当从 http 升级到 browser 引擎时,是否同步 Cookies / Session 状态(默认:`false`)。
168
+ * `upgradeThresholdMs` (number): 触发引擎升级的等待时间阈值(毫秒),超过该时间或无明确重试信息则升级(默认:`5000`)。
169
+ * `maxRetries` (number): 单个 Action 的最大重试次数(默认:`0`)。
170
+ * `failOnError` (boolean): Action 失败时是否抛出异常(默认:主流程 `true`,collector `false`)。
171
+ * `failOnTimeout` (boolean): 超时是否视为失败(默认:`false`)。
153
172
  * ...以及许多其他用于代理、重试等的选项。
154
173
 
155
174
  ### 内置动作 (Built-in Actions)
@@ -162,6 +181,8 @@ searchGoogle('gemini');
162
181
  * `submit`: 提交表单(引擎相关)。
163
182
  * `mouseMove`: 将鼠标指针移动到指定的坐标或元素(支持贝塞尔曲线)。
164
183
  * `mouseClick`: 在当前位置或指定坐标触发鼠标点击。
184
+ * `mouseWheel`: 在目标位置模拟鼠标滚轮滚动,支持水平和垂直偏移、分步模拟以及自动将目标元素滚动到视口。
185
+ * `scrollIntoView`: 滚动页面或容器,使特定元素在视口中可见。
165
186
  * `keyboardType`: 模拟真人在当前获得焦点的元素中输入文本。
166
187
  * `keyboardPress`: 模拟按下单个按键或组合键。
167
188
  * `trim`: 从 DOM 中移除元素以清理页面(如脚本、广告、隐藏内容)。
@@ -64,9 +64,27 @@ The engine is initialized lazily upon the first action execution and remains fix
64
64
  1. **Explicit Option**: If `options.engine` (or temporary context override in `executeAll`) is provided and NOT set to `'auto'`.
65
65
  * ⚠️ **Fail-Fast**: If the requested engine is unavailable (e.g., missing dependencies), an error is thrown immediately.
66
66
  2. **Site Registry**: If set to `'auto'` (default), the system attempts to match the target URL against the `sites` registry.
67
- 3. **Smart Upgrade**: If enabled, the engine may be dynamically upgraded from `http` to `browser` based on response characteristics (e.g., bot detection or heavy JS).
67
+ 3. **Smart Upgrade**: If `enableSmart: true`, the system will automatically upgrade from `http` to `browser` under the following conditions:
68
+ - The response status code is `401 / 403 / 500 / 429`
69
+ - HTML content is identified as "highly dynamic" (heavy JS)
70
+ - `Retry-After` exceeds `upgradeThresholdMs`
71
+ - You can optionally sync Cookies/Session during upgrade (`syncStateOnUpgrade`)
72
+ - If the upgrade fails or the upgraded response still doesn't meet requirements, the original error is thrown
68
73
  4. **Default**: Falls back to `'http'` (Cheerio).
69
74
 
75
+ #### Smart Upgrade & Retry
76
+
77
+ During `executeAll`, if an Action throws `ENGINE_UPGRADE_REQUIRED`:
78
+
79
+ - The system attempts to release the current engine
80
+ - Creates a new browser engine
81
+ - **Automatically re-executes the action list from the beginning**
82
+ - Side effects of successfully executed actions (e.g., cookie writes) can be preserved based on configuration
83
+
84
+ For `429` responses:
85
+ - If `Retry-After` exists and is less than `upgradeThresholdMs`
86
+ - The system will automatically retry within the same engine without triggering an upgrade
87
+
70
88
  #### Batch Execution with Overrides
71
89
 
72
90
  You can execute a sequence of actions with temporary configuration overrides (e.g., headers, timeout) that apply only to that specific batch, without modifying the session's global state.
@@ -112,6 +130,16 @@ If both `sessionState` and `cookies` are provided, the engine adopts a **"Merge
112
130
 
113
131
  ---
114
132
 
133
+ ### Error Enhancement & Retry-After Support
134
+
135
+ - All HTTP errors now attach the original `FetchResponse` to the error object (`error.response`)
136
+ - Support for parsing HTTP `Retry-After` header:
137
+ - Integer (seconds) format
138
+ - HTTP date format
139
+ - Retry wait time hints are included in error messages
140
+
141
+ ---
142
+
115
143
  ## 🏗️ 3. Architecture and Workflow
116
144
 
117
145
  The engine's architecture is designed to solve a key challenge: providing a simple, **stateful-like API** (`goto()`, then `click()`, then `fill()`) on top of Crawlee's fundamentally **stateless, asynchronous** request handling.
@@ -158,7 +186,7 @@ There are two primary engine implementations:
158
186
  * ✅ **Fast and Lightweight**: Ideal for speed and low resource consumption.
159
187
  * ✅ **HTTP-Compliant Redirects**: Correctly handles 301-303 and 307/308 redirects, preserving methods/bodies or converting to GET as per HTTP specifications.
160
188
  * ❌ **No JavaScript Execution**: Cannot interact with client-side rendered content.
161
- * ⚙️ **Simulated Interaction**: Actions like `click` and `submit` are simulated by making new HTTP requests. **Browser-only actions** (e.g., `mouseMove`, `keyboardType`) will throw a `not_supported` error.
189
+ * ⚙️ **Simulated Interaction**: Actions like `click` and `submit` are simulated by making new HTTP requests. **Browser-only actions** (e.g., `mouseMove`, `mouseWheel`, `keyboardType`) will throw a `not_supported` error.
162
190
  * **Use Case**: Scraping static websites, server-rendered pages, or APIs.
163
191
 
164
192
  ### `PlaywrightFetchEngine` (browser mode)