@isdk/web-fetcher 0.2.12 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.action.cn.md +197 -155
- package/README.action.extract.cn.md +263 -0
- package/README.action.extract.md +263 -0
- package/README.action.md +202 -147
- package/README.cn.md +25 -15
- package/README.engine.cn.md +118 -14
- package/README.engine.md +115 -14
- package/README.md +19 -10
- package/dist/index.d.mts +667 -50
- package/dist/index.d.ts +667 -50
- package/dist/index.js +1 -1
- package/dist/index.mjs +1 -1
- package/docs/README.md +19 -10
- package/docs/_media/README.action.md +202 -147
- package/docs/_media/README.cn.md +25 -15
- package/docs/_media/README.engine.md +115 -14
- package/docs/classes/CheerioFetchEngine.md +805 -135
- package/docs/classes/ClickAction.md +33 -33
- package/docs/classes/EvaluateAction.md +559 -0
- package/docs/classes/ExtractAction.md +33 -33
- package/docs/classes/FetchAction.md +39 -33
- package/docs/classes/FetchEngine.md +660 -122
- package/docs/classes/FetchSession.md +38 -16
- package/docs/classes/FillAction.md +33 -33
- package/docs/classes/GetContentAction.md +33 -33
- package/docs/classes/GotoAction.md +33 -33
- package/docs/classes/KeyboardPressAction.md +533 -0
- package/docs/classes/KeyboardTypeAction.md +533 -0
- package/docs/classes/MouseClickAction.md +533 -0
- package/docs/classes/MouseMoveAction.md +533 -0
- package/docs/classes/PauseAction.md +33 -33
- package/docs/classes/PlaywrightFetchEngine.md +820 -122
- package/docs/classes/SubmitAction.md +33 -33
- package/docs/classes/TrimAction.md +533 -0
- package/docs/classes/WaitForAction.md +33 -33
- package/docs/classes/WebFetcher.md +9 -9
- package/docs/enumerations/FetchActionResultStatus.md +4 -4
- package/docs/functions/fetchWeb.md +6 -6
- package/docs/globals.md +14 -0
- package/docs/interfaces/BaseFetchActionProperties.md +12 -12
- package/docs/interfaces/BaseFetchCollectorActionProperties.md +16 -16
- package/docs/interfaces/BaseFetcherProperties.md +32 -28
- package/docs/interfaces/Cookie.md +14 -14
- package/docs/interfaces/DispatchedEngineAction.md +4 -4
- package/docs/interfaces/EvaluateActionOptions.md +81 -0
- package/docs/interfaces/ExtractActionProperties.md +12 -12
- package/docs/interfaces/FetchActionInContext.md +15 -15
- package/docs/interfaces/FetchActionProperties.md +13 -13
- package/docs/interfaces/FetchActionResult.md +6 -6
- package/docs/interfaces/FetchContext.md +42 -38
- package/docs/interfaces/FetchEngineContext.md +37 -33
- package/docs/interfaces/FetchMetadata.md +5 -5
- package/docs/interfaces/FetchResponse.md +14 -14
- package/docs/interfaces/FetchReturnTypeRegistry.md +8 -8
- package/docs/interfaces/FetchSite.md +35 -31
- package/docs/interfaces/FetcherOptions.md +34 -30
- package/docs/interfaces/GotoActionOptions.md +14 -6
- package/docs/interfaces/KeyboardPressParams.md +25 -0
- package/docs/interfaces/KeyboardTypeParams.md +25 -0
- package/docs/interfaces/MouseClickParams.md +49 -0
- package/docs/interfaces/MouseMoveParams.md +41 -0
- package/docs/interfaces/PendingEngineRequest.md +3 -3
- package/docs/interfaces/StorageOptions.md +5 -5
- package/docs/interfaces/SubmitActionOptions.md +2 -2
- package/docs/interfaces/TrimActionOptions.md +27 -0
- package/docs/interfaces/WaitForActionOptions.md +5 -5
- package/docs/type-aliases/BaseFetchActionOptions.md +1 -1
- package/docs/type-aliases/BaseFetchCollectorOptions.md +1 -1
- package/docs/type-aliases/BrowserEngine.md +1 -1
- package/docs/type-aliases/FetchActionCapabilities.md +1 -1
- package/docs/type-aliases/FetchActionCapabilityMode.md +1 -1
- package/docs/type-aliases/FetchActionOptions.md +1 -1
- package/docs/type-aliases/FetchEngineAction.md +2 -2
- package/docs/type-aliases/FetchEngineType.md +1 -1
- package/docs/type-aliases/FetchReturnType.md +1 -1
- package/docs/type-aliases/FetchReturnTypeFor.md +1 -1
- package/docs/type-aliases/OnFetchPauseCallback.md +1 -1
- package/docs/type-aliases/ResourceType.md +1 -1
- package/docs/type-aliases/TrimPreset.md +13 -0
- package/docs/variables/DefaultFetcherProperties.md +1 -1
- package/docs/variables/FetcherOptionKeys.md +1 -1
- package/docs/variables/TRIM_PRESETS.md +11 -0
- package/package.json +11 -11
package/dist/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";var t,e=Object.create,i=Object.defineProperty,s=Object.getOwnPropertyDescriptor,n=Object.getOwnPropertyNames,r=Object.getPrototypeOf,o=Object.prototype.hasOwnProperty,a=(t,e,r,a)=>{if(e&&"object"==typeof e||"function"==typeof e)for(let c of n(e))o.call(t,c)||c===r||i(t,c,{get:()=>e[c],enumerable:!(a=s(e,c))||a.enumerable});return t},c={};((t,e)=>{for(var s in e)i(t,s,{get:e[s],enumerable:!0})})(c,{CheerioFetchEngine:()=>B,ClickAction:()=>D,DefaultFetcherProperties:()=>l,ExtractAction:()=>X,FetchAction:()=>w,FetchActionResultStatus:()=>h,FetchEngine:()=>$,FetchSession:()=>k,FetcherOptionKeys:()=>u,FillAction:()=>J,GetContentAction:()=>K,GotoAction:()=>V,PauseAction:()=>Y,PlaywrightFetchEngine:()=>W,SubmitAction:()=>Q,WaitForAction:()=>Z,WebFetcher:()=>C,fetchWeb:()=>tt}),module.exports=(t=c,a(i({},"__esModule",{value:!0}),t));var l={engine:"auto",enableSmart:!0,useSiteRegistry:!0,antibot:!1,debug:!1,headers:{},cookies:[],throwHttpErrors:void 0,output:{cookies:!0,sessionState:!0},proxy:[],blockResources:[],storage:{purge:!0},ignoreSslErrors:!0,browser:{engine:"playwright",headless:!0,waitUntil:"domcontentloaded"},http:{method:"GET"},timeoutMs:6e4,requestHandlerTimeoutSecs:void 0,maxConcurrency:1,maxRequestsPerMinute:1e3,delayBetweenRequestsMs:0,retries:0,sites:[]},u=Object.keys(l).concat(["actions","onPause"]),h=(t=>(t[t.Failed=0]="Failed",t[t.Success=1]="Success",t[t.Skipped=2]="Skipped",t))(h||{}),f=class t{static register(t){const e=t.id;if(!e)throw new Error("FetchAction.register: actionClass.id is required");this.registry.set(e,t)}static get(t){return this.registry.get(t)}static create(e){const i="string"==typeof e?e:e.id||e.name||e.action;if(!i)throw new Error("Action must have id, name or action");const s=i instanceof t?i.constructor:this.registry.get(i);return s?new s:void 0}static has(t){return this.registry.has(t)}static list(){return Array.from(this.registry.keys())}static getCapability(t){return 
this.capabilities[t]??"noop"}getCapability(t){return this.constructor.getCapability(t)}get id(){return this.constructor.id}get returnType(){return this.constructor.returnType}get capabilities(){return this.constructor.capabilities}async delegateToEngine(t,e,...i){const s=t.internal.engine;if(!s)throw new Error("No engine available");if("function"!=typeof s[e])throw new Error(`Engine does not have a method named '${String(e)}'`);return await s[e](...i)}installCollectors(e,i){const s=i?.collectors;if(!s?.length)return;const n=[],r=new Set;for(const i of s){const s=d(i.activateOn),o=d(i.collectOn),a=d(i.deactivateOn),c=!(i.background??!0),l=t.create(i);if(!l)continue;let u=!1,h=!1,f=0;const w=async t=>{if(!u&&!h){u=!0;try{await(l.onBeforeExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:l.id,phase:"before",error:t})}}},m=async(t,s)=>{if(!h){u||await w(s);try{const n=Promise.resolve(l.onExecute?.(e,i,s)).then(s=>{var n,r;if(i.storeAs){((n=e.outputs)[r=i.storeAs]||(n[r]=[])).push(s)}return e.eventBus.emit("collector:result",{action:this.id,collector:i.id||i.name,event:t,result:s}),s}).catch(s=>{e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,event:t,phase:"exec",error:s})}).finally(()=>{f++});c&&(r.add(n),n.finally(()=>r.delete(n)))}catch(i){e.eventBus.emit("collector:error",{action:this.id,collector:l.id,event:t,phase:"exec",error:i})}}},g=async()=>{if(!h){0===f&&m("collector:after"),h=!0;try{await(l.onAfterExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,phase:"after",error:t})}finally{e.eventBus.emit("collector:end",{action:this.id,collector:i.id||i.name}),x.forEach(t=>t())}}},b=p(e,s,w),x=y(e,o,m),v=p(e,a,g);if(n.push(...b,...x,...v),!s.length&&!o.length&&!a.length){const t=()=>{g()};e.eventBus.once(`action:${this.id}.end`,t),n.push(()=>e.eventBus.off("fetcher:action:end",t))}}return 
n.length||r.size>0?{cleanup:()=>n.forEach(t=>t()),awaitExecPendings:async()=>{r.size>0&&await Promise.allSettled(Array.from(r))}}:void 0}async beforeExec(t,e){t.internal.actionStack||(t.internal.actionStack=[]);const i=t.internal.actionStack,s=i.length,n=i.length>0?i[i.length-1].id:void 0,r={...e,id:this.id,depth:s,parent:n};i.push(r),t.currentAction=r;const o={action:this,context:t,options:e,index:e?.index,depth:s,stack:[...i]};t.eventBus.emit(`action:${this.id}.start`,o),t.eventBus.emit("action:start",o),await(this.onBeforeExec?.(t,e));return{entry:o,collectors:this.installCollectors(t,e)}}async afterExec(t,e,i,s){const n=t.internal.actionStack,r=n.length-1,o=s?.collectors;try{await(o?.awaitExecPendings()),t.lastResult=i,"response"!==i?.returnType||i.error||(t.lastResponse=i.result),e?.storeAs&&(t.outputs[e.storeAs]=i?.result),i?.error&&(t.currentAction.error=i.error),await(this.onAfterExec?.(t,e));const s={action:this,context:t,options:e,result:i,depth:r,stack:[...n]};i?.error&&(s.error=i.error);try{t.eventBus.emit(`action:${this.id}.end`,s)}catch(t){}try{t.eventBus.emit("action:end",s)}catch(t){}}finally{try{o?.cleanup()}finally{n.pop();const e=n.length;t.currentAction=e>0?n[e-1]:void 0}}}async execute(t,e){e?.args&&!e.params&&(e.params=e.args);const i=await this.beforeExec(t,e),s=e?.failOnError??!0;let n;try{return t.throwHttpErrors=s,n=await this.onExecute(t,e),n&&n.returnType||(n={status:1,returnType:this.returnType??"any",result:n}),n}catch(e){if(n={status:0,error:e,meta:{id:this.id,engineType:t.engine,capability:this.getCapability(t.engine)}},s)throw e;return n}finally{await this.afterExec(t,e,n,i)}}};f.registry=new Map,f.returnType="any",f.capabilities={http:"noop",browser:"noop"};var w=f;function d(t){return t?Array.isArray(t)?t:[t]:[]}function p(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=(...t)=>{i(t[0])};t.eventBus.once(n,e),s.push(()=>t.eventBus.off(n,e))}return s}function y(t,e,i){const s=[];for(const n of 
e)if("string"==typeof n||n instanceof RegExp){const e=t=>i(n,t);t.eventBus.on(n,e),s.push(()=>t.eventBus.off(n,e))}return s}var m=require("events-ex");var g=require("lodash-es"),b=(0,require("nanoid").customAlphabet)("0123456789abcdefghijklmnopqrstuvwxyz",12);var x=require("lodash-es"),v=require("events-ex"),E=require("@isdk/common-error"),S=require("crawlee");function q(){let t=()=>{};const e=new Promise(e=>{t=e});return e.release=t,e}S.Configuration.getGlobalConfig().set("persistStorage",!1);var $=class{constructor(){this.hdrs={},this._initializedSessions=new Set,this.pendingRequests=new Map,this.requestCounter=0,this.actionEmitter=new v.EventEmitter,this.isPageActive=!1,this.isEngineDisposed=!1,this.navigationLock=function(){const t=q();return t.release(),t}(),this.blockedTypes=new Set}static register(t){const e=t.id;if(!e)throw new Error("Engine must define static id");if(this.registry.has(e))throw new Error(`Engine id duplicated: ${e}`);this.registry.set(e,t)}static get(t){return this.registry.get(t)}static getByMode(t){for(const[e,i]of this.registry.entries())if(i.mode===t)return i}static async create(t,e){const i=(0,x.defaultsDeep)(e,t,l),s=i.engine??t.engine,n=s?this.get(s)??this.getByMode(s):null;if(n){const e=new n;return await e.initialize(t,i),e}}_isImplicitObject(t){if(!t||"object"!=typeof t)return!1;const e=new Set(["type","selector","attribute","has","exclude","properties","items","mode"]),i=Object.keys(t);if(0===i.length)return!1;if("type"in t)return!1;for(const t of i)if(!e.has(t))return!0;return!1}async _extract(t,e){const i=t.type;if(!e)return"array"===i?[]:null;if("object"===i){const{selector:i,properties:s}=t;let n=e;if(i){const t=await this._querySelectorAll(e,i);n=t.length>0?t[0]:null}if(!n)return null;const r={};for(const t in s)r[t]=await this._extract(s[t],n);return r}if(!i&&this._isImplicitObject(t)){const i={},s=t;for(const t in s)i[t]=await this._extract(s[t],e);return i}if("array"===i){const{selector:i,items:s,mode:n}=t,r=i?await 
this._querySelectorAll(e,i):[e],o=this._normalizeArrayMode(n);if((!n||"columnar"===o.type)&&1===r.length&&s){const t=await this._extractColumnar(s,r[0],o);if(t)return t}if("segmented"===o.type&&1===r.length&&s){const t=await this._extractSegmented(s,r[0],o);if(t)return t}return this._extractNested(s,r)}const{selector:s}=t;let n=e;if(s){const t=await this._querySelectorAll(e,s);n=t.length>0?t[0]:null}else Array.isArray(e)&&(n=e.length>0?e[0]:null);return n?this._extractValue(t,n):null}_normalizeArrayMode(t){return t?"string"==typeof t?{type:t}:t:{type:"nested"}}async _extractNested(t,e){const i=[];for(const s of e)i.push(await this._extract(t,s));return i}async _extractColumnar(t,e,i){const s="object"===t.type||!t.type&&this._isImplicitObject(t),n=!1!==i?.strict,r=!0===i?.inference;if(s){const i="object"===t.type?t.properties:t,s=Object.keys(i);if(0===s.length)return null;const o={};let a=null,c=0,l=[];for(const t of s){const s=i[t];if("array"===s.type||"object"===s.type||!s.type&&this._isImplicitObject(s))return null;const u=s;let h=[];h=u.selector?await this._querySelectorAll(e,u.selector):[e];const f=h.length;if(f>c&&(c=f,l=h),u.selector)if(null===a)a=f;else if(a!==f)if(r&&c>1)a=-1;else if(n)throw new E.CommonError(`Columnar extraction mismatch: field "${t}" has ${f} matches, but expected ${a}.`,"extract");const w=await Promise.all(h.map(t=>this._extractValue(u,t)));o[t]=w}if(r&&-1===a&&c>1&&l.length>0){const i=[];for(const t of l){let s=t,n=await this._parentElement(s),r=s;for(;n;){if(await this._isSameElement(n,e)){i.push(r);break}r=n,s=n,n=await this._parentElement(s)}}const s=[];for(const t of i){let e=!1;for(const i of s)if(await this._isSameElement(t,i)){e=!0;break}e||s.push(t)}if(s.length>1)return this._extractNested(t,s)}if(c<=1)return null;if(-1===a&&n)return null;const u=n&&-1!==a?a:c,h=[];for(let t=0;t<u;t++){const e={};for(const n of s){const s=o[n];if(1===s.length&&u>1){if(!i[n].selector){e[n]=s[0];continue}}e[n]=void 
0!==s[t]?s[t]:null}h.push(e)}return h}{const i=t;if(!i.selector)return null;const s=await this._querySelectorAll(e,i.selector);return s.length<=1?null:Promise.all(s.map(t=>this._extractValue(i,t)))}}async _extractSegmented(t,e,i){if(!("object"===t.type||!t.type&&this._isImplicitObject(t)))return null;const s="object"===t.type?t.properties:t,n=Object.keys(s);if(0===n.length)return null;const r=s[i?.anchor||n[0]];if(!r.selector)return null;const o=await this._querySelectorAll(e,r.selector);if(0===o.length)return[];const a=[];for(let e=0;e<o.length;e++){const i=o[e],s=[i,...await this._nextSiblingsUntil(i,r.selector)];a.push(await this._extract(t,s))}return a}async buildResponse(t){const e=await this._buildResponse(t),i=e.headers["content-type"]||"";return e.contentType=i.split(";")[0].trim(),!1!==this.opts?.output?.cookies?!e.cookies&&t.session&&(e.cookies=t.session.getCookies(t.request.url)):delete e.cookies,!1!==this.opts?.output?.sessionState?this.crawler?.sessionPool&&(e.sessionState=await this.crawler.sessionPool.getState()):delete e.sessionState,this.opts?.debug&&(e.metadata={...e.metadata,mode:this.mode,engine:this.id,proxy:t.proxyInfo?.url||("string"==typeof this.opts.proxy?this.opts.proxy:Array.isArray(this.opts.proxy)?this.opts.proxy[0]:void 0)}),e}waitFor(t){return this.dispatchAction({type:"waitFor",options:t})}click(t){return this.dispatchAction({type:"click",selector:t})}fill(t,e){return this.dispatchAction({type:"fill",selector:t,value:e})}submit(t,e){return this.dispatchAction({type:"submit",selector:t,options:e})}pause(t){return this.dispatchAction({type:"pause",message:t})}extract(t){const e=this._normalizeSchema(t);return this.dispatchAction({type:"extract",schema:e})}_normalizeSchema(t){const e=JSON.parse(JSON.stringify(t));if(e.properties)for(const t in 
e.properties)e.properties[t]=this._normalizeSchema(e.properties[t]);if(e.items&&(e.items=this._normalizeSchema(e.items)),"array"===e.type&&(e.attribute&&!e.items&&(e.items={attribute:e.attribute},delete e.attribute),e.items||(e.items={type:"string"})),e.selector&&(e.has||e.exclude)){const{selector:t,has:i,exclude:s}=e,n=t.split(",").map(t=>{let e=t.trim();return i&&(e=`${e}:has(${i})`),s&&(e=`${e}:not(${s})`),e}).join(", ");e.selector=n,delete e.has,delete e.exclude}return e}get id(){return this.constructor.id}async getState(){return{cookies:await this.cookies(),sessionState:await(this.crawler?.sessionPool?.getState())}}get mode(){return this.constructor.mode}get context(){return this.ctx}async initialize(t,e){if(this.ctx)return;(0,x.merge)(t,e),this.ctx=t,this.opts=t,this.hdrs=function(t){const e={};if(t&&"object"==typeof t)for(const[i,s]of Object.entries(t))e[i.toLowerCase()]=s;return e}(t.headers),this._initialCookies=[...t.cookies??[]],t.internal||(t.internal={}),t.internal.engine=this,t.engine=this.mode,this.actionEmitter.setMaxListeners(100);const i=t.storage||{},s=i.persist??!1,n=this.config=new S.Configuration({persistStorage:s,storageClientOptions:{persistStorage:s,...i.config},...i.config}),r=i.id||t.id;this.requestQueue=await S.RequestQueue.open(r,{config:n});const o=this.opts?.proxy?"string"==typeof this.opts.proxy?[this.opts.proxy]:this.opts.proxy:void 0;o?.length&&(this.proxyConfiguration=new S.ProxyConfiguration({proxyUrls:o}));const a=await this._getSpecificCrawlerOptions(t),c=(0,x.defaultsDeep)({persistenceOptions:{enable:!0,storeId:r},persistStateKeyValueStoreId:r},t.sessionPoolOptions,{maxPoolSize:1,sessionOptions:{maxUsageCount:1e3,maxErrorScore:3}});t.sessionState&&t.cookies&&t.cookies.length>0&&console.warn('[FetchEngine] Warning: Both "sessionState" and "cookies" are provided. 
Explicit "cookies" will override any conflicting cookies restored from "sessionState".');const l={...(0,x.defaultsDeep)(a,{requestQueue:this.requestQueue,maxConcurrency:1,minConcurrency:1,useSessionPool:!0,persistCookiesPerSession:!0,sessionPoolOptions:c}),requestHandler:this._requestHandler.bind(this),errorHandler:this._failedRequestHandler.bind(this),failedRequestHandler:this._failedRequestHandler.bind(this)};l.preNavigationHooks||(l.preNavigationHooks=[]),l.preNavigationHooks.unshift(({crawler:t,session:e,request:i},s)=>{if(this.currentSession=e,e&&!this._initializedSessions.has(e.id)){if(this._initialCookies&&this._initialCookies.length>0){const t=this._initialCookies.map(t=>{const e={...t};return"no_restriction"===e.sameSite&&(e.sameSite="None"),e});e.setCookies(t,i.url)}this._initializedSessions.add(e.id)}});const u=this.crawler=this._createCrawler(l,n),h=this.kvStore=await S.KeyValueStore.open(r,{config:n}),f=await h.getValue(S.PERSIST_STATE_KEY);!t.sessionState||f&&!t.overrideSessionState||await h.setValue(S.PERSIST_STATE_KEY,t.sessionState),this.isCrawlerReady=!0,this.crawlerRunPromise=u.run(),this.crawlerRunPromise.finally(()=>{this.isCrawlerReady=!1}).catch(t=>{console.error("Crawler background error:",t)})}async cleanup(){await(this._cleanup?.()),await this._commonCleanup();const t=this.ctx;t&&t.internal?.engine===this&&(t.internal.engine=void 0),this.ctx=void 0,this.opts=void 0}async _executePendingActions(t){this.isEngineDisposed||await new Promise(e=>{const i=async({action:e,resolve:i,reject:s})=>{try{if("dispose"===e.type)return this.actionEmitter.emit("dispose"),void i();i(await this.executeAction(t,e))}catch(t){s(t)}},s=()=>{this.actionEmitter.removeListener("dispatch",i),e()};this.actionEmitter.on("dispatch",i),this.actionEmitter.once("dispose",s),this.isEngineDisposed&&(s(),this.actionEmitter.removeListener("dispose",s))})}async _sharedRequestHandler(t){const{request:e}=t;try{this.currentSession=t.session,this.isPageActive=!0;const 
i=this.pendingRequests.get(e.userData.requestId);if(i){const s=await this.buildResponse(t),n=!s.statusCode||s.statusCode>=400;if(this.ctx?.throwHttpErrors&&n){const t=new E.CommonError(`Request for ${s.finalUrl} failed with status ${s.statusCode||"N/A"}`,"request",s.statusCode);i.reject(t)}else this.lastResponse=s,i.resolve(s);this.pendingRequests.delete(e.userData.requestId)}await this._executePendingActions(t)}finally{if(this.currentSession){const t=this.currentSession.getCookies(e.url);t&&(this._initialCookies=t)}this.isPageActive=!1,this.navigationLock.release()}}async _sharedFailedRequestHandler(t,e){const{request:i}=t,s=this.pendingRequests.get(i.userData.requestId);if(s&&e&&this.ctx?.throwHttpErrors){this.pendingRequests.delete(i.userData.requestId);const t=e.response,n=t?.statusCode||500,r=t?.url?t.url:i.url,o=new E.CommonError(`Request${r?" for "+r:""} failed: ${e.message}`,"request",n);s.reject(o)}return this._sharedRequestHandler(t)}async dispatchAction(t){if(!this.isPageActive)throw new Error("No active page. 
Call goto() before performing actions.");return new Promise((e,i)=>{this.actionEmitter.emit("dispatch",{action:t,resolve:e,reject:i})})}async _requestHandler(t){await this._sharedRequestHandler(t)}async _failedRequestHandler(t,e){await this._sharedFailedRequestHandler(t,e)}async _commonCleanup(){if(this.isEngineDisposed=!0,this._initializedSessions.clear(),this.actionEmitter.emit("dispose"),this.navigationLock?.release(),this.pendingRequests.size>0){for(const[,t]of this.pendingRequests)t.reject(new Error("Cleanup:Request cancelled"));this.pendingRequests.clear()}if(this.crawler){try{await(this.crawler.teardown?.())}catch(t){console.error("crawler teardown error:",t)}this.crawler=void 0}this.crawlerRunPromise=void 0,this.isCrawlerReady=void 0;const t=(this.opts?.storage||{}).purge??!0;this.requestQueue&&(t&&await this.requestQueue.drop().catch(t=>console.error("Error dropping requestQueue:",t)),this.requestQueue=void 0),this.kvStore&&(t&&await this.kvStore.drop().catch(t=>console.error("Error dropping kvStore:",t)),this.kvStore=void 0),this.actionEmitter.removeAllListeners(),this.pendingRequests.clear(),this.config=void 0}async blockResources(t,e){return e&&this.blockedTypes.clear(),t.forEach(t=>this.blockedTypes.add(t)),t.length}getContent(){return this.lastResponse?Promise.resolve(this.lastResponse):Promise.reject(new Error("No content fetched yet. 
Call goto() first."))}async headers(t,e){if(void 0===t)return{...this.hdrs};if("string"==typeof t&&void 0===e)return this.hdrs[t.toLowerCase()]||"";if(null!==t&&"object"==typeof t){const i={};for(const[e,s]of Object.entries(t))i[e.toLowerCase()]=String(s);return this.hdrs=!0===e?i:{...this.hdrs,...i},!0}return"string"==typeof t&&("string"==typeof e?this.hdrs[t.toLowerCase()]=e:null===e&&delete this.hdrs[t.toLowerCase()],!0)}async cookies(t){const e=this.lastResponse?.url||"";if(Array.isArray(t))return this.currentSession?this.currentSession.setCookies(t,e):this._initialCookies=[...t],!0;if(null===t)return this.currentSession,this._initialCookies=[],!0;if(this.currentSession){return this.currentSession.getCookies(e)}return[...this._initialCookies||[]]}async dispose(){await this.cleanup()}};async function _(t,e){let i;const s=e?.engine||t.engine;if(s&&"auto"!==s){if(i=await $.create(t,{engine:s}),!i)throw new Error(`Engine "${s}" is not available or failed to initialize.`);return i}const n=function(t,e){if(!t||!e?.length)return null;const i=new URL(t);let s=e.find(t=>t.domain===i.hostname);s||(s=e.find(t=>i.hostname.endsWith(t.domain)));if(!s)return null;if(s.pathScope?.length){if(!s.pathScope.some(t=>i.pathname.startsWith(t)))return null}return s}(e?.url||t.url,t.sites);if(n?.engine&&"auto"!==n.engine&&(i=await $.create(t,{engine:n.engine}),i))return i;if(i=await $.create(t,{engine:"http"}),!i)throw new Error("Failed to create default http engine");return i}$.registry=new Map;var k=class{constructor(t={}){this.options=t,this.closed=!1,this.id=b(),this.context=this.createContext(t)}async execute(t,e=this.context){const i=t.index??(e.internal.actionIndex||0);e.internal.actionIndex=i+1,await this.ensureEngine(t,e);const s=w.create(t);if(!s)throw new Error(`Unknown action: ${t.id||t.name}`);const n={...t,index:i};let r,o;e.currentAction={...n,startedAt:Date.now()};try{return r=await s.execute(e,n),r}catch(t){throw o=t,o}finally{e.currentAction=void 0}}async 
executeAll(t,e){const i=e?{...this.context,...e,id:this.context.id,eventBus:this.context.eventBus,outputs:this.context.outputs,execute:this.context.execute,action:this.context.action}:this.context;let s=e?.index??0;try{for(;s<t.length;){const e=t[s];await this.execute({...e,index:s},i),s++}const e=await this.execute({id:"getContent",index:s},i);return{result:e?.result,outputs:this.getOutputs()}}catch(t){throw t.actionIndex=s,t}}getOutputs(){return this.context.outputs}async getState(){return this.context.internal.engine?.getState()}async dispose(){if(this.closed)return;const t=this.context.eventBus;t.emit("session:closing",{sessionId:this.id});try{await(this.context.internal.engine?.dispose())}finally{this.closed=!0}t.emit("session:closed",{sessionId:this.id})}async ensureEngine(t,e){if(this.closed)throw new Error("Session is closed");if(!e.internal.engine){const i=t?.params?.url??e.url,s=await _(e,{url:i});if(!s)throw new Error("No engine found");e.internal.engine=s}}createContext(t=this.options){const e=new m.EventEmitter;return(0,g.defaultsDeep)({...t,id:this.id,eventBus:e,outputs:{},internal:{},execute:async t=>this.execute(t),action:async function(t,e,i){return this.execute({name:t,params:e,...i})}},l)}},C=class{constructor(t={}){this.defaults=t}async createSession(t){const e={...this.defaults,...t||{}};return new k(e)}async fetch(t,e){"string"!=typeof t&&(t=(e=t).url);const i=await this.createSession(e);try{const s=e?.actions||[];t&&0!==s.findIndex(e=>("goto"===e.id||"goto"===e.name)&&e.params?.url===t)&&s.unshift({id:"goto",params:{url:t}});return await i.executeAll(s)}finally{await i.dispose()}}},A=require("crawlee"),R=((t,s,n)=>(n=null!=t?e(r(t)):{},a(!s&&t&&t.__esModule?n:i(n,"default",{value:t,enumerable:!0}),t)))(require("cheerio")),j=require("@isdk/common-error"),P="___BR___",O="___BLOCK___",T="___P___",F=/\s+/g,U=new RegExp(` *(${P}|${O}|${T}) *`,"g"),N=new RegExp(`(?:${O}|${T})+`,"g");var H={"&":"&","<":"<",">":">",""":"""},M={"'":"'"," ":" 
","©":"©","®":"®","™":"™"};function L(t){return t?t.replace(/&(#?[a-zA-Z0-9]+);/g,t=>{const e=t.toLowerCase();if(H[e])return t;if(M[e])return M[e];if(t.startsWith("&#")){const e=t.startsWith("&#x")?parseInt(t.slice(3,-1),16):parseInt(t.slice(2,-1),10);if(!isNaN(e))return 160===e?" ":String.fromCharCode(e)}return t}):t}var B=class extends ${_ensureCheerioContext(t){if(!t.$&&t.body){let e="string"==typeof t.body?t.body:Buffer.isBuffer(t.body)?t.body.toString("utf-8"):JSON.stringify(t.body);e.trim().startsWith("<")||(e=`<html><body><pre>${e}</pre></body></html>`),t.$=R.load(e)}}async _buildResponse(t){this._ensureCheerioContext(t);const{request:e,response:i,body:s,$:n}=t,r=n?.html();let o="string"==typeof s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");r&&r!==o&&(o=r);let a=i?.headers;if(!a&&i?.rawHeaders){a={};const t=i.rawHeaders;for(let e=0;e<t.length;e+=2)a[t[e].toLowerCase()]=t[e+1]}const c={url:e.url,finalUrl:e.loadedUrl||e.url,statusCode:i?.statusCode??200,statusText:i?.statusMessage,headers:a||{},body:s,html:L(o),text:o};if(this.opts?.debug&&i?.timings){const t=i.timings;c.metadata={timings:{start:t.start,total:t.phases?.total,ttfb:t.phases?.firstByte,dns:t.phases?.dns,tcp:t.phases?.tcp,download:t.phases?.download}}}return c}async _querySelectorAll(t,e){if(Array.isArray(t)){if(0===t.length)return[];const{$:i}=t[0],s=t.map(t=>t.el[0]).filter(Boolean),n=i(s);return n.find(e).add(n.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}const{$:i,el:s}=t;return s.find(e).toArray().map(t=>({$:i,el:i(t)}))}async _nextSiblingsUntil(t,e){const{$:i,el:s}=t;return(e?s.nextUntil(e):s.nextAll()).toArray().map(t=>({$:i,el:i(t)}))}async _parentElement(t){const{$:e,el:i}=t,s=i.parent();return 0===s.length?null:{$:e,el:s}}async _isSameElement(t,e){return t.el[0]===e.el[0]}async _extractValue(t,e){const{$:i,el:s}=e,{attribute:n,type:r="string",mode:o="text"}=t;if(0===s.length)return null;let 
a="";if(n?a=s.attr(n)??null:"html"===r||"html"===o||"outerHTML"===o?(a="outerHTML"===o?i.html(s):s.html()??("html"===r?"":null),a&&(a=L(a.trim()))):a="innerText"===o?function(t){const e=t.clone();e.find("br").replaceWith(P),e.find("p").before(T).after(T),e.find("div, h1, h2, h3, h4, h5, h6, li, ul, ol, tr, dl, dt, dd, blockquote, pre, form, table, article, section, header, footer, nav, main, aside").before(O).after(O);let i=e.text();return i=i.replace(F," "),i=i.replace(U,"$1"),i=i.replace(N,t=>t.includes(T)?T:O),i=i.replaceAll(P,"\n"),i=i.replaceAll(T,"\n\n"),i=i.replaceAll(O,"\n"),i.trim()}(s):s.text().trim(),null===a)return null;switch(r){case"number":return parseFloat(a.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=a.toLowerCase();return"true"===t||"1"===t;default:return a}}async executeAction(t,e){const{$:i}=t;switch(e.type){case"dispose":return;case"extract":if(!i)throw new j.CommonError(`Cheerio context not available for action: ${e.type}`,"extract");return this._extract(e.schema,{$:i,el:i.root()});case"click":{if(!i)throw new j.CommonError(`Cheerio context not available for action: ${e.type}`,"click");const s=e.selector,n=i(s).first();let r;if(0===n.length)try{r=new URL(s,t.request.loadedUrl||t.request.url).href}catch{throw new j.CommonError(`click: selector not found or invalid URL: ${s}`,"click")}else{if(!n.is("a")||!n.attr("href")){if(n.is('input[type="submit"], button[type="submit"], button, input')){const e=n.closest("form");if(e.length)return this.executeAction(t,{type:"submit",selector:e});throw new j.CommonError("click: submit-like element without form","click")}throw new j.CommonError(`click: unsupported element for http simulate. 
Selector: ${s}`,"click")}{const e=n.attr("href");r=new URL(e,t.request.loadedUrl||t.request.url).href}}const o=await t.sendRequest({url:r});return void await this._updateStateAfterNavigation(t,o)}case"fill":{if(!i)throw new j.CommonError(`Cheerio context not available for action: ${e.type}`),"fill";const s=i(e.selector).first();if(0===s.length)throw new j.CommonError(`fill: selector not found: ${e.selector}`);if(!s.is("input, textarea, select"))throw new j.CommonError(`fill: not a form field: ${e.selector}`);return s.val(e.value),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":return void(e.options?.ms&&await new Promise(t=>setTimeout(t,e.options.ms)));case"pause":const s=this.ctx?.onPause;return void(s?(console.info(e.message||"Execution paused for manual intervention."),await s({message:e.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped."));case"submit":{if(!i)throw new j.CommonError(`Cheerio context not available for action: ${e.type}`,"submit");const s="string"==typeof e.selector?i(e.selector).first():null!=e.selector?e.selector:i("form").first();if(0===s.length)throw new j.NotFoundError(e.selector,"submit");const n=s.attr("action")||t.request.loadedUrl||t.request.url,r=(s.attr("method")||"GET").toUpperCase(),o=new URL(n,t.request.loadedUrl||t.request.url).href,a={};let c;if(s.find("input, select, textarea").each((t,e)=>{const s=i(e),n=s.attr("name");if(!n)return;const r=s.val();null!=r&&(a[n]=String(r))}),"GET"===r){const e=new URL(o);Object.entries(a).forEach(([t,i])=>e.searchParams.set(t,i)),c=await t.sendRequest({url:e.href,method:"GET"})}else{let i;const n={};"application/json"===(e.options?.enctype||s.attr("enctype")||"application/x-www-form-urlencoded")?(i=JSON.stringify(a),n["Content-Type"]="application/json"):(i=new URLSearchParams(a).toString(),n["Content-Type"]="application/x-www-form-urlencoded"),c=await 
t.sendRequest({url:o,method:"POST",body:i,headers:n})}return void await this._updateStateAfterNavigation(t,c)}case"getContent":return this.buildResponse(t);default:throw new j.CommonError(`Unknown action type: ${e.type}`,"CheerioFetchEngine.executeAction",j.ErrorCode.NotSupported)}}async _updateStateAfterNavigation(t,e){const i=e;t.response=i,t.body=i.body,t.$=void 0,i.url&&(t.request.loadedUrl=i.url),this.lastResponse=await this.buildResponse(t)}_createCrawler(t,e){return new A.CheerioCrawler(t,e)}_getSpecificCrawlerOptions(t){return{additionalMimeTypes:["text/plain"],maxRequestRetries:1,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,proxyConfiguration:this.proxyConfiguration,preNavigationHooks:[({session:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors,this.opts?.timeoutMs&&(s.timeout={request:this.opts.timeoutMs})}]}}async goto(t,e){this.isPageActive&&this.dispatchAction({type:"dispose"}).catch(()=>{});const i="req-"+ ++this.requestCounter,s=new Promise((t,s)=>{const n=e?.timeoutMs||this.opts?.timeoutMs||3e4,r=setTimeout(()=>{this.pendingRequests.delete(i),this.navigationLock.release(),s(new j.CommonError(`goto timed out after ${n}ms.`,"gotoTimeout",j.ErrorCode.RequestTimeout))},n);this.pendingRequests.set(i,{resolve:e=>{clearTimeout(r),t(e)},reject:t=>{clearTimeout(r),s(t)}})});return this.requestQueue.addRequest({...e,url:t,headers:{...this.hdrs,...e?.headers},userData:{requestId:i},uniqueKey:`${t}-${i}`}).catch(t=>{const e=this.pendingRequests.get(i);e&&(this.pendingRequests.delete(i),this.navigationLock.release(),e.reject(t))}),await this.navigationLock,this.navigationLock=q(),s}};B.id="cheerio",B.mode="http",$.register(B);var I=require("crawlee"),z=require("playwright"),G=require("@isdk/common-error"),W=class extends ${async 
_buildResponse(t){const{page:e,response:i,request:s,session:n}=t;if(!e||e.isClosed())return{url:s.url,finalUrl:s.loadedUrl||s.url,statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:"",html:"",text:""};const r=await e.content(),o=await e.textContent("body"),a=await e.context().cookies();n&&n.setCookies(a,s.url);const c={url:e.url(),finalUrl:e.url(),statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:r,html:r,text:o||""};if(this.opts?.debug&&i){const t="function"==typeof i.request?i.request():i.request;if(t&&"function"==typeof t.timing){const e=t.timing();c.metadata={timings:{start:e.startTime,total:e.responseEnd-e.startTime,ttfb:e.responseStart-e.requestStart,dns:e.domainLookupEnd-e.domainLookupStart,tcp:e.connectEnd-e.connectStart,download:e.responseEnd-e.responseStart}}}}return!1!==this.opts?.output?.cookies&&(c.cookies=a),c}async _querySelectorAll(t,e){if(Array.isArray(t)){const i=[];for(const s of t){const t=await s.locator(e).all();i.push(...t);await s.evaluate((t,e)=>t.matches(e),e)&&i.push(s)}return i}return t.locator(e).all()}async _nextSiblingsUntil(t,e){const i=await t.locator("xpath=following-sibling::*").all();if(!e)return i;const s=[];for(const t of i){if(await t.evaluate((t,e)=>t.matches(e),e))break;s.push(t)}return s}async _parentElement(t){const e=t.locator("xpath=..");return 0===await e.count()?null:e.first()}async _isSameElement(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;const n=await i.evaluate((t,e)=>t===e,s);return await i.dispose(),await s.dispose(),n}async _extractValue(t,e){const{attribute:i,type:s="string",mode:n="text"}=t;if(0===await e.count())return null;let r="";if(i?r=await e.getAttribute(i):"html"===s||"html"===n||"outerHTML"===n?(r="outerHTML"===n?await e.evaluate(t=>t.outerHTML):await e.innerHTML(),r&&(r=L(r))):r="innerText"===n?await e.innerText():await e.textContent(),null===r)return 
null;switch(r=r.trim(),s){case"number":return parseFloat(r.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=r.toLowerCase();return"true"===t||"1"===t;default:return r}}async executeAction(t,e){const{page:i}=t,s=this.opts?.timeoutMs||3e4;switch(e.type){case"navigate":{const s=await i.goto(e.url,{waitUntil:e.opts?.waitUntil||"domcontentloaded",timeout:this.opts?.timeoutMs||3e4});s&&(t={...t,response:s});const n=await this.buildResponse(t);return this.lastResponse=n,n}case"extract":{const s=await this._extract(e.schema,i.locator("body"));return this.lastResponse=await this.buildResponse(t),s}case"click":{await i.click(e.selector,{timeout:s}),await i.waitForLoadState("networkidle",{timeout:s});const n=await this.buildResponse(t);return void(this.lastResponse=n)}case"fill":await i.fill(e.selector,e.value,{timeout:s});const n=await this.buildResponse(t);return void(this.lastResponse=n);case"waitFor":try{e.options?.selector&&await i.waitForSelector(e.options.selector,{timeout:s}),e.options?.networkIdle&&await i.waitForLoadState("networkidle",{timeout:s})}catch(t){if(!1!==e.options?.failOnTimeout)throw t}return void(e.options?.ms&&await i.waitForTimeout(e.options.ms));case"submit":{const n=e.selector||"form",r=i.locator(n).first();if(0===await r.count())throw new G.NotFoundError(n,"submit");if("application/json"===(e.options?.enctype||"application/x-www-form-urlencoded")){const t=await r.elementHandle();if(!t)throw new G.CommonError(`submit: could not get form handle for ${n}`,"submit");const e=await t.evaluate(async t=>{const e=new FormData(t),i={};e.forEach((t,e)=>{i[e]=t.toString()});const s=await fetch(t.action,{method:t.method,headers:{"Content-Type":"application/json"},body:JSON.stringify(i)}),n=await s.text();return{status:s.status,statusText:s.statusText,headers:Object.fromEntries(s.headers.entries()),body:n,html:n,text:n,url:t.action,finalUrl:s.url}});return await t.dispose(),await i.setContent(e.html),void(this.lastResponse=e)}return await 
r.evaluate(t=>t.submit()),await i.waitForLoadState("networkidle",{timeout:s}),void(this.lastResponse=await this.buildResponse(t))}case"pause":{const t=this.ctx?.onPause;return void(t?(console.info(e.message||"Execution paused for manual intervention."),await t({message:e.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped."))}case"getContent":return this.buildResponse(t);default:throw new G.CommonError(`Unknown action type: ${e.type}`,"PlaywrightFetchEngine.executeAction",G.ErrorCode.NotSupported)}}_createCrawler(t,e){return new I.PlaywrightCrawler(t,e)}async _getSpecificCrawlerOptions(t){const e=t.browser?.headless??!0,i={maxRequestRetries:t.retries||3,headless:e,proxyConfiguration:this.proxyConfiguration,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,preNavigationHooks:[async({page:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors;const n=this.blockedTypes;n.size>0&&await e.route("**/*",t=>{n.has(t.request().resourceType())?t.abort():t.continue()})}]};if(this.opts?.antibot){i.browserPoolOptions={useFingerprints:!1};const{launchOptions:t}=await import("camoufox-js"),s=await t({headless:e});i.launchContext={launcher:z.firefox,launchOptions:s},i.postNavigationHooks=[async({page:t,handleCloudflareChallenge:e})=>{await e()}]}return i}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});if(!this.requestQueue)throw new G.CommonError("RequestQueue not initialized","goto");const i="req-"+ ++this.requestCounter,s=new Promise((t,e)=>{this.pendingRequests.set(i,{resolve:t,reject:e})});return await this.requestQueue.addRequest({url:t,headers:this.hdrs,userData:{requestId:i,waitUntil:e?.waitUntil||"domcontentloaded"},uniqueKey:`${t}-${i}`}),s}};W.id="playwright",W.mode="browser",$.register(W);var D=class extends w{async onExecute(t,e){const{selector:i,...s}=e?.params||{};if(!i)throw new Error("Selector is required for 
click action");await this.delegateToEngine(t,"click",i,s)}};D.id="click",D.returnType="none",D.capabilities={http:"simulate",browser:"native"},w.register(D);var J=class extends w{async onExecute(t,e){const{selector:i,value:s,...n}=e?.params||{};if(!i)throw new Error("Selector is required for fill action");if(void 0===s)throw new Error("Value is required for fill action");await this.delegateToEngine(t,"fill",i,s,n)}};J.id="fill",J.returnType="none",J.capabilities={http:"simulate",browser:"native"},w.register(J);var K=class extends w{async onExecute(t,e){return await this.delegateToEngine(t,"getContent",e?.params)}};K.id="getContent",K.returnType="response",K.capabilities={http:"native",browser:"native"},w.register(K);var V=class extends w{async onExecute(t,e,i){const s=e?.params,n=s?.url||t.url;if(!n)throw new Error("URL is required for goto action");const r=t.internal.engine;if(!r)throw new Error("No engine available");t.url=n;return await r.goto(n,s)}};V.id="goto",V.returnType="response",V.capabilities={http:"native",browser:"native"},w.register(V);var Q=class extends w{async onExecute(t,e){const{selector:i,...s}=e?.params||{};await this.delegateToEngine(t,"submit",i,s)}};Q.id="submit",Q.returnType="none",Q.capabilities={http:"simulate",browser:"native"},w.register(Q);var Z=class extends w{async onExecute(t,e){const i=t.internal.engine;if(!i)throw new Error("No engine available");await i.waitFor(e?.params)}};Z.id="waitFor",Z.returnType="none",Z.capabilities={http:"native",browser:"native"},w.register(Z);var X=class extends w{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("Schema is required for extract action");return this.delegateToEngine(t,"extract",i)}};X.id="extract",X.returnType="any",X.capabilities={http:"native",browser:"native"},w.register(X);var Y=class extends w{async 
onExecute(t,e){const{selector:i,message:s,attribute:n}=e?.params||{},r=t.internal.engine;if("browser"===r?.mode){if(i){if(!await(r?.extract({selector:i,attribute:n})))return}r&&"pause"in r?await r.pause(s):console.warn("[PauseAction] was called, but the current engine does not support `pause`. Skipped.")}else console.warn("[PauseAction] can only run in browser engine. Skipped.")}};async function tt(t,e){return(new C).fetch(t,e)}Y.id="pause",Y.capabilities={http:"native",browser:"native"},Y.returnType="none",w.register(Y);
|
|
1
|
+
"use strict";

// ---------------------------------------------------------------------------
// Bundler-generated CommonJS interop helpers and the export table.
// ---------------------------------------------------------------------------
var t,
  e = Object.create,
  i = Object.defineProperty,
  s = Object.getOwnPropertyDescriptor,
  n = Object.getOwnPropertyNames,
  r = Object.getPrototypeOf,
  o = Object.prototype.hasOwnProperty,
  // Copies every own property of `from` onto `to` as a live getter, skipping
  // keys `to` already owns and the single key named by `except`.
  a = (to, from, except, desc) => {
    if ((from && "object" == typeof from) || "function" == typeof from) {
      for (let key of n(from)) {
        o.call(to, key) ||
          key === except ||
          i(to, key, {
            get: () => from[key],
            enumerable: !(desc = s(from, key)) || desc.enumerable,
          });
      }
    }
    return to;
  },
  c = {};

// Public exports are lazy getters, so they may reference bindings that are
// only assigned further down the bundle.
((target, getters) => {
  for (var name in getters) i(target, name, { get: getters[name], enumerable: true });
})(c, {
  CheerioFetchEngine: () => at,
  ClickAction: () => dt,
  DefaultFetcherProperties: () => l,
  EvaluateAction: () => $t,
  ExtractAction: () => xt,
  FetchAction: () => d,
  FetchActionResultStatus: () => h,
  FetchEngine: () => D,
  FetchSession: () => z,
  FetcherOptionKeys: () => u,
  FillAction: () => wt,
  GetContentAction: () => yt,
  GotoAction: () => pt,
  KeyboardPressAction: () => Et,
  KeyboardTypeAction: () => qt,
  MouseClickAction: () => kt,
  MouseMoveAction: () => _t,
  PauseAction: () => bt,
  PlaywrightFetchEngine: () => ft,
  SubmitAction: () => mt,
  TRIM_PRESETS: () => V,
  TrimAction: () => vt,
  WaitForAction: () => gt,
  WebFetcher: () => K,
  fetchWeb: () => St,
});
module.exports = ((t = c), a(i({}, "__esModule", { value: true }), t));

// ---------------------------------------------------------------------------
// l: DefaultFetcherProperties, u: FetcherOptionKeys,
// h: FetchActionResultStatus, f: FetchAction base class.
// ---------------------------------------------------------------------------
var l = {
    engine: "auto",
    enableSmart: true,
    useSiteRegistry: true,
    antibot: false,
    debug: false,
    headers: {},
    cookies: [],
    throwHttpErrors: void 0,
    output: { cookies: true, sessionState: true },
    proxy: [],
    blockResources: [],
    storage: { purge: true },
    ignoreSslErrors: true,
    browser: { engine: "playwright", headless: true, waitUntil: "domcontentloaded" },
    http: { method: "GET" },
    timeoutMs: 6e4,
    requestHandlerTimeoutSecs: void 0,
    maxConcurrency: 1,
    maxRequestsPerMinute: 1e3,
    delayBetweenRequestsMs: 0,
    retries: 0,
    sites: [],
  },
  // Every default key plus the two options that have no default value.
  u = Object.keys(l).concat(["actions", "onPause"]),
  // Two-way enum: name -> number and number -> name.
  h = ((status) => (
    (status[(status.Failed = 0)] = "Failed"),
    (status[(status.Success = 1)] = "Success"),
    (status[(status.Skipped = 2)] = "Skipped"),
    status
  ))(h || {}),
  /**
   * Base class of all fetch actions. Subclasses set a static `id` (and
   * optionally `returnType` / `capabilities`), implement `onExecute`, and are
   * registered with `register()` so they can be created by id.
   */
  f = class t {
    /**
     * Register an action class under its static `id`.
     * @throws {Error} when the class has no `id`.
     */
    static register(actionClass) {
      const id = actionClass.id;
      if (!id) throw new Error("FetchAction.register: actionClass.id is required");
      this.registry.set(id, actionClass);
    }

    /** Look up a registered action class by id. */
    static get(id) {
      return this.registry.get(id);
    }

    /**
     * Instantiate an action from an id string or from an options object that
     * carries `id`, `name` or `action`.
     * @returns a new action instance, or undefined when nothing is registered.
     * @throws {Error} when no identifier can be derived.
     */
    static create(spec) {
      const key = "string" == typeof spec ? spec : spec.id || spec.name || spec.action;
      if (!key) throw new Error("Action must have id, name or action");
      const ActionClass = key instanceof t ? key.constructor : this.registry.get(key);
      return ActionClass ? new ActionClass() : void 0;
    }

    /** True when an action class is registered under `id`. */
    static has(id) {
      return this.registry.has(id);
    }

    /** Ids of all registered action classes. */
    static list() {
      return Array.from(this.registry.keys());
    }

    /** Capability declared for an engine mode; "noop" when undeclared. */
    static getCapability(mode) {
      return this.capabilities[mode] ?? "noop";
    }

    getCapability(mode) {
      return this.constructor.getCapability(mode);
    }

    get id() {
      return this.constructor.id;
    }

    get returnType() {
      return this.constructor.returnType;
    }

    get capabilities() {
      return this.constructor.capabilities;
    }

    /**
     * Call `engine[method](...args)` on the engine stored in
     * `context.internal.engine`.
     * @throws {Error} when there is no engine or it lacks the method.
     */
    async delegateToEngine(context, method, ...args) {
      const engine = context.internal.engine;
      if (!engine) throw new Error("No engine available");
      if ("function" != typeof engine[method]) {
        throw new Error(`Engine does not have a method named '${String(method)}'`);
      }
      return await engine[method](...args);
    }

    /**
     * Wire up the collectors listed in `options.collectors`.
     *
     * Each collector is itself an action created through `create()`. It is
     * activated on `activateOn` events, executed on every `collectOn` event
     * and finished on `deactivateOn` events. A collector with no triggers at
     * all is finished when this action emits its `action:<id>.end` event; if
     * it never ran by then it is executed once with the event name
     * "collector:after". `w`, `y` and `p` are sibling helpers defined right
     * after this block (to-array, once-listeners, on-listeners).
     *
     * @returns {{cleanup: Function, awaitExecPendings: Function}|undefined}
     *   `cleanup` removes every listener installed here; `awaitExecPendings`
     *   waits for the runs of collectors declared with `background: false`.
     */
    installCollectors(context, options) {
      const specs = options?.collectors;
      if (!specs?.length) return;
      const disposers = [];
      const pending = new Set();
      for (const spec of specs) {
        const activateOn = w(spec.activateOn);
        const collectOn = w(spec.collectOn);
        const deactivateOn = w(spec.deactivateOn);
        // `background` defaults to true; only non-background runs are awaited.
        const tracked = !(spec.background ?? true);
        const collector = t.create(spec);
        if (!collector) continue;
        let activated = false;
        let finished = false;
        let runs = 0;

        const activate = async () => {
          if (activated || finished) return;
          activated = true;
          try {
            await collector.onBeforeExec?.(context, spec);
          } catch (error) {
            context.eventBus.emit("collector:error", {
              action: this.id,
              collector: collector.id,
              phase: "before",
              error,
            });
          }
        };

        const collect = async (event, payload) => {
          if (finished) return;
          activated || (await activate(payload));
          try {
            const run = Promise.resolve(collector.onExecute?.(context, spec, payload))
              .then((result) => {
                if (spec.storeAs) {
                  const outputs = context.outputs;
                  (outputs[spec.storeAs] || (outputs[spec.storeAs] = [])).push(result);
                }
                context.eventBus.emit("collector:result", {
                  action: this.id,
                  collector: spec.id || spec.name,
                  event,
                  result,
                });
                return result;
              })
              .catch((error) => {
                context.eventBus.emit("collector:error", {
                  action: this.id,
                  collector: spec.id || spec.name,
                  event,
                  phase: "exec",
                  error,
                });
              })
              .finally(() => {
                runs++;
              });
            if (tracked) {
              pending.add(run);
              run.finally(() => pending.delete(run));
            }
          } catch (error) {
            context.eventBus.emit("collector:error", {
              action: this.id,
              collector: collector.id,
              event,
              phase: "exec",
              error,
            });
          }
        };

        const finish = async () => {
          if (finished) return;
          // Deliberately not awaited: the run is started before the collector
          // is marked finished, and its errors are reported via events.
          if (0 === runs) collect("collector:after");
          finished = true;
          try {
            await collector.onAfterExec?.(context, spec);
          } catch (error) {
            context.eventBus.emit("collector:error", {
              action: this.id,
              collector: spec.id || spec.name,
              phase: "after",
              error,
            });
          } finally {
            context.eventBus.emit("collector:end", {
              action: this.id,
              collector: spec.id || spec.name,
            });
            collectOffs.forEach((off) => off());
          }
        };

        const activateOffs = y(context, activateOn, activate);
        const collectOffs = p(context, collectOn, collect);
        const deactivateOffs = y(context, deactivateOn, finish);
        disposers.push(...activateOffs, ...collectOffs, ...deactivateOffs);

        if (!activateOn.length && !collectOn.length && !deactivateOn.length) {
          const endEvent = `action:${this.id}.end`;
          const onActionEnd = () => {
            finish();
          };
          context.eventBus.once(endEvent, onActionEnd);
          // Unregister from the same event the listener was added to. The
          // previous disposer targeted "fetcher:action:end", so cleanup was a
          // no-op and a stale listener could fire on a later run.
          disposers.push(() => context.eventBus.off(endEvent, onActionEnd));
        }
      }
      return disposers.length || pending.size > 0
        ? {
            cleanup: () => disposers.forEach((dispose) => dispose()),
            awaitExecPendings: async () => {
              pending.size > 0 && (await Promise.allSettled(Array.from(pending)));
            },
          }
        : void 0;
    }

    /**
     * Push this action on the context's action stack, emit the start events,
     * run the optional `onBeforeExec` hook and install collectors.
     * @returns {Promise<{entry: object, collectors: object|undefined}>}
     */
    async beforeExec(context, options) {
      context.internal.actionStack || (context.internal.actionStack = []);
      const stack = context.internal.actionStack;
      const depth = stack.length;
      const parent = stack.length > 0 ? stack[stack.length - 1].id : void 0;
      const frame = { ...options, id: this.id, depth, parent };
      stack.push(frame);
      context.currentAction = frame;
      const entry = {
        action: this,
        context,
        options,
        index: options?.index,
        depth,
        stack: [...stack],
      };
      context.eventBus.emit(`action:${this.id}.start`, entry);
      context.eventBus.emit("action:start", entry);
      await this.onBeforeExec?.(context, options);
      return { entry, collectors: this.installCollectors(context, options) };
    }

    /**
     * Record the result on the context, store it under `options.storeAs`
     * (two plain objects are shallow-merged, anything else overwrites), run
     * `onAfterExec`, emit the end events, then always clean up collectors and
     * pop the action stack.
     */
    async afterExec(context, options, result, scope) {
      const stack = context.internal.actionStack;
      const depth = stack.length - 1;
      const collectors = scope?.collectors;
      try {
        await collectors?.awaitExecPendings();
        context.lastResult = result;
        if ("response" === result?.returnType && !result.error) {
          context.lastResponse = result.result;
        }
        if (options?.storeAs) {
          const previous = context.outputs[options.storeAs];
          const next = result?.result;
          const mergeable =
            "object" == typeof previous &&
            null !== previous &&
            "object" == typeof next &&
            null !== next &&
            !Array.isArray(previous) &&
            !Array.isArray(next);
          context.outputs[options.storeAs] = mergeable ? { ...previous, ...next } : next;
        }
        if (result?.error) context.currentAction.error = result.error;
        await this.onAfterExec?.(context, options);
        const payload = {
          action: this,
          context,
          options,
          result,
          depth,
          stack: [...stack],
        };
        if (result?.error) payload.error = result.error;
        // Listener failures are intentionally ignored so they cannot break the action.
        try {
          context.eventBus.emit(`action:${this.id}.end`, payload);
        } catch (ignored) {}
        try {
          context.eventBus.emit("action:end", payload);
        } catch (ignored) {}
      } finally {
        try {
          collectors?.cleanup();
        } finally {
          stack.pop();
          const remaining = stack.length;
          context.currentAction = remaining > 0 ? stack[remaining - 1] : void 0;
        }
      }
    }

    /**
     * Run the action: beforeExec -> onExecute -> afterExec.
     *
     * `options.args` is accepted as an alias of `options.params`. A raw return
     * value is wrapped as `{status: 1, returnType, result}`. On error the
     * record is `{status: 0, error, meta}` and the error is rethrown unless
     * `options.failOnError` is false.
     */
    async execute(context, options) {
      options?.args && !options.params && (options.params = options.args);
      const scope = await this.beforeExec(context, options);
      const failOnError = options?.failOnError ?? true;
      let outcome;
      try {
        context.throwHttpErrors = failOnError;
        outcome = await this.onExecute(context, options);
        (outcome && outcome.returnType) ||
          (outcome = { status: 1, returnType: this.returnType ?? "any", result: outcome });
        return outcome;
      } catch (error) {
        outcome = {
          status: 0,
          error,
          meta: {
            id: this.id,
            engineType: context.engine,
            capability: this.getCapability(context.engine),
          },
        };
        if (failOnError) throw error;
        return outcome;
      } finally {
        await this.afterExec(context, options, outcome, scope);
      }
    }
  };
f.registry = new Map();
f.returnType = "any";
f.capabilities = { http: "noop", browser: "noop" };
// `d` is the exported FetchAction alias. The text that follows this block
// starts with `d=f;`, which simply repeats this assignment.
var d = f;
d=f;function w(t){return t?Array.isArray(t)?t:[t]:[]}function y(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=(...t)=>{i(t[0])};t.eventBus.once(n,e),s.push(()=>t.eventBus.off(n,e))}return s}function p(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=t=>i(n,t);t.eventBus.on(n,e),s.push(()=>t.eventBus.off(n,e))}return s}var m=require("events-ex");var g=require("lodash-es"),x=(0,require("nanoid").customAlphabet)("0123456789abcdefghijklmnopqrstuvwxyz",12);function b(t,e,...i){if(!t)return;const{prefix:s,id:n,category:r}=e;if(!0===t||t===r||Array.isArray(t)&&t.includes(r)){const t=n?`:${n}`:"";console.log(`[${s}${t}:${r}]`,...i)}}var v=require("lodash-es"),$=require("events-ex"),_=require("@isdk/common-error"),k=require("crawlee"),q=require("@isdk/common-error"),E=new Set(["string","number","boolean","html","object","array"]),S=new Set(["selector","has","exclude","required","strict","relativeTo","order","anchor","depth"]);function C(t){if("string"==typeof t)return{type:"string",selector:t,mode:"text"};if(!t||"object"!=typeof t)return{type:"string",mode:"text"};let e={...t};if(function(t){if(!t||"object"!=typeof t)return!1;if(Array.isArray(t))return!1;if("type"in t)return"string"!=typeof t.type||!E.has(t.type);const e=Object.keys(t);if(0===e.length)return!1;for(const t of e)if(!["selector","attribute","has","exclude","mode","required","strict","relativeTo","order","anchor","depth"].includes(t))return!0;return!1}(e)){const t={};for(const i of Object.keys(e))S.has(i)||(t[i]=C(e[i]),delete e[i]);e.type="object",e.properties=t}if(e.type||(e.type="string"),"object"===e.type){const t=e;t.properties||(t.properties={});for(const e in t.properties)t.properties[e]=C(t.properties[e]);delete t.mode,delete t.items,delete t.attribute}else if("array"===e.type){const t=e;t.attribute&&!t.items&&(t.items={type:"string",attribute:t.attribute,mode:"text"},delete 
t.attribute),t.items||(t.items={type:"string",mode:"text"}),t.items=C(t.items),"string"==typeof t.mode&&(t.mode={type:t.mode})}else{const t=e;t.mode||("html"===t.type?t.mode="html":t.mode="text")}if(e.selector&&(e.has||e.exclude)){const{selector:t,has:i,exclude:s}=e,n=t.split(",").map(t=>t.trim()).map(t=>{let e=t;return i&&(e+=`:has(${i})`),s&&(e+=`:not(${s})`),e});e.selector=n.join(", "),delete e.has,delete e.exclude}return e}async function A(t,e,i){const s=C(t);return R.call(this,s,e,i)}async function R(t,e,i){const s=t.type,n=t.selector,r=t.strict??i;if(!e)return this._logDebug("extract",`_extract: No scope for selector "${n||""}", type "${s||"value"}"`),"array"===s?[]:null;switch(s){case"object":return T.call(this,t,e,r);case"array":return P.call(this,t,e,r);default:return O.call(this,t,e,r)}}async function T(t,e,i){const{selector:s,properties:n,strict:r}=t,o=r??i,a=t._skipSelector;let c=e;if(s&&!a){const t=await this._querySelectorAll(e,s);c=t.length>0?t[0]:null,this._logDebug("extract",`_extractObject: selector "${s}" found ${t.length} elements`)}if(!c){if(this._logDebug("extract",`_extractObject: scope not found for selector "${s||""}"`),o&&t.required)throw new q.CommonError(`Required object "${s||""}" is missing.`,"extract");return null}let l=t.depth??0;const u=l;for(;;){const{result:i,hasValue:r,missingRequired:h}=await j.call(this,t,c,o);if(0===h.length)return!s&&!r&&Object.keys(n).length>0?(this._logDebug("extract","_extractObject result: null"),null):(this._logDebug("extract","_extractObject result:",i),i);let f=!1;if(l>0)if(a)f=!0;else{const t=await this._isSameElement(c,e),i=await this._contains(e,c);f=!t&&i}if(f){const t=await this._parentElement(c);if(t){let i=!0;if(a||(i=await this._isSameElement(e,t)||await this._contains(e,t)),i){this._logDebug("extract",`_extractObject: missing required fields [${h.join(", ")}], bubbling up from depth ${u-l} to ${u-l+1}`),c=t,l--;continue}}}if(o)throw new q.CommonError(`Required property "${h[0]}" is 
missing.`,"extract");return null}}async function j(t,e,i){const{properties:s,relativeTo:n,order:r}=t,o={},a=[];let c=!1;const l=r||Object.keys(s);let u=e;const h=new Map,f="previous"===n;for(const t of l){const n=s[t];if(!n)continue;this._logDebug("extract",`_extractObject: extracting property "${t}"`);let r,l=u;if(n.anchor){const r=await L.call(this,n.anchor,s,h,e,f,n.depth);if(!r){if(i)throw new q.CommonError(`Anchor "${n.anchor}" not found or out of scope.`,"extract");o[t]=null,n.required&&a.push(t);continue}l=r.scopeForField,f&&(u=l)}let d=null;const w=n.selector,y="array"===n.type;if(w){let t=await this._querySelectorAll(l,w);if(t.length>0){void 0!==n.depth&&"object"!==n.type&&(t=await Promise.all(t.map(t=>H.call(this,t,l,n.depth)))),d=t[0];const e={...n,_skipSelector:!0};if(r=await R.call(this,e,y?t:d,i),f&&!n.anchor){const e=y&&Array.isArray(r)?t[t.length-1]:d;u=await I.call(this,e,u)}y&&(d=t[t.length-1])}else r=null}else r=await R.call(this,n,l,i),null!==r&&(d=Array.isArray(l)?l[0]:l);d&&h.set(t,d),null===r&&n.required&&a.push(t),null!==r&&(c=!0),o[t]=r}return{result:o,hasValue:c,missingRequired:a}}async function P(t,e,i){const{selector:s,items:n,mode:r,strict:o}=t,a=o??i,c=t._skipSelector;let l=s&&!c?await this._querySelectorAll(e,s):Array.isArray(e)?e:[e];s&&!c&&void 0!==t.depth&&(l=await Promise.all(l.map(i=>H.call(this,i,e,t.depth)))),this._logDebug("extract",`_extractArray: selector "${s||""}" found ${l.length} elements`);const u=M.call(this,r);if(void 0!==a&&void 0===u.strict&&(u.strict=a),(!r||"columnar"===u.type)&&1===l.length&&n){this._logDebug("extract","_extractArray: trying columnar extraction");const t=await N.call(this,n,l[0],u);if(t)return t}if("segmented"===u.type&&n){this._logDebug("extract",`_extractArray: trying segmented extraction for ${l.length} containers`);const t=[];let e=!1;for(const i of l){const s=await U.call(this,n,i,u);s&&(e=!0,t.push(...s))}if(e)return t}return this._logDebug("extract",`_extractArray: using nested extraction 
for ${l.length} elements`),F.call(this,n,l,{strict:u.strict})}async function O(t,e,i){const{selector:s}=t,n=t._skipSelector,r=t.strict??i;let o=e;if(s&&!n){const i=await this._querySelectorAll(e,s);o=i.length>0?i[0]:null,o&&void 0!==t.depth&&(o=await H.call(this,o,e,t.depth)),this._logDebug("extract",`_extractValue: selector "${s}" found ${i.length} elements`)}else Array.isArray(e)&&(o=e.length>0?e[0]:null);if(!o){if(this._logDebug("extract",`_extractValue: element not found for selector "${s||""}"`),r&&t.required)throw new q.CommonError(`Required value "${s||""}" is missing.`,"extract");return null}const a=await this._extractValue(t,o);return this._logDebug("extract",`_extractValue: extracted for selector "${s||""}":`,a),a}function M(t){return t?"string"==typeof t?{type:t}:t:{type:"nested"}}async function F(t,e,i){const s=[],n=t.required,r=!0===i?.strict,o="object"===t.type||"array"===t.type;for(const i of e){const e=await R.call(this,t,i,r);if(null!==e)s.push(e);else{if(n&&r)throw new q.CommonError("Required item is missing in array.","extract");n||o||s.push(null)}}return s}async function N(t,e,i){const s="object"===t.type,n=!0===i?.strict,r=!0===i?.inference;if(s){const i=t.properties,s=Object.keys(i);if(0===s.length)return null;const o={},a={};let c=null,l=0,u=[];for(const t of s){const s=i[t];if("array"===s.type||"object"===s.type)return this._logDebug("extract",`_extractColumnar: field "${t}" has nested structure, columnar not supported`),null;const h=s;let f=[];f=h.selector?await this._querySelectorAll(e,h.selector):[e],a[t]=f;const d=f.length;if(this._logDebug("extract",`_extractColumnar: field "${t}" with selector "${h.selector||""}" found ${d} matches`),d>l&&(l=d,u=f),h.selector)if(null===c)c=d,this._logDebug("extract",`_extractColumnar: set commonCount to ${c}`);else if(c!==d)if(this._logDebug("extract",`_extractColumnar: count mismatch for field "${t}": ${d} vs ${c}`),r&&l>1)c=-1,this._logDebug("extract","_extractColumnar: mismatch marked for 
inference");else if(n){if(!(1===d&&await this._isSameElement(f[0],e))){if(s.required&&d<c)throw new q.CommonError(`Required field "${t}" is missing at index ${d}.`,"extract");throw new q.CommonError(`Columnar extraction mismatch: field "${t}" has ${d} matches, but expected ${c}.`,"extract")}}const w=await Promise.all(f.map(t=>this._extractValue(h,t)));this._logDebug("extract",`_extractColumnar: field "${t}" values:`,w),o[t]=w}if(r&&-1===c&&l>1&&u.length>0){const i=[];for(const t of u){const s=await this._findContainerChild(t,e);s&&i.push(s)}const s=[];for(const t of i){await this._findClosestAncestor(t,s)||s.push(t)}if(s.length>1)return F.call(this,t,s,{strict:n})}if(l<=1)return null;if(-1===c&&n)return null;const h=n&&-1!==c?c:l,f={};if(h>1)for(const t of s){if(1===o[t].length){(!i[t].selector||await this._isSameElement(a[t][0],e))&&(f[t]=!0)}}const d=[];for(let t=0;t<h;t++){const e={};let r=!1;for(const a of s){const s=o[a],c=i[a];let l=s[t];if(f[a]&&(l=s[0]),void 0===l&&(l=null),null===l&&c.required){if(this._logDebug("extract",`_extractColumnar: skipping row ${t} because required field "${a}" is null`),n)throw new q.CommonError(`Required field "${a}" is missing at index ${t}.`,"extract");r=!0;break}e[a]=l}r||d.push(e)}return d}{const i=t;if(!i.selector)return null;const s=await this._querySelectorAll(e,i.selector);if(s.length<=1)return null;const n=await Promise.all(s.map(t=>this._extractValue(i,t)));return i.required?n.filter(t=>null!==t):n}}async function U(t,e,i){if(!("object"===t.type))return null;const s=t.properties,n=Object.keys(s);if(0===n.length)return null;let r;if(i?.anchor)r=s[i.anchor]?.selector||i.anchor;else for(const t of n)if(s[t].selector){r=s[t].selector;break}if(!r)return this._logDebug("extract","_extractSegmented: no anchor selector found, falling back to nested"),null;const o=await this._querySelectorAll(e,r);if(this._logDebug("extract",`_extractSegmented: anchor selector "${r}" found ${o.length} 
elements`),0===o.length){if(i?.strict)throw new q.CommonError(`Segmented extraction failed: no elements found for anchor selector "${r}".`,"extract");return[]}const a=[];for(let s=0;s<o.length;s++){const n=o[s],c=s>0?o[s-1]:null,l=s<o.length-1?o[s+1]:null;let u,h=n,f=null;if(c&&(f=await this._findCommonAncestor(n,c)),!f&&l)f=await this._findCommonAncestor(n,l);else if(f&&l){const t=await this._findCommonAncestor(n,l);t&&await this._contains(f,t)&&(f=t)}if(f){const t=await H.call(this,n,f,i?.depth);t&&!await this._isSameElement(t,n)&&(h=t)}else{const t=await H.call(this,n,e,i?.depth);t&&(h=t)}if(await this._isSameElement(h,n)){u=[n,...await this._nextSiblingsUntil(n,r)],this._logDebug("extract",`_extractSegmented: segment ${s} (flat) created with ${u.length} elements`)}else u=h,this._logDebug("extract",`_extractSegmented: segment ${s} (nested) identified as container element`);const d={...t};i?.relativeTo&&!d.relativeTo&&(d.relativeTo=i.relativeTo);const w=await R.call(this,d,u,i?.strict),y=t.required,p="object"===t.type||"array"===t.type;if(null!==w)a.push(w);else{if(y&&i?.strict)throw new q.CommonError("Required item is missing in array.","extract");y||p||a.push(null)}}return a}async function H(t,e,i){const s=Array.isArray(e),n=s?e:[e],r=s?await this._findClosestAncestor(t,n):await this._findContainerChild(t,e);if(void 0===i||!r)return r;let o=t;for(let t=0;t<i&&!await this._isSameElement(o,r);t++){const t=await this._parentElement(o);if(!t||!await this._contains(r,t))break;o=t}return o}async function L(t,e,i,s,n,r){let o=null;if(e.hasOwnProperty(t))o=i.get(t)||null;else{const e=await this._querySelectorAll(s,t);e.length>0&&(o=e[0])}if(o){const t=[];let e=o,i=0;const n=void 0!==r?r:1e3;for(;e&&i<=n;){const n=await this._nextSiblingsUntil(e);t.push(...n);const r=await this._parentElement(e);if(!r)break;if(Array.isArray(s)?null!==await this._findClosestAncestor(r,s):await this._isSameElement(r,s))break;e=r,i++}if(t.length>0||void 0!==r)return{scopeForField:t}}return 
null}async function I(t,e){const i=await H.call(this,t,e);if(i){if(!Array.isArray(e))return this._nextSiblingsUntil(i);{let t=e.indexOf(i);if(-1===t)for(let s=0;s<e.length;s++)if(await this._isSameElement(e[s],i)){t=s;break}if(-1!==t)return e.slice(t+1)}}return Array.isArray(e)?e:[e]}function B(){let t=()=>{};const e=new Promise(e=>{t=e});return e.release=t,e}k.Configuration.getGlobalConfig().set("persistStorage",!1);var V={scripts:["script"],styles:["style",'link[rel="stylesheet"]'],svgs:["svg"],images:["img","picture","canvas"],hidden:["[hidden]",'[style*="display:none"]','[style*="display: none"]']},D=class{constructor(){this.hdrs={},this._initializedSessions=new Set,this.pendingRequests=new Map,this.requestCounter=0,this.actionEmitter=new $.EventEmitter,this.isPageActive=!1,this.isEngineDisposed=!1,this.navigationLock=function(){const t=B();return t.release(),t}(),this.isExecutingAction=!1,this.actionQueue=[],this.isProcessingActionLoop=!1,this.blockedTypes=new Set}static register(t){const e=t.id;if(!e)throw new Error("Engine must define static id");if(this.registry.has(e))throw new Error(`Engine id duplicated: ${e}`);this.registry.set(e,t)}static get(t){return this.registry.get(t)}static getByMode(t){for(const[e,i]of this.registry.entries())if(i.mode===t)return i}static async create(t,e){const i=(0,v.defaultsDeep)(e,t,l),s=i.engine??t.engine,n=s?this.get(s)??this.getByMode(s):null;if(n){const e=new n;return await e.initialize(t,i),e}}_logDebug(t,...e){b(this.opts?.debug,{prefix:"FetchEngine",id:this.id,category:t},...e)}_getTrimInfo(t){let{selectors:e=[],presets:i=[]}=t;"string"==typeof e&&(e=[e]),"string"==typeof i&&(i=[i]);const s=i.includes("all"),n=[...e];for(const[t,e]of Object.entries(V))(s||i.includes(t))&&n.push(...e);return{selectors:n,removeComments:s||i.includes("comments"),removeHidden:s||i.includes("hidden")}}async _extract(t,e,i){return R.call(this,t,e,i)}_normalizeArrayMode(t){return M.call(this,t)}async _extractNested(t,e,i){return 
F.call(this,t,e,i)}async _extractColumnar(t,e,i){return N.call(this,t,e,i)}async _extractSegmented(t,e,i){return U.call(this,t,e,i)}async buildResponse(t){const e=await this._buildResponse(t),i=e.headers["content-type"]||"";return e.contentType=i.split(";")[0].trim(),!1!==this.opts?.output?.cookies?!e.cookies&&t.session&&(e.cookies=t.session.getCookies(t.request.url)):delete e.cookies,!1!==this.opts?.output?.sessionState?this.crawler?.sessionPool&&(e.sessionState=await this.crawler.sessionPool.getState()):delete e.sessionState,this.opts?.debug&&(e.metadata={...e.metadata,mode:this.mode,engine:this.id,proxy:t.proxyInfo?.url||("string"==typeof this.opts.proxy?this.opts.proxy:Array.isArray(this.opts.proxy)?this.opts.proxy[0]:void 0)}),e}waitFor(t){return this.dispatchAction({type:"waitFor",options:t})}click(t){return this.dispatchAction({type:"click",selector:t})}mouseMove(t){return this.dispatchAction({type:"mouseMove",params:t})}mouseClick(t){return this.dispatchAction({type:"mouseClick",params:t})}keyboardType(t,e){return this.dispatchAction({type:"keyboardType",params:{text:t,delay:e}})}keyboardPress(t,e){return this.dispatchAction({type:"keyboardPress",params:{key:t,delay:e}})}fill(t,e){return this.dispatchAction({type:"fill",selector:t,value:e})}submit(t,e){return this.dispatchAction({type:"submit",selector:t,options:e})}trim(t){return this.dispatchAction({type:"trim",options:t})}pause(t){return this.dispatchAction({type:"pause",message:t})}evaluate(t){return this.dispatchAction({type:"evaluate",params:t})}extract(t){t&&"object"==typeof t&&t.schema&&(t=t.schema);const e=C(t);return this.dispatchAction({type:"extract",schema:e})}get id(){return this.constructor.id}async getState(){return{cookies:await this.cookies(),sessionState:await(this.crawler?.sessionPool?.getState())}}get mode(){return this.constructor.mode}get context(){return this.ctx}async initialize(t,e){if(this.ctx)return;(0,v.merge)(t,e),this.ctx=t,this.opts=t,this.hdrs=function(t){const 
e={};if(t&&"object"==typeof t)for(const[i,s]of Object.entries(t))e[i.toLowerCase()]=s;return e}(t.headers),this._initialCookies=[...t.cookies??[]],t.internal||(t.internal={}),t.internal.engine=this,t.engine=this.mode,this.actionEmitter.setMaxListeners(100);const i=t.storage||{},s=i.persist??!1,n=this.config=new k.Configuration({persistStorage:s,storageClientOptions:{persistStorage:s,...i.config},...i.config}),r=i.id||t.id;this.requestQueue=await k.RequestQueue.open(r,{config:n});const o=this.opts?.proxy?"string"==typeof this.opts.proxy?[this.opts.proxy]:this.opts.proxy:void 0;o?.length&&(this.proxyConfiguration=new k.ProxyConfiguration({proxyUrls:o}));const a=await this._getSpecificCrawlerOptions(t),c=(0,v.defaultsDeep)({persistenceOptions:{enable:!0,storeId:r},persistStateKeyValueStoreId:r},t.sessionPoolOptions,{maxPoolSize:1,sessionOptions:{maxUsageCount:1e3,maxErrorScore:3}});t.sessionState&&t.cookies&&t.cookies.length>0&&console.warn('[FetchEngine] Warning: Both "sessionState" and "cookies" are provided. 
Explicit "cookies" will override any conflicting cookies restored from "sessionState".');const l={...(0,v.defaultsDeep)(a,{requestQueue:this.requestQueue,maxConcurrency:1,minConcurrency:1,useSessionPool:!0,persistCookiesPerSession:!0,sessionPoolOptions:c}),requestHandler:this._requestHandler.bind(this),errorHandler:this._failedRequestHandler.bind(this),failedRequestHandler:this._failedRequestHandler.bind(this)};l.preNavigationHooks||(l.preNavigationHooks=[]),l.preNavigationHooks.unshift(({crawler:t,session:e,request:i},s)=>{if(this.currentSession=e,e&&!this._initializedSessions.has(e.id)){if(this._initialCookies&&this._initialCookies.length>0){const t=this._initialCookies.map(t=>{const e={...t};return"no_restriction"===e.sameSite&&(e.sameSite="None"),e});e.setCookies(t,i.url)}this._initializedSessions.add(e.id)}});const u=this.crawler=this._createCrawler(l,n),h=this.kvStore=await k.KeyValueStore.open(r,{config:n}),f=await h.getValue(k.PERSIST_STATE_KEY);!t.sessionState||f&&!t.overrideSessionState||await h.setValue(k.PERSIST_STATE_KEY,t.sessionState),this.isCrawlerReady=!0,this.crawlerRunPromise=u.run(),this.crawlerRunPromise.finally(()=>{this.isCrawlerReady=!1}).catch(t=>{console.error("Crawler background error:",t)})}async cleanup(){await(this._cleanup?.()),await this._commonCleanup();const t=this.ctx;t&&t.internal?.engine===this&&(t.internal.engine=void 0),this.ctx=void 0,this.opts=void 0}async _processAction(t,e){switch(this._logDebug(e.type,"Executing action:",e),e.type){case"extract":return A.call(this,e.schema,this._getInitialElementScope(t));case"pause":return this._handlePause(e);case"getContent":return this.buildResponse(t);case"waitFor":return e.options?.ms&&1===Object.keys(e.options).length?void await new Promise(t=>setTimeout(t,e.options.ms)):this.executeAction(t,e);default:return this.executeAction(t,e)}}async _handlePause(t){const e=this.ctx?.onPause;e?(console.info(t.message||"Execution paused for manual intervention."),await 
e({message:t.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped.")}async _executePendingActions(t){if(this.isEngineDisposed)return;this.activeContext=t;const e=async()=>{if(!this.isProcessingActionLoop){this.isProcessingActionLoop=!0,this._logDebug("action-loop",`Action loop started. Current queue size: ${this.actionQueue.length}`);try{for(;this.actionQueue.length>0&&this.isPageActive&&!this.isEngineDisposed;){const e=this.actionQueue.shift();this._logDebug("action-loop",`Processing action: ${e.action.type}`,e.action);try{if("dispose"===e.action.type){this.actionEmitter.emit("dispose"),e.resolve();continue}this.isExecutingAction=!0;const i=await this._processAction(t,e.action);this._logDebug("action-loop",`Action completed: ${e.action.type}`),e.resolve(i)}catch(t){this._logDebug("action-loop",`Action failed: ${e.action.type}`,t),e.reject(t)}finally{this.isExecutingAction=!1,await new Promise(t=>setImmediate(t))}}}finally{this.isProcessingActionLoop=!1,this._logDebug("action-loop","Action loop paused/finished.")}}};await new Promise(t=>{const i=t=>{this.actionQueue.push(t),e()},s=()=>{this.actionEmitter.removeListener("dispatch",i),this.activeContext=void 0,t()};this.actionEmitter.on("dispatch",i),this.actionEmitter.once("dispose",s),e(),this.isEngineDisposed&&(s(),this.actionEmitter.removeListener("dispose",s))})}async _sharedRequestHandler(t){const{request:e}=t;this._logDebug("request",`Processing request: ${e.url}`);try{this.currentSession=t.session,this.isPageActive=!0;const i=this.pendingRequests.get(e.userData.requestId);if(i){const s=await this.buildResponse(t),n=!s.statusCode||s.statusCode>=400;if(this.ctx?.throwHttpErrors&&n){const t=new _.CommonError(`Request for ${s.finalUrl} failed with status ${s.statusCode||"N/A"}`,"request",s.statusCode);i.reject(t)}else this.lastResponse=s,i.resolve(s);this.pendingRequests.delete(e.userData.requestId)}await 
this._executePendingActions(t)}finally{if(this.currentSession){const t=this.currentSession.getCookies(e.url);t&&(this._initialCookies=t)}this.isPageActive=!1,this.navigationLock.release()}}async _sharedFailedRequestHandler(t,e){const{request:i}=t,s=this.pendingRequests.get(i.userData.requestId);if(s&&e&&this.ctx?.throwHttpErrors){this.pendingRequests.delete(i.userData.requestId);const t=e.response,n=t?.statusCode||500,r=t?.url?t.url:i.url,o=new _.CommonError(`Request${r?" for "+r:""} failed: ${e.message}`,"request",n);s.reject(o)}return this._sharedRequestHandler(t)}async dispatchAction(t){if(!this.isPageActive)throw new Error("No active page. Call goto() before performing actions.");return this.isExecutingAction&&this.activeContext?(this._logDebug(t.type,"Re-entrant action execution:",t),await this._processAction(this.activeContext,t)):new Promise((e,i)=>{this.actionEmitter.emit("dispatch",{action:t,resolve:e,reject:i})})}async _requestHandler(t){await this._sharedRequestHandler(t)}async _failedRequestHandler(t,e){await this._sharedFailedRequestHandler(t,e)}async _commonCleanup(){if(this.isEngineDisposed=!0,this._initializedSessions.clear(),this.actionEmitter.emit("dispose"),this.navigationLock?.release(),this.pendingRequests.size>0){for(const[,t]of this.pendingRequests)t.reject(new Error("Cleanup:Request cancelled"));this.pendingRequests.clear()}if(this.crawler){try{await(this.crawler.teardown?.())}catch(t){console.error("crawler teardown error:",t)}this.crawler=void 0}this.crawlerRunPromise=void 0,this.isCrawlerReady=void 0;const t=(this.opts?.storage||{}).purge??!0;this.requestQueue&&(t&&await this.requestQueue.drop().catch(t=>console.error("Error dropping requestQueue:",t)),this.requestQueue=void 0),this.kvStore&&(t&&await this.kvStore.drop().catch(t=>console.error("Error dropping kvStore:",t)),this.kvStore=void 0),this.actionEmitter.removeAllListeners(),this.pendingRequests.clear(),this.actionQueue=[],this.config=void 0}async blockResources(t,e){return 
e&&this.blockedTypes.clear(),t.forEach(t=>this.blockedTypes.add(t)),t.length}getContent(){return this.lastResponse?Promise.resolve(this.lastResponse):Promise.reject(new Error("No content fetched yet. Call goto() first."))}async headers(t,e){if(void 0===t)return{...this.hdrs};if("string"==typeof t&&void 0===e)return this.hdrs[t.toLowerCase()]||"";if(null!==t&&"object"==typeof t){const i={};for(const[e,s]of Object.entries(t))i[e.toLowerCase()]=String(s);return this.hdrs=!0===e?i:{...this.hdrs,...i},!0}return"string"==typeof t&&("string"==typeof e?this.hdrs[t.toLowerCase()]=e:null===e&&delete this.hdrs[t.toLowerCase()],!0)}async cookies(t){const e=this.lastResponse?.url||"";if(Array.isArray(t))return this.currentSession?this.currentSession.setCookies(t,e):this._initialCookies=[...t],!0;if(null===t)return this.currentSession,this._initialCookies=[],!0;if(this.currentSession){return this.currentSession.getCookies(e)}return[...this._initialCookies||[]]}async dispose(){await this.cleanup()}};async function G(t,e){let i;const s=e?.engine||t.engine;if(s&&"auto"!==s){if(i=await D.create(t,{engine:s}),!i)throw new Error(`Engine "${s}" is not available or failed to initialize.`);return i}const n=function(t,e){if(!t||!e?.length)return null;const i=new URL(t);let s=e.find(t=>t.domain===i.hostname);s||(s=e.find(t=>i.hostname.endsWith(t.domain)));if(!s)return null;if(s.pathScope?.length){if(!s.pathScope.some(t=>i.pathname.startsWith(t)))return null}return s}(e?.url||t.url,t.sites);if(n?.engine&&"auto"!==n.engine&&(i=await D.create(t,{engine:n.engine}),i))return i;if(i=await D.create(t,{engine:"http"}),!i)throw new Error("Failed to create default http engine");return i}D.registry=new Map;var z=class{constructor(t={}){this.options=t,this.closed=!1,this.id=x(),this.context=this.createContext(t)}_logDebug(t,...e){b(this.context.debug,{prefix:"FetchSession",id:this.id.slice(0,8),category:t},...e)}async execute(t,e=this.context){const 
i=t.id||t.name||t.action;this._logDebug("execute",`Executing action: ${i}`,t.params);const s=t.index??(e.internal.actionIndex||0);e.internal.actionIndex=s+1,await this.ensureEngine(t,e);const n=d.create(t);if(!n)throw new Error(`Unknown action: ${t.id||t.name}`);const r={...t,index:s};let o,a;e.currentAction={...r,startedAt:Date.now()};try{return o=await n.execute(e,r),o}catch(t){throw a=t,a}finally{e.currentAction=void 0}}async executeAll(t,e){this._logDebug("executeAll",`Total actions: ${t.length}`,t.map(t=>t.id||t.name||t.action));const i=e?{...this.context,...e,id:this.context.id,eventBus:this.context.eventBus,outputs:this.context.outputs,execute:this.context.execute,action:this.context.action}:this.context;let s=e?.index??0;try{for(;s<t.length;){const e=t[s];await this.execute({...e,index:s},i),s++}const e=await this.execute({id:"getContent",index:s},i);return{result:e?.result,outputs:this.getOutputs()}}catch(t){throw t.actionIndex=s,t}}getOutputs(){return this.context.outputs}async getState(){return this.context.internal.engine?.getState()}async dispose(){if(this.closed)return;const t=this.context.eventBus;t.emit("session:closing",{sessionId:this.id});try{await(this.context.internal.engine?.dispose())}finally{this.closed=!0}t.emit("session:closed",{sessionId:this.id})}async ensureEngine(t,e){if(this.closed)throw new Error("Session is closed");if(!e.internal.engine){const i=t?.params?.url??e.url,s=await G(e,{url:i});if(!s)throw new Error("No engine found");e.internal.engine=s}}createContext(t=this.options){const e=new m.EventEmitter;return(0,g.defaultsDeep)({...t,id:this.id,eventBus:e,outputs:{},internal:{},execute:async t=>this.execute(t),action:async function(t,e,i){return this.execute({name:t,params:e,...i})}},l)}},K=class{constructor(t={}){this.defaults=t}async createSession(t){const e={...this.defaults,...t||{}};return new z(e)}async fetch(t,e){"string"!=typeof t&&(t=(e=t).url);const i=await this.createSession(e);try{const 
s=e?.actions||[];t&&0!==s.findIndex(e=>("goto"===e.id||"goto"===e.name)&&e.params?.url===t)&&s.unshift({id:"goto",params:{url:t}});return await i.executeAll(s)}finally{await i.dispose()}}},J=require("crawlee"),W=((t,s,n)=>(n=null!=t?e(r(t)):{},a(!s&&t&&t.__esModule?n:i(n,"default",{value:t,enumerable:!0}),t)))(require("cheerio")),Q=require("util-ex"),Z=require("@isdk/common-error"),X="___BR___",Y="___BLOCK___",tt="___P___",et=/\s+/g,it=new RegExp(` *(${X}|${Y}|${tt}) *`,"g"),st=new RegExp(`(?:${Y}|${tt})+`,"g");var nt={"&":"&","<":"<",">":">"},rt={""":'"',"'":"'"," ":" ","©":"©","®":"®","™":"™","§":"§","¶":"¶","•":"•","…":"…","€":"€","£":"£","¥":"¥","¢":"¢","¤":"¤","¦":"¦","¨":"¨","ª":"ª","«":"«","»":"»","¬":"¬","­":"","¯":"¯","°":"°","±":"±","²":"²","³":"³","´":"´","µ":"µ","·":"·","¸":"¸","¹":"¹","º":"º","¿":"¿","×":"×","÷":"÷","–":"–","—":"—","‘":"‘","’":"’","‚":"‚","“":"“","”":"”","„":"„","†":"†","‡":"‡","‰":"‰","‹":"‹","›":"›"};function ot(t){return t?t.replace(/&(#?[a-zA-Z0-9]+);/g,t=>{const e=t.toLowerCase();if(nt[e])return t;if(rt[e])return rt[e];if(t.startsWith("&#")){const e=t.startsWith("&#x")?parseInt(t.slice(3,-1),16):parseInt(t.slice(2,-1),10);if(!isNaN(e)){if(160===e)return" ";try{return String.fromCodePoint(e)}catch(e){return t}}}return t}):t}var at=class extends D{_ensureCheerioContext(t){if(!t.$&&t.body){let e="string"==typeof t.body?t.body:Buffer.isBuffer(t.body)?t.body.toString("utf-8"):JSON.stringify(t.body);e.trim().startsWith("<")||(e=`<html><body><pre>${e}</pre></body></html>`),t.$=W.load(e)}}async _buildResponse(t){this._ensureCheerioContext(t);const{request:e,response:i,body:s,$:n}=t,r=n?.html();let o="string"==typeof s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");r&&r!==o&&(o=r);let a=i?.headers;if(!a&&i?.rawHeaders){a={};const t=i.rawHeaders;for(let e=0;e<t.length;e+=2)a[t[e].toLowerCase()]=t[e+1]}const 
c={url:e.url,finalUrl:e.loadedUrl||e.url,statusCode:i?.statusCode??200,statusText:i?.statusMessage,headers:a||{},body:s,html:ot(o),text:o};if(this.opts?.debug&&i?.timings){const t=i.timings;c.metadata={timings:{start:t.start,total:t.phases?.total,ttfb:t.phases?.firstByte,dns:t.phases?.dns,tcp:t.phases?.tcp,download:t.phases?.download}}}return c}async _querySelectorAll(t,e){if(Array.isArray(t)){if(0===t.length)return[];const{$:i}=t[0],s=t.map(t=>t.el[0]).filter(Boolean),n=i(s);return n.find(e).add(n.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}const{$:i,el:s}=t;return":scope"===e?[{$:i,el:s}]:s.find(e).add(s.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}async _nextSiblingsUntil(t,e){const{$:i,el:s}=t;return(e?s.nextUntil(e):s.nextAll()).toArray().map(t=>({$:i,el:i(t)}))}async _parentElement(t){const{$:e,el:i}=t,s=i.parent();return 0===s.length?null:{$:e,el:s}}async _isSameElement(t,e){return t.el[0]===e.el[0]}async _findClosestAncestor(t,e){if(0===e.length)return null;const i=new Set(e.map(t=>t.el[0])),{$:s,el:n}=t;let r=n;for(;r.length>0;){if(i.has(r[0]))return{$:s,el:r};r=r.parent()}return null}async _contains(t,e){const i=t.el[0],s=e.el[0];if(i===s)return!0;const n=t.$;return"function"==typeof n.contains?n.contains(i,s):t.el.find(e.el).length>0}async _findCommonAncestor(t,e){const{$:i,el:s}=t,{el:n}=e;if(s[0]===n[0])return t;if(await this._contains(t,e))return t;if(await this._contains(e,t))return e;const r=s.parents().toArray(),o=n.parents().toArray(),a=new Set(o);for(const t of r)if(a.has(t))return{$:i,el:i(t)};return null}async _findContainerChild(t,e){const{$:i,el:s}=t,n=e.el[0];let r=s;if(r[0]===n)return t;const o=r.parents().toArray();for(let t=0;t<o.length;t++)if(o[t]===n){return{$:i,el:i(t>0?o[t-1]:s[0])}}if(n===i.root()[0]){return{$:i,el:i(o.length>0?o[o.length-1]:s[0])}}return null}async _extractValue(t,e){const{$:i,el:s}=e,{attribute:n,type:r="string",mode:o="text"}=t;if(this._logDebug("extract",`_extractValue: el.length=${s.length} 
schema=${JSON.stringify(t)}`),0===s.length)return null;let a="";if(n?a=s.attr(n)??null:"html"===r||"html"===o||"outerHTML"===o?(a="outerHTML"===o?i.html(s):s.html()??("html"===r?"":null),a&&(a=ot(a.trim()))):a="innerText"===o?function(t){const e=t.clone();e.find("script, style, noscript, template").remove(),e.find("[hidden]").remove(),e.find("br").replaceWith(X),e.find("p").before(tt).after(tt),e.find("div, h1, h2, h3, h4, h5, h6, li, ul, ol, tr, dl, dt, dd, blockquote, pre, form, table, article, section, header, footer, nav, main, aside, hr, address, fieldset, figure, figcaption, details, summary").before(Y).after(Y);let i=e.text();return i=i.replace(et," "),i=i.replace(it,"$1"),i=i.replace(st,t=>t.includes(tt)?tt:Y),i=i.replaceAll(X,"\n"),i=i.replaceAll(tt,"\n\n"),i=i.replaceAll(Y,"\n"),i.trim()}(s):s.text().trim(),null===a)return null;switch(r){case"number":return parseFloat(a.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=a.toLowerCase();return"true"===t||"1"===t;default:return a}}_getInitialElementScope(t){const{$:e}=t;return e?{$:e,el:e.root()}:null}async executeAction(t,e){const{$:i}=t;switch(e.type){case"dispose":return;case"navigate":{const{url:i,opts:s}=e;this._logDebug("navigate",`Navigating to: ${i}`);const n=await this._requestWithRedirects(t,{url:i,method:"GET",headers:{...this.hdrs,...s?.headers}});return await this._updateStateAfterNavigation(t,n),this.lastResponse}case"mouseMove":case"mouseClick":case"keyboardType":case"keyboardPress":throw new Z.CommonError(`Action "${e.type}" is only supported in browser engine mode.`,e.type,"not_supported");case"click":{if(!i)throw new Z.CommonError(`Cheerio context not available for action: ${e.type}`,"click");const s=e.selector,n=i(s).first();let r;if(0===n.length)try{r=new URL(s,t.request.loadedUrl||t.request.url).href}catch{throw new Z.CommonError(`click: selector not found or invalid URL: ${s}`,"click")}else{if(!n.is("a")||!n.attr("href")){if(n.is('input[type="submit"], button[type="submit"], button, 
input')){const e=n.closest("form");return e.length?this.executeAction(t,{type:"submit",selector:e}):void this._logDebug("click","Button/input clicked but no form found and no JS support in http mode. Ignoring.")}throw new Z.CommonError(`click: unsupported element for http simulate. Selector: ${s}`,"click")}{const e=n.attr("href");r=new URL(e,t.request.loadedUrl||t.request.url).href}}const o=await t.sendRequest({url:r});return void await this._updateStateAfterNavigation(t,o)}case"fill":{if(!i)throw new Z.CommonError(`Cheerio context not available for action: ${e.type}`),"fill";const s=i(e.selector).first();if(0===s.length)throw new Z.CommonError(`fill: selector not found: ${e.selector}`);if(!s.is("input, textarea, select"))throw new Z.CommonError(`fill: not a form field: ${e.selector}`);return s.val(e.value),void(this.lastResponse=await this.buildResponse(t))}case"trim":{if(!i)throw new Z.CommonError(`Cheerio context not available for action: ${e.type}`,"trim");const{selectors:s,removeComments:n}=this._getTrimInfo(e.options);return s.forEach(t=>i(t).remove()),n&&i("*").contents().filter((t,e)=>"comment"===e.type).remove(),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":return void(e.options?.ms&&await new Promise(t=>setTimeout(t,e.options.ms)));case"submit":{if(!i)throw new Z.CommonError(`Cheerio context not available for action: ${e.type}`,"submit");const s="string"==typeof e.selector?i(e.selector).first():null!=e.selector?e.selector:i("form").first();if(0===s.length)throw new Z.NotFoundError(e.selector,"submit");const n=s.attr("action")||t.request.loadedUrl||t.request.url,r=(s.attr("method")||"GET").toUpperCase(),o=new URL(n,t.request.loadedUrl||t.request.url).href,a={};let c;if(s.find("input, select, textarea").each((t,e)=>{const s=i(e),n=s.attr("name");if(!n)return;const r=s.val();null!=r&&(a[n]=String(r))}),"GET"===r){const e=new URL(o);Object.entries(a).forEach(([t,i])=>e.searchParams.set(t,i)),c=await 
this._requestWithRedirects(t,{url:e.href,method:"GET"})}else{const i=e.options?.enctype||s.attr("enctype")||"application/x-www-form-urlencoded";let n;const r={};"application/json"===i?(n=JSON.stringify(a),r["Content-Type"]="application/json"):(n=new URLSearchParams(a).toString(),r["Content-Type"]="application/x-www-form-urlencoded"),this._logDebug("submit","Submitting POST to:",o,"enctype:",i),c=await this._requestWithRedirects(t,{url:o,method:"POST",body:n,headers:r})}return await this._updateStateAfterNavigation(t,c),void this._logDebug("submit","Submit finished. Current URL:",t.request.loadedUrl||t.request.url)}case"evaluate":{const{fn:s,args:n=[]}=e.params,r=t.request.loadedUrl||t.request.url;let o=null;const a=t=>t&&0!==t.length?{textContent:t.text(),innerHTML:t.html(),outerHTML:i.html(t),getAttribute:e=>t.attr(e),matches:e=>t.is(e)}:null,c=this,l={location:{_href:r,get href(){return this._href},set href(t){if(t&&t!==this._href){this._href=t;const e=new URL(t,r).href;o=c.goto(e)}},assign(t){this.href=t},replace(t){this.href=t}}},u={getElementById:t=>a(i(`#${t}`).first()),querySelector:t=>a(i(t).first()),querySelectorAll:t=>i(t).toArray().map(t=>a(i(t))),getElementsByClassName:t=>i(`.${t}`).toArray().map(t=>a(i(t))),getElementsByTagName:t=>i(t).toArray().map(t=>a(i(t))),get body(){return a(i("body").first())},get title(){return i("title").text()}};l.document=u;const h={window:l,document:u,$:i,console:console};let f;const d=(0,Q.newFunction)(s,h);return f="function"==typeof d?await d(n):d,o?await o:l.location.href===r&&(this.lastResponse=await this.buildResponse(t)),f}default:throw new Z.CommonError(`Unknown action type: ${e.type}`,"CheerioFetchEngine.executeAction",Z.ErrorCode.NotSupported)}}async _requestWithRedirects(t,e){let{url:i,method:s,body:n,headers:r={}}=e,o=0;let a;for(;o<=5;){if(t.session){const e=t.session.getCookieString(i);e&&(r={...r,cookie:e})}if(a=await t.sendRequest({url:i,method:s,body:n,headers:r,followRedirect:!1}),!a)break;const 
e=a.statusCode,c=a.headers||a.req?.res?.headers||a.res?.headers||{};if(t.session&&c["set-cookie"]&&t.session.setCookies(c["set-cookie"],i),[301,302,303,307,308].includes(e)){const t=c.location;if(!t)break;if(i=new URL(t,i).href,o++,[301,302,303].includes(e)){this._logDebug("http",`Redirect ${e} (method conversion to GET):`,i),s="GET",n=void 0;const{"content-type":t,"Content-Type":o,"content-length":a,"Content-Length":c,...l}=r;r=l}else this._logDebug("http",`Redirect ${e} (method preserved):`,i);continue}break}return a}async _updateStateAfterNavigation(t,e){const i=e;t.response=i,t.body=i.body,t.$=void 0,i.url&&(t.request.loadedUrl=i.url),this.lastResponse=await this.buildResponse(t)}_createCrawler(t,e){return new J.CheerioCrawler(t,e)}_getSpecificCrawlerOptions(t){return{additionalMimeTypes:["text/plain"],maxRequestRetries:1,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,proxyConfiguration:this.proxyConfiguration,preNavigationHooks:[({session:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors,this.opts?.timeoutMs&&(s.timeout={request:this.opts.timeoutMs})}]}}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});const i="req-"+ ++this.requestCounter,s=new Promise((t,s)=>{const n=e?.timeoutMs||this.opts?.timeoutMs||3e4,r=setTimeout(()=>{this.pendingRequests.delete(i),this.navigationLock.release(),s(new Z.CommonError(`goto timed out after ${n}ms.`,"gotoTimeout",Z.ErrorCode.RequestTimeout))},n);this.pendingRequests.set(i,{resolve:e=>{clearTimeout(r),t(e)},reject:t=>{clearTimeout(r),s(t)}})});return this.requestQueue.addRequest({...e,url:t,headers:{...this.hdrs,...e?.headers},userData:{requestId:i},uniqueKey:`${t}-${i}`}).catch(t=>{const e=this.pendingRequests.get(i);e&&(this.pendingRequests.delete(i),this.navigationLock.release(),e.reject(t))}),await this.navigationLock,this.navigationLock=B(),s}};at.id="cheerio",at.mode="http",D.register(at);var 
ct=require("crawlee"),lt=require("playwright"),ut=require("@isdk/common-error"),ht=3e4,ft=class extends D{constructor(){super(...arguments),this.currentMousePos={x:0,y:0}}async _buildResponse(t){const{page:e,response:i,request:s,session:n}=t;if(!e||e.isClosed())return{url:s.url,finalUrl:s.loadedUrl||s.url,statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:"",html:"",text:""};const r=await e.content(),o=await e.textContent("body"),a=await e.context().cookies();n&&n.setCookies(a,s.url);const c={url:e.url(),finalUrl:e.url(),statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:r,html:r,text:o||""};if(this.opts?.debug&&i){const t="function"==typeof i.request?i.request():i.request;if(t&&"function"==typeof t.timing){const e=t.timing();c.metadata={timings:{start:e.startTime,total:e.responseEnd-e.startTime,ttfb:e.responseStart-e.requestStart,dns:e.domainLookupEnd-e.domainLookupStart,tcp:e.connectEnd-e.connectStart,download:e.responseEnd-e.responseStart}}}}return!1!==this.opts?.output?.cookies&&(c.cookies=a),c}async _querySelectorAll(t,e){const i=Array.isArray(t)?t:[t],s=[];for(const t of i){const i=await t.locator(e).all();s.push(...i);try{await t.evaluate((t,e)=>t.matches(e),e)}catch(t){}}const n=[];for(const t of i){let i=!1;try{i=await t.evaluate((t,e)=>t.matches(e),e)}catch{}i&&n.push(t);const s=await t.locator(e).all();n.push(...s)}return n}async _nextSiblingsUntil(t,e){const i=await t.locator("xpath=following-sibling::*").all();if(!e)return i;const s=[];for(const t of i){if(await t.evaluate((t,e)=>t.matches(e),e))break;s.push(t)}return s}async _parentElement(t){const e=t.locator("xpath=..");return 0===await e.count()?null:e.first()}async _isSameElement(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;try{return await i.evaluate((t,e)=>t===e,s)}finally{await i.dispose(),await s.dispose()}}async _findClosestAncestor(t,e){if(0===e.length)return null;const i=await 
t.elementHandle();if(!i)return null;const s=await Promise.all(e.map(t=>t.elementHandle()));try{const t=await i.evaluate((t,e)=>{const i=new Set(e);let s=t;for(;s;){if(i.has(s))return e.indexOf(s);s=s.parentElement}return-1},s);return-1!==t?e[t]:null}finally{await i.dispose(),await Promise.all(s.map(t=>t?.dispose()))}}async _contains(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;try{return await i.evaluate((t,e)=>t.contains(e),s)}finally{await i.dispose(),await s.dispose()}}async _findCommonAncestor(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return null;try{const e=await i.evaluateHandle((t,e)=>{let i=null;if(t===e)i=t;else if(t.contains(e))i=t;else if(e.contains(t))i=e;else{const s=new Set;let n=e.parentElement;for(;n;)s.add(n),n=n.parentElement;for(n=t.parentElement;n;){if(s.has(n)){i=n;break}n=n.parentElement}}return i&&1===i.nodeType?function t(e){if(e.id)return`//*[@id="${e.id}"]`;if(e===document.body)return"/html/body";if(e===document.documentElement)return"/html";let i=0;const s=e.parentNode?e.parentNode.childNodes:[];for(let n=0;n<s.length;n++){const r=s[n];if(r===e)return t(e.parentNode)+"/"+e.tagName.toLowerCase()+"["+(i+1)+"]";1===r.nodeType&&r.tagName===e.tagName&&i++}return""}(i):null},s);if(!e)return null;const n=await e.jsonValue();return"string"==typeof n&&n?t.page().locator(`xpath=${n}`):null}finally{await i.dispose(),await s.dispose()}}async _findContainerChild(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return null;try{const e=await i.evaluateHandle((t,e)=>{let i=null;if(t===e)i=t;else{let s=t;for(;s;){if(s.parentElement===e){i=s;break}s=s.parentElement}}return i&&1===i.nodeType?function t(e){if(e.id)return`//*[@id="${e.id}"]`;if(e===document.body)return"/html/body";if(e===document.documentElement)return"/html";let i=0;const s=e.parentNode?e.parentNode.childNodes:[];for(let n=0;n<s.length;n++){const r=s[n];if(r===e)return 
t(e.parentNode)+"/"+e.tagName.toLowerCase()+"["+(i+1)+"]";1===r.nodeType&&r.tagName===e.tagName&&i++}return""}(i):null},s);if(!e)return null;const n=await e.jsonValue();return"string"==typeof n&&n?t.page().locator(`xpath=${n}`):null}finally{await i.dispose(),await s.dispose()}}async _extractValue(t,e){const{attribute:i,type:s="string",mode:n="text"}=t,r=await e.count();if(this._logDebug("extract",`_extractValue: count=${r} schema=${JSON.stringify(t)}`),0===r)return null;let o="";if(i?o=await e.getAttribute(i):"html"===s||"html"===n||"outerHTML"===n?(o="outerHTML"===n?await e.evaluate(t=>t.outerHTML):await e.innerHTML(),o&&(o=ot(o))):o="innerText"===n?await e.innerText():await e.textContent(),null===o)return null;switch(o=o.trim(),s){case"number":return parseFloat(o.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=o.toLowerCase();return"true"===t||"1"===t;default:return o}}_getInitialElementScope(t){const{page:e}=t;return e?e.locator(":root"):null}async _waitForNavigation(t,e,i){const{page:s}=t,n=this.opts?.timeoutMs||ht;try{await s.waitForURL(t=>t.href!==e,{waitUntil:"domcontentloaded",timeout:5e3}),this._logDebug(i,"URL changed to:",s.url())}catch(t){this._logDebug(i,"No URL change detected within 5s")}await s.waitForLoadState("networkidle",{timeout:n}),this.lastResponse=await this.buildResponse(t)}_getRandomDelay(t,e=.3){const i=t*(1-e),s=t*(1+e);return Math.floor(Math.random()*(s-i+1)+i)}_getTrajectory(t,e,i=-1){const s=[];if(-1===i){const s=Math.sqrt(Math.pow(e.x-t.x,2)+Math.pow(e.y-t.y,2)),n=10*Math.random()+5;i=Math.max(5,Math.floor(s/n))}const n=t.x+(e.x-t.x)/2,r=t.y+(e.y-t.y)/2,o=n+100*(Math.random()-.5),a=r+100*(Math.random()-.5);this._logDebug("mouseMove",`Trajectory: start(${t.x},${t.y}) -> end(${e.x},${e.y}), cp(${o},${a}), steps: ${i}`);for(let n=1;n<=i;n++){const r=n/i;let c=(1-r)*(1-r)*t.x+2*(1-r)*r*o+r*r*e.x,l=(1-r)*(1-r)*t.y+2*(1-r)*r*a+r*r*e.y;n<i&&(c+=1.5*(Math.random()-.5),l+=1.5*(Math.random()-.5)),s.push({x:c,y:l})}return s}async 
_moveToSelector(t,e,i=-1){const{page:s}=t,n=s.locator(e).first(),r=await n.boundingBox();if(!r)throw new ut.CommonError(`Selector not found or not visible for mouse movement: ${e}`,"mouseMove");const o=r.x+r.width/2,a=r.y+r.height/2,c=this._getTrajectory(this.currentMousePos,{x:o,y:a},i);for(const t of c)await s.mouse.move(t.x,t.y),(i>1||-1===i)&&await s.waitForTimeout(15*Math.random()+5);return this.currentMousePos={x:o,y:a},this.currentMousePos}async executeAction(t,e){const{page:i}=t,s=this.opts?.timeoutMs||ht;switch(e.type){case"dispose":return;case"navigate":{this._logDebug("navigate",`Navigating to: ${e.url}`);const s=await i.goto(e.url,{waitUntil:e.opts?.waitUntil||"domcontentloaded",timeout:this.opts?.timeoutMs||ht});s&&(t={...t,response:s},this._logDebug("navigate",`Navigation status: ${s.status()} for ${s.url()}`));const n=await this.buildResponse(t);return this.lastResponse=n,n}case"mouseMove":{const{x:s,y:n,selector:r,steps:o=-1}=e.params;if(r)await this._moveToSelector(t,r,o);else if(void 0!==s&&void 0!==n){const t=this._getTrajectory(this.currentMousePos,{x:s,y:n},o);for(const e of t)await i.mouse.move(e.x,e.y),(o>1||-1===o)&&await i.waitForTimeout(15*Math.random()+5);this.currentMousePos={x:s,y:n}}return}case"mouseClick":{const{x:s,y:n,selector:r,button:o="left",clickCount:a=1,delay:c=0}=e.params;return r?(await this._moveToSelector(t,r,-1),await i.mouse.click(this.currentMousePos.x,this.currentMousePos.y,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)})):void 0!==s&&void 0!==n?(await i.mouse.click(s,n,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)}),this.currentMousePos={x:s,y:n}):await i.mouse.click(this.currentMousePos.x,this.currentMousePos.y,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)}),await i.waitForTimeout(this._getRandomDelay(100,.5)),void(this.lastResponse=await this.buildResponse(t))}case"keyboardType":{const{text:s,delay:n=150}=e.params;return await 
i.keyboard.type(s,{delay:this._getRandomDelay(n)}),void(this.lastResponse=await this.buildResponse(t))}case"keyboardPress":{const{key:s,delay:n=50}=e.params;return await i.keyboard.press(s,{delay:this._getRandomDelay(n)}),void(this.lastResponse=await this.buildResponse(t))}case"click":{this._logDebug("click","Clicking selector:",e.selector);const n=i.url();return await i.click(e.selector,{timeout:s}),void await this._waitForNavigation(t,n,"click")}case"fill":await i.fill(e.selector,e.value,{timeout:s});const n=await this.buildResponse(t);return void(this.lastResponse=n);case"trim":{const s=this._getTrimInfo(e.options);return await i.evaluate(t=>{const{selectors:e,removeComments:i,removeHidden:s}=t;if(e.forEach(t=>{document.querySelectorAll(t).forEach(t=>t.remove())}),s){const t=[];document.querySelectorAll("*").forEach(e=>{const i=window.getComputedStyle(e);"none"!==i.display&&"hidden"!==i.visibility||t.push(e)}),t.forEach(t=>t.remove())}if(i){const t=document.createNodeIterator(document,NodeFilter.SHOW_COMMENT),e=[];let i;for(;i=t.nextNode();)e.push(i);e.forEach(t=>t.parentElement?.removeChild(t))}},s),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":try{e.options?.selector&&await i.waitForSelector(e.options.selector,{timeout:s}),e.options?.networkIdle&&await i.waitForLoadState("networkidle",{timeout:s}),e.options?.ms&&await i.waitForTimeout(this._getRandomDelay(e.options.ms,.1))}catch(t){if(!1!==e.options?.failOnTimeout)throw t}return;case"submit":{const s=e.selector||"form",n=i.locator(s).first();if(0===await n.count())throw new ut.NotFoundError(s,"submit");if("application/json"===(e.options?.enctype||"application/x-www-form-urlencoded")){const t=await n.elementHandle();if(!t)throw new ut.CommonError(`submit: could not get form handle for ${s}`,"submit");const e=await t.evaluate(async t=>{const e=new FormData(t),i={};e.forEach((t,e)=>{i[e]=t.toString()});const s=await 
fetch(t.action,{method:t.method,headers:{"Content-Type":"application/json"},body:JSON.stringify(i)}),n=await s.text();return{status:s.status,statusText:s.statusText,headers:Object.fromEntries(s.headers.entries()),body:n,html:n,text:n,url:t.action,finalUrl:s.url}});return await t.dispose(),await i.setContent(e.html),void(this.lastResponse=e)}{this._logDebug("submit","Submitting form...");const e=i.url();return await n.evaluate(t=>t.submit()),void await this._waitForNavigation(t,e,"submit")}}case"evaluate":{const{fn:n,args:r=[]}=e.params,o=i.url();let a;if(a="function"==typeof n?await i.evaluate(n,r):await i.evaluate(([t,e])=>{const i=(0,eval)(`(${t})`);return"function"==typeof i?i(e):i},[n,r]),i.url()!==o)await i.waitForLoadState("domcontentloaded",{timeout:s}).catch(()=>{}),this.lastResponse=await this.buildResponse(t);else try{this.lastResponse=await this.buildResponse(t)}catch(e){await i.waitForLoadState("domcontentloaded",{timeout:s}).catch(()=>{}),this.lastResponse=await this.buildResponse(t)}return a}default:throw new ut.CommonError(`Unknown action type: ${e.type}`,"PlaywrightFetchEngine.executeAction",ut.ErrorCode.NotSupported)}}_createCrawler(t,e){return new ct.PlaywrightCrawler(t,e)}async _getSpecificCrawlerOptions(t){const e=t.browser?.headless??!0,i={maxRequestRetries:t.retries||3,headless:e,proxyConfiguration:this.proxyConfiguration,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,preNavigationHooks:[async({page:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors;const n=this.blockedTypes;n.size>0&&await e.route("**/*",t=>{n.has(t.request().resourceType())?t.abort():t.continue()})}]},s=t.browser?.launchOptions||{};if(this.opts?.antibot){i.browserPoolOptions={useFingerprints:!1};const{launchOptions:t}=await import("camoufox-js"),n=await t({headless:e,...s});i.launchContext={launcher:lt.firefox,launchOptions:n},i.postNavigationHooks=[async({page:t,handleCloudflareChallenge:e})=>{await e()}]}else 
Object.keys(s).length>0&&(i.launchContext={launchOptions:s});return i}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});if(!this.requestQueue)throw new ut.CommonError("RequestQueue not initialized","goto");const i="req-"+ ++this.requestCounter,s=new Promise((t,e)=>{this.pendingRequests.set(i,{resolve:t,reject:e})});return await this.requestQueue.addRequest({url:t,headers:this.hdrs,userData:{requestId:i,waitUntil:e?.waitUntil||"domcontentloaded"},uniqueKey:`${t}-${i}`}),s}};ft.id="playwright",ft.mode="browser",D.register(ft);var dt=class extends d{async onExecute(t,e){const{selector:i,...s}=e?.params||{};if(!i)throw new Error("Selector is required for click action");await this.delegateToEngine(t,"click",i,s)}};dt.id="click",dt.returnType="none",dt.capabilities={http:"simulate",browser:"native"},d.register(dt);var wt=class extends d{async onExecute(t,e){const{selector:i,value:s,...n}=e?.params||{};if(!i)throw new Error("Selector is required for fill action");if(void 0===s)throw new Error("Value is required for fill action");await this.delegateToEngine(t,"fill",i,s,n)}};wt.id="fill",wt.returnType="none",wt.capabilities={http:"simulate",browser:"native"},d.register(wt);var yt=class extends d{async onExecute(t,e){return await this.delegateToEngine(t,"getContent",e?.params)}};yt.id="getContent",yt.returnType="response",yt.capabilities={http:"native",browser:"native"},d.register(yt);var pt=class extends d{async onExecute(t,e,i){const s=e?.params,n=s?.url||t.url;if(!n)throw new Error("URL is required for goto action");const r=t.internal.engine;if(!r)throw new Error("No engine available");t.url=n;return await r.goto(n,s)}};pt.id="goto",pt.returnType="response",pt.capabilities={http:"native",browser:"native"},d.register(pt);var mt=class extends d{async onExecute(t,e){const{selector:i,...s}=e?.params||{};await 
this.delegateToEngine(t,"submit",i,s)}};mt.id="submit",mt.returnType="none",mt.capabilities={http:"simulate",browser:"native"},d.register(mt);var gt=class extends d{async onExecute(t,e){const i=t.internal.engine;if(!i)throw new Error("No engine available");await i.waitFor(e?.params)}};gt.id="waitFor",gt.returnType="none",gt.capabilities={http:"native",browser:"native"},d.register(gt);var xt=class extends d{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("Schema is required for extract action");return this.delegateToEngine(t,"extract",i)}};xt.id="extract",xt.returnType="any",xt.capabilities={http:"native",browser:"native"},d.register(xt);var bt=class extends d{async onExecute(t,e){const{selector:i,message:s,attribute:n}=e?.params||{},r=t.internal.engine;if("browser"===r?.mode){if(i){if(!await(r?.extract({selector:i,attribute:n})))return}r&&"pause"in r?await r.pause(s):console.warn("[PauseAction] was called, but the current engine does not support `pause`. Skipped.")}else console.warn("[PauseAction] can only run in browser engine. 
Skipped.")}};bt.id="pause",bt.capabilities={http:"native",browser:"native"},bt.returnType="none",d.register(bt);var vt=class extends d{async onExecute(t,e){const i=e?.params||{};await this.delegateToEngine(t,"trim",i)}};vt.id="trim",vt.returnType="none",vt.capabilities={http:"simulate",browser:"native"},d.register(vt);var $t=class extends d{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("evaluate action: params is required");return await this.delegateToEngine(t,"evaluate",i)}};$t.id="evaluate",$t.returnType="any",$t.capabilities={http:"simulate",browser:"native"},d.register($t);var _t=class extends d{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseMove",i)}};_t.id="mouseMove",_t.returnType="none",_t.capabilities={http:"noop",browser:"native"};var kt=class extends d{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseClick",i)}};kt.id="mouseClick",kt.returnType="none",kt.capabilities={http:"noop",browser:"native"},d.register(_t),d.register(kt);var qt=class extends d{async onExecute(t,e){const i=e?.params;if(!i?.text)throw new Error("text is required for keyboardType action");await this.delegateToEngine(t,"keyboardType",i.text,i.delay)}};qt.id="keyboardType",qt.returnType="none",qt.capabilities={http:"noop",browser:"native"};var Et=class extends d{async onExecute(t,e){const i=e?.params;if(!i?.key)throw new Error("key is required for keyboardPress action");await this.delegateToEngine(t,"keyboardPress",i.key,i.delay)}};async function St(t,e){return(new K).fetch(t,e)}Et.id="keyboardPress",Et.returnType="none",Et.capabilities={http:"noop",browser:"native"},d.register(qt),d.register(Et);
|
package/dist/index.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
var t={engine:"auto",enableSmart:!0,useSiteRegistry:!0,antibot:!1,debug:!1,headers:{},cookies:[],throwHttpErrors:void 0,output:{cookies:!0,sessionState:!0},proxy:[],blockResources:[],storage:{purge:!0},ignoreSslErrors:!0,browser:{engine:"playwright",headless:!0,waitUntil:"domcontentloaded"},http:{method:"GET"},timeoutMs:6e4,requestHandlerTimeoutSecs:void 0,maxConcurrency:1,maxRequestsPerMinute:1e3,delayBetweenRequestsMs:0,retries:0,sites:[]},e=Object.keys(t).concat(["actions","onPause"]),i=(t=>(t[t.Failed=0]="Failed",t[t.Success=1]="Success",t[t.Skipped=2]="Skipped",t))(i||{}),s=class t{static register(t){const e=t.id;if(!e)throw new Error("FetchAction.register: actionClass.id is required");this.registry.set(e,t)}static get(t){return this.registry.get(t)}static create(e){const i="string"==typeof e?e:e.id||e.name||e.action;if(!i)throw new Error("Action must have id, name or action");const s=i instanceof t?i.constructor:this.registry.get(i);return s?new s:void 0}static has(t){return this.registry.has(t)}static list(){return Array.from(this.registry.keys())}static getCapability(t){return this.capabilities[t]??"noop"}getCapability(t){return this.constructor.getCapability(t)}get id(){return this.constructor.id}get returnType(){return this.constructor.returnType}get capabilities(){return this.constructor.capabilities}async delegateToEngine(t,e,...i){const s=t.internal.engine;if(!s)throw new Error("No engine available");if("function"!=typeof s[e])throw new Error(`Engine does not have a method named '${String(e)}'`);return await s[e](...i)}installCollectors(e,i){const s=i?.collectors;if(!s?.length)return;const n=[],c=new Set;for(const i of s){const s=r(i.activateOn),l=r(i.collectOn),u=r(i.deactivateOn),h=!(i.background??!0),f=t.create(i);if(!f)continue;let w=!1,d=!1,p=0;const y=async t=>{if(!w&&!d){w=!0;try{await(f.onBeforeExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:f.id,phase:"before",error:t})}}},m=async(t,s)=>{if(!d){w||await 
y(s);try{const n=Promise.resolve(f.onExecute?.(e,i,s)).then(s=>{var n,r;if(i.storeAs){((n=e.outputs)[r=i.storeAs]||(n[r]=[])).push(s)}return e.eventBus.emit("collector:result",{action:this.id,collector:i.id||i.name,event:t,result:s}),s}).catch(s=>{e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,event:t,phase:"exec",error:s})}).finally(()=>{p++});h&&(c.add(n),n.finally(()=>c.delete(n)))}catch(i){e.eventBus.emit("collector:error",{action:this.id,collector:f.id,event:t,phase:"exec",error:i})}}},g=async()=>{if(!d){0===p&&m("collector:after"),d=!0;try{await(f.onAfterExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,phase:"after",error:t})}finally{e.eventBus.emit("collector:end",{action:this.id,collector:i.id||i.name}),b.forEach(t=>t())}}},x=o(e,s,y),b=a(e,l,m),v=o(e,u,g);if(n.push(...x,...b,...v),!s.length&&!l.length&&!u.length){const t=()=>{g()};e.eventBus.once(`action:${this.id}.end`,t),n.push(()=>e.eventBus.off("fetcher:action:end",t))}}return n.length||c.size>0?{cleanup:()=>n.forEach(t=>t()),awaitExecPendings:async()=>{c.size>0&&await Promise.allSettled(Array.from(c))}}:void 0}async beforeExec(t,e){t.internal.actionStack||(t.internal.actionStack=[]);const i=t.internal.actionStack,s=i.length,n=i.length>0?i[i.length-1].id:void 0,r={...e,id:this.id,depth:s,parent:n};i.push(r),t.currentAction=r;const o={action:this,context:t,options:e,index:e?.index,depth:s,stack:[...i]};t.eventBus.emit(`action:${this.id}.start`,o),t.eventBus.emit("action:start",o),await(this.onBeforeExec?.(t,e));return{entry:o,collectors:this.installCollectors(t,e)}}async afterExec(t,e,i,s){const n=t.internal.actionStack,r=n.length-1,o=s?.collectors;try{await(o?.awaitExecPendings()),t.lastResult=i,"response"!==i?.returnType||i.error||(t.lastResponse=i.result),e?.storeAs&&(t.outputs[e.storeAs]=i?.result),i?.error&&(t.currentAction.error=i.error),await(this.onAfterExec?.(t,e));const 
s={action:this,context:t,options:e,result:i,depth:r,stack:[...n]};i?.error&&(s.error=i.error);try{t.eventBus.emit(`action:${this.id}.end`,s)}catch(t){}try{t.eventBus.emit("action:end",s)}catch(t){}}finally{try{o?.cleanup()}finally{n.pop();const e=n.length;t.currentAction=e>0?n[e-1]:void 0}}}async execute(t,e){e?.args&&!e.params&&(e.params=e.args);const i=await this.beforeExec(t,e),s=e?.failOnError??!0;let n;try{return t.throwHttpErrors=s,n=await this.onExecute(t,e),n&&n.returnType||(n={status:1,returnType:this.returnType??"any",result:n}),n}catch(e){if(n={status:0,error:e,meta:{id:this.id,engineType:t.engine,capability:this.getCapability(t.engine)}},s)throw e;return n}finally{await this.afterExec(t,e,n,i)}}};s.registry=new Map,s.returnType="any",s.capabilities={http:"noop",browser:"noop"};var n=s;function r(t){return t?Array.isArray(t)?t:[t]:[]}function o(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=(...t)=>{i(t[0])};t.eventBus.once(n,e),s.push(()=>t.eventBus.off(n,e))}return s}function a(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=t=>i(n,t);t.eventBus.on(n,e),s.push(()=>t.eventBus.off(n,e))}return s}import{EventEmitter as c}from"events-ex";import{defaultsDeep as l}from"lodash-es";import{customAlphabet as u}from"nanoid";var h=u("0123456789abcdefghijklmnopqrstuvwxyz",12);import{defaultsDeep as f,merge as w}from"lodash-es";import{EventEmitter as d}from"events-ex";import{CommonError as p}from"@isdk/common-error";import{Configuration as y,KeyValueStore as m,PERSIST_STATE_KEY as g,RequestQueue as x,ProxyConfiguration as b}from"crawlee";function v(){let t=()=>{};const e=new Promise(e=>{t=e});return e.release=t,e}y.getGlobalConfig().set("persistStorage",!1);var E=class{constructor(){this.hdrs={},this._initializedSessions=new Set,this.pendingRequests=new Map,this.requestCounter=0,this.actionEmitter=new d,this.isPageActive=!1,this.isEngineDisposed=!1,this.navigationLock=function(){const 
t=v();return t.release(),t}(),this.blockedTypes=new Set}static register(t){const e=t.id;if(!e)throw new Error("Engine must define static id");if(this.registry.has(e))throw new Error(`Engine id duplicated: ${e}`);this.registry.set(e,t)}static get(t){return this.registry.get(t)}static getByMode(t){for(const[e,i]of this.registry.entries())if(i.mode===t)return i}static async create(e,i){const s=f(i,e,t),n=s.engine??e.engine,r=n?this.get(n)??this.getByMode(n):null;if(r){const t=new r;return await t.initialize(e,s),t}}_isImplicitObject(t){if(!t||"object"!=typeof t)return!1;const e=new Set(["type","selector","attribute","has","exclude","properties","items","mode"]),i=Object.keys(t);if(0===i.length)return!1;if("type"in t)return!1;for(const t of i)if(!e.has(t))return!0;return!1}async _extract(t,e){const i=t.type;if(!e)return"array"===i?[]:null;if("object"===i){const{selector:i,properties:s}=t;let n=e;if(i){const t=await this._querySelectorAll(e,i);n=t.length>0?t[0]:null}if(!n)return null;const r={};for(const t in s)r[t]=await this._extract(s[t],n);return r}if(!i&&this._isImplicitObject(t)){const i={},s=t;for(const t in s)i[t]=await this._extract(s[t],e);return i}if("array"===i){const{selector:i,items:s,mode:n}=t,r=i?await this._querySelectorAll(e,i):[e],o=this._normalizeArrayMode(n);if((!n||"columnar"===o.type)&&1===r.length&&s){const t=await this._extractColumnar(s,r[0],o);if(t)return t}if("segmented"===o.type&&1===r.length&&s){const t=await this._extractSegmented(s,r[0],o);if(t)return t}return this._extractNested(s,r)}const{selector:s}=t;let n=e;if(s){const t=await this._querySelectorAll(e,s);n=t.length>0?t[0]:null}else Array.isArray(e)&&(n=e.length>0?e[0]:null);return n?this._extractValue(t,n):null}_normalizeArrayMode(t){return t?"string"==typeof t?{type:t}:t:{type:"nested"}}async _extractNested(t,e){const i=[];for(const s of e)i.push(await this._extract(t,s));return i}async _extractColumnar(t,e,i){const 
s="object"===t.type||!t.type&&this._isImplicitObject(t),n=!1!==i?.strict,r=!0===i?.inference;if(s){const i="object"===t.type?t.properties:t,s=Object.keys(i);if(0===s.length)return null;const o={};let a=null,c=0,l=[];for(const t of s){const s=i[t];if("array"===s.type||"object"===s.type||!s.type&&this._isImplicitObject(s))return null;const u=s;let h=[];h=u.selector?await this._querySelectorAll(e,u.selector):[e];const f=h.length;if(f>c&&(c=f,l=h),u.selector)if(null===a)a=f;else if(a!==f)if(r&&c>1)a=-1;else if(n)throw new p(`Columnar extraction mismatch: field "${t}" has ${f} matches, but expected ${a}.`,"extract");const w=await Promise.all(h.map(t=>this._extractValue(u,t)));o[t]=w}if(r&&-1===a&&c>1&&l.length>0){const i=[];for(const t of l){let s=t,n=await this._parentElement(s),r=s;for(;n;){if(await this._isSameElement(n,e)){i.push(r);break}r=n,s=n,n=await this._parentElement(s)}}const s=[];for(const t of i){let e=!1;for(const i of s)if(await this._isSameElement(t,i)){e=!0;break}e||s.push(t)}if(s.length>1)return this._extractNested(t,s)}if(c<=1)return null;if(-1===a&&n)return null;const u=n&&-1!==a?a:c,h=[];for(let t=0;t<u;t++){const e={};for(const n of s){const s=o[n];if(1===s.length&&u>1){if(!i[n].selector){e[n]=s[0];continue}}e[n]=void 0!==s[t]?s[t]:null}h.push(e)}return h}{const i=t;if(!i.selector)return null;const s=await this._querySelectorAll(e,i.selector);return s.length<=1?null:Promise.all(s.map(t=>this._extractValue(i,t)))}}async _extractSegmented(t,e,i){if(!("object"===t.type||!t.type&&this._isImplicitObject(t)))return null;const s="object"===t.type?t.properties:t,n=Object.keys(s);if(0===n.length)return null;const r=s[i?.anchor||n[0]];if(!r.selector)return null;const o=await this._querySelectorAll(e,r.selector);if(0===o.length)return[];const a=[];for(let e=0;e<o.length;e++){const i=o[e],s=[i,...await this._nextSiblingsUntil(i,r.selector)];a.push(await this._extract(t,s))}return a}async buildResponse(t){const e=await 
this._buildResponse(t),i=e.headers["content-type"]||"";return e.contentType=i.split(";")[0].trim(),!1!==this.opts?.output?.cookies?!e.cookies&&t.session&&(e.cookies=t.session.getCookies(t.request.url)):delete e.cookies,!1!==this.opts?.output?.sessionState?this.crawler?.sessionPool&&(e.sessionState=await this.crawler.sessionPool.getState()):delete e.sessionState,this.opts?.debug&&(e.metadata={...e.metadata,mode:this.mode,engine:this.id,proxy:t.proxyInfo?.url||("string"==typeof this.opts.proxy?this.opts.proxy:Array.isArray(this.opts.proxy)?this.opts.proxy[0]:void 0)}),e}waitFor(t){return this.dispatchAction({type:"waitFor",options:t})}click(t){return this.dispatchAction({type:"click",selector:t})}fill(t,e){return this.dispatchAction({type:"fill",selector:t,value:e})}submit(t,e){return this.dispatchAction({type:"submit",selector:t,options:e})}pause(t){return this.dispatchAction({type:"pause",message:t})}extract(t){const e=this._normalizeSchema(t);return this.dispatchAction({type:"extract",schema:e})}_normalizeSchema(t){const e=JSON.parse(JSON.stringify(t));if(e.properties)for(const t in e.properties)e.properties[t]=this._normalizeSchema(e.properties[t]);if(e.items&&(e.items=this._normalizeSchema(e.items)),"array"===e.type&&(e.attribute&&!e.items&&(e.items={attribute:e.attribute},delete e.attribute),e.items||(e.items={type:"string"})),e.selector&&(e.has||e.exclude)){const{selector:t,has:i,exclude:s}=e,n=t.split(",").map(t=>{let e=t.trim();return i&&(e=`${e}:has(${i})`),s&&(e=`${e}:not(${s})`),e}).join(", ");e.selector=n,delete e.has,delete e.exclude}return e}get id(){return this.constructor.id}async getState(){return{cookies:await this.cookies(),sessionState:await(this.crawler?.sessionPool?.getState())}}get mode(){return this.constructor.mode}get context(){return this.ctx}async initialize(t,e){if(this.ctx)return;w(t,e),this.ctx=t,this.opts=t,this.hdrs=function(t){const e={};if(t&&"object"==typeof t)for(const[i,s]of Object.entries(t))e[i.toLowerCase()]=s;return 
e}(t.headers),this._initialCookies=[...t.cookies??[]],t.internal||(t.internal={}),t.internal.engine=this,t.engine=this.mode,this.actionEmitter.setMaxListeners(100);const i=t.storage||{},s=i.persist??!1,n=this.config=new y({persistStorage:s,storageClientOptions:{persistStorage:s,...i.config},...i.config}),r=i.id||t.id;this.requestQueue=await x.open(r,{config:n});const o=this.opts?.proxy?"string"==typeof this.opts.proxy?[this.opts.proxy]:this.opts.proxy:void 0;o?.length&&(this.proxyConfiguration=new b({proxyUrls:o}));const a=await this._getSpecificCrawlerOptions(t),c=f({persistenceOptions:{enable:!0,storeId:r},persistStateKeyValueStoreId:r},t.sessionPoolOptions,{maxPoolSize:1,sessionOptions:{maxUsageCount:1e3,maxErrorScore:3}});t.sessionState&&t.cookies&&t.cookies.length>0&&console.warn('[FetchEngine] Warning: Both "sessionState" and "cookies" are provided. Explicit "cookies" will override any conflicting cookies restored from "sessionState".');const l={...f(a,{requestQueue:this.requestQueue,maxConcurrency:1,minConcurrency:1,useSessionPool:!0,persistCookiesPerSession:!0,sessionPoolOptions:c}),requestHandler:this._requestHandler.bind(this),errorHandler:this._failedRequestHandler.bind(this),failedRequestHandler:this._failedRequestHandler.bind(this)};l.preNavigationHooks||(l.preNavigationHooks=[]),l.preNavigationHooks.unshift(({crawler:t,session:e,request:i},s)=>{if(this.currentSession=e,e&&!this._initializedSessions.has(e.id)){if(this._initialCookies&&this._initialCookies.length>0){const t=this._initialCookies.map(t=>{const e={...t};return"no_restriction"===e.sameSite&&(e.sameSite="None"),e});e.setCookies(t,i.url)}this._initializedSessions.add(e.id)}});const u=this.crawler=this._createCrawler(l,n),h=this.kvStore=await m.open(r,{config:n}),d=await h.getValue(g);!t.sessionState||d&&!t.overrideSessionState||await 
h.setValue(g,t.sessionState),this.isCrawlerReady=!0,this.crawlerRunPromise=u.run(),this.crawlerRunPromise.finally(()=>{this.isCrawlerReady=!1}).catch(t=>{console.error("Crawler background error:",t)})}async cleanup(){await(this._cleanup?.()),await this._commonCleanup();const t=this.ctx;t&&t.internal?.engine===this&&(t.internal.engine=void 0),this.ctx=void 0,this.opts=void 0}async _executePendingActions(t){this.isEngineDisposed||await new Promise(e=>{const i=async({action:e,resolve:i,reject:s})=>{try{if("dispose"===e.type)return this.actionEmitter.emit("dispose"),void i();i(await this.executeAction(t,e))}catch(t){s(t)}},s=()=>{this.actionEmitter.removeListener("dispatch",i),e()};this.actionEmitter.on("dispatch",i),this.actionEmitter.once("dispose",s),this.isEngineDisposed&&(s(),this.actionEmitter.removeListener("dispose",s))})}async _sharedRequestHandler(t){const{request:e}=t;try{this.currentSession=t.session,this.isPageActive=!0;const i=this.pendingRequests.get(e.userData.requestId);if(i){const s=await this.buildResponse(t),n=!s.statusCode||s.statusCode>=400;if(this.ctx?.throwHttpErrors&&n){const t=new p(`Request for ${s.finalUrl} failed with status ${s.statusCode||"N/A"}`,"request",s.statusCode);i.reject(t)}else this.lastResponse=s,i.resolve(s);this.pendingRequests.delete(e.userData.requestId)}await this._executePendingActions(t)}finally{if(this.currentSession){const t=this.currentSession.getCookies(e.url);t&&(this._initialCookies=t)}this.isPageActive=!1,this.navigationLock.release()}}async _sharedFailedRequestHandler(t,e){const{request:i}=t,s=this.pendingRequests.get(i.userData.requestId);if(s&&e&&this.ctx?.throwHttpErrors){this.pendingRequests.delete(i.userData.requestId);const t=e.response,n=t?.statusCode||500,r=t?.url?t.url:i.url,o=new p(`Request${r?" for "+r:""} failed: ${e.message}`,"request",n);s.reject(o)}return this._sharedRequestHandler(t)}async dispatchAction(t){if(!this.isPageActive)throw new Error("No active page. 
Call goto() before performing actions.");return new Promise((e,i)=>{this.actionEmitter.emit("dispatch",{action:t,resolve:e,reject:i})})}async _requestHandler(t){await this._sharedRequestHandler(t)}async _failedRequestHandler(t,e){await this._sharedFailedRequestHandler(t,e)}async _commonCleanup(){if(this.isEngineDisposed=!0,this._initializedSessions.clear(),this.actionEmitter.emit("dispose"),this.navigationLock?.release(),this.pendingRequests.size>0){for(const[,t]of this.pendingRequests)t.reject(new Error("Cleanup:Request cancelled"));this.pendingRequests.clear()}if(this.crawler){try{await(this.crawler.teardown?.())}catch(t){console.error("crawler teardown error:",t)}this.crawler=void 0}this.crawlerRunPromise=void 0,this.isCrawlerReady=void 0;const t=(this.opts?.storage||{}).purge??!0;this.requestQueue&&(t&&await this.requestQueue.drop().catch(t=>console.error("Error dropping requestQueue:",t)),this.requestQueue=void 0),this.kvStore&&(t&&await this.kvStore.drop().catch(t=>console.error("Error dropping kvStore:",t)),this.kvStore=void 0),this.actionEmitter.removeAllListeners(),this.pendingRequests.clear(),this.config=void 0}async blockResources(t,e){return e&&this.blockedTypes.clear(),t.forEach(t=>this.blockedTypes.add(t)),t.length}getContent(){return this.lastResponse?Promise.resolve(this.lastResponse):Promise.reject(new Error("No content fetched yet. 
Call goto() first."))}async headers(t,e){if(void 0===t)return{...this.hdrs};if("string"==typeof t&&void 0===e)return this.hdrs[t.toLowerCase()]||"";if(null!==t&&"object"==typeof t){const i={};for(const[e,s]of Object.entries(t))i[e.toLowerCase()]=String(s);return this.hdrs=!0===e?i:{...this.hdrs,...i},!0}return"string"==typeof t&&("string"==typeof e?this.hdrs[t.toLowerCase()]=e:null===e&&delete this.hdrs[t.toLowerCase()],!0)}async cookies(t){const e=this.lastResponse?.url||"";if(Array.isArray(t))return this.currentSession?this.currentSession.setCookies(t,e):this._initialCookies=[...t],!0;if(null===t)return this.currentSession,this._initialCookies=[],!0;if(this.currentSession){return this.currentSession.getCookies(e)}return[...this._initialCookies||[]]}async dispose(){await this.cleanup()}};async function S(t,e){let i;const s=e?.engine||t.engine;if(s&&"auto"!==s){if(i=await E.create(t,{engine:s}),!i)throw new Error(`Engine "${s}" is not available or failed to initialize.`);return i}const n=function(t,e){if(!t||!e?.length)return null;const i=new URL(t);let s=e.find(t=>t.domain===i.hostname);s||(s=e.find(t=>i.hostname.endsWith(t.domain)));if(!s)return null;if(s.pathScope?.length){if(!s.pathScope.some(t=>i.pathname.startsWith(t)))return null}return s}(e?.url||t.url,t.sites);if(n?.engine&&"auto"!==n.engine&&(i=await E.create(t,{engine:n.engine}),i))return i;if(i=await E.create(t,{engine:"http"}),!i)throw new Error("Failed to create default http engine");return i}E.registry=new Map;var $=class{constructor(t={}){this.options=t,this.closed=!1,this.id=h(),this.context=this.createContext(t)}async execute(t,e=this.context){const i=t.index??(e.internal.actionIndex||0);e.internal.actionIndex=i+1,await this.ensureEngine(t,e);const s=n.create(t);if(!s)throw new Error(`Unknown action: ${t.id||t.name}`);const r={...t,index:i};let o,a;e.currentAction={...r,startedAt:Date.now()};try{return o=await s.execute(e,r),o}catch(t){throw a=t,a}finally{e.currentAction=void 0}}async 
executeAll(t,e){const i=e?{...this.context,...e,id:this.context.id,eventBus:this.context.eventBus,outputs:this.context.outputs,execute:this.context.execute,action:this.context.action}:this.context;let s=e?.index??0;try{for(;s<t.length;){const e=t[s];await this.execute({...e,index:s},i),s++}const e=await this.execute({id:"getContent",index:s},i);return{result:e?.result,outputs:this.getOutputs()}}catch(t){throw t.actionIndex=s,t}}getOutputs(){return this.context.outputs}async getState(){return this.context.internal.engine?.getState()}async dispose(){if(this.closed)return;const t=this.context.eventBus;t.emit("session:closing",{sessionId:this.id});try{await(this.context.internal.engine?.dispose())}finally{this.closed=!0}t.emit("session:closed",{sessionId:this.id})}async ensureEngine(t,e){if(this.closed)throw new Error("Session is closed");if(!e.internal.engine){const i=t?.params?.url??e.url,s=await S(e,{url:i});if(!s)throw new Error("No engine found");e.internal.engine=s}}createContext(e=this.options){const i=new c;return l({...e,id:this.id,eventBus:i,outputs:{},internal:{},execute:async t=>this.execute(t),action:async function(t,e,i){return this.execute({name:t,params:e,...i})}},t)}},k=class{constructor(t={}){this.defaults=t}async createSession(t){const e={...this.defaults,...t||{}};return new $(e)}async fetch(t,e){"string"!=typeof t&&(t=(e=t).url);const i=await this.createSession(e);try{const s=e?.actions||[];t&&0!==s.findIndex(e=>("goto"===e.id||"goto"===e.name)&&e.params?.url===t)&&s.unshift({id:"goto",params:{url:t}});return await i.executeAll(s)}finally{await i.dispose()}}};import{CheerioCrawler as _}from"crawlee";import*as q from"cheerio";import{CommonError as C,ErrorCode as R,NotFoundError as P}from"@isdk/common-error";var T="___BR___",j="___BLOCK___",A="___P___",O=/\s+/g,U=new RegExp(` *(${T}|${j}|${A}) *`,"g"),N=new RegExp(`(?:${j}|${A})+`,"g");var F={"&":"&","<":"<",">":">",""":"""},H={"'":"'"," ":" ","©":"©","®":"®","™":"™"};function L(t){return 
t?t.replace(/&(#?[a-zA-Z0-9]+);/g,t=>{const e=t.toLowerCase();if(F[e])return t;if(H[e])return H[e];if(t.startsWith("&#")){const e=t.startsWith("&#x")?parseInt(t.slice(3,-1),16):parseInt(t.slice(2,-1),10);if(!isNaN(e))return 160===e?" ":String.fromCharCode(e)}return t}):t}var M=class extends E{_ensureCheerioContext(t){if(!t.$&&t.body){let e="string"==typeof t.body?t.body:Buffer.isBuffer(t.body)?t.body.toString("utf-8"):JSON.stringify(t.body);e.trim().startsWith("<")||(e=`<html><body><pre>${e}</pre></body></html>`),t.$=q.load(e)}}async _buildResponse(t){this._ensureCheerioContext(t);const{request:e,response:i,body:s,$:n}=t,r=n?.html();let o="string"==typeof s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");r&&r!==o&&(o=r);let a=i?.headers;if(!a&&i?.rawHeaders){a={};const t=i.rawHeaders;for(let e=0;e<t.length;e+=2)a[t[e].toLowerCase()]=t[e+1]}const c={url:e.url,finalUrl:e.loadedUrl||e.url,statusCode:i?.statusCode??200,statusText:i?.statusMessage,headers:a||{},body:s,html:L(o),text:o};if(this.opts?.debug&&i?.timings){const t=i.timings;c.metadata={timings:{start:t.start,total:t.phases?.total,ttfb:t.phases?.firstByte,dns:t.phases?.dns,tcp:t.phases?.tcp,download:t.phases?.download}}}return c}async _querySelectorAll(t,e){if(Array.isArray(t)){if(0===t.length)return[];const{$:i}=t[0],s=t.map(t=>t.el[0]).filter(Boolean),n=i(s);return n.find(e).add(n.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}const{$:i,el:s}=t;return s.find(e).toArray().map(t=>({$:i,el:i(t)}))}async _nextSiblingsUntil(t,e){const{$:i,el:s}=t;return(e?s.nextUntil(e):s.nextAll()).toArray().map(t=>({$:i,el:i(t)}))}async _parentElement(t){const{$:e,el:i}=t,s=i.parent();return 0===s.length?null:{$:e,el:s}}async _isSameElement(t,e){return t.el[0]===e.el[0]}async _extractValue(t,e){const{$:i,el:s}=e,{attribute:n,type:r="string",mode:o="text"}=t;if(0===s.length)return null;let 
a="";if(n?a=s.attr(n)??null:"html"===r||"html"===o||"outerHTML"===o?(a="outerHTML"===o?i.html(s):s.html()??("html"===r?"":null),a&&(a=L(a.trim()))):a="innerText"===o?function(t){const e=t.clone();e.find("br").replaceWith(T),e.find("p").before(A).after(A),e.find("div, h1, h2, h3, h4, h5, h6, li, ul, ol, tr, dl, dt, dd, blockquote, pre, form, table, article, section, header, footer, nav, main, aside").before(j).after(j);let i=e.text();return i=i.replace(O," "),i=i.replace(U,"$1"),i=i.replace(N,t=>t.includes(A)?A:j),i=i.replaceAll(T,"\n"),i=i.replaceAll(A,"\n\n"),i=i.replaceAll(j,"\n"),i.trim()}(s):s.text().trim(),null===a)return null;switch(r){case"number":return parseFloat(a.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=a.toLowerCase();return"true"===t||"1"===t;default:return a}}async executeAction(t,e){const{$:i}=t;switch(e.type){case"dispose":return;case"extract":if(!i)throw new C(`Cheerio context not available for action: ${e.type}`,"extract");return this._extract(e.schema,{$:i,el:i.root()});case"click":{if(!i)throw new C(`Cheerio context not available for action: ${e.type}`,"click");const s=e.selector,n=i(s).first();let r;if(0===n.length)try{r=new URL(s,t.request.loadedUrl||t.request.url).href}catch{throw new C(`click: selector not found or invalid URL: ${s}`,"click")}else{if(!n.is("a")||!n.attr("href")){if(n.is('input[type="submit"], button[type="submit"], button, input')){const e=n.closest("form");if(e.length)return this.executeAction(t,{type:"submit",selector:e});throw new C("click: submit-like element without form","click")}throw new C(`click: unsupported element for http simulate. 
Selector: ${s}`,"click")}{const e=n.attr("href");r=new URL(e,t.request.loadedUrl||t.request.url).href}}const o=await t.sendRequest({url:r});return void await this._updateStateAfterNavigation(t,o)}case"fill":{if(!i)throw new C(`Cheerio context not available for action: ${e.type}`),"fill";const s=i(e.selector).first();if(0===s.length)throw new C(`fill: selector not found: ${e.selector}`);if(!s.is("input, textarea, select"))throw new C(`fill: not a form field: ${e.selector}`);return s.val(e.value),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":return void(e.options?.ms&&await new Promise(t=>setTimeout(t,e.options.ms)));case"pause":const s=this.ctx?.onPause;return void(s?(console.info(e.message||"Execution paused for manual intervention."),await s({message:e.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped."));case"submit":{if(!i)throw new C(`Cheerio context not available for action: ${e.type}`,"submit");const s="string"==typeof e.selector?i(e.selector).first():null!=e.selector?e.selector:i("form").first();if(0===s.length)throw new P(e.selector,"submit");const n=s.attr("action")||t.request.loadedUrl||t.request.url,r=(s.attr("method")||"GET").toUpperCase(),o=new URL(n,t.request.loadedUrl||t.request.url).href,a={};let c;if(s.find("input, select, textarea").each((t,e)=>{const s=i(e),n=s.attr("name");if(!n)return;const r=s.val();null!=r&&(a[n]=String(r))}),"GET"===r){const e=new URL(o);Object.entries(a).forEach(([t,i])=>e.searchParams.set(t,i)),c=await t.sendRequest({url:e.href,method:"GET"})}else{let i;const n={};"application/json"===(e.options?.enctype||s.attr("enctype")||"application/x-www-form-urlencoded")?(i=JSON.stringify(a),n["Content-Type"]="application/json"):(i=new URLSearchParams(a).toString(),n["Content-Type"]="application/x-www-form-urlencoded"),c=await t.sendRequest({url:o,method:"POST",body:i,headers:n})}return void await 
this._updateStateAfterNavigation(t,c)}case"getContent":return this.buildResponse(t);default:throw new C(`Unknown action type: ${e.type}`,"CheerioFetchEngine.executeAction",R.NotSupported)}}async _updateStateAfterNavigation(t,e){const i=e;t.response=i,t.body=i.body,t.$=void 0,i.url&&(t.request.loadedUrl=i.url),this.lastResponse=await this.buildResponse(t)}_createCrawler(t,e){return new _(t,e)}_getSpecificCrawlerOptions(t){return{additionalMimeTypes:["text/plain"],maxRequestRetries:1,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,proxyConfiguration:this.proxyConfiguration,preNavigationHooks:[({session:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors,this.opts?.timeoutMs&&(s.timeout={request:this.opts.timeoutMs})}]}}async goto(t,e){this.isPageActive&&this.dispatchAction({type:"dispose"}).catch(()=>{});const i="req-"+ ++this.requestCounter,s=new Promise((t,s)=>{const n=e?.timeoutMs||this.opts?.timeoutMs||3e4,r=setTimeout(()=>{this.pendingRequests.delete(i),this.navigationLock.release(),s(new C(`goto timed out after ${n}ms.`,"gotoTimeout",R.RequestTimeout))},n);this.pendingRequests.set(i,{resolve:e=>{clearTimeout(r),t(e)},reject:t=>{clearTimeout(r),s(t)}})});return this.requestQueue.addRequest({...e,url:t,headers:{...this.hdrs,...e?.headers},userData:{requestId:i},uniqueKey:`${t}-${i}`}).catch(t=>{const e=this.pendingRequests.get(i);e&&(this.pendingRequests.delete(i),this.navigationLock.release(),e.reject(t))}),await this.navigationLock,this.navigationLock=v(),s}};M.id="cheerio",M.mode="http",E.register(M);import{PlaywrightCrawler as B}from"crawlee";import{firefox as I}from"playwright";import{CommonError as z,ErrorCode as D,NotFoundError as J}from"@isdk/common-error";var G=class extends E{async _buildResponse(t){const{page:e,response:i,request:s,session:n}=t;if(!e||e.isClosed())return{url:s.url,finalUrl:s.loadedUrl||s.url,statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:"",html:"",text:""};const r=await 
e.content(),o=await e.textContent("body"),a=await e.context().cookies();n&&n.setCookies(a,s.url);const c={url:e.url(),finalUrl:e.url(),statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:r,html:r,text:o||""};if(this.opts?.debug&&i){const t="function"==typeof i.request?i.request():i.request;if(t&&"function"==typeof t.timing){const e=t.timing();c.metadata={timings:{start:e.startTime,total:e.responseEnd-e.startTime,ttfb:e.responseStart-e.requestStart,dns:e.domainLookupEnd-e.domainLookupStart,tcp:e.connectEnd-e.connectStart,download:e.responseEnd-e.responseStart}}}}return!1!==this.opts?.output?.cookies&&(c.cookies=a),c}async _querySelectorAll(t,e){if(Array.isArray(t)){const i=[];for(const s of t){const t=await s.locator(e).all();i.push(...t);await s.evaluate((t,e)=>t.matches(e),e)&&i.push(s)}return i}return t.locator(e).all()}async _nextSiblingsUntil(t,e){const i=await t.locator("xpath=following-sibling::*").all();if(!e)return i;const s=[];for(const t of i){if(await t.evaluate((t,e)=>t.matches(e),e))break;s.push(t)}return s}async _parentElement(t){const e=t.locator("xpath=..");return 0===await e.count()?null:e.first()}async _isSameElement(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;const n=await i.evaluate((t,e)=>t===e,s);return await i.dispose(),await s.dispose(),n}async _extractValue(t,e){const{attribute:i,type:s="string",mode:n="text"}=t;if(0===await e.count())return null;let r="";if(i?r=await e.getAttribute(i):"html"===s||"html"===n||"outerHTML"===n?(r="outerHTML"===n?await e.evaluate(t=>t.outerHTML):await e.innerHTML(),r&&(r=L(r))):r="innerText"===n?await e.innerText():await e.textContent(),null===r)return null;switch(r=r.trim(),s){case"number":return parseFloat(r.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=r.toLowerCase();return"true"===t||"1"===t;default:return r}}async executeAction(t,e){const{page:i}=t,s=this.opts?.timeoutMs||3e4;switch(e.type){case"navigate":{const s=await 
i.goto(e.url,{waitUntil:e.opts?.waitUntil||"domcontentloaded",timeout:this.opts?.timeoutMs||3e4});s&&(t={...t,response:s});const n=await this.buildResponse(t);return this.lastResponse=n,n}
/* extract: runs `_extract` with the schema against the body locator, refreshes lastResponse, returns the extracted data. */
case"extract":{const s=await this._extract(e.schema,i.locator("body"));return this.lastResponse=await this.buildResponse(t),s}
/* click: clicks the selector, waits for the "networkidle" load state, refreshes lastResponse; returns undefined. */
case"click":{await i.click(e.selector,{timeout:s}),await i.waitForLoadState("networkidle",{timeout:s});const n=await this.buildResponse(t);return void(this.lastResponse=n)}
/* fill: fills the selector with the given value and refreshes lastResponse; returns undefined. */
case"fill":await i.fill(e.selector,e.value,{timeout:s});const n=await this.buildResponse(t);return void(this.lastResponse=n);
/*
 * waitFor: optionally waits for a selector and/or the "networkidle" load state. An error
 * from those waits is rethrown unless options.failOnTimeout is exactly false. Afterwards
 * it waits options.ms milliseconds when that value is set.
 */
case"waitFor":try{e.options?.selector&&await i.waitForSelector(e.options.selector,{timeout:s}),e.options?.networkIdle&&await i.waitForLoadState("networkidle",{timeout:s})}catch(t){if(!1!==e.options?.failOnTimeout)throw t}return void(e.options?.ms&&await i.waitForTimeout(e.options.ms));
/*
 * submit: targets the first match of the selector (default "form") and throws `J` when
 * nothing matches. With enctype "application/json" the form fields are collected through
 * FormData inside the page, sent as a JSON body with fetch to the form's action using the
 * form's method, the response text replaces the page content, and the plain result object
 * becomes lastResponse. Otherwise form.submit() is called in the page and the engine waits
 * for "networkidle" before refreshing lastResponse.
 */
case"submit":{const n=e.selector||"form",r=i.locator(n).first();if(0===await r.count())throw new J(n,"submit");if("application/json"===(e.options?.enctype||"application/x-www-form-urlencoded")){const t=await r.elementHandle();if(!t)throw new z(`submit: could not get form handle for ${n}`,"submit");const e=await t.evaluate(async t=>{const e=new FormData(t),i={};e.forEach((t,e)=>{i[e]=t.toString()});const s=await fetch(t.action,{method:t.method,headers:{"Content-Type":"application/json"},body:JSON.stringify(i)}),n=await s.text();return{status:s.status,statusText:s.statusText,headers:Object.fromEntries(s.headers.entries()),body:n,html:n,text:n,url:t.action,finalUrl:s.url}});return await t.dispose(),await i.setContent(e.html),void(this.lastResponse=e)}return await r.evaluate(t=>t.submit()),await i.waitForLoadState("networkidle",{timeout:s}),void(this.lastResponse=await this.buildResponse(t))}
/* pause: awaits `this.ctx.onPause({message})` when a handler exists, logging before and after; otherwise only warns. */
case"pause":{const t=this.ctx?.onPause;return void(t?(console.info(e.message||"Execution paused for manual intervention."),await t({message:e.message}),console.info("Resuming 
execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped."))}
/* getContent returns a freshly built response; any other action type throws a NotSupported error. */
case"getContent":return this.buildResponse(t);default:throw new z(`Unknown action type: ${e.type}`,"PlaywrightFetchEngine.executeAction",D.NotSupported)}}
/* Crawler factory: returns a crawlee PlaywrightCrawler (imported as `B`). */
_createCrawler(t,e){return new B(t,e)}
/*
 * Crawler options specific to this engine.
 * - headless defaults to true when `t.browser.headless` is null/undefined.
 * - maxRequestRetries is `t.retries||3`.
 *   NOTE(review): a configured value of 0 is falsy and therefore becomes 3; confirm
 *   whether "no retries" is meant to be expressible here.
 * - The pre-navigation hook copies `throwHttpErrors` and, when `this.blockedTypes` is
 *   non-empty, installs a route handler that aborts requests whose resource type is in
 *   that set and continues the rest.
 * - With `opts.antibot`, fingerprints are disabled, launch options are obtained from a
 *   dynamically imported "camoufox-js" and used with Playwright's firefox launcher, and a
 *   post-navigation hook calls `handleCloudflareChallenge`.
 */
async _getSpecificCrawlerOptions(t){const e=t.browser?.headless??!0,i={maxRequestRetries:t.retries||3,headless:e,proxyConfiguration:this.proxyConfiguration,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,preNavigationHooks:[async({page:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors;const n=this.blockedTypes;n.size>0&&await e.route("**/*",t=>{n.has(t.request().resourceType())?t.abort():t.continue()})}]};if(this.opts?.antibot){i.browserPoolOptions={useFingerprints:!1};const{launchOptions:t}=await import("camoufox-js"),s=await t({headless:e});i.launchContext={launcher:I,launchOptions:s},i.postNavigationHooks=[async({page:t,handleCloudflareChallenge:e})=>{await e()}]}return i}
/*
 * Navigates to url `t`. With an active page this dispatches a "navigate" action.
 * Otherwise it requires `requestQueue` (throws if missing), stores resolve/reject under a
 * fresh "req-N" id in `pendingRequests`, enqueues the request (the id and `waitUntil` go in
 * userData) and returns the pending promise. No timeout is armed in this method.
 */
async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});if(!this.requestQueue)throw new z("RequestQueue not initialized","goto");const i="req-"+ ++this.requestCounter,s=new Promise((t,e)=>{this.pendingRequests.set(i,{resolve:t,reject:e})});return await this.requestQueue.addRequest({url:t,headers:this.hdrs,userData:{requestId:i,waitUntil:e?.waitUntil||"domcontentloaded"},uniqueKey:`${t}-${i}`}),s}};
/* Static identity of engine `G` (id "playwright", mode "browser") and its registration with `E`. */
G.id="playwright",G.mode="browser",E.register(G);
/* `K` (exported as ClickAction): requires params.selector and delegates to the engine's `click` with the remaining params. */
var K=class extends n{async onExecute(t,e){const{selector:i,...s}=e?.params||{};if(!i)throw new Error("Selector is required for click action");await this.delegateToEngine(t,"click",i,s)}};K.id="click",K.returnType="none",K.capabilities={http:"simulate",browser:"native"},n.register(K);
/* `V` (exported as FillAction): requires params.selector and a defined params.value, then delegates to the engine's `fill`. */
var V=class extends n{async onExecute(t,e){const{selector:i,value:s,...n}=e?.params||{};if(!i)throw new Error("Selector is required for fill action");if(void 0===s)throw new Error("Value is required for fill 
action");await this.delegateToEngine(t,"fill",i,s,n)}};V.id="fill",V.returnType="none",V.capabilities={http:"simulate",browser:"native"},n.register(V);
/* `Q` (exported as GetContentAction): returns the result of the engine's `getContent`, called with the action params. */
var Q=class extends n{async onExecute(t,e){return await this.delegateToEngine(t,"getContent",e?.params)}};Q.id="getContent",Q.returnType="response",Q.capabilities={http:"native",browser:"native"},n.register(Q);
/*
 * `W` (exported as GotoAction): uses params.url, falling back to the context url; throws
 * when neither is set or when the context has no engine. It stores the chosen url on the
 * context and returns `engine.goto(url, params)`.
 */
var W=class extends n{async onExecute(t,e,i){const s=e?.params,n=s?.url||t.url;if(!n)throw new Error("URL is required for goto action");const r=t.internal.engine;if(!r)throw new Error("No engine available");t.url=n;return await r.goto(n,s)}};W.id="goto",W.returnType="response",W.capabilities={http:"native",browser:"native"},n.register(W);
/* `Z` (exported as SubmitAction): delegates to the engine's `submit` with params.selector (may be undefined) and the remaining params. */
var Z=class extends n{async onExecute(t,e){const{selector:i,...s}=e?.params||{};await this.delegateToEngine(t,"submit",i,s)}};Z.id="submit",Z.returnType="none",Z.capabilities={http:"simulate",browser:"native"},n.register(Z);
/* `X` (exported as WaitForAction): calls `engine.waitFor(params)`; throws when the context has no engine. */
var X=class extends n{async onExecute(t,e){const i=t.internal.engine;if(!i)throw new Error("No engine available");await i.waitFor(e?.params)}};X.id="waitFor",X.returnType="none",X.capabilities={http:"native",browser:"native"},n.register(X);
/* `Y` (exported as ExtractAction): requires params (the schema) and returns the result of the engine's `extract`. */
var Y=class extends n{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("Schema is required for extract action");return this.delegateToEngine(t,"extract",i)}};Y.id="extract",Y.returnType="any",Y.capabilities={http:"native",browser:"native"},n.register(Y);
/*
 * `tt` (exported as PauseAction): acts only when the engine mode is "browser". If
 * params.selector is given, it first runs `engine.extract({selector, attribute})` and
 * returns without pausing when that result is falsy. It then calls `engine.pause(message)`
 * when the engine has a `pause` member; in every other situation it logs a warning.
 */
var tt=class extends n{async onExecute(t,e){const{selector:i,message:s,attribute:n}=e?.params||{},r=t.internal.engine;if("browser"===r?.mode){if(i){if(!await(r?.extract({selector:i,attribute:n})))return}r&&"pause"in r?await r.pause(s):console.warn("[PauseAction] was called, but the current engine does not support `pause`. Skipped.")}else console.warn("[PauseAction] can only run in browser engine. 
Skipped.")}};
/* `et` (exported as fetchWeb): creates a new `k` (exported as WebFetcher) and returns its `fetch(t, e)` result. */
async function et(t,e){return(new k).fetch(t,e)}tt.id="pause",tt.capabilities={http:"native",browser:"native"},tt.returnType="none",n.register(tt);export{M as CheerioFetchEngine,K as ClickAction,t as DefaultFetcherProperties,Y as ExtractAction,n as FetchAction,i as FetchActionResultStatus,E as FetchEngine,$ as FetchSession,e as FetcherOptionKeys,V as FillAction,Q as GetContentAction,W as GotoAction,tt as PauseAction,G as PlaywrightFetchEngine,Z as SubmitAction,X as WaitForAction,k as WebFetcher,et as fetchWeb};
|
|
1
|
+
var t={engine:"auto",enableSmart:!0,useSiteRegistry:!0,antibot:!1,debug:!1,headers:{},cookies:[],throwHttpErrors:void 0,output:{cookies:!0,sessionState:!0},proxy:[],blockResources:[],storage:{purge:!0},ignoreSslErrors:!0,browser:{engine:"playwright",headless:!0,waitUntil:"domcontentloaded"},http:{method:"GET"},timeoutMs:6e4,requestHandlerTimeoutSecs:void 0,maxConcurrency:1,maxRequestsPerMinute:1e3,delayBetweenRequestsMs:0,retries:0,sites:[]},e=Object.keys(t).concat(["actions","onPause"]),i=(t=>(t[t.Failed=0]="Failed",t[t.Success=1]="Success",t[t.Skipped=2]="Skipped",t))(i||{}),s=class t{static register(t){const e=t.id;if(!e)throw new Error("FetchAction.register: actionClass.id is required");this.registry.set(e,t)}static get(t){return this.registry.get(t)}static create(e){const i="string"==typeof e?e:e.id||e.name||e.action;if(!i)throw new Error("Action must have id, name or action");const s=i instanceof t?i.constructor:this.registry.get(i);return s?new s:void 0}static has(t){return this.registry.has(t)}static list(){return Array.from(this.registry.keys())}static getCapability(t){return this.capabilities[t]??"noop"}getCapability(t){return this.constructor.getCapability(t)}get id(){return this.constructor.id}get returnType(){return this.constructor.returnType}get capabilities(){return this.constructor.capabilities}async delegateToEngine(t,e,...i){const s=t.internal.engine;if(!s)throw new Error("No engine available");if("function"!=typeof s[e])throw new Error(`Engine does not have a method named '${String(e)}'`);return await s[e](...i)}installCollectors(e,i){const s=i?.collectors;if(!s?.length)return;const n=[],c=new Set;for(const i of s){const s=r(i.activateOn),l=r(i.collectOn),u=r(i.deactivateOn),h=!(i.background??!0),f=t.create(i);if(!f)continue;let d=!1,w=!1,p=0;const y=async t=>{if(!d&&!w){d=!0;try{await(f.onBeforeExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:f.id,phase:"before",error:t})}}},m=async(t,s)=>{if(!w){d||await 
y(s);try{const n=Promise.resolve(f.onExecute?.(e,i,s)).then(s=>{var n,r;if(i.storeAs){((n=e.outputs)[r=i.storeAs]||(n[r]=[])).push(s)}return e.eventBus.emit("collector:result",{action:this.id,collector:i.id||i.name,event:t,result:s}),s}).catch(s=>{e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,event:t,phase:"exec",error:s})}).finally(()=>{p++});h&&(c.add(n),n.finally(()=>c.delete(n)))}catch(i){e.eventBus.emit("collector:error",{action:this.id,collector:f.id,event:t,phase:"exec",error:i})}}},g=async()=>{if(!w){0===p&&m("collector:after"),w=!0;try{await(f.onAfterExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,phase:"after",error:t})}finally{e.eventBus.emit("collector:end",{action:this.id,collector:i.id||i.name}),b.forEach(t=>t())}}},x=o(e,s,y),b=a(e,l,m),v=o(e,u,g);if(n.push(...x,...b,...v),!s.length&&!l.length&&!u.length){const t=()=>{g()};e.eventBus.once(`action:${this.id}.end`,t),n.push(()=>e.eventBus.off("fetcher:action:end",t))}}return n.length||c.size>0?{cleanup:()=>n.forEach(t=>t()),awaitExecPendings:async()=>{c.size>0&&await Promise.allSettled(Array.from(c))}}:void 0}async beforeExec(t,e){t.internal.actionStack||(t.internal.actionStack=[]);const i=t.internal.actionStack,s=i.length,n=i.length>0?i[i.length-1].id:void 0,r={...e,id:this.id,depth:s,parent:n};i.push(r),t.currentAction=r;const o={action:this,context:t,options:e,index:e?.index,depth:s,stack:[...i]};t.eventBus.emit(`action:${this.id}.start`,o),t.eventBus.emit("action:start",o),await(this.onBeforeExec?.(t,e));return{entry:o,collectors:this.installCollectors(t,e)}}async afterExec(t,e,i,s){const n=t.internal.actionStack,r=n.length-1,o=s?.collectors;try{if(await(o?.awaitExecPendings()),t.lastResult=i,"response"!==i?.returnType||i.error||(t.lastResponse=i.result),e?.storeAs){const s=t.outputs[e.storeAs],n=i?.result;"object"!=typeof s||null===s||"object"!=typeof 
n||null===n||Array.isArray(s)||Array.isArray(n)?t.outputs[e.storeAs]=n:t.outputs[e.storeAs]={...s,...n}}i?.error&&(t.currentAction.error=i.error),await(this.onAfterExec?.(t,e));const s={action:this,context:t,options:e,result:i,depth:r,stack:[...n]};i?.error&&(s.error=i.error);try{t.eventBus.emit(`action:${this.id}.end`,s)}catch(t){}try{t.eventBus.emit("action:end",s)}catch(t){}}finally{try{o?.cleanup()}finally{n.pop();const e=n.length;t.currentAction=e>0?n[e-1]:void 0}}}async execute(t,e){e?.args&&!e.params&&(e.params=e.args);const i=await this.beforeExec(t,e),s=e?.failOnError??!0;let n;try{return t.throwHttpErrors=s,n=await this.onExecute(t,e),n&&n.returnType||(n={status:1,returnType:this.returnType??"any",result:n}),n}catch(e){if(n={status:0,error:e,meta:{id:this.id,engineType:t.engine,capability:this.getCapability(t.engine)}},s)throw e;return n}finally{await this.afterExec(t,e,n,i)}}};s.registry=new Map,s.returnType="any",s.capabilities={http:"noop",browser:"noop"};var n=s;function r(t){return t?Array.isArray(t)?t:[t]:[]}function o(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=(...t)=>{i(t[0])};t.eventBus.once(n,e),s.push(()=>t.eventBus.off(n,e))}return s}function a(t,e,i){const s=[];for(const n of e)if("string"==typeof n||n instanceof RegExp){const e=t=>i(n,t);t.eventBus.on(n,e),s.push(()=>t.eventBus.off(n,e))}return s}import{EventEmitter as c}from"events-ex";import{defaultsDeep as l}from"lodash-es";import{customAlphabet as u}from"nanoid";var h=u("0123456789abcdefghijklmnopqrstuvwxyz",12);function f(t,e,...i){if(!t)return;const{prefix:s,id:n,category:r}=e;if(!0===t||t===r||Array.isArray(t)&&t.includes(r)){const t=n?`:${n}`:"";console.log(`[${s}${t}:${r}]`,...i)}}import{defaultsDeep as d,merge as w}from"lodash-es";import{EventEmitter as p}from"events-ex";import{CommonError as y}from"@isdk/common-error";import{Configuration as m,KeyValueStore as g,PERSIST_STATE_KEY as x,RequestQueue as b,ProxyConfiguration as 
v}from"crawlee";import{CommonError as $}from"@isdk/common-error";var _=new Set(["string","number","boolean","html","object","array"]),k=new Set(["selector","has","exclude","required","strict","relativeTo","order","anchor","depth"]);function E(t){if("string"==typeof t)return{type:"string",selector:t,mode:"text"};if(!t||"object"!=typeof t)return{type:"string",mode:"text"};let e={...t};if(function(t){if(!t||"object"!=typeof t)return!1;if(Array.isArray(t))return!1;if("type"in t)return"string"!=typeof t.type||!_.has(t.type);const e=Object.keys(t);if(0===e.length)return!1;for(const t of e)if(!["selector","attribute","has","exclude","mode","required","strict","relativeTo","order","anchor","depth"].includes(t))return!0;return!1}(e)){const t={};for(const i of Object.keys(e))k.has(i)||(t[i]=E(e[i]),delete e[i]);e.type="object",e.properties=t}if(e.type||(e.type="string"),"object"===e.type){const t=e;t.properties||(t.properties={});for(const e in t.properties)t.properties[e]=E(t.properties[e]);delete t.mode,delete t.items,delete t.attribute}else if("array"===e.type){const t=e;t.attribute&&!t.items&&(t.items={type:"string",attribute:t.attribute,mode:"text"},delete t.attribute),t.items||(t.items={type:"string",mode:"text"}),t.items=E(t.items),"string"==typeof t.mode&&(t.mode={type:t.mode})}else{const t=e;t.mode||("html"===t.type?t.mode="html":t.mode="text")}if(e.selector&&(e.has||e.exclude)){const{selector:t,has:i,exclude:s}=e,n=t.split(",").map(t=>t.trim()).map(t=>{let e=t;return i&&(e+=`:has(${i})`),s&&(e+=`:not(${s})`),e});e.selector=n.join(", "),delete e.has,delete e.exclude}return e}async function C(t,e,i){const s=E(t);return S.call(this,s,e,i)}async function S(t,e,i){const s=t.type,n=t.selector,r=t.strict??i;if(!e)return this._logDebug("extract",`_extract: No scope for selector "${n||""}", type "${s||"value"}"`),"array"===s?[]:null;switch(s){case"object":return q.call(this,t,e,r);case"array":return R.call(this,t,e,r);default:return T.call(this,t,e,r)}}async function 
q(t,e,i){const{selector:s,properties:n,strict:r}=t,o=r??i,a=t._skipSelector;let c=e;if(s&&!a){const t=await this._querySelectorAll(e,s);c=t.length>0?t[0]:null,this._logDebug("extract",`_extractObject: selector "${s}" found ${t.length} elements`)}if(!c){if(this._logDebug("extract",`_extractObject: scope not found for selector "${s||""}"`),o&&t.required)throw new $(`Required object "${s||""}" is missing.`,"extract");return null}let l=t.depth??0;const u=l;for(;;){const{result:i,hasValue:r,missingRequired:h}=await A.call(this,t,c,o);if(0===h.length)return!s&&!r&&Object.keys(n).length>0?(this._logDebug("extract","_extractObject result: null"),null):(this._logDebug("extract","_extractObject result:",i),i);let f=!1;if(l>0)if(a)f=!0;else{const t=await this._isSameElement(c,e),i=await this._contains(e,c);f=!t&&i}if(f){const t=await this._parentElement(c);if(t){let i=!0;if(a||(i=await this._isSameElement(e,t)||await this._contains(e,t)),i){this._logDebug("extract",`_extractObject: missing required fields [${h.join(", ")}], bubbling up from depth ${u-l} to ${u-l+1}`),c=t,l--;continue}}}if(o)throw new $(`Required property "${h[0]}" is missing.`,"extract");return null}}async function A(t,e,i){const{properties:s,relativeTo:n,order:r}=t,o={},a=[];let c=!1;const l=r||Object.keys(s);let u=e;const h=new Map,f="previous"===n;for(const t of l){const n=s[t];if(!n)continue;this._logDebug("extract",`_extractObject: extracting property "${t}"`);let r,l=u;if(n.anchor){const r=await U.call(this,n.anchor,s,h,e,f,n.depth);if(!r){if(i)throw new $(`Anchor "${n.anchor}" not found or out of scope.`,"extract");o[t]=null,n.required&&a.push(t);continue}l=r.scopeForField,f&&(u=l)}let d=null;const w=n.selector,p="array"===n.type;if(w){let t=await this._querySelectorAll(l,w);if(t.length>0){void 0!==n.depth&&"object"!==n.type&&(t=await Promise.all(t.map(t=>N.call(this,t,l,n.depth)))),d=t[0];const e={...n,_skipSelector:!0};if(r=await S.call(this,e,p?t:d,i),f&&!n.anchor){const 
e=p&&Array.isArray(r)?t[t.length-1]:d;u=await F.call(this,e,u)}p&&(d=t[t.length-1])}else r=null}else r=await S.call(this,n,l,i),null!==r&&(d=Array.isArray(l)?l[0]:l);d&&h.set(t,d),null===r&&n.required&&a.push(t),null!==r&&(c=!0),o[t]=r}return{result:o,hasValue:c,missingRequired:a}}async function R(t,e,i){const{selector:s,items:n,mode:r,strict:o}=t,a=o??i,c=t._skipSelector;let l=s&&!c?await this._querySelectorAll(e,s):Array.isArray(e)?e:[e];s&&!c&&void 0!==t.depth&&(l=await Promise.all(l.map(i=>N.call(this,i,e,t.depth)))),this._logDebug("extract",`_extractArray: selector "${s||""}" found ${l.length} elements`);const u=j.call(this,r);if(void 0!==a&&void 0===u.strict&&(u.strict=a),(!r||"columnar"===u.type)&&1===l.length&&n){this._logDebug("extract","_extractArray: trying columnar extraction");const t=await M.call(this,n,l[0],u);if(t)return t}if("segmented"===u.type&&n){this._logDebug("extract",`_extractArray: trying segmented extraction for ${l.length} containers`);const t=[];let e=!1;for(const i of l){const s=await O.call(this,n,i,u);s&&(e=!0,t.push(...s))}if(e)return t}return this._logDebug("extract",`_extractArray: using nested extraction for ${l.length} elements`),P.call(this,n,l,{strict:u.strict})}async function T(t,e,i){const{selector:s}=t,n=t._skipSelector,r=t.strict??i;let o=e;if(s&&!n){const i=await this._querySelectorAll(e,s);o=i.length>0?i[0]:null,o&&void 0!==t.depth&&(o=await N.call(this,o,e,t.depth)),this._logDebug("extract",`_extractValue: selector "${s}" found ${i.length} elements`)}else Array.isArray(e)&&(o=e.length>0?e[0]:null);if(!o){if(this._logDebug("extract",`_extractValue: element not found for selector "${s||""}"`),r&&t.required)throw new $(`Required value "${s||""}" is missing.`,"extract");return null}const a=await this._extractValue(t,o);return this._logDebug("extract",`_extractValue: extracted for selector "${s||""}":`,a),a}function j(t){return t?"string"==typeof t?{type:t}:t:{type:"nested"}}async function P(t,e,i){const 
s=[],n=t.required,r=!0===i?.strict,o="object"===t.type||"array"===t.type;for(const i of e){const e=await S.call(this,t,i,r);if(null!==e)s.push(e);else{if(n&&r)throw new $("Required item is missing in array.","extract");n||o||s.push(null)}}return s}async function M(t,e,i){const s="object"===t.type,n=!0===i?.strict,r=!0===i?.inference;if(s){const i=t.properties,s=Object.keys(i);if(0===s.length)return null;const o={},a={};let c=null,l=0,u=[];for(const t of s){const s=i[t];if("array"===s.type||"object"===s.type)return this._logDebug("extract",`_extractColumnar: field "${t}" has nested structure, columnar not supported`),null;const h=s;let f=[];f=h.selector?await this._querySelectorAll(e,h.selector):[e],a[t]=f;const d=f.length;if(this._logDebug("extract",`_extractColumnar: field "${t}" with selector "${h.selector||""}" found ${d} matches`),d>l&&(l=d,u=f),h.selector)if(null===c)c=d,this._logDebug("extract",`_extractColumnar: set commonCount to ${c}`);else if(c!==d)if(this._logDebug("extract",`_extractColumnar: count mismatch for field "${t}": ${d} vs ${c}`),r&&l>1)c=-1,this._logDebug("extract","_extractColumnar: mismatch marked for inference");else if(n){if(!(1===d&&await this._isSameElement(f[0],e))){if(s.required&&d<c)throw new $(`Required field "${t}" is missing at index ${d}.`,"extract");throw new $(`Columnar extraction mismatch: field "${t}" has ${d} matches, but expected ${c}.`,"extract")}}const w=await Promise.all(f.map(t=>this._extractValue(h,t)));this._logDebug("extract",`_extractColumnar: field "${t}" values:`,w),o[t]=w}if(r&&-1===c&&l>1&&u.length>0){const i=[];for(const t of u){const s=await this._findContainerChild(t,e);s&&i.push(s)}const s=[];for(const t of i){await this._findClosestAncestor(t,s)||s.push(t)}if(s.length>1)return P.call(this,t,s,{strict:n})}if(l<=1)return null;if(-1===c&&n)return null;const h=n&&-1!==c?c:l,f={};if(h>1)for(const t of s){if(1===o[t].length){(!i[t].selector||await this._isSameElement(a[t][0],e))&&(f[t]=!0)}}const d=[];for(let 
t=0;t<h;t++){const e={};let r=!1;for(const a of s){const s=o[a],c=i[a];let l=s[t];if(f[a]&&(l=s[0]),void 0===l&&(l=null),null===l&&c.required){if(this._logDebug("extract",`_extractColumnar: skipping row ${t} because required field "${a}" is null`),n)throw new $(`Required field "${a}" is missing at index ${t}.`,"extract");r=!0;break}e[a]=l}r||d.push(e)}return d}{const i=t;if(!i.selector)return null;const s=await this._querySelectorAll(e,i.selector);if(s.length<=1)return null;const n=await Promise.all(s.map(t=>this._extractValue(i,t)));return i.required?n.filter(t=>null!==t):n}}async function O(t,e,i){if(!("object"===t.type))return null;const s=t.properties,n=Object.keys(s);if(0===n.length)return null;let r;if(i?.anchor)r=s[i.anchor]?.selector||i.anchor;else for(const t of n)if(s[t].selector){r=s[t].selector;break}if(!r)return this._logDebug("extract","_extractSegmented: no anchor selector found, falling back to nested"),null;const o=await this._querySelectorAll(e,r);if(this._logDebug("extract",`_extractSegmented: anchor selector "${r}" found ${o.length} elements`),0===o.length){if(i?.strict)throw new $(`Segmented extraction failed: no elements found for anchor selector "${r}".`,"extract");return[]}const a=[];for(let s=0;s<o.length;s++){const n=o[s],c=s>0?o[s-1]:null,l=s<o.length-1?o[s+1]:null;let u,h=n,f=null;if(c&&(f=await this._findCommonAncestor(n,c)),!f&&l)f=await this._findCommonAncestor(n,l);else if(f&&l){const t=await this._findCommonAncestor(n,l);t&&await this._contains(f,t)&&(f=t)}if(f){const t=await N.call(this,n,f,i?.depth);t&&!await this._isSameElement(t,n)&&(h=t)}else{const t=await N.call(this,n,e,i?.depth);t&&(h=t)}if(await this._isSameElement(h,n)){u=[n,...await this._nextSiblingsUntil(n,r)],this._logDebug("extract",`_extractSegmented: segment ${s} (flat) created with ${u.length} elements`)}else u=h,this._logDebug("extract",`_extractSegmented: segment ${s} (nested) identified as container element`);const 
d={...t};i?.relativeTo&&!d.relativeTo&&(d.relativeTo=i.relativeTo);const w=await S.call(this,d,u,i?.strict),p=t.required,y="object"===t.type||"array"===t.type;if(null!==w)a.push(w);else{if(p&&i?.strict)throw new $("Required item is missing in array.","extract");p||y||a.push(null)}}return a}async function N(t,e,i){const s=Array.isArray(e),n=s?e:[e],r=s?await this._findClosestAncestor(t,n):await this._findContainerChild(t,e);if(void 0===i||!r)return r;let o=t;for(let t=0;t<i&&!await this._isSameElement(o,r);t++){const t=await this._parentElement(o);if(!t||!await this._contains(r,t))break;o=t}return o}async function U(t,e,i,s,n,r){let o=null;if(e.hasOwnProperty(t))o=i.get(t)||null;else{const e=await this._querySelectorAll(s,t);e.length>0&&(o=e[0])}if(o){const t=[];let e=o,i=0;const n=void 0!==r?r:1e3;for(;e&&i<=n;){const n=await this._nextSiblingsUntil(e);t.push(...n);const r=await this._parentElement(e);if(!r)break;if(Array.isArray(s)?null!==await this._findClosestAncestor(r,s):await this._isSameElement(r,s))break;e=r,i++}if(t.length>0||void 0!==r)return{scopeForField:t}}return null}async function F(t,e){const i=await N.call(this,t,e);if(i){if(!Array.isArray(e))return this._nextSiblingsUntil(i);{let t=e.indexOf(i);if(-1===t)for(let s=0;s<e.length;s++)if(await this._isSameElement(e[s],i)){t=s;break}if(-1!==t)return e.slice(t+1)}}return Array.isArray(e)?e:[e]}function H(){let t=()=>{};const e=new Promise(e=>{t=e});return e.release=t,e}m.getGlobalConfig().set("persistStorage",!1);var L={scripts:["script"],styles:["style",'link[rel="stylesheet"]'],svgs:["svg"],images:["img","picture","canvas"],hidden:["[hidden]",'[style*="display:none"]','[style*="display: none"]']},B=class{constructor(){this.hdrs={},this._initializedSessions=new Set,this.pendingRequests=new Map,this.requestCounter=0,this.actionEmitter=new p,this.isPageActive=!1,this.isEngineDisposed=!1,this.navigationLock=function(){const t=H();return 
t.release(),t}(),this.isExecutingAction=!1,this.actionQueue=[],this.isProcessingActionLoop=!1,this.blockedTypes=new Set}static register(t){const e=t.id;if(!e)throw new Error("Engine must define static id");if(this.registry.has(e))throw new Error(`Engine id duplicated: ${e}`);this.registry.set(e,t)}static get(t){return this.registry.get(t)}static getByMode(t){for(const[e,i]of this.registry.entries())if(i.mode===t)return i}static async create(e,i){const s=d(i,e,t),n=s.engine??e.engine,r=n?this.get(n)??this.getByMode(n):null;if(r){const t=new r;return await t.initialize(e,s),t}}_logDebug(t,...e){f(this.opts?.debug,{prefix:"FetchEngine",id:this.id,category:t},...e)}_getTrimInfo(t){let{selectors:e=[],presets:i=[]}=t;"string"==typeof e&&(e=[e]),"string"==typeof i&&(i=[i]);const s=i.includes("all"),n=[...e];for(const[t,e]of Object.entries(L))(s||i.includes(t))&&n.push(...e);return{selectors:n,removeComments:s||i.includes("comments"),removeHidden:s||i.includes("hidden")}}async _extract(t,e,i){return S.call(this,t,e,i)}_normalizeArrayMode(t){return j.call(this,t)}async _extractNested(t,e,i){return P.call(this,t,e,i)}async _extractColumnar(t,e,i){return M.call(this,t,e,i)}async _extractSegmented(t,e,i){return O.call(this,t,e,i)}async buildResponse(t){const e=await this._buildResponse(t),i=e.headers["content-type"]||"";return e.contentType=i.split(";")[0].trim(),!1!==this.opts?.output?.cookies?!e.cookies&&t.session&&(e.cookies=t.session.getCookies(t.request.url)):delete e.cookies,!1!==this.opts?.output?.sessionState?this.crawler?.sessionPool&&(e.sessionState=await this.crawler.sessionPool.getState()):delete e.sessionState,this.opts?.debug&&(e.metadata={...e.metadata,mode:this.mode,engine:this.id,proxy:t.proxyInfo?.url||("string"==typeof this.opts.proxy?this.opts.proxy:Array.isArray(this.opts.proxy)?this.opts.proxy[0]:void 0)}),e}waitFor(t){return this.dispatchAction({type:"waitFor",options:t})}click(t){return this.dispatchAction({type:"click",selector:t})}mouseMove(t){return 
this.dispatchAction({type:"mouseMove",params:t})}mouseClick(t){return this.dispatchAction({type:"mouseClick",params:t})}keyboardType(t,e){return this.dispatchAction({type:"keyboardType",params:{text:t,delay:e}})}keyboardPress(t,e){return this.dispatchAction({type:"keyboardPress",params:{key:t,delay:e}})}fill(t,e){return this.dispatchAction({type:"fill",selector:t,value:e})}submit(t,e){return this.dispatchAction({type:"submit",selector:t,options:e})}trim(t){return this.dispatchAction({type:"trim",options:t})}pause(t){return this.dispatchAction({type:"pause",message:t})}evaluate(t){return this.dispatchAction({type:"evaluate",params:t})}extract(t){t&&"object"==typeof t&&t.schema&&(t=t.schema);const e=E(t);return this.dispatchAction({type:"extract",schema:e})}get id(){return this.constructor.id}async getState(){return{cookies:await this.cookies(),sessionState:await(this.crawler?.sessionPool?.getState())}}get mode(){return this.constructor.mode}get context(){return this.ctx}async initialize(t,e){if(this.ctx)return;w(t,e),this.ctx=t,this.opts=t,this.hdrs=function(t){const e={};if(t&&"object"==typeof t)for(const[i,s]of Object.entries(t))e[i.toLowerCase()]=s;return e}(t.headers),this._initialCookies=[...t.cookies??[]],t.internal||(t.internal={}),t.internal.engine=this,t.engine=this.mode,this.actionEmitter.setMaxListeners(100);const i=t.storage||{},s=i.persist??!1,n=this.config=new m({persistStorage:s,storageClientOptions:{persistStorage:s,...i.config},...i.config}),r=i.id||t.id;this.requestQueue=await b.open(r,{config:n});const o=this.opts?.proxy?"string"==typeof this.opts.proxy?[this.opts.proxy]:this.opts.proxy:void 0;o?.length&&(this.proxyConfiguration=new v({proxyUrls:o}));const a=await this._getSpecificCrawlerOptions(t),c=d({persistenceOptions:{enable:!0,storeId:r},persistStateKeyValueStoreId:r},t.sessionPoolOptions,{maxPoolSize:1,sessionOptions:{maxUsageCount:1e3,maxErrorScore:3}});t.sessionState&&t.cookies&&t.cookies.length>0&&console.warn('[FetchEngine] Warning: 
Both "sessionState" and "cookies" are provided. Explicit "cookies" will override any conflicting cookies restored from "sessionState".');const l={...d(a,{requestQueue:this.requestQueue,maxConcurrency:1,minConcurrency:1,useSessionPool:!0,persistCookiesPerSession:!0,sessionPoolOptions:c}),requestHandler:this._requestHandler.bind(this),errorHandler:this._failedRequestHandler.bind(this),failedRequestHandler:this._failedRequestHandler.bind(this)};l.preNavigationHooks||(l.preNavigationHooks=[]),l.preNavigationHooks.unshift(({crawler:t,session:e,request:i},s)=>{if(this.currentSession=e,e&&!this._initializedSessions.has(e.id)){if(this._initialCookies&&this._initialCookies.length>0){const t=this._initialCookies.map(t=>{const e={...t};return"no_restriction"===e.sameSite&&(e.sameSite="None"),e});e.setCookies(t,i.url)}this._initializedSessions.add(e.id)}});const u=this.crawler=this._createCrawler(l,n),h=this.kvStore=await g.open(r,{config:n}),f=await h.getValue(x);!t.sessionState||f&&!t.overrideSessionState||await h.setValue(x,t.sessionState),this.isCrawlerReady=!0,this.crawlerRunPromise=u.run(),this.crawlerRunPromise.finally(()=>{this.isCrawlerReady=!1}).catch(t=>{console.error("Crawler background error:",t)})}async cleanup(){await(this._cleanup?.()),await this._commonCleanup();const t=this.ctx;t&&t.internal?.engine===this&&(t.internal.engine=void 0),this.ctx=void 0,this.opts=void 0}async _processAction(t,e){switch(this._logDebug(e.type,"Executing action:",e),e.type){case"extract":return C.call(this,e.schema,this._getInitialElementScope(t));case"pause":return this._handlePause(e);case"getContent":return this.buildResponse(t);case"waitFor":return e.options?.ms&&1===Object.keys(e.options).length?void await new Promise(t=>setTimeout(t,e.options.ms)):this.executeAction(t,e);default:return this.executeAction(t,e)}}async _handlePause(t){const e=this.ctx?.onPause;e?(console.info(t.message||"Execution paused for manual intervention."),await 
e({message:t.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped.")}async _executePendingActions(t){if(this.isEngineDisposed)return;this.activeContext=t;const e=async()=>{if(!this.isProcessingActionLoop){this.isProcessingActionLoop=!0,this._logDebug("action-loop",`Action loop started. Current queue size: ${this.actionQueue.length}`);try{for(;this.actionQueue.length>0&&this.isPageActive&&!this.isEngineDisposed;){const e=this.actionQueue.shift();this._logDebug("action-loop",`Processing action: ${e.action.type}`,e.action);try{if("dispose"===e.action.type){this.actionEmitter.emit("dispose"),e.resolve();continue}this.isExecutingAction=!0;const i=await this._processAction(t,e.action);this._logDebug("action-loop",`Action completed: ${e.action.type}`),e.resolve(i)}catch(t){this._logDebug("action-loop",`Action failed: ${e.action.type}`,t),e.reject(t)}finally{this.isExecutingAction=!1,await new Promise(t=>setImmediate(t))}}}finally{this.isProcessingActionLoop=!1,this._logDebug("action-loop","Action loop paused/finished.")}}};await new Promise(t=>{const i=t=>{this.actionQueue.push(t),e()},s=()=>{this.actionEmitter.removeListener("dispatch",i),this.activeContext=void 0,t()};this.actionEmitter.on("dispatch",i),this.actionEmitter.once("dispose",s),e(),this.isEngineDisposed&&(s(),this.actionEmitter.removeListener("dispose",s))})}async _sharedRequestHandler(t){const{request:e}=t;this._logDebug("request",`Processing request: ${e.url}`);try{this.currentSession=t.session,this.isPageActive=!0;const i=this.pendingRequests.get(e.userData.requestId);if(i){const s=await this.buildResponse(t),n=!s.statusCode||s.statusCode>=400;if(this.ctx?.throwHttpErrors&&n){const t=new y(`Request for ${s.finalUrl} failed with status ${s.statusCode||"N/A"}`,"request",s.statusCode);i.reject(t)}else this.lastResponse=s,i.resolve(s);this.pendingRequests.delete(e.userData.requestId)}await 
this._executePendingActions(t)}finally{if(this.currentSession){const t=this.currentSession.getCookies(e.url);t&&(this._initialCookies=t)}this.isPageActive=!1,this.navigationLock.release()}}async _sharedFailedRequestHandler(t,e){const{request:i}=t,s=this.pendingRequests.get(i.userData.requestId);if(s&&e&&this.ctx?.throwHttpErrors){this.pendingRequests.delete(i.userData.requestId);const t=e.response,n=t?.statusCode||500,r=t?.url?t.url:i.url,o=new y(`Request${r?" for "+r:""} failed: ${e.message}`,"request",n);s.reject(o)}return this._sharedRequestHandler(t)}async dispatchAction(t){if(!this.isPageActive)throw new Error("No active page. Call goto() before performing actions.");return this.isExecutingAction&&this.activeContext?(this._logDebug(t.type,"Re-entrant action execution:",t),await this._processAction(this.activeContext,t)):new Promise((e,i)=>{this.actionEmitter.emit("dispatch",{action:t,resolve:e,reject:i})})}async _requestHandler(t){await this._sharedRequestHandler(t)}async _failedRequestHandler(t,e){await this._sharedFailedRequestHandler(t,e)}async _commonCleanup(){if(this.isEngineDisposed=!0,this._initializedSessions.clear(),this.actionEmitter.emit("dispose"),this.navigationLock?.release(),this.pendingRequests.size>0){for(const[,t]of this.pendingRequests)t.reject(new Error("Cleanup:Request cancelled"));this.pendingRequests.clear()}if(this.crawler){try{await(this.crawler.teardown?.())}catch(t){console.error("crawler teardown error:",t)}this.crawler=void 0}this.crawlerRunPromise=void 0,this.isCrawlerReady=void 0;const t=(this.opts?.storage||{}).purge??!0;this.requestQueue&&(t&&await this.requestQueue.drop().catch(t=>console.error("Error dropping requestQueue:",t)),this.requestQueue=void 0),this.kvStore&&(t&&await this.kvStore.drop().catch(t=>console.error("Error dropping kvStore:",t)),this.kvStore=void 0),this.actionEmitter.removeAllListeners(),this.pendingRequests.clear(),this.actionQueue=[],this.config=void 0}async blockResources(t,e){return 
e&&this.blockedTypes.clear(),t.forEach(t=>this.blockedTypes.add(t)),t.length}getContent(){return this.lastResponse?Promise.resolve(this.lastResponse):Promise.reject(new Error("No content fetched yet. Call goto() first."))}async headers(t,e){if(void 0===t)return{...this.hdrs};if("string"==typeof t&&void 0===e)return this.hdrs[t.toLowerCase()]||"";if(null!==t&&"object"==typeof t){const i={};for(const[e,s]of Object.entries(t))i[e.toLowerCase()]=String(s);return this.hdrs=!0===e?i:{...this.hdrs,...i},!0}return"string"==typeof t&&("string"==typeof e?this.hdrs[t.toLowerCase()]=e:null===e&&delete this.hdrs[t.toLowerCase()],!0)}async cookies(t){const e=this.lastResponse?.url||"";if(Array.isArray(t))return this.currentSession?this.currentSession.setCookies(t,e):this._initialCookies=[...t],!0;if(null===t)return this.currentSession,this._initialCookies=[],!0;if(this.currentSession){return this.currentSession.getCookies(e)}return[...this._initialCookies||[]]}async dispose(){await this.cleanup()}};async function I(t,e){let i;const s=e?.engine||t.engine;if(s&&"auto"!==s){if(i=await B.create(t,{engine:s}),!i)throw new Error(`Engine "${s}" is not available or failed to initialize.`);return i}const n=function(t,e){if(!t||!e?.length)return null;const i=new URL(t);let s=e.find(t=>t.domain===i.hostname);s||(s=e.find(t=>i.hostname.endsWith(t.domain)));if(!s)return null;if(s.pathScope?.length){if(!s.pathScope.some(t=>i.pathname.startsWith(t)))return null}return s}(e?.url||t.url,t.sites);if(n?.engine&&"auto"!==n.engine&&(i=await B.create(t,{engine:n.engine}),i))return i;if(i=await B.create(t,{engine:"http"}),!i)throw new Error("Failed to create default http engine");return i}B.registry=new Map;var V=class{constructor(t={}){this.options=t,this.closed=!1,this.id=h(),this.context=this.createContext(t)}_logDebug(t,...e){f(this.context.debug,{prefix:"FetchSession",id:this.id.slice(0,8),category:t},...e)}async execute(t,e=this.context){const 
i=t.id||t.name||t.action;this._logDebug("execute",`Executing action: ${i}`,t.params);const s=t.index??(e.internal.actionIndex||0);e.internal.actionIndex=s+1,await this.ensureEngine(t,e);const r=n.create(t);if(!r)throw new Error(`Unknown action: ${t.id||t.name}`);const o={...t,index:s};let a,c;e.currentAction={...o,startedAt:Date.now()};try{return a=await r.execute(e,o),a}catch(t){throw c=t,c}finally{e.currentAction=void 0}}async executeAll(t,e){this._logDebug("executeAll",`Total actions: ${t.length}`,t.map(t=>t.id||t.name||t.action));const i=e?{...this.context,...e,id:this.context.id,eventBus:this.context.eventBus,outputs:this.context.outputs,execute:this.context.execute,action:this.context.action}:this.context;let s=e?.index??0;try{for(;s<t.length;){const e=t[s];await this.execute({...e,index:s},i),s++}const e=await this.execute({id:"getContent",index:s},i);return{result:e?.result,outputs:this.getOutputs()}}catch(t){throw t.actionIndex=s,t}}getOutputs(){return this.context.outputs}async getState(){return this.context.internal.engine?.getState()}async dispose(){if(this.closed)return;const t=this.context.eventBus;t.emit("session:closing",{sessionId:this.id});try{await(this.context.internal.engine?.dispose())}finally{this.closed=!0}t.emit("session:closed",{sessionId:this.id})}async ensureEngine(t,e){if(this.closed)throw new Error("Session is closed");if(!e.internal.engine){const i=t?.params?.url??e.url,s=await I(e,{url:i});if(!s)throw new Error("No engine found");e.internal.engine=s}}createContext(e=this.options){const i=new c;return l({...e,id:this.id,eventBus:i,outputs:{},internal:{},execute:async t=>this.execute(t),action:async function(t,e,i){return this.execute({name:t,params:e,...i})}},t)}},D=class{constructor(t={}){this.defaults=t}async createSession(t){const e={...this.defaults,...t||{}};return new V(e)}async fetch(t,e){"string"!=typeof t&&(t=(e=t).url);const i=await this.createSession(e);try{const 
s=e?.actions||[];t&&0!==s.findIndex(e=>("goto"===e.id||"goto"===e.name)&&e.params?.url===t)&&s.unshift({id:"goto",params:{url:t}});return await i.executeAll(s)}finally{await i.dispose()}}};import{CheerioCrawler as z}from"crawlee";import*as G from"cheerio";import{newFunction as J}from"util-ex";import{CommonError as K,ErrorCode as Q,NotFoundError as W}from"@isdk/common-error";var Z="___BR___",X="___BLOCK___",Y="___P___",tt=/\s+/g,et=new RegExp(` *(${Z}|${X}|${Y}) *`,"g"),it=new RegExp(`(?:${X}|${Y})+`,"g");var st={"&":"&","<":"<",">":">"},nt={""":'"',"'":"'"," ":" ","©":"©","®":"®","™":"™","§":"§","¶":"¶","•":"•","…":"…","€":"€","£":"£","¥":"¥","¢":"¢","¤":"¤","¦":"¦","¨":"¨","ª":"ª","«":"«","»":"»","¬":"¬","­":"","¯":"¯","°":"°","±":"±","²":"²","³":"³","´":"´","µ":"µ","·":"·","¸":"¸","¹":"¹","º":"º","¿":"¿","×":"×","÷":"÷","–":"–","—":"—","‘":"‘","’":"’","‚":"‚","“":"“","”":"”","„":"„","†":"†","‡":"‡","‰":"‰","‹":"‹","›":"›"};function rt(t){return t?t.replace(/&(#?[a-zA-Z0-9]+);/g,t=>{const e=t.toLowerCase();if(st[e])return t;if(nt[e])return nt[e];if(t.startsWith("&#")){const e=t.startsWith("&#x")?parseInt(t.slice(3,-1),16):parseInt(t.slice(2,-1),10);if(!isNaN(e)){if(160===e)return" ";try{return String.fromCodePoint(e)}catch(e){return t}}}return t}):t}var ot=class extends B{_ensureCheerioContext(t){if(!t.$&&t.body){let e="string"==typeof t.body?t.body:Buffer.isBuffer(t.body)?t.body.toString("utf-8"):JSON.stringify(t.body);e.trim().startsWith("<")||(e=`<html><body><pre>${e}</pre></body></html>`),t.$=G.load(e)}}async _buildResponse(t){this._ensureCheerioContext(t);const{request:e,response:i,body:s,$:n}=t,r=n?.html();let o="string"==typeof s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");r&&r!==o&&(o=r);let a=i?.headers;if(!a&&i?.rawHeaders){a={};const t=i.rawHeaders;for(let e=0;e<t.length;e+=2)a[t[e].toLowerCase()]=t[e+1]}const 
c={url:e.url,finalUrl:e.loadedUrl||e.url,statusCode:i?.statusCode??200,statusText:i?.statusMessage,headers:a||{},body:s,html:rt(o),text:o};if(this.opts?.debug&&i?.timings){const t=i.timings;c.metadata={timings:{start:t.start,total:t.phases?.total,ttfb:t.phases?.firstByte,dns:t.phases?.dns,tcp:t.phases?.tcp,download:t.phases?.download}}}return c}async _querySelectorAll(t,e){if(Array.isArray(t)){if(0===t.length)return[];const{$:i}=t[0],s=t.map(t=>t.el[0]).filter(Boolean),n=i(s);return n.find(e).add(n.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}const{$:i,el:s}=t;return":scope"===e?[{$:i,el:s}]:s.find(e).add(s.filter(e)).toArray().map(t=>({$:i,el:i(t)}))}async _nextSiblingsUntil(t,e){const{$:i,el:s}=t;return(e?s.nextUntil(e):s.nextAll()).toArray().map(t=>({$:i,el:i(t)}))}async _parentElement(t){const{$:e,el:i}=t,s=i.parent();return 0===s.length?null:{$:e,el:s}}async _isSameElement(t,e){return t.el[0]===e.el[0]}async _findClosestAncestor(t,e){if(0===e.length)return null;const i=new Set(e.map(t=>t.el[0])),{$:s,el:n}=t;let r=n;for(;r.length>0;){if(i.has(r[0]))return{$:s,el:r};r=r.parent()}return null}async _contains(t,e){const i=t.el[0],s=e.el[0];if(i===s)return!0;const n=t.$;return"function"==typeof n.contains?n.contains(i,s):t.el.find(e.el).length>0}async _findCommonAncestor(t,e){const{$:i,el:s}=t,{el:n}=e;if(s[0]===n[0])return t;if(await this._contains(t,e))return t;if(await this._contains(e,t))return e;const r=s.parents().toArray(),o=n.parents().toArray(),a=new Set(o);for(const t of r)if(a.has(t))return{$:i,el:i(t)};return null}async _findContainerChild(t,e){const{$:i,el:s}=t,n=e.el[0];let r=s;if(r[0]===n)return t;const o=r.parents().toArray();for(let t=0;t<o.length;t++)if(o[t]===n){return{$:i,el:i(t>0?o[t-1]:s[0])}}if(n===i.root()[0]){return{$:i,el:i(o.length>0?o[o.length-1]:s[0])}}return null}async _extractValue(t,e){const{$:i,el:s}=e,{attribute:n,type:r="string",mode:o="text"}=t;if(this._logDebug("extract",`_extractValue: el.length=${s.length} 
schema=${JSON.stringify(t)}`),0===s.length)return null;let a="";if(n?a=s.attr(n)??null:"html"===r||"html"===o||"outerHTML"===o?(a="outerHTML"===o?i.html(s):s.html()??("html"===r?"":null),a&&(a=rt(a.trim()))):a="innerText"===o?function(t){const e=t.clone();e.find("script, style, noscript, template").remove(),e.find("[hidden]").remove(),e.find("br").replaceWith(Z),e.find("p").before(Y).after(Y),e.find("div, h1, h2, h3, h4, h5, h6, li, ul, ol, tr, dl, dt, dd, blockquote, pre, form, table, article, section, header, footer, nav, main, aside, hr, address, fieldset, figure, figcaption, details, summary").before(X).after(X);let i=e.text();return i=i.replace(tt," "),i=i.replace(et,"$1"),i=i.replace(it,t=>t.includes(Y)?Y:X),i=i.replaceAll(Z,"\n"),i=i.replaceAll(Y,"\n\n"),i=i.replaceAll(X,"\n"),i.trim()}(s):s.text().trim(),null===a)return null;switch(r){case"number":return parseFloat(a.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=a.toLowerCase();return"true"===t||"1"===t;default:return a}}_getInitialElementScope(t){const{$:e}=t;return e?{$:e,el:e.root()}:null}async executeAction(t,e){const{$:i}=t;switch(e.type){case"dispose":return;case"navigate":{const{url:i,opts:s}=e;this._logDebug("navigate",`Navigating to: ${i}`);const n=await this._requestWithRedirects(t,{url:i,method:"GET",headers:{...this.hdrs,...s?.headers}});return await this._updateStateAfterNavigation(t,n),this.lastResponse}case"mouseMove":case"mouseClick":case"keyboardType":case"keyboardPress":throw new K(`Action "${e.type}" is only supported in browser engine mode.`,e.type,"not_supported");case"click":{if(!i)throw new K(`Cheerio context not available for action: ${e.type}`,"click");const s=e.selector,n=i(s).first();let r;if(0===n.length)try{r=new URL(s,t.request.loadedUrl||t.request.url).href}catch{throw new K(`click: selector not found or invalid URL: ${s}`,"click")}else{if(!n.is("a")||!n.attr("href")){if(n.is('input[type="submit"], button[type="submit"], button, input')){const 
e=n.closest("form");return e.length?this.executeAction(t,{type:"submit",selector:e}):void this._logDebug("click","Button/input clicked but no form found and no JS support in http mode. Ignoring.")}throw new K(`click: unsupported element for http simulate. Selector: ${s}`,"click")}{const e=n.attr("href");r=new URL(e,t.request.loadedUrl||t.request.url).href}}const o=await t.sendRequest({url:r});return void await this._updateStateAfterNavigation(t,o)}case"fill":{if(!i)throw new K(`Cheerio context not available for action: ${e.type}`),"fill";const s=i(e.selector).first();if(0===s.length)throw new K(`fill: selector not found: ${e.selector}`);if(!s.is("input, textarea, select"))throw new K(`fill: not a form field: ${e.selector}`);return s.val(e.value),void(this.lastResponse=await this.buildResponse(t))}case"trim":{if(!i)throw new K(`Cheerio context not available for action: ${e.type}`,"trim");const{selectors:s,removeComments:n}=this._getTrimInfo(e.options);return s.forEach(t=>i(t).remove()),n&&i("*").contents().filter((t,e)=>"comment"===e.type).remove(),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":return void(e.options?.ms&&await new Promise(t=>setTimeout(t,e.options.ms)));case"submit":{if(!i)throw new K(`Cheerio context not available for action: ${e.type}`,"submit");const s="string"==typeof e.selector?i(e.selector).first():null!=e.selector?e.selector:i("form").first();if(0===s.length)throw new W(e.selector,"submit");const n=s.attr("action")||t.request.loadedUrl||t.request.url,r=(s.attr("method")||"GET").toUpperCase(),o=new URL(n,t.request.loadedUrl||t.request.url).href,a={};let c;if(s.find("input, select, textarea").each((t,e)=>{const s=i(e),n=s.attr("name");if(!n)return;const r=s.val();null!=r&&(a[n]=String(r))}),"GET"===r){const e=new URL(o);Object.entries(a).forEach(([t,i])=>e.searchParams.set(t,i)),c=await this._requestWithRedirects(t,{url:e.href,method:"GET"})}else{const 
i=e.options?.enctype||s.attr("enctype")||"application/x-www-form-urlencoded";let n;const r={};"application/json"===i?(n=JSON.stringify(a),r["Content-Type"]="application/json"):(n=new URLSearchParams(a).toString(),r["Content-Type"]="application/x-www-form-urlencoded"),this._logDebug("submit","Submitting POST to:",o,"enctype:",i),c=await this._requestWithRedirects(t,{url:o,method:"POST",body:n,headers:r})}return await this._updateStateAfterNavigation(t,c),void this._logDebug("submit","Submit finished. Current URL:",t.request.loadedUrl||t.request.url)}case"evaluate":{const{fn:s,args:n=[]}=e.params,r=t.request.loadedUrl||t.request.url;let o=null;const a=t=>t&&0!==t.length?{textContent:t.text(),innerHTML:t.html(),outerHTML:i.html(t),getAttribute:e=>t.attr(e),matches:e=>t.is(e)}:null,c=this,l={location:{_href:r,get href(){return this._href},set href(t){if(t&&t!==this._href){this._href=t;const e=new URL(t,r).href;o=c.goto(e)}},assign(t){this.href=t},replace(t){this.href=t}}},u={getElementById:t=>a(i(`#${t}`).first()),querySelector:t=>a(i(t).first()),querySelectorAll:t=>i(t).toArray().map(t=>a(i(t))),getElementsByClassName:t=>i(`.${t}`).toArray().map(t=>a(i(t))),getElementsByTagName:t=>i(t).toArray().map(t=>a(i(t))),get body(){return a(i("body").first())},get title(){return i("title").text()}};l.document=u;const h={window:l,document:u,$:i,console:console};let f;const d=J(s,h);return f="function"==typeof d?await d(n):d,o?await o:l.location.href===r&&(this.lastResponse=await this.buildResponse(t)),f}default:throw new K(`Unknown action type: ${e.type}`,"CheerioFetchEngine.executeAction",Q.NotSupported)}}async _requestWithRedirects(t,e){let{url:i,method:s,body:n,headers:r={}}=e,o=0;let a;for(;o<=5;){if(t.session){const e=t.session.getCookieString(i);e&&(r={...r,cookie:e})}if(a=await t.sendRequest({url:i,method:s,body:n,headers:r,followRedirect:!1}),!a)break;const 
e=a.statusCode,c=a.headers||a.req?.res?.headers||a.res?.headers||{};if(t.session&&c["set-cookie"]&&t.session.setCookies(c["set-cookie"],i),[301,302,303,307,308].includes(e)){const t=c.location;if(!t)break;if(i=new URL(t,i).href,o++,[301,302,303].includes(e)){this._logDebug("http",`Redirect ${e} (method conversion to GET):`,i),s="GET",n=void 0;const{"content-type":t,"Content-Type":o,"content-length":a,"Content-Length":c,...l}=r;r=l}else this._logDebug("http",`Redirect ${e} (method preserved):`,i);continue}break}return a}async _updateStateAfterNavigation(t,e){const i=e;t.response=i,t.body=i.body,t.$=void 0,i.url&&(t.request.loadedUrl=i.url),this.lastResponse=await this.buildResponse(t)}_createCrawler(t,e){return new z(t,e)}_getSpecificCrawlerOptions(t){return{additionalMimeTypes:["text/plain"],maxRequestRetries:1,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,proxyConfiguration:this.proxyConfiguration,preNavigationHooks:[({session:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors,this.opts?.timeoutMs&&(s.timeout={request:this.opts.timeoutMs})}]}}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});const i="req-"+ ++this.requestCounter,s=new Promise((t,s)=>{const n=e?.timeoutMs||this.opts?.timeoutMs||3e4,r=setTimeout(()=>{this.pendingRequests.delete(i),this.navigationLock.release(),s(new K(`goto timed out after ${n}ms.`,"gotoTimeout",Q.RequestTimeout))},n);this.pendingRequests.set(i,{resolve:e=>{clearTimeout(r),t(e)},reject:t=>{clearTimeout(r),s(t)}})});return this.requestQueue.addRequest({...e,url:t,headers:{...this.hdrs,...e?.headers},userData:{requestId:i},uniqueKey:`${t}-${i}`}).catch(t=>{const e=this.pendingRequests.get(i);e&&(this.pendingRequests.delete(i),this.navigationLock.release(),e.reject(t))}),await this.navigationLock,this.navigationLock=H(),s}};ot.id="cheerio",ot.mode="http",B.register(ot);import{PlaywrightCrawler as at}from"crawlee";import{firefox as ct}from"playwright";import{CommonError as 
lt,ErrorCode as ut,NotFoundError as ht}from"@isdk/common-error";var ft=3e4,dt=class extends B{constructor(){super(...arguments),this.currentMousePos={x:0,y:0}}async _buildResponse(t){const{page:e,response:i,request:s,session:n}=t;if(!e||e.isClosed())return{url:s.url,finalUrl:s.loadedUrl||s.url,statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:"",html:"",text:""};const r=await e.content(),o=await e.textContent("body"),a=await e.context().cookies();n&&n.setCookies(a,s.url);const c={url:e.url(),finalUrl:e.url(),statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:r,html:r,text:o||""};if(this.opts?.debug&&i){const t="function"==typeof i.request?i.request():i.request;if(t&&"function"==typeof t.timing){const e=t.timing();c.metadata={timings:{start:e.startTime,total:e.responseEnd-e.startTime,ttfb:e.responseStart-e.requestStart,dns:e.domainLookupEnd-e.domainLookupStart,tcp:e.connectEnd-e.connectStart,download:e.responseEnd-e.responseStart}}}}return!1!==this.opts?.output?.cookies&&(c.cookies=a),c}async _querySelectorAll(t,e){const i=Array.isArray(t)?t:[t],s=[];for(const t of i){const i=await t.locator(e).all();s.push(...i);try{await t.evaluate((t,e)=>t.matches(e),e)}catch(t){}}const n=[];for(const t of i){let i=!1;try{i=await t.evaluate((t,e)=>t.matches(e),e)}catch{}i&&n.push(t);const s=await t.locator(e).all();n.push(...s)}return n}async _nextSiblingsUntil(t,e){const i=await t.locator("xpath=following-sibling::*").all();if(!e)return i;const s=[];for(const t of i){if(await t.evaluate((t,e)=>t.matches(e),e))break;s.push(t)}return s}async _parentElement(t){const e=t.locator("xpath=..");return 0===await e.count()?null:e.first()}async _isSameElement(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;try{return await i.evaluate((t,e)=>t===e,s)}finally{await i.dispose(),await s.dispose()}}async _findClosestAncestor(t,e){if(0===e.length)return null;const i=await 
t.elementHandle();if(!i)return null;const s=await Promise.all(e.map(t=>t.elementHandle()));try{const t=await i.evaluate((t,e)=>{const i=new Set(e);let s=t;for(;s;){if(i.has(s))return e.indexOf(s);s=s.parentElement}return-1},s);return-1!==t?e[t]:null}finally{await i.dispose(),await Promise.all(s.map(t=>t?.dispose()))}}async _contains(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return!1;try{return await i.evaluate((t,e)=>t.contains(e),s)}finally{await i.dispose(),await s.dispose()}}async _findCommonAncestor(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return null;try{const e=await i.evaluateHandle((t,e)=>{let i=null;if(t===e)i=t;else if(t.contains(e))i=t;else if(e.contains(t))i=e;else{const s=new Set;let n=e.parentElement;for(;n;)s.add(n),n=n.parentElement;for(n=t.parentElement;n;){if(s.has(n)){i=n;break}n=n.parentElement}}return i&&1===i.nodeType?function t(e){if(e.id)return`//*[@id="${e.id}"]`;if(e===document.body)return"/html/body";if(e===document.documentElement)return"/html";let i=0;const s=e.parentNode?e.parentNode.childNodes:[];for(let n=0;n<s.length;n++){const r=s[n];if(r===e)return t(e.parentNode)+"/"+e.tagName.toLowerCase()+"["+(i+1)+"]";1===r.nodeType&&r.tagName===e.tagName&&i++}return""}(i):null},s);if(!e)return null;const n=await e.jsonValue();return"string"==typeof n&&n?t.page().locator(`xpath=${n}`):null}finally{await i.dispose(),await s.dispose()}}async _findContainerChild(t,e){const i=await t.elementHandle(),s=await e.elementHandle();if(!i||!s)return null;try{const e=await i.evaluateHandle((t,e)=>{let i=null;if(t===e)i=t;else{let s=t;for(;s;){if(s.parentElement===e){i=s;break}s=s.parentElement}}return i&&1===i.nodeType?function t(e){if(e.id)return`//*[@id="${e.id}"]`;if(e===document.body)return"/html/body";if(e===document.documentElement)return"/html";let i=0;const s=e.parentNode?e.parentNode.childNodes:[];for(let n=0;n<s.length;n++){const r=s[n];if(r===e)return 
t(e.parentNode)+"/"+e.tagName.toLowerCase()+"["+(i+1)+"]";1===r.nodeType&&r.tagName===e.tagName&&i++}return""}(i):null},s);if(!e)return null;const n=await e.jsonValue();return"string"==typeof n&&n?t.page().locator(`xpath=${n}`):null}finally{await i.dispose(),await s.dispose()}}async _extractValue(t,e){const{attribute:i,type:s="string",mode:n="text"}=t,r=await e.count();if(this._logDebug("extract",`_extractValue: count=${r} schema=${JSON.stringify(t)}`),0===r)return null;let o="";if(i?o=await e.getAttribute(i):"html"===s||"html"===n||"outerHTML"===n?(o="outerHTML"===n?await e.evaluate(t=>t.outerHTML):await e.innerHTML(),o&&(o=rt(o))):o="innerText"===n?await e.innerText():await e.textContent(),null===o)return null;switch(o=o.trim(),s){case"number":return parseFloat(o.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=o.toLowerCase();return"true"===t||"1"===t;default:return o}}_getInitialElementScope(t){const{page:e}=t;return e?e.locator(":root"):null}async _waitForNavigation(t,e,i){const{page:s}=t,n=this.opts?.timeoutMs||ft;try{await s.waitForURL(t=>t.href!==e,{waitUntil:"domcontentloaded",timeout:5e3}),this._logDebug(i,"URL changed to:",s.url())}catch(t){this._logDebug(i,"No URL change detected within 5s")}await s.waitForLoadState("networkidle",{timeout:n}),this.lastResponse=await this.buildResponse(t)}_getRandomDelay(t,e=.3){const i=t*(1-e),s=t*(1+e);return Math.floor(Math.random()*(s-i+1)+i)}_getTrajectory(t,e,i=-1){const s=[];if(-1===i){const s=Math.sqrt(Math.pow(e.x-t.x,2)+Math.pow(e.y-t.y,2)),n=10*Math.random()+5;i=Math.max(5,Math.floor(s/n))}const n=t.x+(e.x-t.x)/2,r=t.y+(e.y-t.y)/2,o=n+100*(Math.random()-.5),a=r+100*(Math.random()-.5);this._logDebug("mouseMove",`Trajectory: start(${t.x},${t.y}) -> end(${e.x},${e.y}), cp(${o},${a}), steps: ${i}`);for(let n=1;n<=i;n++){const r=n/i;let c=(1-r)*(1-r)*t.x+2*(1-r)*r*o+r*r*e.x,l=(1-r)*(1-r)*t.y+2*(1-r)*r*a+r*r*e.y;n<i&&(c+=1.5*(Math.random()-.5),l+=1.5*(Math.random()-.5)),s.push({x:c,y:l})}return s}async 
_moveToSelector(t,e,i=-1){const{page:s}=t,n=s.locator(e).first(),r=await n.boundingBox();if(!r)throw new lt(`Selector not found or not visible for mouse movement: ${e}`,"mouseMove");const o=r.x+r.width/2,a=r.y+r.height/2,c=this._getTrajectory(this.currentMousePos,{x:o,y:a},i);for(const t of c)await s.mouse.move(t.x,t.y),(i>1||-1===i)&&await s.waitForTimeout(15*Math.random()+5);return this.currentMousePos={x:o,y:a},this.currentMousePos}async executeAction(t,e){const{page:i}=t,s=this.opts?.timeoutMs||ft;switch(e.type){case"dispose":return;case"navigate":{this._logDebug("navigate",`Navigating to: ${e.url}`);const s=await i.goto(e.url,{waitUntil:e.opts?.waitUntil||"domcontentloaded",timeout:this.opts?.timeoutMs||ft});s&&(t={...t,response:s},this._logDebug("navigate",`Navigation status: ${s.status()} for ${s.url()}`));const n=await this.buildResponse(t);return this.lastResponse=n,n}case"mouseMove":{const{x:s,y:n,selector:r,steps:o=-1}=e.params;if(r)await this._moveToSelector(t,r,o);else if(void 0!==s&&void 0!==n){const t=this._getTrajectory(this.currentMousePos,{x:s,y:n},o);for(const e of t)await i.mouse.move(e.x,e.y),(o>1||-1===o)&&await i.waitForTimeout(15*Math.random()+5);this.currentMousePos={x:s,y:n}}return}case"mouseClick":{const{x:s,y:n,selector:r,button:o="left",clickCount:a=1,delay:c=0}=e.params;return r?(await this._moveToSelector(t,r,-1),await i.mouse.click(this.currentMousePos.x,this.currentMousePos.y,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)})):void 0!==s&&void 0!==n?(await i.mouse.click(s,n,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)}),this.currentMousePos={x:s,y:n}):await i.mouse.click(this.currentMousePos.x,this.currentMousePos.y,{button:o,clickCount:a,delay:this._getRandomDelay(c||50,.2)}),await i.waitForTimeout(this._getRandomDelay(100,.5)),void(this.lastResponse=await this.buildResponse(t))}case"keyboardType":{const{text:s,delay:n=150}=e.params;return await 
i.keyboard.type(s,{delay:this._getRandomDelay(n)}),void(this.lastResponse=await this.buildResponse(t))}case"keyboardPress":{const{key:s,delay:n=50}=e.params;return await i.keyboard.press(s,{delay:this._getRandomDelay(n)}),void(this.lastResponse=await this.buildResponse(t))}case"click":{this._logDebug("click","Clicking selector:",e.selector);const n=i.url();return await i.click(e.selector,{timeout:s}),void await this._waitForNavigation(t,n,"click")}case"fill":await i.fill(e.selector,e.value,{timeout:s});const n=await this.buildResponse(t);return void(this.lastResponse=n);case"trim":{const s=this._getTrimInfo(e.options);return await i.evaluate(t=>{const{selectors:e,removeComments:i,removeHidden:s}=t;if(e.forEach(t=>{document.querySelectorAll(t).forEach(t=>t.remove())}),s){const t=[];document.querySelectorAll("*").forEach(e=>{const i=window.getComputedStyle(e);"none"!==i.display&&"hidden"!==i.visibility||t.push(e)}),t.forEach(t=>t.remove())}if(i){const t=document.createNodeIterator(document,NodeFilter.SHOW_COMMENT),e=[];let i;for(;i=t.nextNode();)e.push(i);e.forEach(t=>t.parentElement?.removeChild(t))}},s),void(this.lastResponse=await this.buildResponse(t))}case"waitFor":try{e.options?.selector&&await i.waitForSelector(e.options.selector,{timeout:s}),e.options?.networkIdle&&await i.waitForLoadState("networkidle",{timeout:s}),e.options?.ms&&await i.waitForTimeout(this._getRandomDelay(e.options.ms,.1))}catch(t){if(!1!==e.options?.failOnTimeout)throw t}return;case"submit":{const s=e.selector||"form",n=i.locator(s).first();if(0===await n.count())throw new ht(s,"submit");if("application/json"===(e.options?.enctype||"application/x-www-form-urlencoded")){const t=await n.elementHandle();if(!t)throw new lt(`submit: could not get form handle for ${s}`,"submit");const e=await t.evaluate(async t=>{const e=new FormData(t),i={};e.forEach((t,e)=>{i[e]=t.toString()});const s=await 
fetch(t.action,{method:t.method,headers:{"Content-Type":"application/json"},body:JSON.stringify(i)}),n=await s.text();return{status:s.status,statusText:s.statusText,headers:Object.fromEntries(s.headers.entries()),body:n,html:n,text:n,url:t.action,finalUrl:s.url}});return await t.dispose(),await i.setContent(e.html),void(this.lastResponse=e)}{this._logDebug("submit","Submitting form...");const e=i.url();return await n.evaluate(t=>t.submit()),void await this._waitForNavigation(t,e,"submit")}}case"evaluate":{const{fn:n,args:r=[]}=e.params,o=i.url();let a;if(a="function"==typeof n?await i.evaluate(n,r):await i.evaluate(([t,e])=>{const i=(0,eval)(`(${t})`);return"function"==typeof i?i(e):i},[n,r]),i.url()!==o)await i.waitForLoadState("domcontentloaded",{timeout:s}).catch(()=>{}),this.lastResponse=await this.buildResponse(t);else try{this.lastResponse=await this.buildResponse(t)}catch(e){await i.waitForLoadState("domcontentloaded",{timeout:s}).catch(()=>{}),this.lastResponse=await this.buildResponse(t)}return a}default:throw new lt(`Unknown action type: ${e.type}`,"PlaywrightFetchEngine.executeAction",ut.NotSupported)}}_createCrawler(t,e){return new at(t,e)}async _getSpecificCrawlerOptions(t){const e=t.browser?.headless??!0,i={maxRequestRetries:t.retries||3,headless:e,proxyConfiguration:this.proxyConfiguration,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,preNavigationHooks:[async({page:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors;const n=this.blockedTypes;n.size>0&&await e.route("**/*",t=>{n.has(t.request().resourceType())?t.abort():t.continue()})}]},s=t.browser?.launchOptions||{};if(this.opts?.antibot){i.browserPoolOptions={useFingerprints:!1};const{launchOptions:t}=await import("camoufox-js"),n=await t({headless:e,...s});i.launchContext={launcher:ct,launchOptions:n},i.postNavigationHooks=[async({page:t,handleCloudflareChallenge:e})=>{await e()}]}else Object.keys(s).length>0&&(i.launchContext={launchOptions:s});return i}async 
goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});if(!this.requestQueue)throw new lt("RequestQueue not initialized","goto");const i="req-"+ ++this.requestCounter,s=new Promise((t,e)=>{this.pendingRequests.set(i,{resolve:t,reject:e})});return await this.requestQueue.addRequest({url:t,headers:this.hdrs,userData:{requestId:i,waitUntil:e?.waitUntil||"domcontentloaded"},uniqueKey:`${t}-${i}`}),s}};dt.id="playwright",dt.mode="browser",B.register(dt);var wt=class extends n{async onExecute(t,e){const{selector:i,...s}=e?.params||{};if(!i)throw new Error("Selector is required for click action");await this.delegateToEngine(t,"click",i,s)}};wt.id="click",wt.returnType="none",wt.capabilities={http:"simulate",browser:"native"},n.register(wt);var pt=class extends n{async onExecute(t,e){const{selector:i,value:s,...n}=e?.params||{};if(!i)throw new Error("Selector is required for fill action");if(void 0===s)throw new Error("Value is required for fill action");await this.delegateToEngine(t,"fill",i,s,n)}};pt.id="fill",pt.returnType="none",pt.capabilities={http:"simulate",browser:"native"},n.register(pt);var yt=class extends n{async onExecute(t,e){return await this.delegateToEngine(t,"getContent",e?.params)}};yt.id="getContent",yt.returnType="response",yt.capabilities={http:"native",browser:"native"},n.register(yt);var mt=class extends n{async onExecute(t,e,i){const s=e?.params,n=s?.url||t.url;if(!n)throw new Error("URL is required for goto action");const r=t.internal.engine;if(!r)throw new Error("No engine available");t.url=n;return await r.goto(n,s)}};mt.id="goto",mt.returnType="response",mt.capabilities={http:"native",browser:"native"},n.register(mt);var gt=class extends n{async onExecute(t,e){const{selector:i,...s}=e?.params||{};await this.delegateToEngine(t,"submit",i,s)}};gt.id="submit",gt.returnType="none",gt.capabilities={http:"simulate",browser:"native"},n.register(gt);var xt=class extends n{async onExecute(t,e){const 
i=t.internal.engine;if(!i)throw new Error("No engine available");await i.waitFor(e?.params)}};xt.id="waitFor",xt.returnType="none",xt.capabilities={http:"native",browser:"native"},n.register(xt);var bt=class extends n{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("Schema is required for extract action");return this.delegateToEngine(t,"extract",i)}};bt.id="extract",bt.returnType="any",bt.capabilities={http:"native",browser:"native"},n.register(bt);var vt=class extends n{async onExecute(t,e){const{selector:i,message:s,attribute:n}=e?.params||{},r=t.internal.engine;if("browser"===r?.mode){if(i){if(!await(r?.extract({selector:i,attribute:n})))return}r&&"pause"in r?await r.pause(s):console.warn("[PauseAction] was called, but the current engine does not support `pause`. Skipped.")}else console.warn("[PauseAction] can only run in browser engine. Skipped.")}};vt.id="pause",vt.capabilities={http:"native",browser:"native"},vt.returnType="none",n.register(vt);var $t=class extends n{async onExecute(t,e){const i=e?.params||{};await this.delegateToEngine(t,"trim",i)}};$t.id="trim",$t.returnType="none",$t.capabilities={http:"simulate",browser:"native"},n.register($t);var _t=class extends n{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("evaluate action: params is required");return await this.delegateToEngine(t,"evaluate",i)}};_t.id="evaluate",_t.returnType="any",_t.capabilities={http:"simulate",browser:"native"},n.register(_t);var kt=class extends n{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseMove",i)}};kt.id="mouseMove",kt.returnType="none",kt.capabilities={http:"noop",browser:"native"};var Et=class extends n{async onExecute(t,e){const i=e?.params;await this.delegateToEngine(t,"mouseClick",i)}};Et.id="mouseClick",Et.returnType="none",Et.capabilities={http:"noop",browser:"native"},n.register(kt),n.register(Et);var Ct=class extends n{async onExecute(t,e){const i=e?.params;if(!i?.text)throw new Error("text is required for 
keyboardType action");await this.delegateToEngine(t,"keyboardType",i.text,i.delay)}};Ct.id="keyboardType",Ct.returnType="none",Ct.capabilities={http:"noop",browser:"native"};var St=class extends n{async onExecute(t,e){const i=e?.params;if(!i?.key)throw new Error("key is required for keyboardPress action");await this.delegateToEngine(t,"keyboardPress",i.key,i.delay)}};async function qt(t,e){return(new D).fetch(t,e)}St.id="keyboardPress",St.returnType="none",St.capabilities={http:"noop",browser:"native"},n.register(Ct),n.register(St);export{ot as CheerioFetchEngine,wt as ClickAction,t as DefaultFetcherProperties,_t as EvaluateAction,bt as ExtractAction,n as FetchAction,i as FetchActionResultStatus,B as FetchEngine,V as FetchSession,e as FetcherOptionKeys,pt as FillAction,yt as GetContentAction,mt as GotoAction,St as KeyboardPressAction,Ct as KeyboardTypeAction,Et as MouseClickAction,kt as MouseMoveAction,vt as PauseAction,dt as PlaywrightFetchEngine,gt as SubmitAction,L as TRIM_PRESETS,$t as TrimAction,xt as WaitForAction,D as WebFetcher,qt as fetchWeb};
|
package/docs/README.md
CHANGED
|
@@ -136,7 +136,7 @@ This is the main entry point for the library.
|
|
|
136
136
|
* `url` (string): The initial URL to navigate to.
|
|
137
137
|
* `engine` ('http' | 'browser' | 'auto'): The engine to use. Defaults to `auto`.
|
|
138
138
|
* `proxy` (string | string[]): Proxy URL(s) to use for requests.
|
|
139
|
-
* `debug` (boolean): Enable detailed execution metadata (timings, engine used, etc.) in response.
|
|
139
|
+
* `debug` (boolean | string | string[]): Enable detailed execution metadata (timings, engine used, etc.) in the response, or enable debug logs for specific categories (e.g., 'extract', 'submit', 'request').
|
|
140
140
|
* `actions` (FetchActionOptions[]): An array of action objects to execute. (Supports `action`/`name` as aliases for `id`, and `args` as an alias for `params`.)
|
|
141
141
|
* `headers` (Record<string, string>): Headers to use for all requests.
|
|
142
142
|
* `cookies` (Cookie[]): Array of cookies to use.
|
|
@@ -149,21 +149,30 @@ This is the main entry point for the library.
|
|
|
149
149
|
* `output` (object): Controls the output fields in `FetchResponse`.
|
|
150
150
|
* `cookies` (boolean): Whether to include cookies in the response (default: `true`).
|
|
151
151
|
* `sessionState` (boolean): Whether to include session state in the response (default: `true`).
|
|
152
|
+
* `browser` (object): Browser engine configuration.
|
|
153
|
+
* `headless` (boolean): Run in headless mode (default: `true`).
|
|
154
|
+
* `launchOptions` (object): Playwright launch options (e.g., `{ slowMo: 50, args: [...] }`).
|
|
152
155
|
* `sessionPoolOptions` (SessionPoolOptions): Advanced configuration for the underlying Crawlee SessionPool.
|
|
153
156
|
* ...and many other options for proxy, retries, etc.
|
|
154
157
|
|
|
155
158
|
### Built-in Actions
|
|
156
159
|
|
|
157
|
-
|
|
160
|
+
The library provides a set of powerful built-in actions, many of which are engine-agnostic and handled centrally for consistency:
|
|
158
161
|
|
|
159
162
|
* `goto`: Navigates to a new URL.
|
|
160
|
-
* `click`: Clicks on an element
|
|
161
|
-
* `fill`: Fills an input field
|
|
162
|
-
* `submit`: Submits a form.
|
|
163
|
-
* `
|
|
164
|
-
* `
|
|
165
|
-
* `
|
|
166
|
-
* `
|
|
163
|
+
* `click`: Clicks on an element (Engine-specific).
|
|
164
|
+
* `fill`: Fills an input field (Engine-specific).
|
|
165
|
+
* `submit`: Submits a form (Engine-specific).
|
|
166
|
+
* `mouseMove`: Moves the mouse cursor to a specific coordinate or element (Bézier curve supported).
|
|
167
|
+
* `mouseClick`: Triggers a mouse click at the current position, at specified coordinates, or on the element matched by a selector.
|
|
168
|
+
* `keyboardType`: Simulates human-like typing into the currently focused element.
|
|
169
|
+
* `keyboardPress`: Simulates pressing a single key or a key combination.
|
|
170
|
+
* `trim`: Removes elements from the DOM to clean up the page.
|
|
171
|
+
* `waitFor`: Pauses execution to wait for a specific condition (Supports fixed timeouts centrally).
|
|
172
|
+
* `pause`: Pauses execution for manual intervention (Handled centrally).
|
|
173
|
+
* `getContent`: Retrieves the full content of the current page (Handled centrally).
|
|
174
|
+
* `evaluate`: Executes custom JavaScript within the page context.
|
|
175
|
+
* `extract`: Extracts structured data using engine-agnostic core logic and engine-specific DOM primitives. Supports `required` fields and `strict` validation.
|
|
167
176
|
|
|
168
177
|
### Response Structure
|
|
169
178
|
|
|
@@ -176,7 +185,7 @@ The `fetchWeb` function returns an object containing:
|
|
|
176
185
|
* `cookies`: Array of cookies.
|
|
177
186
|
* `sessionState`: Crawlee session state.
|
|
178
187
|
* `text`, `html`: Page content.
|
|
179
|
-
* `outputs` (Record<string, any>): Data extracted and stored via `storeAs`.
|
|
188
|
+
* `outputs` (Record<string, any>): Data extracted and stored via `storeAs`. Note: When multiple actions store objects under the same key, the objects are merged rather than overwritten.
|
|
180
189
|
|
|
181
190
|
---
|
|
182
191
|
|