@cyia/crawl 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/define.d.ts +2 -1
- package/index.mjs +1 -1
- package/package.json +1 -1
package/define.d.ts
CHANGED
|
@@ -383,7 +383,8 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
383
383
|
type: "custom";
|
|
384
384
|
config: any;
|
|
385
385
|
}>, undefined>;
|
|
386
|
-
|
|
386
|
+
type ActionType = typeof ActionDefine;
|
|
387
|
+
export type ActionItemInputType = typeof ActionDefine;
|
|
387
388
|
export type QueueList = v.InferInput<typeof ActionDefine>[];
|
|
388
389
|
export declare const GlobalConfig: v.ObjectSchema<{
|
|
389
390
|
readonly maxTimeout: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
package/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import ie from"puppeteer-core";import{ElementHandle as M}from"puppeteer-core";import{load as L}from"cheerio";function b(a,o){let i=L(a,void 0,!0),t=i("body");return t.find("script,style,iframe,footer,br,hr,svg").remove(),t.find("*").removeAttr("class"),t.find("*").removeAttr("style"),i("*").contents().filter(function(){return this.type==="comment"||this.type==="text"&&!this.data.trim()}).remove(),i("*").contents().filter(function(){return this.type==="text"&&!!this.data.trim()}).text((r,n)=>n.trim()),o?t.text():t.html()}import{promise as U}from"fastq";var u=class{page;browser;parent;#e={};constructor(o,i,t){this.page=o,this.browser=i,this.parent=t}ab;timeoutId;setMaxTimeout(o){this.ab=new AbortController,this.timeoutId=setTimeout(()=>{this.ab.abort("timeout")},o)}clearTimeout(){clearTimeout(this.timeoutId)}setVariable(o,i){this.#e[o]=i}getVariable(o){return this.#e[o]}#t(o,i){let t,r=!1;for(let n=0;n<i.length;n++){let l=i[n];if(l===".."){if(!o.parent)throw new Error("未找到父级");o=o.parent}else if(r){if(!t||typeof t!="object")throw new Error(`${i}路径下未找到值`);t=t[l]}else t=o.#e[l],r=!0}return t}#o(o){if(typeof o=="string")return o;if(o.source==="variable")return typeof o.key=="string"?this.#e[o.key]:this.#t(this,o.key)}async exeQueue(o){let i;for(let t of o)switch(console.log("准备执行",t),t.type){case"click":await this.page.click(t.selector,{offset:t.offset,delay:t.delay,count:t.count});break;case"type":{await this.page.type(t.selector,t.text,{delay:t.delay});break}case"goto":{i=await this.page.goto(this.#o(t.url),{waitUntil:t.waitUntil,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"setViewport":{i=await this.page.setViewport({width:t.width,height:t.height,isMobile:t.isMobile,isLandscape:t.isLandscape});break}case"wait":{switch(t.config.mode){case"selector":{i=await this.page.waitForSelector(t.config.selector,{visible:t.config.visible,hidden:t.config.hidden,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"request":{let r=t.config;i=await this.page.waitForRequest(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.method&&r.method!==n.method()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"response":{let r=t.config;i=await this.page.waitForResponse(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.status&&r.status!==n.status()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"networkIdle":{i=await this.page.waitForNetworkIdle({idleTime:t.config.idleTime,concurrency:t.config.concurrency,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"navigation":{i=await this.page.waitForNavigation({signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}}break}case"selector":{t.multi?i=this.#e[t.output]=await this.page.$$(t.selector):i=this.#e[t.output]=await this.page.$(t.selector);break}case"keypress":{await this.page.keyboard.press(t.key,{delay:t.delay});break}case"findData":{let r=this.#e[t.input];Array.isArray(r)?t.kind==="property"&&(i=this.#e[t.output]=await Promise.all(r.map(n=>n.getProperty(t.key).then(l=>l.jsonValue())))):r instanceof M&&(i=this.#e[t.output]=r.getProperty(t.key).then(n=>n.jsonValue()));break}case"getContent":{let r=await this.page.content();i=this.#e[t.output]=t.cleanContent?b(r,t.format==="text"):r;break}case"page":{let r=this.#e[t.input],n=Array.isArray(r)?r:[r],l=U(s=>(s.page.setVariable("$item",n[s.index]),s.page.setVariable("$index",s.index),s.page.setVariable("$first",s.index===0),s.page.setVariable("$last",s.index===n.length-1),s.page.exeQueue(t.actions)),t.concurrency),g;l.error(s=>{s&&(g=s)});let y=[];for(let s=0;s<n.length;s++)y.push(await this.browser.openPage(async O=>l.push({page:O,index:s}),this).catch(()=>{}));if(await l.drained(),t.throwError&&g)throw g;i=y;break}case"setUserAgent":{await this.page.setUserAgent(t.userAgent);break}case"close":{await this.page.close({runBeforeUnload:!1}),this.clearTimeout();break}case"custom":{if(typeof t.fn=="function")i=await t.fn(this);else{let r=this.browser.getCustom(t.config.type);if(!r)throw new Error(`自定义[${t.config.type}]未实现处理`);i=await r(t.config,this)}break}default:break}return i}};import*as e from"valibot";var B=e.picklist(["0","1","2","3","4","5","6","7","8","9","Power","Eject","Abort","Help","Backspace","Tab","Numpad5","NumpadEnter","Enter","\r",`
|
|
2
|
-
`,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),f=e.optional(e.boolean()),W=e.optional(e.string());var G=c,$=e.pipe(e.string(),e.transform(a=>new RegExp(a))),H=e.pipe(e.tuple([e.string(),e.pipe(e.string())]),e.transform(([a,o])=>new RegExp(a,o))),v=e.union([$,H]),p=e.string(),q=e.union([e.string(),e.object({source:e.literal("variable"),key:e.union([e.string(),e.array(e.string())])})]),h=e.object({timeout:G,waitUntil:e.optional(e.picklist(["load","domcontentloaded","networkidle0","networkidle2"]),"networkidle2"),url:q,type:e.literal("goto")}),d=e.object({width:e.optional(e.number(),1920),height:e.optional(e.number(),1080),isMobile:e.optional(e.boolean()),isLandscape:e.optional(e.boolean()),type:e.literal("setViewport")}),w=e.object({userAgent:e.string(),type:e.literal("setUserAgent")}),Q=e.object({selector:p,visible:f,hidden:f}),J=e.object({mode:e.literal("selector"),...Q.entries}),Y=e.object({mode:e.literal("request"),urlRegexp:v,method:W}),Z=e.object({mode:e.literal("response"),urlRegexp:e.optional(v),status:c}),_=e.object({mode:e.literal("networkIdle"),idleTime:c,concurrency:c}),z=e.object({mode:e.literal("navigation")}),k=e.object({type:e.literal("wait"),config:e.variant("mode",[J,Y,Z,_,z])}),x=e.object({type:e.literal("click"),selector:p,offset:e.optional(e.object({x:e.number(),y:e.number()})),delay:c,count:c}),
|
|
2
|
+
`,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),f=e.optional(e.boolean()),W=e.optional(e.string());var G=c,$=e.pipe(e.string(),e.transform(a=>new RegExp(a))),H=e.pipe(e.tuple([e.string(),e.pipe(e.string())]),e.transform(([a,o])=>new RegExp(a,o))),v=e.union([$,H]),p=e.string(),q=e.union([e.string(),e.object({source:e.literal("variable"),key:e.union([e.string(),e.array(e.string())])})]),h=e.object({timeout:G,waitUntil:e.optional(e.picklist(["load","domcontentloaded","networkidle0","networkidle2"]),"networkidle2"),url:q,type:e.literal("goto")}),d=e.object({width:e.optional(e.number(),1920),height:e.optional(e.number(),1080),isMobile:e.optional(e.boolean()),isLandscape:e.optional(e.boolean()),type:e.literal("setViewport")}),w=e.object({userAgent:e.string(),type:e.literal("setUserAgent")}),Q=e.object({selector:p,visible:f,hidden:f}),J=e.object({mode:e.literal("selector"),...Q.entries}),Y=e.object({mode:e.literal("request"),urlRegexp:v,method:W}),Z=e.object({mode:e.literal("response"),urlRegexp:e.optional(v),status:c}),_=e.object({mode:e.literal("networkIdle"),idleTime:c,concurrency:c}),z=e.object({mode:e.literal("navigation")}),k=e.object({type:e.literal("wait"),config:e.variant("mode",[J,Y,Z,_,z])}),x=e.object({type:e.literal("click"),selector:p,offset:e.optional(e.object({x:e.number(),y:e.number()})),delay:c,count:c}),A=e.object({type:e.literal("type"),selector:p,text:e.string(),delay:c}),C=e.object({type:e.literal("keypress"),key:B,delay:c}),P=e.object({type:e.literal("selector"),selector:p,output:e.string(),multi:e.optional(e.boolean(),!1)}),T=e.object({type:e.literal("findData"),input:e.string(),output:e.string(),kind:e.picklist(["property"]),key:e.optional(e.string())}),E=e.object({type:e.literal("getContent"),format:e.optional(e.picklist(["html","text"]),"html"),cleanContent:f,output:e.string()}),D=e.object({type:e.literal("close")}),I=e.object({type:e.literal("custom"),config:e.optional(e.looseObject({type:e.string()})),fn:e.optional(e.custom(Boolean))}),X=e.object({type:e.literal("page"),input:e.string(),concurrency:e.optional(e.number(),2),throwError:e.optional(e.boolean(),!1),actions:e.lazy(()=>e.array(F))}),ee=[...[h,d,w,k,x,A,C,P,T,E,D,I].map(a=>a.entries.type.literal),"page"];var F=e.fallback(e.union([h,d,w,k,x,A,C,P,T,E,X,D,I]),a=>{if(ee.includes((a?.value).type))throw new Error(JSON.stringify(a?.issues));return{type:"custom",config:a?.value}}),S=e.array(F),me=e.object({maxTimeout:c,actionTimeout:c});import*as K from"valibot";import{Browser as re,computeExecutablePath as ne}from"@puppeteer/browsers";import*as j from"fs";import{Browser as R,computeExecutablePath as te,install as oe}from"@puppeteer/browsers";async function V(a){let o=await oe({browser:R.CHROME,baseUrl:"https://cdn.npmmirror.com/binaries/chrome-for-testing",...a,unpack:!0})}function he(a,o){return te({cacheDir:a,browser:R.CHROME,buildId:o})}import{PUPPETEER_REVISIONS as ae}from"puppeteer-core/internal/revisions.js";async function Fe(a){return m.init(a)}var N=ae.chrome,m=class a{browser;static async init(o){let i=re.CHROME,t=ne({cacheDir:o.cacheDir,browser:i,buildId:N});j.existsSync(t)||(console.log("准备下载"),await V({cacheDir:o.cacheDir,buildId:N,browser:i}));let r=await ie.launch({...o,executablePath:t});return new a(r)}constructor(o){this.browser=o}#e;#t=new Map;setConfig(o){this.#e=o}getConfig(){return this.#e}registerCustom(o,i){this.#t.set(o,i)}clearCustom(){this.#t.clear()}getCustom(o){return this.#t.get(o)}async openPage(o,i){let t=new u(await this.browser.newPage(),this,i);return this.#e?.maxTimeout&&t.setMaxTimeout(this.#e.maxTimeout),o(t)}runQueue(o,i){let t=K.safeParse(S,o);if(!t.success)throw new Error(`解析配置错误
|
|
3
3
|
${JSON.stringify(t.issues)}`);return this.openPage(async r=>{if(i)for(let n in i)r.setVariable(n,i[n]);return r.exeQueue(t.output)})}};export{F as ActionDefine,S as ActionListDefine,me as GlobalConfig,m as WebBrowser,u as WebPage,V as download,he as getExecutablePath,Fe as init};
|