@cyia/crawl 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/define.d.ts +22 -4
- package/index.mjs +3 -3
- package/package.json +1 -1
package/define.d.ts
CHANGED
|
@@ -56,7 +56,10 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
56
56
|
}, undefined>, v.ObjectSchema<{
|
|
57
57
|
readonly type: v.LiteralSchema<"type", undefined>;
|
|
58
58
|
readonly selector: v.StringSchema<undefined>;
|
|
59
|
-
readonly text: v.StringSchema<undefined
|
|
59
|
+
readonly text: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
60
|
+
readonly source: v.LiteralSchema<"variable", undefined>;
|
|
61
|
+
readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
|
|
62
|
+
}, undefined>], undefined>;
|
|
60
63
|
readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
61
64
|
}, undefined>, v.ObjectSchema<{
|
|
62
65
|
readonly type: v.LiteralSchema<"keypress", undefined>;
|
|
@@ -154,7 +157,13 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
154
157
|
} | {
|
|
155
158
|
type: "type";
|
|
156
159
|
selector: string;
|
|
157
|
-
text: string
|
|
160
|
+
text: (string | {
|
|
161
|
+
source: "variable";
|
|
162
|
+
key: (string | string[] | undefined) & (string | string[]);
|
|
163
|
+
} | undefined) & (string | {
|
|
164
|
+
source: "variable";
|
|
165
|
+
key: (string | string[] | undefined) & (string | string[]);
|
|
166
|
+
});
|
|
158
167
|
delay?: number | undefined;
|
|
159
168
|
} | {
|
|
160
169
|
type: "keypress";
|
|
@@ -248,7 +257,10 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
248
257
|
}, undefined>, v.ObjectSchema<{
|
|
249
258
|
readonly type: v.LiteralSchema<"type", undefined>;
|
|
250
259
|
readonly selector: v.StringSchema<undefined>;
|
|
251
|
-
readonly text: v.StringSchema<undefined
|
|
260
|
+
readonly text: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
261
|
+
readonly source: v.LiteralSchema<"variable", undefined>;
|
|
262
|
+
readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
|
|
263
|
+
}, undefined>], undefined>;
|
|
252
264
|
readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
253
265
|
}, undefined>, v.ObjectSchema<{
|
|
254
266
|
readonly type: v.LiteralSchema<"keypress", undefined>;
|
|
@@ -346,7 +358,13 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
346
358
|
} | {
|
|
347
359
|
type: "type";
|
|
348
360
|
selector: string;
|
|
349
|
-
text: string
|
|
361
|
+
text: (string | {
|
|
362
|
+
source: "variable";
|
|
363
|
+
key: (string | string[] | undefined) & (string | string[]);
|
|
364
|
+
} | undefined) & (string | {
|
|
365
|
+
source: "variable";
|
|
366
|
+
key: (string | string[] | undefined) & (string | string[]);
|
|
367
|
+
});
|
|
350
368
|
delay?: number | undefined;
|
|
351
369
|
} | {
|
|
352
370
|
type: "keypress";
|
package/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import ie from"puppeteer-core";import{ElementHandle as
|
|
2
|
-
`,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),f=e.optional(e.boolean()),
|
|
3
|
-
${JSON.stringify(t.issues)}`);return this.openPage(async r=>{if(i)for(let n in i)r.setVariable(n,i[n]);return r.exeQueue(t.output)})}};export{
|
|
1
|
+
import ie from"puppeteer-core";import{ElementHandle as B}from"puppeteer-core";import{load as M}from"cheerio";function b(a,o){let i=M(a,void 0,!0),t=i("body");return t.find("script,style,iframe,footer,br,hr,svg").remove(),t.find("*").removeAttr("class"),t.find("*").removeAttr("style"),i("*").contents().filter(function(){return this.type==="comment"||this.type==="text"&&!this.data.trim()}).remove(),i("*").contents().filter(function(){return this.type==="text"&&!!this.data.trim()}).text((r,n)=>n.trim()),o?t.text():t.html()}import{promise as U}from"fastq";var u=class{page;browser;parent;#e={};constructor(o,i,t){this.page=o,this.browser=i,this.parent=t}ab;timeoutId;setMaxTimeout(o){this.ab=new AbortController,this.timeoutId=setTimeout(()=>{this.ab.abort("timeout")},o)}clearTimeout(){clearTimeout(this.timeoutId)}setVariable(o,i){this.#e[o]=i}getVariable(o){return this.#e[o]}#t(o,i){let t,r=!1;for(let n=0;n<i.length;n++){let l=i[n];if(l===".."){if(!o.parent)throw new Error("未找到父级");o=o.parent}else if(r){if(!t||typeof t!="object")throw new Error(`${i}路径下未找到值`);t=t[l]}else t=o.#e[l],r=!0}return t}#o(o){if(typeof o=="string")return o;if(o.source==="variable")return typeof o.key=="string"?this.#e[o.key]:this.#t(this,o.key)}async exeQueue(o){let i;for(let t of o)switch(console.log("准备执行",t),t.type){case"click":await this.page.click(t.selector,{offset:t.offset,delay:t.delay,count:t.count});break;case"type":{await this.page.type(t.selector,this.#o(t.text),{delay:t.delay});break}case"goto":{i=await this.page.goto(this.#o(t.url),{waitUntil:t.waitUntil,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"setViewport":{i=await this.page.setViewport({width:t.width,height:t.height,isMobile:t.isMobile,isLandscape:t.isLandscape});break}case"wait":{switch(t.config.mode){case"selector":{i=await this.page.waitForSelector(t.config.selector,{visible:t.config.visible,hidden:t.config.hidden,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"request":{let r=t.config;i=await this.page.waitForRequest(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.method&&r.method!==n.method()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"response":{let r=t.config;i=await this.page.waitForResponse(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.status&&r.status!==n.status()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"networkIdle":{i=await this.page.waitForNetworkIdle({idleTime:t.config.idleTime,concurrency:t.config.concurrency,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"navigation":{i=await this.page.waitForNavigation({signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}}break}case"selector":{t.multi?i=this.#e[t.output]=await this.page.$$(t.selector):i=this.#e[t.output]=await this.page.$(t.selector);break}case"keypress":{await this.page.keyboard.press(t.key,{delay:t.delay});break}case"findData":{let r=this.#e[t.input];Array.isArray(r)?t.kind==="property"&&(i=this.#e[t.output]=await Promise.all(r.map(n=>n.getProperty(t.key).then(l=>l.jsonValue())))):r instanceof B&&(i=this.#e[t.output]=r.getProperty(t.key).then(n=>n.jsonValue()));break}case"getContent":{let r=await this.page.content();i=this.#e[t.output]=t.cleanContent?b(r,t.format==="text"):r;break}case"page":{let r=this.#e[t.input],n=Array.isArray(r)?r:[r],l=U(s=>(s.page.setVariable("$item",n[s.index]),s.page.setVariable("$index",s.index),s.page.setVariable("$first",s.index===0),s.page.setVariable("$last",s.index===n.length-1),s.page.exeQueue(t.actions)),t.concurrency),g;l.error(s=>{s&&(g=s)});let y=[];for(let s=0;s<n.length;s++)y.push(await this.browser.openPage(async L=>l.push({page:L,index:s}),this).catch(()=>{}));if(await l.drained(),t.throwError&&g)throw g;i=y,t.output&&(this.#e[t.output]=i);break}case"setUserAgent":{await this.page.setUserAgent(t.userAgent);break}case"close":{await this.page.close({runBeforeUnload:!1}),this.clearTimeout();break}case"custom":{if(typeof t.fn=="function")i=await t.fn(this);else{let r=this.browser.getCustom(t.config.type);if(!r)throw new Error(`自定义[${t.config.type}]未实现处理`);i=await r(t.config,this)}break}default:break}return i}};import*as e from"valibot";var W=e.picklist(["0","1","2","3","4","5","6","7","8","9","Power","Eject","Abort","Help","Backspace","Tab","Numpad5","NumpadEnter","Enter","\r",`
|
|
2
|
+
`,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),f=e.optional(e.boolean()),G=e.optional(e.string());var $=c,H=e.pipe(e.string(),e.transform(a=>new RegExp(a))),q=e.pipe(e.tuple([e.string(),e.pipe(e.string())]),e.transform(([a,o])=>new RegExp(a,o))),v=e.union([H,q]),p=e.string(),h=e.union([e.string(),e.object({source:e.literal("variable"),key:e.union([e.string(),e.array(e.string())])})]),d=e.object({timeout:$,waitUntil:e.optional(e.picklist(["load","domcontentloaded","networkidle0","networkidle2"]),"networkidle2"),url:h,type:e.literal("goto")}),w=e.object({width:e.optional(e.number(),1920),height:e.optional(e.number(),1080),isMobile:e.optional(e.boolean()),isLandscape:e.optional(e.boolean()),type:e.literal("setViewport")}),x=e.object({userAgent:e.string(),type:e.literal("setUserAgent")}),Q=e.object({selector:p,visible:f,hidden:f}),J=e.object({mode:e.literal("selector"),...Q.entries}),Y=e.object({mode:e.literal("request"),urlRegexp:v,method:G}),Z=e.object({mode:e.literal("response"),urlRegexp:e.optional(v),status:c}),z=e.object({mode:e.literal("networkIdle"),idleTime:c,concurrency:c}),X=e.object({mode:e.literal("navigation")}),k=e.object({type:e.literal("wait"),config:e.variant("mode",[J,Y,Z,z,X])}),A=e.object({type:e.literal("click"),selector:p,offset:e.optional(e.object({x:e.number(),y:e.number()})),delay:c,count:c}),C=e.object({type:e.literal("type"),selector:p,text:h,delay:c}),P=e.object({type:e.literal("keypress"),key:W,delay:c}),T=e.object({type:e.literal("selector"),selector:p,output:e.string(),multi:e.optional(e.boolean(),!1)}),E=e.object({type:e.literal("findData"),input:e.string(),output:e.string(),kind:e.picklist(["property"]),key:e.optional(e.string())}),D=e.object({type:e.literal("getContent"),format:e.optional(e.picklist(["html","text"]),"html"),cleanContent:f,output:e.string()}),I=e.object({type:e.literal("close")}),F=e.object({type:e.literal("custom"),config:e.optional(e.looseObject({type:e.string()})),fn:e.optional(e.custom(Boolean))}),_=e.object({type:e.literal("page"),input:e.string(),output:e.optional(e.string()),concurrency:e.optional(e.number(),2),throwError:e.optional(e.boolean(),!1),actions:e.lazy(()=>e.array(S))}),ee=[...[d,w,x,k,A,C,P,T,E,D,I,F].map(a=>a.entries.type.literal),"page"],S=e.fallback(e.union([d,w,x,k,A,C,P,T,E,D,_,I,F]),a=>{if(ee.includes((a?.value).type))throw new Error(JSON.stringify(a?.issues));return{type:"custom",config:a?.value}}),R=e.array(S),me=e.object({maxTimeout:c,actionTimeout:c});import*as j from"valibot";import{Browser as re,computeExecutablePath as ne}from"@puppeteer/browsers";import*as O from"fs";import{Browser as N,computeExecutablePath as te,install as oe}from"@puppeteer/browsers";async function V(a){let o=await oe({browser:N.CHROME,baseUrl:"https://cdn.npmmirror.com/binaries/chrome-for-testing",...a,unpack:!0})}function he(a,o){return te({cacheDir:a,browser:N.CHROME,buildId:o})}import{PUPPETEER_REVISIONS as ae}from"puppeteer-core/internal/revisions.js";async function Fe(a){return m.init(a)}var K=ae.chrome,m=class a{browser;static async init(o){let i=re.CHROME,t=ne({cacheDir:o.cacheDir,browser:i,buildId:K});O.existsSync(t)||(console.log("准备下载"),await V({cacheDir:o.cacheDir,buildId:K,browser:i}));let r=await ie.launch({...o,executablePath:t});return new a(r)}constructor(o){this.browser=o}#e;#t=new Map;setConfig(o){this.#e=o}getConfig(){return this.#e}registerCustom(o,i){this.#t.set(o,i)}clearCustom(){this.#t.clear()}getCustom(o){return this.#t.get(o)}async openPage(o,i){let t=new u(await this.browser.newPage(),this,i);return this.#e?.maxTimeout&&t.setMaxTimeout(this.#e.maxTimeout),o(t)}runQueue(o,i){let t=j.safeParse(R,o);if(!t.success)throw new Error(`解析配置错误
|
|
3
|
+
${JSON.stringify(t.issues)}`);return this.openPage(async r=>{if(i)for(let n in i)r.setVariable(n,i[n]);return r.exeQueue(t.output)})}};export{S as ActionDefine,R as ActionListDefine,me as GlobalConfig,m as WebBrowser,u as WebPage,V as download,he as getExecutablePath,Fe as init};
|