@cyia/crawl 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/define.d.ts CHANGED
@@ -56,7 +56,10 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
56
56
  }, undefined>, v.ObjectSchema<{
57
57
  readonly type: v.LiteralSchema<"type", undefined>;
58
58
  readonly selector: v.StringSchema<undefined>;
59
- readonly text: v.StringSchema<undefined>;
59
+ readonly text: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
60
+ readonly source: v.LiteralSchema<"variable", undefined>;
61
+ readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
62
+ }, undefined>], undefined>;
60
63
  readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
61
64
  }, undefined>, v.ObjectSchema<{
62
65
  readonly type: v.LiteralSchema<"keypress", undefined>;
@@ -75,7 +78,7 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
75
78
  readonly key: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
76
79
  }, undefined>, v.ObjectSchema<{
77
80
  readonly type: v.LiteralSchema<"getContent", undefined>;
78
- readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text"], undefined>, "html">;
81
+ readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text", "markdown"], undefined>, "html">;
79
82
  readonly cleanContent: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
80
83
  readonly output: v.StringSchema<undefined>;
81
84
  }, undefined>, v.GenericSchema<{
@@ -154,7 +157,13 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
154
157
  } | {
155
158
  type: "type";
156
159
  selector: string;
157
- text: string;
160
+ text: (string | {
161
+ source: "variable";
162
+ key: (string | string[] | undefined) & (string | string[]);
163
+ } | undefined) & (string | {
164
+ source: "variable";
165
+ key: (string | string[] | undefined) & (string | string[]);
166
+ });
158
167
  delay?: number | undefined;
159
168
  } | {
160
169
  type: "keypress";
@@ -173,7 +182,7 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
173
182
  key?: string | undefined;
174
183
  } | {
175
184
  type: "getContent";
176
- format: "text" | "html";
185
+ format: "html" | "text" | "markdown";
177
186
  cleanContent?: boolean | undefined;
178
187
  output: string;
179
188
  } | {
@@ -248,7 +257,10 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
248
257
  }, undefined>, v.ObjectSchema<{
249
258
  readonly type: v.LiteralSchema<"type", undefined>;
250
259
  readonly selector: v.StringSchema<undefined>;
251
- readonly text: v.StringSchema<undefined>;
260
+ readonly text: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
261
+ readonly source: v.LiteralSchema<"variable", undefined>;
262
+ readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
263
+ }, undefined>], undefined>;
252
264
  readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
253
265
  }, undefined>, v.ObjectSchema<{
254
266
  readonly type: v.LiteralSchema<"keypress", undefined>;
@@ -267,7 +279,7 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
267
279
  readonly key: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
268
280
  }, undefined>, v.ObjectSchema<{
269
281
  readonly type: v.LiteralSchema<"getContent", undefined>;
270
- readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text"], undefined>, "html">;
282
+ readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text", "markdown"], undefined>, "html">;
271
283
  readonly cleanContent: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
272
284
  readonly output: v.StringSchema<undefined>;
273
285
  }, undefined>, v.GenericSchema<{
@@ -346,7 +358,13 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
346
358
  } | {
347
359
  type: "type";
348
360
  selector: string;
349
- text: string;
361
+ text: (string | {
362
+ source: "variable";
363
+ key: (string | string[] | undefined) & (string | string[]);
364
+ } | undefined) & (string | {
365
+ source: "variable";
366
+ key: (string | string[] | undefined) & (string | string[]);
367
+ });
350
368
  delay?: number | undefined;
351
369
  } | {
352
370
  type: "keypress";
@@ -365,7 +383,7 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
365
383
  key?: string | undefined;
366
384
  } | {
367
385
  type: "getContent";
368
- format: "text" | "html";
386
+ format: "html" | "text" | "markdown";
369
387
  cleanContent?: boolean | undefined;
370
388
  output: string;
371
389
  } | {
package/format.d.ts CHANGED
@@ -1 +1,4 @@
1
- export declare function format(rawHtml: string, returnText?: boolean): string;
1
+ export declare function format(rawHtml: string, options: {
2
+ cleanContent?: boolean;
3
+ format: 'html' | 'text' | 'markdown';
4
+ }): string | undefined;
package/index.d.ts CHANGED
@@ -2,3 +2,4 @@ export * from './init';
2
2
  export * from './define';
3
3
  export * from './page';
4
4
  export * from './download';
5
+ export * from './format';
package/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import ie from"puppeteer-core";import{ElementHandle as M}from"puppeteer-core";import{load as L}from"cheerio";function b(a,o){let i=L(a,void 0,!0),t=i("body");return t.find("script,style,iframe,footer,br,hr,svg").remove(),t.find("*").removeAttr("class"),t.find("*").removeAttr("style"),i("*").contents().filter(function(){return this.type==="comment"||this.type==="text"&&!this.data.trim()}).remove(),i("*").contents().filter(function(){return this.type==="text"&&!!this.data.trim()}).text((r,n)=>n.trim()),o?t.text():t.html()}import{promise as B}from"fastq";var u=class{page;browser;parent;#e={};constructor(o,i,t){this.page=o,this.browser=i,this.parent=t}ab;timeoutId;setMaxTimeout(o){this.ab=new AbortController,this.timeoutId=setTimeout(()=>{this.ab.abort("timeout")},o)}clearTimeout(){clearTimeout(this.timeoutId)}setVariable(o,i){this.#e[o]=i}getVariable(o){return this.#e[o]}#t(o,i){let t,r=!1;for(let n=0;n<i.length;n++){let l=i[n];if(l===".."){if(!o.parent)throw new Error("未找到父级");o=o.parent}else if(r){if(!t||typeof t!="object")throw new Error(`${i}路径下未找到值`);t=t[l]}else t=o.#e[l],r=!0}return t}#o(o){if(typeof o=="string")return o;if(o.source==="variable")return typeof o.key=="string"?this.#e[o.key]:this.#t(this,o.key)}async exeQueue(o){let i;for(let t of o)switch(console.log("准备执行",t),t.type){case"click":await this.page.click(t.selector,{offset:t.offset,delay:t.delay,count:t.count});break;case"type":{await this.page.type(t.selector,t.text,{delay:t.delay});break}case"goto":{i=await this.page.goto(this.#o(t.url),{waitUntil:t.waitUntil,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"setViewport":{i=await this.page.setViewport({width:t.width,height:t.height,isMobile:t.isMobile,isLandscape:t.isLandscape});break}case"wait":{switch(t.config.mode){case"selector":{i=await this.page.waitForSelector(t.config.selector,{visible:t.config.visible,hidden:t.config.hidden,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"request":{let r=t.config;i=await this.page.waitForRequest(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.method&&r.method!==n.method()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"response":{let r=t.config;i=await this.page.waitForResponse(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.status&&r.status!==n.status()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"networkIdle":{i=await this.page.waitForNetworkIdle({idleTime:t.config.idleTime,concurrency:t.config.concurrency,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"navigation":{i=await this.page.waitForNavigation({signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}}break}case"selector":{t.multi?i=this.#e[t.output]=await this.page.$$(t.selector):i=this.#e[t.output]=await this.page.$(t.selector);break}case"keypress":{await this.page.keyboard.press(t.key,{delay:t.delay});break}case"findData":{let r=this.#e[t.input];Array.isArray(r)?t.kind==="property"&&(i=this.#e[t.output]=await Promise.all(r.map(n=>n.getProperty(t.key).then(l=>l.jsonValue())))):r instanceof M&&(i=this.#e[t.output]=r.getProperty(t.key).then(n=>n.jsonValue()));break}case"getContent":{let r=await this.page.content();i=this.#e[t.output]=t.cleanContent?b(r,t.format==="text"):r;break}case"page":{let r=this.#e[t.input],n=Array.isArray(r)?r:[r],l=B(s=>(s.page.setVariable("$item",n[s.index]),s.page.setVariable("$index",s.index),s.page.setVariable("$first",s.index===0),s.page.setVariable("$last",s.index===n.length-1),s.page.exeQueue(t.actions)),t.concurrency),g;l.error(s=>{s&&(g=s)});let y=[];for(let s=0;s<n.length;s++)y.push(await this.browser.openPage(async O=>l.push({page:O,index:s}),this).catch(()=>{}));if(await l.drained(),t.throwError&&g)throw g;i=y,t.output&&(this.#e[t.output]=i);break}case"setUserAgent":{await this.page.setUserAgent(t.userAgent);break}case"close":{await this.page.close({runBeforeUnload:!1}),this.clearTimeout();break}case"custom":{if(typeof t.fn=="function")i=await t.fn(this);else{let r=this.browser.getCustom(t.config.type);if(!r)throw new Error(`自定义[${t.config.type}]未实现处理`);i=await r(t.config,this)}break}default:break}return i}};import*as e from"valibot";var U=e.picklist(["0","1","2","3","4","5","6","7","8","9","Power","Eject","Abort","Help","Backspace","Tab","Numpad5","NumpadEnter","Enter","\r",`
2
- `,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),f=e.optional(e.boolean()),W=e.optional(e.string());var G=c,$=e.pipe(e.string(),e.transform(a=>new RegExp(a))),H=e.pipe(e.tuple([e.string(),e.pipe(e.string())]),e.transform(([a,o])=>new RegExp(a,o))),v=e.union([$,H]),p=e.string(),q=e.union([e.string(),e.object({source:e.literal("variable"),key:e.union([e.string(),e.array(e.string())])})]),h=e.object({timeout:G,waitUntil:e.optional(e.picklist(["load","domcontentloaded","networkidle0","networkidle2"]),"networkidle2"),url:q,type:e.literal("goto")}),d=e.object({width:e.optional(e.number(),1920),height:e.optional(e.number(),1080),isMobile:e.optional(e.boolean()),isLandscape:e.optional(e.boolean()),type:e.literal("setViewport")}),w=e.object({userAgent:e.string(),type:e.literal("setUserAgent")}),Q=e.object({selector:p,visible:f,hidden:f}),J=e.object({mode:e.literal("selector"),...Q.entries}),Y=e.object({mode:e.literal("request"),urlRegexp:v,method:W}),Z=e.object({mode:e.literal("response"),urlRegexp:e.optional(v),status:c}),z=e.object({mode:e.literal("networkIdle"),idleTime:c,concurrency:c}),X=e.object({mode:e.literal("navigation")}),x=e.object({type:e.literal("wait"),config:e.variant("mode",[J,Y,Z,z,X])}),k=e.object({type:e.literal("click"),selector:p,offset:e.optional(e.object({x:e.number(),y:e.number()})),delay:c,count:c}),A=e.object({type:e.literal("type"),selector:p,text:e.string(),delay:c}),C=e.object({type:e.literal("keypress"),key:U,delay:c}),P=e.object({type:e.literal("selector"),selector:p,output:e.string(),multi:e.optional(e.boolean(),!1)}),T=e.object({type:e.literal("findData"),input:e.string(),output:e.string(),kind:e.picklist(["property"]),key:e.optional(e.string())}),E=e.object({type:e.literal("getContent"),format:e.optional(e.picklist(["html","text"]),"html"),cleanContent:f,output:e.string()}),D=e.object({type:e.literal("close")}),I=e.object({type:e.literal("custom"),config:e.optional(e.looseObject({type:e.string()})),fn:e.optional(e.custom(Boolean))}),_=e.object({type:e.literal("page"),input:e.string(),output:e.optional(e.string()),concurrency:e.optional(e.number(),2),throwError:e.optional(e.boolean(),!1),actions:e.lazy(()=>e.array(F))}),ee=[...[h,d,w,x,k,A,C,P,T,E,D,I].map(a=>a.entries.type.literal),"page"],F=e.fallback(e.union([h,d,w,x,k,A,C,P,T,E,_,D,I]),a=>{if(ee.includes((a?.value).type))throw new Error(JSON.stringify(a?.issues));return{type:"custom",config:a?.value}}),S=e.array(F),me=e.object({maxTimeout:c,actionTimeout:c});import*as K from"valibot";import{Browser as re,computeExecutablePath as ne}from"@puppeteer/browsers";import*as j from"fs";import{Browser as R,computeExecutablePath as te,install as oe}from"@puppeteer/browsers";async function N(a){let o=await oe({browser:R.CHROME,baseUrl:"https://cdn.npmmirror.com/binaries/chrome-for-testing",...a,unpack:!0})}function he(a,o){return te({cacheDir:a,browser:R.CHROME,buildId:o})}import{PUPPETEER_REVISIONS as ae}from"puppeteer-core/internal/revisions.js";async function Fe(a){return m.init(a)}var V=ae.chrome,m=class a{browser;static async init(o){let i=re.CHROME,t=ne({cacheDir:o.cacheDir,browser:i,buildId:V});j.existsSync(t)||(console.log("准备下载"),await N({cacheDir:o.cacheDir,buildId:V,browser:i}));let r=await ie.launch({...o,executablePath:t});return new a(r)}constructor(o){this.browser=o}#e;#t=new Map;setConfig(o){this.#e=o}getConfig(){return this.#e}registerCustom(o,i){this.#t.set(o,i)}clearCustom(){this.#t.clear()}getCustom(o){return this.#t.get(o)}async openPage(o,i){let t=new u(await this.browser.newPage(),this,i);return this.#e?.maxTimeout&&t.setMaxTimeout(this.#e.maxTimeout),o(t)}runQueue(o,i){let t=K.safeParse(S,o);if(!t.success)throw new Error(`解析配置错误
3
- ${JSON.stringify(t.issues)}`);return this.openPage(async r=>{if(i)for(let n in i)r.setVariable(n,i[n]);return r.exeQueue(t.output)})}};export{F as ActionDefine,S as ActionListDefine,me as GlobalConfig,m as WebBrowser,u as WebPage,N as download,he as getExecutablePath,Fe as init};
1
+ import re from"puppeteer-core";import{ElementHandle as U}from"puppeteer-core";import{load as M}from"cheerio";import B from"turndown";function h(a,o){let i=M(a,void 0,!0),t=i("body");if(o.cleanContent&&(t.find("script,style,iframe,footer,br,hr,svg,header").remove(),t.find("*").removeAttr("class"),t.find("*").removeAttr("style"),i("*").contents().filter(function(){return this.type==="comment"||this.type==="text"&&!this.data.trim()}).remove(),i("*").contents().filter(function(){return this.type==="text"&&!!this.data.trim()}).text((n,l)=>l.trim())),o.format==="html")return t.html();if(o.format==="text")return t.text();if(o.format==="markdown"){var r=new B;return r.turndown(t.html())}}import{promise as W}from"fastq";var u=class{page;browser;parent;#e={};constructor(o,i,t){this.page=o,this.browser=i,this.parent=t}ab;timeoutId;setMaxTimeout(o){this.ab=new AbortController,this.timeoutId=setTimeout(()=>{this.ab.abort("timeout")},o)}clearTimeout(){clearTimeout(this.timeoutId)}setVariable(o,i){this.#e[o]=i}getVariable(o){return this.#e[o]}#t(o,i){let t,r=!1;for(let n=0;n<i.length;n++){let l=i[n];if(l===".."){if(!o.parent)throw new Error("未找到父级");o=o.parent}else if(r){if(!t||typeof t!="object")throw new Error(`${i}路径下未找到值`);t=t[l]}else t=o.#e[l],r=!0}return t}#o(o){if(typeof o=="string")return o;if(o.source==="variable")return typeof o.key=="string"?this.#e[o.key]:this.#t(this,o.key)}async exeQueue(o){let i;for(let t of o)switch(console.log("准备执行",t),t.type){case"click":await this.page.click(t.selector,{offset:t.offset,delay:t.delay,count:t.count});break;case"type":{await this.page.type(t.selector,this.#o(t.text),{delay:t.delay});break}case"goto":{i=await this.page.goto(this.#o(t.url),{waitUntil:t.waitUntil,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"setViewport":{i=await this.page.setViewport({width:t.width,height:t.height,isMobile:t.isMobile,isLandscape:t.isLandscape});break}case"wait":{switch(t.config.mode){case"selector":{i=await this.page.waitForSelector(t.config.selector,{visible:t.config.visible,hidden:t.config.hidden,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"request":{let r=t.config;i=await this.page.waitForRequest(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.method&&r.method!==n.method()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"response":{let r=t.config;i=await this.page.waitForResponse(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.status&&r.status!==n.status()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"networkIdle":{i=await this.page.waitForNetworkIdle({idleTime:t.config.idleTime,concurrency:t.config.concurrency,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"navigation":{i=await this.page.waitForNavigation({signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}}break}case"selector":{t.multi?i=this.#e[t.output]=await this.page.$$(t.selector):i=this.#e[t.output]=await this.page.$(t.selector);break}case"keypress":{await this.page.keyboard.press(t.key,{delay:t.delay});break}case"findData":{let r=this.#e[t.input];Array.isArray(r)?t.kind==="property"&&(i=this.#e[t.output]=await Promise.all(r.map(n=>n.getProperty(t.key).then(l=>l.jsonValue())))):r instanceof U&&(i=this.#e[t.output]=r.getProperty(t.key).then(n=>n.jsonValue()));break}case"getContent":{let r=await this.page.content();i=this.#e[t.output]=h(r,{cleanContent:t.cleanContent,format:t.format});break}case"page":{let r=this.#e[t.input],n=Array.isArray(r)?r:[r],l=W(s=>(s.page.setVariable("$item",n[s.index]),s.page.setVariable("$index",s.index),s.page.setVariable("$first",s.index===0),s.page.setVariable("$last",s.index===n.length-1),s.page.exeQueue(t.actions)),t.concurrency),g;l.error(s=>{s&&(g=s)});let y=[];for(let s=0;s<n.length;s++)y.push(await this.browser.openPage(async L=>l.push({page:L,index:s}),this).catch(()=>{}));if(await l.drained(),t.throwError&&g)throw g;i=y,t.output&&(this.#e[t.output]=i);break}case"setUserAgent":{await this.page.setUserAgent(t.userAgent);break}case"close":{await this.page.close({runBeforeUnload:!1}),this.clearTimeout();break}case"custom":{if(typeof t.fn=="function")i=await t.fn(this);else{let r=this.browser.getCustom(t.config.type);if(!r)throw new Error(`自定义[${t.config.type}]未实现处理`);i=await r(t.config,this)}break}default:break}return i}};import*as e from"valibot";var G=e.picklist(["0","1","2","3","4","5","6","7","8","9","Power","Eject","Abort","Help","Backspace","Tab","Numpad5","NumpadEnter","Enter","\r",`
2
+ `,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),f=e.optional(e.boolean()),$=e.optional(e.string());var H=c,q=e.pipe(e.string(),e.transform(a=>new RegExp(a))),Q=e.pipe(e.tuple([e.string(),e.pipe(e.string())]),e.transform(([a,o])=>new RegExp(a,o))),b=e.union([q,Q]),p=e.string(),v=e.union([e.string(),e.object({source:e.literal("variable"),key:e.union([e.string(),e.array(e.string())])})]),d=e.object({timeout:H,waitUntil:e.optional(e.picklist(["load","domcontentloaded","networkidle0","networkidle2"]),"networkidle2"),url:v,type:e.literal("goto")}),w=e.object({width:e.optional(e.number(),1920),height:e.optional(e.number(),1080),isMobile:e.optional(e.boolean()),isLandscape:e.optional(e.boolean()),type:e.literal("setViewport")}),k=e.object({userAgent:e.string(),type:e.literal("setUserAgent")}),J=e.object({selector:p,visible:f,hidden:f}),Y=e.object({mode:e.literal("selector"),...J.entries}),Z=e.object({mode:e.literal("request"),urlRegexp:b,method:$}),z=e.object({mode:e.literal("response"),urlRegexp:e.optional(b),status:c}),X=e.object({mode:e.literal("networkIdle"),idleTime:c,concurrency:c}),_=e.object({mode:e.literal("navigation")}),x=e.object({type:e.literal("wait"),config:e.variant("mode",[Y,Z,z,X,_])}),C=e.object({type:e.literal("click"),selector:p,offset:e.optional(e.object({x:e.number(),y:e.number()})),delay:c,count:c}),A=e.object({type:e.literal("type"),selector:p,text:v,delay:c}),P=e.object({type:e.literal("keypress"),key:G,delay:c}),T=e.object({type:e.literal("selector"),selector:p,output:e.string(),multi:e.optional(e.boolean(),!1)}),E=e.object({type:e.literal("findData"),input:e.string(),output:e.string(),kind:e.picklist(["property"]),key:e.optional(e.string())}),D=e.object({type:e.literal("getContent"),format:e.optional(e.picklist(["html","text","markdown"]),"html"),cleanContent:f,output:e.string()}),I=e.object({type:e.literal("close")}),F=e.object({type:e.literal("custom"),config:e.optional(e.looseObject({type:e.string()})),fn:e.optional(e.custom(Boolean))}),ee=e.object({type:e.literal("page"),input:e.string(),output:e.optional(e.string()),concurrency:e.optional(e.number(),2),throwError:e.optional(e.boolean(),!1),actions:e.lazy(()=>e.array(S))}),te=[...[d,w,k,x,C,A,P,T,E,D,I,F].map(a=>a.entries.type.literal),"page"],S=e.fallback(e.union([d,w,k,x,C,A,P,T,E,D,ee,I,F]),a=>{if(te.includes((a?.value).type))throw new Error(JSON.stringify(a?.issues));return{type:"custom",config:a?.value}}),R=e.array(S),he=e.object({maxTimeout:c,actionTimeout:c});import*as j from"valibot";import{Browser as ne,computeExecutablePath as ae}from"@puppeteer/browsers";import*as O from"fs";import{Browser as N,computeExecutablePath as oe,install as ie}from"@puppeteer/browsers";async function V(a){let o=await ie({browser:N.CHROME,baseUrl:"https://cdn.npmmirror.com/binaries/chrome-for-testing",...a,unpack:!0})}function we(a,o){return oe({cacheDir:a,browser:N.CHROME,buildId:o})}import{PUPPETEER_REVISIONS as se}from"puppeteer-core/internal/revisions.js";async function Re(a){return m.init(a)}var K=se.chrome,m=class a{browser;static async init(o){let i=ne.CHROME,t=ae({cacheDir:o.cacheDir,browser:i,buildId:K});O.existsSync(t)||(console.log("准备下载"),await V({cacheDir:o.cacheDir,buildId:K,browser:i}));let r=await re.launch({...o,executablePath:t});return new a(r)}constructor(o){this.browser=o}#e;#t=new Map;setConfig(o){this.#e=o}getConfig(){return this.#e}registerCustom(o,i){this.#t.set(o,i)}clearCustom(){this.#t.clear()}getCustom(o){return this.#t.get(o)}async openPage(o,i){let t=new u(await this.browser.newPage(),this,i);return this.#e?.maxTimeout&&t.setMaxTimeout(this.#e.maxTimeout),o(t)}runQueue(o,i){let t=j.safeParse(R,o);if(!t.success)throw new Error(`解析配置错误
3
+ ${JSON.stringify(t.issues)}`);return this.openPage(async r=>{if(i)for(let n in i)r.setVariable(n,i[n]);return r.exeQueue(t.output)})}};export{S as ActionDefine,R as ActionListDefine,he as GlobalConfig,m as WebBrowser,u as WebPage,V as download,h as format,we as getExecutablePath,Re as init};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cyia/crawl",
3
- "version": "0.0.4",
3
+ "version": "0.0.6",
4
4
  "author": "wszgrcy",
5
5
  "description": "",
6
6
  "dependencies": {