@ooneex/html 0.0.14 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +2 -150
- package/dist/index.js.map +2 -2
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -1,151 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
import * as cheerio from "cheerio";
|
|
1
|
+
import * as cheerio from "cheerio";
import { Exception } from "@ooneex/exception";
import { HttpStatus } from "@ooneex/http-status";

/**
 * Error type thrown by {@link Html}. Every instance is created with the
 * `HttpStatus.Code.InternalServerError` status.
 */
class HtmlException extends Exception {
  /**
   * @param {string} message - Description of the failure.
   * @param {Record<string, unknown>} [data={}] - Extra context, forwarded
   *   unchanged as the `data` option of the base `Exception`.
   */
  constructor(message, data = {}) {
    super(message, { status: HttpStatus.Code.InternalServerError, data });
    this.name = "HtmlException";
  }
}

/**
 * HTML document parser and analyzer built on Cheerio.
 *
 * An instance starts with an empty document; call {@link Html#load} or
 * {@link Html#loadUrl} before using the extraction methods.
 */
class Html {
  /** Cheerio API bound to the currently loaded document. */
  $;

  constructor() {
    this.$ = cheerio.load("");
  }

  /**
   * Parse an HTML string and make it the current document.
   *
   * @param {string} html - HTML markup to parse.
   * @returns {this} The same instance, for chaining.
   */
  load(html) {
    this.$ = cheerio.load(html);
    return this;
  }

  /**
   * Fetch a URL with `cheerio.fromURL` and make the result the current document.
   *
   * @param {string | URL} url - Address to fetch.
   * @returns {Promise<this>} The same instance, for chaining.
   * @throws {HtmlException} When `cheerio.fromURL` rejects.
   */
  async loadUrl(url) {
    const urlString = url instanceof URL ? url.toString() : url;

    try {
      this.$ = await cheerio.fromURL(urlString);
      return this;
    } catch (error) {
      // NOTE(review): HtmlException treats its second argument as `data`, so
      // this object ends up as `data: { status, data: { url, error } }`.
      // The shape is kept as-is because callers may already read it.
      throw new HtmlException(`Failed to fetch URL: ${urlString}`, {
        status: 500,
        data: {
          url: urlString,
          error: error instanceof Error ? error.message : String(error),
        },
      });
    }
  }

  /**
   * @returns {string} Trimmed text content of the current document.
   */
  getContent() {
    return this.$.text().trim();
  }

  /**
   * @returns {string} Trimmed HTML of the current document, or `""` when
   *   Cheerio yields no markup.
   */
  getHtml() {
    // Apply the fallback BEFORE trimming: `.trim()` always returns a string,
    // so a `??` placed after it could never take effect.
    return (this.$.html() ?? "").trim();
  }

  /**
   * Collect every `<img>` that has a non-empty `src`.
   *
   * @returns {Array<{src: string, alt: string|null, title: string|null, width: string|null, height: string|null}>}
   *   Missing or empty attributes are reported as `null`.
   */
  getImages() {
    const $ = this.$;
    const images = [];

    $("img").each((_, element) => {
      const $img = $(element);
      const src = $img.attr("src");

      if (src) {
        images.push({
          src,
          alt: $img.attr("alt") || null,
          title: $img.attr("title") || null,
          width: $img.attr("width") || null,
          height: $img.attr("height") || null,
        });
      }
    });

    return images;
  }

  /**
   * Collect every `<a>` that has a non-empty `href`.
   *
   * @returns {Array<{href: string, text: string|null, title: string|null, target: string|null, rel: string|null}>}
   *   Missing or empty values are reported as `null`.
   */
  getLinks() {
    const $ = this.$;
    const links = [];

    $("a").each((_, element) => {
      const $link = $(element);
      const href = $link.attr("href");

      if (href) {
        links.push({
          href,
          text: $link.text().trim() || null,
          title: $link.attr("title") || null,
          target: $link.attr("target") || null,
          rel: $link.attr("rel") || null,
        });
      }
    });

    return links;
  }

  /**
   * Collect every `<h1>`–`<h6>` element.
   *
   * @returns {Array<{level: number, text: string, id: string|null}>}
   *   `level` is the digit taken from the tag name.
   */
  getHeadings() {
    const $ = this.$;
    const headings = [];

    $("h1, h2, h3, h4, h5, h6").each((_, element) => {
      const $heading = $(element);
      const tagName = element.tagName.toLowerCase();
      // The second character of "h1".."h6" is the heading level.
      const level = Number.parseInt(tagName.charAt(1), 10);

      headings.push({
        level,
        text: $heading.text().trim(),
        id: $heading.attr("id") || null,
      });
    });

    return headings;
  }

  /**
   * Collect every `<video>` element together with its nested `<source>` tags.
   *
   * @returns {Array<{src: string|null, poster: string|null, width: string|null, height: string|null, controls: boolean, autoplay: boolean, loop: boolean, muted: boolean, sources: Array<{src: string, type: string|null}>}>}
   *   Boolean fields are `true` when the attribute is present at all.
   */
  getVideos() {
    const $ = this.$;
    const videos = [];

    $("video").each((_, element) => {
      const $video = $(element);
      const sources = [];

      $video.find("source").each((_index, sourceElement) => {
        const $source = $(sourceElement);
        const src = $source.attr("src");

        if (src) {
          sources.push({
            src,
            type: $source.attr("type") || null,
          });
        }
      });

      videos.push({
        src: $video.attr("src") || null,
        poster: $video.attr("poster") || null,
        width: $video.attr("width") || null,
        height: $video.attr("height") || null,
        controls: $video.attr("controls") !== undefined,
        autoplay: $video.attr("autoplay") !== undefined,
        loop: $video.attr("loop") !== undefined,
        muted: $video.attr("muted") !== undefined,
        sources,
      });
    });

    return videos;
  }

  /**
   * Collect every checkbox input as a task entry.
   *
   * @returns {Array<{text: string, checked: boolean}>}
   *   `text` is the trimmed text of the checkbox's parent element; `checked`
   *   is `true` when the `checked` attribute is present.
   */
  getTasks() {
    const $ = this.$;
    const tasks = [];

    $('input[type="checkbox"]').each((_, element) => {
      const $checkbox = $(element);
      const checked = $checkbox.attr("checked") !== undefined;
      const text = $checkbox.parent().text().trim();

      tasks.push({ text, checked });
    });

    return tasks;
  }
}

export { HtmlException, Html };
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
import { Exception } from "@ooneex/exception";
|
|
6
|
-
import { HttpStatus } from "@ooneex/http-status";
|
|
7
|
-
|
|
8
|
-
class HtmlException extends Exception {
|
|
9
|
-
constructor(message, data = {}) {
|
|
10
|
-
super(message, {
|
|
11
|
-
status: HttpStatus.Code.InternalServerError,
|
|
12
|
-
data
|
|
13
|
-
});
|
|
14
|
-
this.name = "HtmlException";
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
// src/Html.ts
|
|
19
|
-
class Html {
|
|
20
|
-
$;
|
|
21
|
-
constructor() {
|
|
22
|
-
this.$ = cheerio.load("");
|
|
23
|
-
}
|
|
24
|
-
load(html) {
|
|
25
|
-
this.$ = cheerio.load(html);
|
|
26
|
-
return this;
|
|
27
|
-
}
|
|
28
|
-
async loadUrl(url) {
|
|
29
|
-
const urlString = url instanceof URL ? url.toString() : url;
|
|
30
|
-
try {
|
|
31
|
-
this.$ = await cheerio.fromURL(urlString);
|
|
32
|
-
return this;
|
|
33
|
-
} catch (error) {
|
|
34
|
-
throw new HtmlException(`Failed to fetch URL: ${urlString}`, {
|
|
35
|
-
status: 500,
|
|
36
|
-
data: {
|
|
37
|
-
url: urlString,
|
|
38
|
-
error: error instanceof Error ? error.message : String(error)
|
|
39
|
-
}
|
|
40
|
-
});
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
getContent() {
|
|
44
|
-
return this.$.text().trim();
|
|
45
|
-
}
|
|
46
|
-
getHtml() {
|
|
47
|
-
return this.$.html().trim() ?? "";
|
|
48
|
-
}
|
|
49
|
-
getImages() {
|
|
50
|
-
const $ = this.$;
|
|
51
|
-
const images = [];
|
|
52
|
-
$("img").each((_, element) => {
|
|
53
|
-
const $img = $(element);
|
|
54
|
-
const src = $img.attr("src");
|
|
55
|
-
if (src) {
|
|
56
|
-
images.push({
|
|
57
|
-
src,
|
|
58
|
-
alt: $img.attr("alt") || null,
|
|
59
|
-
title: $img.attr("title") || null,
|
|
60
|
-
width: $img.attr("width") || null,
|
|
61
|
-
height: $img.attr("height") || null
|
|
62
|
-
});
|
|
63
|
-
}
|
|
64
|
-
});
|
|
65
|
-
return images;
|
|
66
|
-
}
|
|
67
|
-
getLinks() {
|
|
68
|
-
const $ = this.$;
|
|
69
|
-
const links = [];
|
|
70
|
-
$("a").each((_, element) => {
|
|
71
|
-
const $link = $(element);
|
|
72
|
-
const href = $link.attr("href");
|
|
73
|
-
if (href) {
|
|
74
|
-
links.push({
|
|
75
|
-
href,
|
|
76
|
-
text: $link.text().trim() || null,
|
|
77
|
-
title: $link.attr("title") || null,
|
|
78
|
-
target: $link.attr("target") || null,
|
|
79
|
-
rel: $link.attr("rel") || null
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
});
|
|
83
|
-
return links;
|
|
84
|
-
}
|
|
85
|
-
getHeadings() {
|
|
86
|
-
const $ = this.$;
|
|
87
|
-
const headings = [];
|
|
88
|
-
$("h1, h2, h3, h4, h5, h6").each((_, element) => {
|
|
89
|
-
const $heading = $(element);
|
|
90
|
-
const tagName = element.tagName.toLowerCase();
|
|
91
|
-
const level = Number.parseInt(tagName.charAt(1), 10);
|
|
92
|
-
headings.push({
|
|
93
|
-
level,
|
|
94
|
-
text: $heading.text().trim(),
|
|
95
|
-
id: $heading.attr("id") || null
|
|
96
|
-
});
|
|
97
|
-
});
|
|
98
|
-
return headings;
|
|
99
|
-
}
|
|
100
|
-
getVideos() {
|
|
101
|
-
const $ = this.$;
|
|
102
|
-
const videos = [];
|
|
103
|
-
$("video").each((_, element) => {
|
|
104
|
-
const $video = $(element);
|
|
105
|
-
const sources = [];
|
|
106
|
-
$video.find("source").each((_2, sourceElement) => {
|
|
107
|
-
const $source = $(sourceElement);
|
|
108
|
-
const src = $source.attr("src");
|
|
109
|
-
if (src) {
|
|
110
|
-
sources.push({
|
|
111
|
-
src,
|
|
112
|
-
type: $source.attr("type") || null
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
});
|
|
116
|
-
videos.push({
|
|
117
|
-
src: $video.attr("src") || null,
|
|
118
|
-
poster: $video.attr("poster") || null,
|
|
119
|
-
width: $video.attr("width") || null,
|
|
120
|
-
height: $video.attr("height") || null,
|
|
121
|
-
controls: $video.attr("controls") !== undefined,
|
|
122
|
-
autoplay: $video.attr("autoplay") !== undefined,
|
|
123
|
-
loop: $video.attr("loop") !== undefined,
|
|
124
|
-
muted: $video.attr("muted") !== undefined,
|
|
125
|
-
sources
|
|
126
|
-
});
|
|
127
|
-
});
|
|
128
|
-
return videos;
|
|
129
|
-
}
|
|
130
|
-
getTasks() {
|
|
131
|
-
const $ = this.$;
|
|
132
|
-
const tasks = [];
|
|
133
|
-
$('input[type="checkbox"]').each((_, element) => {
|
|
134
|
-
const $checkbox = $(element);
|
|
135
|
-
const $parent = $checkbox.parent();
|
|
136
|
-
const checked = $checkbox.attr("checked") !== undefined;
|
|
137
|
-
const text = $parent.text().trim();
|
|
138
|
-
tasks.push({
|
|
139
|
-
text,
|
|
140
|
-
checked
|
|
141
|
-
});
|
|
142
|
-
});
|
|
143
|
-
return tasks;
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
export {
|
|
147
|
-
HtmlException,
|
|
148
|
-
Html
|
|
149
|
-
};
|
|
150
|
-
|
|
151
|
-
//# debugId=7A2947AEA10680F664756E2164756E21
|
|
3
|
+
//# debugId=F91403C7DF5ED76864756E2164756E21
|
package/dist/index.js.map
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"import type { CheerioAPI } from \"cheerio\";\nimport * as cheerio from \"cheerio\";\nimport { HtmlException } from \"./HtmlException\";\nimport type { HtmlHeadingType, HtmlImageType, HtmlLinkType, HtmlTaskType, HtmlVideoType, IHtml } from \"./types\";\n\n/**\n * HTML document parser and analyzer using Cheerio\n */\nexport class Html implements IHtml {\n private $: CheerioAPI;\n\n constructor() {\n this.$ = cheerio.load(\"\");\n }\n\n /**\n * Load HTML from a string\n * @param html - HTML string to parse\n * @returns this instance for chaining\n */\n public load(html: string): this {\n this.$ = cheerio.load(html);\n return this;\n }\n\n /**\n * Load HTML from a URL using Cheerio's fromURL method\n * @param url - URL to fetch HTML from\n * @returns Promise resolving to this instance for chaining\n */\n public async loadUrl(url: string | URL): Promise<this> {\n const urlString = url instanceof URL ? url.toString() : url;\n\n try {\n this.$ = await cheerio.fromURL(urlString);\n return this;\n } catch (error) {\n throw new HtmlException(`Failed to fetch URL: ${urlString}`, {\n status: 500,\n data: {\n url: urlString,\n error: error instanceof Error ? error.message : String(error),\n },\n });\n }\n }\n\n /**\n * Get the text content of the HTML document\n * @returns Trimmed text content\n */\n public getContent(): string {\n return this.$.text().trim();\n }\n\n /**\n * Get the full HTML string of the document\n * @returns HTML string\n */\n public getHtml(): string {\n return this.$.html().trim() ?? 
\"\";\n }\n\n /**\n * Extract all images from the HTML document\n * @returns Array of image information\n */\n public getImages(): HtmlImageType[] {\n const $ = this.$;\n const images: HtmlImageType[] = [];\n\n $(\"img\").each((_, element) => {\n const $img = $(element);\n const src = $img.attr(\"src\");\n\n if (src) {\n images.push({\n src,\n alt: $img.attr(\"alt\") || null,\n title: $img.attr(\"title\") || null,\n width: $img.attr(\"width\") || null,\n height: $img.attr(\"height\") || null,\n });\n }\n });\n\n return images;\n }\n\n /**\n * Extract all links from the HTML document\n * @returns Array of link information\n */\n public getLinks(): HtmlLinkType[] {\n const $ = this.$;\n const links: HtmlLinkType[] = [];\n\n $(\"a\").each((_, element) => {\n const $link = $(element);\n const href = $link.attr(\"href\");\n\n if (href) {\n links.push({\n href,\n text: $link.text().trim() || null,\n title: $link.attr(\"title\") || null,\n target: $link.attr(\"target\") || null,\n rel: $link.attr(\"rel\") || null,\n });\n }\n });\n\n return links;\n }\n\n /**\n * Extract all headings from the HTML document\n * @returns Array of heading information\n */\n public getHeadings(): HtmlHeadingType[] {\n const $ = this.$;\n const headings: HtmlHeadingType[] = [];\n\n $(\"h1, h2, h3, h4, h5, h6\").each((_, element) => {\n const $heading = $(element);\n const tagName = element.tagName.toLowerCase();\n const level = Number.parseInt(tagName.charAt(1), 10);\n\n headings.push({\n level,\n text: $heading.text().trim(),\n id: $heading.attr(\"id\") || null,\n });\n });\n\n return headings;\n }\n\n /**\n * Extract all videos from the HTML document\n * @returns Array of video information\n */\n public getVideos(): HtmlVideoType[] {\n const $ = this.$;\n const videos: HtmlVideoType[] = [];\n\n $(\"video\").each((_, element) => {\n const $video = $(element);\n const sources: { src: string; type: string | null }[] = [];\n\n $video.find(\"source\").each((_, sourceElement) => {\n const $source 
= $(sourceElement);\n const src = $source.attr(\"src\");\n\n if (src) {\n sources.push({\n src,\n type: $source.attr(\"type\") || null,\n });\n }\n });\n\n videos.push({\n src: $video.attr(\"src\") || null,\n poster: $video.attr(\"poster\") || null,\n width: $video.attr(\"width\") || null,\n height: $video.attr(\"height\") || null,\n controls: $video.attr(\"controls\") !== undefined,\n autoplay: $video.attr(\"autoplay\") !== undefined,\n loop: $video.attr(\"loop\") !== undefined,\n muted: $video.attr(\"muted\") !== undefined,\n sources,\n });\n });\n\n return videos;\n }\n\n /**\n * Extract all tasks (checkbox list items) from the HTML document\n * @returns Array of task information\n */\n public getTasks(): HtmlTaskType[] {\n const $ = this.$;\n const tasks: HtmlTaskType[] = [];\n\n $('input[type=\"checkbox\"]').each((_, element) => {\n const $checkbox = $(element);\n const $parent = $checkbox.parent();\n const checked = $checkbox.attr(\"checked\") !== undefined;\n\n const text = $parent.text().trim();\n\n tasks.push({\n text,\n checked,\n });\n });\n\n return tasks;\n }\n}\n",
|
|
6
6
|
"import { Exception } from \"@ooneex/exception\";\nimport { HttpStatus } from \"@ooneex/http-status\";\n\nexport class HtmlException extends Exception {\n constructor(message: string, data: Record<string, unknown> = {}) {\n super(message, {\n status: HttpStatus.Code.InternalServerError,\n data,\n });\n this.name = \"HtmlException\";\n }\n}\n"
|
|
7
7
|
],
|
|
8
|
-
"mappings": "
|
|
9
|
-
"debugId": "
|
|
8
|
+
"mappings": "AACA,0BCDA,oBAAS,0BACT,qBAAS,4BAEF,MAAM,UAAsB,CAAU,CAC3C,WAAW,CAAC,EAAiB,EAAgC,CAAC,EAAG,CAC/D,MAAM,EAAS,CACb,OAAQ,EAAW,KAAK,oBACxB,MACF,CAAC,EACD,KAAK,KAAO,gBAEhB,CDHO,MAAM,CAAsB,CACzB,EAER,WAAW,EAAG,CACZ,KAAK,EAAY,OAAK,EAAE,EAQnB,IAAI,CAAC,EAAoB,CAE9B,OADA,KAAK,EAAY,OAAK,CAAI,EACnB,UAQI,QAAO,CAAC,EAAkC,CACrD,IAAM,EAAY,aAAe,IAAM,EAAI,SAAS,EAAI,EAExD,GAAI,CAEF,OADA,KAAK,EAAI,MAAc,UAAQ,CAAS,EACjC,KACP,MAAO,EAAO,CACd,MAAM,IAAI,EAAc,wBAAwB,IAAa,CAC3D,OAAQ,IACR,KAAM,CACJ,IAAK,EACL,MAAO,aAAiB,MAAQ,EAAM,QAAU,OAAO,CAAK,CAC9D,CACF,CAAC,GAQE,UAAU,EAAW,CAC1B,OAAO,KAAK,EAAE,KAAK,EAAE,KAAK,EAOrB,OAAO,EAAW,CACvB,OAAO,KAAK,EAAE,KAAK,EAAE,KAAK,GAAK,GAO1B,SAAS,EAAoB,CAClC,IAAM,EAAI,KAAK,EACT,EAA0B,CAAC,EAiBjC,OAfA,EAAE,KAAK,EAAE,KAAK,CAAC,EAAG,IAAY,CAC5B,IAAM,EAAO,EAAE,CAAO,EAChB,EAAM,EAAK,KAAK,KAAK,EAE3B,GAAI,EACF,EAAO,KAAK,CACV,MACA,IAAK,EAAK,KAAK,KAAK,GAAK,KACzB,MAAO,EAAK,KAAK,OAAO,GAAK,KAC7B,MAAO,EAAK,KAAK,OAAO,GAAK,KAC7B,OAAQ,EAAK,KAAK,QAAQ,GAAK,IACjC,CAAC,EAEJ,EAEM,EAOF,QAAQ,EAAmB,CAChC,IAAM,EAAI,KAAK,EACT,EAAwB,CAAC,EAiB/B,OAfA,EAAE,GAAG,EAAE,KAAK,CAAC,EAAG,IAAY,CAC1B,IAAM,EAAQ,EAAE,CAAO,EACjB,EAAO,EAAM,KAAK,MAAM,EAE9B,GAAI,EACF,EAAM,KAAK,CACT,OACA,KAAM,EAAM,KAAK,EAAE,KAAK,GAAK,KAC7B,MAAO,EAAM,KAAK,OAAO,GAAK,KAC9B,OAAQ,EAAM,KAAK,QAAQ,GAAK,KAChC,IAAK,EAAM,KAAK,KAAK,GAAK,IAC5B,CAAC,EAEJ,EAEM,EAOF,WAAW,EAAsB,CACtC,IAAM,EAAI,KAAK,EACT,EAA8B,CAAC,EAcrC,OAZA,EAAE,wBAAwB,EAAE,KAAK,CAAC,EAAG,IAAY,CAC/C,IAAM,EAAW,EAAE,CAAO,EACpB,EAAU,EAAQ,QAAQ,YAAY,EACtC,EAAQ,OAAO,SAAS,EAAQ,OAAO,CAAC,EAAG,EAAE,EAEnD,EAAS,KAAK,CACZ,QACA,KAAM,EAAS,KAAK,EAAE,KAAK,EAC3B,GAAI,EAAS,KAAK,IAAI,GAAK,IAC7B,CAAC,EACF,EAEM,EAOF,SAAS,EAAoB,CAClC,IAAM,EAAI,KAAK,EACT,EAA0B,CAAC,EA+BjC,OA7BA,EAAE,OAAO,EAAE,KAAK,CAAC,EAAG,IAAY,CAC9B,IAAM,EAAS,EAAE,CAAO,EAClB,EAAkD,CAAC,EAEzD,EAAO,KAAK,QAAQ,EAAE,KAAK,CAAC,EAAG,IAAkB,CAC/C,IAAM,EAAU,EAAE,CAAa,EACzB,EAAM,EAAQ,KAAK,KAAK,EAE9B,GAAI,EACF,EAAQ,KAAK,CACX,MACA,KAAM,EAAQ,KAAK,MAAM,GAAK,IAChC,CAAC,EAEJ,EAED,EAAO,KAAK,CACV,IAAK,EAAO,KAAK,KAAK,GAAK,KAC3B,OAAQ,EAAO,KA
AK,QAAQ,GAAK,KACjC,MAAO,EAAO,KAAK,OAAO,GAAK,KAC/B,OAAQ,EAAO,KAAK,QAAQ,GAAK,KACjC,SAAU,EAAO,KAAK,UAAU,IAAM,OACtC,SAAU,EAAO,KAAK,UAAU,IAAM,OACtC,KAAM,EAAO,KAAK,MAAM,IAAM,OAC9B,MAAO,EAAO,KAAK,OAAO,IAAM,OAChC,SACF,CAAC,EACF,EAEM,EAOF,QAAQ,EAAmB,CAChC,IAAM,EAAI,KAAK,EACT,EAAwB,CAAC,EAe/B,OAbA,EAAE,wBAAwB,EAAE,KAAK,CAAC,EAAG,IAAY,CAC/C,IAAM,EAAY,EAAE,CAAO,EACrB,EAAU,EAAU,OAAO,EAC3B,EAAU,EAAU,KAAK,SAAS,IAAM,OAExC,EAAO,EAAQ,KAAK,EAAE,KAAK,EAEjC,EAAM,KAAK,CACT,OACA,SACF,CAAC,EACF,EAEM,EAEX",
|
|
9
|
+
"debugId": "F91403C7DF5ED76864756E2164756E21",
|
|
10
10
|
"names": []
|
|
11
11
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ooneex/html",
|
|
3
3
|
"description": "HTML parsing and DOM manipulation utilities powered by Cheerio",
|
|
4
|
-
"version": "0.0.
|
|
4
|
+
"version": "0.0.16",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
7
7
|
"dist",
|
|
@@ -28,8 +28,8 @@
|
|
|
28
28
|
"npm:publish": "bun publish --tolerate-republish --access public"
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
|
-
"@ooneex/exception": "0.0.
|
|
32
|
-
"@ooneex/http-status": "0.0.
|
|
31
|
+
"@ooneex/exception": "0.0.16",
|
|
32
|
+
"@ooneex/http-status": "0.0.16",
|
|
33
33
|
"cheerio": "^1.1.2"
|
|
34
34
|
},
|
|
35
35
|
"keywords": [
|