@ooneex/html 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Ooneex
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1 @@
1
+ # @ooneex/html
@@ -0,0 +1,235 @@
1
+ /**
2
+ * One heading (h1-h6 element) extracted from an HTML document by Html.getHeadings()
3
+ */
4
+ interface HtmlHeadingType {
5
+ /**
6
+ * Heading level (1-6), parsed from the digit in the element's tag name
7
+ */
8
+ level: number;
9
+ /**
10
+ * Trimmed text content of the heading
11
+ */
12
+ text: string;
13
+ /**
14
+ * Value of the id attribute, or null when the attribute is missing or empty
15
+ */
16
+ id: string | null;
17
+ }
18
+ /**
19
+ * One anchor (a element) extracted from an HTML document; anchors without an href are not reported
20
+ */
21
+ interface HtmlLinkType {
22
+ /**
23
+ * Value of the href attribute (always non-empty; not resolved or normalized by Html)
24
+ */
25
+ href: string;
26
+ /**
27
+ * Trimmed text content of the link, or null when that text is empty
28
+ */
29
+ text: string | null;
30
+ /**
31
+ * Value of the title attribute, or null when missing or empty
32
+ */
33
+ title: string | null;
34
+ /**
35
+ * Value of the target attribute, or null when missing or empty
36
+ */
37
+ target: string | null;
38
+ /**
39
+ * Value of the rel attribute, or null when missing or empty
40
+ */
41
+ rel: string | null;
42
+ }
43
+ /**
44
+ * One img element extracted from an HTML document; images without a src are not reported
45
+ */
46
+ interface HtmlImageType {
47
+ /**
48
+ * Value of the src attribute (always non-empty; not resolved or normalized by Html)
49
+ */
50
+ src: string;
51
+ /**
52
+ * Value of the alt attribute, or null when missing or empty
53
+ */
54
+ alt: string | null;
55
+ /**
56
+ * Value of the title attribute, or null when missing or empty
57
+ */
58
+ title: string | null;
59
+ /**
60
+ * Value of the width attribute as a string (not parsed to a number), or null when missing or empty
61
+ */
62
+ width: string | null;
63
+ /**
64
+ * Value of the height attribute as a string (not parsed to a number), or null when missing or empty
65
+ */
66
+ height: string | null;
67
+ }
68
+ /**
69
+ * One checkbox input (input[type="checkbox"]) extracted from an HTML document and reported as a task
70
+ */
71
+ interface HtmlTaskType {
72
+ /**
73
+ * Trimmed text content of the checkbox's parent element
74
+ */
75
+ text: string;
76
+ /**
77
+ * True when the checkbox carries a checked attribute, whatever its value
78
+ */
79
+ checked: boolean;
80
+ }
81
+ /**
82
+ * One video element extracted from an HTML document by Html.getVideos()
83
+ */
84
+ interface HtmlVideoType {
85
+ /**
86
+ * Value of the video element's own src attribute, or null when missing or empty (nested source elements are listed in sources)
87
+ */
88
+ src: string | null;
89
+ /**
90
+ * Value of the poster attribute, or null when missing or empty
91
+ */
92
+ poster: string | null;
93
+ /**
94
+ * Value of the width attribute as a string (not parsed to a number), or null when missing or empty
95
+ */
96
+ width: string | null;
97
+ /**
98
+ * Value of the height attribute as a string (not parsed to a number), or null when missing or empty
99
+ */
100
+ height: string | null;
101
+ /**
102
+ * True when the controls attribute is present on the element
103
+ */
104
+ controls: boolean;
105
+ /**
106
+ * True when the autoplay attribute is present on the element
107
+ */
108
+ autoplay: boolean;
109
+ /**
110
+ * True when the loop attribute is present on the element
111
+ */
112
+ loop: boolean;
113
+ /**
114
+ * True when the muted attribute is present on the element
115
+ */
116
+ muted: boolean;
117
+ /**
118
+ * Nested source elements that have a non-empty src; type is null when the type attribute is missing or empty
119
+ */
120
+ sources: {
121
+ src: string;
122
+ type: string | null;
123
+ }[];
124
+ }
125
+ /**
126
+ * Contract implemented by the Html class: load a document, then query its content
127
+ */
128
+ interface IHtml {
129
+ /**
130
+ * Parse an HTML string and make it the current document
131
+ * @param html - HTML string to parse
132
+ * @returns this instance for chaining
133
+ */
134
+ load(html: string): this;
135
+ /**
136
+ * Fetch a URL and make the fetched HTML the current document
137
+ * @param url - URL to fetch HTML from, as a string or URL object
138
+ * @returns Promise resolving to this instance for chaining
139
+ */
140
+ loadUrl(url: string | URL): Promise<this>;
141
+ /**
142
+ * Get the text content of the current document
143
+ * @returns Trimmed text content
144
+ */
145
+ getContent(): string;
146
+ /**
147
+ * Get the current document serialized back to HTML
148
+ * @returns Trimmed HTML string
149
+ */
150
+ getHtml(): string;
151
+ /**
152
+ * Extract the img elements of the current document
153
+ * @returns Array with one entry per reported image
154
+ */
155
+ getImages(): HtmlImageType[];
156
+ /**
157
+ * Extract the anchor (a) elements of the current document
158
+ * @returns Array with one entry per reported link
159
+ */
160
+ getLinks(): HtmlLinkType[];
161
+ /**
162
+ * Extract the h1-h6 elements of the current document
163
+ * @returns Array with one entry per heading
164
+ */
165
+ getHeadings(): HtmlHeadingType[];
166
+ /**
167
+ * Extract the video elements of the current document
168
+ * @returns Array with one entry per video
169
+ */
170
+ getVideos(): HtmlVideoType[];
171
+ /**
172
+ * Extract the tasks (checkbox inputs and their surrounding text) of the current document
173
+ * @returns Array with one entry per task
174
+ */
175
+ getTasks(): HtmlTaskType[];
176
+ }
177
+ /**
178
+ * Cheerio-backed HTML document reader: load markup with load() or loadUrl(), then extract content with the get*() methods
179
+ */
180
+ declare class Html implements IHtml {
181
+ private $; // Cheerio handle for the current document; reassigned by load() and loadUrl()
182
+ constructor(); // starts with an empty document (cheerio.load(""))
183
+ /**
184
+ * Parse an HTML string with cheerio.load, replacing the previously loaded document
185
+ * @param html - HTML string to parse
186
+ * @returns this instance for chaining
187
+ */
188
+ load(html: string): this;
189
+ /**
190
+ * Load HTML from a URL using Cheerio's fromURL method, replacing the previously loaded document
191
+ * @param url - URL to fetch HTML from; a URL object is converted with toString()
192
+ * @returns Promise resolving to this instance; rejects with HtmlException ("Failed to fetch URL: ...") when fromURL fails
193
+ */
194
+ loadUrl(url: string | URL): Promise<this>;
195
+ /**
196
+ * Get the text content of the current document
197
+ * @returns Trimmed text content
198
+ */
199
+ getContent(): string;
200
+ /**
201
+ * Get the current document serialized back to HTML
202
+ * @returns Trimmed HTML string
203
+ */
204
+ getHtml(): string;
205
+ /**
206
+ * Extract every img element that has a non-empty src; other attributes that are missing or empty become null
207
+ * @returns Array of image information
208
+ */
209
+ getImages(): HtmlImageType[];
210
+ /**
211
+ * Extract every anchor (a) element that has a non-empty href; empty text and missing or empty attributes become null
212
+ * @returns Array of link information
213
+ */
214
+ getLinks(): HtmlLinkType[];
215
+ /**
216
+ * Extract every h1-h6 element with its level, trimmed text and id (null when missing or empty)
217
+ * @returns Array of heading information
218
+ */
219
+ getHeadings(): HtmlHeadingType[];
220
+ /**
221
+ * Extract every video element, including its nested source elements that have a non-empty src
222
+ * @returns Array of video information
223
+ */
224
+ getVideos(): HtmlVideoType[];
225
+ /**
226
+ * Extract one task per input[type="checkbox"] element; the task text is the trimmed text of the checkbox's parent element
227
+ * @returns Array of task information
228
+ */
229
+ getTasks(): HtmlTaskType[];
230
+ }
231
+ import { Exception } from "@ooneex/exception";
232
+ declare class HtmlException extends Exception { // exception type thrown by Html, e.g. when loadUrl() cannot fetch the URL
233
+ constructor(message: string, data?: Record<string, unknown>); // data is extra context for the error; the implementation defaults it to {}
234
+ }
235
+ export { IHtml, HtmlVideoType, HtmlTaskType, HtmlLinkType, HtmlImageType, HtmlHeadingType, HtmlException, Html };
package/dist/index.js ADDED
@@ -0,0 +1,3 @@
1
import * as cheerio from "cheerio";
import { Exception } from "@ooneex/exception";
import { HttpStatus } from "@ooneex/http-status";

/**
 * Exception thrown by Html operations.
 *
 * Every instance is created with status HttpStatus.Code.InternalServerError and
 * carries the caller-supplied context object as its `data`.
 */
class HtmlException extends Exception {
  /**
   * @param message - Human-readable description of the failure
   * @param data - Extra context attached to the exception (defaults to an empty object)
   */
  constructor(message, data = {}) {
    super(message, { status: HttpStatus.Code.InternalServerError, data });
    this.name = "HtmlException";
  }
}

/**
 * HTML document parser and analyzer using Cheerio.
 *
 * Load markup with load() or loadUrl(), then read it back with the get*() methods.
 */
class Html {
  /** Cheerio handle for the currently loaded document. */
  $;

  constructor() {
    // Start from an empty document so the getters work before anything is loaded.
    this.$ = cheerio.load("");
  }

  /**
   * Load HTML from a string, replacing the previously loaded document.
   * @param html - HTML string to parse
   * @returns this instance for chaining
   */
  load(html) {
    this.$ = cheerio.load(html);
    return this;
  }

  /**
   * Load HTML from a URL using Cheerio's fromURL method.
   * @param url - URL to fetch HTML from (string or URL object)
   * @returns Promise resolving to this instance for chaining
   * @throws HtmlException when the fetch fails; its data holds the url and the error message
   */
  async loadUrl(url) {
    const urlString = url instanceof URL ? url.toString() : url;

    try {
      this.$ = await cheerio.fromURL(urlString);
      return this;
    } catch (error) {
      // HtmlException already wraps its second argument as { status, data }, so the
      // context object is passed directly. Passing another { status, data } envelope
      // here would nest the payload one level too deep.
      throw new HtmlException(`Failed to fetch URL: ${urlString}`, {
        url: urlString,
        error: error instanceof Error ? error.message : String(error),
      });
    }
  }

  /**
   * Get the text content of the HTML document.
   * @returns Trimmed text content
   */
  getContent() {
    return this.$.text().trim();
  }

  /**
   * Get the full HTML string of the document.
   * @returns Trimmed HTML string
   */
  getHtml() {
    // trim() always yields a string, so no nullish fallback is needed.
    return this.$.html().trim();
  }

  /**
   * Extract all images that have a non-empty src.
   * Missing or empty optional attributes are reported as null.
   * @returns Array of image information
   */
  getImages() {
    const $ = this.$;
    const images = [];

    $("img").each((_, element) => {
      const $img = $(element);
      const src = $img.attr("src");

      if (src) {
        images.push({
          src,
          alt: $img.attr("alt") || null,
          title: $img.attr("title") || null,
          width: $img.attr("width") || null,
          height: $img.attr("height") || null,
        });
      }
    });

    return images;
  }

  /**
   * Extract all links (a elements) that have a non-empty href.
   * Empty link text and missing or empty optional attributes are reported as null.
   * @returns Array of link information
   */
  getLinks() {
    const $ = this.$;
    const links = [];

    $("a").each((_, element) => {
      const $link = $(element);
      const href = $link.attr("href");

      if (href) {
        links.push({
          href,
          text: $link.text().trim() || null,
          title: $link.attr("title") || null,
          target: $link.attr("target") || null,
          rel: $link.attr("rel") || null,
        });
      }
    });

    return links;
  }

  /**
   * Extract all headings (h1-h6) from the HTML document.
   * @returns Array of heading information
   */
  getHeadings() {
    const $ = this.$;
    const headings = [];

    $("h1, h2, h3, h4, h5, h6").each((_, element) => {
      const $heading = $(element);
      // The tag name is "h" followed by the level digit.
      const tagName = element.tagName.toLowerCase();
      const level = Number.parseInt(tagName.charAt(1), 10);

      headings.push({
        level,
        text: $heading.text().trim(),
        id: $heading.attr("id") || null,
      });
    });

    return headings;
  }

  /**
   * Extract all videos from the HTML document, including their nested
   * source elements that have a non-empty src.
   * @returns Array of video information
   */
  getVideos() {
    const $ = this.$;
    const videos = [];

    $("video").each((_, element) => {
      const $video = $(element);
      const sources = [];

      $video.find("source").each((_index, sourceElement) => {
        const $source = $(sourceElement);
        const src = $source.attr("src");

        if (src) {
          sources.push({
            src,
            type: $source.attr("type") || null,
          });
        }
      });

      videos.push({
        src: $video.attr("src") || null,
        poster: $video.attr("poster") || null,
        width: $video.attr("width") || null,
        height: $video.attr("height") || null,
        // Boolean attributes count as set whenever they are present, whatever their value.
        controls: $video.attr("controls") !== undefined,
        autoplay: $video.attr("autoplay") !== undefined,
        loop: $video.attr("loop") !== undefined,
        muted: $video.attr("muted") !== undefined,
        sources,
      });
    });

    return videos;
  }

  /**
   * Extract one task per input[type="checkbox"] element.
   * The task text is the trimmed text of the checkbox's parent element.
   * @returns Array of task information
   */
  getTasks() {
    const $ = this.$;
    const tasks = [];

    $('input[type="checkbox"]').each((_, element) => {
      const $checkbox = $(element);
      const checked = $checkbox.attr("checked") !== undefined;
      const text = $checkbox.parent().text().trim();

      tasks.push({ text, checked });
    });

    return tasks;
  }
}

export { HtmlException, Html };
2
+
3
+ //# debugId=F91403C7DF5ED76864756E2164756E21
@@ -0,0 +1,11 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["src/Html.ts", "src/HtmlException.ts"],
4
+ "sourcesContent": [
5
+ "import type { CheerioAPI } from \"cheerio\";\nimport * as cheerio from \"cheerio\";\nimport { HtmlException } from \"./HtmlException\";\nimport type { HtmlHeadingType, HtmlImageType, HtmlLinkType, HtmlTaskType, HtmlVideoType, IHtml } from \"./types\";\n\n/**\n * HTML document parser and analyzer using Cheerio\n */\nexport class Html implements IHtml {\n private $: CheerioAPI;\n\n constructor() {\n this.$ = cheerio.load(\"\");\n }\n\n /**\n * Load HTML from a string\n * @param html - HTML string to parse\n * @returns this instance for chaining\n */\n public load(html: string): this {\n this.$ = cheerio.load(html);\n return this;\n }\n\n /**\n * Load HTML from a URL using Cheerio's fromURL method\n * @param url - URL to fetch HTML from\n * @returns Promise resolving to this instance for chaining\n */\n public async loadUrl(url: string | URL): Promise<this> {\n const urlString = url instanceof URL ? url.toString() : url;\n\n try {\n this.$ = await cheerio.fromURL(urlString);\n return this;\n } catch (error) {\n throw new HtmlException(`Failed to fetch URL: ${urlString}`, {\n status: 500,\n data: {\n url: urlString,\n error: error instanceof Error ? error.message : String(error),\n },\n });\n }\n }\n\n /**\n * Get the text content of the HTML document\n * @returns Trimmed text content\n */\n public getContent(): string {\n return this.$.text().trim();\n }\n\n /**\n * Get the full HTML string of the document\n * @returns HTML string\n */\n public getHtml(): string {\n return this.$.html().trim() ?? 
\"\";\n }\n\n /**\n * Extract all images from the HTML document\n * @returns Array of image information\n */\n public getImages(): HtmlImageType[] {\n const $ = this.$;\n const images: HtmlImageType[] = [];\n\n $(\"img\").each((_, element) => {\n const $img = $(element);\n const src = $img.attr(\"src\");\n\n if (src) {\n images.push({\n src,\n alt: $img.attr(\"alt\") || null,\n title: $img.attr(\"title\") || null,\n width: $img.attr(\"width\") || null,\n height: $img.attr(\"height\") || null,\n });\n }\n });\n\n return images;\n }\n\n /**\n * Extract all links from the HTML document\n * @returns Array of link information\n */\n public getLinks(): HtmlLinkType[] {\n const $ = this.$;\n const links: HtmlLinkType[] = [];\n\n $(\"a\").each((_, element) => {\n const $link = $(element);\n const href = $link.attr(\"href\");\n\n if (href) {\n links.push({\n href,\n text: $link.text().trim() || null,\n title: $link.attr(\"title\") || null,\n target: $link.attr(\"target\") || null,\n rel: $link.attr(\"rel\") || null,\n });\n }\n });\n\n return links;\n }\n\n /**\n * Extract all headings from the HTML document\n * @returns Array of heading information\n */\n public getHeadings(): HtmlHeadingType[] {\n const $ = this.$;\n const headings: HtmlHeadingType[] = [];\n\n $(\"h1, h2, h3, h4, h5, h6\").each((_, element) => {\n const $heading = $(element);\n const tagName = element.tagName.toLowerCase();\n const level = Number.parseInt(tagName.charAt(1), 10);\n\n headings.push({\n level,\n text: $heading.text().trim(),\n id: $heading.attr(\"id\") || null,\n });\n });\n\n return headings;\n }\n\n /**\n * Extract all videos from the HTML document\n * @returns Array of video information\n */\n public getVideos(): HtmlVideoType[] {\n const $ = this.$;\n const videos: HtmlVideoType[] = [];\n\n $(\"video\").each((_, element) => {\n const $video = $(element);\n const sources: { src: string; type: string | null }[] = [];\n\n $video.find(\"source\").each((_, sourceElement) => {\n const $source 
= $(sourceElement);\n const src = $source.attr(\"src\");\n\n if (src) {\n sources.push({\n src,\n type: $source.attr(\"type\") || null,\n });\n }\n });\n\n videos.push({\n src: $video.attr(\"src\") || null,\n poster: $video.attr(\"poster\") || null,\n width: $video.attr(\"width\") || null,\n height: $video.attr(\"height\") || null,\n controls: $video.attr(\"controls\") !== undefined,\n autoplay: $video.attr(\"autoplay\") !== undefined,\n loop: $video.attr(\"loop\") !== undefined,\n muted: $video.attr(\"muted\") !== undefined,\n sources,\n });\n });\n\n return videos;\n }\n\n /**\n * Extract all tasks (checkbox list items) from the HTML document\n * @returns Array of task information\n */\n public getTasks(): HtmlTaskType[] {\n const $ = this.$;\n const tasks: HtmlTaskType[] = [];\n\n $('input[type=\"checkbox\"]').each((_, element) => {\n const $checkbox = $(element);\n const $parent = $checkbox.parent();\n const checked = $checkbox.attr(\"checked\") !== undefined;\n\n const text = $parent.text().trim();\n\n tasks.push({\n text,\n checked,\n });\n });\n\n return tasks;\n }\n}\n",
6
+ "import { Exception } from \"@ooneex/exception\";\nimport { HttpStatus } from \"@ooneex/http-status\";\n\nexport class HtmlException extends Exception {\n constructor(message: string, data: Record<string, unknown> = {}) {\n super(message, {\n status: HttpStatus.Code.InternalServerError,\n data,\n });\n this.name = \"HtmlException\";\n }\n}\n"
7
+ ],
8
+ "mappings": "AACA,0BCDA,oBAAS,0BACT,qBAAS,4BAEF,MAAM,UAAsB,CAAU,CAC3C,WAAW,CAAC,EAAiB,EAAgC,CAAC,EAAG,CAC/D,MAAM,EAAS,CACb,OAAQ,EAAW,KAAK,oBACxB,MACF,CAAC,EACD,KAAK,KAAO,gBAEhB,CDHO,MAAM,CAAsB,CACzB,EAER,WAAW,EAAG,CACZ,KAAK,EAAY,OAAK,EAAE,EAQnB,IAAI,CAAC,EAAoB,CAE9B,OADA,KAAK,EAAY,OAAK,CAAI,EACnB,UAQI,QAAO,CAAC,EAAkC,CACrD,IAAM,EAAY,aAAe,IAAM,EAAI,SAAS,EAAI,EAExD,GAAI,CAEF,OADA,KAAK,EAAI,MAAc,UAAQ,CAAS,EACjC,KACP,MAAO,EAAO,CACd,MAAM,IAAI,EAAc,wBAAwB,IAAa,CAC3D,OAAQ,IACR,KAAM,CACJ,IAAK,EACL,MAAO,aAAiB,MAAQ,EAAM,QAAU,OAAO,CAAK,CAC9D,CACF,CAAC,GAQE,UAAU,EAAW,CAC1B,OAAO,KAAK,EAAE,KAAK,EAAE,KAAK,EAOrB,OAAO,EAAW,CACvB,OAAO,KAAK,EAAE,KAAK,EAAE,KAAK,GAAK,GAO1B,SAAS,EAAoB,CAClC,IAAM,EAAI,KAAK,EACT,EAA0B,CAAC,EAiBjC,OAfA,EAAE,KAAK,EAAE,KAAK,CAAC,EAAG,IAAY,CAC5B,IAAM,EAAO,EAAE,CAAO,EAChB,EAAM,EAAK,KAAK,KAAK,EAE3B,GAAI,EACF,EAAO,KAAK,CACV,MACA,IAAK,EAAK,KAAK,KAAK,GAAK,KACzB,MAAO,EAAK,KAAK,OAAO,GAAK,KAC7B,MAAO,EAAK,KAAK,OAAO,GAAK,KAC7B,OAAQ,EAAK,KAAK,QAAQ,GAAK,IACjC,CAAC,EAEJ,EAEM,EAOF,QAAQ,EAAmB,CAChC,IAAM,EAAI,KAAK,EACT,EAAwB,CAAC,EAiB/B,OAfA,EAAE,GAAG,EAAE,KAAK,CAAC,EAAG,IAAY,CAC1B,IAAM,EAAQ,EAAE,CAAO,EACjB,EAAO,EAAM,KAAK,MAAM,EAE9B,GAAI,EACF,EAAM,KAAK,CACT,OACA,KAAM,EAAM,KAAK,EAAE,KAAK,GAAK,KAC7B,MAAO,EAAM,KAAK,OAAO,GAAK,KAC9B,OAAQ,EAAM,KAAK,QAAQ,GAAK,KAChC,IAAK,EAAM,KAAK,KAAK,GAAK,IAC5B,CAAC,EAEJ,EAEM,EAOF,WAAW,EAAsB,CACtC,IAAM,EAAI,KAAK,EACT,EAA8B,CAAC,EAcrC,OAZA,EAAE,wBAAwB,EAAE,KAAK,CAAC,EAAG,IAAY,CAC/C,IAAM,EAAW,EAAE,CAAO,EACpB,EAAU,EAAQ,QAAQ,YAAY,EACtC,EAAQ,OAAO,SAAS,EAAQ,OAAO,CAAC,EAAG,EAAE,EAEnD,EAAS,KAAK,CACZ,QACA,KAAM,EAAS,KAAK,EAAE,KAAK,EAC3B,GAAI,EAAS,KAAK,IAAI,GAAK,IAC7B,CAAC,EACF,EAEM,EAOF,SAAS,EAAoB,CAClC,IAAM,EAAI,KAAK,EACT,EAA0B,CAAC,EA+BjC,OA7BA,EAAE,OAAO,EAAE,KAAK,CAAC,EAAG,IAAY,CAC9B,IAAM,EAAS,EAAE,CAAO,EAClB,EAAkD,CAAC,EAEzD,EAAO,KAAK,QAAQ,EAAE,KAAK,CAAC,EAAG,IAAkB,CAC/C,IAAM,EAAU,EAAE,CAAa,EACzB,EAAM,EAAQ,KAAK,KAAK,EAE9B,GAAI,EACF,EAAQ,KAAK,CACX,MACA,KAAM,EAAQ,KAAK,MAAM,GAAK,IAChC,CAAC,EAEJ,EAED,EAAO,KAAK,CACV,IAAK,EAAO,KAAK,KAAK,GAAK,KAC3B,OAAQ,EAAO,
KAAK,QAAQ,GAAK,KACjC,MAAO,EAAO,KAAK,OAAO,GAAK,KAC/B,OAAQ,EAAO,KAAK,QAAQ,GAAK,KACjC,SAAU,EAAO,KAAK,UAAU,IAAM,OACtC,SAAU,EAAO,KAAK,UAAU,IAAM,OACtC,KAAM,EAAO,KAAK,MAAM,IAAM,OAC9B,MAAO,EAAO,KAAK,OAAO,IAAM,OAChC,SACF,CAAC,EACF,EAEM,EAOF,QAAQ,EAAmB,CAChC,IAAM,EAAI,KAAK,EACT,EAAwB,CAAC,EAe/B,OAbA,EAAE,wBAAwB,EAAE,KAAK,CAAC,EAAG,IAAY,CAC/C,IAAM,EAAY,EAAE,CAAO,EACrB,EAAU,EAAU,OAAO,EAC3B,EAAU,EAAU,KAAK,SAAS,IAAM,OAExC,EAAO,EAAQ,KAAK,EAAE,KAAK,EAEjC,EAAM,KAAK,CACT,OACA,SACF,CAAC,EACF,EAEM,EAEX",
9
+ "debugId": "F91403C7DF5ED76864756E2164756E21",
10
+ "names": []
11
+ }
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "@ooneex/html",
3
+ "description": "HTML parsing and DOM manipulation utilities powered by Cheerio",
4
+ "version": "0.0.4",
5
+ "type": "module",
6
+ "files": [
7
+ "dist",
8
+ "LICENSE",
9
+ "README.md",
10
+ "package.json"
11
+ ],
12
+ "module": "./dist/index.js",
13
+ "types": "./dist/index.d.ts",
14
+ "exports": {
15
+ ".": {
16
+ "import": {
17
+ "types": "./dist/index.d.ts",
18
+ "default": "./dist/index.js"
19
+ }
20
+ },
21
+ "./package.json": "./package.json"
22
+ },
23
+ "license": "MIT",
24
+ "scripts": {
25
+ "test": "bun test tests",
26
+ "build": "bunup",
27
+ "lint": "tsgo --noEmit && bunx biome lint",
28
+ "publish": "bun publish --access public || true"
29
+ },
30
+ "dependencies": {
31
+ "@ooneex/exception": "0.0.1",
32
+ "@ooneex/http-status": "0.0.1",
33
+ "cheerio": "^1.1.2"
34
+ },
35
+ "keywords": [
36
+ "bun",
37
+ "cheerio",
38
+ "dom",
39
+ "html",
40
+ "markup",
41
+ "ooneex",
42
+ "parser",
43
+ "template",
44
+ "typescript"
45
+ ]
46
+ }