@maze014/dom-fetch 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +174 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +143 -0
- package/dist/index.js.map +1 -0
- package/package.json +28 -0
- package/src/constants.ts +3 -0
- package/src/env.d.ts +1 -0
- package/src/fetch.ts +35 -0
- package/src/index.ts +70 -0
- package/src/types.ts +24 -0
- package/src/validations.ts +49 -0
- package/tsup.config.ts +17 -0
package/README.md
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# domFetch
|
|
2
|
+
|
|
3
|
+
Fetch and extract DOM elements from a **URL or local HTML file** using a CSS query selector.
|
|
4
|
+
|
|
5
|
+
`dom-fetch` lets you retrieve HTML elements and choose how they are represented (raw element, HTML string, children HTML, or a structured breakdown).
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
$ npm install @maze014/dom-fetch
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
### Basic example (fetch from a URL)
|
|
20
|
+
|
|
21
|
+
```ts
|
|
22
|
+
import { selectElements } from "@maze014/dom-fetch";
|
|
23
|
+
|
|
24
|
+
const elements = await selectElements(
|
|
25
|
+
"https://example.com",
|
|
26
|
+
"h1"
|
|
27
|
+
);
|
|
28
|
+
|
|
29
|
+
console.log(elements);
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
By default:
|
|
33
|
+
- `source` is `"url"`
|
|
34
|
+
- `output` is `"html"`
|
|
35
|
+
|
|
36
|
+
So this returns an array of `outerHTML` strings.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
### Fetch from a local HTML file
|
|
41
|
+
|
|
42
|
+
```ts
|
|
43
|
+
const elements = await selectElements(
|
|
44
|
+
"./index.html",
|
|
45
|
+
".article",
|
|
46
|
+
{ source: "file" }
|
|
47
|
+
);
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
### Change output format
|
|
53
|
+
|
|
54
|
+
```ts
|
|
55
|
+
const elements = await selectElements(
|
|
56
|
+
"https://example.com",
|
|
57
|
+
"a",
|
|
58
|
+
{ output: "breakdown" }
|
|
59
|
+
);
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Output formats
|
|
65
|
+
|
|
66
|
+
The `output` option controls how each matched element is returned.
|
|
67
|
+
|
|
68
|
+
| Output value | Description |
|
|
69
|
+
|-------------|------------|
|
|
70
|
+
| `"html"` | The full HTML of the matched element (`outerHTML`) |
|
|
71
|
+
| `"children"` | The inner HTML of the matched element |
|
|
72
|
+
| `"object"` | The raw DOM `Element` |
|
|
73
|
+
| `"breakdown"` | A structured object describing the element |
|
|
74
|
+
|
|
75
|
+
### Breakdown output example
|
|
76
|
+
|
|
77
|
+
```ts
|
|
78
|
+
{
|
|
79
|
+
tag: "a",
|
|
80
|
+
text: "Click here",
|
|
81
|
+
html: "Click here",
|
|
82
|
+
attributes: {
|
|
83
|
+
href: "/about",
|
|
84
|
+
class: "link"
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Within a node script from a resource
|
|
90
|
+
|
|
91
|
+
First, create a small JS file that prints the paragraphs of a Node.js documentation page:
|
|
92
|
+
|
|
93
|
+
```js
|
|
94
|
+
// nodeParagraphs.mjs
import { selectElements } from "@maze014/dom-fetch";
|
|
95
|
+
|
|
96
|
+
const source = "https://nodejs.org/en/learn/getting-started/introduction-to-nodejs";
|
|
97
|
+
const selector = "main p";
|
|
98
|
+
|
|
99
|
+
const paragraphs = await selectElements(source, selector)
|
|
100
|
+
|
|
101
|
+
console.log(paragraphs.join(""))
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Then redirect the output to an HTML file:
|
|
105
|
+
```bash
|
|
106
|
+
$ node nodeParagraphs.mjs > paragraphs.html
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Within a node script from a local file
|
|
110
|
+
|
|
111
|
+
First, create a small JS file that prints the paragraphs of a locally saved HTML page:
|
|
112
|
+
|
|
113
|
+
```js
|
|
114
|
+
// nodeParagraphs.mjs
import { selectElements } from "@maze014/dom-fetch";
|
|
115
|
+
|
|
116
|
+
const source = "./example/nodePage.html";
|
|
117
|
+
const selector = "main p";
|
|
118
|
+
const options = { source : 'file' };
|
|
119
|
+
|
|
120
|
+
const paragraphs = await selectElements(source, selector, options)
|
|
121
|
+
|
|
122
|
+
console.log(paragraphs.join(""))
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Then redirect the output to an HTML file:
|
|
126
|
+
```bash
|
|
127
|
+
$ node nodeParagraphs.mjs > paragraphs.html
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## API
|
|
133
|
+
|
|
134
|
+
### `selectElements(source: string, selector: string, options?: FetchOptions): Promise<any[]>`
|
|
135
|
+
|
|
136
|
+
Fetches elements matching a CSS selector from a given source.
|
|
137
|
+
|
|
138
|
+
#### Parameters
|
|
139
|
+
|
|
140
|
+
| Parameter | Type | Default | Description |
|
|
141
|
+
|---------|------|---------|-------------|
|
|
142
|
+
| `source` | `string` | — | URL or relative file path containing HTML |
|
|
143
|
+
| `selector` | `string` | — | CSS selector (uses `querySelectorAll`) |
|
|
144
|
+
| `options.output` | `"object" \| "html" \| "children" \| "breakdown"` | `"html"` | Format of returned elements |
|
|
145
|
+
| `options.source` | `"url" \| "file"` | `"url"` | Defines how the source is fetched |
|
|
146
|
+
|
|
147
|
+
#### Returns
|
|
148
|
+
|
|
149
|
+
**`Promise<any[]>`**
|
|
150
|
+
An array of elements formatted according to the selected `output` option.
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## FetchOptions
|
|
155
|
+
|
|
156
|
+
```ts
|
|
157
|
+
type FetchOptions = {
|
|
158
|
+
output?: "object" | "html" | "children" | "breakdown";
|
|
159
|
+
source?: "url" | "file";
|
|
160
|
+
};
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## Tests
|
|
166
|
+
|
|
167
|
+
A test project is available via [this repository](https://github.com/ManuUseGitHub/domFetchTest).
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## License
|
|
173
|
+
|
|
174
|
+
[MIT](https://github.com/ManuUseGitHub/domFetch?tab=MIT-1-ov-file#readme)
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Defines the kind of representation you can get from the request
|
|
3
|
+
* object : the element itself
|
|
4
|
+
* html : the HTML of each targeted element, the root being the element matched by the query selector
|
|
5
|
+
* children : the HTML of the children from the targeted elements matched
|
|
6
|
+
* breakdown : the element broken down into its tag, text, inner HTML and attributes
|
|
7
|
+
*/
|
|
8
|
+
type FetchOutput = "object" | "html" | "children" | "breakdown";
|
|
9
|
+
/**
|
|
10
|
+
* Defines the kind of source to fetch from or read from
|
|
11
|
+
* file : the source should be a file defined by a relative path
|
|
12
|
+
* url : the source should be the response of a fetched resource through the HTTP GET method
|
|
13
|
+
*/
|
|
14
|
+
type FetchSource = "file" | "url";
|
|
15
|
+
/**
|
|
16
|
+
* Defines how the request should work to fetch the element of a given source kind
|
|
17
|
+
*/
|
|
18
|
+
type FetchOptions = {
|
|
19
|
+
output: FetchOutput;
|
|
20
|
+
source: FetchSource;
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
*
|
|
25
|
+
* @param source References the source from where to fetch the DOM. It can be a relative file path or a URL
|
|
26
|
+
* @param selector The query selector used to fetch elements from the DOM. (will run querySelectorAll)
|
|
27
|
+
* @param options The FetchOptions needed for the requested elements
|
|
28
|
+
* @returns
|
|
29
|
+
*/
|
|
30
|
+
declare function selectElements(source: string, selector: string, options?: FetchOptions): Promise<any[]>;
|
|
31
|
+
|
|
32
|
+
export { selectElements };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/index.ts
|
|
20
|
+
var index_exports = {};
|
|
21
|
+
__export(index_exports, {
|
|
22
|
+
selectElements: () => selectElements
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(index_exports);
|
|
25
|
+
|
|
26
|
+
// src/fetch.ts
|
|
27
|
+
var import_jsdom = require("jsdom");
|
|
28
|
+
|
|
29
|
+
// src/constants.ts
|
|
30
|
+
var HTML_CONTENT_TYPE = "text/html";
|
|
31
|
+
var VERSION = "1.0.0";
|
|
32
|
+
|
|
33
|
+
// src/validations.ts
|
|
34
|
+
var import_node_fs = require("fs");
|
|
35
|
+
var import_promises = require("fs/promises");
|
|
36
|
+
var UA = `dom-fetch/${VERSION}`;
|
|
37
|
+
function validateOutputOption(options) {
|
|
38
|
+
const output = options.output;
|
|
39
|
+
if (!/^(?:object|html|children|breakdown)$/.test(output)) {
|
|
40
|
+
throw `output option not supported ["${output}"]`;
|
|
41
|
+
}
|
|
42
|
+
return output;
|
|
43
|
+
}
|
|
44
|
+
function validateSourceOption(options) {
|
|
45
|
+
const source = options.source;
|
|
46
|
+
if (!/^(?:url|file)$/.test(source)) {
|
|
47
|
+
throw `source option not supported ["${source}"]`;
|
|
48
|
+
}
|
|
49
|
+
return source;
|
|
50
|
+
}
|
|
51
|
+
async function validateResource(source) {
|
|
52
|
+
const res = await fetch(source, {
|
|
53
|
+
headers: {
|
|
54
|
+
"User-Agent": UA,
|
|
55
|
+
Accept: "text/html"
|
|
56
|
+
}
|
|
57
|
+
});
|
|
58
|
+
if (!res.ok) {
|
|
59
|
+
throw new Error(`Failed to fetch ${source}`);
|
|
60
|
+
}
|
|
61
|
+
return res;
|
|
62
|
+
}
|
|
63
|
+
async function validateFileExistance(source) {
|
|
64
|
+
if (!(0, import_node_fs.existsSync)(source)) {
|
|
65
|
+
throw new Error(`no such file ["${source}"]`);
|
|
66
|
+
}
|
|
67
|
+
return await (0, import_promises.readFile)(source, "utf-8");
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// src/fetch.ts
|
|
71
|
+
async function _fromHttp(source, selector) {
|
|
72
|
+
const res = await validateResource(source);
|
|
73
|
+
const html = await res.text();
|
|
74
|
+
const dom = new import_jsdom.JSDOM(html, {
|
|
75
|
+
url: source,
|
|
76
|
+
contentType: HTML_CONTENT_TYPE
|
|
77
|
+
});
|
|
78
|
+
const document = dom.window.document;
|
|
79
|
+
return Array.from(document.querySelectorAll(selector));
|
|
80
|
+
}
|
|
81
|
+
var _fromFile = async (source, selector) => {
|
|
82
|
+
let html = await validateFileExistance(source);
|
|
83
|
+
const dom = new import_jsdom.JSDOM(html, {
|
|
84
|
+
contentType: HTML_CONTENT_TYPE
|
|
85
|
+
});
|
|
86
|
+
const document = dom.window.document;
|
|
87
|
+
return Array.from(document.querySelectorAll(selector));
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
// src/index.ts
|
|
91
|
+
async function selectElements(source, selector, options) {
|
|
92
|
+
try {
|
|
93
|
+
let nodes = [];
|
|
94
|
+
const fixedOptions = _initOptions(options);
|
|
95
|
+
const sourceOption = validateSourceOption(fixedOptions);
|
|
96
|
+
if (sourceOption == "url") {
|
|
97
|
+
nodes = await _fromHttp(source, selector);
|
|
98
|
+
} else if (sourceOption == "file") {
|
|
99
|
+
nodes = await _fromFile(source, selector);
|
|
100
|
+
}
|
|
101
|
+
return nodes.map((el) => {
|
|
102
|
+
return _computed(el, fixedOptions);
|
|
103
|
+
});
|
|
104
|
+
} catch (error) {
|
|
105
|
+
return Promise.reject(error);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
var _initOptions = (options = {}) => {
|
|
109
|
+
const { output = "html", source = "url" } = options;
|
|
110
|
+
return { output, source };
|
|
111
|
+
};
|
|
112
|
+
var _computed = (el, options) => {
|
|
113
|
+
let result;
|
|
114
|
+
const output = validateOutputOption(options);
|
|
115
|
+
switch (output) {
|
|
116
|
+
case "html":
|
|
117
|
+
case "children":
|
|
118
|
+
if (output == "html") {
|
|
119
|
+
result = el.outerHTML;
|
|
120
|
+
} else {
|
|
121
|
+
result = el.innerHTML;
|
|
122
|
+
}
|
|
123
|
+
break;
|
|
124
|
+
case "breakdown":
|
|
125
|
+
result = {
|
|
126
|
+
tag: el.tagName.toLowerCase(),
|
|
127
|
+
text: el.textContent?.trim() ?? "",
|
|
128
|
+
html: el.innerHTML,
|
|
129
|
+
attributes: Object.fromEntries(
|
|
130
|
+
Array.from(el.attributes).map((a) => [a.name, a.value])
|
|
131
|
+
)
|
|
132
|
+
};
|
|
133
|
+
break;
|
|
134
|
+
default:
|
|
135
|
+
result = el;
|
|
136
|
+
}
|
|
137
|
+
return result;
|
|
138
|
+
};
|
|
139
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
140
|
+
0 && (module.exports = {
|
|
141
|
+
selectElements
|
|
142
|
+
});
|
|
143
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/fetch.ts","../src/constants.ts","../src/validations.ts"],"sourcesContent":["import { FetchOptions } from \"./types\";\nimport { _fromFile, _fromHttp } from \"./fetch\";\nimport { validateOutputOption, validateSourceOption } from \"./validations\";\n\n/**\n *\n * @param source References the source from where to fetch the DOM. It can be an a relative file path or an URL\n * @param selector The query selector used to fetch elements from the DOM. (will run querySelectorAll)\n * @param options The FetchOptions needed for the requested elements\n * @returns\n */\nexport async function selectElements(\n\tsource: string,\n\tselector: string,\n\toptions?: FetchOptions\n) {\n\ttry {\n\t\tlet nodes: Array<Element> = [];\n\t\tconst fixedOptions = _initOptions(options);\n\t\tconst sourceOption = validateSourceOption(fixedOptions);\n\n\t\tif (sourceOption == \"url\") {\n\t\t\tnodes = await _fromHttp(source, selector);\n\t\t} else if (sourceOption == \"file\") {\n\t\t\tnodes = await _fromFile(source, selector);\n\t\t}\n\n\t\treturn nodes.map((el) => {\n\t\t\treturn _computed(el, fixedOptions);\n\t\t});\n\t} catch (error: any) {\n\t\treturn Promise.reject(error);\n\t}\n}\n\n// ============================= PRIVATE functions =============================\n\nconst _initOptions = (options: any = {}) => {\n\tconst { output = \"html\", source = \"url\" } = options;\n\treturn { output, source } as FetchOptions;\n};\n\nconst _computed = (el: Element, options: FetchOptions) => {\n\tlet result: any;\n\tconst output = validateOutputOption(options);\n\n\tswitch (output) {\n\t\tcase \"html\":\n\t\tcase \"children\":\n\t\t\tif (output == \"html\") {\n\t\t\t\tresult = el.outerHTML;\n\t\t\t} else {\n\t\t\t\tresult = el.innerHTML;\n\t\t\t}\n\t\t\tbreak;\n\t\tcase \"breakdown\":\n\t\t\tresult = {\n\t\t\t\ttag: el.tagName.toLowerCase(),\n\t\t\t\ttext: el.textContent?.trim() ?? 
\"\",\n\t\t\t\thtml: el.innerHTML,\n\t\t\t\tattributes: Object.fromEntries(\n\t\t\t\t\tArray.from(el.attributes).map((a) => [a.name, a.value])\n\t\t\t\t),\n\t\t\t};\n\t\t\tbreak;\n\t\tdefault:\n\t\t\tresult = el;\n\t}\n\treturn result;\n};\n","import { JSDOM } from \"jsdom\";\nimport { HTML_CONTENT_TYPE } from \"./constants\";\nimport {\n\tvalidateFileExistance,\n\tvalidateResource,\n} from \"./validations\";\n\nexport async function _fromHttp(\n\tsource: string,\n\tselector: string\n): Promise<Element[]> {\n\tconst res = await validateResource(source);\n\n\tconst html = await res.text();\n\n\tconst dom = new JSDOM(html, {\n\t\turl: source,\n\t\tcontentType: HTML_CONTENT_TYPE,\n\t});\n\n\tconst document = dom.window.document;\n\n\treturn Array.from(document.querySelectorAll(selector));\n}\n\nexport const _fromFile = async (source: string, selector: string) => {\n\tlet html = await validateFileExistance(source);\n\n\tconst dom = new JSDOM(html, {\n\t\tcontentType: HTML_CONTENT_TYPE,\n\t});\n\n\tconst document = dom.window.document;\n\treturn Array.from(document.querySelectorAll(selector));\n};\n","export const PACKAGE_ERROR_INTRO = \"Fetch-Dom error : \"\nexport const HTML_CONTENT_TYPE = \"text/html\";\nexport const VERSION = __VERSION__;","import { existsSync } from \"node:fs\";\nimport { VERSION } from \"./constants\";\nimport { FetchOptions } from \"./types\";\nimport { readFile } from \"node:fs/promises\";\n\nconst UA = `dom-fetch/${VERSION}`;\n\nexport function validateOutputOption(options: FetchOptions) {\n\tconst output = options.output;\n\tif (!/^(?:object|html|children|breakdown)$/.test(output)) {\n\t\tthrow `output option not supported [\"${output}\"]`;\n\t}\n\treturn output;\n}\n\nexport function validateSourceOption(options: FetchOptions) {\n\tconst source = options.source;\n\tif (!/^(?:url|file)$/.test(source)) {\n\t\tthrow `source option not supported [\"${source}\"]`;\n\t}\n\treturn source;\n}\n\nexport function validateHTTPSource(source: string) 
{\n\tif (!source.startsWith(\"http\")) {\n\t\tthrow \"source given is not an URL\";\n\t}\n}\n\nexport async function validateResource(source: string) {\n\tconst res = await fetch(source, {\n\t\theaders: {\n\t\t\t\"User-Agent\": UA,\n\t\t\tAccept: \"text/html\",\n\t\t},\n\t});\n\n\tif (!res.ok) {\n\t\tthrow new Error(`Failed to fetch ${source}`);\n\t}\n\treturn res;\n}\n\nexport async function validateFileExistance(source: string) {\n\tif (!existsSync(source)) {\n\t\tthrow new Error(`no such file [\"${source}\"]`);\n\t}\n\treturn await readFile(source, \"utf-8\");\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,mBAAsB;;;ACCf,IAAM,oBAAoB;AAC1B,IAAM,UAAU;;;ACFvB,qBAA2B;AAG3B,sBAAyB;AAEzB,IAAM,KAAK,aAAa,OAAO;AAExB,SAAS,qBAAqB,SAAuB;AAC3D,QAAM,SAAS,QAAQ;AACvB,MAAI,CAAC,uCAAuC,KAAK,MAAM,GAAG;AACzD,UAAM,iCAAiC,MAAM;AAAA,EAC9C;AACA,SAAO;AACR;AAEO,SAAS,qBAAqB,SAAuB;AAC3D,QAAM,SAAS,QAAQ;AACvB,MAAI,CAAC,iBAAiB,KAAK,MAAM,GAAG;AACnC,UAAM,iCAAiC,MAAM;AAAA,EAC9C;AACA,SAAO;AACR;AAQA,eAAsB,iBAAiB,QAAgB;AACtD,QAAM,MAAM,MAAM,MAAM,QAAQ;AAAA,IAC/B,SAAS;AAAA,MACR,cAAc;AAAA,MACd,QAAQ;AAAA,IACT;AAAA,EACD,CAAC;AAED,MAAI,CAAC,IAAI,IAAI;AACZ,UAAM,IAAI,MAAM,mBAAmB,MAAM,EAAE;AAAA,EAC5C;AACA,SAAO;AACR;AAEA,eAAsB,sBAAsB,QAAgB;AAC3D,MAAI,KAAC,2BAAW,MAAM,GAAG;AACxB,UAAM,IAAI,MAAM,kBAAkB,MAAM,IAAI;AAAA,EAC7C;AACA,SAAO,UAAM,0BAAS,QAAQ,OAAO;AACtC;;;AFzCA,eAAsB,UACrB,QACA,UACqB;AACrB,QAAM,MAAM,MAAM,iBAAiB,MAAM;AAEzC,QAAM,OAAO,MAAM,IAAI,KAAK;AAE5B,QAAM,MAAM,IAAI,mBAAM,MAAM;AAAA,IAC3B,KAAK;AAAA,IACL,aAAa;AAAA,EACd,CAAC;AAED,QAAM,WAAW,IAAI,OAAO;AAE5B,SAAO,MAAM,KAAK,SAAS,iBAAiB,QAAQ,CAAC;AACtD;AAEO,IAAM,YAAY,OAAO,QAAgB,aAAqB;AACpE,MAAI,OAAO,MAAM,sBAAsB,MAAM;AAE7C,QAAM,MAAM,IAAI,mBAAM,MAAM;AAAA,IAC3B,aAAa;AAAA,EACd,CAAC;AAED,QAAM,WAAW,IAAI,OAAO;AAC5B,SAAO,MAAM,KAAK,SAAS,iBAAiB,QAAQ,CAAC;AACtD;;;ADvBA,eAAsB,eACrB,QACA,UACA,SACC;AACD,MAAI;AACH,QAAI,QAAwB,CAAC;AAC7B,UAAM,eAAe,aAAa,OAAO;AACzC,UAAM,eAAe,qBAAqB,YAAY;AAEtD,QAAI,gBAAgB,OAAO;AAC1B,cAAQ,MAAM,UAAU,QAAQ,QAAQ;AAAA,IACzC,WAAW,gBAAgB
,QAAQ;AAClC,cAAQ,MAAM,UAAU,QAAQ,QAAQ;AAAA,IACzC;AAEA,WAAO,MAAM,IAAI,CAAC,OAAO;AACxB,aAAO,UAAU,IAAI,YAAY;AAAA,IAClC,CAAC;AAAA,EACF,SAAS,OAAY;AACpB,WAAO,QAAQ,OAAO,KAAK;AAAA,EAC5B;AACD;AAIA,IAAM,eAAe,CAAC,UAAe,CAAC,MAAM;AAC3C,QAAM,EAAE,SAAS,QAAQ,SAAS,MAAM,IAAI;AAC5C,SAAO,EAAE,QAAQ,OAAO;AACzB;AAEA,IAAM,YAAY,CAAC,IAAa,YAA0B;AACzD,MAAI;AACJ,QAAM,SAAS,qBAAqB,OAAO;AAE3C,UAAQ,QAAQ;AAAA,IACf,KAAK;AAAA,IACL,KAAK;AACJ,UAAI,UAAU,QAAQ;AACrB,iBAAS,GAAG;AAAA,MACb,OAAO;AACN,iBAAS,GAAG;AAAA,MACb;AACA;AAAA,IACD,KAAK;AACJ,eAAS;AAAA,QACR,KAAK,GAAG,QAAQ,YAAY;AAAA,QAC5B,MAAM,GAAG,aAAa,KAAK,KAAK;AAAA,QAChC,MAAM,GAAG;AAAA,QACT,YAAY,OAAO;AAAA,UAClB,MAAM,KAAK,GAAG,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC;AAAA,QACvD;AAAA,MACD;AACA;AAAA,IACD;AACC,eAAS;AAAA,EACX;AACA,SAAO;AACR;","names":[]}
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@maze014/dom-fetch",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"main": "dist/index.js",
|
|
5
|
+
"description": "Fetch and extract DOM elements from a URL or local HTML file using a CSS query selector.",
|
|
6
|
+
"type": "commonjs",
|
|
7
|
+
"devDependencies": {
|
|
8
|
+
"@types/jsdom": "^27.0.0",
|
|
9
|
+
"jsmin": "^1.0.1",
|
|
10
|
+
"tsup": "^8.5.1",
|
|
11
|
+
"typescript": "^5.9.3"
|
|
12
|
+
},
|
|
13
|
+
"repository": {
|
|
14
|
+
"type": "git",
|
|
15
|
+
"url": "git+https://github.com/ManuUseGitHub/domFetch.git"
|
|
16
|
+
},
|
|
17
|
+
"dependencies": {
|
|
18
|
+
"jsdom": "^27.4.0"
|
|
19
|
+
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "rm -rf dist; tsup",
|
|
22
|
+
"dev": "tsup --watch"
|
|
23
|
+
},
|
|
24
|
+
"homepage": "https://github.com/ManuUseGitHub/domFetch#readme",
|
|
25
|
+
"author": "Jean Luc Emmanuel VERHANNEMAN",
|
|
26
|
+
"license": "MIT",
|
|
27
|
+
"types": "./dist/index.d.ts"
|
|
28
|
+
}
|
package/src/constants.ts
ADDED
package/src/env.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
declare const __VERSION__: string;
|
package/src/fetch.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { JSDOM } from "jsdom";
|
|
2
|
+
import { HTML_CONTENT_TYPE } from "./constants";
|
|
3
|
+
import {
|
|
4
|
+
validateFileExistance,
|
|
5
|
+
validateResource,
|
|
6
|
+
} from "./validations";
|
|
7
|
+
|
|
8
|
+
/**
 * Downloads an HTML page and returns the elements matching a selector.
 *
 * @param source URL of the page to download.
 * @param selector CSS selector passed to `querySelectorAll`.
 * @returns The elements matched by `selector` in the downloaded document.
 * @throws Whatever `validateResource` throws when the request fails.
 */
export async function _fromHttp(
	source: string,
	selector: string
): Promise<Element[]> {
	const res = await validateResource(source);
	const html = await res.text();

	const dom = new JSDOM(html, {
		// After a redirect `res.url` is the address the document actually came
		// from; use it as the base URL so relative links resolve against the
		// right location. Fall back to `source` if the response has no URL.
		url: res.url || source,
		contentType: HTML_CONTENT_TYPE,
	});

	return Array.from(dom.window.document.querySelectorAll(selector));
}
|
|
25
|
+
|
|
26
|
+
/**
 * Reads a local HTML file and returns the elements matching a selector.
 *
 * @param source Path of the HTML file to read.
 * @param selector CSS selector passed to `querySelectorAll`.
 * @returns The elements matched by `selector` in the parsed file.
 */
export const _fromFile = async (source: string, selector: string) => {
	const markup = await validateFileExistance(source);
	const { window } = new JSDOM(markup, { contentType: HTML_CONTENT_TYPE });
	const matches = window.document.querySelectorAll(selector);
	return Array.from(matches);
};
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { FetchOptions } from "./types";
|
|
2
|
+
import { _fromFile, _fromHttp } from "./fetch";
|
|
3
|
+
import { validateOutputOption, validateSourceOption } from "./validations";
|
|
4
|
+
|
|
5
|
+
/**
 * Fetches the elements matching a CSS selector from a URL or a local HTML file.
 *
 * @param source The source to read the DOM from: a URL, or a file path when `options.source` is "file".
 * @param selector The query selector used to pick elements from the DOM (runs `querySelectorAll`).
 * @param options How to read the source and how to represent each match. Every
 * field is optional; `output` defaults to "html" and `source` to "url".
 * @returns One entry per matched element, represented according to `options.output`.
 * @throws Rejects when an option is not supported or when reading the source fails.
 */
export async function selectElements(
	source: string,
	selector: string,
	options?: Partial<FetchOptions>
): Promise<any[]> {
	const fixedOptions = _initOptions(options);

	// Validate both options up front so an unsupported `output` is reported
	// before any I/O happens, and even when the selector matches nothing.
	const sourceOption = validateSourceOption(fixedOptions);
	validateOutputOption(fixedOptions);

	// No try/catch needed: anything thrown inside an async function already
	// rejects the returned promise.
	const nodes: Element[] =
		sourceOption === "url"
			? await _fromHttp(source, selector)
			: await _fromFile(source, selector);

	return nodes.map((el) => _computed(el, fixedOptions));
}
|
|
35
|
+
|
|
36
|
+
// ============================= PRIVATE functions =============================
|
|
37
|
+
|
|
38
|
+
/**
 * Fills in the default for every option the caller left out.
 *
 * @param options The caller-supplied options; may be omitted or `null`.
 * @returns Options where `output` defaults to "html" and `source` to "url".
 */
const _initOptions = (options?: Partial<FetchOptions> | null): FetchOptions => {
	// `?? {}` also covers an explicit `null`, which a default parameter value
	// would let through and which cannot be destructured.
	const { output = "html", source = "url" } = options ?? {};
	return { output, source };
};
|
|
42
|
+
|
|
43
|
+
/**
 * Converts one matched element into the representation named by `options.output`.
 *
 * @param el The matched element.
 * @param options Options whose `output` field selects the representation.
 * @returns `outerHTML` for "html", `innerHTML` for "children", a
 * `{ tag, text, html, attributes }` object for "breakdown", and the element
 * itself otherwise.
 */
const _computed = (el: Element, options: FetchOptions): any => {
	const output = validateOutputOption(options);

	if (output === "html") {
		return el.outerHTML;
	}

	if (output === "children") {
		return el.innerHTML;
	}

	if (output === "breakdown") {
		return {
			tag: el.tagName.toLowerCase(),
			text: el.textContent?.trim() ?? "",
			html: el.innerHTML,
			// Flatten the attribute list into a plain { name: value } object.
			attributes: Object.fromEntries(
				Array.from(el.attributes).map((a) => [a.name, a.value])
			),
		};
	}

	// Remaining validated value ("object"): hand back the element untouched.
	return el;
};
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
|
|
2
|
+
/**
|
|
3
|
+
* Defines the kind of representation you can get from the request
|
|
4
|
+
* object : the element itself
|
|
5
|
+
* html : the HTML of each targeted element, the root being the element matched by the query selector
|
|
6
|
+
* children : the HTML of the children from the targeted elements matched
|
|
7
|
+
* breakdown : the element broken down into its tag, text, inner HTML and attributes
|
|
8
|
+
*/
|
|
9
|
+
export type FetchOutput = "object" | "html" | "children" | "breakdown";
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Defines the kind of source to fetch from or read from
|
|
13
|
+
* file : the source should be a file defined by a relative path
|
|
14
|
+
* url : the source should be the response of a fetched resource through the HTTP GET method
|
|
15
|
+
*/
|
|
16
|
+
export type FetchSource = "file" | "url";
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Defines how the request should work to fetch the element of a given source kind
|
|
20
|
+
*/
|
|
21
|
+
export type FetchOptions = {
|
|
22
|
+
output: FetchOutput;
|
|
23
|
+
source: FetchSource;
|
|
24
|
+
};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { VERSION } from "./constants";
|
|
3
|
+
import { FetchOptions } from "./types";
|
|
4
|
+
import { readFile } from "node:fs/promises";
|
|
5
|
+
|
|
6
|
+
const UA = `dom-fetch/${VERSION}`;
|
|
7
|
+
|
|
8
|
+
/**
 * Checks that `options.output` is one of the supported output formats.
 *
 * @param options The options whose `output` field is validated.
 * @returns The validated output format.
 * @throws {Error} When `options.output` is not "object", "html", "children" or "breakdown".
 */
export function validateOutputOption(options: FetchOptions) {
	const output = options.output;
	// Strict membership test: unlike RegExp#test it never coerces a non-string
	// value (e.g. ["html"]) into a matching string.
	const supported: readonly unknown[] = ["object", "html", "children", "breakdown"];
	if (!supported.includes(output)) {
		// Throw a real Error (not a bare string) so callers get a stack trace
		// and can rely on `instanceof Error`.
		throw new Error(`output option not supported ["${output}"]`);
	}
	return output;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Checks that `options.source` is one of the supported source kinds.
 *
 * @param options The options whose `source` field is validated.
 * @returns The validated source kind.
 * @throws {Error} When `options.source` is neither "url" nor "file".
 */
export function validateSourceOption(options: FetchOptions) {
	const source = options.source;
	// Strict membership test: unlike RegExp#test it never coerces a non-string
	// value (e.g. ["url"]) into a matching string.
	const supported: readonly unknown[] = ["url", "file"];
	if (!supported.includes(source)) {
		// Throw a real Error (not a bare string) so callers get a stack trace
		// and can rely on `instanceof Error`.
		throw new Error(`source option not supported ["${source}"]`);
	}
	return source;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Checks that a source string is an absolute HTTP or HTTPS URL.
 *
 * @param source The string to check.
 * @throws {Error} When `source` cannot be parsed as a URL or uses another scheme.
 */
export function validateHTTPSource(source: string) {
	let protocol = "";
	try {
		// Parse instead of testing a prefix, so strings that merely start with
		// "http" (e.g. "httpfoo") are rejected.
		protocol = new URL(source).protocol;
	} catch {
		// Not parseable as an absolute URL; rejected by the check below.
	}

	if (protocol !== "http:" && protocol !== "https:") {
		throw new Error("source given is not an URL");
	}
}
|
|
29
|
+
|
|
30
|
+
/**
 * Requests a resource over HTTP and makes sure the request succeeded.
 *
 * @param source URL of the resource to request.
 * @returns The response, with its body still unread.
 * @throws {Error} When the response status is not in the successful range
 * (`res.ok` is false); the message includes the HTTP status code.
 */
export async function validateResource(source: string) {
	const res = await fetch(source, {
		headers: {
			"User-Agent": UA,
			Accept: "text/html",
		},
	});

	if (!res.ok) {
		// Include the status so a 404 can be told apart from a 5xx.
		throw new Error(`Failed to fetch ${source} (HTTP ${res.status})`);
	}
	return res;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Reads a file as UTF-8 text, reporting a missing file with a dedicated error.
 *
 * @param source Path of the file to read.
 * @returns The file content decoded as UTF-8.
 * @throws {Error} `no such file ["<source>"]` when the path does not exist;
 * any other read failure is rethrown unchanged.
 */
export async function validateFileExistance(source: string) {
	try {
		// Read directly instead of checking existence first: a separate check
		// can race with the file being removed, and a synchronous check would
		// block inside an async function.
		return await readFile(source, "utf-8");
	} catch (error: unknown) {
		const code = (error as { code?: unknown } | null)?.code;
		// ENOTDIR means a parent segment of the path is not a directory, so the
		// file cannot exist either.
		if (code === "ENOENT" || code === "ENOTDIR") {
			throw new Error(`no such file ["${source}"]`);
		}
		throw error;
	}
}
|
package/tsup.config.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { defineConfig } from "tsup";
|
|
2
|
+
import pkg from "./package.json";
|
|
3
|
+
console.log(pkg.version)
|
|
4
|
+
// Build configuration: compiles src/index.ts into a single CommonJS file in dist/.
export default defineConfig({
	entry: ["src/index.ts"],
	outDir: "dist",
	format: ["cjs"], // CommonJS output, Node-friendly
	target: "node18",
	bundle: true, // single output file
	splitting: false, // no code-split chunks: force ONE file
	sourcemap: true,
	clean: true,
	dts: true, // generates index.d.ts
	define: {
		// Replaced at build time with the version from package.json.
		__VERSION__: JSON.stringify(pkg.version),
	},
});
|