@xcrap/dom 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -56
- package/dist/errors.d.ts +2 -1
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +1 -1
- package/dist/errors.js.map +1 -1
- package/dist/extraction-model.d.ts +30 -0
- package/dist/extraction-model.d.ts.map +1 -0
- package/dist/extraction-model.interface.d.ts +4 -0
- package/dist/extraction-model.interface.d.ts.map +1 -0
- package/dist/extraction-model.interface.js +2 -0
- package/dist/extraction-model.interface.js.map +1 -0
- package/dist/extraction-model.js +67 -0
- package/dist/extraction-model.js.map +1 -0
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/parser.d.ts +16 -15
- package/dist/parser.d.ts.map +1 -1
- package/dist/parser.js +9 -8
- package/dist/parser.js.map +1 -1
- package/dist/query-builders.d.ts +7 -0
- package/dist/query-builders.d.ts.map +1 -0
- package/dist/query-builders.js +13 -0
- package/dist/query-builders.js.map +1 -0
- package/dist/utils/index.d.ts +3 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +3 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/select-first-element.d.ts +3 -0
- package/dist/utils/select-first-element.d.ts.map +1 -0
- package/dist/utils/select-first-element.js +11 -0
- package/dist/utils/select-first-element.js.map +1 -0
- package/dist/utils/select-many-elements.d.ts +3 -0
- package/dist/utils/select-many-elements.d.ts.map +1 -0
- package/dist/utils/select-many-elements.js +18 -0
- package/dist/utils/select-many-elements.js.map +1 -0
- package/dist/utils/to-document.d.ts +2 -0
- package/dist/utils/to-document.d.ts.map +1 -0
- package/dist/utils/to-document.js +7 -0
- package/dist/utils/to-document.js.map +1 -0
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# 🕷️ Xcrap DOM
|
|
1
|
+
# 🕷️ Xcrap DOM Extractor
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
> Extracting data from HTML using declarative models — part of the [Xcrap](https://github.com/xcrap) framework.
|
|
4
|
+
|
|
5
|
+
Xcrap DOM is a **client-side** package designed for DOM data extraction using declarative models. It works natively in browser environments (browser extensions, TamperMonkey user scripts, etc.) and supports both **CSS selectors** and **XPath** queries out of the box.
|
|
4
6
|
|
|
5
7
|
---
|
|
6
8
|
|
|
@@ -15,107 +17,291 @@ npm i @xcrap/dom
|
|
|
15
17
|
|
|
16
18
|
---
|
|
17
19
|
|
|
18
|
-
## 🛠️
|
|
20
|
+
## 🛠️ Quick Start
|
|
19
21
|
|
|
20
|
-
|
|
22
|
+
### Creating a parser
|
|
21
23
|
|
|
22
|
-
|
|
24
|
+
`DomParser` receives an HTML string and parses it into a document internally.
|
|
23
25
|
|
|
24
26
|
```ts
|
|
25
27
|
import { DomParser } from "@xcrap/dom"
|
|
26
28
|
|
|
27
|
-
const html =
|
|
29
|
+
const html = document.documentElement.outerHTML
|
|
28
30
|
const parser = new DomParser(html)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## 🔍 Query Builders
|
|
29
36
|
|
|
37
|
+
Instead of passing raw strings as queries, use the `css()` and `xpath()` helpers to build typed `QueryConfig` objects.
|
|
38
|
+
|
|
39
|
+
```ts
|
|
40
|
+
import { css, xpath } from "@xcrap/dom"
|
|
41
|
+
|
|
42
|
+
css("h1") // { type: "css", value: "h1" }
|
|
43
|
+
xpath("//h1") // { type: "xpath", value: "//h1" }
|
|
30
44
|
```
|
|
31
45
|
|
|
32
|
-
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## 📤 Extracting Data
|
|
49
|
+
|
|
50
|
+
### `extractValue` — single value from a single element
|
|
33
51
|
|
|
34
52
|
```ts
|
|
35
|
-
import { DomParser, extract } from "@xcrap/dom"
|
|
53
|
+
import { DomParser, css, extract } from "@xcrap/dom"
|
|
36
54
|
|
|
37
|
-
const html = `<html><
|
|
55
|
+
const html = `<html><body><h1>Hello World</h1></body></html>`
|
|
38
56
|
const parser = new DomParser(html)
|
|
39
57
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
58
|
+
const title = parser.extractValue({
|
|
59
|
+
query: css("h1"),
|
|
60
|
+
extractor: extract("innerText"),
|
|
61
|
+
default: null // returned if the element is not found
|
|
62
|
+
})
|
|
63
|
+
|
|
64
|
+
console.log(title) // "Hello World"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### `extractValues` — one value per matched element
|
|
43
68
|
|
|
44
|
-
|
|
45
|
-
|
|
69
|
+
```ts
|
|
70
|
+
import { DomParser, css, extractHref } from "@xcrap/dom"
|
|
46
71
|
|
|
47
|
-
|
|
48
|
-
|
|
72
|
+
const html = `<html><body><a href="/a">A</a><a href="/b">B</a></body></html>`
|
|
73
|
+
const parser = new DomParser(html)
|
|
49
74
|
|
|
75
|
+
const links = parser.extractValues({
|
|
76
|
+
query: css("a"),
|
|
77
|
+
extractor: extractHref,
|
|
78
|
+
limit: 10 // optional
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
console.log(links) // ["/a", "/b"]
|
|
50
82
|
```
|
|
51
83
|
|
|
52
|
-
###
|
|
84
|
+
### `extractModel` — parse a subtree with a `DomExtractionModel`
|
|
53
85
|
|
|
54
|
-
|
|
86
|
+
```ts
|
|
87
|
+
import { DomParser, DomExtractionModel, css, extract } from "@xcrap/dom"
|
|
88
|
+
|
|
89
|
+
const html = `<html><body>
|
|
90
|
+
<h1>Header</h1>
|
|
91
|
+
<p id="user-id">42</p>
|
|
92
|
+
<p class="username">john_doe</p>
|
|
93
|
+
</body></html>`
|
|
94
|
+
|
|
95
|
+
const parser = new DomParser(html)
|
|
96
|
+
|
|
97
|
+
const model = new DomExtractionModel({
|
|
98
|
+
heading: { query: css("h1"), extractor: extract("innerText") },
|
|
99
|
+
userId: { query: css("#user-id"), extractor: extract("innerText") },
|
|
100
|
+
username: { query: css(".username"), extractor: extract("innerText") },
|
|
101
|
+
})
|
|
102
|
+
|
|
103
|
+
const data = parser.extractModel({ model })
|
|
104
|
+
|
|
105
|
+
console.log(data)
|
|
106
|
+
// { heading: "Header", userId: "42", username: "john_doe" }
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### `extractModels` — parse a list of elements, each with the same model
|
|
55
110
|
|
|
56
111
|
```ts
|
|
57
|
-
import { DomParser,
|
|
112
|
+
import { DomParser, DomExtractionModel, css, extractInnerText, extractHref } from "@xcrap/dom"
|
|
113
|
+
|
|
114
|
+
const html = `<html><body>
|
|
115
|
+
<ul>
|
|
116
|
+
<li><a href="/page/1">Page 1</a></li>
|
|
117
|
+
<li><a href="/page/2">Page 2</a></li>
|
|
118
|
+
<li><a href="/page/3">Page 3</a></li>
|
|
119
|
+
</ul>
|
|
120
|
+
</body></html>`
|
|
58
121
|
|
|
59
|
-
const html = `<html><body><h1>Header</h1><div><p id="id">1</p><p id="name">Name</p><p class="age">23</p></div></body></html>`
|
|
60
122
|
const parser = new DomParser(html)
|
|
61
123
|
|
|
62
|
-
const
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
124
|
+
const itemModel = new DomExtractionModel({
|
|
125
|
+
label: { query: css("a"), extractor: extractInnerText },
|
|
126
|
+
url: { query: css("a"), extractor: extractHref },
|
|
127
|
+
})
|
|
128
|
+
|
|
129
|
+
const items = parser.extractModels({
|
|
130
|
+
query: css("li"),
|
|
131
|
+
model: itemModel,
|
|
132
|
+
limit: 10 // optional
|
|
133
|
+
})
|
|
134
|
+
|
|
135
|
+
console.log(items)
|
|
136
|
+
// [
|
|
137
|
+
// { label: "Page 1", url: "/page/1" },
|
|
138
|
+
// { label: "Page 2", url: "/page/2" },
|
|
139
|
+
// { label: "Page 3", url: "/page/3" },
|
|
140
|
+
// ]
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## 🧩 `DomExtractionModel` — Declarative Extraction
|
|
146
|
+
|
|
147
|
+
`DomExtractionModel` receives a `shape` — a plain object where each key maps to an extraction descriptor.
|
|
148
|
+
|
|
149
|
+
### Base value descriptor
|
|
150
|
+
|
|
151
|
+
| Field | Type | Required | Description |
|
|
152
|
+
|---|---|---|---|
|
|
153
|
+
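| `query` | `QueryConfig` | No | CSS or XPath query to locate the element. When omitted, the extractor runs on the root element; it is required when `multiple: true` (otherwise a `MultipleQueryError` is thrown) |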
|
|
154
|
+
| `extractor` | `ExtractorFunction` | **Yes** | Function that receives the element and returns a value |
|
|
155
|
+
| `multiple` | `boolean` | No | If `true`, matches all elements and returns an array |
|
|
156
|
+
| `limit` | `number` | No | Max number of elements when `multiple: true` |
|
|
157
|
+
| `default` | `string \| string[] \| null` | No | Fallback value returned when the element is not found (single-element mode only). If omitted, an `HTMLElementNotFoundError` is thrown instead |
|
|
158
|
+
|
|
159
|
+
### Nested model descriptor (sub-parsing)
|
|
160
|
+
|
|
161
|
+
| Field | Type | Required | Description |
|
|
162
|
+
|---|---|---|---|
|
|
163
|
+
| `query` | `QueryConfig` | **Yes** | Query to locate the root element for the nested model |
|
|
164
|
+
| `model` | `ExtractionModel` | **Yes** | Another `DomExtractionModel` to parse the subtree |
|
|
165
|
+
| `multiple` | `boolean` | No | If `true`, applies the model to every matched element |
|
|
166
|
+
| `limit` | `number` | No | Max number of elements when `multiple: true` |
|
|
167
|
+
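| `extractor` | `ExtractorFunction` | No | Optional function that turns the matched element into the HTML string passed to `model.parse()` (defaults to the element's `outerHTML`). Only applied when `multiple` is not set |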
|
|
168
|
+
|
|
169
|
+
### Using XPath
|
|
170
|
+
|
|
171
|
+
```ts
|
|
172
|
+
import { DomExtractionModel, xpath, extract } from "@xcrap/dom"
|
|
173
|
+
|
|
174
|
+
const model = new DomExtractionModel({
|
|
175
|
+
heading: {
|
|
176
|
+
query: xpath("//h1"),
|
|
177
|
+
extractor: extract("innerText")
|
|
178
|
+
}
|
|
179
|
+
})
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Extracting arrays
|
|
183
|
+
|
|
184
|
+
```ts
|
|
185
|
+
import { DomExtractionModel, css, extractInnerText } from "@xcrap/dom"
|
|
186
|
+
|
|
187
|
+
const model = new DomExtractionModel({
|
|
188
|
+
tags: {
|
|
189
|
+
query: css("li"),
|
|
190
|
+
extractor: extractInnerText,
|
|
191
|
+
multiple: true,
|
|
192
|
+
limit: 5
|
|
193
|
+
}
|
|
79
194
|
})
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Nested models
|
|
80
198
|
|
|
81
|
-
|
|
199
|
+
```ts
|
|
200
|
+
import { DomExtractionModel, css, extractInnerText, extractHref } from "@xcrap/dom"
|
|
82
201
|
|
|
83
|
-
|
|
202
|
+
const linkModel = new DomExtractionModel({
|
|
203
|
+
label: { query: css("a"), extractor: extractInnerText },
|
|
204
|
+
href: { query: css("a"), extractor: extractHref },
|
|
205
|
+
})
|
|
84
206
|
|
|
207
|
+
const pageModel = new DomExtractionModel({
|
|
208
|
+
title: { query: css("h1"), extractor: extractInnerText },
|
|
209
|
+
links: { query: css("li"), model: linkModel, multiple: true },
|
|
210
|
+
})
|
|
85
211
|
```
|
|
86
212
|
|
|
87
|
-
|
|
213
|
+
---
|
|
88
214
|
|
|
89
|
-
|
|
215
|
+
## ⚡ Built-in Extractors
|
|
216
|
+
|
|
217
|
+
Import and use pre-built extractors to avoid repetition:
|
|
218
|
+
|
|
219
|
+
| Extractor | Extracts |
|
|
220
|
+
|---|---|
|
|
221
|
+
| `extractInnerText` | `element.innerText` |
|
|
222
|
+
| `extractTextContent` | `element.textContent` |
|
|
223
|
+
| `extractInnerHtml` | `element.innerHTML` |
|
|
224
|
+
| `extractOuterHtml` | `element.outerHTML` |
|
|
225
|
+
| `extractTagName` | `element.tagName` |
|
|
226
|
+
| `extractClassList` | `element.classList` as `string[]` |
|
|
227
|
+
| `extractId` | `element.id` |
|
|
228
|
+
| `extractHref` | `href` attribute |
|
|
229
|
+
| `extractSrc` | `src` attribute |
|
|
230
|
+
| `extractValue` | `value` attribute |
|
|
231
|
+
| `extractStyle` | `style` attribute |
|
|
232
|
+
| `extractTitle` | `title` attribute |
|
|
233
|
+
| `extractPlaceholder` | `placeholder` attribute |
|
|
234
|
+
| `extractName` | `name` attribute |
|
|
235
|
+
| `extractType` | `type` attribute |
|
|
236
|
+
| `extractDisabled` | `disabled` attribute |
|
|
237
|
+
| `extractChecked` | `checked` attribute |
|
|
238
|
+
| `extractRequired` | `required` attribute |
|
|
239
|
+
| `extractAriaLabel` | `aria-label` attribute |
|
|
240
|
+
| `extractAriaHidden` | `aria-hidden` attribute |
|
|
241
|
+
| `extractAriaExpanded` | `aria-expanded` attribute |
|
|
242
|
+
| `extractChildElementCount` | `element.childElementCount` |
|
|
243
|
+
| `extractLocalName` | `element.localName` |
|
|
244
|
+
| `extractAttribute(name)` | any attribute by name |
|
|
245
|
+
|
|
246
|
+
### Using `extract()` directly
|
|
90
247
|
|
|
91
|
-
|
|
248
|
+
```ts
|
|
249
|
+
import { extract } from "@xcrap/dom"
|
|
92
250
|
|
|
93
|
-
|
|
251
|
+
// Property
|
|
252
|
+
extract("innerText") // → element.innerText
|
|
253
|
+
extract("innerHTML") // → element.innerHTML
|
|
94
254
|
|
|
95
|
-
|
|
255
|
+
// Attribute (second argument = true)
|
|
256
|
+
extract("data-id", true) // → element.getAttribute("data-id")
|
|
257
|
+
extract("href", true) // → element.getAttribute("href")
|
|
258
|
+
```
|
|
96
259
|
|
|
97
|
-
|
|
260
|
+
### Sibling helpers
|
|
98
261
|
|
|
99
|
-
|
|
262
|
+
```ts
|
|
263
|
+
import { fromNextElementSibling, fromPreviousElementSibling, extractInnerText } from "@xcrap/dom"
|
|
100
264
|
|
|
101
|
-
|
|
265
|
+
// Extracts innerText of the NEXT sibling
|
|
266
|
+
const nextText = fromNextElementSibling(extractInnerText)
|
|
267
|
+
|
|
268
|
+
// Extracts innerText of the PREVIOUS sibling
|
|
269
|
+
const prevText = fromPreviousElementSibling(extractInnerText)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
## ⚠️ Environment
|
|
275
|
+
|
|
276
|
+
This package depends on browser-native APIs (`DOMParser`, `document.evaluate`, `XPathResult`, `window`, etc.).
|
|
277
|
+
It is designed to run exclusively in **browser environments**:
|
|
278
|
+
|
|
279
|
+
- Browser extensions (Manifest V2 / V3)
|
|
280
|
+
- TamperMonkey / Greasemonkey user scripts
|
|
281
|
+
- In-browser web scraping tools
|
|
102
282
|
|
|
103
|
-
|
|
283
|
+
It will **not** work in Node.js without a DOM emulation layer (e.g., `jsdom` or `linkedom`).
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## 🧪 Testing
|
|
104
288
|
|
|
105
289
|
```bash
|
|
106
290
|
npm run test
|
|
107
|
-
|
|
108
291
|
```
|
|
109
292
|
|
|
293
|
+
---
|
|
294
|
+
|
|
110
295
|
## 🤝 Contributing
|
|
111
296
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
297
|
+
1. Fork the repository.
|
|
298
|
+
2. Create a new branch (`git checkout -b feature/my-feature`).
|
|
299
|
+
3. Commit your changes (`git commit -m 'feat: add my feature'`).
|
|
300
|
+
4. Push to the branch (`git push origin feature/my-feature`).
|
|
301
|
+
5. Open a Pull Request.
|
|
302
|
+
|
|
303
|
+
---
|
|
118
304
|
|
|
119
305
|
## 📝 License
|
|
120
306
|
|
|
121
|
-
This project is licensed under the MIT License.
|
|
307
|
+
This project is licensed under the [MIT License](./LICENSE).
|
package/dist/errors.d.ts
CHANGED
package/dist/errors.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,wBAAyB,SAAQ,KAAK;gBACnC,KAAK,CAAC,EAAE,
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAE9C,qBAAa,wBAAyB,SAAQ,KAAK;gBACnC,KAAK,CAAC,EAAE,WAAW;CAIlC;AAED,qBAAa,kBAAmB,SAAQ,KAAK;;CAK5C;AAED,qBAAa,kBAAmB,SAAQ,KAAK;gBAC7B,GAAG,EAAE,MAAM;CAG1B;AAED,qBAAa,sBAAuB,SAAQ,KAAK;gBACjC,IAAI,EAAE,MAAM;CAG3B"}
|
package/dist/errors.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export class HTMLElementNotFoundError extends Error {
|
|
2
2
|
constructor(query) {
|
|
3
|
-
super(`Element with query "${query
|
|
3
|
+
super(`Element with query "${query ? JSON.stringify(query) : 'no query provided'}" not found`);
|
|
4
4
|
this.name = "HTMLElementNotFoundError";
|
|
5
5
|
}
|
|
6
6
|
}
|
package/dist/errors.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,wBAAyB,SAAQ,KAAK;IAC/C,YAAY,KAAmB;QAC3B,KAAK,CAAC,uBAAuB,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,mBAAmB,aAAa,CAAC,CAAA;QAC9F,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAA;IAC1C,CAAC;CACJ;AAED,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACzC;QACI,KAAK,CAAC,oCAAoC,CAAC,CAAA;QAC3C,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAA;IACpC,CAAC;CACJ;AAED,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACzC,YAAY,GAAW;QACnB,KAAK,CAAC,mBAAmB,GAAG,aAAa,CAAC,CAAA;IAC9C,CAAC;CACJ;AAED,MAAM,OAAO,sBAAuB,SAAQ,KAAK;IAC7C,YAAY,IAAY;QACpB,KAAK,CAAC,wBAAwB,IAAI,aAAa,CAAC,CAAA;IACpD,CAAC;CACJ"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { ExtractionModel } from "./extraction-model.interface.js";
|
|
2
|
+
import { ExtractorFunction } from "./extractors.js";
|
|
3
|
+
import { QueryConfig } from "./query-builders.js";
|
|
4
|
+
export type DomExtractionModelShapeBaseValue = {
|
|
5
|
+
query?: QueryConfig;
|
|
6
|
+
default?: string | string[] | null;
|
|
7
|
+
multiple?: boolean;
|
|
8
|
+
limit?: number;
|
|
9
|
+
extractor: ExtractorFunction;
|
|
10
|
+
};
|
|
11
|
+
export type DomExtractionModelShapeNestedValue = {
|
|
12
|
+
query: QueryConfig;
|
|
13
|
+
limit?: number;
|
|
14
|
+
multiple?: boolean;
|
|
15
|
+
model: ExtractionModel;
|
|
16
|
+
extractor?: ExtractorFunction;
|
|
17
|
+
};
|
|
18
|
+
export type DomExtractionModelValue = DomExtractionModelShapeBaseValue | DomExtractionModelShapeNestedValue;
|
|
19
|
+
export type DomExtractionModelShape = {
|
|
20
|
+
[key: string]: DomExtractionModelValue;
|
|
21
|
+
};
|
|
22
|
+
export type ParseBaseValueReturnType = (undefined | string)[] | string | null | undefined;
|
|
23
|
+
export declare class DomExtractionModel implements ExtractionModel {
|
|
24
|
+
readonly shape: DomExtractionModelShape;
|
|
25
|
+
constructor(shape: DomExtractionModelShape);
|
|
26
|
+
parse(source: string): any;
|
|
27
|
+
protected parseBaseValue(value: DomExtractionModelShapeBaseValue, root: Element): ParseBaseValueReturnType;
|
|
28
|
+
protected parseNestedValue(value: DomExtractionModelShapeNestedValue, root: Element): any;
|
|
29
|
+
}
|
|
30
|
+
//# sourceMappingURL=extraction-model.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extraction-model.d.ts","sourceRoot":"","sources":["../src/extraction-model.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAA;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAEjD,MAAM,MAAM,gCAAgC,GAAG;IAC3C,KAAK,CAAC,EAAE,WAAW,CAAA;IACnB,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,IAAI,CAAA;IAClC,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,iBAAiB,CAAA;CAC/B,CAAA;AAED,MAAM,MAAM,kCAAkC,GAAG;IAC7C,KAAK,EAAE,WAAW,CAAA;IAClB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,KAAK,EAAE,eAAe,CAAA;IACtB,SAAS,CAAC,EAAE,iBAAiB,CAAA;CAChC,CAAA;AAED,MAAM,MAAM,uBAAuB,GAC7B,gCAAgC,GAChC,kCAAkC,CAAA;AAExC,MAAM,MAAM,uBAAuB,GAAG;IAClC,CAAC,GAAG,EAAE,MAAM,GAAG,uBAAuB,CAAA;CACzC,CAAA;AAED,MAAM,MAAM,wBAAwB,GAC9B,CAAC,SAAS,GAAG,MAAM,CAAC,EAAE,GACtB,MAAM,GACN,IAAI,GACJ,SAAS,CAAA;AAEf,qBAAa,kBAAmB,YAAW,eAAe;IAC1C,QAAQ,CAAC,KAAK,EAAE,uBAAuB;gBAA9B,KAAK,EAAE,uBAAuB;IAEnD,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG;IAoB1B,SAAS,CAAC,cAAc,CACpB,KAAK,EAAE,gCAAgC,EACvC,IAAI,EAAE,OAAO,GACd,wBAAwB;IA8B3B,SAAS,CAAC,gBAAgB,CACtB,KAAK,EAAE,kCAAkC,EACzC,IAAI,EAAE,OAAO;CAwBpB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extraction-model.interface.d.ts","sourceRoot":"","sources":["../src/extraction-model.interface.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC5B,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG,CAAA;CAC7B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extraction-model.interface.js","sourceRoot":"","sources":["../src/extraction-model.interface.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { MultipleQueryError, HTMLElementNotFoundError } from "./errors.js";
|
|
2
|
+
import { selectManyElements, selectFirstElement } from "./utils/index.js";
|
|
3
|
+
export class DomExtractionModel {
|
|
4
|
+
constructor(shape) {
|
|
5
|
+
this.shape = shape;
|
|
6
|
+
}
|
|
7
|
+
parse(source) {
|
|
8
|
+
const document = new window.DOMParser().parseFromString(source, "text/html");
|
|
9
|
+
const root = document.documentElement;
|
|
10
|
+
const data = {};
|
|
11
|
+
for (const key in this.shape) {
|
|
12
|
+
const value = this.shape[key];
|
|
13
|
+
const isNestedValue = "model" in value;
|
|
14
|
+
if (isNestedValue) {
|
|
15
|
+
data[key] = this.parseNestedValue(value, root);
|
|
16
|
+
}
|
|
17
|
+
else {
|
|
18
|
+
data[key] = this.parseBaseValue(value, root);
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
return data;
|
|
22
|
+
}
|
|
23
|
+
parseBaseValue(value, root) {
|
|
24
|
+
if (value.multiple) {
|
|
25
|
+
if (!value.query) {
|
|
26
|
+
throw new MultipleQueryError();
|
|
27
|
+
}
|
|
28
|
+
let elements = selectManyElements(value.query, root);
|
|
29
|
+
if (value.limit !== undefined) {
|
|
30
|
+
elements = elements.slice(0, value.limit);
|
|
31
|
+
}
|
|
32
|
+
return elements.map(element => value.extractor(element));
|
|
33
|
+
}
|
|
34
|
+
else {
|
|
35
|
+
const element = value.query
|
|
36
|
+
? selectFirstElement(value.query, root)
|
|
37
|
+
: root;
|
|
38
|
+
if (!element) {
|
|
39
|
+
if (value.default === undefined) {
|
|
40
|
+
throw new HTMLElementNotFoundError(value.query);
|
|
41
|
+
}
|
|
42
|
+
return value.default;
|
|
43
|
+
}
|
|
44
|
+
return value.extractor(element);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
parseNestedValue(value, root) {
|
|
48
|
+
if (value.multiple) {
|
|
49
|
+
let elements = selectManyElements(value.query, root);
|
|
50
|
+
if (value.limit !== undefined) {
|
|
51
|
+
elements = elements.slice(0, value.limit);
|
|
52
|
+
}
|
|
53
|
+
return elements.map(element => value.model.parse(element.outerHTML));
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
const element = selectFirstElement(value.query, root);
|
|
57
|
+
if (!element) {
|
|
58
|
+
throw new HTMLElementNotFoundError(value.query);
|
|
59
|
+
}
|
|
60
|
+
const source = value.extractor
|
|
61
|
+
? (value.extractor(element))
|
|
62
|
+
: element.outerHTML;
|
|
63
|
+
return value.model.parse(source);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
//# sourceMappingURL=extraction-model.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extraction-model.js","sourceRoot":"","sources":["../src/extraction-model.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAA;AAC1E,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAmCzE,MAAM,OAAO,kBAAkB;IAC3B,YAAqB,KAA8B;QAA9B,UAAK,GAAL,KAAK,CAAyB;IAAI,CAAC;IAExD,KAAK,CAAC,MAAc;QAChB,MAAM,QAAQ,GAAG,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC,eAAe,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;QAC5E,MAAM,IAAI,GAAG,QAAQ,CAAC,eAAe,CAAA;QAErC,MAAM,IAAI,GAAyC,EAAE,CAAA;QAErD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;YAC7B,MAAM,aAAa,GAAG,OAAO,IAAI,KAAK,CAAA;YAEtC,IAAI,aAAa,EAAE,CAAC;gBAChB,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAClD,CAAC;iBAAM,CAAC;gBACJ,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAChD,CAAC;QACL,CAAC;QAED,OAAO,IAAI,CAAA;IACf,CAAC;IAES,cAAc,CACpB,KAAuC,EACvC,IAAa;QAEb,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACjB,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;gBACf,MAAM,IAAI,kBAAkB,EAAE,CAAA;YAClC,CAAC;YAED,IAAI,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAEpD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC5B,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAA;YAC7C,CAAC;YAED,OAAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,OAAsB,CAAC,CAAC,CAAA;QAC3E,CAAC;aAAM,CAAC;YACJ,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK;gBACvB,CAAC,CAAC,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC;gBACvC,CAAC,CAAC,IAAI,CAAA;YAEV,IAAI,CAAC,OAAO,EAAE,CAAC;gBACX,IAAI,KAAK,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;oBAC9B,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;gBACnD,CAAC;gBAED,OAAO,KAAK,CAAC,OAAO,CAAA;YACxB,CAAC;YAED,OAAO,KAAK,CAAC,SAAS,CAAC,OAAsB,CAAC,CAAA;QAClD,CAAC;IACL,CAAC;IAES,gBAAgB,CACtB,KAAyC,EACzC,IAAa;QAEb,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACjB,IAAI,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAEpD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC5B,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAA;YAC7C,CAAC;YAED,O
AAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAA;QACxE,CAAC;aAAM,CAAC;YACJ,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAErD,IAAI,CAAC,OAAO,EAAE,CAAC;gBACX,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;YACnD,CAAC;YAED,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS;gBAC1B,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,OAAsB,CAAC,CAAW;gBACrD,CAAC,CAAC,OAAO,CAAC,SAAS,CAAA;YAEvB,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAA;QACpC,CAAC;IACL,CAAC;CACJ"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
export * from "./parser.js";
|
|
2
|
-
export * from "./
|
|
3
|
-
export * from "./
|
|
2
|
+
export * from "./extraction-model.js";
|
|
3
|
+
export * from "./extraction-model.interface.js";
|
|
4
4
|
export * from "./extractors.js";
|
|
5
5
|
export * from "./errors.js";
|
|
6
|
+
export * from "./query-builders.js";
|
|
6
7
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAA;AAC3B,cAAc,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAA;AAC3B,cAAc,uBAAuB,CAAA;AACrC,cAAc,iCAAiC,CAAA;AAC/C,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,qBAAqB,CAAA"}
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
export * from "./parser.js";
|
|
2
|
-
export * from "./
|
|
3
|
-
export * from "./
|
|
2
|
+
export * from "./extraction-model.js";
|
|
3
|
+
export * from "./extraction-model.interface.js";
|
|
4
4
|
export * from "./extractors.js";
|
|
5
5
|
export * from "./errors.js";
|
|
6
|
+
export * from "./query-builders.js";
|
|
6
7
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAA;AAC3B,cAAc,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAA;AAC3B,cAAc,uBAAuB,CAAA;AACrC,cAAc,iCAAiC,CAAA;AAC/C,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,qBAAqB,CAAA"}
|
package/dist/parser.d.ts
CHANGED
|
@@ -1,31 +1,32 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { ExtractionModel } from "./extraction-model.interface.js";
|
|
2
2
|
import { ExtractorFunction } from "./extractors.js";
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
import { QueryConfig } from "./query-builders.js";
|
|
4
|
+
export type ExtractValuesOptions = {
|
|
5
|
+
query: QueryConfig;
|
|
5
6
|
extractor: ExtractorFunction;
|
|
6
7
|
limit?: number;
|
|
7
8
|
};
|
|
8
|
-
export type
|
|
9
|
-
query?:
|
|
9
|
+
export type ExtractValueOptions = {
|
|
10
|
+
query?: QueryConfig;
|
|
10
11
|
extractor: ExtractorFunction;
|
|
11
12
|
default?: string | null;
|
|
12
13
|
};
|
|
13
|
-
export type
|
|
14
|
-
query?:
|
|
15
|
-
model:
|
|
14
|
+
export type ExtractModelOptions = {
|
|
15
|
+
query?: QueryConfig;
|
|
16
|
+
model: ExtractionModel;
|
|
16
17
|
};
|
|
17
|
-
export type
|
|
18
|
-
query:
|
|
19
|
-
model:
|
|
18
|
+
export type ExtractModelsOptions = {
|
|
19
|
+
query: QueryConfig;
|
|
20
|
+
model: ExtractionModel;
|
|
20
21
|
limit?: number;
|
|
21
22
|
};
|
|
22
23
|
export declare class DomParser {
|
|
23
24
|
readonly source: string;
|
|
24
25
|
readonly root: Document;
|
|
25
26
|
constructor(source: string);
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
extractValues({ query, extractor, limit }: ExtractValuesOptions): (string | undefined)[];
|
|
28
|
+
extractValue({ query, extractor, default: default_ }: ExtractValueOptions): any | undefined | null;
|
|
29
|
+
extractModel({ model, query }: ExtractModelOptions): any;
|
|
30
|
+
extractModels({ model, query, limit }: ExtractModelsOptions): any[];
|
|
30
31
|
}
|
|
31
32
|
//# sourceMappingURL=parser.d.ts.map
|
package/dist/parser.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAA;AAEjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAEjD,MAAM,MAAM,oBAAoB,GAAG;IAC/B,KAAK,EAAE,WAAW,CAAA;IAClB,SAAS,EAAE,iBAAiB,CAAA;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,mBAAmB,GAAG;IAC9B,KAAK,CAAC,EAAE,WAAW,CAAA;IACnB,SAAS,EAAE,iBAAiB,CAAA;IAC5B,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAC1B,CAAA;AAED,MAAM,MAAM,mBAAmB,GAAG;IAC9B,KAAK,CAAC,EAAE,WAAW,CAAA;IACnB,KAAK,EAAE,eAAe,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,oBAAoB,GAAG;IAC/B,KAAK,EAAE,WAAW,CAAA;IAClB,KAAK,EAAE,eAAe,CAAA;IACtB,KAAK,CAAC,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,qBAAa,SAAS;IAGN,QAAQ,CAAC,MAAM,EAAE,MAAM;IAFnC,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAA;gBAEF,MAAM,EAAE,MAAM;IAInC,aAAa,CAAC,EACV,KAAK,EACL,SAAS,EACT,KAAK,EACR,EAAE,oBAAoB,GAAG,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE;IAchD,YAAY,CAAC,EACT,KAAK,EACL,SAAS,EACT,OAAO,EAAE,QAAQ,EACpB,EAAE,mBAAmB,GAAG,GAAG,GAAG,SAAS,GAAG,IAAI;IAmB/C,YAAY,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,mBAAmB;IAYlD,aAAa,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,oBAAoB;CAa9D"}
|
package/dist/parser.js
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
+
import { selectManyElements, selectFirstElement } from "./utils/index.js";
|
|
1
2
|
import { HTMLElementNotFoundError } from "./errors.js";
|
|
2
3
|
export class DomParser {
|
|
3
4
|
constructor(source) {
|
|
4
5
|
this.source = source;
|
|
5
6
|
this.root = new window.DOMParser().parseFromString(source, "text/html");
|
|
6
7
|
}
|
|
7
|
-
|
|
8
|
-
const elements = this.root
|
|
8
|
+
extractValues({ query, extractor, limit }) {
|
|
9
|
+
const elements = selectManyElements(query, this.root);
|
|
9
10
|
const items = [];
|
|
10
11
|
for (const element of elements) {
|
|
11
12
|
if (limit !== undefined && items.length >= limit)
|
|
@@ -15,10 +16,10 @@ export class DomParser {
|
|
|
15
16
|
}
|
|
16
17
|
return items;
|
|
17
18
|
}
|
|
18
|
-
|
|
19
|
+
extractValue({ query, extractor, default: default_ }) {
|
|
19
20
|
let data;
|
|
20
21
|
if (query) {
|
|
21
|
-
const element = this.root
|
|
22
|
+
const element = selectFirstElement(query, this.root);
|
|
22
23
|
if (!element) {
|
|
23
24
|
if (default_ !== undefined)
|
|
24
25
|
return default_;
|
|
@@ -31,17 +32,17 @@ export class DomParser {
|
|
|
31
32
|
}
|
|
32
33
|
return data ?? default_;
|
|
33
34
|
}
|
|
34
|
-
|
|
35
|
+
extractModel({ model, query }) {
|
|
35
36
|
const element = query
|
|
36
|
-
? this.root
|
|
37
|
+
? selectFirstElement(query, this.root)
|
|
37
38
|
: this.root.documentElement;
|
|
38
39
|
if (!element) {
|
|
39
40
|
throw new HTMLElementNotFoundError(query);
|
|
40
41
|
}
|
|
41
42
|
return model.parse(element.outerHTML);
|
|
42
43
|
}
|
|
43
|
-
|
|
44
|
-
const elements = this.root
|
|
44
|
+
extractModels({ model, query, limit }) {
|
|
45
|
+
const elements = selectManyElements(query, this.root);
|
|
45
46
|
const dataList = [];
|
|
46
47
|
for (const element of elements) {
|
|
47
48
|
if (limit !== undefined && dataList.length >= limit)
|
package/dist/parser.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAEzE,OAAO,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAA;AA2BtD,MAAM,OAAO,SAAS;IAGlB,YAAqB,MAAc;QAAd,WAAM,GAAN,MAAM,CAAQ;QAC/B,IAAI,CAAC,IAAI,GAAG,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC,eAAe,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;IAC3E,CAAC;IAED,aAAa,CAAC,EACV,KAAK,EACL,SAAS,EACT,KAAK,EACc;QACnB,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAA;QAErD,MAAM,KAAK,GAA2B,EAAE,CAAA;QAExC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAK;YACvD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAsB,CAAC,CAAA;YAC9C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpB,CAAC;QAED,OAAO,KAAK,CAAA;IAChB,CAAC;IAED,YAAY,CAAC,EACT,KAAK,EACL,SAAS,EACT,OAAO,EAAE,QAAQ,EACC;QAClB,IAAI,IAA4B,CAAA;QAEhC,IAAI,KAAK,EAAE,CAAC;YACR,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAA;YAEpD,IAAI,CAAC,OAAO,EAAE,CAAC;gBACX,IAAI,QAAQ,KAAK,SAAS;oBAAE,OAAO,QAAQ,CAAA;gBAC3C,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,CAAA;YAC7C,CAAC;YAED,IAAI,GAAG,SAAS,CAAC,OAAsB,CAAC,CAAA;QAC5C,CAAC;aAAM,CAAC;YACJ,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;QAC/C,CAAC;QAED,OAAO,IAAI,IAAI,QAAQ,CAAA;IAC3B,CAAC;IAED,YAAY,CAAC,EAAE,KAAK,EAAE,KAAK,EAAuB;QAC9C,MAAM,OAAO,GAAG,KAAK;YACjB,CAAC,CAAC,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC;YACtC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAA;QAE/B,IAAI,CAAC,OAAO,EAAE,CAAC;YACX,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,CAAA;QAC7C,CAAC;QAED,OAAO,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;IACzC,CAAC;IAED,aAAa,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAwB;QACvD,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAA;QAErD,MAAM,QAAQ,GAAU,EAAE,CAAA;QAE1B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,KAAK,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAK;YAC1D,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAC3C,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACvB,CAAC;QAED,OAAO,QAAQ,CAAA;IACnB,CAAC;CACJ"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"query-builders.d.ts","sourceRoot":"","sources":["../src/query-builders.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,WAAW,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,KAAK,GAAG,OAAO,CAAA;CACxB,CAAA;AAED,wBAAgB,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAK9C;AAED,wBAAgB,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAKhD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"query-builders.js","sourceRoot":"","sources":["../src/query-builders.ts"],"names":[],"mappings":"AAKA,MAAM,UAAU,GAAG,CAAC,KAAa;IAC7B,OAAO;QACH,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,KAAK;KACd,CAAA;AACL,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,KAAa;IAC/B,OAAO;QACH,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,OAAO;KAChB,CAAA;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,cAAc,2BAA2B,CAAA;AACzC,cAAc,2BAA2B,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,cAAc,2BAA2B,CAAA;AACzC,cAAc,2BAA2B,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"select-first-element.d.ts","sourceRoot":"","sources":["../../src/utils/select-first-element.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAGlD,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,GAAG,QAAQ,GAAG,OAAO,GAAG,IAAI,CAc/F"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { toDocument } from "./to-document.js";
|
|
2
|
+
/**
 * Runs a single query against `root` and returns the first match.
 *
 * A query whose `type` is exactly "css" is forwarded to
 * `root.querySelector`. Every other `type` is evaluated as an XPath
 * expression with `root` as the context node.
 *
 * @param {{ type: string, value: string }} query - Query descriptor; `value`
 *   holds the selector or XPath expression.
 * @param {*} root - Node to search from; must expose `querySelector` for CSS
 *   queries. (Presumably an Element or Document - confirm against callers.)
 * @returns {*} Whatever `querySelector` returns for CSS queries, or the
 *   `singleNodeValue` of the XPath result otherwise.
 */
export function selectFirstElement(query, root) {
    const isCssQuery = query.type === "css";

    if (isCssQuery) {
        return root.querySelector(query.value);
    }

    // `toDocument` comes from ./to-document.js; it supplies the object whose
    // `evaluate` method runs the XPath expression.
    const ownerDocument = toDocument(root);
    const { singleNodeValue } = ownerDocument.evaluate(
        query.value,
        root,
        null,
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null,
    );

    return singleNodeValue;
}
|
|
11
|
+
//# sourceMappingURL=select-first-element.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"select-first-element.js","sourceRoot":"","sources":["../../src/utils/select-first-element.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAE7C,MAAM,UAAU,kBAAkB,CAAC,KAAkB,EAAE,IAAwB;IAC3E,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QACvB,OAAO,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IAC1C,CAAC;SAAM,CAAC;QACJ,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAA;QAEjC,OAAO,QAAQ,CAAC,QAAQ,CACpB,KAAK,CAAC,KAAK,EACX,IAAI,EACJ,IAAI,EACJ,WAAW,CAAC,uBAAuB,EACnC,IAAI,CACP,CAAC,eAAiC,CAAA;IACvC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"select-many-elements.d.ts","sourceRoot":"","sources":["../../src/utils/select-many-elements.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAGlD,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,GAAG,QAAQ,GAAG,OAAO,EAAE,CAwB1F"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { toDocument } from "./to-document.js";
|
|
2
|
+
/**
 * Runs a query against `root` and collects every match into an array.
 *
 * A query whose `type` is exactly "css" is forwarded to
 * `root.querySelectorAll` and the result is copied into a plain array.
 * Every other `type` is evaluated as an XPath expression with `root` as the
 * context node, and the resulting iterator is drained in order.
 *
 * @param {{ type: string, value: string }} query - Query descriptor; `value`
 *   holds the selector or XPath expression.
 * @param {*} root - Node to search from; must expose `querySelectorAll` for
 *   CSS queries. (Presumably an Element or Document - confirm against callers.)
 * @returns {Array} The matches, in the order the underlying API yields them.
 */
export function selectManyElements(query, root) {
    if (query.type !== "css") {
        // `toDocument` comes from ./to-document.js; it supplies the object
        // whose `evaluate` method runs the XPath expression.
        const ownerDocument = toDocument(root);
        const iterator = ownerDocument.evaluate(
            query.value,
            root,
            null,
            XPathResult.ORDERED_NODE_ITERATOR_TYPE,
            null,
        );

        const matches = [];
        // iterateNext() yields a falsy value once the iterator is exhausted.
        for (let current = iterator.iterateNext(); current; current = iterator.iterateNext()) {
            matches.push(current);
        }
        return matches;
    }

    const nodeList = root.querySelectorAll(query.value);
    return Array.from(nodeList);
}
|
|
18
|
+
//# sourceMappingURL=select-many-elements.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"select-many-elements.js","sourceRoot":"","sources":["../../src/utils/select-many-elements.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAE7C,MAAM,UAAU,kBAAkB,CAAC,KAAkB,EAAE,IAAwB;IAC3E,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QACvB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAA;IACzD,CAAC;SAAM,CAAC;QACJ,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAA;QAEjC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAC5B,KAAK,CAAC,KAAK,EACX,IAAI,EACJ,IAAI,EACJ,WAAW,CAAC,0BAA0B,EACtC,IAAI,CACP,CAAA;QAED,MAAM,QAAQ,GAAc,EAAE,CAAA;QAC9B,IAAI,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,CAAA;QAE/B,OAAO,IAAI,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,IAAe,CAAC,CAAA;YAC9B,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,CAAA;QAC/B,CAAC;QAED,OAAO,QAAQ,CAAA;IACnB,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"to-document.d.ts","sourceRoot":"","sources":["../../src/utils/to-document.ts"],"names":[],"mappings":"AAAA,wBAAgB,UAAU,CAAC,IAAI,EAAE,OAAO,GAAG,QAAQ,GAAG,QAAQ,CAM7D"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"to-document.js","sourceRoot":"","sources":["../../src/utils/to-document.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,UAAU,CAAC,IAAwB;IAC/C,IAAI,IAAI,YAAY,QAAQ,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAA;IACf,CAAC;IAED,OAAO,IAAI,SAAS,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,CAAC,CAAA;AACvE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xcrap/dom",
|
|
3
|
-
"version": "0.0.1",
|
|
4
|
-
"description": "DOM
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Xcrap DOM is a client-side package designed for DOM data extraction using declarative models. It works natively in browser environments (browser extensions, TamperMonkey user scripts, etc.) and supports both CSS selectors and XPath queries out of the box.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
],
|
|
22
22
|
"author": "Marcuth",
|
|
23
23
|
"license": "MIT",
|
|
24
|
-
"type": "
|
|
24
|
+
"type": "module",
|
|
25
25
|
"devDependencies": {
|
|
26
26
|
"@types/jest": "^30.0.0",
|
|
27
27
|
"ts-jest": "^29.4.6",
|