@xcrap/dom 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +242 -56
  2. package/dist/errors.d.ts +2 -1
  3. package/dist/errors.d.ts.map +1 -1
  4. package/dist/errors.js +1 -1
  5. package/dist/errors.js.map +1 -1
  6. package/dist/extraction-model.d.ts +30 -0
  7. package/dist/extraction-model.d.ts.map +1 -0
  8. package/dist/extraction-model.interface.d.ts +4 -0
  9. package/dist/extraction-model.interface.d.ts.map +1 -0
  10. package/dist/extraction-model.interface.js +2 -0
  11. package/dist/extraction-model.interface.js.map +1 -0
  12. package/dist/extraction-model.js +67 -0
  13. package/dist/extraction-model.js.map +1 -0
  14. package/dist/index.d.ts +3 -2
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +3 -2
  17. package/dist/index.js.map +1 -1
  18. package/dist/parser.d.ts +16 -15
  19. package/dist/parser.d.ts.map +1 -1
  20. package/dist/parser.js +9 -8
  21. package/dist/parser.js.map +1 -1
  22. package/dist/query-builders.d.ts +7 -0
  23. package/dist/query-builders.d.ts.map +1 -0
  24. package/dist/query-builders.js +13 -0
  25. package/dist/query-builders.js.map +1 -0
  26. package/dist/utils/index.d.ts +3 -0
  27. package/dist/utils/index.d.ts.map +1 -0
  28. package/dist/utils/index.js +3 -0
  29. package/dist/utils/index.js.map +1 -0
  30. package/dist/utils/select-first-element.d.ts +3 -0
  31. package/dist/utils/select-first-element.d.ts.map +1 -0
  32. package/dist/utils/select-first-element.js +11 -0
  33. package/dist/utils/select-first-element.js.map +1 -0
  34. package/dist/utils/select-many-elements.d.ts +3 -0
  35. package/dist/utils/select-many-elements.d.ts.map +1 -0
  36. package/dist/utils/select-many-elements.js +18 -0
  37. package/dist/utils/select-many-elements.js.map +1 -0
  38. package/dist/utils/to-document.d.ts +2 -0
  39. package/dist/utils/to-document.d.ts.map +1 -0
  40. package/dist/utils/to-document.js +7 -0
  41. package/dist/utils/to-document.js.map +1 -0
  42. package/package.json +3 -3
package/README.md CHANGED
@@ -1,6 +1,8 @@
1
- # 🕷️ Xcrap DOM Parser: Parsing HTML using declarative models
1
+ # 🕷️ Xcrap DOM Extractor
2
2
 
3
- Xcrap DOM is a package from the Xcrap framework, designed to handle DOM data extraction (client-side) using declarative models. It is perfect for use in web scraping extensions and TamperMonkey user scripts.
3
+ > Extracting data from HTML using declarative models — part of the [Xcrap](https://github.com/xcrap) framework.
4
+
5
+ Xcrap DOM is a **client-side** package designed for DOM data extraction using declarative models. It works natively in browser environments (browser extensions, TamperMonkey user scripts, etc.) and supports both **CSS selectors** and **XPath** queries out of the box.
4
6
 
5
7
  ---
6
8
 
@@ -15,107 +17,291 @@ npm i @xcrap/dom
15
17
 
16
18
  ---
17
19
 
18
- ## 🛠️ How to Use
20
+ ## 🛠️ Quick Start
19
21
 
20
- There are several ways to use this parsing engine, from using pre-made models to expanding it by creating parsers for other file types and maintaining the interlocking of these models.
22
+ ### Creating a parser
21
23
 
22
- ### Providing an HTML string
24
+ `DomParser` receives an HTML string and parses it into a document internally.
23
25
 
24
26
  ```ts
25
27
  import { DomParser } from "@xcrap/dom"
26
28
 
27
- const html = "<html><head><title>Page Title</title></head><body></body></html>" // or document.documentElement.outerHTML
29
+ const html = document.documentElement.outerHTML
28
30
  const parser = new DomParser(html)
31
+ ```
32
+
33
+ ---
34
+
35
+ ## 🔍 Query Builders
29
36
 
37
+ Instead of passing raw strings as queries, use the `css()` and `xpath()` helpers to build typed `QueryConfig` objects.
38
+
39
+ ```ts
40
+ import { css, xpath } from "@xcrap/dom"
41
+
42
+ css("h1") // { type: "css", value: "h1" }
43
+ xpath("//h1") // { type: "xpath", value: "//h1" }
30
44
  ```
31
45
 
32
- ### Data extraction without using models
46
+ ---
47
+
48
+ ## 📤 Extracting Data
49
+
50
+ ### `extractValue` — single value from a single element
33
51
 
34
52
  ```ts
35
- import { DomParser, extract } from "@xcrap/dom"
53
+ import { DomParser, css, extract } from "@xcrap/dom"
36
54
 
37
- const html = `<html><head><title>Page Title</title></head><body><a href="https://example.com">Link</a></body></html>`
55
+ const html = `<html><body><h1>Hello World</h1></body></html>`
38
56
  const parser = new DomParser(html)
39
57
 
40
- // parseFirst() searches for and extracts something from the first element found
41
- // extract(key: string, isAttribute?: boolean) is a generic extraction function; you can use some that are already created and ready for use by importing them from the same location :)
42
- const title = parser.parseFirst({ query: "title", extractor: extract("innerText") })
58
+ const title = parser.extractValue({
59
+ query: css("h1"),
60
+ extractor: extract("innerText"),
61
+ default: null // returned if the element is not found
62
+ })
63
+
64
+ console.log(title) // "Hello World"
65
+ ```
66
+
67
+ ### `extractValues` — one value per matched element
43
68
 
44
- // parseMany() searches for all elements matching a query (you can limit the number of results) and uses the extractor to get the data
45
- const links = parser.parseMany({ query: "a", extractor: extract("href", true) })
69
+ ```ts
70
+ import { DomParser, css, extractHref } from "@xcrap/dom"
46
71
 
47
- console.log(title) // "Page Title"
48
- console.log(links) // ["https://example.com"]
72
+ const html = `<html><body><a href="/a">A</a><a href="/b">B</a></body></html>`
73
+ const parser = new DomParser(html)
49
74
 
75
+ const links = parser.extractValues({
76
+ query: css("a"),
77
+ extractor: extractHref,
78
+ limit: 10 // optional
79
+ })
80
+
81
+ console.log(links) // ["/a", "/b"]
50
82
  ```
51
83
 
52
- ### Data extraction using models
84
+ ### `extractModel` — parse a subtree with a `DomExtractionModel`
53
85
 
54
- ParsingModels are sufficiently decoupled so that you don't have to rely on Parser instances, but we will use them here nonetheless:
86
+ ```ts
87
+ import { DomParser, DomExtractionModel, css, extract } from "@xcrap/dom"
88
+
89
+ const html = `<html><body>
90
+ <h1>Header</h1>
91
+ <p id="user-id">42</p>
92
+ <p class="username">john_doe</p>
93
+ </body></html>`
94
+
95
+ const parser = new DomParser(html)
96
+
97
+ const model = new DomExtractionModel({
98
+ heading: { query: css("h1"), extractor: extract("innerText") },
99
+ userId: { query: css("#user-id"), extractor: extract("innerText") },
100
+ username: { query: css(".username"), extractor: extract("innerText") },
101
+ })
102
+
103
+ const data = parser.extractModel({ model })
104
+
105
+ console.log(data)
106
+ // { heading: "Header", userId: "42", username: "john_doe" }
107
+ ```
108
+
109
+ ### `extractModels` — parse a list of elements, each with the same model
55
110
 
56
111
  ```ts
57
- import { DomParser, DomParsingModel, extract } from "@xcrap/dom"
112
+ import { DomParser, DomExtractionModel, css, extractInnerText, extractHref } from "@xcrap/dom"
113
+
114
+ const html = `<html><body>
115
+ <ul>
116
+ <li><a href="/page/1">Page 1</a></li>
117
+ <li><a href="/page/2">Page 2</a></li>
118
+ <li><a href="/page/3">Page 3</a></li>
119
+ </ul>
120
+ </body></html>`
58
121
 
59
- const html = `<html><body><h1>Header</h1><div><p id="id">1</p><p id="name">Name</p><p class="age">23</p></div></body></html>`
60
122
  const parser = new DomParser(html)
61
123
 
62
- const rootParsingModel = new DomParsingModel({
63
- heading: {
64
- query: "h1",
65
- extractor: extract("innerText")
66
- },
67
- id: {
68
- query: "#id",
69
- extractor: extract("innerText")
70
- },
71
- name: {
72
- query: "#name",
73
- extractor: extract("innerText")
74
- },
75
- age: {
76
- query: ".age",
77
- extractor: extract("innerText")
78
- }
124
+ const itemModel = new DomExtractionModel({
125
+ label: { query: css("a"), extractor: extractInnerText },
126
+ url: { query: css("a"), extractor: extractHref },
127
+ })
128
+
129
+ const items = parser.extractModels({
130
+ query: css("li"),
131
+ model: itemModel,
132
+ limit: 10 // optional
133
+ })
134
+
135
+ console.log(items)
136
+ // [
137
+ // { label: "Page 1", url: "/page/1" },
138
+ // { label: "Page 2", url: "/page/2" },
139
+ // { label: "Page 3", url: "/page/3" },
140
+ // ]
141
+ ```
142
+
143
+ ---
144
+
145
+ ## 🧩 `DomExtractionModel` — Declarative Extraction
146
+
147
+ `DomExtractionModel` receives a `shape` — a plain object where each key maps to an extraction descriptor.
148
+
149
+ ### Base value descriptor
150
+
151
+ | Field | Type | Required | Description |
152
+ |---|---|---|---|
153
+ | `query` | `QueryConfig` | No | CSS or XPath query to locate the element |
154
+ | `extractor` | `ExtractorFunction` | **Yes** | Function that receives the element and returns a value |
155
+ | `multiple` | `boolean` | No | If `true`, matches all elements and returns an array |
156
+ | `limit` | `number` | No | Max number of elements when `multiple: true` |
157
+ | `default` | `string \| string[] \| null` | No | Fallback value when the element is not found |
158
+
159
+ ### Nested model descriptor (sub-parsing)
160
+
161
+ | Field | Type | Required | Description |
162
+ |---|---|---|---|
163
+ | `query` | `QueryConfig` | **Yes** | Query to locate the root element for the nested model |
164
+ | `model` | `ExtractionModel` | **Yes** | Another `DomExtractionModel` to parse the subtree |
165
+ | `multiple` | `boolean` | No | If `true`, applies the model to every matched element |
166
+ | `limit` | `number` | No | Max number of elements when `multiple: true` |
167
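+ | `extractor` | `ExtractorFunction` | No | Optional function applied to the matched element; its result is passed to `model.parse()` instead of the element's `outerHTML` (used only when `multiple` is not set) |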
168
+
169
+ ### Using XPath
170
+
171
+ ```ts
172
+ import { DomExtractionModel, xpath, extract } from "@xcrap/dom"
173
+
174
+ const model = new DomExtractionModel({
175
+ heading: {
176
+ query: xpath("//h1"),
177
+ extractor: extract("innerText")
178
+ }
179
+ })
180
+ ```
181
+
182
+ ### Extracting arrays
183
+
184
+ ```ts
185
+ import { DomExtractionModel, css, extractInnerText } from "@xcrap/dom"
186
+
187
+ const model = new DomExtractionModel({
188
+ tags: {
189
+ query: css("li"),
190
+ extractor: extractInnerText,
191
+ multiple: true,
192
+ limit: 5
193
+ }
79
194
  })
195
+ ```
196
+
197
+ ### Nested models
80
198
 
81
- const data = parser.extractFirst({ model: rootParsingModel })
199
+ ```ts
200
+ import { DomExtractionModel, css, extractInnerText, extractHref } from "@xcrap/dom"
82
201
 
83
- console.log(data) // { heading: "Header", id: "1", name: "Name", age: "23" }
202
+ const linkModel = new DomExtractionModel({
203
+ label: { query: css("a"), extractor: extractInnerText },
204
+ href: { query: css("a"), extractor: extractHref },
205
+ })
84
206
 
207
+ const pageModel = new DomExtractionModel({
208
+ title: { query: css("h1"), extractor: extractInnerText },
209
+ links: { query: css("li"), model: linkModel, multiple: true },
210
+ })
85
211
  ```
86
212
 
87
- ## 🧠 Create your own Parser: Concepts
213
+ ---
88
214
 
89
- ### What is a Parser?
215
+ ## Built-in Extractors
216
+
217
+ Import and use pre-built extractors to avoid repetition:
218
+
219
+ | Extractor | Extracts |
220
+ |---|---|
221
+ | `extractInnerText` | `element.innerText` |
222
+ | `extractTextContent` | `element.textContent` |
223
+ | `extractInnerHtml` | `element.innerHTML` |
224
+ | `extractOuterHtml` | `element.outerHTML` |
225
+ | `extractTagName` | `element.tagName` |
226
+ | `extractClassList` | `element.classList` as `string[]` |
227
+ | `extractId` | `element.id` |
228
+ | `extractHref` | `href` attribute |
229
+ | `extractSrc` | `src` attribute |
230
+ | `extractValue` | `value` attribute |
231
+ | `extractStyle` | `style` attribute |
232
+ | `extractTitle` | `title` attribute |
233
+ | `extractPlaceholder` | `placeholder` attribute |
234
+ | `extractName` | `name` attribute |
235
+ | `extractType` | `type` attribute |
236
+ | `extractDisabled` | `disabled` attribute |
237
+ | `extractChecked` | `checked` attribute |
238
+ | `extractRequired` | `required` attribute |
239
+ | `extractAriaLabel` | `aria-label` attribute |
240
+ | `extractAriaHidden` | `aria-hidden` attribute |
241
+ | `extractAriaExpanded` | `aria-expanded` attribute |
242
+ | `extractChildElementCount` | `element.childElementCount` |
243
+ | `extractLocalName` | `element.localName` |
244
+ | `extractAttribute(name)` | any attribute by name |
245
+
246
+ ### Using `extract()` directly
90
247
 
91
- A Parser for this library is a class that handles a file type in some way, loads that file, and may or may not have methods to easily extract data.
248
+ ```ts
249
+ import { extract } from "@xcrap/dom"
92
250
 
93
- A parser has a default method called `parseModel`, which is a wrapper that receives a `ParsingModel` and calls the `parse()` method, providing the internal `source` property.
251
+ // Property
252
+ extract("innerText") // → element.innerText
253
+ extract("innerHTML") // → element.innerHTML
94
254
 
95
- ### What is a ParsingModel?
255
+ // Attribute (second argument = true)
256
+ extract("data-id", true) // → element.getAttribute("data-id")
257
+ extract("href", true) // → element.getAttribute("href")
258
+ ```
96
259
 
97
- A Parsing Model is a class that receives a `shape` in its constructor and stores it as a property. It must have a method called `parse()` that receives a `source`, which is the code/text containing the information to be extracted.
260
+ ### Sibling helpers
98
261
 
99
- This `shape` is used to declare how the information will be extracted from the `source`.
262
+ ```ts
263
+ import { fromNextElementSibling, fromPreviousElementSibling, extractInnerText } from "@xcrap/dom"
100
264
 
101
- ## 🧪 Testing
265
+ // Extracts innerText of the NEXT sibling
266
+ const nextText = fromNextElementSibling(extractInnerText)
267
+
268
+ // Extracts innerText of the PREVIOUS sibling
269
+ const prevText = fromPreviousElementSibling(extractInnerText)
270
+ ```
271
+
272
+ ---
273
+
274
+ ## ⚠️ Environment
275
+
276
+ This package depends on browser-native APIs (`DOMParser`, `document.evaluate`, `XPathResult`, `window`, etc.).
277
+ It is designed to run exclusively in **browser environments**:
278
+
279
+ - Browser extensions (Manifest V2 / V3)
280
+ - TamperMonkey / Greasemonkey user scripts
281
+ - In-browser web scraping tools
102
282
 
103
- Automated tests are located in `__tests__`. To run them:
283
+ It will **not** work in Node.js without a DOM emulation layer (e.g., `jsdom` or `linkedom`).
284
+
285
+ ---
286
+
287
+ ## 🧪 Testing
104
288
 
105
289
  ```bash
106
290
  npm run test
107
-
108
291
  ```
109
292
 
293
+ ---
294
+
110
295
  ## 🤝 Contributing
111
296
 
112
- * Want to contribute? Follow these steps:
113
- * Fork the repository.
114
- * Create a new branch (`git checkout -b feature-new`).
115
- * Commit your changes (`git commit -m 'Add new feature'`).
116
- * Push to the branch (`git push origin feature-new`).
117
- * Open a Pull Request.
297
+ 1. Fork the repository.
298
+ 2. Create a new branch (`git checkout -b feature/my-feature`).
299
+ 3. Commit your changes (`git commit -m 'feat: add my feature'`).
300
+ 4. Push to the branch (`git push origin feature/my-feature`).
301
+ 5. Open a Pull Request.
302
+
303
+ ---
118
304
 
119
305
  ## 📝 License
120
306
 
121
- This project is licensed under the MIT License.
307
+ This project is licensed under the [MIT License](./LICENSE).
package/dist/errors.d.ts CHANGED
@@ -1,5 +1,6 @@
1
+ import { QueryConfig } from "./query-builders";
1
2
  export declare class HTMLElementNotFoundError extends Error {
2
- constructor(query?: string);
3
+ constructor(query?: QueryConfig);
3
4
  }
4
5
  export declare class MultipleQueryError extends Error {
5
6
  constructor();
@@ -1 +1 @@
1
- {"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,wBAAyB,SAAQ,KAAK;gBACnC,KAAK,CAAC,EAAE,MAAM;CAI7B;AAED,qBAAa,kBAAmB,SAAQ,KAAK;;CAK5C;AAED,qBAAa,kBAAmB,SAAQ,KAAK;gBAC7B,GAAG,EAAE,MAAM;CAG1B;AAED,qBAAa,sBAAuB,SAAQ,KAAK;gBACjC,IAAI,EAAE,MAAM;CAG3B"}
1
+ {"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAE9C,qBAAa,wBAAyB,SAAQ,KAAK;gBACnC,KAAK,CAAC,EAAE,WAAW;CAIlC;AAED,qBAAa,kBAAmB,SAAQ,KAAK;;CAK5C;AAED,qBAAa,kBAAmB,SAAQ,KAAK;gBAC7B,GAAG,EAAE,MAAM;CAG1B;AAED,qBAAa,sBAAuB,SAAQ,KAAK;gBACjC,IAAI,EAAE,MAAM;CAG3B"}
package/dist/errors.js CHANGED
@@ -1,6 +1,6 @@
1
1
  export class HTMLElementNotFoundError extends Error {
2
2
  constructor(query) {
3
- super(`Element with query "${query || 'no query provided'}" not found`);
3
+ super(`Element with query "${query ? JSON.stringify(query) : 'no query provided'}" not found`);
4
4
  this.name = "HTMLElementNotFoundError";
5
5
  }
6
6
  }
@@ -1 +1 @@
1
- {"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,wBAAyB,SAAQ,KAAK;IAC/C,YAAY,KAAc;QACtB,KAAK,CAAC,uBAAuB,KAAK,IAAI,mBAAmB,aAAa,CAAC,CAAA;QACvE,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAA;IAC1C,CAAC;CACJ;AAED,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACzC;QACI,KAAK,CAAC,oCAAoC,CAAC,CAAA;QAC3C,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAA;IACpC,CAAC;CACJ;AAED,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACzC,YAAY,GAAW;QACnB,KAAK,CAAC,mBAAmB,GAAG,aAAa,CAAC,CAAA;IAC9C,CAAC;CACJ;AAED,MAAM,OAAO,sBAAuB,SAAQ,KAAK;IAC7C,YAAY,IAAY;QACpB,KAAK,CAAC,wBAAwB,IAAI,aAAa,CAAC,CAAA;IACpD,CAAC;CACJ"}
1
+ {"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,wBAAyB,SAAQ,KAAK;IAC/C,YAAY,KAAmB;QAC3B,KAAK,CAAC,uBAAuB,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,mBAAmB,aAAa,CAAC,CAAA;QAC9F,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAA;IAC1C,CAAC;CACJ;AAED,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACzC;QACI,KAAK,CAAC,oCAAoC,CAAC,CAAA;QAC3C,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAA;IACpC,CAAC;CACJ;AAED,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACzC,YAAY,GAAW;QACnB,KAAK,CAAC,mBAAmB,GAAG,aAAa,CAAC,CAAA;IAC9C,CAAC;CACJ;AAED,MAAM,OAAO,sBAAuB,SAAQ,KAAK;IAC7C,YAAY,IAAY;QACpB,KAAK,CAAC,wBAAwB,IAAI,aAAa,CAAC,CAAA;IACpD,CAAC;CACJ"}
@@ -0,0 +1,30 @@
1
+ import { ExtractionModel } from "./extraction-model.interface.js";
2
+ import { ExtractorFunction } from "./extractors.js";
3
+ import { QueryConfig } from "./query-builders.js";
4
+ export type DomExtractionModelShapeBaseValue = {
5
+ query?: QueryConfig;
6
+ default?: string | string[] | null;
7
+ multiple?: boolean;
8
+ limit?: number;
9
+ extractor: ExtractorFunction;
10
+ };
11
+ export type DomExtractionModelShapeNestedValue = {
12
+ query: QueryConfig;
13
+ limit?: number;
14
+ multiple?: boolean;
15
+ model: ExtractionModel;
16
+ extractor?: ExtractorFunction;
17
+ };
18
+ export type DomExtractionModelValue = DomExtractionModelShapeBaseValue | DomExtractionModelShapeNestedValue;
19
+ export type DomExtractionModelShape = {
20
+ [key: string]: DomExtractionModelValue;
21
+ };
22
+ export type ParseBaseValueReturnType = (undefined | string)[] | string | null | undefined;
23
+ export declare class DomExtractionModel implements ExtractionModel {
24
+ readonly shape: DomExtractionModelShape;
25
+ constructor(shape: DomExtractionModelShape);
26
+ parse(source: string): any;
27
+ protected parseBaseValue(value: DomExtractionModelShapeBaseValue, root: Element): ParseBaseValueReturnType;
28
+ protected parseNestedValue(value: DomExtractionModelShapeNestedValue, root: Element): any;
29
+ }
30
+ //# sourceMappingURL=extraction-model.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extraction-model.d.ts","sourceRoot":"","sources":["../src/extraction-model.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAA;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAEjD,MAAM,MAAM,gCAAgC,GAAG;IAC3C,KAAK,CAAC,EAAE,WAAW,CAAA;IACnB,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,IAAI,CAAA;IAClC,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,iBAAiB,CAAA;CAC/B,CAAA;AAED,MAAM,MAAM,kCAAkC,GAAG;IAC7C,KAAK,EAAE,WAAW,CAAA;IAClB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,KAAK,EAAE,eAAe,CAAA;IACtB,SAAS,CAAC,EAAE,iBAAiB,CAAA;CAChC,CAAA;AAED,MAAM,MAAM,uBAAuB,GAC7B,gCAAgC,GAChC,kCAAkC,CAAA;AAExC,MAAM,MAAM,uBAAuB,GAAG;IAClC,CAAC,GAAG,EAAE,MAAM,GAAG,uBAAuB,CAAA;CACzC,CAAA;AAED,MAAM,MAAM,wBAAwB,GAC9B,CAAC,SAAS,GAAG,MAAM,CAAC,EAAE,GACtB,MAAM,GACN,IAAI,GACJ,SAAS,CAAA;AAEf,qBAAa,kBAAmB,YAAW,eAAe;IAC1C,QAAQ,CAAC,KAAK,EAAE,uBAAuB;gBAA9B,KAAK,EAAE,uBAAuB;IAEnD,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG;IAoB1B,SAAS,CAAC,cAAc,CACpB,KAAK,EAAE,gCAAgC,EACvC,IAAI,EAAE,OAAO,GACd,wBAAwB;IA8B3B,SAAS,CAAC,gBAAgB,CACtB,KAAK,EAAE,kCAAkC,EACzC,IAAI,EAAE,OAAO;CAwBpB"}
@@ -0,0 +1,4 @@
1
+ export interface ExtractionModel {
2
+ parse(source: string): any;
3
+ }
4
+ //# sourceMappingURL=extraction-model.interface.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extraction-model.interface.d.ts","sourceRoot":"","sources":["../src/extraction-model.interface.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC5B,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG,CAAA;CAC7B"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=extraction-model.interface.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extraction-model.interface.js","sourceRoot":"","sources":["../src/extraction-model.interface.ts"],"names":[],"mappings":""}
@@ -0,0 +1,67 @@
1
+ import { MultipleQueryError, HTMLElementNotFoundError } from "./errors.js";
2
+ import { selectManyElements, selectFirstElement } from "./utils/index.js";
3
+ export class DomExtractionModel {
4
+ constructor(shape) {
5
+ this.shape = shape;
6
+ }
7
+ parse(source) {
8
+ const document = new window.DOMParser().parseFromString(source, "text/html");
9
+ const root = document.documentElement;
10
+ const data = {};
11
+ for (const key in this.shape) {
12
+ const value = this.shape[key];
13
+ const isNestedValue = "model" in value;
14
+ if (isNestedValue) {
15
+ data[key] = this.parseNestedValue(value, root);
16
+ }
17
+ else {
18
+ data[key] = this.parseBaseValue(value, root);
19
+ }
20
+ }
21
+ return data;
22
+ }
23
+ parseBaseValue(value, root) {
24
+ if (value.multiple) {
25
+ if (!value.query) {
26
+ throw new MultipleQueryError();
27
+ }
28
+ let elements = selectManyElements(value.query, root);
29
+ if (value.limit !== undefined) {
30
+ elements = elements.slice(0, value.limit);
31
+ }
32
+ return elements.map(element => value.extractor(element));
33
+ }
34
+ else {
35
+ const element = value.query
36
+ ? selectFirstElement(value.query, root)
37
+ : root;
38
+ if (!element) {
39
+ if (value.default === undefined) {
40
+ throw new HTMLElementNotFoundError(value.query);
41
+ }
42
+ return value.default;
43
+ }
44
+ return value.extractor(element);
45
+ }
46
+ }
47
+ parseNestedValue(value, root) {
48
+ if (value.multiple) {
49
+ let elements = selectManyElements(value.query, root);
50
+ if (value.limit !== undefined) {
51
+ elements = elements.slice(0, value.limit);
52
+ }
53
+ return elements.map(element => value.model.parse(element.outerHTML));
54
+ }
55
+ else {
56
+ const element = selectFirstElement(value.query, root);
57
+ if (!element) {
58
+ throw new HTMLElementNotFoundError(value.query);
59
+ }
60
+ const source = value.extractor
61
+ ? (value.extractor(element))
62
+ : element.outerHTML;
63
+ return value.model.parse(source);
64
+ }
65
+ }
66
+ }
67
+ //# sourceMappingURL=extraction-model.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extraction-model.js","sourceRoot":"","sources":["../src/extraction-model.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAA;AAC1E,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAmCzE,MAAM,OAAO,kBAAkB;IAC3B,YAAqB,KAA8B;QAA9B,UAAK,GAAL,KAAK,CAAyB;IAAI,CAAC;IAExD,KAAK,CAAC,MAAc;QAChB,MAAM,QAAQ,GAAG,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC,eAAe,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;QAC5E,MAAM,IAAI,GAAG,QAAQ,CAAC,eAAe,CAAA;QAErC,MAAM,IAAI,GAAyC,EAAE,CAAA;QAErD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;YAC7B,MAAM,aAAa,GAAG,OAAO,IAAI,KAAK,CAAA;YAEtC,IAAI,aAAa,EAAE,CAAC;gBAChB,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAClD,CAAC;iBAAM,CAAC;gBACJ,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAChD,CAAC;QACL,CAAC;QAED,OAAO,IAAI,CAAA;IACf,CAAC;IAES,cAAc,CACpB,KAAuC,EACvC,IAAa;QAEb,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACjB,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;gBACf,MAAM,IAAI,kBAAkB,EAAE,CAAA;YAClC,CAAC;YAED,IAAI,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAEpD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC5B,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAA;YAC7C,CAAC;YAED,OAAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,OAAsB,CAAC,CAAC,CAAA;QAC3E,CAAC;aAAM,CAAC;YACJ,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK;gBACvB,CAAC,CAAC,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC;gBACvC,CAAC,CAAC,IAAI,CAAA;YAEV,IAAI,CAAC,OAAO,EAAE,CAAC;gBACX,IAAI,KAAK,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;oBAC9B,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;gBACnD,CAAC;gBAED,OAAO,KAAK,CAAC,OAAO,CAAA;YACxB,CAAC;YAED,OAAO,KAAK,CAAC,SAAS,CAAC,OAAsB,CAAC,CAAA;QAClD,CAAC;IACL,CAAC;IAES,gBAAgB,CACtB,KAAyC,EACzC,IAAa;QAEb,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACjB,IAAI,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAEpD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC5B,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAA;YAC7C,CAAC;YAED
,OAAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAA;QACxE,CAAC;aAAM,CAAC;YACJ,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;YAErD,IAAI,CAAC,OAAO,EAAE,CAAC;gBACX,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;YACnD,CAAC;YAED,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS;gBAC1B,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,OAAsB,CAAC,CAAW;gBACrD,CAAC,CAAC,OAAO,CAAC,SAAS,CAAA;YAEvB,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAA;QACpC,CAAC;IACL,CAAC;CACJ"}
package/dist/index.d.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  export * from "./parser.js";
2
- export * from "./parsing-model.js";
3
- export * from "./parsing-model.interface.js";
2
+ export * from "./extraction-model.js";
3
+ export * from "./extraction-model.interface.js";
4
4
  export * from "./extractors.js";
5
5
  export * from "./errors.js";
6
+ export * from "./query-builders.js";
6
7
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAA;AAC3B,cAAc,oBAAoB,CAAA;AAClC,cAAc,8BAA8B,CAAA;AAC5C,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAA;AAC3B,cAAc,uBAAuB,CAAA;AACrC,cAAc,iCAAiC,CAAA;AAC/C,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,qBAAqB,CAAA"}
package/dist/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  export * from "./parser.js";
2
- export * from "./parsing-model.js";
3
- export * from "./parsing-model.interface.js";
2
+ export * from "./extraction-model.js";
3
+ export * from "./extraction-model.interface.js";
4
4
  export * from "./extractors.js";
5
5
  export * from "./errors.js";
6
+ export * from "./query-builders.js";
6
7
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAA;AAC3B,cAAc,oBAAoB,CAAA;AAClC,cAAc,8BAA8B,CAAA;AAC5C,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAA;AAC3B,cAAc,uBAAuB,CAAA;AACrC,cAAc,iCAAiC,CAAA;AAC/C,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,qBAAqB,CAAA"}
package/dist/parser.d.ts CHANGED
@@ -1,31 +1,32 @@
1
- import { ParsingModel } from "./parsing-model.interface.js";
1
+ import { ExtractionModel } from "./extraction-model.interface.js";
2
2
  import { ExtractorFunction } from "./extractors.js";
3
- export type ParseManyOptions = {
4
- query: string;
3
+ import { QueryConfig } from "./query-builders.js";
4
+ export type ExtractValuesOptions = {
5
+ query: QueryConfig;
5
6
  extractor: ExtractorFunction;
6
7
  limit?: number;
7
8
  };
8
- export type ParseFirstOptions = {
9
- query?: string;
9
+ export type ExtractValueOptions = {
10
+ query?: QueryConfig;
10
11
  extractor: ExtractorFunction;
11
12
  default?: string | null;
12
13
  };
13
- export type ExtractFirstOptions = {
14
- query?: string;
15
- model: ParsingModel;
14
+ export type ExtractModelOptions = {
15
+ query?: QueryConfig;
16
+ model: ExtractionModel;
16
17
  };
17
- export type ExtractManyOptions = {
18
- query: string;
19
- model: ParsingModel;
18
+ export type ExtractModelsOptions = {
19
+ query: QueryConfig;
20
+ model: ExtractionModel;
20
21
  limit?: number;
21
22
  };
22
23
  export declare class DomParser {
23
24
  readonly source: string;
24
25
  readonly root: Document;
25
26
  constructor(source: string);
26
- parseMany({ query, extractor, limit }: ParseManyOptions): (string | undefined)[];
27
- parseFirst({ query, extractor, default: default_ }: ParseFirstOptions): any | undefined | null;
28
- extractFirst({ model, query }: ExtractFirstOptions): any;
29
- extractMany({ model, query, limit }: ExtractManyOptions): any[];
27
+ extractValues({ query, extractor, limit }: ExtractValuesOptions): (string | undefined)[];
28
+ extractValue({ query, extractor, default: default_ }: ExtractValueOptions): any | undefined | null;
29
+ extractModel({ model, query }: ExtractModelOptions): any;
30
+ extractModels({ model, query, limit }: ExtractModelsOptions): any[];
30
31
  }
31
32
  //# sourceMappingURL=parser.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAA;AAE3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AAEnD,MAAM,MAAM,gBAAgB,GAAG;IAC3B,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,iBAAiB,CAAA;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,iBAAiB,GAAG;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,iBAAiB,CAAA;IAC5B,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAC1B,CAAA;AAED,MAAM,MAAM,mBAAmB,GAAG;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,EAAE,YAAY,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC7B,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,YAAY,CAAA;IACnB,KAAK,CAAC,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,qBAAa,SAAS;IAGN,QAAQ,CAAC,MAAM,EAAE,MAAM;IAFnC,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAA;gBAEF,MAAM,EAAE,MAAM;IAInC,SAAS,CAAC,EACN,KAAK,EACL,SAAS,EACT,KAAK,EACR,EAAE,gBAAgB,GAAG,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE;IAc5C,UAAU,CAAC,EACP,KAAK,EACL,SAAS,EACT,OAAO,EAAE,QAAQ,EACpB,EAAE,iBAAiB,GAAG,GAAG,GAAG,SAAS,GAAG,IAAI;IAmB7C,YAAY,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,mBAAmB;IAYlD,WAAW,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,kBAAkB;CAa1D"}
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAA;AAEjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAA;AACnD,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAEjD,MAAM,MAAM,oBAAoB,GAAG;IAC/B,KAAK,EAAE,WAAW,CAAA;IAClB,SAAS,EAAE,iBAAiB,CAAA;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,mBAAmB,GAAG;IAC9B,KAAK,CAAC,EAAE,WAAW,CAAA;IACnB,SAAS,EAAE,iBAAiB,CAAA;IAC5B,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAC1B,CAAA;AAED,MAAM,MAAM,mBAAmB,GAAG;IAC9B,KAAK,CAAC,EAAE,WAAW,CAAA;IACnB,KAAK,EAAE,eAAe,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,oBAAoB,GAAG;IAC/B,KAAK,EAAE,WAAW,CAAA;IAClB,KAAK,EAAE,eAAe,CAAA;IACtB,KAAK,CAAC,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,qBAAa,SAAS;IAGN,QAAQ,CAAC,MAAM,EAAE,MAAM;IAFnC,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAA;gBAEF,MAAM,EAAE,MAAM;IAInC,aAAa,CAAC,EACV,KAAK,EACL,SAAS,EACT,KAAK,EACR,EAAE,oBAAoB,GAAG,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE;IAchD,YAAY,CAAC,EACT,KAAK,EACL,SAAS,EACT,OAAO,EAAE,QAAQ,EACpB,EAAE,mBAAmB,GAAG,GAAG,GAAG,SAAS,GAAG,IAAI;IAmB/C,YAAY,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,mBAAmB;IAYlD,aAAa,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,oBAAoB;CAa9D"}
package/dist/parser.js CHANGED
@@ -1,11 +1,12 @@
1
+ import { selectManyElements, selectFirstElement } from "./utils/index.js";
1
2
  import { HTMLElementNotFoundError } from "./errors.js";
2
3
  export class DomParser {
3
4
  constructor(source) {
4
5
  this.source = source;
5
6
  this.root = new window.DOMParser().parseFromString(source, "text/html");
6
7
  }
7
- parseMany({ query, extractor, limit }) {
8
- const elements = this.root.querySelectorAll(query);
8
+ extractValues({ query, extractor, limit }) {
9
+ const elements = selectManyElements(query, this.root);
9
10
  const items = [];
10
11
  for (const element of elements) {
11
12
  if (limit !== undefined && items.length >= limit)
@@ -15,10 +16,10 @@ export class DomParser {
15
16
  }
16
17
  return items;
17
18
  }
18
- parseFirst({ query, extractor, default: default_ }) {
19
+ extractValue({ query, extractor, default: default_ }) {
19
20
  let data;
20
21
  if (query) {
21
- const element = this.root.querySelector(query);
22
+ const element = selectFirstElement(query, this.root);
22
23
  if (!element) {
23
24
  if (default_ !== undefined)
24
25
  return default_;
@@ -31,17 +32,17 @@ export class DomParser {
31
32
  }
32
33
  return data ?? default_;
33
34
  }
34
- extractFirst({ model, query }) {
35
+ extractModel({ model, query }) {
35
36
  const element = query
36
- ? this.root.querySelector(query)
37
+ ? selectFirstElement(query, this.root)
37
38
  : this.root.documentElement;
38
39
  if (!element) {
39
40
  throw new HTMLElementNotFoundError(query);
40
41
  }
41
42
  return model.parse(element.outerHTML);
42
43
  }
43
- extractMany({ model, query, limit }) {
44
- const elements = this.root.querySelectorAll(query);
44
+ extractModels({ model, query, limit }) {
45
+ const elements = selectManyElements(query, this.root);
45
46
  const dataList = [];
46
47
  for (const element of elements) {
47
48
  if (limit !== undefined && dataList.length >= limit)
@@ -1 +1 @@
1
- {"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAA;AA0BtD,MAAM,OAAO,SAAS;IAGlB,YAAqB,MAAc;QAAd,WAAM,GAAN,MAAM,CAAQ;QAC/B,IAAI,CAAC,IAAI,GAAG,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC,eAAe,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;IAC3E,CAAC;IAED,SAAS,CAAC,EACN,KAAK,EACL,SAAS,EACT,KAAK,EACU;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAA;QAElD,MAAM,KAAK,GAA2B,EAAE,CAAA;QAExC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAK;YACvD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAsB,CAAC,CAAA;YAC9C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpB,CAAC;QAED,OAAO,KAAK,CAAA;IAChB,CAAC;IAED,UAAU,CAAC,EACP,KAAK,EACL,SAAS,EACT,OAAO,EAAE,QAAQ,EACD;QAChB,IAAI,IAA4B,CAAA;QAEhC,IAAI,KAAK,EAAE,CAAC;YACR,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAA;YAE9C,IAAI,CAAC,OAAO,EAAE,CAAC;gBACX,IAAI,QAAQ,KAAK,SAAS;oBAAE,OAAO,QAAQ,CAAA;gBAC3C,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,CAAA;YAC7C,CAAC;YAED,IAAI,GAAG,SAAS,CAAC,OAAsB,CAAC,CAAA;QAC5C,CAAC;aAAM,CAAC;YACJ,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;QAC/C,CAAC;QAED,OAAO,IAAI,IAAI,QAAQ,CAAA;IAC3B,CAAC;IAED,YAAY,CAAC,EAAE,KAAK,EAAE,KAAK,EAAuB;QAC9C,MAAM,OAAO,GAAG,KAAK;YACjB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC;YAChC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAA;QAE/B,IAAI,CAAC,OAAO,EAAE,CAAC;YACX,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,CAAA;QAC7C,CAAC;QAED,OAAO,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;IACzC,CAAC;IAED,WAAW,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAsB;QACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAA;QAElD,MAAM,QAAQ,GAAU,EAAE,CAAA;QAE1B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,KAAK,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAK;YAC1D,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAC3C,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACvB,CAAC;QAED,OAAO,QAAQ,CAAA;IACnB,CAAC;CACJ"}
1
+ {"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAEzE,OAAO,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAA;AA2BtD,MAAM,OAAO,SAAS;IAGlB,YAAqB,MAAc;QAAd,WAAM,GAAN,MAAM,CAAQ;QAC/B,IAAI,CAAC,IAAI,GAAG,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC,eAAe,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;IAC3E,CAAC;IAED,aAAa,CAAC,EACV,KAAK,EACL,SAAS,EACT,KAAK,EACc;QACnB,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAA;QAErD,MAAM,KAAK,GAA2B,EAAE,CAAA;QAExC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAK;YACvD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAsB,CAAC,CAAA;YAC9C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpB,CAAC;QAED,OAAO,KAAK,CAAA;IAChB,CAAC;IAED,YAAY,CAAC,EACT,KAAK,EACL,SAAS,EACT,OAAO,EAAE,QAAQ,EACC;QAClB,IAAI,IAA4B,CAAA;QAEhC,IAAI,KAAK,EAAE,CAAC;YACR,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAA;YAEpD,IAAI,CAAC,OAAO,EAAE,CAAC;gBACX,IAAI,QAAQ,KAAK,SAAS;oBAAE,OAAO,QAAQ,CAAA;gBAC3C,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,CAAA;YAC7C,CAAC;YAED,IAAI,GAAG,SAAS,CAAC,OAAsB,CAAC,CAAA;QAC5C,CAAC;aAAM,CAAC;YACJ,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;QAC/C,CAAC;QAED,OAAO,IAAI,IAAI,QAAQ,CAAA;IAC3B,CAAC;IAED,YAAY,CAAC,EAAE,KAAK,EAAE,KAAK,EAAuB;QAC9C,MAAM,OAAO,GAAG,KAAK;YACjB,CAAC,CAAC,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC;YACtC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAA;QAE/B,IAAI,CAAC,OAAO,EAAE,CAAC;YACX,MAAM,IAAI,wBAAwB,CAAC,KAAK,CAAC,CAAA;QAC7C,CAAC;QAED,OAAO,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;IACzC,CAAC;IAED,aAAa,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAwB;QACvD,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAA;QAErD,MAAM,QAAQ,GAAU,EAAE,CAAA;QAE1B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,KAAK,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAK;YAC1D,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAC3C,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACvB,CAAC;QAED,OAAO,QAAQ,CAAA;IACnB,CAAC;CACJ"}
@@ -0,0 +1,7 @@
1
+ export type QueryConfig = {
2
+ value: string;
3
+ type: "css" | "xpath";
4
+ };
5
+ export declare function css(query: string): QueryConfig;
6
+ export declare function xpath(query: string): QueryConfig;
7
+ //# sourceMappingURL=query-builders.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query-builders.d.ts","sourceRoot":"","sources":["../src/query-builders.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,WAAW,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,EAAE,KAAK,GAAG,OAAO,CAAA;CACxB,CAAA;AAED,wBAAgB,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAK9C;AAED,wBAAgB,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAKhD"}
@@ -0,0 +1,13 @@
1
/**
 * Wraps a CSS selector string in a query descriptor.
 *
 * @param query CSS selector text; stored unchanged under `value`.
 * @returns A descriptor object whose `type` is `"css"`.
 */
export function css(query) {
    const descriptor = { value: query, type: "css" };
    return descriptor;
}
7
/**
 * Wraps an XPath expression string in a query descriptor.
 *
 * @param query XPath expression text; stored unchanged under `value`.
 * @returns A descriptor object whose `type` is `"xpath"`.
 */
export function xpath(query) {
    const value = query;
    return { value, type: "xpath" };
}
13
+ //# sourceMappingURL=query-builders.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query-builders.js","sourceRoot":"","sources":["../src/query-builders.ts"],"names":[],"mappings":"AAKA,MAAM,UAAU,GAAG,CAAC,KAAa;IAC7B,OAAO;QACH,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,KAAK;KACd,CAAA;AACL,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,KAAa;IAC/B,OAAO;QACH,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,OAAO;KAChB,CAAA;AACL,CAAC"}
@@ -0,0 +1,3 @@
1
+ export * from "./select-first-element.js";
2
+ export * from "./select-many-elements.js";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,cAAc,2BAA2B,CAAA;AACzC,cAAc,2BAA2B,CAAA"}
@@ -0,0 +1,3 @@
1
+ export * from "./select-first-element.js";
2
+ export * from "./select-many-elements.js";
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,cAAc,2BAA2B,CAAA;AACzC,cAAc,2BAA2B,CAAA"}
@@ -0,0 +1,3 @@
1
+ import { QueryConfig } from "../query-builders.js";
2
+ export declare function selectFirstElement(query: QueryConfig, root: Element | Document): Element | null;
3
+ //# sourceMappingURL=select-first-element.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"select-first-element.d.ts","sourceRoot":"","sources":["../../src/utils/select-first-element.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAGlD,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,GAAG,QAAQ,GAAG,OAAO,GAAG,IAAI,CAc/F"}
@@ -0,0 +1,11 @@
1
+ import { toDocument } from "./to-document.js";
2
+ export function selectFirstElement(query, root) {
3
+ if (query.type === "css") {
4
+ return root.querySelector(query.value);
5
+ }
6
+ else {
7
+ const document = toDocument(root);
8
+ return document.evaluate(query.value, root, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
9
+ }
10
+ }
11
+ //# sourceMappingURL=select-first-element.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"select-first-element.js","sourceRoot":"","sources":["../../src/utils/select-first-element.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAE7C,MAAM,UAAU,kBAAkB,CAAC,KAAkB,EAAE,IAAwB;IAC3E,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QACvB,OAAO,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IAC1C,CAAC;SAAM,CAAC;QACJ,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAA;QAEjC,OAAO,QAAQ,CAAC,QAAQ,CACpB,KAAK,CAAC,KAAK,EACX,IAAI,EACJ,IAAI,EACJ,WAAW,CAAC,uBAAuB,EACnC,IAAI,CACP,CAAC,eAAiC,CAAA;IACvC,CAAC;AACL,CAAC"}
@@ -0,0 +1,3 @@
1
+ import { QueryConfig } from "../query-builders.js";
2
+ export declare function selectManyElements(query: QueryConfig, root: Element | Document): Element[];
3
+ //# sourceMappingURL=select-many-elements.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"select-many-elements.d.ts","sourceRoot":"","sources":["../../src/utils/select-many-elements.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAGlD,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,GAAG,QAAQ,GAAG,OAAO,EAAE,CAwB1F"}
@@ -0,0 +1,18 @@
1
+ import { toDocument } from "./to-document.js";
2
+ export function selectManyElements(query, root) {
3
+ if (query.type === "css") {
4
+ return Array.from(root.querySelectorAll(query.value));
5
+ }
6
+ else {
7
+ const document = toDocument(root);
8
+ const result = document.evaluate(query.value, root, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
9
+ const elements = [];
10
+ let node = result.iterateNext();
11
+ while (node) {
12
+ elements.push(node);
13
+ node = result.iterateNext();
14
+ }
15
+ return elements;
16
+ }
17
+ }
18
+ //# sourceMappingURL=select-many-elements.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"select-many-elements.js","sourceRoot":"","sources":["../../src/utils/select-many-elements.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAE7C,MAAM,UAAU,kBAAkB,CAAC,KAAkB,EAAE,IAAwB;IAC3E,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QACvB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAA;IACzD,CAAC;SAAM,CAAC;QACJ,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAA;QAEjC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAC5B,KAAK,CAAC,KAAK,EACX,IAAI,EACJ,IAAI,EACJ,WAAW,CAAC,0BAA0B,EACtC,IAAI,CACP,CAAA;QAED,MAAM,QAAQ,GAAc,EAAE,CAAA;QAC9B,IAAI,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,CAAA;QAE/B,OAAO,IAAI,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,IAAe,CAAC,CAAA;YAC9B,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,CAAA;QAC/B,CAAC;QAED,OAAO,QAAQ,CAAA;IACnB,CAAC;AACL,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function toDocument(root: Element | Document): Document;
2
+ //# sourceMappingURL=to-document.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"to-document.d.ts","sourceRoot":"","sources":["../../src/utils/to-document.ts"],"names":[],"mappings":"AAAA,wBAAgB,UAAU,CAAC,IAAI,EAAE,OAAO,GAAG,QAAQ,GAAG,QAAQ,CAM7D"}
@@ -0,0 +1,7 @@
1
/**
 * Resolves the document on which XPath expressions for `root` are evaluated.
 *
 * - A document is returned as-is.
 * - An element resolves to its `ownerDocument`, i.e. the document it already
 *   belongs to, so no serialisation or re-parsing takes place and the
 *   returned document is the one that actually contains `root`.
 * - Only when no owner document is available does this fall back to parsing
 *   the element's `outerHTML` into a fresh `text/html` document.
 *
 * @param root Element or document to resolve.
 * @returns The document to evaluate against.
 */
export function toDocument(root) {
    // nodeType 9 is Node.DOCUMENT_NODE. Checking the node type rather than
    // `instanceof Document` also recognises documents created in another
    // realm (e.g. an iframe), where the `Document` constructor is a
    // different object and `instanceof` returns false.
    if (root.nodeType === 9) {
        return root;
    }
    const owner = root.ownerDocument;
    if (owner) {
        return owner;
    }
    // Last resort, kept for inputs that expose no owner document.
    return new DOMParser().parseFromString(root.outerHTML, "text/html");
}
7
+ //# sourceMappingURL=to-document.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"to-document.js","sourceRoot":"","sources":["../../src/utils/to-document.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,UAAU,CAAC,IAAwB;IAC/C,IAAI,IAAI,YAAY,QAAQ,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAA;IACf,CAAC;IAED,OAAO,IAAI,SAAS,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,CAAC,CAAA;AACvE,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@xcrap/dom",
3
- "version": "0.0.1",
4
- "description": "DOM parsing and extraction utilities",
3
+ "version": "0.1.0",
4
+ "description": "Xcrap DOM is a client-side package designed for DOM data extraction using declarative models. It works natively in browser environments (browser extensions, TamperMonkey user scripts, etc.) and supports both CSS selectors and XPath queries out of the box.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
7
7
  "scripts": {
@@ -21,7 +21,7 @@
21
21
  ],
22
22
  "author": "Marcuth",
23
23
  "license": "MIT",
24
- "type": "commonjs",
24
+ "type": "module",
25
25
  "devDependencies": {
26
26
  "@types/jest": "^30.0.0",
27
27
  "ts-jest": "^29.4.6",