@mkven/xml-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -0
- package/package.json +45 -0
package/README.md
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# @mkven/xml-parser
|
|
2
|
+
|
|
3
|
+
Opinionated XML parsing utilities built on top of [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser). Designed for processing XML data feeds — both as raw buffers and from ZIP archives.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pnpm add @mkven/xml-parser
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Classes
|
|
12
|
+
|
|
13
|
+
### `XmlParser`
|
|
14
|
+
|
|
15
|
+
Parses XML buffers and readable streams with the following conventions:
|
|
16
|
+
|
|
17
|
+
- **Tag names** are uppercased (`<ticket>` → `TICKET`)
|
|
18
|
+
- **Attributes** are prefixed with `$` (`id="1"` → `$id: "1"`)
|
|
19
|
+
- **All values are strings** — numbers are never parsed (`"007"` stays `"007"`)
|
|
20
|
+
- **`"null"` attribute values** are converted to an empty string
|
|
21
|
+
- **Whitespace** in attribute values is normalized (tabs, newlines, non-breaking spaces (`&nbsp;`), invisible Unicode → collapsed/trimmed)
|
|
22
|
+
- **Malformed XML** with `><<` and `>&<` patterns is auto-escaped before parsing
|
|
23
|
+
|
|
24
|
+
```typescript
|
|
25
|
+
import { XmlParser } from "@mkven/xml-parser";
|
|
26
|
+
|
|
27
|
+
const parser = new XmlParser();
|
|
28
|
+
|
|
29
|
+
// Parse a buffer
|
|
30
|
+
const result = parser.parse(buffer);
|
|
31
|
+
// { ROOT: { ROW: [{ $id: "1" }, { $id: "2" }] } }
|
|
32
|
+
|
|
33
|
+
// Stream-parse by tag name
|
|
34
|
+
const stream = parser.createParseReadStream(readableStream, "ticket");
|
|
35
|
+
stream.on("data", async (rows: Record<string, string>[] | null) => {
|
|
36
|
+
// rows is an array of matched elements, or null if the chunk had no matching tags
|
|
37
|
+
});
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### `XmlZipParser`
|
|
41
|
+
|
|
42
|
+
Extends `XmlParser` with ZIP archive support. Extracts `.xml` files from a ZIP archive and parses them.
|
|
43
|
+
|
|
44
|
+
```typescript
|
|
45
|
+
import { XmlZipParser } from "@mkven/xml-parser";
|
|
46
|
+
|
|
47
|
+
const parser = new XmlZipParser();
|
|
48
|
+
|
|
49
|
+
// Parse all XML files from a ZIP buffer (sync, via adm-zip)
|
|
50
|
+
const results = parser.parseFromZip(zipBuffer);
|
|
51
|
+
// [{ name: "data.xml", parsedData: { ROOT: { ROW: [...] } } }]
|
|
52
|
+
|
|
53
|
+
// Stream XML files from a ZIP ReadStream (async, via unzipper)
|
|
54
|
+
for await (const { name, stream } of parser.createParseReadStreamsGetterFromZip(readStream, "ticket")) {
|
|
55
|
+
stream.on("data", async (rows) => { /* ... */ });
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### `ZipParser`
|
|
60
|
+
|
|
61
|
+
Low-level static ZIP extraction utilities used internally by `XmlZipParser`.
|
|
62
|
+
|
|
63
|
+
- `ZipParser.getEntries(buffer)` — sync extraction via `adm-zip`
|
|
64
|
+
- `ZipParser.createReadStreamsGetterFromEntries(readStream)` — async streaming via `unzipper`
|
|
65
|
+
|
|
66
|
+
## Options
|
|
67
|
+
|
|
68
|
+
### `alwaysArray`
|
|
69
|
+
|
|
70
|
+
By default, `fast-xml-parser` returns a single child element as an object and multiple children as an array. Pass `{ alwaysArray: true }` to always wrap child elements in arrays (root element and attributes are not affected):
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
// Default behavior
|
|
74
|
+
const parser = new XmlParser();
|
|
75
|
+
parser.parse(xml);
|
|
76
|
+
// Single child: { ROOT: { ROW: { $id: "1" } } } — object
|
|
77
|
+
// Multiple children: { ROOT: { ROW: [{ $id: "1" }, ...] } } — array
|
|
78
|
+
|
|
79
|
+
// With alwaysArray
|
|
80
|
+
const arrayParser = new XmlParser({ alwaysArray: true });
|
|
81
|
+
arrayParser.parse(xml);
|
|
82
|
+
// Single child: { ROOT: { ROW: [{ $id: "1" }] } } — always array
|
|
83
|
+
// Multiple children: { ROOT: { ROW: [{ $id: "1" }, ...] } } — array
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Interfaces
|
|
87
|
+
|
|
88
|
+
- `IXmlParser` — interface for `XmlParser` (generic `parse<T>`, `createParseReadStream<T>`)
|
|
89
|
+
- `IXmlZipParser` — extends `IXmlParser` with `parseFromZip<T>` and `createParseReadStreamsGetterFromZip<T>`
|
|
90
|
+
- `ParseReadStream<T>` — stream-like object with typed `on("data", listener)` method
|
|
91
|
+
|
|
92
|
+
## Dependencies
|
|
93
|
+
|
|
94
|
+
- [fast-xml-parser](https://www.npmjs.com/package/fast-xml-parser) — XML parsing engine
|
|
95
|
+
- [adm-zip](https://www.npmjs.com/package/adm-zip) — synchronous ZIP extraction
|
|
96
|
+
- [unzipper](https://www.npmjs.com/package/unzipper) — streaming ZIP extraction
|
|
97
|
+
|
|
98
|
+
## Development
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
sh check.sh # lint (biome) + typecheck (tsc) + tests (vitest)
|
|
102
|
+
sh health.sh # gitleaks + outdated deps + audit
|
|
103
|
+
sh all-checks.sh # both
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Releasing
|
|
107
|
+
|
|
108
|
+
Uses [release-it](https://github.com/release-it/release-it) with conventional changelog:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pnpm run release:dry # preview
|
|
112
|
+
pnpm run release # bump version, update CHANGELOG.md, tag, publish to npm
|
|
113
|
+
```
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mkven/xml-parser",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Opinionated XML parsing utilities with ZIP archive support",
|
|
5
|
+
"author": "Damir Manapov",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"main": "dist/index.js",
|
|
9
|
+
"types": "dist/index.d.ts",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"types": "./dist/index.d.ts",
|
|
13
|
+
"import": "./dist/index.js"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist"
|
|
18
|
+
],
|
|
19
|
+
"publishConfig": {
|
|
20
|
+
"access": "public"
|
|
21
|
+
},
|
|
22
|
+
"dependencies": {
|
|
23
|
+
"adm-zip": "^0.5.16",
|
|
24
|
+
"fast-xml-parser": "^5.2.3",
|
|
25
|
+
"unzipper": "^0.12.3"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@biomejs/biome": "^2.0.6",
|
|
29
|
+
"@release-it/conventional-changelog": "^10.0.0",
|
|
30
|
+
"@types/adm-zip": "^0.5.7",
|
|
31
|
+
"@types/node": "^25.2.3",
|
|
32
|
+
"@types/unzipper": "^0.10.11",
|
|
33
|
+
"release-it": "^19.0.3",
|
|
34
|
+
"typescript": "^5.8.3",
|
|
35
|
+
"vitest": "^4.0.18"
|
|
36
|
+
},
|
|
37
|
+
"scripts": {
|
|
38
|
+
"build": "tsc",
|
|
39
|
+
"lint": "biome check --write .",
|
|
40
|
+
"typecheck": "tsc --noEmit",
|
|
41
|
+
"test": "vitest run",
|
|
42
|
+
"release": "release-it",
|
|
43
|
+
"release:dry": "release-it --dry-run"
|
|
44
|
+
}
|
|
45
|
+
}
|