@mkven/xml-parser 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -6
- package/dist/XmlParser.d.ts +15 -0
- package/dist/XmlParser.js +68 -0
- package/dist/XmlZipParser.d.ts +21 -0
- package/dist/XmlZipParser.js +35 -0
- package/dist/ZipParser.d.ts +13 -0
- package/dist/ZipParser.js +29 -0
- package/dist/cjs/XmlParser.js +70 -0
- package/dist/cjs/XmlZipParser.js +40 -0
- package/dist/cjs/ZipParser.js +34 -0
- package/dist/cjs/index.js +12 -0
- package/dist/cjs/types.js +2 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +3 -0
- package/dist/types.d.ts +20 -0
- package/dist/types.js +1 -0
- package/package.json +4 -3
package/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# @mkven/xml-parser
|
|
2
2
|
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@mkven/xml-parser)
|
|
4
|
+
|
|
3
5
|
Opinionated XML parsing utilities built on top of [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser). Designed for processing XML data feeds — both as raw buffers and from ZIP archives.
|
|
4
6
|
|
|
5
7
|
## Installation
|
|
@@ -21,6 +23,14 @@ Parses XML buffers and readable streams with the following conventions:
|
|
|
21
23
|
- **Whitespace** in attribute values is normalized (tabs, newlines, `&nbsp;`, invisible Unicode → collapsed/trimmed)
|
|
22
24
|
- **Malformed XML** with `><<` and `>&<` patterns is auto-escaped before parsing
|
|
23
25
|
|
|
26
|
+
#### `parse<T>(data: Buffer, validationOptions?): T`
|
|
27
|
+
|
|
28
|
+
Parses an XML buffer and returns the parsed object.
|
|
29
|
+
|
|
30
|
+
#### `createParseReadStream<T>(stream: Readable, rowTag: string, validationOptions?): ParseReadStream<T>`
|
|
31
|
+
|
|
32
|
+
Wraps a readable stream into a chunked parser that emits arrays of parsed rows matching `rowTag`. Each `"data"` event receives `T[] | null` (`null` when a chunk has no matching tags).
|
|
33
|
+
|
|
24
34
|
```typescript
|
|
25
35
|
import { XmlParser } from "@mkven/xml-parser";
|
|
26
36
|
|
|
@@ -39,7 +49,19 @@ stream.on("data", async (rows: Record<string, string>[] | null) => {
|
|
|
39
49
|
|
|
40
50
|
### `XmlZipParser`
|
|
41
51
|
|
|
42
|
-
Extends `XmlParser` with ZIP archive support.
|
|
52
|
+
Extends `XmlParser` with ZIP archive support. Only `.xml` files inside the archive are processed; other entries are skipped.
|
|
53
|
+
|
|
54
|
+
#### `parseFromZip(data: Buffer, validationOptions?): Array<{ name, parsedData }>`
|
|
55
|
+
|
|
56
|
+
Synchronous. Extracts all XML files from a ZIP buffer via `adm-zip` and parses each one.
|
|
57
|
+
|
|
58
|
+
#### `static createReadStreamsGetterFromZip(data: ReadStream): AsyncGenerator<{ name, stream }>`
|
|
59
|
+
|
|
60
|
+
Static async generator. Streams ZIP entries via `unzipper`, yielding raw readable streams for each `.xml` entry. Non-XML entries are autodrained. Useful when you need the raw XML stream without parsing.
|
|
61
|
+
|
|
62
|
+
#### `createParseReadStreamsGetterFromZip<T>(data: ReadStream, rowTag: string, validationOptions?): AsyncGenerator<{ name, stream: ParseReadStream<T> }>`
|
|
63
|
+
|
|
64
|
+
Instance async generator. Combines the static streaming method above with `createParseReadStream`, yielding a `ParseReadStream<T>` per XML entry.
|
|
43
65
|
|
|
44
66
|
```typescript
|
|
45
67
|
import { XmlZipParser } from "@mkven/xml-parser";
|
|
@@ -50,7 +72,12 @@ const parser = new XmlZipParser();
|
|
|
50
72
|
const results = parser.parseFromZip(zipBuffer);
|
|
51
73
|
// [{ name: "data.xml", parsedData: { ROOT: { ROW: [...] } } }]
|
|
52
74
|
|
|
53
|
-
// Stream XML
|
|
75
|
+
// Stream raw XML entries from a ZIP (static, no parsing)
|
|
76
|
+
for await (const { name, stream } of XmlZipParser.createReadStreamsGetterFromZip(readStream)) {
|
|
77
|
+
// stream is a raw Readable for each .xml file
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Stream and parse XML entries from a ZIP
|
|
54
81
|
for await (const { name, stream } of parser.createParseReadStreamsGetterFromZip(readStream, "ticket")) {
|
|
55
82
|
stream.on("data", async (rows) => { /* ... */ });
|
|
56
83
|
}
|
|
@@ -58,10 +85,15 @@ for await (const { name, stream } of parser.createParseReadStreamsGetterFromZip(
|
|
|
58
85
|
|
|
59
86
|
### `ZipParser`
|
|
60
87
|
|
|
61
|
-
Low-level static ZIP extraction
|
|
88
|
+
Low-level static utilities for ZIP extraction. Used internally by `XmlZipParser`, but exported for direct use.
|
|
89
|
+
|
|
90
|
+
#### `static getEntries(data: Buffer): Array<{ name: string, data: Buffer }>`
|
|
91
|
+
|
|
92
|
+
Synchronous extraction via `adm-zip`. Returns all entries with their names and data buffers.
|
|
62
93
|
|
|
63
|
-
|
|
64
|
-
|
|
94
|
+
#### `static createReadStreamsGetterFromEntries(data: ReadStream): AsyncGenerator<{ name: string, stream: Entry }>`
|
|
95
|
+
|
|
96
|
+
Async generator via `unzipper`. Streams ZIP entries one by one. Directory prefixes are stripped from entry names via `basename`.
|
|
65
97
|
|
|
66
98
|
## Options
|
|
67
99
|
|
|
@@ -83,10 +115,14 @@ parser.parse(xml);
|
|
|
83
115
|
// Multiple children: { ROOT: { ROW: [{ $id: "1" }, ...] } } — array
|
|
84
116
|
```
|
|
85
117
|
|
|
118
|
+
### `validationOptions`
|
|
119
|
+
|
|
120
|
+
All parsing methods accept an optional `validationOptions` parameter (`ValidationOptions | boolean` from `fast-xml-parser`). Pass `true` to enable validation, or a `ValidationOptions` object for fine-grained control.
|
|
121
|
+
|
|
86
122
|
## Interfaces
|
|
87
123
|
|
|
88
124
|
- `IXmlParser` — interface for `XmlParser` (generic `parse<T>`, `createParseReadStream<T>`)
|
|
89
|
-
- `IXmlZipParser` — extends `IXmlParser` with `parseFromZip<T
|
|
125
|
+
- `IXmlZipParser` — extends `IXmlParser` with `parseFromZip<T>`, `createParseReadStreamsGetterFromZip<T>`
|
|
90
126
|
- `ParseReadStream<T>` — stream-like object with typed `on("data", listener)` method
|
|
91
127
|
|
|
92
128
|
## Dependencies
|
|
@@ -111,3 +147,7 @@ Uses [release-it](https://github.com/release-it/release-it) with conventional ch
|
|
|
111
147
|
pnpm run release:dry # preview
|
|
112
148
|
pnpm run release # bump version, update CHANGELOG.md, tag, publish to npm
|
|
113
149
|
```
|
|
150
|
+
|
|
151
|
+
## License
|
|
152
|
+
|
|
153
|
+
MIT
|
|
package/dist/XmlParser.d.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { type Readable } from "node:stream";
|
|
2
|
+
import { type validationOptions as ValidationOptions, XMLParser } from "fast-xml-parser";
|
|
3
|
+
import type { IXmlParser } from "./types.js";
|
|
4
|
+
declare class XmlParser implements IXmlParser {
|
|
5
|
+
protected parser: XMLParser;
|
|
6
|
+
constructor(options?: {
|
|
7
|
+
alwaysArray?: boolean;
|
|
8
|
+
});
|
|
9
|
+
private static prepareFileContent;
|
|
10
|
+
parse(data: Buffer, validationOptions?: ValidationOptions | boolean): any;
|
|
11
|
+
createParseReadStream<T = Record<string, string>>(stream: Readable, rowTag: string, validationOptions?: ValidationOptions | boolean): {
|
|
12
|
+
on: (_event: "data", listener: (parseXmlData: T[] | null) => Promise<void>) => void;
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
export default XmlParser;
|
|
package/dist/XmlParser.js
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { Transform } from "node:stream";
|
|
2
|
+
import { XMLParser, } from "fast-xml-parser";
|
|
3
|
+
const allVariationsOfSpaces = /(?:\s|&nbsp;)+/;
|
|
4
|
+
const tabulationsCarriageReturnsAndEscapes = /[\t\n\r]/;
|
|
5
|
+
const notUsedUnicodeStringCodes = /[\u0002\u0003\u200B\u202A\u202B]/;
|
|
6
|
+
class XmlParser {
|
|
7
|
+
parser;
|
|
8
|
+
constructor(options) {
|
|
9
|
+
this.parser = new XMLParser({
|
|
10
|
+
trimValues: true,
|
|
11
|
+
numberParseOptions: {
|
|
12
|
+
skipLike: /\d/mu,
|
|
13
|
+
hex: true,
|
|
14
|
+
leadingZeros: true,
|
|
15
|
+
},
|
|
16
|
+
ignoreAttributes: false,
|
|
17
|
+
attributeNamePrefix: "$",
|
|
18
|
+
transformTagName: (tagName) => tagName.toUpperCase(),
|
|
19
|
+
...(options?.alwaysArray && {
|
|
20
|
+
isArray: (_name, jpath, _isLeaf, isAttribute) => isAttribute === undefined && jpath.includes("."),
|
|
21
|
+
}),
|
|
22
|
+
attributeValueProcessor(_attrName, attrValue) {
|
|
23
|
+
if (attrValue === "null") {
|
|
24
|
+
return "";
|
|
25
|
+
}
|
|
26
|
+
return attrValue
|
|
27
|
+
.replaceAll(new RegExp(allVariationsOfSpaces, "gu"), " ")
|
|
28
|
+
.replaceAll(new RegExp(tabulationsCarriageReturnsAndEscapes, "gu"), " ")
|
|
29
|
+
.replaceAll(new RegExp(notUsedUnicodeStringCodes, "gu"), "")
|
|
30
|
+
.trim();
|
|
31
|
+
},
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
static prepareFileContent(content) {
|
|
35
|
+
return content.replaceAll("><<", ">&lt;<").replaceAll(">&<", ">&amp;<");
|
|
36
|
+
}
|
|
37
|
+
parse(data, validationOptions) {
|
|
38
|
+
const prepared = XmlParser.prepareFileContent(data.toString());
|
|
39
|
+
return this.parser.parse(prepared, validationOptions);
|
|
40
|
+
}
|
|
41
|
+
createParseReadStream(stream, rowTag, validationOptions) {
|
|
42
|
+
const tagRegex = new RegExp(`<${rowTag} (\n|.)*>`, "gu");
|
|
43
|
+
return {
|
|
44
|
+
on: (_event, listener) => {
|
|
45
|
+
let remainingXmlData = "";
|
|
46
|
+
const dataTransform = new Transform({
|
|
47
|
+
transform: async (chunk, _encoding, next) => {
|
|
48
|
+
const chunkString = remainingXmlData + chunk;
|
|
49
|
+
const tags = chunkString.match(tagRegex);
|
|
50
|
+
if (!tags) {
|
|
51
|
+
await listener(null);
|
|
52
|
+
return next();
|
|
53
|
+
}
|
|
54
|
+
const xmlData = XmlParser.prepareFileContent(tags.join("\n"));
|
|
55
|
+
const result = this.parser.parse(`<DATA>${xmlData}</DATA>`, validationOptions);
|
|
56
|
+
remainingXmlData = chunkString.replaceAll(tagRegex, "");
|
|
57
|
+
await listener(Array.isArray(result.DATA[rowTag.toUpperCase()])
|
|
58
|
+
? result.DATA[rowTag.toUpperCase()]
|
|
59
|
+
: [result.DATA[rowTag.toUpperCase()]]);
|
|
60
|
+
return next();
|
|
61
|
+
},
|
|
62
|
+
});
|
|
63
|
+
stream.pipe(dataTransform);
|
|
64
|
+
},
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
export default XmlParser;
|
|
package/dist/XmlZipParser.d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { ReadStream } from "node:fs";
|
|
2
|
+
import type { validationOptions as ValidationOptions } from "fast-xml-parser";
|
|
3
|
+
import type { IXmlZipParser } from "./types.js";
|
|
4
|
+
import XmlParser from "./XmlParser.js";
|
|
5
|
+
declare class XmlZipParser extends XmlParser implements IXmlZipParser {
|
|
6
|
+
parseFromZip(data: Buffer, validationOptions?: ValidationOptions | boolean): {
|
|
7
|
+
name: string;
|
|
8
|
+
parsedData: any;
|
|
9
|
+
}[];
|
|
10
|
+
static createReadStreamsGetterFromZip(data: ReadStream): AsyncGenerator<{
|
|
11
|
+
name: string;
|
|
12
|
+
stream: import("unzipper").Entry;
|
|
13
|
+
}, void, unknown>;
|
|
14
|
+
createParseReadStreamsGetterFromZip<T = Record<string, string>>(data: ReadStream, rowTag: string, validationOptions?: ValidationOptions | boolean): AsyncGenerator<{
|
|
15
|
+
name: string;
|
|
16
|
+
stream: {
|
|
17
|
+
on: (_event: "data", listener: (parseXmlData: T[] | null) => Promise<void>) => void;
|
|
18
|
+
};
|
|
19
|
+
}, void, unknown>;
|
|
20
|
+
}
|
|
21
|
+
export default XmlZipParser;
|
|
package/dist/XmlZipParser.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { extname } from "node:path";
|
|
2
|
+
import XmlParser from "./XmlParser.js";
|
|
3
|
+
import ZipParser from "./ZipParser.js";
|
|
4
|
+
const XML_EXTENSION = ".xml";
|
|
5
|
+
class XmlZipParser extends XmlParser {
|
|
6
|
+
parseFromZip(data, validationOptions) {
|
|
7
|
+
const entries = ZipParser.getEntries(data);
|
|
8
|
+
const xmlEntries = entries.filter((entry) => extname(entry.name) === XML_EXTENSION);
|
|
9
|
+
return xmlEntries.map((xmlEntry) => ({
|
|
10
|
+
name: xmlEntry.name,
|
|
11
|
+
parsedData: this.parse(xmlEntry.data, validationOptions),
|
|
12
|
+
}));
|
|
13
|
+
}
|
|
14
|
+
static async *createReadStreamsGetterFromZip(data) {
|
|
15
|
+
const entries = ZipParser.createReadStreamsGetterFromEntries(data);
|
|
16
|
+
for await (const entry of entries) {
|
|
17
|
+
if (extname(entry.name) === XML_EXTENSION) {
|
|
18
|
+
yield entry;
|
|
19
|
+
}
|
|
20
|
+
else {
|
|
21
|
+
entry.stream.autodrain();
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
async *createParseReadStreamsGetterFromZip(data, rowTag, validationOptions) {
|
|
26
|
+
const xmlEntries = XmlZipParser.createReadStreamsGetterFromZip(data);
|
|
27
|
+
for await (const entry of xmlEntries) {
|
|
28
|
+
yield {
|
|
29
|
+
name: entry.name,
|
|
30
|
+
stream: this.createParseReadStream(entry.stream, rowTag, validationOptions),
|
|
31
|
+
};
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
export default XmlZipParser;
|
|
package/dist/ZipParser.d.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ReadStream } from "node:fs";
|
|
2
|
+
import unzipper from "unzipper";
|
|
3
|
+
declare class ZipParser {
|
|
4
|
+
static createReadStreamsGetterFromEntries(data: ReadStream): AsyncGenerator<{
|
|
5
|
+
name: string;
|
|
6
|
+
stream: unzipper.Entry;
|
|
7
|
+
}, void, unknown>;
|
|
8
|
+
static getEntries(data: Buffer): {
|
|
9
|
+
name: string;
|
|
10
|
+
data: Buffer<ArrayBufferLike>;
|
|
11
|
+
}[];
|
|
12
|
+
}
|
|
13
|
+
export default ZipParser;
|
|
package/dist/ZipParser.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { basename } from "node:path";
|
|
2
|
+
import AdmZip from "adm-zip";
|
|
3
|
+
import unzipper from "unzipper";
|
|
4
|
+
class ZipParser {
|
|
5
|
+
static async *createReadStreamsGetterFromEntries(data) {
|
|
6
|
+
const zip = data.pipe(unzipper.Parse({ forceStream: true }));
|
|
7
|
+
data.on("close", () => zip.end());
|
|
8
|
+
for await (const entry of zip) {
|
|
9
|
+
const typedEntry = entry;
|
|
10
|
+
const fileName = entry.path;
|
|
11
|
+
yield {
|
|
12
|
+
name: basename(fileName),
|
|
13
|
+
stream: typedEntry,
|
|
14
|
+
};
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
static getEntries(data) {
|
|
18
|
+
const zip = new AdmZip(data);
|
|
19
|
+
const zipEntries = zip.getEntries();
|
|
20
|
+
return zipEntries.map((entry) => {
|
|
21
|
+
const entryData = entry.getData();
|
|
22
|
+
return {
|
|
23
|
+
name: entry.name,
|
|
24
|
+
data: entryData,
|
|
25
|
+
};
|
|
26
|
+
});
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
export default ZipParser;
|
|
package/dist/cjs/XmlParser.js
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const node_stream_1 = require("node:stream");
|
|
4
|
+
const fast_xml_parser_1 = require("fast-xml-parser");
|
|
5
|
+
const allVariationsOfSpaces = /(?:\s|&nbsp;)+/;
|
|
6
|
+
const tabulationsCarriageReturnsAndEscapes = /[\t\n\r]/;
|
|
7
|
+
const notUsedUnicodeStringCodes = /[\u0002\u0003\u200B\u202A\u202B]/;
|
|
8
|
+
class XmlParser {
|
|
9
|
+
parser;
|
|
10
|
+
constructor(options) {
|
|
11
|
+
this.parser = new fast_xml_parser_1.XMLParser({
|
|
12
|
+
trimValues: true,
|
|
13
|
+
numberParseOptions: {
|
|
14
|
+
skipLike: /\d/mu,
|
|
15
|
+
hex: true,
|
|
16
|
+
leadingZeros: true,
|
|
17
|
+
},
|
|
18
|
+
ignoreAttributes: false,
|
|
19
|
+
attributeNamePrefix: "$",
|
|
20
|
+
transformTagName: (tagName) => tagName.toUpperCase(),
|
|
21
|
+
...(options?.alwaysArray && {
|
|
22
|
+
isArray: (_name, jpath, _isLeaf, isAttribute) => isAttribute === undefined && jpath.includes("."),
|
|
23
|
+
}),
|
|
24
|
+
attributeValueProcessor(_attrName, attrValue) {
|
|
25
|
+
if (attrValue === "null") {
|
|
26
|
+
return "";
|
|
27
|
+
}
|
|
28
|
+
return attrValue
|
|
29
|
+
.replaceAll(new RegExp(allVariationsOfSpaces, "gu"), " ")
|
|
30
|
+
.replaceAll(new RegExp(tabulationsCarriageReturnsAndEscapes, "gu"), " ")
|
|
31
|
+
.replaceAll(new RegExp(notUsedUnicodeStringCodes, "gu"), "")
|
|
32
|
+
.trim();
|
|
33
|
+
},
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
static prepareFileContent(content) {
|
|
37
|
+
return content.replaceAll("><<", ">&lt;<").replaceAll(">&<", ">&amp;<");
|
|
38
|
+
}
|
|
39
|
+
parse(data, validationOptions) {
|
|
40
|
+
const prepared = XmlParser.prepareFileContent(data.toString());
|
|
41
|
+
return this.parser.parse(prepared, validationOptions);
|
|
42
|
+
}
|
|
43
|
+
createParseReadStream(stream, rowTag, validationOptions) {
|
|
44
|
+
const tagRegex = new RegExp(`<${rowTag} (\n|.)*>`, "gu");
|
|
45
|
+
return {
|
|
46
|
+
on: (_event, listener) => {
|
|
47
|
+
let remainingXmlData = "";
|
|
48
|
+
const dataTransform = new node_stream_1.Transform({
|
|
49
|
+
transform: async (chunk, _encoding, next) => {
|
|
50
|
+
const chunkString = remainingXmlData + chunk;
|
|
51
|
+
const tags = chunkString.match(tagRegex);
|
|
52
|
+
if (!tags) {
|
|
53
|
+
await listener(null);
|
|
54
|
+
return next();
|
|
55
|
+
}
|
|
56
|
+
const xmlData = XmlParser.prepareFileContent(tags.join("\n"));
|
|
57
|
+
const result = this.parser.parse(`<DATA>${xmlData}</DATA>`, validationOptions);
|
|
58
|
+
remainingXmlData = chunkString.replaceAll(tagRegex, "");
|
|
59
|
+
await listener(Array.isArray(result.DATA[rowTag.toUpperCase()])
|
|
60
|
+
? result.DATA[rowTag.toUpperCase()]
|
|
61
|
+
: [result.DATA[rowTag.toUpperCase()]]);
|
|
62
|
+
return next();
|
|
63
|
+
},
|
|
64
|
+
});
|
|
65
|
+
stream.pipe(dataTransform);
|
|
66
|
+
},
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
exports.default = XmlParser;
|
|
package/dist/cjs/XmlZipParser.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const node_path_1 = require("node:path");
|
|
7
|
+
const XmlParser_js_1 = __importDefault(require("./XmlParser.js"));
|
|
8
|
+
const ZipParser_js_1 = __importDefault(require("./ZipParser.js"));
|
|
9
|
+
const XML_EXTENSION = ".xml";
|
|
10
|
+
class XmlZipParser extends XmlParser_js_1.default {
|
|
11
|
+
parseFromZip(data, validationOptions) {
|
|
12
|
+
const entries = ZipParser_js_1.default.getEntries(data);
|
|
13
|
+
const xmlEntries = entries.filter((entry) => (0, node_path_1.extname)(entry.name) === XML_EXTENSION);
|
|
14
|
+
return xmlEntries.map((xmlEntry) => ({
|
|
15
|
+
name: xmlEntry.name,
|
|
16
|
+
parsedData: this.parse(xmlEntry.data, validationOptions),
|
|
17
|
+
}));
|
|
18
|
+
}
|
|
19
|
+
static async *createReadStreamsGetterFromZip(data) {
|
|
20
|
+
const entries = ZipParser_js_1.default.createReadStreamsGetterFromEntries(data);
|
|
21
|
+
for await (const entry of entries) {
|
|
22
|
+
if ((0, node_path_1.extname)(entry.name) === XML_EXTENSION) {
|
|
23
|
+
yield entry;
|
|
24
|
+
}
|
|
25
|
+
else {
|
|
26
|
+
entry.stream.autodrain();
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
async *createParseReadStreamsGetterFromZip(data, rowTag, validationOptions) {
|
|
31
|
+
const xmlEntries = XmlZipParser.createReadStreamsGetterFromZip(data);
|
|
32
|
+
for await (const entry of xmlEntries) {
|
|
33
|
+
yield {
|
|
34
|
+
name: entry.name,
|
|
35
|
+
stream: this.createParseReadStream(entry.stream, rowTag, validationOptions),
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
exports.default = XmlZipParser;
|
|
package/dist/cjs/ZipParser.js
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const node_path_1 = require("node:path");
|
|
7
|
+
const adm_zip_1 = __importDefault(require("adm-zip"));
|
|
8
|
+
const unzipper_1 = __importDefault(require("unzipper"));
|
|
9
|
+
class ZipParser {
|
|
10
|
+
static async *createReadStreamsGetterFromEntries(data) {
|
|
11
|
+
const zip = data.pipe(unzipper_1.default.Parse({ forceStream: true }));
|
|
12
|
+
data.on("close", () => zip.end());
|
|
13
|
+
for await (const entry of zip) {
|
|
14
|
+
const typedEntry = entry;
|
|
15
|
+
const fileName = entry.path;
|
|
16
|
+
yield {
|
|
17
|
+
name: (0, node_path_1.basename)(fileName),
|
|
18
|
+
stream: typedEntry,
|
|
19
|
+
};
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
static getEntries(data) {
|
|
23
|
+
const zip = new adm_zip_1.default(data);
|
|
24
|
+
const zipEntries = zip.getEntries();
|
|
25
|
+
return zipEntries.map((entry) => {
|
|
26
|
+
const entryData = entry.getData();
|
|
27
|
+
return {
|
|
28
|
+
name: entry.name,
|
|
29
|
+
data: entryData,
|
|
30
|
+
};
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
exports.default = ZipParser;
|
|
package/dist/cjs/index.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.ZipParser = exports.XmlZipParser = exports.XmlParser = void 0;
|
|
7
|
+
var XmlParser_js_1 = require("./XmlParser.js");
|
|
8
|
+
Object.defineProperty(exports, "XmlParser", { enumerable: true, get: function () { return __importDefault(XmlParser_js_1).default; } });
|
|
9
|
+
var XmlZipParser_js_1 = require("./XmlZipParser.js");
|
|
10
|
+
Object.defineProperty(exports, "XmlZipParser", { enumerable: true, get: function () { return __importDefault(XmlZipParser_js_1).default; } });
|
|
11
|
+
var ZipParser_js_1 = require("./ZipParser.js");
|
|
12
|
+
Object.defineProperty(exports, "ZipParser", { enumerable: true, get: function () { return __importDefault(ZipParser_js_1).default; } });
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { ReadStream } from "node:fs";
|
|
2
|
+
import type { Readable } from "node:stream";
|
|
3
|
+
import type { validationOptions as ValidationOptions } from "fast-xml-parser";
|
|
4
|
+
export interface ParseReadStream<T = Record<string, string>> {
|
|
5
|
+
on: (event: "data", listener: (parseXmlData: T[] | null) => Promise<void>) => void;
|
|
6
|
+
}
|
|
7
|
+
export interface IXmlParser {
|
|
8
|
+
parse<T = Record<string, unknown>>(data: Buffer, validationOptions?: ValidationOptions | boolean): T;
|
|
9
|
+
createParseReadStream<T = Record<string, string>>(stream: Readable, rowTag: string, validationOptions?: ValidationOptions | boolean): ParseReadStream<T>;
|
|
10
|
+
}
|
|
11
|
+
export interface IXmlZipParser extends IXmlParser {
|
|
12
|
+
parseFromZip<T = Record<string, unknown>>(data: Buffer, validationOptions?: ValidationOptions | boolean): Array<{
|
|
13
|
+
name: string;
|
|
14
|
+
parsedData: T;
|
|
15
|
+
}>;
|
|
16
|
+
createParseReadStreamsGetterFromZip<T = Record<string, string>>(data: ReadStream, rowTag: string, validationOptions?: ValidationOptions | boolean): AsyncGenerator<{
|
|
17
|
+
name: string;
|
|
18
|
+
stream: ParseReadStream<T>;
|
|
19
|
+
}>;
|
|
20
|
+
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mkven/xml-parser",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Opinionated XML parsing utilities with ZIP archive support",
|
|
5
5
|
"author": "Damir Manapov",
|
|
6
6
|
"license": "MIT",
|
|
@@ -10,7 +10,8 @@
|
|
|
10
10
|
"exports": {
|
|
11
11
|
".": {
|
|
12
12
|
"types": "./dist/index.d.ts",
|
|
13
|
-
"import": "./dist/index.js"
|
|
13
|
+
"import": "./dist/index.js",
|
|
14
|
+
"require": "./dist/cjs/index.js"
|
|
14
15
|
}
|
|
15
16
|
},
|
|
16
17
|
"files": [
|
|
@@ -35,7 +36,7 @@
|
|
|
35
36
|
"vitest": "^4.0.18"
|
|
36
37
|
},
|
|
37
38
|
"scripts": {
|
|
38
|
-
"build": "tsc",
|
|
39
|
+
"build": "tsc && tsc -p tsconfig.cjs.json",
|
|
39
40
|
"lint": "biome check --write .",
|
|
40
41
|
"typecheck": "tsc --noEmit",
|
|
41
42
|
"test": "vitest run",
|