@pipobscure/xml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/chars.js ADDED
@@ -0,0 +1,74 @@
1
+ /**
2
+ * @pipobscure/xml — XML character classification
3
+ *
4
+ * All functions operate on numeric character codes (UTF-16 code units from `charCodeAt`, or full code points from `codePointAt`)
5
+ * for maximum performance on the ASCII-heavy content typical of
6
+ * CalDAV / CardDAV payloads.
7
+ *
8
+ * The ranges follow XML 1.0 (fifth edition) §2.2 and the XML Namespaces 1.0
9
+ * specification. The parser itself is tolerant and will not reject documents
10
+ * solely on character-class grounds, but these helpers are used to drive the
11
+ * tokeniser for correct name recognition.
12
+ */
13
/**
 * Reports whether `code` is one of the four XML whitespace characters:
 * tab (0x09), line feed (0x0A), carriage return (0x0D) or space (0x20).
 */
export function isXmlWhitespace(code) {
  switch (code) {
    case 0x09: // tab
    case 0x0a: // line feed
    case 0x0d: // carriage return
    case 0x20: // space
      return true;
    default:
      return false;
  }
}
17
/**
 * Reports whether `code` is a valid XML NameStartChar.
 *
 * `:` is accepted so that QNames can be lexed as a single token and split on
 * `:` afterwards.
 *
 * The checks walk the permitted ranges in ascending order, alternating
 * between "below the next range -> reject" and "inside the range -> accept",
 * so ASCII input is resolved within the first few comparisons.
 *
 * Note that `charCodeAt` yields UTF-16 code units, so the final
 * supplementary-plane range (0x10000-0xEFFFF) can only match when the caller
 * passes a full code point (e.g. from `codePointAt`).
 *
 * XML 1.0 §2.3 production [4]
 *
 * @param code Numeric character code to classify.
 * @returns `true` when `code` may begin an XML name, otherwise `false`.
 */
export function isNameStartChar(code) {
  if (code < 0x41) return code === 0x3a; // ':' is the only NameStartChar below 'A'
  if (code <= 0x5a) return true; // A-Z
  // '_' (0x5f) lies in the gap between 'Z' and 'a'; it must be tested here,
  // not in the `< 0x41` branch where it can never match.
  if (code < 0x61) return code === 0x5f;
  if (code <= 0x7a) return true; // a-z
  if (code < 0xc0) return false;
  if (code <= 0xd6) return true; // 0xC0-0xD6
  if (code < 0xd8) return false; // excludes 0xD7 (multiplication sign)
  if (code <= 0xf6) return true; // 0xD8-0xF6
  if (code < 0xf8) return false; // excludes 0xF7 (division sign)
  if (code <= 0x2ff) return true; // 0xF8-0x2FF
  if (code < 0x370) return false;
  if (code <= 0x37d) return true; // 0x370-0x37D
  if (code < 0x37f) return false; // excludes 0x37E (Greek question mark)
  if (code <= 0x1fff) return true; // 0x37F-0x1FFF
  if (code === 0x200c || code === 0x200d) return true; // ZWNJ / ZWJ
  if (code < 0x2070) return false;
  if (code <= 0x218f) return true; // 0x2070-0x218F
  if (code < 0x2c00) return false;
  if (code <= 0x2fef) return true; // 0x2C00-0x2FEF
  if (code < 0x3001) return false;
  if (code <= 0xd7ff) return true; // 0x3001-0xD7FF
  if (code < 0xf900) return false; // excludes surrogates and the private-use area
  if (code <= 0xfdcf) return true; // 0xF900-0xFDCF
  if (code < 0xfdf0) return false;
  if (code <= 0xfffd) return true; // 0xFDF0-0xFFFD
  if (code < 0x10000) return false;
  return code <= 0xeffff; // 0x10000-0xEFFFF
}
52
/**
 * Reports whether `code` is a valid XML NameChar: anything accepted by
 * `isNameStartChar`, plus `-`, `.`, the ASCII digits, the middle dot (0xB7),
 * the combining range 0x0300-0x036F and the range 0x203F-0x2040.
 *
 * XML 1.0 §2.3 production [4a]
 */
export function isNameChar(code) {
  return (
    isNameStartChar(code) ||
    code === 0x2d || // -
    code === 0x2e || // .
    (code >= 0x30 && code <= 0x39) || // 0-9
    code === 0xb7 ||
    (code >= 0x0300 && code <= 0x036f) ||
    (code >= 0x203f && code <= 0x2040)
  );
}
66
/** Reports whether `code` is an ASCII hexadecimal digit: 0-9, A-F or a-f. */
export function isHexDigit(code) {
  if (code >= 0x30 && code <= 0x39) return true; // 0-9
  if (code >= 0x41 && code <= 0x46) return true; // A-F
  return code >= 0x61 && code <= 0x66; // a-f
}
70
/** Reports whether `code` is an ASCII decimal digit, i.e. within 0x30 ('0') to 0x39 ('9'). */
export function isDecimalDigit(code) {
  const atLeastZero = code >= 0x30;
  const atMostNine = code <= 0x39;
  return atLeastZero && atMostNine;
}
74
+ //# sourceMappingURL=chars.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chars.js","sourceRoot":"","sources":["../src/chars.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,4DAA4D;AAC5D,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,OAAO,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,CAAC;AAC1E,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,IAAI,IAAI,GAAG,IAAI;QAAE,OAAO,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI,CAAC,CAAC,SAAS;IACjE,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,CAAC,MAAM;IACrC,IAAI,IAAI,GAAG,IAAI;QAAE,OAAO,KAAK,CAAC;IAC9B,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,CAAC,MAAM;IACrC,IAAI,IAAI,GAAG,IAAI;QAAE,OAAO,KAAK,CAAC;IAC9B,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IAC9B,IAAI,IAAI,GAAG,IAAI;QAAE,OAAO,KAAK,CAAC;IAC9B,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IAC9B,IAAI,IAAI,GAAG,IAAI;QAAE,OAAO,KAAK,CAAC;IAC9B,IAAI,IAAI,IAAI,KAAK;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,IAAI,GAAG,KAAK;QAAE,OAAO,KAAK,CAAC;IAC/B,IAAI,IAAI,IAAI,KAAK;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,IAAI,GAAG,KAAK;QAAE,OAAO,KAAK,CAAC;IAC/B,IAAI,IAAI,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAChC,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,MAAM;QAAE,OAAO,IAAI,CAAC;IACpD,IAAI,IAAI,GAAG,MAAM;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,IAAI,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAChC,IAAI,IAAI,GAAG,MAAM;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,IAAI,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAChC,IAAI,IAAI,GAAG,MAAM;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,IAAI,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAChC,IAAI,IAAI,GAAG,MAAM;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,IAAI,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAChC,IAAI,IAAI,GAAG,MAAM;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,IAAI,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAChC,IAAI,IAAI,GAAG,OAAO;QAAE,OAAO,KAAK,CAAC;IACjC,OAAO,IAAI,IAAI,OAAO,CAAC;AACzB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,IAAI,eAAe,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACvC,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC,CAAC,SAAS;IAC1D,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,CAAC,MAAM;IACrD,IAAI,IAAI,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAClD,IAAI,IA
AI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAClD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,mCAAmC;AACnC,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,OAAO,CACL,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC;QAC9B,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC;QAC9B,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,CAC/B,CAAC;AACJ,CAAC;AAED,iCAAiC;AACjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC;AACtC,CAAC"}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * @pipobscure/xml
3
+ *
4
+ * A fully capable, forgiving XML parser that produces plain JS objects
5
+ * suitable for JSON serialisation and TypeScript consumption.
6
+ *
7
+ * Quick start
8
+ * ───────────
9
+ * ```ts
10
+ * import { parse } from '@pipobscure/xml';
11
+ *
12
+ * const doc = parse(`<?xml version="1.0"?>
13
+ * <cal:calendar xmlns:cal="urn:ietf:params:xml:ns:caldav">
14
+ * <cal:displayname>My Calendar</cal:displayname>
15
+ * </cal:calendar>`);
16
+ *
17
+ * // doc.type === 'document'
18
+ * // doc.children[0].type === 'xml-declaration'
19
+ * // doc.children[1].type === 'element'
20
+ * ```
21
+ */
22
+ export { parse, ParseError } from './parser.ts';
23
+ export type { NodeType, Node, Attribute, XmlDeclaration, DocumentType, ProcessingInstruction, Comment, CData, Text, Element, Document, ChildNode, DocumentChild, AnyNode } from './types.ts';
24
+ export { isDocument, isElement, isText, isCData, isComment, isProcessingInstruction, isDocumentType, isXmlDeclaration } from './types.ts';
25
+ export { textContent, rootElement, child, requireChild, children, childElements, childElementCount, descendant, descendants, attr } from './query.ts';
26
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGhD,YAAY,EACV,QAAQ,EACR,IAAI,EACJ,SAAS,EACT,cAAc,EACd,YAAY,EACZ,qBAAqB,EACrB,OAAO,EACP,KAAK,EACL,IAAI,EACJ,OAAO,EACP,QAAQ,EACR,SAAS,EACT,aAAa,EACb,OAAO,GACR,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,UAAU,EACV,SAAS,EACT,MAAM,EACN,OAAO,EACP,SAAS,EACT,uBAAuB,EACvB,cAAc,EACd,gBAAgB,GACjB,MAAM,YAAY,CAAC;AAGpB,OAAO,EACL,WAAW,EACX,WAAW,EACX,KAAK,EACL,YAAY,EACZ,QAAQ,EACR,aAAa,EACb,iBAAiB,EACjB,UAAU,EACV,WAAW,EACX,IAAI,GACL,MAAM,YAAY,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,27 @@
1
+ /**
2
+ * @pipobscure/xml
3
+ *
4
+ * A fully capable, forgiving XML parser that produces plain JS objects
5
+ * suitable for JSON serialisation and TypeScript consumption.
6
+ *
7
+ * Quick start
8
+ * ───────────
9
+ * ```ts
10
+ * import { parse } from '@pipobscure/xml';
11
+ *
12
+ * const doc = parse(`<?xml version="1.0"?>
13
+ * <cal:calendar xmlns:cal="urn:ietf:params:xml:ns:caldav">
14
+ * <cal:displayname>My Calendar</cal:displayname>
15
+ * </cal:calendar>`);
16
+ *
17
+ * // doc.type === 'document'
18
+ * // doc.children[0].type === 'xml-declaration'
19
+ * // doc.children[1].type === 'element'
20
+ * ```
21
+ */
22
+ // Parser function and error class
23
+ export { parse, ParseError } from './parser.js';
24
+ export { isDocument, isElement, isText, isCData, isComment, isProcessingInstruction, isDocumentType, isXmlDeclaration } from './types.js';
25
+ // Tree-query helpers
26
+ export { textContent, rootElement, child, requireChild, children, childElements, childElementCount, descendant, descendants, attr } from './query.js';
27
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,kCAAkC;AAClC,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAoBhD,OAAO,EACL,UAAU,EACV,SAAS,EACT,MAAM,EACN,OAAO,EACP,SAAS,EACT,uBAAuB,EACvB,cAAc,EACd,gBAAgB,GACjB,MAAM,YAAY,CAAC;AAEpB,qBAAqB;AACrB,OAAO,EACL,WAAW,EACX,WAAW,EACX,KAAK,EACL,YAAY,EACZ,QAAQ,EACR,aAAa,EACb,iBAAiB,EACjB,UAAU,EACV,WAAW,EACX,IAAI,GACL,MAAM,YAAY,CAAC"}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * @pipobscure/xml — Recursive-descent XML parser
3
+ *
4
+ * Design goals
5
+ * ─────────────
6
+ * • Correctness for well-formed XML (CalDAV, CardDAV, Atom, WebDAV …)
7
+ * • Tolerant / forgiving: recovers from many real-world XML quirks instead
8
+ * of aborting with an error. The aim is to parse what servers actually send,
9
+ * not only what the spec mandates.
10
+ * • Optimised for small documents (< ~1 MB) — no streaming, no SAX.
11
+ * • Pure TypeScript, zero dependencies, plain-object output (JSON-safe).
12
+ *
13
+ * Tolerance specifics
14
+ * ────────────────────
15
+ * • Unknown named entity references (e.g. `&nbsp;`) are left verbatim
16
+ * (`&nbsp;`) rather than causing an error.
17
+ * • Undefined namespace prefixes resolve to `null` rather than throwing.
18
+ * • `--` inside comments is allowed (browsers are lenient here too).
19
+ * • Missing XML declaration is fine.
20
+ * • Attribute values may use either quote style.
21
+ * • DOCTYPE internal subsets are captured verbatim, not validated.
22
+ * • The BOM (U+FEFF) at the start of the stream is silently skipped.
23
+ */
24
+ import type { Document } from './types.ts';
25
+ /**
26
+ * Thrown when the input is so malformed that the parser cannot produce a
27
+ * meaningful tree. In practice the parser tries hard to recover, so only
28
+ * truly unrecoverable situations (e.g. no root element found) reach here.
+ *
+ * Besides the inherited `Error` message, each instance exposes where the
+ * problem was detected as read-only `position`, `line` and `column` fields.
29
+ */
30
+ export declare class ParseError extends Error {
31
+ /**
+ * Offset in the source string where the problem was detected.
+ * NOTE(review): originally described as a byte offset, but the parser input
+ * is a JS string, so this is presumably a string index (UTF-16 code units)
+ * rather than a byte count — confirm against the parser implementation.
+ */
32
+ readonly position: number;
33
+ /** 1-based line number of the problem location. */
34
+ readonly line: number;
35
+ /** 1-based column number of the problem location. */
36
+ readonly column: number;
37
+ constructor(message: string, position: number, line: number, column: number);
38
+ }
39
+ /**
40
+ * Parses an XML string into a `Document` tree of plain JS objects.
41
+ *
42
+ * The parser is deliberately forgiving:
43
+ * - Unknown entity references are preserved verbatim.
44
+ * - Undeclared namespace prefixes resolve to `null`.
45
+ * - Minor structural quirks (missing closing quotes, stray characters in
46
+ * element tags, unterminated comments) are recovered from where possible.
47
+ *
+ * @param xml - The XML source text to parse.
+ * @returns The parsed `Document` tree.
48
+ * @throws {ParseError} Only for unrecoverable structural failures such as a
49
+ * completely absent root element.
50
+ */
51
+ export declare function parse(xml: string): Document;
52
+ //# sourceMappingURL=parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,KAAK,EACV,QAAQ,EAWT,MAAM,YAAY,CAAC;AA6BpB;;;;GAIG;AACH,qBAAa,UAAW,SAAQ,KAAK;IACnC,uEAAuE;IACvE,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,2BAA2B;IAC3B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,6BAA6B;IAC7B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;gBAEZ,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;CAO5E;AAytBD;;;;;;;;;;;GAWG;AACH,wBAAgB,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,CAE3C"}