@miy2/xml-api 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright 2026 Kazuya Miyashita
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,94 @@
1
+ # XML API
2
+
3
+ An XML synchronization engine that maintains full fidelity between source code and the Document Object Model (DOM).
4
+
5
+ This project provides a foundational XML parser and manipulation API designed for WYSIWYG editors and Integrated Development Environments (IDEs). It features a DOM-compatible interface that bidirectionally synchronizes intuitive application edits and source code modifications, all while preserving details like whitespace and indentation.
6
+
7
+ ## Key Features
8
+
9
+ - **Full Fidelity**: Edits preserve all whitespace, indentation, and comments automatically in unmodified parts of the code.
10
+ - **Bidirectional Sync**: Instantly synchronizes changes between the source code and the in-memory model, ensuring consistent state across operations.
11
+ - **DOM Compatibility**: Provides a familiar interface (`Element`, `Document`, `setAttribute`, etc.) for intuitive application development.
12
+ - **Incremental Updates**: High performance through incremental parsing and minimal text patching.
13
+
14
+ ## Architecture
15
+
16
+
17
+
18
+ The system is built on a robust synchronization engine orchestrated by the **XMLAPI**:
19
+
20
+
21
+
22
+ 1. **SyncEngine**: The core engine that manages state via Transactions and orchestrates updates.
23
+
24
+ 2. **CST (Concrete Syntax Tree)**: Captures the exact physical structure of the source code, including formatting.
25
+
26
+ 3. **Model**: The authoritative internal representation that maintains object identity, coordinates synchronization, and provides a semantic view for traversal.
27
+
28
+
29
+
30
+ Additionally, a **DOM-compatible Interface** wraps the Model for familiar application development.
31
+
32
+ ## Basic Usage
33
+
34
+ ### Initialization
35
+
36
+ ```typescript
37
+ import { XMLAPI } from '@miy2/xml-api';
38
+
39
+ const xml = `<root>
40
+ <item id="1">Original Value</item>
41
+ </root>`;
42
+
43
+ const api = new XMLAPI(xml);
44
+ ```
45
+
46
+ ### Manipulating via DOM API (Recommended)
47
+
48
+ You can use standard DOM methods to manipulate the XML. These changes are automatically reflected back to the source code with minimal patches.
49
+
50
+ ```typescript
51
+ const doc = api.getDocument(); // Returns a DOM-like Document
52
+ const item = doc.querySelector('item');
53
+
54
+ if (item) {
55
+ item.setAttribute('status', 'active');
56
+ item.textContent = 'Updated Value';
57
+ }
58
+
59
+ console.log(api.input);
60
+ /*
61
+ Output:
62
+ <root>
63
+ <item id="1" status="active">Updated Value</item>
64
+ </root>
65
+ */
66
+ ```
67
+
68
+ ### Low-level Incremental Updates
69
+
70
+ ```typescript
71
+ // Update the source code directly at specific offsets
72
+ api.updateInput(14, 28, "New Content");
73
+ ```
74
+
75
+ ## Documentation
76
+
77
+ - **[Getting Started](docs/guide/getting-started.md)**: Installation and detailed usage.
78
+ - **[Architecture](docs/architecture/overview.md)**: Deep dive into the system design.
79
+ - **[Core Concepts](docs/guide/core-concepts.md)**: Understanding Fidelity and the Layered model.
80
+ - **[API Reference](docs/api/reference/README.md)**: Auto-generated API documentation.
81
+
82
+ ## Development
83
+
84
+ This project uses [pnpm](https://pnpm.io/).
85
+
86
+ ```bash
87
+ pnpm install
88
+ pnpm test
89
+ pnpm docs:gen-api
90
+ ```
91
+
92
+ ## License
93
+
94
+ [MIT](LICENSE.md)
@@ -0,0 +1,13 @@
1
+ import type { Transaction } from "../engine/transaction";
2
+ /**
3
+ * Interface for connecting the SyncEngine to an external collaboration system (e.g., Yjs, Automerge).
4
+ */
5
+ export interface CollabBridge {
6
+ /**
7
+ * Called by SyncEngine when a local transaction is successfully dispatched.
8
+ * Bridge implementations should convert this transaction into CRDT operations and broadcast them.
9
+ *
10
+ * @param tr The committed transaction.
11
+ */
12
+ receiveLocalTransaction(tr: Transaction): void;
13
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,93 @@
1
+ import type { CST } from "./xml-cst";
2
+ /**
3
+ * A function that performs semantic or contextual validation on a parsed CST node.
4
+ *
5
+ * It is used to enforce rules that cannot be easily expressed by the grammar itself,
6
+ * such as matching start and end tag names or ensuring attribute uniqueness.
7
+ * If it returns false, the node's `wellFormed` flag is set to false, but the
8
+ * parsing process continues.
9
+ */
10
+ export type Validator = (node: CST, input: string) => boolean;
11
+ /**
12
+ * Fundamental building blocks of the grammar.
13
+ * These can be combined to represent various syntax structures equivalent to EBNF.
14
+ */
15
+ export type Expression =
16
+ /** Matches a specific exact string. */
17
+ {
18
+ type: "Literal";
19
+ value: string;
20
+ }
21
+ /** Matches a regular expression pattern. */
22
+ | {
23
+ type: "RegExpMatch";
24
+ pattern: string;
25
+ }
26
+ /** Matches multiple expressions in order (AND). */
27
+ | {
28
+ type: "Sequence";
29
+ expressions: Expression[];
30
+ }
31
+ /** Matches any one of the provided expressions (OR). */
32
+ | {
33
+ type: "Choice";
34
+ expressions: Expression[];
35
+ }
36
+ /** Matches an expression repeated a specified number of times. */
37
+ | {
38
+ type: "Repeat";
39
+ expression: Expression;
40
+ min: number;
41
+ max: number;
42
+ }
43
+ /** Matches expression A unless expression B also matches at the same position and spans at least as much input as A (exclusion). */
44
+ | {
45
+ type: "Exclusion";
46
+ a: Expression;
47
+ b: Expression;
48
+ }
49
+ /** Invokes another named rule (used for recursive definitions). */
50
+ | {
51
+ type: "Reference";
52
+ name: string;
53
+ };
54
+ /** Defines a literal string match. */
55
+ export declare const lit: (value: string) => Expression;
56
+ /** Defines a regular expression match. */
57
+ export declare const reg: (pattern: string) => Expression;
58
+ /** Defines a sequence of matches in order. */
59
+ export declare const seq: (...expressions: Expression[]) => Expression;
60
+ /** Defines a choice between multiple alternatives. */
61
+ export declare const alt: (...expressions: Expression[]) => Expression;
62
+ /** Zero or more repetitions (equivalent to `*` in EBNF). */
63
+ export declare const rep: (expression: Expression) => Expression;
64
+ /** One or more repetitions (equivalent to `+` in EBNF). */
65
+ export declare const plus: (expression: Expression) => Expression;
66
+ /** Zero or one occurrence (equivalent to `?` in EBNF, optional). */
67
+ export declare const opt: (expression: Expression) => Expression;
68
+ /** Matches A but excludes B (e.g., matching PITarget as a Name excluding "xml"). */
69
+ export declare const exc: (a: Expression, b: Expression) => Expression;
70
+ /** References another rule by its name. */
71
+ export declare const ref: (name: string) => Expression;
72
+ export declare class Grammar {
73
+ readonly rules: {
74
+ [key: string]: Expression;
75
+ };
76
+ readonly validators: {
77
+ [key: string]: Validator;
78
+ };
79
+ readonly rootRule: string;
80
+ constructor(rules: {
81
+ [key: string]: Expression;
82
+ }, validators: {
83
+ [key: string]: Validator;
84
+ }, rootRule: string);
85
+ }
86
+ export declare class GrammarBuilder {
87
+ private rules;
88
+ private validators;
89
+ private rootRule;
90
+ rule(name: string, expression: Expression): void;
91
+ verifyRule(name: string, validator: Validator): void;
92
+ build(rootRule?: string): Grammar;
93
+ }
@@ -0,0 +1,95 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.GrammarBuilder = exports.Grammar = exports.ref = exports.exc = exports.opt = exports.plus = exports.rep = exports.alt = exports.seq = exports.reg = exports.lit = void 0;
4
+ // Combinators
5
/** Builds a `Literal` expression that matches `value` exactly. */
const lit = (value) => {
    return { type: "Literal", value: value };
};
7
+ exports.lit = lit;
8
/** Builds a `RegExpMatch` expression from a regular-expression source string. */
const reg = (pattern) => {
    const node = { type: "RegExpMatch", pattern: pattern };
    return node;
};
13
+ exports.reg = reg;
14
/** Builds a `Sequence` expression: every given expression must match, in order. */
const seq = (...expressions) => {
    const node = { type: "Sequence", expressions: expressions };
    return node;
};
19
+ exports.seq = seq;
20
/** Builds a `Choice` expression over the given alternatives, kept in argument order. */
const alt = (...expressions) => {
    const node = { type: "Choice", expressions: expressions };
    return node;
};
25
+ exports.alt = alt;
26
/** Builds a `Repeat` expression with no lower or upper bound (EBNF `*`). */
const rep = (expression) => {
    const node = { type: "Repeat", expression: expression, min: 0, max: Infinity };
    return node;
};
33
+ exports.rep = rep;
34
/** Builds a `Repeat` expression requiring at least one match (EBNF `+`). */
const plus = (expression) => {
    const node = { type: "Repeat", expression: expression, min: 1, max: Infinity };
    return node;
};
41
+ exports.plus = plus;
42
/** Builds a `Repeat` expression allowing at most one match (EBNF `?`). */
const opt = (expression) => {
    const node = { type: "Repeat", expression: expression, min: 0, max: 1 };
    return node;
};
49
+ exports.opt = opt;
50
/** Builds an `Exclusion` expression: `a` is the expression to match, `b` the expression to exclude. */
const exc = (a, b) => {
    const node = { type: "Exclusion", a: a, b: b };
    return node;
};
56
+ exports.exc = exc;
57
/** Builds a `Reference` expression pointing at the rule called `name`. */
const ref = (name) => {
    return { type: "Reference", name: name };
};
59
+ exports.ref = ref;
60
/**
 * Plain holder for a grammar definition: the rule table, the validator table
 * and the name of the rule parsing starts from. The arguments are stored as
 * given; no copying or freezing happens here.
 */
class Grammar {
    constructor(rules, validators, rootRule) {
        // Object.assign writes the three fields in the same order as
        // individual assignments would.
        Object.assign(this, { rules, validators, rootRule });
    }
}
67
+ exports.Grammar = Grammar;
68
/**
 * Incrementally collects rules and validators and turns them into a Grammar.
 *
 * The first rule registered becomes the default root rule unless `build`
 * is given an explicit one.
 */
class GrammarBuilder {
    constructor() {
        this.rules = {};
        this.validators = {};
        this.rootRule = null;
    }
    /**
     * Registers (or replaces) the expression for a rule.
     *
     * @param name Rule name.
     * @param expression Expression the rule expands to.
     */
    rule(name, expression) {
        // The first rule ever registered is remembered as the default root.
        if (this.rootRule === null) {
            this.rootRule = name;
        }
        this.rules[name] = expression;
    }
    /**
     * Attaches a validator to an already registered rule.
     *
     * @param name Name of a rule previously passed to `rule`.
     * @param validator Validator to store for that rule.
     * @throws Error if no rule with that name has been registered.
     */
    verifyRule(name, validator) {
        // `rules` is a plain object, so a bare truthiness lookup would accept
        // names inherited from Object.prototype ("toString", "constructor", ...)
        // as if they were registered rules. Require an own, truthy entry.
        const registered = Object.prototype.hasOwnProperty.call(this.rules, name) &&
            Boolean(this.rules[name]);
        if (!registered) {
            throw new Error(`Rule ${name} not found`);
        }
        this.validators[name] = validator;
    }
    /**
     * Creates a Grammar from the rules and validators collected so far.
     *
     * @param rootRule Optional root rule name; when omitted or empty, the first
     *   registered rule is used.
     * @returns A new Grammar.
     * @throws Error if no root rule name is available.
     */
    build(rootRule) {
        const root = rootRule || this.rootRule;
        if (!root) {
            throw new Error("No root rule defined");
        }
        // Hand the Grammar shallow copies of both tables so that later calls on
        // this builder do not change it. The copies are not frozen, and the
        // expression objects themselves are shared.
        return new Grammar({ ...this.rules }, { ...this.validators }, root);
    }
}
95
+ exports.GrammarBuilder = GrammarBuilder;
@@ -0,0 +1,23 @@
1
+ import type { Grammar } from "./grammar";
2
+ import { CST } from "./xml-cst";
3
+ export declare class Parser {
4
+ grammar: Grammar;
5
+ constructor(grammar: Grammar);
6
+ parse(input: string, rootRule?: string): CST | null;
7
+ /**
8
+ * Parses the input starting from a specific position using a given rule.
9
+ * Useful for incremental parsing.
10
+ */
11
+ parseAt(input: string, pos: number, ruleName: string): {
12
+ node: CST;
13
+ end: number;
14
+ } | null;
15
+ private execute;
16
+ private execLiteral;
17
+ private execRegExp;
18
+ private execSequence;
19
+ private execChoice;
20
+ private execRepeat;
21
+ private execExclusion;
22
+ private execReference;
23
+ }
@@ -0,0 +1,161 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Parser = void 0;
4
+ const xml_cst_1 = require("./xml-cst");
5
/**
 * Backtracking interpreter for a Grammar: walks expression trees over an input
 * string and produces CST nodes.
 *
 * Every `exec*` method follows the same contract: on success it advances
 * `ctx.pos` past the match and returns a node; on failure it returns null and
 * leaves `ctx.pos` where it was on entry.
 */
class Parser {
    /**
     * @param grammar Object providing `rules`, `validators` and `rootRule`.
     */
    constructor(grammar) {
        this.grammar = grammar;
    }
    /**
     * Parses the whole input.
     *
     * @param input Text to parse.
     * @param rootRule Rule to start from; falls back to the grammar's root rule
     *   when omitted or empty.
     * @returns The root node, or null when the rule does not match or does not
     *   consume the entire input. The returned node may have
     *   `wellFormed === false`; callers that care must check the flag.
     */
    parse(input, rootRule) {
        const startRule = rootRule || this.grammar.rootRule;
        const ctx = { input, pos: 0, grammar: this.grammar };
        // Enter through a Reference so the root gets the same wrapping and
        // validation (see execReference) as any nested rule reference.
        const rootExpr = { type: "Reference", name: startRule };
        const result = this.execute(rootExpr, ctx);
        // Success requires full consumption of the input. Well-formedness is
        // deliberately NOT checked here.
        if (result && ctx.pos === input.length) {
            return result;
        }
        return null;
    }
    /**
     * Parses the input starting from a specific position using a given rule.
     * Unlike `parse`, trailing input after the match is allowed.
     *
     * @param input Text to parse.
     * @param pos Offset in `input` at which matching starts.
     * @param ruleName Name of the rule to apply.
     * @returns The matched node and the offset just past it, or null on failure.
     */
    parseAt(input, pos, ruleName) {
        const ctx = { input, pos, grammar: this.grammar };
        const rootExpr = { type: "Reference", name: ruleName };
        const result = this.execute(rootExpr, ctx);
        if (result) {
            return { node: result, end: ctx.pos };
        }
        return null;
    }
    /**
     * Dispatches an expression to its handler.
     *
     * @throws Error for an expression whose `type` is not one of the known
     *   kinds; silently returning `undefined` would be mistaken for a match by
     *   callers, which only test for null.
     */
    execute(expr, ctx) {
        switch (expr.type) {
            case "Literal":
                return this.execLiteral(expr, ctx);
            case "RegExpMatch":
                return this.execRegExp(expr, ctx);
            case "Sequence":
                return this.execSequence(expr, ctx);
            case "Choice":
                return this.execChoice(expr, ctx);
            case "Repeat":
                return this.execRepeat(expr, ctx);
            case "Exclusion":
                return this.execExclusion(expr, ctx);
            case "Reference":
                return this.execReference(expr, ctx);
            default:
                throw new Error(`Unknown expression type: ${expr.type}`);
        }
    }
    /** Matches the exact string `expr.value` at the current position. */
    execLiteral(expr, ctx) {
        if (ctx.input.startsWith(expr.value, ctx.pos)) {
            const start = ctx.pos;
            ctx.pos += expr.value.length;
            return new xml_cst_1.CST("literal", undefined, start, ctx.pos);
        }
        return null;
    }
    /**
     * Returns the sticky RegExp for a pattern, compiling it on first use only.
     * The cache is created lazily on the instance and keyed by pattern source.
     */
    stickyRegExp(pattern) {
        const cache = this.regexCache || (this.regexCache = new Map());
        let regex = cache.get(pattern);
        if (regex === undefined) {
            regex = new RegExp(pattern, "y");
            cache.set(pattern, regex);
        }
        return regex;
    }
    /** Matches `expr.pattern` anchored at the current position. */
    execRegExp(expr, ctx) {
        const regex = this.stickyRegExp(expr.pattern);
        // The RegExp object is reused between calls, so lastIndex must be set
        // before every exec; the sticky flag anchors the match at that offset.
        regex.lastIndex = ctx.pos;
        const match = regex.exec(ctx.input);
        if (match) {
            const start = ctx.pos;
            ctx.pos += match[0].length;
            return new xml_cst_1.CST("regex", undefined, start, ctx.pos);
        }
        return null;
    }
    /** Matches all sub-expressions in order; any failure fails the whole sequence. */
    execSequence(expr, ctx) {
        const startPos = ctx.pos;
        const children = [];
        let wellFormed = true;
        for (const childExpr of expr.expressions) {
            const result = this.execute(childExpr, ctx);
            if (result === null) {
                // Undo whatever earlier members consumed.
                ctx.pos = startPos;
                return null;
            }
            children.push(result);
            if (!result.wellFormed) {
                wellFormed = false;
            }
        }
        return new xml_cst_1.CST("sequence", undefined, startPos, ctx.pos, children, wellFormed);
    }
    /** Returns the first alternative that matches (ordered choice). */
    execChoice(expr, ctx) {
        for (const childExpr of expr.expressions) {
            const startPos = ctx.pos;
            const result = this.execute(childExpr, ctx);
            if (result !== null) {
                return result;
            }
            ctx.pos = startPos;
        }
        return null;
    }
    /**
     * Matches `expr.expression` repeatedly, up to `expr.max` times, and fails
     * when fewer than `expr.min` matches were counted.
     */
    execRepeat(expr, ctx) {
        const startPos = ctx.pos;
        const children = [];
        let count = 0;
        let wellFormed = true;
        while (count < expr.max) {
            const checkpoint = ctx.pos;
            const result = this.execute(expr.expression, ctx);
            // A match that consumed nothing would repeat forever, so it ends
            // the loop. Note that such a match is not counted towards `min`.
            if (result === null || ctx.pos === checkpoint)
                break;
            children.push(result);
            if (!result.wellFormed) {
                wellFormed = false;
            }
            count++;
        }
        if (count < expr.min) {
            ctx.pos = startPos;
            return null;
        }
        return new xml_cst_1.CST("repeat", undefined, startPos, ctx.pos, children, wellFormed);
    }
    /**
     * Matches `expr.a`, then rejects the match if `expr.b` also matches at the
     * same start position and spans at least as much input as A did.
     */
    execExclusion(expr, ctx) {
        const startPos = ctx.pos;
        const resultA = this.execute(expr.a, ctx);
        if (resultA === null)
            return null;
        const endPosA = ctx.pos;
        // Rewind and try B from the same starting point.
        ctx.pos = startPos;
        const resultB = this.execute(expr.b, ctx);
        // A shorter B (e.g. a prefix of A's match) does not exclude A.
        if (resultB !== null &&
            startPos + (resultB.end - resultB.start) >= endPosA) {
            ctx.pos = startPos;
            return null;
        }
        ctx.pos = endPosA;
        return resultA;
    }
    /**
     * Expands a named rule, wraps its result in a node carrying the rule name,
     * and runs the rule's validator, if one is registered.
     *
     * @returns The wrapper node, or null when the rule is unknown or fails.
     */
    execReference(expr, ctx) {
        const hasOwn = Object.prototype.hasOwnProperty;
        const { rules, validators } = ctx.grammar;
        // Look up own entries only: the tables are plain objects, so names such
        // as "toString" or "hasOwnProperty" would otherwise resolve to
        // functions inherited from Object.prototype.
        const rule = hasOwn.call(rules, expr.name) ? rules[expr.name] : undefined;
        if (!rule)
            return null; // Unknown rule: treated as "no match".
        const result = this.execute(rule, ctx);
        if (result === null)
            return null;
        // Always wrap the result to preserve the rule name in the hierarchy.
        const node = new xml_cst_1.CST(result.type, expr.name, result.start, result.end, [result], result.wellFormed);
        const validator = hasOwn.call(validators, expr.name)
            ? validators[expr.name]
            : undefined;
        // A failing validator only flags the node; parsing carries on.
        if (validator && !validator(node, ctx.input)) {
            node.wellFormed = false;
        }
        return node;
    }
}
161
+ exports.Parser = Parser;
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Represents a node in the Concrete Syntax Tree (Parse Tree).
3
+ * Each node corresponds to a match of a grammatical structure or rule.
4
+ */
5
+ export declare class CST {
6
+ /**
7
+ * The type of the grammatical structure matched.
8
+ * Common values include "literal", "regex", "sequence", "repeat".
9
+ * This describes the structural nature of the match, not the grammar rule name.
10
+ */
11
+ type: string;
12
+ /**
13
+ * The name of the grammar rule corresponding to this node (e.g., "element", "attribute").
14
+ * Defined only if this node represents a named rule reference; otherwise undefined.
15
+ */
16
+ name: string | undefined;
17
+ /**
18
+ * The 0-based starting index of this node in the entire input string (inclusive).
19
+ */
20
+ start: number;
21
+ /**
22
+ * The 0-based ending index of this node in the entire input string (exclusive).
23
+ * The length of the match is (end - start).
24
+ */
25
+ end: number;
26
+ /**
27
+ * Child nodes contained within this structure.
28
+ * Empty for leaf nodes like literals or regex matches.
29
+ */
30
+ children: CST[];
31
+ /**
32
+ * Indicates whether the node satisfies additional validation logic beyond basic parsing.
33
+ * If false, the node was parsed successfully according to the grammar structure
34
+ * but failed a semantic or contextual validation check.
35
+ */
36
+ wellFormed: boolean;
37
+ /**
38
+ * Reference to the parent node in the syntax tree.
39
+ * Null if this is the root node.
40
+ */
41
+ parent: CST | null;
42
+ constructor(
43
+ /**
44
+ * The type of the grammatical structure matched.
45
+ * Common values include "literal", "regex", "sequence", "repeat".
46
+ * This describes the structural nature of the match, not the grammar rule name.
47
+ */
48
+ type: string,
49
+ /**
50
+ * The name of the grammar rule corresponding to this node (e.g., "element", "attribute").
51
+ * Defined only if this node represents a named rule reference; otherwise undefined.
52
+ */
53
+ name: string | undefined,
54
+ /**
55
+ * The 0-based starting index of this node in the entire input string (inclusive).
56
+ */
57
+ start: number,
58
+ /**
59
+ * The 0-based ending index of this node in the entire input string (exclusive).
60
+ * The length of the match is (end - start).
61
+ */
62
+ end: number,
63
+ /**
64
+ * Child nodes contained within this structure.
65
+ * Empty for leaf nodes like literals or regex matches.
66
+ */
67
+ children?: CST[],
68
+ /**
69
+ * Indicates whether the node satisfies additional validation logic beyond basic parsing.
70
+ * If false, the node was parsed successfully according to the grammar structure
71
+ * but failed a semantic or contextual validation check.
72
+ */
73
+ wellFormed?: boolean);
74
+ /**
75
+ * Retrieves the substring matching this node from the entire original input string.
76
+ */
77
+ getText(input: string): string;
78
+ /**
79
+ * Shifts the start and end positions of this node and its children.
80
+ * @param pos The position where the change occurred.
81
+ * @param delta The change in length.
82
+ */
83
+ shift(pos: number, delta: number): void;
84
+ /**
85
+ * Unwraps single-child Reference nodes to find the underlying structural node.
86
+ */
87
+ unwrap(): CST;
88
+ }