toon-formatter 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/json.js ADDED
@@ -0,0 +1,297 @@
1
+ /**
2
+ * JSON ↔ TOON Converter
3
+ */
4
+
5
+ import { formatValue, parseValue, splitByDelimiter } from './utils.js';
6
+
/**
 * Indentation emitted per nesting level.
 *
 * Two spaces are required for the output to be readable by toonToJson: the
 * parser registers a container at `headerIndent + 1` and pops every context
 * whose indent is `>=` the current line's indent, so children that sit only
 * one column deeper than their header would be attached to the wrong parent.
 */
const TOON_INDENT_UNIT = '  ';

/** True for values written inline: null and every non-object type. */
function isToonPrimitive(value) {
  return value === null || typeof value !== 'object';
}

/** True for non-null, non-array objects (candidates for table rows / list records). */
function isToonRecord(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/** True for values allowed inside a tabular row: null, string, number or boolean. */
function isTabularCell(value) {
  return value === null || ['string', 'number', 'boolean'].includes(typeof value);
}

/**
 * Converts JSON to TOON format.
 *
 * Output shapes:
 *  - primitive            -> `key: value`
 *  - empty array          -> `key[0]:`
 *  - array of primitives  -> `key[N]: v1, v2, ...`
 *  - array of records that all share the same primitive-valued fields
 *                         -> tabular: `key[N]{f1,f2}:` followed by one row per record
 *  - any other array      -> `key[N]:` followed by one `-` list item per element
 *                            (records list their own fields below the marker,
 *                            primitives and nested arrays follow the marker inline)
 *  - object               -> optional `key:` header followed by its own enumerable fields
 *
 * @param {*} data - JSON data to convert
 * @param {string} key - Current key name (for recursion)
 * @param {number} depth - Current indentation depth
 * @returns {string} TOON formatted string
 */
export function jsonToToon(data, key = '', depth = 0) {
  const indent = TOON_INDENT_UNIT.repeat(depth);
  const nextIndent = TOON_INDENT_UNIT.repeat(depth + 1);

  // ---- Primitive ----
  if (isToonPrimitive(data)) {
    return `${indent}${key}: ${formatValue(data)}`;
  }

  // ---- Array ----
  if (Array.isArray(data)) {
    const length = data.length;

    if (length === 0) {
      return `${indent}${key}[0]:`;
    }

    // Inline form only when EVERY element is primitive; inspecting just the
    // first element would stringify later objects as "[object Object]".
    if (data.every(isToonPrimitive)) {
      const values = data.map((v) => formatValue(v)).join(', ');
      return `${indent}${key}[${length}]: ${values}`;
    }

    // ---- TABULAR ARRAY ----
    // Requires every element to be a record with exactly the first record's
    // fields, all primitive; otherwise a fixed header would drop or invent data.
    if (data.every(isToonRecord)) {
      const fields = Object.keys(data[0]);
      const isTabular =
        fields.length > 0 &&
        data.every(
          (row) =>
            Object.keys(row).length === fields.length &&
            fields.every(
              (f) => Object.prototype.hasOwnProperty.call(row, f) && isTabularCell(row[f])
            )
        );

      if (isTabular) {
        const rows = data.map(
          (row) => `${nextIndent}${fields.map((f) => formatValue(row[f])).join(',')}`
        );
        return [`${indent}${key}[${length}]{${fields.join(',')}}:`, ...rows].join('\n');
      }
    }

    // ---- LIST ARRAY (nested or heterogeneous elements) ----
    const lines = [`${indent}${key}[${length}]:`];

    for (const item of data) {
      if (isToonPrimitive(item)) {
        lines.push(`${nextIndent}- ${formatValue(item)}`);
      } else if (Array.isArray(item)) {
        // Render the nested array one level deeper, then splice the item
        // marker in front of its `[N]...` header line.
        const block = jsonToToon(item, '', depth + 1);
        lines.push(`${nextIndent}- ${block.slice(nextIndent.length)}`);
      } else {
        lines.push(`${nextIndent}-`); // item marker
        // Use each record's own keys so per-row differences are preserved.
        for (const field of Object.keys(item)) {
          lines.push(jsonToToon(item[field], field, depth + 2));
        }
      }
    }

    return lines.join('\n');
  }

  // ---- Object ----
  const lines = [];

  // The root object (empty key) has no header line of its own.
  if (key) lines.push(`${indent}${key}:`);

  for (const childKey of Object.keys(data)) {
    lines.push(jsonToToon(data[childKey], childKey, depth + 1));
  }

  return lines.join('\n');
}
95
+
/**
 * Maps the delimiter marker found inside an array header (`[N<marker>]`) to
 * the character used to split values. Accepts a real tab as well as the
 * two-character sequence `\t`; anything unrecognised falls back to a comma.
 */
function resolveToonDelimiter(marker) {
  if (marker === '\t' || marker === '\\t') return '\t';
  if (marker === '|') return '|';
  return ',';
}

/**
 * Assigns `value` to `target[key]` as an own property. A plain assignment to
 * `__proto__` would replace the object's prototype instead of storing data,
 * so that key is defined explicitly.
 */
function assignOwnProperty(target, key, value) {
  if (key === '__proto__') {
    Object.defineProperty(target, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  } else {
    target[key] = value;
  }
}

/**
 * Converts TOON to JSON format.
 *
 * The parser is line based: it keeps a stack of open containers keyed by
 * indentation and a small amount of state for the tabular array currently
 * being filled. Blank lines are ignored. An input with no content yields `{}`;
 * an input whose first content line starts with `[` yields an array.
 *
 * @param {string} toonString - TOON formatted string
 * @returns {*} Parsed JSON data
 */
export function toonToJson(toonString) {
  const ARRAY_HEADER = /^\[(\d+)(.*?)\](?:\{(.*?)\})?:\s*(.*)$/;
  // The key part may not contain a colon; otherwise a line such as
  // `msg: "see items[2]: x"` would be mistaken for an array header.
  const KEYED_ARRAY_HEADER = /^([^:]+?)\[(\d+)(.*?)\](?:\{(.*?)\})?:\s*(.*)$/;
  const KEY_VALUE = /^(.+?):\s*(.*)$/;

  const lines = toonString.split('\n');
  let root = {};
  const stack = [];

  // Pre-process: decide between a root array and a root object.
  const firstLine = lines.find((l) => l.trim() !== '');
  if (!firstLine) return {}; // Empty document

  if (firstLine.trim().startsWith('[')) {
    root = [];
    stack.push({ obj: root, indent: 0, isRootArray: true });
  } else {
    stack.push({ obj: root, indent: -1 }); // Root object container
  }

  // State for the tabular array currently being read (if any).
  let tabularHeaders = null;
  let tabularTarget = null;
  let tabularIndent = -1; // indent of the rows, learned from the first row
  let tabularDelimiter = ',';
  let tabularHeaderIndent = -1; // indent of the header line that opened the table

  const startTabular = (target, fieldsStr, delimiter, headerIndent) => {
    tabularHeaders = fieldsStr.split(',').map((s) => s.trim());
    tabularTarget = target;
    tabularIndent = -1;
    tabularDelimiter = delimiter;
    tabularHeaderIndent = headerIndent;
  };

  const endTabular = () => {
    tabularTarget = null;
    tabularHeaders = null;
    tabularIndent = -1;
  };

  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed === '') continue;

    const indent = line.match(/^(\s*)/)[1].length;

    // --- Tabular Data Handling ---
    if (tabularTarget) {
      if (tabularIndent === -1) {
        // Rows must be indented deeper than their header. A line at the
        // header's own level is a sibling, which means the table is empty.
        if (indent > tabularHeaderIndent) {
          tabularIndent = indent;
        } else {
          endTabular();
        }
      }

      if (tabularTarget && indent === tabularIndent) {
        const values = splitByDelimiter(trimmed, tabularDelimiter).map(parseValue);
        const rowObj = {};
        tabularHeaders.forEach((h, idx) => {
          assignOwnProperty(rowObj, h, values[idx]);
        });
        tabularTarget.push(rowObj);
        continue;
      } else if (tabularTarget && indent < tabularIndent) {
        endTabular();
      }
    }

    // Close every container that is not an ancestor of this line.
    while (stack.length > 1 && stack[stack.length - 1].indent >= indent) {
      stack.pop();
    }

    const parent = stack[stack.length - 1].obj;

    // --- Root Array Header ---
    if (stack.length === 1 && stack[0].isRootArray && trimmed.startsWith('[')) {
      const rootHeaderMatch = trimmed.match(ARRAY_HEADER);
      if (rootHeaderMatch) {
        const delimiter = resolveToonDelimiter(rootHeaderMatch[2]);
        const fieldsStr = rootHeaderMatch[3];
        const rest = rootHeaderMatch[4];

        if (fieldsStr) {
          startTabular(root, fieldsStr, delimiter, indent);
        } else if (rest.trim() !== '') {
          // Inline root array: `[3]: 1, 2, 3`
          root.push(...splitByDelimiter(rest, delimiter).map(parseValue));
        }
      }
      continue;
    }

    // --- List Item Handling (-) ---
    // NOTE(review): `parent` is assumed to be an array here; a list item
    // placed directly under an object raises a TypeError from `push`.
    if (trimmed.startsWith('-')) {
      const content = trimmed.slice(1).trim();

      if (content === '') {
        // Bare marker: the item's fields follow on deeper lines.
        const newObj = {};
        parent.push(newObj);
        stack.push({ obj: newObj, indent });
        continue;
      }

      const arrayMatch = content.match(ARRAY_HEADER);
      if (arrayMatch) {
        const delimiter = resolveToonDelimiter(arrayMatch[2]);
        const fieldsStr = arrayMatch[3];
        const rest = arrayMatch[4];

        const newArray = [];
        parent.push(newArray);

        if (fieldsStr) {
          startTabular(newArray, fieldsStr, delimiter, indent);
        } else if (rest) {
          newArray.push(...splitByDelimiter(rest, delimiter).map(parseValue));
        } else {
          stack.push({ obj: newArray, indent: indent + 1 });
        }
        continue;
      }

      const itemKvMatch = content.match(KEY_VALUE);
      if (itemKvMatch) {
        const key = itemKvMatch[1].trim();
        const valStr = itemKvMatch[2].trim();
        const newObj = {};

        if (valStr === '') {
          // `- key:` opens a nested object that owns the following lines.
          const nested = {};
          assignOwnProperty(newObj, key, nested);
          parent.push(newObj);
          stack.push({ obj: nested, indent: indent + 1 });
        } else {
          // `- key: value` starts an item; further fields may follow deeper.
          assignOwnProperty(newObj, key, parseValue(valStr));
          parent.push(newObj);
          stack.push({ obj: newObj, indent });
        }
        continue;
      }

      parent.push(parseValue(content));
      continue;
    }

    // --- Keyed Array Header ---
    const arrayHeaderMatch = trimmed.match(KEYED_ARRAY_HEADER);
    if (arrayHeaderMatch) {
      const key = arrayHeaderMatch[1].trim();
      const delimiter = resolveToonDelimiter(arrayHeaderMatch[3]);
      const fieldsStr = arrayHeaderMatch[4];
      const valueStr = arrayHeaderMatch[5];

      const newArray = [];

      if (!Array.isArray(parent)) {
        assignOwnProperty(parent, key, newArray);
      }

      if (fieldsStr) {
        startTabular(newArray, fieldsStr, delimiter, indent);
      } else if (valueStr && valueStr.trim() !== '') {
        newArray.push(...splitByDelimiter(valueStr, delimiter).map(parseValue));
      } else {
        stack.push({ obj: newArray, indent: indent + 1 });
      }
      continue;
    }

    // --- Standard Key-Value: key: value ---
    const kvMatch = trimmed.match(KEY_VALUE);
    if (kvMatch) {
      const key = kvMatch[1].trim();
      const valStr = kvMatch[2].trim();

      if (valStr === '') {
        const newObj = {};
        assignOwnProperty(parent, key, newObj);
        stack.push({ obj: newObj, indent: indent + 1 });
      } else {
        assignOwnProperty(parent, key, parseValue(valStr));
      }
      continue;
    }
  }

  return root;
}
package/src/utils.js ADDED
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Utility functions for TOON conversion
3
+ */
4
+
/**
 * Encodes bare ampersands in a raw XML string so they do not cause parsing
 * errors. Ampersands that already start a reference are left untouched:
 * decimal (`&#169;`), hexadecimal (`&#xA9;`) or named (`&amp;`). Anything else,
 * including a malformed `&#` that is not followed by digits and a semicolon,
 * is rewritten to `&amp;`.
 *
 * @param {string} rawXmlString - Raw XML string
 * @returns {string} Encoded XML string, or '' when the input is not a string
 */
export function encodeXmlReservedChars(rawXmlString) {
  if (typeof rawXmlString !== 'string') {
    return '';
  }

  // Negative lookahead lists the three well-formed reference shapes.
  const bareAmpersand = /&(?!#\d+;|#[xX][0-9a-fA-F]+;|\w+;)/g;
  return rawXmlString.replace(bareAmpersand, '&amp;');
}
20
+
/**
 * Splits a string by delimiter while respecting quoted strings.
 *
 * A double quote toggles "inside quotes" unless it is escaped, i.e. preceded
 * by an odd number of consecutive backslashes. Counting the whole run matters:
 * in `"a\\"` the closing quote follows an escaped backslash and must still
 * close the string. Delimiters inside quotes are kept as part of the value.
 * The pieces are returned verbatim (quotes and escapes are not removed).
 *
 * @param {string} text - Text to split
 * @param {string} delimiter - Delimiter character
 * @returns {string[]} Array of split values (always at least one element)
 */
export function splitByDelimiter(text, delimiter) {
  const result = [];
  let current = '';
  let inQuote = false;
  let backslashRun = 0; // consecutive backslashes immediately before `char`

  for (let i = 0; i < text.length; i++) {
    const char = text[i];

    if (char === '"' && backslashRun % 2 === 0) {
      inQuote = !inQuote;
    }
    backslashRun = char === '\\' ? backslashRun + 1 : 0;

    if (char === delimiter && !inQuote) {
      result.push(current);
      current = '';
    } else {
      current += char;
    }
  }

  result.push(current);
  return result;
}
46
+
/**
 * Parses a value string into its correct JavaScript type.
 *
 * Rules, in order:
 *  - `true` / `false` / `null` map to the corresponding literal
 *  - the empty string stays an empty string
 *  - text that `Number()` accepts becomes a number, except text with a leading
 *    zero that is not `0` or `0.<digits...>` (so `007` and `0x10` stay strings)
 *  - text wrapped in double quotes is unquoted and `\"` / `\\` are unescaped
 *  - everything else is returned as the trimmed string
 *
 * Text that merely starts like a number (for example `0.1.2`) is kept as a
 * string rather than turned into NaN.
 *
 * @param {string} val - Value string to parse
 * @returns {*} Parsed value (string, number, boolean, or null); non-string
 *   input is returned unchanged
 */
export function parseValue(val) {
  if (typeof val !== 'string') return val;

  const text = val.trim();
  if (text === 'true') return true;
  if (text === 'false') return false;
  if (text === 'null') return null;
  if (text === '') return '';
  if (text === '0') return 0;

  const numeric = Number(text);
  if (!Number.isNaN(numeric)) {
    // Leading zeros are preserved as strings unless the value is a fraction.
    if (!text.startsWith('0') || /^0\./.test(text)) return numeric;
  }

  // String unquoting; the length check keeps a lone `"` from becoming ''.
  if (text.length >= 2 && text.startsWith('"') && text.endsWith('"')) {
    return text.slice(1, -1).replace(/\\"/g, '"').replace(/\\\\/g, '\\');
  }
  return text;
}
71
+
/**
 * Formats a value according to TOON rules.
 *
 * Strings are wrapped in double quotes with backslashes and double quotes
 * escaped. Backslashes are escaped first so that the escapes added for quotes
 * are not doubled, and so that a value ending in a backslash cannot swallow
 * the closing quote.
 *
 * @param {*} v - Value to format
 * @returns {*} `"null"` for null, a quoted string for strings; any other value
 *   (number, boolean) is returned as-is for the caller to interpolate
 */
export function formatValue(v) {
  if (v === null) return 'null';
  if (typeof v === 'string') {
    const escaped = v.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    return `"${escaped}"`;
  }
  return v; // number, boolean
}
@@ -0,0 +1,218 @@
1
+ /**
2
+ * TOON String Validator
3
+ */
4
+
5
+ import { splitByDelimiter } from './utils.js';
6
+
/**
 * Validates a TOON string for syntax and structural correctness.
 *
 * The validator walks the input line by line, skipping blank lines and lines
 * starting with `#`. It checks that:
 *  - inline arrays contain as many items as their header declares
 *  - a deeper-indented line follows a line that opens a block
 *  - un-indentation returns to a previously seen indentation level
 *  - block arrays contain as many `-` items as declared
 *  - every remaining line matches one of the known line shapes
 *
 * @param {string} toonString - TOON string to validate
 * @returns {{isValid: boolean, error: string|null}} Validation result
 */
export function validateToonString(toonString) {
  if (!toonString || typeof toonString !== 'string') {
    return { isValid: false, error: 'Input must be a non-empty string.' };
  }

  const lines = toonString.split('\n');
  // Stack of contexts: { indent, type: 'root'|'object'|'array', expected?, count? }
  const contextStack = [{ indent: 0, type: 'root', count: 0 }];
  let lineNumber = 0;
  // Most recent non-blank, non-comment line. Used as the "parent" line when
  // indentation increases, so blank or comment lines between a header and its
  // children do not trigger a false indentation error.
  let lastContentLine = '';

  // Regex Definitions (based on TOON Rules)
  const REGEX = {
    mapKey: /^[^:\[]+:\s*$/,
    arrayKey: /^[^:]+\[(\d+)([\t|])?\](?:\{[^}]+\})?:\s*(.*)$/, // Capture N, delimiter, content
    rootArray: /^\[(\d+)([\t|])?\](?:\{[^}]+\})?:\s*(.*)$/, // Capture N, delimiter, content
    listItem: /^\-.*/,
    listItemEmpty: /^\-\s*$/,
    keyValue: /^[^:\[]+:\s*(?:".*?"|[^"].*)$/,
    tabularRow: /^\s*[^:]+\s*$/,
  };

  let isInsideTabularArray = false;

  // A line may be followed by deeper-indented content only if it is a map
  // key, an array header, or an empty list item marker.
  function opensNewBlock(trimmedLine) {
    return trimmedLine.match(REGEX.mapKey) ||
      trimmedLine.match(REGEX.arrayKey) ||
      trimmedLine.match(REGEX.rootArray) ||
      trimmedLine.match(REGEX.listItemEmpty);
  }

  // An array header that also carries a `{fields}` list starts tabular rows.
  function startsTabular(trimmedLine) {
    const isArray = trimmedLine.match(REGEX.arrayKey) || trimmedLine.match(REGEX.rootArray);
    return isArray && trimmedLine.includes('{') && trimmedLine.includes('}');
  }

  for (const rawLine of lines) {
    lineNumber++;
    const line = rawLine.trimEnd();

    if (line.trim() === '' || line.trim().startsWith('#')) {
      continue;
    }

    const trimmedLine = line.trim();
    const prevLineTrimmed = lastContentLine;
    lastContentLine = trimmedLine;

    const currentIndent = rawLine.search(/\S|$/);
    const currentContext = contextStack[contextStack.length - 1];
    const requiredIndent = currentContext.indent;

    // --- Inline Array Validation ---
    const arrayMatch = trimmedLine.match(REGEX.arrayKey) || trimmedLine.match(REGEX.rootArray);
    if (arrayMatch) {
      const size = parseInt(arrayMatch[1], 10);
      const delimChar = arrayMatch[2];
      const content = arrayMatch[3];

      if (content && content.trim() !== '') {
        // Inline Array: Validate immediately.
        // The header regex captures the delimiter character itself (a real
        // tab or a pipe), so it can be used directly; comma is the default.
        const delimiter = delimChar || ',';

        const items = splitByDelimiter(content, delimiter);
        const validItems = items.filter((i) => i.trim() !== '');

        if (validItems.length !== size) {
          return { isValid: false, error: `L${lineNumber}: Array size mismatch. Declared ${size}, found ${validItems.length} inline items.` };
        }
      } else {
        // Block Array start.
        if (trimmedLine.match(REGEX.rootArray) && contextStack.length === 1) {
          contextStack[0].type = 'array';
          contextStack[0].expected = size;
          contextStack[0].count = 0;
        }
      }
    }

    // --- State Management (Tabular) ---
    // NOTE(review): row tracking reads and updates the root context even for
    // nested tables, so only a root-level tabular array has its row count
    // checked against the declared size.
    if (isInsideTabularArray) {
      const rootContext = contextStack[0];
      if (currentIndent >= rootContext.indent || (rootContext.indent === 0 && currentIndent > 0)) {
        if (trimmedLine.includes(':') && !trimmedLine.startsWith('"')) {
          return { isValid: false, error: `L${lineNumber}: Tabular rows cannot contain a colon.` };
        }
        if (rootContext.type === 'array') {
          rootContext.count++;
        }
        if (rootContext.indent === 0) {
          rootContext.indent = currentIndent;
        }
        continue;
      } else {
        isInsideTabularArray = false;
      }
    }

    // --- Indentation Check ---
    if (currentIndent > requiredIndent) {
      // New Block: the previous content line must be able to own children.
      if (!opensNewBlock(prevLineTrimmed)) {
        return { isValid: false, error: `L${lineNumber}: Indentation error.` };
      }

      let newContext = { indent: currentIndent, type: 'object' };

      const prevArrayMatch = prevLineTrimmed.match(REGEX.arrayKey) || prevLineTrimmed.match(REGEX.rootArray);
      if (prevArrayMatch) {
        // The root array reuses contextStack[0]; it must not be pushed again.
        const isRootArrayAlreadySet = prevLineTrimmed.match(REGEX.rootArray) &&
          contextStack.length === 1 &&
          contextStack[0].type === 'array';

        if (!isRootArrayAlreadySet) {
          const size = parseInt(prevArrayMatch[1], 10);
          newContext = { indent: currentIndent, type: 'array', expected: size, count: 0 };
          contextStack.push(newContext);
        }
      } else {
        contextStack.push(newContext);
      }

      // First child of a root array fixes the indent its items live at.
      if (contextStack.length === 1 && contextStack[0].type === 'array' && contextStack[0].indent === 0) {
        contextStack[0].indent = currentIndent;
      }

      const targetContext = contextStack[contextStack.length - 1];
      if (targetContext.type === 'array') {
        if (trimmedLine.match(REGEX.listItem)) {
          targetContext.count++;
        }
      }
    } else if (currentIndent < requiredIndent) {
      // Un-indentation: close contexts until one sits at this indent.
      let foundMatch = false;

      while (contextStack.length > 1) {
        const popped = contextStack.pop();

        if (popped.type === 'array') {
          if (popped.count !== popped.expected) {
            return { isValid: false, error: `Array size mismatch. Declared ${popped.expected}, found ${popped.count} items (ending around L${lineNumber}).` };
          }
        }

        if (currentIndent === contextStack[contextStack.length - 1].indent) {
          foundMatch = true;
          break;
        }
      }

      if (!foundMatch && currentIndent !== 0) {
        return { isValid: false, error: `L${lineNumber}: Invalid un-indentation.` };
      }

      const parentContext = contextStack[contextStack.length - 1];
      if (parentContext.type === 'array') {
        if (trimmedLine.match(REGEX.listItem)) {
          parentContext.count++;
        }
      }
    } else {
      // Same Indent
      if (currentContext.type === 'array') {
        if (trimmedLine.match(REGEX.listItem)) {
          currentContext.count++;
        }
      }
    }

    // --- Syntax Check ---
    if (trimmedLine.match(REGEX.arrayKey) || trimmedLine.match(REGEX.rootArray)) {
      if (startsTabular(trimmedLine)) isInsideTabularArray = true;
    } else if (trimmedLine.match(REGEX.mapKey)) {
      // Map key header: valid as-is.
    } else if (trimmedLine.match(REGEX.listItem)) {
      // List item: valid as-is.
    } else if (trimmedLine.includes(':')) {
      if (!trimmedLine.match(REGEX.keyValue)) {
        return { isValid: false, error: `L${lineNumber}: Invalid Key-Value assignment.` };
      }
    } else if (trimmedLine.startsWith('"') && trimmedLine.endsWith('"')) {
      // Bare quoted string: valid as-is.
    } else {
      return { isValid: false, error: `L${lineNumber}: Unrecognized TOON syntax.` };
    }
  }

  // Final check: every array still open must have received all its items.
  while (contextStack.length > 1) {
    const popped = contextStack.pop();
    if (popped.type === 'array') {
      if (popped.count !== popped.expected) {
        return { isValid: false, error: `Array size mismatch. Declared ${popped.expected}, found ${popped.count} items.` };
      }
    }
  }

  // Check root array if applicable
  if (contextStack[0].type === 'array') {
    if (contextStack[0].count !== contextStack[0].expected) {
      return { isValid: false, error: `Root Array size mismatch. Declared ${contextStack[0].expected}, found ${contextStack[0].count} items.` };
    }
  }

  return { isValid: true, error: null };
}