@zxsylph/dbml-formatter 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ ---
2
+ description: How to publish the package to NPM
3
+ ---
4
+
5
+ # Publishing to NPM
6
+
7
+ Follow these steps to publish your `dbml_formatter` tool to the NPM registry.
8
+
9
+ ## 1. Prepare the Package
10
+
11
+ 1. **Check Package Name**: The name `dbml-extension` might be taken. You should check [npmjs.com](https://www.npmjs.com/) or change `"name"` in `package.json` to something unique (e.g., `@your-username/dbml-formatter` or `dbml-fmt-cli`).
12
+ 2. **Update Version**: Ensure `"version"` in `package.json` is correct (e.g., `1.0.0` for the initial release).
13
+ 3. **Check Dependencies**: The CLI loads its TypeScript sources through `ts-node` at runtime, so `typescript` and `ts-node` must be listed under `"dependencies"` rather than `"devDependencies"` (they already are).
14
+
15
+ ## 2. Authenticate
16
+
17
+ Run the following command to log in to your NPM account:
18
+
19
+ ```bash
20
+ npm login
21
+ ```
22
+
23
+ ## 3. Publish
24
+
25
+ Run the publish command:
26
+
27
+ ```bash
28
+ npm publish
29
+ ```
30
+
31
+ If you are using a scoped name (like `@user/pkg`), use:
32
+
33
+ ```bash
34
+ npm publish --access public
35
+ ```
36
+
37
+ ## 4. Usage After Publishing
38
+
39
+ Once published, anyone can run your tool using `npx`:
40
+
41
+ ```bash
42
+ npx [your-package-name] <file.dbml>
43
+ ```
44
+
45
+ If you named it `dbml-extension`, they would run `npx dbml-extension <file>`.
46
+ If you want the command to be `npx dbml_formatter`, either name the package `dbml_formatter` (if that name is available) or have users select the binary from your package explicitly:
47
+
48
+ ```bash
49
+ npx -p [your-package-name] dbml_formatter <file>
50
+ ```
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ require('ts-node').register(); // Register ts-node so the TypeScript entry point below can be required directly.
3
+ require('../src/cli.ts'); // Requiring the CLI module runs it; it reads its arguments from process.argv.
package/package.json ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ "name": "@zxsylph/dbml-formatter",
3
+ "version": "1.0.1",
4
+ "description": "",
5
+ "main": "index.js",
6
+ "scripts": {
7
+ "test": "echo \"Error: no test specified\" && exit 1"
8
+ },
9
+ "keywords": [],
10
+ "author": "",
11
+ "license": "ISC",
12
+ "bin": {
13
+ "dbml_formatter": "./bin/dbml_formatter.js"
14
+ },
15
+ "dependencies": {
16
+ "@dbml/core": "^5.4.1",
17
+ "@types/node": "^25.0.8",
18
+ "ts-node": "^10.9.2",
19
+ "typescript": "^5.9.3"
20
+ }
21
+ }
package/sample.dbml ADDED
@@ -0,0 +1,21 @@
1
+ // Table definition for users
2
+ Table users {
3
+ id integer [primary key, increment] // Auto-incrementing ID
4
+ username varchar
5
+ email varchar [unique]
6
+ created_at timestamp
7
+ }
8
+
9
+ /*
10
+ Multi-line comment
11
+ about posts
12
+ */
13
+ Table posts{
14
+ id integer [primary key]
15
+ user_id integer
16
+ title varchar
17
+ body text [note: 'Content of the post']
18
+ }
19
+
20
+ // Reference link
21
+ Ref: posts.user_id > users.id
@@ -0,0 +1,3 @@
1
+ Table messy{id int[pk]
2
+ name varchar}
3
+ Ref : messy.id>other.id
@@ -0,0 +1,4 @@
1
+ Table messy{id int[pk]
2
+ name varchar
3
+ a_id int [Ref: > other.id, note: 'other.id']}
4
+ Ref : messy.id>other.id
@@ -0,0 +1,5 @@
1
+ Table messy{id int[pk]
2
+ name varchar
3
+ a_id int [Ref: > other.id, note: 'other.id']
4
+ note: "table note"}
5
+ Ref : messy.id>other.id
@@ -0,0 +1,5 @@
1
+ table messy{id int[pk]
2
+ name varchar
3
+ a_id int [ref: > other.id, note: 'other.id']
4
+ note: "table note"}
5
+ Ref : messy.id>other.id
@@ -0,0 +1,6 @@
1
+ table messy{id int[pk]
2
+ name varchar
3
+ a_id int [ref: > other.id, note: 'other.id']
4
+ table_id int [note: 'link to table table']
5
+ note: "table note"}
6
+ Ref : messy.id>other.id
package/src/cli.ts ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env node
2
+ import * as fs from 'fs';
3
+ import * as path from 'path';
4
+ import { format } from './formatter/formatter';
5
+ // CLI entry point: formats the DBML file named by the first argument and prints the result to stdout.
6
+ const args = process.argv.slice(2);
7
+
8
+ if (args.length === 0) {
9
+ console.error('Usage: dbml_formatter <file>'); // Same command name as the `bin` entry in package.json.
10
+ process.exit(1);
11
+ }
12
+
13
+ const filePath = args[0];
14
+ const absPath = path.resolve(process.cwd(), filePath); // Relative paths are resolved against the current working directory.
15
+
16
+ if (!fs.existsSync(absPath)) {
17
+ console.error(`File not found: ${absPath}`);
18
+ process.exit(1);
19
+ }
20
+
21
+ try {
22
+ const content = fs.readFileSync(absPath, 'utf-8');
23
+ const formatted = format(content);
24
+ console.log(formatted);
25
+ } catch (error) {
26
+ console.error('Error formatting file:', error); // Read failures and formatter failures both exit with status 1.
27
+ process.exit(1);
28
+ }
@@ -0,0 +1,21 @@
1
+ import { importer, exporter } from '@dbml/core';
2
+ import * as fs from 'fs';
3
+ import * as path from 'path';
4
+ // Script: passes sample.dbml through the @dbml/core importer and exporter and prints each stage.
5
+ const samplePath = path.join(__dirname, '../sample.dbml');
6
+ const content = fs.readFileSync(samplePath, 'utf-8');
7
+
8
+ console.log('Original Content:');
9
+ console.log(content);
10
+
11
+ try {
12
+ const imported = importer.import(content, 'dbml'); // Result is what the log line below labels the intermediate model.
13
+ console.log('Imported (Intermediate Model):');
14
+ // console.log(imported); // It might be large
15
+
16
+ const exported = exporter.export(imported, 'dbml'); // Turn the imported model back into DBML text.
17
+ console.log('Exported (Formatted Content):');
18
+ console.log(exported);
19
+ } catch (err) {
20
+ console.error('Error:', err); // Errors are only logged; the script does not set a failing exit code.
21
+ }
@@ -0,0 +1,558 @@
1
+ import { Token, TokenType, tokenize } from './tokenizer';
2
+
3
+ export interface FormatterOptions {
4
+ indentSize?: number; // Indent characters per indentation level; `format` uses 2 when this is unset or 0.
5
+ useTabs?: boolean; // When true the indent character is a tab instead of a space.
6
+ }
7
+ /**
8
+ * Formats DBML source text and returns the formatted text.
9
+ *
10
+ * The input is tokenized and walked once. Two modes are used:
11
+ *
12
+ * - Table bodies (`Table ... { ... }`) are buffered and re-emitted: a
13
+ * table-level `note:` line is moved to the top of the body and followed
14
+ * by one empty line, a `note` entry inside a field's `[...]` settings is
15
+ * moved to the front of the list, the second word of each line (the
16
+ * field's data type) is wrapped in double quotes, and every table is
17
+ * followed by one empty line.
18
+ * - Everything else is emitted token by token: runs of blank lines are
19
+ * capped at one empty line, single-quoted strings are rewritten with
20
+ * double quotes, and the keywords `table`, `ref` and `note:` are
21
+ * normalized to `Table`, `Ref` and `Note:`. Lines inside any other
22
+ * `{ ... }` block (for example an Enum) are indented one level per
23
+ * enclosing brace.
24
+ *
25
+ * @param input - DBML source text.
26
+ * @param options - Indentation settings; defaults to two spaces per level.
27
+ * @returns The formatted text, trimmed and terminated by a single newline.
28
+ */
29
+ export function format(input: string, options: FormatterOptions = {}): string {
30
+ const rawTokens = tokenize(input);
31
+ const indentSize = options.indentSize || 2;
32
+ const indentChar = options.useTabs ? '\t' : ' ';
33
+ const oneIndent = indentChar.repeat(indentSize);
34
+
35
+ // The token stream is processed linearly; when the `{` of a Table is
36
+ // reached the loop switches to "buffer mode" and reformats the whole body at once.
37
+
38
+ let output = '';
39
+ let indentLevel = 0;
40
+
41
+ // Helper to get current indentation string
42
+ const getIndent = () => oneIndent.repeat(Math.max(0, indentLevel));
43
+
44
+ let i = 0;
45
+ while (i < rawTokens.length) {
46
+ const token = rawTokens[i];
47
+
48
+ // --- Lookahead to detect start of Table block ---
49
+ if (token.type === TokenType.Symbol && token.value === '{') {
50
+ // Identify if this is a Table block
51
+ let isTable = false;
52
+ let backIndex = i - 1;
53
+ while(backIndex >= 0 && (rawTokens[backIndex].type === TokenType.Whitespace || rawTokens[backIndex].type === TokenType.Comment)) {
54
+ backIndex--;
55
+ }
56
+ // Determine block keyword (heuristic search back)
57
+ let searchIndex = backIndex;
58
+ while (searchIndex >= 0) {
59
+ const t = rawTokens[searchIndex];
60
+ if (t.type === TokenType.Symbol && (t.value === '}' || t.value === '{')) break;
61
+ if (t.type === TokenType.Word) {
62
+ // Check case-insensitive
63
+ if (t.value.toLowerCase() === 'table') {
64
+ isTable = true;
65
+ break;
66
+ }
67
+ }
68
+ searchIndex--;
69
+ }
70
+
71
+ if (isTable) {
72
+ // BUFFER MODE
73
+ // 1. Flush/print the '{'
74
+ // Should ensure space before '{'
75
+ if (output.length > 0 && !output.endsWith(' ') && !output.endsWith('\n')) {
76
+ output += ' ';
77
+ }
78
+ output += '{';
79
+ output += '\n';
80
+ indentLevel++;
81
+ i++;
82
+
83
+ // 2. Collect tokens inside `{ ... }`
84
+ const buffer: Token[] = [];
85
+ let depth = 1;
86
+ while (i < rawTokens.length) {
87
+ const t = rawTokens[i];
88
+ if (t.type === TokenType.Symbol && t.value === '{') depth++;
89
+ if (t.type === TokenType.Symbol && t.value === '}') {
90
+ depth--;
91
+ if (depth === 0) break; // Found end of table
92
+ }
93
+ buffer.push(t);
94
+ i++;
95
+ }
96
+ // Now `rawTokens[i]` is the closing `}` (or we ran out)
97
+
98
+ // 3. Process the buffer
99
+ // a. Separate into logical "lines" (statements)
100
+ // b. Identify Table Note
101
+ // c. Identify Field Lines
102
+
103
+ let tableNoteTokens: Token[] = [];
104
+ const otherLinesGroups: Token[][] = [];
105
+
106
+ // Split buffer into "line groups".
107
+ // A line group is a set of tokens ending with newline(s).
108
+ let currentGroup: Token[] = [];
109
+
110
+ for (let k = 0; k < buffer.length; k++) {
111
+ const t = buffer[k];
112
+
113
+ // Split on every newline unless the group still has an open `[` or `{`.
114
+ currentGroup.push(t);
115
+
116
+ if (t.type === TokenType.Whitespace && t.value.includes('\n')) {
117
+ // Check depth
118
+ let brDepth = 0;
119
+ for (const gt of currentGroup) {
120
+ if (gt.type === TokenType.Symbol && gt.value === '[') brDepth++;
121
+ if (gt.type === TokenType.Symbol && gt.value === ']') brDepth--;
122
+ if (gt.type === TokenType.Symbol && gt.value === '{') brDepth++;
123
+ if (gt.type === TokenType.Symbol && gt.value === '}') brDepth--;
124
+ }
125
+
126
+ if (brDepth === 0) {
127
+ // End of logical line
128
+ // Check if it is a Note
129
+ const meaningful = currentGroup.filter(x => x.type !== TokenType.Whitespace && x.type !== TokenType.Comment);
130
+
131
+ let isNote = false;
132
+ if (meaningful.length >= 3) {
133
+ if (meaningful[0].type === TokenType.Word && meaningful[0].value.toLowerCase() === 'note' &&
134
+ meaningful[1].type === TokenType.Symbol && meaningful[1].value === ':') {
135
+ isNote = true;
136
+ }
137
+ }
138
+
139
+ if (isNote) {
140
+ // This is the table note
141
+ tableNoteTokens = currentGroup;
142
+ } else {
143
+ otherLinesGroups.push(currentGroup);
144
+ }
145
+ currentGroup = [];
146
+ }
147
+ }
148
+ }
149
+ if (currentGroup.length > 0) {
150
+ // Classify the trailing group (the body did not end with a newline)
151
+ const meaningful = currentGroup.filter(x => x.type !== TokenType.Whitespace && x.type !== TokenType.Comment);
152
+
153
+ let isNote = false;
154
+ if (meaningful.length >= 3) {
155
+ if (meaningful[0].type === TokenType.Word && meaningful[0].value.toLowerCase() === 'note' &&
156
+ meaningful[1].type === TokenType.Symbol && meaningful[1].value === ':') {
157
+ isNote = true;
158
+ }
159
+ }
160
+
161
+ if (isNote) {
162
+ tableNoteTokens = currentGroup;
163
+ } else {
164
+ otherLinesGroups.push(currentGroup);
165
+ }
166
+ }
167
+
168
+ // 4. Print Table Note first (if exists)
169
+ if (tableNoteTokens.length > 0) {
170
+ // The note's tokens usually end with their own newline token; when the
171
+ // note was the last line of the body they do not, and the check below
172
+ // supplies the missing line breaks.
173
+
174
+ output += processTokens(tableNoteTokens, indentLevel, indentChar, indentSize, false);
175
+
176
+ // Rule: after table note add one empty line
177
+ // So we want `output` to end with `\n\n`.
178
+ if (output.endsWith('\n\n')) {
179
+ // already good
180
+ } else if (output.endsWith('\n')) {
181
+ output += '\n';
182
+ } else {
183
+ output += '\n\n';
184
+ }
185
+ }
186
+
187
+ // 5. Print other lines (Process Fields)
188
+ for (let lgIdx = 0; lgIdx < otherLinesGroups.length; lgIdx++) {
189
+ const lineTokens = otherLinesGroups[lgIdx];
190
+ // Check for Field Settings `[...]` reordering
191
+ // Find `[` ... `]`
192
+ let openBracketIdx = -1;
193
+ let closeBracketIdx = -1;
194
+
195
+ for(let idx=0; idx<lineTokens.length; idx++) {
196
+ if (lineTokens[idx].type === TokenType.Symbol && lineTokens[idx].value === '[') openBracketIdx = idx;
197
+ if (lineTokens[idx].type === TokenType.Symbol && lineTokens[idx].value === ']') closeBracketIdx = idx;
198
+ }
199
+
200
+ if (openBracketIdx !== -1 && closeBracketIdx !== -1 && closeBracketIdx > openBracketIdx) {
201
+ // Extract tokens inside
202
+ const inside = lineTokens.slice(openBracketIdx + 1, closeBracketIdx);
203
+ // Split the settings on commas
204
+ const settings: Token[][] = [];
205
+ let currentSetting: Token[] = [];
206
+ for(const t of inside) {
207
+ if (t.type === TokenType.Symbol && t.value === ',') {
208
+ settings.push(currentSetting);
209
+ currentSetting = [];
210
+ } else {
211
+ currentSetting.push(t);
212
+ }
213
+ }
214
+ if (currentSetting.length > 0) settings.push(currentSetting);
215
+
216
+ // Identify "note" setting
217
+ let noteIndex = -1;
218
+ for(let s=0; s<settings.length; s++) {
219
+ const sMeaningful = settings[s].filter(x => x.type !== TokenType.Whitespace && x.type !== TokenType.Comment);
220
+ if (sMeaningful.length > 0 && sMeaningful[0].type === TokenType.Word && sMeaningful[0].value.toLowerCase() === 'note') {
221
+ noteIndex = s;
222
+ break;
223
+ }
224
+ }
225
+
226
+ if (noteIndex > 0) { // If found and NOT already first
227
+ // Move to front
228
+ const noteSetting = settings.splice(noteIndex, 1)[0];
229
+ settings.unshift(noteSetting);
230
+
231
+ // Reconstruct lineTokens
232
+ const newInside: Token[] = [];
233
+ for(let s=0; s<settings.length; s++) {
234
+ newInside.push(...settings[s]);
235
+ if (s < settings.length - 1) {
236
+ newInside.push({ type: TokenType.Symbol, value: ',', line: 0, column: 0 });
237
+ }
238
+ }
239
+
240
+ // Replace in lineTokens
241
+ lineTokens.splice(openBracketIdx + 1, closeBracketIdx - openBracketIdx - 1, ...newInside);
242
+ }
243
+ }
244
+
245
+ // 6. Apply "Quote Data Types" logic
246
+ let wordCount = 0;
247
+ for (const t of lineTokens) {
248
+ // Only words before the settings `[` are counted
249
+ if (t.type === TokenType.Symbol && t.value === '[') break;
250
+ if (t.type === TokenType.Word) {
251
+ wordCount++;
252
+ if (wordCount === 2) {
253
+ // Quote this token!
254
+ t.value = `"${t.value}"`;
255
+ // Note: we are modifying the token object directly in the buffer.
256
+ }
257
+ }
258
+ if (t.type === TokenType.String && wordCount < 2) {
259
+ // A string in the first or second position counts as a word, so an
260
+ // already quoted type such as `name "varchar"` is not quoted again.
261
+ wordCount++;
262
+ }
263
+ }
264
+
265
+ // Each line group normally ends with its own newline token, so
266
+ // consecutive groups do not run together. The last group of a body
267
+ // may lack one; the end-of-block code below adds the line break
268
+ // before the closing brace.
269
+
270
+ // `processTokens` mutates the keyword tokens it normalizes and returns
271
+ // the formatted text for this group, indented at the current
272
+ // `indentLevel`.
273
+
274
+ output += processTokens(lineTokens, indentLevel, indentChar, indentSize, true);
275
+
276
+ // A group that is not the last one but produced no trailing newline
277
+ // (its tokens did not end with a newline token) would run into the
278
+ // next group, so force a line break between them.
279
+
280
+ // Only applies between groups; the last group is closed off below.
281
+ if (lgIdx < otherLinesGroups.length - 1) {
282
+ if (!output.endsWith('\n')) {
283
+ // This implies the group didn't end with newline token.
284
+ // Force it.
285
+ output += '\n';
286
+ }
287
+ }
288
+ }
289
+
290
+ // End block
291
+ indentLevel--;
292
+ if (!output.endsWith('\n')) output += '\n';
293
+ output += getIndent() + '}';
294
+
295
+ // Rule: after the table's closing `}` add one empty line.
296
+ output += '\n'; // This ensures at least one newline after `}`
297
+ // `output` now ends with `}\n`; a second newline turns that into
298
+ // `}\n\n`, i.e. exactly one empty line after the table (the final
299
+ // `trim()` removes it again at the end of the file).
300
+ output += '\n';
301
+
302
+ if (i < rawTokens.length && rawTokens[i].type === TokenType.Symbol && rawTokens[i].value === '}') {
303
+ i++;
304
+ }
305
+
306
+ continue; // Continue outer loop
307
+ }
308
+ }
309
+
310
+ // --- Fallback: linear processing for everything outside a Table body ---
311
+ //
312
+ // `processTokens` formats a self-contained token list and decides spacing
313
+ // from its own local output, so feeding it one token at a time would
314
+ // never insert the space that separates a token from its predecessor.
315
+ // The main loop therefore handles spacing and indentation itself, based
316
+ // on how the global `output` currently ends.
317
+ //
318
+ // Brace depth is tracked in `indentLevel`, so the contents of non-Table
319
+ // blocks keep one indent per enclosing brace instead of being flattened
320
+ // to column 0 when their original leading whitespace is discarded.
321
+ //
322
+ // Table blocks never get here: buffer mode above consumes them through
323
+ // the closing brace and restores `indentLevel` before continuing.
324
+
325
+ // Handle whitespace first
326
+ if (token.type === TokenType.Whitespace) {
327
+ const newlines = (token.value.match(/\n/g) || []).length;
328
+ if (newlines > 0) {
329
+ const toPrint = Math.min(newlines, 2);
330
+ if (!output.endsWith('\n')) {
331
+ output += '\n'.repeat(toPrint);
332
+ } else {
333
+ if (toPrint > 1 && !output.endsWith('\n\n')) {
334
+ output += '\n';
335
+ }
336
+ }
337
+ }
338
+ i++;
339
+ continue;
340
+ }
341
+
342
+ // A closing brace ends the enclosing non-Table block: dedent first so the
343
+ // brace lines up with the line that opened the block. Clamped at 0 so a
344
+ if (token.type === TokenType.Symbol && token.value === '}') { // stray `}` cannot push later tables left.
345
+ indentLevel = Math.max(0, indentLevel - 1);
346
+ }
347
+
348
+ // Apply spacing based on GLOBAL `output`
349
+ if (output.endsWith('\n')) {
350
+ output += getIndent();
351
+ } else if (output.length > 0) {
352
+ let needsSpace = true;
353
+ const lastChar = output[output.length - 1];
354
+ if (lastChar === ' ' || lastChar === '\n' || lastChar === '(' || lastChar === '[' || lastChar === '.') {
355
+ needsSpace = false;
356
+ }
357
+ if (token.type === TokenType.Symbol) {
358
+ if (token.value === ',' || token.value === ']' || token.value === ')' || token.value === '.' || token.value === ':') {
359
+ needsSpace = false;
360
+ }
361
+ // Ref: > or < logic?
362
+ }
363
+ if (needsSpace) output += ' ';
364
+ }
365
+
366
+ // Output with Keyword Normalization
367
+ switch (token.type) {
368
+ case TokenType.Word:
369
+ // Global Keyword PascalCase
370
+ // table -> Table
371
+ if (token.value.toLowerCase() === 'table') {
372
+ token.value = 'Table';
373
+ }
374
+
375
+ // ref -> Ref
376
+ if (token.value.toLowerCase() === 'ref') {
377
+ token.value = 'Ref';
378
+ }
379
+
380
+ // note -> Note (if followed by colon?)
381
+ if (token.value.toLowerCase() === 'note') {
382
+ // Check next token for `:`
383
+ let nextIdx = i + 1;
384
+ while(nextIdx < rawTokens.length && (rawTokens[nextIdx].type === TokenType.Whitespace || rawTokens[nextIdx].type === TokenType.Comment)) {
385
+ nextIdx++;
386
+ }
387
+ if (nextIdx < rawTokens.length && rawTokens[nextIdx].type === TokenType.Symbol && rawTokens[nextIdx].value === ':') {
388
+ token.value = 'Note';
389
+ }
390
+ }
391
+
392
+ output += token.value;
393
+ break;
394
+
395
+ case TokenType.String:
396
+ let val = token.value;
397
+ if (val.startsWith("'") && !val.startsWith("'''")) {
398
+ const content = val.slice(1, -1);
399
+ const escaped = content.replace(/"/g, '\\"');
400
+ val = `"${escaped}"`;
401
+ }
402
+ output += val;
403
+ break;
404
+ default:
405
+ output += token.value;
406
+ break;
407
+ }
408
+
409
+ // An opening brace of a non-Table block (Enum, Project, TableGroup, ...)
410
+ // indents every line up to its matching closing brace.
411
+ if (token.type === TokenType.Symbol && token.value === '{') {
412
+ indentLevel++;
413
+ }
414
+
415
+ i++;
416
+ }
417
+
418
+ return output.trim() + '\n';
419
+ }
420
+
421
+ // Formats one self-contained token list (a table note or one line group of a table body) and returns its text.
422
+ // Indentation starts at baseIndentLevel. NOTE(review): `isInsideTable` is accepted but never read in this function.
423
+ function processTokens(
424
+ tokens: Token[],
425
+ baseIndentLevel: number,
426
+ indentChar: string,
427
+ indentSize: number,
428
+ isInsideTable: boolean
429
+ ): string {
430
+
431
+ let localOutput = '';
432
+ let currentIndentLevel = baseIndentLevel;
433
+ const oneIndent = indentChar.repeat(indentSize);
434
+ const getLocalIndent = () => oneIndent.repeat(Math.max(0, currentIndentLevel));
435
+
436
+ // Returns true when the `[` at startIdx encloses a comma at its own nesting depth, i.e. the list has several entries.
437
+ const checkArrayMultiline = (startIdx: number): boolean => {
438
+ let depth = 1;
439
+ let hasComma = false;
440
+ for (let k = startIdx + 1; k < tokens.length; k++) {
441
+ if (tokens[k].type === TokenType.Symbol && tokens[k].value === '[') depth++;
442
+ if (tokens[k].type === TokenType.Symbol && tokens[k].value === ']') depth--;
443
+ if (depth === 1 && tokens[k].type === TokenType.Symbol && tokens[k].value === ',') hasComma = true;
444
+ if (depth === 0) return hasComma;
445
+ }
446
+ return false;
447
+ };
448
+
449
+ const multilineArrayStack: boolean[] = []; // One flag per open `[`: whether that list is printed one entry per line.
450
+
451
+ for (let i = 0; i < tokens.length; i++) {
452
+ const token = tokens[i];
453
+
454
+ // Whitespace handling
455
+ if (token.type === TokenType.Whitespace) {
456
+ const newlines = (token.value.match(/\n/g) || []).length;
457
+ if (newlines > 0) {
458
+ const toPrint = Math.min(newlines, 2);
459
+ if (!localOutput.endsWith('\n')) {
460
+ localOutput += '\n'.repeat(toPrint);
461
+ } else {
462
+ if (toPrint > 1 && !localOutput.endsWith('\n\n')) {
463
+ localOutput += '\n';
464
+ }
465
+ }
466
+ }
467
+ continue;
468
+ }
469
+
470
+ // At the start of a line emit the current indent; otherwise decide whether a separating space is needed.
471
+ if (localOutput.length === 0 || localOutput.endsWith('\n')) {
472
+ if (token.value !== '}') {
473
+ localOutput += getLocalIndent();
474
+ }
475
+ } else {
476
+ // Not start of line
477
+ let needsSpace = true;
478
+ const lastChar = localOutput[localOutput.length - 1];
479
+ if (lastChar === ' ' || lastChar === '\n' || lastChar === '(' || lastChar === '[' || lastChar === '.') {
480
+ needsSpace = false;
481
+ }
482
+ if (token.type === TokenType.Symbol) {
483
+ if (token.value === ',' || token.value === ']' || token.value === ')' || token.value === '.' || token.value === ':') {
484
+ needsSpace = false;
485
+ }
486
+ }
487
+ if (needsSpace) localOutput += ' ';
488
+ }
489
+
490
+ switch (token.type) {
491
+ case TokenType.Symbol:
492
+ if (token.value === '{') {
493
+ localOutput += '{\n';
494
+ currentIndentLevel++;
495
+ } else if (token.value === '}') {
496
+ if (!localOutput.endsWith('\n')) localOutput += '\n';
497
+ currentIndentLevel--;
498
+ localOutput += getLocalIndent() + '}';
499
+ } else if (token.value === '[') {
500
+ const isMultiline = checkArrayMultiline(i);
501
+ multilineArrayStack.push(isMultiline);
502
+ localOutput += '[';
503
+ if (isMultiline) {
504
+ localOutput += '\n';
505
+ currentIndentLevel++;
506
+ }
507
+ } else if (token.value === ']') {
508
+ const isMultiline = multilineArrayStack.pop();
509
+ if (isMultiline) {
510
+ if (!localOutput.endsWith('\n')) localOutput += '\n';
511
+ currentIndentLevel--;
512
+ if (localOutput.endsWith('\n')) localOutput += getLocalIndent();
513
+ }
514
+ localOutput += ']';
515
+ } else if (token.value === ',') {
516
+ localOutput += ',';
517
+ const currentMultiline = multilineArrayStack.length > 0 && multilineArrayStack[multilineArrayStack.length - 1];
518
+ if (currentMultiline) localOutput += '\n';
519
+ } else {
520
+ localOutput += token.value;
521
+ }
522
+ break;
523
+
524
+ case TokenType.Word:
525
+ // Handle keyword PascalCase in buffer (this rewrites the token object's value in place)
526
+ if (token.value.toLowerCase() === 'table') token.value = 'Table';
527
+ if (token.value.toLowerCase() === 'ref') token.value = 'Ref';
528
+ if (token.value.toLowerCase() === 'note') {
529
+ // Peek locally inside tokens list
530
+ let nextIdx = i + 1;
531
+ while(nextIdx < tokens.length && (tokens[nextIdx].type === TokenType.Whitespace || tokens[nextIdx].type === TokenType.Comment)) {
532
+ nextIdx++;
533
+ }
534
+ if (nextIdx < tokens.length && tokens[nextIdx].type === TokenType.Symbol && tokens[nextIdx].value === ':') {
535
+ token.value = 'Note';
536
+ }
537
+ }
538
+ localOutput += token.value;
539
+ break;
540
+ // Single-quoted (non-triple) strings are re-emitted with double quotes; embedded double quotes get a backslash.
541
+ case TokenType.String:
542
+ let val = token.value;
543
+ if (val.startsWith("'") && !val.startsWith("'''")) {
544
+ const content = val.slice(1, -1);
545
+ const escaped = content.replace(/"/g, '\\"');
546
+ val = `"${escaped}"`;
547
+ }
548
+ localOutput += val;
549
+ break;
550
+
551
+ default:
552
+ localOutput += token.value;
553
+ break;
554
+ }
555
+ }
556
+
557
+ return localOutput;
558
+ }
@@ -0,0 +1,222 @@
1
+ export enum TokenType {
2
+ Whitespace, // A run of consecutive whitespace characters, newlines included.
3
+ Comment, // A `//` line comment or a `/* ... */` block comment.
4
+ String, // A single-, double- or triple-single-quoted literal, delimiters included.
5
+ Symbol, // One punctuation character: { } [ ] ( ) , : > . - < or a backslash.
6
+ Word, // A run of ASCII letters, digits and underscores.
7
+ Unknown // Any single character that matches none of the categories above.
8
+ }
9
+
10
+ export interface Token {
11
+ type: TokenType; // Category assigned by `tokenize`.
12
+ value: string; // Source text of the token as produced by `tokenize`.
13
+ line: number; // Line on which the token starts; `tokenize` counts from 1.
14
+ column: number; // Column at which the token starts; `tokenize` counts from 1.
15
+ }
16
+ /** Splits DBML source text into a flat token list (whitespace and comments included); each token records where it starts. */
17
+ export function tokenize(input: string): Token[] {
18
+ const tokens: Token[] = [];
19
+ let current = 0;
20
+ let line = 1;
21
+ let column = 1;
22
+
23
+ while (current < input.length) {
24
+ let char = input[current];
25
+
26
+ // Handle Whitespace
27
+ if (/\s/.test(char)) {
28
+ let value = '';
29
+ const startLine = line;
30
+ const startColumn = column;
31
+
32
+ while (current < input.length && /\s/.test(input[current])) {
33
+ if (input[current] === '\n') {
34
+ line++;
35
+ column = 1;
36
+ } else {
37
+ column++;
38
+ }
39
+ value += input[current];
40
+ current++;
41
+ }
42
+ tokens.push({ type: TokenType.Whitespace, value, line: startLine, column: startColumn });
43
+ continue;
44
+ }
45
+
46
+ // Handle Comments
47
+ if (char === '/' && input[current + 1] === '/') {
48
+ let value = '';
49
+ const startLine = line;
50
+ const startColumn = column;
51
+
52
+ while (current < input.length && input[current] !== '\n') {
53
+ value += input[current];
54
+ current++;
55
+ column++;
56
+ }
57
+ tokens.push({ type: TokenType.Comment, value, line: startLine, column: startColumn });
58
+ continue;
59
+ }
60
+
61
+ if (char === '/' && input[current + 1] === '*') {
62
+ let value = '';
63
+ const startLine = line;
64
+ const startColumn = column;
65
+
66
+ value += '/*';
67
+ current += 2;
68
+ column += 2;
69
+
70
+ while (current < input.length) {
71
+ if (input[current] === '*' && input[current + 1] === '/') {
72
+ value += '*/';
73
+ current += 2;
74
+ column += 2;
75
+ break;
76
+ }
77
+ if (input[current] === '\n') {
78
+ line++;
79
+ column = 1;
80
+ } else {
81
+ column++;
82
+ }
83
+ value += input[current];
84
+ current++;
85
+ }
86
+ tokens.push({ type: TokenType.Comment, value, line: startLine, column: startColumn });
87
+ continue;
88
+ }
89
+
90
+ // Handle Strings
91
+ // Triple quote '''
92
+ if (char === '\'' && input[current + 1] === '\'' && input[current + 2] === '\'') {
93
+ let value = "'''";
94
+ const startLine = line;
95
+ const startColumn = column;
96
+ current += 3;
97
+ column += 3;
98
+
99
+ while (current < input.length) {
100
+ if (input[current] === '\'' && input[current + 1] === '\'' && input[current + 2] === '\'') {
101
+ value += "'''";
102
+ current += 3;
103
+ column += 3;
104
+ break;
105
+ }
106
+ if (input[current] === '\n') {
107
+ line++;
108
+ column = 1;
109
+ } else {
110
+ column++;
111
+ }
112
+ value += input[current];
113
+ current++;
114
+ }
115
+ tokens.push({ type: TokenType.String, value, line: startLine, column: startColumn });
116
+ continue;
117
+ }
118
+
119
+ // Single quote '
120
+ if (char === '\'') {
121
+ let value = "'";
122
+ const startLine = line;
123
+ const startColumn = column;
124
+ current++;
125
+ column++;
126
+
127
+ while (current < input.length) {
128
+ // A backslash escapes a following quote or backslash; consuming the pair keeps `\\` from escaping the closing quote.
129
+ if (input[current] === '\\' && (input[current + 1] === '\'' || input[current + 1] === '\\')) {
130
+ value += input[current] + input[current + 1];
131
+ current += 2;
132
+ column += 2;
133
+ continue;
134
+ }
135
+
136
+ if (input[current] === '\'') {
137
+ value += "'";
138
+ current++;
139
+ column++;
140
+ break;
141
+ }
142
+ if (input[current] === '\n') {
143
+ line++;
144
+ column = 1;
145
+ } else {
146
+ column++;
147
+ }
148
+ value += input[current];
149
+ current++;
150
+ }
151
+ tokens.push({ type: TokenType.String, value, line: startLine, column: startColumn });
152
+ continue;
153
+ }
154
+
155
+ // Double quote "
156
+ if (char === '"') {
157
+ let value = '"';
158
+ const startLine = line;
159
+ const startColumn = column;
160
+ current++;
161
+ column++;
162
+
163
+ while (current < input.length) {
164
+ if (input[current] === '\\' && (input[current + 1] === '"' || input[current + 1] === '\\')) { // Same escape-pair rule as above.
165
+ value += input[current] + input[current + 1];
166
+ current += 2;
167
+ column += 2;
168
+ continue;
169
+ }
170
+
171
+ if (input[current] === '"') {
172
+ value += '"';
173
+ current++;
174
+ column++;
175
+ break;
176
+ }
177
+ if (input[current] === '\n') {
178
+ line++;
179
+ column = 1;
180
+ } else {
181
+ column++;
182
+ }
183
+ value += input[current];
184
+ current++;
185
+ }
186
+ tokens.push({ type: TokenType.String, value, line: startLine, column: startColumn });
187
+ continue;
188
+ }
189
+
190
+
191
+ // Handle Symbols
192
+ if (/[\{\}\[\]\(\),:>.\-<\\]/.test(char)) {
193
+ tokens.push({ type: TokenType.Symbol, value: char, line, column });
194
+ current++;
195
+ column++;
196
+ continue;
197
+ }
198
+
199
+ // Handle Words (Identifiers, Keywords, Numbers, etc.)
200
+ // A word is a maximal run of ASCII letters, digits and underscores
201
+ if (/[a-zA-Z0-9_]/.test(char)) {
202
+ let value = '';
203
+ const startLine = line;
204
+ const startColumn = column;
205
+
206
+ while (current < input.length && /[a-zA-Z0-9_]/.test(input[current])) {
207
+ value += input[current];
208
+ current++;
209
+ column++;
210
+ }
211
+ tokens.push({ type: TokenType.Word, value, line: startLine, column: startColumn });
212
+ continue;
213
+ }
214
+
215
+ // Fallback or Unknown
216
+ tokens.push({ type: TokenType.Unknown, value: char, line, column });
217
+ current++;
218
+ column++;
219
+ }
220
+
221
+ return tokens;
222
+ }
@@ -0,0 +1,13 @@
1
+ import { format } from './formatter/formatter';
2
+ import * as fs from 'fs';
3
+ import * as path from 'path';
4
+ // Script: runs `format` on sample.dbml and prints the original text followed by the formatted text.
5
+ const samplePath = path.join(__dirname, '../sample.dbml');
6
+ const content = fs.readFileSync(samplePath, 'utf-8');
7
+
8
+ const formatted = format(content); // Default options.
9
+
10
+ console.log('--- Original ---');
11
+ console.log(content);
12
+ console.log('--- Formatted ---');
13
+ console.log(formatted);
@@ -0,0 +1,14 @@
1
+ import { tokenize, TokenType } from './formatter/tokenizer';
2
+ import * as fs from 'fs';
3
+ import * as path from 'path';
4
+ // Script: tokenizes sample.dbml and prints one line per token.
5
+ const samplePath = path.join(__dirname, '../sample.dbml');
6
+ const content = fs.readFileSync(samplePath, 'utf-8');
7
+
8
+ const tokens = tokenize(content);
9
+
10
+ console.log('Tokens:');
11
+ tokens.forEach(t => {
12
+ let typeName = TokenType[t.type]; // Reverse lookup: numeric enum value back to its member name.
13
+ console.log(`[${typeName}] ${JSON.stringify(t.value)}`); // JSON.stringify makes newlines and other whitespace visible.
14
+ });
package/tsconfig.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "es2018",
4
+ "module": "commonjs",
5
+ "outDir": "./dist",
6
+ "rootDir": "./src",
7
+ "strict": true,
8
+ "esModuleInterop": true
9
+ }
10
+ }