@zxsylph/dbml-formatter 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/workflows/publish_to_npm.md +50 -0
- package/bin/dbml_formatter.js +3 -0
- package/package.json +21 -0
- package/sample.dbml +21 -0
- package/sample_messy.dbml +3 -0
- package/sample_messy2.dbml +4 -0
- package/sample_messy3.dbml +5 -0
- package/sample_messy4.dbml +5 -0
- package/sample_messy5.dbml +6 -0
- package/src/cli.ts +28 -0
- package/src/experiment.ts +21 -0
- package/src/formatter/formatter.ts +558 -0
- package/src/formatter/tokenizer.ts +222 -0
- package/src/test_formatter.ts +13 -0
- package/src/test_tokenizer.ts +14 -0
- package/tsconfig.json +10 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: How to publish the package to NPM
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Publishing to NPM
|
|
6
|
+
|
|
7
|
+
Follow these steps to publish your `dbml_formatter` tool to the NPM registry.
|
|
8
|
+
|
|
9
|
+
## 1. Prepare the Package
|
|
10
|
+
|
|
11
|
+
1. **Check Package Name**: The name `dbml-extension` might be taken. You should check [npmjs.com](https://www.npmjs.com/) or change `"name"` in `package.json` to something unique (e.g., `@your-username/dbml-formatter` or `dbml-fmt-cli`).
|
|
12
|
+
2. **Update Version**: Ensure `"version"` in `package.json` is correct (e.g., `1.0.0` for initial release).
|
|
13
|
+
3. **Check Dependencies**: Because the tool runs through `ts-node` at runtime, `typescript` and `ts-node` must be listed under `"dependencies"` rather than `"devDependencies"`. They already are, so no change is needed for this setup.
|
|
14
|
+
|
|
15
|
+
## 2. Authenticate
|
|
16
|
+
|
|
17
|
+
Run the following command to log in to your NPM account:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npm login
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## 3. Publish
|
|
24
|
+
|
|
25
|
+
Run the publish command:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
npm publish
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
If you are using a scoped name (like `@user/pkg`), use:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
npm publish --access public
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## 4. Usage After Publishing
|
|
38
|
+
|
|
39
|
+
Once published, anyone can run your tool using `npx`:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
npx [your-package-name] <file.dbml>
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
If you named it `dbml-extension`, they would run `npx dbml-extension <file>`.
|
|
46
|
+
If you want the command to be `npx dbml_formatter`, you should name the package `dbml_formatter` (if available), OR users can run:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
npx -p [your-package-name] dbml_formatter <file>
|
|
50
|
+
```
|
package/package.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@zxsylph/dbml-formatter",
|
|
3
|
+
"version": "1.0.1",
|
|
4
|
+
"description": "",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
8
|
+
},
|
|
9
|
+
"keywords": [],
|
|
10
|
+
"author": "",
|
|
11
|
+
"license": "ISC",
|
|
12
|
+
"bin": {
|
|
13
|
+
"dbml_formatter": "./bin/dbml_formatter.js"
|
|
14
|
+
},
|
|
15
|
+
"dependencies": {
|
|
16
|
+
"@dbml/core": "^5.4.1",
|
|
17
|
+
"@types/node": "^25.0.8",
|
|
18
|
+
"ts-node": "^10.9.2",
|
|
19
|
+
"typescript": "^5.9.3"
|
|
20
|
+
}
|
|
21
|
+
}
|
package/sample.dbml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// Table definition for users
|
|
2
|
+
Table users {
|
|
3
|
+
id integer [primary key, increment] // Auto-incrementing ID
|
|
4
|
+
username varchar
|
|
5
|
+
email varchar [unique]
|
|
6
|
+
created_at timestamp
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
/*
|
|
10
|
+
Multi-line comment
|
|
11
|
+
about posts
|
|
12
|
+
*/
|
|
13
|
+
Table posts{
|
|
14
|
+
id integer [primary key]
|
|
15
|
+
user_id integer
|
|
16
|
+
title varchar
|
|
17
|
+
body text [note: 'Content of the post']
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Reference link
|
|
21
|
+
Ref: posts.user_id > users.id
|
package/src/cli.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#!/usr/bin/env node
import * as fs from 'fs';
import * as path from 'path';
import { format } from './formatter/formatter';

/*
 * Command-line entry point.
 *
 * Takes the first positional argument as the path of a DBML file, resolves it
 * against the current working directory, formats its contents with `format`
 * and writes the result to stdout. Exits with status 1 when no argument is
 * given, when the file does not exist, or when reading/formatting throws.
 */
const [filePath] = process.argv.slice(2);

if (filePath === undefined) {
  // The executable is registered as `dbml_formatter` in package.json's "bin",
  // so the usage line must advertise that name.
  console.error('Usage: dbml_formatter <file>');
  process.exit(1);
}

const absPath = path.resolve(process.cwd(), filePath);

if (!fs.existsSync(absPath)) {
  console.error(`File not found: ${absPath}`);
  process.exit(1);
}

try {
  const content = fs.readFileSync(absPath, 'utf-8');
  // `format` already terminates its result with a single newline; writing it
  // verbatim avoids the extra blank line that console.log would append.
  process.stdout.write(format(content));
} catch (error) {
  console.error('Error formatting file:', error);
  process.exit(1);
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { importer, exporter } from '@dbml/core';
import * as fs from 'fs';
import * as path from 'path';

// Scratch experiment: push the bundled sample through @dbml/core's importer
// and exporter and print each stage, to see what the library produces.
const sampleFile = path.join(__dirname, '../sample.dbml');
const source = fs.readFileSync(sampleFile, 'utf-8');

console.log('Original Content:');
console.log(source);

/** Runs the import -> export round trip and logs the exported text. */
const roundTrip = (): void => {
  const intermediate = importer.import(source, 'dbml');
  // Only the heading is printed; the intermediate model itself may be large.
  console.log('Imported (Intermediate Model):');

  const rendered = exporter.export(intermediate, 'dbml');
  console.log('Exported (Formatted Content):');
  console.log(rendered);
};

try {
  roundTrip();
} catch (failure) {
  console.error('Error:', failure);
}
|
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
import { Token, TokenType, tokenize } from './tokenizer';
|
|
2
|
+
|
|
3
|
+
/** Options accepted by `format`. Every field is optional. */
export interface FormatterOptions {
  /**
   * When true, indentation is built from tab characters instead of spaces.
   * Note that one indent level is still `indentSize` repetitions of the
   * character, so with the default size a level is two tabs.
   */
  useTabs?: boolean;

  /** Number of indent characters per indent level; `format` falls back to 2. */
  indentSize?: number;
}
|
|
7
|
+
|
|
8
|
+
/**
 * Formats DBML source text.
 *
 * The input is tokenized and walked once. A `{` whose header contains the
 * word `table` (case-insensitive) switches to "buffer mode": every token up to
 * the matching `}` is collected, split into logical lines, and emitted with
 * these rules:
 *   - the table-level `Note: ...` line is printed first, followed by one
 *     empty line;
 *   - inside a field's `[...]` settings the `note` setting is moved to the
 *     front;
 *   - the second word of a field line (its data type) is wrapped in double
 *     quotes;
 *   - the closing `}` is followed by one empty line.
 * Everything outside Table blocks goes through a linear fallback that
 * normalizes whitespace (at most one blank line in a row), capitalizes the
 * `Table` / `Ref` / `Note:` keywords, rewrites single-quoted strings as
 * double-quoted ones, and indents the bodies of `{ ... }` blocks.
 *
 * @param input   DBML source text.
 * @param options Indentation settings; defaults to two spaces per level.
 * @returns The formatted text, trimmed and terminated by exactly one newline.
 */
export function format(input: string, options: FormatterOptions = {}): string {
  const rawTokens = tokenize(input);
  // `??` rather than `||` so that an explicit indentSize of 0 is honoured.
  const indentSize = options.indentSize ?? 2;
  const indentChar = options.useTabs ? '\t' : ' ';
  const oneIndent = indentChar.repeat(indentSize);

  let output = '';
  let indentLevel = 0;

  const getIndent = () => oneIndent.repeat(Math.max(0, indentLevel));

  let i = 0;
  while (i < rawTokens.length) {
    const token = rawTokens[i];

    // --- Buffer mode: the body of a Table block ---
    if (fmtIsSymbol(token, '{') && fmtOpensTableBlock(rawTokens, i)) {
      // Exactly one space between the table header and `{`.
      if (output.length > 0 && !output.endsWith(' ') && !output.endsWith('\n')) {
        output += ' ';
      }
      output += '{\n';
      indentLevel++;
      i++;

      // Collect everything up to the matching `}` (nested braces included).
      const buffer: Token[] = [];
      let depth = 1;
      while (i < rawTokens.length) {
        const t = rawTokens[i];
        if (fmtIsSymbol(t, '{')) {
          depth++;
        } else if (fmtIsSymbol(t, '}')) {
          depth--;
          if (depth === 0) break; // rawTokens[i] is the table's closing brace
        }
        buffer.push(t);
        i++;
      }

      const { noteLine, otherLines } = fmtPartitionTableBody(buffer);

      // The table note goes first and is followed by exactly one empty line.
      if (noteLine.length > 0) {
        output += processTokens(noteLine, indentLevel, indentChar, indentSize, false);
        if (!output.endsWith('\n\n')) {
          output += output.endsWith('\n') ? '\n' : '\n\n';
        }
      }

      for (let lineIdx = 0; lineIdx < otherLines.length; lineIdx++) {
        const lineTokens = otherLines[lineIdx];
        fmtHoistNoteSetting(lineTokens);
        fmtQuoteDataType(lineTokens);

        output += processTokens(lineTokens, indentLevel, indentChar, indentSize, true);

        // A group that did not end in a newline token must not run into the
        // next one.
        if (lineIdx < otherLines.length - 1 && !output.endsWith('\n')) {
          output += '\n';
        }
      }

      indentLevel--;
      if (!output.endsWith('\n')) output += '\n';
      // Closing brace plus one empty line after the table.
      output += getIndent() + '}' + '\n\n';

      // Skip the closing brace unless the input ended before it appeared.
      if (i < rawTokens.length && fmtIsSymbol(rawTokens[i], '}')) {
        i++;
      }
      continue;
    }

    // --- Fallback: linear processing of everything outside Table blocks ---

    if (token.type === TokenType.Whitespace) {
      // Keep line breaks, collapsing any run to at most one blank line.
      const newlines = Math.min((token.value.match(/\n/g) || []).length, 2);
      if (newlines > 0) {
        if (!output.endsWith('\n')) {
          output += '\n'.repeat(newlines);
        } else if (newlines > 1 && !output.endsWith('\n\n')) {
          output += '\n';
        }
      }
      i++;
      continue;
    }

    // A closing brace belongs to the outer level, so dedent before indenting
    // it. Clamped at 0 so a stray `}` cannot skew the blocks that follow.
    if (fmtIsSymbol(token, '}')) {
      indentLevel = Math.max(0, indentLevel - 1);
    }

    if (output.endsWith('\n')) {
      output += getIndent();
    } else if (output.length > 0) {
      let needsSpace = true;
      const lastChar = output[output.length - 1];
      if (lastChar === ' ' || lastChar === '\n' || lastChar === '(' || lastChar === '[' || lastChar === '.') {
        needsSpace = false;
      }
      if (token.type === TokenType.Symbol) {
        if (token.value === ',' || token.value === ']' || token.value === ')' || token.value === '.' || token.value === ':') {
          needsSpace = false;
        }
      }
      if (needsSpace) output += ' ';
    }

    switch (token.type) {
      case TokenType.Word:
        output += fmtNormalizeKeyword(rawTokens, i);
        break;

      case TokenType.String:
        output += fmtToDoubleQuoted(token.value);
        break;

      case TokenType.Symbol:
        // Bodies of non-Table blocks are indented one level deeper.
        if (token.value === '{') indentLevel++;
        output += token.value;
        break;

      default:
        output += token.value;
        break;
    }

    i++;
  }

  return output.trim() + '\n';
}

/** True when `token` is the Symbol token with exactly this text. */
function fmtIsSymbol(token: Token, value: string): boolean {
  return token.type === TokenType.Symbol && token.value === value;
}

/** True for tokens that carry no syntax: whitespace and comments. */
function fmtIsTrivia(token: Token): boolean {
  return token.type === TokenType.Whitespace || token.type === TokenType.Comment;
}

/**
 * Decides whether the `{` at `braceIdx` opens a Table block by scanning
 * backwards for the word `table`, stopping at the previous brace.
 */
function fmtOpensTableBlock(tokens: Token[], braceIdx: number): boolean {
  let idx = braceIdx - 1;
  while (idx >= 0 && fmtIsTrivia(tokens[idx])) {
    idx--;
  }
  for (; idx >= 0; idx--) {
    const t = tokens[idx];
    if (fmtIsSymbol(t, '}') || fmtIsSymbol(t, '{')) return false;
    if (t.type === TokenType.Word && t.value.toLowerCase() === 'table') return true;
  }
  return false;
}

/** True when a logical line is a table-level `Note: <value>` statement. */
function fmtIsTableNote(group: Token[]): boolean {
  const meaningful = group.filter((t) => !fmtIsTrivia(t));
  return (
    meaningful.length >= 3 &&
    meaningful[0].type === TokenType.Word &&
    meaningful[0].value.toLowerCase() === 'note' &&
    fmtIsSymbol(meaningful[1], ':')
  );
}

/**
 * Splits a table body into logical lines and separates the table note from
 * the rest. A logical line ends at a newline-bearing whitespace token that is
 * not nested inside `[...]` or `{...}`.
 */
function fmtPartitionTableBody(buffer: Token[]): { noteLine: Token[]; otherLines: Token[][] } {
  let noteLine: Token[] = [];
  const otherLines: Token[][] = [];
  let current: Token[] = [];
  let bracketDepth = 0;

  const flush = () => {
    if (current.length === 0) return;
    if (current.every((t) => t.type === TokenType.Whitespace)) {
      // Only the whitespace right after `{` forms such a group. Emitting it
      // would put a blank line after the brace, or a second one after a
      // hoisted note, so it is dropped.
    } else if (noteLine.length === 0 && fmtIsTableNote(current)) {
      noteLine = current;
    } else {
      // Includes any further `Note:` lines: they stay where they are instead
      // of replacing the first one, so no content is discarded.
      otherLines.push(current);
    }
    current = [];
  };

  for (const t of buffer) {
    current.push(t);
    if (fmtIsSymbol(t, '[') || fmtIsSymbol(t, '{')) bracketDepth++;
    if (fmtIsSymbol(t, ']') || fmtIsSymbol(t, '}')) bracketDepth--;
    if (t.type === TokenType.Whitespace && t.value.includes('\n') && bracketDepth === 0) {
      flush();
    }
  }
  flush(); // the last line may lack a trailing newline token

  return { noteLine, otherLines };
}

/**
 * Moves the `note` setting to the front of the line's `[...]` settings list.
 * Edits `lineTokens` in place; does nothing when there is no settings list or
 * the note is absent or already first.
 */
function fmtHoistNoteSetting(lineTokens: Token[]): void {
  let open = -1;
  let close = -1;
  lineTokens.forEach((t, idx) => {
    if (fmtIsSymbol(t, '[')) open = idx;
    if (fmtIsSymbol(t, ']')) close = idx;
  });
  if (open === -1 || close <= open) return;

  // Split the bracket contents into comma-separated settings.
  const settings: Token[][] = [];
  let currentSetting: Token[] = [];
  for (const t of lineTokens.slice(open + 1, close)) {
    if (fmtIsSymbol(t, ',')) {
      settings.push(currentSetting);
      currentSetting = [];
    } else {
      currentSetting.push(t);
    }
  }
  if (currentSetting.length > 0) settings.push(currentSetting);

  const noteIndex = settings.findIndex((setting) => {
    const first = setting.find((t) => !fmtIsTrivia(t));
    return first !== undefined && first.type === TokenType.Word && first.value.toLowerCase() === 'note';
  });
  if (noteIndex <= 0) return; // not found, or already first

  const [noteSetting] = settings.splice(noteIndex, 1);
  settings.unshift(noteSetting);

  const rebuilt: Token[] = [];
  settings.forEach((setting, idx) => {
    // Synthesized separators carry no source position.
    if (idx > 0) rebuilt.push({ type: TokenType.Symbol, value: ',', line: 0, column: 0 });
    rebuilt.push(...setting);
  });
  lineTokens.splice(open + 1, close - open - 1, ...rebuilt);
}

/**
 * Wraps the second word of a field line (its data type) in double quotes by
 * rewriting that token's value. A leading string token counts as a word, so
 * an already quoted name or type is left alone. Scanning stops at `[`
 * (settings) and at `{` (a nested block such as `indexes { ... }`, which is
 * not a field definition).
 */
function fmtQuoteDataType(lineTokens: Token[]): void {
  let wordCount = 0;
  for (const t of lineTokens) {
    if (fmtIsSymbol(t, '[') || fmtIsSymbol(t, '{')) return;
    if (t.type === TokenType.Word) {
      wordCount++;
      if (wordCount === 2) {
        t.value = `"${t.value}"`;
        return;
      }
    } else if (t.type === TokenType.String && wordCount < 2) {
      wordCount++;
    }
  }
}

/**
 * Returns the output text for the Word token at `idx`: `Table` and `Ref` are
 * capitalized, `note` becomes `Note` only when the next meaningful token is
 * `:`, and any other word is returned unchanged.
 */
function fmtNormalizeKeyword(tokens: Token[], idx: number): string {
  const word = tokens[idx].value;
  const lower = word.toLowerCase();
  if (lower === 'table') return 'Table';
  if (lower === 'ref') return 'Ref';
  if (lower === 'note') {
    let next = idx + 1;
    while (next < tokens.length && fmtIsTrivia(tokens[next])) {
      next++;
    }
    if (next < tokens.length && fmtIsSymbol(tokens[next], ':')) return 'Note';
  }
  return word;
}

/**
 * Rewrites a terminated single-quoted string as a double-quoted one. Triple
 * quoted strings, other token texts and unterminated strings are returned
 * unchanged. Only bare `"` characters are escaped; existing backslash escapes
 * (including `\"`) are kept as they are so they are not escaped twice.
 */
function fmtToDoubleQuoted(raw: string): string {
  if (!raw.startsWith("'") || raw.startsWith("'''")) return raw;
  if (raw.length < 2 || !raw.endsWith("'")) return raw;
  const content = raw.slice(1, -1);
  const escaped = content.replace(/\\[\s\S]|"/g, (m) => (m === '"' ? '\\"' : m));
  return `"${escaped}"`;
}
|
|
420
|
+
|
|
421
|
+
// ... helper functions ...
|
|
422
|
+
|
|
423
|
+
/**
 * Renders a list of tokens (one logical line of a Table body, possibly
 * spanning several physical lines) to text.
 *
 * Spacing and indentation are decided from the text produced so far by this
 * call only, so the first token is always treated as the start of a line.
 * `{` opens an indented block, `}` closes it on its own line, and a `[...]`
 * list that contains a top-level comma is laid out with one entry per line.
 * `Table` / `Ref` / `Note:` keywords are capitalized and single-quoted strings
 * are rewritten with double quotes. The tokens themselves are not modified.
 *
 * @param tokens          Tokens to render.
 * @param baseIndentLevel Indent level of the first line.
 * @param indentChar      Character used for indentation.
 * @param indentSize      Repetitions of `indentChar` per indent level.
 * @param isInsideTable   Currently unused; kept for interface compatibility.
 * @returns The rendered text.
 */
function processTokens(
  tokens: Token[],
  baseIndentLevel: number,
  indentChar: string,
  indentSize: number,
  isInsideTable: boolean
): string {
  const oneIndent = indentChar.repeat(indentSize);
  // One entry per currently open `[`: true when that list is laid out multi-line.
  const multilineArrayStack: boolean[] = [];
  let localOutput = '';
  let currentIndentLevel = baseIndentLevel;

  const getLocalIndent = () => oneIndent.repeat(Math.max(0, currentIndentLevel));

  const isSymbolAt = (idx: number, value: string): boolean =>
    tokens[idx].type === TokenType.Symbol && tokens[idx].value === value;

  // A `[...]` list is multi-line when it has a comma at its own nesting level.
  const checkArrayMultiline = (startIdx: number): boolean => {
    let depth = 1;
    let hasComma = false;
    for (let k = startIdx + 1; k < tokens.length; k++) {
      if (isSymbolAt(k, '[')) depth++;
      if (isSymbolAt(k, ']')) depth--;
      if (depth === 1 && isSymbolAt(k, ',')) hasComma = true;
      if (depth === 0) return hasComma;
    }
    return false;
  };

  // True when the next token after `idx`, ignoring whitespace/comments, is `:`.
  const isFollowedByColon = (idx: number): boolean => {
    let next = idx + 1;
    while (
      next < tokens.length &&
      (tokens[next].type === TokenType.Whitespace || tokens[next].type === TokenType.Comment)
    ) {
      next++;
    }
    return next < tokens.length && isSymbolAt(next, ':');
  };

  // Rewrites a terminated single-quoted string with double quotes. Existing
  // backslash escapes are kept verbatim so `\"` is not escaped a second time;
  // triple-quoted and unterminated strings are returned unchanged.
  const toDoubleQuoted = (raw: string): string => {
    if (!raw.startsWith("'") || raw.startsWith("'''")) return raw;
    if (raw.length < 2 || !raw.endsWith("'")) return raw;
    const escaped = raw.slice(1, -1).replace(/\\[\s\S]|"/g, (m) => (m === '"' ? '\\"' : m));
    return `"${escaped}"`;
  };

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];

    if (token.type === TokenType.Whitespace) {
      // Keep line breaks, collapsing any run to at most one blank line.
      const newlines = Math.min((token.value.match(/\n/g) || []).length, 2);
      if (newlines > 0) {
        if (!localOutput.endsWith('\n')) {
          localOutput += '\n'.repeat(newlines);
        } else if (newlines > 1 && !localOutput.endsWith('\n\n')) {
          localOutput += '\n';
        }
      }
      continue;
    }

    const isSymbol = token.type === TokenType.Symbol;
    const closesBrace = isSymbol && token.value === '}';
    const closesMultilineArray =
      isSymbol &&
      token.value === ']' &&
      multilineArrayStack.length > 0 &&
      multilineArrayStack[multilineArrayStack.length - 1];

    if (localOutput.length === 0 || localOutput.endsWith('\n')) {
      // `}` and the `]` of a multi-line list add their own (dedented) indent
      // below. Indenting them here would leave a whitespace-only line behind.
      if (!closesBrace && !closesMultilineArray) {
        localOutput += getLocalIndent();
      }
    } else if (!closesBrace) {
      // Mid-line token. `}` is excluded because it always moves to a new
      // line, and a space before that line break would be trailing whitespace.
      let needsSpace = true;
      const lastChar = localOutput[localOutput.length - 1];
      if (lastChar === ' ' || lastChar === '\n' || lastChar === '(' || lastChar === '[' || lastChar === '.') {
        needsSpace = false;
      }
      if (isSymbol) {
        if (token.value === ',' || token.value === ']' || token.value === ')' || token.value === '.' || token.value === ':') {
          needsSpace = false;
        }
      }
      if (needsSpace) localOutput += ' ';
    }

    switch (token.type) {
      case TokenType.Symbol:
        if (token.value === '{') {
          localOutput += '{\n';
          currentIndentLevel++;
        } else if (token.value === '}') {
          if (!localOutput.endsWith('\n')) localOutput += '\n';
          currentIndentLevel--;
          localOutput += getLocalIndent() + '}';
        } else if (token.value === '[') {
          const isMultiline = checkArrayMultiline(i);
          multilineArrayStack.push(isMultiline);
          localOutput += '[';
          if (isMultiline) {
            localOutput += '\n';
            currentIndentLevel++;
          }
        } else if (token.value === ']') {
          const isMultiline = multilineArrayStack.pop();
          if (isMultiline) {
            if (!localOutput.endsWith('\n')) localOutput += '\n';
            currentIndentLevel--;
            localOutput += getLocalIndent();
          }
          localOutput += ']';
        } else if (token.value === ',') {
          localOutput += ',';
          const inMultiline =
            multilineArrayStack.length > 0 && multilineArrayStack[multilineArrayStack.length - 1];
          if (inMultiline) localOutput += '\n';
        } else {
          localOutput += token.value;
        }
        break;

      case TokenType.Word: {
        // Keyword capitalization; `note` only when it introduces `note:`.
        const lower = token.value.toLowerCase();
        let text = token.value;
        if (lower === 'table') {
          text = 'Table';
        } else if (lower === 'ref') {
          text = 'Ref';
        } else if (lower === 'note' && isFollowedByColon(i)) {
          text = 'Note';
        }
        localOutput += text;
        break;
      }

      case TokenType.String:
        localOutput += toDoubleQuoted(token.value);
        break;

      default:
        localOutput += token.value;
        break;
    }
  }

  return localOutput;
}
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
 * Lexical category of a token produced by `tokenize`.
 * The numeric values are spelled out; they match the implicit numbering.
 */
export enum TokenType {
  /** A run of consecutive whitespace characters, newlines included. */
  Whitespace = 0,
  /** A `// ...` line comment or a block comment delimited by slash-star. */
  Comment = 1,
  /** A string delimited by `'''`, `'` or `"`, delimiters included. */
  String = 2,
  /** A punctuation character such as `{`, `}`, `[`, `]`, `,` or `:`. */
  Symbol = 3,
  /** Presumably an identifier or keyword; the producing code is not visible here. */
  Word = 4,
  /** Presumably any character no other rule matched; the producing code is not visible here. */
  Unknown = 5,
}
|
|
9
|
+
|
|
10
|
+
/** One lexical unit of the input together with where it starts. */
export interface Token {
  /** Lexical category of the token. */
  type: TokenType;

  /** Exact text of the token; for strings and comments this includes the delimiters. */
  value: string;

  /**
   * Line on which the token starts. The tokenizer counts from 1; tokens
   * synthesized by the formatter use 0.
   */
  line: number;

  /**
   * Column at which the token starts. The tokenizer counts from 1; tokens
   * synthesized by the formatter use 0.
   */
  column: number;
}
|
|
16
|
+
|
|
17
|
+
export function tokenize(input: string): Token[] {
|
|
18
|
+
const tokens: Token[] = [];
|
|
19
|
+
let current = 0;
|
|
20
|
+
let line = 1;
|
|
21
|
+
let column = 1;
|
|
22
|
+
|
|
23
|
+
while (current < input.length) {
|
|
24
|
+
let char = input[current];
|
|
25
|
+
|
|
26
|
+
// Handle Whitespace
|
|
27
|
+
if (/\s/.test(char)) {
|
|
28
|
+
let value = '';
|
|
29
|
+
const startLine = line;
|
|
30
|
+
const startColumn = column;
|
|
31
|
+
|
|
32
|
+
while (current < input.length && /\s/.test(input[current])) {
|
|
33
|
+
if (input[current] === '\n') {
|
|
34
|
+
line++;
|
|
35
|
+
column = 1;
|
|
36
|
+
} else {
|
|
37
|
+
column++;
|
|
38
|
+
}
|
|
39
|
+
value += input[current];
|
|
40
|
+
current++;
|
|
41
|
+
}
|
|
42
|
+
tokens.push({ type: TokenType.Whitespace, value, line: startLine, column: startColumn });
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// Handle Comments
|
|
47
|
+
if (char === '/' && input[current + 1] === '/') {
|
|
48
|
+
let value = '';
|
|
49
|
+
const startLine = line;
|
|
50
|
+
const startColumn = column;
|
|
51
|
+
|
|
52
|
+
while (current < input.length && input[current] !== '\n') {
|
|
53
|
+
value += input[current];
|
|
54
|
+
current++;
|
|
55
|
+
column++;
|
|
56
|
+
}
|
|
57
|
+
tokens.push({ type: TokenType.Comment, value, line: startLine, column: startColumn });
|
|
58
|
+
continue;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
if (char === '/' && input[current + 1] === '*') {
|
|
62
|
+
let value = '';
|
|
63
|
+
const startLine = line;
|
|
64
|
+
const startColumn = column;
|
|
65
|
+
|
|
66
|
+
value += '/*';
|
|
67
|
+
current += 2;
|
|
68
|
+
column += 2;
|
|
69
|
+
|
|
70
|
+
while (current < input.length) {
|
|
71
|
+
if (input[current] === '*' && input[current + 1] === '/') {
|
|
72
|
+
value += '*/';
|
|
73
|
+
current += 2;
|
|
74
|
+
column += 2;
|
|
75
|
+
break;
|
|
76
|
+
}
|
|
77
|
+
if (input[current] === '\n') {
|
|
78
|
+
line++;
|
|
79
|
+
column = 1;
|
|
80
|
+
} else {
|
|
81
|
+
column++;
|
|
82
|
+
}
|
|
83
|
+
value += input[current];
|
|
84
|
+
current++;
|
|
85
|
+
}
|
|
86
|
+
tokens.push({ type: TokenType.Comment, value, line: startLine, column: startColumn });
|
|
87
|
+
continue;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// Handle Strings
|
|
91
|
+
// Triple quote '''
|
|
92
|
+
if (char === '\'' && input[current + 1] === '\'' && input[current + 2] === '\'') {
|
|
93
|
+
let value = "'''";
|
|
94
|
+
const startLine = line;
|
|
95
|
+
const startColumn = column;
|
|
96
|
+
current += 3;
|
|
97
|
+
column += 3;
|
|
98
|
+
|
|
99
|
+
while (current < input.length) {
|
|
100
|
+
if (input[current] === '\'' && input[current + 1] === '\'' && input[current + 2] === '\'') {
|
|
101
|
+
value += "'''";
|
|
102
|
+
current += 3;
|
|
103
|
+
column += 3;
|
|
104
|
+
break;
|
|
105
|
+
}
|
|
106
|
+
if (input[current] === '\n') {
|
|
107
|
+
line++;
|
|
108
|
+
column = 1;
|
|
109
|
+
} else {
|
|
110
|
+
column++;
|
|
111
|
+
}
|
|
112
|
+
value += input[current];
|
|
113
|
+
current++;
|
|
114
|
+
}
|
|
115
|
+
tokens.push({ type: TokenType.String, value, line: startLine, column: startColumn });
|
|
116
|
+
continue;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// Single quote '
|
|
120
|
+
if (char === '\'') {
|
|
121
|
+
let value = "'";
|
|
122
|
+
const startLine = line;
|
|
123
|
+
const startColumn = column;
|
|
124
|
+
current++;
|
|
125
|
+
column++;
|
|
126
|
+
|
|
127
|
+
while (current < input.length) {
|
|
128
|
+
// Keep an escaped quote (\') as part of the string instead of treating it as the closing quote
|
|
129
|
+
if (input[current] === '\\' && input[current + 1] === '\'') {
|
|
130
|
+
value += "\\'";
|
|
131
|
+
current += 2;
|
|
132
|
+
column += 2;
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
if (input[current] === '\'') {
|
|
137
|
+
value += "'";
|
|
138
|
+
current++;
|
|
139
|
+
column++;
|
|
140
|
+
break;
|
|
141
|
+
}
|
|
142
|
+
if (input[current] === '\n') {
|
|
143
|
+
line++;
|
|
144
|
+
column = 1;
|
|
145
|
+
} else {
|
|
146
|
+
column++;
|
|
147
|
+
}
|
|
148
|
+
value += input[current];
|
|
149
|
+
current++;
|
|
150
|
+
}
|
|
151
|
+
tokens.push({ type: TokenType.String, value, line: startLine, column: startColumn });
|
|
152
|
+
continue;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Double quote "
|
|
156
|
+
if (char === '"') {
|
|
157
|
+
let value = '"';
|
|
158
|
+
const startLine = line;
|
|
159
|
+
const startColumn = column;
|
|
160
|
+
current++;
|
|
161
|
+
column++;
|
|
162
|
+
|
|
163
|
+
while (current < input.length) {
|
|
164
|
+
if (input[current] === '\\' && input[current + 1] === '"') {
|
|
165
|
+
value += '\\"';
|
|
166
|
+
current += 2;
|
|
167
|
+
column += 2;
|
|
168
|
+
continue;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
if (input[current] === '"') {
|
|
172
|
+
value += '"';
|
|
173
|
+
current++;
|
|
174
|
+
column++;
|
|
175
|
+
break;
|
|
176
|
+
}
|
|
177
|
+
if (input[current] === '\n') {
|
|
178
|
+
line++;
|
|
179
|
+
column = 1;
|
|
180
|
+
} else {
|
|
181
|
+
column++;
|
|
182
|
+
}
|
|
183
|
+
value += input[current];
|
|
184
|
+
current++;
|
|
185
|
+
}
|
|
186
|
+
tokens.push({ type: TokenType.String, value, line: startLine, column: startColumn });
|
|
187
|
+
continue;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
// Handle Symbols
|
|
192
|
+
if (/[\{\}\[\]\(\),:>.\-<\\]/.test(char)) {
|
|
193
|
+
tokens.push({ type: TokenType.Symbol, value: char, line, column });
|
|
194
|
+
current++;
|
|
195
|
+
column++;
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Handle Words (Identifiers, Keywords, Numbers, etc.)
|
|
200
|
+
// A word is a maximal run of ASCII letters, digits, and underscores
|
|
201
|
+
if (/[a-zA-Z0-9_]/.test(char)) {
|
|
202
|
+
let value = '';
|
|
203
|
+
const startLine = line;
|
|
204
|
+
const startColumn = column;
|
|
205
|
+
|
|
206
|
+
while (current < input.length && /[a-zA-Z0-9_]/.test(input[current])) {
|
|
207
|
+
value += input[current];
|
|
208
|
+
current++;
|
|
209
|
+
column++;
|
|
210
|
+
}
|
|
211
|
+
tokens.push({ type: TokenType.Word, value, line: startLine, column: startColumn });
|
|
212
|
+
continue;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// Fallback or Unknown
|
|
216
|
+
tokens.push({ type: TokenType.Unknown, value: char, line, column });
|
|
217
|
+
current++;
|
|
218
|
+
column++;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
return tokens;
|
|
222
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { format } from './formatter/formatter';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
|
|
5
|
+
// Manual smoke check: format the bundled sample file and print it before and after.
const sampleFile = path.join(__dirname, '..', 'sample.dbml');
const original = fs.readFileSync(sampleFile, 'utf-8');

// Format first so that a formatter failure surfaces before anything is printed.
const result = format(original);

const sections = [
  ['--- Original ---', original],
  ['--- Formatted ---', result],
];

for (const [heading, text] of sections) {
  console.log(heading);
  console.log(text);
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { tokenize, TokenType } from './formatter/tokenizer';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
|
|
5
|
+
// Debug helper: tokenize the bundled sample file and dump one line per token.
const sampleFile = path.join(__dirname, '..', 'sample.dbml');
const source = fs.readFileSync(sampleFile, 'utf-8');

// Tokenize before printing the header so a tokenizer failure leaves no partial output.
const tokenList = tokenize(source);

console.log('Tokens:');
for (const token of tokenList) {
  // Index TokenType by the token's type to obtain a printable name for it.
  const label = TokenType[token.type];
  console.log(`[${label}] ${JSON.stringify(token.value)}`);
}
|