pgsql-parse 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +107 -0
- package/deparse.d.ts +31 -0
- package/deparse.js +67 -0
- package/esm/deparse.js +67 -0
- package/esm/index.js +26 -0
- package/esm/parse.js +134 -0
- package/esm/scanner.js +94 -0
- package/esm/types.js +18 -0
- package/index.d.ts +20 -0
- package/index.js +41 -0
- package/package.json +51 -0
- package/parse.d.ts +19 -0
- package/parse.js +138 -0
- package/scanner.d.ts +45 -0
- package/scanner.js +97 -0
- package/types.d.ts +59 -0
- package/types.js +23 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2020 Dan Lynch <pyramation@gmail.com>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# pgsql-parse
|
|
2
|
+
|
|
3
|
+
<p align="center" width="100%">
|
|
4
|
+
<img height="250" src="https://raw.githubusercontent.com/constructive-io/constructive/refs/heads/main/assets/outline-logo.svg" />
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
Comment and whitespace preserving PostgreSQL parser. A drop-in enhancement for `pgsql-parser` that preserves SQL comments (`--` line and `/* */` block) and vertical whitespace (blank lines) through parse-deparse round trips.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```sh
|
|
12
|
+
npm install pgsql-parse
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
|
|
17
|
+
* **Comment Preservation** -- Retains `--` line comments and `/* */` block comments through parse-deparse cycles
|
|
18
|
+
* **Vertical Whitespace** -- Preserves blank lines between statements for readable output
|
|
19
|
+
* **Idempotent Round-Trips** -- `parse -> deparse -> parse -> deparse` produces identical output
|
|
20
|
+
* **Drop-in API** -- Re-exports `parse`, `parseSync`, `deparse`, `deparseSync`, `loadModule` from `pgsql-parser`
|
|
21
|
+
* **Synthetic AST Nodes** -- `RawComment` and `RawWhitespace` nodes interleaved into the `stmts` array by byte position
|
|
22
|
+
|
|
23
|
+
## How It Works
|
|
24
|
+
|
|
25
|
+
1. A scanner built on PostgreSQL's real lexer (`scanSync` from `@libpg-query/parser`) extracts comment tokens with byte positions from the raw SQL text and detects blank lines in the gaps between tokens
|
|
26
|
+
2. Enhanced `parse`/`parseSync` call the standard `libpg-query` parser, then interleave synthetic `RawComment` and `RawWhitespace` nodes into the `stmts` array based on byte position
|
|
27
|
+
3. `deparseEnhanced()` dispatches on node type -- real `RawStmt` entries go through the standard deparser, while synthetic nodes emit their comment text or blank lines directly
|
|
28
|
+
|
|
29
|
+
## API
|
|
30
|
+
|
|
31
|
+
### Enhanced Parse
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
import { parse, parseSync, deparseEnhanced, loadModule } from 'pgsql-parse';
|
|
35
|
+
|
|
36
|
+
// Async (handles initialization automatically)
|
|
37
|
+
const result = await parse(`
|
|
38
|
+
-- Create users table
|
|
39
|
+
CREATE TABLE users (id serial PRIMARY KEY);
|
|
40
|
+
|
|
41
|
+
-- Create posts table
|
|
42
|
+
CREATE TABLE posts (id serial PRIMARY KEY);
|
|
43
|
+
`);
|
|
44
|
+
|
|
45
|
+
// result.stmts contains RawComment, RawWhitespace, and RawStmt nodes
|
|
46
|
+
const sql = deparseEnhanced(result);
|
|
47
|
+
// Output preserves comments and blank lines
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Sync Methods
|
|
51
|
+
|
|
52
|
+
```typescript
|
|
53
|
+
import { parseSync, deparseEnhanced, loadModule } from 'pgsql-parse';
|
|
54
|
+
|
|
55
|
+
await loadModule();
|
|
56
|
+
|
|
57
|
+
const result = parseSync('-- comment\nSELECT 1;');
|
|
58
|
+
const sql = deparseEnhanced(result);
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Type Guards
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
import { isRawComment, isRawWhitespace, isRawStmt } from 'pgsql-parse';
|
|
65
|
+
|
|
66
|
+
for (const stmt of result.stmts) {
|
|
67
|
+
if (isRawComment(stmt)) {
|
|
68
|
+
console.log('Comment:', stmt.RawComment.text);
|
|
69
|
+
} else if (isRawWhitespace(stmt)) {
|
|
70
|
+
console.log('Blank lines:', stmt.RawWhitespace.lines);
|
|
71
|
+
} else if (isRawStmt(stmt)) {
|
|
72
|
+
console.log('Statement:', stmt);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Credits
|
|
78
|
+
|
|
79
|
+
Built on the excellent work of several contributors:
|
|
80
|
+
|
|
81
|
+
* **[Dan Lynch](https://github.com/pyramation)** -- official maintainer since 2018 and architect of the current implementation
|
|
82
|
+
* **[Lukas Fittl](https://github.com/lfittl)** for [libpg_query](https://github.com/pganalyze/libpg_query) -- the core PostgreSQL parser that powers this project
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
**🛠 Built by the [Constructive](https://constructive.io) team — creators of modular Postgres tooling for secure, composable backends. If you like our work, contribute on [GitHub](https://github.com/constructive-io).**
|
|
87
|
+
|
|
88
|
+
## Related
|
|
89
|
+
|
|
90
|
+
* [pgpm](https://pgpm.dev): A Postgres Package Manager that brings modular development to PostgreSQL with reusable packages, deterministic migrations, recursive dependency resolution, and tag-aware versioning.
|
|
91
|
+
* [pgsql-test](https://www.npmjs.com/package/pgsql-test): Instant, isolated PostgreSQL databases for each test with automatic transaction rollbacks, context switching, and clean seeding for fast, reliable database testing.
|
|
92
|
+
* [pgsql-seed](https://www.npmjs.com/package/pgsql-seed): PostgreSQL seeding utilities for CSV, JSON, SQL data loading, and pgpm deployment.
|
|
93
|
+
* [pgsql-parser](https://www.npmjs.com/package/pgsql-parser): The real PostgreSQL parser for Node.js, providing symmetric parsing and deparsing of SQL statements with actual PostgreSQL parser integration.
|
|
94
|
+
* [pgsql-deparser](https://www.npmjs.com/package/pgsql-deparser): A streamlined tool designed for converting PostgreSQL ASTs back into SQL queries, focusing solely on deparser functionality to complement `pgsql-parser`.
|
|
95
|
+
* [@pgsql/parser](https://www.npmjs.com/package/@pgsql/parser): Multi-version PostgreSQL parser with dynamic version selection at runtime, supporting PostgreSQL 15, 16, and 17 in a single package.
|
|
96
|
+
* [@pgsql/types](https://www.npmjs.com/package/@pgsql/types): Offers TypeScript type definitions for PostgreSQL AST nodes, facilitating type-safe construction, analysis, and manipulation of ASTs.
|
|
97
|
+
* [@pgsql/enums](https://www.npmjs.com/package/@pgsql/enums): Provides TypeScript enum definitions for PostgreSQL constants, enabling type-safe usage of PostgreSQL enums and constants in your applications.
|
|
98
|
+
* [@pgsql/utils](https://www.npmjs.com/package/@pgsql/utils): A comprehensive utility library for PostgreSQL, offering type-safe AST node creation and enum value conversions, simplifying the construction and manipulation of PostgreSQL ASTs.
|
|
99
|
+
* [@pgsql/traverse](https://www.npmjs.com/package/@pgsql/traverse): PostgreSQL AST traversal utilities for pgsql-parser, providing a visitor pattern for traversing PostgreSQL Abstract Syntax Tree nodes, similar to Babel's traverse functionality but specifically designed for PostgreSQL AST structures.
|
|
100
|
+
* [pg-proto-parser](https://www.npmjs.com/package/pg-proto-parser): A TypeScript tool that parses PostgreSQL Protocol Buffers definitions to generate TypeScript interfaces, utility functions, and JSON mappings for enums.
|
|
101
|
+
* [libpg-query](https://github.com/constructive-io/libpg-query-node): The real PostgreSQL parser exposed for Node.js, used primarily in `pgsql-parser` for parsing and deparsing SQL queries.
|
|
102
|
+
|
|
103
|
+
## Disclaimer
|
|
104
|
+
|
|
105
|
+
AS DESCRIBED IN THE LICENSES, THE SOFTWARE IS PROVIDED "AS IS", AT YOUR OWN RISK, AND WITHOUT WARRANTIES OF ANY KIND.
|
|
106
|
+
|
|
107
|
+
No developer or entity involved in creating Software will be liable for any claims or damages whatsoever associated with your use, inability to use, or your interaction with other users of the Software code or Software CLI, including any direct, indirect, incidental, special, exemplary, punitive or consequential damages, or loss of profits, cryptocurrencies, tokens, or anything else of value.
|
package/deparse.d.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced deparser that handles synthetic RawComment and RawWhitespace nodes
|
|
3
|
+
* in addition to all standard PostgreSQL AST nodes.
|
|
4
|
+
*
|
|
5
|
+
* This does NOT modify the upstream Deparser class. Instead, it processes
|
|
6
|
+
* the EnhancedParseResult's stmts array and delegates real statements
|
|
7
|
+
* to the standard deparser.
|
|
8
|
+
*/
|
|
9
|
+
import { Deparser, DeparserOptions } from 'pgsql-deparser';
|
|
10
|
+
import { EnhancedParseResult } from './types';
|
|
11
|
+
/**
|
|
12
|
+
* Deparse an EnhancedParseResult back to SQL, preserving comments
|
|
13
|
+
* and vertical whitespace.
|
|
14
|
+
*
|
|
15
|
+
* The output strategy:
|
|
16
|
+
* - Each real statement gets a newline separator from the previous element
|
|
17
|
+
* - RawComment nodes emit their comment text
|
|
18
|
+
* - RawWhitespace nodes emit blank lines (the node itself IS the separator)
|
|
19
|
+
* - Adjacent statements/comments without a RawWhitespace between them
|
|
20
|
+
* get a single newline separator
|
|
21
|
+
*/
|
|
22
|
+
export declare function deparseEnhanced(result: EnhancedParseResult, opts?: DeparserOptions): string;
|
|
23
|
+
/**
|
|
24
|
+
* Alias of deparseEnhanced, which is already synchronous.
|
|
25
|
+
*/
|
|
26
|
+
export declare const deparseEnhancedSync: typeof deparseEnhanced;
|
|
27
|
+
/**
|
|
28
|
+
* Standard Deparser class and DeparserOptions type — re-exported from pgsql-deparser for convenience.
|
|
29
|
+
* Use this when you have a standard ParseResult without synthetic nodes.
|
|
30
|
+
*/
|
|
31
|
+
export { Deparser, DeparserOptions };
|
package/deparse.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Enhanced deparser that handles synthetic RawComment and RawWhitespace nodes
|
|
4
|
+
* in addition to all standard PostgreSQL AST nodes.
|
|
5
|
+
*
|
|
6
|
+
* This does NOT modify the upstream Deparser class. Instead, it processes
|
|
7
|
+
* the EnhancedParseResult's stmts array and delegates real statements
|
|
8
|
+
* to the standard deparser.
|
|
9
|
+
*/
|
|
10
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
+
exports.Deparser = exports.deparseEnhancedSync = void 0;
|
|
12
|
+
exports.deparseEnhanced = deparseEnhanced;
|
|
13
|
+
const pgsql_deparser_1 = require("pgsql-deparser");
|
|
14
|
+
Object.defineProperty(exports, "Deparser", { enumerable: true, get: function () { return pgsql_deparser_1.Deparser; } });
|
|
15
|
+
const types_1 = require("./types");
|
|
16
|
+
/**
 * Render a RawComment node as SQL line-comment text.
 *
 * The result is always the `--` marker followed by the node's `text`;
 * `comment.type` is not consulted, so every node is emitted with
 * line-comment syntax.
 *
 * @param comment RawComment payload; only its `text` property is read.
 * @returns The comment text prefixed with `--`.
 */
function deparseComment(comment) {
    const { text } = comment;
    return `--${text}`;
}
|
|
22
|
+
/**
 * Turn an EnhancedParseResult back into SQL text, keeping comments and
 * blank lines.
 *
 * Entries of `result.stmts` are visited in order and collected as output
 * lines, which are finally joined with `opts.newline` (default `'\n'`):
 * - RawComment: emitted via deparseComment. When the node is flagged
 *   `trailing` and at least one line already exists, the comment is
 *   appended to that last line after a single space; otherwise it becomes
 *   a line of its own.
 * - RawWhitespace: contributes `lines` empty output lines.
 * - RawStmt: wrapped in a one-statement parse result (`version: 0`) and
 *   handed to the standard Deparser together with `opts`; empty output is
 *   skipped.
 * Entries matching none of the three guards are ignored.
 *
 * @param result Enhanced parse result whose `stmts` array is iterated.
 * @param opts Deparser options; `newline` is also used as the line joiner.
 * @returns The reassembled SQL text.
 */
function deparseEnhanced(result, opts = {}) {
    const separator = opts.newline ?? '\n';
    const output = [];
    for (const node of result.stmts) {
        if ((0, types_1.isRawComment)(node)) {
            const rendered = deparseComment(node.RawComment);
            const attachToPrevious = node.RawComment.trailing && output.length > 0;
            if (attachToPrevious) {
                // Same-line comment: glue it onto the line emitted just before.
                output[output.length - 1] += ' ' + rendered;
            }
            else {
                output.push(rendered);
            }
            continue;
        }
        if ((0, types_1.isRawWhitespace)(node)) {
            // One empty output line per recorded blank line.
            for (let n = 0; n < node.RawWhitespace.lines; n++) {
                output.push('');
            }
            continue;
        }
        if ((0, types_1.isRawStmt)(node)) {
            // The standard deparser expects a ParseResult, so wrap the single statement.
            const text = pgsql_deparser_1.Deparser.deparse({ version: 0, stmts: [node] }, opts);
            if (text) {
                output.push(text);
            }
        }
    }
    return output.join(separator);
}
|
|
64
|
+
/**
|
|
65
|
+
* Alias of deparseEnhanced, which is already synchronous.
|
|
66
|
+
*/
|
|
67
|
+
exports.deparseEnhancedSync = deparseEnhanced;
|
package/esm/deparse.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced deparser that handles synthetic RawComment and RawWhitespace nodes
|
|
3
|
+
* in addition to all standard PostgreSQL AST nodes.
|
|
4
|
+
*
|
|
5
|
+
* This does NOT modify the upstream Deparser class. Instead, it processes
|
|
6
|
+
* the EnhancedParseResult's stmts array and delegates real statements
|
|
7
|
+
* to the standard deparser.
|
|
8
|
+
*/
|
|
9
|
+
import { Deparser } from 'pgsql-deparser';
|
|
10
|
+
import { isRawComment, isRawWhitespace, isRawStmt, } from './types';
|
|
11
|
+
/**
 * Render a RawComment node as SQL line-comment text.
 *
 * The result is always the `--` marker followed by the node's `text`;
 * `comment.type` is not consulted, so every node is emitted with
 * line-comment syntax.
 *
 * @param comment RawComment payload; only its `text` property is read.
 * @returns The comment text prefixed with `--`.
 */
function deparseComment(comment) {
    const { text } = comment;
    return `--${text}`;
}
|
|
17
|
+
/**
 * Turn an EnhancedParseResult back into SQL text, keeping comments and
 * blank lines.
 *
 * Entries of `result.stmts` are visited in order and collected as output
 * lines, which are finally joined with `opts.newline` (default `'\n'`):
 * - RawComment: emitted via deparseComment. When the node is flagged
 *   `trailing` and at least one line already exists, the comment is
 *   appended to that last line after a single space; otherwise it becomes
 *   a line of its own.
 * - RawWhitespace: contributes `lines` empty output lines.
 * - RawStmt: wrapped in a one-statement parse result (`version: 0`) and
 *   handed to the standard Deparser together with `opts`; empty output is
 *   skipped.
 * Entries matching none of the three guards are ignored.
 *
 * @param result Enhanced parse result whose `stmts` array is iterated.
 * @param opts Deparser options; `newline` is also used as the line joiner.
 * @returns The reassembled SQL text.
 */
export function deparseEnhanced(result, opts = {}) {
    const separator = opts.newline ?? '\n';
    const output = [];
    for (const node of result.stmts) {
        if (isRawComment(node)) {
            const rendered = deparseComment(node.RawComment);
            const attachToPrevious = node.RawComment.trailing && output.length > 0;
            if (attachToPrevious) {
                // Same-line comment: glue it onto the line emitted just before.
                output[output.length - 1] += ' ' + rendered;
            }
            else {
                output.push(rendered);
            }
            continue;
        }
        if (isRawWhitespace(node)) {
            // One empty output line per recorded blank line.
            for (let n = 0; n < node.RawWhitespace.lines; n++) {
                output.push('');
            }
            continue;
        }
        if (isRawStmt(node)) {
            // The standard deparser expects a ParseResult, so wrap the single statement.
            const text = Deparser.deparse({ version: 0, stmts: [node] }, opts);
            if (text) {
                output.push(text);
            }
        }
    }
    return output.join(separator);
}
|
|
59
|
+
/**
|
|
60
|
+
* Alias of deparseEnhanced, which is already synchronous.
|
|
61
|
+
*/
|
|
62
|
+
export const deparseEnhancedSync = deparseEnhanced;
|
|
63
|
+
/**
|
|
64
|
+
* Standard Deparser class — re-exported from pgsql-deparser for convenience.
|
|
65
|
+
* Use this when you have a standard ParseResult without synthetic nodes.
|
|
66
|
+
*/
|
|
67
|
+
export { Deparser };
|
package/esm/index.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pgsql-parse — Comment and whitespace preserving PostgreSQL parser.
|
|
3
|
+
*
|
|
4
|
+
* Drop-in enhancement over pgsql-parser that preserves SQL -- line
|
|
5
|
+
* comments and vertical whitespace (blank lines) through
|
|
6
|
+
* parse→deparse round trips.
|
|
7
|
+
*
|
|
8
|
+
* Synthetic AST nodes:
|
|
9
|
+
* - RawComment: represents a SQL comment
|
|
10
|
+
* - RawWhitespace: represents significant vertical whitespace
|
|
11
|
+
*
|
|
12
|
+
* These nodes are interleaved with real RawStmt entries in the
|
|
13
|
+
* stmts array, ordered by byte position in the original source.
|
|
14
|
+
*/
|
|
15
|
+
// Enhanced parse functions (comment/whitespace preserving)
|
|
16
|
+
export { parse, parseSync } from './parse';
|
|
17
|
+
// Enhanced deparse function
|
|
18
|
+
export { deparseEnhanced, deparseEnhancedSync, Deparser } from './deparse';
|
|
19
|
+
// Re-export standard deparse for non-enhanced use
|
|
20
|
+
export { deparse, deparseSync } from 'pgsql-deparser';
|
|
21
|
+
// Re-export loadModule from @libpg-query/parser
|
|
22
|
+
export { loadModule } from '@libpg-query/parser';
|
|
23
|
+
// Types
|
|
24
|
+
export { isRawComment, isRawWhitespace, isRawStmt, } from './types';
|
|
25
|
+
// Scanner (for advanced use)
|
|
26
|
+
export { scanComments } from './scanner';
|
package/esm/parse.js
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced parse functions that preserve comments and vertical whitespace
|
|
3
|
+
* by interleaving synthetic RawComment and RawWhitespace nodes into the
|
|
4
|
+
* parse result's stmts array.
|
|
5
|
+
*/
|
|
6
|
+
import { parse as libParse, parseSync as libParseSync } from '@libpg-query/parser';
|
|
7
|
+
import { scanComments } from './scanner';
|
|
8
|
+
/**
 * Locate the first position at or after `stmtLoc` that is neither
 * whitespace nor covered by a scanned element.
 *
 * Starting from `stmtLoc`, the cursor is advanced repeatedly until a full
 * pass makes no progress. Each pass:
 *   1. steps over characters matching `\s`;
 *   2. for every scanned element that starts at the cursor, or that starts
 *      at/after `stmtLoc` but not beyond the cursor and ends past it,
 *      jumps the cursor to that element's end.
 *
 * @param sql Full SQL source text.
 * @param stmtLoc Offset where the parser reported the statement to begin.
 * @param elements Scanned elements; only `value.start` / `value.end` are read.
 * @returns The offset at which the cursor stopped.
 */
function findActualSqlStart(sql, stmtLoc, elements) {
    const whitespace = /\s/;
    let cursor = stmtLoc;
    let moved;
    do {
        moved = false;
        // Step over leading whitespace characters.
        while (cursor < sql.length && whitespace.test(sql[cursor])) {
            cursor += 1;
            moved = true;
        }
        // Jump over any scanned element sitting on the cursor. The cursor may
        // move inside this loop, which affects the checks for later elements.
        for (const { value } of elements) {
            const startsAtCursor = value.start === cursor;
            const straddlesCursor = value.start >= stmtLoc && value.start < cursor + 1 && value.end > cursor;
            if ((startsAtCursor || straddlesCursor) && value.end > cursor) {
                cursor = value.end;
                moved = true;
            }
        }
    } while (moved);
    return cursor;
}
|
|
41
|
+
/**
 * Compute, for every parsed statement, where its SQL really begins and
 * where its parser-reported range ends.
 *
 * A missing `stmt_location` is treated as 0; a missing `stmt_len` is
 * treated as "up to the end of `sql`".
 *
 * @param stmts Parsed statements carrying `stmt_location` / `stmt_len`.
 * @param sql Full SQL source text.
 * @param elements Scanned elements, forwarded to findActualSqlStart.
 * @returns One `{ actualStart, end }` object per statement, in input order.
 */
function buildStmtRanges(stmts, sql, elements) {
    const ranges = [];
    for (const rawStmt of stmts) {
        const offset = rawStmt.stmt_location ?? 0;
        const actualStart = findActualSqlStart(sql, offset, elements);
        const length = rawStmt.stmt_len ?? sql.length - offset;
        ranges.push({ actualStart, end: offset + length });
    }
    return ranges;
}
|
|
49
|
+
/**
 * Merge parsed statements with scanned comments and whitespace into one
 * ordered `stmts` array.
 *
 * Every entry gets a sort position and a priority (comment 0, whitespace 1,
 * statement 2); entries are ordered by position, ties broken by priority.
 * - A comment that starts strictly inside a statement's
 *   (actualStart, end) range is hoisted: it takes the statement's
 *   actualStart as its position, so it sorts ahead of that statement, and
 *   its `trailing` flag is dropped. Its `location` still records the
 *   original start offset.
 * - Other comments and all whitespace elements keep their own start offset.
 * - Statements are positioned at their actualStart.
 *
 * @param parseResult Parser output; `stmts` may be absent.
 * @param sql Full SQL source text.
 * @param elements Scanned comment/whitespace elements.
 * @returns `{ version, stmts }` with synthetic nodes interleaved.
 */
function interleave(parseResult, sql, elements) {
    const parsed = parseResult.stmts ?? [];
    const ranges = buildStmtRanges(parsed, sql, elements);
    const ordered = [];
    for (const { kind, value } of elements) {
        if (kind !== 'comment') {
            ordered.push({
                position: value.start,
                priority: 1, // whitespace sorts after comments at the same position
                entry: {
                    RawWhitespace: {
                        lines: value.lines,
                        location: value.start,
                    }
                }
            });
            continue;
        }
        // A comment that begins inside a statement is lifted above that statement.
        const enclosing = ranges.find(range => value.start > range.actualStart && value.start < range.end);
        const hoistedPosition = enclosing ? enclosing.actualStart : null;
        const comment = {
            type: value.type,
            text: value.text,
            location: value.start,
        };
        // The trailing flag only survives when the comment was not hoisted.
        if (hoistedPosition == null && value.trailing) {
            comment.trailing = true;
        }
        ordered.push({
            position: hoistedPosition ?? value.start,
            priority: 0,
            entry: { RawComment: comment },
        });
    }
    parsed.forEach((rawStmt, index) => {
        ordered.push({
            position: ranges[index].actualStart,
            priority: 2, // statements sort after comments and whitespace
            entry: rawStmt,
        });
    });
    ordered.sort((a, b) => (a.position === b.position
        ? a.priority - b.priority
        : a.position - b.position));
    return {
        version: parseResult.version,
        stmts: ordered.map(item => item.entry),
    };
}
|
|
114
|
+
/**
 * Asynchronously parse SQL while keeping comments and blank lines.
 *
 * The SQL is first parsed with the underlying parser, then scanned with
 * scanComments, and both results are merged by interleave.
 *
 * @param sql SQL source text.
 * @returns A promise for the result produced by interleave: `version` plus
 *          a `stmts` array mixing real statements with RawComment and
 *          RawWhitespace nodes.
 */
export async function parse(sql) {
    const ast = await libParse(sql);
    return interleave(ast, sql, scanComments(sql));
}
|
|
127
|
+
/**
 * Synchronously parse SQL while keeping comments and blank lines.
 *
 * Same pipeline as `parse`, but uses the synchronous parser entry point:
 * parse, scan with scanComments, then merge with interleave.
 *
 * @param sql SQL source text.
 * @returns The result produced by interleave.
 */
export function parseSync(sql) {
    const ast = libParseSync(sql);
    return interleave(ast, sql, scanComments(sql));
}
|
package/esm/scanner.js
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scanner for extracting comments and vertical whitespace
|
|
3
|
+
* from PostgreSQL SQL source text.
|
|
4
|
+
*
|
|
5
|
+
* Uses PostgreSQL's real lexer via @libpg-query/parser's scanSync()
|
|
6
|
+
* to identify SQL_COMMENT tokens with exact byte positions.
|
|
7
|
+
* Whitespace detection uses token gaps to find blank lines
|
|
8
|
+
* between statements/comments.
|
|
9
|
+
*/
|
|
10
|
+
import { scanSync } from '@libpg-query/parser';
|
|
11
|
+
/** Token type for -- line comments from PostgreSQL's lexer */
|
|
12
|
+
// Compared against `token.tokenType` in scanComments to pick out comment tokens.
// NOTE(review): 275 is presumably the SQL_COMMENT value of the token enum used by
// @libpg-query/parser's scanner — confirm it stays in sync when that package is upgraded.
const SQL_COMMENT = 275;
|
|
13
|
+
/**
 * Count the blank lines represented by a stretch of text.
 *
 * Only `\n` characters are counted. Fewer than two newlines means there is
 * no blank line (result 0); otherwise the result is the number of newlines
 * minus one.
 *
 * @param text Text region to inspect.
 * @returns Number of blank lines in the region.
 */
function countBlankLines(text) {
    const newlineCount = text.split('\n').length - 1;
    if (newlineCount < 2) {
        return 0;
    }
    return newlineCount - 1;
}
|
|
25
|
+
/**
 * Extract comment tokens and blank-line gaps from SQL source text.
 *
 * The text is tokenised with scanSync. If scanSync throws, the error is
 * swallowed and an empty array is returned.
 *
 * Walking the tokens in order:
 * - the text between the previous token's end and the current token's
 *   start is checked with countBlankLines; a positive count produces a
 *   `whitespace` element covering that gap;
 * - a token whose `tokenType` equals SQL_COMMENT produces a `comment`
 *   element of type `'line'` whose `text` is the token text with its first
 *   two characters (the `--` marker) removed. It is marked `trailing` when
 *   a previous token exists (its end is > 0) and the gap before the
 *   comment contains no newline.
 * The text after the last token is checked for blank lines the same way.
 *
 * @param sql SQL source text.
 * @returns Elements (`{ kind, value }`) sorted by `value.start`.
 */
export function scanComments(sql) {
    let tokens;
    try {
        tokens = scanSync(sql).tokens;
    }
    catch {
        return [];
    }
    const found = [];
    // Records a whitespace element for [from, to) when it holds a blank line.
    const recordBlankLines = (from, to) => {
        const count = countBlankLines(sql.substring(from, to));
        if (count > 0) {
            found.push({
                kind: 'whitespace',
                value: { lines: count, start: from, end: to },
            });
        }
    };
    let cursor = 0;
    for (const { start, end, tokenType } of tokens) {
        if (start > cursor) {
            recordBlankLines(cursor, start);
        }
        if (tokenType === SQL_COMMENT) {
            // Same line as the previous token when the gap has no newline.
            const sameLine = !sql.substring(cursor, start).includes('\n');
            found.push({
                kind: 'comment',
                value: {
                    type: 'line',
                    text: sql.substring(start + 2, end), // strip --
                    start,
                    end,
                    trailing: cursor > 0 && sameLine,
                },
            });
        }
        cursor = end;
    }
    if (cursor < sql.length) {
        recordBlankLines(cursor, sql.length);
    }
    found.sort((a, b) => a.value.start - b.value.start);
    return found;
}
|
package/esm/types.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Type guard: check if a stmt entry is a RawComment node.
 *
 * An entry qualifies when it is a non-null object that has a `RawComment`
 * property. `null`, `undefined` and primitive values yield `false` rather
 * than making the `in` operator throw a TypeError.
 *
 * @param stmt Candidate entry from an enhanced `stmts` array.
 * @returns `true` when the entry carries a `RawComment` key.
 */
export function isRawComment(stmt) {
    return typeof stmt === 'object' && stmt !== null && 'RawComment' in stmt;
}
|
|
7
|
+
/**
 * Type guard: check if a stmt entry is a RawWhitespace node.
 *
 * An entry qualifies when it is a non-null object that has a
 * `RawWhitespace` property. `null`, `undefined` and primitive values yield
 * `false` rather than making the `in` operator throw a TypeError.
 *
 * @param stmt Candidate entry from an enhanced `stmts` array.
 * @returns `true` when the entry carries a `RawWhitespace` key.
 */
export function isRawWhitespace(stmt) {
    return typeof stmt === 'object' && stmt !== null && 'RawWhitespace' in stmt;
}
|
|
13
|
+
/**
 * Type guard: check if a stmt entry is a real RawStmt.
 *
 * An entry qualifies when it is a non-null object that has a `stmt`
 * property. `null`, `undefined` and primitive values yield `false` rather
 * than making the `in` operator throw a TypeError.
 *
 * @param stmt Candidate entry from an enhanced `stmts` array.
 * @returns `true` when the entry carries a `stmt` key.
 */
export function isRawStmt(stmt) {
    return typeof stmt === 'object' && stmt !== null && 'stmt' in stmt;
}
|
package/index.d.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pgsql-parse — Comment and whitespace preserving PostgreSQL parser.
|
|
3
|
+
*
|
|
4
|
+
* Drop-in enhancement over pgsql-parser that preserves SQL -- line
|
|
5
|
+
* comments and vertical whitespace (blank lines) through
|
|
6
|
+
* parse→deparse round trips.
|
|
7
|
+
*
|
|
8
|
+
* Synthetic AST nodes:
|
|
9
|
+
* - RawComment: represents a SQL comment
|
|
10
|
+
* - RawWhitespace: represents significant vertical whitespace
|
|
11
|
+
*
|
|
12
|
+
* These nodes are interleaved with real RawStmt entries in the
|
|
13
|
+
* stmts array, ordered by byte position in the original source.
|
|
14
|
+
*/
|
|
15
|
+
export { parse, parseSync } from './parse';
|
|
16
|
+
export { deparseEnhanced, deparseEnhancedSync, Deparser, DeparserOptions } from './deparse';
|
|
17
|
+
export { deparse, deparseSync } from 'pgsql-deparser';
|
|
18
|
+
export { loadModule } from '@libpg-query/parser';
|
|
19
|
+
export { RawComment, RawWhitespace, EnhancedStmt, EnhancedParseResult, isRawComment, isRawWhitespace, isRawStmt, } from './types';
|
|
20
|
+
export { scanComments, ScannedComment, ScannedWhitespace, ScannedElement } from './scanner';
|
package/index.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* pgsql-parse — Comment and whitespace preserving PostgreSQL parser.
|
|
4
|
+
*
|
|
5
|
+
* Drop-in enhancement over pgsql-parser that preserves SQL -- line
|
|
6
|
+
* comments and vertical whitespace (blank lines) through
|
|
7
|
+
* parse→deparse round trips.
|
|
8
|
+
*
|
|
9
|
+
* Synthetic AST nodes:
|
|
10
|
+
* - RawComment: represents a SQL comment
|
|
11
|
+
* - RawWhitespace: represents significant vertical whitespace
|
|
12
|
+
*
|
|
13
|
+
* These nodes are interleaved with real RawStmt entries in the
|
|
14
|
+
* stmts array, ordered by byte position in the original source.
|
|
15
|
+
*/
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
exports.scanComments = exports.isRawStmt = exports.isRawWhitespace = exports.isRawComment = exports.loadModule = exports.deparseSync = exports.deparse = exports.Deparser = exports.deparseEnhancedSync = exports.deparseEnhanced = exports.parseSync = exports.parse = void 0;
|
|
18
|
+
// Enhanced parse functions (comment/whitespace preserving)
|
|
19
|
+
var parse_1 = require("./parse");
|
|
20
|
+
Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return parse_1.parse; } });
|
|
21
|
+
Object.defineProperty(exports, "parseSync", { enumerable: true, get: function () { return parse_1.parseSync; } });
|
|
22
|
+
// Enhanced deparse function
|
|
23
|
+
var deparse_1 = require("./deparse");
|
|
24
|
+
Object.defineProperty(exports, "deparseEnhanced", { enumerable: true, get: function () { return deparse_1.deparseEnhanced; } });
|
|
25
|
+
Object.defineProperty(exports, "deparseEnhancedSync", { enumerable: true, get: function () { return deparse_1.deparseEnhancedSync; } });
|
|
26
|
+
Object.defineProperty(exports, "Deparser", { enumerable: true, get: function () { return deparse_1.Deparser; } });
|
|
27
|
+
// Re-export standard deparse for non-enhanced use
|
|
28
|
+
var pgsql_deparser_1 = require("pgsql-deparser");
|
|
29
|
+
Object.defineProperty(exports, "deparse", { enumerable: true, get: function () { return pgsql_deparser_1.deparse; } });
|
|
30
|
+
Object.defineProperty(exports, "deparseSync", { enumerable: true, get: function () { return pgsql_deparser_1.deparseSync; } });
|
|
31
|
+
// Re-export loadModule from @libpg-query/parser
|
|
32
|
+
var parser_1 = require("@libpg-query/parser");
|
|
33
|
+
Object.defineProperty(exports, "loadModule", { enumerable: true, get: function () { return parser_1.loadModule; } });
|
|
34
|
+
// Types
|
|
35
|
+
var types_1 = require("./types");
|
|
36
|
+
Object.defineProperty(exports, "isRawComment", { enumerable: true, get: function () { return types_1.isRawComment; } });
|
|
37
|
+
Object.defineProperty(exports, "isRawWhitespace", { enumerable: true, get: function () { return types_1.isRawWhitespace; } });
|
|
38
|
+
Object.defineProperty(exports, "isRawStmt", { enumerable: true, get: function () { return types_1.isRawStmt; } });
|
|
39
|
+
// Scanner (for advanced use)
|
|
40
|
+
var scanner_1 = require("./scanner");
|
|
41
|
+
Object.defineProperty(exports, "scanComments", { enumerable: true, get: function () { return scanner_1.scanComments; } });
|
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pgsql-parse",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"author": "Constructive <developers@constructive.io>",
|
|
5
|
+
"description": "Comment and whitespace preserving PostgreSQL parser",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"module": "esm/index.js",
|
|
8
|
+
"types": "index.d.ts",
|
|
9
|
+
"homepage": "https://github.com/constructive-io/pgsql-parser",
|
|
10
|
+
"license": "MIT",
|
|
11
|
+
"publishConfig": {
|
|
12
|
+
"access": "public",
|
|
13
|
+
"directory": "dist"
|
|
14
|
+
},
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "https://github.com/constructive-io/pgsql-parser"
|
|
18
|
+
},
|
|
19
|
+
"bugs": {
|
|
20
|
+
"url": "https://github.com/constructive-io/pgsql-parser/issues"
|
|
21
|
+
},
|
|
22
|
+
"scripts": {
|
|
23
|
+
"copy": "makage assets",
|
|
24
|
+
"clean": "makage clean dist",
|
|
25
|
+
"prepublishOnly": "npm run build",
|
|
26
|
+
"build": "npm run clean && tsc && tsc -p tsconfig.esm.json && npm run copy",
|
|
27
|
+
"build:dev": "npm run clean && tsc --declarationMap && tsc -p tsconfig.esm.json && npm run copy",
|
|
28
|
+
"lint": "eslint . --fix",
|
|
29
|
+
"test": "jest",
|
|
30
|
+
"test:watch": "jest --watch"
|
|
31
|
+
},
|
|
32
|
+
"keywords": [
|
|
33
|
+
"sql",
|
|
34
|
+
"postgres",
|
|
35
|
+
"postgresql",
|
|
36
|
+
"pg",
|
|
37
|
+
"parser",
|
|
38
|
+
"comment",
|
|
39
|
+
"whitespace",
|
|
40
|
+
"round-trip"
|
|
41
|
+
],
|
|
42
|
+
"dependencies": {
|
|
43
|
+
"@libpg-query/parser": "^17.6.10",
|
|
44
|
+
"@pgsql/types": "^17.6.2",
|
|
45
|
+
"pgsql-deparser": "17.18.3"
|
|
46
|
+
},
|
|
47
|
+
"devDependencies": {
|
|
48
|
+
"makage": "^0.1.8"
|
|
49
|
+
},
|
|
50
|
+
"gitHead": "6571608759a472a0fb8f462737056e8e5a2bb0dc"
|
|
51
|
+
}
|
package/parse.d.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced parse functions that preserve comments and vertical whitespace
|
|
3
|
+
* by interleaving synthetic RawComment and RawWhitespace nodes into the
|
|
4
|
+
* parse result's stmts array.
|
|
5
|
+
*/
|
|
6
|
+
import { EnhancedParseResult } from './types';
|
|
7
|
+
/**
|
|
8
|
+
* Parse SQL with comment and whitespace preservation (async).
|
|
9
|
+
*
|
|
10
|
+
* Returns an EnhancedParseResult where the stmts array contains
|
|
11
|
+
* real RawStmt entries interleaved with synthetic RawComment and
|
|
12
|
+
* RawWhitespace nodes, all ordered by their byte position in the
|
|
13
|
+
* original source text.
|
|
14
|
+
*/
|
|
15
|
+
export declare function parse(sql: string): Promise<EnhancedParseResult>;
|
|
16
|
+
/**
|
|
17
|
+
* Parse SQL with comment and whitespace preservation (sync).
|
|
18
|
+
*/
|
|
19
|
+
export declare function parseSync(sql: string): EnhancedParseResult;
|
package/parse.js
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Enhanced parse functions that preserve comments and vertical whitespace
|
|
4
|
+
* by interleaving synthetic RawComment and RawWhitespace nodes into the
|
|
5
|
+
* parse result's stmts array.
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.parse = parse;
|
|
9
|
+
exports.parseSync = parseSync;
|
|
10
|
+
const parser_1 = require("@libpg-query/parser");
|
|
11
|
+
const scanner_1 = require("./scanner");
|
|
12
|
+
/**
 * Locate the index of the first real SQL token belonging to a statement.
 *
 * The range reported by the parser for a statement may begin with
 * whitespace and comments that are not part of the statement itself.
 * Starting at `stmtLoc`, this walks forward over whitespace characters
 * and over any scanned element (comment or whitespace region) that
 * begins at or after `stmtLoc` and covers the current position, and
 * repeats until neither step moves the position any further.
 *
 * @param sql - The original SQL text; positions are used as string indices.
 * @param stmtLoc - Position at which the parser says the statement starts.
 * @param elements - Scanned elements, each exposing `value.start` and `value.end`.
 * @returns The position of the first character that is neither whitespace
 *          nor covered by a scanned element (may equal `sql.length`).
 */
function findActualSqlStart(sql, stmtLoc, elements) {
    const whitespace = /\s/;
    let cursor = stmtLoc;
    let advanced;
    do {
        advanced = false;
        // Step 1: consume plain whitespace characters.
        while (cursor < sql.length && whitespace.test(sql[cursor])) {
            cursor += 1;
            advanced = true;
        }
        // Step 2: jump over every scanned element that covers the cursor.
        // The cursor never moves below stmtLoc, so an element starting exactly
        // at the cursor is already included by the range test below.
        for (const { value } of elements) {
            const startsInRange = value.start >= stmtLoc && value.start <= cursor;
            if (startsInRange && value.end > cursor) {
                cursor = value.end;
                advanced = true;
            }
        }
    } while (advanced);
    return cursor;
}
|
|
45
|
+
/**
 * Compute, for every parsed statement, the span it occupies in `sql`.
 *
 * @param stmts - RawStmt entries from the parser (reads `stmt_location` and `stmt_len`).
 * @param sql - The original SQL text.
 * @param elements - Scanned comment/whitespace elements, forwarded to findActualSqlStart.
 * @returns One `{ actualStart, end }` object per statement, in the same order:
 *          `actualStart` is the position of the first real SQL token and
 *          `end` is `stmt_location + stmt_len` (exclusive).
 */
function buildStmtRanges(stmts, sql, elements) {
    return stmts.map(stmt => {
        // A missing stmt_location is treated as the start of the text.
        const loc = stmt.stmt_location ?? 0;
        const actualStart = findActualSqlStart(sql, loc, elements);
        // PostgreSQL defines stmt_len == 0 as "rest of string", so an explicit
        // zero must fall back exactly like a missing field; otherwise the range
        // would be empty and no comment could ever be matched to the statement.
        const len = stmt.stmt_len ? stmt.stmt_len : sql.length - loc;
        return { actualStart, end: loc + len };
    });
}
|
|
53
|
+
/**
 * Merge parsed statements with scanned comments and whitespace into a
 * single, position-ordered `stmts` array.
 *
 * Ordering rules:
 *  - entries are sorted by position, ties broken by priority
 *    (comment = 0, whitespace = 1, statement = 2);
 *  - a comment that starts strictly inside a statement's range
 *    (after its first real token, before its end) is hoisted to that
 *    statement's start position so it is emitted just above it, and
 *    its `trailing` flag is dropped;
 *  - a statement is positioned at its first real SQL token.
 *
 * @param parseResult - Parser output; reads `version` and `stmts`.
 * @param sql - The original SQL text.
 * @param elements - Scanned elements (`kind` of 'comment' or anything else,
 *                   which is treated as whitespace).
 * @returns `{ version, stmts }` where `stmts` mixes the original statement
 *          entries with `{ RawComment }` and `{ RawWhitespace }` wrappers.
 */
function interleave(parseResult, sql, elements) {
    const stmts = parseResult.stmts ?? [];
    const ranges = buildStmtRanges(stmts, sql, elements);
    const commentItem = (comment) => {
        // First statement range that strictly contains the comment start, if any.
        const enclosing = ranges.find(range => comment.start > range.actualStart && comment.start < range.end);
        const hoistedPosition = enclosing ? enclosing.actualStart : null;
        // The trailing flag only survives when the comment stays where it was.
        const trailingFlag = hoistedPosition == null && comment.trailing ? { trailing: true } : {};
        return {
            position: hoistedPosition ?? comment.start,
            priority: 0,
            entry: {
                RawComment: {
                    type: comment.type,
                    text: comment.text,
                    location: comment.start,
                    ...trailingFlag,
                }
            }
        };
    };
    const whitespaceItem = (region) => ({
        position: region.start,
        priority: 1, // whitespace sorts after comments at the same position
        entry: {
            RawWhitespace: {
                lines: region.lines,
                location: region.start,
            }
        }
    });
    const scannedItems = elements.map(elem => elem.kind === 'comment' ? commentItem(elem.value) : whitespaceItem(elem.value));
    const stmtItems = stmts.map((stmt, index) => ({
        position: ranges[index].actualStart,
        priority: 2, // statements sort after comments and whitespace
        entry: stmt,
    }));
    // Scanned items come first so that the sort leaves them ahead of
    // statements whenever position and priority are both equal.
    const ordered = [...scannedItems, ...stmtItems].sort((a, b) => a.position !== b.position ? a.position - b.position : a.priority - b.priority);
    return {
        version: parseResult.version,
        stmts: ordered.map(item => item.entry),
    };
}
|
|
118
|
+
/**
 * Asynchronously parse SQL while keeping comments and blank lines.
 *
 * Runs the underlying parser first, then scans the same text for
 * comments/whitespace and merges both into one result via interleave().
 *
 * @param sql - The SQL text to parse.
 * @returns A promise for the merged result: real statement entries mixed
 *          with synthetic RawComment and RawWhitespace nodes, ordered by
 *          their position in the original text.
 */
async function parse(sql) {
    const rawResult = await (0, parser_1.parse)(sql);
    return interleave(rawResult, sql, (0, scanner_1.scanComments)(sql));
}
|
|
131
|
+
/**
 * Synchronous counterpart of parse(): parses the SQL, scans it for
 * comments/whitespace and merges both via interleave().
 *
 * @param sql - The SQL text to parse.
 * @returns The merged result with statements, RawComment and RawWhitespace
 *          entries ordered by their position in the original text.
 */
function parseSync(sql) {
    const rawResult = (0, parser_1.parseSync)(sql);
    return interleave(rawResult, sql, (0, scanner_1.scanComments)(sql));
}
|
package/scanner.d.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scanner for extracting comments and vertical whitespace
|
|
3
|
+
* from PostgreSQL SQL source text.
|
|
4
|
+
*
|
|
5
|
+
* Uses PostgreSQL's real lexer via @libpg-query/parser's scanSync()
|
|
6
|
+
* to identify SQL_COMMENT tokens with exact byte positions.
|
|
7
|
+
* Whitespace detection uses token gaps to find blank lines
|
|
8
|
+
* between statements/comments.
|
|
9
|
+
*/
|
|
10
|
+
export interface ScannedComment {
|
|
11
|
+
type: 'line';
|
|
12
|
+
/** The comment text (without the -- delimiter) */
|
|
13
|
+
text: string;
|
|
14
|
+
/** Byte offset of the start of the comment (including --) */
|
|
15
|
+
start: number;
|
|
16
|
+
/** Byte offset of the end of the comment (exclusive) */
|
|
17
|
+
end: number;
|
|
18
|
+
/** True if this comment is on the same line as a preceding token (trailing comment) */
|
|
19
|
+
trailing: boolean;
|
|
20
|
+
}
|
|
21
|
+
export interface ScannedWhitespace {
|
|
22
|
+
/** Number of blank lines in the region (its newline count minus one; always >= 1) */
|
|
23
|
+
lines: number;
|
|
24
|
+
/** Byte offset of the start of the whitespace region */
|
|
25
|
+
start: number;
|
|
26
|
+
/** Byte offset of the end of the whitespace region */
|
|
27
|
+
end: number;
|
|
28
|
+
}
|
|
29
|
+
export type ScannedElement = {
|
|
30
|
+
kind: 'comment';
|
|
31
|
+
value: ScannedComment;
|
|
32
|
+
} | {
|
|
33
|
+
kind: 'whitespace';
|
|
34
|
+
value: ScannedWhitespace;
|
|
35
|
+
};
|
|
36
|
+
/**
|
|
37
|
+
* Scan SQL source text and extract all -- line comments and significant
|
|
38
|
+
* vertical whitespace (gaps between tokens that contain 2+ newlines).
|
|
39
|
+
*
|
|
40
|
+
* Uses PostgreSQL's real lexer (via WASM scanSync) for comment detection,
|
|
41
|
+
* so all string literal types (single-quoted, dollar-quoted,
|
|
42
|
+
* escape strings, etc.) are handled correctly by the actual
|
|
43
|
+
* PostgreSQL scanner — no reimplementation needed.
|
|
44
|
+
*/
|
|
45
|
+
export declare function scanComments(sql: string): ScannedElement[];
|
package/scanner.js
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Scanner for extracting comments and vertical whitespace
|
|
4
|
+
* from PostgreSQL SQL source text.
|
|
5
|
+
*
|
|
6
|
+
* Uses PostgreSQL's real lexer via @libpg-query/parser's scanSync()
|
|
7
|
+
* to identify SQL_COMMENT tokens with exact byte positions.
|
|
8
|
+
* Whitespace detection uses token gaps to find blank lines
|
|
9
|
+
* between statements/comments.
|
|
10
|
+
*/
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.scanComments = scanComments;
|
|
13
|
+
const parser_1 = require("@libpg-query/parser");
|
|
14
|
+
/** Token type for -- line comments from PostgreSQL's lexer */
|
|
15
|
+
const SQL_COMMENT = 275;
|
|
16
|
+
/**
 * Work out how many blank lines a stretch of text represents.
 *
 * Every '\n' in the text is counted (they need not be adjacent).
 * N newlines correspond to N - 1 blank lines; with fewer than two
 * newlines there is no blank line and the result is 0.
 *
 * @param text - The text region to inspect.
 * @returns The number of blank lines (0 when there are none).
 */
function countBlankLines(text) {
    // Splitting on '\n' yields one more piece than there are newlines.
    const newlineCount = text.split('\n').length - 1;
    if (newlineCount < 2) {
        return 0;
    }
    return newlineCount - 1;
}
|
|
28
|
+
/**
 * Extract `--` line comments and significant vertical whitespace from SQL.
 *
 * The text is tokenized with scanSync() from @libpg-query/parser. Walking
 * the tokens in order:
 *  - the gap between the previous token's end and the current token's
 *    start becomes a whitespace element when it contains a blank line
 *    (see countBlankLines), as does the gap after the last token;
 *  - a token whose type is SQL_COMMENT becomes a comment element whose
 *    text omits the two leading delimiter characters. It is flagged
 *    `trailing` when something precedes it and no newline separates it
 *    from the previous token.
 *
 * If scanning throws, an empty array is returned.
 *
 * NOTE(review): token offsets are used directly as string indices; if the
 * lexer reports UTF-8 byte offsets these diverge for non-ASCII input — confirm.
 *
 * @param sql - The SQL text to scan.
 * @returns Comment and whitespace elements sorted by start position.
 */
function scanComments(sql) {
    let tokens;
    try {
        tokens = (0, parser_1.scanSync)(sql).tokens;
    }
    catch {
        return [];
    }
    const found = [];
    // Record the region [from, to) as whitespace when it holds a blank line.
    const recordGap = (from, to) => {
        if (to > from) {
            const lines = countBlankLines(sql.substring(from, to));
            if (lines > 0) {
                found.push({ kind: 'whitespace', value: { lines, start: from, end: to } });
            }
        }
    };
    let cursor = 0; // end of the previously visited token
    for (const token of tokens) {
        recordGap(cursor, token.start);
        if (token.tokenType === SQL_COMMENT) {
            // Same-line check: nothing but non-newline characters since the
            // previous token, and the comment is not the very first thing.
            const trailing = cursor > 0 && !sql.substring(cursor, token.start).includes('\n');
            found.push({
                kind: 'comment',
                value: {
                    type: 'line',
                    text: sql.substring(token.start + 2, token.end), // strip --
                    start: token.start,
                    end: token.end,
                    trailing,
                }
            });
        }
        cursor = token.end;
    }
    // Whatever follows the last token may still contain blank lines.
    recordGap(cursor, sql.length);
    found.sort((a, b) => a.value.start - b.value.start);
    return found;
}
|
package/types.d.ts
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { RawStmt } from '@pgsql/types';
|
|
2
|
+
/**
|
|
3
|
+
* Synthetic AST node representing a SQL comment.
|
|
4
|
+
* Not produced by PostgreSQL's parser — injected by pgsql-parse
|
|
5
|
+
* to preserve comments through parse→deparse round trips.
|
|
6
|
+
*/
|
|
7
|
+
export interface RawComment {
|
|
8
|
+
/** Always 'line' — only -- comments are supported */
|
|
9
|
+
type: 'line';
|
|
10
|
+
/** The comment text (without the -- delimiter) */
|
|
11
|
+
text: string;
|
|
12
|
+
/** Byte offset in the original source (for ordering) */
|
|
13
|
+
location: number;
|
|
14
|
+
/** True if this comment is on the same line as the preceding statement */
|
|
15
|
+
trailing?: boolean;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Synthetic AST node representing significant vertical whitespace.
|
|
19
|
+
* Represents one or more blank lines between statements.
|
|
20
|
+
*/
|
|
21
|
+
export interface RawWhitespace {
|
|
22
|
+
/** Number of blank lines */
|
|
23
|
+
lines: number;
|
|
24
|
+
/** Byte offset in the original source (for ordering) */
|
|
25
|
+
location: number;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* A statement entry that can hold either a real RawStmt or a synthetic node.
|
|
29
|
+
* The stmts array in EnhancedParseResult contains these.
|
|
30
|
+
*/
|
|
31
|
+
export type EnhancedStmt = RawStmt | {
|
|
32
|
+
RawComment: RawComment;
|
|
33
|
+
} | {
|
|
34
|
+
RawWhitespace: RawWhitespace;
|
|
35
|
+
};
|
|
36
|
+
/**
|
|
37
|
+
* Enhanced parse result that includes synthetic comment and whitespace nodes
|
|
38
|
+
* interleaved with the real RawStmt entries by byte position.
|
|
39
|
+
*/
|
|
40
|
+
export interface EnhancedParseResult {
|
|
41
|
+
version: number;
|
|
42
|
+
stmts: EnhancedStmt[];
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Type guard: check if a stmt entry is a RawComment node.
|
|
46
|
+
*/
|
|
47
|
+
export declare function isRawComment(stmt: EnhancedStmt): stmt is {
|
|
48
|
+
RawComment: RawComment;
|
|
49
|
+
};
|
|
50
|
+
/**
|
|
51
|
+
* Type guard: check if a stmt entry is a RawWhitespace node.
|
|
52
|
+
*/
|
|
53
|
+
export declare function isRawWhitespace(stmt: EnhancedStmt): stmt is {
|
|
54
|
+
RawWhitespace: RawWhitespace;
|
|
55
|
+
};
|
|
56
|
+
/**
|
|
57
|
+
* Type guard: check if a stmt entry is a real RawStmt.
|
|
58
|
+
*/
|
|
59
|
+
export declare function isRawStmt(stmt: EnhancedStmt): stmt is RawStmt;
|
package/types.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.isRawComment = isRawComment;
|
|
4
|
+
exports.isRawWhitespace = isRawWhitespace;
|
|
5
|
+
exports.isRawStmt = isRawStmt;
|
|
6
|
+
/**
 * Type guard for synthetic comment entries.
 *
 * @param stmt - An entry from an enhanced `stmts` array.
 * @returns `true` when the entry has a `RawComment` property
 *          (own or inherited), `false` otherwise.
 */
function isRawComment(stmt) {
    return Reflect.has(stmt, 'RawComment');
}
|
|
12
|
+
/**
 * Type guard for synthetic whitespace entries.
 *
 * @param stmt - An entry from an enhanced `stmts` array.
 * @returns `true` when the entry has a `RawWhitespace` property
 *          (own or inherited), `false` otherwise.
 */
function isRawWhitespace(stmt) {
    return Reflect.has(stmt, 'RawWhitespace');
}
|
|
18
|
+
/**
 * Type guard for real parser statements.
 *
 * @param stmt - An entry from an enhanced `stmts` array.
 * @returns `true` when the entry has a `stmt` property
 *          (own or inherited), `false` otherwise.
 */
function isRawStmt(stmt) {
    return Reflect.has(stmt, 'stmt');
}
|