pgsql-parse 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Dan Lynch <pyramation@gmail.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,107 @@
1
+ # pgsql-parse
2
+
3
+ <p align="center" width="100%">
4
+ <img height="250" src="https://raw.githubusercontent.com/constructive-io/constructive/refs/heads/main/assets/outline-logo.svg" />
5
+ </p>
6
+
7
+ Comment and whitespace preserving PostgreSQL parser. A drop-in enhancement for `pgsql-parser` that preserves SQL `--` line comments and vertical whitespace (blank lines) through parse-deparse round trips.
8
+
9
+ ## Installation
10
+
11
+ ```sh
12
+ npm install pgsql-parse
13
+ ```
14
+
15
+ ## Features
16
+
17
+ * **Comment Preservation** -- Retains `--` line comments through parse-deparse cycles (the scanner only collects `--` line-comment tokens; `/* */` block comments are not captured)
18
+ * **Vertical Whitespace** -- Preserves blank lines between statements for readable output
19
+ * **Idempotent Round-Trips** -- `parse -> deparse -> parse -> deparse` produces identical output
20
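+ * **Drop-in API** -- Exposes the same entry points as `pgsql-parser`: enhanced `parse`/`parseSync`, plus `deparse`/`deparseSync` re-exported from `pgsql-deparser` and `loadModule` re-exported from `@libpg-query/parser`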
21
+ * **Synthetic AST Nodes** -- `RawComment` and `RawWhitespace` nodes interleaved into the `stmts` array by byte position
22
+
23
+ ## How It Works
24
+
25
+ 1. A scanner built on PostgreSQL's own lexer (`scanSync` from `@libpg-query/parser`) extracts `--` comment tokens with byte positions from the raw SQL text and detects blank lines in the gaps between tokens
26
+ 2. Enhanced `parse`/`parseSync` call the standard `libpg-query` parser, then interleave synthetic `RawComment` and `RawWhitespace` nodes into the `stmts` array based on byte position
27
+ 3. `deparseEnhanced()` dispatches on node type -- real `RawStmt` entries go through the standard deparser, while synthetic nodes emit their comment text or blank lines directly
28
+
29
+ ## API
30
+
31
+ ### Enhanced Parse
32
+
33
+ ```typescript
34
+ import { parse, parseSync, deparseEnhanced, loadModule } from 'pgsql-parse';
35
+
36
+ // Async (handles initialization automatically)
37
+ const result = await parse(`
38
+ -- Create users table
39
+ CREATE TABLE users (id serial PRIMARY KEY);
40
+
41
+ -- Create posts table
42
+ CREATE TABLE posts (id serial PRIMARY KEY);
43
+ `);
44
+
45
+ // result.stmts contains RawComment, RawWhitespace, and RawStmt nodes
46
+ const sql = deparseEnhanced(result);
47
+ // Output preserves comments and blank lines
48
+ ```
49
+
50
+ ### Sync Methods
51
+
52
+ ```typescript
53
+ import { parseSync, deparseEnhanced, loadModule } from 'pgsql-parse';
54
+
55
+ await loadModule();
56
+
57
+ const result = parseSync('-- comment\nSELECT 1;');
58
+ const sql = deparseEnhanced(result);
59
+ ```
60
+
61
+ ### Type Guards
62
+
63
+ ```typescript
64
+ import { isRawComment, isRawWhitespace, isRawStmt } from 'pgsql-parse';
65
+
66
+ for (const stmt of result.stmts) {
67
+ if (isRawComment(stmt)) {
68
+ console.log('Comment:', stmt.RawComment.text);
69
+ } else if (isRawWhitespace(stmt)) {
70
+ console.log('Blank lines:', stmt.RawWhitespace.lines);
71
+ } else if (isRawStmt(stmt)) {
72
+ console.log('Statement:', stmt);
73
+ }
74
+ }
75
+ ```
76
+
77
+ ## Credits
78
+
79
+ Built on the excellent work of several contributors:
80
+
81
+ * **[Dan Lynch](https://github.com/pyramation)** -- official maintainer since 2018 and architect of the current implementation
82
+ * **[Lukas Fittl](https://github.com/lfittl)** for [libpg_query](https://github.com/pganalyze/libpg_query) -- the core PostgreSQL parser that powers this project
83
+
84
+ ---
85
+
86
+ **🛠 Built by the [Constructive](https://constructive.io) team — creators of modular Postgres tooling for secure, composable backends. If you like our work, contribute on [GitHub](https://github.com/constructive-io).**
87
+
88
+ ## Related
89
+
90
+ * [pgpm](https://pgpm.dev): A Postgres Package Manager that brings modular development to PostgreSQL with reusable packages, deterministic migrations, recursive dependency resolution, and tag-aware versioning.
91
+ * [pgsql-test](https://www.npmjs.com/package/pgsql-test): Instant, isolated PostgreSQL databases for each test with automatic transaction rollbacks, context switching, and clean seeding for fast, reliable database testing.
92
+ * [pgsql-seed](https://www.npmjs.com/package/pgsql-seed): PostgreSQL seeding utilities for CSV, JSON, SQL data loading, and pgpm deployment.
93
+ * [pgsql-parser](https://www.npmjs.com/package/pgsql-parser): The real PostgreSQL parser for Node.js, providing symmetric parsing and deparsing of SQL statements with actual PostgreSQL parser integration.
94
+ * [pgsql-deparser](https://www.npmjs.com/package/pgsql-deparser): A streamlined tool designed for converting PostgreSQL ASTs back into SQL queries, focusing solely on deparser functionality to complement `pgsql-parser`.
95
+ * [@pgsql/parser](https://www.npmjs.com/package/@pgsql/parser): Multi-version PostgreSQL parser with dynamic version selection at runtime, supporting PostgreSQL 15, 16, and 17 in a single package.
96
+ * [@pgsql/types](https://www.npmjs.com/package/@pgsql/types): Offers TypeScript type definitions for PostgreSQL AST nodes, facilitating type-safe construction, analysis, and manipulation of ASTs.
97
+ * [@pgsql/enums](https://www.npmjs.com/package/@pgsql/enums): Provides TypeScript enum definitions for PostgreSQL constants, enabling type-safe usage of PostgreSQL enums and constants in your applications.
98
+ * [@pgsql/utils](https://www.npmjs.com/package/@pgsql/utils): A comprehensive utility library for PostgreSQL, offering type-safe AST node creation and enum value conversions, simplifying the construction and manipulation of PostgreSQL ASTs.
99
+ * [@pgsql/traverse](https://www.npmjs.com/package/@pgsql/traverse): PostgreSQL AST traversal utilities for pgsql-parser, providing a visitor pattern for traversing PostgreSQL Abstract Syntax Tree nodes, similar to Babel's traverse functionality but specifically designed for PostgreSQL AST structures.
100
+ * [pg-proto-parser](https://www.npmjs.com/package/pg-proto-parser): A TypeScript tool that parses PostgreSQL Protocol Buffers definitions to generate TypeScript interfaces, utility functions, and JSON mappings for enums.
101
+ * [libpg-query](https://github.com/constructive-io/libpg-query-node): The real PostgreSQL parser exposed for Node.js, used primarily in `pgsql-parser` for parsing and deparsing SQL queries.
102
+
103
+ ## Disclaimer
104
+
105
+ AS DESCRIBED IN THE LICENSES, THE SOFTWARE IS PROVIDED "AS IS", AT YOUR OWN RISK, AND WITHOUT WARRANTIES OF ANY KIND.
106
+
107
+ No developer or entity involved in creating the Software will be liable for any claims or damages whatsoever associated with your use, inability to use, or your interaction with other users of the Software code or Software CLI, including any direct, indirect, incidental, special, exemplary, punitive or consequential damages, or loss of profits, cryptocurrencies, tokens, or anything else of value.
package/deparse.d.ts ADDED
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Enhanced deparser that handles synthetic RawComment and RawWhitespace nodes
3
+ * in addition to all standard PostgreSQL AST nodes.
4
+ *
5
+ * This does NOT modify the upstream Deparser class. Instead, it processes
6
+ * the EnhancedParseResult's stmts array and delegates real statements
7
+ * to the standard deparser.
8
+ */
9
+ import { Deparser, DeparserOptions } from 'pgsql-deparser';
10
+ import { EnhancedParseResult } from './types';
11
+ /**
12
+ * Deparse an EnhancedParseResult back to SQL, preserving comments
13
+ * and vertical whitespace.
14
+ *
15
+ * The output strategy:
16
+ * - Each real statement gets a newline separator from the previous element
17
+ * - RawComment nodes emit their comment text
18
+ * - RawWhitespace nodes emit blank lines (the node itself IS the separator)
19
+ * - Adjacent statements/comments without a RawWhitespace between them
20
+ * get a single newline separator
21
+ */
22
+ export declare function deparseEnhanced(result: EnhancedParseResult, opts?: DeparserOptions): string;
23
+ /**
24
+ * Sync version of deparseEnhanced.
25
+ */
26
+ export declare const deparseEnhancedSync: typeof deparseEnhanced;
27
+ /**
28
+ * Standard Deparser class and DeparserOptions type — re-exported from pgsql-deparser for convenience.
29
+ * Use this when you have a standard ParseResult without synthetic nodes.
30
+ */
31
+ export { Deparser, DeparserOptions };
package/deparse.js ADDED
@@ -0,0 +1,67 @@
1
+ "use strict";
2
+ /**
3
+ * Enhanced deparser that handles synthetic RawComment and RawWhitespace nodes
4
+ * in addition to all standard PostgreSQL AST nodes.
5
+ *
6
+ * This does NOT modify the upstream Deparser class. Instead, it processes
7
+ * the EnhancedParseResult's stmts array and delegates real statements
8
+ * to the standard deparser.
9
+ */
10
+ Object.defineProperty(exports, "__esModule", { value: true });
11
+ exports.Deparser = exports.deparseEnhancedSync = void 0;
12
+ exports.deparseEnhanced = deparseEnhanced;
13
+ const pgsql_deparser_1 = require("pgsql-deparser");
14
+ Object.defineProperty(exports, "Deparser", { enumerable: true, get: function () { return pgsql_deparser_1.Deparser; } });
15
+ const types_1 = require("./types");
16
+ /**
17
+ * Deparse a single RawComment node back to SQL comment text.
18
+ */
19
+ function deparseComment(comment) {
20
+ return `--${comment.text}`;
21
+ }
22
+ /**
23
+ * Deparse an EnhancedParseResult back to SQL, preserving comments
24
+ * and vertical whitespace.
25
+ *
26
+ * The output strategy:
27
+ * - Each real statement gets a newline separator from the previous element
28
+ * - RawComment nodes emit their comment text
29
+ * - RawWhitespace nodes emit blank lines (the node itself IS the separator)
30
+ * - Adjacent statements/comments without a RawWhitespace between them
31
+ * get a single newline separator
32
+ */
33
+ function deparseEnhanced(result, opts = {}) {
34
+ const newline = opts.newline ?? '\n';
35
+ const lines = [];
36
+ for (const stmt of result.stmts) {
37
+ if ((0, types_1.isRawComment)(stmt)) {
38
+ const commentText = deparseComment(stmt.RawComment);
39
+ if (stmt.RawComment.trailing && lines.length > 0) {
40
+ // Trailing comment: append to the previous line
41
+ lines[lines.length - 1] += ' ' + commentText;
42
+ }
43
+ else {
44
+ lines.push(commentText);
45
+ }
46
+ }
47
+ else if ((0, types_1.isRawWhitespace)(stmt)) {
48
+ // Each blank line in the original source becomes an empty line in output.
49
+ // The whitespace node represents N blank lines between content.
50
+ for (let i = 0; i < stmt.RawWhitespace.lines; i++) {
51
+ lines.push('');
52
+ }
53
+ }
54
+ else if ((0, types_1.isRawStmt)(stmt)) {
55
+ // Wrap in a minimal ParseResult so the standard deparser handles it
56
+ const sql = pgsql_deparser_1.Deparser.deparse({ version: 0, stmts: [stmt] }, opts);
57
+ if (sql) {
58
+ lines.push(sql);
59
+ }
60
+ }
61
+ }
62
+ return lines.join(newline);
63
+ }
64
+ /**
65
+ * Sync version of deparseEnhanced.
66
+ */
67
+ exports.deparseEnhancedSync = deparseEnhanced;
package/esm/deparse.js ADDED
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Enhanced deparser that handles synthetic RawComment and RawWhitespace nodes
3
+ * in addition to all standard PostgreSQL AST nodes.
4
+ *
5
+ * This does NOT modify the upstream Deparser class. Instead, it processes
6
+ * the EnhancedParseResult's stmts array and delegates real statements
7
+ * to the standard deparser.
8
+ */
9
+ import { Deparser } from 'pgsql-deparser';
10
+ import { isRawComment, isRawWhitespace, isRawStmt, } from './types';
11
+ /**
12
+ * Deparse a single RawComment node back to SQL comment text.
13
+ */
14
+ function deparseComment(comment) {
15
+ return `--${comment.text}`;
16
+ }
17
+ /**
18
+ * Deparse an EnhancedParseResult back to SQL, preserving comments
19
+ * and vertical whitespace.
20
+ *
21
+ * The output strategy:
22
+ * - Each real statement gets a newline separator from the previous element
23
+ * - RawComment nodes emit their comment text
24
+ * - RawWhitespace nodes emit blank lines (the node itself IS the separator)
25
+ * - Adjacent statements/comments without a RawWhitespace between them
26
+ * get a single newline separator
27
+ */
28
+ export function deparseEnhanced(result, opts = {}) {
29
+ const newline = opts.newline ?? '\n';
30
+ const lines = [];
31
+ for (const stmt of result.stmts) {
32
+ if (isRawComment(stmt)) {
33
+ const commentText = deparseComment(stmt.RawComment);
34
+ if (stmt.RawComment.trailing && lines.length > 0) {
35
+ // Trailing comment: append to the previous line
36
+ lines[lines.length - 1] += ' ' + commentText;
37
+ }
38
+ else {
39
+ lines.push(commentText);
40
+ }
41
+ }
42
+ else if (isRawWhitespace(stmt)) {
43
+ // Each blank line in the original source becomes an empty line in output.
44
+ // The whitespace node represents N blank lines between content.
45
+ for (let i = 0; i < stmt.RawWhitespace.lines; i++) {
46
+ lines.push('');
47
+ }
48
+ }
49
+ else if (isRawStmt(stmt)) {
50
+ // Wrap in a minimal ParseResult so the standard deparser handles it
51
+ const sql = Deparser.deparse({ version: 0, stmts: [stmt] }, opts);
52
+ if (sql) {
53
+ lines.push(sql);
54
+ }
55
+ }
56
+ }
57
+ return lines.join(newline);
58
+ }
59
+ /**
60
+ * Sync version of deparseEnhanced.
61
+ */
62
+ export const deparseEnhancedSync = deparseEnhanced;
63
+ /**
64
+ * Standard Deparser class — re-exported from pgsql-deparser for convenience.
65
+ * Use this when you have a standard ParseResult without synthetic nodes.
66
+ */
67
+ export { Deparser };
package/esm/index.js ADDED
@@ -0,0 +1,26 @@
1
+ /**
2
+ * pgsql-parse — Comment and whitespace preserving PostgreSQL parser.
3
+ *
4
+ * Drop-in enhancement over pgsql-parser that preserves SQL -- line
5
+ * comments and vertical whitespace (blank lines) through
6
+ * parse→deparse round trips.
7
+ *
8
+ * Synthetic AST nodes:
9
+ * - RawComment: represents a SQL comment
10
+ * - RawWhitespace: represents significant vertical whitespace
11
+ *
12
+ * These nodes are interleaved with real RawStmt entries in the
13
+ * stmts array, ordered by byte position in the original source.
14
+ */
15
+ // Enhanced parse functions (comment/whitespace preserving)
16
+ export { parse, parseSync } from './parse';
17
+ // Enhanced deparse function
18
+ export { deparseEnhanced, deparseEnhancedSync, Deparser } from './deparse';
19
+ // Re-export standard deparse for non-enhanced use
20
+ export { deparse, deparseSync } from 'pgsql-deparser';
21
+ // Re-export loadModule from @libpg-query/parser
22
+ export { loadModule } from '@libpg-query/parser';
23
+ // Types
24
+ export { isRawComment, isRawWhitespace, isRawStmt, } from './types';
25
+ // Scanner (for advanced use)
26
+ export { scanComments } from './scanner';
package/esm/parse.js ADDED
@@ -0,0 +1,134 @@
1
+ /**
2
+ * Enhanced parse functions that preserve comments and vertical whitespace
3
+ * by interleaving synthetic RawComment and RawWhitespace nodes into the
4
+ * parse result's stmts array.
5
+ */
6
+ import { parse as libParse, parseSync as libParseSync } from '@libpg-query/parser';
7
+ import { scanComments } from './scanner';
8
+ /**
9
+ * Find the actual SQL start position for a statement by skipping
10
+ * past any comments and whitespace that the parser included in
11
+ * the stmt_location..stmt_location+stmt_len range.
12
+ *
13
+ * The parser's stmt_location often includes preceding whitespace
14
+ * and comments that were stripped during parsing. We need the
15
+ * position of the first real SQL token.
16
+ */
17
+ function findActualSqlStart(sql, stmtLoc, elements) {
18
+ let pos = stmtLoc;
19
+ // Iteratively skip whitespace and any scanned elements (comments/whitespace)
20
+ // that start at or after our current position
21
+ let changed = true;
22
+ while (changed) {
23
+ changed = false;
24
+ // Skip whitespace characters
25
+ while (pos < sql.length && /\s/.test(sql[pos])) {
26
+ pos++;
27
+ changed = true;
28
+ }
29
+ // Skip past any scanned element that starts at the current position, or that starts at/after stmtLoc and still overlaps the current position
30
+ for (const elem of elements) {
31
+ if (elem.value.start === pos || (elem.value.start >= stmtLoc && elem.value.start < pos + 1 && elem.value.end > pos)) {
32
+ if (elem.value.end > pos) {
33
+ pos = elem.value.end;
34
+ changed = true;
35
+ }
36
+ }
37
+ }
38
+ }
39
+ return pos;
40
+ }
41
+ function buildStmtRanges(stmts, sql, elements) {
42
+ return stmts.map(stmt => {
43
+ const loc = stmt.stmt_location ?? 0;
44
+ const actualStart = findActualSqlStart(sql, loc, elements);
45
+ const len = stmt.stmt_len ?? sql.length - loc;
46
+ return { actualStart, end: loc + len };
47
+ });
48
+ }
49
+ function interleave(parseResult, sql, elements) {
50
+ const stmts = parseResult.stmts ?? [];
51
+ const items = [];
52
+ const ranges = buildStmtRanges(stmts, sql, elements);
53
+ // Add scanned elements (comments and whitespace)
54
+ for (const elem of elements) {
55
+ if (elem.kind === 'comment') {
56
+ // Check if this comment falls inside a statement's byte range.
57
+ // If so, hoist it above that statement instead of leaving it
58
+ // at its original position (which would place it after the
59
+ // statement or trailing at the wrong spot).
60
+ let hoistedPosition = null;
61
+ for (const range of ranges) {
62
+ if (elem.value.start > range.actualStart && elem.value.start < range.end) {
63
+ hoistedPosition = range.actualStart;
64
+ break;
65
+ }
66
+ }
67
+ items.push({
68
+ position: hoistedPosition ?? elem.value.start,
69
+ priority: 0,
70
+ entry: {
71
+ RawComment: {
72
+ type: elem.value.type,
73
+ text: elem.value.text,
74
+ location: elem.value.start,
75
+ // Only preserve trailing flag when NOT hoisted —
76
+ // a hoisted comment becomes a standalone line above the statement.
77
+ ...(hoistedPosition == null && elem.value.trailing ? { trailing: true } : {}),
78
+ }
79
+ }
80
+ });
81
+ }
82
+ else {
83
+ items.push({
84
+ position: elem.value.start,
85
+ priority: 1, // whitespace sorts after comments at same position
86
+ entry: {
87
+ RawWhitespace: {
88
+ lines: elem.value.lines,
89
+ location: elem.value.start,
90
+ }
91
+ }
92
+ });
93
+ }
94
+ }
95
+ // Add parsed statements with their actual SQL start position
96
+ for (let i = 0; i < stmts.length; i++) {
97
+ items.push({
98
+ position: ranges[i].actualStart,
99
+ priority: 2, // statements sort after comments and whitespace
100
+ entry: stmts[i],
101
+ });
102
+ }
103
+ // Sort by position, then by priority
104
+ items.sort((a, b) => {
105
+ if (a.position !== b.position)
106
+ return a.position - b.position;
107
+ return a.priority - b.priority;
108
+ });
109
+ return {
110
+ version: parseResult.version,
111
+ stmts: items.map(item => item.entry),
112
+ };
113
+ }
114
+ /**
115
+ * Parse SQL with comment and whitespace preservation (async).
116
+ *
117
+ * Returns an EnhancedParseResult where the stmts array contains
118
+ * real RawStmt entries interleaved with synthetic RawComment and
119
+ * RawWhitespace nodes, all ordered by their byte position in the
120
+ * original source text.
121
+ */
122
+ export async function parse(sql) {
123
+ const parseResult = await libParse(sql);
124
+ const elements = scanComments(sql);
125
+ return interleave(parseResult, sql, elements);
126
+ }
127
+ /**
128
+ * Parse SQL with comment and whitespace preservation (sync).
129
+ */
130
+ export function parseSync(sql) {
131
+ const parseResult = libParseSync(sql);
132
+ const elements = scanComments(sql);
133
+ return interleave(parseResult, sql, elements);
134
+ }
package/esm/scanner.js ADDED
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Scanner for extracting comments and vertical whitespace
3
+ * from PostgreSQL SQL source text.
4
+ *
5
+ * Uses PostgreSQL's real lexer via @libpg-query/parser's scanSync()
6
+ * to identify SQL_COMMENT tokens with exact byte positions.
7
+ * Whitespace detection uses token gaps to find blank lines
8
+ * between statements/comments.
9
+ */
10
+ import { scanSync } from '@libpg-query/parser';
11
+ /** Token type for -- line comments from PostgreSQL's lexer */
12
+ const SQL_COMMENT = 275;
13
+ /**
14
+ * Count blank lines in a string region.
15
+ * Returns 0 if there are fewer than 2 newlines (no blank line).
16
+ */
17
+ function countBlankLines(text) {
18
+ let newlines = 0;
19
+ for (let i = 0; i < text.length; i++) {
20
+ if (text[i] === '\n')
21
+ newlines++;
22
+ }
23
+ return newlines >= 2 ? newlines - 1 : 0;
24
+ }
25
+ /**
26
+ * Scan SQL source text and extract all -- line comments and significant
27
+ * vertical whitespace (2+ consecutive newlines).
28
+ *
29
+ * Uses PostgreSQL's real lexer (via WASM scanSync) for comment detection,
30
+ * so all string literal types (single-quoted, dollar-quoted,
31
+ * escape strings, etc.) are handled correctly by the actual
32
+ * PostgreSQL scanner — no reimplementation needed.
33
+ */
34
+ export function scanComments(sql) {
35
+ const elements = [];
36
+ let tokens;
37
+ try {
38
+ const scanResult = scanSync(sql);
39
+ tokens = scanResult.tokens;
40
+ }
41
+ catch {
42
+ return [];
43
+ }
44
+ let prevEnd = 0;
45
+ for (const token of tokens) {
46
+ if (token.start > prevEnd) {
47
+ const gap = sql.substring(prevEnd, token.start);
48
+ const blankLines = countBlankLines(gap);
49
+ if (blankLines > 0) {
50
+ elements.push({
51
+ kind: 'whitespace',
52
+ value: {
53
+ lines: blankLines,
54
+ start: prevEnd,
55
+ end: token.start,
56
+ }
57
+ });
58
+ }
59
+ }
60
+ if (token.tokenType === SQL_COMMENT) {
61
+ // A comment is "trailing" if no newline exists between the previous
62
+ // token's end and this comment's start (i.e. same line).
63
+ const gapBeforeComment = sql.substring(prevEnd, token.start);
64
+ const trailing = prevEnd > 0 && !gapBeforeComment.includes('\n');
65
+ elements.push({
66
+ kind: 'comment',
67
+ value: {
68
+ type: 'line',
69
+ text: sql.substring(token.start + 2, token.end), // strip --
70
+ start: token.start,
71
+ end: token.end,
72
+ trailing,
73
+ }
74
+ });
75
+ }
76
+ prevEnd = token.end;
77
+ }
78
+ if (prevEnd < sql.length) {
79
+ const gap = sql.substring(prevEnd, sql.length);
80
+ const blankLines = countBlankLines(gap);
81
+ if (blankLines > 0) {
82
+ elements.push({
83
+ kind: 'whitespace',
84
+ value: {
85
+ lines: blankLines,
86
+ start: prevEnd,
87
+ end: sql.length,
88
+ }
89
+ });
90
+ }
91
+ }
92
+ elements.sort((a, b) => a.value.start - b.value.start);
93
+ return elements;
94
+ }
package/esm/types.js ADDED
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Type guard: check if a stmt entry is a RawComment node.
3
+ */
4
+ export function isRawComment(stmt) {
5
+ return 'RawComment' in stmt;
6
+ }
7
+ /**
8
+ * Type guard: check if a stmt entry is a RawWhitespace node.
9
+ */
10
+ export function isRawWhitespace(stmt) {
11
+ return 'RawWhitespace' in stmt;
12
+ }
13
+ /**
14
+ * Type guard: check if a stmt entry is a real RawStmt.
15
+ */
16
+ export function isRawStmt(stmt) {
17
+ return 'stmt' in stmt;
18
+ }
package/index.d.ts ADDED
@@ -0,0 +1,20 @@
1
+ /**
2
+ * pgsql-parse — Comment and whitespace preserving PostgreSQL parser.
3
+ *
4
+ * Drop-in enhancement over pgsql-parser that preserves SQL -- line
5
+ * comments and vertical whitespace (blank lines) through
6
+ * parse→deparse round trips.
7
+ *
8
+ * Synthetic AST nodes:
9
+ * - RawComment: represents a SQL comment
10
+ * - RawWhitespace: represents significant vertical whitespace
11
+ *
12
+ * These nodes are interleaved with real RawStmt entries in the
13
+ * stmts array, ordered by byte position in the original source.
14
+ */
15
+ export { parse, parseSync } from './parse';
16
+ export { deparseEnhanced, deparseEnhancedSync, Deparser, DeparserOptions } from './deparse';
17
+ export { deparse, deparseSync } from 'pgsql-deparser';
18
+ export { loadModule } from '@libpg-query/parser';
19
+ export { RawComment, RawWhitespace, EnhancedStmt, EnhancedParseResult, isRawComment, isRawWhitespace, isRawStmt, } from './types';
20
+ export { scanComments, ScannedComment, ScannedWhitespace, ScannedElement } from './scanner';
package/index.js ADDED
@@ -0,0 +1,41 @@
1
+ "use strict";
2
+ /**
3
+ * pgsql-parse — Comment and whitespace preserving PostgreSQL parser.
4
+ *
5
+ * Drop-in enhancement over pgsql-parser that preserves SQL -- line
6
+ * comments and vertical whitespace (blank lines) through
7
+ * parse→deparse round trips.
8
+ *
9
+ * Synthetic AST nodes:
10
+ * - RawComment: represents a SQL comment
11
+ * - RawWhitespace: represents significant vertical whitespace
12
+ *
13
+ * These nodes are interleaved with real RawStmt entries in the
14
+ * stmts array, ordered by byte position in the original source.
15
+ */
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.scanComments = exports.isRawStmt = exports.isRawWhitespace = exports.isRawComment = exports.loadModule = exports.deparseSync = exports.deparse = exports.Deparser = exports.deparseEnhancedSync = exports.deparseEnhanced = exports.parseSync = exports.parse = void 0;
18
+ // Enhanced parse functions (comment/whitespace preserving)
19
+ var parse_1 = require("./parse");
20
+ Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return parse_1.parse; } });
21
+ Object.defineProperty(exports, "parseSync", { enumerable: true, get: function () { return parse_1.parseSync; } });
22
+ // Enhanced deparse function
23
+ var deparse_1 = require("./deparse");
24
+ Object.defineProperty(exports, "deparseEnhanced", { enumerable: true, get: function () { return deparse_1.deparseEnhanced; } });
25
+ Object.defineProperty(exports, "deparseEnhancedSync", { enumerable: true, get: function () { return deparse_1.deparseEnhancedSync; } });
26
+ Object.defineProperty(exports, "Deparser", { enumerable: true, get: function () { return deparse_1.Deparser; } });
27
+ // Re-export standard deparse for non-enhanced use
28
+ var pgsql_deparser_1 = require("pgsql-deparser");
29
+ Object.defineProperty(exports, "deparse", { enumerable: true, get: function () { return pgsql_deparser_1.deparse; } });
30
+ Object.defineProperty(exports, "deparseSync", { enumerable: true, get: function () { return pgsql_deparser_1.deparseSync; } });
31
+ // Re-export loadModule from @libpg-query/parser
32
+ var parser_1 = require("@libpg-query/parser");
33
+ Object.defineProperty(exports, "loadModule", { enumerable: true, get: function () { return parser_1.loadModule; } });
34
+ // Types
35
+ var types_1 = require("./types");
36
+ Object.defineProperty(exports, "isRawComment", { enumerable: true, get: function () { return types_1.isRawComment; } });
37
+ Object.defineProperty(exports, "isRawWhitespace", { enumerable: true, get: function () { return types_1.isRawWhitespace; } });
38
+ Object.defineProperty(exports, "isRawStmt", { enumerable: true, get: function () { return types_1.isRawStmt; } });
39
+ // Scanner (for advanced use)
40
+ var scanner_1 = require("./scanner");
41
+ Object.defineProperty(exports, "scanComments", { enumerable: true, get: function () { return scanner_1.scanComments; } });
package/package.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "pgsql-parse",
3
+ "version": "0.2.0",
4
+ "author": "Constructive <developers@constructive.io>",
5
+ "description": "Comment and whitespace preserving PostgreSQL parser",
6
+ "main": "index.js",
7
+ "module": "esm/index.js",
8
+ "types": "index.d.ts",
9
+ "homepage": "https://github.com/constructive-io/pgsql-parser",
10
+ "license": "MIT",
11
+ "publishConfig": {
12
+ "access": "public",
13
+ "directory": "dist"
14
+ },
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "https://github.com/constructive-io/pgsql-parser"
18
+ },
19
+ "bugs": {
20
+ "url": "https://github.com/constructive-io/pgsql-parser/issues"
21
+ },
22
+ "scripts": {
23
+ "copy": "makage assets",
24
+ "clean": "makage clean dist",
25
+ "prepublishOnly": "npm run build",
26
+ "build": "npm run clean && tsc && tsc -p tsconfig.esm.json && npm run copy",
27
+ "build:dev": "npm run clean && tsc --declarationMap && tsc -p tsconfig.esm.json && npm run copy",
28
+ "lint": "eslint . --fix",
29
+ "test": "jest",
30
+ "test:watch": "jest --watch"
31
+ },
32
+ "keywords": [
33
+ "sql",
34
+ "postgres",
35
+ "postgresql",
36
+ "pg",
37
+ "parser",
38
+ "comment",
39
+ "whitespace",
40
+ "round-trip"
41
+ ],
42
+ "dependencies": {
43
+ "@libpg-query/parser": "^17.6.10",
44
+ "@pgsql/types": "^17.6.2",
45
+ "pgsql-deparser": "17.18.3"
46
+ },
47
+ "devDependencies": {
48
+ "makage": "^0.1.8"
49
+ },
50
+ "gitHead": "6571608759a472a0fb8f462737056e8e5a2bb0dc"
51
+ }
package/parse.d.ts ADDED
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Enhanced parse functions that preserve comments and vertical whitespace
3
+ * by interleaving synthetic RawComment and RawWhitespace nodes into the
4
+ * parse result's stmts array.
5
+ */
6
+ import { EnhancedParseResult } from './types';
7
+ /**
8
+ * Parse SQL with comment and whitespace preservation (async).
9
+ *
10
+ * Returns an EnhancedParseResult where the stmts array contains
11
+ * real RawStmt entries interleaved with synthetic RawComment and
12
+ * RawWhitespace nodes, all ordered by their byte position in the
13
+ * original source text.
14
+ */
15
+ export declare function parse(sql: string): Promise<EnhancedParseResult>;
16
+ /**
17
+ * Parse SQL with comment and whitespace preservation (sync).
18
+ */
19
+ export declare function parseSync(sql: string): EnhancedParseResult;
package/parse.js ADDED
@@ -0,0 +1,138 @@
1
+ "use strict";
2
+ /**
3
+ * Enhanced parse functions that preserve comments and vertical whitespace
4
+ * by interleaving synthetic RawComment and RawWhitespace nodes into the
5
+ * parse result's stmts array.
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.parse = parse;
9
+ exports.parseSync = parseSync;
10
+ const parser_1 = require("@libpg-query/parser");
11
+ const scanner_1 = require("./scanner");
12
+ /**
13
+ * Find the offset of the first real SQL token of a statement.
14
+ * The parser's stmt_location often points at whitespace or comments that
15
+ * precede the statement, so starting from `stmtLoc` this skips whitespace
16
+ * characters and any scanned element (comment / blank-line gap) covering the current position.
17
+ * `elements` is the scanner output; each entry exposes value.start / value.end offsets.
18
+ * Returns the position reached once neither kind of skip advances any further.
19
+ * NOTE(review): offsets are documented as byte offsets but are used to index `sql` as a JS string; confirm for non-ASCII input.
20
+ */
21
+ function findActualSqlStart(sql, stmtLoc, elements) {
22
+ let pos = stmtLoc;
23
+ // Alternate between skipping whitespace characters and skipping scanned
24
+ // elements until a full pass makes no progress
25
+ let changed = true;
26
+ while (changed) {
27
+ changed = false;
28
+ // Skip whitespace characters
29
+ while (pos < sql.length && /\s/.test(sql[pos])) {
30
+ pos++;
31
+ changed = true;
32
+ }
33
+ // Jump to the end of any scanned element that starts within [stmtLoc, pos] and extends past pos
34
+ for (const elem of elements) {
35
+ if (elem.value.start === pos || (elem.value.start >= stmtLoc && elem.value.start < pos + 1 && elem.value.end > pos)) {
36
+ if (elem.value.end > pos) {
37
+ pos = elem.value.end;
38
+ changed = true;
39
+ }
40
+ }
41
+ }
42
+ }
43
+ return pos;
44
+ }
45
+ function buildStmtRanges(stmts, sql, elements) { // Map each parsed statement to an { actualStart, end } offset pair within sql.
46
+ return stmts.map(stmt => {
47
+ const loc = stmt.stmt_location ?? 0; // a missing location is treated as offset 0
48
+ const actualStart = findActualSqlStart(sql, loc, elements); // first real token, past leading comments/whitespace
49
+ const len = stmt.stmt_len ?? sql.length - loc; // a missing length means "runs to the end of sql"
50
+ return { actualStart, end: loc + len }; // end is exclusive (location + length)
51
+ });
52
+ }
53
+ function interleave(parseResult, sql, elements) { // Merge parsed statements and scanned elements into one stmts array ordered by position, then priority.
54
+ const stmts = parseResult.stmts ?? []; // a result without stmts is treated as having none
55
+ const items = [];
56
+ const ranges = buildStmtRanges(stmts, sql, elements);
57
+ // Add scanned elements (comments and whitespace)
58
+ for (const elem of elements) {
59
+ if (elem.kind === 'comment') {
60
+ // Check if this comment starts strictly inside a statement's range
61
+ // (after its actualStart and before its end). If so, hoist it above
62
+ // that statement instead of leaving it at its original position
63
+ // (which would sort it after the statement). The first matching range wins.
64
+ let hoistedPosition = null;
65
+ for (const range of ranges) {
66
+ if (elem.value.start > range.actualStart && elem.value.start < range.end) {
67
+ hoistedPosition = range.actualStart;
68
+ break;
69
+ }
70
+ }
71
+ items.push({
72
+ position: hoistedPosition ?? elem.value.start,
73
+ priority: 0, // comments sort first among items sharing a position
74
+ entry: {
75
+ RawComment: {
76
+ type: elem.value.type,
77
+ text: elem.value.text,
78
+ location: elem.value.start, // the original offset is kept even when the comment is hoisted
79
+ // Only preserve trailing flag when NOT hoisted —
80
+ // a hoisted comment becomes a standalone line above the statement.
81
+ ...(hoistedPosition == null && elem.value.trailing ? { trailing: true } : {}),
82
+ }
83
+ }
84
+ });
85
+ }
86
+ else {
87
+ items.push({
88
+ position: elem.value.start,
89
+ priority: 1, // whitespace sorts after comments at same position
90
+ entry: {
91
+ RawWhitespace: {
92
+ lines: elem.value.lines,
93
+ location: elem.value.start,
94
+ }
95
+ }
96
+ });
97
+ }
98
+ }
99
+ // Add parsed statements, positioned at their actual SQL start (not the raw stmt_location)
100
+ for (let i = 0; i < stmts.length; i++) {
101
+ items.push({
102
+ position: ranges[i].actualStart,
103
+ priority: 2, // statements sort after comments and whitespace
104
+ entry: stmts[i],
105
+ });
106
+ }
107
+ // Sort by position, then by priority (0 = comment, 1 = whitespace, 2 = statement)
108
+ items.sort((a, b) => {
109
+ if (a.position !== b.position)
110
+ return a.position - b.position;
111
+ return a.priority - b.priority;
112
+ });
113
+ return {
114
+ version: parseResult.version, // passed through unchanged
115
+ stmts: items.map(item => item.entry),
116
+ };
117
+ }
118
+ /**
119
+ * Parse SQL with comment and whitespace preservation (async).
120
+ *
121
+ * Resolves to an EnhancedParseResult whose stmts array holds the real
122
+ * RawStmt entries interleaved with synthetic RawComment and RawWhitespace
123
+ * nodes, ordered by source position; a comment located inside a statement
124
+ * is hoisted so that it appears before that statement.
125
+ */
126
+ async function parse(sql) {
127
+ const parseResult = await (0, parser_1.parse)(sql); // not wrapped in try/catch: a parser rejection propagates to the caller
128
+ const elements = (0, scanner_1.scanComments)(sql);
129
+ return interleave(parseResult, sql, elements);
130
+ }
131
+ /**
132
+ * Synchronous variant of parse(): same scan-and-interleave steps, using the parser's parseSync instead of awaiting parse.
133
+ */
134
+ function parseSync(sql) {
135
+ const parseResult = (0, parser_1.parseSync)(sql); // not wrapped in try/catch: a parser error propagates to the caller
136
+ const elements = (0, scanner_1.scanComments)(sql);
137
+ return interleave(parseResult, sql, elements);
138
+ }
package/scanner.d.ts ADDED
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Scanner for extracting comments and vertical whitespace
3
+ * from PostgreSQL SQL source text.
4
+ *
5
+ * Uses PostgreSQL's real lexer via @libpg-query/parser's scanSync()
6
+ * to identify SQL_COMMENT tokens with exact byte positions.
7
+ * Whitespace detection uses token gaps to find blank lines
8
+ * between statements/comments.
9
+ */
10
+ export interface ScannedComment { // A `--` line comment located by the scanner.
11
+ type: 'line';
12
+ /** The comment text after the leading -- delimiter (taken as-is, not trimmed) */
13
+ text: string;
14
+ /** Byte offset of the start of the comment (including --) */
15
+ start: number;
16
+ /** Byte offset of the end of the comment (exclusive) */
17
+ end: number;
18
+ /** True if a token precedes this comment with no newline in between (trailing comment) */
19
+ trailing: boolean;
20
+ }
21
+ export interface ScannedWhitespace { // A gap between tokens (or after the last token) that contains at least one blank line.
22
+ /** Number of blank lines: the newline count of the gap minus one (always >= 1) */
23
+ lines: number;
24
+ /** Byte offset of the start of the whitespace region (end of the previous token, or 0) */
25
+ start: number;
26
+ /** Byte offset of the end of the whitespace region (start of the next token, or end of input) */
27
+ end: number;
28
+ }
29
+ export type ScannedElement = { // Tagged union of scanner results; discriminate on `kind`.
30
+ kind: 'comment';
31
+ value: ScannedComment;
32
+ } | {
33
+ kind: 'whitespace';
34
+ value: ScannedWhitespace;
35
+ };
36
+ /**
37
+ * Scan SQL source text and extract all -- line comments and significant
38
+ * vertical whitespace (gaps between tokens that contain 2+ newlines).
39
+ *
40
+ * Uses PostgreSQL's real lexer (via WASM scanSync) for comment detection,
41
+ * so string literals (single-quoted, dollar-quoted, escape strings, etc.)
42
+ * are tokenized by the actual PostgreSQL scanner. Returns the elements
43
+ * sorted by start offset, or an empty array if the scan throws.
44
+ */
45
+ export declare function scanComments(sql: string): ScannedElement[];
package/scanner.js ADDED
@@ -0,0 +1,97 @@
1
+ "use strict";
2
+ /**
3
+ * Scanner for extracting comments and vertical whitespace
4
+ * from PostgreSQL SQL source text.
5
+ *
6
+ * Uses PostgreSQL's real lexer via @libpg-query/parser's scanSync()
7
+ * to identify SQL_COMMENT tokens with exact byte positions.
8
+ * Whitespace detection uses token gaps to find blank lines
9
+ * between statements/comments.
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.scanComments = scanComments;
13
+ const parser_1 = require("@libpg-query/parser");
14
+ /** Token type for -- line comments from PostgreSQL's lexer. NOTE(review): hard-coded numeric id; confirm it still matches when upgrading @libpg-query/parser. */
15
+ const SQL_COMMENT = 275;
16
+ /**
17
+ * Count blank lines in a string region: the number of '\n' characters minus one (all other characters are ignored).
18
+ * Returns 0 if there are fewer than 2 newlines (no blank line).
19
+ */
20
+ function countBlankLines(text) {
21
+ let newlines = 0;
22
+ for (let i = 0; i < text.length; i++) {
23
+ if (text[i] === '\n')
24
+ newlines++;
25
+ }
26
+ return newlines >= 2 ? newlines - 1 : 0; // n newlines enclose n - 1 blank lines
27
+ }
28
+ /**
29
+ * Scan SQL source text and extract all -- line comments and significant
30
+ * vertical whitespace (gaps between tokens that contain 2+ newlines).
31
+ *
32
+ * Uses PostgreSQL's real lexer (via WASM scanSync) for comment detection,
33
+ * so string literals (single-quoted, dollar-quoted, escape strings, etc.)
34
+ * are tokenized by the actual PostgreSQL scanner. Returns the elements
35
+ * sorted by start offset, or an empty array if the scan throws.
36
+ */
37
+ function scanComments(sql) {
38
+ const elements = [];
39
+ let tokens;
40
+ try {
41
+ const scanResult = (0, parser_1.scanSync)(sql);
42
+ tokens = scanResult.tokens;
43
+ }
44
+ catch {
45
+ return []; // best effort: a scan failure yields no elements instead of throwing
46
+ }
47
+ let prevEnd = 0; // end offset of the previous token (0 before the first token)
48
+ for (const token of tokens) {
49
+ if (token.start > prevEnd) {
50
+ const gap = sql.substring(prevEnd, token.start); // text between the previous token and this one
51
+ const blankLines = countBlankLines(gap);
52
+ if (blankLines > 0) {
53
+ elements.push({
54
+ kind: 'whitespace',
55
+ value: {
56
+ lines: blankLines,
57
+ start: prevEnd,
58
+ end: token.start,
59
+ }
60
+ });
61
+ }
62
+ }
63
+ if (token.tokenType === SQL_COMMENT) {
64
+ // A comment is "trailing" if some token precedes it (prevEnd > 0) and no newline
65
+ // exists between that token's end and this comment's start (i.e. same line).
66
+ const gapBeforeComment = sql.substring(prevEnd, token.start);
67
+ const trailing = prevEnd > 0 && !gapBeforeComment.includes('\n');
68
+ elements.push({
69
+ kind: 'comment',
70
+ value: {
71
+ type: 'line',
72
+ text: sql.substring(token.start + 2, token.end), // strip the two-character -- prefix; the rest is kept verbatim
73
+ start: token.start,
74
+ end: token.end,
75
+ trailing,
76
+ }
77
+ });
78
+ }
79
+ prevEnd = token.end;
80
+ }
81
+ if (prevEnd < sql.length) { // text remaining after the last token
82
+ const gap = sql.substring(prevEnd, sql.length);
83
+ const blankLines = countBlankLines(gap);
84
+ if (blankLines > 0) {
85
+ elements.push({
86
+ kind: 'whitespace',
87
+ value: {
88
+ lines: blankLines,
89
+ start: prevEnd,
90
+ end: sql.length,
91
+ }
92
+ });
93
+ }
94
+ }
95
+ elements.sort((a, b) => a.value.start - b.value.start); // order by start offset
96
+ return elements;
97
+ }
package/types.d.ts ADDED
@@ -0,0 +1,59 @@
1
+ import { RawStmt } from '@pgsql/types';
2
+ /**
3
+ * Synthetic AST node representing a SQL comment.
4
+ * Not produced by PostgreSQL's parser — injected by pgsql-parse
5
+ * to preserve comments through parse→deparse round trips.
6
+ */
7
+ export interface RawComment {
8
+ /** Always 'line' — only -- comments are supported */
9
+ type: 'line';
10
+ /** The comment text after the leading -- delimiter (not trimmed) */
11
+ text: string;
12
+ /** Byte offset of the comment in the original source; kept as-is even when the node is hoisted above a statement */
13
+ location: number;
14
+ /** Present (true) only when the comment followed a token on the same line and was not hoisted above a statement; omitted otherwise */
15
+ trailing?: boolean;
16
+ }
17
+ /**
18
+ * Synthetic AST node representing significant vertical whitespace.
19
+ * Represents one or more blank lines found in a gap between tokens (or after the last token).
20
+ */
21
+ export interface RawWhitespace {
22
+ /** Number of blank lines (always >= 1) */
23
+ lines: number;
24
+ /** Byte offset where the whitespace gap starts in the original source (for ordering) */
25
+ location: number;
26
+ }
27
+ /**
28
+ * One entry of EnhancedParseResult.stmts: either a real RawStmt or a synthetic node
29
+ * wrapped under a single `RawComment` / `RawWhitespace` key (use the isRaw* type guards to tell them apart).
30
+ */
31
+ export type EnhancedStmt = RawStmt | {
32
+ RawComment: RawComment;
33
+ } | {
34
+ RawWhitespace: RawWhitespace;
35
+ };
36
+ /**
37
+ * Enhanced parse result that includes synthetic comment and whitespace nodes
38
+ * interleaved with the real RawStmt entries by source position (comments found inside a statement are placed before it).
39
+ */
40
+ export interface EnhancedParseResult {
41
+ version: number; // copied from the underlying parse result
42
+ stmts: EnhancedStmt[]; // statements and synthetic nodes, in output order
43
+ }
44
+ /**
45
+ * Type guard: true when the stmt entry is a synthetic comment wrapper (it has a `RawComment` key).
46
+ */
47
+ export declare function isRawComment(stmt: EnhancedStmt): stmt is {
48
+ RawComment: RawComment;
49
+ };
50
+ /**
51
+ * Type guard: true when the stmt entry is a synthetic whitespace wrapper (it has a `RawWhitespace` key).
52
+ */
53
+ export declare function isRawWhitespace(stmt: EnhancedStmt): stmt is {
54
+ RawWhitespace: RawWhitespace;
55
+ };
56
+ /**
57
+ * Type guard: true when the stmt entry is a real RawStmt, detected by the presence of a `stmt` key.
58
+ */
59
+ export declare function isRawStmt(stmt: EnhancedStmt): stmt is RawStmt;
package/types.js ADDED
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.isRawComment = isRawComment;
4
+ exports.isRawWhitespace = isRawWhitespace;
5
+ exports.isRawStmt = isRawStmt;
6
+ /**
7
+ * Type guard: true when the stmt entry has a `RawComment` key (synthetic comment node).
8
+ */
9
+ function isRawComment(stmt) {
10
+ return 'RawComment' in stmt;
11
+ }
12
+ /**
13
+ * Type guard: true when the stmt entry has a `RawWhitespace` key (synthetic whitespace node).
14
+ */
15
+ function isRawWhitespace(stmt) {
16
+ return 'RawWhitespace' in stmt;
17
+ }
18
+ /**
19
+ * Type guard: true when the stmt entry has a `stmt` key, which is how a real RawStmt is recognized here.
20
+ */
21
+ function isRawStmt(stmt) {
22
+ return 'stmt' in stmt; // NOTE(review): an entry without a `stmt` key is not recognized; confirm RawStmt always carries it
23
+ }