pgsql-deparser 17.4.1 → 17.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deparser.js +23 -9
- package/esm/deparser.js +23 -9
- package/esm/utils/quote-utils.js +30 -0
- package/esm/utils/statement-splitter.js +125 -0
- package/package.json +4 -3
- package/utils/quote-utils.d.ts +15 -0
- package/utils/quote-utils.js +30 -0
- package/utils/statement-splitter.d.ts +38 -0
- package/utils/statement-splitter.js +131 -0
package/deparser.js
CHANGED
|
@@ -867,13 +867,16 @@ class Deparser {
|
|
|
867
867
|
formatStr = '(%s)';
|
|
868
868
|
}
|
|
869
869
|
const boolContext = { ...context, bool: true };
|
|
870
|
+
// explanation of our syntax/fix below:
|
|
871
|
+
// return formatStr.replace('%s', andArgs); // ❌ Interprets $ as special syntax
|
|
872
|
+
// return formatStr.replace('%s', () => andArgs); // ✅ Function callback prevents interpretation
|
|
870
873
|
switch (boolop) {
|
|
871
874
|
case 'AND_EXPR':
|
|
872
875
|
const andArgs = args.map(arg => this.visit(arg, boolContext)).join(' AND ');
|
|
873
|
-
return formatStr.replace('%s', andArgs);
|
|
876
|
+
return formatStr.replace('%s', () => andArgs);
|
|
874
877
|
case 'OR_EXPR':
|
|
875
878
|
const orArgs = args.map(arg => this.visit(arg, boolContext)).join(' OR ');
|
|
876
|
-
return formatStr.replace('%s', orArgs);
|
|
879
|
+
return formatStr.replace('%s', () => orArgs);
|
|
877
880
|
case 'NOT_EXPR':
|
|
878
881
|
return `NOT (${this.visit(args[0], context)})`;
|
|
879
882
|
default:
|
|
@@ -1132,23 +1135,23 @@ class Deparser {
|
|
|
1132
1135
|
else if (nodeAny.sval !== undefined) {
|
|
1133
1136
|
if (typeof nodeAny.sval === 'object' && nodeAny.sval !== null) {
|
|
1134
1137
|
if (nodeAny.sval.sval !== undefined) {
|
|
1135
|
-
return quote_utils_1.QuoteUtils.
|
|
1138
|
+
return quote_utils_1.QuoteUtils.formatEString(nodeAny.sval.sval);
|
|
1136
1139
|
}
|
|
1137
1140
|
else if (nodeAny.sval.String && nodeAny.sval.String.sval !== undefined) {
|
|
1138
|
-
return quote_utils_1.QuoteUtils.
|
|
1141
|
+
return quote_utils_1.QuoteUtils.formatEString(nodeAny.sval.String.sval);
|
|
1139
1142
|
}
|
|
1140
1143
|
else if (Object.keys(nodeAny.sval).length === 0) {
|
|
1141
1144
|
return "''";
|
|
1142
1145
|
}
|
|
1143
1146
|
else {
|
|
1144
|
-
return quote_utils_1.QuoteUtils.
|
|
1147
|
+
return quote_utils_1.QuoteUtils.formatEString(nodeAny.sval.toString());
|
|
1145
1148
|
}
|
|
1146
1149
|
}
|
|
1147
1150
|
else if (nodeAny.sval === null) {
|
|
1148
1151
|
return 'NULL';
|
|
1149
1152
|
}
|
|
1150
1153
|
else {
|
|
1151
|
-
return quote_utils_1.QuoteUtils.
|
|
1154
|
+
return quote_utils_1.QuoteUtils.formatEString(nodeAny.sval);
|
|
1152
1155
|
}
|
|
1153
1156
|
}
|
|
1154
1157
|
else if (nodeAny.boolval !== undefined) {
|
|
@@ -1835,7 +1838,7 @@ class Deparser {
|
|
|
1835
1838
|
}
|
|
1836
1839
|
String(node, context) {
|
|
1837
1840
|
if (context.isStringLiteral || context.isEnumValue) {
|
|
1838
|
-
return
|
|
1841
|
+
return quote_utils_1.QuoteUtils.formatEString(node.sval || '');
|
|
1839
1842
|
}
|
|
1840
1843
|
const value = node.sval || '';
|
|
1841
1844
|
if (context.parentNodeTypes.includes('DefElem') ||
|
|
@@ -5054,6 +5057,18 @@ class Deparser {
|
|
|
5054
5057
|
: argValue;
|
|
5055
5058
|
return `${node.defname} = ${quotedValue}`;
|
|
5056
5059
|
}
|
|
5060
|
+
// Handle CopyStmt WITH clause options - uppercase format without quotes
|
|
5061
|
+
if (context.parentNodeTypes.includes('CopyStmt')) {
|
|
5062
|
+
if (node.defname === 'format' && node.arg && this.getNodeType(node.arg) === 'String') {
|
|
5063
|
+
const stringData = this.getNodeData(node.arg);
|
|
5064
|
+
return `FORMAT ${stringData.sval.toUpperCase()}`;
|
|
5065
|
+
}
|
|
5066
|
+
// Handle other COPY options with uppercase defname
|
|
5067
|
+
if (node.arg) {
|
|
5068
|
+
return `${node.defname.toUpperCase()} ${argValue}`;
|
|
5069
|
+
}
|
|
5070
|
+
return node.defname.toUpperCase();
|
|
5071
|
+
}
|
|
5057
5072
|
// Handle CREATE OPERATOR and CREATE TYPE context
|
|
5058
5073
|
if (context.parentNodeTypes.includes('DefineStmt')) {
|
|
5059
5074
|
const preservedName = this.preserveOperatorDefElemCase(node.defname);
|
|
@@ -5744,8 +5759,7 @@ class Deparser {
|
|
|
5744
5759
|
output.push('NULL');
|
|
5745
5760
|
}
|
|
5746
5761
|
else if (node.comment) {
|
|
5747
|
-
|
|
5748
|
-
output.push(`'${escapedComment}'`);
|
|
5762
|
+
output.push(quote_utils_1.QuoteUtils.formatEString(node.comment));
|
|
5749
5763
|
}
|
|
5750
5764
|
return output.join(' ');
|
|
5751
5765
|
}
|
package/esm/deparser.js
CHANGED
|
@@ -864,13 +864,16 @@ export class Deparser {
|
|
|
864
864
|
formatStr = '(%s)';
|
|
865
865
|
}
|
|
866
866
|
const boolContext = { ...context, bool: true };
|
|
867
|
+
// explanation of our syntax/fix below:
|
|
868
|
+
// return formatStr.replace('%s', andArgs); // ❌ Interprets $ as special syntax
|
|
869
|
+
// return formatStr.replace('%s', () => andArgs); // ✅ Function callback prevents interpretation
|
|
867
870
|
switch (boolop) {
|
|
868
871
|
case 'AND_EXPR':
|
|
869
872
|
const andArgs = args.map(arg => this.visit(arg, boolContext)).join(' AND ');
|
|
870
|
-
return formatStr.replace('%s', andArgs);
|
|
873
|
+
return formatStr.replace('%s', () => andArgs);
|
|
871
874
|
case 'OR_EXPR':
|
|
872
875
|
const orArgs = args.map(arg => this.visit(arg, boolContext)).join(' OR ');
|
|
873
|
-
return formatStr.replace('%s', orArgs);
|
|
876
|
+
return formatStr.replace('%s', () => orArgs);
|
|
874
877
|
case 'NOT_EXPR':
|
|
875
878
|
return `NOT (${this.visit(args[0], context)})`;
|
|
876
879
|
default:
|
|
@@ -1129,23 +1132,23 @@ export class Deparser {
|
|
|
1129
1132
|
else if (nodeAny.sval !== undefined) {
|
|
1130
1133
|
if (typeof nodeAny.sval === 'object' && nodeAny.sval !== null) {
|
|
1131
1134
|
if (nodeAny.sval.sval !== undefined) {
|
|
1132
|
-
return QuoteUtils.
|
|
1135
|
+
return QuoteUtils.formatEString(nodeAny.sval.sval);
|
|
1133
1136
|
}
|
|
1134
1137
|
else if (nodeAny.sval.String && nodeAny.sval.String.sval !== undefined) {
|
|
1135
|
-
return QuoteUtils.
|
|
1138
|
+
return QuoteUtils.formatEString(nodeAny.sval.String.sval);
|
|
1136
1139
|
}
|
|
1137
1140
|
else if (Object.keys(nodeAny.sval).length === 0) {
|
|
1138
1141
|
return "''";
|
|
1139
1142
|
}
|
|
1140
1143
|
else {
|
|
1141
|
-
return QuoteUtils.
|
|
1144
|
+
return QuoteUtils.formatEString(nodeAny.sval.toString());
|
|
1142
1145
|
}
|
|
1143
1146
|
}
|
|
1144
1147
|
else if (nodeAny.sval === null) {
|
|
1145
1148
|
return 'NULL';
|
|
1146
1149
|
}
|
|
1147
1150
|
else {
|
|
1148
|
-
return QuoteUtils.
|
|
1151
|
+
return QuoteUtils.formatEString(nodeAny.sval);
|
|
1149
1152
|
}
|
|
1150
1153
|
}
|
|
1151
1154
|
else if (nodeAny.boolval !== undefined) {
|
|
@@ -1832,7 +1835,7 @@ export class Deparser {
|
|
|
1832
1835
|
}
|
|
1833
1836
|
String(node, context) {
|
|
1834
1837
|
if (context.isStringLiteral || context.isEnumValue) {
|
|
1835
|
-
return
|
|
1838
|
+
return QuoteUtils.formatEString(node.sval || '');
|
|
1836
1839
|
}
|
|
1837
1840
|
const value = node.sval || '';
|
|
1838
1841
|
if (context.parentNodeTypes.includes('DefElem') ||
|
|
@@ -5051,6 +5054,18 @@ export class Deparser {
|
|
|
5051
5054
|
: argValue;
|
|
5052
5055
|
return `${node.defname} = ${quotedValue}`;
|
|
5053
5056
|
}
|
|
5057
|
+
// Handle CopyStmt WITH clause options - uppercase format without quotes
|
|
5058
|
+
if (context.parentNodeTypes.includes('CopyStmt')) {
|
|
5059
|
+
if (node.defname === 'format' && node.arg && this.getNodeType(node.arg) === 'String') {
|
|
5060
|
+
const stringData = this.getNodeData(node.arg);
|
|
5061
|
+
return `FORMAT ${stringData.sval.toUpperCase()}`;
|
|
5062
|
+
}
|
|
5063
|
+
// Handle other COPY options with uppercase defname
|
|
5064
|
+
if (node.arg) {
|
|
5065
|
+
return `${node.defname.toUpperCase()} ${argValue}`;
|
|
5066
|
+
}
|
|
5067
|
+
return node.defname.toUpperCase();
|
|
5068
|
+
}
|
|
5054
5069
|
// Handle CREATE OPERATOR and CREATE TYPE context
|
|
5055
5070
|
if (context.parentNodeTypes.includes('DefineStmt')) {
|
|
5056
5071
|
const preservedName = this.preserveOperatorDefElemCase(node.defname);
|
|
@@ -5741,8 +5756,7 @@ export class Deparser {
|
|
|
5741
5756
|
output.push('NULL');
|
|
5742
5757
|
}
|
|
5743
5758
|
else if (node.comment) {
|
|
5744
|
-
|
|
5745
|
-
output.push(`'${escapedComment}'`);
|
|
5759
|
+
output.push(QuoteUtils.formatEString(node.comment));
|
|
5746
5760
|
}
|
|
5747
5761
|
return output.join(' ');
|
|
5748
5762
|
}
|
package/esm/utils/quote-utils.js
CHANGED
|
@@ -48,4 +48,34 @@ export class QuoteUtils {
|
|
|
48
48
|
static escape(literal) {
|
|
49
49
|
return `'${literal.replace(/'/g, "''")}'`;
|
|
50
50
|
}
|
|
51
|
+
/**
|
|
52
|
+
* Escapes a string value for use in E-prefixed string literals
|
|
53
|
+
* Handles both backslashes and single quotes properly
|
|
54
|
+
*/
|
|
55
|
+
static escapeEString(value) {
|
|
56
|
+
return value.replace(/\\/g, '\\\\').replace(/'/g, "''");
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Formats a string as an E-prefixed string literal with proper escaping
|
|
60
|
+
* This wraps the complete E-prefix logic including detection and formatting
|
|
61
|
+
*/
|
|
62
|
+
static formatEString(value) {
|
|
63
|
+
const needsEscape = QuoteUtils.needsEscapePrefix(value);
|
|
64
|
+
if (needsEscape) {
|
|
65
|
+
const escapedValue = QuoteUtils.escapeEString(value);
|
|
66
|
+
return `E'${escapedValue}'`;
|
|
67
|
+
}
|
|
68
|
+
else {
|
|
69
|
+
return QuoteUtils.escape(value);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Determines if a string value needs E-prefix for escaped string literals
|
|
74
|
+
* Detects backslash escape sequences that require E-prefix in PostgreSQL
|
|
75
|
+
*/
|
|
76
|
+
static needsEscapePrefix(value) {
|
|
77
|
+
// Always use E'' if the string contains any backslashes,
|
|
78
|
+
// unless it's a raw \x... bytea-style literal.
|
|
79
|
+
return !/^\\x[0-9a-fA-F]+$/i.test(value) && value.includes('\\');
|
|
80
|
+
}
|
|
51
81
|
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import { parse } from 'libpg-query';
|
|
2
|
+
/**
 * Extracts the text of one statement from `originalSQL` using the
 * `stmt_location` / `stmt_len` fields of `rawStmt`.
 *
 * Offsets are applied to the UTF-8 encoding of the input, not to the
 * JavaScript string, so multi-byte characters do not shift the range.
 *
 * Range selection:
 *  - location and length present: bytes [location, location + length)
 *  - only location present:       bytes [location, end of input)
 *  - only length present:         bytes [0, length), first statement only
 *  - neither present:             the whole input, first statement only
 *  - anything else:               no extraction (null)
 *
 * @param {string} originalSQL - Full SQL text the statement came from.
 * @param {{stmt_location?: number, stmt_len?: number}} rawStmt - Statement
 *   node carrying the optional offset and length.
 * @param {boolean} [isFirst=false] - Enables the fallbacks used when
 *   `stmt_location` is absent.
 * @param {{stripComments?: boolean, skipValidation?: boolean}} [options={}]
 *   `stripComments` (on unless explicitly `false`) drops leading blank and
 *   `--` comment lines and trims the result; `skipValidation` disables the
 *   truncated-keyword check.
 * @returns {string|null} The extracted text; `null` when no range applies
 *   or validation rejects the text. May be an empty string when stripping
 *   leaves nothing.
 */
export function extractStatement(originalSQL, rawStmt, isFirst = false, options = {}) {
    const { stmt_location: location, stmt_len: length } = rawStmt;
    const hasLocation = location !== undefined;
    const hasLength = length !== undefined;
    let extracted = null;
    if (hasLocation || (isFirst && hasLength)) {
        // Encode only when a byte range is actually needed.
        const sqlBuffer = Buffer.from(originalSQL, 'utf8');
        const startByte = hasLocation ? location : 0;
        const endByte = hasLength ? startByte + length : sqlBuffer.length;
        // subarray() has the semantics of the deprecated Buffer#slice().
        extracted = sqlBuffer.subarray(startByte, endByte).toString('utf8');
    }
    else if (isFirst) {
        extracted = originalSQL;
    }
    if (extracted && options.stripComments !== false) {
        const lines = extracted.split('\n');
        const firstSqlLine = lines.findIndex((line) => {
            const text = line.trim();
            return text !== '' && !text.startsWith('--');
        });
        // When no SQL line exists nothing is dropped. The result is trimmed,
        // so the indentation of the first kept line is not preserved.
        extracted = lines.slice(Math.max(firstSqlLine, 0)).join('\n').trim();
    }
    if (extracted && !options.skipValidation) {
        const candidate = extracted.trim();
        // A statement beginning with a keyword that lost its first letter
        // indicates a misaligned range.
        const looksTruncated = ['ELECT ', 'REATE ', 'NSERT '].some((prefix) => candidate.startsWith(prefix));
        if (looksTruncated || candidate.length === 0) {
            return null;
        }
    }
    return extracted;
}
|
|
69
|
+
/**
 * Parses `sql` and returns one entry per parsed statement whose text could
 * be extracted.
 *
 * Each entry holds the extracted text, the statement's position in the
 * parse result, and the raw `stmt_location` / `stmt_len` values reported
 * for it. Statements for which extractStatement() yields a falsy value are
 * omitted, so `index` values may have gaps.
 *
 * @param {string} sql - SQL text to split.
 * @param {object} [options={}] - Passed through to extractStatement().
 * @returns {Promise<Array<{statement: string, index: number, location: (number|undefined), length: (number|undefined)}>>}
 */
export async function splitStatements(sql, options = {}) {
    const { stmts } = await parse(sql);
    const collected = [];
    if (!stmts) {
        return collected;
    }
    for (const [position, rawStmt] of stmts.entries()) {
        const text = extractStatement(sql, rawStmt, position === 0, options);
        if (!text) {
            continue;
        }
        collected.push({
            statement: text,
            index: position,
            location: rawStmt.stmt_location,
            length: rawStmt.stmt_len
        });
    }
    return collected;
}
|
|
93
|
+
/**
 * Builds a fixture key of the form `<path without .sql>-<n>.<extension>`,
 * where `n` is the 1-based statement number.
 *
 * @param {string} relativePath - Path whose trailing `.sql` is removed.
 * @param {number} statementIndex - Zero-based statement index.
 * @param {string} [extension='sql'] - Extension appended to the key.
 * @returns {string} The generated key.
 */
export function generateStatementKey(relativePath, statementIndex, extension = 'sql') {
    const stem = relativePath.replace(/\.sql$/, '');
    const ordinal = statementIndex + 1;
    return `${stem}-${ordinal}.${extension}`;
}
|
|
99
|
+
/**
 * Debug helper that extracts the same statement range twice, once by
 * string index and once by UTF-8 byte offset, so the two can be compared.
 *
 * When either `stmt_location` or `stmt_len` is missing, both extractions
 * are empty strings.
 *
 * @param {string} sql - Full SQL text.
 * @param {{stmt_location?: number, stmt_len?: number}} rawStmt - Statement
 *   node carrying the offset and length.
 * @returns {{characterBased: string, byteBased: string, matches: boolean,
 *   unicodeChars: number, byteLength: number, charLength: number}}
 *   `unicodeChars` is `byteLength - charLength`, i.e. the surplus of UTF-8
 *   bytes over string length units, not a count of characters.
 */
export function debugUnicodeExtraction(sql, rawStmt) {
    // Encode once; the buffer serves both the length and the byte slice.
    const sqlBuffer = Buffer.from(sql, 'utf8');
    const charLength = sql.length;
    const byteLength = sqlBuffer.length;
    const { stmt_location: location, stmt_len: length } = rawStmt;
    let characterBased = '';
    let byteBased = '';
    if (location !== undefined && length !== undefined) {
        const end = location + length;
        characterBased = sql.substring(location, end);
        // subarray() has the semantics of the deprecated Buffer#slice().
        byteBased = sqlBuffer.subarray(location, end).toString('utf8');
    }
    return {
        characterBased,
        byteBased,
        matches: characterBased === byteBased,
        unicodeChars: byteLength - charLength,
        byteLength,
        charLength
    };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pgsql-deparser",
|
|
3
|
-
"version": "17.4.1",
|
|
3
|
+
"version": "17.5.0",
|
|
4
4
|
"author": "Dan Lynch <pyramation@gmail.com>",
|
|
5
5
|
"description": "PostgreSQL AST Deparser",
|
|
6
6
|
"main": "index.js",
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"fixtures:ast": "ts-node scripts/make-fixtures-ast.ts",
|
|
31
31
|
"fixtures:sql": "ts-node scripts/make-fixtures-sql.ts",
|
|
32
32
|
"fixtures": "ts-node scripts/make-fixtures.ts",
|
|
33
|
+
"fixtures:upstream-diff": "ts-node scripts/make-upstream-diff.ts",
|
|
33
34
|
"lint": "eslint . --fix",
|
|
34
35
|
"test": "jest",
|
|
35
36
|
"test:watch": "jest --watch"
|
|
@@ -48,7 +49,7 @@
|
|
|
48
49
|
"libpg-query": "17.3.3"
|
|
49
50
|
},
|
|
50
51
|
"dependencies": {
|
|
51
|
-
"@pgsql/types": "^17.4.
|
|
52
|
+
"@pgsql/types": "^17.4.2"
|
|
52
53
|
},
|
|
53
|
-
"gitHead": "
|
|
54
|
+
"gitHead": "a066ec74d2840bcf75bc0f975475508180a621fc"
|
|
54
55
|
}
|
package/utils/quote-utils.d.ts
CHANGED
|
@@ -2,4 +2,19 @@ export declare class QuoteUtils {
|
|
|
2
2
|
static needsQuotes(value: string): boolean;
|
|
3
3
|
static quote(value: any): any;
|
|
4
4
|
static escape(literal: string): string;
|
|
5
|
+
/**
|
|
6
|
+
* Escapes a string value for use in E-prefixed string literals
|
|
7
|
+
* Handles both backslashes and single quotes properly
|
|
8
|
+
*/
|
|
9
|
+
static escapeEString(value: string): string;
|
|
10
|
+
/**
|
|
11
|
+
* Formats a string as an E-prefixed string literal with proper escaping
|
|
12
|
+
* This wraps the complete E-prefix logic including detection and formatting
|
|
13
|
+
*/
|
|
14
|
+
static formatEString(value: string): string;
|
|
15
|
+
/**
|
|
16
|
+
* Determines if a string value needs E-prefix for escaped string literals
|
|
17
|
+
* Detects backslash escape sequences that require E-prefix in PostgreSQL
|
|
18
|
+
*/
|
|
19
|
+
static needsEscapePrefix(value: string): boolean;
|
|
5
20
|
}
|
package/utils/quote-utils.js
CHANGED
|
@@ -51,5 +51,35 @@ class QuoteUtils {
|
|
|
51
51
|
static escape(literal) {
|
|
52
52
|
return `'${literal.replace(/'/g, "''")}'`;
|
|
53
53
|
}
|
|
54
|
+
/**
|
|
55
|
+
* Escapes a string value for use in E-prefixed string literals
|
|
56
|
+
* Handles both backslashes and single quotes properly
|
|
57
|
+
*/
|
|
58
|
+
static escapeEString(value) {
|
|
59
|
+
return value.replace(/\\/g, '\\\\').replace(/'/g, "''");
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Formats a string as an E-prefixed string literal with proper escaping
|
|
63
|
+
* This wraps the complete E-prefix logic including detection and formatting
|
|
64
|
+
*/
|
|
65
|
+
static formatEString(value) {
|
|
66
|
+
const needsEscape = QuoteUtils.needsEscapePrefix(value);
|
|
67
|
+
if (needsEscape) {
|
|
68
|
+
const escapedValue = QuoteUtils.escapeEString(value);
|
|
69
|
+
return `E'${escapedValue}'`;
|
|
70
|
+
}
|
|
71
|
+
else {
|
|
72
|
+
return QuoteUtils.escape(value);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Determines if a string value needs E-prefix for escaped string literals
|
|
77
|
+
* Detects backslash escape sequences that require E-prefix in PostgreSQL
|
|
78
|
+
*/
|
|
79
|
+
static needsEscapePrefix(value) {
|
|
80
|
+
// Always use E'' if the string contains any backslashes,
|
|
81
|
+
// unless it's a raw \x... bytea-style literal.
|
|
82
|
+
return !/^\\x[0-9a-fA-F]+$/i.test(value) && value.includes('\\');
|
|
83
|
+
}
|
|
54
84
|
}
|
|
55
85
|
exports.QuoteUtils = QuoteUtils;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { RawStmt } from '@pgsql/types';
|
|
2
|
+
export interface ExtractedStatement {
|
|
3
|
+
statement: string;
|
|
4
|
+
index: number;
|
|
5
|
+
location?: number;
|
|
6
|
+
length?: number;
|
|
7
|
+
}
|
|
8
|
+
export interface StatementSplitterOptions {
|
|
9
|
+
/** Skip validation for malformed statements */
|
|
10
|
+
skipValidation?: boolean;
|
|
11
|
+
/** Strip leading comments from extracted statements */
|
|
12
|
+
stripComments?: boolean;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Extracts a single statement from SQL using PostgreSQL's location information.
|
|
16
|
+
* Handles Unicode properly by using byte positions instead of character positions.
|
|
17
|
+
*/
|
|
18
|
+
export declare function extractStatement(originalSQL: string, rawStmt: RawStmt, isFirst?: boolean, options?: StatementSplitterOptions): string | null;
|
|
19
|
+
/**
|
|
20
|
+
* Splits SQL text into individual statements using PostgreSQL's parser.
|
|
21
|
+
* Handles Unicode characters properly and provides detailed location information.
|
|
22
|
+
*/
|
|
23
|
+
export declare function splitStatements(sql: string, options?: StatementSplitterOptions): Promise<ExtractedStatement[]>;
|
|
24
|
+
/**
|
|
25
|
+
* Utility to generate statement keys for fixtures
|
|
26
|
+
*/
|
|
27
|
+
export declare function generateStatementKey(relativePath: string, statementIndex: number, extension?: string): string;
|
|
28
|
+
/**
|
|
29
|
+
* Test utility to compare byte vs character extraction for debugging Unicode issues
|
|
30
|
+
*/
|
|
31
|
+
export declare function debugUnicodeExtraction(sql: string, rawStmt: RawStmt): {
|
|
32
|
+
characterBased: string;
|
|
33
|
+
byteBased: string;
|
|
34
|
+
matches: boolean;
|
|
35
|
+
unicodeChars: number;
|
|
36
|
+
byteLength: number;
|
|
37
|
+
charLength: number;
|
|
38
|
+
};
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.extractStatement = extractStatement;
|
|
4
|
+
exports.splitStatements = splitStatements;
|
|
5
|
+
exports.generateStatementKey = generateStatementKey;
|
|
6
|
+
exports.debugUnicodeExtraction = debugUnicodeExtraction;
|
|
7
|
+
const libpg_query_1 = require("libpg-query");
|
|
8
|
+
/**
 * Extracts the text of one statement from `originalSQL` using the
 * `stmt_location` / `stmt_len` fields of `rawStmt`.
 *
 * Offsets are applied to the UTF-8 encoding of the input, not to the
 * JavaScript string, so multi-byte characters do not shift the range.
 *
 * Range selection:
 *  - location and length present: bytes [location, location + length)
 *  - only location present:       bytes [location, end of input)
 *  - only length present:         bytes [0, length), first statement only
 *  - neither present:             the whole input, first statement only
 *  - anything else:               no extraction (null)
 *
 * @param {string} originalSQL - Full SQL text the statement came from.
 * @param {{stmt_location?: number, stmt_len?: number}} rawStmt - Statement
 *   node carrying the optional offset and length.
 * @param {boolean} [isFirst=false] - Enables the fallbacks used when
 *   `stmt_location` is absent.
 * @param {{stripComments?: boolean, skipValidation?: boolean}} [options={}]
 *   `stripComments` (on unless explicitly `false`) drops leading blank and
 *   `--` comment lines and trims the result; `skipValidation` disables the
 *   truncated-keyword check.
 * @returns {string|null} The extracted text; `null` when no range applies
 *   or validation rejects the text. May be an empty string when stripping
 *   leaves nothing.
 */
function extractStatement(originalSQL, rawStmt, isFirst = false, options = {}) {
    const { stmt_location: location, stmt_len: length } = rawStmt;
    const hasLocation = location !== undefined;
    const hasLength = length !== undefined;
    let extracted = null;
    if (hasLocation || (isFirst && hasLength)) {
        // Encode only when a byte range is actually needed.
        const sqlBuffer = Buffer.from(originalSQL, 'utf8');
        const startByte = hasLocation ? location : 0;
        const endByte = hasLength ? startByte + length : sqlBuffer.length;
        // subarray() has the semantics of the deprecated Buffer#slice().
        extracted = sqlBuffer.subarray(startByte, endByte).toString('utf8');
    }
    else if (isFirst) {
        extracted = originalSQL;
    }
    if (extracted && options.stripComments !== false) {
        const lines = extracted.split('\n');
        const firstSqlLine = lines.findIndex((line) => {
            const text = line.trim();
            return text !== '' && !text.startsWith('--');
        });
        // When no SQL line exists nothing is dropped. The result is trimmed,
        // so the indentation of the first kept line is not preserved.
        extracted = lines.slice(Math.max(firstSqlLine, 0)).join('\n').trim();
    }
    if (extracted && !options.skipValidation) {
        const candidate = extracted.trim();
        // A statement beginning with a keyword that lost its first letter
        // indicates a misaligned range.
        const looksTruncated = ['ELECT ', 'REATE ', 'NSERT '].some((prefix) => candidate.startsWith(prefix));
        if (looksTruncated || candidate.length === 0) {
            return null;
        }
    }
    return extracted;
}
|
|
75
|
+
/**
 * Parses `sql` and returns one entry per parsed statement whose text could
 * be extracted.
 *
 * Each entry holds the extracted text, the statement's position in the
 * parse result, and the raw `stmt_location` / `stmt_len` values reported
 * for it. Statements for which extractStatement() yields a falsy value are
 * omitted, so `index` values may have gaps.
 *
 * @param {string} sql - SQL text to split.
 * @param {object} [options={}] - Passed through to extractStatement().
 * @returns {Promise<Array<{statement: string, index: number, location: (number|undefined), length: (number|undefined)}>>}
 */
async function splitStatements(sql, options = {}) {
    const { stmts } = await (0, libpg_query_1.parse)(sql);
    const collected = [];
    if (!stmts) {
        return collected;
    }
    for (const [position, rawStmt] of stmts.entries()) {
        const text = extractStatement(sql, rawStmt, position === 0, options);
        if (!text) {
            continue;
        }
        collected.push({
            statement: text,
            index: position,
            location: rawStmt.stmt_location,
            length: rawStmt.stmt_len
        });
    }
    return collected;
}
|
|
99
|
+
/**
 * Builds a fixture key of the form `<path without .sql>-<n>.<extension>`,
 * where `n` is the 1-based statement number.
 *
 * @param {string} relativePath - Path whose trailing `.sql` is removed.
 * @param {number} statementIndex - Zero-based statement index.
 * @param {string} [extension='sql'] - Extension appended to the key.
 * @returns {string} The generated key.
 */
function generateStatementKey(relativePath, statementIndex, extension = 'sql') {
    const stem = relativePath.replace(/\.sql$/, '');
    const ordinal = statementIndex + 1;
    return `${stem}-${ordinal}.${extension}`;
}
|
|
105
|
+
/**
 * Debug helper that extracts the same statement range twice, once by
 * string index and once by UTF-8 byte offset, so the two can be compared.
 *
 * When either `stmt_location` or `stmt_len` is missing, both extractions
 * are empty strings.
 *
 * @param {string} sql - Full SQL text.
 * @param {{stmt_location?: number, stmt_len?: number}} rawStmt - Statement
 *   node carrying the offset and length.
 * @returns {{characterBased: string, byteBased: string, matches: boolean,
 *   unicodeChars: number, byteLength: number, charLength: number}}
 *   `unicodeChars` is `byteLength - charLength`, i.e. the surplus of UTF-8
 *   bytes over string length units, not a count of characters.
 */
function debugUnicodeExtraction(sql, rawStmt) {
    // Encode once; the buffer serves both the length and the byte slice.
    const sqlBuffer = Buffer.from(sql, 'utf8');
    const charLength = sql.length;
    const byteLength = sqlBuffer.length;
    const { stmt_location: location, stmt_len: length } = rawStmt;
    let characterBased = '';
    let byteBased = '';
    if (location !== undefined && length !== undefined) {
        const end = location + length;
        characterBased = sql.substring(location, end);
        // subarray() has the semantics of the deprecated Buffer#slice().
        byteBased = sqlBuffer.subarray(location, end).toString('utf8');
    }
    return {
        characterBased,
        byteBased,
        matches: characterBased === byteBased,
        unicodeChars: byteLength - charLength,
        byteLength,
        charLength
    };
}
|