pgsql-deparser 17.4.1 → 17.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/deparser.js CHANGED
@@ -867,13 +867,16 @@ class Deparser {
867
867
  formatStr = '(%s)';
868
868
  }
869
869
  const boolContext = { ...context, bool: true };
870
+ // explanation of our syntax/fix below:
871
+ // return formatStr.replace('%s', andArgs); // ❌ Interprets $ as special syntax
872
+ // return formatStr.replace('%s', () => andArgs); // ✅ Function callback prevents interpretation
870
873
  switch (boolop) {
871
874
  case 'AND_EXPR':
872
875
  const andArgs = args.map(arg => this.visit(arg, boolContext)).join(' AND ');
873
- return formatStr.replace('%s', andArgs);
876
+ return formatStr.replace('%s', () => andArgs);
874
877
  case 'OR_EXPR':
875
878
  const orArgs = args.map(arg => this.visit(arg, boolContext)).join(' OR ');
876
- return formatStr.replace('%s', orArgs);
879
+ return formatStr.replace('%s', () => orArgs);
877
880
  case 'NOT_EXPR':
878
881
  return `NOT (${this.visit(args[0], context)})`;
879
882
  default:
@@ -1132,23 +1135,23 @@ class Deparser {
1132
1135
  else if (nodeAny.sval !== undefined) {
1133
1136
  if (typeof nodeAny.sval === 'object' && nodeAny.sval !== null) {
1134
1137
  if (nodeAny.sval.sval !== undefined) {
1135
- return quote_utils_1.QuoteUtils.escape(nodeAny.sval.sval);
1138
+ return quote_utils_1.QuoteUtils.formatEString(nodeAny.sval.sval);
1136
1139
  }
1137
1140
  else if (nodeAny.sval.String && nodeAny.sval.String.sval !== undefined) {
1138
- return quote_utils_1.QuoteUtils.escape(nodeAny.sval.String.sval);
1141
+ return quote_utils_1.QuoteUtils.formatEString(nodeAny.sval.String.sval);
1139
1142
  }
1140
1143
  else if (Object.keys(nodeAny.sval).length === 0) {
1141
1144
  return "''";
1142
1145
  }
1143
1146
  else {
1144
- return quote_utils_1.QuoteUtils.escape(nodeAny.sval.toString());
1147
+ return quote_utils_1.QuoteUtils.formatEString(nodeAny.sval.toString());
1145
1148
  }
1146
1149
  }
1147
1150
  else if (nodeAny.sval === null) {
1148
1151
  return 'NULL';
1149
1152
  }
1150
1153
  else {
1151
- return quote_utils_1.QuoteUtils.escape(nodeAny.sval);
1154
+ return quote_utils_1.QuoteUtils.formatEString(nodeAny.sval);
1152
1155
  }
1153
1156
  }
1154
1157
  else if (nodeAny.boolval !== undefined) {
@@ -1835,7 +1838,7 @@ class Deparser {
1835
1838
  }
1836
1839
  String(node, context) {
1837
1840
  if (context.isStringLiteral || context.isEnumValue) {
1838
- return `'${node.sval || ''}'`;
1841
+ return quote_utils_1.QuoteUtils.formatEString(node.sval || '');
1839
1842
  }
1840
1843
  const value = node.sval || '';
1841
1844
  if (context.parentNodeTypes.includes('DefElem') ||
@@ -5054,6 +5057,18 @@ class Deparser {
5054
5057
  : argValue;
5055
5058
  return `${node.defname} = ${quotedValue}`;
5056
5059
  }
5060
+ // Handle CopyStmt WITH clause options - uppercase format without quotes
5061
+ if (context.parentNodeTypes.includes('CopyStmt')) {
5062
+ if (node.defname === 'format' && node.arg && this.getNodeType(node.arg) === 'String') {
5063
+ const stringData = this.getNodeData(node.arg);
5064
+ return `FORMAT ${stringData.sval.toUpperCase()}`;
5065
+ }
5066
+ // Handle other COPY options with uppercase defname
5067
+ if (node.arg) {
5068
+ return `${node.defname.toUpperCase()} ${argValue}`;
5069
+ }
5070
+ return node.defname.toUpperCase();
5071
+ }
5057
5072
  // Handle CREATE OPERATOR and CREATE TYPE context
5058
5073
  if (context.parentNodeTypes.includes('DefineStmt')) {
5059
5074
  const preservedName = this.preserveOperatorDefElemCase(node.defname);
@@ -5744,8 +5759,7 @@ class Deparser {
5744
5759
  output.push('NULL');
5745
5760
  }
5746
5761
  else if (node.comment) {
5747
- const escapedComment = node.comment.replace(/'/g, "''");
5748
- output.push(`'${escapedComment}'`);
5762
+ output.push(quote_utils_1.QuoteUtils.formatEString(node.comment));
5749
5763
  }
5750
5764
  return output.join(' ');
5751
5765
  }
package/esm/deparser.js CHANGED
@@ -864,13 +864,16 @@ export class Deparser {
864
864
  formatStr = '(%s)';
865
865
  }
866
866
  const boolContext = { ...context, bool: true };
867
+ // explanation of our syntax/fix below:
868
+ // return formatStr.replace('%s', andArgs); // ❌ Interprets $ as special syntax
869
+ // return formatStr.replace('%s', () => andArgs); // ✅ Function callback prevents interpretation
867
870
  switch (boolop) {
868
871
  case 'AND_EXPR':
869
872
  const andArgs = args.map(arg => this.visit(arg, boolContext)).join(' AND ');
870
- return formatStr.replace('%s', andArgs);
873
+ return formatStr.replace('%s', () => andArgs);
871
874
  case 'OR_EXPR':
872
875
  const orArgs = args.map(arg => this.visit(arg, boolContext)).join(' OR ');
873
- return formatStr.replace('%s', orArgs);
876
+ return formatStr.replace('%s', () => orArgs);
874
877
  case 'NOT_EXPR':
875
878
  return `NOT (${this.visit(args[0], context)})`;
876
879
  default:
@@ -1129,23 +1132,23 @@ export class Deparser {
1129
1132
  else if (nodeAny.sval !== undefined) {
1130
1133
  if (typeof nodeAny.sval === 'object' && nodeAny.sval !== null) {
1131
1134
  if (nodeAny.sval.sval !== undefined) {
1132
- return QuoteUtils.escape(nodeAny.sval.sval);
1135
+ return QuoteUtils.formatEString(nodeAny.sval.sval);
1133
1136
  }
1134
1137
  else if (nodeAny.sval.String && nodeAny.sval.String.sval !== undefined) {
1135
- return QuoteUtils.escape(nodeAny.sval.String.sval);
1138
+ return QuoteUtils.formatEString(nodeAny.sval.String.sval);
1136
1139
  }
1137
1140
  else if (Object.keys(nodeAny.sval).length === 0) {
1138
1141
  return "''";
1139
1142
  }
1140
1143
  else {
1141
- return QuoteUtils.escape(nodeAny.sval.toString());
1144
+ return QuoteUtils.formatEString(nodeAny.sval.toString());
1142
1145
  }
1143
1146
  }
1144
1147
  else if (nodeAny.sval === null) {
1145
1148
  return 'NULL';
1146
1149
  }
1147
1150
  else {
1148
- return QuoteUtils.escape(nodeAny.sval);
1151
+ return QuoteUtils.formatEString(nodeAny.sval);
1149
1152
  }
1150
1153
  }
1151
1154
  else if (nodeAny.boolval !== undefined) {
@@ -1832,7 +1835,7 @@ export class Deparser {
1832
1835
  }
1833
1836
  String(node, context) {
1834
1837
  if (context.isStringLiteral || context.isEnumValue) {
1835
- return `'${node.sval || ''}'`;
1838
+ return QuoteUtils.formatEString(node.sval || '');
1836
1839
  }
1837
1840
  const value = node.sval || '';
1838
1841
  if (context.parentNodeTypes.includes('DefElem') ||
@@ -5051,6 +5054,18 @@ export class Deparser {
5051
5054
  : argValue;
5052
5055
  return `${node.defname} = ${quotedValue}`;
5053
5056
  }
5057
+ // Handle CopyStmt WITH clause options - uppercase format without quotes
5058
+ if (context.parentNodeTypes.includes('CopyStmt')) {
5059
+ if (node.defname === 'format' && node.arg && this.getNodeType(node.arg) === 'String') {
5060
+ const stringData = this.getNodeData(node.arg);
5061
+ return `FORMAT ${stringData.sval.toUpperCase()}`;
5062
+ }
5063
+ // Handle other COPY options with uppercase defname
5064
+ if (node.arg) {
5065
+ return `${node.defname.toUpperCase()} ${argValue}`;
5066
+ }
5067
+ return node.defname.toUpperCase();
5068
+ }
5054
5069
  // Handle CREATE OPERATOR and CREATE TYPE context
5055
5070
  if (context.parentNodeTypes.includes('DefineStmt')) {
5056
5071
  const preservedName = this.preserveOperatorDefElemCase(node.defname);
@@ -5741,8 +5756,7 @@ export class Deparser {
5741
5756
  output.push('NULL');
5742
5757
  }
5743
5758
  else if (node.comment) {
5744
- const escapedComment = node.comment.replace(/'/g, "''");
5745
- output.push(`'${escapedComment}'`);
5759
+ output.push(QuoteUtils.formatEString(node.comment));
5746
5760
  }
5747
5761
  return output.join(' ');
5748
5762
  }
@@ -48,4 +48,34 @@ export class QuoteUtils {
48
48
  static escape(literal) {
49
49
  return `'${literal.replace(/'/g, "''")}'`;
50
50
  }
51
+ /**
52
+ * Escapes a string value for use in E-prefixed string literals
53
+ * Handles both backslashes and single quotes properly
54
+ */
55
+ static escapeEString(value) {
56
+ return value.replace(/\\/g, '\\\\').replace(/'/g, "''");
57
+ }
58
+ /**
59
+ * Formats a string as an E-prefixed string literal with proper escaping
60
+ * This wraps the complete E-prefix logic including detection and formatting
61
+ */
62
+ static formatEString(value) {
63
+ const needsEscape = QuoteUtils.needsEscapePrefix(value);
64
+ if (needsEscape) {
65
+ const escapedValue = QuoteUtils.escapeEString(value);
66
+ return `E'${escapedValue}'`;
67
+ }
68
+ else {
69
+ return QuoteUtils.escape(value);
70
+ }
71
+ }
72
+ /**
73
+ * Determines if a string value needs E-prefix for escaped string literals
74
+ * Detects backslash escape sequences that require E-prefix in PostgreSQL
75
+ */
76
+ static needsEscapePrefix(value) {
77
+ // Always use E'' if the string contains any backslashes,
78
+ // unless it's a raw \x... bytea-style literal.
79
+ return !/^\\x[0-9a-fA-F]+$/i.test(value) && value.includes('\\');
80
+ }
51
81
  }
@@ -0,0 +1,125 @@
1
+ import { parse } from 'libpg-query';
2
/**
 * Extracts the text of a single statement from `originalSQL` using the
 * `stmt_location` / `stmt_len` fields of `rawStmt`. Both are applied as UTF-8
 * byte offsets (not UTF-16 string indices), so multi-byte characters before
 * the statement do not shift the result.
 *
 * Offset handling:
 *  - location and length present: bytes [location, location + length)
 *  - location only: from location to the end of the input
 *  - first statement, length only: bytes [0, length)
 *  - first statement, neither present: the entire input
 *  - any other combination: nothing is extracted and `null` is returned
 *
 * @param {string} originalSQL - Full SQL text the statement belongs to.
 * @param {object} rawStmt - Raw statement node; only `stmt_location` and `stmt_len` are read.
 * @param {boolean} [isFirst=false] - Whether this is the first statement of the input.
 * @param {object} [options={}]
 * @param {boolean} [options.stripComments] - Unless exactly `false`, leading blank lines
 *   and leading `--` comment lines are dropped and the result is trimmed.
 * @param {boolean} [options.skipValidation] - When truthy, the truncated-keyword check is skipped.
 * @returns {string|null} The extracted text; `null` when nothing could be extracted or the
 *   text starts with an obviously truncated keyword. An empty extraction is returned as-is.
 */
export function extractStatement(originalSQL, rawStmt, isFirst = false, options = {}) {
    // Prefixes that indicate the first character of a keyword was cut off.
    const truncatedPrefixes = ['ELECT ', 'REATE ', 'NSERT '];
    const location = rawStmt.stmt_location;
    const length = rawStmt.stmt_len;
    let extracted = null;
    if (location !== undefined) {
        // Slice on the UTF-8 encoding because the offsets are byte positions.
        const sqlBuffer = Buffer.from(originalSQL, 'utf8');
        const endByte = length !== undefined ? location + length : sqlBuffer.length;
        extracted = sqlBuffer.subarray(location, endByte).toString('utf8');
    }
    else if (isFirst) {
        // Location is missing: only the first statement can be recovered.
        extracted = length !== undefined
            ? Buffer.from(originalSQL, 'utf8').subarray(0, length).toString('utf8')
            : originalSQL;
    }
    if (extracted && options.stripComments !== false) {
        const lines = extracted.split('\n');
        const firstSqlLine = lines.findIndex((rawLine) => {
            const line = rawLine.trim();
            return line !== '' && !line.startsWith('--');
        });
        // When every line is blank or a comment, keep the text from the first line on.
        // The result is trimmed, so leading indentation is not preserved.
        extracted = lines.slice(Math.max(firstSqlLine, 0)).join('\n').trim();
    }
    if (extracted && !options.skipValidation) {
        const head = extracted.trim();
        const looksTruncated = truncatedPrefixes.some((prefix) => head.startsWith(prefix));
        if (looksTruncated || head.length === 0) {
            return null; // Invalid extraction, skip this statement
        }
    }
    return extracted;
}
69
/**
 * Parses `sql` with the PostgreSQL parser and returns one entry per statement
 * whose text could be extracted.
 *
 * @param {string} sql - SQL text containing zero or more statements.
 * @param {object} [options={}] - Forwarded unchanged to `extractStatement`.
 * @returns {Promise<Array<{statement: string, index: number, location: (number|undefined), length: (number|undefined)}>>}
 *   Entries in parse order; `index` is the position in the parse result, so gaps
 *   appear where a statement was skipped because extraction returned a falsy value.
 */
export async function splitStatements(sql, options = {}) {
    const { stmts } = await parse(sql);
    const collected = [];
    if (!stmts) {
        return collected;
    }
    for (const [position, rawStmt] of stmts.entries()) {
        // Only the statement at position 0 is allowed the "first statement" fallbacks.
        const text = extractStatement(sql, rawStmt, position === 0, options);
        if (!text) {
            continue;
        }
        collected.push({
            statement: text,
            index: position,
            location: rawStmt.stmt_location,
            length: rawStmt.stmt_len
        });
    }
    return collected;
}
93
/**
 * Builds a fixture key of the form `<path without .sql>-<1-based index>.<extension>`.
 *
 * @param {string} relativePath - Fixture path; a trailing `.sql` is removed.
 * @param {number} statementIndex - Zero-based statement index.
 * @param {string} [extension='sql'] - Extension appended to the key.
 * @returns {string} The generated key.
 */
export function generateStatementKey(relativePath, statementIndex, extension = 'sql') {
    const basePath = relativePath.replace(/\.sql$/, '');
    const ordinal = statementIndex + 1;
    return `${basePath}-${ordinal}.${extension}`;
}
99
/**
 * Debugging aid that extracts the same statement twice: once treating
 * `stmt_location` / `stmt_len` as string (UTF-16) indices and once as UTF-8
 * byte offsets, so the two results can be compared.
 *
 * @param {string} sql - Full SQL text.
 * @param {object} rawStmt - Raw statement node; only `stmt_location` and `stmt_len` are read.
 * @returns {{characterBased: string, byteBased: string, matches: boolean,
 *   unicodeChars: number, byteLength: number, charLength: number}}
 *   Both extractions are empty strings when either offset field is undefined.
 *   `unicodeChars` is `byteLength - charLength`, i.e. the number of extra bytes
 *   contributed by multi-byte characters, not a character count.
 */
export function debugUnicodeExtraction(sql, rawStmt) {
    // Encode once; the buffer provides both the byte length and the byte slice.
    const sqlBuffer = Buffer.from(sql, 'utf8');
    const charLength = sql.length;
    const byteLength = sqlBuffer.length;
    let characterBased = '';
    let byteBased = '';
    if (rawStmt.stmt_location !== undefined && rawStmt.stmt_len !== undefined) {
        const start = rawStmt.stmt_location;
        const end = rawStmt.stmt_location + rawStmt.stmt_len;
        // Character-based extraction (old way)
        characterBased = sql.substring(start, end);
        // Byte-based extraction (new way)
        byteBased = sqlBuffer.subarray(start, end).toString('utf8');
    }
    return {
        characterBased,
        byteBased,
        matches: characterBased === byteBased,
        unicodeChars: byteLength - charLength,
        byteLength,
        charLength
    };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pgsql-deparser",
3
- "version": "17.4.1",
3
+ "version": "17.5.0",
4
4
  "author": "Dan Lynch <pyramation@gmail.com>",
5
5
  "description": "PostgreSQL AST Deparser",
6
6
  "main": "index.js",
@@ -30,6 +30,7 @@
30
30
  "fixtures:ast": "ts-node scripts/make-fixtures-ast.ts",
31
31
  "fixtures:sql": "ts-node scripts/make-fixtures-sql.ts",
32
32
  "fixtures": "ts-node scripts/make-fixtures.ts",
33
+ "fixtures:upstream-diff": "ts-node scripts/make-upstream-diff.ts",
33
34
  "lint": "eslint . --fix",
34
35
  "test": "jest",
35
36
  "test:watch": "jest --watch"
@@ -48,7 +49,7 @@
48
49
  "libpg-query": "17.3.3"
49
50
  },
50
51
  "dependencies": {
51
- "@pgsql/types": "^17.4.1"
52
+ "@pgsql/types": "^17.4.2"
52
53
  },
53
- "gitHead": "254b19f220b9bca358dcb64e761c8ffc5ddf8f82"
54
+ "gitHead": "a066ec74d2840bcf75bc0f975475508180a621fc"
54
55
  }
@@ -2,4 +2,19 @@ export declare class QuoteUtils {
2
2
  static needsQuotes(value: string): boolean;
3
3
  static quote(value: any): any;
4
4
  static escape(literal: string): string;
5
+ /**
6
+ * Escapes a string value for use in E-prefixed string literals
7
+ * Handles both backslashes and single quotes properly
8
+ */
9
+ static escapeEString(value: string): string;
10
+ /**
11
+ * Formats a string as an E-prefixed string literal with proper escaping
12
+ * This wraps the complete E-prefix logic including detection and formatting
13
+ */
14
+ static formatEString(value: string): string;
15
+ /**
16
+ * Determines if a string value needs E-prefix for escaped string literals
17
+ * Detects backslash escape sequences that require E-prefix in PostgreSQL
18
+ */
19
+ static needsEscapePrefix(value: string): boolean;
5
20
  }
@@ -51,5 +51,35 @@ class QuoteUtils {
51
51
  static escape(literal) {
52
52
  return `'${literal.replace(/'/g, "''")}'`;
53
53
  }
54
+ /**
55
+ * Escapes a string value for use in E-prefixed string literals
56
+ * Handles both backslashes and single quotes properly
57
+ */
58
+ static escapeEString(value) {
59
+ return value.replace(/\\/g, '\\\\').replace(/'/g, "''");
60
+ }
61
+ /**
62
+ * Formats a string as an E-prefixed string literal with proper escaping
63
+ * This wraps the complete E-prefix logic including detection and formatting
64
+ */
65
+ static formatEString(value) {
66
+ const needsEscape = QuoteUtils.needsEscapePrefix(value);
67
+ if (needsEscape) {
68
+ const escapedValue = QuoteUtils.escapeEString(value);
69
+ return `E'${escapedValue}'`;
70
+ }
71
+ else {
72
+ return QuoteUtils.escape(value);
73
+ }
74
+ }
75
+ /**
76
+ * Determines if a string value needs E-prefix for escaped string literals
77
+ * Detects backslash escape sequences that require E-prefix in PostgreSQL
78
+ */
79
+ static needsEscapePrefix(value) {
80
+ // Always use E'' if the string contains any backslashes,
81
+ // unless it's a raw \x... bytea-style literal.
82
+ return !/^\\x[0-9a-fA-F]+$/i.test(value) && value.includes('\\');
83
+ }
54
84
  }
55
85
  exports.QuoteUtils = QuoteUtils;
@@ -0,0 +1,38 @@
1
+ import { RawStmt } from '@pgsql/types';
2
+ export interface ExtractedStatement {
3
+ statement: string;
4
+ index: number;
5
+ location?: number;
6
+ length?: number;
7
+ }
8
+ export interface StatementSplitterOptions {
9
+ /** Skip validation for malformed statements */
10
+ skipValidation?: boolean;
11
+ /** Strip leading comments from extracted statements */
12
+ stripComments?: boolean;
13
+ }
14
+ /**
15
+ * Extracts a single statement from SQL using PostgreSQL's location information.
16
+ * Handles Unicode properly by using byte positions instead of character positions.
17
+ */
18
+ export declare function extractStatement(originalSQL: string, rawStmt: RawStmt, isFirst?: boolean, options?: StatementSplitterOptions): string | null;
19
+ /**
20
+ * Splits SQL text into individual statements using PostgreSQL's parser.
21
+ * Handles Unicode characters properly and provides detailed location information.
22
+ */
23
+ export declare function splitStatements(sql: string, options?: StatementSplitterOptions): Promise<ExtractedStatement[]>;
24
+ /**
25
+ * Utility to generate statement keys for fixtures
26
+ */
27
+ export declare function generateStatementKey(relativePath: string, statementIndex: number, extension?: string): string;
28
+ /**
29
+ * Test utility to compare byte vs character extraction for debugging Unicode issues
30
+ */
31
+ export declare function debugUnicodeExtraction(sql: string, rawStmt: RawStmt): {
32
+ characterBased: string;
33
+ byteBased: string;
34
+ matches: boolean;
35
+ unicodeChars: number;
36
+ byteLength: number;
37
+ charLength: number;
38
+ };
@@ -0,0 +1,131 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.extractStatement = extractStatement;
4
+ exports.splitStatements = splitStatements;
5
+ exports.generateStatementKey = generateStatementKey;
6
+ exports.debugUnicodeExtraction = debugUnicodeExtraction;
7
+ const libpg_query_1 = require("libpg-query");
8
/**
 * Extracts the text of a single statement from `originalSQL` using the
 * `stmt_location` / `stmt_len` fields of `rawStmt`. Both are applied as UTF-8
 * byte offsets (not UTF-16 string indices), so multi-byte characters before
 * the statement do not shift the result.
 *
 * Offset handling:
 *  - location and length present: bytes [location, location + length)
 *  - location only: from location to the end of the input
 *  - first statement, length only: bytes [0, length)
 *  - first statement, neither present: the entire input
 *  - any other combination: nothing is extracted and `null` is returned
 *
 * @param {string} originalSQL - Full SQL text the statement belongs to.
 * @param {object} rawStmt - Raw statement node; only `stmt_location` and `stmt_len` are read.
 * @param {boolean} [isFirst=false] - Whether this is the first statement of the input.
 * @param {object} [options={}]
 * @param {boolean} [options.stripComments] - Unless exactly `false`, leading blank lines
 *   and leading `--` comment lines are dropped and the result is trimmed.
 * @param {boolean} [options.skipValidation] - When truthy, the truncated-keyword check is skipped.
 * @returns {string|null} The extracted text; `null` when nothing could be extracted or the
 *   text starts with an obviously truncated keyword. An empty extraction is returned as-is.
 */
function extractStatement(originalSQL, rawStmt, isFirst = false, options = {}) {
    // Prefixes that indicate the first character of a keyword was cut off.
    const truncatedPrefixes = ['ELECT ', 'REATE ', 'NSERT '];
    const location = rawStmt.stmt_location;
    const length = rawStmt.stmt_len;
    let extracted = null;
    if (location !== undefined) {
        // Slice on the UTF-8 encoding because the offsets are byte positions.
        const sqlBuffer = Buffer.from(originalSQL, 'utf8');
        const endByte = length !== undefined ? location + length : sqlBuffer.length;
        extracted = sqlBuffer.subarray(location, endByte).toString('utf8');
    }
    else if (isFirst) {
        // Location is missing: only the first statement can be recovered.
        extracted = length !== undefined
            ? Buffer.from(originalSQL, 'utf8').subarray(0, length).toString('utf8')
            : originalSQL;
    }
    if (extracted && options.stripComments !== false) {
        const lines = extracted.split('\n');
        const firstSqlLine = lines.findIndex((rawLine) => {
            const line = rawLine.trim();
            return line !== '' && !line.startsWith('--');
        });
        // When every line is blank or a comment, keep the text from the first line on.
        // The result is trimmed, so leading indentation is not preserved.
        extracted = lines.slice(Math.max(firstSqlLine, 0)).join('\n').trim();
    }
    if (extracted && !options.skipValidation) {
        const head = extracted.trim();
        const looksTruncated = truncatedPrefixes.some((prefix) => head.startsWith(prefix));
        if (looksTruncated || head.length === 0) {
            return null; // Invalid extraction, skip this statement
        }
    }
    return extracted;
}
75
/**
 * Parses `sql` with the PostgreSQL parser and returns one entry per statement
 * whose text could be extracted.
 *
 * @param {string} sql - SQL text containing zero or more statements.
 * @param {object} [options={}] - Forwarded unchanged to `extractStatement`.
 * @returns {Promise<Array<{statement: string, index: number, location: (number|undefined), length: (number|undefined)}>>}
 *   Entries in parse order; `index` is the position in the parse result, so gaps
 *   appear where a statement was skipped because extraction returned a falsy value.
 */
async function splitStatements(sql, options = {}) {
    const { stmts } = await (0, libpg_query_1.parse)(sql);
    const collected = [];
    if (!stmts) {
        return collected;
    }
    for (const [position, rawStmt] of stmts.entries()) {
        // Only the statement at position 0 is allowed the "first statement" fallbacks.
        const text = extractStatement(sql, rawStmt, position === 0, options);
        if (!text) {
            continue;
        }
        collected.push({
            statement: text,
            index: position,
            location: rawStmt.stmt_location,
            length: rawStmt.stmt_len
        });
    }
    return collected;
}
99
/**
 * Builds a fixture key of the form `<path without .sql>-<1-based index>.<extension>`.
 *
 * @param {string} relativePath - Fixture path; a trailing `.sql` is removed.
 * @param {number} statementIndex - Zero-based statement index.
 * @param {string} [extension='sql'] - Extension appended to the key.
 * @returns {string} The generated key.
 */
function generateStatementKey(relativePath, statementIndex, extension = 'sql') {
    const basePath = relativePath.replace(/\.sql$/, '');
    const ordinal = statementIndex + 1;
    return `${basePath}-${ordinal}.${extension}`;
}
105
/**
 * Debugging aid that extracts the same statement twice: once treating
 * `stmt_location` / `stmt_len` as string (UTF-16) indices and once as UTF-8
 * byte offsets, so the two results can be compared.
 *
 * @param {string} sql - Full SQL text.
 * @param {object} rawStmt - Raw statement node; only `stmt_location` and `stmt_len` are read.
 * @returns {{characterBased: string, byteBased: string, matches: boolean,
 *   unicodeChars: number, byteLength: number, charLength: number}}
 *   Both extractions are empty strings when either offset field is undefined.
 *   `unicodeChars` is `byteLength - charLength`, i.e. the number of extra bytes
 *   contributed by multi-byte characters, not a character count.
 */
function debugUnicodeExtraction(sql, rawStmt) {
    // Encode once; the buffer provides both the byte length and the byte slice.
    const sqlBuffer = Buffer.from(sql, 'utf8');
    const charLength = sql.length;
    const byteLength = sqlBuffer.length;
    let characterBased = '';
    let byteBased = '';
    if (rawStmt.stmt_location !== undefined && rawStmt.stmt_len !== undefined) {
        const start = rawStmt.stmt_location;
        const end = rawStmt.stmt_location + rawStmt.stmt_len;
        // Character-based extraction (old way)
        characterBased = sql.substring(start, end);
        // Byte-based extraction (new way)
        byteBased = sqlBuffer.subarray(start, end).toString('utf8');
    }
    return {
        characterBased,
        byteBased,
        matches: characterBased === byteBased,
        unicodeChars: byteLength - charLength,
        byteLength,
        charLength
    };
}