duckdb 0.6.2-dev1030.0 → 0.6.2-dev1040.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev1030.0",
5
+ "version": "0.6.2-dev1040.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -877,7 +877,20 @@ struct IntegerCastOperation {
877
877
 
878
878
  template <class T, bool NEGATIVE, bool ALLOW_EXPONENT, class OP = IntegerCastOperation>
879
879
  static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict) {
880
- idx_t start_pos = NEGATIVE || *buf == '+' ? 1 : 0;
880
+ idx_t start_pos;
881
+ if (NEGATIVE) {
882
+ start_pos = 1;
883
+ } else {
884
+ if (*buf == '+') {
885
+ if (strict) {
886
+ // leading plus is not allowed in strict mode
887
+ return false;
888
+ }
889
+ start_pos = 1;
890
+ } else {
891
+ start_pos = 0;
892
+ }
893
+ }
881
894
  idx_t pos = start_pos;
882
895
  while (pos < len) {
883
896
  if (!StringUtil::CharacterIsDigit(buf[pos])) {
@@ -1168,6 +1181,10 @@ static bool TryDoubleCast(const char *buf, idx_t len, T &result, bool strict) {
1168
1181
  return false;
1169
1182
  }
1170
1183
  if (*buf == '+') {
1184
+ if (strict) {
1185
+ // plus is not allowed in strict mode
1186
+ return false;
1187
+ }
1171
1188
  buf++;
1172
1189
  len--;
1173
1190
  }
@@ -116,7 +116,7 @@ static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
116
116
  if (!SkipToClose(pos, buf, len, ++lvl, ']')) {
117
117
  return false;
118
118
  }
119
- } else if (buf[pos] == '"' || buf[pos] == '\'') {
119
+ } else if ((buf[pos] == '"' || buf[pos] == '\'') && pos == start_pos) {
120
120
  SkipToCloseQuotes(pos, buf, len);
121
121
  } else if (buf[pos] == '{') {
122
122
  idx_t struct_lvl = 0;
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev1030"
2
+ #define DUCKDB_VERSION "0.6.2-dev1040"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "27846005c0"
5
+ #define DUCKDB_SOURCE_ID "6fab192961"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -19,41 +19,158 @@ namespace duckdb {
19
19
  Parser::Parser(ParserOptions options_p) : options(options_p) {
20
20
  }
21
21
 
22
+ struct UnicodeSpace {
23
+ UnicodeSpace(idx_t pos, idx_t bytes) : pos(pos), bytes(bytes) {
24
+ }
25
+
26
+ idx_t pos;
27
+ idx_t bytes;
28
+ };
29
+
30
+ static bool ReplaceUnicodeSpaces(const string &query, string &new_query, vector<UnicodeSpace> &unicode_spaces) {
31
+ if (unicode_spaces.empty()) {
32
+ // no unicode spaces found
33
+ return false;
34
+ }
35
+ idx_t prev = 0;
36
+ for (auto &usp : unicode_spaces) {
37
+ new_query += query.substr(prev, usp.pos - prev);
38
+ new_query += " ";
39
+ prev = usp.pos + usp.bytes;
40
+ }
41
+ new_query += query.substr(prev, query.size() - prev);
42
+ return true;
43
+ }
44
+
45
+ // This function strips unicode space characters from the query and replaces them with regular spaces
46
+ // It returns true if any unicode space characters were found and stripped
47
+ // See here for a list of unicode space characters - https://jkorpela.fi/chars/spaces.html
48
+ static bool StripUnicodeSpaces(const string &query_str, string &new_query) {
49
+ const idx_t NBSP_LEN = 2;
50
+ const idx_t USP_LEN = 3;
51
+ idx_t pos = 0;
52
+ unsigned char quote;
53
+ vector<UnicodeSpace> unicode_spaces;
54
+ auto query = (unsigned char *)query_str.c_str();
55
+ auto qsize = query_str.size();
56
+
57
+ regular:
58
+ for (; pos + 2 < qsize; pos++) {
59
+ if (query[pos] == 0xC2) {
60
+ if (query[pos + 1] == 0xA0) {
61
+ // U+00A0 - C2A0
62
+ unicode_spaces.emplace_back(pos, NBSP_LEN);
63
+ }
64
+ }
65
+ if (query[pos] == 0xE2) {
66
+ if (query[pos + 1] == 0x80) {
67
+ if (query[pos + 2] >= 0x80 && query[pos + 2] <= 0x8B) {
68
+ // U+2000 to U+200B
69
+ // E28080 - E2808B
70
+ unicode_spaces.emplace_back(pos, USP_LEN);
71
+ } else if (query[pos + 2] == 0xAF) {
72
+ // U+202F - E280AF
73
+ unicode_spaces.emplace_back(pos, USP_LEN);
74
+ }
75
+ } else if (query[pos + 1] == 0x81) {
76
+ if (query[pos + 2] == 0x9F) {
77
+ // U+205F - E2819f
78
+ unicode_spaces.emplace_back(pos, USP_LEN);
79
+ } else if (query[pos + 2] == 0xA0) {
80
+ // U+2060 - E281A0
81
+ unicode_spaces.emplace_back(pos, USP_LEN);
82
+ }
83
+ }
84
+ } else if (query[pos] == 0xE3) {
85
+ if (query[pos + 1] == 0x80 && query[pos + 2] == 0x80) {
86
+ // U+3000 - E38080
87
+ unicode_spaces.emplace_back(pos, USP_LEN);
88
+ }
89
+ } else if (query[pos] == 0xEF) {
90
+ if (query[pos + 1] == 0xBB && query[pos + 2] == 0xBF) {
91
+ // U+FEFF - EFBBBF
92
+ unicode_spaces.emplace_back(pos, USP_LEN);
93
+ }
94
+ } else if (query[pos] == '"' || query[pos] == '\'') {
95
+ quote = query[pos];
96
+ pos++;
97
+ goto in_quotes;
98
+ } else if (query[pos] == '-' && query[pos + 1] == '-') {
99
+ goto in_comment;
100
+ }
101
+ }
102
+ goto end;
103
+ in_quotes:
104
+ for (; pos + 1 < qsize; pos++) {
105
+ if (query[pos] == quote) {
106
+ if (query[pos + 1] == quote) {
107
+ // escaped quote
108
+ pos++;
109
+ continue;
110
+ }
111
+ pos++;
112
+ goto regular;
113
+ }
114
+ }
115
+ goto end;
116
+ in_comment:
117
+ for (; pos < qsize; pos++) {
118
+ if (query[pos] == '\n' || query[pos] == '\r') {
119
+ goto regular;
120
+ }
121
+ }
122
+ goto end;
123
+ end:
124
+ return ReplaceUnicodeSpaces(query_str, new_query, unicode_spaces);
125
+ }
126
+
22
127
  void Parser::ParseQuery(const string &query) {
23
128
  Transformer transformer(options.max_expression_depth);
129
+ string parser_error;
130
+ {
131
+ // check if there are any unicode spaces in the string
132
+ string new_query;
133
+ if (StripUnicodeSpaces(query, new_query)) {
134
+ // there are - strip the unicode spaces and re-run the query
135
+ ParseQuery(new_query);
136
+ return;
137
+ }
138
+ }
24
139
  {
25
140
  PostgresParser::SetPreserveIdentifierCase(options.preserve_identifier_case);
26
141
  PostgresParser parser;
27
142
  parser.Parse(query);
143
+ if (parser.success) {
144
+ if (!parser.parse_tree) {
145
+ // empty statement
146
+ return;
147
+ }
28
148
 
29
- if (!parser.success) {
30
- if (options.extensions) {
31
- for (auto &ext : *options.extensions) {
32
- D_ASSERT(ext.parse_function);
33
- auto result = ext.parse_function(ext.parser_info.get(), query);
34
- if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
35
- auto statement = make_unique<ExtensionStatement>(ext, move(result.parse_data));
36
- statement->stmt_length = query.size();
37
- statement->stmt_location = 0;
38
- statements.push_back(move(statement));
39
- return;
40
- }
41
- if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
42
- throw ParserException(result.error);
43
- }
149
+ // if it succeeded, we transform the Postgres parse tree into a list of
150
+ // SQLStatements
151
+ transformer.TransformParseTree(parser.parse_tree, statements);
152
+ } else {
153
+ parser_error = QueryErrorContext::Format(query, parser.error_message, parser.error_location - 1);
154
+ }
155
+ }
156
+ if (!parser_error.empty()) {
157
+ if (options.extensions) {
158
+ for (auto &ext : *options.extensions) {
159
+ D_ASSERT(ext.parse_function);
160
+ auto result = ext.parse_function(ext.parser_info.get(), query);
161
+ if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
162
+ auto statement = make_unique<ExtensionStatement>(ext, move(result.parse_data));
163
+ statement->stmt_length = query.size();
164
+ statement->stmt_location = 0;
165
+ statements.push_back(move(statement));
166
+ return;
167
+ }
168
+ if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
169
+ throw ParserException(result.error);
44
170
  }
45
171
  }
46
- throw ParserException(QueryErrorContext::Format(query, parser.error_message, parser.error_location - 1));
47
172
  }
48
-
49
- if (!parser.parse_tree) {
50
- // empty statement
51
- return;
52
- }
53
-
54
- // if it succeeded, we transform the Postgres parse tree into a list of
55
- // SQLStatements
56
- transformer.TransformParseTree(parser.parse_tree, statements);
173
+ throw ParserException(parser_error);
57
174
  }
58
175
  if (!statements.empty()) {
59
176
  auto &last_statement = statements.back();