duckdb 0.6.2-dev1030.0 → 0.6.2-dev1040.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -877,7 +877,20 @@ struct IntegerCastOperation {
|
|
|
877
877
|
|
|
878
878
|
template <class T, bool NEGATIVE, bool ALLOW_EXPONENT, class OP = IntegerCastOperation>
|
|
879
879
|
static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict) {
|
|
880
|
-
idx_t start_pos = NEGATIVE || *buf == '+' ? 1 : 0;
|
|
880
|
+
idx_t start_pos;
|
|
881
|
+
if (NEGATIVE) {
|
|
882
|
+
start_pos = 1;
|
|
883
|
+
} else {
|
|
884
|
+
if (*buf == '+') {
|
|
885
|
+
if (strict) {
|
|
886
|
+
// leading plus is not allowed in strict mode
|
|
887
|
+
return false;
|
|
888
|
+
}
|
|
889
|
+
start_pos = 1;
|
|
890
|
+
} else {
|
|
891
|
+
start_pos = 0;
|
|
892
|
+
}
|
|
893
|
+
}
|
|
881
894
|
idx_t pos = start_pos;
|
|
882
895
|
while (pos < len) {
|
|
883
896
|
if (!StringUtil::CharacterIsDigit(buf[pos])) {
|
|
@@ -1168,6 +1181,10 @@ static bool TryDoubleCast(const char *buf, idx_t len, T &result, bool strict) {
|
|
|
1168
1181
|
return false;
|
|
1169
1182
|
}
|
|
1170
1183
|
if (*buf == '+') {
|
|
1184
|
+
if (strict) {
|
|
1185
|
+
// plus is not allowed in strict mode
|
|
1186
|
+
return false;
|
|
1187
|
+
}
|
|
1171
1188
|
buf++;
|
|
1172
1189
|
len--;
|
|
1173
1190
|
}
|
|
@@ -116,7 +116,7 @@ static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
|
|
|
116
116
|
if (!SkipToClose(pos, buf, len, ++lvl, ']')) {
|
|
117
117
|
return false;
|
|
118
118
|
}
|
|
119
|
-
} else if (buf[pos] == '"' || buf[pos] == '\'') {
|
|
119
|
+
} else if ((buf[pos] == '"' || buf[pos] == '\'') && pos == start_pos) {
|
|
120
120
|
SkipToCloseQuotes(pos, buf, len);
|
|
121
121
|
} else if (buf[pos] == '{') {
|
|
122
122
|
idx_t struct_lvl = 0;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
|
2
|
-
#define DUCKDB_VERSION "0.6.2-dev1030"
|
|
2
|
+
#define DUCKDB_VERSION "0.6.2-dev1040"
|
|
3
3
|
#endif
|
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
|
5
|
+
#define DUCKDB_SOURCE_ID "6fab192961"
|
|
6
6
|
#endif
|
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
|
8
8
|
#include "duckdb/main/database.hpp"
|
|
@@ -19,41 +19,158 @@ namespace duckdb {
|
|
|
19
19
|
Parser::Parser(ParserOptions options_p) : options(options_p) {
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
struct UnicodeSpace {
|
|
23
|
+
UnicodeSpace(idx_t pos, idx_t bytes) : pos(pos), bytes(bytes) {
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
idx_t pos;
|
|
27
|
+
idx_t bytes;
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
static bool ReplaceUnicodeSpaces(const string &query, string &new_query, vector<UnicodeSpace> &unicode_spaces) {
|
|
31
|
+
if (unicode_spaces.empty()) {
|
|
32
|
+
// no unicode spaces found
|
|
33
|
+
return false;
|
|
34
|
+
}
|
|
35
|
+
idx_t prev = 0;
|
|
36
|
+
for (auto &usp : unicode_spaces) {
|
|
37
|
+
new_query += query.substr(prev, usp.pos - prev);
|
|
38
|
+
new_query += " ";
|
|
39
|
+
prev = usp.pos + usp.bytes;
|
|
40
|
+
}
|
|
41
|
+
new_query += query.substr(prev, query.size() - prev);
|
|
42
|
+
return true;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// This function strips unicode space characters from the query and replaces them with regular spaces
|
|
46
|
+
// It returns true if any unicode space characters were found and stripped
|
|
47
|
+
// See here for a list of unicode space characters - https://jkorpela.fi/chars/spaces.html
|
|
48
|
+
static bool StripUnicodeSpaces(const string &query_str, string &new_query) {
|
|
49
|
+
const idx_t NBSP_LEN = 2;
|
|
50
|
+
const idx_t USP_LEN = 3;
|
|
51
|
+
idx_t pos = 0;
|
|
52
|
+
unsigned char quote;
|
|
53
|
+
vector<UnicodeSpace> unicode_spaces;
|
|
54
|
+
auto query = (unsigned char *)query_str.c_str();
|
|
55
|
+
auto qsize = query_str.size();
|
|
56
|
+
|
|
57
|
+
regular:
|
|
58
|
+
for (; pos + 2 < qsize; pos++) {
|
|
59
|
+
if (query[pos] == 0xC2) {
|
|
60
|
+
if (query[pos + 1] == 0xA0) {
|
|
61
|
+
// U+00A0 - C2A0
|
|
62
|
+
unicode_spaces.emplace_back(pos, NBSP_LEN);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
if (query[pos] == 0xE2) {
|
|
66
|
+
if (query[pos + 1] == 0x80) {
|
|
67
|
+
if (query[pos + 2] >= 0x80 && query[pos + 2] <= 0x8B) {
|
|
68
|
+
// U+2000 to U+200B
|
|
69
|
+
// E28080 - E2808B
|
|
70
|
+
unicode_spaces.emplace_back(pos, USP_LEN);
|
|
71
|
+
} else if (query[pos + 2] == 0xAF) {
|
|
72
|
+
// U+202F - E280AF
|
|
73
|
+
unicode_spaces.emplace_back(pos, USP_LEN);
|
|
74
|
+
}
|
|
75
|
+
} else if (query[pos + 1] == 0x81) {
|
|
76
|
+
if (query[pos + 2] == 0x9F) {
|
|
77
|
+
// U+205F - E2819f
|
|
78
|
+
unicode_spaces.emplace_back(pos, USP_LEN);
|
|
79
|
+
} else if (query[pos + 2] == 0xA0) {
|
|
80
|
+
// U+2060 - E281A0
|
|
81
|
+
unicode_spaces.emplace_back(pos, USP_LEN);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
} else if (query[pos] == 0xE3) {
|
|
85
|
+
if (query[pos + 1] == 0x80 && query[pos + 2] == 0x80) {
|
|
86
|
+
// U+3000 - E38080
|
|
87
|
+
unicode_spaces.emplace_back(pos, USP_LEN);
|
|
88
|
+
}
|
|
89
|
+
} else if (query[pos] == 0xEF) {
|
|
90
|
+
if (query[pos + 1] == 0xBB && query[pos + 2] == 0xBF) {
|
|
91
|
+
// U+FEFF - EFBBBF
|
|
92
|
+
unicode_spaces.emplace_back(pos, USP_LEN);
|
|
93
|
+
}
|
|
94
|
+
} else if (query[pos] == '"' || query[pos] == '\'') {
|
|
95
|
+
quote = query[pos];
|
|
96
|
+
pos++;
|
|
97
|
+
goto in_quotes;
|
|
98
|
+
} else if (query[pos] == '-' && query[pos + 1] == '-') {
|
|
99
|
+
goto in_comment;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
goto end;
|
|
103
|
+
in_quotes:
|
|
104
|
+
for (; pos + 1 < qsize; pos++) {
|
|
105
|
+
if (query[pos] == quote) {
|
|
106
|
+
if (query[pos + 1] == quote) {
|
|
107
|
+
// escaped quote
|
|
108
|
+
pos++;
|
|
109
|
+
continue;
|
|
110
|
+
}
|
|
111
|
+
pos++;
|
|
112
|
+
goto regular;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
goto end;
|
|
116
|
+
in_comment:
|
|
117
|
+
for (; pos < qsize; pos++) {
|
|
118
|
+
if (query[pos] == '\n' || query[pos] == '\r') {
|
|
119
|
+
goto regular;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
goto end;
|
|
123
|
+
end:
|
|
124
|
+
return ReplaceUnicodeSpaces(query_str, new_query, unicode_spaces);
|
|
125
|
+
}
|
|
126
|
+
|
|
22
127
|
void Parser::ParseQuery(const string &query) {
|
|
23
128
|
Transformer transformer(options.max_expression_depth);
|
|
129
|
+
string parser_error;
|
|
130
|
+
{
|
|
131
|
+
// check if there are any unicode spaces in the string
|
|
132
|
+
string new_query;
|
|
133
|
+
if (StripUnicodeSpaces(query, new_query)) {
|
|
134
|
+
// there are - strip the unicode spaces and re-run the query
|
|
135
|
+
ParseQuery(new_query);
|
|
136
|
+
return;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
24
139
|
{
|
|
25
140
|
PostgresParser::SetPreserveIdentifierCase(options.preserve_identifier_case);
|
|
26
141
|
PostgresParser parser;
|
|
27
142
|
parser.Parse(query);
|
|
143
|
+
if (parser.success) {
|
|
144
|
+
if (!parser.parse_tree) {
|
|
145
|
+
// empty statement
|
|
146
|
+
return;
|
|
147
|
+
}
|
|
28
148
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
149
|
+
// if it succeeded, we transform the Postgres parse tree into a list of
|
|
150
|
+
// SQLStatements
|
|
151
|
+
transformer.TransformParseTree(parser.parse_tree, statements);
|
|
152
|
+
} else {
|
|
153
|
+
parser_error = QueryErrorContext::Format(query, parser.error_message, parser.error_location - 1);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
if (!parser_error.empty()) {
|
|
157
|
+
if (options.extensions) {
|
|
158
|
+
for (auto &ext : *options.extensions) {
|
|
159
|
+
D_ASSERT(ext.parse_function);
|
|
160
|
+
auto result = ext.parse_function(ext.parser_info.get(), query);
|
|
161
|
+
if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
|
|
162
|
+
auto statement = make_unique<ExtensionStatement>(ext, move(result.parse_data));
|
|
163
|
+
statement->stmt_length = query.size();
|
|
164
|
+
statement->stmt_location = 0;
|
|
165
|
+
statements.push_back(move(statement));
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
|
|
169
|
+
throw ParserException(result.error);
|
|
44
170
|
}
|
|
45
171
|
}
|
|
46
|
-
throw ParserException(QueryErrorContext::Format(query, parser.error_message, parser.error_location - 1));
|
|
47
172
|
}
|
|
48
|
-
|
|
49
|
-
if (!parser.parse_tree) {
|
|
50
|
-
// empty statement
|
|
51
|
-
return;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
// if it succeeded, we transform the Postgres parse tree into a list of
|
|
55
|
-
// SQLStatements
|
|
56
|
-
transformer.TransformParseTree(parser.parse_tree, statements);
|
|
173
|
+
throw ParserException(parser_error);
|
|
57
174
|
}
|
|
58
175
|
if (!statements.empty()) {
|
|
59
176
|
auto &last_statement = statements.back();
|