duckdb 0.9.2-dev9.0 → 0.9.2
- package/.github/workflows/HighPriorityIssues.yml +36 -0
- package/.github/workflows/NodeJS.yml +18 -26
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-timebucket.cpp +7 -10
- package/src/duckdb/extension/icu/icu-timezone.cpp +3 -0
- package/src/duckdb/extension/json/buffered_json_reader.cpp +11 -18
- package/src/duckdb/extension/json/json_scan.cpp +10 -5
- package/src/duckdb/extension/parquet/parquet_extension.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +6 -12
- package/src/duckdb/src/catalog/catalog_set.cpp +3 -4
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -8
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +5 -1
- package/src/duckdb/src/common/enum_util.cpp +67 -0
- package/src/duckdb/src/common/file_system.cpp +5 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +6 -3
- package/src/duckdb/src/common/multi_file_reader.cpp +4 -2
- package/src/duckdb/src/common/types/list_segment.cpp +4 -0
- package/src/duckdb/src/common/types/vector.cpp +66 -34
- package/src/duckdb/src/common/types.cpp +3 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +84 -25
- package/src/duckdb/src/core_functions/function_list.cpp +2 -1
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +8 -1
- package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +23 -0
- package/src/duckdb/src/core_functions/scalar/string/jaccard.cpp +16 -23
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +27 -18
- package/src/duckdb/src/execution/index/art/art_key.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +5 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +4 -3
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +25 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +5 -2
- package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -1
- package/src/duckdb/src/execution/physical_operator.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +5 -0
- package/src/duckdb/src/execution/window_executor.cpp +13 -1
- package/src/duckdb/src/function/cast/union/from_struct.cpp +24 -7
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +1 -1
- package/src/duckdb/src/function/function_set.cpp +1 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/string/concat.cpp +4 -1
- package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +32 -0
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +46 -2
- package/src/duckdb/src/function/table/arrow.cpp +19 -17
- package/src/duckdb/src/function/table/arrow_conversion.cpp +67 -31
- package/src/duckdb/src/function/table/copy_csv.cpp +3 -3
- package/src/duckdb/src/function/table/system/pragma_user_agent.cpp +50 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +8 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +74 -4
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +82 -3
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +19 -9
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/pipe_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +14 -2
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +6 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_enum.hpp +21 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +6 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +6 -0
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +3 -1
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +37 -2
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +5 -4
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +12 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +18 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +8 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/statement/create_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +5 -7
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -1
- package/src/duckdb/src/include/duckdb.h +1 -1
- package/src/duckdb/src/main/capi/config-c.cpp +1 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +9 -1
- package/src/duckdb/src/main/config.cpp +18 -0
- package/src/duckdb/src/main/database.cpp +1 -0
- package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +5 -4
- package/src/duckdb/src/main/settings/settings.cpp +49 -0
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +0 -8
- package/src/duckdb/src/optimizer/filter_combiner.cpp +37 -23
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +7 -4
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +5 -4
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +15 -4
- package/src/duckdb/src/parallel/pipeline_executor.cpp +81 -40
- package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +27 -0
- package/src/duckdb/src/parser/statement/create_statement.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +16 -3
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +7 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -0
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +76 -2
- package/src/duckdb/src/storage/data_table.cpp +7 -1
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +14 -0
- package/src/duckdb/src/storage/storage_info.cpp +2 -1
- package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -3
- package/src/duckdb/src/transaction/commit_state.cpp +1 -0
- package/src/duckdb/third_party/parquet/parquet_types.cpp +224 -221
- package/src/duckdb/third_party/parquet/parquet_types.h +0 -14
- package/src/duckdb/ub_src_common_arrow_appender.cpp +0 -4
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/test/columns.test.ts +1 -1
package/.github/workflows/HighPriorityIssues.yml
ADDED
@@ -0,0 +1,36 @@
+name: Create Internal issue when the "High Priority" label is applied
+on:
+  issues:
+    types:
+      - labeled
+
+env:
+  GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }}
+  # an event triggering this workflow is either an issue or a pull request,
+  # hence only one of the numbers will be filled in the TITLE_PREFIX
+  TITLE_PREFIX: "[duckdb-node/#${{ github.event.issue.number }}]"
+  PUBLIC_ISSUE_TITLE: ${{ github.event.issue.title }}
+
+jobs:
+  create_or_label_issue:
+    if: github.event.label.name == 'High Priority'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get mirror issue number
+        run: |
+          gh issue list --repo duckdblabs/duckdb-internal --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
+          echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> $GITHUB_ENV
+
+      - name: Print whether mirror issue exists
+        run: |
+          if [ "$MIRROR_ISSUE_NUMBER" == "" ]; then
+            echo "Mirror issue with title prefix '$TITLE_PREFIX' does not exist yet"
+          else
+            echo "Mirror issue with title prefix '$TITLE_PREFIX' exists with number $MIRROR_ISSUE_NUMBER"
+          fi
+
+      - name: Create or label issue
+        run: |
+          if [ "$MIRROR_ISSUE_NUMBER" == "" ]; then
+            gh issue create --repo duckdblabs/duckdb-internal --label "Node.js" --label "High Priority" --title "$TITLE_PREFIX - $PUBLIC_ISSUE_TITLE" --body "See https://github.com/duckdb/duckdb-node/issues/${{ github.event.issue.number }}"
+          fi
package/.github/workflows/NodeJS.yml
CHANGED
@@ -2,6 +2,8 @@ name: NodeJS
 on:
   push:
   pull_request:
+  workflow_dispatch:
+  repository_dispatch:
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
@@ -34,55 +36,36 @@ jobs:
     name: node.js Linux
     runs-on: ubuntu-20.04
     needs: set-up-npm
+    continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
     env:
       TARGET_ARCH: ${{ matrix.target_arch }}
       DUCKDB_NODE_BUILD_CACHE: 0
     strategy:
       matrix:
         # node.js current support policy to be found at https://github.com/duckdb/duckdb-node/tree/main/#Supported-Node-versions
-        node: [ '12', '14', '16', '17', '18', '19', '20' ]
+        node: [ '12', '14', '16', '17', '18', '19', '20', '21']
         target_arch: [ x64, arm64 ]
         isRelease:
           - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
         exclude:
           - isRelease: false
             node: 12
-            target_arch: x64
           - isRelease: false
             node: 14
-            target_arch: x64
           - isRelease: false
             node: 16
-            target_arch: x64
           - isRelease: false
             node: 17
-            target_arch: x64
-          - isRelease: false
-            node: 18
-            target_arch: x64
           - isRelease: false
             node: 19
-            target_arch: x64
-          - isRelease: false
-            node: 12
-            target_arch: arm64
-          - isRelease: false
-            node: 14
-            target_arch: arm64
-          - isRelease: false
-            node: 16
-            target_arch: arm64
-          - isRelease: false
-            node: 17
-            target_arch: arm64
           - isRelease: false
             node: 18
             target_arch: arm64
           - isRelease: false
-            node: 19
+            node: 20
             target_arch: arm64
           - isRelease: false
-            node: 20
+            node: 21
             target_arch: arm64
 
     steps:
@@ -127,10 +110,11 @@ jobs:
     name: node.js OSX
     runs-on: macos-latest
     needs: linux-nodejs
+    continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
     strategy:
       matrix:
         target_arch: [ x64, arm64 ]
-        node: [ '12', '14', '16', '17', '18', '19', '20' ]
+        node: [ '12', '14', '16', '17', '18', '19', '20', '21']
         isRelease:
           - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
         exclude:
@@ -143,7 +127,7 @@ jobs:
           - isRelease: false
             node: 17
           - isRelease: false
-            node:
+            node: 19
           - target_arch: arm64
             node: 12
           - target_arch: arm64
@@ -158,6 +142,11 @@ jobs:
         with:
           fetch-depth: 0
 
+      # Default Python (3.12) doesn't have support for distutils
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
       - name: Setup Ccache
         uses: hendrikmuhs/ccache-action@main
         with:
@@ -184,12 +173,13 @@ jobs:
     name: node.js Windows
     runs-on: windows-latest
     needs: linux-nodejs
+    continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
     env:
       npm_config_msvs_version: 2019
 
     strategy:
       matrix:
-        node: [ '12', '14', '16', '17', '18', '19', '20' ]
+        node: [ '12', '14', '16', '17', '18', '19', '20', '21']
         isRelease:
           - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
         exclude:
@@ -205,6 +195,8 @@ jobs:
             node: 18
           - isRelease: false
             node: 19
+          - isRelease: false
+            node: 20
 
     steps:
       - uses: actions/setup-python@v4
package/README.md
CHANGED
@@ -101,10 +101,10 @@ var stmt = con.prepare('select ?::INTEGER as fortytwo', function(err, stmt) {
 ```
 
 ## Supported Node versions
-We actively support only LTS and In-Support Node versions, as per July 2023, they are: Node
+We actively support only LTS and In-Support Node versions, as per July 2023, they are: Node 18, Node 20 and Node 21.
 Release schedule for Node.js can be checked here: https://github.com/nodejs/release#release-schedule.
 
-We currently bundle and test DuckDB also for Node 10, 12, 14, 17 and 19. We plan of going so going forward as long as the tooling supports it.
+We currently bundle and test DuckDB also for Node 10, 12, 14, 16, 17 and 19. We plan of going so going forward as long as the tooling supports it.
 As per July 2023, Node 15 has been removed from the supported versions.
 
 ## Development
package/package.json
CHANGED
package/src/duckdb/extension/icu/icu-timebucket.cpp
CHANGED
@@ -76,24 +76,21 @@ struct ICUTimeBucket : public ICUDateFunc {
 
 	static inline timestamp_t WidthConvertibleToDaysCommon(int32_t bucket_width_days, const timestamp_t ts,
 	                                                        const timestamp_t origin, icu::Calendar *calendar) {
-		const auto trunc_days = TruncationFactory(DatePartSpecifier::DAY);
 		const auto sub_days = SubtractFactory(DatePartSpecifier::DAY);
 
-
-		trunc_days(calendar, tmp_micros);
-		timestamp_t truncated_ts = GetTimeUnsafe(calendar, tmp_micros);
-
-		int64_t ts_days = sub_days(calendar, origin, truncated_ts);
+		int64_t ts_days = sub_days(calendar, origin, ts);
 		int64_t result_days = (ts_days / bucket_width_days) * bucket_width_days;
 		if (result_days < NumericLimits<int32_t>::Minimum() || result_days > NumericLimits<int32_t>::Maximum()) {
 			throw OutOfRangeException("Timestamp out of range");
 		}
-
-
-
+		timestamp_t bucket = Add(calendar, origin, interval_t {0, static_cast<int32_t>(result_days), 0});
+		if (ts < bucket) {
+			D_ASSERT(ts < origin);
+			bucket = Add(calendar, bucket, interval_t {0, -bucket_width_days, 0});
+			D_ASSERT(ts > bucket);
 		}
 
-		return
+		return bucket;
 	}
 
 	static inline timestamp_t WidthConvertibleToMonthsCommon(int32_t bucket_width_months, const timestamp_t ts,
package/src/duckdb/extension/icu/icu-timezone.cpp
CHANGED
@@ -81,6 +81,9 @@ static void ICUTimeZoneFunction(ClientContext &context, TableFunctionInput &data
 			break;
 		}
 
+		// What PG reports is the total offset for today,
+		// which is the ICU total offset (i.e., "raw") plus the DST offset.
+		raw_offset_ms += dst_offset_ms;
 		output.SetValue(2, index, Value::INTERVAL(Interval::FromMicro(raw_offset_ms * Interval::MICROS_PER_MSEC)));
 		output.SetValue(3, index, Value(dst_offset_ms != 0));
 		++index;
package/src/duckdb/extension/json/buffered_json_reader.cpp
CHANGED
@@ -23,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
 }
 
 void JSONFileHandle::Close() {
-	if (IsOpen() && file_handle->
+	if (IsOpen() && !file_handle->IsPipe()) {
 		file_handle->Close();
 		file_handle = nullptr;
 	}
@@ -72,30 +72,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
 	D_ASSERT(size != 0);
 	if (plain_file_source) {
 		file_handle->Read(pointer, size, position);
-
-
-		return;
-	}
-
-	if (sample_run) { // Cache the buffer
+	} else if (sample_run) { // Cache the buffer
 		file_handle->Read(pointer, size, position);
-		actual_reads++;
 
 		cached_buffers.emplace_back(allocator.Allocate(size));
 		memcpy(cached_buffers.back().get(), pointer, size);
 		cached_size += size;
+	} else {
+		if (!cached_buffers.empty() || position < cached_size) {
+			ReadFromCache(pointer, size, position);
+		}
 
-
-
-
-	if (!cached_buffers.empty() || position < cached_size) {
-		ReadFromCache(pointer, size, position);
-		actual_reads++;
+		if (size != 0) {
+			file_handle->Read(pointer, size, position);
+		}
 	}
-
-
-	file_handle->Read(pointer, size, position);
-	actual_reads++;
+	if (++actual_reads > requested_reads) {
+		throw InternalException("JSONFileHandle performed more actual reads than requested reads");
 	}
 }
 
package/src/duckdb/extension/json/json_scan.cpp
CHANGED
@@ -214,17 +214,22 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
 
 idx_t JSONGlobalTableFunctionState::MaxThreads() const {
 	auto &bind_data = state.bind_data;
-	if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
-		return state.system_threads;
-	}
 
 	if (!state.json_readers.empty() && state.json_readers[0]->HasFileHandle()) {
+		// We opened and auto-detected a file, so we can get a better estimate
 		auto &reader = *state.json_readers[0];
-		if (
-
+		if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED ||
+		    reader.GetFormat() == JSONFormat::NEWLINE_DELIMITED) {
+			return MaxValue<idx_t>(state.json_readers[0]->GetFileHandle().FileSize() / bind_data.maximum_object_size,
+			                       1);
 		}
 	}
 
+	if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
+		// We haven't opened any files, so this is our best bet
+		return state.system_threads;
+	}
+
 	// One reader per file
 	return bind_data.files.size();
 }
package/src/duckdb/extension/parquet/parquet_extension.cpp
CHANGED
@@ -740,8 +740,8 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
 	}
 }
 
-unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
-                                          vector<LogicalType> &sql_types) {
+unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
+                                          const vector<LogicalType> &sql_types) {
 	D_ASSERT(names.size() == sql_types.size());
 	bool row_group_size_bytes_set = false;
 	auto bind_data = make_uniq<ParquetWriteBindData>();
package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp
CHANGED
@@ -32,6 +32,7 @@ unique_ptr<CreateInfo> ViewCatalogEntry::GetInfo() const {
 	result->query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
 	result->aliases = aliases;
 	result->types = types;
+	result->temporary = temporary;
 	return std::move(result);
 }
 
@@ -58,23 +59,16 @@ string ViewCatalogEntry::ToSQL() const {
 		//! Return empty sql with view name so pragma view_tables don't complain
 		return sql;
 	}
-
+	auto info = GetInfo();
+	auto result = info->ToString();
+	return result + ";\n";
 }
 
 unique_ptr<CatalogEntry> ViewCatalogEntry::Copy(ClientContext &context) const {
 	D_ASSERT(!internal);
-
-	create_info.query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
-	for (idx_t i = 0; i < aliases.size(); i++) {
-		create_info.aliases.push_back(aliases[i]);
-	}
-	for (idx_t i = 0; i < types.size(); i++) {
-		create_info.types.push_back(types[i]);
-	}
-	create_info.temporary = temporary;
-	create_info.sql = sql;
+	auto create_info = GetInfo();
 
-	return make_uniq<ViewCatalogEntry>(catalog, schema, create_info);
+	return make_uniq<ViewCatalogEntry>(catalog, schema, create_info->Cast<CreateViewInfo>());
 }
 
 } // namespace duckdb
package/src/duckdb/src/catalog/catalog_set.cpp
CHANGED
@@ -199,6 +199,8 @@ bool CatalogSet::AlterOwnership(CatalogTransaction transaction, ChangeOwnershipI
 bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name, AlterInfo &alter_info) {
 	// lock the catalog for writing
 	lock_guard<mutex> write_lock(catalog.GetWriteLock());
+	// lock this catalog set to disallow reading
+	lock_guard<mutex> read_lock(catalog_lock);
 
 	// first check if the entry exists in the unordered set
 	EntryIndex entry_index;
@@ -210,9 +212,6 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
 		throw CatalogException("Cannot alter entry \"%s\" because it is an internal system entry", entry->name);
 	}
 
-	// lock this catalog set to disallow reading
-	lock_guard<mutex> read_lock(catalog_lock);
-
 	// create a new entry and replace the currently stored one
 	// set the timestamp to the timestamp of the current transaction
 	// and point it to the updated table node
@@ -316,6 +315,7 @@ void CatalogSet::DropEntryInternal(CatalogTransaction transaction, EntryIndex en
 bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, bool cascade, bool allow_drop_internal) {
 	// lock the catalog for writing
 	lock_guard<mutex> write_lock(catalog.GetWriteLock());
+	lock_guard<mutex> read_lock(catalog_lock);
 	// we can only delete an entry that exists
 	EntryIndex entry_index;
 	auto entry = GetEntryInternal(transaction, name, &entry_index);
@@ -326,7 +326,6 @@ bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, b
 		throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
 	}
 
-	lock_guard<mutex> read_lock(catalog_lock);
 	DropEntryInternal(transaction, std::move(entry_index), *entry, cascade);
 	return true;
 }
package/src/duckdb/src/common/arrow/appender/union_data.cpp
CHANGED
@@ -24,7 +24,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f
 
 	duckdb::vector<Vector> child_vectors;
 	for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
-		child_vectors.emplace_back(child.second);
+		child_vectors.emplace_back(child.second, size);
 	}
 
 	for (idx_t input_idx = from; input_idx < to; input_idx++) {
package/src/duckdb/src/common/arrow/arrow_appender.cpp
CHANGED
@@ -193,26 +193,26 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
 		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
 			InitializeAppenderForType<ArrowVarcharData<string_t>>(append_data);
 		} else {
-			InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter,
+			InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, int32_t>>(append_data);
 		}
 		break;
 	case LogicalTypeId::UUID:
 		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
 			InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter>>(append_data);
 		} else {
-			InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter,
+			InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, int32_t>>(append_data);
 		}
 		break;
 	case LogicalTypeId::ENUM:
 		switch (type.InternalType()) {
 		case PhysicalType::UINT8:
-			InitializeAppenderForType<ArrowEnumData<
+			InitializeAppenderForType<ArrowEnumData<int8_t>>(append_data);
 			break;
 		case PhysicalType::UINT16:
-			InitializeAppenderForType<ArrowEnumData<
+			InitializeAppenderForType<ArrowEnumData<int16_t>>(append_data);
 			break;
 		case PhysicalType::UINT32:
-			InitializeAppenderForType<ArrowEnumData<
+			InitializeAppenderForType<ArrowEnumData<int32_t>>(append_data);
 			break;
 		default:
 			throw InternalException("Unsupported internal enum type");
@@ -227,11 +227,20 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
 	case LogicalTypeId::STRUCT:
 		InitializeAppenderForType<ArrowStructData>(append_data);
 		break;
-	case LogicalTypeId::LIST:
-
+	case LogicalTypeId::LIST: {
+		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+			InitializeAppenderForType<ArrowListData<int64_t>>(append_data);
+		} else {
+			InitializeAppenderForType<ArrowListData<int32_t>>(append_data);
+		}
 		break;
+	}
 	case LogicalTypeId::MAP:
-
+		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+			InitializeAppenderForType<ArrowMapData<int64_t>>(append_data);
+		} else {
+			InitializeAppenderForType<ArrowMapData<int32_t>>(append_data);
+		}
 		break;
 	default:
 		throw NotImplementedException("Unsupported type in DuckDB -> Arrow Conversion: %s\n", type.ToString());
package/src/duckdb/src/common/arrow/arrow_converter.cpp
CHANGED
@@ -187,7 +187,11 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		break;
 	}
 	case LogicalTypeId::LIST: {
-
+		if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+			child.format = "+L";
+		} else {
+			child.format = "+l";
+		}
 		child.n_children = 1;
 		root_holder.nested_children.emplace_back();
 		root_holder.nested_children.back().resize(1);
package/src/duckdb/src/common/enum_util.cpp
CHANGED
@@ -64,6 +64,7 @@
 #include "duckdb/common/types/timestamp.hpp"
 #include "duckdb/common/types/vector.hpp"
 #include "duckdb/common/types/vector_buffer.hpp"
+#include "duckdb/core_functions/aggregate/quantile_enum.hpp"
 #include "duckdb/execution/index/art/art.hpp"
 #include "duckdb/execution/index/art/node.hpp"
 #include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
@@ -4571,6 +4572,44 @@ ProfilerPrintFormat EnumUtil::FromString<ProfilerPrintFormat>(const char *value)
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
+template<>
+const char* EnumUtil::ToChars<QuantileSerializationType>(QuantileSerializationType value) {
+	switch(value) {
+	case QuantileSerializationType::NON_DECIMAL:
+		return "NON_DECIMAL";
+	case QuantileSerializationType::DECIMAL_DISCRETE:
+		return "DECIMAL_DISCRETE";
+	case QuantileSerializationType::DECIMAL_DISCRETE_LIST:
+		return "DECIMAL_DISCRETE_LIST";
+	case QuantileSerializationType::DECIMAL_CONTINUOUS:
+		return "DECIMAL_CONTINUOUS";
+	case QuantileSerializationType::DECIMAL_CONTINUOUS_LIST:
+		return "DECIMAL_CONTINUOUS_LIST";
+	default:
+		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
+	}
+}
+
+template<>
+QuantileSerializationType EnumUtil::FromString<QuantileSerializationType>(const char *value) {
+	if (StringUtil::Equals(value, "NON_DECIMAL")) {
+		return QuantileSerializationType::NON_DECIMAL;
+	}
+	if (StringUtil::Equals(value, "DECIMAL_DISCRETE")) {
+		return QuantileSerializationType::DECIMAL_DISCRETE;
+	}
+	if (StringUtil::Equals(value, "DECIMAL_DISCRETE_LIST")) {
+		return QuantileSerializationType::DECIMAL_DISCRETE_LIST;
+	}
+	if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS")) {
+		return QuantileSerializationType::DECIMAL_CONTINUOUS;
+	}
+	if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS_LIST")) {
+		return QuantileSerializationType::DECIMAL_CONTINUOUS_LIST;
+	}
+	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+}
+
 template<>
 const char* EnumUtil::ToChars<QueryNodeType>(QueryNodeType value) {
 	switch(value) {
@@ -5118,6 +5157,29 @@ SinkFinalizeType EnumUtil::FromString<SinkFinalizeType>(const char *value) {
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
+template<>
+const char* EnumUtil::ToChars<SinkNextBatchType>(SinkNextBatchType value) {
+	switch(value) {
+	case SinkNextBatchType::READY:
+		return "READY";
+	case SinkNextBatchType::BLOCKED:
+		return "BLOCKED";
+	default:
+		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
+	}
+}
+
+template<>
+SinkNextBatchType EnumUtil::FromString<SinkNextBatchType>(const char *value) {
+	if (StringUtil::Equals(value, "READY")) {
+		return SinkNextBatchType::READY;
+	}
+	if (StringUtil::Equals(value, "BLOCKED")) {
+		return SinkNextBatchType::BLOCKED;
+	}
+	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+}
+
 template<>
 const char* EnumUtil::ToChars<SinkResultType>(SinkResultType value) {
 	switch(value) {
@@ -6010,6 +6072,8 @@ const char* EnumUtil::ToChars<UnionInvalidReason>(UnionInvalidReason value) {
 		return "VALIDITY_OVERLAP";
 	case UnionInvalidReason::TAG_MISMATCH:
 		return "TAG_MISMATCH";
+	case UnionInvalidReason::NULL_TAG:
+		return "NULL_TAG";
 	default:
 		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
 	}
@@ -6032,6 +6096,9 @@ UnionInvalidReason EnumUtil::FromString<UnionInvalidReason>(const char *value) {
 	if (StringUtil::Equals(value, "TAG_MISMATCH")) {
 		return UnionInvalidReason::TAG_MISMATCH;
 	}
+	if (StringUtil::Equals(value, "NULL_TAG")) {
+		return UnionInvalidReason::NULL_TAG;
+	}
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
package/src/duckdb/src/common/file_system.cpp
CHANGED
@@ -344,7 +344,7 @@ bool FileSystem::FileExists(const string &filename) {
 }
 
 bool FileSystem::IsPipe(const string &filename) {
-
+	return false;
 }
 
 void FileSystem::RemoveFile(const string &filename) {
@@ -500,6 +500,10 @@ bool FileHandle::CanSeek() {
 	return file_system.CanSeek();
 }
 
+bool FileHandle::IsPipe() {
+	return file_system.IsPipe(path);
+}
+
 string FileHandle::ReadLine() {
 	string result;
 	char buffer[1];
package/src/duckdb/src/common/hive_partitioning.cpp
CHANGED
@@ -64,7 +64,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
 // - s3://bucket/var1=value1/bla/bla/var2=value2
 // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
 // - folder/folder/folder/../var1=value1/etc/.//var2=value2
-const string HivePartitioning::
+const string &HivePartitioning::RegexString() {
+	static string REGEX = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
+	return REGEX;
+}
 
 std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_re2::RE2 &regex) {
 	std::map<string, string> result;
@@ -79,7 +82,7 @@ std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_
 }
 
 std::map<string, string> HivePartitioning::Parse(const string &filename) {
-	duckdb_re2::RE2 regex(
+	duckdb_re2::RE2 regex(RegexString());
 	return Parse(filename, regex);
 }
 
@@ -94,7 +97,7 @@ void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<str
 	vector<bool> have_preserved_filter(filters.size(), false);
 	vector<unique_ptr<Expression>> pruned_filters;
 	unordered_set<idx_t> filters_applied_to_files;
-	duckdb_re2::RE2 regex(
+	duckdb_re2::RE2 regex(RegexString());
 	auto table_index = get.table_index;
 
 	if ((!filename_enabled && !hive_enabled) || filters.empty()) {
package/src/duckdb/src/common/multi_file_reader.cpp
CHANGED
@@ -102,7 +102,9 @@ bool MultiFileReader::ComplexFilterPushdown(ClientContext &context, vector<strin
 
 	unordered_map<string, column_t> column_map;
 	for (idx_t i = 0; i < get.column_ids.size(); i++) {
-
+		if (!IsRowIdColumnId(get.column_ids[i])) {
+			column_map.insert({get.names[get.column_ids[i]], i});
+		}
 	}
 
 	auto start_files = files.size();
@@ -432,7 +434,7 @@ void MultiFileReaderOptions::AutoDetectHiveTypesInternal(const string &file, Cli
 	}
 	Value value(part.second);
 	for (auto &candidate : candidates) {
-		const bool success = value.TryCastAs(context, candidate);
+		const bool success = value.TryCastAs(context, candidate, true);
 		if (success) {
 			hive_types_schema[name] = candidate;
 			break;
package/src/duckdb/src/common/types/list_segment.cpp
CHANGED
@@ -462,6 +462,10 @@ void SegmentPrimitiveFunction(ListSegmentFunctions &functions) {
 
 void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type) {
 
+	if (type.id() == LogicalTypeId::UNKNOWN) {
+		throw ParameterNotResolvedException();
+	}
+
 	auto physical_type = type.InternalType();
 	switch (physical_type) {
 	case PhysicalType::BIT:
|