duckdb 0.9.2-dev7.0 → 0.9.2

Files changed (123)
  1. package/.github/workflows/HighPriorityIssues.yml +36 -0
  2. package/.github/workflows/NodeJS.yml +18 -26
  3. package/README.md +2 -2
  4. package/lib/duckdb.d.ts +2 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-timebucket.cpp +7 -10
  7. package/src/duckdb/extension/icu/icu-timezone.cpp +3 -0
  8. package/src/duckdb/extension/json/buffered_json_reader.cpp +11 -18
  9. package/src/duckdb/extension/json/json_scan.cpp +10 -5
  10. package/src/duckdb/extension/parquet/parquet_extension.cpp +2 -2
  11. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +6 -12
  12. package/src/duckdb/src/catalog/catalog_set.cpp +3 -4
  13. package/src/duckdb/src/common/arrow/appender/union_data.cpp +1 -1
  14. package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -8
  15. package/src/duckdb/src/common/arrow/arrow_converter.cpp +5 -1
  16. package/src/duckdb/src/common/enum_util.cpp +67 -0
  17. package/src/duckdb/src/common/file_system.cpp +5 -1
  18. package/src/duckdb/src/common/hive_partitioning.cpp +6 -3
  19. package/src/duckdb/src/common/multi_file_reader.cpp +4 -2
  20. package/src/duckdb/src/common/types/list_segment.cpp +4 -0
  21. package/src/duckdb/src/common/types/vector.cpp +66 -34
  22. package/src/duckdb/src/common/types.cpp +3 -1
  23. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +84 -25
  24. package/src/duckdb/src/core_functions/function_list.cpp +2 -1
  25. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +8 -1
  26. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +23 -0
  27. package/src/duckdb/src/core_functions/scalar/string/jaccard.cpp +16 -23
  28. package/src/duckdb/src/execution/aggregate_hashtable.cpp +27 -18
  29. package/src/duckdb/src/execution/index/art/art_key.cpp +4 -4
  30. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
  31. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +5 -2
  32. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +4 -3
  33. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +25 -4
  34. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +5 -2
  35. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -1
  36. package/src/duckdb/src/execution/physical_operator.cpp +2 -1
  37. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +5 -0
  38. package/src/duckdb/src/execution/window_executor.cpp +13 -1
  39. package/src/duckdb/src/function/cast/union/from_struct.cpp +24 -7
  40. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +1 -1
  41. package/src/duckdb/src/function/function_set.cpp +1 -1
  42. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  43. package/src/duckdb/src/function/scalar/string/concat.cpp +4 -1
  44. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +32 -0
  45. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +46 -2
  46. package/src/duckdb/src/function/table/arrow.cpp +19 -17
  47. package/src/duckdb/src/function/table/arrow_conversion.cpp +67 -31
  48. package/src/duckdb/src/function/table/copy_csv.cpp +3 -3
  49. package/src/duckdb/src/function/table/system/pragma_user_agent.cpp +50 -0
  50. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  51. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  52. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +4 -0
  53. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +8 -2
  54. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +74 -4
  55. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +82 -3
  56. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +19 -9
  57. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +16 -0
  58. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +6 -0
  59. package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
  60. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +7 -0
  61. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/common/pipe_file_system.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +14 -2
  64. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +6 -0
  65. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_enum.hpp +21 -0
  66. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +5 -2
  67. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +6 -3
  68. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +1 -1
  69. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +1 -1
  70. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +2 -2
  72. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +6 -0
  73. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +2 -2
  74. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +3 -1
  75. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +37 -2
  76. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  77. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
  79. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +5 -4
  80. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +12 -0
  81. package/src/duckdb/src/include/duckdb/main/settings.hpp +18 -0
  82. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +0 -2
  84. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +8 -0
  85. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +5 -0
  86. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +4 -1
  87. package/src/duckdb/src/include/duckdb/parser/statement/create_statement.hpp +1 -0
  88. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  89. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +5 -7
  90. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -1
  91. package/src/duckdb/src/include/duckdb.h +1 -1
  92. package/src/duckdb/src/main/capi/config-c.cpp +1 -0
  93. package/src/duckdb/src/main/capi/duckdb-c.cpp +9 -1
  94. package/src/duckdb/src/main/config.cpp +18 -0
  95. package/src/duckdb/src/main/database.cpp +1 -0
  96. package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
  97. package/src/duckdb/src/main/extension/extension_helper.cpp +5 -4
  98. package/src/duckdb/src/main/settings/settings.cpp +49 -0
  99. package/src/duckdb/src/optimizer/expression_rewriter.cpp +0 -8
  100. package/src/duckdb/src/optimizer/filter_combiner.cpp +37 -23
  101. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +7 -4
  102. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +5 -4
  103. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +15 -4
  104. package/src/duckdb/src/parallel/pipeline_executor.cpp +81 -40
  105. package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +27 -0
  106. package/src/duckdb/src/parser/statement/create_statement.cpp +4 -0
  107. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +16 -3
  108. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +7 -0
  109. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -2
  110. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -0
  111. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +1 -1
  112. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +76 -2
  113. package/src/duckdb/src/storage/data_table.cpp +7 -1
  114. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +14 -0
  115. package/src/duckdb/src/storage/storage_info.cpp +2 -1
  116. package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -3
  117. package/src/duckdb/src/transaction/commit_state.cpp +1 -0
  118. package/src/duckdb/third_party/parquet/parquet_types.cpp +224 -221
  119. package/src/duckdb/third_party/parquet/parquet_types.h +0 -14
  120. package/src/duckdb/ub_src_common_arrow_appender.cpp +0 -4
  121. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  122. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  123. package/test/columns.test.ts +1 -1
package/.github/workflows/HighPriorityIssues.yml ADDED
@@ -0,0 +1,36 @@
+ name: Create Internal issue when the "High Priority" label is applied
+ on:
+   issues:
+     types:
+       - labeled
+
+ env:
+   GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }}
+   # an event triggering this workflow is either an issue or a pull request,
+   # hence only one of the numbers will be filled in the TITLE_PREFIX
+   TITLE_PREFIX: "[duckdb-node/#${{ github.event.issue.number }}]"
+   PUBLIC_ISSUE_TITLE: ${{ github.event.issue.title }}
+
+ jobs:
+   create_or_label_issue:
+     if: github.event.label.name == 'High Priority'
+     runs-on: ubuntu-latest
+     steps:
+       - name: Get mirror issue number
+         run: |
+           gh issue list --repo duckdblabs/duckdb-internal --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
+           echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> $GITHUB_ENV
+
+       - name: Print whether mirror issue exists
+         run: |
+           if [ "$MIRROR_ISSUE_NUMBER" == "" ]; then
+             echo "Mirror issue with title prefix '$TITLE_PREFIX' does not exist yet"
+           else
+             echo "Mirror issue with title prefix '$TITLE_PREFIX' exists with number $MIRROR_ISSUE_NUMBER"
+           fi
+
+       - name: Create or label issue
+         run: |
+           if [ "$MIRROR_ISSUE_NUMBER" == "" ]; then
+             gh issue create --repo duckdblabs/duckdb-internal --label "Node.js" --label "High Priority" --title "$TITLE_PREFIX - $PUBLIC_ISSUE_TITLE" --body "See https://github.com/duckdb/duckdb-node/issues/${{ github.event.issue.number }}"
+           fi
package/.github/workflows/NodeJS.yml CHANGED
@@ -2,6 +2,8 @@ name: NodeJS
  on:
    push:
    pull_request:
+   workflow_dispatch:
+   repository_dispatch:
 
  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
@@ -34,55 +36,36 @@ jobs:
    name: node.js Linux
    runs-on: ubuntu-20.04
    needs: set-up-npm
+   continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
    env:
      TARGET_ARCH: ${{ matrix.target_arch }}
      DUCKDB_NODE_BUILD_CACHE: 0
    strategy:
      matrix:
        # node.js current support policy to be found at https://github.com/duckdb/duckdb-node/tree/main/#Supported-Node-versions
-       node: [ '12', '14', '16', '17', '18', '19', '20' ]
+       node: [ '12', '14', '16', '17', '18', '19', '20', '21']
        target_arch: [ x64, arm64 ]
        isRelease:
          - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
        exclude:
          - isRelease: false
            node: 12
-           target_arch: x64
          - isRelease: false
            node: 14
-           target_arch: x64
          - isRelease: false
            node: 16
-           target_arch: x64
          - isRelease: false
            node: 17
-           target_arch: x64
-         - isRelease: false
-           node: 18
-           target_arch: x64
          - isRelease: false
            node: 19
-           target_arch: x64
-         - isRelease: false
-           node: 12
-           target_arch: arm64
-         - isRelease: false
-           node: 14
-           target_arch: arm64
-         - isRelease: false
-           node: 16
-           target_arch: arm64
-         - isRelease: false
-           node: 17
-           target_arch: arm64
          - isRelease: false
            node: 18
            target_arch: arm64
          - isRelease: false
-           node: 19
+           node: 20
            target_arch: arm64
          - isRelease: false
-           node: 20
+           node: 21
            target_arch: arm64
 
    steps:
@@ -127,10 +110,11 @@ jobs:
    name: node.js OSX
    runs-on: macos-latest
    needs: linux-nodejs
+   continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
    strategy:
      matrix:
        target_arch: [ x64, arm64 ]
-       node: [ '12', '14', '16', '17', '18', '19', '20' ]
+       node: [ '12', '14', '16', '17', '18', '19', '20', '21']
        isRelease:
          - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
        exclude:
@@ -143,7 +127,7 @@ jobs:
          - isRelease: false
            node: 17
          - isRelease: false
-           node: 18
+           node: 19
          - target_arch: arm64
            node: 12
          - target_arch: arm64
@@ -158,6 +142,11 @@ jobs:
        with:
          fetch-depth: 0
 
+     # Default Python (3.12) doesn't have support for distutils
+     - uses: actions/setup-python@v4
+       with:
+         python-version: '3.11'
+
      - name: Setup Ccache
        uses: hendrikmuhs/ccache-action@main
        with:
@@ -184,12 +173,13 @@ jobs:
    name: node.js Windows
    runs-on: windows-latest
    needs: linux-nodejs
+   continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
    env:
      npm_config_msvs_version: 2019
 
    strategy:
      matrix:
-       node: [ '12', '14', '16', '17', '18', '19', '20' ]
+       node: [ '12', '14', '16', '17', '18', '19', '20', '21']
        isRelease:
          - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
        exclude:
@@ -205,6 +195,8 @@ jobs:
            node: 18
          - isRelease: false
            node: 19
+         - isRelease: false
+           node: 20
 
    steps:
      - uses: actions/setup-python@v4
package/README.md CHANGED
@@ -101,10 +101,10 @@ var stmt = con.prepare('select ?::INTEGER as fortytwo', function(err, stmt) {
  ```
 
  ## Supported Node versions
- We actively support only LTS and In-Support Node versions, as per July 2023, they are: Node 16, Node 18 and Node 20.
+ We actively support only LTS and In-Support Node versions, as per July 2023, they are: Node 18, Node 20 and Node 21.
  Release schedule for Node.js can be checked here: https://github.com/nodejs/release#release-schedule.
 
- We currently bundle and test DuckDB also for Node 10, 12, 14, 17 and 19. We plan of going so going forward as long as the tooling supports it.
+ We currently bundle and test DuckDB also for Node 10, 12, 14, 16, 17 and 19. We plan of going so going forward as long as the tooling supports it.
  As per July 2023, Node 15 has been removed from the supported versions.
 
  ## Development
package/lib/duckdb.d.ts CHANGED
@@ -78,6 +78,8 @@ export type ArrowArray = Uint8Array[];
  export class Connection {
    constructor(db: Database, callback?: Callback<any>);
 
+   close(callback?: Callback<void>): void;
+
    all(sql: string, ...args: [...any, Callback<TableData>] | []): void;
    arrowIPCAll(sql: string, ...args: [...any, Callback<ArrowArray>] | []): void;
    each(sql: string, ...args: [...any, Callback<RowData>] | []): void;
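
The new declaration types a method that already existed at runtime. A minimal usage sketch (assuming the published duckdb package; values are illustrative):

```ts
import * as duckdb from 'duckdb';

const db = new duckdb.Database(':memory:');
const con = new duckdb.Connection(db);

con.all('SELECT 42 AS answer', (err, rows) => {
  if (err) throw err;
  console.log(rows); // [ { answer: 42 } ]
  // Release the connection explicitly instead of waiting for GC.
  con.close((closeErr) => {
    if (closeErr) throw closeErr;
  });
});
```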
package/package.json CHANGED
@@ -2,7 +2,7 @@
  "name": "duckdb",
  "main": "./lib/duckdb.js",
  "types": "./lib/duckdb.d.ts",
- "version": "0.9.2-dev7.0",
+ "version": "0.9.2",
  "description": "DuckDB node.js API",
  "gypfile": true,
  "dependencies": {
package/src/duckdb/extension/icu/icu-timebucket.cpp CHANGED
@@ -76,24 +76,21 @@ struct ICUTimeBucket : public ICUDateFunc {
 
  static inline timestamp_t WidthConvertibleToDaysCommon(int32_t bucket_width_days, const timestamp_t ts,
                                                         const timestamp_t origin, icu::Calendar *calendar) {
-     const auto trunc_days = TruncationFactory(DatePartSpecifier::DAY);
      const auto sub_days = SubtractFactory(DatePartSpecifier::DAY);
 
-     uint64_t tmp_micros = SetTime(calendar, ts);
-     trunc_days(calendar, tmp_micros);
-     timestamp_t truncated_ts = GetTimeUnsafe(calendar, tmp_micros);
-
-     int64_t ts_days = sub_days(calendar, origin, truncated_ts);
+     int64_t ts_days = sub_days(calendar, origin, ts);
      int64_t result_days = (ts_days / bucket_width_days) * bucket_width_days;
      if (result_days < NumericLimits<int32_t>::Minimum() || result_days > NumericLimits<int32_t>::Maximum()) {
          throw OutOfRangeException("Timestamp out of range");
      }
-     if (ts_days < 0 && ts_days % bucket_width_days != 0) {
-         result_days =
-             SubtractOperatorOverflowCheck::Operation<int32_t, int32_t, int32_t>(result_days, bucket_width_days);
+     timestamp_t bucket = Add(calendar, origin, interval_t {0, static_cast<int32_t>(result_days), 0});
+     if (ts < bucket) {
+         D_ASSERT(ts < origin);
+         bucket = Add(calendar, bucket, interval_t {0, -bucket_width_days, 0});
+         D_ASSERT(ts > bucket);
      }
 
-     return Add(calendar, origin, interval_t {0, static_cast<int32_t>(result_days), 0});
+     return bucket;
  }
 
  static inline timestamp_t WidthConvertibleToMonthsCommon(int32_t bucket_width_months, const timestamp_t ts,
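
The rewritten day-width path drops the day-truncation and modulo correction: it adds the rounded-down day count to the origin and, if the timestamp still precedes that candidate bucket (only possible for timestamps before the origin), steps back one full width. A sketch of the resulting behavior through the Node API (illustrative values; assumes the ICU extension bundled with this package):

```ts
import * as duckdb from 'duckdb';

const db = new duckdb.Database(':memory:');
// A timestamp *before* the origin must land in the bucket that starts
// at or before it, not in the one starting after it.
db.all(
  `SELECT time_bucket(INTERVAL '10 days',
                      TIMESTAMPTZ '1999-12-25 00:00:00+00',
                      TIMESTAMPTZ '2000-01-01 00:00:00+00') AS bucket`,
  (err, rows) => {
    if (err) throw err;
    // ts_days = -7, result_days = 0, origin-bucket starts after ts,
    // so the code steps back one width: expected start 1999-12-22 (UTC).
    console.log(rows[0].bucket);
  }
);
```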
package/src/duckdb/extension/icu/icu-timezone.cpp CHANGED
@@ -81,6 +81,9 @@ static void ICUTimeZoneFunction(ClientContext &context, TableFunctionInput &data
          break;
      }
 
+     // What PG reports is the total offset for today,
+     // which is the ICU total offset (i.e., "raw") plus the DST offset.
+     raw_offset_ms += dst_offset_ms;
      output.SetValue(2, index, Value::INTERVAL(Interval::FromMicro(raw_offset_ms * Interval::MICROS_PER_MSEC)));
      output.SetValue(3, index, Value(dst_offset_ms != 0));
      ++index;
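
With this change the reported utc_offset follows PostgreSQL's convention of raw offset plus DST offset. An illustrative check via the Node API (assumes the bundled ICU extension; the zone is arbitrary):

```ts
import * as duckdb from 'duckdb';

const db = new duckdb.Database(':memory:');
db.all(
  `SELECT name, utc_offset, is_dst
   FROM pg_timezone_names()
   WHERE name = 'America/New_York'`,
  (err, rows) => {
    if (err) throw err;
    // While DST is in effect, utc_offset now reads -04:00
    // (raw -05:00 plus the +01:00 DST offset) instead of raw -05:00.
    console.log(rows);
  }
);
```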
package/src/duckdb/extension/json/buffered_json_reader.cpp CHANGED
@@ -23,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
  }
 
  void JSONFileHandle::Close() {
-     if (IsOpen() && file_handle->OnDiskFile()) {
+     if (IsOpen() && !file_handle->IsPipe()) {
          file_handle->Close();
          file_handle = nullptr;
      }
@@ -72,30 +72,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
      D_ASSERT(size != 0);
      if (plain_file_source) {
          file_handle->Read(pointer, size, position);
-         actual_reads++;
-
-         return;
-     }
-
-     if (sample_run) { // Cache the buffer
+     } else if (sample_run) { // Cache the buffer
          file_handle->Read(pointer, size, position);
-         actual_reads++;
 
          cached_buffers.emplace_back(allocator.Allocate(size));
          memcpy(cached_buffers.back().get(), pointer, size);
          cached_size += size;
+     } else {
+         if (!cached_buffers.empty() || position < cached_size) {
+             ReadFromCache(pointer, size, position);
+         }
 
-         return;
-     }
-
-     if (!cached_buffers.empty() || position < cached_size) {
-         ReadFromCache(pointer, size, position);
-         actual_reads++;
+         if (size != 0) {
+             file_handle->Read(pointer, size, position);
+         }
      }
-
-     if (size != 0) {
-         file_handle->Read(pointer, size, position);
-         actual_reads++;
+     if (++actual_reads > requested_reads) {
+         throw InternalException("JSONFileHandle performed more actual reads than requested reads");
      }
  }
 
package/src/duckdb/extension/json/json_scan.cpp CHANGED
@@ -214,17 +214,22 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
 
  idx_t JSONGlobalTableFunctionState::MaxThreads() const {
      auto &bind_data = state.bind_data;
-     if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
-         return state.system_threads;
-     }
 
      if (!state.json_readers.empty() && state.json_readers[0]->HasFileHandle()) {
+         // We opened and auto-detected a file, so we can get a better estimate
          auto &reader = *state.json_readers[0];
-         if (reader.GetFormat() == JSONFormat::NEWLINE_DELIMITED) { // Auto-detected NDJSON
-             return state.system_threads;
+         if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED ||
+             reader.GetFormat() == JSONFormat::NEWLINE_DELIMITED) {
+             return MaxValue<idx_t>(state.json_readers[0]->GetFileHandle().FileSize() / bind_data.maximum_object_size,
+                                    1);
          }
      }
 
+     if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
+         // We haven't opened any files, so this is our best bet
+         return state.system_threads;
+     }
+
      // One reader per file
      return bind_data.files.size();
  }
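
The new estimate caps NDJSON parallelism at one thread per maximum_object_size-sized slice of the file rather than always claiming every system thread. A back-of-envelope version of the formula (made-up numbers):

```ts
// One thread per maximum_object_size-sized chunk, never fewer than one.
const fileSize = 256 * 1024 * 1024;         // 256 MiB NDJSON file (example)
const maximumObjectSize = 16 * 1024 * 1024; // reader's maximum object size (example)
const maxThreads = Math.max(Math.floor(fileSize / maximumObjectSize), 1);
console.log(maxThreads); // 16
```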
package/src/duckdb/extension/parquet/parquet_extension.cpp CHANGED
@@ -740,8 +740,8 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
      }
  }
 
- unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
-                                           vector<LogicalType> &sql_types) {
+ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
+                                           const vector<LogicalType> &sql_types) {
      D_ASSERT(names.size() == sql_types.size());
      bool row_group_size_bytes_set = false;
      auto bind_data = make_uniq<ParquetWriteBindData>();
package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp CHANGED
@@ -32,6 +32,7 @@ unique_ptr<CreateInfo> ViewCatalogEntry::GetInfo() const {
      result->query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
      result->aliases = aliases;
      result->types = types;
+     result->temporary = temporary;
      return std::move(result);
  }
 
@@ -58,23 +59,16 @@ string ViewCatalogEntry::ToSQL() const {
          //! Return empty sql with view name so pragma view_tables don't complain
          return sql;
      }
-     return sql + "\n;";
+     auto info = GetInfo();
+     auto result = info->ToString();
+     return result + ";\n";
  }
 
  unique_ptr<CatalogEntry> ViewCatalogEntry::Copy(ClientContext &context) const {
      D_ASSERT(!internal);
-     CreateViewInfo create_info(schema, name);
-     create_info.query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
-     for (idx_t i = 0; i < aliases.size(); i++) {
-         create_info.aliases.push_back(aliases[i]);
-     }
-     for (idx_t i = 0; i < types.size(); i++) {
-         create_info.types.push_back(types[i]);
-     }
-     create_info.temporary = temporary;
-     create_info.sql = sql;
+     auto create_info = GetInfo();
 
-     return make_uniq<ViewCatalogEntry>(catalog, schema, create_info);
+     return make_uniq<ViewCatalogEntry>(catalog, schema, create_info->Cast<CreateViewInfo>());
  }
 
  } // namespace duckdb
package/src/duckdb/src/catalog/catalog_set.cpp CHANGED
@@ -199,6 +199,8 @@ bool CatalogSet::AlterOwnership(CatalogTransaction transaction, ChangeOwnershipI
  bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name, AlterInfo &alter_info) {
      // lock the catalog for writing
      lock_guard<mutex> write_lock(catalog.GetWriteLock());
+     // lock this catalog set to disallow reading
+     lock_guard<mutex> read_lock(catalog_lock);
 
      // first check if the entry exists in the unordered set
      EntryIndex entry_index;
@@ -210,9 +212,6 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
          throw CatalogException("Cannot alter entry \"%s\" because it is an internal system entry", entry->name);
      }
 
-     // lock this catalog set to disallow reading
-     lock_guard<mutex> read_lock(catalog_lock);
-
      // create a new entry and replace the currently stored one
      // set the timestamp to the timestamp of the current transaction
      // and point it to the updated table node
@@ -316,6 +315,7 @@ void CatalogSet::DropEntryInternal(CatalogTransaction transaction, EntryIndex en
  bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, bool cascade, bool allow_drop_internal) {
      // lock the catalog for writing
      lock_guard<mutex> write_lock(catalog.GetWriteLock());
+     lock_guard<mutex> read_lock(catalog_lock);
      // we can only delete an entry that exists
      EntryIndex entry_index;
      auto entry = GetEntryInternal(transaction, name, &entry_index);
@@ -326,7 +326,6 @@ bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, b
          throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
      }
 
-     lock_guard<mutex> read_lock(catalog_lock);
      DropEntryInternal(transaction, std::move(entry_index), *entry, cascade);
      return true;
  }
package/src/duckdb/src/common/arrow/appender/union_data.cpp CHANGED
@@ -24,7 +24,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f
 
      duckdb::vector<Vector> child_vectors;
      for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
-         child_vectors.emplace_back(child.second);
+         child_vectors.emplace_back(child.second, size);
      }
 
      for (idx_t input_idx = from; input_idx < to; input_idx++) {
package/src/duckdb/src/common/arrow/arrow_appender.cpp CHANGED
@@ -193,26 +193,26 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
          if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
              InitializeAppenderForType<ArrowVarcharData<string_t>>(append_data);
          } else {
-             InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, uint32_t>>(append_data);
+             InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, int32_t>>(append_data);
          }
          break;
      case LogicalTypeId::UUID:
          if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
              InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter>>(append_data);
          } else {
-             InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, uint32_t>>(append_data);
+             InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, int32_t>>(append_data);
          }
          break;
      case LogicalTypeId::ENUM:
          switch (type.InternalType()) {
          case PhysicalType::UINT8:
-             InitializeAppenderForType<ArrowEnumData<uint8_t>>(append_data);
+             InitializeAppenderForType<ArrowEnumData<int8_t>>(append_data);
              break;
          case PhysicalType::UINT16:
-             InitializeAppenderForType<ArrowEnumData<uint16_t>>(append_data);
+             InitializeAppenderForType<ArrowEnumData<int16_t>>(append_data);
              break;
          case PhysicalType::UINT32:
-             InitializeAppenderForType<ArrowEnumData<uint32_t>>(append_data);
+             InitializeAppenderForType<ArrowEnumData<int32_t>>(append_data);
              break;
          default:
              throw InternalException("Unsupported internal enum type");
@@ -227,11 +227,20 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
      case LogicalTypeId::STRUCT:
          InitializeAppenderForType<ArrowStructData>(append_data);
          break;
-     case LogicalTypeId::LIST:
-         InitializeAppenderForType<ArrowListData>(append_data);
+     case LogicalTypeId::LIST: {
+         if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+             InitializeAppenderForType<ArrowListData<int64_t>>(append_data);
+         } else {
+             InitializeAppenderForType<ArrowListData<int32_t>>(append_data);
+         }
          break;
+     }
      case LogicalTypeId::MAP:
-         InitializeAppenderForType<ArrowMapData>(append_data);
+         if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+             InitializeAppenderForType<ArrowMapData<int64_t>>(append_data);
+         } else {
+             InitializeAppenderForType<ArrowMapData<int32_t>>(append_data);
+         }
          break;
      default:
          throw NotImplementedException("Unsupported type in DuckDB -> Arrow Conversion: %s\n", type.ToString());
package/src/duckdb/src/common/arrow/arrow_converter.cpp CHANGED
@@ -187,7 +187,11 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
          break;
      }
      case LogicalTypeId::LIST: {
-         child.format = "+l";
+         if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+             child.format = "+L";
+         } else {
+             child.format = "+l";
+         }
          child.n_children = 1;
          root_holder.nested_children.emplace_back();
          root_holder.nested_children.back().resize(1);
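
Together with the appender changes above, LIST columns now advertise the Arrow large-list format ("+L", 64-bit offsets) when large offsets are requested. A sketch of exercising this from Node; the arrow_large_buffer_size setting name is an assumption based on the settings.cpp entries listed in this release, not confirmed by the diff shown:

```ts
import * as duckdb from 'duckdb';

const db = new duckdb.Database(':memory:');
const con = new duckdb.Connection(db);

// Hypothetical setting name; adjust if the release uses a different one.
con.exec(`SET arrow_large_buffer_size = true`, (err) => {
  if (err) throw err;
  con.arrowIPCAll(`SELECT [1, 2, 3] AS l`, (ipcErr, ipc) => {
    if (ipcErr) throw ipcErr;
    // The serialized schema should now describe "l" with 64-bit offsets ("+L").
    console.log(ipc.length);
  });
});
```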
package/src/duckdb/src/common/enum_util.cpp CHANGED
@@ -64,6 +64,7 @@
  #include "duckdb/common/types/timestamp.hpp"
  #include "duckdb/common/types/vector.hpp"
  #include "duckdb/common/types/vector_buffer.hpp"
+ #include "duckdb/core_functions/aggregate/quantile_enum.hpp"
  #include "duckdb/execution/index/art/art.hpp"
  #include "duckdb/execution/index/art/node.hpp"
  #include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
@@ -4571,6 +4572,44 @@ ProfilerPrintFormat EnumUtil::FromString<ProfilerPrintFormat>(const char *value)
      throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
  }
 
+ template<>
+ const char* EnumUtil::ToChars<QuantileSerializationType>(QuantileSerializationType value) {
+     switch(value) {
+     case QuantileSerializationType::NON_DECIMAL:
+         return "NON_DECIMAL";
+     case QuantileSerializationType::DECIMAL_DISCRETE:
+         return "DECIMAL_DISCRETE";
+     case QuantileSerializationType::DECIMAL_DISCRETE_LIST:
+         return "DECIMAL_DISCRETE_LIST";
+     case QuantileSerializationType::DECIMAL_CONTINUOUS:
+         return "DECIMAL_CONTINUOUS";
+     case QuantileSerializationType::DECIMAL_CONTINUOUS_LIST:
+         return "DECIMAL_CONTINUOUS_LIST";
+     default:
+         throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
+     }
+ }
+
+ template<>
+ QuantileSerializationType EnumUtil::FromString<QuantileSerializationType>(const char *value) {
+     if (StringUtil::Equals(value, "NON_DECIMAL")) {
+         return QuantileSerializationType::NON_DECIMAL;
+     }
+     if (StringUtil::Equals(value, "DECIMAL_DISCRETE")) {
+         return QuantileSerializationType::DECIMAL_DISCRETE;
+     }
+     if (StringUtil::Equals(value, "DECIMAL_DISCRETE_LIST")) {
+         return QuantileSerializationType::DECIMAL_DISCRETE_LIST;
+     }
+     if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS")) {
+         return QuantileSerializationType::DECIMAL_CONTINUOUS;
+     }
+     if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS_LIST")) {
+         return QuantileSerializationType::DECIMAL_CONTINUOUS_LIST;
+     }
+     throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+ }
+
  template<>
  const char* EnumUtil::ToChars<QueryNodeType>(QueryNodeType value) {
      switch(value) {
@@ -5118,6 +5157,29 @@ SinkFinalizeType EnumUtil::FromString<SinkFinalizeType>(const char *value) {
      throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
  }
 
+ template<>
+ const char* EnumUtil::ToChars<SinkNextBatchType>(SinkNextBatchType value) {
+     switch(value) {
+     case SinkNextBatchType::READY:
+         return "READY";
+     case SinkNextBatchType::BLOCKED:
+         return "BLOCKED";
+     default:
+         throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
+     }
+ }
+
+ template<>
+ SinkNextBatchType EnumUtil::FromString<SinkNextBatchType>(const char *value) {
+     if (StringUtil::Equals(value, "READY")) {
+         return SinkNextBatchType::READY;
+     }
+     if (StringUtil::Equals(value, "BLOCKED")) {
+         return SinkNextBatchType::BLOCKED;
+     }
+     throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+ }
+
  template<>
  const char* EnumUtil::ToChars<SinkResultType>(SinkResultType value) {
      switch(value) {
@@ -6010,6 +6072,8 @@ const char* EnumUtil::ToChars<UnionInvalidReason>(UnionInvalidReason value) {
          return "VALIDITY_OVERLAP";
      case UnionInvalidReason::TAG_MISMATCH:
          return "TAG_MISMATCH";
+     case UnionInvalidReason::NULL_TAG:
+         return "NULL_TAG";
      default:
          throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
      }
@@ -6032,6 +6096,9 @@ UnionInvalidReason EnumUtil::FromString<UnionInvalidReason>(const char *value) {
      if (StringUtil::Equals(value, "TAG_MISMATCH")) {
          return UnionInvalidReason::TAG_MISMATCH;
      }
+     if (StringUtil::Equals(value, "NULL_TAG")) {
+         return UnionInvalidReason::NULL_TAG;
+     }
      throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
  }
 
package/src/duckdb/src/common/file_system.cpp CHANGED
@@ -344,7 +344,7 @@ bool FileSystem::FileExists(const string &filename) {
  }
 
  bool FileSystem::IsPipe(const string &filename) {
-     throw NotImplementedException("%s: IsPipe is not implemented!", GetName());
+     return false;
  }
 
  void FileSystem::RemoveFile(const string &filename) {
@@ -500,6 +500,10 @@ bool FileHandle::CanSeek() {
      return file_system.CanSeek();
  }
 
+ bool FileHandle::IsPipe() {
+     return file_system.IsPipe(path);
+ }
+
  string FileHandle::ReadLine() {
      string result;
      char buffer[1];
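
FileHandle now exposes IsPipe, which the JSON reader above uses to keep pipe handles open between sampling and reading. A sketch of the user-visible effect (POSIX-only path, illustrative):

```ts
import * as duckdb from 'duckdb';

// Pipe NDJSON into the process, e.g. `cat data.ndjson | node read_pipe.js`.
const db = new duckdb.Database(':memory:');
db.all(`SELECT count(*) AS n FROM read_json_auto('/dev/stdin')`, (err, rows) => {
  if (err) throw err;
  console.log(rows[0].n);
});
```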
package/src/duckdb/src/common/hive_partitioning.cpp CHANGED
@@ -64,7 +64,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
  // - s3://bucket/var1=value1/bla/bla/var2=value2
  // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
  // - folder/folder/folder/../var1=value1/etc/.//var2=value2
- const string HivePartitioning::REGEX_STRING = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
+ const string &HivePartitioning::RegexString() {
+     static string REGEX = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
+     return REGEX;
+ }
 
  std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_re2::RE2 &regex) {
      std::map<string, string> result;
@@ -79,7 +82,7 @@ std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_
  }
 
  std::map<string, string> HivePartitioning::Parse(const string &filename) {
-     duckdb_re2::RE2 regex(REGEX_STRING);
+     duckdb_re2::RE2 regex(RegexString());
      return Parse(filename, regex);
  }
 
@@ -94,7 +97,7 @@ void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<str
      vector<bool> have_preserved_filter(filters.size(), false);
      vector<unique_ptr<Expression>> pruned_filters;
      unordered_set<idx_t> filters_applied_to_files;
-     duckdb_re2::RE2 regex(REGEX_STRING);
+     duckdb_re2::RE2 regex(RegexString());
      auto table_index = get.table_index;
 
      if ((!filename_enabled && !hive_enabled) || filters.empty()) {
package/src/duckdb/src/common/multi_file_reader.cpp CHANGED
@@ -102,7 +102,9 @@ bool MultiFileReader::ComplexFilterPushdown(ClientContext &context, vector<strin
 
      unordered_map<string, column_t> column_map;
      for (idx_t i = 0; i < get.column_ids.size(); i++) {
-         column_map.insert({get.names[get.column_ids[i]], i});
+         if (!IsRowIdColumnId(get.column_ids[i])) {
+             column_map.insert({get.names[get.column_ids[i]], i});
+         }
      }
 
      auto start_files = files.size();
@@ -432,7 +434,7 @@ void MultiFileReaderOptions::AutoDetectHiveTypesInternal(const string &file, Cli
      }
      Value value(part.second);
      for (auto &candidate : candidates) {
-         const bool success = value.TryCastAs(context, candidate);
+         const bool success = value.TryCastAs(context, candidate, true);
          if (success) {
              hive_types_schema[name] = candidate;
              break;