duckdb 0.7.1-dev7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/README.md +1 -1
  2. package/binding.gyp +7 -7
  3. package/package.json +3 -3
  4. package/src/duckdb/extension/json/buffered_json_reader.cpp +50 -9
  5. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +7 -2
  6. package/src/duckdb/extension/json/include/json_scan.hpp +45 -10
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +35 -22
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +8 -8
  9. package/src/duckdb/extension/json/json_functions/json_structure.cpp +8 -3
  10. package/src/duckdb/extension/json/json_functions/json_transform.cpp +54 -10
  11. package/src/duckdb/extension/json/json_functions/read_json.cpp +104 -49
  12. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +5 -3
  13. package/src/duckdb/extension/json/json_functions.cpp +7 -0
  14. package/src/duckdb/extension/json/json_scan.cpp +144 -37
  15. package/src/duckdb/extension/parquet/column_reader.cpp +7 -0
  16. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -0
  17. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -9
  18. package/src/duckdb/src/catalog/catalog.cpp +62 -13
  19. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +8 -7
  20. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -1
  21. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  22. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  23. package/src/duckdb/src/catalog/default/default_views.cpp +1 -1
  24. package/src/duckdb/src/common/bind_helpers.cpp +55 -0
  25. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  26. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  27. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  28. package/src/duckdb/src/common/file_system.cpp +28 -0
  29. package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
  30. package/src/duckdb/src/common/local_file_system.cpp +4 -4
  31. package/src/duckdb/src/common/operator/cast_operators.cpp +14 -8
  32. package/src/duckdb/src/common/printer.cpp +1 -1
  33. package/src/duckdb/src/common/string_util.cpp +8 -4
  34. package/src/duckdb/src/common/types/partitioned_column_data.cpp +1 -0
  35. package/src/duckdb/src/common/types/time.cpp +1 -1
  36. package/src/duckdb/src/common/types/timestamp.cpp +35 -4
  37. package/src/duckdb/src/common/types.cpp +37 -11
  38. package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
  39. package/src/duckdb/src/execution/index/art/art.cpp +117 -67
  40. package/src/duckdb/src/execution/index/art/art_key.cpp +24 -12
  41. package/src/duckdb/src/execution/index/art/leaf.cpp +7 -8
  42. package/src/duckdb/src/execution/index/art/node.cpp +13 -27
  43. package/src/duckdb/src/execution/index/art/node16.cpp +5 -8
  44. package/src/duckdb/src/execution/index/art/node256.cpp +3 -5
  45. package/src/duckdb/src/execution/index/art/node4.cpp +4 -7
  46. package/src/duckdb/src/execution/index/art/node48.cpp +5 -8
  47. package/src/duckdb/src/execution/index/art/prefix.cpp +2 -3
  48. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -27
  49. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -9
  50. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -9
  51. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
  52. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +6 -11
  53. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +13 -13
  54. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
  55. package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +37 -0
  56. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -5
  57. package/src/duckdb/src/execution/physical_operator.cpp +6 -6
  58. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +4 -0
  59. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -0
  60. package/src/duckdb/src/function/pragma/pragma_queries.cpp +38 -11
  61. package/src/duckdb/src/function/scalar/generic/current_setting.cpp +2 -2
  62. package/src/duckdb/src/function/scalar/list/array_slice.cpp +2 -3
  63. package/src/duckdb/src/function/scalar/map/map.cpp +69 -21
  64. package/src/duckdb/src/function/scalar/string/like.cpp +6 -3
  65. package/src/duckdb/src/function/table/read_csv.cpp +17 -8
  66. package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +59 -0
  67. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  68. package/src/duckdb/src/function/table/table_scan.cpp +3 -0
  69. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  70. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +7 -1
  71. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/common/bind_helpers.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  75. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  76. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +3 -2
  77. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +3 -0
  78. package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
  79. package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
  80. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
  81. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
  82. package/src/duckdb/src/include/duckdb/common/string_util.hpp +9 -2
  83. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +5 -1
  84. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +37 -41
  85. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +8 -11
  86. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -3
  87. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -2
  88. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  89. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +32 -0
  90. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -1
  91. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  92. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
  93. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -3
  94. package/src/duckdb/src/include/duckdb/main/{extension_functions.hpp → extension_entries.hpp} +27 -5
  95. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +11 -1
  96. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  97. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +0 -7
  98. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_database_info.hpp +0 -4
  99. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +32 -0
  100. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +1 -1
  101. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -2
  102. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
  103. package/src/duckdb/src/include/duckdb/parser/statement/detach_statement.hpp +29 -0
  104. package/src/duckdb/src/include/duckdb/parser/statement/list.hpp +1 -0
  105. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +3 -3
  106. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +1 -1
  107. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  108. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -0
  110. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +10 -3
  111. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
  112. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
  113. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +8 -0
  114. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +7 -1
  115. package/src/duckdb/src/include/duckdb/storage/index.hpp +47 -38
  116. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +7 -0
  117. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -0
  118. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +7 -0
  119. package/src/duckdb/src/main/client_context.cpp +2 -0
  120. package/src/duckdb/src/main/config.cpp +1 -0
  121. package/src/duckdb/src/main/database.cpp +14 -5
  122. package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
  123. package/src/duckdb/src/main/extension/extension_helper.cpp +15 -0
  124. package/src/duckdb/src/main/extension/extension_install.cpp +60 -16
  125. package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
  126. package/src/duckdb/src/main/settings/settings.cpp +16 -0
  127. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
  128. package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -55
  129. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +3 -0
  130. package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
  131. package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
  132. package/src/duckdb/src/parser/statement/detach_statement.cpp +15 -0
  133. package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
  134. package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
  135. package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
  136. package/src/duckdb/src/parser/transform/statement/transform_create_database.cpp +0 -1
  137. package/src/duckdb/src/parser/transform/statement/transform_detach.cpp +19 -0
  138. package/src/duckdb/src/parser/transformer.cpp +2 -0
  139. package/src/duckdb/src/planner/bind_context.cpp +1 -1
  140. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +3 -0
  141. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +7 -14
  142. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +16 -14
  143. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +13 -0
  144. package/src/duckdb/src/planner/binder/statement/bind_detach.cpp +19 -0
  145. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +29 -4
  146. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
  147. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +2 -1
  148. package/src/duckdb/src/planner/binder.cpp +2 -0
  149. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +32 -1
  150. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +21 -5
  151. package/src/duckdb/src/planner/logical_operator.cpp +6 -1
  152. package/src/duckdb/src/planner/planner.cpp +1 -0
  153. package/src/duckdb/src/storage/buffer_manager.cpp +105 -26
  154. package/src/duckdb/src/storage/compression/bitpacking.cpp +16 -7
  155. package/src/duckdb/src/storage/data_table.cpp +66 -3
  156. package/src/duckdb/src/storage/index.cpp +1 -1
  157. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  158. package/src/duckdb/src/storage/storage_info.cpp +2 -1
  159. package/src/duckdb/src/storage/table/column_data.cpp +4 -2
  160. package/src/duckdb/src/storage/table/update_segment.cpp +15 -0
  161. package/src/duckdb/src/storage/table_index_list.cpp +1 -2
  162. package/src/duckdb/src/storage/wal_replay.cpp +68 -0
  163. package/src/duckdb/src/storage/write_ahead_log.cpp +21 -1
  164. package/src/duckdb/src/transaction/commit_state.cpp +5 -2
  165. package/src/duckdb/third_party/concurrentqueue/blockingconcurrentqueue.h +2 -2
  166. package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
  167. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
  168. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +14 -0
  169. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +530 -1006
  170. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17659 -17626
  171. package/src/duckdb/third_party/thrift/thrift/Thrift.h +8 -2
  172. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  173. package/src/duckdb/ub_src_execution_operator_schema.cpp +2 -0
  174. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  175. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  176. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  177. package/src/duckdb/ub_src_planner_binder_statement.cpp +2 -0
  178. package/src/statement.cpp +46 -12
  179. package/test/arrow.test.ts +3 -3
  180. package/test/prepare.test.ts +39 -1
  181. package/test/typescript_decls.test.ts +1 -1
  182. package/src/duckdb/src/include/duckdb/function/create_database_extension.hpp +0 -37
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # DuckDB Node Bindings
2
2
 
3
- This package provides a node.js API for [DuckDB](https://github.com/cwida/duckdb), the "SQLite for Analytics". The API for this client is somewhat compliant to the SQLite node.js client for easier transition (and transition you must eventually).
3
+ This package provides a node.js API for [DuckDB](https://github.com/duckdb/duckdb), the "SQLite for Analytics". The API for this client is somewhat compliant to the SQLite node.js client for easier transition (and transition you must eventually).
4
4
 
5
5
  Load the package and create a database object:
6
6
 
package/binding.gyp CHANGED
@@ -222,16 +222,16 @@
222
222
  "src/duckdb/third_party/zstd/compress/zstd_lazy.cpp",
223
223
  "src/duckdb/third_party/zstd/compress/zstd_ldm.cpp",
224
224
  "src/duckdb/third_party/zstd/compress/zstd_opt.cpp",
225
- "src/duckdb/extension/icu/./icu-timezone.cpp",
226
- "src/duckdb/extension/icu/./icu-makedate.cpp",
227
- "src/duckdb/extension/icu/./icu-datepart.cpp",
228
- "src/duckdb/extension/icu/./icu-datesub.cpp",
225
+ "src/duckdb/extension/icu/./icu-dateadd.cpp",
229
226
  "src/duckdb/extension/icu/./icu-datetrunc.cpp",
230
- "src/duckdb/extension/icu/./icu-timebucket.cpp",
231
227
  "src/duckdb/extension/icu/./icu-strptime.cpp",
232
- "src/duckdb/extension/icu/./icu-extension.cpp",
233
- "src/duckdb/extension/icu/./icu-dateadd.cpp",
234
228
  "src/duckdb/extension/icu/./icu-datefunc.cpp",
229
+ "src/duckdb/extension/icu/./icu-extension.cpp",
230
+ "src/duckdb/extension/icu/./icu-makedate.cpp",
231
+ "src/duckdb/extension/icu/./icu-timezone.cpp",
232
+ "src/duckdb/extension/icu/./icu-datesub.cpp",
233
+ "src/duckdb/extension/icu/./icu-timebucket.cpp",
234
+ "src/duckdb/extension/icu/./icu-datepart.cpp",
235
235
  "src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
236
236
  "src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
237
237
  "src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.1-dev7.0",
5
+ "version": "0.7.1",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -41,7 +41,7 @@
41
41
  },
42
42
  "repository": {
43
43
  "type": "git",
44
- "url": "git+https://github.com/cwida/duckdb.git"
44
+ "url": "git+https://github.com/duckdb/duckdb.git"
45
45
  },
46
46
  "ts-node": {
47
47
  "require": [
@@ -56,7 +56,7 @@
56
56
  "author": "Hannes Mühleisen",
57
57
  "license": "MPL-2.0",
58
58
  "bugs": {
59
- "url": "https://github.com/cwida/duckdb/issues"
59
+ "url": "https://github.com/duckdb/duckdb/issues"
60
60
  },
61
61
  "homepage": "https://www.duckdb.org"
62
62
  }
@@ -25,7 +25,12 @@ JSONBufferHandle::JSONBufferHandle(idx_t buffer_index_p, idx_t readers_p, Alloca
25
25
  JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p, Allocator &allocator_p)
26
26
  : file_handle(std::move(file_handle_p)), allocator(allocator_p), can_seek(file_handle->CanSeek()),
27
27
  plain_file_source(file_handle->OnDiskFile() && can_seek), file_size(file_handle->GetFileSize()), read_position(0),
28
- cached_size(0) {
28
+ requested_reads(0), actual_reads(0), cached_size(0) {
29
+ }
30
+
31
+ void JSONFileHandle::Close() {
32
+ file_handle->Close();
33
+ cached_buffers.clear();
29
34
  }
30
35
 
31
36
  idx_t JSONFileHandle::FileSize() const {
@@ -36,10 +41,6 @@ idx_t JSONFileHandle::Remaining() const {
36
41
  return file_size - read_position;
37
42
  }
38
43
 
39
- bool JSONFileHandle::PlainFileSource() const {
40
- return plain_file_source;
41
- }
42
-
43
44
  bool JSONFileHandle::CanSeek() const {
44
45
  return can_seek;
45
46
  }
@@ -53,6 +54,9 @@ idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size)
53
54
  position = read_position;
54
55
  auto actual_size = MinValue<idx_t>(requested_size, Remaining());
55
56
  read_position += actual_size;
57
+ if (actual_size != 0) {
58
+ requested_reads++;
59
+ }
56
60
  return actual_size;
57
61
  }
58
62
 
@@ -60,11 +64,13 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
60
64
  D_ASSERT(size != 0);
61
65
  if (plain_file_source) {
62
66
  file_handle->Read((void *)pointer, size, position);
67
+ actual_reads++;
63
68
  return;
64
69
  }
65
70
 
66
71
  if (sample_run) { // Cache the buffer
67
72
  file_handle->Read((void *)pointer, size, position);
73
+ actual_reads++;
68
74
  cached_buffers.emplace_back(allocator.Allocate(size));
69
75
  memcpy(cached_buffers.back().get(), pointer, size);
70
76
  cached_size += size;
@@ -73,22 +79,24 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
73
79
 
74
80
  if (!cached_buffers.empty() || position < cached_size) {
75
81
  ReadFromCache(pointer, size, position);
82
+ actual_reads++;
76
83
  }
77
84
  if (size != 0) {
78
85
  file_handle->Read((void *)pointer, size, position);
86
+ actual_reads++;
79
87
  }
80
88
  }
81
89
 
82
90
  idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size, bool sample_run) {
83
91
  D_ASSERT(requested_size != 0);
84
92
  if (plain_file_source) {
85
- auto actual_size = file_handle->Read((void *)pointer, requested_size);
93
+ auto actual_size = ReadInternal(pointer, requested_size);
86
94
  read_position += actual_size;
87
95
  return actual_size;
88
96
  }
89
97
 
90
98
  if (sample_run) { // Cache the buffer
91
- auto actual_size = file_handle->Read((void *)pointer, requested_size);
99
+ auto actual_size = ReadInternal(pointer, requested_size);
92
100
  if (actual_size > 0) {
93
101
  cached_buffers.emplace_back(allocator.Allocate(actual_size));
94
102
  memcpy(cached_buffers.back().get(), pointer, actual_size);
@@ -103,7 +111,7 @@ idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size, bool sampl
103
111
  actual_size += ReadFromCache(pointer, requested_size, read_position);
104
112
  }
105
113
  if (requested_size != 0) {
106
- actual_size += file_handle->Read((void *)pointer, requested_size);
114
+ actual_size += ReadInternal(pointer, requested_size);
107
115
  }
108
116
  return actual_size;
109
117
  }
@@ -111,7 +119,10 @@ idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size, bool sampl
111
119
  idx_t JSONFileHandle::ReadFromCache(const char *&pointer, idx_t &size, idx_t &position) {
112
120
  idx_t read_size = 0;
113
121
  idx_t total_offset = 0;
114
- for (auto &cached_buffer : cached_buffers) {
122
+
123
+ idx_t cached_buffer_idx;
124
+ for (cached_buffer_idx = 0; cached_buffer_idx < cached_buffers.size(); cached_buffer_idx++) {
125
+ auto &cached_buffer = cached_buffers[cached_buffer_idx];
115
126
  if (size == 0) {
116
127
  break;
117
128
  }
@@ -127,9 +138,23 @@ idx_t JSONFileHandle::ReadFromCache(const char *&pointer, idx_t &size, idx_t &po
127
138
  }
128
139
  total_offset += cached_buffer.GetSize();
129
140
  }
141
+
130
142
  return read_size;
131
143
  }
132
144
 
145
+ idx_t JSONFileHandle::ReadInternal(const char *pointer, const idx_t requested_size) {
146
+ // Deal with reading from pipes
147
+ idx_t total_read_size = 0;
148
+ while (total_read_size < requested_size) {
149
+ auto read_size = file_handle->Read((void *)(pointer + total_read_size), requested_size - total_read_size);
150
+ if (read_size == 0) {
151
+ break;
152
+ }
153
+ total_read_size += read_size;
154
+ }
155
+ return total_read_size;
156
+ }
157
+
133
158
  BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, string file_path_p)
134
159
  : file_path(std::move(file_path_p)), context(context), options(std::move(options_p)), buffer_index(0) {
135
160
  }
@@ -143,6 +168,16 @@ void BufferedJSONReader::OpenJSONFile() {
143
168
  file_handle = make_unique<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
144
169
  }
145
170
 
171
+ void BufferedJSONReader::CloseJSONFile() {
172
+ while (true) {
173
+ lock_guard<mutex> guard(lock);
174
+ if (file_handle->RequestedReadsComplete()) {
175
+ file_handle->Close();
176
+ break;
177
+ }
178
+ }
179
+ }
180
+
146
181
  bool BufferedJSONReader::IsOpen() {
147
182
  return file_handle != nullptr;
148
183
  }
@@ -246,9 +281,15 @@ void BufferedJSONReader::Reset() {
246
281
 
247
282
  void JSONFileHandle::Reset() {
248
283
  read_position = 0;
284
+ requested_reads = 0;
285
+ actual_reads = 0;
249
286
  if (plain_file_source) {
250
287
  file_handle->Reset();
251
288
  }
252
289
  }
253
290
 
291
+ bool JSONFileHandle::RequestedReadsComplete() {
292
+ return requested_reads == actual_reads;
293
+ }
294
+
254
295
  } // namespace duckdb
@@ -21,7 +21,7 @@ enum class JSONFormat : uint8_t {
21
21
  AUTO_DETECT = 0,
22
22
  //! One object after another, newlines can be anywhere
23
23
  UNSTRUCTURED = 1,
24
- //! Objects are separated by newlines, newlines do not occur within objects (NDJSON)
24
+ //! Objects are separated by newlines, newlines do not occur within values (NDJSON)
25
25
  NEWLINE_DELIMITED = 2,
26
26
  };
27
27
 
@@ -58,11 +58,11 @@ public:
58
58
  struct JSONFileHandle {
59
59
  public:
60
60
  JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
61
+ void Close();
61
62
 
62
63
  idx_t FileSize() const;
63
64
  idx_t Remaining() const;
64
65
 
65
- bool PlainFileSource() const;
66
66
  bool CanSeek() const;
67
67
  void Seek(idx_t position);
68
68
 
@@ -71,9 +71,11 @@ public:
71
71
  idx_t Read(const char *pointer, idx_t requested_size, bool sample_run);
72
72
 
73
73
  void Reset();
74
+ bool RequestedReadsComplete();
74
75
 
75
76
  private:
76
77
  idx_t ReadFromCache(const char *&pointer, idx_t &size, idx_t &position);
78
+ idx_t ReadInternal(const char *pointer, const idx_t requested_size);
77
79
 
78
80
  private:
79
81
  //! The JSON file handle
@@ -87,6 +89,8 @@ private:
87
89
 
88
90
  //! Read properties
89
91
  idx_t read_position;
92
+ idx_t requested_reads;
93
+ atomic<idx_t> actual_reads;
90
94
 
91
95
  //! Cached buffers for resetting when reading stream
92
96
  vector<AllocatedData> cached_buffers;
@@ -98,6 +102,7 @@ public:
98
102
  BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_path);
99
103
 
100
104
  void OpenJSONFile();
105
+ void CloseJSONFile();
101
106
  bool IsOpen();
102
107
 
103
108
  BufferedJSONReaderOptions &GetOptions();
@@ -20,12 +20,25 @@ enum class JSONScanType : uint8_t {
20
20
  INVALID = 0,
21
21
  //! Read JSON straight to columnar data
22
22
  READ_JSON = 1,
23
- //! Read JSON objects as strings
23
+ //! Read JSON values as strings
24
24
  READ_JSON_OBJECTS = 2,
25
25
  //! Sample run for schema detection
26
26
  SAMPLE = 3,
27
27
  };
28
28
 
29
+ enum class JSONRecordType : uint8_t {
30
+ //! Sequential values
31
+ RECORDS = 0,
32
+ //! Array of values
33
+ ARRAY_OF_RECORDS = 1,
34
+ //! Sequential non-object JSON
35
+ JSON = 2,
36
+ //! Array of non-object JSON
37
+ ARRAY_OF_JSON = 3,
38
+ //! Auto-detect
39
+ AUTO = 4,
40
+ };
41
+
29
42
  //! Even though LogicalTypeId is just a uint8_t, this is still needed ...
30
43
  struct LogicalTypeIdHash {
31
44
  inline std::size_t operator()(const LogicalTypeId &id) const {
@@ -104,8 +117,8 @@ public:
104
117
  vector<idx_t> valid_cols;
105
118
  //! Max depth we go to detect nested JSON schema (defaults to unlimited)
106
119
  idx_t max_depth = NumericLimits<idx_t>::Maximum();
107
- //! Whether we're parsing objects (usually), or something else like arrays
108
- bool objects = true;
120
+ //! Whether we're parsing values (usually), or something else
121
+ JSONRecordType record_type = JSONRecordType::RECORDS;
109
122
  //! Forced date/timestamp formats
110
123
  string date_format;
111
124
  string timestamp_format;
@@ -119,12 +132,13 @@ public:
119
132
  struct JSONScanInfo : public TableFunctionInfo {
120
133
  public:
121
134
  explicit JSONScanInfo(JSONScanType type_p = JSONScanType::INVALID, JSONFormat format_p = JSONFormat::AUTO_DETECT,
122
- bool auto_detect_p = false)
123
- : type(type_p), format(format_p), auto_detect(auto_detect_p) {
135
+ JSONRecordType record_type_p = JSONRecordType::AUTO, bool auto_detect_p = false)
136
+ : type(type_p), format(format_p), record_type(record_type_p), auto_detect(auto_detect_p) {
124
137
  }
125
138
 
126
139
  JSONScanType type;
127
140
  JSONFormat format;
141
+ JSONRecordType record_type;
128
142
  bool auto_detect;
129
143
  };
130
144
 
@@ -179,10 +193,15 @@ public:
179
193
  public:
180
194
  idx_t ReadNext(JSONScanGlobalState &gstate);
181
195
  yyjson_alc *GetAllocator();
182
- void ThrowTransformError(idx_t count, idx_t object_index, const string &error_message);
196
+ void ThrowTransformError(idx_t object_index, const string &error_message);
183
197
 
198
+ idx_t scan_count;
184
199
  JSONLine lines[STANDARD_VECTOR_SIZE];
185
- yyjson_val *objects[STANDARD_VECTOR_SIZE];
200
+ yyjson_val *values[STANDARD_VECTOR_SIZE];
201
+
202
+ idx_t array_idx;
203
+ idx_t array_offset;
204
+ yyjson_val *array_values[STANDARD_VECTOR_SIZE];
186
205
 
187
206
  idx_t batch_index;
188
207
 
@@ -192,6 +211,7 @@ public:
192
211
 
193
212
  private:
194
213
  yyjson_val *ParseLine(char *line_start, idx_t line_size, idx_t remaining, JSONLine &line);
214
+ idx_t GetObjectsFromArray(JSONScanGlobalState &gstate);
195
215
 
196
216
  private:
197
217
  //! Bind data
@@ -212,7 +232,7 @@ private:
212
232
  idx_t prev_buffer_remainder;
213
233
  idx_t lines_or_objects_in_buffer;
214
234
 
215
- //! Buffer to reconstruct split objects
235
+ //! Buffer to reconstruct split values
216
236
  AllocatedData reconstruct_buffer;
217
237
  //! Copy of current buffer for YYJSON_READ_INSITU
218
238
  AllocatedData current_buffer_copy;
@@ -276,6 +296,21 @@ public:
276
296
  return lstate.GetBatchIndex();
277
297
  }
278
298
 
299
+ static unique_ptr<NodeStatistics> JSONScanCardinality(ClientContext &context, const FunctionData *bind_data) {
300
+ auto &data = (JSONScanData &)*bind_data;
301
+ idx_t per_file_cardinality;
302
+ if (data.stored_readers.empty()) {
303
+ // The cardinality of an unknown JSON file is the almighty number 42 except when it's not
304
+ per_file_cardinality = 42;
305
+ } else {
306
+ // If we multiply the almighty number 42 by 10, we get the exact average size of a JSON
307
+ // Not really, but the average size of a lineitem row in JSON is around 360 bytes
308
+ per_file_cardinality = data.stored_readers[0]->GetFileHandle().FileSize() / 420;
309
+ }
310
+ // Obviously this can be improved but this is better than defaulting to 0
311
+ return make_unique<NodeStatistics>(per_file_cardinality * data.file_paths.size());
312
+ }
313
+
279
314
  static void JSONScanSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const TableFunction &function) {
280
315
  auto &bind_data = (JSONScanData &)*bind_data_p;
281
316
  bind_data.Serialize(writer);
@@ -291,16 +326,16 @@ public:
291
326
  static void TableFunctionDefaults(TableFunction &table_function) {
292
327
  table_function.named_parameters["maximum_object_size"] = LogicalType::UINTEGER;
293
328
  table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
294
- table_function.named_parameters["format"] = LogicalType::VARCHAR;
329
+ table_function.named_parameters["lines"] = LogicalType::VARCHAR;
295
330
  table_function.named_parameters["compression"] = LogicalType::VARCHAR;
296
331
 
297
332
  table_function.table_scan_progress = JSONScanProgress;
298
333
  table_function.get_batch_index = JSONScanGetBatchIndex;
334
+ table_function.cardinality = JSONScanCardinality;
299
335
 
300
336
  table_function.serialize = JSONScanSerialize;
301
337
  table_function.deserialize = JSONScanDeserialize;
302
338
 
303
- // TODO: might be able to do some of these
304
339
  table_function.projection_pushdown = false;
305
340
  table_function.filter_pushdown = false;
306
341
  table_function.filter_prune = false;
@@ -1,7 +1,9 @@
1
1
  #include "duckdb/function/copy_function.hpp"
2
2
  #include "duckdb/parser/expression/constant_expression.hpp"
3
3
  #include "duckdb/parser/expression/function_expression.hpp"
4
+ #include "duckdb/parser/expression/positional_reference_expression.hpp"
4
5
  #include "duckdb/parser/query_node/select_node.hpp"
6
+ #include "duckdb/parser/tableref/subqueryref.hpp"
5
7
  #include "duckdb/planner/binder.hpp"
6
8
  #include "json_functions.hpp"
7
9
  #include "json_scan.hpp"
@@ -12,42 +14,52 @@ namespace duckdb {
12
14
  static BoundStatement CopyToJSONPlan(Binder &binder, CopyStatement &stmt) {
13
15
  auto stmt_copy = stmt.Copy();
14
16
  auto &copy = (CopyStatement &)*stmt_copy;
15
- auto &select_stmt = (SelectNode &)*copy.select_statement;
16
17
  auto &info = *copy.info;
17
18
 
18
- // strftime if the user specified a format TODO: deal with date/timestamp within nested types
19
- auto date_it = info.options.find("dateformat");
20
- auto timestamp_it = info.options.find("timestampformat");
21
-
22
19
  // Bind the select statement of the original to resolve the types
23
20
  auto dummy_binder = Binder::CreateBinder(binder.context, &binder, true);
24
21
  auto bound_original = dummy_binder->Bind(*stmt.select_statement);
25
- D_ASSERT(bound_original.types.size() == select_stmt.select_list.size());
26
- const idx_t num_cols = bound_original.types.size();
27
22
 
28
- // This loop also makes sure the columns have an alias (needed for struct_pack)
23
+ // Create new SelectNode with the original SelectNode as a subquery in the FROM clause
24
+ auto select_stmt = make_unique<SelectStatement>();
25
+ select_stmt->node = std::move(copy.select_statement);
26
+ auto subquery_ref = make_unique<SubqueryRef>(std::move(select_stmt));
27
+ copy.select_statement = make_unique_base<QueryNode, SelectNode>();
28
+ auto &new_select_node = (SelectNode &)*copy.select_statement;
29
+ new_select_node.from_table = std::move(subquery_ref);
30
+
31
+ // Create new select list
32
+ vector<unique_ptr<ParsedExpression>> select_list;
33
+ select_list.reserve(bound_original.types.size());
34
+
35
+ // strftime if the user specified a format (loop also gives columns a name, needed for struct_pack)
36
+ // TODO: deal with date/timestamp within nested types
37
+ const auto date_it = info.options.find("dateformat");
38
+ const auto timestamp_it = info.options.find("timestampformat");
29
39
  vector<unique_ptr<ParsedExpression>> strftime_children;
30
- for (idx_t i = 0; i < num_cols; i++) {
40
+ for (idx_t col_idx = 0; col_idx < bound_original.types.size(); col_idx++) {
41
+ auto column = make_unique_base<ParsedExpression, PositionalReferenceExpression>(col_idx + 1);
31
42
  strftime_children.clear();
32
- auto &col = select_stmt.select_list[i];
33
- auto name = col->GetName();
34
- if (bound_original.types[i] == LogicalTypeId::DATE && date_it != info.options.end()) {
35
- strftime_children.emplace_back(std::move(col));
43
+ const auto &type = bound_original.types[col_idx];
44
+ const auto &name = bound_original.names[col_idx];
45
+ if (date_it != info.options.end() && type == LogicalTypeId::DATE) {
46
+ strftime_children.emplace_back(std::move(column));
36
47
  strftime_children.emplace_back(make_unique<ConstantExpression>(date_it->second.back()));
37
- col = make_unique<FunctionExpression>("strftime", std::move(strftime_children));
38
- } else if (bound_original.types[i] == LogicalTypeId::TIMESTAMP && timestamp_it != info.options.end()) {
39
- strftime_children.emplace_back(std::move(col));
48
+ column = make_unique<FunctionExpression>("strftime", std::move(strftime_children));
49
+ } else if (timestamp_it != info.options.end() && type == LogicalTypeId::TIMESTAMP) {
50
+ strftime_children.emplace_back(std::move(column));
40
51
  strftime_children.emplace_back(make_unique<ConstantExpression>(timestamp_it->second.back()));
41
- col = make_unique<FunctionExpression>("strftime", std::move(strftime_children));
52
+ column = make_unique<FunctionExpression>("strftime", std::move(strftime_children));
42
53
  }
43
- col->alias = name;
54
+ column->alias = name;
55
+ select_list.emplace_back(std::move(column));
44
56
  }
45
57
 
46
58
  // Now create the struct_pack/to_json to create a JSON object per row
59
+ auto &select_node = (SelectNode &)*copy.select_statement;
47
60
  vector<unique_ptr<ParsedExpression>> struct_pack_child;
48
- struct_pack_child.emplace_back(make_unique<FunctionExpression>("struct_pack", std::move(select_stmt.select_list)));
49
- select_stmt.select_list.clear();
50
- select_stmt.select_list.emplace_back(make_unique<FunctionExpression>("to_json", std::move(struct_pack_child)));
61
+ struct_pack_child.emplace_back(make_unique<FunctionExpression>("struct_pack", std::move(select_list)));
62
+ select_node.select_list.emplace_back(make_unique<FunctionExpression>("to_json", std::move(struct_pack_child)));
51
63
 
52
64
  // Now we can just use the CSV writer
53
65
  info.format = "csv";
@@ -101,7 +113,8 @@ CreateCopyFunctionInfo JSONFunctions::GetJSONCopyFunction() {
101
113
 
102
114
  function.copy_from_bind = CopyFromJSONBind;
103
115
  function.copy_from_function = JSONFunctions::GetReadJSONTableFunction(
104
- false, make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, false));
116
+ false,
117
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, JSONRecordType::RECORDS, false));
105
118
 
106
119
  return CreateCopyFunctionInfo(function);
107
120
  }
@@ -56,7 +56,7 @@ static LogicalType GetJSONType(unordered_map<string, unique_ptr<Vector>> &const_
56
56
  // The nested types need to conform as well
57
57
  case LogicalTypeId::LIST:
58
58
  return LogicalType::LIST(GetJSONType(const_struct_names, ListType::GetChildType(type)));
59
- // Struct and MAP are treated as JSON objects
59
+ // Struct and MAP are treated as JSON values
60
60
  case LogicalTypeId::STRUCT: {
61
61
  child_list_t<LogicalType> child_types;
62
62
  for (const auto &child_type : StructType::GetChildTypes(type)) {
@@ -247,14 +247,14 @@ static void TemplatedCreateValues(yyjson_mut_doc *doc, yyjson_mut_val *vals[], V
247
247
 
248
248
  static void CreateValuesStruct(const JSONCreateFunctionData &info, yyjson_mut_doc *doc, yyjson_mut_val *vals[],
249
249
  Vector &value_v, idx_t count) {
250
- // Structs become objects, therefore we initialize vals to JSON objects
250
+ // Structs become values, therefore we initialize vals to JSON values
251
251
  for (idx_t i = 0; i < count; i++) {
252
252
  vals[i] = yyjson_mut_obj(doc);
253
253
  }
254
254
  // Initialize re-usable array for the nested values
255
255
  auto nested_vals = (yyjson_mut_val **)doc->alc.malloc(doc->alc.ctx, sizeof(yyjson_mut_val *) * count);
256
256
 
257
- // Add the key/value pairs to the objects
257
+ // Add the key/value pairs to the values
258
258
  auto &entries = StructVector::GetEntries(value_v);
259
259
  for (idx_t entry_i = 0; entry_i < entries.size(); entry_i++) {
260
260
  auto &struct_key_v = *info.const_struct_names.at(StructType::GetChildName(value_v.GetType(), entry_i));
@@ -284,7 +284,7 @@ static void CreateValuesMap(const JSONCreateFunctionData &info, yyjson_mut_doc *
284
284
  auto map_val_count = ListVector::GetListSize(value_v);
285
285
  auto nested_vals = (yyjson_mut_val **)doc->alc.malloc(doc->alc.ctx, sizeof(yyjson_mut_val *) * map_val_count);
286
286
  CreateValues(info, doc, nested_vals, map_val_v, map_val_count);
287
- // Add the key/value pairs to the objects
287
+ // Add the key/value pairs to the values
288
288
  UnifiedVectorFormat map_data;
289
289
  value_v.ToUnifiedFormat(count, map_data);
290
290
  auto map_key_list_entries = (list_entry_t *)map_data.data;
@@ -308,7 +308,7 @@ static void CreateValuesMap(const JSONCreateFunctionData &info, yyjson_mut_doc *
308
308
 
309
309
  static void CreateValuesUnion(const JSONCreateFunctionData &info, yyjson_mut_doc *doc, yyjson_mut_val *vals[],
310
310
  Vector &value_v, idx_t count) {
311
- // Structs become objects, therefore we initialize vals to JSON objects
311
+ // Structs become values, therefore we initialize vals to JSON values
312
312
  for (idx_t i = 0; i < count; i++) {
313
313
  vals[i] = yyjson_mut_obj(doc);
314
314
  }
@@ -320,7 +320,7 @@ static void CreateValuesUnion(const JSONCreateFunctionData &info, yyjson_mut_doc
320
320
  UnifiedVectorFormat tag_data;
321
321
  tag_v.ToUnifiedFormat(count, tag_data);
322
322
 
323
- // Add the key/value pairs to the objects
323
+ // Add the key/value pairs to the values
324
324
  for (idx_t member_idx = 0; member_idx < UnionType::GetMemberCount(value_v.GetType()); member_idx++) {
325
325
  auto &member_val_v = UnionVector::GetMember(value_v, member_idx);
326
326
  auto &member_key_v = *info.const_struct_names.at(UnionType::GetMemberName(value_v.GetType(), member_idx));
@@ -425,7 +425,7 @@ static void ObjectFunction(DataChunk &args, ExpressionState &state, Vector &resu
425
425
  auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
426
426
  auto alc = lstate.json_allocator.GetYYJSONAllocator();
427
427
 
428
- // Initialize objects
428
+ // Initialize values
429
429
  const idx_t count = args.size();
430
430
  auto doc = JSONCommon::CreateDocument(alc);
431
431
  yyjson_mut_val *objs[STANDARD_VECTOR_SIZE];
@@ -440,7 +440,7 @@ static void ObjectFunction(DataChunk &args, ExpressionState &state, Vector &resu
440
440
  Vector &value_v = args.data[pair_idx * 2 + 1];
441
441
  CreateKeyValuePairs(info, doc, objs, vals, key_v, value_v, count);
442
442
  }
443
- // Write JSON objects to string
443
+ // Write JSON values to string
444
444
  auto objects = FlatVector::GetData<string_t>(result);
445
445
  for (idx_t i = 0; i < count; i++) {
446
446
  objects[i] = JSONCommon::WriteVal<yyjson_mut_val>(objs[i], alc);
@@ -214,9 +214,6 @@ void JSONStructureNode::RefineCandidateTypesObject(yyjson_val *vals[], idx_t cou
214
214
  }
215
215
  }
216
216
 
217
- if (count > STANDARD_VECTOR_SIZE) {
218
- string_vector.Initialize(false, count);
219
- }
220
217
  for (idx_t child_idx = 0; child_idx < child_count; child_idx++) {
221
218
  desc.children[child_idx].RefineCandidateTypes(child_vals[child_idx], count, string_vector, allocator,
222
219
  date_format_map);
@@ -431,6 +428,10 @@ static inline yyjson_mut_val *ConvertStructureArray(const JSONStructureNode &nod
431
428
  static inline yyjson_mut_val *ConvertStructureObject(const JSONStructureNode &node, yyjson_mut_doc *doc) {
432
429
  D_ASSERT(node.descriptions.size() == 1 && node.descriptions[0].type == LogicalTypeId::STRUCT);
433
430
  auto &desc = node.descriptions[0];
431
+ if (desc.children.empty()) {
432
+ // Empty struct - let's do JSON instead
433
+ return yyjson_mut_str(doc, JSONCommon::JSON_TYPE_NAME);
434
+ }
434
435
 
435
436
  auto obj = yyjson_mut_obj(doc);
436
437
  for (auto &child : desc.children) {
@@ -495,6 +496,10 @@ static LogicalType StructureToTypeObject(ClientContext &context, const JSONStruc
495
496
  idx_t depth) {
496
497
  D_ASSERT(node.descriptions.size() == 1 && node.descriptions[0].type == LogicalTypeId::STRUCT);
497
498
  auto &desc = node.descriptions[0];
499
+ if (desc.children.empty()) {
500
+ // Empty struct - let's do JSON instead
501
+ return JSONCommon::JSONType();
502
+ }
498
503
 
499
504
  child_list_t<LogicalType> child_types;
500
505
  child_types.reserve(desc.children.size());