kuzu 0.10.1.dev42__tar.gz → 0.10.2.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. sdist/PKG-INFO +1 -1
  2. sdist/kuzu-source/CMakeLists.txt +1 -1
  3. sdist/kuzu-source/scripts/antlr4/Cypher.g4 +6 -2
  4. sdist/kuzu-source/scripts/antlr4/hash.md5 +1 -1
  5. sdist/kuzu-source/src/antlr4/Cypher.g4 +6 -2
  6. sdist/kuzu-source/src/binder/bind/bind_ddl.cpp +8 -13
  7. sdist/kuzu-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +14 -2
  8. sdist/kuzu-source/src/catalog/catalog_entry/table_catalog_entry.cpp +10 -5
  9. sdist/kuzu-source/src/catalog/catalog_set.cpp +2 -1
  10. sdist/kuzu-source/src/common/copier_config/csv_reader_config.cpp +26 -0
  11. sdist/kuzu-source/src/common/data_chunk/data_chunk_collection.cpp +2 -13
  12. sdist/kuzu-source/src/common/data_chunk/data_chunk_state.cpp +0 -13
  13. sdist/kuzu-source/src/common/data_chunk/sel_vector.cpp +2 -2
  14. sdist/kuzu-source/src/common/serializer/metadata_writer.cpp +16 -6
  15. sdist/kuzu-source/src/common/utils.cpp +0 -40
  16. sdist/kuzu-source/src/function/cast_from_string_functions.cpp +23 -3
  17. sdist/kuzu-source/src/function/gds/gds.cpp +1 -0
  18. sdist/kuzu-source/src/function/vector_hash_functions.cpp +22 -22
  19. sdist/kuzu-source/src/include/binder/binder.h +1 -1
  20. sdist/kuzu-source/src/include/binder/ddl/bound_alter_info.h +6 -6
  21. sdist/kuzu-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +1 -0
  22. sdist/kuzu-source/src/include/common/constants.h +3 -0
  23. sdist/kuzu-source/src/include/common/copier_config/csv_reader_config.h +4 -2
  24. sdist/kuzu-source/src/include/common/data_chunk/data_chunk_collection.h +7 -13
  25. sdist/kuzu-source/src/include/common/data_chunk/data_chunk_state.h +0 -2
  26. sdist/kuzu-source/src/include/common/data_chunk/sel_vector.h +1 -3
  27. sdist/kuzu-source/src/include/common/enums/alter_type.h +2 -0
  28. sdist/kuzu-source/src/include/common/finally_wrapper.h +14 -0
  29. sdist/kuzu-source/src/include/common/serializer/metadata_writer.h +6 -0
  30. sdist/kuzu-source/src/include/common/utils.h +0 -3
  31. sdist/kuzu-source/src/include/function/hash/vector_hash_functions.h +10 -10
  32. sdist/kuzu-source/src/include/function/table/scan_file_function.h +4 -3
  33. sdist/kuzu-source/src/include/parser/transformer.h +2 -0
  34. sdist/kuzu-source/src/include/processor/operator/aggregate/base_aggregate.h +1 -1
  35. sdist/kuzu-source/src/include/processor/operator/ddl/alter.h +1 -1
  36. sdist/kuzu-source/src/include/processor/operator/persistent/reader/csv/base_csv_reader.h +4 -3
  37. sdist/kuzu-source/src/include/processor/operator/persistent/reader/csv/parallel_csv_reader.h +1 -1
  38. sdist/kuzu-source/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h +1 -1
  39. sdist/kuzu-source/src/include/processor/operator/persistent/rel_batch_insert.h +4 -0
  40. sdist/kuzu-source/src/include/processor/operator/physical_operator.h +0 -2
  41. sdist/kuzu-source/src/include/processor/operator/scan/scan_node_table.h +1 -1
  42. sdist/kuzu-source/src/include/processor/result/base_hash_table.h +1 -2
  43. sdist/kuzu-source/src/include/storage/buffer_manager/buffer_manager.h +2 -0
  44. sdist/kuzu-source/src/include/storage/buffer_manager/page_state.h +9 -0
  45. sdist/kuzu-source/src/include/storage/buffer_manager/spiller.h +1 -1
  46. sdist/kuzu-source/src/include/storage/free_space_manager.h +5 -0
  47. sdist/kuzu-source/src/include/storage/index/hash_index.h +9 -0
  48. sdist/kuzu-source/src/include/storage/local_storage/local_hash_index.h +2 -0
  49. sdist/kuzu-source/src/include/storage/overflow_file.h +2 -4
  50. sdist/kuzu-source/src/include/storage/page_manager.h +4 -0
  51. sdist/kuzu-source/src/include/storage/storage_utils.h +0 -2
  52. sdist/kuzu-source/src/include/storage/table/column.h +27 -54
  53. sdist/kuzu-source/src/include/storage/table/column_reader_writer.h +11 -14
  54. sdist/kuzu-source/src/include/storage/table/dictionary_column.h +10 -14
  55. sdist/kuzu-source/src/include/storage/table/in_memory_exception_chunk.h +2 -2
  56. sdist/kuzu-source/src/include/storage/table/list_column.h +19 -25
  57. sdist/kuzu-source/src/include/storage/table/string_column.h +15 -17
  58. sdist/kuzu-source/src/include/storage/table/struct_column.h +9 -11
  59. sdist/kuzu-source/src/include/storage/table/table.h +1 -1
  60. sdist/kuzu-source/src/include/transaction/transaction.h +2 -1
  61. sdist/kuzu-source/src/parser/transform/transform_ddl.cpp +42 -17
  62. sdist/kuzu-source/src/processor/operator/aggregate/aggregate_hash_table.cpp +3 -3
  63. sdist/kuzu-source/src/processor/operator/aggregate/base_aggregate.cpp +0 -11
  64. sdist/kuzu-source/src/processor/operator/aggregate/hash_aggregate.cpp +1 -1
  65. sdist/kuzu-source/src/processor/operator/aggregate/simple_aggregate.cpp +3 -3
  66. sdist/kuzu-source/src/processor/operator/ddl/alter.cpp +84 -43
  67. sdist/kuzu-source/src/processor/operator/persistent/node_batch_insert.cpp +15 -6
  68. sdist/kuzu-source/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp +1 -1
  69. sdist/kuzu-source/src/processor/operator/persistent/reader/csv/serial_csv_reader.cpp +2 -0
  70. sdist/kuzu-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +1 -1
  71. sdist/kuzu-source/src/processor/operator/persistent/rel_batch_insert.cpp +1 -1
  72. sdist/kuzu-source/src/processor/operator/physical_operator.cpp +1 -7
  73. sdist/kuzu-source/src/processor/result/base_hash_table.cpp +1 -2
  74. sdist/kuzu-source/src/storage/buffer_manager/buffer_manager.cpp +14 -4
  75. sdist/kuzu-source/src/storage/buffer_manager/spiller.cpp +6 -5
  76. sdist/kuzu-source/src/storage/checkpointer.cpp +20 -1
  77. sdist/kuzu-source/src/storage/file_handle.cpp +2 -2
  78. sdist/kuzu-source/src/storage/free_space_manager.cpp +64 -14
  79. sdist/kuzu-source/src/storage/overflow_file.cpp +2 -5
  80. sdist/kuzu-source/src/storage/page_manager.cpp +4 -0
  81. sdist/kuzu-source/src/storage/storage_manager.cpp +0 -5
  82. sdist/kuzu-source/src/storage/storage_utils.cpp +0 -20
  83. sdist/kuzu-source/src/storage/table/column.cpp +38 -44
  84. sdist/kuzu-source/src/storage/table/column_chunk.cpp +5 -5
  85. sdist/kuzu-source/src/storage/table/column_reader_writer.cpp +52 -60
  86. sdist/kuzu-source/src/storage/table/csr_chunked_node_group.cpp +2 -4
  87. sdist/kuzu-source/src/storage/table/dictionary_column.cpp +18 -20
  88. sdist/kuzu-source/src/storage/table/in_memory_exception_chunk.cpp +3 -4
  89. sdist/kuzu-source/src/storage/table/list_column.cpp +58 -69
  90. sdist/kuzu-source/src/storage/table/node_table.cpp +3 -1
  91. sdist/kuzu-source/src/storage/table/string_column.cpp +29 -32
  92. sdist/kuzu-source/src/storage/table/struct_column.cpp +18 -20
  93. sdist/kuzu-source/src/storage/wal/wal_record.cpp +8 -6
  94. sdist/kuzu-source/src/storage/wal/wal_replayer.cpp +2 -2
  95. sdist/kuzu-source/third_party/antlr4_cypher/cypher_parser.cpp +3226 -3117
  96. sdist/kuzu-source/third_party/antlr4_cypher/include/cypher_parser.h +65 -46
  97. sdist/kuzu-source/tools/python_api/src_cpp/py_connection.cpp +43 -0
  98. sdist/kuzu.egg-info/PKG-INFO +1 -1
  99. sdist/pyproject.toml +1 -1
sdist/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kuzu
3
- Version: 0.10.1.dev42
3
+ Version: 0.10.2.dev1
4
4
  Summary: Highly scalable, extremely fast, easy-to-use embeddable graph database
5
5
  Home-page: https://github.com/kuzudb/kuzu
6
6
  License: MIT License
sdist/kuzu-source/CMakeLists.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Kuzu VERSION 0.10.1.42 LANGUAGES CXX C)
3
+ project(Kuzu VERSION 0.10.2.1 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
sdist/kuzu-source/scripts/antlr4/Cypher.g4 CHANGED
@@ -385,7 +385,8 @@ kU_AlterOptions
385
385
  | kU_DropProperty
386
386
  | kU_RenameTable
387
387
  | kU_RenameProperty
388
- | kU_AddFromToConnection;
388
+ | kU_AddFromToConnection
389
+ | kU_DropFromToConnection;
389
390
 
390
391
  kU_AddProperty
391
392
  : ADD SP (kU_IfNotExists SP)? oC_PropertyKeyName SP kU_DataType ( SP kU_Default )? ;
@@ -403,7 +404,10 @@ kU_RenameProperty
403
404
  : RENAME SP oC_PropertyKeyName SP TO SP oC_PropertyKeyName ;
404
405
 
405
406
  kU_AddFromToConnection
406
- : ADD SP kU_FromToConnection ;
407
+ : ADD SP (kU_IfNotExists SP)? kU_FromToConnection ;
408
+
409
+ kU_DropFromToConnection
410
+ : DROP SP (kU_IfExists SP)? kU_FromToConnection ;
407
411
 
408
412
  kU_ColumnDefinitions: kU_ColumnDefinition ( SP? ',' SP? kU_ColumnDefinition )* ;
409
413
 
sdist/kuzu-source/scripts/antlr4/hash.md5 CHANGED
@@ -1 +1 @@
1
- 567a0f306e010f245ff575499552caac
1
+ 8908fae875ccf47f64fa086eefd53a1a
sdist/kuzu-source/src/antlr4/Cypher.g4 CHANGED
@@ -148,7 +148,8 @@ kU_AlterOptions
148
148
  | kU_DropProperty
149
149
  | kU_RenameTable
150
150
  | kU_RenameProperty
151
- | kU_AddFromToConnection;
151
+ | kU_AddFromToConnection
152
+ | kU_DropFromToConnection;
152
153
 
153
154
  kU_AddProperty
154
155
  : ADD SP (kU_IfNotExists SP)? oC_PropertyKeyName SP kU_DataType ( SP kU_Default )? ;
@@ -166,7 +167,10 @@ kU_RenameProperty
166
167
  : RENAME SP oC_PropertyKeyName SP TO SP oC_PropertyKeyName ;
167
168
 
168
169
  kU_AddFromToConnection
169
- : ADD SP kU_FromToConnection ;
170
+ : ADD SP (kU_IfNotExists SP)? kU_FromToConnection ;
171
+
172
+ kU_DropFromToConnection
173
+ : DROP SP (kU_IfExists SP)? kU_FromToConnection ;
170
174
 
171
175
  kU_ColumnDefinitions: kU_ColumnDefinition ( SP? ',' SP? kU_ColumnDefinition )* ;
172
176
 
sdist/kuzu-source/src/binder/bind/bind_ddl.cpp CHANGED
@@ -8,7 +8,6 @@
8
8
  #include "catalog/catalog.h"
9
9
  #include "catalog/catalog_entry/index_catalog_entry.h"
10
10
  #include "catalog/catalog_entry/node_table_catalog_entry.h"
11
- #include "catalog/catalog_entry/rel_group_catalog_entry.h"
12
11
  #include "catalog/catalog_entry/sequence_catalog_entry.h"
13
12
  #include "common/enums/extend_direction_util.h"
14
13
  #include "common/exception/binder.h"
@@ -408,8 +407,9 @@ std::unique_ptr<BoundStatement> Binder::bindAlter(const Statement& statement) {
408
407
  case AlterType::COMMENT: {
409
408
  return bindCommentOn(statement);
410
409
  }
411
- case AlterType::ADD_FROM_TO_CONNECTION: {
412
- return bindAddFromToConnection(statement);
410
+ case AlterType::ADD_FROM_TO_CONNECTION:
411
+ case AlterType::DROP_FROM_TO_CONNECTION: {
412
+ return bindAlterFromToConnection(statement);
413
413
  }
414
414
  default: {
415
415
  KU_UNREACHABLE;
@@ -491,7 +491,8 @@ std::unique_ptr<BoundStatement> Binder::bindCommentOn(const Statement& statement
491
491
  return std::make_unique<BoundAlter>(std::move(boundInfo));
492
492
  }
493
493
 
494
- std::unique_ptr<BoundStatement> Binder::bindAddFromToConnection(const Statement& statement) const {
494
+ std::unique_ptr<BoundStatement> Binder::bindAlterFromToConnection(
495
+ const Statement& statement) const {
495
496
  auto& alter = statement.constCast<Alter>();
496
497
  auto info = alter.getInfo();
497
498
  auto extraInfo = info->extraInfo->constPtrCast<ExtraAddFromToConnection>();
@@ -500,15 +501,9 @@ std::unique_ptr<BoundStatement> Binder::bindAddFromToConnection(const Statement&
500
501
  auto dstTableEntry = bindNodeTableEntry(extraInfo->dstTableName);
501
502
  auto srcTableID = srcTableEntry->getTableID();
502
503
  auto dstTableID = dstTableEntry->getTableID();
503
- auto relGroupEntry = bindRelGroupEntries({tableName})[0]->constPtrCast<RelGroupCatalogEntry>();
504
- if (relGroupEntry->hasRelEntryInfo(srcTableID, dstTableID)) {
505
- throw BinderException{
506
- common::stringFormat("Node table pair: {}->{} already exists in the {} table.",
507
- srcTableEntry->getName(), dstTableEntry->getName(), tableName)};
508
- }
509
- auto boundExtraInfo = std::make_unique<BoundExtraAddFromToConnection>(srcTableID, dstTableID);
510
- auto boundInfo = BoundAlterInfo(AlterType::ADD_FROM_TO_CONNECTION, tableName,
511
- std::move(boundExtraInfo), info->onConflict);
504
+ auto boundExtraInfo = std::make_unique<BoundExtraAlterFromToConnection>(srcTableID, dstTableID);
505
+ auto boundInfo =
506
+ BoundAlterInfo(info->type, tableName, std::move(boundExtraInfo), info->onConflict);
512
507
  return std::make_unique<BoundAlter>(std::move(boundInfo));
513
508
  }
514
509
 
sdist/kuzu-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp CHANGED
@@ -14,11 +14,23 @@ using namespace kuzu::main;
14
14
  namespace kuzu {
15
15
  namespace catalog {
16
16
 
17
- void RelGroupCatalogEntry::addFromToConnection(common::table_id_t srcTableID,
18
- common::table_id_t dstTableID, common::oid_t oid) {
17
+ void RelGroupCatalogEntry::addFromToConnection(table_id_t srcTableID, table_id_t dstTableID,
18
+ oid_t oid) {
19
19
  relTableInfos.emplace_back(NodeTableIDPair{srcTableID, dstTableID}, oid);
20
20
  }
21
21
 
22
+ void RelGroupCatalogEntry::dropFromToConnection(table_id_t srcTableID, table_id_t dstTableID) {
23
+ auto tmpInfos = relTableInfos;
24
+ relTableInfos.clear();
25
+ for (auto& tmpInfo : tmpInfos) {
26
+ if (tmpInfo.nodePair.srcTableID == srcTableID &&
27
+ tmpInfo.nodePair.dstTableID == dstTableID) {
28
+ continue;
29
+ }
30
+ relTableInfos.emplace_back(tmpInfo);
31
+ }
32
+ }
33
+
22
34
  void RelTableCatalogInfo::serialize(Serializer& ser) const {
23
35
  ser.writeDebuggingInfo("nodePair");
24
36
  ser.serializeValue(nodePair);
sdist/kuzu-source/src/catalog/catalog_entry/table_catalog_entry.cpp CHANGED
@@ -38,11 +38,16 @@ std::unique_ptr<TableCatalogEntry> TableCatalogEntry::alter(transaction_t timest
38
38
  newEntry->setComment(commentInfo.comment);
39
39
  } break;
40
40
  case AlterType::ADD_FROM_TO_CONNECTION: {
41
- auto& fromToConnectionInfo =
42
- *alterInfo.extraInfo->constPtrCast<BoundExtraAddFromToConnection>();
43
- newEntry->ptrCast<RelGroupCatalogEntry>()->addFromToConnection(
44
- fromToConnectionInfo.srcTableID, fromToConnectionInfo.dstTableID,
45
- tables->getNextOIDNoLock());
41
+ auto& connectionInfo =
42
+ *alterInfo.extraInfo->constPtrCast<BoundExtraAlterFromToConnection>();
43
+ newEntry->ptrCast<RelGroupCatalogEntry>()->addFromToConnection(connectionInfo.fromTableID,
44
+ connectionInfo.toTableID, tables->getNextOIDNoLock());
45
+ } break;
46
+ case AlterType::DROP_FROM_TO_CONNECTION: {
47
+ auto& connectionInfo =
48
+ *alterInfo.extraInfo->constPtrCast<BoundExtraAlterFromToConnection>();
49
+ newEntry->ptrCast<RelGroupCatalogEntry>()->dropFromToConnection(connectionInfo.fromTableID,
50
+ connectionInfo.toTableID);
46
51
  } break;
47
52
  default: {
48
53
  KU_UNREACHABLE;
sdist/kuzu-source/src/catalog/catalog_set.cpp CHANGED
@@ -194,7 +194,8 @@ void CatalogSet::alterTableEntry(Transaction* transaction,
194
194
  case AlterType::ADD_PROPERTY:
195
195
  case AlterType::DROP_PROPERTY:
196
196
  case AlterType::RENAME_PROPERTY:
197
- case AlterType::ADD_FROM_TO_CONNECTION: {
197
+ case AlterType::ADD_FROM_TO_CONNECTION:
198
+ case AlterType::DROP_FROM_TO_CONNECTION: {
198
199
  emplaceNoLock(std::move(newEntry));
199
200
  if (transaction->shouldAppendToUndoBuffer()) {
200
201
  transaction->pushAlterCatalogEntry(*this, *entry, alterInfo);
sdist/kuzu-source/src/common/copier_config/csv_reader_config.cpp CHANGED
@@ -5,6 +5,7 @@
5
5
  #include "common/exception/binder.h"
6
6
  #include "common/exception/runtime.h"
7
7
  #include "common/string_utils.h"
8
+ #include "common/types/value/nested.h"
8
9
 
9
10
  namespace kuzu {
10
11
  namespace common {
@@ -74,6 +75,15 @@ static void bindIntParsingOption(CSVReaderConfig& config, const std::string& opt
74
75
  }
75
76
  }
76
77
 
78
+ static void bindListParsingOption(CSVReaderConfig& config, const std::string& optionName,
79
+ const std::vector<std::string>& optionValue) {
80
+ if (optionName == "NULL_STRINGS") {
81
+ config.option.nullStrings = optionValue;
82
+ } else {
83
+ KU_UNREACHABLE;
84
+ }
85
+ }
86
+
77
87
  template<uint64_t size>
78
88
  static bool hasOption(const char* const (&arr)[size], const std::string& option) {
79
89
  return std::find(std::begin(arr), std::end(arr), option) != std::end(arr);
@@ -91,6 +101,10 @@ static bool validateIntParsingOptionName(const std::string& parsingOptionName) {
91
101
  return hasOption(CopyConstants::INT_CSV_PARSING_OPTIONS, parsingOptionName);
92
102
  }
93
103
 
104
+ static bool validateListParsingOptionName(const std::string& parsingOptionName) {
105
+ return hasOption(CopyConstants::LIST_CSV_PARSING_OPTIONS, parsingOptionName);
106
+ }
107
+
94
108
  static bool isValidBooleanOptionValue(const Value& value, const std::string& name) {
95
109
  // Normalize and check if the string is a valid Boolean representation
96
110
  auto strValue = value.toString();
@@ -115,6 +129,7 @@ CSVReaderConfig CSVReaderConfig::construct(const case_insensitive_map_t<Value>&
115
129
  auto isValidStringParsingOption = validateStringParsingOptionName(name);
116
130
  auto isValidBoolParsingOption = validateBoolParsingOptionName(name);
117
131
  auto isValidIntParsingOption = validateIntParsingOptionName(name);
132
+ auto isValidListParsingOption = validateListParsingOptionName(name);
118
133
  if (isValidBoolParsingOption) {
119
134
  bindBoolParsingOption(config, name, isValidBooleanOptionValue(op.second, name));
120
135
  } else if (isValidStringParsingOption) {
@@ -129,6 +144,17 @@ CSVReaderConfig CSVReaderConfig::construct(const case_insensitive_map_t<Value>&
129
144
  stringFormat("The type of csv parsing option {} must be a INT64.", name));
130
145
  }
131
146
  bindIntParsingOption(config, name, op.second.getValue<int64_t>());
147
+ } else if (isValidListParsingOption) {
148
+ if (op.second.getDataType() != LogicalType::LIST(LogicalType::STRING())) {
149
+ throw BinderException(
150
+ stringFormat("The type of csv parsing option {} must be a STRING[].", name));
151
+ }
152
+ std::vector<std::string> optionValues;
153
+ for (auto i = 0u; i < op.second.getChildrenSize(); i++) {
154
+ optionValues.push_back(
155
+ NestedVal::getChildVal(&op.second, i)->getValue<std::string>());
156
+ }
157
+ bindListParsingOption(config, name, optionValues);
132
158
  } else {
133
159
  throw BinderException(stringFormat("Unrecognized csv parsing option: {}.", name));
134
160
  }
sdist/kuzu-source/src/common/data_chunk/data_chunk_collection.cpp CHANGED
@@ -31,18 +31,7 @@ void DataChunkCollection::append(DataChunk& chunk) {
31
31
  }
32
32
  }
33
33
 
34
- void DataChunkCollection::merge(DataChunk chunk) {
35
- if (chunks.empty()) {
36
- initTypes(chunk);
37
- }
38
- KU_ASSERT(chunk.getNumValueVectors() == types.size());
39
- for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) {
40
- KU_ASSERT(chunk.getValueVector(vectorIdx).dataType == types[vectorIdx]);
41
- }
42
- chunks.push_back(std::move(chunk));
43
- }
44
-
45
- void DataChunkCollection::initTypes(DataChunk& chunk) {
34
+ void DataChunkCollection::initTypes(const DataChunk& chunk) {
46
35
  types.clear();
47
36
  types.reserve(chunk.getNumValueVectors());
48
37
  for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) {
@@ -50,7 +39,7 @@ void DataChunkCollection::initTypes(DataChunk& chunk) {
50
39
  }
51
40
  }
52
41
 
53
- void DataChunkCollection::allocateChunk(DataChunk& chunk) {
42
+ void DataChunkCollection::allocateChunk(const DataChunk& chunk) {
54
43
  if (chunks.empty()) {
55
44
  types.reserve(chunk.getNumValueVectors());
56
45
  for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) {
sdist/kuzu-source/src/common/data_chunk/data_chunk_state.cpp CHANGED
@@ -1,8 +1,6 @@
1
1
  #include "common/data_chunk/data_chunk_state.h"
2
2
 
3
- #include "common/data_chunk/sel_vector.h"
4
3
  #include "common/system_config.h"
5
- #include "common/types/types.h"
6
4
 
7
5
  namespace kuzu {
8
6
  namespace common {
@@ -16,16 +14,5 @@ std::shared_ptr<DataChunkState> DataChunkState::getSingleValueDataChunkState() {
16
14
  return state;
17
15
  }
18
16
 
19
- void DataChunkState::slice(offset_t offset) {
20
- // NOTE: this operation has performance penalty. Ideally we should directly modify selVector
21
- // instead of creating a new one.
22
- auto slicedSelVector = std::make_shared<SelectionVector>(DEFAULT_VECTOR_CAPACITY);
23
- for (auto i = 0u; i < selVector->getSelSize() - offset; i++) {
24
- slicedSelVector->getMutableBuffer()[i] = selVector->operator[](i + offset);
25
- }
26
- slicedSelVector->setToFiltered(selVector->getSelSize() - offset);
27
- selVector = std::move(slicedSelVector);
28
- }
29
-
30
17
  } // namespace common
31
18
  } // namespace kuzu
sdist/kuzu-source/src/common/data_chunk/sel_vector.cpp CHANGED
@@ -18,8 +18,8 @@ static const std::array<sel_t, DEFAULT_VECTOR_CAPACITY> INCREMENTAL_SELECTED_POS
18
18
  return selectedPos;
19
19
  }();
20
20
 
21
- SelectionView::SelectionView(sel_t startPos, sel_t selectedSize)
22
- : selectedPositions{INCREMENTAL_SELECTED_POS.data() + startPos}, selectedSize{selectedSize},
21
+ SelectionView::SelectionView(sel_t selectedSize)
22
+ : selectedPositions{INCREMENTAL_SELECTED_POS.data()}, selectedSize{selectedSize},
23
23
  state{State::STATIC} {}
24
24
 
25
25
  SelectionVector::SelectionVector() : SelectionVector{DEFAULT_VECTOR_CAPACITY} {}
sdist/kuzu-source/src/common/serializer/metadata_writer.cpp CHANGED
@@ -31,24 +31,34 @@ void MetaWriter::write(const uint8_t* data, uint64_t size) {
31
31
 
32
32
  storage::PageRange MetaWriter::flush(storage::FileHandle* fileHandle,
33
33
  storage::ShadowFile& shadowFile) const {
34
- auto numPagesToFlush = pages.size();
34
+ auto numPagesToFlush = getNumPagesToFlush();
35
35
  auto pageManager = fileHandle->getPageManager();
36
- auto numPages = fileHandle->getNumPages();
37
36
  auto pageRange = pageManager->allocatePageRange(numPagesToFlush);
38
- for (auto i = 0u; i < pageRange.numPages; i++) {
39
- auto pageIdx = pageRange.startPageIdx + i;
40
- auto insertingNewPage = pageIdx >= numPages;
37
+ flush(pageRange, fileHandle, shadowFile);
38
+ return pageRange;
39
+ }
40
+
41
+ void MetaWriter::flush(storage::PageRange allocatedPageRange, storage::FileHandle* fileHandle,
42
+ storage::ShadowFile& shadowFile) const {
43
+ KU_ASSERT(allocatedPageRange.numPages >= getNumPagesToFlush());
44
+ auto numPagesBeforeAllocate = allocatedPageRange.startPageIdx;
45
+ for (auto i = 0u; i < getNumPagesToFlush(); i++) {
46
+ auto pageIdx = allocatedPageRange.startPageIdx + i;
47
+ auto insertingNewPage = pageIdx >= numPagesBeforeAllocate;
41
48
  auto shadowPageAndFrame = storage::ShadowUtils::createShadowVersionIfNecessaryAndPinPage(
42
49
  pageIdx, insertingNewPage, *fileHandle, shadowFile);
43
50
  memcpy(shadowPageAndFrame.frame, pages[i]->getData(), KUZU_PAGE_SIZE);
44
51
  shadowFile.getShadowingFH().unpinPage(shadowPageAndFrame.shadowPage);
45
52
  }
46
- return pageRange;
47
53
  }
48
54
 
49
55
  bool MetaWriter::needNewBuffer(uint64_t size) const {
50
56
  return pages.empty() || pageOffset + size > KUZU_PAGE_SIZE;
51
57
  }
52
58
 
59
+ uint64_t MetaWriter::getPageSize() {
60
+ return KUZU_PAGE_SIZE;
61
+ }
62
+
53
63
  } // namespace common
54
64
  } // namespace kuzu
sdist/kuzu-source/src/common/utils.cpp CHANGED
@@ -25,45 +25,5 @@ bool isLittleEndian() {
25
25
  return *(uint8_t*)&testNumber == 1;
26
26
  }
27
27
 
28
- template<>
29
- bool integerFitsIn<int64_t>(int64_t) {
30
- return true;
31
- }
32
-
33
- template<>
34
- bool integerFitsIn<int32_t>(int64_t val) {
35
- return val >= INT32_MIN && val <= INT32_MAX;
36
- }
37
-
38
- template<>
39
- bool integerFitsIn<int16_t>(int64_t val) {
40
- return val >= INT16_MIN && val <= INT16_MAX;
41
- }
42
-
43
- template<>
44
- bool integerFitsIn<int8_t>(int64_t val) {
45
- return val >= INT8_MIN && val <= INT8_MAX;
46
- }
47
-
48
- template<>
49
- bool integerFitsIn<uint64_t>(int64_t val) {
50
- return val >= 0;
51
- }
52
-
53
- template<>
54
- bool integerFitsIn<uint32_t>(int64_t val) {
55
- return val >= 0 && val <= UINT32_MAX;
56
- }
57
-
58
- template<>
59
- bool integerFitsIn<uint16_t>(int64_t val) {
60
- return val >= 0 && val <= UINT16_MAX;
61
- }
62
-
63
- template<>
64
- bool integerFitsIn<uint8_t>(int64_t val) {
65
- return val >= 0 && val <= UINT8_MAX;
66
- }
67
-
68
28
  } // namespace common
69
29
  } // namespace kuzu
sdist/kuzu-source/src/function/cast_from_string_functions.cpp CHANGED
@@ -847,14 +847,34 @@ void CastString::operation(const ku_string_t& input, union_entry_t& result,
847
847
  resultVector, rowToAdd, CSVOption);
848
848
  }
849
849
 
850
+ static void setVectorNull(ValueVector* vector, uint64_t vectorPos, std::string_view strVal,
851
+ const CSVOption* option) {
852
+ auto& type = vector->dataType;
853
+ switch (type.getLogicalTypeID()) {
854
+ case LogicalTypeID::STRING: {
855
+ if (std::any_of(option->nullStrings.begin(), option->nullStrings.end(),
856
+ [&](const std::string& nullStr) { return nullStr == strVal; })) {
857
+ vector->setNull(vectorPos, true /* isNull */);
858
+ return;
859
+ }
860
+ } break;
861
+ default: {
862
+ if (isNull(strVal)) {
863
+ vector->setNull(vectorPos, true /* isNull */);
864
+ return;
865
+ }
866
+ } break;
867
+ }
868
+ vector->setNull(vectorPos, false /* isNull */);
869
+ }
870
+
850
871
  void CastString::copyStringToVector(ValueVector* vector, uint64_t vectorPos,
851
872
  std::string_view strVal, const CSVOption* option) {
852
873
  auto& type = vector->dataType;
853
- if (strVal.empty() || isNull(strVal) || isAnyType(strVal)) {
854
- vector->setNull(vectorPos, true /* isNull */);
874
+ setVectorNull(vector, vectorPos, strVal, option);
875
+ if (vector->isNull(vectorPos)) {
855
876
  return;
856
877
  }
857
- vector->setNull(vectorPos, false /* isNull */);
858
878
  switch (type.getLogicalTypeID()) {
859
879
  case LogicalTypeID::INT128: {
860
880
  int128_t val = 0;
sdist/kuzu-source/src/function/gds/gds.cpp CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  #include "binder/binder.h"
4
4
  #include "binder/query/reading_clause/bound_table_function_call.h"
5
+ #include "catalog/catalog.h"
5
6
  #include "catalog/catalog_entry/rel_group_catalog_entry.h"
6
7
  #include "common/exception/binder.h"
7
8
  #include "graph/graph_entry_set.h"
sdist/kuzu-source/src/function/vector_hash_functions.cpp CHANGED
@@ -1,5 +1,6 @@
1
1
  #include "function/hash/vector_hash_functions.h"
2
2
 
3
+ #include "common/data_chunk/sel_vector.h"
3
4
  #include "common/system_config.h"
4
5
  #include "common/type_utils.h"
5
6
  #include "function/hash/hash_functions.h"
@@ -19,8 +20,8 @@ static void executeOnValue(const ValueVector& operand, sel_t operandPos, ValueVe
19
20
 
20
21
  template<typename OPERAND_TYPE, typename RESULT_TYPE>
21
22
  void UnaryHashFunctionExecutor::execute(const ValueVector& operand,
22
- const SelectionVector& operandSelectVec, ValueVector& result,
23
- const SelectionVector& resultSelectVec) {
23
+ const SelectionView& operandSelectVec, ValueVector& result,
24
+ const SelectionView& resultSelectVec) {
24
25
  auto resultValues = (RESULT_TYPE*)result.getData();
25
26
  if (operand.hasNoNullsGuarantee()) {
26
27
  if (operandSelectVec.isUnfiltered()) {
@@ -69,8 +70,8 @@ static void executeOnValue(const common::ValueVector& left, common::sel_t leftPo
69
70
  result.getValue<RESULT_TYPE>(resultPos));
70
71
  }
71
72
 
72
- static void validateSelState(const common::SelectionVector& leftSelVec,
73
- const common::SelectionVector& rightSelVec, const common::SelectionVector& resultSelVec) {
73
+ static void validateSelState(const SelectionView& leftSelVec, const SelectionView& rightSelVec,
74
+ const SelectionView& resultSelVec) {
74
75
  auto leftSelSize = leftSelVec.getSelSize();
75
76
  auto rightSelSize = rightSelVec.getSelSize();
76
77
  auto resultSelSize = resultSelVec.getSelSize();
@@ -87,9 +88,9 @@ static void validateSelState(const common::SelectionVector& leftSelVec,
87
88
 
88
89
  template<typename LEFT_TYPE, typename RIGHT_TYPE, typename RESULT_TYPE, typename FUNC>
89
90
  void BinaryHashFunctionExecutor::execute(const common::ValueVector& left,
90
- const common::SelectionVector& leftSelVec, const common::ValueVector& right,
91
- const common::SelectionVector& rightSelVec, common::ValueVector& result,
92
- const common::SelectionVector& resultSelVec) {
91
+ const SelectionView& leftSelVec, const common::ValueVector& right,
92
+ const SelectionView& rightSelVec, common::ValueVector& result,
93
+ const SelectionView& resultSelVec) {
93
94
  validateSelState(leftSelVec, rightSelVec, resultSelVec);
94
95
  result.resetAuxiliaryBuffer();
95
96
  if (leftSelVec.getSelSize() != 1 && rightSelVec.getSelSize() != 1) {
@@ -143,8 +144,8 @@ static std::unique_ptr<ValueVector> computeDataVecHash(const ValueVector& operan
143
144
  return hashVector;
144
145
  }
145
146
 
146
- static void finalizeDataVecHash(const ValueVector& operand, const SelectionVector& operandSelVec,
147
- ValueVector& result, const SelectionVector& resultSelVec, ValueVector& tmpHashVec) {
147
+ static void finalizeDataVecHash(const ValueVector& operand, const SelectionView& operandSelVec,
148
+ ValueVector& result, const SelectionView& resultSelVec, ValueVector& tmpHashVec) {
148
149
  for (auto i = 0u; i < operandSelVec.getSelSize(); i++) {
149
150
  auto pos = operandSelVec[i];
150
151
  auto resultPos = resultSelVec[i];
@@ -162,15 +163,14 @@ static void finalizeDataVecHash(const ValueVector& operand, const SelectionVecto
162
163
  }
163
164
  }
164
165
 
165
- static void computeListVectorHash(const ValueVector& operand,
166
- const SelectionVector& operandSelectVec, ValueVector& result,
167
- const SelectionVector& resultSelectVec) {
166
+ static void computeListVectorHash(const ValueVector& operand, const SelectionView& operandSelectVec,
167
+ ValueVector& result, const SelectionView& resultSelectVec) {
168
168
  auto dataVecHash = computeDataVecHash(operand);
169
169
  finalizeDataVecHash(operand, operandSelectVec, result, resultSelectVec, *dataVecHash);
170
170
  }
171
171
 
172
- static void computeStructVecHash(const ValueVector& operand, const SelectionVector& operandSelVec,
173
- ValueVector& result, const SelectionVector& resultSelVec) {
172
+ static void computeStructVecHash(const ValueVector& operand, const SelectionView& operandSelVec,
173
+ ValueVector& result, const SelectionView& resultSelVec) {
174
174
  switch (operand.dataType.getLogicalTypeID()) {
175
175
  case LogicalTypeID::NODE: {
176
176
  KU_ASSERT(0 == common::StructType::getFieldIdx(operand.dataType, InternalKeyword::ID));
@@ -188,12 +188,12 @@ static void computeStructVecHash(const ValueVector& operand, const SelectionVect
188
188
  VectorHashFunction::computeHash(*StructVector::getFieldVector(&operand, 0 /* idx */),
189
189
  operandSelVec, result, resultSelVec);
190
190
  auto tmpHashVector = std::make_unique<ValueVector>(LogicalType::HASH());
191
+ SelectionView tmpSel(resultSelVec.getSelSize());
191
192
  for (auto i = 1u; i < StructType::getNumFields(operand.dataType); i++) {
192
193
  auto fieldVector = StructVector::getFieldVector(&operand, i);
193
- VectorHashFunction::computeHash(*fieldVector, operandSelVec, *tmpHashVector,
194
+ VectorHashFunction::computeHash(*fieldVector, operandSelVec, *tmpHashVector, tmpSel);
195
+ VectorHashFunction::combineHash(*tmpHashVector, tmpSel, result, resultSelVec, result,
194
196
  resultSelVec);
195
- VectorHashFunction::combineHash(*tmpHashVector, resultSelVec, result, resultSelVec,
196
- result, resultSelVec);
197
197
  }
198
198
  } break;
199
199
  default:
@@ -202,8 +202,8 @@ static void computeStructVecHash(const ValueVector& operand, const SelectionVect
202
202
  }
203
203
 
204
204
  void VectorHashFunction::computeHash(const ValueVector& operand,
205
- const SelectionVector& operandSelectVec, ValueVector& result,
206
- const SelectionVector& resultSelectVec) {
205
+ const SelectionView& operandSelectVec, ValueVector& result,
206
+ const SelectionView& resultSelectVec) {
207
207
  result.state = operand.state;
208
208
  KU_ASSERT(result.dataType.getLogicalTypeID() == LogicalType::HASH().getLogicalTypeID());
209
209
  TypeUtils::visit(
@@ -225,9 +225,9 @@ void VectorHashFunction::computeHash(const ValueVector& operand,
225
225
  });
226
226
  }
227
227
 
228
- void VectorHashFunction::combineHash(const ValueVector& left, const SelectionVector& leftSelVec,
229
- const ValueVector& right, const SelectionVector& rightSelVec, ValueVector& result,
230
- const SelectionVector& resultSelVec) {
228
+ void VectorHashFunction::combineHash(const ValueVector& left, const SelectionView& leftSelVec,
229
+ const ValueVector& right, const SelectionView& rightSelVec, ValueVector& result,
230
+ const SelectionView& resultSelVec) {
231
231
  KU_ASSERT(left.dataType.getLogicalTypeID() == LogicalType::HASH().getLogicalTypeID());
232
232
  KU_ASSERT(left.dataType.getLogicalTypeID() == right.dataType.getLogicalTypeID());
233
233
  KU_ASSERT(left.dataType.getLogicalTypeID() == result.dataType.getLogicalTypeID());
@@ -111,7 +111,7 @@ public:
111
111
  std::unique_ptr<BoundStatement> bindDropProperty(const parser::Statement& statement) const;
112
112
  std::unique_ptr<BoundStatement> bindRenameProperty(const parser::Statement& statement) const;
113
113
  std::unique_ptr<BoundStatement> bindCommentOn(const parser::Statement& statement) const;
114
- std::unique_ptr<BoundStatement> bindAddFromToConnection(
114
+ std::unique_ptr<BoundStatement> bindAlterFromToConnection(
115
115
  const parser::Statement& statement) const;
116
116
 
117
117
  std::vector<PropertyDefinition> bindPropertyDefinitions(
@@ -110,14 +110,14 @@ struct BoundExtraCommentInfo final : BoundExtraAlterInfo {
110
110
  }
111
111
  };
112
112
 
113
- struct BoundExtraAddFromToConnection final : BoundExtraAlterInfo {
114
- common::table_id_t srcTableID;
115
- common::table_id_t dstTableID;
113
+ struct BoundExtraAlterFromToConnection final : BoundExtraAlterInfo {
114
+ common::table_id_t fromTableID;
115
+ common::table_id_t toTableID;
116
116
 
117
- BoundExtraAddFromToConnection(common::table_id_t srcTableID, common::table_id_t dstTableID)
118
- : srcTableID{srcTableID}, dstTableID{dstTableID} {}
117
+ BoundExtraAlterFromToConnection(common::table_id_t fromTableID, common::table_id_t toTableID)
118
+ : fromTableID{fromTableID}, toTableID{toTableID} {}
119
119
  std::unique_ptr<BoundExtraAlterInfo> copy() const override {
120
- return std::make_unique<BoundExtraAddFromToConnection>(*this);
120
+ return std::make_unique<BoundExtraAlterFromToConnection>(*this);
121
121
  }
122
122
  };
123
123
 
@@ -80,6 +80,7 @@ public:
80
80
 
81
81
  void addFromToConnection(common::table_id_t srcTableID, common::table_id_t dstTableID,
82
82
  common::oid_t oid);
83
+ void dropFromToConnection(common::table_id_t srcTableID, common::table_id_t dstTableID);
83
84
  void serialize(common::Serializer& serializer) const override;
84
85
  static std::unique_ptr<RelGroupCatalogEntry> deserialize(common::Deserializer& deserializer);
85
86
  std::string toCypher(const ToCypherInfo& info) const override;
@@ -134,11 +134,14 @@ struct CopyConstants {
134
134
  static constexpr std::array DEFAULT_CSV_DELIMITER_SEARCH_SPACE = {',', ';', '\t', '|'};
135
135
  static constexpr std::array DEFAULT_CSV_QUOTE_SEARCH_SPACE = {'"', '\''};
136
136
  static constexpr std::array DEFAULT_CSV_ESCAPE_SEARCH_SPACE = {'"', '\\', '\''};
137
+ static constexpr std::array DEFAULT_CSV_NULL_STRINGS = {""};
137
138
 
138
139
  static constexpr const char* INT_CSV_PARSING_OPTIONS[] = {"SKIP", "SAMPLE_SIZE"};
139
140
  static constexpr uint64_t DEFAULT_CSV_SKIP_NUM = 0;
140
141
  static constexpr uint64_t DEFAULT_CSV_TYPE_DEDUCTION_SAMPLE_SIZE = 256;
141
142
 
143
+ static constexpr const char* LIST_CSV_PARSING_OPTIONS[] = {"NULL_STRINGS"};
144
+
142
145
  // metadata columns used to populate CSV warnings
143
146
  static constexpr std::array SHARED_WARNING_DATA_COLUMN_NAMES = {"blockIdx", "offsetInBlock",
144
147
  "startByteOffset", "endByteOffset"};
@@ -25,6 +25,7 @@ struct CSVOption {
25
25
  bool setDelim;
26
26
  bool setQuote;
27
27
  bool setHeader;
28
+ std::vector<std::string> nullStrings;
28
29
 
29
30
  CSVOption()
30
31
  : escapeChar{CopyConstants::DEFAULT_CSV_ESCAPE_CHAR},
@@ -39,7 +40,8 @@ struct CSVOption {
39
40
  setEscape{CopyConstants::DEFAULT_CSV_SET_DIALECT},
40
41
  setDelim{CopyConstants::DEFAULT_CSV_SET_DIALECT},
41
42
  setQuote{CopyConstants::DEFAULT_CSV_SET_DIALECT},
42
- setHeader{CopyConstants::DEFAULT_CSV_SET_DIALECT} {}
43
+ setHeader{CopyConstants::DEFAULT_CSV_SET_DIALECT},
44
+ nullStrings{CopyConstants::DEFAULT_CSV_NULL_STRINGS[0]} {}
43
45
 
44
46
  EXPLICIT_COPY_DEFAULT_MOVE(CSVOption);
45
47
 
@@ -85,7 +87,7 @@ struct CSVOption {
85
87
  // sampleSize is 0
86
88
  allowUnbracedList{other.allowUnbracedList}, ignoreErrors{other.ignoreErrors},
87
89
  autoDetection{other.autoDetection}, setEscape{other.setEscape}, setDelim{other.setDelim},
88
- setQuote{other.setQuote}, setHeader{other.setHeader} {}
90
+ setQuote{other.setQuote}, setHeader{other.setHeader}, nullStrings{other.nullStrings} {}
89
91
  };
90
92
 
91
93
  struct CSVReaderConfig {