kuzu 0.10.1.dev42__tar.gz → 0.10.2.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdist/PKG-INFO +1 -1
- sdist/kuzu-source/CMakeLists.txt +1 -1
- sdist/kuzu-source/scripts/antlr4/Cypher.g4 +6 -2
- sdist/kuzu-source/scripts/antlr4/hash.md5 +1 -1
- sdist/kuzu-source/src/antlr4/Cypher.g4 +6 -2
- sdist/kuzu-source/src/binder/bind/bind_ddl.cpp +8 -13
- sdist/kuzu-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +14 -2
- sdist/kuzu-source/src/catalog/catalog_entry/table_catalog_entry.cpp +10 -5
- sdist/kuzu-source/src/catalog/catalog_set.cpp +2 -1
- sdist/kuzu-source/src/common/copier_config/csv_reader_config.cpp +26 -0
- sdist/kuzu-source/src/common/data_chunk/data_chunk_collection.cpp +2 -13
- sdist/kuzu-source/src/common/data_chunk/data_chunk_state.cpp +0 -13
- sdist/kuzu-source/src/common/data_chunk/sel_vector.cpp +2 -2
- sdist/kuzu-source/src/common/serializer/metadata_writer.cpp +16 -6
- sdist/kuzu-source/src/common/utils.cpp +0 -40
- sdist/kuzu-source/src/function/cast_from_string_functions.cpp +23 -3
- sdist/kuzu-source/src/function/gds/gds.cpp +1 -0
- sdist/kuzu-source/src/function/vector_hash_functions.cpp +22 -22
- sdist/kuzu-source/src/include/binder/binder.h +1 -1
- sdist/kuzu-source/src/include/binder/ddl/bound_alter_info.h +6 -6
- sdist/kuzu-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +1 -0
- sdist/kuzu-source/src/include/common/constants.h +3 -0
- sdist/kuzu-source/src/include/common/copier_config/csv_reader_config.h +4 -2
- sdist/kuzu-source/src/include/common/data_chunk/data_chunk_collection.h +7 -13
- sdist/kuzu-source/src/include/common/data_chunk/data_chunk_state.h +0 -2
- sdist/kuzu-source/src/include/common/data_chunk/sel_vector.h +1 -3
- sdist/kuzu-source/src/include/common/enums/alter_type.h +2 -0
- sdist/kuzu-source/src/include/common/finally_wrapper.h +14 -0
- sdist/kuzu-source/src/include/common/serializer/metadata_writer.h +6 -0
- sdist/kuzu-source/src/include/common/utils.h +0 -3
- sdist/kuzu-source/src/include/function/hash/vector_hash_functions.h +10 -10
- sdist/kuzu-source/src/include/function/table/scan_file_function.h +4 -3
- sdist/kuzu-source/src/include/parser/transformer.h +2 -0
- sdist/kuzu-source/src/include/processor/operator/aggregate/base_aggregate.h +1 -1
- sdist/kuzu-source/src/include/processor/operator/ddl/alter.h +1 -1
- sdist/kuzu-source/src/include/processor/operator/persistent/reader/csv/base_csv_reader.h +4 -3
- sdist/kuzu-source/src/include/processor/operator/persistent/reader/csv/parallel_csv_reader.h +1 -1
- sdist/kuzu-source/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h +1 -1
- sdist/kuzu-source/src/include/processor/operator/persistent/rel_batch_insert.h +4 -0
- sdist/kuzu-source/src/include/processor/operator/physical_operator.h +0 -2
- sdist/kuzu-source/src/include/processor/operator/scan/scan_node_table.h +1 -1
- sdist/kuzu-source/src/include/processor/result/base_hash_table.h +1 -2
- sdist/kuzu-source/src/include/storage/buffer_manager/buffer_manager.h +2 -0
- sdist/kuzu-source/src/include/storage/buffer_manager/page_state.h +9 -0
- sdist/kuzu-source/src/include/storage/buffer_manager/spiller.h +1 -1
- sdist/kuzu-source/src/include/storage/free_space_manager.h +5 -0
- sdist/kuzu-source/src/include/storage/index/hash_index.h +9 -0
- sdist/kuzu-source/src/include/storage/local_storage/local_hash_index.h +2 -0
- sdist/kuzu-source/src/include/storage/overflow_file.h +2 -4
- sdist/kuzu-source/src/include/storage/page_manager.h +4 -0
- sdist/kuzu-source/src/include/storage/storage_utils.h +0 -2
- sdist/kuzu-source/src/include/storage/table/column.h +27 -54
- sdist/kuzu-source/src/include/storage/table/column_reader_writer.h +11 -14
- sdist/kuzu-source/src/include/storage/table/dictionary_column.h +10 -14
- sdist/kuzu-source/src/include/storage/table/in_memory_exception_chunk.h +2 -2
- sdist/kuzu-source/src/include/storage/table/list_column.h +19 -25
- sdist/kuzu-source/src/include/storage/table/string_column.h +15 -17
- sdist/kuzu-source/src/include/storage/table/struct_column.h +9 -11
- sdist/kuzu-source/src/include/storage/table/table.h +1 -1
- sdist/kuzu-source/src/include/transaction/transaction.h +2 -1
- sdist/kuzu-source/src/parser/transform/transform_ddl.cpp +42 -17
- sdist/kuzu-source/src/processor/operator/aggregate/aggregate_hash_table.cpp +3 -3
- sdist/kuzu-source/src/processor/operator/aggregate/base_aggregate.cpp +0 -11
- sdist/kuzu-source/src/processor/operator/aggregate/hash_aggregate.cpp +1 -1
- sdist/kuzu-source/src/processor/operator/aggregate/simple_aggregate.cpp +3 -3
- sdist/kuzu-source/src/processor/operator/ddl/alter.cpp +84 -43
- sdist/kuzu-source/src/processor/operator/persistent/node_batch_insert.cpp +15 -6
- sdist/kuzu-source/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp +1 -1
- sdist/kuzu-source/src/processor/operator/persistent/reader/csv/serial_csv_reader.cpp +2 -0
- sdist/kuzu-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +1 -1
- sdist/kuzu-source/src/processor/operator/persistent/rel_batch_insert.cpp +1 -1
- sdist/kuzu-source/src/processor/operator/physical_operator.cpp +1 -7
- sdist/kuzu-source/src/processor/result/base_hash_table.cpp +1 -2
- sdist/kuzu-source/src/storage/buffer_manager/buffer_manager.cpp +14 -4
- sdist/kuzu-source/src/storage/buffer_manager/spiller.cpp +6 -5
- sdist/kuzu-source/src/storage/checkpointer.cpp +20 -1
- sdist/kuzu-source/src/storage/file_handle.cpp +2 -2
- sdist/kuzu-source/src/storage/free_space_manager.cpp +64 -14
- sdist/kuzu-source/src/storage/overflow_file.cpp +2 -5
- sdist/kuzu-source/src/storage/page_manager.cpp +4 -0
- sdist/kuzu-source/src/storage/storage_manager.cpp +0 -5
- sdist/kuzu-source/src/storage/storage_utils.cpp +0 -20
- sdist/kuzu-source/src/storage/table/column.cpp +38 -44
- sdist/kuzu-source/src/storage/table/column_chunk.cpp +5 -5
- sdist/kuzu-source/src/storage/table/column_reader_writer.cpp +52 -60
- sdist/kuzu-source/src/storage/table/csr_chunked_node_group.cpp +2 -4
- sdist/kuzu-source/src/storage/table/dictionary_column.cpp +18 -20
- sdist/kuzu-source/src/storage/table/in_memory_exception_chunk.cpp +3 -4
- sdist/kuzu-source/src/storage/table/list_column.cpp +58 -69
- sdist/kuzu-source/src/storage/table/node_table.cpp +3 -1
- sdist/kuzu-source/src/storage/table/string_column.cpp +29 -32
- sdist/kuzu-source/src/storage/table/struct_column.cpp +18 -20
- sdist/kuzu-source/src/storage/wal/wal_record.cpp +8 -6
- sdist/kuzu-source/src/storage/wal/wal_replayer.cpp +2 -2
- sdist/kuzu-source/third_party/antlr4_cypher/cypher_parser.cpp +3226 -3117
- sdist/kuzu-source/third_party/antlr4_cypher/include/cypher_parser.h +65 -46
- sdist/kuzu-source/tools/python_api/src_cpp/py_connection.cpp +43 -0
- sdist/kuzu.egg-info/PKG-INFO +1 -1
- sdist/pyproject.toml +1 -1
sdist/PKG-INFO
CHANGED
sdist/kuzu-source/CMakeLists.txt
CHANGED
@@ -385,7 +385,8 @@ kU_AlterOptions
|
|
385
385
|
| kU_DropProperty
|
386
386
|
| kU_RenameTable
|
387
387
|
| kU_RenameProperty
|
388
|
-
| kU_AddFromToConnection
|
388
|
+
| kU_AddFromToConnection
|
389
|
+
| kU_DropFromToConnection;
|
389
390
|
|
390
391
|
kU_AddProperty
|
391
392
|
: ADD SP (kU_IfNotExists SP)? oC_PropertyKeyName SP kU_DataType ( SP kU_Default )? ;
|
@@ -403,7 +404,10 @@ kU_RenameProperty
|
|
403
404
|
: RENAME SP oC_PropertyKeyName SP TO SP oC_PropertyKeyName ;
|
404
405
|
|
405
406
|
kU_AddFromToConnection
|
406
|
-
: ADD SP kU_FromToConnection ;
|
407
|
+
: ADD SP (kU_IfNotExists SP)? kU_FromToConnection ;
|
408
|
+
|
409
|
+
kU_DropFromToConnection
|
410
|
+
: DROP SP (kU_IfExists SP)? kU_FromToConnection ;
|
407
411
|
|
408
412
|
kU_ColumnDefinitions: kU_ColumnDefinition ( SP? ',' SP? kU_ColumnDefinition )* ;
|
409
413
|
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
8908fae875ccf47f64fa086eefd53a1a
|
@@ -148,7 +148,8 @@ kU_AlterOptions
|
|
148
148
|
| kU_DropProperty
|
149
149
|
| kU_RenameTable
|
150
150
|
| kU_RenameProperty
|
151
|
-
| kU_AddFromToConnection
|
151
|
+
| kU_AddFromToConnection
|
152
|
+
| kU_DropFromToConnection;
|
152
153
|
|
153
154
|
kU_AddProperty
|
154
155
|
: ADD SP (kU_IfNotExists SP)? oC_PropertyKeyName SP kU_DataType ( SP kU_Default )? ;
|
@@ -166,7 +167,10 @@ kU_RenameProperty
|
|
166
167
|
: RENAME SP oC_PropertyKeyName SP TO SP oC_PropertyKeyName ;
|
167
168
|
|
168
169
|
kU_AddFromToConnection
|
169
|
-
: ADD SP kU_FromToConnection ;
|
170
|
+
: ADD SP (kU_IfNotExists SP)? kU_FromToConnection ;
|
171
|
+
|
172
|
+
kU_DropFromToConnection
|
173
|
+
: DROP SP (kU_IfExists SP)? kU_FromToConnection ;
|
170
174
|
|
171
175
|
kU_ColumnDefinitions: kU_ColumnDefinition ( SP? ',' SP? kU_ColumnDefinition )* ;
|
172
176
|
|
@@ -8,7 +8,6 @@
|
|
8
8
|
#include "catalog/catalog.h"
|
9
9
|
#include "catalog/catalog_entry/index_catalog_entry.h"
|
10
10
|
#include "catalog/catalog_entry/node_table_catalog_entry.h"
|
11
|
-
#include "catalog/catalog_entry/rel_group_catalog_entry.h"
|
12
11
|
#include "catalog/catalog_entry/sequence_catalog_entry.h"
|
13
12
|
#include "common/enums/extend_direction_util.h"
|
14
13
|
#include "common/exception/binder.h"
|
@@ -408,8 +407,9 @@ std::unique_ptr<BoundStatement> Binder::bindAlter(const Statement& statement) {
|
|
408
407
|
case AlterType::COMMENT: {
|
409
408
|
return bindCommentOn(statement);
|
410
409
|
}
|
411
|
-
case AlterType::ADD_FROM_TO_CONNECTION:
|
412
|
-
|
410
|
+
case AlterType::ADD_FROM_TO_CONNECTION:
|
411
|
+
case AlterType::DROP_FROM_TO_CONNECTION: {
|
412
|
+
return bindAlterFromToConnection(statement);
|
413
413
|
}
|
414
414
|
default: {
|
415
415
|
KU_UNREACHABLE;
|
@@ -491,7 +491,8 @@ std::unique_ptr<BoundStatement> Binder::bindCommentOn(const Statement& statement
|
|
491
491
|
return std::make_unique<BoundAlter>(std::move(boundInfo));
|
492
492
|
}
|
493
493
|
|
494
|
-
std::unique_ptr<BoundStatement> Binder::
|
494
|
+
std::unique_ptr<BoundStatement> Binder::bindAlterFromToConnection(
|
495
|
+
const Statement& statement) const {
|
495
496
|
auto& alter = statement.constCast<Alter>();
|
496
497
|
auto info = alter.getInfo();
|
497
498
|
auto extraInfo = info->extraInfo->constPtrCast<ExtraAddFromToConnection>();
|
@@ -500,15 +501,9 @@ std::unique_ptr<BoundStatement> Binder::bindAddFromToConnection(const Statement&
|
|
500
501
|
auto dstTableEntry = bindNodeTableEntry(extraInfo->dstTableName);
|
501
502
|
auto srcTableID = srcTableEntry->getTableID();
|
502
503
|
auto dstTableID = dstTableEntry->getTableID();
|
503
|
-
auto
|
504
|
-
|
505
|
-
|
506
|
-
common::stringFormat("Node table pair: {}->{} already exists in the {} table.",
|
507
|
-
srcTableEntry->getName(), dstTableEntry->getName(), tableName)};
|
508
|
-
}
|
509
|
-
auto boundExtraInfo = std::make_unique<BoundExtraAddFromToConnection>(srcTableID, dstTableID);
|
510
|
-
auto boundInfo = BoundAlterInfo(AlterType::ADD_FROM_TO_CONNECTION, tableName,
|
511
|
-
std::move(boundExtraInfo), info->onConflict);
|
504
|
+
auto boundExtraInfo = std::make_unique<BoundExtraAlterFromToConnection>(srcTableID, dstTableID);
|
505
|
+
auto boundInfo =
|
506
|
+
BoundAlterInfo(info->type, tableName, std::move(boundExtraInfo), info->onConflict);
|
512
507
|
return std::make_unique<BoundAlter>(std::move(boundInfo));
|
513
508
|
}
|
514
509
|
|
@@ -14,11 +14,23 @@ using namespace kuzu::main;
|
|
14
14
|
namespace kuzu {
|
15
15
|
namespace catalog {
|
16
16
|
|
17
|
-
void RelGroupCatalogEntry::addFromToConnection(
|
18
|
-
|
17
|
+
void RelGroupCatalogEntry::addFromToConnection(table_id_t srcTableID, table_id_t dstTableID,
|
18
|
+
oid_t oid) {
|
19
19
|
relTableInfos.emplace_back(NodeTableIDPair{srcTableID, dstTableID}, oid);
|
20
20
|
}
|
21
21
|
|
22
|
+
void RelGroupCatalogEntry::dropFromToConnection(table_id_t srcTableID, table_id_t dstTableID) {
|
23
|
+
auto tmpInfos = relTableInfos;
|
24
|
+
relTableInfos.clear();
|
25
|
+
for (auto& tmpInfo : tmpInfos) {
|
26
|
+
if (tmpInfo.nodePair.srcTableID == srcTableID &&
|
27
|
+
tmpInfo.nodePair.dstTableID == dstTableID) {
|
28
|
+
continue;
|
29
|
+
}
|
30
|
+
relTableInfos.emplace_back(tmpInfo);
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
22
34
|
void RelTableCatalogInfo::serialize(Serializer& ser) const {
|
23
35
|
ser.writeDebuggingInfo("nodePair");
|
24
36
|
ser.serializeValue(nodePair);
|
@@ -38,11 +38,16 @@ std::unique_ptr<TableCatalogEntry> TableCatalogEntry::alter(transaction_t timest
|
|
38
38
|
newEntry->setComment(commentInfo.comment);
|
39
39
|
} break;
|
40
40
|
case AlterType::ADD_FROM_TO_CONNECTION: {
|
41
|
-
auto&
|
42
|
-
*alterInfo.extraInfo->constPtrCast<
|
43
|
-
newEntry->ptrCast<RelGroupCatalogEntry>()->addFromToConnection(
|
44
|
-
|
45
|
-
|
41
|
+
auto& connectionInfo =
|
42
|
+
*alterInfo.extraInfo->constPtrCast<BoundExtraAlterFromToConnection>();
|
43
|
+
newEntry->ptrCast<RelGroupCatalogEntry>()->addFromToConnection(connectionInfo.fromTableID,
|
44
|
+
connectionInfo.toTableID, tables->getNextOIDNoLock());
|
45
|
+
} break;
|
46
|
+
case AlterType::DROP_FROM_TO_CONNECTION: {
|
47
|
+
auto& connectionInfo =
|
48
|
+
*alterInfo.extraInfo->constPtrCast<BoundExtraAlterFromToConnection>();
|
49
|
+
newEntry->ptrCast<RelGroupCatalogEntry>()->dropFromToConnection(connectionInfo.fromTableID,
|
50
|
+
connectionInfo.toTableID);
|
46
51
|
} break;
|
47
52
|
default: {
|
48
53
|
KU_UNREACHABLE;
|
@@ -194,7 +194,8 @@ void CatalogSet::alterTableEntry(Transaction* transaction,
|
|
194
194
|
case AlterType::ADD_PROPERTY:
|
195
195
|
case AlterType::DROP_PROPERTY:
|
196
196
|
case AlterType::RENAME_PROPERTY:
|
197
|
-
case AlterType::ADD_FROM_TO_CONNECTION:
|
197
|
+
case AlterType::ADD_FROM_TO_CONNECTION:
|
198
|
+
case AlterType::DROP_FROM_TO_CONNECTION: {
|
198
199
|
emplaceNoLock(std::move(newEntry));
|
199
200
|
if (transaction->shouldAppendToUndoBuffer()) {
|
200
201
|
transaction->pushAlterCatalogEntry(*this, *entry, alterInfo);
|
@@ -5,6 +5,7 @@
|
|
5
5
|
#include "common/exception/binder.h"
|
6
6
|
#include "common/exception/runtime.h"
|
7
7
|
#include "common/string_utils.h"
|
8
|
+
#include "common/types/value/nested.h"
|
8
9
|
|
9
10
|
namespace kuzu {
|
10
11
|
namespace common {
|
@@ -74,6 +75,15 @@ static void bindIntParsingOption(CSVReaderConfig& config, const std::string& opt
|
|
74
75
|
}
|
75
76
|
}
|
76
77
|
|
78
|
+
static void bindListParsingOption(CSVReaderConfig& config, const std::string& optionName,
|
79
|
+
const std::vector<std::string>& optionValue) {
|
80
|
+
if (optionName == "NULL_STRINGS") {
|
81
|
+
config.option.nullStrings = optionValue;
|
82
|
+
} else {
|
83
|
+
KU_UNREACHABLE;
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
77
87
|
template<uint64_t size>
|
78
88
|
static bool hasOption(const char* const (&arr)[size], const std::string& option) {
|
79
89
|
return std::find(std::begin(arr), std::end(arr), option) != std::end(arr);
|
@@ -91,6 +101,10 @@ static bool validateIntParsingOptionName(const std::string& parsingOptionName) {
|
|
91
101
|
return hasOption(CopyConstants::INT_CSV_PARSING_OPTIONS, parsingOptionName);
|
92
102
|
}
|
93
103
|
|
104
|
+
static bool validateListParsingOptionName(const std::string& parsingOptionName) {
|
105
|
+
return hasOption(CopyConstants::LIST_CSV_PARSING_OPTIONS, parsingOptionName);
|
106
|
+
}
|
107
|
+
|
94
108
|
static bool isValidBooleanOptionValue(const Value& value, const std::string& name) {
|
95
109
|
// Normalize and check if the string is a valid Boolean representation
|
96
110
|
auto strValue = value.toString();
|
@@ -115,6 +129,7 @@ CSVReaderConfig CSVReaderConfig::construct(const case_insensitive_map_t<Value>&
|
|
115
129
|
auto isValidStringParsingOption = validateStringParsingOptionName(name);
|
116
130
|
auto isValidBoolParsingOption = validateBoolParsingOptionName(name);
|
117
131
|
auto isValidIntParsingOption = validateIntParsingOptionName(name);
|
132
|
+
auto isValidListParsingOption = validateListParsingOptionName(name);
|
118
133
|
if (isValidBoolParsingOption) {
|
119
134
|
bindBoolParsingOption(config, name, isValidBooleanOptionValue(op.second, name));
|
120
135
|
} else if (isValidStringParsingOption) {
|
@@ -129,6 +144,17 @@ CSVReaderConfig CSVReaderConfig::construct(const case_insensitive_map_t<Value>&
|
|
129
144
|
stringFormat("The type of csv parsing option {} must be a INT64.", name));
|
130
145
|
}
|
131
146
|
bindIntParsingOption(config, name, op.second.getValue<int64_t>());
|
147
|
+
} else if (isValidListParsingOption) {
|
148
|
+
if (op.second.getDataType() != LogicalType::LIST(LogicalType::STRING())) {
|
149
|
+
throw BinderException(
|
150
|
+
stringFormat("The type of csv parsing option {} must be a STRING[].", name));
|
151
|
+
}
|
152
|
+
std::vector<std::string> optionValues;
|
153
|
+
for (auto i = 0u; i < op.second.getChildrenSize(); i++) {
|
154
|
+
optionValues.push_back(
|
155
|
+
NestedVal::getChildVal(&op.second, i)->getValue<std::string>());
|
156
|
+
}
|
157
|
+
bindListParsingOption(config, name, optionValues);
|
132
158
|
} else {
|
133
159
|
throw BinderException(stringFormat("Unrecognized csv parsing option: {}.", name));
|
134
160
|
}
|
@@ -31,18 +31,7 @@ void DataChunkCollection::append(DataChunk& chunk) {
|
|
31
31
|
}
|
32
32
|
}
|
33
33
|
|
34
|
-
void DataChunkCollection::
|
35
|
-
if (chunks.empty()) {
|
36
|
-
initTypes(chunk);
|
37
|
-
}
|
38
|
-
KU_ASSERT(chunk.getNumValueVectors() == types.size());
|
39
|
-
for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) {
|
40
|
-
KU_ASSERT(chunk.getValueVector(vectorIdx).dataType == types[vectorIdx]);
|
41
|
-
}
|
42
|
-
chunks.push_back(std::move(chunk));
|
43
|
-
}
|
44
|
-
|
45
|
-
void DataChunkCollection::initTypes(DataChunk& chunk) {
|
34
|
+
void DataChunkCollection::initTypes(const DataChunk& chunk) {
|
46
35
|
types.clear();
|
47
36
|
types.reserve(chunk.getNumValueVectors());
|
48
37
|
for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) {
|
@@ -50,7 +39,7 @@ void DataChunkCollection::initTypes(DataChunk& chunk) {
|
|
50
39
|
}
|
51
40
|
}
|
52
41
|
|
53
|
-
void DataChunkCollection::allocateChunk(DataChunk& chunk) {
|
42
|
+
void DataChunkCollection::allocateChunk(const DataChunk& chunk) {
|
54
43
|
if (chunks.empty()) {
|
55
44
|
types.reserve(chunk.getNumValueVectors());
|
56
45
|
for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) {
|
@@ -1,8 +1,6 @@
|
|
1
1
|
#include "common/data_chunk/data_chunk_state.h"
|
2
2
|
|
3
|
-
#include "common/data_chunk/sel_vector.h"
|
4
3
|
#include "common/system_config.h"
|
5
|
-
#include "common/types/types.h"
|
6
4
|
|
7
5
|
namespace kuzu {
|
8
6
|
namespace common {
|
@@ -16,16 +14,5 @@ std::shared_ptr<DataChunkState> DataChunkState::getSingleValueDataChunkState() {
|
|
16
14
|
return state;
|
17
15
|
}
|
18
16
|
|
19
|
-
void DataChunkState::slice(offset_t offset) {
|
20
|
-
// NOTE: this operation has performance penalty. Ideally we should directly modify selVector
|
21
|
-
// instead of creating a new one.
|
22
|
-
auto slicedSelVector = std::make_shared<SelectionVector>(DEFAULT_VECTOR_CAPACITY);
|
23
|
-
for (auto i = 0u; i < selVector->getSelSize() - offset; i++) {
|
24
|
-
slicedSelVector->getMutableBuffer()[i] = selVector->operator[](i + offset);
|
25
|
-
}
|
26
|
-
slicedSelVector->setToFiltered(selVector->getSelSize() - offset);
|
27
|
-
selVector = std::move(slicedSelVector);
|
28
|
-
}
|
29
|
-
|
30
17
|
} // namespace common
|
31
18
|
} // namespace kuzu
|
@@ -18,8 +18,8 @@ static const std::array<sel_t, DEFAULT_VECTOR_CAPACITY> INCREMENTAL_SELECTED_POS
|
|
18
18
|
return selectedPos;
|
19
19
|
}();
|
20
20
|
|
21
|
-
SelectionView::SelectionView(sel_t
|
22
|
-
: selectedPositions{INCREMENTAL_SELECTED_POS.data()
|
21
|
+
SelectionView::SelectionView(sel_t selectedSize)
|
22
|
+
: selectedPositions{INCREMENTAL_SELECTED_POS.data()}, selectedSize{selectedSize},
|
23
23
|
state{State::STATIC} {}
|
24
24
|
|
25
25
|
SelectionVector::SelectionVector() : SelectionVector{DEFAULT_VECTOR_CAPACITY} {}
|
@@ -31,24 +31,34 @@ void MetaWriter::write(const uint8_t* data, uint64_t size) {
|
|
31
31
|
|
32
32
|
storage::PageRange MetaWriter::flush(storage::FileHandle* fileHandle,
|
33
33
|
storage::ShadowFile& shadowFile) const {
|
34
|
-
auto numPagesToFlush =
|
34
|
+
auto numPagesToFlush = getNumPagesToFlush();
|
35
35
|
auto pageManager = fileHandle->getPageManager();
|
36
|
-
auto numPages = fileHandle->getNumPages();
|
37
36
|
auto pageRange = pageManager->allocatePageRange(numPagesToFlush);
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
flush(pageRange, fileHandle, shadowFile);
|
38
|
+
return pageRange;
|
39
|
+
}
|
40
|
+
|
41
|
+
void MetaWriter::flush(storage::PageRange allocatedPageRange, storage::FileHandle* fileHandle,
|
42
|
+
storage::ShadowFile& shadowFile) const {
|
43
|
+
KU_ASSERT(allocatedPageRange.numPages >= getNumPagesToFlush());
|
44
|
+
auto numPagesBeforeAllocate = allocatedPageRange.startPageIdx;
|
45
|
+
for (auto i = 0u; i < getNumPagesToFlush(); i++) {
|
46
|
+
auto pageIdx = allocatedPageRange.startPageIdx + i;
|
47
|
+
auto insertingNewPage = pageIdx >= numPagesBeforeAllocate;
|
41
48
|
auto shadowPageAndFrame = storage::ShadowUtils::createShadowVersionIfNecessaryAndPinPage(
|
42
49
|
pageIdx, insertingNewPage, *fileHandle, shadowFile);
|
43
50
|
memcpy(shadowPageAndFrame.frame, pages[i]->getData(), KUZU_PAGE_SIZE);
|
44
51
|
shadowFile.getShadowingFH().unpinPage(shadowPageAndFrame.shadowPage);
|
45
52
|
}
|
46
|
-
return pageRange;
|
47
53
|
}
|
48
54
|
|
49
55
|
bool MetaWriter::needNewBuffer(uint64_t size) const {
|
50
56
|
return pages.empty() || pageOffset + size > KUZU_PAGE_SIZE;
|
51
57
|
}
|
52
58
|
|
59
|
+
uint64_t MetaWriter::getPageSize() {
|
60
|
+
return KUZU_PAGE_SIZE;
|
61
|
+
}
|
62
|
+
|
53
63
|
} // namespace common
|
54
64
|
} // namespace kuzu
|
@@ -25,45 +25,5 @@ bool isLittleEndian() {
|
|
25
25
|
return *(uint8_t*)&testNumber == 1;
|
26
26
|
}
|
27
27
|
|
28
|
-
template<>
|
29
|
-
bool integerFitsIn<int64_t>(int64_t) {
|
30
|
-
return true;
|
31
|
-
}
|
32
|
-
|
33
|
-
template<>
|
34
|
-
bool integerFitsIn<int32_t>(int64_t val) {
|
35
|
-
return val >= INT32_MIN && val <= INT32_MAX;
|
36
|
-
}
|
37
|
-
|
38
|
-
template<>
|
39
|
-
bool integerFitsIn<int16_t>(int64_t val) {
|
40
|
-
return val >= INT16_MIN && val <= INT16_MAX;
|
41
|
-
}
|
42
|
-
|
43
|
-
template<>
|
44
|
-
bool integerFitsIn<int8_t>(int64_t val) {
|
45
|
-
return val >= INT8_MIN && val <= INT8_MAX;
|
46
|
-
}
|
47
|
-
|
48
|
-
template<>
|
49
|
-
bool integerFitsIn<uint64_t>(int64_t val) {
|
50
|
-
return val >= 0;
|
51
|
-
}
|
52
|
-
|
53
|
-
template<>
|
54
|
-
bool integerFitsIn<uint32_t>(int64_t val) {
|
55
|
-
return val >= 0 && val <= UINT32_MAX;
|
56
|
-
}
|
57
|
-
|
58
|
-
template<>
|
59
|
-
bool integerFitsIn<uint16_t>(int64_t val) {
|
60
|
-
return val >= 0 && val <= UINT16_MAX;
|
61
|
-
}
|
62
|
-
|
63
|
-
template<>
|
64
|
-
bool integerFitsIn<uint8_t>(int64_t val) {
|
65
|
-
return val >= 0 && val <= UINT8_MAX;
|
66
|
-
}
|
67
|
-
|
68
28
|
} // namespace common
|
69
29
|
} // namespace kuzu
|
@@ -847,14 +847,34 @@ void CastString::operation(const ku_string_t& input, union_entry_t& result,
|
|
847
847
|
resultVector, rowToAdd, CSVOption);
|
848
848
|
}
|
849
849
|
|
850
|
+
static void setVectorNull(ValueVector* vector, uint64_t vectorPos, std::string_view strVal,
|
851
|
+
const CSVOption* option) {
|
852
|
+
auto& type = vector->dataType;
|
853
|
+
switch (type.getLogicalTypeID()) {
|
854
|
+
case LogicalTypeID::STRING: {
|
855
|
+
if (std::any_of(option->nullStrings.begin(), option->nullStrings.end(),
|
856
|
+
[&](const std::string& nullStr) { return nullStr == strVal; })) {
|
857
|
+
vector->setNull(vectorPos, true /* isNull */);
|
858
|
+
return;
|
859
|
+
}
|
860
|
+
} break;
|
861
|
+
default: {
|
862
|
+
if (isNull(strVal)) {
|
863
|
+
vector->setNull(vectorPos, true /* isNull */);
|
864
|
+
return;
|
865
|
+
}
|
866
|
+
} break;
|
867
|
+
}
|
868
|
+
vector->setNull(vectorPos, false /* isNull */);
|
869
|
+
}
|
870
|
+
|
850
871
|
void CastString::copyStringToVector(ValueVector* vector, uint64_t vectorPos,
|
851
872
|
std::string_view strVal, const CSVOption* option) {
|
852
873
|
auto& type = vector->dataType;
|
853
|
-
|
854
|
-
|
874
|
+
setVectorNull(vector, vectorPos, strVal, option);
|
875
|
+
if (vector->isNull(vectorPos)) {
|
855
876
|
return;
|
856
877
|
}
|
857
|
-
vector->setNull(vectorPos, false /* isNull */);
|
858
878
|
switch (type.getLogicalTypeID()) {
|
859
879
|
case LogicalTypeID::INT128: {
|
860
880
|
int128_t val = 0;
|
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "function/hash/vector_hash_functions.h"
|
2
2
|
|
3
|
+
#include "common/data_chunk/sel_vector.h"
|
3
4
|
#include "common/system_config.h"
|
4
5
|
#include "common/type_utils.h"
|
5
6
|
#include "function/hash/hash_functions.h"
|
@@ -19,8 +20,8 @@ static void executeOnValue(const ValueVector& operand, sel_t operandPos, ValueVe
|
|
19
20
|
|
20
21
|
template<typename OPERAND_TYPE, typename RESULT_TYPE>
|
21
22
|
void UnaryHashFunctionExecutor::execute(const ValueVector& operand,
|
22
|
-
const
|
23
|
-
const
|
23
|
+
const SelectionView& operandSelectVec, ValueVector& result,
|
24
|
+
const SelectionView& resultSelectVec) {
|
24
25
|
auto resultValues = (RESULT_TYPE*)result.getData();
|
25
26
|
if (operand.hasNoNullsGuarantee()) {
|
26
27
|
if (operandSelectVec.isUnfiltered()) {
|
@@ -69,8 +70,8 @@ static void executeOnValue(const common::ValueVector& left, common::sel_t leftPo
|
|
69
70
|
result.getValue<RESULT_TYPE>(resultPos));
|
70
71
|
}
|
71
72
|
|
72
|
-
static void validateSelState(const
|
73
|
-
const
|
73
|
+
static void validateSelState(const SelectionView& leftSelVec, const SelectionView& rightSelVec,
|
74
|
+
const SelectionView& resultSelVec) {
|
74
75
|
auto leftSelSize = leftSelVec.getSelSize();
|
75
76
|
auto rightSelSize = rightSelVec.getSelSize();
|
76
77
|
auto resultSelSize = resultSelVec.getSelSize();
|
@@ -87,9 +88,9 @@ static void validateSelState(const common::SelectionVector& leftSelVec,
|
|
87
88
|
|
88
89
|
template<typename LEFT_TYPE, typename RIGHT_TYPE, typename RESULT_TYPE, typename FUNC>
|
89
90
|
void BinaryHashFunctionExecutor::execute(const common::ValueVector& left,
|
90
|
-
const
|
91
|
-
const
|
92
|
-
const
|
91
|
+
const SelectionView& leftSelVec, const common::ValueVector& right,
|
92
|
+
const SelectionView& rightSelVec, common::ValueVector& result,
|
93
|
+
const SelectionView& resultSelVec) {
|
93
94
|
validateSelState(leftSelVec, rightSelVec, resultSelVec);
|
94
95
|
result.resetAuxiliaryBuffer();
|
95
96
|
if (leftSelVec.getSelSize() != 1 && rightSelVec.getSelSize() != 1) {
|
@@ -143,8 +144,8 @@ static std::unique_ptr<ValueVector> computeDataVecHash(const ValueVector& operan
|
|
143
144
|
return hashVector;
|
144
145
|
}
|
145
146
|
|
146
|
-
static void finalizeDataVecHash(const ValueVector& operand, const
|
147
|
-
ValueVector& result, const
|
147
|
+
static void finalizeDataVecHash(const ValueVector& operand, const SelectionView& operandSelVec,
|
148
|
+
ValueVector& result, const SelectionView& resultSelVec, ValueVector& tmpHashVec) {
|
148
149
|
for (auto i = 0u; i < operandSelVec.getSelSize(); i++) {
|
149
150
|
auto pos = operandSelVec[i];
|
150
151
|
auto resultPos = resultSelVec[i];
|
@@ -162,15 +163,14 @@ static void finalizeDataVecHash(const ValueVector& operand, const SelectionVecto
|
|
162
163
|
}
|
163
164
|
}
|
164
165
|
|
165
|
-
static void computeListVectorHash(const ValueVector& operand,
|
166
|
-
|
167
|
-
const SelectionVector& resultSelectVec) {
|
166
|
+
static void computeListVectorHash(const ValueVector& operand, const SelectionView& operandSelectVec,
|
167
|
+
ValueVector& result, const SelectionView& resultSelectVec) {
|
168
168
|
auto dataVecHash = computeDataVecHash(operand);
|
169
169
|
finalizeDataVecHash(operand, operandSelectVec, result, resultSelectVec, *dataVecHash);
|
170
170
|
}
|
171
171
|
|
172
|
-
static void computeStructVecHash(const ValueVector& operand, const
|
173
|
-
ValueVector& result, const
|
172
|
+
static void computeStructVecHash(const ValueVector& operand, const SelectionView& operandSelVec,
|
173
|
+
ValueVector& result, const SelectionView& resultSelVec) {
|
174
174
|
switch (operand.dataType.getLogicalTypeID()) {
|
175
175
|
case LogicalTypeID::NODE: {
|
176
176
|
KU_ASSERT(0 == common::StructType::getFieldIdx(operand.dataType, InternalKeyword::ID));
|
@@ -188,12 +188,12 @@ static void computeStructVecHash(const ValueVector& operand, const SelectionVect
|
|
188
188
|
VectorHashFunction::computeHash(*StructVector::getFieldVector(&operand, 0 /* idx */),
|
189
189
|
operandSelVec, result, resultSelVec);
|
190
190
|
auto tmpHashVector = std::make_unique<ValueVector>(LogicalType::HASH());
|
191
|
+
SelectionView tmpSel(resultSelVec.getSelSize());
|
191
192
|
for (auto i = 1u; i < StructType::getNumFields(operand.dataType); i++) {
|
192
193
|
auto fieldVector = StructVector::getFieldVector(&operand, i);
|
193
|
-
VectorHashFunction::computeHash(*fieldVector, operandSelVec, *tmpHashVector,
|
194
|
+
VectorHashFunction::computeHash(*fieldVector, operandSelVec, *tmpHashVector, tmpSel);
|
195
|
+
VectorHashFunction::combineHash(*tmpHashVector, tmpSel, result, resultSelVec, result,
|
194
196
|
resultSelVec);
|
195
|
-
VectorHashFunction::combineHash(*tmpHashVector, resultSelVec, result, resultSelVec,
|
196
|
-
result, resultSelVec);
|
197
197
|
}
|
198
198
|
} break;
|
199
199
|
default:
|
@@ -202,8 +202,8 @@ static void computeStructVecHash(const ValueVector& operand, const SelectionVect
|
|
202
202
|
}
|
203
203
|
|
204
204
|
void VectorHashFunction::computeHash(const ValueVector& operand,
|
205
|
-
const
|
206
|
-
const
|
205
|
+
const SelectionView& operandSelectVec, ValueVector& result,
|
206
|
+
const SelectionView& resultSelectVec) {
|
207
207
|
result.state = operand.state;
|
208
208
|
KU_ASSERT(result.dataType.getLogicalTypeID() == LogicalType::HASH().getLogicalTypeID());
|
209
209
|
TypeUtils::visit(
|
@@ -225,9 +225,9 @@ void VectorHashFunction::computeHash(const ValueVector& operand,
|
|
225
225
|
});
|
226
226
|
}
|
227
227
|
|
228
|
-
void VectorHashFunction::combineHash(const ValueVector& left, const
|
229
|
-
const ValueVector& right, const
|
230
|
-
const
|
228
|
+
void VectorHashFunction::combineHash(const ValueVector& left, const SelectionView& leftSelVec,
|
229
|
+
const ValueVector& right, const SelectionView& rightSelVec, ValueVector& result,
|
230
|
+
const SelectionView& resultSelVec) {
|
231
231
|
KU_ASSERT(left.dataType.getLogicalTypeID() == LogicalType::HASH().getLogicalTypeID());
|
232
232
|
KU_ASSERT(left.dataType.getLogicalTypeID() == right.dataType.getLogicalTypeID());
|
233
233
|
KU_ASSERT(left.dataType.getLogicalTypeID() == result.dataType.getLogicalTypeID());
|
@@ -111,7 +111,7 @@ public:
|
|
111
111
|
std::unique_ptr<BoundStatement> bindDropProperty(const parser::Statement& statement) const;
|
112
112
|
std::unique_ptr<BoundStatement> bindRenameProperty(const parser::Statement& statement) const;
|
113
113
|
std::unique_ptr<BoundStatement> bindCommentOn(const parser::Statement& statement) const;
|
114
|
-
std::unique_ptr<BoundStatement>
|
114
|
+
std::unique_ptr<BoundStatement> bindAlterFromToConnection(
|
115
115
|
const parser::Statement& statement) const;
|
116
116
|
|
117
117
|
std::vector<PropertyDefinition> bindPropertyDefinitions(
|
@@ -110,14 +110,14 @@ struct BoundExtraCommentInfo final : BoundExtraAlterInfo {
|
|
110
110
|
}
|
111
111
|
};
|
112
112
|
|
113
|
-
struct
|
114
|
-
common::table_id_t
|
115
|
-
common::table_id_t
|
113
|
+
struct BoundExtraAlterFromToConnection final : BoundExtraAlterInfo {
|
114
|
+
common::table_id_t fromTableID;
|
115
|
+
common::table_id_t toTableID;
|
116
116
|
|
117
|
-
|
118
|
-
:
|
117
|
+
BoundExtraAlterFromToConnection(common::table_id_t fromTableID, common::table_id_t toTableID)
|
118
|
+
: fromTableID{fromTableID}, toTableID{toTableID} {}
|
119
119
|
std::unique_ptr<BoundExtraAlterInfo> copy() const override {
|
120
|
-
return std::make_unique<
|
120
|
+
return std::make_unique<BoundExtraAlterFromToConnection>(*this);
|
121
121
|
}
|
122
122
|
};
|
123
123
|
|
@@ -80,6 +80,7 @@ public:
|
|
80
80
|
|
81
81
|
void addFromToConnection(common::table_id_t srcTableID, common::table_id_t dstTableID,
|
82
82
|
common::oid_t oid);
|
83
|
+
void dropFromToConnection(common::table_id_t srcTableID, common::table_id_t dstTableID);
|
83
84
|
void serialize(common::Serializer& serializer) const override;
|
84
85
|
static std::unique_ptr<RelGroupCatalogEntry> deserialize(common::Deserializer& deserializer);
|
85
86
|
std::string toCypher(const ToCypherInfo& info) const override;
|
@@ -134,11 +134,14 @@ struct CopyConstants {
|
|
134
134
|
static constexpr std::array DEFAULT_CSV_DELIMITER_SEARCH_SPACE = {',', ';', '\t', '|'};
|
135
135
|
static constexpr std::array DEFAULT_CSV_QUOTE_SEARCH_SPACE = {'"', '\''};
|
136
136
|
static constexpr std::array DEFAULT_CSV_ESCAPE_SEARCH_SPACE = {'"', '\\', '\''};
|
137
|
+
static constexpr std::array DEFAULT_CSV_NULL_STRINGS = {""};
|
137
138
|
|
138
139
|
static constexpr const char* INT_CSV_PARSING_OPTIONS[] = {"SKIP", "SAMPLE_SIZE"};
|
139
140
|
static constexpr uint64_t DEFAULT_CSV_SKIP_NUM = 0;
|
140
141
|
static constexpr uint64_t DEFAULT_CSV_TYPE_DEDUCTION_SAMPLE_SIZE = 256;
|
141
142
|
|
143
|
+
static constexpr const char* LIST_CSV_PARSING_OPTIONS[] = {"NULL_STRINGS"};
|
144
|
+
|
142
145
|
// metadata columns used to populate CSV warnings
|
143
146
|
static constexpr std::array SHARED_WARNING_DATA_COLUMN_NAMES = {"blockIdx", "offsetInBlock",
|
144
147
|
"startByteOffset", "endByteOffset"};
|
@@ -25,6 +25,7 @@ struct CSVOption {
|
|
25
25
|
bool setDelim;
|
26
26
|
bool setQuote;
|
27
27
|
bool setHeader;
|
28
|
+
std::vector<std::string> nullStrings;
|
28
29
|
|
29
30
|
CSVOption()
|
30
31
|
: escapeChar{CopyConstants::DEFAULT_CSV_ESCAPE_CHAR},
|
@@ -39,7 +40,8 @@ struct CSVOption {
|
|
39
40
|
setEscape{CopyConstants::DEFAULT_CSV_SET_DIALECT},
|
40
41
|
setDelim{CopyConstants::DEFAULT_CSV_SET_DIALECT},
|
41
42
|
setQuote{CopyConstants::DEFAULT_CSV_SET_DIALECT},
|
42
|
-
setHeader{CopyConstants::DEFAULT_CSV_SET_DIALECT}
|
43
|
+
setHeader{CopyConstants::DEFAULT_CSV_SET_DIALECT},
|
44
|
+
nullStrings{CopyConstants::DEFAULT_CSV_NULL_STRINGS[0]} {}
|
43
45
|
|
44
46
|
EXPLICIT_COPY_DEFAULT_MOVE(CSVOption);
|
45
47
|
|
@@ -85,7 +87,7 @@ struct CSVOption {
|
|
85
87
|
// sampleSize is 0
|
86
88
|
allowUnbracedList{other.allowUnbracedList}, ignoreErrors{other.ignoreErrors},
|
87
89
|
autoDetection{other.autoDetection}, setEscape{other.setEscape}, setDelim{other.setDelim},
|
88
|
-
setQuote{other.setQuote}, setHeader{other.setHeader} {}
|
90
|
+
setQuote{other.setQuote}, setHeader{other.setHeader}, nullStrings{other.nullStrings} {}
|
89
91
|
};
|
90
92
|
|
91
93
|
struct CSVReaderConfig {
|