lbug 0.12.3-dev.2 → 0.12.3-dev.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -6
- package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
- package/lbug-source/CMakeLists.txt +15 -6
- package/lbug-source/Makefile +1 -2
- package/lbug-source/README.md +2 -6
- package/lbug-source/benchmark/serializer.py +24 -3
- package/lbug-source/dataset/demo-db/csv/copy.cypher +4 -4
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
- package/lbug-source/dataset/demo-db/parquet/copy.cypher +4 -4
- package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
- package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
- package/lbug-source/scripts/antlr4/hash.md5 +1 -1
- package/lbug-source/scripts/generate_binary_demo.sh +1 -1
- package/lbug-source/src/antlr4/Cypher.g4 +1 -1
- package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
- package/lbug-source/src/catalog/catalog.cpp +5 -4
- package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
- package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
- package/lbug-source/src/function/function_collection.cpp +2 -1
- package/lbug-source/src/function/table/CMakeLists.txt +1 -0
- package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
- package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
- package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
- package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
- package/lbug-source/src/include/common/constants.h +1 -0
- package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
- package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
- package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
- package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
- package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
- package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
- package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
- package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
- package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
- package/lbug-source/src/include/processor/plan_mapper.h +2 -0
- package/lbug-source/src/include/storage/storage_manager.h +1 -0
- package/lbug-source/src/include/storage/storage_version_info.h +1 -7
- package/lbug-source/src/include/storage/table/node_table.h +6 -1
- package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
- package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
- package/lbug-source/src/include/storage/table/rel_table.h +2 -2
- package/lbug-source/src/include/transaction/transaction.h +2 -0
- package/lbug-source/src/main/query_result/materialized_query_result.cpp +2 -2
- package/lbug-source/src/optimizer/CMakeLists.txt +1 -0
- package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
- package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
- package/lbug-source/src/optimizer/optimizer.cpp +6 -0
- package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
- package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
- package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
- package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
- package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
- package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
- package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
- package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
- package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
- package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
- package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
- package/lbug-source/src/storage/storage_manager.cpp +37 -6
- package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
- package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
- package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
- package/lbug-source/test/api/api_test.cpp +18 -0
- package/lbug-source/test/common/string_format.cpp +9 -1
- package/lbug-source/test/copy/copy_test.cpp +4 -4
- package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
- package/lbug-source/test/include/test_runner/test_group.h +11 -1
- package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
- package/lbug-source/test/runner/e2e_test.cpp +7 -1
- package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
- package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
- package/lbug-source/test/test_helper/test_helper.cpp +33 -1
- package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
- package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
- package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
- package/lbug-source/test/test_runner/test_parser.cpp +3 -0
- package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
- package/lbug-source/test/transaction/transaction_test.cpp +19 -15
- package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
- package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
- package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
- package/lbug-source/tools/shell/embedded_shell.cpp +78 -3
- package/lbug-source/tools/shell/include/embedded_shell.h +2 -0
- package/lbug-source/tools/shell/linenoise.cpp +3 -3
- package/lbug-source/tools/shell/test/test_helper.py +1 -1
- package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
- package/lbug-source/tools/shell/test/test_shell_commands.py +19 -0
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#include "planner/operator/scan/logical_count_rel_table.h"
|
|
2
|
+
|
|
3
|
+
namespace lbug {
|
|
4
|
+
namespace planner {
|
|
5
|
+
|
|
6
|
+
// Builds the factorized output schema for the count operator: a fresh schema with exactly one
// group that contains only the count expression and is flagged as single-state.
void LogicalCountRelTable::computeFactorizedSchema() {
    createEmptySchema();
    // This operator is a source (no child in the logical plan). The bound node is only used
    // internally for scanning, so the count expression is the sole expression put in scope.
    const auto countGroupPos = schema->createGroup();
    schema->insertToGroupAndScope(countExpr, countGroupPos);
    schema->setGroupAsSingleState(countGroupPos);
}
|
|
15
|
+
|
|
16
|
+
// Builds the flat output schema for the count operator: a fresh schema with one group whose
// only member is the count expression.
void LogicalCountRelTable::computeFlatSchema() {
    createEmptySchema();
    const auto countGroupPos = schema->createGroup();
    schema->insertToGroupAndScope(countExpr, countGroupPos);
}
|
|
22
|
+
|
|
23
|
+
} // namespace planner
|
|
24
|
+
} // namespace lbug
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#include "planner/operator/scan/logical_count_rel_table.h"
|
|
2
|
+
#include "processor/operator/scan/count_rel_table.h"
|
|
3
|
+
#include "processor/plan_mapper.h"
|
|
4
|
+
#include "storage/storage_manager.h"
|
|
5
|
+
|
|
6
|
+
using namespace lbug::common;
|
|
7
|
+
using namespace lbug::planner;
|
|
8
|
+
using namespace lbug::storage;
|
|
9
|
+
|
|
10
|
+
namespace lbug {
|
|
11
|
+
namespace processor {
|
|
12
|
+
|
|
13
|
+
std::unique_ptr<PhysicalOperator> PlanMapper::mapCountRelTable(
|
|
14
|
+
const LogicalOperator* logicalOperator) {
|
|
15
|
+
auto& logicalCountRelTable = logicalOperator->constCast<LogicalCountRelTable>();
|
|
16
|
+
auto outSchema = logicalCountRelTable.getSchema();
|
|
17
|
+
|
|
18
|
+
auto storageManager = StorageManager::Get(*clientContext);
|
|
19
|
+
|
|
20
|
+
// Get the node tables for scanning bound nodes
|
|
21
|
+
std::vector<NodeTable*> nodeTables;
|
|
22
|
+
for (auto tableID : logicalCountRelTable.getBoundNodeTableIDs()) {
|
|
23
|
+
nodeTables.push_back(storageManager->getTable(tableID)->ptrCast<NodeTable>());
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
// Get the rel tables
|
|
27
|
+
std::vector<RelTable*> relTables;
|
|
28
|
+
for (auto tableID : logicalCountRelTable.getRelTableIDs()) {
|
|
29
|
+
relTables.push_back(storageManager->getTable(tableID)->ptrCast<RelTable>());
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// Determine rel data direction from extend direction
|
|
33
|
+
auto extendDirection = logicalCountRelTable.getDirection();
|
|
34
|
+
RelDataDirection relDirection;
|
|
35
|
+
if (extendDirection == ExtendDirection::FWD) {
|
|
36
|
+
relDirection = RelDataDirection::FWD;
|
|
37
|
+
} else if (extendDirection == ExtendDirection::BWD) {
|
|
38
|
+
relDirection = RelDataDirection::BWD;
|
|
39
|
+
} else {
|
|
40
|
+
// For BOTH, we'll scan FWD (shouldn't reach here as optimizer filters BOTH)
|
|
41
|
+
relDirection = RelDataDirection::FWD;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Get the output position for the count expression
|
|
45
|
+
auto countOutputPos = getDataPos(*logicalCountRelTable.getCountExpr(), *outSchema);
|
|
46
|
+
|
|
47
|
+
auto printInfo = std::make_unique<CountRelTablePrintInfo>(
|
|
48
|
+
logicalCountRelTable.getRelGroupEntry()->getName());
|
|
49
|
+
|
|
50
|
+
return std::make_unique<CountRelTable>(std::move(nodeTables), std::move(relTables),
|
|
51
|
+
relDirection, countOutputPos, getOperatorID(), std::move(printInfo));
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
} // namespace processor
|
|
55
|
+
} // namespace lbug
|
|
@@ -62,6 +62,9 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapOperator(const LogicalOperator*
|
|
|
62
62
|
case LogicalOperatorType::COPY_TO: {
|
|
63
63
|
physicalOperator = mapCopyTo(logicalOperator);
|
|
64
64
|
} break;
|
|
65
|
+
case LogicalOperatorType::COUNT_REL_TABLE: {
|
|
66
|
+
physicalOperator = mapCountRelTable(logicalOperator);
|
|
67
|
+
} break;
|
|
65
68
|
case LogicalOperatorType::CREATE_MACRO: {
|
|
66
69
|
physicalOperator = mapCreateMacro(logicalOperator);
|
|
67
70
|
} break;
|
|
@@ -340,6 +340,10 @@ std::unique_ptr<ColumnReader> ParquetReader::createReader() {
|
|
|
340
340
|
throw CopyException{"Root element of Parquet file must be a struct"};
|
|
341
341
|
}
|
|
342
342
|
// LCOV_EXCL_STOP
|
|
343
|
+
// Clear existing column metadata before populating (in case createReader is called multiple
|
|
344
|
+
// times)
|
|
345
|
+
columnNames.clear();
|
|
346
|
+
columnTypes.clear();
|
|
343
347
|
for (auto& field : StructType::getFields(rootReader->getDataType())) {
|
|
344
348
|
columnNames.push_back(field.getName());
|
|
345
349
|
columnTypes.push_back(field.getType().copy());
|
|
@@ -27,6 +27,8 @@ std::string PhysicalOperatorUtils::operatorTypeToString(PhysicalOperatorType ope
|
|
|
27
27
|
return "BATCH_INSERT";
|
|
28
28
|
case PhysicalOperatorType::COPY_TO:
|
|
29
29
|
return "COPY_TO";
|
|
30
|
+
case PhysicalOperatorType::COUNT_REL_TABLE:
|
|
31
|
+
return "COUNT_REL_TABLE";
|
|
30
32
|
case PhysicalOperatorType::CREATE_MACRO:
|
|
31
33
|
return "CREATE_MACRO";
|
|
32
34
|
case PhysicalOperatorType::CREATE_SEQUENCE:
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
#include "processor/operator/scan/count_rel_table.h"
|
|
2
|
+
|
|
3
|
+
#include "common/system_config.h"
|
|
4
|
+
#include "main/client_context.h"
|
|
5
|
+
#include "main/database.h"
|
|
6
|
+
#include "processor/execution_context.h"
|
|
7
|
+
#include "storage/buffer_manager/memory_manager.h"
|
|
8
|
+
#include "storage/local_storage/local_rel_table.h"
|
|
9
|
+
#include "storage/local_storage/local_storage.h"
|
|
10
|
+
#include "storage/table/column.h"
|
|
11
|
+
#include "storage/table/column_chunk_data.h"
|
|
12
|
+
#include "storage/table/csr_chunked_node_group.h"
|
|
13
|
+
#include "storage/table/csr_node_group.h"
|
|
14
|
+
#include "storage/table/rel_table_data.h"
|
|
15
|
+
#include "transaction/transaction.h"
|
|
16
|
+
|
|
17
|
+
using namespace lbug::common;
|
|
18
|
+
using namespace lbug::storage;
|
|
19
|
+
using namespace lbug::transaction;
|
|
20
|
+
|
|
21
|
+
namespace lbug {
|
|
22
|
+
namespace processor {
|
|
23
|
+
|
|
24
|
+
// Prepares the operator's local state: clears the running count and the "already produced"
// flag, and caches the result-set vector that will receive the count.
void CountRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* /*context*/) {
    totalCount = 0;
    hasExecuted = false;
    countVector = resultSet->getValueVector(countOutputPos).get();
}
|
|
29
|
+
|
|
30
|
+
// Count rels by using CSR metadata, accounting for deletions and uncommitted data.
|
|
31
|
+
// This is more efficient than scanning through all edges.
|
|
32
|
+
bool CountRelTable::getNextTuplesInternal(ExecutionContext* context) {
|
|
33
|
+
if (hasExecuted) {
|
|
34
|
+
return false;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
auto transaction = Transaction::Get(*context->clientContext);
|
|
38
|
+
auto* memoryManager = context->clientContext->getDatabase()->getMemoryManager();
|
|
39
|
+
|
|
40
|
+
for (auto* relTable : relTables) {
|
|
41
|
+
// Get the RelTableData for the specified direction
|
|
42
|
+
auto* relTableData = relTable->getDirectedTableData(direction);
|
|
43
|
+
auto numNodeGroups = relTableData->getNumNodeGroups();
|
|
44
|
+
auto* csrLengthColumn = relTableData->getCSRLengthColumn();
|
|
45
|
+
|
|
46
|
+
// For each node group in the rel table
|
|
47
|
+
for (node_group_idx_t nodeGroupIdx = 0; nodeGroupIdx < numNodeGroups; nodeGroupIdx++) {
|
|
48
|
+
auto* nodeGroup = relTableData->getNodeGroup(nodeGroupIdx);
|
|
49
|
+
if (!nodeGroup) {
|
|
50
|
+
continue;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
auto& csrNodeGroup = nodeGroup->cast<CSRNodeGroup>();
|
|
54
|
+
|
|
55
|
+
// Count from persistent (checkpointed) data
|
|
56
|
+
if (auto* persistentGroup = csrNodeGroup.getPersistentChunkedGroup()) {
|
|
57
|
+
// Sum the actual relationship lengths from the CSR header instead of using
|
|
58
|
+
// getNumRows() which includes dummy rows added for CSR offset array gaps
|
|
59
|
+
auto& csrPersistentGroup = persistentGroup->cast<ChunkedCSRNodeGroup>();
|
|
60
|
+
auto& csrHeader = csrPersistentGroup.getCSRHeader();
|
|
61
|
+
|
|
62
|
+
// Get the number of nodes in this CSR header
|
|
63
|
+
auto numNodes = csrHeader.length->getNumValues();
|
|
64
|
+
if (numNodes == 0) {
|
|
65
|
+
continue;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Create an in-memory chunk to scan the CSR length column into
|
|
69
|
+
auto lengthChunk =
|
|
70
|
+
ColumnChunkFactory::createColumnChunkData(*memoryManager, LogicalType::UINT64(),
|
|
71
|
+
false /*enableCompression*/, StorageConfig::NODE_GROUP_SIZE,
|
|
72
|
+
ResidencyState::IN_MEMORY, false /*initializeToZero*/);
|
|
73
|
+
|
|
74
|
+
// Initialize scan state and scan the length column from disk
|
|
75
|
+
ChunkState chunkState;
|
|
76
|
+
csrHeader.length->initializeScanState(chunkState, csrLengthColumn);
|
|
77
|
+
csrLengthColumn->scan(chunkState, lengthChunk.get(), 0 /*offsetInChunk*/, numNodes);
|
|
78
|
+
|
|
79
|
+
// Sum all the lengths
|
|
80
|
+
auto* lengthData = reinterpret_cast<const uint64_t*>(lengthChunk->getData());
|
|
81
|
+
row_idx_t groupRelCount = 0;
|
|
82
|
+
for (offset_t i = 0; i < numNodes; ++i) {
|
|
83
|
+
groupRelCount += lengthData[i];
|
|
84
|
+
}
|
|
85
|
+
totalCount += groupRelCount;
|
|
86
|
+
|
|
87
|
+
// Subtract deletions from persistent data
|
|
88
|
+
if (persistentGroup->hasVersionInfo()) {
|
|
89
|
+
auto numDeletions =
|
|
90
|
+
persistentGroup->getNumDeletions(transaction, 0, groupRelCount);
|
|
91
|
+
totalCount -= numDeletions;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// Count in-memory committed data (not yet checkpointed)
|
|
96
|
+
// This data is stored in chunkedGroups within the NodeGroup
|
|
97
|
+
auto numChunkedGroups = csrNodeGroup.getNumChunkedGroups();
|
|
98
|
+
for (node_group_idx_t i = 0; i < numChunkedGroups; i++) {
|
|
99
|
+
auto* chunkedGroup = csrNodeGroup.getChunkedNodeGroup(i);
|
|
100
|
+
if (chunkedGroup) {
|
|
101
|
+
auto numRows = chunkedGroup->getNumRows();
|
|
102
|
+
totalCount += numRows;
|
|
103
|
+
// Subtract deletions from in-memory committed data
|
|
104
|
+
if (chunkedGroup->hasVersionInfo()) {
|
|
105
|
+
auto numDeletions = chunkedGroup->getNumDeletions(transaction, 0, numRows);
|
|
106
|
+
totalCount -= numDeletions;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Add uncommitted insertions from local storage
|
|
113
|
+
if (transaction->isWriteTransaction()) {
|
|
114
|
+
if (auto* localTable =
|
|
115
|
+
transaction->getLocalStorage()->getLocalTable(relTable->getTableID())) {
|
|
116
|
+
auto& localRelTable = localTable->cast<LocalRelTable>();
|
|
117
|
+
// Count entries in the CSR index for this direction.
|
|
118
|
+
// We can't use getNumTotalRows() because it includes deleted rows.
|
|
119
|
+
auto& csrIndex = localRelTable.getCSRIndex(direction);
|
|
120
|
+
for (const auto& [nodeOffset, rowIndices] : csrIndex) {
|
|
121
|
+
totalCount += rowIndices.size();
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
hasExecuted = true;
|
|
128
|
+
|
|
129
|
+
// Write the count to the output vector (single value)
|
|
130
|
+
countVector->state->getSelVectorUnsafe().setToUnfiltered(1);
|
|
131
|
+
countVector->setValue<int64_t>(0, static_cast<int64_t>(totalCount));
|
|
132
|
+
|
|
133
|
+
return true;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
} // namespace processor
|
|
137
|
+
} // namespace lbug
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
#include "processor/execution_context.h"
|
|
4
4
|
#include "storage/local_storage/local_storage.h"
|
|
5
|
+
#include "storage/table/parquet_rel_table.h"
|
|
5
6
|
|
|
6
7
|
using namespace lbug::common;
|
|
7
8
|
using namespace lbug::storage;
|
|
@@ -54,8 +55,29 @@ void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionCo
|
|
|
54
55
|
auto clientContext = context->clientContext;
|
|
55
56
|
boundNodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get();
|
|
56
57
|
auto nbrNodeIDVector = outVectors[0];
|
|
57
|
-
|
|
58
|
-
|
|
58
|
+
|
|
59
|
+
// Check if any table in any scanner is a ParquetRelTable
|
|
60
|
+
bool hasParquetTable = false;
|
|
61
|
+
for (auto& [_, scanner] : scanners) {
|
|
62
|
+
for (auto& relInfo : scanner.relInfos) {
|
|
63
|
+
if (dynamic_cast<storage::ParquetRelTable*>(relInfo.table) != nullptr) {
|
|
64
|
+
hasParquetTable = true;
|
|
65
|
+
break;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
if (hasParquetTable)
|
|
69
|
+
break;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// Create appropriate scan state type
|
|
73
|
+
if (hasParquetTable) {
|
|
74
|
+
scanState =
|
|
75
|
+
std::make_unique<storage::ParquetRelTableScanState>(*MemoryManager::Get(*clientContext),
|
|
76
|
+
boundNodeIDVector, outVectors, nbrNodeIDVector->state);
|
|
77
|
+
} else {
|
|
78
|
+
scanState = std::make_unique<RelTableScanState>(*MemoryManager::Get(*clientContext),
|
|
79
|
+
boundNodeIDVector, outVectors, nbrNodeIDVector->state);
|
|
80
|
+
}
|
|
59
81
|
for (auto& [_, scanner] : scanners) {
|
|
60
82
|
for (auto& relInfo : scanner.relInfos) {
|
|
61
83
|
if (directionInfo.directionPos.isValid()) {
|
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
#include "binder/expression/expression_util.h"
|
|
4
4
|
#include "processor/execution_context.h"
|
|
5
|
+
#include "storage/buffer_manager/memory_manager.h"
|
|
5
6
|
#include "storage/local_storage/local_node_table.h"
|
|
6
7
|
#include "storage/local_storage/local_storage.h"
|
|
8
|
+
#include "storage/table/parquet_node_table.h"
|
|
7
9
|
|
|
8
10
|
using namespace lbug::common;
|
|
9
11
|
using namespace lbug::storage;
|
|
@@ -35,7 +37,23 @@ void ScanNodeTableSharedState::initialize(const transaction::Transaction* transa
|
|
|
35
37
|
this->table = table;
|
|
36
38
|
this->currentCommittedGroupIdx = 0;
|
|
37
39
|
this->currentUnCommittedGroupIdx = 0;
|
|
38
|
-
|
|
40
|
+
|
|
41
|
+
// Initialize table-specific scan coordination (e.g., for ParquetNodeTable)
|
|
42
|
+
table->initializeScanCoordination(transaction);
|
|
43
|
+
|
|
44
|
+
if (const auto parquetTable = dynamic_cast<ParquetNodeTable*>(table)) {
|
|
45
|
+
// For parquet tables, set numCommittedNodeGroups to number of row groups
|
|
46
|
+
std::vector<bool> columnSkips;
|
|
47
|
+
try {
|
|
48
|
+
auto tempReader = std::make_unique<processor::ParquetReader>(
|
|
49
|
+
parquetTable->getParquetFilePath(), columnSkips, transaction->getClientContext());
|
|
50
|
+
this->numCommittedNodeGroups = tempReader->getNumRowsGroups();
|
|
51
|
+
} catch (const std::exception& e) {
|
|
52
|
+
this->numCommittedNodeGroups = 1;
|
|
53
|
+
}
|
|
54
|
+
} else {
|
|
55
|
+
this->numCommittedNodeGroups = table->getNumCommittedNodeGroups();
|
|
56
|
+
}
|
|
39
57
|
if (transaction->isWriteTransaction()) {
|
|
40
58
|
if (const auto localTable =
|
|
41
59
|
transaction->getLocalStorage()->getLocalTable(this->table->getTableID())) {
|
|
@@ -46,21 +64,23 @@ void ScanNodeTableSharedState::initialize(const transaction::Transaction* transa
|
|
|
46
64
|
progressSharedState.numGroups += numCommittedNodeGroups;
|
|
47
65
|
}
|
|
48
66
|
|
|
49
|
-
void ScanNodeTableSharedState::nextMorsel(
|
|
67
|
+
void ScanNodeTableSharedState::nextMorsel(TableScanState& scanState,
|
|
50
68
|
ScanNodeTableProgressSharedState& progressSharedState) {
|
|
51
69
|
std::unique_lock lck{mtx};
|
|
70
|
+
// Cast to NodeTableScanState since we know this is for node tables
|
|
71
|
+
auto& nodeScanState = scanState.cast<NodeTableScanState>();
|
|
52
72
|
if (currentCommittedGroupIdx < numCommittedNodeGroups) {
|
|
53
|
-
|
|
73
|
+
nodeScanState.nodeGroupIdx = currentCommittedGroupIdx++;
|
|
54
74
|
progressSharedState.numGroupsScanned++;
|
|
55
|
-
|
|
75
|
+
nodeScanState.source = TableScanSource::COMMITTED;
|
|
56
76
|
return;
|
|
57
77
|
}
|
|
58
78
|
if (currentUnCommittedGroupIdx < numUnCommittedNodeGroups) {
|
|
59
|
-
|
|
60
|
-
|
|
79
|
+
nodeScanState.nodeGroupIdx = currentUnCommittedGroupIdx++;
|
|
80
|
+
nodeScanState.source = TableScanSource::UNCOMMITTED;
|
|
61
81
|
return;
|
|
62
82
|
}
|
|
63
|
-
|
|
83
|
+
nodeScanState.source = TableScanSource::NONE;
|
|
64
84
|
}
|
|
65
85
|
|
|
66
86
|
table_id_map_t<SemiMask*> ScanNodeTable::getSemiMasks() const {
|
|
@@ -82,7 +102,18 @@ void ScanNodeTableInfo::initScanState(TableScanState& scanState,
|
|
|
82
102
|
void ScanNodeTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) {
|
|
83
103
|
ScanTable::initLocalStateInternal(resultSet, context);
|
|
84
104
|
auto nodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get();
|
|
85
|
-
|
|
105
|
+
|
|
106
|
+
// Check if the first table is a ParquetNodeTable and create appropriate scan state
|
|
107
|
+
auto* parquetTable = dynamic_cast<ParquetNodeTable*>(tableInfos[0].table);
|
|
108
|
+
if (parquetTable) {
|
|
109
|
+
scanState = std::make_unique<ParquetNodeTableScanState>(
|
|
110
|
+
*MemoryManager::Get(*context->clientContext), nodeIDVector, outVectors,
|
|
111
|
+
nodeIDVector->state);
|
|
112
|
+
} else {
|
|
113
|
+
scanState =
|
|
114
|
+
std::make_unique<NodeTableScanState>(nodeIDVector, outVectors, nodeIDVector->state);
|
|
115
|
+
}
|
|
116
|
+
|
|
86
117
|
currentTableIdx = 0;
|
|
87
118
|
initCurrentTable(context);
|
|
88
119
|
}
|
|
@@ -91,6 +122,11 @@ void ScanNodeTable::initCurrentTable(ExecutionContext* context) {
|
|
|
91
122
|
auto& currentInfo = tableInfos[currentTableIdx];
|
|
92
123
|
currentInfo.initScanState(*scanState, outVectors, context->clientContext);
|
|
93
124
|
scanState->semiMask = sharedStates[currentTableIdx]->getSemiMask();
|
|
125
|
+
// Call table->initScanState for ParquetNodeTable
|
|
126
|
+
if (dynamic_cast<ParquetNodeTable*>(tableInfos[currentTableIdx].table)) {
|
|
127
|
+
auto transaction = transaction::Transaction::Get(*context->clientContext);
|
|
128
|
+
tableInfos[currentTableIdx].table->initScanState(transaction, *scanState);
|
|
129
|
+
}
|
|
94
130
|
}
|
|
95
131
|
|
|
96
132
|
void ScanNodeTable::initGlobalStateInternal(ExecutionContext* context) {
|
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
#include "binder/expression/expression_util.h"
|
|
4
4
|
#include "processor/execution_context.h"
|
|
5
|
+
#include "storage/buffer_manager/memory_manager.h"
|
|
5
6
|
#include "storage/local_storage/local_rel_table.h"
|
|
7
|
+
#include "storage/table/parquet_rel_table.h"
|
|
6
8
|
|
|
7
9
|
using namespace lbug::common;
|
|
8
10
|
using namespace lbug::storage;
|
|
@@ -66,8 +68,16 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext
|
|
|
66
68
|
auto clientContext = context->clientContext;
|
|
67
69
|
auto boundNodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get();
|
|
68
70
|
auto nbrNodeIDVector = outVectors[0];
|
|
69
|
-
|
|
70
|
-
|
|
71
|
+
// Check if this is a ParquetRelTable and create appropriate scan state
|
|
72
|
+
auto* parquetTable = dynamic_cast<storage::ParquetRelTable*>(tableInfo.table);
|
|
73
|
+
if (parquetTable) {
|
|
74
|
+
scanState =
|
|
75
|
+
std::make_unique<storage::ParquetRelTableScanState>(*MemoryManager::Get(*clientContext),
|
|
76
|
+
boundNodeIDVector, outVectors, nbrNodeIDVector->state);
|
|
77
|
+
} else {
|
|
78
|
+
scanState = std::make_unique<RelTableScanState>(*MemoryManager::Get(*clientContext),
|
|
79
|
+
boundNodeIDVector, outVectors, nbrNodeIDVector->state);
|
|
80
|
+
}
|
|
71
81
|
tableInfo.initScanState(*scanState, outVectors, clientContext);
|
|
72
82
|
}
|
|
73
83
|
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
#include "storage/buffer_manager/memory_manager.h"
|
|
14
14
|
#include "storage/checkpointer.h"
|
|
15
15
|
#include "storage/table/node_table.h"
|
|
16
|
+
#include "storage/table/parquet_node_table.h"
|
|
17
|
+
#include "storage/table/parquet_rel_table.h"
|
|
16
18
|
#include "storage/table/rel_table.h"
|
|
17
19
|
#include "storage/wal/wal_replayer.h"
|
|
18
20
|
#include "transaction/transaction.h"
|
|
@@ -77,15 +79,30 @@ void StorageManager::recover(main::ClientContext& clientContext, bool throwOnWal
|
|
|
77
79
|
}
|
|
78
80
|
|
|
79
81
|
void StorageManager::createNodeTable(NodeTableCatalogEntry* entry) {
|
|
80
|
-
|
|
82
|
+
tableNameCache[entry->getTableID()] = entry->getName();
|
|
83
|
+
if (!entry->getStorage().empty()) {
|
|
84
|
+
// Create parquet-backed node table
|
|
85
|
+
tables[entry->getTableID()] =
|
|
86
|
+
std::make_unique<ParquetNodeTable>(this, entry, &memoryManager);
|
|
87
|
+
} else {
|
|
88
|
+
// Create regular node table
|
|
89
|
+
tables[entry->getTableID()] = std::make_unique<NodeTable>(this, entry, &memoryManager);
|
|
90
|
+
}
|
|
81
91
|
}
|
|
82
92
|
|
|
83
93
|
// TODO(Guodong): This API is added since storageManager doesn't provide an API to add a single
|
|
84
94
|
// rel table. We may have to refactor the existing StorageManager::createTable(TableCatalogEntry*
|
|
85
95
|
// entry).
|
|
86
96
|
void StorageManager::addRelTable(RelGroupCatalogEntry* entry, const RelTableCatalogInfo& info) {
|
|
87
|
-
|
|
88
|
-
|
|
97
|
+
if (!entry->getStorage().empty()) {
|
|
98
|
+
// Create parquet-backed rel table
|
|
99
|
+
tables[info.oid] = std::make_unique<ParquetRelTable>(entry, info.nodePair.srcTableID,
|
|
100
|
+
info.nodePair.dstTableID, this, &memoryManager);
|
|
101
|
+
} else {
|
|
102
|
+
// Create regular rel table
|
|
103
|
+
tables[info.oid] = std::make_unique<RelTable>(entry, info.nodePair.srcTableID,
|
|
104
|
+
info.nodePair.dstTableID, this, &memoryManager);
|
|
105
|
+
}
|
|
89
106
|
}
|
|
90
107
|
|
|
91
108
|
void StorageManager::createRelTableGroup(RelGroupCatalogEntry* entry) {
|
|
@@ -257,7 +274,14 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca
|
|
|
257
274
|
KU_ASSERT(!tables.contains(tableID));
|
|
258
275
|
auto tableEntry = catalog->getTableCatalogEntry(&DUMMY_TRANSACTION, tableID)
|
|
259
276
|
->ptrCast<NodeTableCatalogEntry>();
|
|
260
|
-
|
|
277
|
+
tableNameCache[tableID] = tableEntry->getName();
|
|
278
|
+
if (!tableEntry->getStorage().empty()) {
|
|
279
|
+
// Create parquet-backed node table
|
|
280
|
+
tables[tableID] = std::make_unique<ParquetNodeTable>(this, tableEntry, &memoryManager);
|
|
281
|
+
} else {
|
|
282
|
+
// Create regular node table
|
|
283
|
+
tables[tableID] = std::make_unique<NodeTable>(this, tableEntry, &memoryManager);
|
|
284
|
+
}
|
|
261
285
|
tables[tableID]->deserialize(context, this, deSer);
|
|
262
286
|
}
|
|
263
287
|
deSer.validateDebuggingInfo(key, "num_rel_groups");
|
|
@@ -279,8 +303,15 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca
|
|
|
279
303
|
for (auto k = 0u; k < numInnerRelTables; k++) {
|
|
280
304
|
RelTableCatalogInfo info = RelTableCatalogInfo::deserialize(deSer);
|
|
281
305
|
KU_ASSERT(!tables.contains(info.oid));
|
|
282
|
-
|
|
283
|
-
|
|
306
|
+
if (!relGroupEntry->getStorage().empty()) {
|
|
307
|
+
// Create parquet-backed rel table
|
|
308
|
+
tables[info.oid] = std::make_unique<ParquetRelTable>(relGroupEntry,
|
|
309
|
+
info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager);
|
|
310
|
+
} else {
|
|
311
|
+
// Create regular rel table
|
|
312
|
+
tables[info.oid] = std::make_unique<RelTable>(relGroupEntry,
|
|
313
|
+
info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager);
|
|
314
|
+
}
|
|
284
315
|
tables.at(info.oid)->deserialize(context, this, deSer);
|
|
285
316
|
}
|
|
286
317
|
}
|