lbug 0.12.3-dev.3 → 0.12.3-dev.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/README.md +2 -6
  2. package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
  3. package/lbug-source/CMakeLists.txt +15 -6
  4. package/lbug-source/Makefile +15 -4
  5. package/lbug-source/README.md +2 -6
  6. package/lbug-source/benchmark/serializer.py +24 -3
  7. package/lbug-source/dataset/demo-db/csv/copy.cypher +4 -4
  8. package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
  9. package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
  10. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
  11. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
  12. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
  13. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
  14. package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
  15. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
  16. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
  17. package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
  18. package/lbug-source/dataset/demo-db/parquet/copy.cypher +4 -4
  19. package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
  20. package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
  21. package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
  22. package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
  23. package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
  24. package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
  25. package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
  26. package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
  27. package/lbug-source/extension/extension_config.cmake +3 -2
  28. package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
  29. package/lbug-source/scripts/antlr4/Cypher.g4 +4 -4
  30. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  31. package/lbug-source/scripts/generate_binary_demo.sh +1 -1
  32. package/lbug-source/src/antlr4/Cypher.g4 +4 -4
  33. package/lbug-source/src/binder/bind/bind_ddl.cpp +97 -15
  34. package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
  35. package/lbug-source/src/catalog/catalog.cpp +6 -4
  36. package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
  37. package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +46 -7
  38. package/lbug-source/src/catalog/catalog_set.cpp +1 -0
  39. package/lbug-source/src/function/function_collection.cpp +2 -1
  40. package/lbug-source/src/function/table/CMakeLists.txt +1 -0
  41. package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
  42. package/lbug-source/src/function/table/show_connection.cpp +6 -1
  43. package/lbug-source/src/function/table/show_tables.cpp +10 -2
  44. package/lbug-source/src/function/table/table_function.cpp +11 -2
  45. package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +23 -6
  46. package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
  47. package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +21 -2
  48. package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
  49. package/lbug-source/src/include/common/constants.h +1 -0
  50. package/lbug-source/src/include/common/string_format.h +2 -2
  51. package/lbug-source/src/include/common/types/types.h +1 -0
  52. package/lbug-source/src/include/function/table/bind_data.h +12 -1
  53. package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
  54. package/lbug-source/src/include/function/table/table_function.h +2 -0
  55. package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
  56. package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
  57. package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
  58. package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
  59. package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
  60. package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
  61. package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
  62. package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
  63. package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
  64. package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
  65. package/lbug-source/src/include/processor/plan_mapper.h +2 -0
  66. package/lbug-source/src/include/storage/storage_manager.h +1 -0
  67. package/lbug-source/src/include/storage/storage_version_info.h +1 -7
  68. package/lbug-source/src/include/storage/table/foreign_rel_table.h +56 -0
  69. package/lbug-source/src/include/storage/table/node_table.h +6 -1
  70. package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
  71. package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
  72. package/lbug-source/src/include/storage/table/rel_table.h +2 -2
  73. package/lbug-source/src/include/transaction/transaction.h +2 -0
  74. package/lbug-source/src/main/query_result/materialized_query_result.cpp +2 -2
  75. package/lbug-source/src/optimizer/CMakeLists.txt +3 -1
  76. package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
  77. package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
  78. package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
  79. package/lbug-source/src/optimizer/optimizer.cpp +10 -0
  80. package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
  81. package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
  82. package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
  83. package/lbug-source/src/parser/transform/transform_expression.cpp +1 -1
  84. package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
  85. package/lbug-source/src/parser/transformer.cpp +7 -1
  86. package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
  87. package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
  88. package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
  89. package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
  90. package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
  91. package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
  92. package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
  93. package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
  94. package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
  95. package/lbug-source/src/processor/operator/index_lookup.cpp +31 -23
  96. package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
  97. package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
  98. package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
  99. package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
  100. package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
  101. package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
  102. package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +18 -2
  103. package/lbug-source/src/storage/storage_manager.cpp +43 -6
  104. package/lbug-source/src/storage/table/CMakeLists.txt +3 -0
  105. package/lbug-source/src/storage/table/foreign_rel_table.cpp +63 -0
  106. package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
  107. package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
  108. package/lbug-source/test/api/api_test.cpp +18 -0
  109. package/lbug-source/test/common/string_format.cpp +9 -1
  110. package/lbug-source/test/copy/copy_test.cpp +4 -4
  111. package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
  112. package/lbug-source/test/include/test_runner/test_group.h +11 -1
  113. package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
  114. package/lbug-source/test/runner/e2e_test.cpp +7 -1
  115. package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
  116. package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
  117. package/lbug-source/test/test_helper/test_helper.cpp +33 -1
  118. package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
  119. package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
  120. package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
  121. package/lbug-source/test/test_runner/test_parser.cpp +3 -0
  122. package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
  123. package/lbug-source/test/transaction/transaction_test.cpp +19 -15
  124. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2805 -2708
  125. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +7 -3
  126. package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
  127. package/lbug-source/tools/nodejs_api/package.json +4 -2
  128. package/lbug-source/tools/shell/embedded_shell.cpp +78 -3
  129. package/lbug-source/tools/shell/include/embedded_shell.h +2 -0
  130. package/lbug-source/tools/shell/linenoise.cpp +3 -3
  131. package/lbug-source/tools/shell/test/test_helper.py +1 -1
  132. package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
  133. package/lbug-source/tools/shell/test/test_shell_commands.py +19 -0
  134. package/package.json +9 -2
  135. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  136. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  137. package/prebuilt/lbugjs-linux-x64.node +0 -0
  138. package/prebuilt/lbugjs-win32-x64.node +0 -0
package/README.md CHANGED
@@ -38,17 +38,13 @@ The database was formerly known as [Kuzu](https://github.com/kuzudb/kuzu).
38
38
 
39
39
  ## Installation
40
40
 
41
- > [!WARNING]
42
- > Many of these binary installation methods are not functional yet. We need to work through package names, availability and convention issues.
43
- > For now, use the build from source method.
44
-
45
41
  | Language | Installation |
46
42
  | -------- |------------------------------------------------------------------------|
47
43
  | Python | `pip install real_ladybug` |
48
44
  | NodeJS | `npm install lbug` |
49
45
  | Rust | `cargo add lbug` |
50
- | Go | `go get github.com/lbugdb/go-lbug` |
51
- | Swift | [lbug-swift](https://github.com/lbugdb/lbug-swift) |
46
+ | Go | `go get github.com/lbugdb/go-ladybug` |
47
+ | Swift | [lbug-swift](https://github.com/lbugdb/swift-ladybug) |
52
48
  | Java | [Maven Central](https://central.sonatype.com/artifact/com.ladybugdb/lbug) |
53
49
  | C/C++ | [precompiled binaries](https://github.com/LadybugDB/ladybug/releases/latest) |
54
50
  | CLI | [precompiled binaries](https://github.com/LadybugDB/ladybug/releases/latest) |
@@ -1152,6 +1152,9 @@ jobs:
1152
1152
  name: minimal test
1153
1153
  runs-on: ubuntu-latest
1154
1154
  needs: [ sanity-checks ]
1155
+ env:
1156
+ GEN: Ninja
1157
+ USE_STD_FORMAT: 1
1155
1158
  steps:
1156
1159
  - uses: actions/checkout@v4
1157
1160
 
@@ -1159,12 +1162,14 @@ jobs:
1159
1162
  uses: hendrikmuhs/ccache-action@v1.2
1160
1163
  with:
1161
1164
  key: minimal-test-${{ runner.os }}
1165
+ max-size: 2G
1166
+ restore-keys: minimal-test-
1162
1167
 
1163
1168
  - name: Build
1164
- run: make GEN=Ninja release USE_STD_FORMAT=1
1169
+ run: make relwithdebinfo
1165
1170
 
1166
1171
  - name: Generate datasets
1167
- run: bash scripts/generate_binary_demo.sh
1172
+ run: bash scripts/generate_binary_demo.sh --lbug-shell-mode relwithdebinfo
1168
1173
 
1169
1174
  - name: Install uv
1170
1175
  run: pip3 install uv
@@ -1210,6 +1215,8 @@ jobs:
1210
1215
  uses: hendrikmuhs/ccache-action@v1.2
1211
1216
  with:
1212
1217
  key: minimal-extension-test-${{ runner.os }}
1218
+ max-size: 2G
1219
+ restore-keys: minimal-extension-test-
1213
1220
 
1214
1221
  - name: Update PostgreSQL host
1215
1222
  working-directory: extension/postgres/test/test_files
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Lbug VERSION 0.12.3.3 LANGUAGES CXX C)
3
+ project(Lbug VERSION 0.12.3.30 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
@@ -14,15 +14,26 @@ endif()
14
14
 
15
15
  set(CMAKE_CXX_STANDARD 20)
16
16
  set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
17
- set(CMAKE_CXX_VISIBILITY_PRESET hidden)
18
- set(CMAKE_C_VISIBILITY_PRESET hidden)
19
17
  set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE)
20
18
  set(CMAKE_FIND_PACKAGE_RESOLVE_SYMLINKS TRUE)
21
19
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
22
- set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
23
20
  # On Linux, symbols in executables are not accessible by loaded shared libraries (e.g. via dlopen(3)). However, we need to export public symbols in executables so that extensions can access public symbols. This enables that behaviour.
24
21
  set(CMAKE_ENABLE_EXPORTS TRUE)
25
22
 
23
+ # When building tests, we need all symbols visible so tests can link to the shared library
24
+ # instead of static linking (which bloats binary sizes significantly)
25
+ option(BUILD_TESTS "Build C++ tests." FALSE)
26
+ option(BUILD_EXTENSION_TESTS "Build C++ extension tests." FALSE)
27
+ if(BUILD_TESTS OR BUILD_EXTENSION_TESTS)
28
+ set(CMAKE_CXX_VISIBILITY_PRESET default)
29
+ set(CMAKE_C_VISIBILITY_PRESET default)
30
+ set(CMAKE_VISIBILITY_INLINES_HIDDEN OFF)
31
+ else()
32
+ set(CMAKE_CXX_VISIBILITY_PRESET hidden)
33
+ set(CMAKE_C_VISIBILITY_PRESET hidden)
34
+ set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
35
+ endif()
36
+
26
37
  option(ENABLE_WERROR "Treat all warnings as errors" FALSE)
27
38
  if(ENABLE_WERROR)
28
39
  if (CMAKE_VERSION VERSION_GREATER "3.24.0" OR CMAKE_VERSION VERSION_EQUAL "3.24.0")
@@ -302,8 +313,6 @@ option(BUILD_NODEJS "Build NodeJS API." FALSE)
302
313
  option(BUILD_PYTHON "Build Python API." FALSE)
303
314
  option(BUILD_SHELL "Build Interactive Shell" TRUE)
304
315
  option(BUILD_SINGLE_FILE_HEADER "Build single file header. Requires Python >= 3.9." TRUE)
305
- option(BUILD_TESTS "Build C++ tests." FALSE)
306
- option(BUILD_EXTENSION_TESTS "Build C++ extension tests." FALSE)
307
316
  option(BUILD_LBUG "Build Lbug." TRUE)
308
317
  option(ENABLE_BACKTRACES "Enable backtrace printing for exceptions and segfaults" FALSE)
309
318
  option(USE_STD_FORMAT "Use std::format instead of a custom formatter." FALSE)
@@ -14,7 +14,7 @@
14
14
  benchmark example \
15
15
  extension-test-build extension-test extension-json-test-build extension-json-test \
16
16
  extension-debug extension-release \
17
- shell-test \
17
+ shell shell-debug shell-test \
18
18
  tidy tidy-analyzer clangd-diagnostics \
19
19
  install \
20
20
  clean-extension clean-python-api clean-java clean
@@ -234,7 +234,7 @@ example:
234
234
  $(call run-cmake-release, -DBUILD_EXAMPLES=TRUE)
235
235
 
236
236
  extension-build:
237
- $(call run-cmake-relwithdebinfo,-DBUILD_EXTENSIONS="$(EXTENSION_LIST)")
237
+ $(call run-cmake-relwithdebinfo,-DBUILD_EXTENSIONS="$(EXTENSION_LIST)" -DEXTENSION_STATIC_LINK_LIST="$(EXTENSION_STATIC_LINK_LIST)")
238
238
 
239
239
  extension-test-build:
240
240
  $(call run-cmake-relwithdebinfo, \
@@ -294,9 +294,20 @@ extension-release:
294
294
  -DBUILD_LBUG=FALSE \
295
295
  )
296
296
 
297
- # pytest expects a `Release` build path.
297
+ shell:
298
+ BM_MALLOC=1 $(call run-cmake-release, \
299
+ -DBUILD_SHELL=TRUE \
300
+ -DEXTENSION_STATIC_LINK_LIST=duckdb \
301
+ )
302
+
303
+ shell-debug:
304
+ BM_MALLOC=1 $(call run-cmake-debug, \
305
+ -DBUILD_SHELL=TRUE \
306
+ -DEXTENSION_STATIC_LINK_LIST=duckdb \
307
+ )
308
+
298
309
  shell-test:
299
- $(call run-cmake-release, \
310
+ $(call run-cmake-relwithdebinfo, \
300
311
  -DBUILD_SHELL=TRUE \
301
312
  )
302
313
  $(MAKE) -C tools/shell/test test
@@ -38,17 +38,13 @@ The database was formerly known as [Kuzu](https://github.com/kuzudb/kuzu).
38
38
 
39
39
  ## Installation
40
40
 
41
- > [!WARNING]
42
- > Many of these binary installation methods are not functional yet. We need to work through package names, availability and convention issues.
43
- > For now, use the build from source method.
44
-
45
41
  | Language | Installation |
46
42
  | -------- |------------------------------------------------------------------------|
47
43
  | Python | `pip install real_ladybug` |
48
44
  | NodeJS | `npm install lbug` |
49
45
  | Rust | `cargo add lbug` |
50
- | Go | `go get github.com/lbugdb/go-lbug` |
51
- | Swift | [lbug-swift](https://github.com/lbugdb/lbug-swift) |
46
+ | Go | `go get github.com/lbugdb/go-ladybug` |
47
+ | Swift | [lbug-swift](https://github.com/lbugdb/swift-ladybug) |
52
48
  | Java | [Maven Central](https://central.sonatype.com/artifact/com.ladybugdb/lbug) |
53
49
  | C/C++ | [precompiled binaries](https://github.com/LadybugDB/ladybug/releases/latest) |
54
50
  | CLI | [precompiled binaries](https://github.com/LadybugDB/ladybug/releases/latest) |
@@ -39,7 +39,17 @@ def serialize(lbug_exec_path, dataset_name, dataset_path, serialized_graph_path,
39
39
  with open(os.path.join(dataset_path, 'schema.cypher'), 'r') as f:
40
40
  serialize_queries += f.readlines()
41
41
  with open(os.path.join(dataset_path, 'copy.cypher'), 'r') as f:
42
- serialize_queries += f.readlines()
42
+ copy_lines = f.readlines()
43
+ # Fix relative paths in copy.cypher
44
+ for line in copy_lines:
45
+ # Replace quoted paths with absolute paths
46
+ def replace_path(match):
47
+ path = match.group(1)
48
+ if not os.path.isabs(path):
49
+ return '"' + os.path.join(dataset_path, path) + '"'
50
+ return match.group(0)
51
+ fixed_line = re.sub(r'"([^"]*)"', replace_path, line)
52
+ serialize_queries.append(fixed_line.strip())
43
53
  else:
44
54
  with open(os.path.join(base_dir, 'serialize.cypher'), 'r') as f:
45
55
  serialize_queries += f.readlines()
@@ -95,17 +105,28 @@ if __name__ == '__main__':
95
105
  parser.add_argument("--single-thread",
96
106
  help="If true, copy single threaded, which makes the results more reproducible",
97
107
  action="store_true")
108
+ parser.add_argument("--lbug-shell-mode",
109
+ help="debug, release or relwithdebinfo",
110
+ default="release")
111
+ default_mode = "release"
98
112
  if sys.platform == "win32":
99
113
  default_lbug_exec_path = os.path.join(
100
- base_dir, '..', 'build', 'release', 'tools', 'shell', 'lbug_shell')
114
+ base_dir, '..', 'build', default_mode, 'tools', 'shell', 'lbug_shell')
101
115
  else:
102
116
  default_lbug_exec_path = os.path.join(
103
- base_dir, '..', 'build', 'release', 'tools', 'shell', 'lbug')
117
+ base_dir, '..', 'build', default_mode, 'tools', 'shell', 'lbug')
104
118
  parser.add_argument("--lbug-shell",
105
119
  help="Path of the lbug shell executable. Defaults to the path as built in the default release build directory",
106
120
  default=default_lbug_exec_path)
107
121
  args = parser.parse_args()
108
122
 
123
+ if args.lbug_shell == default_lbug_exec_path:
124
+ mode = args.lbug_shell_mode
125
+ if sys.platform == "win32":
126
+ args.lbug_shell = os.path.join(base_dir, '..', 'build', mode, 'tools', 'shell', 'lbug_shell')
127
+ else:
128
+ args.lbug_shell = os.path.join(base_dir, '..', 'build', mode, 'tools', 'shell', 'lbug')
129
+
109
130
  try:
110
131
  serialize(args.lbug_shell, args.dataset_name, args.dataset_path, args.serialized_graph_path,
111
132
  args.benchmark_copy_log_dir, args.single_thread)
@@ -1,4 +1,4 @@
1
- COPY User From "dataset/demo-db/csv/user.csv"
2
- COPY City FROM "dataset/demo-db/csv/city.csv"
3
- COPY Follows FROM "dataset/demo-db/csv/follows.csv"
4
- COPY LivesIn FROM "dataset/demo-db/csv/lives-in.csv"
1
+ COPY User From "user.csv"
2
+ COPY City FROM "city.csv"
3
+ COPY Follows FROM "follows.csv"
4
+ COPY LivesIn FROM "lives-in.csv"
@@ -0,0 +1,4 @@
1
+ CREATE NODE TABLE city(id INT32, name STRING, population INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
2
+ CREATE NODE TABLE user(id INT32, name STRING, age INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
3
+ CREATE REL TABLE follows(FROM user TO user, since INT32) WITH (storage = 'dataset/demo-db/graph-std/demo');
4
+ CREATE REL TABLE livesin(FROM user TO city) WITH (storage = 'dataset/demo-db/graph-std/demo');
@@ -1,4 +1,4 @@
1
- COPY User From "dataset/demo-db/parquet/user.parquet";
2
- COPY City FROM "dataset/demo-db/parquet/city.parquet";
3
- COPY Follows FROM "dataset/demo-db/parquet/follows.parquet";
4
- COPY LivesIn FROM "dataset/demo-db/parquet/lives-in.parquet";
1
+ COPY User From "user.parquet";
2
+ COPY City FROM "city.parquet";
3
+ COPY Follows FROM "follows.parquet";
4
+ COPY LivesIn FROM "lives-in.parquet";
@@ -91,7 +91,7 @@ void DuckDBCatalog::createForeignTable(const std::string& tableName) {
91
91
  auto duckdbTableInfo =
92
92
  connector.getTableScanInfo(getQuery(*info), std::move(columnTypes), columnNames);
93
93
  auto tableEntry = std::make_unique<catalog::DuckDBTableCatalogEntry>(info->tableName,
94
- getScanFunction(duckdbTableInfo));
94
+ getScanFunction(duckdbTableInfo), duckdbTableInfo);
95
95
  for (auto& definition : extraInfo->propertyDefinitions) {
96
96
  tableEntry->addProperty(definition);
97
97
  }
@@ -1,21 +1,60 @@
1
1
  #include "catalog/duckdb_table_catalog_entry.h"
2
2
 
3
- #include "binder/ddl/bound_create_table_info.h"
3
+ #include "binder/bound_scan_source.h"
4
+ #include "binder/expression/variable_expression.h"
5
+ #include "common/constants.h"
6
+ #include "function/duckdb_scan.h"
4
7
 
5
8
  namespace lbug {
6
9
  namespace catalog {
7
10
 
8
11
  DuckDBTableCatalogEntry::DuckDBTableCatalogEntry(std::string name,
9
- function::TableFunction scanFunction)
12
+ function::TableFunction scanFunction,
13
+ std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo)
10
14
  : TableCatalogEntry{CatalogEntryType::FOREIGN_TABLE_ENTRY, std::move(name)},
11
- scanFunction{std::move(scanFunction)} {}
15
+ scanFunction{std::move(scanFunction)}, scanInfo{std::move(scanInfo)} {}
12
16
 
13
17
  common::TableType DuckDBTableCatalogEntry::getTableType() const {
14
18
  return common::TableType::FOREIGN;
15
19
  }
16
20
 
21
+ std::unique_ptr<binder::BoundTableScanInfo> DuckDBTableCatalogEntry::getBoundScanInfo(
22
+ main::ClientContext* context, const std::string& nodeUniqueName) {
23
+ auto columnNames = scanInfo->getColumnNames();
24
+ auto columnTypes = scanInfo->getColumnTypes(*context);
25
+ binder::expression_vector columns;
26
+
27
+ // Add rowid as _ID (internal ID) if nodeUniqueName is provided
28
+ if (!nodeUniqueName.empty()) {
29
+ auto idUniqueName = nodeUniqueName + "." + std::string(common::InternalKeyword::ID);
30
+ columns.push_back(std::make_shared<binder::VariableExpression>(common::LogicalType::INT64(),
31
+ idUniqueName, "rowid"));
32
+ }
33
+
34
+ for (auto i = 0u; i < columnNames.size(); i++) {
35
+ std::string uniqueName = columnNames[i];
36
+ if (!nodeUniqueName.empty()) {
37
+ uniqueName = nodeUniqueName + "." + columnNames[i];
38
+ }
39
+ columns.push_back(std::make_shared<binder::VariableExpression>(std::move(columnTypes[i]),
40
+ uniqueName, columnNames[i]));
41
+ }
42
+
43
+ // Build column names for DuckDB query - include rowid if needed
44
+ std::vector<std::string> duckdbColumnNames;
45
+ if (!nodeUniqueName.empty()) {
46
+ duckdbColumnNames.push_back("rowid");
47
+ }
48
+ duckdbColumnNames.insert(duckdbColumnNames.end(), columnNames.begin(), columnNames.end());
49
+
50
+ auto bindData =
51
+ std::make_unique<duckdb_extension::DuckDBScanBindData>(scanInfo->getTemplateQuery(*context),
52
+ duckdbColumnNames, scanInfo->getConnector(), std::move(columns));
53
+ return std::make_unique<binder::BoundTableScanInfo>(scanFunction, std::move(bindData));
54
+ }
55
+
17
56
  std::unique_ptr<TableCatalogEntry> DuckDBTableCatalogEntry::copy() const {
18
- auto other = std::make_unique<DuckDBTableCatalogEntry>(name, scanFunction);
57
+ auto other = std::make_unique<DuckDBTableCatalogEntry>(name, scanFunction, scanInfo);
19
58
  other->copyFrom(*this);
20
59
  return other;
21
60
  }
@@ -121,10 +121,16 @@ void DuckDBResultConverter::convertDuckDBResultToVector(duckdb::DataChunk& duckD
121
121
  for (auto i = 0u; i < conversionFunctions.size(); i++) {
122
122
  result.state->getSelVectorUnsafe().setSelSize(duckDBResult.size());
123
123
  if (columnSkips && columnSkips.value()[i]) {
124
+ // For rowid (first column), we always fetch it from DuckDB but skip writing to output.
125
+ // This keeps DuckDB result columns aligned with our expected order.
126
+ if (i == 0) {
127
+ duckdbResultColIdx++;
128
+ }
124
129
  continue;
125
130
  }
126
131
  KU_ASSERT(duckDBResult.data[duckdbResultColIdx].GetVectorType() ==
127
132
  duckdb::VectorType::FLAT_VECTOR);
133
+ // Write to output vector at position i (the original column index)
128
134
  conversionFunctions[i](duckDBResult.data[duckdbResultColIdx],
129
135
  result.getValueVectorMutable(i), result.state->getSelVector().getSelSize());
130
136
  duckdbResultColIdx++;
@@ -18,7 +18,7 @@ static std::string getDuckDBExtensionOptions(httpfs_extension::S3AuthParams lbug
18
18
  std::string DuckDBSecretManager::getRemoteS3FSSecret(main::ClientContext* context,
19
19
  const httpfs_extension::S3FileSystemConfig& config) {
20
20
  KU_ASSERT(config.fsName == "S3" || config.fsName == "GCS");
21
- std::string templateQuery = R"(CREATE SECRET {}_secret (
21
+ static constexpr std::string_view templateQuery = R"(CREATE SECRET {}_secret (
22
22
  {}
23
23
  TYPE {}
24
24
  );)";
@@ -15,21 +15,56 @@ namespace duckdb_extension {
15
15
 
16
16
  std::string DuckDBScanBindData::getColumnsToSelect() const {
17
17
  std::string columnNames = "";
18
+ auto columnSkips = getColumnSkips();
18
19
  auto numSkippedColumns =
19
20
  std::count_if(columnSkips.begin(), columnSkips.end(), [](auto item) { return item; });
20
21
  if (getNumColumns() == numSkippedColumns) {
21
- columnNames = columnNamesInDuckDB[0];
22
+ return columnNamesInDuckDB[0];
22
23
  }
24
+ bool first = true;
23
25
  for (auto i = 0u; i < getNumColumns(); i++) {
24
- if (columnSkips[i]) {
26
+ // Always include rowid (first column) even if marked as skipped.
27
+ // This ensures consistent column ordering between DuckDB results and the converter.
28
+ bool isRowid =
29
+ (i == 0 && !columnNamesInDuckDB.empty() && columnNamesInDuckDB[0] == "rowid");
30
+ if (columnSkips[i] && !isRowid) {
25
31
  continue;
26
32
  }
33
+ if (!first) {
34
+ columnNames += ",";
35
+ }
27
36
  columnNames += columnNamesInDuckDB[i];
28
- columnNames += (i == getNumColumns() - 1) ? "" : ",";
37
+ first = false;
29
38
  }
30
39
  return columnNames;
31
40
  }
32
41
 
42
+ std::string DuckDBScanBindData::getDescription() const {
43
+ auto columns = getColumnsToSelect();
44
+ std::string predicatesString = "";
45
+ for (auto& predicates : getColumnPredicates()) {
46
+ if (predicates.isEmpty()) {
47
+ continue;
48
+ }
49
+ if (predicatesString.empty()) {
50
+ predicatesString = " WHERE " + predicates.toString();
51
+ } else {
52
+ predicatesString += common::stringFormat(" AND {}", predicates.toString());
53
+ }
54
+ }
55
+ std::string q = query;
56
+ size_t pos = q.find("{}");
57
+ if (pos != std::string::npos) {
58
+ q.replace(pos, 2, columns);
59
+ }
60
+ q += predicatesString;
61
+ q += getOrderBy();
62
+ if (getLimitNum() != common::INVALID_ROW_IDX) {
63
+ q += common::stringFormat(" LIMIT {}", getLimitNum());
64
+ }
65
+ return q;
66
+ }
67
+
33
68
  DuckDBScanSharedState::DuckDBScanSharedState(
34
69
  std::shared_ptr<duckdb::MaterializedQueryResult> queryResult)
35
70
  : function::TableFuncSharedState{queryResult->RowCount()}, queryResult{std::move(queryResult)} {
@@ -66,7 +101,16 @@ std::unique_ptr<TableFuncSharedState> DuckDBScanFunction::initSharedState(
66
101
  predicatesString += stringFormat(" AND {}", predicates.toString());
67
102
  }
68
103
  }
69
- auto finalQuery = stringFormat(scanBindData->query, columnNames) + predicatesString;
104
+ std::string finalQuery = scanBindData->query;
105
+ size_t pos = finalQuery.find("{}");
106
+ if (pos != std::string::npos) {
107
+ finalQuery.replace(pos, 2, columnNames);
108
+ }
109
+ finalQuery += predicatesString;
110
+ finalQuery += scanBindData->getOrderBy();
111
+ if (scanBindData->getLimitNum() != INVALID_ROW_IDX) {
112
+ finalQuery += stringFormat(" LIMIT {}", scanBindData->getLimitNum());
113
+ }
70
114
  auto result = scanBindData->connector.executeQuery(finalQuery);
71
115
  if (result->HasError()) {
72
116
  throw RuntimeException(
@@ -117,6 +161,7 @@ TableFunction getScanFunction(std::shared_ptr<DuckDBTableScanInfo> scanInfo) {
117
161
  std::placeholders::_2);
118
162
  function.initSharedStateFunc = DuckDBScanFunction::initSharedState;
119
163
  function.initLocalStateFunc = DuckDBScanFunction::initLocalState;
164
+ function.supportsPushDownFunc = [] { return true; };
120
165
  return function;
121
166
  }
122
167
 
@@ -1,6 +1,7 @@
1
1
  #pragma once
2
2
 
3
3
  #include "catalog/catalog_entry/table_catalog_entry.h"
4
+ #include "function/duckdb_scan.h"
4
5
  #include "function/table/table_function.h"
5
6
 
6
7
  namespace lbug {
@@ -11,13 +12,16 @@ public:
11
12
  //===--------------------------------------------------------------------===//
12
13
  // constructors
13
14
  //===--------------------------------------------------------------------===//
14
- DuckDBTableCatalogEntry(std::string name, function::TableFunction scanFunction);
15
+ DuckDBTableCatalogEntry(std::string name, function::TableFunction scanFunction,
16
+ std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo);
15
17
 
16
18
  //===--------------------------------------------------------------------===//
17
19
  // getter & setter
18
20
  //===--------------------------------------------------------------------===//
19
21
  common::TableType getTableType() const override;
20
22
  function::TableFunction getScanFunction() override { return scanFunction; }
23
+ std::unique_ptr<binder::BoundTableScanInfo> getBoundScanInfo(main::ClientContext* context,
24
+ const std::string& nodeUniqueName = "") override;
21
25
 
22
26
  //===--------------------------------------------------------------------===//
23
27
  // serialization & deserialization
@@ -30,6 +34,7 @@ private:
30
34
 
31
35
  private:
32
36
  function::TableFunction scanFunction;
37
+ std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo;
33
38
  };
34
39
 
35
40
  } // namespace catalog
@@ -60,6 +60,8 @@ struct DuckDBScanBindData : function::TableFuncBindData {
60
60
 
61
61
  std::string getColumnsToSelect() const;
62
62
 
63
+ std::string getDescription() const override;
64
+
63
65
  std::unique_ptr<TableFuncBindData> copy() const override {
64
66
  return std::make_unique<DuckDBScanBindData>(*this);
65
67
  }
@@ -321,3 +321,31 @@ Attached database successfully.
321
321
  7|Elizabeth|1|False|True|20|4.700000|1980-10-26|1976-12-23 11:21:42|48:24:11|[2]|[Ein]|[[6],[7],[8]]|1.463000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a15|[96,59,65,88]
322
322
  8|Farooq|2|True|False|25|4.500000|1980-10-26|1972-07-31 13:22:30.678559|00:18:00.024|[3,4,5,6,7]|[Fesdwe]|[[8]]|1.510000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a16|[80,78,34,83]
323
323
  9|Greg|2|False|False|40|4.900000|1980-10-26|1976-12-23 11:21:42|10 years 5 months 13:00:00.000024|[1]|[Grad]|[[10]]|1.600000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a17|[43,83,67,43]
324
+
325
+ -CASE DuckDBNodeTable
326
+ -LOAD_DYNAMIC_EXTENSION duckdb
327
+ -STATEMENT ATTACH '${LBUG_ROOT_DIRECTORY}/dataset/databases/duckdb_database/tinysnb.db' as ts (dbtype duckdb, skip_unsupported_table = true);
328
+ ---- 1
329
+ Attached database successfully.
330
+ -STATEMENT MATCH (a:ts.person) RETURN count(*);
331
+ ---- 1
332
+ 8
333
+ -STATEMENT MATCH (a:ts.person) WHERE a.age > 30 RETURN count(*);
334
+ ---- 1
335
+ 4
336
+ -STATEMENT MATCH (a:ts.person) RETURN a.ID ORDER BY a.ID LIMIT 3;
337
+ ---- 3
338
+ 0
339
+ 2
340
+ 3
341
+ -STATEMENT MATCH (a:ts.person) RETURN a.ID ORDER BY a.ID DESC LIMIT 3;
342
+ ---- 3
343
+ 10
344
+ 9
345
+ 8
346
+ -STATEMENT MATCH (a:ts.person) WHERE a.age > 20 RETURN a.ID ORDER BY a.age DESC LIMIT 2;
347
+ ---- 2
348
+ 10
349
+ 3
350
+ -STATEMENT DETACH ts;
351
+ ---- ok
@@ -1,8 +1,6 @@
1
1
  set(EXTENSION_LIST azure delta duckdb fts httpfs iceberg json llm postgres sqlite unity_catalog vector neo4j algo)
2
2
 
3
3
  #set(EXTENSION_STATIC_LINK_LIST fts)
4
- string(JOIN ", " joined_extensions ${EXTENSION_STATIC_LINK_LIST})
5
- message(STATUS "Static link extensions: ${joined_extensions}")
6
4
  foreach(extension IN LISTS EXTENSION_STATIC_LINK_LIST)
7
5
  add_static_link_extension(${extension})
8
6
  endforeach()
@@ -30,3 +28,6 @@ if(${BUILD_SWIFT})
30
28
  add_static_link_extension(vector)
31
29
  add_static_link_extension(algo)
32
30
  endif()
31
+
32
+ string(JOIN ", " joined_extensions ${STATICALLY_LINKED_EXTENSIONS})
33
+ message(STATUS "Static link extensions: ${joined_extensions}")
@@ -148,6 +148,7 @@ tinysnb1|LBUG
148
148
  ---- ok
149
149
 
150
150
  -CASE AttachNotExistPath
151
+ -SKIP
151
152
  -LOAD_DYNAMIC_EXTENSION httpfs
152
153
  -STATEMENT attach 'http://localhost/dataset/databases/tinysnb1' as test (dbtype lbug)
153
154
  ---- error
@@ -342,7 +342,7 @@ kU_IfNotExists
342
342
  : IF SP NOT SP EXISTS ;
343
343
 
344
344
  kU_CreateNodeTable
345
- : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
345
+ : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
346
346
 
347
347
  kU_CreateRelTable
348
348
  : CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
@@ -699,7 +699,7 @@ oC_UpperBound
699
699
  : DecimalInteger ;
700
700
 
701
701
  oC_LabelName
702
- : oC_SchemaName ;
702
+ : oC_SchemaName ( '.' oC_SchemaName )? ;
703
703
 
704
704
  oC_RelTypeName
705
705
  : oC_SchemaName ;
@@ -896,7 +896,7 @@ oC_PropertyExpression
896
896
  : oC_Atom SP? oC_PropertyLookup ;
897
897
 
898
898
  oC_PropertyKeyName
899
- : oC_SchemaName ;
899
+ : oC_SymbolicName ;
900
900
 
901
901
  oC_IntegerLiteral
902
902
  : DecimalInteger ;
@@ -956,7 +956,7 @@ RegularDecimalReal
956
956
  : ( Digit )* '.' ( Digit )+ ;
957
957
 
958
958
  oC_SchemaName
959
- : oC_SymbolicName ;
959
+ : oC_SymbolicName ( '.' oC_SymbolicName )? ;
960
960
 
961
961
  oC_SymbolicName
962
962
  : UnescapedSymbolicName
@@ -1 +1 @@
1
- 8334a684be17e562250acf07ae2bbca0
1
+ 3f763bc647d64c15286c2616518546df
@@ -2,4 +2,4 @@
2
2
 
3
3
  CD=`dirname "$0"`
4
4
  DATASET_DIR=$CD/../dataset
5
- python3 $CD/../benchmark/serializer.py DemoDB $DATASET_DIR/demo-db/parquet $DATASET_DIR/binary-demo --single-thread
5
+ python3 $CD/../benchmark/serializer.py DemoDB $DATASET_DIR/demo-db/parquet $DATASET_DIR/binary-demo --single-thread $*
@@ -95,7 +95,7 @@ kU_IfNotExists
95
95
  : IF SP NOT SP EXISTS ;
96
96
 
97
97
  kU_CreateNodeTable
98
- : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
98
+ : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
99
99
 
100
100
  kU_CreateRelTable
101
101
  : CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
@@ -452,7 +452,7 @@ oC_UpperBound
452
452
  : DecimalInteger ;
453
453
 
454
454
  oC_LabelName
455
- : oC_SchemaName ;
455
+ : oC_SchemaName ( '.' oC_SchemaName )? ;
456
456
 
457
457
  oC_RelTypeName
458
458
  : oC_SchemaName ;
@@ -649,7 +649,7 @@ oC_PropertyExpression
649
649
  : oC_Atom SP? oC_PropertyLookup ;
650
650
 
651
651
  oC_PropertyKeyName
652
- : oC_SchemaName ;
652
+ : oC_SymbolicName ;
653
653
 
654
654
  oC_IntegerLiteral
655
655
  : DecimalInteger ;
@@ -709,7 +709,7 @@ RegularDecimalReal
709
709
  : ( Digit )* '.' ( Digit )+ ;
710
710
 
711
711
  oC_SchemaName
712
- : oC_SymbolicName ;
712
+ : oC_SymbolicName ( '.' oC_SymbolicName )? ;
713
713
 
714
714
  oC_SymbolicName
715
715
  : UnescapedSymbolicName