chdb 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287) hide show
  1. chdb-4.1.0.dist-info/METADATA +636 -0
  2. chdb-4.1.0.dist-info/RECORD +287 -0
  3. chdb-4.1.0.dist-info/WHEEL +5 -0
  4. chdb-4.1.0.dist-info/licenses/AUTHORS.md +18 -0
  5. chdb-4.1.0.dist-info/licenses/LICENSE.txt +203 -0
  6. chdb-4.1.0.dist-info/top_level.txt +1 -0
  7. datastore/__init__.py +510 -0
  8. datastore/accessors/__init__.py +37 -0
  9. datastore/accessors/array.py +93 -0
  10. datastore/accessors/base.py +95 -0
  11. datastore/accessors/datetime.py +261 -0
  12. datastore/accessors/geo.py +72 -0
  13. datastore/accessors/ip.py +72 -0
  14. datastore/accessors/json.py +72 -0
  15. datastore/accessors/string.py +179 -0
  16. datastore/accessors/url.py +72 -0
  17. datastore/adapters.py +283 -0
  18. datastore/case_when.py +361 -0
  19. datastore/column_expr.py +6783 -0
  20. datastore/conditions.py +540 -0
  21. datastore/config.py +792 -0
  22. datastore/connection.py +1269 -0
  23. datastore/core.py +6690 -0
  24. datastore/dtype_correction/__init__.py +39 -0
  25. datastore/dtype_correction/config.py +82 -0
  26. datastore/dtype_correction/registry.py +233 -0
  27. datastore/dtype_correction/rules.py +342 -0
  28. datastore/enums.py +23 -0
  29. datastore/exceptions.py +135 -0
  30. datastore/executor.py +144 -0
  31. datastore/expression_evaluator.py +1314 -0
  32. datastore/expressions.py +1149 -0
  33. datastore/function_definitions.py +13300 -0
  34. datastore/function_executor.py +1089 -0
  35. datastore/function_mixin.py +395 -0
  36. datastore/function_registry.py +420 -0
  37. datastore/functions.py +708 -0
  38. datastore/groupby.py +808 -0
  39. datastore/lazy_ops.py +2455 -0
  40. datastore/lazy_result.py +977 -0
  41. datastore/pandas_api.py +2460 -0
  42. datastore/pandas_compat.py +2980 -0
  43. datastore/query_planner.py +999 -0
  44. datastore/schema_state.py +324 -0
  45. datastore/sql_builder.py +551 -0
  46. datastore/sql_executor.py +2635 -0
  47. datastore/table_functions.py +1166 -0
  48. datastore/tests/__init__.py +4 -0
  49. datastore/tests/conftest.py +195 -0
  50. datastore/tests/test_accessor_fallback_registry.py +188 -0
  51. datastore/tests/test_advanced_feature_engineering.py +1172 -0
  52. datastore/tests/test_advanced_pandas_operations.py +300 -0
  53. datastore/tests/test_advanced_queries.py +512 -0
  54. datastore/tests/test_aggregate_functions_complex.py +1449 -0
  55. datastore/tests/test_arithmetic.py +313 -0
  56. datastore/tests/test_array_accessor.py +479 -0
  57. datastore/tests/test_assign_mixed.py +495 -0
  58. datastore/tests/test_benchmark_fixes.py +1415 -0
  59. datastore/tests/test_between.py +218 -0
  60. datastore/tests/test_blob_column.py +305 -0
  61. datastore/tests/test_bugfixes.py +240 -0
  62. datastore/tests/test_cache.py +1819 -0
  63. datastore/tests/test_case_when.py +782 -0
  64. datastore/tests/test_chdb_dtype_differences.py +111 -0
  65. datastore/tests/test_chdb_integration.py +402 -0
  66. datastore/tests/test_chdb_limitations_tracker.py +243 -0
  67. datastore/tests/test_column_assignment.py +233 -0
  68. datastore/tests/test_column_expr_pandas_alignment.py +1577 -0
  69. datastore/tests/test_column_expr_reset_index.py +210 -0
  70. datastore/tests/test_column_reference_consistency.py +1276 -0
  71. datastore/tests/test_compat_mode.py +159 -0
  72. datastore/tests/test_comprehensive_joins.py +452 -0
  73. datastore/tests/test_comprehensive_segmented_execution.py +1297 -0
  74. datastore/tests/test_concurrency.py +231 -0
  75. datastore/tests/test_condition_combinations.py +260 -0
  76. datastore/tests/test_conditions.py +112 -0
  77. datastore/tests/test_convenience_methods.py +442 -0
  78. datastore/tests/test_criterions_advanced.py +437 -0
  79. datastore/tests/test_cross_datastore_ops.py +241 -0
  80. datastore/tests/test_custom_functions.py +238 -0
  81. datastore/tests/test_data_types.py +213 -0
  82. datastore/tests/test_datastore_core.py +448 -0
  83. datastore/tests/test_datastore_core_compat.py +408 -0
  84. datastore/tests/test_deep_edge_cases.py +643 -0
  85. datastore/tests/test_deep_probing.py +1586 -0
  86. datastore/tests/test_dtype_alignment.py +163 -0
  87. datastore/tests/test_dtype_correction.py +952 -0
  88. datastore/tests/test_dynamic_pandas_methods.py +99 -0
  89. datastore/tests/test_equals_and_alignment_semantics.py +246 -0
  90. datastore/tests/test_error_messages.py +167 -0
  91. datastore/tests/test_explain_method.py +383 -0
  92. datastore/tests/test_explain_segmented_execution.py +203 -0
  93. datastore/tests/test_exploratory_batch10_window_timeseries.py +945 -0
  94. datastore/tests/test_exploratory_batch11_advanced_indexing.py +854 -0
  95. datastore/tests/test_exploratory_batch12_statistical_timezone.py +838 -0
  96. datastore/tests/test_exploratory_batch13_edge_combinations.py +651 -0
  97. datastore/tests/test_exploratory_batch14_io_sql_validation.py +695 -0
  98. datastore/tests/test_exploratory_batch15_columnexpr_setitem.py +749 -0
  99. datastore/tests/test_exploratory_batch16_index_copy_edge.py +745 -0
  100. datastore/tests/test_exploratory_batch17_accessor_params.py +946 -0
  101. datastore/tests/test_exploratory_batch18_groupby_lazy_combinations.py +767 -0
  102. datastore/tests/test_exploratory_batch19_pandas_api_module.py +879 -0
  103. datastore/tests/test_exploratory_batch20_series_advanced.py +643 -0
  104. datastore/tests/test_exploratory_batch21_category_inplace.py +993 -0
  105. datastore/tests/test_exploratory_batch22_advanced_ops.py +804 -0
  106. datastore/tests/test_exploratory_batch23_update_bitwise.py +979 -0
  107. datastore/tests/test_exploratory_batch24_datetime_accessor.py +1264 -0
  108. datastore/tests/test_exploratory_batch25_boolean_indexing.py +448 -0
  109. datastore/tests/test_exploratory_batch26_advanced_methods.py +850 -0
  110. datastore/tests/test_exploratory_batch27_reverse_ops.py +704 -0
  111. datastore/tests/test_exploratory_batch28_edge_lazy.py +643 -0
  112. datastore/tests/test_exploratory_batch29_loc_iloc_deep.py +775 -0
  113. datastore/tests/test_exploratory_batch30_special_cases.py +612 -0
  114. datastore/tests/test_exploratory_batch31_iteration_rare.py +799 -0
  115. datastore/tests/test_exploratory_batch32_complex_scenarios.py +697 -0
  116. datastore/tests/test_exploratory_batch33_multiop_chains.py +833 -0
  117. datastore/tests/test_exploratory_batch34_ultra_chains.py +887 -0
  118. datastore/tests/test_exploratory_batch35_engine_boundary.py +690 -0
  119. datastore/tests/test_exploratory_batch36_merge_chains.py +638 -0
  120. datastore/tests/test_exploratory_batch37_groupby_apply_reshape.py +857 -0
  121. datastore/tests/test_exploratory_batch38_accessor_chain_agg.py +835 -0
  122. datastore/tests/test_exploratory_batch39_shift_diff_assign.py +928 -0
  123. datastore/tests/test_exploratory_batch40_eval_query_coerce.py +674 -0
  124. datastore/tests/test_exploratory_batch41_fillna_duplicates_chains.py +1006 -0
  125. datastore/tests/test_exploratory_batch42_statistical_reshape_chain.py +1215 -0
  126. datastore/tests/test_exploratory_batch43_concat_pipe_combine.py +1017 -0
  127. datastore/tests/test_exploratory_batch44_rolling_interop_edge.py +871 -0
  128. datastore/tests/test_exploratory_batch45_insert_pop_level_ops.py +728 -0
  129. datastore/tests/test_exploratory_batch46_agg_type_stack.py +904 -0
  130. datastore/tests/test_exploratory_batch47_multicolumn_multiindex_sparse.py +929 -0
  131. datastore/tests/test_exploratory_batch48_type_chain_empty_edge.py +790 -0
  132. datastore/tests/test_exploratory_batch49_scalar_nested_edge.py +1007 -0
  133. datastore/tests/test_exploratory_batch50_advanced_iteration_agg.py +1019 -0
  134. datastore/tests/test_exploratory_batch51_groupby_edge_cases.py +935 -0
  135. datastore/tests/test_exploratory_batch52_negative_index_accessor.py +1085 -0
  136. datastore/tests/test_exploratory_batch53_module_funcs_casewhen.py +1000 -0
  137. datastore/tests/test_exploratory_batch54_binary_ops_compare_axis.py +1031 -0
  138. datastore/tests/test_exploratory_batch55_io_metadata_sort.py +996 -0
  139. datastore/tests/test_exploratory_batch56_apply_window_fillna.py +939 -0
  140. datastore/tests/test_exploratory_batch57_copy_index_slice.py +1147 -0
  141. datastore/tests/test_exploratory_batch58_param_edge_chain.py +935 -0
  142. datastore/tests/test_exploratory_batch59_reshape_agg.py +878 -0
  143. datastore/tests/test_exploratory_batch60_equals_arithmetic_edge.py +789 -0
  144. datastore/tests/test_exploratory_batch61_pipe_dtype_meta.py +742 -0
  145. datastore/tests/test_exploratory_batch62_columnexpr_advanced.py +952 -0
  146. datastore/tests/test_exploratory_batch63_chain_special_values.py +730 -0
  147. datastore/tests/test_exploratory_batch64_join_cumulative.py +777 -0
  148. datastore/tests/test_exploratory_batch65_empty_single_null_chain.py +734 -0
  149. datastore/tests/test_exploratory_batch66_constructor_transform.py +629 -0
  150. datastore/tests/test_exploratory_batch67_lazy_index_special.py +549 -0
  151. datastore/tests/test_exploratory_batch68_callable_at_chains.py +652 -0
  152. datastore/tests/test_exploratory_batch69_df_binary_compare.py +661 -0
  153. datastore/tests/test_exploratory_batch70_complex_type_chain.py +665 -0
  154. datastore/tests/test_exploratory_batch71_sql_merge_edge.py +656 -0
  155. datastore/tests/test_exploratory_batch72_iter_accessor_empty.py +711 -0
  156. datastore/tests/test_exploratory_batch73_update_numeric_edge.py +617 -0
  157. datastore/tests/test_exploratory_batch75_advanced_ops.py +726 -0
  158. datastore/tests/test_exploratory_batch76_boundary_chain.py +643 -0
  159. datastore/tests/test_exploratory_batch77_pivot_transform_edge.py +691 -0
  160. datastore/tests/test_exploratory_batch78_inplace_index_dtype.py +722 -0
  161. datastore/tests/test_exploratory_batch79_construction_coercion_edge.py +706 -0
  162. datastore/tests/test_exploratory_batch7_edge_cases.py +915 -0
  163. datastore/tests/test_exploratory_batch80_return_type_consistency.py +635 -0
  164. datastore/tests/test_exploratory_batch81_chain_pushdown_edge.py +485 -0
  165. datastore/tests/test_exploratory_batch82_sort_groupby_chain.py +374 -0
  166. datastore/tests/test_exploratory_batch83_apply_transform_agg.py +643 -0
  167. datastore/tests/test_exploratory_batch84_query_eval_cumulative.py +680 -0
  168. datastore/tests/test_exploratory_batch85_deep_chain_boundary.py +714 -0
  169. datastore/tests/test_exploratory_batch86_complex_edge.py +670 -0
  170. datastore/tests/test_exploratory_batch87_advanced_edge.py +850 -0
  171. datastore/tests/test_exploratory_batch88_indexing_complex.py +851 -0
  172. datastore/tests/test_exploratory_batch89_apply_rolling_rank.py +638 -0
  173. datastore/tests/test_exploratory_batch8_datetime_reshape.py +741 -0
  174. datastore/tests/test_exploratory_batch90_indexing_chain_edge.py +600 -0
  175. datastore/tests/test_exploratory_batch91_squeeze_explode_stack.py +720 -0
  176. datastore/tests/test_exploratory_batch92_returntype_chain_inf.py +654 -0
  177. datastore/tests/test_exploratory_batch93_multi_segment_edge.py +816 -0
  178. datastore/tests/test_exploratory_batch94_null_semantics.py +733 -0
  179. datastore/tests/test_exploratory_batch95_type_coercion.py +552 -0
  180. datastore/tests/test_exploratory_batch96_deep_chains.py +679 -0
  181. datastore/tests/test_exploratory_batch97_type_nullable_boundary.py +775 -0
  182. datastore/tests/test_exploratory_batch98_advanced_chain_boundary.py +847 -0
  183. datastore/tests/test_exploratory_batch99_cross_datastore_accessor.py +691 -0
  184. datastore/tests/test_exploratory_batch9_merge_groupby.py +231 -0
  185. datastore/tests/test_exploratory_discovery_2026_01_04.py +453 -0
  186. datastore/tests/test_exploratory_discovery_2026_01_04_batch4.py +365 -0
  187. datastore/tests/test_exploratory_discovery_2026_01_04_batch5.py +607 -0
  188. datastore/tests/test_expressions.py +524 -0
  189. datastore/tests/test_file_path_auto_detection.py +189 -0
  190. datastore/tests/test_formats.py +79 -0
  191. datastore/tests/test_from_dataframe.py +408 -0
  192. datastore/tests/test_from_uri.py +312 -0
  193. datastore/tests/test_function_engine_switch.py +490 -0
  194. datastore/tests/test_function_registry.py +407 -0
  195. datastore/tests/test_functions.py +142 -0
  196. datastore/tests/test_functions_execution.py +393 -0
  197. datastore/tests/test_groupby_apply_sql_pushdown.py +394 -0
  198. datastore/tests/test_groupby_column_selection.py +362 -0
  199. datastore/tests/test_groupby_dropna.py +259 -0
  200. datastore/tests/test_groupby_first_last.py +195 -0
  201. datastore/tests/test_groupby_head_tail.py +361 -0
  202. datastore/tests/test_groupby_multiindex_columns.py +257 -0
  203. datastore/tests/test_groupby_nth.py +238 -0
  204. datastore/tests/test_groupby_sql_pushdown.py +457 -0
  205. datastore/tests/test_groupby_udf.py +356 -0
  206. datastore/tests/test_head_performance.py +182 -0
  207. datastore/tests/test_heuristic_edge_cases.py +297 -0
  208. datastore/tests/test_immutability.py +248 -0
  209. datastore/tests/test_in_conditions.py +229 -0
  210. datastore/tests/test_insert_pandas_style.py +202 -0
  211. datastore/tests/test_insert_update_delete.py +424 -0
  212. datastore/tests/test_isna_deep_edge_cases.py +1027 -0
  213. datastore/tests/test_joins.py +391 -0
  214. datastore/tests/test_json_accessor.py +612 -0
  215. datastore/tests/test_kaggle_domains.py +963 -0
  216. datastore/tests/test_kaggle_pandas_compat.py +767 -0
  217. datastore/tests/test_kaggle_pandas_compat2.py +954 -0
  218. datastore/tests/test_known_issues_verification.py +352 -0
  219. datastore/tests/test_large_data_row_order.py +286 -0
  220. datastore/tests/test_lazy_chain_engine_verification.py +728 -0
  221. datastore/tests/test_lazy_column_assignment_pushdown.py +396 -0
  222. datastore/tests/test_lazy_engine_exploration_batch6.py +956 -0
  223. datastore/tests/test_lazy_engine_switch.py +451 -0
  224. datastore/tests/test_lazy_execution.py +257 -0
  225. datastore/tests/test_lazy_ops_edge_cases.py +380 -0
  226. datastore/tests/test_lazy_where_mask.py +1285 -0
  227. datastore/tests/test_like_patterns.py +198 -0
  228. datastore/tests/test_limit_count_edge_cases.py +1324 -0
  229. datastore/tests/test_llm_pandas_compat.py +350 -0
  230. datastore/tests/test_loc_condition_pushdown.py +357 -0
  231. datastore/tests/test_logging_explain_lazy_ops.py +182 -0
  232. datastore/tests/test_migration_guide_examples.py +356 -0
  233. datastore/tests/test_mixed_operations.py +276 -0
  234. datastore/tests/test_mixed_sql_pandas_complex.py +851 -0
  235. datastore/tests/test_ml_feature_engineering.py +627 -0
  236. datastore/tests/test_module_level_functions.py +610 -0
  237. datastore/tests/test_multi_datasource.py +255 -0
  238. datastore/tests/test_negation.py +145 -0
  239. datastore/tests/test_notebook_churn_dashboard_mirror.py +595 -0
  240. datastore/tests/test_notebook_churn_model_mirror.py +417 -0
  241. datastore/tests/test_notebook_climate_mirror.py +294 -0
  242. datastore/tests/test_notebook_outlier_detection_mirror.py +309 -0
  243. datastore/tests/test_notebook_perth_mirror.py +307 -0
  244. datastore/tests/test_notebook_temperature_mirror.py +768 -0
  245. datastore/tests/test_notebook_titanic_mirror.py +698 -0
  246. datastore/tests/test_notebook_titanic_solutions_mirror.py +836 -0
  247. datastore/tests/test_null_conditions.py +328 -0
  248. datastore/tests/test_nullable_sql_pushdown.py +444 -0
  249. datastore/tests/test_numpy_compatibility.py +694 -0
  250. datastore/tests/test_pandas_alignment.py +656 -0
  251. datastore/tests/test_pandas_compat.py +1488 -0
  252. datastore/tests/test_pandas_compatibility.py +922 -0
  253. datastore/tests/test_pandas_immutability_alignment.py +781 -0
  254. datastore/tests/test_parquet_datetime_issues.py +543 -0
  255. datastore/tests/test_performance_mode.py +693 -0
  256. datastore/tests/test_profiling.py +370 -0
  257. datastore/tests/test_rank_sql_pushdown.py +300 -0
  258. datastore/tests/test_readme_example.py +389 -0
  259. datastore/tests/test_real_world_scenarios.py +512 -0
  260. datastore/tests/test_remote_connection.py +1334 -0
  261. datastore/tests/test_remote_connection_integration.py +483 -0
  262. datastore/tests/test_replace_comprehensive.py +321 -0
  263. datastore/tests/test_row_order_optimization.py +194 -0
  264. datastore/tests/test_schema_state.py +345 -0
  265. datastore/tests/test_segmented_execution.py +421 -0
  266. datastore/tests/test_selects.py +691 -0
  267. datastore/tests/test_slice_step.py +337 -0
  268. datastore/tests/test_sql_builder.py +731 -0
  269. datastore/tests/test_sql_expressions.py +240 -0
  270. datastore/tests/test_sql_vs_pandas_filter.py +2114 -0
  271. datastore/tests/test_str_accessor_lazy.py +647 -0
  272. datastore/tests/test_str_accessor_mirror.py +293 -0
  273. datastore/tests/test_str_accessor_recursion_fix.py +511 -0
  274. datastore/tests/test_string_functions.py +203 -0
  275. datastore/tests/test_subqueries.py +265 -0
  276. datastore/tests/test_table_functions.py +1063 -0
  277. datastore/tests/test_titanic_notebook_mirror.py +1210 -0
  278. datastore/tests/test_titanic_pandas_comparison.py +917 -0
  279. datastore/tests/test_tolist_return_type.py +236 -0
  280. datastore/tests/test_type_consistency.py +385 -0
  281. datastore/tests/test_uri_parser.py +397 -0
  282. datastore/tests/test_url_ip_geo_accessor.py +230 -0
  283. datastore/tests/test_utils.py +809 -0
  284. datastore/tests/test_value_counts_sql_pushdown.py +541 -0
  285. datastore/tests/xfail_markers.py +692 -0
  286. datastore/uri_parser.py +578 -0
  287. datastore/utils.py +294 -0
@@ -0,0 +1,636 @@
1
+ Metadata-Version: 2.4
2
+ Name: chdb
3
+ Version: 4.1.0
4
+ Summary: chDB is an in-process OLAP SQL Engine powered by ClickHouse
5
+ Author-email: chDB Team <auxten@clickhouse.com>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://clickhouse.com/chdb
8
+ Project-URL: Documentation, https://chdb.readthedocs.io/en/latest/index.html
9
+ Project-URL: Repository, https://github.com/chdb-io/chdb
10
+ Project-URL: Changelog, https://github.com/chdb-io/chdb/releases
11
+ Project-URL: Issues, https://github.com/chdb-io/chdb/issues
12
+ Keywords: chdb,clickhouse,olap,analytics,database,sql
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Environment :: Plugins
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: Apache Software License
17
+ Classifier: Operating System :: MacOS :: MacOS X
18
+ Classifier: Operating System :: POSIX :: Linux
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
26
+ Classifier: Topic :: Software Development :: Libraries
27
+ Classifier: Topic :: Database
28
+ Classifier: Topic :: Scientific/Engineering
29
+ Requires-Python: >=3.9
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE.txt
32
+ License-File: AUTHORS.md
33
+ Requires-Dist: chdb-core>=26.1.0
34
+ Requires-Dist: pandas>=2.1.0
35
+ Requires-Dist: pyarrow>=13.0.0
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest; extra == "dev"
38
+ Requires-Dist: pytest-cov; extra == "dev"
39
+ Provides-Extra: publish
40
+ Requires-Dist: twine; extra == "publish"
41
+ Requires-Dist: wheel; extra == "publish"
42
+ Dynamic: license-file
43
+
44
+ <div align="center">
45
+ <a href="https://clickhouse.com/blog/chdb-joins-clickhouse-family">📢 chDB joins the ClickHouse family 🐍+🚀</a>
46
+ </div>
47
+ <div align="center">
48
+ <picture>
49
+ <source media="(prefers-color-scheme: dark)" srcset="https://github.com/chdb-io/chdb/raw/main/docs/_static/snake-chdb-dark.png" height="130">
50
+ <img src="https://github.com/chdb-io/chdb/raw/main/docs/_static/snake-chdb.png" height="130">
51
+ </picture>
52
+
53
+ [![Build X86](https://github.com/chdb-io/chdb/actions/workflows/build_linux_x86_wheels.yml/badge.svg?event=release)](https://github.com/chdb-io/chdb/actions/workflows/build_linux_x86_wheels.yml)
54
+ [![PyPI](https://img.shields.io/pypi/v/chdb.svg)](https://pypi.org/project/chdb/)
55
+ [![Downloads](https://static.pepy.tech/badge/chdb)](https://pepy.tech/project/chdb)
56
+ [![Discord](https://img.shields.io/discord/1098133460310294528?logo=Discord)](https://discord.gg/D2Daa2fM5K)
57
+ [![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=social&label=Twitter)](https://twitter.com/chdb_io)
58
+ </div>
59
+
60
+ # chDB
61
+
62
+
63
+ > chDB is an in-process SQL OLAP Engine powered by ClickHouse [^1]
64
+ > For more details: [The birth of chDB](https://auxten.com/the-birth-of-chdb/)
65
+
66
+
67
+ ## Features
68
+
69
+ * **🐼 Pandas-compatible DataStore API** - Use familiar pandas syntax with ClickHouse performance
70
+ * In-process SQL OLAP Engine, powered by ClickHouse
71
+ * No need to install ClickHouse
72
+ * Minimized data copy from C++ to Python with [python memoryview](https://docs.python.org/3/c-api/memoryview.html)
73
+ * Input and output support for Parquet, CSV, JSON, Arrow, ORC and 60+ [more](https://clickhouse.com/docs/en/interfaces/formats) formats
74
+ * Supports Python DB-API 2.0
75
+
76
+ ## Arch
77
+ <div align="center">
78
+ <img src="https://github.com/chdb-io/chdb/raw/main/docs/_static/arch-chdb3.png" width="450">
79
+ </div>
80
+
81
+ ## Installation
82
+ Currently, chDB supports Python 3.9+ on macOS and Linux (x86_64 and ARM64).
83
+ ```bash
84
+ pip install chdb
85
+ ```
86
+
87
+ <br>
88
+
89
+ ---
90
+
91
+ ## 🐼 DataStore: Pandas-Compatible API (Recommended)
92
+
93
+ DataStore provides a **familiar pandas-like API** with automatic SQL generation and ClickHouse performance. Write pandas code, get SQL performance - no learning curve required.
94
+
95
+ ### Quick Start (30 seconds)
96
+
97
+ Just change your import - use the pandas API you already know:
98
+
99
+ ```python
100
+ import datastore as pd # That's it! Use pandas API as usual
101
+
102
+ # Create a DataFrame - works exactly like pandas
103
+ df = pd.DataFrame({
104
+ 'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
105
+ 'age': [25, 30, 35, 28],
106
+ 'city': ['NYC', 'LA', 'NYC', 'LA']
107
+ })
108
+
109
+ # Filter with familiar pandas syntax
110
+ result = df[df['age'] > 26]
111
+ print(result)
112
+ # name age city
113
+ # 1 Bob 30 LA
114
+ # 2 Charlie 35 NYC
115
+ # 3 Diana 28 LA
116
+
117
+ # GroupBy works too
118
+ print(df.groupby('city')['age'].mean())
119
+ # city
120
+ # LA 29.0
121
+ # NYC 30.0
122
+ ```
123
+
124
+ **✨ Zero code changes required.** All operations are lazy - they're recorded and compiled into optimized SQL, executed only when results are needed.
125
+
126
+ ### Why DataStore?
127
+
128
+ | Feature | pandas | DataStore |
129
+ |---------|--------|-----------|
130
+ | API | ✅ Familiar | ✅ Same pandas API |
131
+ | Large datasets | ❌ Memory limited | ✅ SQL-optimized |
132
+ | Learning curve | ✅ Easy | ✅ None - same syntax |
133
+ | Performance | ❌ Single-threaded | ✅ ClickHouse engine |
134
+
135
+ ### Architecture
136
+
137
+ <div align="center">
138
+ <img src="https://github.com/chdb-io/chdb/raw/main/docs/_static/datastore_architecture.png" width="700">
139
+ </div>
140
+
141
+ DataStore uses **lazy evaluation** with **dual-engine execution**:
142
+ 1. **Lazy Operation Chain**: Operations are recorded, not executed immediately
143
+ 2. **Smart Engine Selection**: QueryPlanner routes each segment to the optimal engine (chDB for SQL, Pandas for complex ops)
144
+ 3. **Intermediate Caching**: Results cached at each step for fast iterative exploration
145
+
146
+ ### Working with Files
147
+
148
+ ```python
149
+ from datastore import DataStore
150
+
151
+ # Load any file format
152
+ ds = DataStore.from_file("data.parquet") # or CSV, JSON, ORC...
153
+
154
+ # Explore your data
155
+ print(ds.head()) # Preview first 5 rows
156
+ print(ds.shape) # (rows, columns)
157
+ print(ds.columns) # Column names
158
+
159
+ # Build queries with method chaining
160
+ result = (ds
161
+ .select("product", "revenue", "date")
162
+ .filter(ds.revenue > 1000)
163
+ .sort("revenue", ascending=False)
164
+ .head(10))
165
+
166
+ print(result)
167
+ ```
168
+
169
+ ### Query Any Data Source
170
+
171
+ ```python
172
+ from datastore import DataStore
173
+
174
+ # S3 (with anonymous access)
175
+ ds = DataStore.uri("s3://bucket/data.parquet?nosign=true")
176
+
177
+ # MySQL
178
+ ds = DataStore.uri("mysql://user:pass@localhost:3306/mydb/users")
179
+
180
+ # PostgreSQL
181
+ ds = DataStore.uri("postgresql://user:pass@localhost:5432/mydb/products")
182
+
183
+ # And more: SQLite, MongoDB, ClickHouse, HDFS, Azure, GCS...
184
+ ```
185
+
186
+ ### Pandas API Coverage
187
+
188
+ DataStore implements **comprehensive pandas compatibility**:
189
+
190
+ | Category | Coverage |
191
+ |----------|----------|
192
+ | DataFrame methods | 209 methods |
193
+ | Series.str accessor | 56 methods |
194
+ | Series.dt accessor | 42+ methods |
195
+ | ClickHouse SQL functions | 334 functions |
196
+
197
+ ```python
198
+ # All these pandas methods work:
199
+ df.drop(columns=['unused'])
200
+ df.fillna(0)
201
+ df.assign(revenue=lambda x: x['price'] * x['quantity'])
202
+ df.sort_values('revenue', ascending=False)
203
+ df.groupby('category').agg({'revenue': 'sum', 'quantity': 'mean'})
204
+ df.merge(other_df, on='id')
205
+ df.pivot_table(values='sales', index='date', columns='product')
206
+ # ... and 200+ more
207
+ ```
208
+
209
+ ### String and DateTime Operations
210
+
211
+ ```python
212
+ # String operations via .str accessor
213
+ ds['name'].str.upper()
214
+ ds['email'].str.contains('@gmail')
215
+ ds['text'].str.replace('old', 'new')
216
+
217
+ # DateTime operations via .dt accessor
218
+ ds['date'].dt.year
219
+ ds['date'].dt.month
220
+ ds['timestamp'].dt.hour
221
+ ```
222
+
223
+ ### Documentation
224
+
225
+ - **[Pandas Compatibility Guide](docs/PANDAS_COMPATIBILITY.md)** - Full list of supported methods
226
+ - **[Function Reference](docs/FUNCTIONS.md)** - 334 ClickHouse SQL functions
227
+ - **[Migration Guide](docs/PANDAS_MIGRATION_GUIDE.md)** - Step-by-step guide for pandas users
228
+
229
+ ---
230
+
231
+ <br>
232
+
233
+ ## SQL API
234
+
235
+ For users who prefer SQL or need advanced ClickHouse features:
236
+
237
+ ### Run in command line
238
+ > `python3 -m chdb SQL [OutputFormat]`
239
+ ```bash
240
+ python3 -m chdb "SELECT 1,'abc'" Pretty
241
+ ```
242
+
243
+ <br>
244
+
245
+ ### Data Input
246
+ The following methods are available to access on-disk and in-memory data formats:
247
+
248
+ <details>
249
+ <summary><h4>🗂️ Connection based API</h4></summary>
250
+
251
+ ```python
252
+ import chdb
253
+
254
+ # Create a connection (in-memory by default)
255
+ conn = chdb.connect(":memory:")
256
+ # Or use file-based: conn = chdb.connect("test.db")
257
+
258
+ # Create a cursor
259
+ cur = conn.cursor()
260
+
261
+ # Execute queries
262
+ cur.execute("SELECT number, toString(number) as str FROM system.numbers LIMIT 3")
263
+
264
+ # Fetch data in different ways
265
+ print(cur.fetchone()) # Single row: (0, '0')
266
+ print(cur.fetchmany(2)) # Multiple rows: ((1, '1'), (2, '2'))
267
+
268
+ # Get column information
269
+ print(cur.column_names()) # ['number', 'str']
270
+ print(cur.column_types()) # ['UInt64', 'String']
271
+
272
+ # Use the cursor as an iterator
273
+ cur.execute("SELECT number FROM system.numbers LIMIT 3")
274
+ for row in cur:
275
+ print(row)
276
+
277
+ # Always close resources when done
278
+ cur.close()
279
+ conn.close()
280
+ ```
281
+
282
+ For more details, see [examples/connect.py](examples/connect.py).
283
+ </details>
284
+
285
+
286
+ <details>
287
+ <summary><h4>🗂️ Query On File</h4> (Parquet, CSV, JSON, Arrow, ORC and 60+)</summary>
288
+
289
+ You can execute SQL and get the result back in the output format of your choice.
290
+
291
+ ```python
292
+ import chdb
293
+ res = chdb.query('select version()', 'Pretty'); print(res)
294
+ ```
295
+
296
+ ### Work with Parquet or CSV
297
+ ```python
298
+ # See more data type formats in tests/format_output.py
299
+ res = chdb.query('select * from file("data.parquet", Parquet)', 'JSON'); print(res)
300
+ res = chdb.query('select * from file("data.csv", CSV)', 'CSV'); print(res)
301
+ print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, storage read {res.storage_rows_read()} rows, {res.storage_bytes_read()} bytes, elapsed {res.elapsed()} seconds")
302
+ ```
303
+
304
+ ### Parameterized queries
305
+ ```python
306
+ import chdb
307
+
308
+ df = chdb.query(
309
+ "SELECT toDate({base_date:String}) + number AS date "
310
+ "FROM numbers({total_days:UInt64}) "
311
+ "LIMIT {items_per_page:UInt64}",
312
+ "DataFrame",
313
+ params={"base_date": "2025-01-01", "total_days": 10, "items_per_page": 2},
314
+ )
315
+ print(df)
316
+ # date
317
+ # 0 2025-01-01
318
+ # 1 2025-01-02
319
+ ```
320
+
321
+ ### Query progress (`progress=auto`)
322
+ ```python
323
+ import chdb
324
+
325
+ # Connection API
326
+ conn = chdb.connect(":memory:?progress=auto")
327
+ conn.query("SELECT sum(number) FROM numbers_mt(1e10) GROUP BY number % 10 SETTINGS max_threads=4")
328
+ ```
329
+
330
+ ```python
331
+ import chdb
332
+
333
+ # One-shot API
334
+ res = chdb.query(
335
+ "SELECT sum(number) FROM numbers_mt(1e10) GROUP BY number % 10 SETTINGS max_threads=4",
336
+ options={"progress": "auto"},
337
+ )
338
+ ```
339
+
340
+ `progress=auto` behavior:
341
+ - In terminal runs: show textual progress updates in the terminal.
342
+ - Jupyter/Marimo notebook: render progress bar in notebook output.
343
+
344
+ Other progress options:
345
+ - Progress bar:
346
+ - `progress=tty`: write progress to terminal TTY.
347
+ - `progress=err`: write progress to `stderr`.
348
+ - `progress=off`: disable progress bar output.
349
+ - Progress table (terminal output):
350
+ - `progress-table=tty`: write progress table to terminal TTY.
351
+ - `progress-table=err`: write progress table to `stderr`.
352
+ - `progress-table=off`: disable progress table output.
353
+
354
+ ### Pandas dataframe output
355
+ ```python
356
+ # See more in https://clickhouse.com/docs/en/interfaces/formats
357
+ chdb.query('select * from file("data.parquet", Parquet)', 'Dataframe')
358
+ ```
359
+ </details>
360
+
361
+ <details>
362
+ <summary><h4>🗂️ Query On Table</h4> (Pandas DataFrame, Parquet file/bytes, Arrow bytes) </summary>
363
+
364
+ ### Query On Pandas DataFrame
365
+ ```python
366
+ import chdb.dataframe as cdf
367
+ import pandas as pd
368
+ # Join 2 DataFrames
369
+ df1 = pd.DataFrame({'a': [1, 2, 3], 'b': ["one", "two", "three"]})
370
+ df2 = pd.DataFrame({'c': [1, 2, 3], 'd': ["①", "②", "③"]})
371
+ ret_tbl = cdf.query(sql="select * from __tbl1__ t1 join __tbl2__ t2 on t1.a = t2.c",
372
+ tbl1=df1, tbl2=df2)
373
+ print(ret_tbl)
374
+ # Query on the DataFrame Table
375
+ print(ret_tbl.query('select b, sum(a) from __table__ group by b'))
376
+ # Pandas DataFrames are automatically registered as temporary tables in ClickHouse
377
+ chdb.query("SELECT * FROM Python(df1) t1 JOIN Python(df2) t2 ON t1.a = t2.c").show()
378
+ ```
379
+ </details>
380
+
381
+ <details>
382
+ <summary><h4>🗂️ Query with Stateful Session</h4></summary>
383
+
384
+ ```python
385
+ from chdb import session as chs
386
+
387
+ ## Create DB, Table, View in temp session, auto cleanup when session is deleted.
388
+ sess = chs.Session()
389
+ sess.query("CREATE DATABASE IF NOT EXISTS db_xxx ENGINE = Atomic")
390
+ sess.query("CREATE TABLE IF NOT EXISTS db_xxx.log_table_xxx (x String, y Int) ENGINE = Log;")
391
+ sess.query("INSERT INTO db_xxx.log_table_xxx VALUES ('a', 1), ('b', 3), ('c', 2), ('d', 5);")
392
+ sess.query(
393
+ "CREATE VIEW db_xxx.view_xxx AS SELECT * FROM db_xxx.log_table_xxx LIMIT 4;"
394
+ )
395
+ print("Select from view:\n")
396
+ print(sess.query("SELECT * FROM db_xxx.view_xxx", "Pretty"))
397
+ ```
398
+
399
+ see also: [test_stateful.py](tests/test_stateful.py).
400
+ </details>
401
+
402
+ <details>
403
+ <summary><h4>🗂️ Query with Python DB-API 2.0</h4></summary>
404
+
405
+ ```python
406
+ import chdb.dbapi as dbapi
407
+ print("chdb driver version: {0}".format(dbapi.get_client_info()))
408
+
409
+ conn1 = dbapi.connect()
410
+ cur1 = conn1.cursor()
411
+ cur1.execute('select version()')
412
+ print("description: ", cur1.description)
413
+ print("data: ", cur1.fetchone())
414
+ cur1.close()
415
+ conn1.close()
416
+ ```
417
+ </details>
418
+
419
+
420
+ <details>
421
+ <summary><h4>🗂️ Query with UDF (User Defined Functions)</h4></summary>
422
+
423
+ ```python
424
+ from chdb.udf import chdb_udf
425
+ from chdb import query
426
+
427
+ @chdb_udf()
428
+ def sum_udf(lhs, rhs):
429
+ return int(lhs) + int(rhs)
430
+
431
+ print(query("select sum_udf(12,22)"))
432
+ ```
433
+
434
+ Some notes on the chDB Python UDF (User Defined Function) decorator:
435
+ 1. The function should be stateless. So, only UDFs are supported, not UDAFs (User Defined Aggregate Functions).
436
+ 2. Default return type is String. If you want to change the return type, you can pass in the return type as an argument.
437
+ The return type should be one of the following: https://clickhouse.com/docs/en/sql-reference/data-types
438
+ 3. The function should take in arguments of type String. As the input is TabSeparated, all arguments are strings.
439
+ 4. The function will be called for each line of input. Something like this:
440
+ ```
441
+ def sum_udf(lhs, rhs):
442
+ return int(lhs) + int(rhs)
443
+
444
+ for line in sys.stdin:
445
+ args = line.strip().split('\t')
446
+ lhs = args[0]
447
+ rhs = args[1]
448
+ print(sum_udf(lhs, rhs))
449
+ sys.stdout.flush()
450
+ ```
451
+ 5. The function should be a pure Python function. You SHOULD import all Python modules used IN THE FUNCTION.
452
+ ```
453
+ def func_use_json(arg):
454
+ import json
455
+ ...
456
+ ```
457
+ 6. The Python interpreter used is the same as the one used to run the script; it is obtained from `sys.executable`.
458
+
459
+ see also: [test_udf.py](tests/test_udf.py).
460
+ </details>
461
+
462
+
463
+ <details>
464
+ <summary><h4>🗂️ Streaming Query</h4></summary>
465
+
466
+ Process large datasets with constant memory usage through chunked streaming.
467
+
468
+ ```python
469
+ from chdb import session as chs
470
+
471
+ sess = chs.Session()
472
+
473
+ # Example 1: Basic example of using streaming query
474
+ rows_cnt = 0
475
+ with sess.send_query("SELECT * FROM numbers(200000)", "CSV") as stream_result:
476
+ for chunk in stream_result:
477
+ rows_cnt += chunk.rows_read()
478
+
479
+ print(rows_cnt) # 200000
480
+
481
+ # Example 2: Manual iteration with fetch()
482
+ rows_cnt = 0
483
+ stream_result = sess.send_query("SELECT * FROM numbers(200000)", "CSV")
484
+ while True:
485
+ chunk = stream_result.fetch()
486
+ if chunk is None:
487
+ break
488
+ rows_cnt += chunk.rows_read()
489
+
490
+ print(rows_cnt) # 200000
491
+ ```
492
+
493
+ For more details, see [test_streaming_query.py](tests/test_streaming_query.py).
494
+ </details>
495
+
496
+
497
+ <details>
498
+ <summary><h4>🗂️ Python Table Engine</h4></summary>
499
+
500
+ ### Query on Pandas DataFrame
501
+
502
+ ```python
503
+ import chdb
504
+ import pandas as pd
505
+ df = pd.DataFrame(
506
+ {
507
+ "a": [1, 2, 3, 4, 5, 6],
508
+ "b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
509
+ }
510
+ )
511
+
512
+ chdb.query("SELECT b, sum(a) FROM Python(df) GROUP BY b ORDER BY b").show()
513
+ ```
514
+
515
+ ### Query on Arrow Table
516
+
517
+ ```python
518
+ import chdb
519
+ import pyarrow as pa
520
+ arrow_table = pa.table(
521
+ {
522
+ "a": [1, 2, 3, 4, 5, 6],
523
+ "b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
524
+ }
525
+ )
526
+
527
+ chdb.query("SELECT b, sum(a) FROM Python(arrow_table) GROUP BY b ORDER BY b").show()
528
+ ```
529
+
530
+ see also: [test_query_py.py](tests/test_query_py.py).
531
+ </details>
532
+
533
+ <details>
534
+ <summary><h4>🧠 AI-assisted SQL generation</h4></summary>
535
+
536
+ chDB can translate natural language prompts into SQL. Configure the AI client through the connection (or session) string parameters:
537
+
538
+ - `ai_provider`: `openai` or `anthropic`. Defaults to OpenAI-compatible when `ai_base_url` is set, otherwise auto-detected.
539
+ - `ai_api_key`: API key; falls back to `AI_API_KEY`, `OPENAI_API_KEY`, or `ANTHROPIC_API_KEY` env vars.
540
+ - `ai_base_url`: Custom base URL for OpenAI-compatible endpoints.
541
+ - `ai_model`: Model name (e.g., `gpt-4o-mini`, `claude-3-opus-20240229`).
542
+
543
+ ```python
544
+ import chdb
545
+
546
+ # Use env OPENAI_API_KEY/AI_API_KEY/ANTHROPIC_API_KEY for credentials
547
+ conn = chdb.connect("file::memory:?ai_provider=openai&ai_model=gpt-4o-mini")
548
+ conn.query("CREATE TABLE nums (n UInt32) ENGINE = Memory")
549
+ conn.query("INSERT INTO nums VALUES (1), (2), (3)")
550
+
551
+ sql = conn.generate_sql("Select all rows from nums ordered by n desc")
552
+ print(sql) # e.g., SELECT * FROM nums ORDER BY n DESC
553
+
554
+ # ask(): one-call generate + execute
555
+ print(conn.ask("List the numbers table", format="Pretty"))
556
+ ```
557
+
558
+ </details>
559
+
560
+ For more examples, see [examples](examples) and [tests](tests).
561
+
562
+ <br>
563
+
564
+ ## Demos and Examples
565
+
566
+ - [Project Documentation](https://clickhouse.com/docs/en/chdb) and [Usage Examples](https://clickhouse.com/docs/en/chdb/install/python)
567
+ - [Colab Notebooks](https://colab.research.google.com/drive/1-zKB6oKfXeptggXi0kUX87iR8ZTSr4P3?usp=sharing) and other [Script Examples](examples)
568
+
569
+ ## Benchmark
570
+
571
+ - [ClickBench of embedded engines](https://benchmark.clickhouse.com/#eyJzeXN0ZW0iOnsiQXRoZW5hIChwYXJ0aXRpb25lZCkiOnRydWUsIkF0aGVuYSAoc2luZ2xlKSI6dHJ1ZSwiQXVyb3JhIGZvciBNeVNRTCI6dHJ1ZSwiQXVyb3JhIGZvciBQb3N0Z3JlU1FMIjp0cnVlLCJCeXRlSG91c2UiOnRydWUsImNoREIiOnRydWUsIkNpdHVzIjp0cnVlLCJjbGlja2hvdXNlLWxvY2FsIChwYXJ0aXRpb25lZCkiOnRydWUsImNsaWNraG91c2UtbG9jYWwgKHNpbmdsZSkiOnRydWUsIkNsaWNrSG91c2UiOnRydWUsIkNsaWNrSG91c2UgKHR1bmVkKSI6dHJ1ZSwiQ2xpY2tIb3VzZSAoenN0ZCkiOnRydWUsIkNsaWNrSG91c2UgQ2xvdWQiOnRydWUsIkNsaWNrSG91c2UgKHdlYikiOnRydWUsIkNyYXRlREIiOnRydWUsIkRhdGFiZW5kIjp0cnVlLCJEYXRhRnVzaW9uIChzaW5nbGUpIjp0cnVlLCJBcGFjaGUgRG9yaXMiOnRydWUsIkRydWlkIjp0cnVlLCJEdWNrREIgKFBhcnF1ZXQpIjp0cnVlLCJEdWNrREIiOnRydWUsIkVsYXN0aWNzZWFyY2giOnRydWUsIkVsYXN0aWNzZWFyY2ggKHR1bmVkKSI6ZmFsc2UsIkdyZWVucGx1bSI6dHJ1ZSwiSGVhdnlBSSI6dHJ1ZSwiSHlkcmEiOnRydWUsIkluZm9icmlnaHQiOnRydWUsIktpbmV0aWNhIjp0cnVlLCJNYXJpYURCIENvbHVtblN0b3JlIjp0cnVlLCJNYXJpYURCIjpmYWxzZSwiTW9uZXREQiI6dHJ1ZSwiTW9uZ29EQiI6dHJ1ZSwiTXlTUUwgKE15SVNBTSkiOnRydWUsIk15U1FMIjp0cnVlLCJQaW5vdCI6dHJ1ZSwiUG9zdGdyZVNRTCI6dHJ1ZSwiUG9zdGdyZVNRTCAodHVuZWQpIjpmYWxzZSwiUXVlc3REQiAocGFydGl0aW9uZWQpIjp0cnVlLCJRdWVzdERCIjp0cnVlLCJSZWRzaGlmdCI6dHJ1ZSwiU2VsZWN0REIiOnRydWUsIlNpbmdsZVN0b3JlIjp0cnVlLCJTbm93Zmxha2UiOnRydWUsIlNRTGl0ZSI6dHJ1ZSwiU3RhclJvY2tzIjp0cnVlLCJUaW1lc2NhbGVEQiAoY29tcHJlc3Npb24pIjp0cnVlLCJUaW1lc2NhbGVEQiI6dHJ1ZX0sInR5cGUiOnsic3RhdGVsZXNzIjpmYWxzZSwibWFuYWdlZCI6ZmFsc2UsIkphdmEiOmZhbHNlLCJjb2x1bW4tb3JpZW50ZWQiOmZhbHNlLCJDKysiOmZhbHNlLCJNeVNRTCBjb21wYXRpYmxlIjpmYWxzZSwicm93LW9yaWVudGVkIjpmYWxzZSwiQyI6ZmFsc2UsIlBvc3RncmVTUUwgY29tcGF0aWJsZSI6ZmFsc2UsIkNsaWNrSG91c2UgZGVyaXZhdGl2ZSI6ZmFsc2UsImVtYmVkZGVkIjp0cnVlLCJzZXJ2ZXJsZXNzIjpmYWxzZSwiUnVzdCI6ZmFsc2UsInNlYXJjaCI6ZmFsc2UsImRvY3VtZW50IjpmYWxzZSwidGltZS1zZXJpZXMiOmZhbHNlfSwibWFjaGluZSI6eyJzZXJ2ZXJsZXNzIjp0cnVlLCIxNmFjdSI6dHJ1ZSwiTCI6dHJ1ZSwiTSI6dHJ1ZSwiUyI6dHJ1ZSwiWFMiOnRydWUsImM2YS5tZXRhbCwgNTAwZ2IgZ3AyIjp0cnVlLCJjNmEuNHhsYXJnZSwgNTAwZ2IgZ3AyIjp0cnVlLCJjNS40eGxhcmdlLCA1MDBnYiBncDIiOnRydWUsIjE2IHRoc
mVhZHMiOnRydWUsIjIwIHRocmVhZHMiOnRydWUsIjI0IHRocmVhZHMiOnRydWUsIjI4IHRocmVhZHMiOnRydWUsIjMwIHRocmVhZHMiOnRydWUsIjQ4IHRocmVhZHMiOnRydWUsIjYwIHRocmVhZHMiOnRydWUsIm01ZC4yNHhsYXJnZSI6dHJ1ZSwiYzVuLjR4bGFyZ2UsIDIwMGdiIGdwMiI6dHJ1ZSwiYzZhLjR4bGFyZ2UsIDE1MDBnYiBncDIiOnRydWUsImRjMi44eGxhcmdlIjp0cnVlLCJyYTMuMTZ4bGFyZ2UiOnRydWUsInJhMy40eGxhcmdlIjp0cnVlLCJyYTMueGxwbHVzIjp0cnVlLCJTMjQiOnRydWUsIlMyIjp0cnVlLCIyWEwiOnRydWUsIjNYTCI6dHJ1ZSwiNFhMIjp0cnVlLCJYTCI6dHJ1ZX0sImNsdXN0ZXJfc2l6ZSI6eyIxIjp0cnVlLCIyIjp0cnVlLCI0Ijp0cnVlLCI4Ijp0cnVlLCIxNiI6dHJ1ZSwiMzIiOnRydWUsIjY0Ijp0cnVlLCIxMjgiOnRydWUsInNlcnZlcmxlc3MiOnRydWUsInVuZGVmaW5lZCI6dHJ1ZX0sIm1ldHJpYyI6ImhvdCIsInF1ZXJpZXMiOlt0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlXX0=)
572
+
573
+ - [chDB vs Pandas](https://colab.research.google.com/drive/1FogLujJ_-ds7RGurDrUnK-U0IW8a8Qd0)
574
+
575
+ - [Benchmark on DataFrame: chDB Pandas DuckDB Polars](https://benchmark.clickhouse.com/#eyJzeXN0ZW0iOnsiQWxsb3lEQiI6dHJ1ZSwiQWxsb3lEQiAodHVuZWQpIjp0cnVlLCJBdGhlbmEgKHBhcnRpdGlvbmVkKSI6dHJ1ZSwiQXRoZW5hIChzaW5nbGUpIjp0cnVlLCJBdXJvcmEgZm9yIE15U1FMIjp0cnVlLCJBdXJvcmEgZm9yIFBvc3RncmVTUUwiOnRydWUsIkJ5Q29uaXR5Ijp0cnVlLCJCeXRlSG91c2UiOnRydWUsImNoREIgKERhdGFGcmFtZSkiOnRydWUsImNoREIgKFBhcnF1ZXQsIHBhcnRpdGlvbmVkKSI6dHJ1ZSwiY2hEQiI6dHJ1ZSwiQ2l0dXMiOnRydWUsIkNsaWNrSG91c2UgQ2xvdWQgKGF3cykiOnRydWUsIkNsaWNrSG91c2UgQ2xvdWQgKGF6dXJlKSI6dHJ1ZSwiQ2xpY2tIb3VzZSBDbG91ZCAoZ2NwKSI6dHJ1ZSwiQ2xpY2tIb3VzZSAoZGF0YSBsYWtlLCBwYXJ0aXRpb25lZCkiOnRydWUsIkNsaWNrSG91c2UgKGRhdGEgbGFrZSwgc2luZ2xlKSI6dHJ1ZSwiQ2xpY2tIb3VzZSAoUGFycXVldCwgcGFydGl0aW9uZWQpIjp0cnVlLCJDbGlja0hvdXNlIChQYXJxdWV0LCBzaW5nbGUpIjp0cnVlLCJDbGlja0hvdXNlICh3ZWIpIjp0cnVlLCJDbGlja0hvdXNlIjp0cnVlLCJDbGlja0hvdXNlICh0dW5lZCkiOnRydWUsIkNsaWNrSG91c2UgKHR1bmVkLCBtZW1vcnkpIjp0cnVlLCJDbG91ZGJlcnJ5Ijp0cnVlLCJDcmF0ZURCIjp0cnVlLCJDcnVuY2h5IEJyaWRnZSBmb3IgQW5hbHl0aWNzIChQYXJxdWV0KSI6dHJ1ZSwiRGF0YWJlbmQiOnRydWUsIkRhdGFGdXNpb24gKFBhcnF1ZXQsIHBhcnRpdGlvbmVkKSI6dHJ1ZSwiRGF0YUZ1c2lvbiAoUGFycXVldCwgc2luZ2xlKSI6dHJ1ZSwiQXBhY2hlIERvcmlzIjp0cnVlLCJEcnVpZCI6dHJ1ZSwiRHVja0RCIChEYXRhRnJhbWUpIjp0cnVlLCJEdWNrREIgKFBhcnF1ZXQsIHBhcnRpdGlvbmVkKSI6dHJ1ZSwiRHVja0RCIjp0cnVlLCJFbGFzdGljc2VhcmNoIjp0cnVlLCJFbGFzdGljc2VhcmNoICh0dW5lZCkiOmZhbHNlLCJHbGFyZURCIjp0cnVlLCJHcmVlbnBsdW0iOnRydWUsIkhlYXZ5QUkiOnRydWUsIkh5ZHJhIjp0cnVlLCJJbmZvYnJpZ2h0Ijp0cnVlLCJLaW5ldGljYSI6dHJ1ZSwiTWFyaWFEQiBDb2x1bW5TdG9yZSI6dHJ1ZSwiTWFyaWFEQiI6ZmFsc2UsIk1vbmV0REIiOnRydWUsIk1vbmdvREIiOnRydWUsIk1vdGhlcmR1Y2siOnRydWUsIk15U1FMIChNeUlTQU0pIjp0cnVlLCJNeVNRTCI6dHJ1ZSwiT3hsYSI6dHJ1ZSwiUGFuZGFzIChEYXRhRnJhbWUpIjp0cnVlLCJQYXJhZGVEQiAoUGFycXVldCwgcGFydGl0aW9uZWQpIjp0cnVlLCJQYXJhZGVEQiAoUGFycXVldCwgc2luZ2xlKSI6dHJ1ZSwiUGlub3QiOnRydWUsIlBvbGFycyAoRGF0YUZyYW1lKSI6dHJ1ZSwiUG9zdGdyZVNRTCAodHVuZWQpIjpmYWxzZSwiUG9zdGdyZVNRTCI6dHJ1ZSwiUXVlc3REQiAocGFydGl0aW9uZWQpIjp0cnVlLCJRdWVzdERCIjp0cnVlLCJSZWRzaGlmdCI6dHJ1ZSwiU2
luZ2xlU3RvcmUiOnRydWUsIlNub3dmbGFrZSI6dHJ1ZSwiU1FMaXRlIjp0cnVlLCJTdGFyUm9ja3MiOnRydWUsIlRhYmxlc3BhY2UiOnRydWUsIlRlbWJvIE9MQVAgKGNvbHVtbmFyKSI6dHJ1ZSwiVGltZXNjYWxlREIgKGNvbXByZXNzaW9uKSI6dHJ1ZSwiVGltZXNjYWxlREIiOnRydWUsIlVtYnJhIjp0cnVlfSwidHlwZSI6eyJDIjpmYWxzZSwiY29sdW1uLW9yaWVudGVkIjpmYWxzZSwiUG9zdGdyZVNRTCBjb21wYXRpYmxlIjpmYWxzZSwibWFuYWdlZCI6ZmFsc2UsImdjcCI6ZmFsc2UsInN0YXRlbGVzcyI6ZmFsc2UsIkphdmEiOmZhbHNlLCJDKysiOmZhbHNlLCJNeVNRTCBjb21wYXRpYmxlIjpmYWxzZSwicm93LW9yaWVudGVkIjpmYWxzZSwiQ2xpY2tIb3VzZSBkZXJpdmF0aXZlIjpmYWxzZSwiZW1iZWRkZWQiOmZhbHNlLCJzZXJ2ZXJsZXNzIjpmYWxzZSwiZGF0YWZyYW1lIjp0cnVlLCJhd3MiOmZhbHNlLCJhenVyZSI6ZmFsc2UsImFuYWx5dGljYWwiOmZhbHNlLCJSdXN0IjpmYWxzZSwic2VhcmNoIjpmYWxzZSwiZG9jdW1lbnQiOmZhbHNlLCJzb21ld2hhdCBQb3N0Z3JlU1FMIGNvbXBhdGlibGUiOmZhbHNlLCJ0aW1lLXNlcmllcyI6ZmFsc2V9LCJtYWNoaW5lIjp7IjE2IHZDUFUgMTI4R0IiOnRydWUsIjggdkNQVSA2NEdCIjp0cnVlLCJzZXJ2ZXJsZXNzIjp0cnVlLCIxNmFjdSI6dHJ1ZSwiYzZhLjR4bGFyZ2UsIDUwMGdiIGdwMiI6dHJ1ZSwiTCI6dHJ1ZSwiTSI6dHJ1ZSwiUyI6dHJ1ZSwiWFMiOnRydWUsImM2YS5tZXRhbCwgNTAwZ2IgZ3AyIjp0cnVlLCIxOTJHQiI6dHJ1ZSwiMjRHQiI6dHJ1ZSwiMzYwR0IiOnRydWUsIjQ4R0IiOnRydWUsIjcyMEdCIjp0cnVlLCI5NkdCIjp0cnVlLCJkZXYiOnRydWUsIjcwOEdCIjp0cnVlLCJjNW4uNHhsYXJnZSwgNTAwZ2IgZ3AyIjp0cnVlLCJBbmFseXRpY3MtMjU2R0IgKDY0IHZDb3JlcywgMjU2IEdCKSI6dHJ1ZSwiYzUuNHhsYXJnZSwgNTAwZ2IgZ3AyIjp0cnVlLCJjNmEuNHhsYXJnZSwgMTUwMGdiIGdwMiI6dHJ1ZSwiY2xvdWQiOnRydWUsImRjMi44eGxhcmdlIjp0cnVlLCJyYTMuMTZ4bGFyZ2UiOnRydWUsInJhMy40eGxhcmdlIjp0cnVlLCJyYTMueGxwbHVzIjp0cnVlLCJTMiI6dHJ1ZSwiUzI0Ijp0cnVlLCIyWEwiOnRydWUsIjNYTCI6dHJ1ZSwiNFhMIjp0cnVlLCJYTCI6dHJ1ZSwiTDEgLSAxNkNQVSAzMkdCIjp0cnVlLCJjNmEuNHhsYXJnZSwgNTAwZ2IgZ3AzIjp0cnVlfSwiY2x1c3Rlcl9zaXplIjp7IjEiOnRydWUsIjIiOnRydWUsIjQiOnRydWUsIjgiOnRydWUsIjE2Ijp0cnVlLCIzMiI6dHJ1ZSwiNjQiOnRydWUsIjEyOCI6dHJ1ZSwic2VydmVybGVzcyI6dHJ1ZX0sIm1ldHJpYyI6ImhvdCIsInF1ZXJpZXMiOlt0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdH
J1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlLHRydWUsdHJ1ZSx0cnVlXX0=)
576
+
577
+
578
+ <div align="center">
579
+ <img src="https://github.com/chdb-io/chdb/raw/main/docs/_static/df_bench.png" width="800">
580
+ </div>
581
+
582
+
583
+ ## Documentation
584
+ - For chdb specific examples and documentation refer to [chDB docs](https://clickhouse.com/docs/en/chdb)
585
+ - For SQL syntax, please refer to [ClickHouse SQL Reference](https://clickhouse.com/docs/en/sql-reference/syntax)
586
+ - For DataStore API, see [Pandas Compatibility Guide](docs/PANDAS_COMPATIBILITY.md)
587
+
588
+
589
+ ## AI Coding Agent Skill
590
+
591
+ chdb provides an [AI Skill](agent/skills/using-chdb/) that teaches AI coding agents (Cursor, Claude Code, etc.) chdb's multi-source data analytics API. Install it so your AI assistant can write correct chdb code out of the box:
592
+
593
+ ```bash
594
+ curl -sL https://raw.githubusercontent.com/chdb-io/chdb/main/install_skill.sh | bash
595
+ ```
596
+
597
+
598
+ ## Events
599
+
600
+ - Demo chDB at [ClickHouse v23.7 livehouse!](https://t.co/todc13Kn19) and [Slides](https://docs.google.com/presentation/d/1ikqjOlimRa7QAg588TAB_Fna-Tad2WMg7_4AgnbQbFA/edit?usp=sharing)
601
+
602
+ ## Contributing
603
+ Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
604
+ Here are some things you can help with:
605
+ - [ ] Help test and report bugs
606
+ - [ ] Help improve documentation
607
+ - [ ] Help improve code quality and performance
608
+
609
+ ### Bindings
610
+
611
+ We welcome bindings for other languages, please refer to [bindings](bindings.md) for more details.
612
+
613
+ ## Version Guide
614
+
615
+ Please refer to [VERSION-GUIDE.md](VERSION-GUIDE.md) for more details.
616
+
617
+ ## Paper
618
+
619
+ - [ClickHouse - Lightning Fast Analytics for Everyone](https://www.vldb.org/pvldb/vol17/p3731-schulze.pdf)
620
+
621
+ ## License
622
+ Apache 2.0, see [LICENSE](LICENSE.txt) for more information.
623
+
624
+ ## Acknowledgments
625
+ chDB is mainly based on [ClickHouse](https://github.com/ClickHouse/ClickHouse) [^1].
626
+ For trademark and other reasons, I named it chDB.
627
+
628
+ ## Contact
629
+ - Discord: [https://discord.gg/D2Daa2fM5K](https://discord.gg/D2Daa2fM5K)
630
+ - Email: auxten@clickhouse.com
631
+ - Twitter: [@chdb_io](https://twitter.com/chdb_io)
632
+
633
+
634
+ <br>
635
+
636
+ [^1]: ClickHouse® is a trademark of ClickHouse Inc. All trademarks, service marks, and logos mentioned or depicted are the property of their respective owners. The use of any third-party trademarks, brand names, product names, and company names does not imply endorsement, affiliation, or association with the respective owners.