opteryx-core 0.5.0__cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (364) hide show
  1. opteryx/__init__.py +344 -0
  2. opteryx/__main__.py +212 -0
  3. opteryx/__version__.py +13 -0
  4. opteryx/command.py +11 -0
  5. opteryx/compiled/__init__.py +1 -0
  6. opteryx/compiled/aggregations/__init__.py +0 -0
  7. opteryx/compiled/aggregations/count_distinct.cpp +29589 -0
  8. opteryx/compiled/aggregations/count_distinct.cpython-313-x86_64-linux-gnu.so +0 -0
  9. opteryx/compiled/aggregations/count_distinct.pyi +1 -0
  10. opteryx/compiled/aggregations/count_distinct.pyx +84 -0
  11. opteryx/compiled/aggregations/group_by_draken.py +225 -0
  12. opteryx/compiled/aggregations/group_by_draken.pyx +262 -0
  13. opteryx/compiled/functions/__init__.py +0 -0
  14. opteryx/compiled/functions/strings.cpp +28527 -0
  15. opteryx/compiled/functions/strings.cpython-313-x86_64-linux-gnu.so +0 -0
  16. opteryx/compiled/functions/strings.pyx +122 -0
  17. opteryx/compiled/functions/timestamp.c +11280 -0
  18. opteryx/compiled/functions/timestamp.cpython-313-x86_64-linux-gnu.so +0 -0
  19. opteryx/compiled/functions/timestamp.pyx +201 -0
  20. opteryx/compiled/functions/vectors.cpp +33139 -0
  21. opteryx/compiled/functions/vectors.cpython-313-x86_64-linux-gnu.so +0 -0
  22. opteryx/compiled/functions/vectors.pyx +302 -0
  23. opteryx/compiled/io/__init__.py +30 -0
  24. opteryx/compiled/io/disk_reader.cpp +32229 -0
  25. opteryx/compiled/io/disk_reader.cpython-313-x86_64-linux-gnu.so +0 -0
  26. opteryx/compiled/io/disk_reader.pyx +287 -0
  27. opteryx/compiled/joins/__init__.py +14 -0
  28. opteryx/compiled/joins/cross_join.pyx +315 -0
  29. opteryx/compiled/joins/filter_join.pyx +68 -0
  30. opteryx/compiled/joins/inner_join.pyx +134 -0
  31. opteryx/compiled/joins/join_definitions.cpython-313-x86_64-linux-gnu.so +0 -0
  32. opteryx/compiled/joins/joins.cpp +46166 -0
  33. opteryx/compiled/joins/joins.pyx +9 -0
  34. opteryx/compiled/joins/nested_loop_join_equals.pyx +65 -0
  35. opteryx/compiled/joins/nested_loop_join_non_eqi.pyx +101 -0
  36. opteryx/compiled/joins/outer_join.pyx +116 -0
  37. opteryx/compiled/list_ops/__init__.py +17 -0
  38. opteryx/compiled/list_ops/function_definitions.cpython-313-x86_64-linux-gnu.so +0 -0
  39. opteryx/compiled/list_ops/list_allop_eq.pyx +170 -0
  40. opteryx/compiled/list_ops/list_allop_neq.pyx +193 -0
  41. opteryx/compiled/list_ops/list_anyop_eq.pyx +278 -0
  42. opteryx/compiled/list_ops/list_anyop_gt.pyx +178 -0
  43. opteryx/compiled/list_ops/list_anyop_gte.pyx +178 -0
  44. opteryx/compiled/list_ops/list_anyop_lt.pyx +178 -0
  45. opteryx/compiled/list_ops/list_anyop_lte.pyx +178 -0
  46. opteryx/compiled/list_ops/list_anyop_neq.pyx +309 -0
  47. opteryx/compiled/list_ops/list_arrow_op.pyx +45 -0
  48. opteryx/compiled/list_ops/list_cast_int64_to_string.pyx +69 -0
  49. opteryx/compiled/list_ops/list_cast_string_to_int.pyx +76 -0
  50. opteryx/compiled/list_ops/list_cast_uint64_to_string.pyx +63 -0
  51. opteryx/compiled/list_ops/list_contains_all.pyx +54 -0
  52. opteryx/compiled/list_ops/list_contains_any.pyx +43 -0
  53. opteryx/compiled/list_ops/list_date_diff.pyx +76 -0
  54. opteryx/compiled/list_ops/list_encode_utf8.pyx +27 -0
  55. opteryx/compiled/list_ops/list_get_element.pyx +47 -0
  56. opteryx/compiled/list_ops/list_hash.pyx +303 -0
  57. opteryx/compiled/list_ops/list_in_list.pyx +138 -0
  58. opteryx/compiled/list_ops/list_in_string.pyx +508 -0
  59. opteryx/compiled/list_ops/list_initcap.pyx +110 -0
  60. opteryx/compiled/list_ops/list_ip_in_cidr.pyx +87 -0
  61. opteryx/compiled/list_ops/list_length.pyx +54 -0
  62. opteryx/compiled/list_ops/list_levenshtein.pyx +87 -0
  63. opteryx/compiled/list_ops/list_long_arrow_op.pyx +42 -0
  64. opteryx/compiled/list_ops/list_ops.cpp +80184 -0
  65. opteryx/compiled/list_ops/list_ops.pyx +33 -0
  66. opteryx/compiled/list_ops/list_random_string.pyx +58 -0
  67. opteryx/compiled/list_ops/list_regex_replace.pyx +104 -0
  68. opteryx/compiled/list_ops/list_replace.pyx +81 -0
  69. opteryx/compiled/list_ops/list_soundex.pyx +37 -0
  70. opteryx/compiled/list_ops/list_string_slice.pyx +107 -0
  71. opteryx/compiled/simd_probe.cpp +7674 -0
  72. opteryx/compiled/simd_probe.cpython-313-x86_64-linux-gnu.so +0 -0
  73. opteryx/compiled/simd_probe.pyx +34 -0
  74. opteryx/compiled/structures/__init__.py +0 -0
  75. opteryx/compiled/structures/bloom_filter.c +33376 -0
  76. opteryx/compiled/structures/bloom_filter.cpython-313-x86_64-linux-gnu.so +0 -0
  77. opteryx/compiled/structures/bloom_filter.pxd +26 -0
  78. opteryx/compiled/structures/bloom_filter.pyx +197 -0
  79. opteryx/compiled/structures/buffers.cpp +38029 -0
  80. opteryx/compiled/structures/buffers.cpython-313-x86_64-linux-gnu.so +0 -0
  81. opteryx/compiled/structures/buffers.pxd +67 -0
  82. opteryx/compiled/structures/buffers.pyx +232 -0
  83. opteryx/compiled/structures/hash_table.cpp +34303 -0
  84. opteryx/compiled/structures/hash_table.cpython-313-x86_64-linux-gnu.so +0 -0
  85. opteryx/compiled/structures/hash_table.pxd +19 -0
  86. opteryx/compiled/structures/hash_table.pyx +75 -0
  87. opteryx/compiled/structures/lru_k.cpp +14869 -0
  88. opteryx/compiled/structures/lru_k.cpython-313-x86_64-linux-gnu.so +0 -0
  89. opteryx/compiled/structures/lru_k.pyx +288 -0
  90. opteryx/compiled/structures/memory_pool.cpp +42090 -0
  91. opteryx/compiled/structures/memory_pool.cpython-313-x86_64-linux-gnu.so +0 -0
  92. opteryx/compiled/structures/memory_pool.pyx +716 -0
  93. opteryx/compiled/structures/memory_view_stream.c +35019 -0
  94. opteryx/compiled/structures/memory_view_stream.cpython-313-x86_64-linux-gnu.so +0 -0
  95. opteryx/compiled/structures/memory_view_stream.pyx +264 -0
  96. opteryx/compiled/structures/node.c +14653 -0
  97. opteryx/compiled/structures/node.cpython-313-x86_64-linux-gnu.so +0 -0
  98. opteryx/compiled/structures/node.pyx +171 -0
  99. opteryx/compiled/structures/relation_statistics.cpp +16573 -0
  100. opteryx/compiled/structures/relation_statistics.cpython-313-x86_64-linux-gnu.so +0 -0
  101. opteryx/compiled/structures/relation_statistics.pyx +292 -0
  102. opteryx/compiled/table_ops/distinct.cpp +30196 -0
  103. opteryx/compiled/table_ops/distinct.cpython-313-x86_64-linux-gnu.so +0 -0
  104. opteryx/compiled/table_ops/distinct.pyx +82 -0
  105. opteryx/compiled/table_ops/hash_ops.cpp +32005 -0
  106. opteryx/compiled/table_ops/hash_ops.cpython-313-x86_64-linux-gnu.so +0 -0
  107. opteryx/compiled/table_ops/hash_ops.pxd +12 -0
  108. opteryx/compiled/table_ops/hash_ops.pyx +415 -0
  109. opteryx/compiled/table_ops/null_avoidant_ops.cpp +31984 -0
  110. opteryx/compiled/table_ops/null_avoidant_ops.cpython-313-x86_64-linux-gnu.so +0 -0
  111. opteryx/compiled/table_ops/null_avoidant_ops.pxd +13 -0
  112. opteryx/compiled/table_ops/null_avoidant_ops.pyx +77 -0
  113. opteryx/compute.cpython-313-x86_64-linux-gnu.so +0 -0
  114. opteryx/config.py +250 -0
  115. opteryx/connection.py +125 -0
  116. opteryx/connectors/__init__.py +385 -0
  117. opteryx/connectors/base/__init__.py +1 -0
  118. opteryx/connectors/base/base_connector.py +232 -0
  119. opteryx/connectors/capabilities/__init__.py +20 -0
  120. opteryx/connectors/capabilities/asynchronous.py +20 -0
  121. opteryx/connectors/capabilities/diachronic.py +19 -0
  122. opteryx/connectors/capabilities/eidetic.py +43 -0
  123. opteryx/connectors/capabilities/limit_pushable.py +9 -0
  124. opteryx/connectors/capabilities/predicate_pushable.py +143 -0
  125. opteryx/connectors/capabilities/statistics.py +154 -0
  126. opteryx/connectors/catalogs/__init__.py +23 -0
  127. opteryx/connectors/catalogs/gcs_catalog.py +35 -0
  128. opteryx/connectors/catalogs/local_catalog.py +251 -0
  129. opteryx/connectors/catalogs/s3_catalog.py +39 -0
  130. opteryx/connectors/filesystem_connector.py +469 -0
  131. opteryx/connectors/iceberg_connector.py +640 -0
  132. opteryx/connectors/io_systems/__init__.py +16 -0
  133. opteryx/connectors/io_systems/gcs_filesystem.py +197 -0
  134. opteryx/connectors/io_systems/local_filesystem.py +224 -0
  135. opteryx/connectors/io_systems/s3_filesystem.py +112 -0
  136. opteryx/connectors/opteryx_connector.py +678 -0
  137. opteryx/connectors/virtual_data.py +189 -0
  138. opteryx/connectors/virtual_data_connector.py +189 -0
  139. opteryx/constants/__init__.py +10 -0
  140. opteryx/constants/character_set.py +577 -0
  141. opteryx/constants/permissions.py +24 -0
  142. opteryx/constants/query_status.py +13 -0
  143. opteryx/constants/result_type.py +13 -0
  144. opteryx/cursor.py +600 -0
  145. opteryx/datatypes/__init__.py +0 -0
  146. opteryx/datatypes/intervals.py +201 -0
  147. opteryx/debugging.py +83 -0
  148. opteryx/draken/__init__.py +39 -0
  149. opteryx/draken/compiled/__init__.py +0 -0
  150. opteryx/draken/core/buffers.pxd +53 -0
  151. opteryx/draken/core/fixed_vector.pxd +62 -0
  152. opteryx/draken/core/ops.pxd +33 -0
  153. opteryx/draken/core/var_vector.pxd +73 -0
  154. opteryx/draken/evaluators/__init__.py +29 -0
  155. opteryx/draken/evaluators/compiled/__init__.py +9 -0
  156. opteryx/draken/evaluators/evaluator.py +420 -0
  157. opteryx/draken/evaluators/expression.py +162 -0
  158. opteryx/draken/evaluators/generator.py +252 -0
  159. opteryx/draken/interop/arrow.cpython-313-x86_64-linux-gnu.so +0 -0
  160. opteryx/draken/interop/arrow.pxd +5 -0
  161. opteryx/draken/interop/arrow_c_data_interface.pxd +32 -0
  162. opteryx/draken/morsels/__init__.py +10 -0
  163. opteryx/draken/morsels/align.cpython-313-x86_64-linux-gnu.so +0 -0
  164. opteryx/draken/morsels/morsel.cpython-313-x86_64-linux-gnu.so +0 -0
  165. opteryx/draken/morsels/morsel.pxd +23 -0
  166. opteryx/draken/vectors/__init__.py +29 -0
  167. opteryx/draken/vectors/_hash_api.cpp +27613 -0
  168. opteryx/draken/vectors/_hash_api.cpython-313-x86_64-linux-gnu.so +0 -0
  169. opteryx/draken/vectors/_hash_api.pyx +33 -0
  170. opteryx/draken/vectors/array_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  171. opteryx/draken/vectors/array_vector.pxd +28 -0
  172. opteryx/draken/vectors/arrow_vector.py +187 -0
  173. opteryx/draken/vectors/bool_mask.py +70 -0
  174. opteryx/draken/vectors/bool_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  175. opteryx/draken/vectors/bool_vector.pxd +29 -0
  176. opteryx/draken/vectors/date32_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  177. opteryx/draken/vectors/date32_vector.pxd +32 -0
  178. opteryx/draken/vectors/float64_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  179. opteryx/draken/vectors/float64_vector.pxd +42 -0
  180. opteryx/draken/vectors/int64_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  181. opteryx/draken/vectors/int64_vector.pxd +43 -0
  182. opteryx/draken/vectors/interval_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  183. opteryx/draken/vectors/interval_vector.pxd +28 -0
  184. opteryx/draken/vectors/string_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  185. opteryx/draken/vectors/string_vector.pxd +103 -0
  186. opteryx/draken/vectors/time_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  187. opteryx/draken/vectors/time_vector.pxd +25 -0
  188. opteryx/draken/vectors/timestamp_vector.cpython-313-x86_64-linux-gnu.so +0 -0
  189. opteryx/draken/vectors/timestamp_vector.pxd +34 -0
  190. opteryx/draken/vectors/vector.cpython-313-x86_64-linux-gnu.so +0 -0
  191. opteryx/draken/vectors/vector.pxd +37 -0
  192. opteryx/exceptions.py +434 -0
  193. opteryx/functions/__init__.py +735 -0
  194. opteryx/functions/date_functions.py +247 -0
  195. opteryx/functions/number_functions.py +116 -0
  196. opteryx/functions/other_functions.py +385 -0
  197. opteryx/functions/string_functions.py +342 -0
  198. opteryx/managers/__init__.py +4 -0
  199. opteryx/managers/execution/__init__.py +24 -0
  200. opteryx/managers/execution/parallel_engine.py +219 -0
  201. opteryx/managers/execution/serial_engine.py +150 -0
  202. opteryx/managers/expression/__init__.py +513 -0
  203. opteryx/managers/expression/binary_operators.py +222 -0
  204. opteryx/managers/expression/formatter.py +147 -0
  205. opteryx/managers/expression/ops.py +381 -0
  206. opteryx/managers/expression/unary_operations.py +128 -0
  207. opteryx/managers/kvstores/__init__.py +22 -0
  208. opteryx/managers/kvstores/base_kv_store.py +52 -0
  209. opteryx/managers/kvstores/factory.py +47 -0
  210. opteryx/managers/kvstores/file_kv_store.py +91 -0
  211. opteryx/managers/kvstores/gcs_kv_store.py +87 -0
  212. opteryx/managers/kvstores/null_cache.py +23 -0
  213. opteryx/managers/kvstores/s3_kv_store.py +112 -0
  214. opteryx/managers/kvstores/valkey.py +93 -0
  215. opteryx/managers/permissions/__init__.py +88 -0
  216. opteryx/models/__init__.py +24 -0
  217. opteryx/models/connection_context.py +67 -0
  218. opteryx/models/logical_column.py +93 -0
  219. opteryx/models/non_tabular_result.py +16 -0
  220. opteryx/models/physical_plan.py +100 -0
  221. opteryx/models/query_properties.py +25 -0
  222. opteryx/models/query_telemetry.py +77 -0
  223. opteryx/operators/__init__.py +168 -0
  224. opteryx/operators/aggregate_and_group_node.py +282 -0
  225. opteryx/operators/aggregate_node.py +243 -0
  226. opteryx/operators/base_plan_node.py +220 -0
  227. opteryx/operators/cross_join_node.py +164 -0
  228. opteryx/operators/distinct_node.py +65 -0
  229. opteryx/operators/exit_node.py +141 -0
  230. opteryx/operators/explain_node.py +40 -0
  231. opteryx/operators/filter_join_node.py +82 -0
  232. opteryx/operators/filter_node.py +87 -0
  233. opteryx/operators/function_dataset_node.py +138 -0
  234. opteryx/operators/heap_sort_node.py +126 -0
  235. opteryx/operators/iceberg_read_node.py +236 -0
  236. opteryx/operators/inner_join_node.py +174 -0
  237. opteryx/operators/limit_node.py +66 -0
  238. opteryx/operators/nested_loop_join_node.py +102 -0
  239. opteryx/operators/non_equi_join_node.py +128 -0
  240. opteryx/operators/null_reader_node.py +105 -0
  241. opteryx/operators/outer_join_node.py +251 -0
  242. opteryx/operators/projection_node.py +65 -0
  243. opteryx/operators/read_node.py +289 -0
  244. opteryx/operators/set_variable_node.py +37 -0
  245. opteryx/operators/show_columns_node.py +90 -0
  246. opteryx/operators/show_create_node.py +51 -0
  247. opteryx/operators/show_value_node.py +48 -0
  248. opteryx/operators/simple_aggregate_and_group_node.py +199 -0
  249. opteryx/operators/simple_aggregate_node.py +193 -0
  250. opteryx/operators/sort_node.py +106 -0
  251. opteryx/operators/union_node.py +60 -0
  252. opteryx/operators/unnest_join_node.py +282 -0
  253. opteryx/planner/__init__.py +214 -0
  254. opteryx/planner/ast_rewriter.py +359 -0
  255. opteryx/planner/binder/__init__.py +227 -0
  256. opteryx/planner/binder/binder.py +486 -0
  257. opteryx/planner/binder/binder_visitor.py +1338 -0
  258. opteryx/planner/binder/binding_context.py +73 -0
  259. opteryx/planner/binder/operator_map.py +357 -0
  260. opteryx/planner/logical_planner/__init__.py +44 -0
  261. opteryx/planner/logical_planner/logical_planner.py +1229 -0
  262. opteryx/planner/logical_planner/logical_planner_builders.py +996 -0
  263. opteryx/planner/logical_planner/logical_planner_renderers.py +247 -0
  264. opteryx/planner/logical_planner/logical_planner_rewriter.py +117 -0
  265. opteryx/planner/optimizer/__init__.py +178 -0
  266. opteryx/planner/optimizer/bench/__init__.py +0 -0
  267. opteryx/planner/optimizer/bench/cost_model.py +74 -0
  268. opteryx/planner/optimizer/bench/empty_table.py +140 -0
  269. opteryx/planner/optimizer/bench/remote_database_pushdown.py +465 -0
  270. opteryx/planner/optimizer/strategies/__init__.py +33 -0
  271. opteryx/planner/optimizer/strategies/boolean_simplication.py +326 -0
  272. opteryx/planner/optimizer/strategies/constant_folding.py +340 -0
  273. opteryx/planner/optimizer/strategies/correlated_filters.py +182 -0
  274. opteryx/planner/optimizer/strategies/distinct_pushdown.py +84 -0
  275. opteryx/planner/optimizer/strategies/join_ordering.py +74 -0
  276. opteryx/planner/optimizer/strategies/join_rewriter.py +76 -0
  277. opteryx/planner/optimizer/strategies/limit_pushdown.py +189 -0
  278. opteryx/planner/optimizer/strategies/operator_fusion.py +54 -0
  279. opteryx/planner/optimizer/strategies/optimization_strategy.py +85 -0
  280. opteryx/planner/optimizer/strategies/predicate_compaction.py +480 -0
  281. opteryx/planner/optimizer/strategies/predicate_ordering.py +213 -0
  282. opteryx/planner/optimizer/strategies/predicate_pushdown.py +442 -0
  283. opteryx/planner/optimizer/strategies/predicate_rewriter.py +559 -0
  284. opteryx/planner/optimizer/strategies/projection_pushdown.py +151 -0
  285. opteryx/planner/optimizer/strategies/redundant_operators.py +93 -0
  286. opteryx/planner/optimizer/strategies/split_conjunctive_predicates.py +90 -0
  287. opteryx/planner/physical_planner.py +112 -0
  288. opteryx/planner/sql_rewriter.py +226 -0
  289. opteryx/planner/views/__init__.py +67 -0
  290. opteryx/rugo/__init__.py +0 -0
  291. opteryx/rugo/converters/__init__.py +7 -0
  292. opteryx/rugo/converters/orso.py +331 -0
  293. opteryx/rugo/jsonl.cpython-313-x86_64-linux-gnu.so +0 -0
  294. opteryx/rugo/parquet.cpython-313-x86_64-linux-gnu.so +0 -0
  295. opteryx/shared/__init__.py +10 -0
  296. opteryx/shared/async_memory_pool.py +49 -0
  297. opteryx/shared/materialized_datasets.py +21 -0
  298. opteryx/shared/variables.py +157 -0
  299. opteryx/third_party/__init__.py +0 -0
  300. opteryx/third_party/abseil/containers.cpp +12501 -0
  301. opteryx/third_party/abseil/containers.cpython-313-x86_64-linux-gnu.so +0 -0
  302. opteryx/third_party/abseil/containers.pxd +63 -0
  303. opteryx/third_party/abseil/containers.pyx +110 -0
  304. opteryx/third_party/alantsd/base64.c +8666 -0
  305. opteryx/third_party/alantsd/base64.cpython-313-x86_64-linux-gnu.so +0 -0
  306. opteryx/third_party/alantsd/base64.pxd +14 -0
  307. opteryx/third_party/alantsd/base64.pyx +65 -0
  308. opteryx/third_party/cyan4973/xxhash.c +8293 -0
  309. opteryx/third_party/cyan4973/xxhash.cpython-313-x86_64-linux-gnu.so +0 -0
  310. opteryx/third_party/cyan4973/xxhash.pxd +12 -0
  311. opteryx/third_party/cyan4973/xxhash.pyx +27 -0
  312. opteryx/third_party/facebook/zstd.cpp +8823 -0
  313. opteryx/third_party/facebook/zstd.cpython-313-x86_64-linux-gnu.so +0 -0
  314. opteryx/third_party/facebook/zstd.pyx +75 -0
  315. opteryx/third_party/fastfloat/fast_float.cpp +31231 -0
  316. opteryx/third_party/fastfloat/fast_float.cpython-313-x86_64-linux-gnu.so +0 -0
  317. opteryx/third_party/fastfloat/fast_float.pxd +21 -0
  318. opteryx/third_party/fastfloat/fast_float.pyx +105 -0
  319. opteryx/third_party/fuzzy/soundex.c +8454 -0
  320. opteryx/third_party/fuzzy/soundex.pyx +86 -0
  321. opteryx/third_party/fuzzy.cpython-313-x86_64-linux-gnu.so +0 -0
  322. opteryx/third_party/mbleven.py +93 -0
  323. opteryx/third_party/query_builder/__init__.py +1 -0
  324. opteryx/third_party/query_builder/builder.py +147 -0
  325. opteryx/third_party/query_builder/test_builder.py +120 -0
  326. opteryx/third_party/sqloxide/__init__.py +13 -0
  327. opteryx/third_party/tktech/csimdjson.cpp +38578 -0
  328. opteryx/third_party/tktech/csimdjson.cpython-313-x86_64-linux-gnu.so +0 -0
  329. opteryx/third_party/tktech/csimdjson.pxd +119 -0
  330. opteryx/third_party/tktech/csimdjson.pyx +568 -0
  331. opteryx/third_party/travers/__init__.py +1 -0
  332. opteryx/third_party/travers/graph.py +602 -0
  333. opteryx/third_party/ulfjack/ryu.c +31341 -0
  334. opteryx/third_party/ulfjack/ryu.cpython-313-x86_64-linux-gnu.so +0 -0
  335. opteryx/third_party/ulfjack/ryu.pyx +89 -0
  336. opteryx/utils/__init__.py +90 -0
  337. opteryx/utils/arrow.py +161 -0
  338. opteryx/utils/dates.py +199 -0
  339. opteryx/utils/dnf.py +327 -0
  340. opteryx/utils/file_decoders.py +411 -0
  341. opteryx/utils/formatter.py +256 -0
  342. opteryx/utils/free_threading.py +45 -0
  343. opteryx/utils/mermaid.py +60 -0
  344. opteryx/utils/paths.py +141 -0
  345. opteryx/utils/query_parser.py +309 -0
  346. opteryx/utils/series.py +80 -0
  347. opteryx/utils/sql.py +220 -0
  348. opteryx/virtual_datasets/__init__.py +36 -0
  349. opteryx/virtual_datasets/derived_data.py +25 -0
  350. opteryx/virtual_datasets/no_table_data.py +42 -0
  351. opteryx/virtual_datasets/planet_data.py +188 -0
  352. opteryx/virtual_datasets/stop_words.py +356 -0
  353. opteryx/virtual_datasets/telemetry.py +52 -0
  354. opteryx/virtual_datasets/user.py +46 -0
  355. opteryx/virtual_datasets/variables_data.py +58 -0
  356. opteryx_core/__init__.py +55 -0
  357. opteryx_core/__version__.py +8 -0
  358. opteryx_core-0.5.0.dist-info/METADATA +36 -0
  359. opteryx_core-0.5.0.dist-info/RECORD +364 -0
  360. opteryx_core-0.5.0.dist-info/WHEEL +6 -0
  361. opteryx_core-0.5.0.dist-info/licenses/LICENSE +201 -0
  362. opteryx_core-0.5.0.dist-info/sboms/auditwheel.cdx.json +1 -0
  363. opteryx_core-0.5.0.dist-info/top_level.txt +2 -0
  364. opteryx_core.libs/libcrypto-d3570994.so.1.0.2k +0 -0
opteryx/__init__.py ADDED
@@ -0,0 +1,344 @@
1
+ # isort: skip_file
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # See the License at http://www.apache.org/licenses/LICENSE-2.0
5
+ # Distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND.
6
+
7
+ """
8
+ Opteryx is a SQL query engine optimized for speed and efficiency.
9
+
10
+ To get started:
11
+ import opteryx
12
+ results = opteryx.query("SELECT * FROM my_table")
13
+
14
+ Opteryx handles parsing, planning, and execution of SQL queries with a focus on low-latency analytics over
15
+ local or remote data sources.
16
+
17
+ For more information check out https://opteryx.dev.
18
+ """
19
+
20
+ import datetime
21
+ import os
22
+ import platform
23
+ import secrets
24
+ import time
25
+ import warnings
26
+ from pathlib import Path
27
+
28
+ from decimal import getcontext
29
+ from typing import Optional, Union, Dict, Any, List, TYPE_CHECKING, Iterable
30
+
31
+ if TYPE_CHECKING: # pragma: no cover - only for type checkers
32
+ import pyarrow
33
+
34
+ # Set Decimal precision to 28 globally
35
+ getcontext().prec = 28
36
+
37
+
38
+ # end-of-stream marker
39
def _generate_eos_marker() -> int:
    """
    Produce the value used as the end-of-stream marker.

    Returns:
        int: a random value in the signed 64-bit range, i.e. between
        -(2**63) and 2**63 - 1 inclusive.
    """
    unsigned_bits = secrets.randbits(64)
    # Re-centre the unsigned 64-bit draw so it spans the signed 64-bit range.
    signed_offset = 1 << 63
    return unsigned_bits - signed_offset
42
+
43
+
44
+ EOS: int = _generate_eos_marker()
45
+
46
+
47
def is_mac() -> bool:  # pragma: no cover
    """
    Report whether the interpreter is running on macOS.

    Returns:
        bool: True when `platform.system()` reports "Darwin" (compared
        case-insensitively), False otherwise.
    """
    system_name = platform.system()
    return system_name.lower() == "darwin"
55
+
56
+
57
+ # we do a separate check for debug mode here so we don't load the config module just yet
58
+ OPTERYX_DEBUG = os.environ.get("OPTERYX_DEBUG") is not None
59
+
60
+
61
+ # python-dotenv allows us to create an environment file to store secrets.
62
+ # Only try to import dotenv if a .env file exists to avoid paying the
63
+ # import cost when no environment file is present.
64
+ _env_path = Path.cwd() / ".env"
65
+ if _env_path.exists():
66
+ try:
67
+ import dotenv # type:ignore
68
+
69
+ dotenv.load_dotenv(dotenv_path=_env_path)
70
+ if OPTERYX_DEBUG:
71
+ print(f"{datetime.datetime.now()} [LOADER] Loading `.env` file.")
72
+ except ImportError: # pragma: no cover
73
+ # dotenv is optional; if it's not installed, just continue.
74
+ pass
75
+
76
+
77
+ if OPTERYX_DEBUG: # pragma: no cover
78
+ from opteryx.debugging import OpteryxOrsoImportFinder
79
+
80
+ from opteryx import config
81
+
82
+ from opteryx.connectors import register_workspace
83
+ from opteryx.connectors import set_default_connector
84
+
85
+ from opteryx.__version__ import __author__
86
+ from opteryx.__version__ import __build__
87
+ from opteryx.__version__ import __version__
88
+ from opteryx.__version__ import __lib__
89
+
90
+
91
# Names exported by `from opteryx import *`. `Connection` is not bound at
# import time; it is resolved on first access by the module-level
# `__getattr__`. `query_to_arrow_batches` is listed alongside the other
# query helpers so every public helper is exported consistently.
__all__ = [
    "analyze_query",
    "connect",
    "Connection",
    "query",
    "query_to_arrow",
    "query_to_arrow_batches",
    "register_workspace",
    "set_default_connector",
    "__author__",
    "__build__",
    "__version__",
    "__lib__",
    "OPTERYX_DEBUG",
]
105
+
106
+
107
def connect(*args, **kwargs) -> "Connection":
    """
    Create a Connection, forwarding every argument to its constructor.

    This is the module-level `connect` entry point described by PEP 249
    (Python Database API Specification v2.0).

    Parameters:
        *args: positional arguments passed straight to `Connection`
        **kwargs: keyword arguments passed straight to `Connection`

    Returns:
        The newly created Connection
    """
    # Deferred import: Connection is only loaded when a connection is requested.
    from opteryx.connection import Connection

    new_connection = Connection(*args, **kwargs)
    return new_connection
119
+
120
+
121
def query(
    operation: str,
    params: Union[list, Dict, None] = None,
    visibility_filters: Optional[Dict[str, Any]] = None,
    **kwargs,
):
    """
    Run a SQL statement on a fresh connection and hand back the executed cursor.

    Modelled on the DuckDB function of the same name: the caller supplies only
    the SQL (and optional bindings) and the connection and cursor are created
    here.

    Parameters:
        operation: SQL query string
        params: parameters to bind into the SQL query (optional)
        visibility_filters: forwarded unchanged to the cursor's `execute` call (optional)
        kwargs: additional arguments for creating the Connection

    Returns:
        Executed cursor
    """
    # Deferred import: Connection is only loaded when a query is actually run.
    from opteryx.connection import Connection

    connection = Connection(**kwargs)
    cursor = connection.cursor()
    # The connection was created by this helper, so the cursor is flagged as
    # owning it (how the flag is used is defined by the cursor class).
    cursor._owns_connection = True

    execute_arguments = {
        "operation": operation,
        "params": params,
        "visibility_filters": visibility_filters,
    }
    cursor.execute(**execute_arguments)
    return cursor
157
+
158
+
159
def query_to_arrow(
    operation: str,
    params: Union[List, Dict, None] = None,
    visibility_filters: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    **kwargs,
) -> "pyarrow.Table":
    """
    Helper function to execute a query and return a pyarrow Table.

    This is the fastest way to get a pyarrow table from Opteryx, it bypasses needing
    orso to create a Dataframe and converting from the Dataframe. This is fast, but
    not doing it is faster.

    Parameters:
        operation: SQL query string
        params: list of parameters to bind into the SQL query (optional)
        visibility_filters: passed through unchanged to the cursor's
            `execute_to_arrow` call (optional)
        limit: stop after this many rows (optional)
        kwargs: additional arguments for creating the Connection

    Returns:
        pyarrow Table
    """
    # Lazy import Connection
    from opteryx.connection import Connection

    # Create a new database connection
    conn = Connection(**kwargs)

    # Create a new cursor object using the connection; the connection was
    # created by this helper, so the cursor is flagged as owning it.
    curr = conn.cursor()
    curr._owns_connection = True

    # Execute the SQL query using the cursor
    return curr.execute_to_arrow(
        operation=operation, params=params, visibility_filters=visibility_filters, limit=limit
    )
196
+
197
+
198
def query_to_arrow_batches(
    operation: str,
    params: Union[List, Dict, None] = None,
    batch_size: int = 1024,
    limit: Optional[int] = None,
    visibility_filters: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> "Iterable[pyarrow.RecordBatch]":
    """
    Helper function to execute a query and stream pyarrow RecordBatch objects.

    Parameters:
        operation: SQL query string
        params: list of parameters to bind into the SQL query (optional)
        batch_size: Number of rows per arrow record batch
        limit: stop after this many rows (optional)
        visibility_filters: passed through unchanged to the cursor's
            `execute_to_arrow_batches` call (optional)
        kwargs: additional arguments for creating the Connection

    Returns:
        Iterable over pyarrow.RecordBatch
    """
    # Lazy import Connection
    from opteryx.connection import Connection

    # Create a new database connection
    conn = Connection(**kwargs)

    # Create a new cursor object using the connection; the connection was
    # created by this helper, so the cursor is flagged as owning it.
    curr = conn.cursor()
    curr._owns_connection = True

    # Execute the SQL query using the cursor
    return curr.execute_to_arrow_batches(
        operation=operation,
        params=params,
        batch_size=batch_size,
        limit=limit,
        visibility_filters=visibility_filters,
    )
236
+
237
+
238
def analyze_query(sql: str) -> Dict[str, Any]:
    """
    Parse a SQL query and extract metadata without executing it.

    This function analyzes the SQL query structure to extract information such as:
    - Query type (SELECT, INSERT, UPDATE, DELETE, etc.)
    - Tables being queried
    - Other metadata available from the SQL syntax alone

    This is useful for:
    - Pre-flight permission checks
    - Query validation before queueing
    - Resource planning
    - Query analysis

    Parameters:
        sql: SQL query string to parse

    Returns:
        Dictionary containing:
        - query_type: Type of query (e.g., "Query", "Insert", "Update")
        - tables: List of table names referenced in the query
        - is_select: True if this is a SELECT query
        - is_mutation: True if this modifies data (INSERT, UPDATE, DELETE)

    Example:
        >>> info = opteryx.analyze_query("SELECT * FROM users WHERE id = 1")
        >>> print(info["query_type"])
        'Query'
        >>> print(info["tables"])
        ['users']
    """
    # Deferred import: the parser module is only loaded when analysis is requested.
    # The work itself is delegated to `opteryx.utils.query_parser.parse_query_info`.
    from opteryx.utils.query_parser import parse_query_info as _parse_query_info

    return _parse_query_info(sql)
273
+
274
+
275
+ # Try to increase the priority of the application
276
+ if not config.DISABLE_HIGH_PRIORITY and hasattr(os, "nice"): # pragma: no cover
277
+ nice_value = 0
278
+ try:
279
+ nice_value = os.nice(0)
280
+ if not is_mac():
281
+ os.nice(-20 + nice_value)
282
+ except PermissionError:
283
+ display_nice = f"{nice_value} (normal)" if nice_value == 0 else str(nice_value)
284
+ if OPTERYX_DEBUG:
285
+ print(
286
+ f"{datetime.datetime.now()} [LOADER] Cannot update process priority. Currently set to {display_nice}."
287
+ )
288
+
289
+ # Show each distinct DeprecationWarning only once
290
+ warnings.simplefilter("once", DeprecationWarning)
291
+
292
+ # Lazy initialization of system_telemetry
293
+ _system_telemetry = None
294
+
295
+
296
def _get_system_telemetry():
    """
    Return the module-wide system telemetry object, creating it on first use.

    The object is built only when first requested, so the QueryTelemetry
    model is not imported while the opteryx module itself is being imported.
    On creation its `start_time` is set from `time.time_ns()`.

    Returns:
        QueryTelemetry: The global system telemetry object
    """
    global _system_telemetry

    # Already built by an earlier call: hand back the cached object.
    if _system_telemetry is not None:
        return _system_telemetry

    from opteryx.models import QueryTelemetry

    _system_telemetry = QueryTelemetry("system")
    _system_telemetry.start_time = time.time_ns()
    return _system_telemetry
314
+
315
+
316
+ # Provide access via module attribute
317
def __getattr__(name):
    """
    Resolve module attributes that are deliberately not bound at import time.

    Attribute lookups that miss on the opteryx module are routed here, which
    lets heavier components be loaded only when they are first requested.

    Supported lazy attributes:
    - Connection: The main connection class
    - system_telemetry: Global query telemetry object

    Args:
        name: The attribute name being accessed

    Returns:
        The requested attribute

    Raises:
        AttributeError: If the attribute doesn't exist
    """
    if name == "system_telemetry":
        return _get_system_telemetry()

    if name == "Connection":
        from opteryx.connection import Connection

        return Connection

    # Anything else is an ordinary missing attribute.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
opteryx/__main__.py ADDED
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env python
2
+ # pragma: no cover
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # See the License at http://www.apache.org/licenses/LICENSE-2.0
7
+ # Distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND.
8
+
9
+ """
10
+ A command line interface for Opteryx
11
+ """
12
+
13
+ import argparse
14
+ import os
15
+ import sys
16
+ import threading
17
+ import time
18
+
19
+ sys.path.insert(1, os.path.join(sys.path[0], ".."))
20
+
21
+ if True:
22
+ import opteryx
23
+ from opteryx.exceptions import MissingSqlStatement
24
+ from opteryx.utils.sql import clean_statement
25
+ from opteryx.utils.sql import remove_comments
26
+
27
# ANSI escape sequences: switch the terminal foreground to red, and reset it
ANSI_RED = "\u001b[31m"
ANSI_RESET = "\u001b[0m"
30
+
31
+
32
def print_dots(stop_event, dots=5, interval=0.5):
    """
    Print a repeating row of dots to show that work is in progress.

    One dot is written every `interval` seconds. Once `dots` dots are on the
    line, the line is blanked and the cycle starts again. Nothing further is
    written once `stop_event` is set; any dots still on the line are left for
    the caller to clear.

    Args:
        stop_event: A threading.Event (or any object offering `is_set()` and
            `wait(timeout)`) that signals the indicator to stop.
        dots: Number of dots written before the line is blanked.
        interval: Seconds to pause after each write.
    """
    # Blank exactly as many columns as there are dots, then return the cursor
    # to the start of the line.
    erase_line = "\r" + " " * dots + "\r"

    while not stop_event.is_set():  # pragma: no cover
        for _ in range(dots):
            print(".", end="", flush=True)
            # wait() returns True as soon as the event is set, so the thread
            # stops immediately instead of finishing an uninterruptible sleep.
            if stop_event.wait(interval):
                return
        print(erase_line, end="", flush=True)
        if stop_event.wait(interval):
            return
54
+
55
+
56
def _print_result(result, args, duration):
    """Write a query result, and optionally its statistics, to the console."""
    print(
        result.display(
            limit=-1,
            display_width=args.table_width,
            colorize=args.color,
            max_column_width=args.max_col_width,
        )
    )
    if args.stats:
        print(
            f"[ {result.rowcount} rows x {result.columncount} columns ] ( {duration / 1e9} seconds )"
        )


def main():
    """
    Entry point for the Opteryx command line interface.

    Parses the command line and then runs in one of three modes:

    - REPL: no SQL argument was given; statements are read from the prompt
      until '.exit', '.quit' or end-of-input.
    - Benchmark: '--cycles' is greater than one; the statement is executed
      that many times and the timings are printed as a list.
    - Single statement: the statement is executed once and the result is
      either shown on the console or written to the file named by '--o'
      (parquet, csv, jsonl or md, chosen by the file extension).

    Raises:
        ValueError: If an output location is given without a SQL statement,
            or the output file extension is not a supported format.
    """
    parser = argparse.ArgumentParser(description="A command line interface for Opteryx")

    parser.add_argument(
        "--o", type=str, default="console", help="Output location (ignored by REPL)", dest="output"
    )

    # Mutually exclusive group for `--color` and `--no-color`
    color_group = parser.add_mutually_exclusive_group()
    color_group.add_argument(
        "--color", dest="color", action="store_true", default=True, help="Colorize the table."
    )
    color_group.add_argument(
        "--no-color", dest="color", action="store_false", help="Disable colorized output."
    )

    # NOTE(review): with action="store_true" and default=True this option is
    # always True; there is currently no way to switch it off from the CLI.
    parser.add_argument(
        "--table_width",
        action="store_true",
        default=True,
        help="Limit console display to the screen width.",
    )
    parser.add_argument("--max_col_width", type=int, default=64, help="Maximum column width")

    # Mutually exclusive group for `--stats` and `--no-stats`
    stats_group = parser.add_mutually_exclusive_group()
    stats_group.add_argument(
        "--stats", dest="stats", action="store_true", default=True, help="Report statistics."
    )
    stats_group.add_argument(
        "--no-stats", dest="stats", action="store_false", help="Disable report statistics."
    )

    parser.add_argument("--cycles", type=int, default=1, help="Repeat Execution.")
    parser.add_argument("sql", type=str, nargs="?", help="Execute SQL statement and quit.")

    args = parser.parse_args()

    # Run in REPL mode if no SQL is provided
    if args.sql is None:  # pragma: no cover
        if args.output != "console":
            raise ValueError("Cannot specify output location and not provide a SQL statement.")
        print(f"Opteryx version {opteryx.__version__}")
        print(" Enter '.help' for usage hints")
        print(" Enter '.exit' to exit this program")

        # Wide enough to blank the (up to five) progress dots on the line
        clear_line = "\r" + " " * 5 + "\r"

        while True:  # REPL loop
            print()
            try:
                statement = input("opteryx> ")
            except EOFError:
                # Ctrl-D, or piped input ran out: leave the REPL cleanly
                print()
                break
            if statement in {".exit", ".quit"}:
                break
            if statement == ".help":
                print(" .exit Exit this program")
                print(" .help Show help text")
                continue

            stop_event = threading.Event()
            dot_thread = threading.Thread(target=print_dots, args=(stop_event,))
            dot_thread.start()
            try:
                start = time.monotonic_ns()
                result = opteryx.query(statement, memberships=["opteryx"])
                result.materialize()
                stop_event.set()
                duration = time.monotonic_ns() - start
                print(clear_line, end="", flush=True)
                _print_result(result, args, duration)
            except MissingSqlStatement:
                print(
                    f"{ANSI_RED}Error{ANSI_RESET}: Expected SQL statement or dot command missing."
                )
            except Exception as e:
                # A failed statement must not end the interactive session
                print(f"{ANSI_RED}Error{ANSI_RESET}: {e}")
            finally:
                # Always stop the progress indicator, whatever happened
                stop_event.set()
                dot_thread.join()
        # `quit()` is only injected by the `site` module; a plain return ends
        # the program just as well and works everywhere.
        return

    # Process the SQL query
    sql = clean_statement(remove_comments(args.sql))

    if args.cycles > 1:  # Benchmarking mode
        print("[", end="")
        for i in range(args.cycles):
            start = time.monotonic_ns()
            opteryx.query_to_arrow(sql)
            print(
                f"{(time.monotonic_ns() - start) / 1e9:.3f}",
                flush=True,
                end=("," if (i + 1) < args.cycles else "]\n"),
            )
        return

    # Reject an unsupported output format before doing any work, rather than
    # after the query has already been executed.
    ext = None
    if args.output != "console":
        ext = args.output.lower().split(".")[-1]
        if ext not in ("parquet", "csv", "jsonl", "md"):
            raise ValueError(f"Unknown output format '{ext}'")

    start = time.monotonic_ns()
    result = opteryx.query(sql)
    result.materialize()
    duration = time.monotonic_ns() - start

    if args.output == "console":
        _print_result(result, args, duration)
        return

    table = result.arrow()

    if ext == "parquet":
        from pyarrow import parquet

        parquet.write_table(table, args.output)
    elif ext == "csv":
        from pyarrow import csv

        csv.write_csv(table, args.output)
    elif ext == "jsonl":
        with open(args.output, mode="wb") as file:
            for row in result:
                file.write(row.as_json + b"\n")
    elif ext == "md":
        # Explicit encoding so the output does not depend on the locale
        with open(args.output, mode="w", encoding="utf-8") as file:
            file.write(result.markdown(limit=-1))

    # NOTE(review): unlike console output, these statistics are printed even
    # when --no-stats is given - confirm whether that is intended.
    print(
        f"[ {result.rowcount} rows x {result.columncount} columns ] ( {duration / 1e9} seconds )"
    )
    print(f"Written result to '{args.output}'")
204
+
205
+
206
if __name__ == "__main__":
    # Importing readline is enough to give input() line editing and history.
    # The module is not available on every platform, so treat it as optional.
    try:
        import readline  # noqa: F401  # pragma: no cover
    except ImportError:
        pass

    try:
        main()
    except Exception as e:
        print(f"{ANSI_RED}Error{ANSI_RESET}: {e}")
        # Report the failure to the calling shell instead of exiting with 0
        sys.exit(1)
opteryx/__version__.py ADDED
@@ -0,0 +1,13 @@
1
+ # THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+
4
+ __build__ = 113
5
+ __author__ = "@joocer"
6
+ __version__ = "0.5.0"
7
+ __lib__ = "opteryx-core"
8
+ __build_date__ = "2025-12-30T15:40:08.207992+00:00Z"
9
+
10
+ # Store the version here so:
11
+ # 1) we don't load dependencies by storing it in __init__.py
12
+ # 2) we can import it in setup.py for the same reason
13
+ # 3) we can import it in the CLI for the same reason
opteryx/command.py ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env python
2
+ # pragma: no cover
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # See the License at http://www.apache.org/licenses/LICENSE-2.0
7
+ # Distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND.
8
+
9
+ from opteryx.__main__ import main
10
+
11
+ __all__ = ("main",)
@@ -0,0 +1 @@
1
+
File without changes