relationalai 0.13.5__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (856)
  1. relationalai/__init__.py +1 -256
  2. relationalai/config/__init__.py +56 -0
  3. relationalai/config/config.py +289 -0
  4. relationalai/config/config_fields.py +86 -0
  5. relationalai/config/connections/__init__.py +46 -0
  6. relationalai/config/connections/base.py +23 -0
  7. relationalai/config/connections/duckdb.py +29 -0
  8. relationalai/config/connections/snowflake.py +243 -0
  9. relationalai/config/external/__init__.py +17 -0
  10. relationalai/config/external/dbt_converter.py +101 -0
  11. relationalai/config/external/dbt_models.py +93 -0
  12. relationalai/config/external/snowflake_converter.py +41 -0
  13. relationalai/config/external/snowflake_models.py +85 -0
  14. relationalai/config/external/utils.py +19 -0
  15. relationalai/config/shims.py +1 -0
  16. relationalai/semantics/__init__.py +146 -22
  17. relationalai/semantics/backends/lqp/annotations.py +11 -0
  18. relationalai/semantics/backends/sql/sql_compiler.py +327 -0
  19. relationalai/semantics/frontend/base.py +1719 -0
  20. relationalai/semantics/frontend/core.py +179 -0
  21. relationalai/semantics/frontend/front_compiler.py +1316 -0
  22. relationalai/semantics/frontend/pprint.py +408 -0
  23. relationalai/semantics/metamodel/__init__.py +6 -40
  24. relationalai/semantics/metamodel/builtins.py +206 -772
  25. relationalai/semantics/metamodel/metamodel.py +465 -0
  26. relationalai/semantics/metamodel/metamodel_analyzer.py +519 -0
  27. relationalai/semantics/metamodel/pprint.py +414 -0
  28. relationalai/semantics/metamodel/rewriter.py +266 -0
  29. relationalai/semantics/metamodel/typer.py +1213 -0
  30. relationalai/semantics/std/__init__.py +60 -40
  31. relationalai/semantics/std/aggregates.py +148 -0
  32. relationalai/semantics/std/common.py +44 -0
  33. relationalai/semantics/std/constraints.py +37 -43
  34. relationalai/semantics/std/datetime.py +249 -135
  35. relationalai/semantics/std/decimals.py +45 -52
  36. relationalai/semantics/std/floats.py +13 -5
  37. relationalai/semantics/std/integers.py +26 -11
  38. relationalai/semantics/std/math.py +183 -112
  39. relationalai/semantics/std/numbers.py +86 -0
  40. relationalai/semantics/std/re.py +80 -62
  41. relationalai/semantics/std/strings.py +101 -46
  42. relationalai/shims/executor.py +179 -0
  43. relationalai/shims/helpers.py +126 -0
  44. relationalai/shims/hoister.py +221 -0
  45. relationalai/shims/mm2v0.py +1394 -0
  46. relationalai/tools/cli/__init__.py +6 -0
  47. relationalai/tools/cli/cli.py +90 -0
  48. relationalai/tools/cli/components/__init__.py +5 -0
  49. relationalai/tools/cli/components/progress_reader.py +1524 -0
  50. relationalai/tools/cli/components/utils.py +58 -0
  51. relationalai/tools/cli/config_template.py +45 -0
  52. relationalai/tools/cli/dev.py +19 -0
  53. relationalai/tools/debugger.py +289 -183
  54. relationalai/tools/typer_debugger.py +93 -0
  55. relationalai/util/dataclasses.py +43 -0
  56. relationalai/util/docutils.py +40 -0
  57. relationalai/util/error.py +199 -0
  58. relationalai/util/format.py +48 -109
  59. relationalai/util/naming.py +145 -0
  60. relationalai/util/python.py +35 -0
  61. relationalai/util/runtime.py +156 -0
  62. relationalai/util/schema.py +197 -0
  63. relationalai/util/source.py +185 -0
  64. relationalai/util/structures.py +163 -0
  65. relationalai/util/tracing.py +261 -0
  66. relationalai-1.0.0a2.dist-info/METADATA +44 -0
  67. relationalai-1.0.0a2.dist-info/RECORD +489 -0
  68. relationalai-1.0.0a2.dist-info/WHEEL +5 -0
  69. relationalai-1.0.0a2.dist-info/entry_points.txt +3 -0
  70. relationalai-1.0.0a2.dist-info/top_level.txt +2 -0
  71. v0/relationalai/__init__.py +216 -0
  72. v0/relationalai/clients/__init__.py +5 -0
  73. v0/relationalai/clients/azure.py +477 -0
  74. v0/relationalai/clients/client.py +912 -0
  75. v0/relationalai/clients/config.py +673 -0
  76. v0/relationalai/clients/direct_access_client.py +118 -0
  77. v0/relationalai/clients/hash_util.py +31 -0
  78. v0/relationalai/clients/local.py +571 -0
  79. v0/relationalai/clients/profile_polling.py +73 -0
  80. v0/relationalai/clients/result_helpers.py +420 -0
  81. v0/relationalai/clients/snowflake.py +3869 -0
  82. v0/relationalai/clients/types.py +113 -0
  83. v0/relationalai/clients/use_index_poller.py +980 -0
  84. v0/relationalai/clients/util.py +356 -0
  85. v0/relationalai/debugging.py +389 -0
  86. v0/relationalai/dsl.py +1749 -0
  87. v0/relationalai/early_access/builder/__init__.py +30 -0
  88. v0/relationalai/early_access/builder/builder/__init__.py +35 -0
  89. v0/relationalai/early_access/builder/snowflake/__init__.py +12 -0
  90. v0/relationalai/early_access/builder/std/__init__.py +25 -0
  91. v0/relationalai/early_access/builder/std/decimals/__init__.py +12 -0
  92. v0/relationalai/early_access/builder/std/integers/__init__.py +12 -0
  93. v0/relationalai/early_access/builder/std/math/__init__.py +12 -0
  94. v0/relationalai/early_access/builder/std/strings/__init__.py +14 -0
  95. v0/relationalai/early_access/devtools/__init__.py +12 -0
  96. v0/relationalai/early_access/devtools/benchmark_lqp/__init__.py +12 -0
  97. v0/relationalai/early_access/devtools/extract_lqp/__init__.py +12 -0
  98. v0/relationalai/early_access/dsl/adapters/orm/adapter_qb.py +427 -0
  99. v0/relationalai/early_access/dsl/adapters/orm/parser.py +636 -0
  100. v0/relationalai/early_access/dsl/adapters/owl/adapter.py +176 -0
  101. v0/relationalai/early_access/dsl/adapters/owl/parser.py +160 -0
  102. v0/relationalai/early_access/dsl/bindings/common.py +402 -0
  103. v0/relationalai/early_access/dsl/bindings/csv.py +170 -0
  104. v0/relationalai/early_access/dsl/bindings/legacy/binding_models.py +143 -0
  105. v0/relationalai/early_access/dsl/bindings/snowflake.py +64 -0
  106. v0/relationalai/early_access/dsl/codegen/binder.py +411 -0
  107. v0/relationalai/early_access/dsl/codegen/common.py +79 -0
  108. v0/relationalai/early_access/dsl/codegen/helpers.py +23 -0
  109. v0/relationalai/early_access/dsl/codegen/relations.py +700 -0
  110. v0/relationalai/early_access/dsl/codegen/weaver.py +417 -0
  111. v0/relationalai/early_access/dsl/core/builders/__init__.py +47 -0
  112. v0/relationalai/early_access/dsl/core/builders/logic.py +19 -0
  113. v0/relationalai/early_access/dsl/core/builders/scalar_constraint.py +11 -0
  114. v0/relationalai/early_access/dsl/core/constraints/predicate/atomic.py +455 -0
  115. v0/relationalai/early_access/dsl/core/constraints/predicate/universal.py +73 -0
  116. v0/relationalai/early_access/dsl/core/constraints/scalar.py +310 -0
  117. v0/relationalai/early_access/dsl/core/context.py +13 -0
  118. v0/relationalai/early_access/dsl/core/cset.py +132 -0
  119. v0/relationalai/early_access/dsl/core/exprs/__init__.py +116 -0
  120. v0/relationalai/early_access/dsl/core/exprs/relational.py +18 -0
  121. v0/relationalai/early_access/dsl/core/exprs/scalar.py +412 -0
  122. v0/relationalai/early_access/dsl/core/instances.py +44 -0
  123. v0/relationalai/early_access/dsl/core/logic/__init__.py +193 -0
  124. v0/relationalai/early_access/dsl/core/logic/aggregation.py +98 -0
  125. v0/relationalai/early_access/dsl/core/logic/exists.py +223 -0
  126. v0/relationalai/early_access/dsl/core/logic/helper.py +163 -0
  127. v0/relationalai/early_access/dsl/core/namespaces.py +32 -0
  128. v0/relationalai/early_access/dsl/core/relations.py +276 -0
  129. v0/relationalai/early_access/dsl/core/rules.py +112 -0
  130. v0/relationalai/early_access/dsl/core/std/__init__.py +45 -0
  131. v0/relationalai/early_access/dsl/core/temporal/recall.py +6 -0
  132. v0/relationalai/early_access/dsl/core/types/__init__.py +270 -0
  133. v0/relationalai/early_access/dsl/core/types/concepts.py +128 -0
  134. v0/relationalai/early_access/dsl/core/types/constrained/__init__.py +267 -0
  135. v0/relationalai/early_access/dsl/core/types/constrained/nominal.py +143 -0
  136. v0/relationalai/early_access/dsl/core/types/constrained/subtype.py +124 -0
  137. v0/relationalai/early_access/dsl/core/types/standard.py +92 -0
  138. v0/relationalai/early_access/dsl/core/types/unconstrained.py +50 -0
  139. v0/relationalai/early_access/dsl/core/types/variables.py +203 -0
  140. v0/relationalai/early_access/dsl/ir/compiler.py +318 -0
  141. v0/relationalai/early_access/dsl/ir/executor.py +260 -0
  142. v0/relationalai/early_access/dsl/ontologies/constraints.py +88 -0
  143. v0/relationalai/early_access/dsl/ontologies/export.py +30 -0
  144. v0/relationalai/early_access/dsl/ontologies/models.py +453 -0
  145. v0/relationalai/early_access/dsl/ontologies/python_printer.py +303 -0
  146. v0/relationalai/early_access/dsl/ontologies/readings.py +60 -0
  147. v0/relationalai/early_access/dsl/ontologies/relationships.py +322 -0
  148. v0/relationalai/early_access/dsl/ontologies/roles.py +87 -0
  149. v0/relationalai/early_access/dsl/ontologies/subtyping.py +55 -0
  150. v0/relationalai/early_access/dsl/orm/constraints.py +438 -0
  151. v0/relationalai/early_access/dsl/orm/measures/dimensions.py +200 -0
  152. v0/relationalai/early_access/dsl/orm/measures/initializer.py +16 -0
  153. v0/relationalai/early_access/dsl/orm/measures/measure_rules.py +275 -0
  154. v0/relationalai/early_access/dsl/orm/measures/measures.py +299 -0
  155. v0/relationalai/early_access/dsl/orm/measures/role_exprs.py +268 -0
  156. v0/relationalai/early_access/dsl/orm/models.py +256 -0
  157. v0/relationalai/early_access/dsl/orm/object_oriented_printer.py +344 -0
  158. v0/relationalai/early_access/dsl/orm/printer.py +469 -0
  159. v0/relationalai/early_access/dsl/orm/reasoners.py +480 -0
  160. v0/relationalai/early_access/dsl/orm/relations.py +19 -0
  161. v0/relationalai/early_access/dsl/orm/relationships.py +251 -0
  162. v0/relationalai/early_access/dsl/orm/types.py +42 -0
  163. v0/relationalai/early_access/dsl/orm/utils.py +79 -0
  164. v0/relationalai/early_access/dsl/orm/verb.py +204 -0
  165. v0/relationalai/early_access/dsl/physical_metadata/tables.py +133 -0
  166. v0/relationalai/early_access/dsl/relations.py +170 -0
  167. v0/relationalai/early_access/dsl/rulesets.py +69 -0
  168. v0/relationalai/early_access/dsl/schemas/__init__.py +450 -0
  169. v0/relationalai/early_access/dsl/schemas/builder.py +48 -0
  170. v0/relationalai/early_access/dsl/schemas/comp_names.py +51 -0
  171. v0/relationalai/early_access/dsl/schemas/components.py +203 -0
  172. v0/relationalai/early_access/dsl/schemas/contexts.py +156 -0
  173. v0/relationalai/early_access/dsl/schemas/exprs.py +89 -0
  174. v0/relationalai/early_access/dsl/schemas/fragments.py +464 -0
  175. v0/relationalai/early_access/dsl/serialization.py +79 -0
  176. v0/relationalai/early_access/dsl/serialize/exporter.py +163 -0
  177. v0/relationalai/early_access/dsl/snow/api.py +104 -0
  178. v0/relationalai/early_access/dsl/snow/common.py +76 -0
  179. v0/relationalai/early_access/dsl/state_mgmt/__init__.py +129 -0
  180. v0/relationalai/early_access/dsl/state_mgmt/state_charts.py +125 -0
  181. v0/relationalai/early_access/dsl/state_mgmt/transitions.py +130 -0
  182. v0/relationalai/early_access/dsl/types/__init__.py +40 -0
  183. v0/relationalai/early_access/dsl/types/concepts.py +12 -0
  184. v0/relationalai/early_access/dsl/types/entities.py +135 -0
  185. v0/relationalai/early_access/dsl/types/values.py +17 -0
  186. v0/relationalai/early_access/dsl/utils.py +102 -0
  187. v0/relationalai/early_access/graphs/__init__.py +13 -0
  188. v0/relationalai/early_access/lqp/__init__.py +12 -0
  189. v0/relationalai/early_access/lqp/compiler/__init__.py +12 -0
  190. v0/relationalai/early_access/lqp/constructors/__init__.py +18 -0
  191. v0/relationalai/early_access/lqp/executor/__init__.py +12 -0
  192. v0/relationalai/early_access/lqp/ir/__init__.py +12 -0
  193. v0/relationalai/early_access/lqp/passes/__init__.py +12 -0
  194. v0/relationalai/early_access/lqp/pragmas/__init__.py +12 -0
  195. v0/relationalai/early_access/lqp/primitives/__init__.py +12 -0
  196. v0/relationalai/early_access/lqp/types/__init__.py +12 -0
  197. v0/relationalai/early_access/lqp/utils/__init__.py +12 -0
  198. v0/relationalai/early_access/lqp/validators/__init__.py +12 -0
  199. v0/relationalai/early_access/metamodel/__init__.py +58 -0
  200. v0/relationalai/early_access/metamodel/builtins/__init__.py +12 -0
  201. v0/relationalai/early_access/metamodel/compiler/__init__.py +12 -0
  202. v0/relationalai/early_access/metamodel/dependency/__init__.py +12 -0
  203. v0/relationalai/early_access/metamodel/factory/__init__.py +17 -0
  204. v0/relationalai/early_access/metamodel/helpers/__init__.py +12 -0
  205. v0/relationalai/early_access/metamodel/ir/__init__.py +14 -0
  206. v0/relationalai/early_access/metamodel/rewrite/__init__.py +7 -0
  207. v0/relationalai/early_access/metamodel/typer/__init__.py +3 -0
  208. v0/relationalai/early_access/metamodel/typer/typer/__init__.py +12 -0
  209. v0/relationalai/early_access/metamodel/types/__init__.py +15 -0
  210. v0/relationalai/early_access/metamodel/util/__init__.py +15 -0
  211. v0/relationalai/early_access/metamodel/visitor/__init__.py +12 -0
  212. v0/relationalai/early_access/rel/__init__.py +12 -0
  213. v0/relationalai/early_access/rel/executor/__init__.py +12 -0
  214. v0/relationalai/early_access/rel/rel_utils/__init__.py +12 -0
  215. v0/relationalai/early_access/rel/rewrite/__init__.py +7 -0
  216. v0/relationalai/early_access/solvers/__init__.py +19 -0
  217. v0/relationalai/early_access/sql/__init__.py +11 -0
  218. v0/relationalai/early_access/sql/executor/__init__.py +3 -0
  219. v0/relationalai/early_access/sql/rewrite/__init__.py +3 -0
  220. v0/relationalai/early_access/tests/logging/__init__.py +12 -0
  221. v0/relationalai/early_access/tests/test_snapshot_base/__init__.py +12 -0
  222. v0/relationalai/early_access/tests/utils/__init__.py +12 -0
  223. v0/relationalai/environments/__init__.py +35 -0
  224. v0/relationalai/environments/base.py +381 -0
  225. v0/relationalai/environments/colab.py +14 -0
  226. v0/relationalai/environments/generic.py +71 -0
  227. v0/relationalai/environments/ipython.py +68 -0
  228. v0/relationalai/environments/jupyter.py +9 -0
  229. v0/relationalai/environments/snowbook.py +169 -0
  230. v0/relationalai/errors.py +2478 -0
  231. v0/relationalai/experimental/SF.py +38 -0
  232. v0/relationalai/experimental/inspect.py +47 -0
  233. v0/relationalai/experimental/pathfinder/__init__.py +158 -0
  234. v0/relationalai/experimental/pathfinder/api.py +160 -0
  235. v0/relationalai/experimental/pathfinder/automaton.py +584 -0
  236. v0/relationalai/experimental/pathfinder/bridge.py +226 -0
  237. v0/relationalai/experimental/pathfinder/compiler.py +416 -0
  238. v0/relationalai/experimental/pathfinder/datalog.py +214 -0
  239. v0/relationalai/experimental/pathfinder/diagnostics.py +56 -0
  240. v0/relationalai/experimental/pathfinder/filter.py +236 -0
  241. v0/relationalai/experimental/pathfinder/glushkov.py +439 -0
  242. v0/relationalai/experimental/pathfinder/options.py +265 -0
  243. v0/relationalai/experimental/pathfinder/rpq.py +344 -0
  244. v0/relationalai/experimental/pathfinder/transition.py +200 -0
  245. v0/relationalai/experimental/pathfinder/utils.py +26 -0
  246. v0/relationalai/experimental/paths/api.py +143 -0
  247. v0/relationalai/experimental/paths/benchmarks/grid_graph.py +37 -0
  248. v0/relationalai/experimental/paths/examples/basic_example.py +40 -0
  249. v0/relationalai/experimental/paths/examples/minimal_engine_warmup.py +3 -0
  250. v0/relationalai/experimental/paths/examples/movie_example.py +77 -0
  251. v0/relationalai/experimental/paths/examples/paths_benchmark.py +115 -0
  252. v0/relationalai/experimental/paths/examples/paths_example.py +116 -0
  253. v0/relationalai/experimental/paths/examples/pattern_to_automaton.py +28 -0
  254. v0/relationalai/experimental/paths/find_paths_via_automaton.py +85 -0
  255. v0/relationalai/experimental/paths/graph.py +185 -0
  256. v0/relationalai/experimental/paths/path_algorithms/find_paths.py +280 -0
  257. v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +26 -0
  258. v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +111 -0
  259. v0/relationalai/experimental/paths/path_algorithms/single.py +59 -0
  260. v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +39 -0
  261. v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +103 -0
  262. v0/relationalai/experimental/paths/path_algorithms/usp-old.py +130 -0
  263. v0/relationalai/experimental/paths/path_algorithms/usp-tuple.py +183 -0
  264. v0/relationalai/experimental/paths/path_algorithms/usp.py +150 -0
  265. v0/relationalai/experimental/paths/product_graph.py +93 -0
  266. v0/relationalai/experimental/paths/rpq/automaton.py +584 -0
  267. v0/relationalai/experimental/paths/rpq/diagnostics.py +56 -0
  268. v0/relationalai/experimental/paths/rpq/rpq.py +378 -0
  269. v0/relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +90 -0
  270. v0/relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +119 -0
  271. v0/relationalai/experimental/paths/tests/tests_limit_sp_single.py +104 -0
  272. v0/relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +113 -0
  273. v0/relationalai/experimental/paths/tests/tests_limit_walks_single.py +149 -0
  274. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +70 -0
  275. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +64 -0
  276. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +115 -0
  277. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +75 -0
  278. v0/relationalai/experimental/paths/tests/tests_single_paths.py +152 -0
  279. v0/relationalai/experimental/paths/tests/tests_single_walks.py +208 -0
  280. v0/relationalai/experimental/paths/tests/tests_single_walks_undirected.py +297 -0
  281. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +107 -0
  282. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +76 -0
  283. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +76 -0
  284. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +110 -0
  285. v0/relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +229 -0
  286. v0/relationalai/experimental/paths/tests/tests_usp_nsp_single.py +108 -0
  287. v0/relationalai/experimental/paths/tree_agg.py +168 -0
  288. v0/relationalai/experimental/paths/utilities/iterators.py +27 -0
  289. v0/relationalai/experimental/paths/utilities/prefix_sum.py +91 -0
  290. v0/relationalai/experimental/solvers.py +1087 -0
  291. v0/relationalai/loaders/csv.py +195 -0
  292. v0/relationalai/loaders/loader.py +177 -0
  293. v0/relationalai/loaders/types.py +23 -0
  294. v0/relationalai/rel_emitter.py +373 -0
  295. v0/relationalai/rel_utils.py +185 -0
  296. v0/relationalai/semantics/__init__.py +29 -0
  297. v0/relationalai/semantics/devtools/benchmark_lqp.py +536 -0
  298. v0/relationalai/semantics/devtools/compilation_manager.py +294 -0
  299. v0/relationalai/semantics/devtools/extract_lqp.py +110 -0
  300. v0/relationalai/semantics/internal/internal.py +3785 -0
  301. v0/relationalai/semantics/internal/snowflake.py +325 -0
  302. v0/relationalai/semantics/lqp/builtins.py +16 -0
  303. v0/relationalai/semantics/lqp/compiler.py +22 -0
  304. v0/relationalai/semantics/lqp/constructors.py +68 -0
  305. v0/relationalai/semantics/lqp/executor.py +474 -0
  306. v0/relationalai/semantics/lqp/intrinsics.py +24 -0
  307. v0/relationalai/semantics/lqp/ir.py +124 -0
  308. v0/relationalai/semantics/lqp/model2lqp.py +877 -0
  309. v0/relationalai/semantics/lqp/passes.py +680 -0
  310. v0/relationalai/semantics/lqp/primitives.py +252 -0
  311. v0/relationalai/semantics/lqp/result_helpers.py +202 -0
  312. v0/relationalai/semantics/lqp/rewrite/__init__.py +18 -0
  313. v0/relationalai/semantics/lqp/rewrite/annotate_constraints.py +57 -0
  314. v0/relationalai/semantics/lqp/rewrite/cdc.py +216 -0
  315. v0/relationalai/semantics/lqp/rewrite/extract_common.py +338 -0
  316. v0/relationalai/semantics/lqp/rewrite/extract_keys.py +490 -0
  317. v0/relationalai/semantics/lqp/rewrite/function_annotations.py +114 -0
  318. v0/relationalai/semantics/lqp/rewrite/functional_dependencies.py +314 -0
  319. v0/relationalai/semantics/lqp/rewrite/quantify_vars.py +296 -0
  320. v0/relationalai/semantics/lqp/rewrite/splinter.py +76 -0
  321. v0/relationalai/semantics/lqp/types.py +101 -0
  322. v0/relationalai/semantics/lqp/utils.py +160 -0
  323. v0/relationalai/semantics/lqp/validators.py +57 -0
  324. v0/relationalai/semantics/metamodel/__init__.py +40 -0
  325. v0/relationalai/semantics/metamodel/builtins.py +776 -0
  326. v0/relationalai/semantics/metamodel/compiler.py +133 -0
  327. v0/relationalai/semantics/metamodel/dependency.py +862 -0
  328. v0/relationalai/semantics/metamodel/executor.py +61 -0
  329. v0/relationalai/semantics/metamodel/factory.py +287 -0
  330. v0/relationalai/semantics/metamodel/helpers.py +361 -0
  331. v0/relationalai/semantics/metamodel/ir.py +923 -0
  332. v0/relationalai/semantics/metamodel/rewrite/__init__.py +7 -0
  333. v0/relationalai/semantics/metamodel/rewrite/discharge_constraints.py +39 -0
  334. v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +210 -0
  335. v0/relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +78 -0
  336. v0/relationalai/semantics/metamodel/rewrite/flatten.py +554 -0
  337. v0/relationalai/semantics/metamodel/rewrite/format_outputs.py +165 -0
  338. v0/relationalai/semantics/metamodel/typer/checker.py +353 -0
  339. v0/relationalai/semantics/metamodel/typer/typer.py +1395 -0
  340. v0/relationalai/semantics/metamodel/util.py +505 -0
  341. v0/relationalai/semantics/metamodel/visitor.py +944 -0
  342. v0/relationalai/semantics/reasoners/__init__.py +10 -0
  343. v0/relationalai/semantics/reasoners/graph/__init__.py +37 -0
  344. v0/relationalai/semantics/reasoners/graph/core.py +9019 -0
  345. v0/relationalai/semantics/reasoners/optimization/__init__.py +68 -0
  346. v0/relationalai/semantics/reasoners/optimization/common.py +88 -0
  347. v0/relationalai/semantics/reasoners/optimization/solvers_dev.py +568 -0
  348. v0/relationalai/semantics/reasoners/optimization/solvers_pb.py +1163 -0
  349. v0/relationalai/semantics/rel/builtins.py +40 -0
  350. v0/relationalai/semantics/rel/compiler.py +989 -0
  351. v0/relationalai/semantics/rel/executor.py +359 -0
  352. v0/relationalai/semantics/rel/rel.py +482 -0
  353. v0/relationalai/semantics/rel/rel_utils.py +276 -0
  354. v0/relationalai/semantics/snowflake/__init__.py +3 -0
  355. v0/relationalai/semantics/sql/compiler.py +2503 -0
  356. v0/relationalai/semantics/sql/executor/duck_db.py +52 -0
  357. v0/relationalai/semantics/sql/executor/result_helpers.py +64 -0
  358. v0/relationalai/semantics/sql/executor/snowflake.py +145 -0
  359. v0/relationalai/semantics/sql/rewrite/denormalize.py +222 -0
  360. v0/relationalai/semantics/sql/rewrite/double_negation.py +49 -0
  361. v0/relationalai/semantics/sql/rewrite/recursive_union.py +127 -0
  362. v0/relationalai/semantics/sql/rewrite/sort_output_query.py +246 -0
  363. v0/relationalai/semantics/sql/sql.py +504 -0
  364. v0/relationalai/semantics/std/__init__.py +54 -0
  365. v0/relationalai/semantics/std/constraints.py +43 -0
  366. v0/relationalai/semantics/std/datetime.py +363 -0
  367. v0/relationalai/semantics/std/decimals.py +62 -0
  368. v0/relationalai/semantics/std/floats.py +7 -0
  369. v0/relationalai/semantics/std/integers.py +22 -0
  370. v0/relationalai/semantics/std/math.py +141 -0
  371. v0/relationalai/semantics/std/pragmas.py +11 -0
  372. v0/relationalai/semantics/std/re.py +83 -0
  373. v0/relationalai/semantics/std/std.py +14 -0
  374. v0/relationalai/semantics/std/strings.py +63 -0
  375. v0/relationalai/semantics/tests/__init__.py +0 -0
  376. v0/relationalai/semantics/tests/test_snapshot_abstract.py +143 -0
  377. v0/relationalai/semantics/tests/test_snapshot_base.py +9 -0
  378. v0/relationalai/semantics/tests/utils.py +46 -0
  379. v0/relationalai/std/__init__.py +70 -0
  380. v0/relationalai/tools/__init__.py +0 -0
  381. v0/relationalai/tools/cli.py +1940 -0
  382. v0/relationalai/tools/cli_controls.py +1826 -0
  383. v0/relationalai/tools/cli_helpers.py +390 -0
  384. v0/relationalai/tools/debugger.py +183 -0
  385. v0/relationalai/tools/debugger_client.py +109 -0
  386. v0/relationalai/tools/debugger_server.py +302 -0
  387. v0/relationalai/tools/dev.py +685 -0
  388. v0/relationalai/tools/qb_debugger.py +425 -0
  389. v0/relationalai/util/clean_up_databases.py +95 -0
  390. v0/relationalai/util/format.py +123 -0
  391. v0/relationalai/util/list_databases.py +9 -0
  392. v0/relationalai/util/otel_configuration.py +25 -0
  393. v0/relationalai/util/otel_handler.py +484 -0
  394. v0/relationalai/util/snowflake_handler.py +88 -0
  395. v0/relationalai/util/span_format_test.py +43 -0
  396. v0/relationalai/util/span_tracker.py +207 -0
  397. v0/relationalai/util/spans_file_handler.py +72 -0
  398. v0/relationalai/util/tracing_handler.py +34 -0
  399. frontend/debugger/dist/.gitignore +0 -2
  400. frontend/debugger/dist/assets/favicon-Dy0ZgA6N.png +0 -0
  401. frontend/debugger/dist/assets/index-Cssla-O7.js +0 -208
  402. frontend/debugger/dist/assets/index-DlHsYx1V.css +0 -9
  403. frontend/debugger/dist/index.html +0 -17
  404. relationalai/clients/__init__.py +0 -18
  405. relationalai/clients/client.py +0 -946
  406. relationalai/clients/config.py +0 -673
  407. relationalai/clients/direct_access_client.py +0 -118
  408. relationalai/clients/exec_txn_poller.py +0 -153
  409. relationalai/clients/hash_util.py +0 -31
  410. relationalai/clients/local.py +0 -594
  411. relationalai/clients/profile_polling.py +0 -73
  412. relationalai/clients/resources/__init__.py +0 -8
  413. relationalai/clients/resources/azure/azure.py +0 -502
  414. relationalai/clients/resources/snowflake/__init__.py +0 -20
  415. relationalai/clients/resources/snowflake/cli_resources.py +0 -98
  416. relationalai/clients/resources/snowflake/direct_access_resources.py +0 -739
  417. relationalai/clients/resources/snowflake/engine_service.py +0 -381
  418. relationalai/clients/resources/snowflake/engine_state_handlers.py +0 -315
  419. relationalai/clients/resources/snowflake/error_handlers.py +0 -240
  420. relationalai/clients/resources/snowflake/export_procedure.py.jinja +0 -249
  421. relationalai/clients/resources/snowflake/resources_factory.py +0 -99
  422. relationalai/clients/resources/snowflake/snowflake.py +0 -3193
  423. relationalai/clients/resources/snowflake/use_index_poller.py +0 -1019
  424. relationalai/clients/resources/snowflake/use_index_resources.py +0 -188
  425. relationalai/clients/resources/snowflake/util.py +0 -387
  426. relationalai/clients/result_helpers.py +0 -420
  427. relationalai/clients/types.py +0 -118
  428. relationalai/clients/util.py +0 -356
  429. relationalai/debugging.py +0 -389
  430. relationalai/dsl.py +0 -1749
  431. relationalai/early_access/builder/__init__.py +0 -30
  432. relationalai/early_access/builder/builder/__init__.py +0 -35
  433. relationalai/early_access/builder/snowflake/__init__.py +0 -12
  434. relationalai/early_access/builder/std/__init__.py +0 -25
  435. relationalai/early_access/builder/std/decimals/__init__.py +0 -12
  436. relationalai/early_access/builder/std/integers/__init__.py +0 -12
  437. relationalai/early_access/builder/std/math/__init__.py +0 -12
  438. relationalai/early_access/builder/std/strings/__init__.py +0 -14
  439. relationalai/early_access/devtools/__init__.py +0 -12
  440. relationalai/early_access/devtools/benchmark_lqp/__init__.py +0 -12
  441. relationalai/early_access/devtools/extract_lqp/__init__.py +0 -12
  442. relationalai/early_access/dsl/adapters/orm/adapter_qb.py +0 -427
  443. relationalai/early_access/dsl/adapters/orm/parser.py +0 -636
  444. relationalai/early_access/dsl/adapters/owl/adapter.py +0 -176
  445. relationalai/early_access/dsl/adapters/owl/parser.py +0 -160
  446. relationalai/early_access/dsl/bindings/common.py +0 -402
  447. relationalai/early_access/dsl/bindings/csv.py +0 -170
  448. relationalai/early_access/dsl/bindings/legacy/binding_models.py +0 -143
  449. relationalai/early_access/dsl/bindings/snowflake.py +0 -64
  450. relationalai/early_access/dsl/codegen/binder.py +0 -411
  451. relationalai/early_access/dsl/codegen/common.py +0 -79
  452. relationalai/early_access/dsl/codegen/helpers.py +0 -23
  453. relationalai/early_access/dsl/codegen/relations.py +0 -700
  454. relationalai/early_access/dsl/codegen/weaver.py +0 -417
  455. relationalai/early_access/dsl/core/builders/__init__.py +0 -47
  456. relationalai/early_access/dsl/core/builders/logic.py +0 -19
  457. relationalai/early_access/dsl/core/builders/scalar_constraint.py +0 -11
  458. relationalai/early_access/dsl/core/constraints/predicate/atomic.py +0 -455
  459. relationalai/early_access/dsl/core/constraints/predicate/universal.py +0 -73
  460. relationalai/early_access/dsl/core/constraints/scalar.py +0 -310
  461. relationalai/early_access/dsl/core/context.py +0 -13
  462. relationalai/early_access/dsl/core/cset.py +0 -132
  463. relationalai/early_access/dsl/core/exprs/__init__.py +0 -116
  464. relationalai/early_access/dsl/core/exprs/relational.py +0 -18
  465. relationalai/early_access/dsl/core/exprs/scalar.py +0 -412
  466. relationalai/early_access/dsl/core/instances.py +0 -44
  467. relationalai/early_access/dsl/core/logic/__init__.py +0 -193
  468. relationalai/early_access/dsl/core/logic/aggregation.py +0 -98
  469. relationalai/early_access/dsl/core/logic/exists.py +0 -223
  470. relationalai/early_access/dsl/core/logic/helper.py +0 -163
  471. relationalai/early_access/dsl/core/namespaces.py +0 -32
  472. relationalai/early_access/dsl/core/relations.py +0 -276
  473. relationalai/early_access/dsl/core/rules.py +0 -112
  474. relationalai/early_access/dsl/core/std/__init__.py +0 -45
  475. relationalai/early_access/dsl/core/temporal/recall.py +0 -6
  476. relationalai/early_access/dsl/core/types/__init__.py +0 -270
  477. relationalai/early_access/dsl/core/types/concepts.py +0 -128
  478. relationalai/early_access/dsl/core/types/constrained/__init__.py +0 -267
  479. relationalai/early_access/dsl/core/types/constrained/nominal.py +0 -143
  480. relationalai/early_access/dsl/core/types/constrained/subtype.py +0 -124
  481. relationalai/early_access/dsl/core/types/standard.py +0 -92
  482. relationalai/early_access/dsl/core/types/unconstrained.py +0 -50
  483. relationalai/early_access/dsl/core/types/variables.py +0 -203
  484. relationalai/early_access/dsl/ir/compiler.py +0 -318
  485. relationalai/early_access/dsl/ir/executor.py +0 -260
  486. relationalai/early_access/dsl/ontologies/constraints.py +0 -88
  487. relationalai/early_access/dsl/ontologies/export.py +0 -30
  488. relationalai/early_access/dsl/ontologies/models.py +0 -453
  489. relationalai/early_access/dsl/ontologies/python_printer.py +0 -303
  490. relationalai/early_access/dsl/ontologies/readings.py +0 -60
  491. relationalai/early_access/dsl/ontologies/relationships.py +0 -322
  492. relationalai/early_access/dsl/ontologies/roles.py +0 -87
  493. relationalai/early_access/dsl/ontologies/subtyping.py +0 -55
  494. relationalai/early_access/dsl/orm/constraints.py +0 -438
  495. relationalai/early_access/dsl/orm/measures/dimensions.py +0 -200
  496. relationalai/early_access/dsl/orm/measures/initializer.py +0 -16
  497. relationalai/early_access/dsl/orm/measures/measure_rules.py +0 -275
  498. relationalai/early_access/dsl/orm/measures/measures.py +0 -299
  499. relationalai/early_access/dsl/orm/measures/role_exprs.py +0 -268
  500. relationalai/early_access/dsl/orm/models.py +0 -256
  501. relationalai/early_access/dsl/orm/object_oriented_printer.py +0 -344
  502. relationalai/early_access/dsl/orm/printer.py +0 -469
  503. relationalai/early_access/dsl/orm/reasoners.py +0 -480
  504. relationalai/early_access/dsl/orm/relations.py +0 -19
  505. relationalai/early_access/dsl/orm/relationships.py +0 -251
  506. relationalai/early_access/dsl/orm/types.py +0 -42
  507. relationalai/early_access/dsl/orm/utils.py +0 -79
  508. relationalai/early_access/dsl/orm/verb.py +0 -204
  509. relationalai/early_access/dsl/physical_metadata/tables.py +0 -133
  510. relationalai/early_access/dsl/relations.py +0 -170
  511. relationalai/early_access/dsl/rulesets.py +0 -69
  512. relationalai/early_access/dsl/schemas/__init__.py +0 -450
  513. relationalai/early_access/dsl/schemas/builder.py +0 -48
  514. relationalai/early_access/dsl/schemas/comp_names.py +0 -51
  515. relationalai/early_access/dsl/schemas/components.py +0 -203
  516. relationalai/early_access/dsl/schemas/contexts.py +0 -156
  517. relationalai/early_access/dsl/schemas/exprs.py +0 -89
  518. relationalai/early_access/dsl/schemas/fragments.py +0 -464
  519. relationalai/early_access/dsl/serialization.py +0 -79
  520. relationalai/early_access/dsl/serialize/exporter.py +0 -163
  521. relationalai/early_access/dsl/snow/api.py +0 -105
  522. relationalai/early_access/dsl/snow/common.py +0 -76
  523. relationalai/early_access/dsl/state_mgmt/__init__.py +0 -129
  524. relationalai/early_access/dsl/state_mgmt/state_charts.py +0 -125
  525. relationalai/early_access/dsl/state_mgmt/transitions.py +0 -130
  526. relationalai/early_access/dsl/types/__init__.py +0 -40
  527. relationalai/early_access/dsl/types/concepts.py +0 -12
  528. relationalai/early_access/dsl/types/entities.py +0 -135
  529. relationalai/early_access/dsl/types/values.py +0 -17
  530. relationalai/early_access/dsl/utils.py +0 -102
  531. relationalai/early_access/graphs/__init__.py +0 -13
  532. relationalai/early_access/lqp/__init__.py +0 -12
  533. relationalai/early_access/lqp/compiler/__init__.py +0 -12
  534. relationalai/early_access/lqp/constructors/__init__.py +0 -18
  535. relationalai/early_access/lqp/executor/__init__.py +0 -12
  536. relationalai/early_access/lqp/ir/__init__.py +0 -12
  537. relationalai/early_access/lqp/passes/__init__.py +0 -12
  538. relationalai/early_access/lqp/pragmas/__init__.py +0 -12
  539. relationalai/early_access/lqp/primitives/__init__.py +0 -12
  540. relationalai/early_access/lqp/types/__init__.py +0 -12
  541. relationalai/early_access/lqp/utils/__init__.py +0 -12
  542. relationalai/early_access/lqp/validators/__init__.py +0 -12
  543. relationalai/early_access/metamodel/__init__.py +0 -58
  544. relationalai/early_access/metamodel/builtins/__init__.py +0 -12
  545. relationalai/early_access/metamodel/compiler/__init__.py +0 -12
  546. relationalai/early_access/metamodel/dependency/__init__.py +0 -12
  547. relationalai/early_access/metamodel/factory/__init__.py +0 -17
  548. relationalai/early_access/metamodel/helpers/__init__.py +0 -12
  549. relationalai/early_access/metamodel/ir/__init__.py +0 -14
  550. relationalai/early_access/metamodel/rewrite/__init__.py +0 -7
  551. relationalai/early_access/metamodel/typer/__init__.py +0 -3
  552. relationalai/early_access/metamodel/typer/typer/__init__.py +0 -12
  553. relationalai/early_access/metamodel/types/__init__.py +0 -15
  554. relationalai/early_access/metamodel/util/__init__.py +0 -15
  555. relationalai/early_access/metamodel/visitor/__init__.py +0 -12
  556. relationalai/early_access/rel/__init__.py +0 -12
  557. relationalai/early_access/rel/executor/__init__.py +0 -12
  558. relationalai/early_access/rel/rel_utils/__init__.py +0 -12
  559. relationalai/early_access/rel/rewrite/__init__.py +0 -7
  560. relationalai/early_access/solvers/__init__.py +0 -19
  561. relationalai/early_access/sql/__init__.py +0 -11
  562. relationalai/early_access/sql/executor/__init__.py +0 -3
  563. relationalai/early_access/sql/rewrite/__init__.py +0 -3
  564. relationalai/early_access/tests/logging/__init__.py +0 -12
  565. relationalai/early_access/tests/test_snapshot_base/__init__.py +0 -12
  566. relationalai/early_access/tests/utils/__init__.py +0 -12
  567. relationalai/environments/__init__.py +0 -35
  568. relationalai/environments/base.py +0 -381
  569. relationalai/environments/colab.py +0 -14
  570. relationalai/environments/generic.py +0 -71
  571. relationalai/environments/ipython.py +0 -68
  572. relationalai/environments/jupyter.py +0 -9
  573. relationalai/environments/snowbook.py +0 -169
  574. relationalai/errors.py +0 -2496
  575. relationalai/experimental/SF.py +0 -38
  576. relationalai/experimental/inspect.py +0 -47
  577. relationalai/experimental/pathfinder/__init__.py +0 -158
  578. relationalai/experimental/pathfinder/api.py +0 -160
  579. relationalai/experimental/pathfinder/automaton.py +0 -584
  580. relationalai/experimental/pathfinder/bridge.py +0 -226
  581. relationalai/experimental/pathfinder/compiler.py +0 -416
  582. relationalai/experimental/pathfinder/datalog.py +0 -214
  583. relationalai/experimental/pathfinder/diagnostics.py +0 -56
  584. relationalai/experimental/pathfinder/filter.py +0 -236
  585. relationalai/experimental/pathfinder/glushkov.py +0 -439
  586. relationalai/experimental/pathfinder/options.py +0 -265
  587. relationalai/experimental/pathfinder/pathfinder-v0.7.0.rel +0 -1951
  588. relationalai/experimental/pathfinder/rpq.py +0 -344
  589. relationalai/experimental/pathfinder/transition.py +0 -200
  590. relationalai/experimental/pathfinder/utils.py +0 -26
  591. relationalai/experimental/paths/README.md +0 -107
  592. relationalai/experimental/paths/api.py +0 -143
  593. relationalai/experimental/paths/benchmarks/grid_graph.py +0 -37
  594. relationalai/experimental/paths/code_organization.md +0 -2
  595. relationalai/experimental/paths/examples/Movies.ipynb +0 -16328
  596. relationalai/experimental/paths/examples/basic_example.py +0 -40
  597. relationalai/experimental/paths/examples/minimal_engine_warmup.py +0 -3
  598. relationalai/experimental/paths/examples/movie_example.py +0 -77
  599. relationalai/experimental/paths/examples/movies_data/actedin.csv +0 -193
  600. relationalai/experimental/paths/examples/movies_data/directed.csv +0 -45
  601. relationalai/experimental/paths/examples/movies_data/follows.csv +0 -7
  602. relationalai/experimental/paths/examples/movies_data/movies.csv +0 -39
  603. relationalai/experimental/paths/examples/movies_data/person.csv +0 -134
  604. relationalai/experimental/paths/examples/movies_data/produced.csv +0 -16
  605. relationalai/experimental/paths/examples/movies_data/ratings.csv +0 -10
  606. relationalai/experimental/paths/examples/movies_data/wrote.csv +0 -11
  607. relationalai/experimental/paths/examples/paths_benchmark.py +0 -115
  608. relationalai/experimental/paths/examples/paths_example.py +0 -116
  609. relationalai/experimental/paths/examples/pattern_to_automaton.py +0 -28
  610. relationalai/experimental/paths/find_paths_via_automaton.py +0 -85
  611. relationalai/experimental/paths/graph.py +0 -185
  612. relationalai/experimental/paths/path_algorithms/find_paths.py +0 -280
  613. relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +0 -26
  614. relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +0 -111
  615. relationalai/experimental/paths/path_algorithms/single.py +0 -59
  616. relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +0 -39
  617. relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +0 -103
  618. relationalai/experimental/paths/path_algorithms/usp-old.py +0 -130
  619. relationalai/experimental/paths/path_algorithms/usp-tuple.py +0 -183
  620. relationalai/experimental/paths/path_algorithms/usp.py +0 -150
  621. relationalai/experimental/paths/product_graph.py +0 -93
  622. relationalai/experimental/paths/rpq/automaton.py +0 -584
  623. relationalai/experimental/paths/rpq/diagnostics.py +0 -56
  624. relationalai/experimental/paths/rpq/rpq.py +0 -378
  625. relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +0 -90
  626. relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +0 -119
  627. relationalai/experimental/paths/tests/tests_limit_sp_single.py +0 -104
  628. relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +0 -113
  629. relationalai/experimental/paths/tests/tests_limit_walks_single.py +0 -149
  630. relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +0 -70
  631. relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +0 -64
  632. relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +0 -115
  633. relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +0 -75
  634. relationalai/experimental/paths/tests/tests_single_paths.py +0 -152
  635. relationalai/experimental/paths/tests/tests_single_walks.py +0 -208
  636. relationalai/experimental/paths/tests/tests_single_walks_undirected.py +0 -297
  637. relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +0 -107
  638. relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +0 -76
  639. relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +0 -76
  640. relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +0 -110
  641. relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +0 -229
  642. relationalai/experimental/paths/tests/tests_usp_nsp_single.py +0 -108
  643. relationalai/experimental/paths/tree_agg.py +0 -168
  644. relationalai/experimental/paths/utilities/iterators.py +0 -27
  645. relationalai/experimental/paths/utilities/prefix_sum.py +0 -91
  646. relationalai/experimental/solvers.py +0 -1095
  647. relationalai/loaders/csv.py +0 -195
  648. relationalai/loaders/loader.py +0 -177
  649. relationalai/loaders/types.py +0 -23
  650. relationalai/rel_emitter.py +0 -373
  651. relationalai/rel_utils.py +0 -185
  652. relationalai/semantics/designs/query_builder/identify_by.md +0 -106
  653. relationalai/semantics/devtools/benchmark_lqp.py +0 -535
  654. relationalai/semantics/devtools/compilation_manager.py +0 -294
  655. relationalai/semantics/devtools/extract_lqp.py +0 -110
  656. relationalai/semantics/internal/internal.py +0 -3785
  657. relationalai/semantics/internal/snowflake.py +0 -329
  658. relationalai/semantics/lqp/README.md +0 -34
  659. relationalai/semantics/lqp/algorithms.py +0 -173
  660. relationalai/semantics/lqp/builtins.py +0 -213
  661. relationalai/semantics/lqp/compiler.py +0 -22
  662. relationalai/semantics/lqp/constructors.py +0 -68
  663. relationalai/semantics/lqp/executor.py +0 -518
  664. relationalai/semantics/lqp/export_rewriter.py +0 -40
  665. relationalai/semantics/lqp/intrinsics.py +0 -24
  666. relationalai/semantics/lqp/ir.py +0 -150
  667. relationalai/semantics/lqp/model2lqp.py +0 -1056
  668. relationalai/semantics/lqp/passes.py +0 -38
  669. relationalai/semantics/lqp/primitives.py +0 -252
  670. relationalai/semantics/lqp/result_helpers.py +0 -266
  671. relationalai/semantics/lqp/rewrite/__init__.py +0 -32
  672. relationalai/semantics/lqp/rewrite/algorithm.py +0 -385
  673. relationalai/semantics/lqp/rewrite/annotate_constraints.py +0 -69
  674. relationalai/semantics/lqp/rewrite/cdc.py +0 -216
  675. relationalai/semantics/lqp/rewrite/constants_to_vars.py +0 -70
  676. relationalai/semantics/lqp/rewrite/deduplicate_vars.py +0 -104
  677. relationalai/semantics/lqp/rewrite/eliminate_data.py +0 -108
  678. relationalai/semantics/lqp/rewrite/extract_common.py +0 -340
  679. relationalai/semantics/lqp/rewrite/extract_keys.py +0 -577
  680. relationalai/semantics/lqp/rewrite/flatten_script.py +0 -301
  681. relationalai/semantics/lqp/rewrite/function_annotations.py +0 -114
  682. relationalai/semantics/lqp/rewrite/functional_dependencies.py +0 -348
  683. relationalai/semantics/lqp/rewrite/period_math.py +0 -77
  684. relationalai/semantics/lqp/rewrite/quantify_vars.py +0 -339
  685. relationalai/semantics/lqp/rewrite/splinter.py +0 -76
  686. relationalai/semantics/lqp/rewrite/unify_definitions.py +0 -323
  687. relationalai/semantics/lqp/types.py +0 -101
  688. relationalai/semantics/lqp/utils.py +0 -170
  689. relationalai/semantics/lqp/validators.py +0 -70
  690. relationalai/semantics/metamodel/compiler.py +0 -134
  691. relationalai/semantics/metamodel/dependency.py +0 -880
  692. relationalai/semantics/metamodel/executor.py +0 -78
  693. relationalai/semantics/metamodel/factory.py +0 -287
  694. relationalai/semantics/metamodel/helpers.py +0 -368
  695. relationalai/semantics/metamodel/ir.py +0 -924
  696. relationalai/semantics/metamodel/rewrite/__init__.py +0 -8
  697. relationalai/semantics/metamodel/rewrite/discharge_constraints.py +0 -39
  698. relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +0 -220
  699. relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +0 -78
  700. relationalai/semantics/metamodel/rewrite/flatten.py +0 -590
  701. relationalai/semantics/metamodel/rewrite/format_outputs.py +0 -256
  702. relationalai/semantics/metamodel/rewrite/handle_aggregations_and_ranks.py +0 -237
  703. relationalai/semantics/metamodel/typer/checker.py +0 -355
  704. relationalai/semantics/metamodel/typer/typer.py +0 -1396
  705. relationalai/semantics/metamodel/util.py +0 -506
  706. relationalai/semantics/metamodel/visitor.py +0 -945
  707. relationalai/semantics/reasoners/__init__.py +0 -10
  708. relationalai/semantics/reasoners/graph/README.md +0 -620
  709. relationalai/semantics/reasoners/graph/__init__.py +0 -37
  710. relationalai/semantics/reasoners/graph/core.py +0 -9019
  711. relationalai/semantics/reasoners/graph/design/beyond_demand_transform.md +0 -797
  712. relationalai/semantics/reasoners/graph/tests/README.md +0 -21
  713. relationalai/semantics/reasoners/optimization/__init__.py +0 -68
  714. relationalai/semantics/reasoners/optimization/common.py +0 -88
  715. relationalai/semantics/reasoners/optimization/solvers_dev.py +0 -568
  716. relationalai/semantics/reasoners/optimization/solvers_pb.py +0 -1407
  717. relationalai/semantics/rel/builtins.py +0 -40
  718. relationalai/semantics/rel/compiler.py +0 -994
  719. relationalai/semantics/rel/executor.py +0 -363
  720. relationalai/semantics/rel/rel.py +0 -482
  721. relationalai/semantics/rel/rel_utils.py +0 -276
  722. relationalai/semantics/snowflake/__init__.py +0 -3
  723. relationalai/semantics/sql/compiler.py +0 -2503
  724. relationalai/semantics/sql/executor/duck_db.py +0 -52
  725. relationalai/semantics/sql/executor/result_helpers.py +0 -64
  726. relationalai/semantics/sql/executor/snowflake.py +0 -149
  727. relationalai/semantics/sql/rewrite/denormalize.py +0 -222
  728. relationalai/semantics/sql/rewrite/double_negation.py +0 -49
  729. relationalai/semantics/sql/rewrite/recursive_union.py +0 -127
  730. relationalai/semantics/sql/rewrite/sort_output_query.py +0 -246
  731. relationalai/semantics/sql/sql.py +0 -504
  732. relationalai/semantics/std/pragmas.py +0 -11
  733. relationalai/semantics/std/std.py +0 -14
  734. relationalai/semantics/tests/lqp/algorithms.py +0 -345
  735. relationalai/semantics/tests/test_snapshot_abstract.py +0 -144
  736. relationalai/semantics/tests/test_snapshot_base.py +0 -9
  737. relationalai/semantics/tests/utils.py +0 -46
  738. relationalai/std/__init__.py +0 -70
  739. relationalai/tools/cli.py +0 -2089
  740. relationalai/tools/cli_controls.py +0 -1975
  741. relationalai/tools/cli_helpers.py +0 -802
  742. relationalai/tools/debugger_client.py +0 -109
  743. relationalai/tools/debugger_server.py +0 -302
  744. relationalai/tools/dev.py +0 -685
  745. relationalai/tools/notes +0 -7
  746. relationalai/tools/qb_debugger.py +0 -425
  747. relationalai/tools/txn_progress.py +0 -188
  748. relationalai/util/clean_up_databases.py +0 -95
  749. relationalai/util/list_databases.py +0 -9
  750. relationalai/util/otel_configuration.py +0 -26
  751. relationalai/util/otel_handler.py +0 -484
  752. relationalai/util/snowflake_handler.py +0 -88
  753. relationalai/util/span_format_test.py +0 -43
  754. relationalai/util/span_tracker.py +0 -207
  755. relationalai/util/spans_file_handler.py +0 -72
  756. relationalai/util/tracing_handler.py +0 -34
  757. relationalai-0.13.5.dist-info/METADATA +0 -74
  758. relationalai-0.13.5.dist-info/RECORD +0 -473
  759. relationalai-0.13.5.dist-info/WHEEL +0 -4
  760. relationalai-0.13.5.dist-info/entry_points.txt +0 -3
  761. relationalai-0.13.5.dist-info/licenses/LICENSE +0 -202
  762. relationalai_test_util/__init__.py +0 -4
  763. relationalai_test_util/fixtures.py +0 -233
  764. relationalai_test_util/snapshot.py +0 -252
  765. relationalai_test_util/traceback.py +0 -118
  766. /relationalai/{analysis → semantics/frontend}/__init__.py +0 -0
  767. /relationalai/{auth/__init__.py → semantics/metamodel/metamodel_compiler.py} +0 -0
  768. /relationalai/{early_access → shims}/__init__.py +0 -0
  769. {relationalai/early_access/dsl/adapters → v0/relationalai/analysis}/__init__.py +0 -0
  770. {relationalai → v0/relationalai}/analysis/mechanistic.py +0 -0
  771. {relationalai → v0/relationalai}/analysis/whynot.py +0 -0
  772. {relationalai/early_access/dsl/adapters/orm → v0/relationalai/auth}/__init__.py +0 -0
  773. {relationalai → v0/relationalai}/auth/jwt_generator.py +0 -0
  774. {relationalai → v0/relationalai}/auth/oauth_callback_server.py +0 -0
  775. {relationalai → v0/relationalai}/auth/token_handler.py +0 -0
  776. {relationalai → v0/relationalai}/auth/util.py +0 -0
  777. {relationalai/clients/resources/snowflake → v0/relationalai/clients}/cache_store.py +0 -0
  778. {relationalai → v0/relationalai}/compiler.py +0 -0
  779. {relationalai → v0/relationalai}/dependencies.py +0 -0
  780. {relationalai → v0/relationalai}/docutils.py +0 -0
  781. {relationalai/early_access/dsl/adapters/owl → v0/relationalai/early_access}/__init__.py +0 -0
  782. {relationalai → v0/relationalai}/early_access/dsl/__init__.py +0 -0
  783. {relationalai/early_access/dsl/bindings → v0/relationalai/early_access/dsl/adapters}/__init__.py +0 -0
  784. {relationalai/early_access/dsl/bindings/legacy → v0/relationalai/early_access/dsl/adapters/orm}/__init__.py +0 -0
  785. {relationalai → v0/relationalai}/early_access/dsl/adapters/orm/model.py +0 -0
  786. {relationalai/early_access/dsl/codegen → v0/relationalai/early_access/dsl/adapters/owl}/__init__.py +0 -0
  787. {relationalai → v0/relationalai}/early_access/dsl/adapters/owl/model.py +0 -0
  788. {relationalai/early_access/dsl/core/temporal → v0/relationalai/early_access/dsl/bindings}/__init__.py +0 -0
  789. {relationalai/early_access/dsl/ir → v0/relationalai/early_access/dsl/bindings/legacy}/__init__.py +0 -0
  790. {relationalai/early_access/dsl/ontologies → v0/relationalai/early_access/dsl/codegen}/__init__.py +0 -0
  791. {relationalai → v0/relationalai}/early_access/dsl/constants.py +0 -0
  792. {relationalai → v0/relationalai}/early_access/dsl/core/__init__.py +0 -0
  793. {relationalai → v0/relationalai}/early_access/dsl/core/constraints/__init__.py +0 -0
  794. {relationalai → v0/relationalai}/early_access/dsl/core/constraints/predicate/__init__.py +0 -0
  795. {relationalai → v0/relationalai}/early_access/dsl/core/stack.py +0 -0
  796. {relationalai/early_access/dsl/orm → v0/relationalai/early_access/dsl/core/temporal}/__init__.py +0 -0
  797. {relationalai → v0/relationalai}/early_access/dsl/core/utils.py +0 -0
  798. {relationalai/early_access/dsl/orm/measures → v0/relationalai/early_access/dsl/ir}/__init__.py +0 -0
  799. {relationalai/early_access/dsl/physical_metadata → v0/relationalai/early_access/dsl/ontologies}/__init__.py +0 -0
  800. {relationalai → v0/relationalai}/early_access/dsl/ontologies/raw_source.py +0 -0
  801. {relationalai/early_access/dsl/serialize → v0/relationalai/early_access/dsl/orm}/__init__.py +0 -0
  802. {relationalai/early_access/dsl/snow → v0/relationalai/early_access/dsl/orm/measures}/__init__.py +0 -0
  803. {relationalai → v0/relationalai}/early_access/dsl/orm/reasoner_errors.py +0 -0
  804. {relationalai/loaders → v0/relationalai/early_access/dsl/physical_metadata}/__init__.py +0 -0
  805. {relationalai/semantics/tests → v0/relationalai/early_access/dsl/serialize}/__init__.py +0 -0
  806. {relationalai → v0/relationalai}/early_access/dsl/serialize/binding_model.py +0 -0
  807. {relationalai → v0/relationalai}/early_access/dsl/serialize/model.py +0 -0
  808. {relationalai/semantics/tests/lqp → v0/relationalai/early_access/dsl/snow}/__init__.py +0 -0
  809. {relationalai → v0/relationalai}/early_access/tests/__init__.py +0 -0
  810. {relationalai → v0/relationalai}/environments/ci.py +0 -0
  811. {relationalai → v0/relationalai}/environments/hex.py +0 -0
  812. {relationalai → v0/relationalai}/environments/terminal.py +0 -0
  813. {relationalai → v0/relationalai}/experimental/__init__.py +0 -0
  814. {relationalai → v0/relationalai}/experimental/graphs.py +0 -0
  815. {relationalai → v0/relationalai}/experimental/paths/__init__.py +0 -0
  816. {relationalai → v0/relationalai}/experimental/paths/benchmarks/__init__.py +0 -0
  817. {relationalai → v0/relationalai}/experimental/paths/path_algorithms/__init__.py +0 -0
  818. {relationalai → v0/relationalai}/experimental/paths/rpq/__init__.py +0 -0
  819. {relationalai → v0/relationalai}/experimental/paths/rpq/filter.py +0 -0
  820. {relationalai → v0/relationalai}/experimental/paths/rpq/glushkov.py +0 -0
  821. {relationalai → v0/relationalai}/experimental/paths/rpq/transition.py +0 -0
  822. {relationalai → v0/relationalai}/experimental/paths/utilities/__init__.py +0 -0
  823. {relationalai → v0/relationalai}/experimental/paths/utilities/utilities.py +0 -0
  824. {relationalai/tools → v0/relationalai/loaders}/__init__.py +0 -0
  825. {relationalai → v0/relationalai}/metagen.py +0 -0
  826. {relationalai → v0/relationalai}/metamodel.py +0 -0
  827. {relationalai → v0/relationalai}/rel.py +0 -0
  828. {relationalai → v0/relationalai}/semantics/devtools/__init__.py +0 -0
  829. {relationalai → v0/relationalai}/semantics/internal/__init__.py +0 -0
  830. {relationalai → v0/relationalai}/semantics/internal/annotations.py +0 -0
  831. {relationalai → v0/relationalai}/semantics/lqp/__init__.py +0 -0
  832. {relationalai → v0/relationalai}/semantics/lqp/pragmas.py +0 -0
  833. {relationalai → v0/relationalai}/semantics/metamodel/dataflow.py +0 -0
  834. {relationalai → v0/relationalai}/semantics/metamodel/typer/__init__.py +0 -0
  835. {relationalai → v0/relationalai}/semantics/metamodel/types.py +0 -0
  836. {relationalai → v0/relationalai}/semantics/reasoners/experimental/__init__.py +0 -0
  837. {relationalai → v0/relationalai}/semantics/rel/__init__.py +0 -0
  838. {relationalai → v0/relationalai}/semantics/sql/__init__.py +0 -0
  839. {relationalai → v0/relationalai}/semantics/sql/executor/__init__.py +0 -0
  840. {relationalai → v0/relationalai}/semantics/sql/rewrite/__init__.py +0 -0
  841. {relationalai → v0/relationalai}/semantics/tests/logging.py +0 -0
  842. {relationalai → v0/relationalai}/std/aggregates.py +0 -0
  843. {relationalai → v0/relationalai}/std/dates.py +0 -0
  844. {relationalai → v0/relationalai}/std/graphs.py +0 -0
  845. {relationalai → v0/relationalai}/std/inspect.py +0 -0
  846. {relationalai → v0/relationalai}/std/math.py +0 -0
  847. {relationalai → v0/relationalai}/std/re.py +0 -0
  848. {relationalai → v0/relationalai}/std/strings.py +0 -0
  849. {relationalai → v0/relationalai}/tools/cleanup_snapshots.py +0 -0
  850. {relationalai → v0/relationalai}/tools/constants.py +0 -0
  851. {relationalai → v0/relationalai}/tools/query_utils.py +0 -0
  852. {relationalai → v0/relationalai}/tools/snapshot_viewer.py +0 -0
  853. {relationalai → v0/relationalai}/util/__init__.py +0 -0
  854. {relationalai → v0/relationalai}/util/constants.py +0 -0
  855. {relationalai → v0/relationalai}/util/graph.py +0 -0
  856. {relationalai → v0/relationalai}/util/timeout.py +0 -0
@@ -1,797 +0,0 @@
1
- # A graphs interface to obviate demand transformation
2
-
3
- - Authors: Sacha Verweij
4
- - Reviewers: David Sanders, Ryan Gao, Huda Nassar, Kenton van Perseum
5
- - Last Updated: September 2025
6
- - Status: Draft
7
-
8
- # Background
9
-
10
- ## Problem
11
-
12
- 1) The number of tuples in, and/or overall cost of evaluation of, some relations in the graph library nominally scales quadratically or worse with the number of nodes and/or edges in the graph. `distance(u, v, d)` is a good example: It maps pairs of nodes `(u, v)` to the shortest path length between them. To the degree that the graph is strongly connected, the number of tuples `distance` contains approaches quadratic in the number of nodes. Such relations may be expensive or intractable to compute in full.
13
-
14
- 2) Moreover, often the customer is interested in only a small subset of the tuples in such relations. For example, the customer may be interested in the `distance` from only a single node `u` to all other nodes `v`, or even between only a specific pair of nodes `(u, v)`. Where the customer tries to express that intent, they may reasonably expect the computation to be much -- i.e. asymptotically -- less expensive than computing the full relation.
15
-
16
- 3) A similar situation exists with smaller / less expensive relations. `degree(u, d)` is a good example: It maps nodes `u` to the node's degree `d`. While it is tractable and relatively inexpensive to compute, the customer may be interested in the degree of only a single node `u`, or small number of such nodes. Where the customer tries to express that intent, they may similarly reasonably expect the computation to be asymptotically less expensive than computing the full relation.
17
-
18
- At the moment, the query builder graph library computes all relations in full, independent of the customer's expression. In combination with (1), this constrains practical customer use of the library's large/expensive relations to small graphs. In combination with (2) and (3), this can result in surprising or unsatisfying performance, without recourse.
19
-
20
- ## How does the previous version of the graph library (Rel/PyRel-v0) address this problem?
21
-
22
- The previous version of the graph library (Rel/PyRel-v0) attempts to constrain the computed subset of some large / expensive relations like `distance` to the customer's interest. It does not provide a customer-facing mechanism to constrain computation of small / less expensive relations like `degree`.
23
-
24
- For the former, it uses either demand transformation or inlining. Here we'll focus on demand transformation as the primary mechanism.
25
-
26
- ## How does demand transformation address this problem?
27
-
28
- In broad strokes, demand transformation attempts to infer the customer's intent from context, and constrain computation to the inferred subset of interest of a given relation. It does this roughly as follows:
29
-
30
- 1) It attempts to infer the overall shape of the customer's interest. For example, in `distance(u, v, d)`, does the customer want to compute all `v` and `d` for some given set of `u`, or only the `d` for some given pairs `(u, v)`, or something else? This overall shape is called the _demand pattern_.
31
-
32
- 2) Given that inferred demand pattern, it attempts to infer the specifics of the customer's interest. For example, supposing that it inferred that the customer wants to compute all `v` and `d` for some given set of `u`, what is the set of `u` of interest? Let's call this specific interest the _demand set_.
33
-
34
- 3) Given that inferred demand pattern and set, it attempts to rewrite the generic algorithm it has for the relation, such that computation is constrained to the inferred demand.
35
-
36
- ## That sounds great. Why can't the query builder graph library do the same?
37
-
38
- The query builder graph library could also use demand transformation to address parts of this problem. Exposing demand transform in query builder should be fairly straightforward. But demand transformation has some issues, and doesn't address all parts of this problem.
39
-
40
- ## What's the issue with demand transformation?
41
-
42
- The tl;dr is that mind-reading is hard:
43
-
44
- 1) Demand transformation can fail to accurately infer the overall shape of the customer's intent. That is, it can select a demand pattern inconsistent with the customer's intent, leading to surprising and/or poor performance. For example, the customer may intend to compute `distance(u, v, d)` for a small set of `u`, while demand transformation infers that `distance(u, v, d)` should instead be computed for some set of `v`, without constraint on `u`.
45
-
46
- 2) Where demand transformation accurately infers the overall shape of the customer's intent, it can fail to infer the customer's specific intent. That is, it can select a demand set that (grossly) overapproximates the set of interest to the customer, leading to surprising and/or poor performance. For example, the customer may intend to compute `distance(u, v, d)` for some small number of combinations of `u` and `v`, while demand transformation infers a much larger set of combinations of `u` and `v`.
47
-
48
- 3) Demand transformation can introduce unnecessary and costly recursion.
49
-
50
- Additionally, there are some outstanding soundness questions about demand transformation.
51
-
52
- (1) and (2) have caused much customer pain, and pain on our side trying to resolve that pain. The outstanding soundness questions are also driving folks to remove demand transformation from the product.
53
-
54
- # Design
55
-
56
- ## In the abstract
57
-
58
- ### What can we do instead, broadly speaking?
59
-
60
- Consider that each relation allows the customer to ask a collection of questions. For example, among (many*) others, the `distance(u, v, d)` relation allows the customer to ask the following fairly common questions:
61
-
62
- 1) What is the distance from each node `u` to each node `v`? (No constraint.)
63
- 2) What is the distance from each node in a given set of nodes `u` to each node `v`? (Constrain `u`.)
64
- 3) What is the distance to each node in a given set of nodes `v` from each node `u`? (Constrain `v`.)
65
- 4) What is the distance from each node in a given set of nodes `u` to each node in a given set of nodes `v`? (Constrain `u` and `v`, separately.)
66
- 5) What is the distance between each pair of nodes `(u, v)` in a given set of pairs of nodes? (Constrain `u` and `v`, jointly, one to one.)
67
-
68
- (Presently all of these questions are conflated in the name/spelling `distance(u, v, d)`, and the system tries to de-conflate those questions based on context with demand transformation.)
69
-
70
- At a high level, we have two options:
71
- A) attempt to infer the customer's intent, i.e. infer which question they are asking; or
72
- B) allow the customer to specify their intent, i.e. explicitly express which question they are asking.
73
-
74
- (A) leads back to something at least in spirit like demand transformation, with its attendant pitfalls/challenges.
75
-
76
- With (B) we also have two options:
77
- (B1) Allow the customer to specify their intent (demand pattern, demand set) via language-level features, and either add functionality under the hood to specialize generic logic to that intent, or allow libraries to provide specialized logic for given intents; or
78
- (B2) Design this library's API to allow the customer to specify their intent.
79
-
80
- Regarding (B1), it's not clear whether we want such language-level features, we do not have such features at this time, and the timescale for designing and implementing such features is longer than the timescale on which we need to deliver a solid initial graphs library to customers. Fundamental feasibility of some aspects of this is not clear either.
81
-
82
- (B2) is a pragmatic approach that will allow us to deliver something solid to customers on a reasonable timescale.
83
-
84
- The design below takes approach (B2), allowing the customer to explicitly express common questions via the library's API.
85
-
86
- ### What about those many* other questions?
87
-
88
- The list of questions above was far from exhaustive. For example, the customer could also ask any of:
89
-
90
- 6) What nodes are at given distance(s) `d` from each other? (Constrain `d`.)
91
- 7) What nodes `v` are at given distance(s) `d` from each node in a given set of nodes `u`? (Constrain `u` and `d`, separately.)
92
- 8) What nodes `u` have given distance(s) `d` to each node in a given set of nodes `v`? (Constrain `v` and `d`, separately.)
93
- 9) What (specified) pairs of nodes are at given distance(s) `d` from each other? (Constrain `u` and `v` jointly one to one, and `d` separately.)
94
-
95
- and many more. Exposing means to distinctly express all such questions is probably neither practically possible (may require a constraint language in itself) nor, happily, necessary.
96
-
97
- ### Which questions do we bake directly into the API?
98
-
99
- Three razors for which questions to bake directly into the API, and how:
100
-
101
- (1) Common questions should be easy to ask, baked directly into the API. Uncommon questions should be possible to ask, but if asking them requires more work, is less graceful, or is less efficient, that seems reasonable; they don't need to be baked directly into the API.
102
-
103
- (2) Make questions that we can answer efficiently easy to ask. Make questions that we cannot answer efficiently possible to ask, but perhaps less easily. If we can't answer a question efficiently, prompt the customer to think about the question they are asking / its cost, and nudge them towards efficient tools/questions.
104
-
105
- (3) Like (2), but more extreme: Questions that involve potentially intractable or exorbitantly expensive computations are footguns if baked into the API without requiring pause-ACKs and/or injecting friction. (For example, being able to frictionlessly ask for the full `jaccard_similarity` or `distance` relation is a footgun.)
106
-
107
- ## Concrete
108
-
109
- ### How to spell the questions / express constraints on computed subsets.
110
-
111
- The query builder graph library exposes relations through member methods of the `Graph` class, e.g. `graph.distance()`. There are at least two natural ways to allow the customer to make their intent explicit under that design:
112
-
113
- 1) provide separate method names for each supported question; and
114
- 2) provide arguments to these methods that distinguish the supported questions.
115
-
116
- For example, suppose we directly support the following questions associated with `distance` mentioned above:
117
-
118
- 2) What is the distance from each node in a given set of nodes `u` to each node `v`? (Constrain `u`.)
119
- 3) What is the distance to each node in a given set of nodes `v` from each node `u`? (Constrain `v`.)
120
- 4) What is the distance from each node in a given set of nodes `u` to each node in a given set of nodes `v`? (Constrain `u` and `v`, separately.)
121
- 5) What is the distance between each pair of nodes `(u, v)` in a given set of pairs of nodes? (Constrain `u` and `v` jointly, one to one.)
122
-
123
- What would we do in each case?
124
-
125
- #### Approach (1): distinguish questions via method names.
126
-
127
- Approach (1) might replace `.distance()` with four separate methods, each corresponding to one of the questions above, and accepting `Relationship` positional arguments containing the demand set(s) relevant to the question. To illustrate, considering `distance(u, v, d)`, these member methods might look like:
128
- ```
129
- .distance_from(from: Relationship) # (Constrain `u` in the computed result to nodes in the argument.)
130
- .distance_to(to: Relationship) # (Constrain `v` in the computed result to nodes in the argument.)
131
- .distance_from_to(from: Relationship, to: Relationship) # (Separately constrain `u` and `v` in the computed result, to the nodes in the first and second arguments respectively.)
132
- .distance_between(pairs: Relationship) # (Jointly constrain `u` and `v` in the computed result, to the pairs of nodes in the argument.)
133
- ```
134
- These methods' arguments could be positional or keyword arguments. Each would yield a constrained `distance` relationship, backed by logic specialized to the question and provided demand set(s).
135
-
136
- #### Approach (2): distinguish questions via keyword arguments.
137
-
138
- Approach (2) might add keyword argument combinations to `.distance()`, each corresponding to one of the questions above, and accepting `Relationship` arguments containing the demand set(s) relevant to the question. To illustrate, considering `distance(u, v, d)`, these keyword argument combinations might look like:
139
- ```
140
- .distance(from=Relationship) # (Constrain `u` in the computed result to nodes in keyword argument `from`.)
141
- .distance(to=Relationship) # (Constrain `v` in the computed result to nodes in the keyword argument `to`.)
142
- .distance(from=Relationship, to=Relationship) # (Separately constrain `u` and `v` in the computed result, to the nodes in the `from` and `to` keyword arguments respectively.)
143
- .distance(pairs=Relationship) # (Jointly constrain `u` and `v` in the computed result, to the pairs of nodes in the `pairs` keyword argument.)
144
- ```
145
-
146
- (Please note that in later sections we work through what approach (2) would look like for all relations presently in the library, in logical groups. Those sections are deferred to the end of this document for folks with less time/interest.)
147
-
148
- #### Tradeoffs of these approaches.
149
-
150
- Either of these approaches is viable. They have some tradeoffs:
151
-
152
- a) Approach (1) requires a separate method name for each question, resulting in a potentially large set of methods of the `Graph` class. Groups of those methods will be related, as different questions associated with the same underlying relation, but that grouping will not be captured in the API's structure. In contrast, approach (2) naturally groups/consolidates related questions by method, i.e. the associated underlying relation. While the number of method-kwarg combinations in approach (2) matches the number of methods in approach (1), the API structure of approach (2) makes that complexity more manageable from both customer and developer perspectives.
153
-
154
- b) The natural grouping/consolidation of questions in approach (2) significantly improves discoverability of related functionality relative to approach (1).
155
-
156
- c) Related to discoverability, approach (2) allows the library to guide the customer to the functionality best for their use case: Suppose the customer calls `.distance()` with no arguments. Taking a soft stance, the library could issue a warning about cost, guiding the customer to constrain the computation via keyword arguments. Taking a hard stance, the library could throw an exception, directing the customer to constrain the computation via keyword arguments or, if they _really_ want to compute the full relation, use a slightly more verbose expression to do so. Approach (1) provides fewer opportunities in this direction: Either `.distance()` would not exist, impeding discoverability, or `.distance()` could exist, but only take the soft stance.
157
-
158
- d) Adding a question under approach (1) requires adding a new method, whereas under approach (2) it requires extending an existing method with a new keyword argument or keyword-argument combination.
159
-
160
- e) Approach (2) is consistent with and a natural extension of the existing API, where existing methods for parameterized algorithms accept keyword arguments that modify their behavior.
161
-
162
- f) For relations that are not symmetric, for example `reachable(u, v)`, there is an open question around whether we should provide different permutations of that relation. The motivation being that different permutations of a given relation may be in/efficient to compute and/or use. For example, looking up `u` by `v` in `reachable(u, v)` is not efficient, whereas looking up `v` by `u` is; doing the former efficiently requires computing an additional index. Similar statements hold for computing `reachable(u, v)` for constraints on `v` versus `u`. It may be advantageous to expose both permutations of `reachable(u, v)`, e.g. as `reachable_from(u, v)`, and (assume a better name here) `can_be_reached_from(v, u)`. Under approach (1), adding that dimension results in a further (combinatorial) explosion of then relatively long and tricky names. Under approach (2), there are two options: separate a `.reachable(...kwargs...)` method into two methods with similar keyword arguments, or retain one such method but add a keyword argument, both of which seem more natural and manageable.
163
-
164
- #### Which approach do we take?
165
-
166
- Both the balance of tradeoffs and early polling seem to favor approach (2), which this design recommends.
167
-
168
- ##### Reviewer sentiments
169
-
170
- - Favor approach (1):
171
- - Favor approach (2): David Sanders, Huda Nassar, Kenton van Perseum, Ryan Gao.
172
-
173
- ##### Conclusion
174
-
175
- Approach (2).
176
-
177
- ### How do we handle potentially intractable or exorbitantly expensive, but common, questions under this proposal?
178
-
179
- Recall from the "Which questions do we bake directly into the API?" section above the following note:
180
-
181
- > Questions that involve potentially intractable or exorbitantly expensive computations are footguns if baked into the API without requiring pause-ACKs and/or injecting friction.
182
-
183
- For example, being able to frictionlessly ask for the full `distance(u, v, d)` relation is a footgun.
184
-
185
- Injecting friction, however, requires careful balance. Consider the following use cases where minimizing friction is strongly advantageous:
186
- 1) the initial/learning experience, which we would like to make as friendly as possible;
187
- 2) exploratory use, or prototyping upstream of scaling;
188
- 3) use solely on relatively small graphs (fairly common);
189
- 4) general low floor use.
190
-
191
- How might we handle such questions while making the user experience as good as possible in use cases like 1-4?
192
-
193
- Additional consideration: Some potentially non-scalable relations may benefit from guarding while not supporting constraints, e.g. `unique_triangle` (see section in relation-by-relation workthrough, below).
194
-
195
- #### Initial options
196
-
197
- Five initial options, roughly in descending order of the strength of the guard / pause-ACK involved:
198
-
199
- ##### Error to redirect.
200
-
201
- When the customer calls `graph.distance()`, emit an error (early, at python execution time) that: 1) educates the customer about the potential cost of computing `distance` in full; 2) guides the user towards an appropriate constrained form of the call, with examples; and 3) notes that if the customer really wants to compute the full relation, they can do so using a constrained form, and shows them how to do it.
202
-
203
- Upsides: Maximal guard against footgun. Maximal user education.
204
- Downsides: Maximal friction. Least graceful when actually computing the full relation over small graphs.
205
-
206
- ##### Require an opt-in at call site.
207
-
208
- Provide a keyword argument to each non-scalable relation, for sake of argument say `allow_nonscalable` defaulted to `False`, that allows the customer to control whether non-scalable relations compute or error over the given graph, per non-scalable relation.
209
-
210
- When the customer calls `graph.distance()`, emit an error (early, at python execution time) that: 1) educates the user about the potential cost of computing `distance` in full; 2) guides the user towards an appropriate constrained form of the call, with examples; and 3) notes that if the customer really wants to compute the full relation, they can opt-in via `graph.distance(allow_nonscalable=True)`.
211
-
212
- When the customer calls `graph.distance(allow_nonscalable=True)`, yield the full `distance` relation.
213
-
214
- Upsides: Strong initial guard against the footgun. Strong user education. Risk of surprise on non-scalable computation from, e.g., setting a `Graph`-constructor-level flag (below), mitigated.
215
- Downsides: While the initial guard against the footgun is strong, inevitably someone will opt-in, forget, and be surprised when they attempt to scale up (but seems like a reasonable/defensible risk). A bit less graceful when actually computing the full relation over small graphs.
216
-
217
- ##### Require an opt-in on Graph construction.
218
-
219
- Provide a keyword argument to the `Graph` constructor, for sake of argument say `allow_nonscalable_relations` defaulted to `False`, that allows the customer to control whether non-scalable relations compute or error over the given graph.
220
-
221
- With `allow_nonscalable_relations=False`, when the customer calls `graph.distance()`, emit an error (early, at python execution time) that: 1) educates the user about the potential cost of computing `distance` in full; 2) guides the user towards an appropriate constrained form of the call, with examples; and 3) notes that if the customer really wants to compute the full relation, they can opt-in via `allow_nonscalable_relations=True` on the `Graph` constructor.
222
-
223
- With `allow_nonscalable_relations=True`, when the customer calls `graph.distance()`, yield the full `distance` relation.
224
-
225
- Upsides: Strong initial guard against the footgun. Strong user education. Minimal friction and reasonably graceful once `allow_nonscalable_relations` is set at the `Graph` constructor level, when actually computing the full relation over small graphs.
226
- Downsides: While the initial guard against the footgun is strong, inevitably someone will opt-in to non-scalable relations and then be surprised when some computation doesn't scale (either due to the relation for which they set the flag, or more likely due to another non-scalable relation).
227
-
228
- ##### Require an opt-in only over a certain scale.
229
-
230
- Variation of either of the two preceding opt-in options, focused on the relation-level opt-in form for sake of argument.
231
-
232
- When the opt-in flag is set (`graph.distance(allow_nonscalable=True)`), yield the `distance` relation in full.
233
-
234
- When the opt-in flag is not set (`graph.distance()`), emit logic that checks graph scale prior to computing the full relation. (Note that this kind of logic can be a bit brittle, or at least was historically in Rel, given enforcing evaluation order of relations in a declarative expression can be tricky.) If the graph is below threshold scale, yield the `distance` relation in full. If the graph is above threshold scale, derive an error (necessarily late, at query evaluation time) that: 1) educates the user about the potential cost of computing `distance` in full; 2) guides the user towards an appropriate constrained form of the call, with examples; and 3) notes that if the customer really wants to compute the full relation, they can opt-in via `graph.distance(allow_nonscalable=True)`.
235
-
236
- Upsides: Reasonable but weaker guard against the footgun, with feedback deferred from python execution to query evaluation time. Reasonable user education, but a bit later than otherwise. Least friction, maximal grace, when actually computing the full relation over small graphs.
237
- Downsides: Feedback comes a bit later. Whether feedback comes is data-dependent, which could be quite surprising. Scale threshold selection may be tricky. Logic may have some brittleness, but also possibly not. A bit less graceful when actually computing the full relation over what the customer may perceive as small graphs that are over threshold. Maybe a bit mysterious/magical.
238
-
239
- ##### Warn to redirect, but proceed blithely.
240
-
241
- When the customer calls `graph.distance()`, emit a warning (early, at python execution time) that: 1) educates the customer about the potential cost of computing `distance` in full; and 2) guides the user towards an appropriate constrained form of the call, with examples. But nonetheless return the full relation.
242
-
243
- Upsides: Reasonable user education. Minimal friction, maximal grace if the customer really wants to compute the full relation.
244
- Downsides: Risk of weak user education if (as is often the case) warnings/documentation are unread or ignored. Greatest risk of surprise. Greatest risk of expensive footgunning.
245
-
246
- #### Additional options proposed during review
247
-
248
- ##### Require an opt-in on Graph construction, and choose your own adventure.
249
-
250
- Kenton made the great observation that, as a user, he would like to be able to tell the library what controls / level of stricture he prefers for a given use case. Particularly, he would like a constructor-level argument that accepts, e.g., `nonscalable='error'`, `nonscalable='warn'`, and `nonscalable='allow'` (with `'error'` as the default). He characterized this approach as providing him "the best of both worlds", and noted that pandas does something along these lines with success.
251
-
252
- ##### Require an opt-in on Graph construction, choose your own adventure, and allow fine control at call sites.
253
-
254
- Ryan and Sacha discussed layering call-site control on top of Kenton's suggestion.
255
-
256
- #### Which option do we go with?
257
-
258
- This author would like additional perspectives before making a recommendation.
259
-
260
- ##### Reviewer sentiments:
261
-
262
- - David Sanders: Among the original options, David expressed preference for a combination of the opt-in approaches. Among the expanded options / after discussion, he expressed inclination to Kenton's suggestion IIRC, possibly with call-site control.
263
- - Huda Nassar: Among the original options, "Error to redirect" was Huda's preference. Among the expanded options / after discussion, Huda expressed inclination to Kenton's proposal + call-site controls, but not without call-site controls.
264
- - Ryan Gao: Among the original options, any of the middle three options (opt-in on construction, opt-in at call site, opt-in over certain scale) were Ryan's preference. Among the expanded options, he expressed preference for Kenton's suggestion, plus call-site control.
265
- - Kenton van Perseum: Given the original options, Kenton suggested the first of the additional options above (require a constructor-level opt-in, and choose your own adventure). Among the expanded suggestions, IIRC he preferred to not provide `nonscalable='allow'` at the constructor level, and to not provide call-site control, at least upstream of discussion.
266
-
267
- ##### Conclusion
268
-
269
- As of this writing, looks like constructor-level control (error, warn, allow) plus call-site control (local override).
270
-
271
- ### (High-level) implementation process considerations.
272
-
273
- One of the virtues of the recommended approach is that, for all relations in the library that are scalable, it's an extension of the existing API that can be implemented lazily as customers need and developer bandwidth allows. For relations that are not scalable, adding pause-ACK mechanisms will be API breaking, so pulling in the implementation of those mechanisms (i.e. doing it early) would be advantageous.
274
-
275
- For whichever API changes are implemented at a given time, a phased approach is possible under the hood:
276
-
277
- 1) To start, constrained versions of a given relation can be implemented as shallow filters on the existing implementation of the full relation. This should allow for rapid, relatively inexpensive implementation of the API to pull in breakage, and for testing, feedback, and iteration.
278
-
279
- 2) Later, guided by customer need and given developer bandwidth, implementations specific to each constrained version can be written, enabling a level of (relatively predictable, reliable) performance that could not be achieved through demand transformation.
280
-
281
- ### Risks/downsides
282
-
283
- 1) This design does increase the level of complexity of the API. Excepting non-scalable relations, though, if the customer does not need that complexity (need the additional performance and control that it provides), they need not be aware of or manage it.
284
-
285
- 2) This design does ultimately require more implementations backing a given relation. That cost only need be paid, though, if/when the additional performance those tailored implementations provide is desired.
286
-
287
- 3) This design is less magical than a mechanism like demand transformation. On the other hand, that's also an upside, as are the associated performance, predictability, and reliability.
288
-
289
- 4) Reduction in reuse of relations: Everywhere the customer calls, e.g., `... = graph.distance()`, their computation hits the same, single computed relation; reuse is maximal. If the customer instead calls, e.g., `foo = graph.distance(from=some_nodes)`, the logic and computed relation are specialized to the `some_nodes` relation. The customer gets reuse everywhere they consume `foo`, but not with other `graph.distance` calls such as `graph.distance()` or `graph.distance(from=other_nodes)`. Whether the reuse or constraint is more advantageous depends on the use case, requiring some understanding and thought on part of the customer.
290
-
291
- ### Before details, what needs deciding?
292
-
293
- #### Questions we need to answer now
294
-
295
- 1) Overall design, yay or nay?
296
-
297
- *Reviewer sentiments*
298
-
299
- - Ryan Gao: yay
300
- - David Sanders: yay
301
- - Huda Nassar: yay
302
- - Kenton van Perseum: yay
303
-
304
- *Conclusion*
305
-
306
- As of this writing, appears yay.
307
-
308
- 2) Which option for handling non-scalable relations?
309
-
310
- *Reviewer sentiments*
311
-
312
- Please see dedicated section for reviewer sentiments.
313
-
314
- *Conclusion*
315
-
316
- At time of this writing, it looks like constructor-level control (error, warn, allow) plus call-site control (local override).
317
-
318
- #### Questions that don't need to be answered in full now, but maybe partly
319
-
320
- 1) Do we want to expose different permutations of relations that aren't symmetric? Must be at least partially answered at this time, informing whether we, e.g., name the relevant relation `distance` or `distance_from` (to be paired with `distance_to`).
321
-
322
- *Reviewer sentiments*
323
-
324
- - David Sanders: nay
325
- - Ryan Gao: nay
326
- - Huda Nassar: nay
327
- - Kenton van Perseum: nay
328
-
329
- *Conclusion*
330
-
331
- Consensus seems to be that the likely marginal potential performance upside is not worth the complexity and potential for confusion.
332
-
333
- 2) Which questions do we want to support? (Also see details below.) Can mostly be answered over time, informed by customer need/feedback.
334
-
335
- ### Relation-by-relation workthrough
336
-
337
- Let's work through all relations in the graph library to develop a clearer sense of what this proposal entails. Those relations logically group by their handling under this design; we'll work through them in those logical groups, roughly from most straightforward to most tricky.
338
-
339
- #### Logical groups
340
-
341
- At a high level, there are two groups of relations:
342
- 1) relations that can only be computed in full; and
343
- 2) relations that can be computed in part.
344
-
345
- This proposal does not apply to relations in group (1), which includes:
346
- - num_nodes
347
- - num_edges
348
- - num_triangles
349
- - is_connected
350
- - diameter_range
351
- - pagerank
352
- - eigenvector_centrality
353
- - betweenness_centrality
354
- - average_clustering_coefficient
355
- - louvain
356
- - infomap
357
- - label_propagation
358
- - triangle_community
359
-
360
- Group (2) consists of the following subgroups:
361
-
362
- (2a): binary relations mapping each node to a single value
363
- - degree
364
- - indegree
365
- - outdegree
366
- - weighted_degree
367
- - weighted_indegree
368
- - weighted_outdegree
369
- - degree_centrality
370
- - local_clustering_coefficient
371
- - triangle_count
372
-
373
- (2b): binary relations mapping each node to a nominally small collection of nodes
374
- - neighbor
375
- - inneighbor
376
- - outneighbor
377
-
378
- (2c): ternary relations mapping pairs of nodes to single values, symmetric in nodes
379
- - adamic_adar
380
- - jaccard_similarity
381
- - cosine_similarity
382
- - preferential_attachment
383
-
384
- (2d): ternary relation mapping pairs of nodes to a nominally small collection of nodes
385
- - common_neighbor
386
-
387
- (2e): binary relation mapping each node to an identifier, but somewhat special
388
- - weakly_connected_component
389
-
390
- (2f): binary relation mapping each node to a nominally large collection of nodes
391
- - reachable_from
392
-
393
- (2g): ternary relation mapping pairs of nodes to a single value, asymmetric in nodes
394
- - distance
395
-
396
- (2h): ternary relations of nodes, somewhat special
397
- - triangle
398
- - unique_triangle
399
-
400
- #### Group (2a): binary relations mapping each node to a single value
401
- - degree
402
- - indegree
403
- - outdegree
404
- - weighted_degree
405
- - weighted_indegree
406
- - weighted_outdegree
407
- - degree_centrality
408
- - local_clustering_coefficient
409
- - triangle_count
410
-
411
- These are binary relations that map each node to a single associated value. Each tuple is efficiently computable separately.
412
-
413
- ##### Recommendation
414
-
415
- At this time, do nothing, retaining present behavior of computing the full relation. When customers need and/or developer bandwidth allows, extend the method for each of these relations to support constraint on the set of nodes for which the relation is computed, e.g. via
416
- ```
417
- graph.degree() # Yields the full relation, with reuse across .degree() calls.
418
- graph.degree(nodes=Relationship) # Yields a relation specialized to the identified nodes.
419
- ```
420
-
421
- ##### Rationale
422
-
423
- ###### Computing the full relation
424
-
425
- The number of tuples in each such relation being the number of nodes, and the cost of computing each tuple being small, computing these relations in full is reasonable even for large graphs. Consequently it seems reasonable to continue making that easy, e.g. allow `graph.degree()` to compute the full relation. Moreover, `graph.degree()` returning the full relation enables reuse; chances are this should be the default mode of use.
426
-
427
- ###### Computing subsets
428
-
429
- **Is there value in allowing customers to constrain these computations?**
430
-
431
- Yes. Customers have made it clear that, with at least some of these relations (e.g. *degree), they want to be able to compute/lookup subsets of tuples in these relations without paying the cost of computing (or precomputing) the full relation.
432
-
433
- **What questions may be worth supporting?**
434
-
435
- Let's consider the common structure `relation(node, value)`. We can efficiently compute the result for constrained `node`. We cannot, in general, compute the result for constrained `value` meaningfully more efficiently than computing the entire relation. We cannot, in general, compute the result for `(node, value)` pairs meaningfully more efficiently than computing the tuple for `node`, i.e. constraining `node`. (There are special cases worth considering, e.g. for `value` zero or one, but such functionality would better be exposed through separate, dedicated relations such as `leaf_node`, `root_node`, `isolated_node`, and similar.) This makes constraining `node` potentially worth supporting, but probably not other constraints.
436
-
437
- #### Group (2b): binary relations mapping each node to a nominally small collection of nodes
438
-
439
- - neighbor
440
- - inneighbor
441
- - outneighbor
442
-
443
- These are binary relations that map each of the subset of nodes with [in/out]neighbors to the nominally small collection of nodes constituting those [in/out]neighbors. Each tuple is efficiently computable separately.
444
-
445
- ##### Recommendation
446
-
447
- At this time, do nothing, retaining present behavior of computing the full relation. When customers need and/or developer bandwidth allows, extend the method for each of these relations to support constraint on the set of (non-neighbor / first argument) nodes for which the relation is computed, e.g. via
448
- ```
449
- graph.[in,out]neighbor() # Yields the full relation, with reuse across .[in,out]neighbor() calls.
450
- graph.[in,out]neighbor(nodes=Relationship) # Yields a relation specialized to the identified nodes.
451
- ```
452
-
453
- ##### Rationale
454
-
455
- ###### Computing the full relation
456
-
457
- The number of tuples in each such relation is typically proportional to the number of edges in the graph, and the cost of computing each tuple being small, computing these relations in full is typically reasonable even for large graphs. In other words, if we can work with the graph in full (i.e. its edge-list scale), we can work with these relations in full. Consequently it seems reasonable to continue making that easy, e.g. allow `graph.[in/out]neighbor()` to compute the full relation. Moreover, `graph.[in/out]neighbor()` returning the full relation enables reuse; chances are this should be the default mode of use.
458
-
459
- ###### Computing subsets
460
-
461
- **Is there value in allowing customers to constrain these computations?**
462
-
463
- Yes. Customers have made it clear that they want to be able to compute/lookup subsets of tuples in these relations without paying the cost of computing (or precomputing) the full relation.
464
-
465
- **What questions may be worth supporting?**
466
-
467
- Let's consider the common structure `[in/out]neighbor(node, neigh)`. If we compute and cache a reversed edge list, we can efficiently compute the result for either of constrained `node` or `neigh` for any of these relations. Computing coupled `(node, neigh)` constrained subsets can be more efficient than either of the former in some cases, but it's not clear whether that efficiency improvement is meaningful or common enough to justify exposure.
468
-
469
- Looking up the set of [in/out]neighbors for a given node is pretty common, i.e. constraining `node` seems reasonable. What about constraining `neigh`? `neighbor` is symmetric, so constraining `neigh` is redundant there. Computing the set of `node`s for given `neigh` in {in,out}neighbor is equivalent to computing the set of `neigh`s for given `node` in {out,in}neighbor, making this constraint somewhat redundant as well.
470
-
471
- In sum, supporting constraint of `node` in each of these relations seems worthwhile, but other constraints are likely to be low value. Can be implemented piecemeal guided by customer feedback if need be.
472
-
473
- #### Group (2c): ternary relations mapping pairs of nodes to single values, symmetric in nodes
474
-
475
- - adamic_adar
476
- - jaccard_similarity
477
- - cosine_similarity
478
- - preferential_attachment
479
-
480
- These are ternary relations that map pairs of nodes to a single value. Each tuple is efficiently computable separately. Additionally, each of these relations is symmetric in the nodes in each tuple, i.e. `relation(u, v, f)` biconditionally implies `relation(v, u, f)`.
481
-
482
- ##### Recommendation
483
-
484
- At this time, add the TBD guard/pause-ACK mechanism for non-scalable relations to calls without constraints, e.g. `graph.cosine_similarity()`.
485
-
486
- Consider supporting constraint on `u`, on both `u` and `v` separately, and `u` and `v` jointly as pairs, in that priority order. E.g., modulo bikeshedding:
487
- ```
488
- graph.cosine_similarity(from=Relationship)
489
- graph.cosine_similarity(from=Relationship, to=Relationship)
490
- graph.cosine_similarity(pairs=Relationship)
491
- ```
492
- (Note that `from` and `to` don't seem quite right given the symmetry. Suggestions appreciated.)
493
-
494
- ##### Rationale
495
-
496
- ###### Computing the full relation
497
-
498
- The number of tuples in each such relation can approach, or in some cases (`preferential_attachment`) always is, quadratic in the number of nodes. Over the graphs that we've encountered in practice, computing these relations in full for large graphs tends to be intractable or exorbitantly expensive, and should be guarded against.
499
-
500
- ###### Computing subsets
501
-
502
- **Is there value in allowing customers to constrain these computations?**
503
-
504
- Yes. Computing subsets of these relations is the nominal mode of exercise.
505
-
506
- **What questions may be worth supporting?**
507
-
508
- Let's consider the common structure `relation(u, v, f)`.
509
-
510
- Given the symmetry of these relations, it makes sense to constrain `u`, or both `u` and `v` separately, or both `u` and `v` as `(u, v)` pairs, but not to constrain `v` alone (redundant with constraining `u` alone). We can efficiently compute all three of these constraint types, and all three seem common. We should probably support all three, and at least the first two out of the gate.
511
-
512
- What about constraining `f`, or combinations of `f` and `v`? Some questions along these lines -- e.g., give me all the nodes whose cosine similarity to a given node is 1.0 -- may not be uncommon. And it's possible that we can compute some of them more efficiently than constraining on `u` and `v` and then filtering on `f` post. But this becomes tricky very fast, is likely out of scope for the foreseeable future, and likely can be added in a non-breaking way, so let's defer.
513
-
514
- #### Group (2d): ternary relation mapping pairs of nodes to a nominally small collection of nodes
515
- - common_neighbor
516
-
517
- This is a ternary relationship that maps pairs of nodes to a set of associated nodes. Each tuple (or subset of tuples for a given leading pair of nodes) is efficiently computable separately. Additionally, this relation is symmetric in the leading pairs of nodes in each tuple, i.e. `common_neighbor(u, v, w)` biconditionally implies `common_neighbor(v, u, w)`.
518
-
519
- ##### Recommendation
520
-
521
- At this time, add the TBD guard/pause-ACK mechanism for non-scalable relations to calls without constraints (i.e. `graph.common_neighbor()`).
522
-
523
- Consider supporting constraint on `u`, on both `u` and `v` separately, and `u` and `v` jointly as pairs, in that priority order. E.g., modulo bikeshedding:
524
- ```
525
- graph.common_neighbor(from=Relationship)
526
- graph.common_neighbor(from=Relationship, to=Relationship)
527
- graph.common_neighbor(pairs=Relationship)
528
- ```
529
- (Note that `from` and `to` don't seem quite right given the symmetry. Suggestions appreciated.)
530
-
531
- Only implement constraint on all of `u`, `v`, and `w` if customers indicate need.
532
-
533
- ##### Rationale
534
-
535
- ###### Computing the full relation
536
-
537
- The number of tuples in this relation can scale quadratically in the number of edges or even cubically in the number of nodes in the graph. Computing this relation in full for large graphs is often intractable or exorbitantly expensive.
538
-
539
- ###### Computing subsets
540
-
541
- **Is there value in allowing customers to constrain these computations?**
542
-
543
- Yes. The nominal mode of exercise of this relation is over a subset of the possible tuples.
544
-
545
- **What questions may be worth supporting?**
546
-
547
- Let's consider `common_neighbor(u, v, w)`.
548
-
549
- Given the symmetry of this relation, it makes sense to constrain `u`, or both `u` and `v` separately, or both `u` and `v` as `(u, v)` pairs, or all of `u`, `v`, and `w` separately, or as a triplet `(u, v, w)`, but constraining `v` alone is redundant.
550
-
551
- Does it make sense to constrain `w` alone? That's equivalent to asking for the outer product of `w`'s neighbors with that same set, so not really.
552
-
553
- Does it make sense to constrain `w` and `u` without `v` (or `w` and `v` without `u`), separately or jointly? Those are equivalent to asking for the neighbors of `w` in a weird way, so not really.
554
-
555
- We can efficiently compute constraining `u`, constraining both `u` and `v` separately, constraining both `u` and `v` as a pair `(u, v)`, and constraining `u`, `v`, and `w` as a triplet `(u, v, w)`. Constraining `u`, `v`, and `w` separately may be more efficiently computable than computing `u` and `v` separately and then filtering on `w`, but to the degree that it is more efficient, it may not be much more efficient, and the question may not be common.
556
-
557
- #### Group (2e): binary relation mapping each node to an identifier, but somewhat special
558
- - weakly_connected_component
559
-
560
- This is a binary relationship that maps each node to a single value, where that single value happens to be another node identifying the leading node's weakly connected component.
561
-
562
- ##### Recommendation
563
-
564
- At this time, do nothing, retaining present behavior of computing the full relation. If customers need and developer bandwidth allows, consider extending this relation's method to support constraint on the set of nodes for which the relation is computed, e.g. via
565
- ```
566
- graph.weakly_connected_component() # Yields the full relation, with reuse across .weakly_connected_component() calls.
567
- graph.weakly_connected_component(nodes=Relationship) # Yields a relation specialized to the identified nodes.
568
- ```
569
-
570
- ##### Rationale
571
-
572
- ###### Computing the full relation
573
-
574
- The number of tuples in this relation matches the number of nodes in the graph. It can be computed in full fairly efficiently (at least in principle). Computing the full relation is the nominal mode of exercise.
575
-
576
- ###### Computing subsets
577
-
578
- **Is there value in allowing customers to constrain these computations?**
579
-
580
- Possibly, yes. May need more customer insight.
581
-
582
- **What questions may be worth supporting?**
583
-
584
- Constraining on the node (first argument, as opposed to component identifier, i.e. second argument) is likely the only constraint that makes sense in practice.
585
-
586
- Computing a given tuple requires computation of all tuples associated with that given tuple's weakly connected component. This means there can be no upside to constraint (if the graph is weakly connected), or substantial upside to constraint (if it contains many separate components). In many cases the benefit of reuse makes constraint fraught, but in other use cases not using constraint will be fraught. Allowing constraint may be worthwhile as such.
587
-
588
- #### Group (2f): binary relation mapping each node to a nominally large collection of nodes
589
- - reachable_from
590
-
591
- This is a binary relation that maps each of the subset of nodes with (out)edges to the collection of nodes that can be reached from that node. This relation is not symmetric.
592
-
593
- ##### Recommendation
594
-
595
- At this time, add the TBD guard/pause-ACK mechanism for non-scalable relations to calls without constraints (i.e. `graph.reachable_from()`). Depending on decision on whether to expose different permutations of asymmetric relations, potentially rename accordingly.
596
-
597
- Consider supporting constraint on `u` (for `reachable_from(u, v)`), constraint on `v`, constraint on both `u` and `v` separately, and constraint on `u` and `v` jointly as pairs, in that priority order. E.g., modulo bikeshedding:
598
- ```
599
- graph.reachable(from=Relationship)
600
- graph.reachable(to=Relationship)
601
- graph.reachable(from=Relationship, to=Relationship)
602
- graph.reachable(pairs=Relationship)
603
- ```
604
-
605
- ###### Other notes
606
-
607
- Regarding asymmetry: In some cases `reachable_from(u, v)` is needed (or more descriptively, `nodes_downstream_of(u, v)`), and in other cases, say, `nodes_upstream_of(v, u)`. Not sure whether we want to separate those, given that generating one from the other is inexpensive. Perspectives appreciated.
608
-
609
- In any case, if we add keyword arguments `from`, `to`, and `pairs`, or similar, the `_from` in `reachable_from` no longer seems right. Might want to call this something else. `reach` or `reachability` may be the most common terms, modulo (non-reflexive) `transitive_closure` outside the graph-specific world.
610
-
611
- ##### Rationale
612
-
613
- ###### Computing the full relation
614
-
615
- The number of tuples in this relation can approach quadratic in the number of nodes in the graph, and computing it in full is often exorbitantly expensive or intractable for large graphs.
616
-
617
- ###### Computing subsets
618
-
619
- **Is there value in allowing customers to constrain these computations?**
620
-
621
- Yes. The nominal mode of exercise of this relation is with constraint to a subset of the possible tuples.
622
-
623
- **What questions may be worth supporting?**
624
-
625
- Let's consider `reachable_from(u, v)`.
626
-
627
- It makes sense to constrain `u`, or `v`, or both `u` and `v` separately, or both `u` and `v` as `(u, v)` pairs; these all seem like reasonable and fairly common questions.
628
-
629
- Efficiency perspective: Computing the full relation is `O(edges*(edges + nodes))` IIRC. Constraining `u` requires `O(|u|*(edges + nodes))` or so, and likewise for constraining `v`. Constraining `u` and `v` to pairs `(u, v)` requires roughly `O(|(u,v)|*(edges + nodes))` in the worst case, but in practice early termination can reduce that to roughly `|(u, v)|*O(distance(u, v))`. Constraining `u` and `v` separately hypothetically can be done a bit more efficiently than constraining `u` only. Each of these potentially has merit and could warrant its own implementation.
630
-
631
- #### Group (2g): ternary relation mapping pairs of nodes to a single value, asymmetric in nodes
632
- - distance
633
-
634
- This is a ternary relation that maps pairs of nodes to the distance between them. This relation is not symmetric in its leading pair of nodes.
635
-
636
- ##### Recommendation
637
-
638
- At this time, add the TBD guard/pause-ACK mechanism for non-scalable relations to calls without constraints (i.e. `graph.distance()`). Depending on decision on whether to expose different permutations of asymmetric relations, potentially rename accordingly.
639
-
640
- Consider supporting constraint on `u` (for `distance(u, v, d)`), constraint on `v`, constraint on both `u` and `v` separately, and constraint on `u` and `v` jointly as pairs, in that priority order. E.g., modulo bikeshedding:
641
- ```
642
- graph.distance(from=Relationship)
643
- graph.distance(to=Relationship)
644
- graph.distance(from=Relationship, to=Relationship)
645
- graph.distance(pairs=Relationship)
646
- ```
647
-
648
- Questions related to constraint on `d` probably warrant a separate method if supported; can be driven by customer demand.
649
-
650
- ##### Other notes:
651
-
652
- Regarding the asymmetry: In some cases `distance_from(u, v)` is needed, and in other cases, say, `distance_to(v, u)`. Not sure whether we want to separate those, given that generating one from the other is inexpensive. Perspectives appreciated.
653
-
654
- ##### Rationale
655
-
656
- ###### Computing the full relation
657
-
658
- The number of tuples in this relation can approach quadratic in the number of nodes in the graph, and computing it in full is often exorbitantly expensive or intractable for large graphs.
659
-
660
- ###### Computing subsets
661
-
662
- **Is there value in allowing customers to constrain these computations?**
663
-
664
- Yes. The nominal mode of exercise of this relation is with constraint to a subset of the possible tuples.
665
-
666
- **What questions may be worth supporting?**
667
-
668
- Let's consider `distance(u, v, d)`.
669
-
670
- It makes sense to constrain `u`, or `v`, or both `u` and `v` separately, or both `u` and `v` as `(u, v)` pairs; these all seem like reasonable and fairly common questions.
671
-
672
- What about constraining `d`? Some questions corresponding to constraining `d` (in combination with `u` and `v`) seem worth supporting. Particularly, constraining `u` and `d` is equivalent to asking "what is the d-shell of nodes from `u`", similar to looking for a ball around `u` with a given radius. `v` and `d` yields a similar question, for inbound paths. But it's probably best to expose these questions via other/dedicated relations/methods, as with `ball`, e.g. `shell`. Apart from that, constraining all of `u`, `v`, and `d` at once isn't particularly useful given constraint of `u` and `v` to a pair `(u, v)`, and constraining just `d` doesn't seem like a very common question ("yield all pairs of nodes in the graph separated by distance `d`").
673
-
674
- #### Group (2h): ternary relations of nodes, somewhat special
675
- - triangle
676
- - unique_triangle
677
-
678
- These are ternary relations of nodes that include all permutations (`triangle`) or unique ordered permutations (`unique_triangle`) of triangles in the graph.
679
-
680
- ##### Recommendation
681
-
682
- Go outside.
683
-
684
- ###### `unique_triangle`
685
-
686
- At this time, add the TBD guard/pause-ACK mechanism for non-scalable relations to calls without constraints. Do not plan to add calls with constraints later, in favor of adding them to `triangle` only.
687
-
688
- ###### `triangle`
689
-
690
- At this time, add the TBD guard/pause-ACK mechanism for non-scalable relations to calls without constraints (i.e. `graph.triangle()`).
691
-
692
- Consider supporting constraint on `u` (for `triangle(u, v, w)`), constraint on pairs `(u, v)`, and constraint on triples `(u, v, w)`. E.g., modulo bikeshedding:
693
- ```
694
- graph.triangle(nodes=Relationship)
695
- graph.triangle(pairs=Relationship)
696
- graph.triangle(triples=Relationship)
697
- ```
698
-
699
- ##### Rationale
700
-
701
- It's nice outside.
702
-
703
- ###### Computing the full relation
704
-
705
- Worst case number of triangles, and ergo tuples, is O(|E|^{3/2}), and likewise with computational complexity. Typical case is near linear. Computing the full relation is not an unreasonable ask in many cases, but can explode in others. Tricky middle-ground case. Perhaps best to err on the side of caution and require a pause-ACK, as long as the ACK mechanism remains easy to exercise.
706
-
707
- ###### Computing subsets
708
-
709
- **Is there value in allowing customers to constrain these computations?**
710
-
711
- Yes. For example, being able to ask "does this triplet of nodes form a triangle?" is a common question.
712
-
713
- **What questions may be worth supporting?**
714
-
715
- Let's consider `[unique_]triangle(u, v, w)`.
716
-
717
- It's common to ask "is this triple a triangle?", which suggests allowing `triangle(triple=Relationship)` or similar.
718
-
719
- It's common to ask "what triangles include this given node / these given nodes?", which suggests allowing `triangle(node=Relationship)` or similar.
720
-
721
- It may not be uncommon to ask "what nodes form triangles with this given pair of nodes / these given pairs of nodes?", which suggests allowing `triangle(pair=Relationship)`.
722
-
723
- Each of the three preceding questions can be answered meaningfully more efficiently than computing all triangles. Each of the three preceding questions is most relevant to `triangle`; asking the same questions through `unique_triangle` doesn't seem ergonomic, though it is possible.
724
-
725
- #### Groups that don't exist yet but are worth mentioning
726
-
727
- As of this writing, the library does not contain any unary relations that contain more than one tuple. But chances are it will at some point, e.g. `leaf_node`, `root_node`, `isolated_node`, and similar. Chances are these will all be of a kind where it's reasonable to compute the full relation without guards, to which the ability to constrain the computation can be added later if need be.
728
-
729
- # Appendix
730
-
731
- ## A recent customer problem worked under this design
732
-
733
- A customer ran into issues making a computation along the lines of the following efficient. The computation was roughly: For a given node, compute the set of nodes downstream (variant one) or upstream (variant two) of that node, and filter those nodes for (one case) leaves and (another case) roots. Under this design, specifically with at least constructor-level controls on allowing non-scalable relations with minimal friction, what would that look like, both in prototyping (low floor perspective) and refinement for production (high ceiling perspective)?
734
-
735
- Let's focus on variant one, and to make the example a bit more challenging and interesting, let's allow multiple seed nodes.
736
-
737
- Low floor use (prototyping):
738
- ```
739
- # Given a seed node, find the leaf nodes downstream of that seed node.
740
-
741
- model = Model(...)
742
- graph = Graph(model, ..., nonscalable='allow')
743
-
744
- seeds = model.Relationship("{Node} is of interest")
745
- where(... graph.Node is of interest ...).define(seeds(graph.Node))
746
-
747
- reachable = graph.reachable()
748
- outdegree = graph.outdegree()
749
-
750
- leaves_reachable_from_seed = model.Relationship("seed {Node} can reach leaf {Node}")
751
-
752
- seed_node = graph.Node.ref()
753
- reachable_leaf_node = graph.Node.ref()
754
-
755
- define(
756
- leaves_reachable_from_seed(seed_node, reachable_leaf_node)
757
- ).where(
758
- seeds(seed_node),
759
- reachable(seed_node, reachable_leaf_node),
760
- outdegree(reachable_leaf_node, 0)
761
- )
762
- ```
763
- In prototyping / low-floor use, the user need not bother with constraints, definition and propagation of demand sets, and with `nonscalable='allow'` friction using non-scalable relations is minimized. On the other hand, `graph.reachable()` and `graph.outdegree()` are computed in full; i.e. this formulation is not efficient unless the cost of computing the reachable and outdegree relations in full is amortized over many instances of the above query (and that assumes that computing the reachable relation is tractable at all).
764
-
765
- High ceiling use (refinement for production):
766
- ```
767
- # Given a seed node, find the leaf nodes downstream of that seed node.
768
-
769
- model = Model(...)
770
- graph = Graph(model, ...)
771
-
772
- seeds = model.Relationship("{Node} is of interest")
773
- where(... graph.Node is of interest ...).define(seeds(graph.Node))
774
-
775
- reachable_from_seed = graph.reachable(from=seeds)
776
- # yields Relationship("seed {Node} can reach {Node}")
777
-
778
- reached_node = graph.Node.ref()
779
- reached_nodes = model.Relationship("{Node} can be reached from some seed")
780
- define(reached_nodes(reached_node)).where(reachable_from_seed(graph.Node, reached_node))
781
-
782
- reached_node_outdegree = graph.outdegree(of=reached_nodes)
783
- # yields Relationship("{Node} reachable from some seed has {outdegree:Integer}")
784
-
785
- leaves_reachable_from_seed = model.Relationship("seed {Node} can reach leaf {Node}")
786
-
787
- seed_node = graph.Node.ref()
788
- reached_leaf_node = graph.Node.ref()
789
-
790
- define(
791
- leaves_reachable_from_seed(seed_node, reached_leaf_node)
792
- ).where(
793
- reachable_from_seed(seed_node, reached_leaf_node),
794
- reached_node_outdegree(reached_leaf_node, 0)
795
- )
796
- ```
797
- In refinement for production / high-ceiling use, the user must specify constraints, and define and propagate demand sets. On the other hand, only relevant subsets of `graph.reachable()` and `graph.outdegree()` are computed; i.e. this formulation is relatively efficient, and is computable even if the reachable relation is not tractable to compute in full.