qseal 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. qseal-0.1.0/.github/workflows/ci.yml +56 -0
  2. qseal-0.1.0/.gitignore +16 -0
  3. qseal-0.1.0/CHANGELOG.md +28 -0
  4. qseal-0.1.0/CONTRIBUTING.md +45 -0
  5. qseal-0.1.0/LICENSE +21 -0
  6. qseal-0.1.0/PKG-INFO +270 -0
  7. qseal-0.1.0/README.md +241 -0
  8. qseal-0.1.0/docker/sqlsolver-smoke.Dockerfile +20 -0
  9. qseal-0.1.0/docs/architecture.md +61 -0
  10. qseal-0.1.0/docs/artifacts.md +303 -0
  11. qseal-0.1.0/docs/caching-and-trajectories.md +83 -0
  12. qseal-0.1.0/docs/candidate-evidence-ci.md +144 -0
  13. qseal-0.1.0/docs/ci.md +113 -0
  14. qseal-0.1.0/docs/duckdb-fixtures.md +38 -0
  15. qseal-0.1.0/docs/github-actions.md +195 -0
  16. qseal-0.1.0/docs/llm-candidates.md +210 -0
  17. qseal-0.1.0/docs/performance-evidence.md +219 -0
  18. qseal-0.1.0/docs/product-demo.md +218 -0
  19. qseal-0.1.0/docs/qed-spike.md +76 -0
  20. qseal-0.1.0/docs/real-project-evaluation.md +299 -0
  21. qseal-0.1.0/docs/rewrite-environment.md +78 -0
  22. qseal-0.1.0/docs/rewrite-policy-gym.md +116 -0
  23. qseal-0.1.0/docs/roadmap.md +79 -0
  24. qseal-0.1.0/docs/scope.md +308 -0
  25. qseal-0.1.0/docs/search-baselines.md +75 -0
  26. qseal-0.1.0/docs/sqlsolver-spike.md +152 -0
  27. qseal-0.1.0/docs/task-corpus.md +397 -0
  28. qseal-0.1.0/docs/verieql-spike.md +142 -0
  29. qseal-0.1.0/examples/benchmark/original.sql +2 -0
  30. qseal-0.1.0/examples/benchmark/rewritten.sql +2 -0
  31. qseal-0.1.0/examples/benchmark/setup.sql +5 -0
  32. qseal-0.1.0/examples/candidates/manual/001_manual_distinct_removed.sql +2 -0
  33. qseal-0.1.0/examples/candidates/manual/metadata.json +13 -0
  34. qseal-0.1.0/examples/candidates/original.sql +2 -0
  35. qseal-0.1.0/examples/candidates/schema.yml +7 -0
  36. qseal-0.1.0/examples/dbt/distinct.sql +2 -0
  37. qseal-0.1.0/examples/dbt/not_null.sql +3 -0
  38. qseal-0.1.0/examples/dbt/schema.yml +9 -0
  39. qseal-0.1.0/examples/dbt_project/models/dim_users.sql +2 -0
  40. qseal-0.1.0/examples/dbt_project/models/fact_orders.sql +3 -0
  41. qseal-0.1.0/examples/dbt_project/models/marts/positive_orders.sql +6 -0
  42. qseal-0.1.0/examples/dbt_project/models/schema.yml +21 -0
  43. qseal-0.1.0/examples/dbt_project/target/compiled/qseal/models/dim_users.sql +2 -0
  44. qseal-0.1.0/examples/dbt_project/target/compiled/qseal/models/fact_orders.sql +3 -0
  45. qseal-0.1.0/examples/dbt_project/target/compiled/qseal/models/marts/positive_orders.sql +6 -0
  46. qseal-0.1.0/examples/distinct/original.sql +2 -0
  47. qseal-0.1.0/examples/distinct/original_where.sql +3 -0
  48. qseal-0.1.0/examples/distinct/rewritten.sql +2 -0
  49. qseal-0.1.0/examples/distinct/rewritten_where.sql +3 -0
  50. qseal-0.1.0/examples/distinct/schema.yml +7 -0
  51. qseal-0.1.0/examples/join_elimination/original.sql +3 -0
  52. qseal-0.1.0/examples/join_elimination/rewritten.sql +2 -0
  53. qseal-0.1.0/examples/join_elimination/schema.yml +4 -0
  54. qseal-0.1.0/examples/predicate_pushdown/original.sql +6 -0
  55. qseal-0.1.0/examples/predicate_pushdown/rewritten.sql +3 -0
  56. qseal-0.1.0/examples/product_demo/README.md +89 -0
  57. qseal-0.1.0/examples/product_demo/candidates/001_remove_distinct.sql +2 -0
  58. qseal-0.1.0/examples/product_demo/candidates/002_filter_rows.sql +3 -0
  59. qseal-0.1.0/examples/product_demo/candidates/metadata.json +18 -0
  60. qseal-0.1.0/examples/product_demo/dbt_project/models/dim_users.sql +2 -0
  61. qseal-0.1.0/examples/product_demo/dbt_project/models/fct_orders.sql +8 -0
  62. qseal-0.1.0/examples/product_demo/dbt_project/models/fct_orders_fk.sql +8 -0
  63. qseal-0.1.0/examples/product_demo/dbt_project/models/schema.yml +26 -0
  64. qseal-0.1.0/examples/product_demo/original.sql +2 -0
  65. qseal-0.1.0/examples/product_demo/setup.sql +5 -0
  66. qseal-0.1.0/examples/unsupported/join.sql +3 -0
  67. qseal-0.1.0/pyproject.toml +78 -0
  68. qseal-0.1.0/scripts/benchmark_proven_candidates.py +41 -0
  69. qseal-0.1.0/scripts/compare_real_project_reports.py +48 -0
  70. qseal-0.1.0/scripts/evaluate_real_projects.sh +227 -0
  71. qseal-0.1.0/scripts/explain_proven_candidates.py +33 -0
  72. qseal-0.1.0/scripts/generate_llm_candidates.py +44 -0
  73. qseal-0.1.0/scripts/modal_benchmark.py +137 -0
  74. qseal-0.1.0/scripts/modal_verify.py +182 -0
  75. qseal-0.1.0/scripts/package_smoke.sh +172 -0
  76. qseal-0.1.0/scripts/qed_spike_unknowns.py +84 -0
  77. qseal-0.1.0/scripts/run_llm_verification_sqlsolver.sh +99 -0
  78. qseal-0.1.0/scripts/run_qseal_sqlsolver_candidate_smoke.sh +73 -0
  79. qseal-0.1.0/scripts/run_qseal_sqlsolver_fixture.sh +71 -0
  80. qseal-0.1.0/scripts/run_qseal_sqlsolver_pair.sh +49 -0
  81. qseal-0.1.0/scripts/run_sqlsolver_container_smoke.sh +175 -0
  82. qseal-0.1.0/scripts/run_sqlsolver_fixture.sh +116 -0
  83. qseal-0.1.0/scripts/run_verieql_spike.sh +37 -0
  84. qseal-0.1.0/scripts/sqlsolver_command.sh +21 -0
  85. qseal-0.1.0/scripts/verieql_driver.py +62 -0
  86. qseal-0.1.0/scripts/verieql_spike.py +121 -0
  87. qseal-0.1.0/scripts/verify_llm_candidates.py +77 -0
  88. qseal-0.1.0/src/qseal/__init__.py +3 -0
  89. qseal-0.1.0/src/qseal/benchmark/__init__.py +36 -0
  90. qseal-0.1.0/src/qseal/benchmark/duckdb.py +476 -0
  91. qseal-0.1.0/src/qseal/benchmark/model.py +81 -0
  92. qseal-0.1.0/src/qseal/benchmark/snowflake.py +506 -0
  93. qseal-0.1.0/src/qseal/benchmark/snowflake_suite.py +855 -0
  94. qseal-0.1.0/src/qseal/cache.py +54 -0
  95. qseal-0.1.0/src/qseal/candidates/__init__.py +1 -0
  96. qseal-0.1.0/src/qseal/candidates/benchmarking.py +246 -0
  97. qseal-0.1.0/src/qseal/candidates/bundle.py +34 -0
  98. qseal-0.1.0/src/qseal/candidates/evidence.py +198 -0
  99. qseal-0.1.0/src/qseal/candidates/explain.py +252 -0
  100. qseal-0.1.0/src/qseal/candidates/generation.py +439 -0
  101. qseal-0.1.0/src/qseal/candidates/verification.py +239 -0
  102. qseal-0.1.0/src/qseal/cli.py +3779 -0
  103. qseal-0.1.0/src/qseal/constraints/__init__.py +1 -0
  104. qseal-0.1.0/src/qseal/constraints/dbt_loader.py +223 -0
  105. qseal-0.1.0/src/qseal/constraints/loader.py +36 -0
  106. qseal-0.1.0/src/qseal/constraints/model.py +94 -0
  107. qseal-0.1.0/src/qseal/constraints/yaml_loader.py +38 -0
  108. qseal-0.1.0/src/qseal/corpora/__init__.py +11 -0
  109. qseal-0.1.0/src/qseal/corpora/duckdb-v1/corpus.yml +540 -0
  110. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-distinct-not-null-active-users.sql +3 -0
  111. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-distinct-not-null-events-user-range.sql +3 -0
  112. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-distinct-not-null-high-value-orders.sql +3 -0
  113. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-active-users.sql +3 -0
  114. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-events-user-range.sql +3 -0
  115. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-high-value-orders.sql +3 -0
  116. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-inverse-active-users.sql +3 -0
  117. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-inverse-events-user-range.sql +3 -0
  118. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-inverse-high-value-orders.sql +3 -0
  119. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-not-null-distinct-active-users.sql +3 -0
  120. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-not-null-distinct-events-user-range.sql +3 -0
  121. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-not-null-distinct-high-value-orders.sql +3 -0
  122. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/distinct-and-not-null-events.sql +3 -0
  123. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/distinct-and-not-null-orders.sql +3 -0
  124. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/distinct-and-not-null.sql +3 -0
  125. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/double-not-null-events.sql +3 -0
  126. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/double-not-null-orders.sql +3 -0
  127. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/double-not-null-users.sql +3 -0
  128. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/join-distinct-events-to-exists.sql +3 -0
  129. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/join-distinct-to-exists.sql +3 -0
  130. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/not-null-pushdown-events.sql +6 -0
  131. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/not-null-pushdown-orders.sql +6 -0
  132. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/not-null-pushdown-users.sql +6 -0
  133. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/predicate-pushdown-events-user.sql +6 -0
  134. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/predicate-pushdown-high-value-orders.sql +6 -0
  135. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/predicate-pushdown-user-status.sql +6 -0
  136. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-distinct-events.sql +2 -0
  137. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-distinct-orders.sql +2 -0
  138. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-distinct-users.sql +2 -0
  139. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-not-null-event-id.sql +3 -0
  140. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-not-null-order-id.sql +3 -0
  141. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-not-null-user-id.sql +3 -0
  142. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/unused-left-join-events-users.sql +3 -0
  143. qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/unused-left-join-users.sql +3 -0
  144. qseal-0.1.0/src/qseal/corpora/duckdb-v1/schema.yml +31 -0
  145. qseal-0.1.0/src/qseal/corpus/__init__.py +93 -0
  146. qseal-0.1.0/src/qseal/corpus/aggregate.py +373 -0
  147. qseal-0.1.0/src/qseal/corpus/inspect.py +353 -0
  148. qseal-0.1.0/src/qseal/corpus/loader.py +173 -0
  149. qseal-0.1.0/src/qseal/corpus/materialize.py +24 -0
  150. qseal-0.1.0/src/qseal/corpus/model.py +157 -0
  151. qseal-0.1.0/src/qseal/corpus/repeat.py +62 -0
  152. qseal-0.1.0/src/qseal/corpus/runner.py +586 -0
  153. qseal-0.1.0/src/qseal/corpus/summary.py +298 -0
  154. qseal-0.1.0/src/qseal/corpus/trajectories.py +327 -0
  155. qseal-0.1.0/src/qseal/dbt/__init__.py +1 -0
  156. qseal-0.1.0/src/qseal/dbt/git_diff.py +55 -0
  157. qseal-0.1.0/src/qseal/dbt/intake.py +276 -0
  158. qseal-0.1.0/src/qseal/dbt/jinja.py +108 -0
  159. qseal-0.1.0/src/qseal/dbt/project.py +152 -0
  160. qseal-0.1.0/src/qseal/dbt/scan.py +385 -0
  161. qseal-0.1.0/src/qseal/dialects.py +5 -0
  162. qseal-0.1.0/src/qseal/environment/__init__.py +25 -0
  163. qseal-0.1.0/src/qseal/environment/cache.py +288 -0
  164. qseal-0.1.0/src/qseal/environment/core.py +402 -0
  165. qseal-0.1.0/src/qseal/environment/model.py +53 -0
  166. qseal-0.1.0/src/qseal/environment/trajectory.py +87 -0
  167. qseal-0.1.0/src/qseal/evaluation.py +170 -0
  168. qseal-0.1.0/src/qseal/fixtures/__init__.py +4 -0
  169. qseal-0.1.0/src/qseal/fixtures/duckdb.py +230 -0
  170. qseal-0.1.0/src/qseal/fixtures/model.py +41 -0
  171. qseal-0.1.0/src/qseal/ir/__init__.py +1 -0
  172. qseal-0.1.0/src/qseal/ir/model.py +250 -0
  173. qseal-0.1.0/src/qseal/parser/__init__.py +1 -0
  174. qseal-0.1.0/src/qseal/parser/fragments.py +114 -0
  175. qseal-0.1.0/src/qseal/parser/sqlglot_parser.py +835 -0
  176. qseal-0.1.0/src/qseal/policy/__init__.py +77 -0
  177. qseal-0.1.0/src/qseal/policy/baseline.py +1867 -0
  178. qseal-0.1.0/src/qseal/report/__init__.py +1 -0
  179. qseal-0.1.0/src/qseal/report/diff.py +27 -0
  180. qseal-0.1.0/src/qseal/report/guards.py +105 -0
  181. qseal-0.1.0/src/qseal/report/json.py +251 -0
  182. qseal-0.1.0/src/qseal/report/markdown.py +205 -0
  183. qseal-0.1.0/src/qseal/report/patch.py +130 -0
  184. qseal-0.1.0/src/qseal/report/text.py +696 -0
  185. qseal-0.1.0/src/qseal/rewrites/__init__.py +1 -0
  186. qseal-0.1.0/src/qseal/rewrites/accepted_values_case.py +350 -0
  187. qseal-0.1.0/src/qseal/rewrites/accepted_values_filter.py +212 -0
  188. qseal-0.1.0/src/qseal/rewrites/base.py +40 -0
  189. qseal-0.1.0/src/qseal/rewrites/chain.py +140 -0
  190. qseal-0.1.0/src/qseal/rewrites/count_distinct.py +224 -0
  191. qseal-0.1.0/src/qseal/rewrites/distinct.py +125 -0
  192. qseal-0.1.0/src/qseal/rewrites/group_by_unique.py +196 -0
  193. qseal-0.1.0/src/qseal/rewrites/join_distinct_exists.py +190 -0
  194. qseal-0.1.0/src/qseal/rewrites/join_elimination.py +378 -0
  195. qseal-0.1.0/src/qseal/rewrites/not_null_filter.py +193 -0
  196. qseal-0.1.0/src/qseal/rewrites/predicate_pushdown.py +184 -0
  197. qseal-0.1.0/src/qseal/rewrites/registry.py +106 -0
  198. qseal-0.1.0/src/qseal/rewrites/subtree.py +72 -0
  199. qseal-0.1.0/src/qseal/search/__init__.py +22 -0
  200. qseal-0.1.0/src/qseal/search/algorithms.py +551 -0
  201. qseal-0.1.0/src/qseal/search/model.py +51 -0
  202. qseal-0.1.0/src/qseal/verifier/__init__.py +1 -0
  203. qseal-0.1.0/src/qseal/verifier/backends/__init__.py +27 -0
  204. qseal-0.1.0/src/qseal/verifier/backends/base.py +18 -0
  205. qseal-0.1.0/src/qseal/verifier/backends/builtin.py +63 -0
  206. qseal-0.1.0/src/qseal/verifier/backends/external.py +50 -0
  207. qseal-0.1.0/src/qseal/verifier/backends/external_contract.py +22 -0
  208. qseal-0.1.0/src/qseal/verifier/backends/qed.py +360 -0
  209. qseal-0.1.0/src/qseal/verifier/backends/sqlsolver.py +289 -0
  210. qseal-0.1.0/src/qseal/verifier/backends/verieql.py +448 -0
  211. qseal-0.1.0/src/qseal/verifier/check.py +219 -0
  212. qseal-0.1.0/src/qseal/verifier/model.py +18 -0
  213. qseal-0.1.0/src/qseal/verifier/pair_reduction.py +72 -0
  214. qseal-0.1.0/tests/fixtures/candidates/candidate_distinct_removed.sql +2 -0
  215. qseal-0.1.0/tests/fixtures/candidates/candidate_filtered.sql +3 -0
  216. qseal-0.1.0/tests/fixtures/candidates/original.sql +2 -0
  217. qseal-0.1.0/tests/fixtures/candidates/schema.yml +7 -0
  218. qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/complex_cte.sql +6 -0
  219. qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/customer_flags.sql +8 -0
  220. qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/dim_users.sql +2 -0
  221. qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/schema.yml +8 -0
  222. qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/stg_customers.sql +10 -0
  223. qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/unsupported_macro.sql +4 -0
  224. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/dbt_project.yml +5 -0
  225. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/accepted_values_orders.sql +10 -0
  226. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/customer_orders.sql +15 -0
  227. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/dim_users.sql +2 -0
  228. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/duplicate_orders.sql +6 -0
  229. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/grouped_cte_filter.sql +11 -0
  230. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/grouped_cte_join.sql +17 -0
  231. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/order_counts.sql +5 -0
  232. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/payment_pivot.sql +10 -0
  233. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/schema.yml +14 -0
  234. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/stg_payments.sql +8 -0
  235. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/accepted_values_orders.sql +10 -0
  236. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/customer_orders.sql +15 -0
  237. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/dim_users.sql +2 -0
  238. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/duplicate_orders.sql +6 -0
  239. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/grouped_cte_filter.sql +11 -0
  240. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/grouped_cte_join.sql +17 -0
  241. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/order_counts.sql +5 -0
  242. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/payment_pivot.sql +8 -0
  243. qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/stg_payments.sql +8 -0
  244. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/dbt_project.yml +5 -0
  245. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/dim_users_deduped.sql +2 -0
  246. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/order_count_distinct.sql +2 -0
  247. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/order_unique_rollup.sql +3 -0
  248. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_composite_left_join_users.sql +8 -0
  249. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_distinct_chain.sql +3 -0
  250. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_fk_inner_join_users.sql +7 -0
  251. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_left_join_users.sql +7 -0
  252. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_not_null.sql +3 -0
  253. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_pushdown.sql +6 -0
  254. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_status_case.sql +6 -0
  255. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_status_filter.sql +3 -0
  256. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/schema.yml +42 -0
  257. qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/users_with_orders_exists.sql +4 -0
  258. qseal-0.1.0/tests/fixtures/solver_compat/cases.yml +25 -0
  259. qseal-0.1.0/tests/fixtures/solver_compat/cte_projection_attribution/original.sql +11 -0
  260. qseal-0.1.0/tests/fixtures/solver_compat/cte_projection_attribution/rewritten.sql +11 -0
  261. qseal-0.1.0/tests/fixtures/solver_compat/fk_inner_join/original.sql +3 -0
  262. qseal-0.1.0/tests/fixtures/solver_compat/fk_inner_join/rewritten.sql +2 -0
  263. qseal-0.1.0/tests/fixtures/solver_compat/join_distinct_exists/original.sql +3 -0
  264. qseal-0.1.0/tests/fixtures/solver_compat/join_distinct_exists/rewritten.sql +7 -0
  265. qseal-0.1.0/tests/fixtures/solver_compat/normalized_identity/original.sql +3 -0
  266. qseal-0.1.0/tests/fixtures/solver_compat/normalized_identity/rewritten.sql +3 -0
  267. qseal-0.1.0/tests/fixtures/solver_compat/redundant_distinct/original.sql +2 -0
  268. qseal-0.1.0/tests/fixtures/solver_compat/redundant_distinct/rewritten.sql +2 -0
  269. qseal-0.1.0/tests/fixtures/solver_compat/schema.yml +31 -0
  270. qseal-0.1.0/tests/fixtures/solver_compat/unsafe_distinct/original.sql +2 -0
  271. qseal-0.1.0/tests/fixtures/solver_compat/unsafe_distinct/rewritten.sql +2 -0
  272. qseal-0.1.0/tests/fixtures/solver_compat/unused_left_join/original.sql +3 -0
  273. qseal-0.1.0/tests/fixtures/solver_compat/unused_left_join/rewritten.sql +2 -0
  274. qseal-0.1.0/tests/test_accepted_values_case.py +122 -0
  275. qseal-0.1.0/tests/test_accepted_values_filter.py +124 -0
  276. qseal-0.1.0/tests/test_candidate_evidence.py +195 -0
  277. qseal-0.1.0/tests/test_candidate_generation.py +130 -0
  278. qseal-0.1.0/tests/test_candidate_verification.py +99 -0
  279. qseal-0.1.0/tests/test_check_verifier.py +425 -0
  280. qseal-0.1.0/tests/test_cli.py +2259 -0
  281. qseal-0.1.0/tests/test_constraint_loader.py +84 -0
  282. qseal-0.1.0/tests/test_constraints.py +113 -0
  283. qseal-0.1.0/tests/test_corpus.py +229 -0
  284. qseal-0.1.0/tests/test_corpus_aggregate.py +237 -0
  285. qseal-0.1.0/tests/test_corpus_runner.py +370 -0
  286. qseal-0.1.0/tests/test_corpus_summary.py +258 -0
  287. qseal-0.1.0/tests/test_corpus_trajectories.py +135 -0
  288. qseal-0.1.0/tests/test_count_distinct_rewrite.py +85 -0
  289. qseal-0.1.0/tests/test_dbt_diff_scoping.py +103 -0
  290. qseal-0.1.0/tests/test_dbt_intake.py +135 -0
  291. qseal-0.1.0/tests/test_dbt_jinja.py +81 -0
  292. qseal-0.1.0/tests/test_dbt_loader.py +267 -0
  293. qseal-0.1.0/tests/test_dbt_project.py +102 -0
  294. qseal-0.1.0/tests/test_dbt_scan.py +1036 -0
  295. qseal-0.1.0/tests/test_diff_report.py +35 -0
  296. qseal-0.1.0/tests/test_distinct_rewrite.py +121 -0
  297. qseal-0.1.0/tests/test_duckdb_benchmark.py +136 -0
  298. qseal-0.1.0/tests/test_duckdb_fixtures.py +93 -0
  299. qseal-0.1.0/tests/test_environment.py +244 -0
  300. qseal-0.1.0/tests/test_environment_cache.py +544 -0
  301. qseal-0.1.0/tests/test_evaluation.py +100 -0
  302. qseal-0.1.0/tests/test_group_by_unique.py +114 -0
  303. qseal-0.1.0/tests/test_join_distinct_exists.py +110 -0
  304. qseal-0.1.0/tests/test_join_elimination.py +327 -0
  305. qseal-0.1.0/tests/test_json_report.py +154 -0
  306. qseal-0.1.0/tests/test_markdown_report.py +90 -0
  307. qseal-0.1.0/tests/test_not_null_filter.py +137 -0
  308. qseal-0.1.0/tests/test_pair_reduction.py +52 -0
  309. qseal-0.1.0/tests/test_parser.py +515 -0
  310. qseal-0.1.0/tests/test_patch_report.py +202 -0
  311. qseal-0.1.0/tests/test_policy_baseline.py +937 -0
  312. qseal-0.1.0/tests/test_predicate_pushdown.py +79 -0
  313. qseal-0.1.0/tests/test_product_demo.py +167 -0
  314. qseal-0.1.0/tests/test_qed_backend.py +115 -0
  315. qseal-0.1.0/tests/test_qualify.py +98 -0
  316. qseal-0.1.0/tests/test_report.py +146 -0
  317. qseal-0.1.0/tests/test_rewrite_actions.py +220 -0
  318. qseal-0.1.0/tests/test_rewrite_chain.py +91 -0
  319. qseal-0.1.0/tests/test_rewrite_registry.py +69 -0
  320. qseal-0.1.0/tests/test_search.py +354 -0
  321. qseal-0.1.0/tests/test_snowflake_benchmark.py +189 -0
  322. qseal-0.1.0/tests/test_snowflake_family_suite.py +308 -0
  323. qseal-0.1.0/tests/test_solver_compat.py +286 -0
  324. qseal-0.1.0/tests/test_subtree_rewrites.py +282 -0
  325. qseal-0.1.0/tests/test_verieql_backend.py +372 -0
  326. qseal-0.1.0/uv.lock +1791 -0
qseal-0.1.0/.github/workflows/ci.yml ADDED
@@ -0,0 +1,56 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ branches: [main]
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Check out repository
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Install uv
17
+ uses: astral-sh/setup-uv@v6
18
+ with:
19
+ enable-cache: true
20
+
21
+ - name: Set up Python
22
+ uses: actions/setup-python@v5
23
+ with:
24
+ python-version: "3.12"
25
+
26
+ - name: Install dependencies
27
+ run: uv sync --locked
28
+
29
+ - name: Run tests
30
+ run: uv run pytest
31
+
32
+ - name: Run lint
33
+ run: uv run ruff check .
34
+
35
+ package-smoke:
36
+ runs-on: ubuntu-latest
37
+
38
+ steps:
39
+ - name: Check out repository
40
+ uses: actions/checkout@v4
41
+
42
+ - name: Install uv
43
+ uses: astral-sh/setup-uv@v6
44
+ with:
45
+ enable-cache: true
46
+
47
+ - name: Set up Python
48
+ uses: actions/setup-python@v5
49
+ with:
50
+ python-version: "3.12"
51
+
52
+ - name: Build package
53
+ run: uv build
54
+
55
+ - name: Smoke test installed wheel
56
+ run: bash scripts/package_smoke.sh
qseal-0.1.0/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ .venv/
2
+ .uv-cache/
3
+ .claude/
4
+ .ruff_cache/
5
+ .pytest_cache/
6
+ __pycache__/
7
+ *.py[cod]
8
+ dist/
9
+ build/
10
+ *.egg-info/
11
+ .DS_Store
12
+ /AGENTS.md
13
+ /core
14
+ /core.*
15
+ /hs_err_pid*.log
16
+ qseal-runs/
qseal-0.1.0/CHANGELOG.md ADDED
@@ -0,0 +1,28 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 - Unreleased
4
+
5
+ - Add a CLI-first prototype for verified-safe SQL rewrites over a constrained
6
+ Snowflake and DuckDB SQL subset.
7
+ - Add dbt scanner workflows: `dbt scan`, `dbt intake`, compiled-SQL scanning,
8
+ changed-file scanning, markdown/JSON/text reports, patch files, and
9
+ composition-chain evidence.
10
+ - Add conservative premise-backed rewrite rules for redundant `DISTINCT`,
11
+ redundant `IS NOT NULL`, unused `LEFT JOIN`, FK-backed unused `INNER JOIN`,
12
+ `JOIN DISTINCT` to `EXISTS`, redundant `COUNT(DISTINCT)`, accepted-values
13
+ filters, accepted-values `CASE`, unique-key `GROUP BY` collapse, and
14
+ predicate pushdown through simple projection subqueries.
15
+ - Add dbt premise ingestion for `unique`, `not_null`, `relationships`,
16
+ `accepted_values`, and `dbt_utils.unique_combination_of_columns`.
17
+ - Add candidate verification/evidence workflows for generated or manual SQL
18
+ candidates, with unproven candidates rejected before benchmarking.
19
+ - Add repeatable DuckDB benchmark and fixture workflows, plus Snowflake
20
+ benchmark-suite commands for target-engine evidence.
21
+ - Add the rewrite-policy experiment surface: structured rewrite actions,
22
+ verified environment steps, corpus runs, trajectory export, search baselines,
23
+ and baseline/ranker policy evaluation.
24
+ - Add optional external verifier adapter spikes for SQLSolver, QED, and VeriEQL.
25
+ VeriEQL remains documented as research/evaluation-only and is not bundled.
26
+ - Add GitHub CI for tests, Ruff, package build, and installed-wheel smoke tests.
27
+ - Remove dormant GitHub Action metadata from the public-v0 surface; CI examples
28
+ install and run the CLI directly.
qseal-0.1.0/CONTRIBUTING.md ADDED
@@ -0,0 +1,45 @@
1
+ # Contributing
2
+
3
+ QuerySeal is early and intentionally conservative. Changes should keep the
4
+ modeled SQL subset small, explicit, and easy to audit.
5
+
6
+ ## Development
7
+
8
+ ```bash
9
+ uv sync
10
+ uv run pytest
11
+ uv run ruff check .
12
+ uv run ruff format .
13
+ ```
14
+
15
+ CI runs tests, Ruff lint, a package build, and an installed-wheel smoke test on every push to `main` and on pull requests.
16
+
17
+ ## Adding Rewrite Rules
18
+
19
+ Prefer small, rule-specific changes:
20
+
21
+ - add parser or IR support only for syntax the rule needs
22
+ - reject unsupported SQL explicitly
23
+ - add focused rewrite tests
24
+ - add `qseal check` verifier coverage
25
+ - add example SQL under `examples/`
26
+ - document any new assumptions in `docs/scope.md`
27
+
28
+ Rules should return `UNKNOWN` when a required assumption is missing and
29
+ `UNSUPPORTED` when the SQL shape is outside the modeled subset.
30
+
31
+ ## Constraints
32
+
33
+ QuerySeal treats YAML constraints as trusted input. Do not infer production
34
+ truth from Snowflake metadata unless the source is clearly documented and the
35
+ tool reports the assumption.
36
+
37
+ ## Commit Style
38
+
39
+ Keep commits self-contained. Good examples:
40
+
41
+ ```text
42
+ Add dbt schema constraint loader
43
+ Support IS NULL predicates
44
+ Document Snowflake EXPLAIN plan goals
45
+ ```
qseal-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 QuerySeal contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
qseal-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,270 @@
1
+ Metadata-Version: 2.4
2
+ Name: qseal
3
+ Version: 0.1.0
4
+ Summary: QuerySeal verifies safe SQL rewrites for a constrained Snowflake and DuckDB SQL subset.
5
+ Project-URL: Repository, https://github.com/neelvad/qseal
6
+ Project-URL: Issues, https://github.com/neelvad/qseal/issues
7
+ Project-URL: Documentation, https://github.com/neelvad/qseal#readme
8
+ Author: QuerySeal contributors
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: dbt,formal-verification,query-optimization,snowflake,sql
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Database
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: click>=8.1.8
22
+ Requires-Dist: duckdb>=1.4.0
23
+ Requires-Dist: jinja2>=3.1.6
24
+ Requires-Dist: pydantic>=2.10.6
25
+ Requires-Dist: pyyaml>=6.0.2
26
+ Requires-Dist: rich>=13.9.4
27
+ Requires-Dist: sqlglot>=26.3.0
28
+ Description-Content-Type: text/markdown
29
+
30
+ # QuerySeal
31
+
32
+ QuerySeal is a research-grade CLI for verified SQL rewrite experiments.
33
+
34
+ It has two public-v0 surfaces:
35
+
36
+ - **dbt scanner:** find small, premise-backed SQL rewrites that are safe under
37
+ trusted dbt tests or QuerySeal YAML constraints.
38
+ - **rewrite-policy gym:** run search, ranking, and policy-learning experiments
39
+ over a finite SQL rewrite action space where every transition is verified and
40
+ rewards come from repeatable DuckDB benchmarks.
41
+
42
+ QuerySeal is intentionally not a general SQL optimizer, not a full SQL
43
+ equivalence prover, and not a warehouse savings guarantee. A proven rewrite
44
+ means: for the supported SQL subset, the rewritten query returns the same rows
45
+ as the original under the declared assumptions.
46
+
47
+ ## Why This Exists
48
+
49
+ Warehouses such as Snowflake cannot generally use dbt tests as optimizer
50
+ premises. If dbt says a column is unique, non-null, or related to a parent table,
51
+ that is valuable semantic information, but it is not an enforced database
52
+ constraint. QuerySeal treats those tests as explicit trusted assumptions and
53
+ uses them to prove conservative rewrites such as:
54
+
55
+ - removing redundant `DISTINCT`
56
+ - removing redundant `IS NOT NULL` filters
57
+ - removing unused `LEFT JOIN`s
58
+ - removing FK-backed unused `INNER JOIN`s
59
+ - simplifying `COUNT(DISTINCT col)` when `col` is unique and non-null
60
+ - removing accepted-values filters and simplifying accepted-values `CASE`
61
+ - collapsing narrow `GROUP BY` queries over trusted unique keys
62
+ - pushing predicates through simple projection subqueries
63
+
64
+ The proof is conditional. If a rewrite depends on a dbt `unique`, `not_null`,
65
+ `relationships`, or `accepted_values` test, that test must keep passing.
66
+
67
+ ## Install
68
+
69
+ From a checkout:
70
+
71
+ ```bash
72
+ uv sync
73
+ uv run qseal --help
74
+ ```
75
+
76
+ After the package is published, the intended quick paths are:
77
+
78
+ ```bash
79
+ uvx qseal --help
80
+ pipx install qseal
81
+ ```
82
+
83
+ The default scanner, corpus runner, and DuckDB benchmark tools are pure Python.
84
+ Optional external solver integrations require user-supplied toolchains:
85
+
86
+ - **SQLSolver**: optional independent equivalence prover; Apache 2.0 upstream.
87
+ - **QED**: optional independent equivalence prover; MIT/Apache-compatible
88
+ upstream components.
89
+ - **VeriEQL**: optional bounded refuter for research/evaluation only. It is
90
+ CC BY-NC-SA 4.0 and is not bundled, vendored, or part of a commercial path.
91
+
92
+ ## Quick Demos
93
+
94
+ Suggest a rewrite for one query:
95
+
96
+ ```bash
97
+ uv run qseal suggest examples/dbt/distinct.sql \
98
+ --schema examples/dbt/schema.yml \
99
+ --all
100
+ ```
101
+
102
+ Scan a small dbt-like fixture and produce a privacy-preserving intake report:
103
+
104
+ ```bash
105
+ uv run qseal dbt intake tests/fixtures/dbt_projects/yield_pack
106
+ ```
107
+
108
+ Scan the product demo project for advisory findings:
109
+
110
+ ```bash
111
+ uv run qseal dbt scan examples/product_demo/dbt_project --format text
112
+ ```
113
+
114
+ Run a tiny rewrite-policy corpus experiment:
115
+
116
+ ```bash
117
+ uv run qseal corpus run /tmp/qseal-corpus-smoke \
118
+ --task redundant-distinct-users \
119
+ --strategy fixed_order \
120
+ --strategy greedy \
121
+ --warmups 0 \
122
+ --repetitions 1
123
+ ```
124
+
125
+ ## Mode A: dbt Scanner
126
+
127
+ The dbt scanner is an advisory workflow for data projects. It scans dbt model
128
+ SQL, reads nearby `schema.yml` / `.yaml` tests, and reports proven-safe rewrite
129
+ opportunities. It can emit text, JSON, markdown, diffs, patch files, and
130
+ redacted intake artifacts.
131
+
132
+ Recommended first command for a private project:
133
+
134
+ ```bash
135
+ uv run qseal dbt intake . --use-compiled --report-file qseal-intake.json
136
+ ```
137
+
138
+ The intake artifact is aggregate-only. It omits SQL, model names, file paths,
139
+ diffs, raw unsupported reasons, and literal accepted values. It keeps the useful
140
+ fit signals: scanned model count, silent model count, proven finding count, rule
141
+ counts, required test categories, redacted unsupported reason categories, and
142
+ apply-readiness counts.
143
+
144
+ For local advisory review:
145
+
146
+ ```bash
147
+ uv run qseal dbt scan . --all --report-file qseal-report.json
148
+ uv run qseal dbt scan . --use-compiled --all --report-file qseal-compiled-report.json
149
+ ```
150
+
151
+ For CI today, use the CLI in your workflow. The repository contains workflow
152
+ examples, but the project should not be treated as a published Marketplace
153
+ Action yet. See [docs/github-actions.md](docs/github-actions.md) and
154
+ [docs/ci.md](docs/ci.md).
155
+
156
+ ## Mode B: Rewrite-Policy Gym
157
+
158
+ The policy/research side exposes QuerySeal's rewrite rules as a finite action
159
+ space. An environment step proposes one rewrite action, verifies semantic
160
+ safety, optionally benchmarks the transition on DuckDB, and records the reward.
161
+
162
+ This is for experiments in search, ranking, RL-style policy learning, and
163
+ verified action selection. It is not production query optimization.
164
+
165
+ Useful commands:
166
+
167
+ ```bash
168
+ uv run qseal corpus run /tmp/qseal-run \
169
+ --strategy fixed_order \
170
+ --strategy random \
171
+ --strategy greedy \
172
+ --strategy beam \
173
+ --reward-margin 0.05
174
+
175
+ uv run qseal corpus export-trajectories \
176
+ /tmp/qseal-run/corpus-run.json \
177
+ --output /tmp/qseal-trajectories.jsonl
178
+
179
+ uv run qseal policy train-ranker \
180
+ /tmp/qseal-trajectories.jsonl \
181
+ --model-file /tmp/qseal-ranker.json
182
+ ```
183
+
184
+ The bundled DuckDB corpus is deliberately small and controlled. That is useful
185
+ for reproducibility and policy debugging, but it is not evidence that the same
186
+ policy improves arbitrary production SQL. See
187
+ [docs/rewrite-policy-gym.md](docs/rewrite-policy-gym.md),
188
+ [docs/rewrite-environment.md](docs/rewrite-environment.md),
189
+ [docs/search-baselines.md](docs/search-baselines.md), and
190
+ [docs/task-corpus.md](docs/task-corpus.md).
191
+
192
+ ## What "Proven" Means
193
+
194
+ QuerySeal reports how a finding was certified:
195
+
196
+ - **builtin**: a hand-written rule replayed the same rewrite after parsing and
197
+ normalization. This is the default scanner path.
198
+ - **SQLSolver / QED**: an external prover returned an equivalence result.
199
+ - **VeriEQL**: a bounded refuter found a counterexample or did not find one up
200
+ to a bound. A counterexample is a sound disproof; bounded-OK is evidence, not
201
+ a proof.
202
+
203
+ Runtime speed is separate from semantic safety. QuerySeal can benchmark proven
204
+ pairs with DuckDB or Snowflake helpers, but performance evidence is diagnostic
205
+ and workload-specific.
206
+
207
+ ## Supported Inputs
208
+
209
+ The SQL subset is intentionally conservative:
210
+
211
+ - direct table sources and simple subquery sources
212
+ - narrow non-recursive CTE pass-through chains
213
+ - direct, star, and simple aliased scalar projections
214
+ - simple `WHERE` predicates joined by `AND`
215
+ - simple `EXISTS`
216
+ - `INNER JOIN` / `LEFT JOIN` with column equality predicates
217
+ - qualified Snowflake relation names
218
+ - selected `GROUP BY`, aggregate, window, and `QUALIFY` shapes where a parser or
219
+ rule explicitly supports them
220
+
221
+ Trusted constraints can come from QuerySeal YAML or dbt `schema.yml` / `.yaml`.
222
+ Supported dbt premise types include:
223
+
224
+ - `unique`
225
+ - `not_null`
226
+ - `relationships`
227
+ - `accepted_values`
228
+ - `dbt_utils.unique_combination_of_columns`
229
+
230
+ Out-of-scope items include full SQL equivalence, arbitrary subqueries, join
231
+ reordering, recursive CTEs, UDFs, semi-structured `VARIANT` / `FLATTEN`, and any
232
+ rewrite that QuerySeal cannot verify. Full detail: [docs/scope.md](docs/scope.md).
233
+
234
+ ## Candidate Verification
235
+
236
+ If another tool, human, or model generates candidate SQL files, keep generation
237
+ outside the trusted path and gate candidates with QuerySeal:
238
+
239
+ ```bash
240
+ uv run qseal candidates evidence original.sql \
241
+ --candidates-dir generated-candidates \
242
+ --schema schema.yml \
243
+ --fail-on unproven \
244
+ --report-file qseal-candidate-evidence.json
245
+ ```
246
+
247
+ Only `PROVEN_EQUIVALENT` candidates should be considered for review. See
248
+ [docs/candidate-evidence-ci.md](docs/candidate-evidence-ci.md).
249
+
250
+ ## Documentation
251
+
252
+ - [Scope](docs/scope.md): supported SQL, assumptions, and non-goals.
253
+ - [Artifacts](docs/artifacts.md): JSON report contracts.
254
+ - [GitHub workflow examples](docs/github-actions.md): CLI-based CI examples.
255
+ - [Candidate evidence](docs/candidate-evidence-ci.md): verify generated SQL.
256
+ - [Rewrite-policy gym](docs/rewrite-policy-gym.md): corpus, search, and policy
257
+ experiments.
258
+ - [Performance evidence](docs/performance-evidence.md): benchmark tiers and
259
+ evidence limits.
260
+ - [Product demo](docs/product-demo.md): product-shaped demo narrative.
261
+ - [Roadmap](docs/roadmap.md): near-term premise/rewrite direction.
262
+ - Solver notes: [SQLSolver](docs/sqlsolver-spike.md),
263
+ [QED](docs/qed-spike.md), [VeriEQL](docs/verieql-spike.md).
264
+
265
+ ## Public v0 Status
266
+
267
+ This is an alpha research/prototype release. The useful public artifact is a
268
+ reproducible verified-rewrite workbench, not a mature optimizer. If you try it
269
+ on a real dbt project, start with `qseal dbt intake` and share the redacted
270
+ artifact before sharing source SQL.
qseal-0.1.0/README.md ADDED
@@ -0,0 +1,241 @@
1
+ # QuerySeal
2
+
3
+ QuerySeal is a research-grade CLI for verified SQL rewrite experiments.
4
+
5
+ It has two public-v0 surfaces:
6
+
7
+ - **dbt scanner:** find small, premise-backed SQL rewrites that are safe under
8
+ trusted dbt tests or QuerySeal YAML constraints.
9
+ - **rewrite-policy gym:** run search, ranking, and policy-learning experiments
10
+ over a finite SQL rewrite action space where every transition is verified and
11
+ rewards come from repeatable DuckDB benchmarks.
12
+
13
+ QuerySeal is intentionally not a general SQL optimizer, not a full SQL
14
+ equivalence prover, and not a warehouse savings guarantee. A proven rewrite
15
+ means: for the supported SQL subset, the rewritten query returns the same rows
16
+ as the original under the declared assumptions.
17
+
18
+ ## Why This Exists
19
+
20
+ Warehouses such as Snowflake cannot generally use dbt tests as optimizer
21
+ premises. If dbt says a column is unique, non-null, or related to a parent table,
22
+ that is valuable semantic information, but it is not an enforced database
23
+ constraint. QuerySeal treats those tests as explicit trusted assumptions and
24
+ uses them to prove conservative rewrites such as:
25
+
26
+ - removing redundant `DISTINCT`
27
+ - removing redundant `IS NOT NULL` filters
28
+ - removing unused `LEFT JOIN`s
29
+ - removing FK-backed unused `INNER JOIN`s
30
+ - simplifying `COUNT(DISTINCT col)` when `col` is unique and non-null
31
+ - removing accepted-values filters and simplifying accepted-values `CASE`
32
+ - collapsing narrow `GROUP BY` queries over trusted unique keys
33
+ - pushing predicates through simple projection subqueries
34
+
35
+ The proof is conditional. If a rewrite depends on a dbt `unique`, `not_null`,
36
+ `relationships`, or `accepted_values` test, that test must keep passing.
37
+
38
+ ## Install
39
+
40
+ From a checkout:
41
+
42
+ ```bash
43
+ uv sync
44
+ uv run qseal --help
45
+ ```
46
+
47
+ After the package is published, the intended quick paths are:
48
+
49
+ ```bash
50
+ uvx qseal --help
51
+ pipx install qseal
52
+ ```
53
+
54
+ The default scanner, corpus runner, and DuckDB benchmark tools are pure Python.
55
+ Optional external solver integrations require user-supplied toolchains:
56
+
57
+ - **SQLSolver**: optional independent equivalence prover; Apache 2.0 upstream.
58
+ - **QED**: optional independent equivalence prover; MIT/Apache-compatible
59
+ upstream components.
60
+ - **VeriEQL**: optional bounded refuter for research/evaluation only. It is
61
+ CC BY-NC-SA 4.0 and is not bundled, vendored, or part of a commercial path.
62
+
63
+ ## Quick Demos
64
+
65
+ Suggest a rewrite for one query:
66
+
67
+ ```bash
68
+ uv run qseal suggest examples/dbt/distinct.sql \
69
+ --schema examples/dbt/schema.yml \
70
+ --all
71
+ ```
72
+
73
+ Scan a small dbt-like fixture and produce a privacy-preserving intake report:
74
+
75
+ ```bash
76
+ uv run qseal dbt intake tests/fixtures/dbt_projects/yield_pack
77
+ ```
78
+
79
+ Scan the product demo project for advisory findings:
80
+
81
+ ```bash
82
+ uv run qseal dbt scan examples/product_demo/dbt_project --format text
83
+ ```
84
+
85
+ Run a tiny rewrite-policy corpus experiment:
86
+
87
+ ```bash
88
+ uv run qseal corpus run /tmp/qseal-corpus-smoke \
89
+ --task redundant-distinct-users \
90
+ --strategy fixed_order \
91
+ --strategy greedy \
92
+ --warmups 0 \
93
+ --repetitions 1
94
+ ```
95
+
96
+ ## Mode A: dbt Scanner
97
+
98
+ The dbt scanner is an advisory workflow for data projects. It scans dbt model
99
+ SQL, reads nearby `schema.yml` / `.yaml` tests, and reports proven-safe rewrite
100
+ opportunities. It can emit text, JSON, markdown, diffs, patch files, and
101
+ redacted intake artifacts.
102
+
103
+ Recommended first command for a private project:
104
+
105
+ ```bash
106
+ uv run qseal dbt intake . --use-compiled --report-file qseal-intake.json
107
+ ```
108
+
109
+ The intake artifact is aggregate-only. It omits SQL, model names, file paths,
110
+ diffs, raw unsupported reasons, and literal accepted values. It keeps the useful
111
+ fit signals: scanned model count, silent model count, proven finding count, rule
112
+ counts, required test categories, redacted unsupported reason categories, and
113
+ apply-readiness counts.
114
+
115
+ For local advisory review:
116
+
117
+ ```bash
118
+ uv run qseal dbt scan . --all --report-file qseal-report.json
119
+ uv run qseal dbt scan . --use-compiled --all --report-file qseal-compiled-report.json
120
+ ```
121
+
122
+ For CI today, use the CLI in your workflow. The repository contains workflow
123
+ examples, but the project should not be treated as a published Marketplace
124
+ Action yet. See [docs/github-actions.md](docs/github-actions.md) and
125
+ [docs/ci.md](docs/ci.md).
126
+
127
+ ## Mode B: Rewrite-Policy Gym
128
+
129
+ The policy/research side exposes QuerySeal's rewrite rules as a finite action
130
+ space. An environment step proposes one rewrite action, verifies semantic
131
+ safety, optionally benchmarks the transition on DuckDB, and records the reward.
132
+
133
+ This is for experiments in search, ranking, RL-style policy learning, and
134
+ verified action selection. It is not production query optimization.
135
+
136
+ Useful commands:
137
+
138
+ ```bash
139
+ uv run qseal corpus run /tmp/qseal-run \
140
+ --strategy fixed_order \
141
+ --strategy random \
142
+ --strategy greedy \
143
+ --strategy beam \
144
+ --reward-margin 0.05
145
+
146
+ uv run qseal corpus export-trajectories \
147
+ /tmp/qseal-run/corpus-run.json \
148
+ --output /tmp/qseal-trajectories.jsonl
149
+
150
+ uv run qseal policy train-ranker \
151
+ /tmp/qseal-trajectories.jsonl \
152
+ --model-file /tmp/qseal-ranker.json
153
+ ```
154
+
155
+ The bundled DuckDB corpus is deliberately small and controlled. That is useful
156
+ for reproducibility and policy debugging, but it is not evidence that the same
157
+ policy improves arbitrary production SQL. See
158
+ [docs/rewrite-policy-gym.md](docs/rewrite-policy-gym.md),
159
+ [docs/rewrite-environment.md](docs/rewrite-environment.md),
160
+ [docs/search-baselines.md](docs/search-baselines.md), and
161
+ [docs/task-corpus.md](docs/task-corpus.md).
162
+
163
+ ## What "Proven" Means
164
+
165
+ QuerySeal reports how a finding was certified:
166
+
167
+ - **builtin**: a hand-written rule replayed the same rewrite after parsing and
168
+ normalization. This is the default scanner path.
169
+ - **SQLSolver / QED**: an external prover returned an equivalence result.
170
+ - **VeriEQL**: a bounded refuter found a counterexample or did not find one up
171
+ to a bound. A counterexample is a sound disproof; bounded-OK is evidence, not
172
+ a proof.
173
+
174
+ Runtime speed is separate from semantic safety. QuerySeal can benchmark proven
175
+ pairs with DuckDB or Snowflake helpers, but performance evidence is diagnostic
176
+ and workload-specific.
177
+
178
+ ## Supported Inputs
179
+
180
+ The SQL subset is intentionally conservative:
181
+
182
+ - direct table sources and simple subquery sources
183
+ - narrow non-recursive CTE pass-through chains
184
+ - direct, star, and simple aliased scalar projections
185
+ - simple `WHERE` predicates joined by `AND`
186
+ - simple `EXISTS`
187
+ - `INNER JOIN` / `LEFT JOIN` with column equality predicates
188
+ - qualified Snowflake relation names
189
+ - selected `GROUP BY`, aggregate, window, and `QUALIFY` shapes where a parser or
190
+ rule explicitly supports them
191
+
192
+ Trusted constraints can come from QuerySeal YAML or dbt `schema.yml` / `.yaml`.
193
+ Supported dbt premise types include:
194
+
195
+ - `unique`
196
+ - `not_null`
197
+ - `relationships`
198
+ - `accepted_values`
199
+ - `dbt_utils.unique_combination_of_columns`
200
+
201
+ Out-of-scope items include full SQL equivalence, arbitrary subqueries, join
202
+ reordering, recursive CTEs, UDFs, semi-structured `VARIANT` / `FLATTEN`, and any
203
+ rewrite that QuerySeal cannot verify. Full detail: [docs/scope.md](docs/scope.md).
204
+
205
+ ## Candidate Verification
206
+
207
+ If another tool, human, or model generates candidate SQL files, keep generation
208
+ outside the trusted path and gate candidates with QuerySeal:
209
+
210
+ ```bash
211
+ uv run qseal candidates evidence original.sql \
212
+ --candidates-dir generated-candidates \
213
+ --schema schema.yml \
214
+ --fail-on unproven \
215
+ --report-file qseal-candidate-evidence.json
216
+ ```
217
+
218
+ Only `PROVEN_EQUIVALENT` candidates should be considered for review. See
219
+ [docs/candidate-evidence-ci.md](docs/candidate-evidence-ci.md).
220
+
221
+ ## Documentation
222
+
223
+ - [Scope](docs/scope.md): supported SQL, assumptions, and non-goals.
224
+ - [Artifacts](docs/artifacts.md): JSON report contracts.
225
+ - [GitHub workflow examples](docs/github-actions.md): CLI-based CI examples.
226
+ - [Candidate evidence](docs/candidate-evidence-ci.md): verify generated SQL.
227
+ - [Rewrite-policy gym](docs/rewrite-policy-gym.md): corpus, search, and policy
228
+ experiments.
229
+ - [Performance evidence](docs/performance-evidence.md): benchmark tiers and
230
+ evidence limits.
231
+ - [Product demo](docs/product-demo.md): product-shaped demo narrative.
232
+ - [Roadmap](docs/roadmap.md): near-term premise/rewrite direction.
233
+ - Solver notes: [SQLSolver](docs/sqlsolver-spike.md),
234
+ [QED](docs/qed-spike.md), [VeriEQL](docs/verieql-spike.md).
235
+
236
+ ## Public v0 Status
237
+
238
+ This is an alpha research/prototype release. The useful public artifact is a
239
+ reproducible verified-rewrite workbench, not a mature optimizer. If you try it
240
+ on a real dbt project, start with `qseal dbt intake` and share the redacted
241
+ artifact before sharing source SQL.
qseal-0.1.0/docker/sqlsolver-smoke.Dockerfile ADDED
@@ -0,0 +1,20 @@
1
+ FROM ubuntu:22.04
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+
5
+ RUN apt-get update \
6
+ && apt-get install -y --no-install-recommends \
7
+ ca-certificates \
8
+ curl \
9
+ file \
10
+ openjdk-17-jdk \
11
+ && rm -rf /var/lib/apt/lists/*
12
+
13
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
14
+
15
+ ENV PATH="/root/.local/bin:${PATH}"
16
+ ENV UV_LINK_MODE=copy
17
+ ENV UV_PROJECT_ENVIRONMENT=/tmp/qseal-venv
18
+ ENV UV_CACHE_DIR=/tmp/qseal-uv-cache
19
+
20
+ WORKDIR /sqlsolver