qseal 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qseal-0.1.0/.github/workflows/ci.yml +56 -0
- qseal-0.1.0/.gitignore +16 -0
- qseal-0.1.0/CHANGELOG.md +28 -0
- qseal-0.1.0/CONTRIBUTING.md +45 -0
- qseal-0.1.0/LICENSE +21 -0
- qseal-0.1.0/PKG-INFO +270 -0
- qseal-0.1.0/README.md +241 -0
- qseal-0.1.0/docker/sqlsolver-smoke.Dockerfile +20 -0
- qseal-0.1.0/docs/architecture.md +61 -0
- qseal-0.1.0/docs/artifacts.md +303 -0
- qseal-0.1.0/docs/caching-and-trajectories.md +83 -0
- qseal-0.1.0/docs/candidate-evidence-ci.md +144 -0
- qseal-0.1.0/docs/ci.md +113 -0
- qseal-0.1.0/docs/duckdb-fixtures.md +38 -0
- qseal-0.1.0/docs/github-actions.md +195 -0
- qseal-0.1.0/docs/llm-candidates.md +210 -0
- qseal-0.1.0/docs/performance-evidence.md +219 -0
- qseal-0.1.0/docs/product-demo.md +218 -0
- qseal-0.1.0/docs/qed-spike.md +76 -0
- qseal-0.1.0/docs/real-project-evaluation.md +299 -0
- qseal-0.1.0/docs/rewrite-environment.md +78 -0
- qseal-0.1.0/docs/rewrite-policy-gym.md +116 -0
- qseal-0.1.0/docs/roadmap.md +79 -0
- qseal-0.1.0/docs/scope.md +308 -0
- qseal-0.1.0/docs/search-baselines.md +75 -0
- qseal-0.1.0/docs/sqlsolver-spike.md +152 -0
- qseal-0.1.0/docs/task-corpus.md +397 -0
- qseal-0.1.0/docs/verieql-spike.md +142 -0
- qseal-0.1.0/examples/benchmark/original.sql +2 -0
- qseal-0.1.0/examples/benchmark/rewritten.sql +2 -0
- qseal-0.1.0/examples/benchmark/setup.sql +5 -0
- qseal-0.1.0/examples/candidates/manual/001_manual_distinct_removed.sql +2 -0
- qseal-0.1.0/examples/candidates/manual/metadata.json +13 -0
- qseal-0.1.0/examples/candidates/original.sql +2 -0
- qseal-0.1.0/examples/candidates/schema.yml +7 -0
- qseal-0.1.0/examples/dbt/distinct.sql +2 -0
- qseal-0.1.0/examples/dbt/not_null.sql +3 -0
- qseal-0.1.0/examples/dbt/schema.yml +9 -0
- qseal-0.1.0/examples/dbt_project/models/dim_users.sql +2 -0
- qseal-0.1.0/examples/dbt_project/models/fact_orders.sql +3 -0
- qseal-0.1.0/examples/dbt_project/models/marts/positive_orders.sql +6 -0
- qseal-0.1.0/examples/dbt_project/models/schema.yml +21 -0
- qseal-0.1.0/examples/dbt_project/target/compiled/qseal/models/dim_users.sql +2 -0
- qseal-0.1.0/examples/dbt_project/target/compiled/qseal/models/fact_orders.sql +3 -0
- qseal-0.1.0/examples/dbt_project/target/compiled/qseal/models/marts/positive_orders.sql +6 -0
- qseal-0.1.0/examples/distinct/original.sql +2 -0
- qseal-0.1.0/examples/distinct/original_where.sql +3 -0
- qseal-0.1.0/examples/distinct/rewritten.sql +2 -0
- qseal-0.1.0/examples/distinct/rewritten_where.sql +3 -0
- qseal-0.1.0/examples/distinct/schema.yml +7 -0
- qseal-0.1.0/examples/join_elimination/original.sql +3 -0
- qseal-0.1.0/examples/join_elimination/rewritten.sql +2 -0
- qseal-0.1.0/examples/join_elimination/schema.yml +4 -0
- qseal-0.1.0/examples/predicate_pushdown/original.sql +6 -0
- qseal-0.1.0/examples/predicate_pushdown/rewritten.sql +3 -0
- qseal-0.1.0/examples/product_demo/README.md +89 -0
- qseal-0.1.0/examples/product_demo/candidates/001_remove_distinct.sql +2 -0
- qseal-0.1.0/examples/product_demo/candidates/002_filter_rows.sql +3 -0
- qseal-0.1.0/examples/product_demo/candidates/metadata.json +18 -0
- qseal-0.1.0/examples/product_demo/dbt_project/models/dim_users.sql +2 -0
- qseal-0.1.0/examples/product_demo/dbt_project/models/fct_orders.sql +8 -0
- qseal-0.1.0/examples/product_demo/dbt_project/models/fct_orders_fk.sql +8 -0
- qseal-0.1.0/examples/product_demo/dbt_project/models/schema.yml +26 -0
- qseal-0.1.0/examples/product_demo/original.sql +2 -0
- qseal-0.1.0/examples/product_demo/setup.sql +5 -0
- qseal-0.1.0/examples/unsupported/join.sql +3 -0
- qseal-0.1.0/pyproject.toml +78 -0
- qseal-0.1.0/scripts/benchmark_proven_candidates.py +41 -0
- qseal-0.1.0/scripts/compare_real_project_reports.py +48 -0
- qseal-0.1.0/scripts/evaluate_real_projects.sh +227 -0
- qseal-0.1.0/scripts/explain_proven_candidates.py +33 -0
- qseal-0.1.0/scripts/generate_llm_candidates.py +44 -0
- qseal-0.1.0/scripts/modal_benchmark.py +137 -0
- qseal-0.1.0/scripts/modal_verify.py +182 -0
- qseal-0.1.0/scripts/package_smoke.sh +172 -0
- qseal-0.1.0/scripts/qed_spike_unknowns.py +84 -0
- qseal-0.1.0/scripts/run_llm_verification_sqlsolver.sh +99 -0
- qseal-0.1.0/scripts/run_qseal_sqlsolver_candidate_smoke.sh +73 -0
- qseal-0.1.0/scripts/run_qseal_sqlsolver_fixture.sh +71 -0
- qseal-0.1.0/scripts/run_qseal_sqlsolver_pair.sh +49 -0
- qseal-0.1.0/scripts/run_sqlsolver_container_smoke.sh +175 -0
- qseal-0.1.0/scripts/run_sqlsolver_fixture.sh +116 -0
- qseal-0.1.0/scripts/run_verieql_spike.sh +37 -0
- qseal-0.1.0/scripts/sqlsolver_command.sh +21 -0
- qseal-0.1.0/scripts/verieql_driver.py +62 -0
- qseal-0.1.0/scripts/verieql_spike.py +121 -0
- qseal-0.1.0/scripts/verify_llm_candidates.py +77 -0
- qseal-0.1.0/src/qseal/__init__.py +3 -0
- qseal-0.1.0/src/qseal/benchmark/__init__.py +36 -0
- qseal-0.1.0/src/qseal/benchmark/duckdb.py +476 -0
- qseal-0.1.0/src/qseal/benchmark/model.py +81 -0
- qseal-0.1.0/src/qseal/benchmark/snowflake.py +506 -0
- qseal-0.1.0/src/qseal/benchmark/snowflake_suite.py +855 -0
- qseal-0.1.0/src/qseal/cache.py +54 -0
- qseal-0.1.0/src/qseal/candidates/__init__.py +1 -0
- qseal-0.1.0/src/qseal/candidates/benchmarking.py +246 -0
- qseal-0.1.0/src/qseal/candidates/bundle.py +34 -0
- qseal-0.1.0/src/qseal/candidates/evidence.py +198 -0
- qseal-0.1.0/src/qseal/candidates/explain.py +252 -0
- qseal-0.1.0/src/qseal/candidates/generation.py +439 -0
- qseal-0.1.0/src/qseal/candidates/verification.py +239 -0
- qseal-0.1.0/src/qseal/cli.py +3779 -0
- qseal-0.1.0/src/qseal/constraints/__init__.py +1 -0
- qseal-0.1.0/src/qseal/constraints/dbt_loader.py +223 -0
- qseal-0.1.0/src/qseal/constraints/loader.py +36 -0
- qseal-0.1.0/src/qseal/constraints/model.py +94 -0
- qseal-0.1.0/src/qseal/constraints/yaml_loader.py +38 -0
- qseal-0.1.0/src/qseal/corpora/__init__.py +11 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/corpus.yml +540 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-distinct-not-null-active-users.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-distinct-not-null-events-user-range.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-distinct-not-null-high-value-orders.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-active-users.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-events-user-range.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-high-value-orders.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-inverse-active-users.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-inverse-events-user-range.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-double-not-null-inverse-high-value-orders.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-not-null-distinct-active-users.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-not-null-distinct-events-user-range.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/choice-not-null-distinct-high-value-orders.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/distinct-and-not-null-events.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/distinct-and-not-null-orders.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/distinct-and-not-null.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/double-not-null-events.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/double-not-null-orders.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/double-not-null-users.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/join-distinct-events-to-exists.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/join-distinct-to-exists.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/not-null-pushdown-events.sql +6 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/not-null-pushdown-orders.sql +6 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/not-null-pushdown-users.sql +6 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/predicate-pushdown-events-user.sql +6 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/predicate-pushdown-high-value-orders.sql +6 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/predicate-pushdown-user-status.sql +6 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-distinct-events.sql +2 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-distinct-orders.sql +2 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-distinct-users.sql +2 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-not-null-event-id.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-not-null-order-id.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/redundant-not-null-user-id.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/unused-left-join-events-users.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/queries/unused-left-join-users.sql +3 -0
- qseal-0.1.0/src/qseal/corpora/duckdb-v1/schema.yml +31 -0
- qseal-0.1.0/src/qseal/corpus/__init__.py +93 -0
- qseal-0.1.0/src/qseal/corpus/aggregate.py +373 -0
- qseal-0.1.0/src/qseal/corpus/inspect.py +353 -0
- qseal-0.1.0/src/qseal/corpus/loader.py +173 -0
- qseal-0.1.0/src/qseal/corpus/materialize.py +24 -0
- qseal-0.1.0/src/qseal/corpus/model.py +157 -0
- qseal-0.1.0/src/qseal/corpus/repeat.py +62 -0
- qseal-0.1.0/src/qseal/corpus/runner.py +586 -0
- qseal-0.1.0/src/qseal/corpus/summary.py +298 -0
- qseal-0.1.0/src/qseal/corpus/trajectories.py +327 -0
- qseal-0.1.0/src/qseal/dbt/__init__.py +1 -0
- qseal-0.1.0/src/qseal/dbt/git_diff.py +55 -0
- qseal-0.1.0/src/qseal/dbt/intake.py +276 -0
- qseal-0.1.0/src/qseal/dbt/jinja.py +108 -0
- qseal-0.1.0/src/qseal/dbt/project.py +152 -0
- qseal-0.1.0/src/qseal/dbt/scan.py +385 -0
- qseal-0.1.0/src/qseal/dialects.py +5 -0
- qseal-0.1.0/src/qseal/environment/__init__.py +25 -0
- qseal-0.1.0/src/qseal/environment/cache.py +288 -0
- qseal-0.1.0/src/qseal/environment/core.py +402 -0
- qseal-0.1.0/src/qseal/environment/model.py +53 -0
- qseal-0.1.0/src/qseal/environment/trajectory.py +87 -0
- qseal-0.1.0/src/qseal/evaluation.py +170 -0
- qseal-0.1.0/src/qseal/fixtures/__init__.py +4 -0
- qseal-0.1.0/src/qseal/fixtures/duckdb.py +230 -0
- qseal-0.1.0/src/qseal/fixtures/model.py +41 -0
- qseal-0.1.0/src/qseal/ir/__init__.py +1 -0
- qseal-0.1.0/src/qseal/ir/model.py +250 -0
- qseal-0.1.0/src/qseal/parser/__init__.py +1 -0
- qseal-0.1.0/src/qseal/parser/fragments.py +114 -0
- qseal-0.1.0/src/qseal/parser/sqlglot_parser.py +835 -0
- qseal-0.1.0/src/qseal/policy/__init__.py +77 -0
- qseal-0.1.0/src/qseal/policy/baseline.py +1867 -0
- qseal-0.1.0/src/qseal/report/__init__.py +1 -0
- qseal-0.1.0/src/qseal/report/diff.py +27 -0
- qseal-0.1.0/src/qseal/report/guards.py +105 -0
- qseal-0.1.0/src/qseal/report/json.py +251 -0
- qseal-0.1.0/src/qseal/report/markdown.py +205 -0
- qseal-0.1.0/src/qseal/report/patch.py +130 -0
- qseal-0.1.0/src/qseal/report/text.py +696 -0
- qseal-0.1.0/src/qseal/rewrites/__init__.py +1 -0
- qseal-0.1.0/src/qseal/rewrites/accepted_values_case.py +350 -0
- qseal-0.1.0/src/qseal/rewrites/accepted_values_filter.py +212 -0
- qseal-0.1.0/src/qseal/rewrites/base.py +40 -0
- qseal-0.1.0/src/qseal/rewrites/chain.py +140 -0
- qseal-0.1.0/src/qseal/rewrites/count_distinct.py +224 -0
- qseal-0.1.0/src/qseal/rewrites/distinct.py +125 -0
- qseal-0.1.0/src/qseal/rewrites/group_by_unique.py +196 -0
- qseal-0.1.0/src/qseal/rewrites/join_distinct_exists.py +190 -0
- qseal-0.1.0/src/qseal/rewrites/join_elimination.py +378 -0
- qseal-0.1.0/src/qseal/rewrites/not_null_filter.py +193 -0
- qseal-0.1.0/src/qseal/rewrites/predicate_pushdown.py +184 -0
- qseal-0.1.0/src/qseal/rewrites/registry.py +106 -0
- qseal-0.1.0/src/qseal/rewrites/subtree.py +72 -0
- qseal-0.1.0/src/qseal/search/__init__.py +22 -0
- qseal-0.1.0/src/qseal/search/algorithms.py +551 -0
- qseal-0.1.0/src/qseal/search/model.py +51 -0
- qseal-0.1.0/src/qseal/verifier/__init__.py +1 -0
- qseal-0.1.0/src/qseal/verifier/backends/__init__.py +27 -0
- qseal-0.1.0/src/qseal/verifier/backends/base.py +18 -0
- qseal-0.1.0/src/qseal/verifier/backends/builtin.py +63 -0
- qseal-0.1.0/src/qseal/verifier/backends/external.py +50 -0
- qseal-0.1.0/src/qseal/verifier/backends/external_contract.py +22 -0
- qseal-0.1.0/src/qseal/verifier/backends/qed.py +360 -0
- qseal-0.1.0/src/qseal/verifier/backends/sqlsolver.py +289 -0
- qseal-0.1.0/src/qseal/verifier/backends/verieql.py +448 -0
- qseal-0.1.0/src/qseal/verifier/check.py +219 -0
- qseal-0.1.0/src/qseal/verifier/model.py +18 -0
- qseal-0.1.0/src/qseal/verifier/pair_reduction.py +72 -0
- qseal-0.1.0/tests/fixtures/candidates/candidate_distinct_removed.sql +2 -0
- qseal-0.1.0/tests/fixtures/candidates/candidate_filtered.sql +3 -0
- qseal-0.1.0/tests/fixtures/candidates/original.sql +2 -0
- qseal-0.1.0/tests/fixtures/candidates/schema.yml +7 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/complex_cte.sql +6 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/customer_flags.sql +8 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/dim_users.sql +2 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/schema.yml +8 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/stg_customers.sql +10 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/jaffle_like/models/unsupported_macro.sql +4 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/dbt_project.yml +5 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/accepted_values_orders.sql +10 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/customer_orders.sql +15 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/dim_users.sql +2 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/duplicate_orders.sql +6 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/grouped_cte_filter.sql +11 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/grouped_cte_join.sql +17 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/order_counts.sql +5 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/payment_pivot.sql +10 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/schema.yml +14 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/models/stg_payments.sql +8 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/accepted_values_orders.sql +10 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/customer_orders.sql +15 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/dim_users.sql +2 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/duplicate_orders.sql +6 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/grouped_cte_filter.sql +11 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/grouped_cte_join.sql +17 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/order_counts.sql +5 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/payment_pivot.sql +8 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/synthetic_duckdb/target/compiled/synthetic_duckdb/models/stg_payments.sql +8 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/dbt_project.yml +5 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/dim_users_deduped.sql +2 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/order_count_distinct.sql +2 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/order_unique_rollup.sql +3 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_composite_left_join_users.sql +8 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_distinct_chain.sql +3 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_fk_inner_join_users.sql +7 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_left_join_users.sql +7 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_not_null.sql +3 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_pushdown.sql +6 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_status_case.sql +6 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/orders_status_filter.sql +3 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/schema.yml +42 -0
- qseal-0.1.0/tests/fixtures/dbt_projects/yield_pack/models/users_with_orders_exists.sql +4 -0
- qseal-0.1.0/tests/fixtures/solver_compat/cases.yml +25 -0
- qseal-0.1.0/tests/fixtures/solver_compat/cte_projection_attribution/original.sql +11 -0
- qseal-0.1.0/tests/fixtures/solver_compat/cte_projection_attribution/rewritten.sql +11 -0
- qseal-0.1.0/tests/fixtures/solver_compat/fk_inner_join/original.sql +3 -0
- qseal-0.1.0/tests/fixtures/solver_compat/fk_inner_join/rewritten.sql +2 -0
- qseal-0.1.0/tests/fixtures/solver_compat/join_distinct_exists/original.sql +3 -0
- qseal-0.1.0/tests/fixtures/solver_compat/join_distinct_exists/rewritten.sql +7 -0
- qseal-0.1.0/tests/fixtures/solver_compat/normalized_identity/original.sql +3 -0
- qseal-0.1.0/tests/fixtures/solver_compat/normalized_identity/rewritten.sql +3 -0
- qseal-0.1.0/tests/fixtures/solver_compat/redundant_distinct/original.sql +2 -0
- qseal-0.1.0/tests/fixtures/solver_compat/redundant_distinct/rewritten.sql +2 -0
- qseal-0.1.0/tests/fixtures/solver_compat/schema.yml +31 -0
- qseal-0.1.0/tests/fixtures/solver_compat/unsafe_distinct/original.sql +2 -0
- qseal-0.1.0/tests/fixtures/solver_compat/unsafe_distinct/rewritten.sql +2 -0
- qseal-0.1.0/tests/fixtures/solver_compat/unused_left_join/original.sql +3 -0
- qseal-0.1.0/tests/fixtures/solver_compat/unused_left_join/rewritten.sql +2 -0
- qseal-0.1.0/tests/test_accepted_values_case.py +122 -0
- qseal-0.1.0/tests/test_accepted_values_filter.py +124 -0
- qseal-0.1.0/tests/test_candidate_evidence.py +195 -0
- qseal-0.1.0/tests/test_candidate_generation.py +130 -0
- qseal-0.1.0/tests/test_candidate_verification.py +99 -0
- qseal-0.1.0/tests/test_check_verifier.py +425 -0
- qseal-0.1.0/tests/test_cli.py +2259 -0
- qseal-0.1.0/tests/test_constraint_loader.py +84 -0
- qseal-0.1.0/tests/test_constraints.py +113 -0
- qseal-0.1.0/tests/test_corpus.py +229 -0
- qseal-0.1.0/tests/test_corpus_aggregate.py +237 -0
- qseal-0.1.0/tests/test_corpus_runner.py +370 -0
- qseal-0.1.0/tests/test_corpus_summary.py +258 -0
- qseal-0.1.0/tests/test_corpus_trajectories.py +135 -0
- qseal-0.1.0/tests/test_count_distinct_rewrite.py +85 -0
- qseal-0.1.0/tests/test_dbt_diff_scoping.py +103 -0
- qseal-0.1.0/tests/test_dbt_intake.py +135 -0
- qseal-0.1.0/tests/test_dbt_jinja.py +81 -0
- qseal-0.1.0/tests/test_dbt_loader.py +267 -0
- qseal-0.1.0/tests/test_dbt_project.py +102 -0
- qseal-0.1.0/tests/test_dbt_scan.py +1036 -0
- qseal-0.1.0/tests/test_diff_report.py +35 -0
- qseal-0.1.0/tests/test_distinct_rewrite.py +121 -0
- qseal-0.1.0/tests/test_duckdb_benchmark.py +136 -0
- qseal-0.1.0/tests/test_duckdb_fixtures.py +93 -0
- qseal-0.1.0/tests/test_environment.py +244 -0
- qseal-0.1.0/tests/test_environment_cache.py +544 -0
- qseal-0.1.0/tests/test_evaluation.py +100 -0
- qseal-0.1.0/tests/test_group_by_unique.py +114 -0
- qseal-0.1.0/tests/test_join_distinct_exists.py +110 -0
- qseal-0.1.0/tests/test_join_elimination.py +327 -0
- qseal-0.1.0/tests/test_json_report.py +154 -0
- qseal-0.1.0/tests/test_markdown_report.py +90 -0
- qseal-0.1.0/tests/test_not_null_filter.py +137 -0
- qseal-0.1.0/tests/test_pair_reduction.py +52 -0
- qseal-0.1.0/tests/test_parser.py +515 -0
- qseal-0.1.0/tests/test_patch_report.py +202 -0
- qseal-0.1.0/tests/test_policy_baseline.py +937 -0
- qseal-0.1.0/tests/test_predicate_pushdown.py +79 -0
- qseal-0.1.0/tests/test_product_demo.py +167 -0
- qseal-0.1.0/tests/test_qed_backend.py +115 -0
- qseal-0.1.0/tests/test_qualify.py +98 -0
- qseal-0.1.0/tests/test_report.py +146 -0
- qseal-0.1.0/tests/test_rewrite_actions.py +220 -0
- qseal-0.1.0/tests/test_rewrite_chain.py +91 -0
- qseal-0.1.0/tests/test_rewrite_registry.py +69 -0
- qseal-0.1.0/tests/test_search.py +354 -0
- qseal-0.1.0/tests/test_snowflake_benchmark.py +189 -0
- qseal-0.1.0/tests/test_snowflake_family_suite.py +308 -0
- qseal-0.1.0/tests/test_solver_compat.py +286 -0
- qseal-0.1.0/tests/test_subtree_rewrites.py +282 -0
- qseal-0.1.0/tests/test_verieql_backend.py +372 -0
- qseal-0.1.0/uv.lock +1791 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
push:
|
|
6
|
+
branches: [main]
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
|
|
12
|
+
steps:
|
|
13
|
+
- name: Check out repository
|
|
14
|
+
uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Install uv
|
|
17
|
+
uses: astral-sh/setup-uv@v6
|
|
18
|
+
with:
|
|
19
|
+
enable-cache: true
|
|
20
|
+
|
|
21
|
+
- name: Set up Python
|
|
22
|
+
uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: "3.12"
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: uv sync --locked
|
|
28
|
+
|
|
29
|
+
- name: Run tests
|
|
30
|
+
run: uv run pytest
|
|
31
|
+
|
|
32
|
+
- name: Run lint
|
|
33
|
+
run: uv run ruff check .
|
|
34
|
+
|
|
35
|
+
package-smoke:
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
|
|
38
|
+
steps:
|
|
39
|
+
- name: Check out repository
|
|
40
|
+
uses: actions/checkout@v4
|
|
41
|
+
|
|
42
|
+
- name: Install uv
|
|
43
|
+
uses: astral-sh/setup-uv@v6
|
|
44
|
+
with:
|
|
45
|
+
enable-cache: true
|
|
46
|
+
|
|
47
|
+
- name: Set up Python
|
|
48
|
+
uses: actions/setup-python@v5
|
|
49
|
+
with:
|
|
50
|
+
python-version: "3.12"
|
|
51
|
+
|
|
52
|
+
- name: Build package
|
|
53
|
+
run: uv build
|
|
54
|
+
|
|
55
|
+
- name: Smoke test installed wheel
|
|
56
|
+
run: bash scripts/package_smoke.sh
|
qseal-0.1.0/.gitignore
ADDED
qseal-0.1.0/CHANGELOG.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 - Unreleased
|
|
4
|
+
|
|
5
|
+
- Add a CLI-first prototype for verified-safe SQL rewrites over a constrained
|
|
6
|
+
Snowflake and DuckDB SQL subset.
|
|
7
|
+
- Add dbt scanner workflows: `dbt scan`, `dbt intake`, compiled-SQL scanning,
|
|
8
|
+
changed-file scanning, markdown/JSON/text reports, patch files, and
|
|
9
|
+
composition-chain evidence.
|
|
10
|
+
- Add conservative premise-backed rewrite rules for redundant `DISTINCT`,
|
|
11
|
+
redundant `IS NOT NULL`, unused `LEFT JOIN`, FK-backed unused `INNER JOIN`,
|
|
12
|
+
`JOIN DISTINCT` to `EXISTS`, redundant `COUNT(DISTINCT)`, accepted-values
|
|
13
|
+
filters, accepted-values `CASE`, unique-key `GROUP BY` collapse, and
|
|
14
|
+
predicate pushdown through simple projection subqueries.
|
|
15
|
+
- Add dbt premise ingestion for `unique`, `not_null`, `relationships`,
|
|
16
|
+
`accepted_values`, and `dbt_utils.unique_combination_of_columns`.
|
|
17
|
+
- Add candidate verification/evidence workflows for generated or manual SQL
|
|
18
|
+
candidates, with unproven candidates rejected before benchmarking.
|
|
19
|
+
- Add repeatable DuckDB benchmark and fixture workflows, plus Snowflake
|
|
20
|
+
benchmark-suite commands for target-engine evidence.
|
|
21
|
+
- Add the rewrite-policy experiment surface: structured rewrite actions,
|
|
22
|
+
verified environment steps, corpus runs, trajectory export, search baselines,
|
|
23
|
+
and baseline/ranker policy evaluation.
|
|
24
|
+
- Add optional external verifier adapter spikes for SQLSolver, QED, and VeriEQL.
|
|
25
|
+
VeriEQL remains documented as research/evaluation-only and is not bundled.
|
|
26
|
+
- Add GitHub CI for tests, Ruff, package build, and installed-wheel smoke tests.
|
|
27
|
+
- Remove dormant GitHub Action metadata from the public-v0 surface; CI examples
|
|
28
|
+
install and run the CLI directly.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
QuerySeal is early and intentionally conservative. Changes should keep the
|
|
4
|
+
modeled SQL subset small, explicit, and easy to audit.
|
|
5
|
+
|
|
6
|
+
## Development
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
uv sync
|
|
10
|
+
uv run pytest
|
|
11
|
+
uv run ruff check .
|
|
12
|
+
uv run ruff format .
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
CI runs tests, Ruff, a package build, and an installed-wheel smoke test on every push to `main` and on pull requests.
|
|
16
|
+
|
|
17
|
+
## Adding Rewrite Rules
|
|
18
|
+
|
|
19
|
+
Prefer small, rule-specific changes:
|
|
20
|
+
|
|
21
|
+
- add parser or IR support only for syntax the rule needs
|
|
22
|
+
- reject unsupported SQL explicitly
|
|
23
|
+
- add focused rewrite tests
|
|
24
|
+
- add `qseal check` verifier coverage
|
|
25
|
+
- add example SQL under `examples/`
|
|
26
|
+
- document any new assumptions in `docs/scope.md`
|
|
27
|
+
|
|
28
|
+
Rules should return `UNKNOWN` when a required assumption is missing and
|
|
29
|
+
`UNSUPPORTED` when the SQL shape is outside the modeled subset.
|
|
30
|
+
|
|
31
|
+
## Constraints
|
|
32
|
+
|
|
33
|
+
QuerySeal treats YAML constraints as trusted input. Do not infer production
|
|
34
|
+
truth from Snowflake metadata unless the source is clearly documented and the
|
|
35
|
+
tool reports the assumption.
|
|
36
|
+
|
|
37
|
+
## Commit Style
|
|
38
|
+
|
|
39
|
+
Keep commits self-contained. Good examples:
|
|
40
|
+
|
|
41
|
+
```text
|
|
42
|
+
Add dbt schema constraint loader
|
|
43
|
+
Support IS NULL predicates
|
|
44
|
+
Document Snowflake EXPLAIN plan goals
|
|
45
|
+
```
|
qseal-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 QuerySeal contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
qseal-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: qseal
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: QuerySeal verifies safe SQL rewrites for a constrained Snowflake and DuckDB SQL subset.
|
|
5
|
+
Project-URL: Repository, https://github.com/neelvad/qseal
|
|
6
|
+
Project-URL: Issues, https://github.com/neelvad/qseal/issues
|
|
7
|
+
Project-URL: Documentation, https://github.com/neelvad/qseal#readme
|
|
8
|
+
Author: QuerySeal contributors
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: dbt,formal-verification,query-optimization,snowflake,sql
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Database
|
|
19
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Requires-Dist: click>=8.1.8
|
|
22
|
+
Requires-Dist: duckdb>=1.4.0
|
|
23
|
+
Requires-Dist: jinja2>=3.1.6
|
|
24
|
+
Requires-Dist: pydantic>=2.10.6
|
|
25
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
26
|
+
Requires-Dist: rich>=13.9.4
|
|
27
|
+
Requires-Dist: sqlglot>=26.3.0
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# QuerySeal
|
|
31
|
+
|
|
32
|
+
QuerySeal is a research-grade CLI for verified SQL rewrite experiments.
|
|
33
|
+
|
|
34
|
+
It has two public-v0 surfaces:
|
|
35
|
+
|
|
36
|
+
- **dbt scanner:** find small, premise-backed SQL rewrites that are safe under
|
|
37
|
+
trusted dbt tests or QuerySeal YAML constraints.
|
|
38
|
+
- **rewrite-policy gym:** run search, ranking, and policy-learning experiments
|
|
39
|
+
over a finite SQL rewrite action space where every transition is verified and
|
|
40
|
+
rewards come from repeatable DuckDB benchmarks.
|
|
41
|
+
|
|
42
|
+
QuerySeal is intentionally not a general SQL optimizer, not a full SQL
|
|
43
|
+
equivalence prover, and not a warehouse savings guarantee. A proven rewrite
|
|
44
|
+
means: for the supported SQL subset, the rewritten query returns the same rows
|
|
45
|
+
as the original under the declared assumptions.
|
|
46
|
+
|
|
47
|
+
## Why This Exists
|
|
48
|
+
|
|
49
|
+
Warehouses such as Snowflake generally cannot use dbt tests as optimizer
|
|
50
|
+
premises. If dbt says a column is unique, non-null, or related to a parent table,
|
|
51
|
+
that is valuable semantic information, but it is not an enforced database
|
|
52
|
+
constraint. QuerySeal treats those tests as explicit trusted assumptions and
|
|
53
|
+
uses them to prove conservative rewrites such as:
|
|
54
|
+
|
|
55
|
+
- removing redundant `DISTINCT`
|
|
56
|
+
- removing redundant `IS NOT NULL` filters
|
|
57
|
+
- removing unused `LEFT JOIN`s
|
|
58
|
+
- removing FK-backed unused `INNER JOIN`s
|
|
59
|
+
- simplifying `COUNT(DISTINCT col)` when `col` is unique and non-null
|
|
60
|
+
- removing accepted-values filters and simplifying accepted-values `CASE`
|
|
61
|
+
- collapsing narrow `GROUP BY` queries over trusted unique keys
|
|
62
|
+
- pushing predicates through simple projection subqueries
|
|
63
|
+
|
|
64
|
+
The proof is conditional. If a rewrite depends on a dbt `unique`, `not_null`,
|
|
65
|
+
`relationships`, or `accepted_values` test, that test must keep passing.
|
|
66
|
+
|
|
67
|
+
## Install
|
|
68
|
+
|
|
69
|
+
From a checkout:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
uv sync
|
|
73
|
+
uv run qseal --help
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
After the package is published, the intended quick paths are:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
uvx qseal --help
|
|
80
|
+
pipx install qseal
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
The default scanner, corpus runner, and DuckDB benchmark tools are pure Python.
|
|
84
|
+
Optional external solver integrations require user-supplied toolchains:
|
|
85
|
+
|
|
86
|
+
- **SQLSolver**: optional independent equivalence prover; Apache 2.0 upstream.
|
|
87
|
+
- **QED**: optional independent equivalence prover; MIT/Apache-compatible
|
|
88
|
+
upstream components.
|
|
89
|
+
- **VeriEQL**: optional bounded refuter for research/evaluation only. It is
|
|
90
|
+
CC BY-NC-SA 4.0 and is not bundled, vendored, or part of a commercial path.
|
|
91
|
+
|
|
92
|
+
## Quick Demos
|
|
93
|
+
|
|
94
|
+
Suggest a rewrite for one query:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
uv run qseal suggest examples/dbt/distinct.sql \
|
|
98
|
+
--schema examples/dbt/schema.yml \
|
|
99
|
+
--all
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Scan a small dbt-like fixture and produce a privacy-preserving intake report:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
uv run qseal dbt intake tests/fixtures/dbt_projects/yield_pack
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Scan the product demo project for advisory findings:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
uv run qseal dbt scan examples/product_demo/dbt_project --format text
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Run a tiny rewrite-policy corpus experiment:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
uv run qseal corpus run /tmp/qseal-corpus-smoke \
|
|
118
|
+
--task redundant-distinct-users \
|
|
119
|
+
--strategy fixed_order \
|
|
120
|
+
--strategy greedy \
|
|
121
|
+
--warmups 0 \
|
|
122
|
+
--repetitions 1
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Mode A: dbt Scanner
|
|
126
|
+
|
|
127
|
+
The dbt scanner is an advisory workflow for data projects. It scans dbt model
|
|
128
|
+
SQL, reads nearby `schema.yml` / `.yaml` tests, and reports proven-safe rewrite
|
|
129
|
+
opportunities. It can emit text, JSON, markdown, diffs, patch files, and
|
|
130
|
+
redacted intake artifacts.
|
|
131
|
+
|
|
132
|
+
Recommended first command for a private project:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
uv run qseal dbt intake . --use-compiled --report-file qseal-intake.json
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
The intake artifact is aggregate-only. It omits SQL, model names, file paths,
|
|
139
|
+
diffs, raw unsupported reasons, and literal accepted values. It keeps the useful
|
|
140
|
+
fit signals: scanned model count, silent model count, proven finding count, rule
|
|
141
|
+
counts, required test categories, redacted unsupported reason categories, and
|
|
142
|
+
apply-readiness counts.
|
|
143
|
+
|
|
144
|
+
For local advisory review:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
uv run qseal dbt scan . --all --report-file qseal-report.json
|
|
148
|
+
uv run qseal dbt scan . --use-compiled --all --report-file qseal-compiled-report.json
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
For CI today, use the CLI in your workflow. The repository contains workflow
|
|
152
|
+
examples, but the project should not be treated as a published Marketplace
|
|
153
|
+
Action yet. See [docs/github-actions.md](docs/github-actions.md) and
|
|
154
|
+
[docs/ci.md](docs/ci.md).
|
|
155
|
+
|
|
156
|
+
## Mode B: Rewrite-Policy Gym
|
|
157
|
+
|
|
158
|
+
The policy/research side exposes QuerySeal's rewrite rules as a finite action
|
|
159
|
+
space. An environment step proposes one rewrite action, verifies semantic
|
|
160
|
+
safety, optionally benchmarks the transition on DuckDB, and records the reward.
|
|
161
|
+
|
|
162
|
+
This is for experiments in search, ranking, RL-style policy learning, and
|
|
163
|
+
verified action selection. It is not production query optimization.
|
|
164
|
+
|
|
165
|
+
Useful commands:
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
uv run qseal corpus run /tmp/qseal-run \
|
|
169
|
+
--strategy fixed_order \
|
|
170
|
+
--strategy random \
|
|
171
|
+
--strategy greedy \
|
|
172
|
+
--strategy beam \
|
|
173
|
+
--reward-margin 0.05
|
|
174
|
+
|
|
175
|
+
uv run qseal corpus export-trajectories \
|
|
176
|
+
/tmp/qseal-run/corpus-run.json \
|
|
177
|
+
--output /tmp/qseal-trajectories.jsonl
|
|
178
|
+
|
|
179
|
+
uv run qseal policy train-ranker \
|
|
180
|
+
/tmp/qseal-trajectories.jsonl \
|
|
181
|
+
--model-file /tmp/qseal-ranker.json
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
The bundled DuckDB corpus is deliberately small and controlled. That is useful
|
|
185
|
+
for reproducibility and policy debugging, but it is not evidence that the same
|
|
186
|
+
policy improves arbitrary production SQL. See
|
|
187
|
+
[docs/rewrite-policy-gym.md](docs/rewrite-policy-gym.md),
|
|
188
|
+
[docs/rewrite-environment.md](docs/rewrite-environment.md),
|
|
189
|
+
[docs/search-baselines.md](docs/search-baselines.md), and
|
|
190
|
+
[docs/task-corpus.md](docs/task-corpus.md).
|
|
191
|
+
|
|
192
|
+
## What "Proven" Means
|
|
193
|
+
|
|
194
|
+
QuerySeal reports how a finding was certified:
|
|
195
|
+
|
|
196
|
+
- **builtin**: a hand-written rule replayed the same rewrite after parsing and
|
|
197
|
+
normalization. This is the default scanner path.
|
|
198
|
+
- **SQLSolver / QED**: an external prover returned an equivalence result.
|
|
199
|
+
- **VeriEQL**: a bounded refuter found a counterexample or did not find one up
|
|
200
|
+
to a bound. A counterexample is a sound disproof; bounded-OK is evidence, not
|
|
201
|
+
a proof.
|
|
202
|
+
|
|
203
|
+
Runtime speed is separate from semantic safety. QuerySeal can benchmark proven
|
|
204
|
+
pairs with DuckDB or Snowflake helpers, but performance evidence is diagnostic
|
|
205
|
+
and workload-specific.
|
|
206
|
+
|
|
207
|
+
## Supported Inputs
|
|
208
|
+
|
|
209
|
+
The SQL subset is intentionally conservative:
|
|
210
|
+
|
|
211
|
+
- direct table sources and simple subquery sources
|
|
212
|
+
- narrow non-recursive CTE pass-through chains
|
|
213
|
+
- direct, star, and simple aliased scalar projections
|
|
214
|
+
- simple `WHERE` predicates joined by `AND`
|
|
215
|
+
- simple `EXISTS`
|
|
216
|
+
- `INNER JOIN` / `LEFT JOIN` with column equality predicates
|
|
217
|
+
- qualified Snowflake relation names
|
|
218
|
+
- selected `GROUP BY`, aggregate, window, and `QUALIFY` shapes where a parser or
|
|
219
|
+
rule explicitly supports them
|
|
220
|
+
|
|
221
|
+
Trusted constraints can come from QuerySeal YAML or dbt `schema.yml` / `.yaml`.
|
|
222
|
+
Supported dbt premise types include:
|
|
223
|
+
|
|
224
|
+
- `unique`
|
|
225
|
+
- `not_null`
|
|
226
|
+
- `relationships`
|
|
227
|
+
- `accepted_values`
|
|
228
|
+
- `dbt_utils.unique_combination_of_columns`
|
|
229
|
+
|
|
230
|
+
Out-of-scope items include full SQL equivalence, arbitrary subqueries, join
|
|
231
|
+
reordering, recursive CTEs, UDFs, semi-structured `VARIANT` / `FLATTEN`, and any
|
|
232
|
+
rewrite that QuerySeal cannot verify. Full detail: [docs/scope.md](docs/scope.md).
|
|
233
|
+
|
|
234
|
+
## Candidate Verification
|
|
235
|
+
|
|
236
|
+
If another tool, human, or model generates candidate SQL files, keep generation
|
|
237
|
+
outside the trusted path and gate candidates with QuerySeal:
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
uv run qseal candidates evidence original.sql \
|
|
241
|
+
--candidates-dir generated-candidates \
|
|
242
|
+
--schema schema.yml \
|
|
243
|
+
--fail-on unproven \
|
|
244
|
+
--report-file qseal-candidate-evidence.json
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Only `PROVEN_EQUIVALENT` candidates should be considered for review. See
|
|
248
|
+
[docs/candidate-evidence-ci.md](docs/candidate-evidence-ci.md).
|
|
249
|
+
|
|
250
|
+
## Documentation
|
|
251
|
+
|
|
252
|
+
- [Scope](docs/scope.md): supported SQL, assumptions, and non-goals.
|
|
253
|
+
- [Artifacts](docs/artifacts.md): JSON report contracts.
|
|
254
|
+
- [GitHub workflow examples](docs/github-actions.md): CLI-based CI examples.
|
|
255
|
+
- [Candidate evidence](docs/candidate-evidence-ci.md): verify generated SQL.
|
|
256
|
+
- [Rewrite-policy gym](docs/rewrite-policy-gym.md): corpus, search, and policy
|
|
257
|
+
experiments.
|
|
258
|
+
- [Performance evidence](docs/performance-evidence.md): benchmark tiers and
|
|
259
|
+
evidence limits.
|
|
260
|
+
- [Product demo](docs/product-demo.md): product-shaped demo narrative.
|
|
261
|
+
- [Roadmap](docs/roadmap.md): near-term premise/rewrite direction.
|
|
262
|
+
- Solver notes: [SQLSolver](docs/sqlsolver-spike.md),
|
|
263
|
+
[QED](docs/qed-spike.md), [VeriEQL](docs/verieql-spike.md).
|
|
264
|
+
|
|
265
|
+
## Public v0 Status
|
|
266
|
+
|
|
267
|
+
This is an alpha research/prototype release. The useful public artifact is a
|
|
268
|
+
reproducible verified-rewrite workbench, not a mature optimizer. If you try it
|
|
269
|
+
on a real dbt project, start with `qseal dbt intake` and share the redacted
|
|
270
|
+
artifact before sharing source SQL.
|
qseal-0.1.0/README.md
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
# QuerySeal
|
|
2
|
+
|
|
3
|
+
QuerySeal is a research-grade CLI for verified SQL rewrite experiments.
|
|
4
|
+
|
|
5
|
+
It has two public-v0 surfaces:
|
|
6
|
+
|
|
7
|
+
- **dbt scanner:** find small, premise-backed SQL rewrites that are safe under
|
|
8
|
+
trusted dbt tests or QuerySeal YAML constraints.
|
|
9
|
+
- **rewrite-policy gym:** run search, ranking, and policy-learning experiments
|
|
10
|
+
over a finite SQL rewrite action space where every transition is verified and
|
|
11
|
+
rewards come from repeatable DuckDB benchmarks.
|
|
12
|
+
|
|
13
|
+
QuerySeal is intentionally not a general SQL optimizer, not a full SQL
|
|
14
|
+
equivalence prover, and not a warehouse savings guarantee. A proven rewrite
|
|
15
|
+
means: for the supported SQL subset, the rewritten query returns the same rows
|
|
16
|
+
as the original under the declared assumptions.
|
|
17
|
+
|
|
18
|
+
## Why This Exists
|
|
19
|
+
|
|
20
|
+
Warehouses such as Snowflake generally cannot use dbt tests as optimizer
|
|
21
|
+
premises. If dbt says a column is unique, non-null, or related to a parent table,
|
|
22
|
+
that is valuable semantic information, but it is not an enforced database
|
|
23
|
+
constraint. QuerySeal treats those tests as explicit trusted assumptions and
|
|
24
|
+
uses them to prove conservative rewrites such as:
|
|
25
|
+
|
|
26
|
+
- removing redundant `DISTINCT`
|
|
27
|
+
- removing redundant `IS NOT NULL` filters
|
|
28
|
+
- removing unused `LEFT JOIN`s
|
|
29
|
+
- removing FK-backed unused `INNER JOIN`s
|
|
30
|
+
- simplifying `COUNT(DISTINCT col)` when `col` is unique and non-null
|
|
31
|
+
- removing accepted-values filters and simplifying accepted-values `CASE` expressions
|
|
32
|
+
- collapsing narrow `GROUP BY` queries over trusted unique keys
|
|
33
|
+
- pushing predicates through simple projection subqueries
|
|
34
|
+
|
|
35
|
+
The proof is conditional. If a rewrite depends on a dbt `unique`, `not_null`,
|
|
36
|
+
`relationships`, or `accepted_values` test, that test must keep passing.
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
From a checkout:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
uv sync
|
|
44
|
+
uv run qseal --help
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
After the package is published, the intended quick paths are:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uvx qseal --help
|
|
51
|
+
pipx install qseal
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The default scanner, corpus runner, and DuckDB benchmark tools are pure Python.
|
|
55
|
+
Optional external solver integrations require user-supplied toolchains:
|
|
56
|
+
|
|
57
|
+
- **SQLSolver**: optional independent equivalence prover; Apache 2.0 upstream.
|
|
58
|
+
- **QED**: optional independent equivalence prover; MIT/Apache-compatible
|
|
59
|
+
upstream components.
|
|
60
|
+
- **VeriEQL**: optional bounded refuter for research/evaluation only. It is
|
|
61
|
+
CC BY-NC-SA 4.0 and is not bundled, vendored, or part of a commercial path.
|
|
62
|
+
|
|
63
|
+
## Quick Demos
|
|
64
|
+
|
|
65
|
+
Suggest a rewrite for one query:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
uv run qseal suggest examples/dbt/distinct.sql \
|
|
69
|
+
--schema examples/dbt/schema.yml \
|
|
70
|
+
--all
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Scan a small dbt-like fixture and produce a privacy-preserving intake report:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
uv run qseal dbt intake tests/fixtures/dbt_projects/yield_pack
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Scan the product demo project for advisory findings:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
uv run qseal dbt scan examples/product_demo/dbt_project --format text
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Run a tiny rewrite-policy corpus experiment:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
uv run qseal corpus run /tmp/qseal-corpus-smoke \
|
|
89
|
+
--task redundant-distinct-users \
|
|
90
|
+
--strategy fixed_order \
|
|
91
|
+
--strategy greedy \
|
|
92
|
+
--warmups 0 \
|
|
93
|
+
--repetitions 1
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Mode A: dbt Scanner
|
|
97
|
+
|
|
98
|
+
The dbt scanner is an advisory workflow for data projects. It scans dbt model
|
|
99
|
+
SQL, reads nearby `schema.yml` / `.yaml` tests, and reports proven-safe rewrite
|
|
100
|
+
opportunities. It can emit text, JSON, markdown, diffs, patch files, and
|
|
101
|
+
redacted intake artifacts.
|
|
102
|
+
|
|
103
|
+
Recommended first command for a private project:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
uv run qseal dbt intake . --use-compiled --report-file qseal-intake.json
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The intake artifact is aggregate-only. It omits SQL, model names, file paths,
|
|
110
|
+
diffs, raw unsupported reasons, and literal accepted values. It keeps the useful
|
|
111
|
+
fit signals: scanned model count, silent model count, proven finding count, rule
|
|
112
|
+
counts, required test categories, redacted unsupported reason categories, and
|
|
113
|
+
apply-readiness counts.
|
|
114
|
+
|
|
115
|
+
For local advisory review:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
uv run qseal dbt scan . --all --report-file qseal-report.json
|
|
119
|
+
uv run qseal dbt scan . --use-compiled --all --report-file qseal-compiled-report.json
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
For CI today, use the CLI in your workflow. The repository contains workflow
|
|
123
|
+
examples, but the project should not be treated as a published Marketplace
|
|
124
|
+
Action yet. See [docs/github-actions.md](docs/github-actions.md) and
|
|
125
|
+
[docs/ci.md](docs/ci.md).
|
|
126
|
+
|
|
127
|
+
## Mode B: Rewrite-Policy Gym
|
|
128
|
+
|
|
129
|
+
The policy/research side exposes QuerySeal's rewrite rules as a finite action
|
|
130
|
+
space. An environment step proposes one rewrite action, verifies semantic
|
|
131
|
+
safety, optionally benchmarks the transition on DuckDB, and records the reward.
|
|
132
|
+
|
|
133
|
+
This is for experiments in search, ranking, RL-style policy learning, and
|
|
134
|
+
verified action selection. It is not production query optimization.
|
|
135
|
+
|
|
136
|
+
Useful commands:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
uv run qseal corpus run /tmp/qseal-run \
|
|
140
|
+
--strategy fixed_order \
|
|
141
|
+
--strategy random \
|
|
142
|
+
--strategy greedy \
|
|
143
|
+
--strategy beam \
|
|
144
|
+
--reward-margin 0.05
|
|
145
|
+
|
|
146
|
+
uv run qseal corpus export-trajectories \
|
|
147
|
+
/tmp/qseal-run/corpus-run.json \
|
|
148
|
+
--output /tmp/qseal-trajectories.jsonl
|
|
149
|
+
|
|
150
|
+
uv run qseal policy train-ranker \
|
|
151
|
+
/tmp/qseal-trajectories.jsonl \
|
|
152
|
+
--model-file /tmp/qseal-ranker.json
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
The bundled DuckDB corpus is deliberately small and controlled. That is useful
|
|
156
|
+
for reproducibility and policy debugging, but it is not evidence that the same
|
|
157
|
+
policy improves arbitrary production SQL. See
|
|
158
|
+
[docs/rewrite-policy-gym.md](docs/rewrite-policy-gym.md),
|
|
159
|
+
[docs/rewrite-environment.md](docs/rewrite-environment.md),
|
|
160
|
+
[docs/search-baselines.md](docs/search-baselines.md), and
|
|
161
|
+
[docs/task-corpus.md](docs/task-corpus.md).
|
|
162
|
+
|
|
163
|
+
## What "Proven" Means
|
|
164
|
+
|
|
165
|
+
QuerySeal reports how a finding was certified:
|
|
166
|
+
|
|
167
|
+
- **builtin**: a hand-written rule replayed the same rewrite after parsing and
|
|
168
|
+
normalization. This is the default scanner path.
|
|
169
|
+
- **SQLSolver / QED**: an external prover returned an equivalence result.
|
|
170
|
+
- **VeriEQL**: a bounded refuter found a counterexample or did not find one up
|
|
171
|
+
to a bound. A counterexample is a sound disproof; bounded-OK is evidence, not
|
|
172
|
+
a proof.
|
|
173
|
+
|
|
174
|
+
Runtime speed is separate from semantic safety. QuerySeal can benchmark proven
|
|
175
|
+
pairs with DuckDB or Snowflake helpers, but performance evidence is diagnostic
|
|
176
|
+
and workload-specific.
|
|
177
|
+
|
|
178
|
+
## Supported Inputs
|
|
179
|
+
|
|
180
|
+
The SQL subset is intentionally conservative:
|
|
181
|
+
|
|
182
|
+
- direct table sources and simple subquery sources
|
|
183
|
+
- narrow non-recursive CTE pass-through chains
|
|
184
|
+
- direct, star, and simple aliased scalar projections
|
|
185
|
+
- simple `WHERE` predicates joined by `AND`
|
|
186
|
+
- simple `EXISTS`
|
|
187
|
+
- `INNER JOIN` / `LEFT JOIN` with column equality predicates
|
|
188
|
+
- qualified Snowflake relation names
|
|
189
|
+
- selected `GROUP BY`, aggregate, window, and `QUALIFY` shapes where a parser or
|
|
190
|
+
rule explicitly supports them
|
|
191
|
+
|
|
192
|
+
Trusted constraints can come from QuerySeal YAML or dbt `schema.yml` / `.yaml`.
|
|
193
|
+
Supported dbt premise types include:
|
|
194
|
+
|
|
195
|
+
- `unique`
|
|
196
|
+
- `not_null`
|
|
197
|
+
- `relationships`
|
|
198
|
+
- `accepted_values`
|
|
199
|
+
- `dbt_utils.unique_combination_of_columns`
|
|
200
|
+
|
|
201
|
+
Out-of-scope items include full SQL equivalence, arbitrary subqueries, join
|
|
202
|
+
reordering, recursive CTEs, UDFs, semi-structured `VARIANT` / `FLATTEN`, and any
|
|
203
|
+
rewrite that QuerySeal cannot verify. Full detail: [docs/scope.md](docs/scope.md).
|
|
204
|
+
|
|
205
|
+
## Candidate Verification
|
|
206
|
+
|
|
207
|
+
If another tool, human, or model generates candidate SQL files, keep generation
|
|
208
|
+
outside the trusted path and gate candidates with QuerySeal:
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
uv run qseal candidates evidence original.sql \
|
|
212
|
+
--candidates-dir generated-candidates \
|
|
213
|
+
--schema schema.yml \
|
|
214
|
+
--fail-on unproven \
|
|
215
|
+
--report-file qseal-candidate-evidence.json
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
Only `PROVEN_EQUIVALENT` candidates should be considered for review. See
|
|
219
|
+
[docs/candidate-evidence-ci.md](docs/candidate-evidence-ci.md).
|
|
220
|
+
|
|
221
|
+
## Documentation
|
|
222
|
+
|
|
223
|
+
- [Scope](docs/scope.md): supported SQL, assumptions, and non-goals.
|
|
224
|
+
- [Artifacts](docs/artifacts.md): JSON report contracts.
|
|
225
|
+
- [GitHub workflow examples](docs/github-actions.md): CLI-based CI examples.
|
|
226
|
+
- [Candidate evidence](docs/candidate-evidence-ci.md): verify generated SQL.
|
|
227
|
+
- [Rewrite-policy gym](docs/rewrite-policy-gym.md): corpus, search, and policy
|
|
228
|
+
experiments.
|
|
229
|
+
- [Performance evidence](docs/performance-evidence.md): benchmark tiers and
|
|
230
|
+
evidence limits.
|
|
231
|
+
- [Product demo](docs/product-demo.md): product-shaped demo narrative.
|
|
232
|
+
- [Roadmap](docs/roadmap.md): near-term premise/rewrite direction.
|
|
233
|
+
- Solver notes: [SQLSolver](docs/sqlsolver-spike.md),
|
|
234
|
+
[QED](docs/qed-spike.md), [VeriEQL](docs/verieql-spike.md).
|
|
235
|
+
|
|
236
|
+
## Public v0 Status
|
|
237
|
+
|
|
238
|
+
This is an alpha research/prototype release. The useful public artifact is a
|
|
239
|
+
reproducible verified-rewrite workbench, not a mature optimizer. If you try it
|
|
240
|
+
on a real dbt project, start with `qseal dbt intake` and share the redacted
|
|
241
|
+
artifact before sharing source SQL.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
FROM ubuntu:22.04
|
|
2
|
+
|
|
3
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
4
|
+
|
|
5
|
+
RUN apt-get update \
|
|
6
|
+
&& apt-get install -y --no-install-recommends \
|
|
7
|
+
ca-certificates \
|
|
8
|
+
curl \
|
|
9
|
+
file \
|
|
10
|
+
openjdk-17-jdk \
|
|
11
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
12
|
+
|
|
13
|
+
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
14
|
+
|
|
15
|
+
ENV PATH="/root/.local/bin:${PATH}"
|
|
16
|
+
ENV UV_LINK_MODE=copy
|
|
17
|
+
ENV UV_PROJECT_ENVIRONMENT=/tmp/qseal-venv
|
|
18
|
+
ENV UV_CACHE_DIR=/tmp/qseal-uv-cache
|
|
19
|
+
|
|
20
|
+
WORKDIR /sqlsolver
|