PyPI - dbt-polyglot - Versions diffs - 0.1.0__tar.gz - Mend

dbt-polyglot 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

dbt_polyglot-0.1.0/CHANGELOG.md +25 -0
dbt_polyglot-0.1.0/LICENSE +202 -0
dbt_polyglot-0.1.0/MANIFEST.in +5 -0
dbt_polyglot-0.1.0/PKG-INFO +247 -0
dbt_polyglot-0.1.0/README.md +222 -0
dbt_polyglot-0.1.0/dbt_polyglot.pth +1 -0
dbt_polyglot-0.1.0/pyproject.toml +40 -0
dbt_polyglot-0.1.0/setup.cfg +4 -0
dbt_polyglot-0.1.0/setup.py +21 -0
dbt_polyglot-0.1.0/src/dbt_polyglot/__init__.py +27 -0
dbt_polyglot-0.1.0/src/dbt_polyglot/fixups.py +31 -0
dbt_polyglot-0.1.0/src/dbt_polyglot/transpile.py +56 -0
dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/PKG-INFO +247 -0
dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/SOURCES.txt +17 -0
dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/dependency_links.txt +1 -0
dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/requires.txt +5 -0
dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/top_level.txt +1 -0
dbt_polyglot-0.1.0/tests/__init__.py +0 -0
dbt_polyglot-0.1.0/tests/test_transpile.py +42 -0

dbt_polyglot-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,25 @@
+# Changelog
+All notable changes to `dbt-polyglot` are documented here. Format loosely follows
+[Keep a Changelog](https://keepachangelog.com/); this project uses [SemVer](https://semver.org/).
+## [0.1.0] — Unreleased
+### Added
+Initial release (as `dbt-polyglot`).
+- Package layout: a standard src-layout (`src/dbt_polyglot/`), split into `transpile` (the
+  compile-phase patch) and `fixups` (the `SPARK_FIXUPS` registry), with import-time
+  activation in `__init__`.
+- Compile-phase transpile: wraps `dbt.compilation.Compiler._compile_code` to translate each opted-in
+  model's SQL from a source dialect to Spark via `sqlglot` (`parse → fix-ups → generate`), before dbt
+  wraps it in materialization DDL. Opt in with `+transpile_from: <dialect>` in dbt config; no model edits.
+- **Spark-output fix-up layer** (`SPARK_FIXUPS`): repairs sqlglot output that Spark's real parser rejects.
+  First transform rewrites quantified-subquery comparisons (`x <> ALL (subq)` / `x = ANY (subq)`) back to
+  `NOT x IN (subq)` / `x IN (subq)`. Extensible registry.
+- Fail-soft: any transpile error / empty / multi-statement output logs a WARNING and passes the original
+  SQL through unchanged — never crashes a compile, never silently emits a wrong result.
+- Pretty-printed output; no-op when `transpile_from` is unset or equals the target dialect.
+### Notes
+- Patches a dbt-core private method (`_compile_code`); import-guarded to fail open. Pin a supported
+  dbt-core range and re-verify on major dbt upgrades.

dbt_polyglot-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,202 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [2026] [Saket Kumar]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

dbt_polyglot-0.1.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,5 @@
+include dbt_polyglot.pth
+include LICENSE
+include README.md
+include CHANGELOG.md
+recursive-include tests *.py

dbt_polyglot-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,247 @@
+Metadata-Version: 2.4
+Name: dbt-polyglot
+Version: 0.1.0
+Summary: Run any-dialect dbt models on Spark unchanged — transpiles each model's SQL to Spark at dbt compile time via sqlglot.
+Author-email: Saket Kumar <kumar.saket0021@gmail.com>
+License-Expression: Apache-2.0
+Project-URL: Homepage, https://github.com/Saketkr21/dbt-polyglot
+Project-URL: Repository, https://github.com/Saketkr21/dbt-polyglot
+Project-URL: Issues, https://github.com/Saketkr21/dbt-polyglot/issues
+Keywords: dbt,spark,sqlglot,snowflake,sql,transpile,dialect,polyglot
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: SQL
+Classifier: Topic :: Database
+Classifier: Topic :: Software Development :: Code Generators
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: sqlglot>=20.0
+Requires-Dist: dbt-core>=1.5
+Provides-Extra: test
+Requires-Dist: pytest; extra == "test"
+Dynamic: license-file
+# dbt-polyglot
+**Run a dbt project written in another SQL dialect (Snowflake, BigQuery, Redshift, …) on
+Spark — unchanged.** Each model's SQL is transpiled to Spark with
+[`sqlglot`](https://github.com/tobymao/sqlglot) at dbt's **compile phase**, so the SQL
+dbt actually executes (and what lands in `target/compiled/`) is already Spark.
+The only changes are configuration — your model `.sql` files are never edited. Drop the
+package into any existing dbt repo, point `profiles.yml` at Spark, declare the source
+dialect in `dbt_project.yml`, and `dbt build`.
+> Why this exists: Spark has no `QUALIFY` clause (`[PARSE_SYNTAX_ERROR] … near 'QUALIFY'`),
+> plus dozens of smaller dialect gaps (`IFF`, `NVL`, `::` casts, `DATEADD`, null ordering, …).
+> A portable/Snowflake-style model fails on Spark until its SQL is translated. This package
+> does that translation transparently, in-place, at compile time.
+---
+## Install
+It is a **normal Python package** — install it into the same virtualenv your `dbt` runs in.
+Installation auto-activates the patch (via a `.pth` file that imports the module on
+interpreter start-up; see [Installation: why pip, not `dbt deps`](#installation-why-pip-not-dbt-deps)).
+```bash
+pip install dbt-polyglot
+```
+From a git checkout (bleeding edge):
+```bash
+pip install "git+https://github.com/Saketkr21/dbt-polyglot.git"
+```
+Local / editable (developing the package):
+```bash
+pip install -e path/to/dbt-polyglot
+```
+You also need a Spark adapter for dbt (this package does not pull one in, so you can choose
+your connection method):
+```bash
+pip install "dbt-spark[PyHive]"     # Thrift/HiveServer2, used in the examples below
+```
+---
+## Configure (the only changes you make)
+### 1. `profiles.yml` — point the output at Spark
+```yaml
+your_profile:
+  target: dev
+  outputs:
+    dev:
+      type: spark
+      method: thrift
+      host: "{{ env_var('DBT_SPARK_HOST', 'localhost') }}"
+      port: "{{ env_var('DBT_SPARK_PORT', 10000) | int }}"
+      schema: analytics
+```
+### 2. `dbt_project.yml` — declare your models' source dialect
+```yaml
+models:
+  your_project:
+    +transpile_from: snowflake     # the dialect your models are written in
+    # +transpile_to: spark         # optional, default 'spark'
+```
+`transpile_from` accepts **any** dialect `sqlglot` understands — `snowflake`, `bigquery`,
+`redshift`, `tsql`, `postgres`, `duckdb`, `presto`, `trino`, … `transpile_to` defaults to
+`spark` and rarely needs changing.
+You can scope it to a subtree (`models.your_project.staging.+transpile_from: …`) or override
+it per model — a per-model `config` beats the project default:
+```sql
+-- models/marts/latest_order.sql  (written in Snowflake SQL, runs on Spark)
+{{ config(materialized='table', transpile_from='snowflake') }}
+select *
+from {{ ref('orders') }}
+qualify row_number() over (partition by customer_id order by ordered_at desc) = 1
+```
+That's it. `dbt build` now runs your existing models on Spark, no model edits.
+---
+## How it works
+At dbt **compile**, the package wraps `dbt.compilation.Compiler._compile_code` and runs an
+extra step on each opted-in model's compiled SQL body:
+```
+parse(read=transpile_from)  →  apply SPARK_FIXUPS  →  generate(transpile_to, pretty=True)
+```
+Because the rewrite happens on the model body **before** dbt wraps it in the materialization
+DDL (`create table … as …`), both `target/compiled/` and the SQL sent to Spark are pure
+Spark — there is no mixed-dialect string and no separate output directory.
+### The fix-up layer (what makes it trustable)
+`sqlglot`'s output is occasionally valid in *its* model of Spark but rejected by Spark's
+**real** parser. The classic case: `x NOT IN (subquery)`, which `sqlglot`'s Snowflake reader
+canonicalizes to the **unsupported** `x <> ALL (subquery)`. The `SPARK_FIXUPS` registry is a
+list of small AST transforms applied to the parsed tree before Spark SQL is generated; the
+first one rewrites quantified-subquery comparisons (`x <> ALL (subq)` / `x = ANY (subq)`) back
+to `NOT x IN (subq)` / `x IN (subq)`. The registry is extensible — one EXPLAIN-verified transform per
+gap discovered.
+### Trust model — verified, or fails loud (never silently wrong)
+A model is either converted to **valid Spark SQL** or it **fails loudly** with a clear
+dbt/Spark error naming the model. It never silently emits a wrong result from an
+un-converted construct:
+- **Fail-soft + loud.** If `sqlglot` can't parse the SQL as the source dialect, or produces
+  empty/multi-statement output, the patch logs a `WARNING` (visible in the dbt run) and
+  passes the **original SQL through unchanged**. Spark then either runs it (it was already
+  valid) or rejects it loudly — so the failure surfaces, it is never hidden.
+To certify a whole repo **upfront** — before a heavy run — use dbt's own native validation.
+No extra tooling: dbt already runs SQL through your `profiles.yml` adapter, against whatever
+warehouse you target.
+```bash
+dbt build --empty              # build every model with 0 input rows (DAG-ordered)
+dbt build --empty --select marts.*   # any dbt selector works
+dbt show --limit 0 -s my_model # read-only: validate the SELECT without materializing
+```
+`--empty` limits every `ref`/`source` to zero rows, so dbt executes each model's real SQL
+against the warehouse — moving no data — and **fails loudly, naming the model**, if the
+transpiled SQL is invalid. Because it builds in dependency order, there is no "upstream not
+built" ambiguity. That makes `dbt build --empty` a drop-in CI gate (it exits non-zero if any
+model is invalid). `dbt show --limit 0` is the non-destructive variant when the target
+role can't create objects.
+### Scope
+Every opted-in model is transpiled — the full `sqlglot` breadth (`IFF`→`IF`, `NVL`→`COALESCE`,
+`::`→`CAST`, `DATEADD`→`DATE_ADD`, `QUALIFY`→windowed subquery, …). To transpile only part of a
+project, scope `+transpile_from` to a folder/model subtree (or set it per model) — the dbt-native
+way — rather than a global on/off.
+### No-op guarantee
+If `transpile_from` is unset, or equals `transpile_to` (you're already writing Spark SQL),
+the model is **never touched** — `sqlglot` is not even called and nothing is reformatted.
+### A note on `NULLS LAST` in the output (intentional)
+Snowflake and Spark have **opposite** default null ordering (Snowflake sorts NULLs largest →
+last; Spark sorts them smallest → first). When translating a Snowflake `ORDER BY x`,
+`sqlglot` appends an explicit `… NULLS LAST` to **preserve Snowflake semantics** — without
+it, a `QUALIFY ROW_NUMBER() … = 1` top-N pick could choose a different row. It is added only
+on a true cross-dialect translation, and is semantically required — do not strip it.
+---
+## Installation: why `pip`, not `dbt deps`
+**`dbt deps` cannot install this — you must `pip install` it.** They do different things:
+- **`dbt deps`** installs **dbt packages**: bundles of dbt *macros, models, seeds, and
+  tests* (the things listed in `packages.yml` / `dependencies.yml`). It pulls SQL/Jinja
+  assets into `dbt_packages/` and **never installs or runs Python code**.
+- **`dbt-polyglot`** is a **Python package**. It works by monkeypatching a dbt-core
+  function at runtime, and it activates through a `.pth` file that Python executes on
+  interpreter start-up. Both of those are Python-installer concerns — only `pip` (or `uv`,
+  `poetry`, etc.) places a `.pth` into `site-packages` and registers the dependency.
+So it is installed exactly like `dbt-core` or an adapter, into the same environment as your
+dbt. It does not appear in `packages.yml`.
+---
+## Package contents
+A standard src-layout package — `src/dbt_polyglot/` holds the import package, plus a `.pth`
+that activates it on start-up:
+| File | Role |
+|------|------|
+| `src/dbt_polyglot/__init__.py` | Import-time activation: patches the dbt Compiler. |
+| `src/dbt_polyglot/transpile.py` | The compile-phase patch (`patch_compiler`) + core `spark_safe_transpile`. |
+| `src/dbt_polyglot/fixups.py` | The `SPARK_FIXUPS` registry of AST transforms. |
+| `dbt_polyglot.pth` | One line (`import dbt_polyglot`); auto-activates on start-up. Installed into `site-packages` by the `build_py` shim in `setup.py`. |
+| `pyproject.toml` / `setup.py` | PEP 517 metadata; `setup.py` exists only to place the `.pth` into purelib. |
+| `LICENSE` | Apache-2.0. |
+This package is intentionally limited to **transpilation**. Validating the result is left to
+dbt's native `dbt build --empty` (see [Trust model](#trust-model--verified-or-fails-loud-never-silently-wrong)
+above); catalog routing (mapping `file_format` → a Spark catalog) and seed re-runnability are
+**separate concerns** and are not bundled here.
+---
+## Compatibility & caveats
+- **dbt-core private method.** The patch wraps `dbt.compilation.Compiler._compile_code`, a
+  **private** dbt-core method. It forwards `*args/**kwargs` to tolerate signature drift and
+  is fully import-guarded (if dbt-core or `sqlglot` aren't importable, or the seam moves, the
+  patch does nothing rather than breaking the interpreter). Still, **pin a supported dbt-core
+  range** when depending on this in production, and re-verify after major dbt upgrades.
+- **`sqlglot` coverage.** `sqlglot` maps a large surface but not everything. Exotic dialect
+  features — Snowflake `LATERAL FLATTEN`, `VARIANT`/`OBJECT`/`ARRAY` semantics, `:` path
+  access, `LISTAGG`, and similar — may not translate cleanly. Those surface via the fail-soft
+  WARNING and `dbt build --empty`, by design, rather than silently.
+- **Self-contained.** The module imports nothing from any host project, so it can be lifted
+  into its own repo unchanged.
+## License
+Apache-2.0 — see [LICENSE](LICENSE).

dbt_polyglot-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,222 @@
+# dbt-polyglot
+**Run a dbt project written in another SQL dialect (Snowflake, BigQuery, Redshift, …) on
+Spark — unchanged.** Each model's SQL is transpiled to Spark with
+[`sqlglot`](https://github.com/tobymao/sqlglot) at dbt's **compile phase**, so the SQL
+dbt actually executes (and what lands in `target/compiled/`) is already Spark.
+The only changes are configuration — your model `.sql` files are never edited. Drop the
+package into any existing dbt repo, point `profiles.yml` at Spark, declare the source
+dialect in `dbt_project.yml`, and `dbt build`.
+> Why this exists: Spark has no `QUALIFY` clause (`[PARSE_SYNTAX_ERROR] … near 'QUALIFY'`),
+> plus dozens of smaller dialect gaps (`IFF`, `NVL`, `::` casts, `DATEADD`, null ordering, …).
+> A portable/Snowflake-style model fails on Spark until its SQL is translated. This package
+> does that translation transparently, in-place, at compile time.
+---
+## Install
+It is a **normal Python package** — install it into the same virtualenv your `dbt` runs in.
+Installation auto-activates the patch (via a `.pth` file that imports the module on
+interpreter start-up; see [Installation: why pip, not `dbt deps`](#installation-why-pip-not-dbt-deps)).
+```bash
+pip install dbt-polyglot
+```
+From a git checkout (bleeding edge):
+```bash
+pip install "git+https://github.com/Saketkr21/dbt-polyglot.git"
+```
+Local / editable (developing the package):
+```bash
+pip install -e path/to/dbt-polyglot
+```
+You also need a Spark adapter for dbt (this package does not pull one in, so you can choose
+your connection method):
+```bash
+pip install "dbt-spark[PyHive]"     # Thrift/HiveServer2, used in the examples below
+```
+---
+## Configure (the only changes you make)
+### 1. `profiles.yml` — point the output at Spark
+```yaml
+your_profile:
+  target: dev
+  outputs:
+    dev:
+      type: spark
+      method: thrift
+      host: "{{ env_var('DBT_SPARK_HOST', 'localhost') }}"
+      port: "{{ env_var('DBT_SPARK_PORT', 10000) | int }}"
+      schema: analytics
+```
+### 2. `dbt_project.yml` — declare your models' source dialect
+```yaml
+models:
+  your_project:
+    +transpile_from: snowflake     # the dialect your models are written in
+    # +transpile_to: spark         # optional, default 'spark'
+```
+`transpile_from` accepts **any** dialect `sqlglot` understands — `snowflake`, `bigquery`,
+`redshift`, `tsql`, `postgres`, `duckdb`, `presto`, `trino`, … `transpile_to` defaults to
+`spark` and rarely needs changing.
+You can scope it to a subtree (`models.your_project.staging.+transpile_from: …`) or override
+it per model — a per-model `config` beats the project default:
+```sql
+-- models/marts/latest_order.sql  (written in Snowflake SQL, runs on Spark)
+{{ config(materialized='table', transpile_from='snowflake') }}
+select *
+from {{ ref('orders') }}
+qualify row_number() over (partition by customer_id order by ordered_at desc) = 1
+```
+That's it. `dbt build` now runs your existing models on Spark, no model edits.
+---
+## How it works
+At dbt **compile**, the package wraps `dbt.compilation.Compiler._compile_code` and runs an
+extra step on each opted-in model's compiled SQL body:
+```
+parse(read=transpile_from)  →  apply SPARK_FIXUPS  →  generate(transpile_to, pretty=True)
+```
+Because the rewrite happens on the model body **before** dbt wraps it in the materialization
+DDL (`create table … as …`), both `target/compiled/` and the SQL sent to Spark are pure
+Spark — there is no mixed-dialect string and no separate output directory.
+### The fix-up layer (what makes it trustable)
+`sqlglot`'s output is occasionally valid in *its* model of Spark but rejected by Spark's
+**real** parser. The classic case: `x NOT IN (subquery)`, which `sqlglot`'s Snowflake reader
+canonicalizes to the **unsupported** `x <> ALL (subquery)`. The `SPARK_FIXUPS` registry is a
+list of small AST transforms applied to the parsed tree before Spark SQL is generated; the
+first one rewrites quantified-subquery comparisons (`x <> ALL (subq)` / `x = ANY (subq)`) back
+to `NOT x IN (subq)` / `x IN (subq)`. The registry is extensible — one EXPLAIN-verified transform per
+gap discovered.
+### Trust model — verified, or fails loud (never silently wrong)
+A model is either converted to **valid Spark SQL** or it **fails loudly** with a clear
+dbt/Spark error naming the model. It never silently emits a wrong result from an
+un-converted construct:
+- **Fail-soft + loud.** If `sqlglot` can't parse the SQL as the source dialect, or produces
+  empty/multi-statement output, the patch logs a `WARNING` (visible in the dbt run) and
+  passes the **original SQL through unchanged**. Spark then either runs it (it was already
+  valid) or rejects it loudly — so the failure surfaces, it is never hidden.
+To certify a whole repo **upfront** — before a heavy run — use dbt's own native validation.
+No extra tooling: dbt already runs SQL through your `profiles.yml` adapter, against whatever
+warehouse you target.
+```bash
+dbt build --empty              # build every model with 0 input rows (DAG-ordered)
+dbt build --empty --select marts.*   # any dbt selector works
+dbt show --limit 0 -s my_model # read-only: validate the SELECT without materializing
+```
+`--empty` limits every `ref`/`source` to zero rows, so dbt executes each model's real SQL
+against the warehouse — moving no data — and **fails loudly, naming the model**, if the
+transpiled SQL is invalid. Because it builds in dependency order, there is no "upstream not
+built" ambiguity. That makes `dbt build --empty` a drop-in CI gate (it exits non-zero if any
+model is invalid). `dbt show --limit 0` is the non-destructive variant when the target
+role can't create objects.
+### Scope
+Every opted-in model is transpiled — the full `sqlglot` breadth (`IFF`→`IF`, `NVL`→`COALESCE`,
+`::`→`CAST`, `DATEADD`→`DATE_ADD`, `QUALIFY`→windowed subquery, …). To transpile only part of a
+project, scope `+transpile_from` to a folder/model subtree (or set it per model) — the dbt-native
+way — rather than a global on/off.
+### No-op guarantee
+If `transpile_from` is unset, or equals `transpile_to` (you're already writing Spark SQL),
+the model is **never touched** — `sqlglot` is not even called and nothing is reformatted.
+### A note on `NULLS LAST` in the output (intentional)
+Snowflake and Spark have **opposite** default null ordering (Snowflake sorts NULLs largest →
+last; Spark sorts them smallest → first). When translating a Snowflake `ORDER BY x`,
+`sqlglot` appends an explicit `… NULLS LAST` to **preserve Snowflake semantics** — without
+it, a `QUALIFY ROW_NUMBER() … = 1` top-N pick could choose a different row. It is added only
+on a true cross-dialect translation, and is semantically required — do not strip it.
+---
+## Installation: why `pip`, not `dbt deps`
+**`dbt deps` cannot install this — you must `pip install` it.** They do different things:
+- **`dbt deps`** installs **dbt packages**: bundles of dbt *macros, models, seeds, and
+  tests* (the things listed in `packages.yml` / `dependencies.yml`). It pulls SQL/Jinja
+  assets into `dbt_packages/` and **never installs or runs Python code**.
+- **`dbt-polyglot`** is a **Python package**. It works by monkeypatching a dbt-core
+  function at runtime, and it activates through a `.pth` file that Python executes on
+  interpreter start-up. Both of those are Python-installer concerns — only `pip` (or `uv`,
+  `poetry`, etc.) places a `.pth` into `site-packages` and registers the dependency.
+So it is installed exactly like `dbt-core` or an adapter, into the same environment as your
+dbt. It does not appear in `packages.yml`.
+---
+## Package contents
+A standard src-layout package — `src/dbt_polyglot/` holds the import package, plus a `.pth`
+that activates it on start-up:
+| File | Role |
+|------|------|
+| `src/dbt_polyglot/__init__.py` | Import-time activation: patches the dbt Compiler. |
+| `src/dbt_polyglot/transpile.py` | The compile-phase patch (`patch_compiler`) + core `spark_safe_transpile`. |
+| `src/dbt_polyglot/fixups.py` | The `SPARK_FIXUPS` registry of AST transforms. |
+| `dbt_polyglot.pth` | One line (`import dbt_polyglot`); auto-activates on start-up. Installed into `site-packages` by the `build_py` shim in `setup.py`. |
+| `pyproject.toml` / `setup.py` | PEP 517 metadata; `setup.py` exists only to place the `.pth` into purelib. |
+| `LICENSE` | Apache-2.0. |
+This package is intentionally limited to **transpilation**. Validating the result is left to
+dbt's native `dbt build --empty` (see [Trust model](#trust-model--verified-or-fails-loud-never-silently-wrong)
+above); catalog routing (mapping `file_format` → a Spark catalog) and seed re-runnability are
+**separate concerns** and are not bundled here.
+---
+## Compatibility & caveats
+- **dbt-core private method.** The patch wraps `dbt.compilation.Compiler._compile_code`, a
+  **private** dbt-core method. It forwards `*args/**kwargs` to tolerate signature drift and
+  is fully import-guarded (if dbt-core or `sqlglot` aren't importable, or the seam moves, the
+  patch does nothing rather than breaking the interpreter). Still, **pin a supported dbt-core
+  range** when depending on this in production, and re-verify after major dbt upgrades.
+- **`sqlglot` coverage.** `sqlglot` maps a large surface but not everything. Exotic dialect
+  features — Snowflake `LATERAL FLATTEN`, `VARIANT`/`OBJECT`/`ARRAY` semantics, `:` path
+  access, `LISTAGG`, and similar — may not translate cleanly. Those surface via the fail-soft
+  WARNING and `dbt build --empty`, by design, rather than silently.
+- **Self-contained.** The module imports nothing from any host project, so it can be lifted
+  into its own repo unchanged.
+## License
+Apache-2.0 — see [LICENSE](LICENSE).

dbt_polyglot-0.1.0/dbt_polyglot.pth ADDED Viewed

	@@ -0,0 +1 @@
1	+ import dbt_polyglot

dbt_polyglot-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,40 @@
+[build-system]
+requires = ["setuptools>=77"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "dbt-polyglot"
+version = "0.1.0"
+description = "Run any-dialect dbt models on Spark unchanged — transpiles each model's SQL to Spark at dbt compile time via sqlglot."
+readme = "README.md"
+requires-python = ">=3.9"
+license = "Apache-2.0"
+license-files = ["LICENSE"]
+authors = [{ name = "Saket Kumar", email = "kumar.saket0021@gmail.com" }]
+keywords = ["dbt", "spark", "sqlglot", "snowflake", "sql", "transpile", "dialect", "polyglot"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: SQL",
+    "Topic :: Database",
+    "Topic :: Software Development :: Code Generators",
+]
+dependencies = [
+    "sqlglot>=20.0",
+    "dbt-core>=1.5",
+]
+[project.optional-dependencies]
+test = ["pytest"]
+[project.urls]
+Homepage = "https://github.com/Saketkr21/dbt-polyglot"
+Repository = "https://github.com/Saketkr21/dbt-polyglot"
+Issues = "https://github.com/Saketkr21/dbt-polyglot/issues"
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]

dbt_polyglot-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

dbt_polyglot-0.1.0/setup.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Shim setup.py — metadata lives in pyproject.toml.
+Places ``dbt_polyglot.pth`` into the wheel's purelib (site-packages) so the patch
+auto-activates on interpreter start-up.
+"""
+import os
+import shutil
+from setuptools import setup
+from setuptools.command.build_py import build_py
+PTH = "dbt_polyglot.pth"
+class build_py_with_pth(build_py):
+    def run(self):
+        super().run()
+        shutil.copyfile(PTH, os.path.join(self.build_lib, PTH))
+setup(cmdclass={"build_py": build_py_with_pth})

dbt_polyglot-0.1.0/src/dbt_polyglot/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""dbt-polyglot — run any-dialect dbt models on Spark unchanged.
+Transpiles each opted-in model's SQL to Spark via sqlglot at dbt's compile phase.
+Install:
+    pip install dbt-polyglot
+Config (dbt_project.yml):
+    models:
+      your_project:
+        +transpile_from: snowflake
+To validate the transpiled SQL against your warehouse before a heavy run, use dbt's
+own native flag — no extra tooling needed:
+    dbt build --empty           # build every model with zero input rows
+    dbt show --limit 0 -s model # read-only: validate without materializing
+"""
+__version__ = "0.2.0"
+# Activate the compile-time transpile patch. Import-guarded so non-dbt Python is unaffected.
+try:
+    from dbt_polyglot.transpile import patch_compiler
+    patch_compiler()
+except Exception:
+    pass

dbt_polyglot-0.1.0/src/dbt_polyglot/fixups.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""Spark-output fix-up registry.
+Each entry is an (exp.Expression -> exp.Expression) transform applied (via .transform,
+bottom-up) to the parsed tree BEFORE generating Spark SQL. They repair cases where
+sqlglot's output is rejected by Spark's real parser.
+Extensible: append a transform function per gap found, EXPLAIN-verify on Spark.
+"""
+from sqlglot import exp
+def _as_subquery(node):
+    return node if isinstance(node, exp.Subquery) else exp.Subquery(this=node)
+def fixup_quantified_subquery(node):
+    """Spark has no quantified subquery comparison.
+    sqlglot's Snowflake parser canonicalizes:
+      x NOT IN (subq) -> x <> ALL (subq)
+      x IN (subq)     -> x = ANY (subq)
+    Spark rejects both. Rewrite back to NOT x IN (subq) / x IN (subq).
+    """
+    if isinstance(node, exp.NEQ) and isinstance(node.expression, exp.All):
+        return exp.Not(this=exp.In(this=node.this, query=_as_subquery(node.expression.this)))
+    if isinstance(node, exp.EQ) and isinstance(node.expression, exp.Any):
+        return exp.In(this=node.this, query=_as_subquery(node.expression.this))
+    return node
+# Registry of fix-up transforms; extend it by adding further transform functions to the list.
+SPARK_FIXUPS = [fixup_quantified_subquery]

dbt_polyglot-0.1.0/src/dbt_polyglot/transpile.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""Core transpile logic — parse source dialect, apply fix-ups, generate Spark SQL.
+Called at dbt compile time via the Compiler._compile_code monkeypatch.
+"""
+import sqlglot
+from dbt_polyglot.fixups import SPARK_FIXUPS
+_DEFAULT_TARGET = "spark"
+def spark_safe_transpile(code, src, dst=None):
+    """Parse as `src`, apply fix-up registry (when targeting spark), generate `dst` SQL.
+    Raises on multi-statement / empty so the caller's fail-soft kicks in.
+    """
+    dst = dst or _DEFAULT_TARGET
+    statements = sqlglot.parse(code, read=src)
+    if len(statements) != 1 or statements[0] is None:
+        raise ValueError(f"expected exactly one statement, got {len(statements)}")
+    tree = statements[0]
+    if dst == _DEFAULT_TARGET:
+        for fixup in SPARK_FIXUPS:
+            tree = tree.transform(fixup)
+    out = tree.sql(dialect=dst, pretty=True)
+    if not (out or "").strip():
+        raise ValueError("transpile produced empty SQL")
+    return out
+def patch_compiler():
+    """Monkeypatch dbt's Compiler._compile_code to transpile opted-in models."""
+    from dbt.compilation import Compiler
+    from dbt.adapters.events.logging import AdapterLogger
+    logger = AdapterLogger("dbt-polyglot")
+    orig = Compiler._compile_code
+    def _patched(self, node, manifest, extra_context=None, *args, **kwargs):
+        node = orig(self, node, manifest, extra_context, *args, **kwargs)
+        src = dst = None
+        try:
+            src = node.config.get("transpile_from")
+            dst = node.config.get("transpile_to") or _DEFAULT_TARGET
+            if not src or src == dst:
+                return node
+            node.compiled_code = spark_safe_transpile(node.compiled_code or "", src, dst)
+        except Exception as e:
+            uid = getattr(node, "unique_id", "<unknown>")
+            logger.warning(
+                f"[dbt-polyglot] could not transpile {uid} from '{src}' -> "
+                f"'{dst or _DEFAULT_TARGET}' ({type(e).__name__}: {e}); "
+                f"passing model SQL through UNCHANGED."
+            )
+        return node
+    Compiler._compile_code = _patched

dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,247 @@
+Metadata-Version: 2.4
+Name: dbt-polyglot
+Version: 0.1.0
+Summary: Run any-dialect dbt models on Spark unchanged — transpiles each model's SQL to Spark at dbt compile time via sqlglot.
+Author-email: Saket Kumar <kumar.saket0021@gmail.com>
+License-Expression: Apache-2.0
+Project-URL: Homepage, https://github.com/Saketkr21/dbt-polyglot
+Project-URL: Repository, https://github.com/Saketkr21/dbt-polyglot
+Project-URL: Issues, https://github.com/Saketkr21/dbt-polyglot/issues
+Keywords: dbt,spark,sqlglot,snowflake,sql,transpile,dialect,polyglot
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: SQL
+Classifier: Topic :: Database
+Classifier: Topic :: Software Development :: Code Generators
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: sqlglot>=20.0
+Requires-Dist: dbt-core>=1.5
+Provides-Extra: test
+Requires-Dist: pytest; extra == "test"
+Dynamic: license-file
+# dbt-polyglot
+**Run a dbt project written in another SQL dialect (Snowflake, BigQuery, Redshift, …) on
+Spark — unchanged.** Each model's SQL is transpiled to Spark with
+[`sqlglot`](https://github.com/tobymao/sqlglot) at dbt's **compile phase**, so the SQL
+dbt actually executes (and what lands in `target/compiled/`) is already Spark.
+The only changes are configuration — your model `.sql` files are never edited. Drop the
+package into any existing dbt repo, point `profiles.yml` at Spark, declare the source
+dialect in `dbt_project.yml`, and `dbt build`.
+> Why this exists: Spark has no `QUALIFY` clause (`[PARSE_SYNTAX_ERROR] … near 'QUALIFY'`),
+> plus dozens of smaller dialect gaps (`IFF`, `NVL`, `::` casts, `DATEADD`, null ordering, …).
+> A portable/Snowflake-style model fails on Spark until its SQL is translated. This package
+> does that translation transparently, in-place, at compile time.
+---
+## Install
+It is a **normal Python package** — install it into the same virtualenv your `dbt` runs in.
+Installation auto-activates the patch (via a `.pth` file that imports the module on
+interpreter start-up; see [Installation: why pip, not `dbt deps`](#installation-why-pip-not-dbt-deps)).
+```bash
+pip install dbt-polyglot
+```
+From a git checkout (bleeding edge):
+```bash
+pip install "git+https://github.com/Saketkr21/dbt-polyglot.git"
+```
+Local / editable (developing the package):
+```bash
+pip install -e path/to/dbt-polyglot
+```
+You also need a Spark adapter for dbt (this package does not pull one in, so you can choose
+your connection method):
+```bash
+pip install "dbt-spark[PyHive]"     # Thrift/HiveServer2, used in the examples below
+```
+---
+## Configure (the only changes you make)
+### 1. `profiles.yml` — point the output at Spark
+```yaml
+your_profile:
+  target: dev
+  outputs:
+    dev:
+      type: spark
+      method: thrift
+      host: "{{ env_var('DBT_SPARK_HOST', 'localhost') }}"
+      port: "{{ env_var('DBT_SPARK_PORT', 10000) | int }}"
+      schema: analytics
+```
+### 2. `dbt_project.yml` — declare your models' source dialect
+```yaml
+models:
+  your_project:
+    +transpile_from: snowflake     # the dialect your models are written in
+    # +transpile_to: spark         # optional, default 'spark'
+```
+`transpile_from` accepts **any** dialect `sqlglot` understands — `snowflake`, `bigquery`,
+`redshift`, `tsql`, `postgres`, `duckdb`, `presto`, `trino`, … `transpile_to` defaults to
+`spark` and rarely needs changing.
+You can scope it to a subtree (`models.your_project.staging.+transpile_from: …`) or override
+it per model — a per-model `config` beats the project default:
+```sql
+-- models/marts/latest_order.sql  (written in Snowflake SQL, runs on Spark)
+{{ config(materialized='table', transpile_from='snowflake') }}
+select *
+from {{ ref('orders') }}
+qualify row_number() over (partition by customer_id order by ordered_at desc) = 1
+```
+That's it. `dbt build` now runs your existing models on Spark, no model edits.
+---
+## How it works
+At dbt **compile**, the package wraps `dbt.compilation.Compiler._compile_code` and runs an
+extra step on each opted-in model's compiled SQL body:
+```
+parse(read=transpile_from)  →  apply SPARK_FIXUPS  →  generate(transpile_to, pretty=True)
+```
+Because the rewrite happens on the model body **before** dbt wraps it in the materialization
+DDL (`create table … as …`), both `target/compiled/` and the SQL sent to Spark are pure
+Spark — there is no mixed-dialect string and no separate output directory.
+### The fix-up layer (what makes it trustable)
+`sqlglot`'s output is occasionally valid in *its* model of Spark but rejected by Spark's
+**real** parser. The classic case: `x NOT IN (subquery)`, which `sqlglot`'s Snowflake reader
+canonicalizes to the **unsupported** `x <> ALL (subquery)`. The `SPARK_FIXUPS` registry is a
+list of small AST transforms applied to the parsed tree before Spark SQL is generated; the
+first one rewrites quantified-subquery comparisons (`<> ALL` / `= ANY (subq)`) back to
+`NOT x IN` / `x IN (subq)`. The registry is extensible — one EXPLAIN-verified transform per
+gap discovered.
+### Trust model — verified, or fails loud (never silently wrong)
+A model is either converted to **valid Spark SQL** or it **fails loudly** with a clear
+dbt/Spark error naming the model. It never silently emits a wrong result from an
+un-converted construct:
+- **Fail-soft + loud.** If `sqlglot` can't parse the SQL as the source dialect, or produces
+  empty/multi-statement output, the patch logs a `WARNING` (visible in the dbt run) and
+  passes the **original SQL through unchanged**. Spark then either runs it (it was already
+  valid) or rejects it loudly — so the failure surfaces, it is never hidden.
+To certify a whole repo **upfront** — before a heavy run — use dbt's own native validation.
+No extra tooling: dbt already runs SQL through your `profiles.yml` adapter, against whatever
+warehouse you target.
+```bash
+dbt build --empty              # build every model with 0 input rows (DAG-ordered)
+dbt build --empty --select marts.*   # any dbt selector works
+dbt show --limit 0 -s my_model # read-only: validate the SELECT without materializing
+```
+`--empty` limits every `ref`/`source` to zero rows, so dbt executes each model's real SQL
+against the warehouse — moving no data — and **fails loudly, naming the model**, if the
+transpiled SQL is invalid. Because it builds in dependency order, there is no "upstream not
+built" ambiguity. That makes `dbt build --empty` a drop-in CI gate (it exits non-zero on the
+first invalid model). `dbt show --limit 0` is the non-destructive variant when the target
+role can't create objects.
+### Scope
+Every opted-in model is transpiled — the full `sqlglot` breadth (`IFF`→`IF`, `NVL`→`COALESCE`,
+`::`→`CAST`, `DATEADD`→`DATE_ADD`, `QUALIFY`→windowed subquery, …). To transpile only part of a
+project, scope `+transpile_from` to a folder/model subtree (or set it per model) — the dbt-native
+way — rather than a global on/off.
+### No-op guarantee
+If `transpile_from` is unset, or equals `transpile_to` (you're already writing Spark SQL),
+the model is **never touched** — `sqlglot` is not even called and nothing is reformatted.
+### A note on `NULLS LAST` in the output (intentional)
+Snowflake and Spark have **opposite** default null ordering (Snowflake sorts NULLs largest →
+last; Spark sorts them smallest → first). When translating a Snowflake `ORDER BY x`,
+`sqlglot` appends an explicit `… NULLS LAST` to **preserve Snowflake semantics** — without
+it, a `QUALIFY ROW_NUMBER() … = 1` top-N pick could choose a different row. It is added only
+on a true cross-dialect translation, and is semantically required — do not strip it.
+---
+## Installation: why `pip`, not `dbt deps`
+**`dbt deps` cannot install this — you must `pip install` it.** They do different things:
+- **`dbt deps`** installs **dbt packages**: bundles of dbt *macros, models, seeds, and
+  tests* (the things listed in `packages.yml` / `dependencies.yml`). It pulls SQL/Jinja
+  assets into `dbt_packages/` and **never installs or runs Python code**.
+- **`dbt-polyglot`** is a **Python package**. It works by monkeypatching a dbt-core
+  function at runtime, and it activates through a `.pth` file that Python executes on
+  interpreter start-up. Both of those are Python-installer concerns — only `pip` (or `uv`,
+  `poetry`, etc.) places a `.pth` into `site-packages` and registers the dependency.
+So it is installed exactly like `dbt-core` or an adapter, into the same environment as your
+dbt. It does not appear in `packages.yml`.
+---
+## Package contents
+A standard src-layout package — `src/dbt_polyglot/` holds the import package, plus a `.pth`
+that activates it on start-up:
+| File | Role |
+|------|------|
+| `src/dbt_polyglot/__init__.py` | Import-time activation: patches the dbt Compiler. |
+| `src/dbt_polyglot/transpile.py` | The compile-phase patch (`patch_compiler`) + core `spark_safe_transpile`. |
+| `src/dbt_polyglot/fixups.py` | The `SPARK_FIXUPS` registry of AST transforms. |
+| `dbt_polyglot.pth` | One line (`import dbt_polyglot`); auto-activates on start-up. Installed into `site-packages` by the `build_py` shim in `setup.py`. |
+| `pyproject.toml` / `setup.py` | PEP 517 metadata; `setup.py` exists only to place the `.pth` into purelib. |
+| `LICENSE` | Apache-2.0. |
+This package is intentionally limited to **transpilation**. Validating the result is left to
+dbt's native `dbt build --empty` (see [Trust model](#trust-model--verified-or-fails-loud-never-silently-wrong)
+above); catalog routing (mapping `file_format` → a Spark catalog) and seed re-runnability are
+**separate concerns** and are not bundled here.
+---
+## Compatibility & caveats
+- **dbt-core private method.** The patch wraps `dbt.compilation.Compiler._compile_code`, a
+  **private** dbt-core method. It forwards `*args/**kwargs` to tolerate signature drift and
+  is fully import-guarded (if dbt-core or `sqlglot` isn't importable, or the seam moves, the
+  patch does nothing rather than breaking the interpreter). Still, **pin a supported dbt-core
+  range** when depending on this in production, and re-verify after major dbt upgrades.
+- **`sqlglot` coverage.** `sqlglot` maps a large surface but not everything. Exotic dialect
+  features — Snowflake `LATERAL FLATTEN`, `VARIANT`/`OBJECT`/`ARRAY` semantics, `:` path
+  access, `LISTAGG`, and similar — may not translate cleanly. Those surface via the fail-soft
+  WARNING and `dbt build --empty`, by design, rather than silently.
+- **Self-contained.** The module imports nothing from any host project, so it can be lifted
+  into its own repo unchanged.
+## License
+Apache-2.0 — see [LICENSE](LICENSE).

dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,17 @@
+CHANGELOG.md
+LICENSE
+MANIFEST.in
+README.md
+dbt_polyglot.pth
+pyproject.toml
+setup.py
+src/dbt_polyglot/__init__.py
+src/dbt_polyglot/fixups.py
+src/dbt_polyglot/transpile.py
+src/dbt_polyglot.egg-info/PKG-INFO
+src/dbt_polyglot.egg-info/SOURCES.txt
+src/dbt_polyglot.egg-info/dependency_links.txt
+src/dbt_polyglot.egg-info/requires.txt
+src/dbt_polyglot.egg-info/top_level.txt
+tests/__init__.py
+tests/test_transpile.py

dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,5 @@
+sqlglot>=20.0
+dbt-core>=1.5
+[test]
+pytest

dbt_polyglot-0.1.0/src/dbt_polyglot.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ dbt_polyglot

dbt_polyglot-0.1.0/tests/__init__.py ADDED Viewed

File without changes

dbt_polyglot-0.1.0/tests/test_transpile.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""Unit tests for the transpile + fix-up layer. No Spark required (pure sqlglot string checks).
+Run:  pip install -e ".[test]" && pytest
+"""
+import pytest
+from dbt_polyglot.transpile import spark_safe_transpile as transpile
+def test_not_in_subquery_is_not_emitted_as_unsupported_all():
+    out = transpile("select 1 from x where a not in (select a from y)", "snowflake", "spark")
+    assert "ALL" not in out.upper()
+    assert "NOT" in out.upper() and "IN (" in out.replace("\n", " ").upper().replace("IN(", "IN (")
+def test_eq_any_subquery_becomes_in():
+    out = transpile("select 1 from x where a = any (select a from y)", "snowflake", "spark")
+    assert "ANY" not in out.upper()
+    assert "IN" in out.upper()
+def test_qualify_is_rewritten_to_subquery():
+    out = transpile("select a from x qualify row_number() over (order by a) = 1", "snowflake", "spark")
+    assert "QUALIFY" not in out.upper()
+def test_common_snowflake_functions_translate():
+    out = transpile("select iff(a > 0, 1, 0) c, nvl(b, 'x') d, a::string e from x", "snowflake", "spark")
+    up = out.upper()
+    assert "IFF(" not in up
+    assert "::" not in out
+    assert "CAST(" in up
+def test_plain_spark_passthrough_is_valid():
+    out = transpile("select a, b from x where a = 1", "snowflake", "spark")
+    assert "SELECT" in out.upper() and "FROM X" in out.upper()
+@pytest.mark.parametrize("bad", ["", "/* only a comment */", "select 1; select 2"])
+def test_empty_or_multistatement_raises_so_failsoft_engages(bad):
+    with pytest.raises(Exception):
+        transpile(bad, "snowflake", "spark")