sf-synth 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. sf_synth-0.1.0/.gitignore +218 -0
  2. sf_synth-0.1.0/CHANGELOG.md +25 -0
  3. sf_synth-0.1.0/LICENSE +21 -0
  4. sf_synth-0.1.0/PKG-INFO +288 -0
  5. sf_synth-0.1.0/README.md +250 -0
  6. sf_synth-0.1.0/examples/ecommerce.yaml +221 -0
  7. sf_synth-0.1.0/examples/multi_schema.yaml +272 -0
  8. sf_synth-0.1.0/examples/selfref_employees.yaml +166 -0
  9. sf_synth-0.1.0/pyproject.toml +113 -0
  10. sf_synth-0.1.0/src/sf_synth/__init__.py +21 -0
  11. sf_synth-0.1.0/src/sf_synth/backend.py +283 -0
  12. sf_synth-0.1.0/src/sf_synth/cli.py +394 -0
  13. sf_synth-0.1.0/src/sf_synth/config.py +268 -0
  14. sf_synth-0.1.0/src/sf_synth/dag.py +339 -0
  15. sf_synth-0.1.0/src/sf_synth/discovery.py +475 -0
  16. sf_synth-0.1.0/src/sf_synth/engine.py +621 -0
  17. sf_synth-0.1.0/src/sf_synth/errors.py +58 -0
  18. sf_synth-0.1.0/src/sf_synth/generators/__init__.py +24 -0
  19. sf_synth-0.1.0/src/sf_synth/generators/base.py +95 -0
  20. sf_synth-0.1.0/src/sf_synth/generators/distribution.py +273 -0
  21. sf_synth-0.1.0/src/sf_synth/generators/faker_udf.py +212 -0
  22. sf_synth-0.1.0/src/sf_synth/generators/sql.py +255 -0
  23. sf_synth-0.1.0/src/sf_synth/ri.py +381 -0
  24. sf_synth-0.1.0/src/sf_synth/semantic.py +289 -0
  25. sf_synth-0.1.0/src/sf_synth/stats.py +372 -0
  26. sf_synth-0.1.0/tests/__init__.py +1 -0
  27. sf_synth-0.1.0/tests/conftest.py +80 -0
  28. sf_synth-0.1.0/tests/integration/__init__.py +1 -0
  29. sf_synth-0.1.0/tests/integration/test_snowflake.py +125 -0
  30. sf_synth-0.1.0/tests/unit/__init__.py +1 -0
  31. sf_synth-0.1.0/tests/unit/test_config.py +282 -0
  32. sf_synth-0.1.0/tests/unit/test_dag.py +302 -0
  33. sf_synth-0.1.0/tests/unit/test_semantic.py +135 -0
@@ -0,0 +1,218 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject the date and other information into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+ # Temporary file for partial code execution
204
+ tempCodeRunnerFile.py
205
+
206
+ # Ruff stuff:
207
+ .ruff_cache/
208
+
209
+ # PyPI configuration file
210
+ .pypirc
211
+
212
+ # Marimo
213
+ marimo/_static/
214
+ marimo/_lsp/
215
+ __marimo__/
216
+
217
+ # Streamlit
218
+ .streamlit/secrets.toml
@@ -0,0 +1,25 @@
1
+ # Changelog
2
+
3
+ All notable changes to sf-synth are documented here.
4
+ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
5
+
6
+ ## [0.1.0] - 2026-05-08
7
+
8
+ ### Added
9
+ - Snowpark-first execution engine — data is generated entirely inside Snowflake.
10
+ - Auto-discovery of tables, columns, types, PK/FK/UNIQUE/NOT NULL from `INFORMATION_SCHEMA`.
11
+ - DAG-based generation order using `networkx` topological sort.
12
+ - Self-referential FK support via two-pass generation (insert NULL → UPDATE).
13
+ - Distribution-preserving generators using `APPROX_TOP_K`, `APPROX_PERCENTILE`, and `HLL`.
14
+ - SQL-native generators: `seq`, `uniform`, `choice`, `range`, `regex`.
15
+ - Faker UDF generators for rich fake data (email, name, address, phone, etc.).
16
+ - Zipf-weighted FK sampling for realistic skewed distributions.
17
+ - Pydantic v2 configuration with YAML loader and strict validation.
18
+ - Column-name semantic type inference (80+ patterns).
19
+ - Typer CLI with `discover`, `plan`, `generate`, and `clean` commands.
20
+ - `plan` command shows generation order, row counts, and byte estimates without writing.
21
+ - `--seed` flag for fully deterministic output.
22
+ - Multi-schema support: FK references across schemas within the same database.
23
+ - Backend abstraction (`Backend` ABC + `SnowparkBackend` implementation).
24
+ - Unit tests and opt-in Snowflake integration tests.
25
+ - Example configs: e-commerce, self-referential employees, multi-schema enterprise.
sf_synth-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ayush Pareek
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,288 @@
1
+ Metadata-Version: 2.4
2
+ Name: sf-synth
3
+ Version: 0.1.0
4
+ Summary: High-fidelity synthetic data generation for Snowflake
5
+ Project-URL: Homepage, https://github.com/apareek/snowflake-synthesizer
6
+ Project-URL: Repository, https://github.com/apareek/snowflake-synthesizer
7
+ Project-URL: Bug Tracker, https://github.com/apareek/snowflake-synthesizer/issues
8
+ Project-URL: Changelog, https://github.com/apareek/snowflake-synthesizer/blob/main/CHANGELOG.md
9
+ Author-email: Apareek <ayushpareek1608@gmail.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: data-generation,faker,snowflake,snowpark,synthetic-data
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Requires-Python: >=3.9
22
+ Requires-Dist: faker>=18.0.0
23
+ Requires-Dist: networkx>=3.0
24
+ Requires-Dist: pydantic>=2.0.0
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: rich>=13.0.0
27
+ Requires-Dist: snowflake-connector-python>=3.0.0
28
+ Requires-Dist: snowflake-snowpark-python>=1.11.0
29
+ Requires-Dist: typer[all]>=0.9.0
30
+ Provides-Extra: dev
31
+ Requires-Dist: duckdb>=0.9.0; extra == 'dev'
32
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
33
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
34
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
35
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
36
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # sf-synth
40
+
41
+ High-fidelity synthetic data generation for Snowflake.
42
+
43
+ A Snowpark-first Python library and CLI that generates realistic synthetic data inside Snowflake using auto-discovered schema, distribution statistics, Faker-based rules, and a DAG-driven referential-integrity engine. All generation runs server-side, so PII never leaves the account.
44
+
45
+ ## Features
46
+
47
+ - **Snowpark-first execution**: Data is generated entirely within Snowflake using Snowpark. No data egress required.
48
+ - **Auto-discovery**: Automatically detects tables, columns, types, constraints (PK, FK, UNIQUE, NOT NULL) from `INFORMATION_SCHEMA`.
49
+ - **Referential integrity**: DAG-based generation ensures parent tables are populated before children. FK values are sampled from actual parent keys.
50
+ - **Self-referential tables**: Handles self-referential FKs (e.g., `employees.manager_id → employees.id`) via two-pass generation.
51
+ - **Multi-schema support**: Foreign keys can reference tables in other schemas within the same database (e.g., `SALES.CUSTOMERS` → `CORE.COUNTRIES`).
52
+ - **Distribution-preserving**: Samples from real column statistics (`APPROX_TOP_K`, `APPROX_PERCENTILE`, `HLL`) to preserve data distributions without exposing PII.
53
+ - **Skewed FK distributions**: Supports Zipf-weighted FK sampling (e.g., 80% of orders belong to 20% of customers).
54
+ - **Semantic inference**: Automatically infers generators based on column names (e.g., `email`, `phone`, `created_at`).
55
+ - **Deterministic output**: Seed-based generation for reproducible results.
56
+ - **YAML configuration**: Simple, validated config with Pydantic.
57
+
58
+ ## Installation
59
+
60
+ ```bash
61
+ pip install sf-synth
62
+ ```
63
+
64
+ Or install from source:
65
+
66
+ ```bash
67
+ git clone https://github.com/apareek/snowflake-synthesizer.git
68
+ cd snowflake-synthesizer
69
+ pip install -e ".[dev]"
70
+ ```
71
+
72
+ ## Quick Start
73
+
74
+ ### 1. Discover your schema
75
+
76
+ Generate a starter config by discovering your existing Snowflake schema:
77
+
78
+ ```bash
79
+ sf-synth discover MY_DATABASE --output config.yaml
80
+ ```
81
+
82
+ ### 2. Edit the config
83
+
84
+ Customize row counts, add generators, and define relationships:
85
+
86
+ ```yaml
87
+ defaults:
88
+ seed: 42
89
+ database: MY_DATABASE
90
+ schema: PUBLIC
91
+
92
+ tables:
93
+ - name: CUSTOMERS
94
+ rows: 10000
95
+ columns:
96
+ EMAIL:
97
+ generator: faker
98
+ provider: email
99
+ unique: true
100
+ MEMBERSHIP:
101
+ generator: choice
102
+ values: [Gold, Silver, Bronze]
103
+ weights: [0.1, 0.3, 0.6]
104
+
105
+ - name: ORDERS
106
+ rows: 50000
107
+ relationships:
108
+ - column: CUSTOMER_ID
109
+ references: CUSTOMERS.ID
110
+ skew: zipf
111
+ ```
112
+
113
+ ### 3. Preview the plan
114
+
115
+ See the generation order and dependencies without executing:
116
+
117
+ ```bash
118
+ sf-synth plan config.yaml
119
+ ```
120
+
121
+ ### 4. Generate data
122
+
123
+ ```bash
124
+ sf-synth generate config.yaml
125
+ ```
126
+
127
+ ### 5. Clean up
128
+
129
+ Remove temporary tables created during generation:
130
+
131
+ ```bash
132
+ sf-synth clean config.yaml
133
+ ```
134
+
135
+ ## Configuration Reference
136
+
137
+ ### Defaults
138
+
139
+ ```yaml
140
+ defaults:
141
+ seed: 42 # Random seed for reproducibility
142
+ locale: en_US # Faker locale
143
+ database: MY_DB # Default database
144
+ schema: PUBLIC # Default schema
145
+ null_ratio: 0.0 # Default null ratio for all columns
146
+ ```
147
+
148
+ ### Generator Types
149
+
150
+ | Generator | Description | Parameters |
151
+ |-----------|-------------|---------------------|
152
+ | `seq` | Sequential integers | `start`, `step` |
153
+ | `uniform` | Uniform random numbers | `min_value`, `max_value` |
154
+ | `choice` | Random selection from list | `values`, `weights` (optional) |
155
+ | `range` | Values in numeric/date range | `min_value`, `max_value` |
156
+ | `faker` | Faker provider | `provider`, `locale` (optional) |
157
+ | `distribution` | Sample from source column stats | `source` (fully qualified column name: `DB.SCHEMA.TABLE.COL`) |
158
+ | `regex` | Pattern-based strings | `pattern` |
159
+
160
+ ### Faker Providers
161
+
162
+ Common providers: `email`, `name`, `first_name`, `last_name`, `phone_number`, `address`, `city`, `state`, `zipcode`, `country`, `company`, `job`, `date`, `date_time`, `uuid4`, `url`, `ipv4`, `ssn`, `credit_card_number`.
163
+
164
+ ### Relationships
165
+
166
+ ```yaml
167
+ relationships:
168
+ - column: CUSTOMER_ID # FK column in this table
169
+ references: CUSTOMERS.ID # Parent table.column
170
+ null_ratio: 0.05 # 5% null FKs
171
+ skew: zipf # Distribution: uniform or zipf
172
+ skew_param: 1.5 # Zipf exponent (higher = more skewed)
173
+ ```
174
+
175
+ ## Python API
176
+
177
+ ```python
178
+ from sf_synth import SynthConfig, SynthEngine, discover_schema
179
+ from sf_synth.backend import SnowparkBackend
180
+
181
+ # Connect to Snowflake
182
+ backend = SnowparkBackend(connection_name="my_connection")
183
+ backend.connect()
184
+
185
+ # Discover schema
186
+ schema = backend.discover_schema("MY_DATABASE")
187
+
188
+ # Load config
189
+ from sf_synth.config import load_config
190
+ config = load_config("config.yaml")
191
+
192
+ # Generate
193
+ engine = SynthEngine(backend.session, config, schema_model=schema)
194
+ result = engine.generate()
195
+
196
+ print(f"Generated {result.total_rows} rows in {result.total_elapsed_seconds:.2f}s")
197
+
198
+ # Cleanup
199
+ engine.cleanup()
200
+ backend.disconnect()
201
+ ```
202
+
203
+ ## Examples
204
+
205
+ The `examples/` directory contains ready-to-use configurations:
206
+
207
+ | Example | Description |
208
+ |---------|-------------|
209
+ | [`ecommerce.yaml`](examples/ecommerce.yaml) | E-commerce schema with customers, products, orders, and reviews. Demonstrates FK relationships, Zipf-skewed distributions, and various generators. |
210
+ | [`selfref_employees.yaml`](examples/selfref_employees.yaml) | HR schema with self-referential `manager_id` FK. Shows how sf-synth handles circular references via two-pass generation. |
211
+ | [`multi_schema.yaml`](examples/multi_schema.yaml) | Enterprise schema spanning CORE, HR, SALES, and FINANCE schemas. Demonstrates cross-schema FK relationships within a single database. |
212
+
213
+ ## Architecture
214
+
215
+ ```
216
+ ┌─────────────┐ ┌──────────────┐ ┌─────────────┐
217
+ │ CLI │────▶│ Config │────▶│ Discovery │
218
+ │ (Typer) │ │ (Pydantic) │ │ (INFO_SCH) │
219
+ └─────────────┘ └──────────────┘ └─────────────┘
220
+ │ │
221
+ ▼ ▼
222
+ ┌─────────────┐ ┌─────────────┐
223
+ │ DAG Builder│────▶│ Schema │
224
+ │ (networkx) │ │ Model │
225
+ └─────────────┘ └─────────────┘
226
+
227
+
228
+ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
229
+ │ Stats │────▶│ Engine │────▶│ RI Manager │
230
+ │ Sampler │ │ (Snowpark) │ │(Parent Keys)│
231
+ │ (APPROX_*) │ └─────────────┘ └─────────────┘
232
+ └─────────────┘ │
233
+
234
+ ┌─────────────┐
235
+ │ Snowflake │
236
+ │ Tables │
237
+ └─────────────┘
238
+ ```
239
+
240
+ ## Connection Configuration
241
+
242
+ sf-synth uses standard Snowflake connection methods:
243
+
244
+ 1. **Named connection** (recommended): `~/.snowflake/connections.toml`
245
+ 2. **Environment variables**: `SNOWFLAKE_ACCOUNT`, `SNOWFLAKE_USER`, etc.
246
+ 3. **CLI parameters**: `--connection`, `--account`, etc.
247
+
248
+ Example `~/.snowflake/connections.toml`:
249
+
250
+ ```toml
251
+ [my_connection]
252
+ account = "myaccount"
253
+ user = "myuser"
254
+ authenticator = "externalbrowser"
255
+ database = "MY_DB"
256
+ schema = "PUBLIC"
257
+ warehouse = "COMPUTE_WH"
258
+ ```
259
+
260
+ ## Performance Notes
261
+
262
+ - **SQL-first generators** (seq, uniform, choice, range) are fast and scale to billions of rows.
263
+ - **Faker UDFs** are slower due to Python UDF overhead. Use them only when SQL alternatives don't exist.
264
+ - **Distribution sampling** requires one-time stats queries per column but generates data efficiently.
265
+ - For very large tables (>100M rows), consider chunked generation or Snowflake-native `GENERATOR()` patterns.
266
+
267
+ ## Development
268
+
269
+ ```bash
270
+ # Install dev dependencies
271
+ pip install -e ".[dev]"
272
+
273
+ # Run tests
274
+ pytest tests/unit/
275
+
276
+ # Run integration tests (requires Snowflake credentials)
277
+ SF_SYNTH_INTEGRATION_TESTS=1 pytest tests/integration/
278
+
279
+ # Lint
280
+ ruff check src/ tests/
281
+
282
+ # Type check
283
+ mypy src/
284
+ ```
285
+
286
+ ## License
287
+
288
+ MIT License. See [LICENSE](LICENSE) for details.