dbslice 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. dbslice-0.1.0/.gitignore +228 -0
  2. dbslice-0.1.0/LICENSE +21 -0
  3. dbslice-0.1.0/PKG-INFO +235 -0
  4. dbslice-0.1.0/README.md +183 -0
  5. dbslice-0.1.0/pyproject.toml +97 -0
  6. dbslice-0.1.0/src/dbslice/__init__.py +9 -0
  7. dbslice-0.1.0/src/dbslice/__main__.py +6 -0
  8. dbslice-0.1.0/src/dbslice/adapters/__init__.py +9 -0
  9. dbslice-0.1.0/src/dbslice/adapters/base.py +273 -0
  10. dbslice-0.1.0/src/dbslice/adapters/postgresql.py +722 -0
  11. dbslice-0.1.0/src/dbslice/cli.py +1473 -0
  12. dbslice-0.1.0/src/dbslice/config.py +310 -0
  13. dbslice-0.1.0/src/dbslice/config_file.py +633 -0
  14. dbslice-0.1.0/src/dbslice/constants.py +29 -0
  15. dbslice-0.1.0/src/dbslice/core/__init__.py +13 -0
  16. dbslice-0.1.0/src/dbslice/core/cycles.py +272 -0
  17. dbslice-0.1.0/src/dbslice/core/engine.py +748 -0
  18. dbslice-0.1.0/src/dbslice/core/graph.py +424 -0
  19. dbslice-0.1.0/src/dbslice/core/streaming.py +300 -0
  20. dbslice-0.1.0/src/dbslice/exceptions.py +175 -0
  21. dbslice-0.1.0/src/dbslice/input_validators.py +478 -0
  22. dbslice-0.1.0/src/dbslice/logging.py +318 -0
  23. dbslice-0.1.0/src/dbslice/models.py +227 -0
  24. dbslice-0.1.0/src/dbslice/output/__init__.py +11 -0
  25. dbslice-0.1.0/src/dbslice/output/csv_out.py +345 -0
  26. dbslice-0.1.0/src/dbslice/output/json_out.py +315 -0
  27. dbslice-0.1.0/src/dbslice/output/sql.py +407 -0
  28. dbslice-0.1.0/src/dbslice/py.typed +0 -0
  29. dbslice-0.1.0/src/dbslice/utils/__init__.py +8 -0
  30. dbslice-0.1.0/src/dbslice/utils/anonymizer.py +365 -0
  31. dbslice-0.1.0/src/dbslice/utils/connection.py +249 -0
  32. dbslice-0.1.0/src/dbslice/utils/profiling.py +308 -0
  33. dbslice-0.1.0/src/dbslice/validation.py +323 -0
  34. dbslice-0.1.0/tests/__init__.py +1 -0
  35. dbslice-0.1.0/tests/conftest.py +357 -0
  36. dbslice-0.1.0/tests/integration/IMPLEMENTATION_SUMMARY.md +383 -0
  37. dbslice-0.1.0/tests/integration/README.md +332 -0
  38. dbslice-0.1.0/tests/integration/__init__.py +1 -0
  39. dbslice-0.1.0/tests/integration/conftest.py +436 -0
  40. dbslice-0.1.0/tests/integration/test_cli_integration.py +773 -0
  41. dbslice-0.1.0/tests/integration/test_full_extraction.py +531 -0
  42. dbslice-0.1.0/tests/integration/test_performance.py +585 -0
  43. dbslice-0.1.0/tests/integration/test_sql_reimport.py +557 -0
  44. dbslice-0.1.0/tests/test_anonymizer.py +419 -0
  45. dbslice-0.1.0/tests/test_config.py +256 -0
  46. dbslice-0.1.0/tests/test_config_file.py +542 -0
  47. dbslice-0.1.0/tests/test_connection.py +149 -0
  48. dbslice-0.1.0/tests/test_csv_output.py +505 -0
  49. dbslice-0.1.0/tests/test_cycles.py +411 -0
  50. dbslice-0.1.0/tests/test_graph.py +187 -0
  51. dbslice-0.1.0/tests/test_json_output.py +557 -0
  52. dbslice-0.1.0/tests/test_passthrough.py +427 -0
  53. dbslice-0.1.0/tests/test_performance.py +342 -0
  54. dbslice-0.1.0/tests/test_security.py +662 -0
  55. dbslice-0.1.0/tests/test_sql_output.py +207 -0
  56. dbslice-0.1.0/tests/test_streaming.py +537 -0
  57. dbslice-0.1.0/tests/test_streaming_integration.py +309 -0
  58. dbslice-0.1.0/tests/test_validation.py +499 -0
  59. dbslice-0.1.0/tests/test_validation_integration.py +352 -0
  60. dbslice-0.1.0/tests/test_validators.py +472 -0
  61. dbslice-0.1.0/tests/test_virtual_fks.py +628 -0
@@ -0,0 +1,228 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+
204
+ # Ruff stuff:
205
+ .ruff_cache/
206
+
207
+ # PyPI configuration file
208
+ .pypirc
209
+
210
+ # Marimo
211
+ marimo/_static/
212
+ marimo/_lsp/
213
+ __marimo__/
214
+
215
+ # Streamlit
216
+ .streamlit/secrets.toml
217
+
218
+ # Database dumps
219
+ *.sql
220
+ !examples/**/*.sql
221
+
222
+ # Environment files
223
+ .env
224
+ .env.*
225
+
226
+ # Internal docs
227
+ executive_summary.md
228
+ dbslice-prd.md
dbslice-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 nabroleonx
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
dbslice-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.4
2
+ Name: dbslice
3
+ Version: 0.1.0
4
+ Summary: Extract minimal, referentially-intact database subsets for local development
5
+ Project-URL: Homepage, https://github.com/nabroleonx/dbslice
6
+ Project-URL: Documentation, https://github.com/nabroleonx/dbslice#readme
7
+ Project-URL: Repository, https://github.com/nabroleonx/dbslice
8
+ Project-URL: Issues, https://github.com/nabroleonx/dbslice/issues
9
+ Author: nabroleonx
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: cli,database,extraction,mysql,postgresql,sqlite,subset
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Environment :: Console
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Database
23
+ Classifier: Topic :: Software Development :: Testing
24
+ Classifier: Topic :: Utilities
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: faker>=20.0.0
27
+ Requires-Dist: psycopg2-binary>=2.9.9
28
+ Requires-Dist: pyyaml>=6.0.0
29
+ Requires-Dist: rich>=13.0.0
30
+ Requires-Dist: typer>=0.9.0
31
+ Provides-Extra: all
32
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'all'
33
+ Requires-Dist: mkdocs>=1.5.0; extra == 'all'
34
+ Requires-Dist: mypy>=1.0.0; extra == 'all'
35
+ Requires-Dist: mysql-connector-python>=8.0.0; extra == 'all'
36
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'all'
37
+ Requires-Dist: pytest>=7.0.0; extra == 'all'
38
+ Requires-Dist: ruff>=0.1.0; extra == 'all'
39
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'all'
40
+ Provides-Extra: dev
41
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
42
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
43
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
44
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
45
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
46
+ Provides-Extra: docs
47
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
48
+ Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
49
+ Provides-Extra: mysql
50
+ Requires-Dist: mysql-connector-python>=8.0.0; extra == 'mysql'
51
+ Description-Content-Type: text/markdown
52
+
53
+ # dbslice
54
+
55
+ [![PyPI version](https://img.shields.io/pypi/v/dbslice)](https://pypi.org/project/dbslice/)
56
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
57
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
58
+
59
+ Extract minimal, referentially-intact database subsets for local development and debugging.
60
+
61
+ ## The Problem
62
+
63
+ Copying an entire production database to your machine is usually infeasible, yet reproducing a bug often requires the exact data that caused it. **dbslice** solves this by extracting only the records you need, following foreign key relationships to ensure referential integrity.
64
+
65
+ ## Quick Start
66
+
67
+ ```bash
68
+ uv add dbslice
69
+
70
+ # Extract an order and all related records
71
+ dbslice extract postgres://localhost/myapp --seed "orders.id=12345" > subset.sql
72
+
73
+ # Import into local database
74
+ psql -d localdb < subset.sql
75
+ ```
76
+
77
+ ## Features
78
+
79
+ - **Zero-config start** -- Introspects schema automatically, no data model file required
80
+ - **Single command** -- Extract complete data subsets with one CLI invocation
81
+ - **Safe by default** -- Auto-detects and anonymizes sensitive fields (emails, phones, SSNs, etc.)
82
+ - **Multiple output formats** -- SQL, JSON, and CSV
83
+ - **Streaming** -- Memory-efficient extraction for large datasets (100K+ rows)
84
+ - **Virtual foreign keys** -- Support for Django GenericForeignKeys and implicit relationships via config
85
+ - **Config files** -- YAML-based configuration for repeatable extractions
86
+ - **Validation** -- Checks referential integrity of extracted data
87
+
88
+ ### Database Support
89
+
90
+ | Database | Status |
91
+ |------------|-----------------------|
92
+ | PostgreSQL | Fully supported |
93
+ | MySQL | Planned (not yet implemented) |
94
+ | SQLite | Planned (not yet implemented) |
95
+
96
+ ## Installation
97
+
98
+ ```bash
99
+ # Install with uv (recommended)
100
+ uv add dbslice
101
+
102
+ # Try without installing
103
+ uvx dbslice --help
104
+
105
+ # Or with pip
106
+ pip install dbslice
107
+ ```
108
+
109
+ ## Usage
110
+
111
+ ### Basic Extraction
112
+
113
+ ```bash
114
+ # Extract by primary key
115
+ dbslice extract postgres://user:pass@host:5432/db --seed "orders.id=12345"
116
+
117
+ # Extract with WHERE clause
118
+ dbslice extract postgres://localhost/db --seed "orders:status='failed' AND created_at > '2024-01-01'"
119
+
120
+ # Multiple seeds
121
+ dbslice extract postgres://localhost/db \
122
+ --seed "orders.id=100" \
123
+ --seed "orders.id=101"
124
+ ```
125
+
126
+ ### Control Traversal
127
+
128
+ ```bash
129
+ # Limit depth (default: 3)
130
+ dbslice extract postgres://... --seed "orders.id=1" --depth 2
131
+
132
+ # Direction: up (parents only), down (children only), both (default)
133
+ dbslice extract postgres://... --seed "orders.id=1" --direction up
134
+ ```
135
+
136
+ ### Anonymization
137
+
138
+ ```bash
139
+ # Auto-anonymize detected sensitive fields
140
+ dbslice extract postgres://... --seed "users.id=1" --anonymize
141
+
142
+ # Redact additional fields
143
+ dbslice extract postgres://... --seed "users.id=1" --anonymize --redact "audit_logs.ip_address"
144
+ ```
145
+
146
+ ### Output Formats
147
+
148
+ ```bash
149
+ # SQL (default)
150
+ dbslice extract postgres://... --seed "orders.id=1" --output sql
151
+
152
+ # JSON fixtures
153
+ dbslice extract postgres://... --seed "orders.id=1" --output json --out-file fixtures/
154
+
155
+ # CSV
156
+ dbslice extract postgres://... --seed "orders.id=1" --output csv --out-file data/
157
+ ```
158
+
159
+ ### Virtual Foreign Keys
160
+
161
+ Relationships that are not defined in the database schema (such as Django GenericForeignKeys or other implicit relationships) can be declared in the config file:
162
+
163
+ ```yaml
164
+ # dbslice.yaml
165
+ database:
166
+   url: postgres://localhost:5432/myapp
167
+
168
+ virtual_foreign_keys:
169
+   - source_table: notifications
170
+     source_columns: [object_id]
171
+     target_table: orders
172
+     description: "Generic FK to orders via ContentType"
173
+
174
+   - source_table: audit_log
175
+     source_columns: [user_id]
176
+     target_table: users
177
+     description: "Implicit FK without DB constraint"
178
+ ```
179
+
180
+ ```bash
181
+ dbslice extract --config dbslice.yaml --seed "users.id=1"
182
+ ```
183
+
184
+ ### Inspect Schema
185
+
186
+ ```bash
187
+ dbslice inspect postgres://localhost/myapp
188
+ ```
189
+
190
+ ### Configuration File
191
+
192
+ ```bash
193
+ # Generate config from database
194
+ dbslice init postgres://localhost/myapp --out-file dbslice.yaml
195
+
196
+ # Use config
197
+ dbslice extract --config dbslice.yaml --seed "orders.id=12345"
198
+ ```
199
+
200
+ ## How It Works
201
+
202
+ 1. **Introspect** -- Reads database schema to discover tables and foreign key relationships
203
+ 2. **Traverse** -- Starting from seed record(s), follows FK relationships via BFS
204
+ 3. **Extract** -- Fetches all identified records
205
+ 4. **Sort** -- Topologically sorts tables for correct INSERT order
206
+ 5. **Output** -- Generates SQL/JSON/CSV with proper escaping
207
+
208
+ ## Comparison
209
+
210
+ | Feature | dbslice | Jailer | Greenmask | slice-db |
211
+ |---------|---------|--------|-----------|----------|
212
+ | Language | Python | Java | Go | Ruby |
213
+ | Configuration | Zero-config | Requires model file | Config required | Manual YAML |
214
+ | Setup time | Seconds | Hours | Medium | Medium |
215
+ | Anonymization | Built-in (Faker) | Plugin-based | Advanced transformers | Not available |
216
+ | Subsetting | FK traversal | FK traversal | Limited | FK traversal |
217
+ | Output formats | SQL, JSON, CSV | SQL, XML, CSV | SQL | SQL only |
218
+ | Cycle handling | Automatic | Manual config | N/A | Manual |
219
+ | Streaming | Built-in | Configurable | Built-in | Not available |
220
+ | Maintenance | Active | Active | Active | Unmaintained |
221
+
222
+ **dbslice** is the lightweight, zero-config Python option: install and extract in under a minute.
223
+
224
+ ## Development
225
+
226
+ ```bash
227
+ git clone https://github.com/nabroleonx/dbslice.git
228
+ cd dbslice
229
+ uv sync --dev
230
+ uv run pytest
231
+ ```
232
+
233
+ ## License
234
+
235
+ MIT
@@ -0,0 +1,183 @@
1
+ # dbslice
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/dbslice)](https://pypi.org/project/dbslice/)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
5
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
6
+
7
+ Extract minimal, referentially-intact database subsets for local development and debugging.
8
+
9
+ ## The Problem
10
+
11
+ Copying an entire production database to your machine is usually infeasible, yet reproducing a bug often requires the exact data that caused it. **dbslice** solves this by extracting only the records you need, following foreign key relationships to ensure referential integrity.
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ uv add dbslice
17
+
18
+ # Extract an order and all related records
19
+ dbslice extract postgres://localhost/myapp --seed "orders.id=12345" > subset.sql
20
+
21
+ # Import into local database
22
+ psql -d localdb < subset.sql
23
+ ```
24
+
25
+ ## Features
26
+
27
+ - **Zero-config start** -- Introspects schema automatically, no data model file required
28
+ - **Single command** -- Extract complete data subsets with one CLI invocation
29
+ - **Safe by default** -- Auto-detects and anonymizes sensitive fields (emails, phones, SSNs, etc.)
30
+ - **Multiple output formats** -- SQL, JSON, and CSV
31
+ - **Streaming** -- Memory-efficient extraction for large datasets (100K+ rows)
32
+ - **Virtual foreign keys** -- Support for Django GenericForeignKeys and implicit relationships via config
33
+ - **Config files** -- YAML-based configuration for repeatable extractions
34
+ - **Validation** -- Checks referential integrity of extracted data
35
+
36
+ ### Database Support
37
+
38
+ | Database | Status |
39
+ |------------|-----------------------|
40
+ | PostgreSQL | Fully supported |
41
+ | MySQL | Planned (not yet implemented) |
42
+ | SQLite | Planned (not yet implemented) |
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ # Install with uv (recommended)
48
+ uv add dbslice
49
+
50
+ # Try without installing
51
+ uvx dbslice --help
52
+
53
+ # Or with pip
54
+ pip install dbslice
55
+ ```
56
+
57
+ ## Usage
58
+
59
+ ### Basic Extraction
60
+
61
+ ```bash
62
+ # Extract by primary key
63
+ dbslice extract postgres://user:pass@host:5432/db --seed "orders.id=12345"
64
+
65
+ # Extract with WHERE clause
66
+ dbslice extract postgres://localhost/db --seed "orders:status='failed' AND created_at > '2024-01-01'"
67
+
68
+ # Multiple seeds
69
+ dbslice extract postgres://localhost/db \
70
+ --seed "orders.id=100" \
71
+ --seed "orders.id=101"
72
+ ```
73
+
74
+ ### Control Traversal
75
+
76
+ ```bash
77
+ # Limit depth (default: 3)
78
+ dbslice extract postgres://... --seed "orders.id=1" --depth 2
79
+
80
+ # Direction: up (parents only), down (children only), both (default)
81
+ dbslice extract postgres://... --seed "orders.id=1" --direction up
82
+ ```
83
+
84
+ ### Anonymization
85
+
86
+ ```bash
87
+ # Auto-anonymize detected sensitive fields
88
+ dbslice extract postgres://... --seed "users.id=1" --anonymize
89
+
90
+ # Redact additional fields
91
+ dbslice extract postgres://... --seed "users.id=1" --anonymize --redact "audit_logs.ip_address"
92
+ ```
93
+
94
+ ### Output Formats
95
+
96
+ ```bash
97
+ # SQL (default)
98
+ dbslice extract postgres://... --seed "orders.id=1" --output sql
99
+
100
+ # JSON fixtures
101
+ dbslice extract postgres://... --seed "orders.id=1" --output json --out-file fixtures/
102
+
103
+ # CSV
104
+ dbslice extract postgres://... --seed "orders.id=1" --output csv --out-file data/
105
+ ```
106
+
107
+ ### Virtual Foreign Keys
108
+
109
+ Relationships that are not defined in the database schema (such as Django GenericForeignKeys or other implicit relationships) can be declared in the config file:
110
+
111
+ ```yaml
112
+ # dbslice.yaml
113
+ database:
114
+   url: postgres://localhost:5432/myapp
115
+
116
+ virtual_foreign_keys:
117
+   - source_table: notifications
118
+     source_columns: [object_id]
119
+     target_table: orders
120
+     description: "Generic FK to orders via ContentType"
121
+
122
+   - source_table: audit_log
123
+     source_columns: [user_id]
124
+     target_table: users
125
+     description: "Implicit FK without DB constraint"
126
+ ```
127
+
128
+ ```bash
129
+ dbslice extract --config dbslice.yaml --seed "users.id=1"
130
+ ```
131
+
132
+ ### Inspect Schema
133
+
134
+ ```bash
135
+ dbslice inspect postgres://localhost/myapp
136
+ ```
137
+
138
+ ### Configuration File
139
+
140
+ ```bash
141
+ # Generate config from database
142
+ dbslice init postgres://localhost/myapp --out-file dbslice.yaml
143
+
144
+ # Use config
145
+ dbslice extract --config dbslice.yaml --seed "orders.id=12345"
146
+ ```
147
+
148
+ ## How It Works
149
+
150
+ 1. **Introspect** -- Reads database schema to discover tables and foreign key relationships
151
+ 2. **Traverse** -- Starting from seed record(s), follows FK relationships via BFS
152
+ 3. **Extract** -- Fetches all identified records
153
+ 4. **Sort** -- Topologically sorts tables for correct INSERT order
154
+ 5. **Output** -- Generates SQL/JSON/CSV with proper escaping
155
+
156
+ ## Comparison
157
+
158
+ | Feature | dbslice | Jailer | Greenmask | slice-db |
159
+ |---------|---------|--------|-----------|----------|
160
+ | Language | Python | Java | Go | Ruby |
161
+ | Configuration | Zero-config | Requires model file | Config required | Manual YAML |
162
+ | Setup time | Seconds | Hours | Medium | Medium |
163
+ | Anonymization | Built-in (Faker) | Plugin-based | Advanced transformers | Not available |
164
+ | Subsetting | FK traversal | FK traversal | Limited | FK traversal |
165
+ | Output formats | SQL, JSON, CSV | SQL, XML, CSV | SQL | SQL only |
166
+ | Cycle handling | Automatic | Manual config | N/A | Manual |
167
+ | Streaming | Built-in | Configurable | Built-in | Not available |
168
+ | Maintenance | Active | Active | Active | Unmaintained |
169
+
170
+ **dbslice** is the lightweight, zero-config Python option: install and extract in under a minute.
171
+
172
+ ## Development
173
+
174
+ ```bash
175
+ git clone https://github.com/nabroleonx/dbslice.git
176
+ cd dbslice
177
+ uv sync --dev
178
+ uv run pytest
179
+ ```
180
+
181
+ ## License
182
+
183
+ MIT