data-dictionary-builder 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data_dictionary_builder-1.0.0/LICENSE +21 -0
  2. data_dictionary_builder-1.0.0/PKG-INFO +173 -0
  3. data_dictionary_builder-1.0.0/README.md +116 -0
  4. data_dictionary_builder-1.0.0/pyproject.toml +79 -0
  5. data_dictionary_builder-1.0.0/setup.cfg +4 -0
  6. data_dictionary_builder-1.0.0/src/data_dictionary_builder/DDHelper.py +578 -0
  7. data_dictionary_builder-1.0.0/src/data_dictionary_builder/__init__.py +25 -0
  8. data_dictionary_builder-1.0.0/src/data_dictionary_builder/cli.py +16 -0
  9. data_dictionary_builder-1.0.0/src/data_dictionary_builder/comparison/__init__.py +9 -0
  10. data_dictionary_builder-1.0.0/src/data_dictionary_builder/comparison/comparator.py +374 -0
  11. data_dictionary_builder-1.0.0/src/data_dictionary_builder/connectors/__init__.py +58 -0
  12. data_dictionary_builder-1.0.0/src/data_dictionary_builder/connectors/base.py +186 -0
  13. data_dictionary_builder-1.0.0/src/data_dictionary_builder/connectors/clickhouse_connector.py +368 -0
  14. data_dictionary_builder-1.0.0/src/data_dictionary_builder/connectors/mysql_connector.py +346 -0
  15. data_dictionary_builder-1.0.0/src/data_dictionary_builder/connectors/postgres_connector.py +511 -0
  16. data_dictionary_builder-1.0.0/src/data_dictionary_builder/connectors/spanner_connector.py +272 -0
  17. data_dictionary_builder-1.0.0/src/data_dictionary_builder/connectors/sqlite_connector.py +236 -0
  18. data_dictionary_builder-1.0.0/src/data_dictionary_builder/metadata/__init__.py +21 -0
  19. data_dictionary_builder-1.0.0/src/data_dictionary_builder/metadata/extractor.py +492 -0
  20. data_dictionary_builder-1.0.0/src/data_dictionary_builder/metadata/models.py +253 -0
  21. data_dictionary_builder-1.0.0/src/data_dictionary_builder/notifications/__init__.py +9 -0
  22. data_dictionary_builder-1.0.0/src/data_dictionary_builder/notifications/email_sender.py +296 -0
  23. data_dictionary_builder-1.0.0/src/data_dictionary_builder/timer.py +177 -0
  24. data_dictionary_builder-1.0.0/src/data_dictionary_builder/yaml_generator/__init__.py +9 -0
  25. data_dictionary_builder-1.0.0/src/data_dictionary_builder/yaml_generator/generator.py +474 -0
  26. data_dictionary_builder-1.0.0/src/data_dictionary_builder.egg-info/PKG-INFO +173 -0
  27. data_dictionary_builder-1.0.0/src/data_dictionary_builder.egg-info/SOURCES.txt +36 -0
  28. data_dictionary_builder-1.0.0/src/data_dictionary_builder.egg-info/dependency_links.txt +1 -0
  29. data_dictionary_builder-1.0.0/src/data_dictionary_builder.egg-info/entry_points.txt +2 -0
  30. data_dictionary_builder-1.0.0/src/data_dictionary_builder.egg-info/requires.txt +38 -0
  31. data_dictionary_builder-1.0.0/src/data_dictionary_builder.egg-info/top_level.txt +1 -0
  32. data_dictionary_builder-1.0.0/tests/test_airflow_dag.py +286 -0
  33. data_dictionary_builder-1.0.0/tests/test_clickhouse.py +418 -0
  34. data_dictionary_builder-1.0.0/tests/test_mysql.py +252 -0
  35. data_dictionary_builder-1.0.0/tests/test_postgres.py +254 -0
  36. data_dictionary_builder-1.0.0/tests/test_server_level.py +239 -0
  37. data_dictionary_builder-1.0.0/tests/test_spanner.py +284 -0
  38. data_dictionary_builder-1.0.0/tests/test_sqlite.py +326 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Isaiah Johnson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,173 @@
1
+ Metadata-Version: 2.4
2
+ Name: data-dictionary-builder
3
+ Version: 1.0.0
4
+ Summary: Extract database metadata, generate dbt-compatible YAML, compare schemas, and deliver reports — all in a single Python import.
5
+ Author-email: Isaiah Johnson <d8aguy@mail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/GraFreak0/data_dictionary_builder
8
+ Project-URL: Repository, https://github.com/GraFreak0/data_dictionary_builder
9
+ Project-URL: Issues, https://github.com/GraFreak0/data_dictionary_builder/issues
10
+ Keywords: database,metadata,dbt,yaml,data-engineering,schema,documentation
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Topic :: Database
14
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: PyYAML>=6.0.1
27
+ Requires-Dist: python-dotenv>=1.0.0
28
+ Requires-Dist: click>=8.0.0
29
+ Requires-Dist: typing-extensions>=4.9.0
30
+ Requires-Dist: python-dateutil>=2.8.2
31
+ Requires-Dist: colorlog>=6.8.0
32
+ Provides-Extra: postgres
33
+ Requires-Dist: psycopg2-binary>=2.9.9; extra == "postgres"
34
+ Provides-Extra: mysql
35
+ Requires-Dist: PyMySQL>=1.1.0; extra == "mysql"
36
+ Provides-Extra: clickhouse
37
+ Requires-Dist: clickhouse-driver>=0.2.6; extra == "clickhouse"
38
+ Provides-Extra: spanner
39
+ Requires-Dist: google-cloud-spanner>=3.40.0; extra == "spanner"
40
+ Provides-Extra: pdf
41
+ Requires-Dist: reportlab>=4.0.0; extra == "pdf"
42
+ Provides-Extra: email
43
+ Requires-Dist: secure-smtplib>=0.1.1; extra == "email"
44
+ Provides-Extra: all
45
+ Requires-Dist: psycopg2-binary>=2.9.9; extra == "all"
46
+ Requires-Dist: PyMySQL>=1.1.0; extra == "all"
47
+ Requires-Dist: clickhouse-driver>=0.2.6; extra == "all"
48
+ Requires-Dist: google-cloud-spanner>=3.40.0; extra == "all"
49
+ Requires-Dist: reportlab>=4.0.0; extra == "all"
50
+ Requires-Dist: secure-smtplib>=0.1.1; extra == "all"
51
+ Provides-Extra: dev
52
+ Requires-Dist: pytest>=7.4.3; extra == "dev"
53
+ Requires-Dist: black>=23.12.0; extra == "dev"
54
+ Requires-Dist: flake8>=6.1.0; extra == "dev"
55
+ Requires-Dist: mypy>=1.7.1; extra == "dev"
56
+ Dynamic: license-file
57
+
58
+ # Database Metadata Generator
59
+
60
+ A Python library for extracting database metadata and generating dbt-compatible YAML files with schema comparison capabilities.
61
+
62
+ ## Features
63
+
64
+ - Connect to multiple database types (SQLite, PostgreSQL, MySQL, ClickHouse, Spanner)
65
+ - Extract complete schema metadata (tables, columns, data types, constraints)
66
+ - Generate dbt-compatible YAML files (one per schema)
67
+ - Compare source and destination database schemas
68
+ - Email reporting for schema differences and missing descriptions
69
+ - Airflow-compatible design for orchestration
70
+
71
+ ## Project Structure
72
+
73
+ ```
74
+ data_dictionary_builder/
75
+ ├── README.md
76
+ ├── requirements.txt
77
+ ├── setup.py
78
+ ├── src/
79
+ │ ├── __init__.py
80
+ │ ├── connectors/
81
+ │ │ ├── __init__.py
82
+ │ │ ├── base.py
83
+ │ │ ├── sqlite_connector.py
84
+ │ │ ├── postgres_connector.py
85
+ │ │ ├── mysql_connector.py
86
+ │ │ ├── clickhouse_connector.py
87
+ │ │ └── spanner_connector.py
88
+ │ ├── metadata/
89
+ │ │ ├── __init__.py
90
+ │ │ ├── extractor.py
91
+ │ │ └── models.py
92
+ │ ├── yaml_generator/
93
+ │ │ ├── __init__.py
94
+ │ │ └── generator.py
95
+ │ ├── comparison/
96
+ │ │ ├── __init__.py
97
+ │ │ └── comparator.py
98
+ │ └── notifications/
99
+ │ ├── __init__.py
100
+ │ └── email_sender.py
101
+ ├── examples/
102
+ │ ├── airflow_dag_example.py
103
+ │ └── standalone_usage.py
104
+ └── tests/
105
+ └── __init__.py
106
+ ```
107
+
108
+ ## Installation
109
+
110
+ ```bash
111
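+ pip install "data-dictionary-builder[postgres]"  # from PyPI; choose extras: postgres, mysql, clickhouse, spanner, pdf, email, all
111
+ pip install -e .  # or, from a source checkout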
113
+ ```
114
+
115
+ ## Usage
116
+
117
+ ### Basic Usage
118
+
119
+ ```python
120
+ from data_dictionary_builder import MetadataExtractor, YAMLGenerator
121
+
122
+ # Extract metadata
123
+ extractor = MetadataExtractor(
124
+ db_type='postgres',
125
+ host='localhost',
126
+ port=5432,
127
+ database='mydb',
128
+ user='user',
129
+ password='pass'
130
+ )
131
+
132
+ metadata = extractor.extract_all_schemas()
133
+
134
+ # Generate YAML files
135
+ generator = YAMLGenerator(output_dir='./models')
136
+ generator.generate_yaml_files(metadata)
137
+ ```
138
+
139
+ ### With Schema Comparison
140
+
141
+ ```python
142
+ from data_dictionary_builder import SchemaComparator
143
+
144
+ comparator = SchemaComparator(
145
+ source_config={...},
146
+ destination_config={...},
147
+ yaml_output_dir='./models'
148
+ )
149
+
150
+ report = comparator.compare_and_report(
151
+ email_to='team@example.com',
152
+ smtp_config={...}
153
+ )
154
+ ```
155
+
156
+ ## Airflow Integration
157
+
158
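+ See `examples/airflow_dag_example.py` in the project repository for a complete DAG implementation (the `examples/` directory is not included in this source distribution).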
159
+
160
+ ## Configuration
161
+
162
+ Database connection configurations should include:
163
+ - `db_type`: sqlite, postgres, mysql, clickhouse, spanner
164
+ - `host`: Database host
165
+ - `port`: Database port
166
+ - `database`: Database name
167
+ - `user`: Username
168
+ - `password`: Password
169
+ - Additional driver-specific parameters
170
+
171
+ ## License
172
+
173
+ MIT License
@@ -0,0 +1,116 @@
1
+ # Database Metadata Generator
2
+
3
+ A Python library for extracting database metadata and generating dbt-compatible YAML files with schema comparison capabilities.
4
+
5
+ ## Features
6
+
7
+ - Connect to multiple database types (SQLite, PostgreSQL, MySQL, ClickHouse, Spanner)
8
+ - Extract complete schema metadata (tables, columns, data types, constraints)
9
+ - Generate dbt-compatible YAML files (one per schema)
10
+ - Compare source and destination database schemas
11
+ - Email reporting for schema differences and missing descriptions
12
+ - Airflow-compatible design for orchestration
13
+
14
+ ## Project Structure
15
+
16
+ ```
17
+ data_dictionary_builder/
18
+ ├── README.md
19
+ ├── requirements.txt
20
+ ├── setup.py
21
+ ├── src/
22
+ │ ├── __init__.py
23
+ │ ├── connectors/
24
+ │ │ ├── __init__.py
25
+ │ │ ├── base.py
26
+ │ │ ├── sqlite_connector.py
27
+ │ │ ├── postgres_connector.py
28
+ │ │ ├── mysql_connector.py
29
+ │ │ ├── clickhouse_connector.py
30
+ │ │ └── spanner_connector.py
31
+ │ ├── metadata/
32
+ │ │ ├── __init__.py
33
+ │ │ ├── extractor.py
34
+ │ │ └── models.py
35
+ │ ├── yaml_generator/
36
+ │ │ ├── __init__.py
37
+ │ │ └── generator.py
38
+ │ ├── comparison/
39
+ │ │ ├── __init__.py
40
+ │ │ └── comparator.py
41
+ │ └── notifications/
42
+ │ ├── __init__.py
43
+ │ └── email_sender.py
44
+ ├── examples/
45
+ │ ├── airflow_dag_example.py
46
+ │ └── standalone_usage.py
47
+ └── tests/
48
+ └── __init__.py
49
+ ```
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
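+ pip install "data-dictionary-builder[postgres]"  # from PyPI; choose extras: postgres, mysql, clickhouse, spanner, pdf, email, all
55
+ pip install -e .  # or, from a source checkout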
56
+ ```
57
+
58
+ ## Usage
59
+
60
+ ### Basic Usage
61
+
62
+ ```python
63
+ from data_dictionary_builder import MetadataExtractor, YAMLGenerator
64
+
65
+ # Extract metadata
66
+ extractor = MetadataExtractor(
67
+ db_type='postgres',
68
+ host='localhost',
69
+ port=5432,
70
+ database='mydb',
71
+ user='user',
72
+ password='pass'
73
+ )
74
+
75
+ metadata = extractor.extract_all_schemas()
76
+
77
+ # Generate YAML files
78
+ generator = YAMLGenerator(output_dir='./models')
79
+ generator.generate_yaml_files(metadata)
80
+ ```
81
+
82
+ ### With Schema Comparison
83
+
84
+ ```python
85
+ from data_dictionary_builder import SchemaComparator
86
+
87
+ comparator = SchemaComparator(
88
+ source_config={...},
89
+ destination_config={...},
90
+ yaml_output_dir='./models'
91
+ )
92
+
93
+ report = comparator.compare_and_report(
94
+ email_to='team@example.com',
95
+ smtp_config={...}
96
+ )
97
+ ```
98
+
99
+ ## Airflow Integration
100
+
101
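+ See `examples/airflow_dag_example.py` in the project repository for a complete DAG implementation (the `examples/` directory is not included in this source distribution).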
102
+
103
+ ## Configuration
104
+
105
+ Database connection configurations should include:
106
+ - `db_type`: sqlite, postgres, mysql, clickhouse, spanner
107
+ - `host`: Database host
108
+ - `port`: Database port
109
+ - `database`: Database name
110
+ - `user`: Username
111
+ - `password`: Password
112
+ - Additional driver-specific parameters
113
+
114
+ ## License
115
+
116
+ MIT License
@@ -0,0 +1,79 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "data-dictionary-builder"
7
+ version = "1.0.0"
8
+ description = "Extract database metadata, generate dbt-compatible YAML, compare schemas, and deliver reports — all in a single Python import."
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Isaiah Johnson", email = "d8aguy@mail.com"}
14
+ ]
15
+ keywords = ["database", "metadata", "dbt", "yaml", "data-engineering", "schema", "documentation"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "Topic :: Database",
20
+ "Topic :: Software Development :: Libraries :: Python Modules",
21
+ "License :: OSI Approved :: MIT License",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.8",
24
+ "Programming Language :: Python :: 3.9",
25
+ "Programming Language :: Python :: 3.10",
26
+ "Programming Language :: Python :: 3.11",
27
+ "Programming Language :: Python :: 3.12",
28
+ "Programming Language :: Python :: 3.13",
29
+ ]
30
+
31
+ # Core dependencies — always installed
32
+ dependencies = [
33
+ "PyYAML>=6.0.1",
34
+ "python-dotenv>=1.0.0",
35
+ "click>=8.0.0",
36
+ "typing-extensions>=4.9.0",
37
+ "python-dateutil>=2.8.2",
38
+ "colorlog>=6.8.0",
39
+ ]
40
+
41
+ [project.optional-dependencies]
42
+ # Database connectors — install only what you need
43
+ postgres = ["psycopg2-binary>=2.9.9"]
44
+ mysql = ["PyMySQL>=1.1.0"]
45
+ clickhouse = ["clickhouse-driver>=0.2.6"]
46
+ spanner = ["google-cloud-spanner>=3.40.0"]
47
+
48
+ # Reporting
49
+ pdf = ["reportlab>=4.0.0"]
50
+ email = ["secure-smtplib>=0.1.1"]
51
+
52
+ # Install everything
53
+ all = [
54
+ "psycopg2-binary>=2.9.9",
55
+ "PyMySQL>=1.1.0",
56
+ "clickhouse-driver>=0.2.6",
57
+ "google-cloud-spanner>=3.40.0",
58
+ "reportlab>=4.0.0",
59
+ "secure-smtplib>=0.1.1",
60
+ ]
61
+
62
+ # Development tools
63
+ dev = [
64
+ "pytest>=7.4.3",
65
+ "black>=23.12.0",
66
+ "flake8>=6.1.0",
67
+ "mypy>=1.7.1",
68
+ ]
69
+
70
+ [project.urls]
71
+ Homepage = "https://github.com/GraFreak0/data_dictionary_builder"
72
+ Repository = "https://github.com/GraFreak0/data_dictionary_builder"
73
+ Issues = "https://github.com/GraFreak0/data_dictionary_builder/issues"
74
+
75
+ [project.scripts]
76
+ db-metadata-gen = "data_dictionary_builder.cli:main"
77
+
78
+ [tool.setuptools.packages.find]
79
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+