db-toolkit 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- db_toolkit-0.0.4/PKG-INFO +112 -0
- db_toolkit-0.0.4/README.md +74 -0
- db_toolkit-0.0.4/db_toolkit/__init__.py +1 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit/_modidx.py +7 -5
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit/db_extract.py +75 -47
- db_toolkit-0.0.4/db_toolkit.egg-info/PKG-INFO +112 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/settings.ini +15 -23
- db_toolkit-0.0.2/PKG-INFO +0 -167
- db_toolkit-0.0.2/README.md +0 -129
- db_toolkit-0.0.2/db_toolkit/__init__.py +0 -1
- db_toolkit-0.0.2/db_toolkit.egg-info/PKG-INFO +0 -167
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/LICENSE +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/MANIFEST.in +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit/core.py +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit/db_connect.py +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit.egg-info/SOURCES.txt +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit.egg-info/dependency_links.txt +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit.egg-info/entry_points.txt +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit.egg-info/not-zip-safe +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit.egg-info/requires.txt +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/db_toolkit.egg-info/top_level.txt +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/pyproject.toml +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/setup.cfg +0 -0
- {db_toolkit-0.0.2 → db_toolkit-0.0.4}/setup.py +0 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: db-toolkit
|
|
3
|
+
Version: 0.0.4
|
|
4
|
+
Summary: utility tools for db access and data extraction
|
|
5
|
+
Home-page: https://github.com/Analytics/db-toolkit
|
|
6
|
+
Author: frangs
|
|
7
|
+
Author-email: giordanofrancisco@duck.com
|
|
8
|
+
License: Apache Software License 2.0
|
|
9
|
+
Keywords: nbdev jupyter notebook python
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Natural Language :: English
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: fastcore
|
|
22
|
+
Requires-Dist: pandas
|
|
23
|
+
Requires-Dist: sqlalchemy
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Dynamic: author
|
|
26
|
+
Dynamic: author-email
|
|
27
|
+
Dynamic: classifier
|
|
28
|
+
Dynamic: description
|
|
29
|
+
Dynamic: description-content-type
|
|
30
|
+
Dynamic: home-page
|
|
31
|
+
Dynamic: keywords
|
|
32
|
+
Dynamic: license
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
Dynamic: provides-extra
|
|
35
|
+
Dynamic: requires-dist
|
|
36
|
+
Dynamic: requires-python
|
|
37
|
+
Dynamic: summary
|
|
38
|
+
|
|
39
|
+
# db-toolkit
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
43
|
+
|
|
44
|
+
``` python
|
|
45
|
+
from db_toolkit.db_extract import *
|
|
46
|
+
from db_toolkit.db_connect import *
|
|
47
|
+
from pathlib import Path
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Developer Guide
|
|
51
|
+
|
|
52
|
+
### Install db_toolkit in Development mode
|
|
53
|
+
|
|
54
|
+
``` sh
|
|
55
|
+
# make sure db_toolkit package is installed in development mode
|
|
56
|
+
$ pip install -e .
|
|
57
|
+
|
|
58
|
+
# make changes under nbs/ directory
|
|
59
|
+
# ...
|
|
60
|
+
|
|
61
|
+
# compile to have changes apply to db_toolkit
|
|
62
|
+
$ nbdev_prepare
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Usage
|
|
66
|
+
|
|
67
|
+
### Installation
|
|
68
|
+
|
|
69
|
+
To use this package in another project, install as below
|
|
70
|
+
|
|
71
|
+
Install the latest release with pip:
|
|
72
|
+
|
|
73
|
+
``` sh
|
|
74
|
+
$ pip install db-toolkit
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## How to use
|
|
78
|
+
|
|
79
|
+
### 1. Set up your config file
|
|
80
|
+
|
|
81
|
+
Create an `.ini` file with your database connection details:
|
|
82
|
+
|
|
83
|
+
``` ini
|
|
84
|
+
[CONN_ORC]
|
|
85
|
+
user:TEST_USER
|
|
86
|
+
pass:<your_password>
|
|
87
|
+
dsn:<...>
|
|
88
|
+
port:<...>
|
|
89
|
+
dbname:<...>
|
|
90
|
+
|
|
91
|
+
[CONN_MS]
|
|
92
|
+
server:<...>
|
|
93
|
+
dbname:<...>
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### 2. Create your SQL file
|
|
97
|
+
|
|
98
|
+
Save your query in `data/sql/` with the naming convention
|
|
99
|
+
`<CONNECTION>__<tablename>.sql`:
|
|
100
|
+
|
|
101
|
+
data/sql/BIODS__address_ctax.sql
|
|
102
|
+
|
|
103
|
+
### 3. Pull the data
|
|
104
|
+
|
|
105
|
+
``` python
|
|
106
|
+
from db_toolkit.db_extract import DBExtract
|
|
107
|
+
|
|
108
|
+
dbe = DBExtract(cfg_fpath='path/to/your/config.ini')
|
|
109
|
+
dbe.pull_table('BIODS__address_ctax')
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
The resulting CSV will be saved to `data/landing/`.
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# db-toolkit
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
5
|
+
|
|
6
|
+
``` python
|
|
7
|
+
from db_toolkit.db_extract import *
|
|
8
|
+
from db_toolkit.db_connect import *
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Developer Guide
|
|
13
|
+
|
|
14
|
+
### Install db_toolkit in Development mode
|
|
15
|
+
|
|
16
|
+
``` sh
|
|
17
|
+
# make sure db_toolkit package is installed in development mode
|
|
18
|
+
$ pip install -e .
|
|
19
|
+
|
|
20
|
+
# make changes under nbs/ directory
|
|
21
|
+
# ...
|
|
22
|
+
|
|
23
|
+
# compile to have changes apply to db_toolkit
|
|
24
|
+
$ nbdev_prepare
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
### Installation
|
|
30
|
+
|
|
31
|
+
To use this package in another project, install as below
|
|
32
|
+
|
|
33
|
+
Install the latest release with pip:
|
|
34
|
+
|
|
35
|
+
``` sh
|
|
36
|
+
$ pip install db-toolkit
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## How to use
|
|
40
|
+
|
|
41
|
+
### 1. Set up your config file
|
|
42
|
+
|
|
43
|
+
Create an `.ini` file with your database connection details:
|
|
44
|
+
|
|
45
|
+
``` ini
|
|
46
|
+
[CONN_ORC]
|
|
47
|
+
user:TEST_USER
|
|
48
|
+
pass:<your_password>
|
|
49
|
+
dsn:<...>
|
|
50
|
+
port:<...>
|
|
51
|
+
dbname:<...>
|
|
52
|
+
|
|
53
|
+
[CONN_MS]
|
|
54
|
+
server:<...>
|
|
55
|
+
dbname:<...>
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 2. Create your SQL file
|
|
59
|
+
|
|
60
|
+
Save your query in `data/sql/` with the naming convention
|
|
61
|
+
`<CONNECTION>__<tablename>.sql`:
|
|
62
|
+
|
|
63
|
+
data/sql/BIODS__address_ctax.sql
|
|
64
|
+
|
|
65
|
+
### 3. Pull the data
|
|
66
|
+
|
|
67
|
+
``` python
|
|
68
|
+
from db_toolkit.db_extract import DBExtract
|
|
69
|
+
|
|
70
|
+
dbe = DBExtract(cfg_fpath='path/to/your/config.ini')
|
|
71
|
+
dbe.pull_table('BIODS__address_ctax')
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
The resulting CSV will be saved to `data/landing/`.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.4"
|
|
@@ -34,12 +34,14 @@ d = { 'settings': { 'branch': 'main',
|
|
|
34
34
|
'db_toolkit/db_extract.py'),
|
|
35
35
|
'db_toolkit.db_extract.DBExtract._get_sql': ( 'db_extract.html#dbextract._get_sql',
|
|
36
36
|
'db_toolkit/db_extract.py'),
|
|
37
|
-
'db_toolkit.db_extract.DBExtract.
|
|
38
|
-
|
|
39
|
-
'db_toolkit.db_extract.DBExtract.
|
|
37
|
+
'db_toolkit.db_extract.DBExtract.pull_table': ( 'db_extract.html#dbextract.pull_table',
|
|
38
|
+
'db_toolkit/db_extract.py'),
|
|
39
|
+
'db_toolkit.db_extract.DBExtract.pull_tables': ( 'db_extract.html#dbextract.pull_tables',
|
|
40
|
+
'db_toolkit/db_extract.py'),
|
|
41
|
+
'db_toolkit.db_extract.DBExtract.pull_with_manifest': ( 'db_extract.html#dbextract.pull_with_manifest',
|
|
40
42
|
'db_toolkit/db_extract.py'),
|
|
41
|
-
'db_toolkit.db_extract.DBExtract.
|
|
42
|
-
|
|
43
|
+
'db_toolkit.db_extract.DBExtract.sample': ( 'db_extract.html#dbextract.sample',
|
|
44
|
+
'db_toolkit/db_extract.py'),
|
|
43
45
|
'db_toolkit.db_extract.PullManifest': ('db_extract.html#pullmanifest', 'db_toolkit/db_extract.py'),
|
|
44
46
|
'db_toolkit.db_extract.PullManifest.__init__': ( 'db_extract.html#pullmanifest.__init__',
|
|
45
47
|
'db_toolkit/db_extract.py'),
|
|
@@ -6,27 +6,65 @@ from pathlib import Path
|
|
|
6
6
|
import json, time
|
|
7
7
|
from datetime import datetime, timedelta
|
|
8
8
|
from fastcore.basics import patch
|
|
9
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
9
10
|
import pandas as pd
|
|
10
11
|
from .db_connect import *
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
# %% auto #0
|
|
14
|
-
__all__ = ['
|
|
15
|
+
__all__ = ['PullManifest', 'DBExtract']
|
|
16
|
+
|
|
17
|
+
# %% ../nbs/02_db_extract.ipynb #bf53f340
|
|
18
|
+
class PullManifest:
    """Track when each table was last pulled, persisted as a JSON file.

    The manifest is a dict shaped like
    ``{"pulls": {table_name: {"last_pull", "row_count", "status"}}}``.
    ``last_pull`` is stored as the ISO string of ``datetime.now()``.

    Parameters
    ----------
    manifest_path : str or Path
        Location of the JSON manifest. It is read if it exists; otherwise an
        empty manifest is used until the first ``record_pull``.
    pull_interval_days : int, default 14
        Age, in days, after which a recorded pull is considered stale.
    """

    def __init__(self, manifest_path, pull_interval_days=14):
        # Imported here so the class brings its own dependency into scope.
        import threading

        self.path = Path(manifest_path)
        self.pull_interval_days = pull_interval_days
        # DBExtract.pull_tables runs pull_table (and therefore record_pull)
        # on several worker threads. Without a lock, one thread can mutate
        # ``self.data`` while another is serialising it, and two threads can
        # write the file at the same time. Re-entrant because record_pull
        # holds the lock while calling _save.
        self._lock = threading.RLock()
        self._load()

    def _load(self):
        """Load manifest from disk, or create empty one"""
        if self.path.exists():
            self.data = json.loads(self.path.read_text())
        else:
            self.data = {"pulls": {}}

    def _save(self):
        """Persist manifest to disk, creating the parent directory if needed"""
        with self._lock:
            # A missing directory would otherwise make the write fail only
            # after the table has already been pulled.
            self.path.parent.mkdir(parents=True, exist_ok=True)
            self.path.write_text(json.dumps(self.data, indent=2))

    def should_pull(self, table_name):
        """Check if table needs refresh based on TTL

        Returns True when the table has no recorded pull, or when its last
        pull is older than ``pull_interval_days`` days.
        """
        with self._lock:
            entry = self.data["pulls"].get(table_name)
        if entry is None:
            return True
        last = datetime.fromisoformat(entry["last_pull"])
        return datetime.now() - last > timedelta(days=self.pull_interval_days)

    def record_pull(self, table_name, row_count, status="success"):
        """Record a completed pull and write the manifest to disk"""
        with self._lock:
            self.data["pulls"][table_name] = {
                "last_pull": datetime.now().isoformat(),
                "row_count": row_count,
                "status": status,
            }
            self._save()
|
|
48
|
+
|
|
15
49
|
|
|
16
50
|
# %% ../nbs/02_db_extract.ipynb #4ebf670b-04ae-4d44-9eac-aa4caf54be99
|
|
17
51
|
class DBExtract:
|
|
18
|
-
def __init__(self, cfg_fpath):
|
|
52
|
+
def __init__(self, cfg_fpath, manifest=None):
|
|
19
53
|
"""
|
|
20
54
|
Constructor for the DBExtract class.
|
|
21
55
|
Reads parameters from the specified configuration file
|
|
22
|
-
Pairs SQL
|
|
56
|
+
Pairs SQL script to connection engine
|
|
23
57
|
Pulls and saves data to landing folder
|
|
58
|
+
|
|
59
|
+
SQL files must follow naming convention: <DB_NAME>__<TABLE_NAME>.sql
|
|
60
|
+
where DB_NAME matches a key in the config connections.
|
|
24
61
|
"""
|
|
25
62
|
DBConfig.file_exists(cfg_fpath)
|
|
26
63
|
self.data_dir = Path("../data")
|
|
27
64
|
self.sql_dir = self.data_dir / 'sql'
|
|
28
65
|
self.cfg = DBConfig(cfg_fpath)
|
|
29
66
|
self.cfg.set_conn()
|
|
67
|
+
self.manifest = manifest
|
|
30
68
|
|
|
31
69
|
def _get_pairs(self):
|
|
32
70
|
"""Using SQL files name convention <CONN>__<DF NAME>, create list collection type (df name, connection engine, sql script)"""
|
|
@@ -54,53 +92,43 @@ class DBExtract:
|
|
|
54
92
|
retries += 1
|
|
55
93
|
wait_time = 2**retries
|
|
56
94
|
time.sleep(wait_time)
|
|
57
|
-
return self._get_dataframe(
|
|
95
|
+
return self._get_dataframe(conn, sql, chunksize, retries=retries)
|
|
58
96
|
return pd.concat([df.dropna(how='all', axis=1) for df in dataframes], ignore_index=True)
|
|
59
|
-
|
|
60
|
-
def _process_all_sql(self, chunksize=10_000):
|
|
61
|
-
self._get_pairs()
|
|
62
|
-
self.dataframes = {}
|
|
63
|
-
for name, conn, sql in self.pairs: self.dataframes[name] = self._get_dataframe(conn, sql, chunksize=chunksize)
|
|
64
|
-
|
|
65
|
-
def _save_data_landing(self):
|
|
66
|
-
for key, df in self.dataframes.items(): df.to_csv(self.data_dir/f"landing/{key}.csv", index=False)
|
|
67
|
-
|
|
68
|
-
def process_save_landing(self, chunksize=10_000):
|
|
69
|
-
"""pull and saves all db dataframes as csv to data/landing"""
|
|
70
|
-
self._process_all_sql(chunksize=chunksize)
|
|
71
|
-
self._save_data_landing()
|
|
72
97
|
|
|
73
98
|
|
|
74
99
|
|
|
75
|
-
# %% ../nbs/02_db_extract.ipynb #
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def _load(self):
|
|
83
|
-
"""Load manifest from disk, or create empty one"""
|
|
84
|
-
if self.path.exists(): self.data = json.loads(self.path.read_text())
|
|
85
|
-
else: self.data = {"pulls": {}}
|
|
86
|
-
|
|
87
|
-
def _save(self):
|
|
88
|
-
"""Persist manifest to disk"""
|
|
89
|
-
self.path.write_text(json.dumps(self.data, indent=2))
|
|
90
|
-
|
|
91
|
-
def should_pull(self, table_name):
|
|
92
|
-
"""Check if table needs refresh based on TTL"""
|
|
93
|
-
if table_name not in self.data["pulls"]: return True
|
|
94
|
-
last = datetime.fromisoformat(self.data["pulls"][table_name]["last_pull"])
|
|
95
|
-
return datetime.now() - last > timedelta(days=self.pull_interval_days)
|
|
96
|
-
|
|
97
|
-
def record_pull(self, table_name, row_count, status="success"):
|
|
98
|
-
"""Record a completed pull"""
|
|
99
|
-
self.data["pulls"][table_name] = {
|
|
100
|
-
"last_pull": datetime.now().isoformat(),
|
|
101
|
-
"row_count": row_count,
|
|
102
|
-
"status": status
|
|
100
|
+
# %% ../nbs/02_db_extract.ipynb #bc75d61c-94cb-4322-a1bf-d3be6d73110f
|
|
101
|
+
@patch
def sample(self: DBExtract, name, n=100):
    """Pull first n rows from a table for testing

    Looks up the (name, connection, sql) entry whose first element equals
    `name` in `self.pairs` and returns the first chunk produced by
    `self._get_data_stream(conn, sql, chunksize=n)`.

    Raises
    ------
    ValueError
        If no entry in `self.pairs` is registered under `name`.
    """
    self._get_pairs()
    pairs = list(self.pairs)
    match = next((p for p in pairs if p[0] == name), None)
    if match is None:
        # A bare StopIteration from next() is easy to misread and becomes a
        # RuntimeError if it crosses a generator (PEP 479), so fail clearly.
        available = ", ".join(str(p[0]) for p in pairs)
        raise ValueError(f"No SQL script found for {name!r}; available: {available}")
    _, conn, sql = match
    return next(self._get_data_stream(conn, sql, chunksize=n))
|
|
103
107
|
|
|
104
|
-
|
|
105
|
-
|
|
108
|
+
# %% ../nbs/02_db_extract.ipynb #e2934306
|
|
109
|
+
@patch
def pull_table(self:DBExtract, name, chunksize=10_000):
    """Pull the full table registered under `name` and save it to landing

    The result of `self._get_dataframe(conn, sql, chunksize=chunksize)` is
    written to `<data_dir>/landing/<name>.csv` without the index. When a
    manifest is attached, the pull is recorded with the row count.

    Returns
    -------
    The pulled dataframe.

    Raises
    ------
    ValueError
        If no entry in `self.pairs` is registered under `name`.
    """
    self._get_pairs()
    pairs = list(self.pairs)
    match = next((p for p in pairs if p[0] == name), None)
    if match is None:
        # A bare StopIteration from next() is easy to misread and becomes a
        # RuntimeError if it crosses a generator (PEP 479), so fail clearly.
        available = ", ".join(str(p[0]) for p in pairs)
        raise ValueError(f"No SQL script found for {name!r}; available: {available}")
    _, conn, sql = match

    # Create the landing folder before running the query, so a missing
    # directory cannot fail the write after the whole table has been pulled.
    landing_dir = self.data_dir / "landing"
    landing_dir.mkdir(parents=True, exist_ok=True)

    df = self._get_dataframe(conn, sql, chunksize=chunksize)
    df.to_csv(landing_dir / f"{name}.csv", index=False)
    if self.manifest is not None:
        self.manifest.record_pull(name, len(df))
    return df
|
|
118
|
+
|
|
119
|
+
# %% ../nbs/02_db_extract.ipynb #44b8f7be
|
|
120
|
+
@patch
def pull_tables(self:DBExtract, names, chunksize=10_000, max_workers=4):
    """Pull several tables concurrently on a thread pool

    Calls `self.pull_table(name, chunksize)` for every entry of `names`
    using at most `max_workers` threads, and returns the results as a list
    in the same order as `names`.
    """
    def _pull_one(table_name):
        # Each worker runs the single-table pull with the shared chunk size.
        return self.pull_table(table_name, chunksize)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        frames = list(pool.map(_pull_one, names))
    return frames
|
|
106
126
|
|
|
127
|
+
# %% ../nbs/02_db_extract.ipynb #112fff76
|
|
128
|
+
@patch
def pull_with_manifest(self:DBExtract, chunksize=10_000, max_workers=4):
    """Pull every table the manifest reports as due for a refresh

    Does nothing when no manifest is attached. Otherwise collects the names
    in `self.pairs` for which `self.manifest.should_pull(name)` is true and
    passes them to `self.pull_tables`. Always returns None.
    """
    if self.manifest is None:
        return  # No manifest, skip pulling

    self._get_pairs()
    stale = []
    for name, _, _ in self.pairs:
        if self.manifest.should_pull(name):
            stale.append(name)

    if not stale:
        return
    self.pull_tables(stale, chunksize, max_workers)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: db-toolkit
|
|
3
|
+
Version: 0.0.4
|
|
4
|
+
Summary: utility tools for db access and data extraction
|
|
5
|
+
Home-page: https://github.com/Analytics/db-toolkit
|
|
6
|
+
Author: frangs
|
|
7
|
+
Author-email: giordanofrancisco@duck.com
|
|
8
|
+
License: Apache Software License 2.0
|
|
9
|
+
Keywords: nbdev jupyter notebook python
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Natural Language :: English
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: fastcore
|
|
22
|
+
Requires-Dist: pandas
|
|
23
|
+
Requires-Dist: sqlalchemy
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Dynamic: author
|
|
26
|
+
Dynamic: author-email
|
|
27
|
+
Dynamic: classifier
|
|
28
|
+
Dynamic: description
|
|
29
|
+
Dynamic: description-content-type
|
|
30
|
+
Dynamic: home-page
|
|
31
|
+
Dynamic: keywords
|
|
32
|
+
Dynamic: license
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
Dynamic: provides-extra
|
|
35
|
+
Dynamic: requires-dist
|
|
36
|
+
Dynamic: requires-python
|
|
37
|
+
Dynamic: summary
|
|
38
|
+
|
|
39
|
+
# db-toolkit
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
43
|
+
|
|
44
|
+
``` python
|
|
45
|
+
from db_toolkit.db_extract import *
|
|
46
|
+
from db_toolkit.db_connect import *
|
|
47
|
+
from pathlib import Path
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Developer Guide
|
|
51
|
+
|
|
52
|
+
### Install db_toolkit in Development mode
|
|
53
|
+
|
|
54
|
+
``` sh
|
|
55
|
+
# make sure db_toolkit package is installed in development mode
|
|
56
|
+
$ pip install -e .
|
|
57
|
+
|
|
58
|
+
# make changes under nbs/ directory
|
|
59
|
+
# ...
|
|
60
|
+
|
|
61
|
+
# compile to have changes apply to db_toolkit
|
|
62
|
+
$ nbdev_prepare
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Usage
|
|
66
|
+
|
|
67
|
+
### Installation
|
|
68
|
+
|
|
69
|
+
To use this package in another project, install as below
|
|
70
|
+
|
|
71
|
+
Install the latest release with pip:
|
|
72
|
+
|
|
73
|
+
``` sh
|
|
74
|
+
$ pip install db-toolkit
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## How to use
|
|
78
|
+
|
|
79
|
+
### 1. Set up your config file
|
|
80
|
+
|
|
81
|
+
Create an `.ini` file with your database connection details:
|
|
82
|
+
|
|
83
|
+
``` ini
|
|
84
|
+
[CONN_ORC]
|
|
85
|
+
user:TEST_USER
|
|
86
|
+
pass:<your_password>
|
|
87
|
+
dsn:<...>
|
|
88
|
+
port:<...>
|
|
89
|
+
dbname:<...>
|
|
90
|
+
|
|
91
|
+
[CONN_MS]
|
|
92
|
+
server:<...>
|
|
93
|
+
dbname:<...>
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### 2. Create your SQL file
|
|
97
|
+
|
|
98
|
+
Save your query in `data/sql/` with the naming convention
|
|
99
|
+
`<CONNECTION>__<tablename>.sql`:
|
|
100
|
+
|
|
101
|
+
data/sql/BIODS__address_ctax.sql
|
|
102
|
+
|
|
103
|
+
### 3. Pull the data
|
|
104
|
+
|
|
105
|
+
``` python
|
|
106
|
+
from db_toolkit.db_extract import DBExtract
|
|
107
|
+
|
|
108
|
+
dbe = DBExtract(cfg_fpath='path/to/your/config.ini')
|
|
109
|
+
dbe.pull_table('BIODS__address_ctax')
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
The resulting CSV will be saved to `data/landing/`.
|
|
@@ -1,16 +1,10 @@
|
|
|
1
1
|
[DEFAULT]
|
|
2
|
-
# All sections below are required unless otherwise specified.
|
|
3
|
-
# See https://github.com/AnswerDotAI/nbdev/blob/main/settings.ini for examples.
|
|
4
|
-
|
|
5
|
-
### Python library ###
|
|
6
2
|
repo = db-toolkit
|
|
7
|
-
lib_name =
|
|
8
|
-
version = 0.0.
|
|
3
|
+
lib_name = db-toolkit
|
|
4
|
+
version = 0.0.4
|
|
9
5
|
min_python = 3.9
|
|
10
6
|
license = apache2
|
|
11
7
|
black_formatting = False
|
|
12
|
-
|
|
13
|
-
### nbdev ###
|
|
14
8
|
doc_path = _docs
|
|
15
9
|
lib_path = db_toolkit
|
|
16
10
|
nbs_path = nbs
|
|
@@ -18,29 +12,27 @@ recursive = True
|
|
|
18
12
|
tst_flags = notest
|
|
19
13
|
put_version_in_init = True
|
|
20
14
|
update_pyproject = True
|
|
21
|
-
|
|
22
|
-
### Docs ###
|
|
23
15
|
branch = main
|
|
24
16
|
custom_sidebar = False
|
|
25
|
-
doc_host = https
|
|
26
|
-
doc_baseurl =
|
|
27
|
-
git_url = https://github.com
|
|
28
|
-
title =
|
|
29
|
-
|
|
30
|
-
### PyPI ###
|
|
17
|
+
doc_host = https://Analytics.github.io
|
|
18
|
+
doc_baseurl = /db-toolkit
|
|
19
|
+
git_url = https://github.com/Analytics/db-toolkit
|
|
20
|
+
title = db-toolkit
|
|
31
21
|
audience = Developers
|
|
32
22
|
author = frangs
|
|
33
23
|
author_email = giordanofrancisco@duck.com
|
|
34
|
-
copyright = 2025 onwards,
|
|
24
|
+
copyright = 2025 onwards, frangs
|
|
35
25
|
description = utility tools for db access and data extraction
|
|
36
26
|
keywords = nbdev jupyter notebook python
|
|
37
27
|
language = English
|
|
38
28
|
status = 3
|
|
39
29
|
user = Analytics
|
|
40
|
-
|
|
41
|
-
### Optional ###
|
|
42
30
|
requirements = fastcore pandas sqlalchemy
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
31
|
+
readme_nb = index.ipynb
|
|
32
|
+
allowed_metadata_keys =
|
|
33
|
+
allowed_cell_metadata_keys =
|
|
34
|
+
jupyter_hooks = False
|
|
35
|
+
clean_ids = True
|
|
36
|
+
clear_all = False
|
|
37
|
+
skip_procs =
|
|
38
|
+
|
db_toolkit-0.0.2/PKG-INFO
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: db-toolkit
|
|
3
|
-
Version: 0.0.2
|
|
4
|
-
Summary: utility tools for db access and data extraction
|
|
5
|
-
Home-page: https://github.com/Analytics/db-toolkit
|
|
6
|
-
Author: frangs
|
|
7
|
-
Author-email: giordanofrancisco@duck.com
|
|
8
|
-
License: Apache Software License 2.0
|
|
9
|
-
Keywords: nbdev jupyter notebook python
|
|
10
|
-
Classifier: Development Status :: 4 - Beta
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: Natural Language :: English
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
|
-
Requires-Python: >=3.9
|
|
19
|
-
Description-Content-Type: text/markdown
|
|
20
|
-
License-File: LICENSE
|
|
21
|
-
Requires-Dist: fastcore
|
|
22
|
-
Requires-Dist: pandas
|
|
23
|
-
Requires-Dist: sqlalchemy
|
|
24
|
-
Provides-Extra: dev
|
|
25
|
-
Dynamic: author
|
|
26
|
-
Dynamic: author-email
|
|
27
|
-
Dynamic: classifier
|
|
28
|
-
Dynamic: description
|
|
29
|
-
Dynamic: description-content-type
|
|
30
|
-
Dynamic: home-page
|
|
31
|
-
Dynamic: keywords
|
|
32
|
-
Dynamic: license
|
|
33
|
-
Dynamic: license-file
|
|
34
|
-
Dynamic: provides-extra
|
|
35
|
-
Dynamic: requires-dist
|
|
36
|
-
Dynamic: requires-python
|
|
37
|
-
Dynamic: summary
|
|
38
|
-
|
|
39
|
-
# db-toolkit
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
43
|
-
|
|
44
|
-
``` python
|
|
45
|
-
from db_toolkit.db_extract import *
|
|
46
|
-
from db_toolkit.db_connect import *
|
|
47
|
-
from pathlib import Path
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
This file will become your README and also the index of your
|
|
51
|
-
documentation.
|
|
52
|
-
|
|
53
|
-
## Developer Guide
|
|
54
|
-
|
|
55
|
-
If you are new to using `nbdev` here are some useful pointers to get you
|
|
56
|
-
started.
|
|
57
|
-
|
|
58
|
-
### Install db_toolkit in Development mode
|
|
59
|
-
|
|
60
|
-
``` sh
|
|
61
|
-
# make sure db_toolkit package is installed in development mode
|
|
62
|
-
$ pip install -e .
|
|
63
|
-
|
|
64
|
-
# make changes under nbs/ directory
|
|
65
|
-
# ...
|
|
66
|
-
|
|
67
|
-
# compile to have changes apply to db_toolkit
|
|
68
|
-
$ nbdev_prepare
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
## Usage
|
|
72
|
-
|
|
73
|
-
### Installation
|
|
74
|
-
|
|
75
|
-
To use this package in another project, install as below
|
|
76
|
-
|
|
77
|
-
Install latest from the AzureDevOps
|
|
78
|
-
[repository](https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit):
|
|
79
|
-
|
|
80
|
-
``` sh
|
|
81
|
-
$ pip install git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
### Dependencies
|
|
85
|
-
|
|
86
|
-
The YAML file for this package dependencies can be found in the `envs\`
|
|
87
|
-
DIR. If using toolkit on another project, it is advisable to define
|
|
88
|
-
`db-toolkit` as the below format YAML.
|
|
89
|
-
|
|
90
|
-
``` yaml
|
|
91
|
-
name: er
|
|
92
|
-
channels:
|
|
93
|
-
- conda-forge
|
|
94
|
-
dependencies:
|
|
95
|
-
- jupyterlab
|
|
96
|
-
- pandas
|
|
97
|
-
- oracledb
|
|
98
|
-
- pyodbc
|
|
99
|
-
- pip
|
|
100
|
-
- pip:
|
|
101
|
-
- nbdev
|
|
102
|
-
- git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
|
|
103
|
-
- sqlalchemy
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
### Documentation
|
|
107
|
-
|
|
108
|
-
TODO potentially when github is available.
|
|
109
|
-
|
|
110
|
-
## How to use
|
|
111
|
-
|
|
112
|
-
## Set ini file location path
|
|
113
|
-
|
|
114
|
-
- TODO init file struct details show example
|
|
115
|
-
|
|
116
|
-
``` python
|
|
117
|
-
fpath2 = '../../../data/db_connections/cfg_address.ini'
|
|
118
|
-
dbe = DBExtract(cfg_fpath=fpath2)
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
## SQL files
|
|
122
|
-
|
|
123
|
-
- Saved at ../data/sql
|
|
124
|
-
- TODO add details on path \<dbname\>\_\_\<tablename\>
|
|
125
|
-
|
|
126
|
-
------------------------------------------------------------------------
|
|
127
|
-
|
|
128
|
-
<a
|
|
129
|
-
href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_connect.py#LNone"
|
|
130
|
-
target="_blank" style="float:right; font-size:smaller">source</a>
|
|
131
|
-
|
|
132
|
-
### DBConfig
|
|
133
|
-
|
|
134
|
-
> DBConfig (file_path)
|
|
135
|
-
|
|
136
|
-
*Constructor for the DBConfig class. Reads parameters from the specified
|
|
137
|
-
configuration file and presents them appropriately to the application.*
|
|
138
|
-
|
|
139
|
-
------------------------------------------------------------------------
|
|
140
|
-
|
|
141
|
-
<a
|
|
142
|
-
href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_extract.py#LNone"
|
|
143
|
-
target="_blank" style="float:right; font-size:smaller">source</a>
|
|
144
|
-
|
|
145
|
-
### DBExtract
|
|
146
|
-
|
|
147
|
-
> DBExtract (cfg_fpath)
|
|
148
|
-
|
|
149
|
-
*Constructor for the DBExtract class. Reads parameters from the
|
|
150
|
-
specified configuration file Pairs SQL scrit to connection engine Pulls
|
|
151
|
-
and saves data to landing folder*
|
|
152
|
-
|
|
153
|
-
``` python
|
|
154
|
-
data_dir = Path("../data")
|
|
155
|
-
sql_dir = data_dir / 'sql'
|
|
156
|
-
sql_files = list(sql_dir.glob("*.sql"))
|
|
157
|
-
sql_files
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
[Path('../data/sql/BIODS__address_ctax.sql')]
|
|
161
|
-
|
|
162
|
-
## Process files
|
|
163
|
-
|
|
164
|
-
``` python
|
|
165
|
-
# files wil be saved at ../data/landing
|
|
166
|
-
# dbe.process_save_landing()
|
|
167
|
-
```
|
db_toolkit-0.0.2/README.md
DELETED
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
# db-toolkit
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
5
|
-
|
|
6
|
-
``` python
|
|
7
|
-
from db_toolkit.db_extract import *
|
|
8
|
-
from db_toolkit.db_connect import *
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
This file will become your README and also the index of your
|
|
13
|
-
documentation.
|
|
14
|
-
|
|
15
|
-
## Developer Guide
|
|
16
|
-
|
|
17
|
-
If you are new to using `nbdev` here are some useful pointers to get you
|
|
18
|
-
started.
|
|
19
|
-
|
|
20
|
-
### Install db_toolkit in Development mode
|
|
21
|
-
|
|
22
|
-
``` sh
|
|
23
|
-
# make sure db_toolkit package is installed in development mode
|
|
24
|
-
$ pip install -e .
|
|
25
|
-
|
|
26
|
-
# make changes under nbs/ directory
|
|
27
|
-
# ...
|
|
28
|
-
|
|
29
|
-
# compile to have changes apply to db_toolkit
|
|
30
|
-
$ nbdev_prepare
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
## Usage
|
|
34
|
-
|
|
35
|
-
### Installation
|
|
36
|
-
|
|
37
|
-
To use this package in another project, install as below
|
|
38
|
-
|
|
39
|
-
Install latest from the AzureDevOps
|
|
40
|
-
[repository](https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit):
|
|
41
|
-
|
|
42
|
-
``` sh
|
|
43
|
-
$ pip install git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
### Dependencies
|
|
47
|
-
|
|
48
|
-
The YAML file for this package dependencies can be found in the `envs\`
|
|
49
|
-
DIR. If using toolkit on another project, it is advisable to define
|
|
50
|
-
`db-toolkit` as the below format YAML.
|
|
51
|
-
|
|
52
|
-
``` yaml
|
|
53
|
-
name: er
|
|
54
|
-
channels:
|
|
55
|
-
- conda-forge
|
|
56
|
-
dependencies:
|
|
57
|
-
- jupyterlab
|
|
58
|
-
- pandas
|
|
59
|
-
- oracledb
|
|
60
|
-
- pyodbc
|
|
61
|
-
- pip
|
|
62
|
-
- pip:
|
|
63
|
-
- nbdev
|
|
64
|
-
- git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
|
|
65
|
-
- sqlalchemy
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
### Documentation
|
|
69
|
-
|
|
70
|
-
TODO potentially when github is available.
|
|
71
|
-
|
|
72
|
-
## How to use
|
|
73
|
-
|
|
74
|
-
## Set ini file location path
|
|
75
|
-
|
|
76
|
-
- TODO init file struct details show example
|
|
77
|
-
|
|
78
|
-
``` python
|
|
79
|
-
fpath2 = '../../../data/db_connections/cfg_address.ini'
|
|
80
|
-
dbe = DBExtract(cfg_fpath=fpath2)
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
## SQL files
|
|
84
|
-
|
|
85
|
-
- Saved at ../data/sql
|
|
86
|
-
- TODO add details on path \<dbname\>\_\_\<tablename\>
|
|
87
|
-
|
|
88
|
-
------------------------------------------------------------------------
|
|
89
|
-
|
|
90
|
-
<a
|
|
91
|
-
href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_connect.py#LNone"
|
|
92
|
-
target="_blank" style="float:right; font-size:smaller">source</a>
|
|
93
|
-
|
|
94
|
-
### DBConfig
|
|
95
|
-
|
|
96
|
-
> DBConfig (file_path)
|
|
97
|
-
|
|
98
|
-
*Constructor for the DBConfig class. Reads parameters from the specified
|
|
99
|
-
configuration file and presents them appropriately to the application.*
|
|
100
|
-
|
|
101
|
-
------------------------------------------------------------------------
|
|
102
|
-
|
|
103
|
-
<a
|
|
104
|
-
href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_extract.py#LNone"
|
|
105
|
-
target="_blank" style="float:right; font-size:smaller">source</a>
|
|
106
|
-
|
|
107
|
-
### DBExtract
|
|
108
|
-
|
|
109
|
-
> DBExtract (cfg_fpath)
|
|
110
|
-
|
|
111
|
-
*Constructor for the DBExtract class. Reads parameters from the
|
|
112
|
-
specified configuration file Pairs SQL scrit to connection engine Pulls
|
|
113
|
-
and saves data to landing folder*
|
|
114
|
-
|
|
115
|
-
``` python
|
|
116
|
-
data_dir = Path("../data")
|
|
117
|
-
sql_dir = data_dir / 'sql'
|
|
118
|
-
sql_files = list(sql_dir.glob("*.sql"))
|
|
119
|
-
sql_files
|
|
120
|
-
```
|
|
121
|
-
|
|
122
|
-
[Path('../data/sql/BIODS__address_ctax.sql')]
|
|
123
|
-
|
|
124
|
-
## Process files
|
|
125
|
-
|
|
126
|
-
``` python
|
|
127
|
-
# files wil be saved at ../data/landing
|
|
128
|
-
# dbe.process_save_landing()
|
|
129
|
-
```
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.2"
|
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: db-toolkit
|
|
3
|
-
Version: 0.0.2
|
|
4
|
-
Summary: utility tools for db access and data extraction
|
|
5
|
-
Home-page: https://github.com/Analytics/db-toolkit
|
|
6
|
-
Author: frangs
|
|
7
|
-
Author-email: giordanofrancisco@duck.com
|
|
8
|
-
License: Apache Software License 2.0
|
|
9
|
-
Keywords: nbdev jupyter notebook python
|
|
10
|
-
Classifier: Development Status :: 4 - Beta
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: Natural Language :: English
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
|
-
Requires-Python: >=3.9
|
|
19
|
-
Description-Content-Type: text/markdown
|
|
20
|
-
License-File: LICENSE
|
|
21
|
-
Requires-Dist: fastcore
|
|
22
|
-
Requires-Dist: pandas
|
|
23
|
-
Requires-Dist: sqlalchemy
|
|
24
|
-
Provides-Extra: dev
|
|
25
|
-
Dynamic: author
|
|
26
|
-
Dynamic: author-email
|
|
27
|
-
Dynamic: classifier
|
|
28
|
-
Dynamic: description
|
|
29
|
-
Dynamic: description-content-type
|
|
30
|
-
Dynamic: home-page
|
|
31
|
-
Dynamic: keywords
|
|
32
|
-
Dynamic: license
|
|
33
|
-
Dynamic: license-file
|
|
34
|
-
Dynamic: provides-extra
|
|
35
|
-
Dynamic: requires-dist
|
|
36
|
-
Dynamic: requires-python
|
|
37
|
-
Dynamic: summary
|
|
38
|
-
|
|
39
|
-
# db-toolkit
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
43
|
-
|
|
44
|
-
``` python
|
|
45
|
-
from db_toolkit.db_extract import *
|
|
46
|
-
from db_toolkit.db_connect import *
|
|
47
|
-
from pathlib import Path
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
This file will become your README and also the index of your
|
|
51
|
-
documentation.
|
|
52
|
-
|
|
53
|
-
## Developer Guide
|
|
54
|
-
|
|
55
|
-
If you are new to using `nbdev` here are some useful pointers to get you
|
|
56
|
-
started.
|
|
57
|
-
|
|
58
|
-
### Install db_toolkit in Development mode
|
|
59
|
-
|
|
60
|
-
``` sh
|
|
61
|
-
# make sure db_toolkit package is installed in development mode
|
|
62
|
-
$ pip install -e .
|
|
63
|
-
|
|
64
|
-
# make changes under nbs/ directory
|
|
65
|
-
# ...
|
|
66
|
-
|
|
67
|
-
# compile to have changes apply to db_toolkit
|
|
68
|
-
$ nbdev_prepare
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
## Usage
|
|
72
|
-
|
|
73
|
-
### Installation
|
|
74
|
-
|
|
75
|
-
To use this package in another project, install as below
|
|
76
|
-
|
|
77
|
-
Install the latest version from the Azure DevOps
|
|
78
|
-
[repository](https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit):
|
|
79
|
-
|
|
80
|
-
``` sh
|
|
81
|
-
$ pip install git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
### Dependencies
|
|
85
|
-
|
|
86
|
-
The YAML file for this package's dependencies can be found in the `envs\`
|
|
87
|
-
directory. If using the toolkit in another project, it is advisable to define
|
|
88
|
-
`db-toolkit` as a dependency using the YAML format below.
|
|
89
|
-
|
|
90
|
-
``` yaml
|
|
91
|
-
name: er
|
|
92
|
-
channels:
|
|
93
|
-
- conda-forge
|
|
94
|
-
dependencies:
|
|
95
|
-
- jupyterlab
|
|
96
|
-
- pandas
|
|
97
|
-
- oracledb
|
|
98
|
-
- pyodbc
|
|
99
|
-
- pip
|
|
100
|
-
- pip:
|
|
101
|
-
- nbdev
|
|
102
|
-
- git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
|
|
103
|
-
- sqlalchemy
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
### Documentation
|
|
107
|
-
|
|
108
|
-
TODO potentially when github is available.
|
|
109
|
-
|
|
110
|
-
## How to use
|
|
111
|
-
|
|
112
|
-
## Set ini file location path
|
|
113
|
-
|
|
114
|
-
- TODO init file struct details show example
|
|
115
|
-
|
|
116
|
-
``` python
|
|
117
|
-
fpath2 = '../../../data/db_connections/cfg_address.ini'
|
|
118
|
-
dbe = DBExtract(cfg_fpath=fpath2)
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
## SQL files
|
|
122
|
-
|
|
123
|
-
- Saved at ../data/sql
|
|
124
|
-
- TODO add details on path \<dbname\>\_\_\<tablename\>
|
|
125
|
-
|
|
126
|
-
------------------------------------------------------------------------
|
|
127
|
-
|
|
128
|
-
<a
|
|
129
|
-
href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_connect.py#LNone"
|
|
130
|
-
target="_blank" style="float:right; font-size:smaller">source</a>
|
|
131
|
-
|
|
132
|
-
### DBConfig
|
|
133
|
-
|
|
134
|
-
> DBConfig (file_path)
|
|
135
|
-
|
|
136
|
-
*Constructor for the DBConfig class. Reads parameters from the specified
|
|
137
|
-
configuration file and presents them appropriately to the application.*
|
|
138
|
-
|
|
139
|
-
------------------------------------------------------------------------
|
|
140
|
-
|
|
141
|
-
<a
|
|
142
|
-
href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_extract.py#LNone"
|
|
143
|
-
target="_blank" style="float:right; font-size:smaller">source</a>
|
|
144
|
-
|
|
145
|
-
### DBExtract
|
|
146
|
-
|
|
147
|
-
> DBExtract (cfg_fpath)
|
|
148
|
-
|
|
149
|
-
*Constructor for the DBExtract class. Reads parameters from the
|
|
150
|
-
specified configuration file. Pairs SQL script to connection engine. Pulls
|
|
151
|
-
and saves data to landing folder*
|
|
152
|
-
|
|
153
|
-
``` python
|
|
154
|
-
data_dir = Path("../data")
|
|
155
|
-
sql_dir = data_dir / 'sql'
|
|
156
|
-
sql_files = list(sql_dir.glob("*.sql"))
|
|
157
|
-
sql_files
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
[Path('../data/sql/BIODS__address_ctax.sql')]
|
|
161
|
-
|
|
162
|
-
## Process files
|
|
163
|
-
|
|
164
|
-
``` python
|
|
165
|
-
# files will be saved at ../data/landing
|
|
166
|
-
# dbe.process_save_landing()
|
|
167
|
-
```
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|