db-toolkit 0.0.2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,112 @@
1
+ Metadata-Version: 2.4
2
+ Name: db-toolkit
3
+ Version: 0.0.4
4
+ Summary: utility tools for db access and data extraction
5
+ Home-page: https://github.com/Analytics/db-toolkit
6
+ Author: frangs
7
+ Author-email: giordanofrancisco@duck.com
8
+ License: Apache Software License 2.0
9
+ Keywords: nbdev jupyter notebook python
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Natural Language :: English
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: License :: OSI Approved :: Apache Software License
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: fastcore
22
+ Requires-Dist: pandas
23
+ Requires-Dist: sqlalchemy
24
+ Provides-Extra: dev
25
+ Dynamic: author
26
+ Dynamic: author-email
27
+ Dynamic: classifier
28
+ Dynamic: description
29
+ Dynamic: description-content-type
30
+ Dynamic: home-page
31
+ Dynamic: keywords
32
+ Dynamic: license
33
+ Dynamic: license-file
34
+ Dynamic: provides-extra
35
+ Dynamic: requires-dist
36
+ Dynamic: requires-python
37
+ Dynamic: summary
38
+
39
+ # db-toolkit
40
+
41
+
42
+ <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
43
+
44
+ ``` python
45
+ from db_toolkit.db_extract import *
46
+ from db_toolkit.db_connect import *
47
+ from pathlib import Path
48
+ ```
49
+
50
+ ## Developer Guide
51
+
52
+ ### Install db_toolkit in Development mode
53
+
54
+ ``` sh
55
+ # make sure db_toolkit package is installed in development mode
56
+ $ pip install -e .
57
+
58
+ # make changes under nbs/ directory
59
+ # ...
60
+
61
+ # compile to have changes apply to db_toolkit
62
+ $ nbdev_prepare
63
+ ```
64
+
65
+ ## Usage
66
+
67
+ ### Installation
68
+
69
+ To use this package in another project, install as below
70
+
71
+ Install the latest published release with pip:
72
+
73
+ ``` sh
74
+ $ pip install db-toolkit
75
+ ```
76
+
77
+ ## How to use
78
+
79
+ ### 1. Set up your config file
80
+
81
+ Create an `.ini` file with your database connection details:
82
+
83
+ ``` ini
84
+ [CONN_ORC]
85
+ user:TEST_USER
86
+ pass:<your_password>
87
+ dsn:<...>
88
+ port:<...>
89
+ dbname:<...>
90
+
91
+ [CONN_MS]
92
+ server:<...>
93
+ dbname:<...>
94
+ ```
95
+
96
+ ### 2. Create your SQL file
97
+
98
+ Save your query in `data/sql/` with the naming convention
99
+ `<CONNECTION>__<tablename>.sql`:
100
+
101
+ data/sql/BIODS__address_ctax.sql
102
+
103
+ ### 3. Pull the data
104
+
105
+ ``` python
106
+ from db_toolkit.db_extract import DBExtract
107
+
108
+ dbe = DBExtract(cfg_fpath='path/to/your/config.ini')
109
+ dbe.pull_table('BIODS__address_ctax')
110
+ ```
111
+
112
+ The resulting CSV will be saved to `data/landing/`.
@@ -0,0 +1,74 @@
1
+ # db-toolkit
2
+
3
+
4
+ <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
5
+
6
+ ``` python
7
+ from db_toolkit.db_extract import *
8
+ from db_toolkit.db_connect import *
9
+ from pathlib import Path
10
+ ```
11
+
12
+ ## Developer Guide
13
+
14
+ ### Install db_toolkit in Development mode
15
+
16
+ ``` sh
17
+ # make sure db_toolkit package is installed in development mode
18
+ $ pip install -e .
19
+
20
+ # make changes under nbs/ directory
21
+ # ...
22
+
23
+ # compile to have changes apply to db_toolkit
24
+ $ nbdev_prepare
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ### Installation
30
+
31
+ To use this package in another project, install as below
32
+
33
+ Install the latest published release with pip:
34
+
35
+ ``` sh
36
+ $ pip install db-toolkit
37
+ ```
38
+
39
+ ## How to use
40
+
41
+ ### 1. Set up your config file
42
+
43
+ Create an `.ini` file with your database connection details:
44
+
45
+ ``` ini
46
+ [CONN_ORC]
47
+ user:TEST_USER
48
+ pass:<your_password>
49
+ dsn:<...>
50
+ port:<...>
51
+ dbname:<...>
52
+
53
+ [CONN_MS]
54
+ server:<...>
55
+ dbname:<...>
56
+ ```
57
+
58
+ ### 2. Create your SQL file
59
+
60
+ Save your query in `data/sql/` with the naming convention
61
+ `<CONNECTION>__<tablename>.sql`:
62
+
63
+ data/sql/BIODS__address_ctax.sql
64
+
65
+ ### 3. Pull the data
66
+
67
+ ``` python
68
+ from db_toolkit.db_extract import DBExtract
69
+
70
+ dbe = DBExtract(cfg_fpath='path/to/your/config.ini')
71
+ dbe.pull_table('BIODS__address_ctax')
72
+ ```
73
+
74
+ The resulting CSV will be saved to `data/landing/`.
@@ -0,0 +1 @@
1
+ __version__ = "0.0.4"
@@ -34,12 +34,14 @@ d = { 'settings': { 'branch': 'main',
34
34
  'db_toolkit/db_extract.py'),
35
35
  'db_toolkit.db_extract.DBExtract._get_sql': ( 'db_extract.html#dbextract._get_sql',
36
36
  'db_toolkit/db_extract.py'),
37
- 'db_toolkit.db_extract.DBExtract._process_all_sql': ( 'db_extract.html#dbextract._process_all_sql',
38
- 'db_toolkit/db_extract.py'),
39
- 'db_toolkit.db_extract.DBExtract._save_data_landing': ( 'db_extract.html#dbextract._save_data_landing',
37
+ 'db_toolkit.db_extract.DBExtract.pull_table': ( 'db_extract.html#dbextract.pull_table',
38
+ 'db_toolkit/db_extract.py'),
39
+ 'db_toolkit.db_extract.DBExtract.pull_tables': ( 'db_extract.html#dbextract.pull_tables',
40
+ 'db_toolkit/db_extract.py'),
41
+ 'db_toolkit.db_extract.DBExtract.pull_with_manifest': ( 'db_extract.html#dbextract.pull_with_manifest',
40
42
  'db_toolkit/db_extract.py'),
41
- 'db_toolkit.db_extract.DBExtract.process_save_landing': ( 'db_extract.html#dbextract.process_save_landing',
42
- 'db_toolkit/db_extract.py'),
43
+ 'db_toolkit.db_extract.DBExtract.sample': ( 'db_extract.html#dbextract.sample',
44
+ 'db_toolkit/db_extract.py'),
43
45
  'db_toolkit.db_extract.PullManifest': ('db_extract.html#pullmanifest', 'db_toolkit/db_extract.py'),
44
46
  'db_toolkit.db_extract.PullManifest.__init__': ( 'db_extract.html#pullmanifest.__init__',
45
47
  'db_toolkit/db_extract.py'),
@@ -6,27 +6,65 @@ from pathlib import Path
6
6
  import json, time
7
7
  from datetime import datetime, timedelta
8
8
  from fastcore.basics import patch
9
+ from concurrent.futures import ThreadPoolExecutor
9
10
  import pandas as pd
10
11
  from .db_connect import *
11
12
 
12
13
 
13
14
  # %% auto #0
14
- __all__ = ['DBExtract', 'PullManifest']
15
+ __all__ = ['PullManifest', 'DBExtract']
16
+
17
+ # %% ../nbs/02_db_extract.ipynb #bf53f340
18
class PullManifest:
    """Track when each table was last pulled and decide whether it is due for a refresh.

    The manifest is persisted as JSON shaped like
    ``{"pulls": {<table_name>: {"last_pull": <ISO timestamp>, "row_count": ..., "status": ...}}}``.
    """

    def __init__(self, manifest_path, pull_interval_days=14):
        """
        manifest_path: location of the JSON manifest file (created on first save).
        pull_interval_days: age, in days, after which a recorded pull is considered stale.
        """
        # Imported locally so the class does not depend on a module-level import.
        import threading
        self.path = Path(manifest_path)
        self.pull_interval_days = pull_interval_days
        # record_pull can be reached from several worker threads at once (tables
        # pulled through a thread pool); the lock serialises the in-memory update
        # and the file rewrite so the JSON on disk is never interleaved.
        self._lock = threading.Lock()
        self._load()

    def _load(self):
        """Load manifest from disk, or create an empty one if the file does not exist."""
        if self.path.exists():
            self.data = json.loads(self.path.read_text())
            # Tolerate a manifest that lacks the top-level "pulls" key (e.g. "{}"),
            # which would otherwise surface later as a KeyError.
            self.data.setdefault("pulls", {})
        else:
            self.data = {"pulls": {}}

    def _save(self):
        """Persist manifest to disk, creating the parent directory when needed."""
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.write_text(json.dumps(self.data, indent=2))

    def should_pull(self, table_name):
        """Return True when `table_name` has no usable recent pull on record.

        A table needs pulling when it was never recorded, when its last recorded
        status is not "success", or when the last pull is older than
        `pull_interval_days`.
        """
        entry = self.data["pulls"].get(table_name)
        if entry is None:
            return True
        # A pull recorded with a non-success status must not count as fresh data.
        if entry.get("status", "success") != "success":
            return True
        last_pull = entry.get("last_pull")
        if last_pull is None:
            return True
        age = datetime.now() - datetime.fromisoformat(last_pull)
        return age > timedelta(days=self.pull_interval_days)

    def record_pull(self, table_name, row_count, status="success"):
        """Record a completed pull for `table_name` and write the manifest to disk."""
        with self._lock:
            self.data["pulls"][table_name] = {
                "last_pull": datetime.now().isoformat(),
                "row_count": row_count,
                "status": status,
            }
            self._save()
48
+
15
49
 
16
50
  # %% ../nbs/02_db_extract.ipynb #4ebf670b-04ae-4d44-9eac-aa4caf54be99
17
51
  class DBExtract:
18
- def __init__(self, cfg_fpath):
52
+ def __init__(self, cfg_fpath, manifest=None):
19
53
  """
20
54
  Constructor for the DBExtract class.
21
55
  Reads parameters from the specified configuration file
22
- Pairs SQL scrit to connection engine
56
+ Pairs SQL script to connection engine
23
57
  Pulls and saves data to landing folder
58
+
59
+ SQL files must follow naming convention: <DB_NAME>__<TABLE_NAME>.sql
60
+ where DB_NAME matches a key in the config connections.
24
61
  """
25
62
  DBConfig.file_exists(cfg_fpath)
26
63
  self.data_dir = Path("../data")
27
64
  self.sql_dir = self.data_dir / 'sql'
28
65
  self.cfg = DBConfig(cfg_fpath)
29
66
  self.cfg.set_conn()
67
+ self.manifest = manifest
30
68
 
31
69
  def _get_pairs(self):
32
70
  """Using SQL files name convention <CONN>__<DF NAME>, create list collection type (df name, connection engine, sql script)"""
@@ -54,53 +92,43 @@ class DBExtract:
54
92
  retries += 1
55
93
  wait_time = 2**retries
56
94
  time.sleep(wait_time)
57
- return self._get_dataframe(stream, retries=retries)
95
+ return self._get_dataframe(conn, sql, chunksize, retries=retries)
58
96
  return pd.concat([df.dropna(how='all', axis=1) for df in dataframes], ignore_index=True)
59
-
60
- def _process_all_sql(self, chunksize=10_000):
61
- self._get_pairs()
62
- self.dataframes = {}
63
- for name, conn, sql in self.pairs: self.dataframes[name] = self._get_dataframe(conn, sql, chunksize=chunksize)
64
-
65
- def _save_data_landing(self):
66
- for key, df in self.dataframes.items(): df.to_csv(self.data_dir/f"landing/{key}.csv", index=False)
67
-
68
- def process_save_landing(self, chunksize=10_000):
69
- """pull and saves all db dataframes as csv to data/landing"""
70
- self._process_all_sql(chunksize=chunksize)
71
- self._save_data_landing()
72
97
 
73
98
 
74
99
 
75
- # %% ../nbs/02_db_extract.ipynb #b6b66498-f021-4fe5-9706-43f27bad1fdb
76
- class PullManifest:
77
- def __init__(self, manifest_path, pull_interval_days=14):
78
- self.path = Path(manifest_path)
79
- self.pull_interval_days = pull_interval_days
80
- self._load()
81
-
82
- def _load(self):
83
- """Load manifest from disk, or create empty one"""
84
- if self.path.exists(): self.data = json.loads(self.path.read_text())
85
- else: self.data = {"pulls": {}}
86
-
87
- def _save(self):
88
- """Persist manifest to disk"""
89
- self.path.write_text(json.dumps(self.data, indent=2))
90
-
91
- def should_pull(self, table_name):
92
- """Check if table needs refresh based on TTL"""
93
- if table_name not in self.data["pulls"]: return True
94
- last = datetime.fromisoformat(self.data["pulls"][table_name]["last_pull"])
95
- return datetime.now() - last > timedelta(days=self.pull_interval_days)
96
-
97
- def record_pull(self, table_name, row_count, status="success"):
98
- """Record a completed pull"""
99
- self.data["pulls"][table_name] = {
100
- "last_pull": datetime.now().isoformat(),
101
- "row_count": row_count,
102
- "status": status
100
+ # %% ../nbs/02_db_extract.ipynb #bc75d61c-94cb-4322-a1bf-d3be6d73110f
101
@patch
def sample(self: DBExtract, name, n=100):
    """Pull the first chunk (requested size `n`) of the query registered under `name`, for testing.

    name: table name, matched against the first element of each entry in `self.pairs`.
    n: chunk size requested from the data stream.

    Raises ValueError if no entry in `self.pairs` is registered under `name`.
    """
    self._get_pairs()
    # Look the entry up with a default so an unknown name produces a clear error
    # instead of a bare StopIteration leaking out of next().
    match = next((p for p in self.pairs if p[0] == name), None)
    if match is None:
        known = [p[0] for p in self.pairs]
        raise ValueError(f"Unknown table name {name!r}; available names: {known}")
    _, conn, sql = match
    # Only the first item of the stream is consumed.
    # NOTE(review): presumably each item is a DataFrame of at most `chunksize`
    # rows; confirm against _get_data_stream.
    return next(self._get_data_stream(conn, sql, chunksize=n))
103
107
 
104
- }
105
- self._save()
108
+ # %% ../nbs/02_db_extract.ipynb #e2934306
109
@patch
def pull_table(self:DBExtract, name, chunksize=10_000):
    """Pull the full table registered under `name` and save it to the landing folder.

    name: table name, matched against the first element of each entry in `self.pairs`.
    chunksize: forwarded to `_get_dataframe`.

    The data is written to `<data_dir>/landing/<name>.csv`; when a manifest is
    attached, the pull and its row count are recorded in it.
    Returns the pulled DataFrame.
    Raises ValueError if no entry in `self.pairs` is registered under `name`.
    """
    self._get_pairs()
    # Look the entry up with a default so an unknown name produces a clear error
    # instead of a bare StopIteration leaking out of next().
    match = next((p for p in self.pairs if p[0] == name), None)
    if match is None:
        known = [p[0] for p in self.pairs]
        raise ValueError(f"Unknown table name {name!r}; available names: {known}")
    _, conn, sql = match
    df = self._get_dataframe(conn, sql, chunksize=chunksize)
    out_path = self.data_dir/f"landing/{name}.csv"
    # The landing folder may not exist yet on a fresh checkout.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_path, index=False)
    if self.manifest is not None: self.manifest.record_pull(name, len(df))
    return df
118
+
119
+ # %% ../nbs/02_db_extract.ipynb #44b8f7be
120
@patch
def pull_tables(self:DBExtract, names, chunksize=10_000, max_workers=4):
    """Pull multiple tables in parallel using a thread pool.

    names: iterable of table names; a single name given as a string is also accepted.
    chunksize: forwarded to `pull_table` for every table.
    max_workers: size of the thread pool.

    Returns the list of `pull_table` results, in the same order as `names`.
    An exception raised by any individual pull propagates to the caller.
    """
    # A bare string would otherwise be iterated character by character and each
    # character treated as a table name.
    if isinstance(names, str): names = [names]

    def _pull(name):
        return self.pull_table(name, chunksize)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(_pull, names))
106
126
 
127
+ # %% ../nbs/02_db_extract.ipynb #112fff76
128
@patch
def pull_with_manifest(self:DBExtract, chunksize=10_000, max_workers=4):
    """Pull only the tables that the attached manifest reports as due for a refresh.

    Does nothing when no manifest is attached or when no table is due.
    `chunksize` and `max_workers` are forwarded to `pull_tables`.
    """
    # Without a manifest there is nothing to decide staleness with, so skip pulling.
    if self.manifest is None:
        return
    self._get_pairs()
    tables_to_pull = []
    for table, _conn, _sql in self.pairs:
        if self.manifest.should_pull(table):
            tables_to_pull.append(table)
    if not tables_to_pull:
        return
    self.pull_tables(tables_to_pull, chunksize, max_workers)
@@ -0,0 +1,112 @@
1
+ Metadata-Version: 2.4
2
+ Name: db-toolkit
3
+ Version: 0.0.4
4
+ Summary: utility tools for db access and data extraction
5
+ Home-page: https://github.com/Analytics/db-toolkit
6
+ Author: frangs
7
+ Author-email: giordanofrancisco@duck.com
8
+ License: Apache Software License 2.0
9
+ Keywords: nbdev jupyter notebook python
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Natural Language :: English
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: License :: OSI Approved :: Apache Software License
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: fastcore
22
+ Requires-Dist: pandas
23
+ Requires-Dist: sqlalchemy
24
+ Provides-Extra: dev
25
+ Dynamic: author
26
+ Dynamic: author-email
27
+ Dynamic: classifier
28
+ Dynamic: description
29
+ Dynamic: description-content-type
30
+ Dynamic: home-page
31
+ Dynamic: keywords
32
+ Dynamic: license
33
+ Dynamic: license-file
34
+ Dynamic: provides-extra
35
+ Dynamic: requires-dist
36
+ Dynamic: requires-python
37
+ Dynamic: summary
38
+
39
+ # db-toolkit
40
+
41
+
42
+ <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
43
+
44
+ ``` python
45
+ from db_toolkit.db_extract import *
46
+ from db_toolkit.db_connect import *
47
+ from pathlib import Path
48
+ ```
49
+
50
+ ## Developer Guide
51
+
52
+ ### Install db_toolkit in Development mode
53
+
54
+ ``` sh
55
+ # make sure db_toolkit package is installed in development mode
56
+ $ pip install -e .
57
+
58
+ # make changes under nbs/ directory
59
+ # ...
60
+
61
+ # compile to have changes apply to db_toolkit
62
+ $ nbdev_prepare
63
+ ```
64
+
65
+ ## Usage
66
+
67
+ ### Installation
68
+
69
+ To use this package in another project, install as below
70
+
71
+ Install the latest published release with pip:
72
+
73
+ ``` sh
74
+ $ pip install db-toolkit
75
+ ```
76
+
77
+ ## How to use
78
+
79
+ ### 1. Set up your config file
80
+
81
+ Create an `.ini` file with your database connection details:
82
+
83
+ ``` ini
84
+ [CONN_ORC]
85
+ user:TEST_USER
86
+ pass:<your_password>
87
+ dsn:<...>
88
+ port:<...>
89
+ dbname:<...>
90
+
91
+ [CONN_MS]
92
+ server:<...>
93
+ dbname:<...>
94
+ ```
95
+
96
+ ### 2. Create your SQL file
97
+
98
+ Save your query in `data/sql/` with the naming convention
99
+ `<CONNECTION>__<tablename>.sql`:
100
+
101
+ data/sql/BIODS__address_ctax.sql
102
+
103
+ ### 3. Pull the data
104
+
105
+ ``` python
106
+ from db_toolkit.db_extract import DBExtract
107
+
108
+ dbe = DBExtract(cfg_fpath='path/to/your/config.ini')
109
+ dbe.pull_table('BIODS__address_ctax')
110
+ ```
111
+
112
+ The resulting CSV will be saved to `data/landing/`.
@@ -1,16 +1,10 @@
1
1
  [DEFAULT]
2
- # All sections below are required unless otherwise specified.
3
- # See https://github.com/AnswerDotAI/nbdev/blob/main/settings.ini for examples.
4
-
5
- ### Python library ###
6
2
  repo = db-toolkit
7
- lib_name = %(repo)s
8
- version = 0.0.2
3
+ lib_name = db-toolkit
4
+ version = 0.0.4
9
5
  min_python = 3.9
10
6
  license = apache2
11
7
  black_formatting = False
12
-
13
- ### nbdev ###
14
8
  doc_path = _docs
15
9
  lib_path = db_toolkit
16
10
  nbs_path = nbs
@@ -18,29 +12,27 @@ recursive = True
18
12
  tst_flags = notest
19
13
  put_version_in_init = True
20
14
  update_pyproject = True
21
-
22
- ### Docs ###
23
15
  branch = main
24
16
  custom_sidebar = False
25
- doc_host = https://%(user)s.github.io
26
- doc_baseurl = /%(repo)s
27
- git_url = https://github.com/%(user)s/%(repo)s
28
- title = %(lib_name)s
29
-
30
- ### PyPI ###
17
+ doc_host = https://Analytics.github.io
18
+ doc_baseurl = /db-toolkit
19
+ git_url = https://github.com/Analytics/db-toolkit
20
+ title = db-toolkit
31
21
  audience = Developers
32
22
  author = frangs
33
23
  author_email = giordanofrancisco@duck.com
34
- copyright = 2025 onwards, %(author)s
24
+ copyright = 2025 onwards, frangs
35
25
  description = utility tools for db access and data extraction
36
26
  keywords = nbdev jupyter notebook python
37
27
  language = English
38
28
  status = 3
39
29
  user = Analytics
40
-
41
- ### Optional ###
42
30
  requirements = fastcore pandas sqlalchemy
43
- # dev_requirements =
44
- # console_scripts =
45
- # conda_user =
46
- # package_data =
31
+ readme_nb = index.ipynb
32
+ allowed_metadata_keys =
33
+ allowed_cell_metadata_keys =
34
+ jupyter_hooks = False
35
+ clean_ids = True
36
+ clear_all = False
37
+ skip_procs =
38
+
db_toolkit-0.0.2/PKG-INFO DELETED
@@ -1,167 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: db-toolkit
3
- Version: 0.0.2
4
- Summary: utility tools for db access and data extraction
5
- Home-page: https://github.com/Analytics/db-toolkit
6
- Author: frangs
7
- Author-email: giordanofrancisco@duck.com
8
- License: Apache Software License 2.0
9
- Keywords: nbdev jupyter notebook python
10
- Classifier: Development Status :: 4 - Beta
11
- Classifier: Intended Audience :: Developers
12
- Classifier: Natural Language :: English
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: License :: OSI Approved :: Apache Software License
18
- Requires-Python: >=3.9
19
- Description-Content-Type: text/markdown
20
- License-File: LICENSE
21
- Requires-Dist: fastcore
22
- Requires-Dist: pandas
23
- Requires-Dist: sqlalchemy
24
- Provides-Extra: dev
25
- Dynamic: author
26
- Dynamic: author-email
27
- Dynamic: classifier
28
- Dynamic: description
29
- Dynamic: description-content-type
30
- Dynamic: home-page
31
- Dynamic: keywords
32
- Dynamic: license
33
- Dynamic: license-file
34
- Dynamic: provides-extra
35
- Dynamic: requires-dist
36
- Dynamic: requires-python
37
- Dynamic: summary
38
-
39
- # db-toolkit
40
-
41
-
42
- <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
43
-
44
- ``` python
45
- from db_toolkit.db_extract import *
46
- from db_toolkit.db_connect import *
47
- from pathlib import Path
48
- ```
49
-
50
- This file will become your README and also the index of your
51
- documentation.
52
-
53
- ## Developer Guide
54
-
55
- If you are new to using `nbdev` here are some useful pointers to get you
56
- started.
57
-
58
- ### Install db_toolkit in Development mode
59
-
60
- ``` sh
61
- # make sure db_toolkit package is installed in development mode
62
- $ pip install -e .
63
-
64
- # make changes under nbs/ directory
65
- # ...
66
-
67
- # compile to have changes apply to db_toolkit
68
- $ nbdev_prepare
69
- ```
70
-
71
- ## Usage
72
-
73
- ### Installation
74
-
75
- To use this package in another project, install as below
76
-
77
- Install latest from the AzureDevOps
78
- [repository](https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit):
79
-
80
- ``` sh
81
- $ pip install git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
82
- ```
83
-
84
- ### Dependencies
85
-
86
- The YAML file for this package dependencies can be found in the `envs\`
87
- DIR. If using toolkit on another project, it is advisable to define
88
- `db-toolkit` as the below format YAML.
89
-
90
- ``` yaml
91
- name: er
92
- channels:
93
- - conda-forge
94
- dependencies:
95
- - jupyterlab
96
- - pandas
97
- - oracledb
98
- - pyodbc
99
- - pip
100
- - pip:
101
- - nbdev
102
- - git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
103
- - sqlalchemy
104
- ```
105
-
106
- ### Documentation
107
-
108
- TODO potentially when github is available.
109
-
110
- ## How to use
111
-
112
- ## Set ini file location path
113
-
114
- - TODO init file struct details show example
115
-
116
- ``` python
117
- fpath2 = '../../../data/db_connections/cfg_address.ini'
118
- dbe = DBExtract(cfg_fpath=fpath2)
119
- ```
120
-
121
- ## SQL files
122
-
123
- - Saved at ../data/sql
124
- - TODO add details on path \<dbname\>\_\_\<tablename\>
125
-
126
- ------------------------------------------------------------------------
127
-
128
- <a
129
- href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_connect.py#LNone"
130
- target="_blank" style="float:right; font-size:smaller">source</a>
131
-
132
- ### DBConfig
133
-
134
- > DBConfig (file_path)
135
-
136
- *Constructor for the DBConfig class. Reads parameters from the specified
137
- configuration file and presents them appropriately to the application.*
138
-
139
- ------------------------------------------------------------------------
140
-
141
- <a
142
- href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_extract.py#LNone"
143
- target="_blank" style="float:right; font-size:smaller">source</a>
144
-
145
- ### DBExtract
146
-
147
- > DBExtract (cfg_fpath)
148
-
149
- *Constructor for the DBExtract class. Reads parameters from the
150
- specified configuration file Pairs SQL scrit to connection engine Pulls
151
- and saves data to landing folder*
152
-
153
- ``` python
154
- data_dir = Path("../data")
155
- sql_dir = data_dir / 'sql'
156
- sql_files = list(sql_dir.glob("*.sql"))
157
- sql_files
158
- ```
159
-
160
- [Path('../data/sql/BIODS__address_ctax.sql')]
161
-
162
- ## Process files
163
-
164
- ``` python
165
- # files wil be saved at ../data/landing
166
- # dbe.process_save_landing()
167
- ```
@@ -1,129 +0,0 @@
1
- # db-toolkit
2
-
3
-
4
- <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
5
-
6
- ``` python
7
- from db_toolkit.db_extract import *
8
- from db_toolkit.db_connect import *
9
- from pathlib import Path
10
- ```
11
-
12
- This file will become your README and also the index of your
13
- documentation.
14
-
15
- ## Developer Guide
16
-
17
- If you are new to using `nbdev` here are some useful pointers to get you
18
- started.
19
-
20
- ### Install db_toolkit in Development mode
21
-
22
- ``` sh
23
- # make sure db_toolkit package is installed in development mode
24
- $ pip install -e .
25
-
26
- # make changes under nbs/ directory
27
- # ...
28
-
29
- # compile to have changes apply to db_toolkit
30
- $ nbdev_prepare
31
- ```
32
-
33
- ## Usage
34
-
35
- ### Installation
36
-
37
- To use this package in another project, install as below
38
-
39
- Install latest from the AzureDevOps
40
- [repository](https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit):
41
-
42
- ``` sh
43
- $ pip install git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
44
- ```
45
-
46
- ### Dependencies
47
-
48
- The YAML file for this package dependencies can be found in the `envs\`
49
- DIR. If using toolkit on another project, it is advisable to define
50
- `db-toolkit` as the below format YAML.
51
-
52
- ``` yaml
53
- name: er
54
- channels:
55
- - conda-forge
56
- dependencies:
57
- - jupyterlab
58
- - pandas
59
- - oracledb
60
- - pyodbc
61
- - pip
62
- - pip:
63
- - nbdev
64
- - git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
65
- - sqlalchemy
66
- ```
67
-
68
- ### Documentation
69
-
70
- TODO potentially when github is available.
71
-
72
- ## How to use
73
-
74
- ## Set ini file location path
75
-
76
- - TODO init file struct details show example
77
-
78
- ``` python
79
- fpath2 = '../../../data/db_connections/cfg_address.ini'
80
- dbe = DBExtract(cfg_fpath=fpath2)
81
- ```
82
-
83
- ## SQL files
84
-
85
- - Saved at ../data/sql
86
- - TODO add details on path \<dbname\>\_\_\<tablename\>
87
-
88
- ------------------------------------------------------------------------
89
-
90
- <a
91
- href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_connect.py#LNone"
92
- target="_blank" style="float:right; font-size:smaller">source</a>
93
-
94
- ### DBConfig
95
-
96
- > DBConfig (file_path)
97
-
98
- *Constructor for the DBConfig class. Reads parameters from the specified
99
- configuration file and presents them appropriately to the application.*
100
-
101
- ------------------------------------------------------------------------
102
-
103
- <a
104
- href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_extract.py#LNone"
105
- target="_blank" style="float:right; font-size:smaller">source</a>
106
-
107
- ### DBExtract
108
-
109
- > DBExtract (cfg_fpath)
110
-
111
- *Constructor for the DBExtract class. Reads parameters from the
112
- specified configuration file Pairs SQL scrit to connection engine Pulls
113
- and saves data to landing folder*
114
-
115
- ``` python
116
- data_dir = Path("../data")
117
- sql_dir = data_dir / 'sql'
118
- sql_files = list(sql_dir.glob("*.sql"))
119
- sql_files
120
- ```
121
-
122
- [Path('../data/sql/BIODS__address_ctax.sql')]
123
-
124
- ## Process files
125
-
126
- ``` python
127
- # files wil be saved at ../data/landing
128
- # dbe.process_save_landing()
129
- ```
@@ -1 +0,0 @@
1
- __version__ = "0.0.2"
@@ -1,167 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: db-toolkit
3
- Version: 0.0.2
4
- Summary: utility tools for db access and data extraction
5
- Home-page: https://github.com/Analytics/db-toolkit
6
- Author: frangs
7
- Author-email: giordanofrancisco@duck.com
8
- License: Apache Software License 2.0
9
- Keywords: nbdev jupyter notebook python
10
- Classifier: Development Status :: 4 - Beta
11
- Classifier: Intended Audience :: Developers
12
- Classifier: Natural Language :: English
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: License :: OSI Approved :: Apache Software License
18
- Requires-Python: >=3.9
19
- Description-Content-Type: text/markdown
20
- License-File: LICENSE
21
- Requires-Dist: fastcore
22
- Requires-Dist: pandas
23
- Requires-Dist: sqlalchemy
24
- Provides-Extra: dev
25
- Dynamic: author
26
- Dynamic: author-email
27
- Dynamic: classifier
28
- Dynamic: description
29
- Dynamic: description-content-type
30
- Dynamic: home-page
31
- Dynamic: keywords
32
- Dynamic: license
33
- Dynamic: license-file
34
- Dynamic: provides-extra
35
- Dynamic: requires-dist
36
- Dynamic: requires-python
37
- Dynamic: summary
38
-
39
- # db-toolkit
40
-
41
-
42
- <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
43
-
44
- ``` python
45
- from db_toolkit.db_extract import *
46
- from db_toolkit.db_connect import *
47
- from pathlib import Path
48
- ```
49
-
50
- This file will become your README and also the index of your
51
- documentation.
52
-
53
- ## Developer Guide
54
-
55
- If you are new to using `nbdev` here are some useful pointers to get you
56
- started.
57
-
58
- ### Install db_toolkit in Development mode
59
-
60
- ``` sh
61
- # make sure db_toolkit package is installed in development mode
62
- $ pip install -e .
63
-
64
- # make changes under nbs/ directory
65
- # ...
66
-
67
- # compile to have changes apply to db_toolkit
68
- $ nbdev_prepare
69
- ```
70
-
71
- ## Usage
72
-
73
- ### Installation
74
-
75
- To use this package in another project, install as below
76
-
77
- Install latest from the AzureDevOps
78
- [repository](https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit):
79
-
80
- ``` sh
81
- $ pip install git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
82
- ```
83
-
84
- ### Dependencies
85
-
86
- The YAML file for this package dependencies can be found in the `envs\`
87
- DIR. If using toolkit on another project, it is advisable to define
88
- `db-toolkit` as the below format YAML.
89
-
90
- ``` yaml
91
- name: er
92
- channels:
93
- - conda-forge
94
- dependencies:
95
- - jupyterlab
96
- - pandas
97
- - oracledb
98
- - pyodbc
99
- - pip
100
- - pip:
101
- - nbdev
102
- - git+https://lambeth@dev.azure.com/lambeth/Analytics/_git/db-toolkit
103
- - sqlalchemy
104
- ```
105
-
106
- ### Documentation
107
-
108
- TODO potentially when github is available.
109
-
110
- ## How to use
111
-
112
- ## Set ini file location path
113
-
114
- - TODO init file struct details show example
115
-
116
- ``` python
117
- fpath2 = '../../../data/db_connections/cfg_address.ini'
118
- dbe = DBExtract(cfg_fpath=fpath2)
119
- ```
120
-
121
- ## SQL files
122
-
123
- - Saved at ../data/sql
124
- - TODO add details on path \<dbname\>\_\_\<tablename\>
125
-
126
- ------------------------------------------------------------------------
127
-
128
- <a
129
- href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_connect.py#LNone"
130
- target="_blank" style="float:right; font-size:smaller">source</a>
131
-
132
- ### DBConfig
133
-
134
- > DBConfig (file_path)
135
-
136
- *Constructor for the DBConfig class. Reads parameters from the specified
137
- configuration file and presents them appropriately to the application.*
138
-
139
- ------------------------------------------------------------------------
140
-
141
- <a
142
- href="https://github.com/Analytics/db-toolkit/blob/main/db_toolkit/db_extract.py#LNone"
143
- target="_blank" style="float:right; font-size:smaller">source</a>
144
-
145
- ### DBExtract
146
-
147
- > DBExtract (cfg_fpath)
148
-
149
- *Constructor for the DBExtract class. Reads parameters from the
150
- specified configuration file Pairs SQL scrit to connection engine Pulls
151
- and saves data to landing folder*
152
-
153
- ``` python
154
- data_dir = Path("../data")
155
- sql_dir = data_dir / 'sql'
156
- sql_files = list(sql_dir.glob("*.sql"))
157
- sql_files
158
- ```
159
-
160
- [Path('../data/sql/BIODS__address_ctax.sql')]
161
-
162
- ## Process files
163
-
164
- ``` python
165
- # files wil be saved at ../data/landing
166
- # dbe.process_save_landing()
167
- ```
File without changes
File without changes
File without changes
File without changes
File without changes