netrias_client 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of netrias_client might be problematic. Click here for more details.
- netrias_client-0.0.1/.gitignore +207 -0
- netrias_client-0.0.1/LICENSE +21 -0
- netrias_client-0.0.1/PKG-INFO +222 -0
- netrias_client-0.0.1/README.md +169 -0
- netrias_client-0.0.1/pyproject.toml +132 -0
- netrias_client-0.0.1/src/netrias_client/__init__.py +9 -0
- netrias_client-0.0.1/src/netrias_client/_adapter.py +288 -0
- netrias_client-0.0.1/src/netrias_client/_client.py +251 -0
- netrias_client-0.0.1/src/netrias_client/_config.py +95 -0
- netrias_client-0.0.1/src/netrias_client/_core.py +560 -0
- netrias_client-0.0.1/src/netrias_client/_discovery.py +437 -0
- netrias_client-0.0.1/src/netrias_client/_errors.py +33 -0
- netrias_client-0.0.1/src/netrias_client/_gateway_bypass.py +208 -0
- netrias_client-0.0.1/src/netrias_client/_http.py +126 -0
- netrias_client-0.0.1/src/netrias_client/_io.py +28 -0
- netrias_client-0.0.1/src/netrias_client/_logging.py +46 -0
- netrias_client-0.0.1/src/netrias_client/_models.py +72 -0
- netrias_client-0.0.1/src/netrias_client/_validators.py +173 -0
- netrias_client-0.0.1/src/netrias_client/scripts.py +313 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Spyder project settings
|
|
148
|
+
.spyderproject
|
|
149
|
+
.spyproject
|
|
150
|
+
|
|
151
|
+
# Rope project settings
|
|
152
|
+
.ropeproject
|
|
153
|
+
|
|
154
|
+
# mkdocs documentation
|
|
155
|
+
/site
|
|
156
|
+
|
|
157
|
+
# mypy
|
|
158
|
+
.mypy_cache/
|
|
159
|
+
.dmypy.json
|
|
160
|
+
dmypy.json
|
|
161
|
+
|
|
162
|
+
# Pyre type checker
|
|
163
|
+
.pyre/
|
|
164
|
+
|
|
165
|
+
# pytype static type analyzer
|
|
166
|
+
.pytype/
|
|
167
|
+
|
|
168
|
+
# Cython debug symbols
|
|
169
|
+
cython_debug/
|
|
170
|
+
|
|
171
|
+
# PyCharm
|
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
176
|
+
#.idea/
|
|
177
|
+
|
|
178
|
+
# Abstra
|
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
181
|
+
# Learn more at https://abstra.io/docs
|
|
182
|
+
.abstra/
|
|
183
|
+
|
|
184
|
+
# Visual Studio Code
|
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
189
|
+
# .vscode/
|
|
190
|
+
|
|
191
|
+
# Ruff stuff:
|
|
192
|
+
.ruff_cache/
|
|
193
|
+
|
|
194
|
+
# PyPI configuration file
|
|
195
|
+
.pypirc
|
|
196
|
+
|
|
197
|
+
# Cursor
|
|
198
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
199
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
200
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
201
|
+
.cursorignore
|
|
202
|
+
.cursorindexingignore
|
|
203
|
+
|
|
204
|
+
# Marimo
|
|
205
|
+
marimo/_static/
|
|
206
|
+
marimo/_lsp/
|
|
207
|
+
__marimo__/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Netrias
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: netrias_client
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Python client for the Netrias harmonization API
|
|
5
|
+
Project-URL: Homepage, https://github.com/netrias/netrias_client
|
|
6
|
+
Project-URL: Repository, https://github.com/netrias/netrias_client
|
|
7
|
+
Project-URL: Documentation, https://github.com/netrias/netrias_client#readme
|
|
8
|
+
Author-email: Chris Harman <charman@netrias.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2025 Netrias
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: api,cde,client,harmonization,netrias
|
|
32
|
+
Classifier: Intended Audience :: Developers
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Operating System :: OS Independent
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Requires-Python: >=3.10
|
|
41
|
+
Requires-Dist: boto3
|
|
42
|
+
Requires-Dist: httpx
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: basedpyright; extra == 'dev'
|
|
45
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
46
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
47
|
+
Requires-Dist: python-dotenv>=1.0; extra == 'dev'
|
|
48
|
+
Requires-Dist: ruff>=0.5.0; extra == 'dev'
|
|
49
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
50
|
+
Requires-Dist: ty; extra == 'dev'
|
|
51
|
+
Requires-Dist: typing-extensions; extra == 'dev'
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
# Netrias Client
|
|
55
|
+
|
|
56
|
+
Python toolkit for working with the Netrias recommendation and harmonization services. The client wraps the HTTP APIs with strong typing, logging, and guard rails so analytics code can focus on describing data rather than orchestrating requests.
|
|
57
|
+
|
|
58
|
+
## Highlights
|
|
59
|
+
- **Stateful client facade** – instantiate `NetriasClient` and call `client.configure(...)` once.
|
|
60
|
+
- **Column discovery helpers** – derive column samples from CSV files, invoke the recommendation service, and normalize responses into `MappingDiscoveryResult` models.
|
|
61
|
+
- **Adapter utilities** – convert discovery output into harmonization-ready manifest payloads while applying confidence filters and CDE overrides.
|
|
62
|
+
- **Asynchronous harmonization loop** – submit jobs, poll for completion, download results, and version output files automatically to avoid accidental overwrites.
|
|
63
|
+
- **Extended timing logs** – discovery and harmonization emit duration metrics so you can spot slow calls quickly during live runs.
|
|
64
|
+
|
|
65
|
+
## Installation
|
|
66
|
+
|
|
67
|
+
The project supports Python 3.10 and newer.
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install netrias_client
|
|
71
|
+
|
|
72
|
+
# optional AWS helpers (gateway bypass)
|
|
73
|
+
pip install netrias_client[aws]
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
We recommend managing environments with [uv](https://github.com/astral-sh/uv):
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# create or update a project that depends on netrias_client
|
|
80
|
+
uv add netrias_client
|
|
81
|
+
|
|
82
|
+
# install optional AWS helpers (gateway bypass)
|
|
83
|
+
uv add netrias_client[aws]
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
For local development within this repository:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
uv sync --group dev # install development tooling
|
|
90
|
+
uv sync --group aws --group dev # include optional AWS dependencies
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Configuration
|
|
94
|
+
|
|
95
|
+
All client entry points require explicit configuration. Create a `NetriasClient`, then provide the API key; discovery and harmonization endpoints remain fixed by the library.
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from pathlib import Path
|
|
99
|
+
|
|
100
|
+
from netrias_client import NetriasClient
|
|
101
|
+
from netrias_client._models import LogLevel
|
|
102
|
+
|
|
103
|
+
client = NetriasClient()
|
|
104
|
+
client.configure(
|
|
105
|
+
api_key="<netrias api key>",
|
|
106
|
+
# Optional overrides:
|
|
107
|
+
timeout=21600.0, # seconds (default: 6 hours)
|
|
108
|
+
log_level=LogLevel.INFO,
|
|
109
|
+
confidence_threshold=0.80, # discovery adapter filter, 0.0–1.0
|
|
110
|
+
discovery_use_gateway_bypass=True, # toggle Lambda bypass (default: True)
|
|
111
|
+
log_directory=Path("logs/netrias"), # optional per-client log files
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Configuration errors raise `ClientConfigurationError`. Calling `configure` again replaces the active settings snapshot and reinitializes the dedicated logger (refreshing file handlers when `log_directory` is supplied).
|
|
116
|
+
|
|
117
|
+
## End-to-End Workflow
|
|
118
|
+
|
|
119
|
+
The typical harmonization flow contains two steps:
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from pathlib import Path
|
|
123
|
+
|
|
124
|
+
from netrias_client import NetriasClient
|
|
125
|
+
|
|
126
|
+
client = NetriasClient()
|
|
127
|
+
client.configure(api_key="<netrias api key>")
|
|
128
|
+
|
|
129
|
+
csv_path = Path("/path/to/source.csv")
|
|
130
|
+
schema = "ccdi"
|
|
131
|
+
|
|
132
|
+
# 1. Ask the recommendation service for potential targets.
|
|
133
|
+
manifest_payload = client.discover_mapping_from_csv(
|
|
134
|
+
source_csv=csv_path,
|
|
135
|
+
target_schema=schema,
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
# 2. Kick off harmonization directly with the manifest payload.
|
|
139
|
+
result = client.harmonize(source_path=csv_path, manifest=manifest_payload)
|
|
140
|
+
print(result.status)
|
|
141
|
+
print(result.description)
|
|
142
|
+
print(result.file_path)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
- `client.discover_mapping_from_csv(...)` samples up to 25 values per column (configurable), calls the API, and returns a manifest-ready payload (including static metadata such as CDE routes/IDs where configured).
|
|
146
|
+
- `client.harmonize(...)` submits a job and polls `GET /v1/jobs/{jobId}` until the backend returns success or failure. Downloaded CSVs are written next to the source file (versioned as `data.harmonized.v1.csv`, etc.). Pass `manifest_output_path=` if you also want to persist the manifest JSON for inspection.
|
|
147
|
+
|
|
148
|
+
### Timing Logs
|
|
149
|
+
|
|
150
|
+
Both discovery and harmonization log elapsed seconds for the full operation and for timeout/transport failures. Sample output:
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
INFO netrias_client: discover mapping start: schema=ccdi columns=12
|
|
154
|
+
INFO netrias_client: discover mapping complete: schema=ccdi suggestions=0 duration=47.12s
|
|
155
|
+
INFO netrias_client: harmonize start: file=data.csv
|
|
156
|
+
INFO netrias_client: harmonize finished: file=data.csv status=succeeded duration=182.45s
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Use these metrics to separate slow API responses from downstream processing overhead.
|
|
160
|
+
|
|
161
|
+
## Adapter Notes
|
|
162
|
+
|
|
163
|
+
Discovery results are normalized to manifest payloads automatically; unmatched columns are logged so you can expand coverage. Confidence thresholds come from `configure(confidence_threshold=...)` and default to 0.8.
|
|
164
|
+
|
|
165
|
+
## Gateway Bypass (Temporary)
|
|
166
|
+
|
|
167
|
+
The module `netrias_client._gateway_bypass` exposes `invoke_cde_recommendation_alias(...)`, a stopgap helper that calls the `cde-recommendation` Lambda alias directly. This avoids API Gateway’s short timeout window but requires AWS credentials with `lambda:InvokeFunction` permission and the `boto3` dependency.
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from netrias_client._gateway_bypass import invoke_cde_recommendation_alias
|
|
171
|
+
|
|
172
|
+
result = invoke_cde_recommendation_alias(
|
|
173
|
+
target_schema="ccdi",
|
|
174
|
+
columns={"study_name": ["foo", "bar"]},
|
|
175
|
+
alias="prod",
|
|
176
|
+
region_name="us-east-2",
|
|
177
|
+
)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Install `boto3` (or `netrias_client[aws]` if provided) before importing the bypass module, and rotate IAM credentials frequently. Once API Gateway limits are raised, prefer the standard discovery flow again.
|
|
181
|
+
|
|
182
|
+
## Testing & Tooling
|
|
183
|
+
|
|
184
|
+
The repository ships with pytest-based integration tests plus lint/type tooling.
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
uv run pytest
|
|
189
|
+
uv run ruff check
|
|
190
|
+
uv run basedpyright
|
|
191
|
+
uv build # produce wheel + sdist
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Live verification scripts are located under `live_test/` and require a populated `.env` file containing `NETRIAS_API_KEY` (and optionally harmonization overrides while services converge).
|
|
195
|
+
|
|
196
|
+
## Project Layout
|
|
197
|
+
|
|
198
|
+
```
|
|
199
|
+
src/netrias_client/
|
|
200
|
+
__init__.py # re-exported public surface
|
|
201
|
+
_adapter.py # discovery → manifest conversion
|
|
202
|
+
_client.py # NetriasClient facade and state management
|
|
203
|
+
_config.py # settings validation helpers
|
|
204
|
+
_core.py # harmonization workflow
|
|
205
|
+
_discovery.py # discovery wrappers and CSV sampling
|
|
206
|
+
_errors.py # exception taxonomy
|
|
207
|
+
_http.py # HTTP primitives (submit/poll/download)
|
|
208
|
+
_io.py # streaming helpers
|
|
209
|
+
_logging.py # standardized logger setup
|
|
210
|
+
_models.py # dataclasses for structured responses
|
|
211
|
+
_validators.py # filesystem and payload validation
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
Tests reside under `src/netrias_client/tests/` and are excluded from the published wheel to keep installs slim; run them locally via `uv run pytest`.
|
|
215
|
+
|
|
216
|
+
## Contributing
|
|
217
|
+
|
|
218
|
+
1. `uv sync --group dev` (add `--group aws` if needed) to create the virtual environment.
|
|
219
|
+
2. `uv run pytest` to ensure the suite passes prior to committing.
|
|
220
|
+
3. Follow the repo conventions: keep functions focused, prefer typed interfaces, and favor logging key transitions over verbose chatter.
|
|
221
|
+
|
|
222
|
+
Pull requests should include updated documentation or fixtures when they alter API behavior or the manifest contract.
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Netrias Client
|
|
2
|
+
|
|
3
|
+
Python toolkit for working with the Netrias recommendation and harmonization services. The client wraps the HTTP APIs with strong typing, logging, and guard rails so analytics code can focus on describing data rather than orchestrating requests.
|
|
4
|
+
|
|
5
|
+
## Highlights
|
|
6
|
+
- **Stateful client facade** – instantiate `NetriasClient` and call `client.configure(...)` once.
|
|
7
|
+
- **Column discovery helpers** – derive column samples from CSV files, invoke the recommendation service, and normalize responses into `MappingDiscoveryResult` models.
|
|
8
|
+
- **Adapter utilities** – convert discovery output into harmonization-ready manifest payloads while applying confidence filters and CDE overrides.
|
|
9
|
+
- **Asynchronous harmonization loop** – submit jobs, poll for completion, download results, and version output files automatically to avoid accidental overwrites.
|
|
10
|
+
- **Extended timing logs** – discovery and harmonization emit duration metrics so you can spot slow calls quickly during live runs.
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
The project supports Python 3.10 and newer.
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install netrias_client
|
|
18
|
+
|
|
19
|
+
# optional AWS helpers (gateway bypass)
|
|
20
|
+
pip install netrias_client[aws]
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
We recommend managing environments with [uv](https://github.com/astral-sh/uv):
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# create or update a project that depends on netrias_client
|
|
27
|
+
uv add netrias_client
|
|
28
|
+
|
|
29
|
+
# install optional AWS helpers (gateway bypass)
|
|
30
|
+
uv add netrias_client[aws]
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
For local development within this repository:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
uv sync --group dev # install development tooling
|
|
37
|
+
uv sync --group aws --group dev # include optional AWS dependencies
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Configuration
|
|
41
|
+
|
|
42
|
+
All client entry points require explicit configuration. Create a `NetriasClient`, then provide the API key; discovery and harmonization endpoints remain fixed by the library.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from pathlib import Path
|
|
46
|
+
|
|
47
|
+
from netrias_client import NetriasClient
|
|
48
|
+
from netrias_client._models import LogLevel
|
|
49
|
+
|
|
50
|
+
client = NetriasClient()
|
|
51
|
+
client.configure(
|
|
52
|
+
api_key="<netrias api key>",
|
|
53
|
+
# Optional overrides:
|
|
54
|
+
timeout=21600.0, # seconds (default: 6 hours)
|
|
55
|
+
log_level=LogLevel.INFO,
|
|
56
|
+
confidence_threshold=0.80, # discovery adapter filter, 0.0–1.0
|
|
57
|
+
discovery_use_gateway_bypass=True, # toggle Lambda bypass (default: True)
|
|
58
|
+
log_directory=Path("logs/netrias"), # optional per-client log files
|
|
59
|
+
)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Configuration errors raise `ClientConfigurationError`. Calling `configure` again replaces the active settings snapshot and reinitializes the dedicated logger (refreshing file handlers when `log_directory` is supplied).
|
|
63
|
+
|
|
64
|
+
## End-to-End Workflow
|
|
65
|
+
|
|
66
|
+
The typical harmonization flow contains two steps:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from pathlib import Path
|
|
70
|
+
|
|
71
|
+
from netrias_client import NetriasClient
|
|
72
|
+
|
|
73
|
+
client = NetriasClient()
|
|
74
|
+
client.configure(api_key="<netrias api key>")
|
|
75
|
+
|
|
76
|
+
csv_path = Path("/path/to/source.csv")
|
|
77
|
+
schema = "ccdi"
|
|
78
|
+
|
|
79
|
+
# 1. Ask the recommendation service for potential targets.
|
|
80
|
+
manifest_payload = client.discover_mapping_from_csv(
|
|
81
|
+
source_csv=csv_path,
|
|
82
|
+
target_schema=schema,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# 2. Kick off harmonization directly with the manifest payload.
|
|
86
|
+
result = client.harmonize(source_path=csv_path, manifest=manifest_payload)
|
|
87
|
+
print(result.status)
|
|
88
|
+
print(result.description)
|
|
89
|
+
print(result.file_path)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
- `client.discover_mapping_from_csv(...)` samples up to 25 values per column (configurable), calls the API, and returns a manifest-ready payload (including static metadata such as CDE routes/IDs where configured).
|
|
93
|
+
- `client.harmonize(...)` submits a job and polls `GET /v1/jobs/{jobId}` until the backend returns success or failure. Downloaded CSVs are written next to the source file (versioned as `data.harmonized.v1.csv`, etc.). Pass `manifest_output_path=` if you also want to persist the manifest JSON for inspection.
|
|
94
|
+
|
|
95
|
+
### Timing Logs
|
|
96
|
+
|
|
97
|
+
Both discovery and harmonization log elapsed seconds for the full operation and for timeout/transport failures. Sample output:
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
INFO netrias_client: discover mapping start: schema=ccdi columns=12
|
|
101
|
+
INFO netrias_client: discover mapping complete: schema=ccdi suggestions=0 duration=47.12s
|
|
102
|
+
INFO netrias_client: harmonize start: file=data.csv
|
|
103
|
+
INFO netrias_client: harmonize finished: file=data.csv status=succeeded duration=182.45s
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Use these metrics to separate slow API responses from downstream processing overhead.
|
|
107
|
+
|
|
108
|
+
## Adapter Notes
|
|
109
|
+
|
|
110
|
+
Discovery results are normalized to manifest payloads automatically; unmatched columns are logged so you can expand coverage. Confidence thresholds come from `configure(confidence_threshold=...)` and default to 0.8.
|
|
111
|
+
|
|
112
|
+
## Gateway Bypass (Temporary)
|
|
113
|
+
|
|
114
|
+
The module `netrias_client._gateway_bypass` exposes `invoke_cde_recommendation_alias(...)`, a stopgap helper that calls the `cde-recommendation` Lambda alias directly. This avoids API Gateway’s short timeout window but requires AWS credentials with `lambda:InvokeFunction` permission and the `boto3` dependency.
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from netrias_client._gateway_bypass import invoke_cde_recommendation_alias
|
|
118
|
+
|
|
119
|
+
result = invoke_cde_recommendation_alias(
|
|
120
|
+
target_schema="ccdi",
|
|
121
|
+
columns={"study_name": ["foo", "bar"]},
|
|
122
|
+
alias="prod",
|
|
123
|
+
region_name="us-east-2",
|
|
124
|
+
)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Install `boto3` (or `netrias_client[aws]` if provided) before importing the bypass module, and rotate IAM credentials frequently. Once API Gateway limits are raised, prefer the standard discovery flow again.
|
|
128
|
+
|
|
129
|
+
## Testing & Tooling
|
|
130
|
+
|
|
131
|
+
The repository ships with pytest-based integration tests plus lint/type tooling.
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
uv run pytest
|
|
136
|
+
uv run ruff check
|
|
137
|
+
uv run basedpyright
|
|
138
|
+
uv build # produce wheel + sdist
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Live verification scripts are located under `live_test/` and require a populated `.env` file containing `NETRIAS_API_KEY` (and optionally harmonization overrides while services converge).
|
|
142
|
+
|
|
143
|
+
## Project Layout
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
src/netrias_client/
|
|
147
|
+
__init__.py # re-exported public surface
|
|
148
|
+
_adapter.py # discovery → manifest conversion
|
|
149
|
+
_client.py # NetriasClient facade and state management
|
|
150
|
+
_config.py # settings validation helpers
|
|
151
|
+
_core.py # harmonization workflow
|
|
152
|
+
_discovery.py # discovery wrappers and CSV sampling
|
|
153
|
+
_errors.py # exception taxonomy
|
|
154
|
+
_http.py # HTTP primitives (submit/poll/download)
|
|
155
|
+
_io.py # streaming helpers
|
|
156
|
+
_logging.py # standardized logger setup
|
|
157
|
+
_models.py # dataclasses for structured responses
|
|
158
|
+
_validators.py # filesystem and payload validation
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Tests reside under `src/netrias_client/tests/` and are excluded from the published wheel to keep installs slim; run them locally via `uv run pytest`.
|
|
162
|
+
|
|
163
|
+
## Contributing
|
|
164
|
+
|
|
165
|
+
1. `uv sync --group dev` (add `--group aws` if needed) to create the virtual environment.
|
|
166
|
+
2. `uv run pytest` to ensure the suite passes prior to committing.
|
|
167
|
+
3. Follow the repo conventions: keep functions focused, prefer typed interfaces, and favor logging key transitions over verbose chatter.
|
|
168
|
+
|
|
169
|
+
Pull requests should include updated documentation or fixtures when they alter API behavior or the manifest contract.
|