netrias_client 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,192 @@
1
+ """Validate inputs for harmonization.
2
+
3
+ 'why': fail fast with clear, actionable messages prior to network calls
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ from collections.abc import Mapping, Sequence
9
+ from pathlib import Path
10
+
11
+ from ._errors import FileValidationError, MappingValidationError, OutputLocationError
12
+
13
+
14
+ # OBVIOUS HARD-CODED SIZE LIMIT: 250 MB maximum CSV size prior to upload
15
+ HARD_MAX_CSV_BYTES = 250 * 1024 * 1024
16
+
17
+
18
def validate_source_path(path: Path) -> Path:
    """Validate the source CSV: it must exist, be a regular file, end in .csv, and fit the size cap.

    'why': reject bad inputs locally instead of burning an upload on a doomed request
    """
    _require_exists(path, "source CSV not found")
    _require_is_file(path, "source path is not a file")
    _require_suffix(path, ".csv", "unsupported file extension for source CSV")
    _require_not_too_large(path)
    return path
26
+
27
+
28
def validate_manifest_path(path: Path) -> Path:
    """Confirm the manifest exists, is a regular file, and carries a .json suffix."""
    _require_exists(path, "manifest JSON not found")
    _require_is_file(path, "manifest path is not a file")
    _require_suffix(path, ".json", "manifest must be a .json file")
    return path
35
+
36
+
37
def validate_output_path(path: Path | None, source_name: str, allow_versioning: bool = False) -> Path:
    """Resolve and vet the harmonized-output destination.

    Falls back to `<CWD>/<source_name>.harmonized.csv` when `path` is None or
    names a directory, creates missing parent directories, and either refuses
    to overwrite an existing file or (with `allow_versioning`) picks the next
    free `.vN` sibling.
    """
    destination = _resolve_output_candidate(path, source_name)
    _ensure_parent(destination)
    _require_parent_writable(destination)
    if allow_versioning:
        return _next_available_path(destination)
    _require_not_exists(destination)
    return destination
51
+
52
+
53
def validate_target_schema(schema: str) -> str:
    """Return the stripped schema identifier, rejecting blank or missing values."""
    cleaned = (schema or "").strip()
    if not cleaned:
        raise MappingValidationError("target_schema must be a non-empty string")
    return cleaned
60
+
61
+
62
def validate_target_version(version: str) -> str:
    """Return the stripped version identifier, rejecting blank or missing values."""
    cleaned = (version or "").strip()
    if not cleaned:
        raise MappingValidationError("target_version must be a non-empty string")
    return cleaned
69
+
70
+
71
def validate_top_k(top_k: int | None) -> int | None:
    """Validate that top_k is a positive integer when provided; None passes through.

    'why': the original accepted any comparable value (e.g. 2.5, True) despite
    the "positive integer" contract — enforce an actual int check. `bool` is
    rejected explicitly because it subclasses `int` in Python.

    Raises:
        MappingValidationError: when top_k is not None and not a positive int.
    """
    if top_k is None:
        return None
    if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 1:
        raise MappingValidationError("top_k must be a positive integer")
    return top_k
79
+
80
+
81
def validate_column_samples(columns: Mapping[str, Sequence[object]]) -> dict[str, list[str]]:
    """Normalize column names and sample values for mapping discovery.

    'why': the discovery API needs clean string samples keyed by trimmed names
    """
    if not columns:
        raise MappingValidationError("column data must include at least one column")
    normalized: dict[str, list[str]] = {}
    for raw_name, raw_values in columns.items():
        clean_name = _normalized_column_name(raw_name)
        normalized[clean_name] = _normalized_samples(clean_name, raw_values)
    return normalized
92
+
93
+
94
+ def _require_exists(path: Path, message: str) -> None:
95
+ if not path.exists():
96
+ raise FileValidationError(f"{message}: {path}")
97
+
98
+
99
+ def _require_is_file(path: Path, message: str) -> None:
100
+ if not path.is_file():
101
+ raise FileValidationError(f"{message}: {path}")
102
+
103
+
104
+ def _require_suffix(path: Path, suffix: str, message: str) -> None:
105
+ if path.suffix.lower() != suffix:
106
+ raise FileValidationError(f"{message}: {path.suffix}")
107
+
108
+
109
def _require_not_too_large(path: Path) -> None:
    """Raise FileValidationError when the CSV cannot be stat'ed or exceeds HARD_MAX_CSV_BYTES."""
    try:
        actual_bytes = os.path.getsize(path)
    except OSError as exc:
        raise FileValidationError(f"unable to stat source CSV: {exc}") from exc
    if actual_bytes > HARD_MAX_CSV_BYTES:
        raise FileValidationError(
            f"source CSV exceeds hard-coded limit of {HARD_MAX_CSV_BYTES // (1024 * 1024)} MB (got {actual_bytes} bytes)"
        )
118
+
119
+
120
+ def _resolve_output_candidate(path: Path | None, source_name: str) -> Path:
121
+ if path is None:
122
+ return Path.cwd() / f"{source_name}.harmonized.csv"
123
+ if path.exists() and path.is_dir():
124
+ return path / f"{source_name}.harmonized.csv"
125
+ return path
126
+
127
+
128
+ def _ensure_parent(candidate: Path) -> None:
129
+ parent = candidate.parent
130
+ if not parent.exists():
131
+ try:
132
+ parent.mkdir(parents=True, exist_ok=True)
133
+ except OSError as exc:
134
+ raise OutputLocationError(f"unable to create output directory {parent}: {exc}") from exc
135
+
136
+
137
+ def _require_parent_writable(candidate: Path) -> None:
138
+ parent = candidate.parent
139
+ if parent.exists() and not os.access(parent, os.W_OK):
140
+ raise OutputLocationError(f"output directory not writable: {parent}")
141
+
142
+
143
+ def _require_not_exists(candidate: Path) -> None:
144
+ if candidate.exists():
145
+ raise OutputLocationError(f"refusing to overwrite existing file: {candidate}")
146
+
147
+
148
+ def _next_available_path(candidate: Path) -> Path:
149
+ if not candidate.exists():
150
+ return candidate
151
+ stem = candidate.stem
152
+ suffix = candidate.suffix
153
+ parent = candidate.parent
154
+ index = 1
155
+ while index < 1000:
156
+ versioned = parent / f"{stem}.v{index}{suffix}"
157
+ if not versioned.exists():
158
+ return versioned
159
+ index += 1
160
+ raise OutputLocationError(
161
+ f"unable to determine unique output path after {index - 1} attempts for {candidate}"
162
+ )
163
+
164
+
165
+ def _normalized_column_name(raw_name: object) -> str:
166
+ if not isinstance(raw_name, str):
167
+ raise MappingValidationError("column names must be strings")
168
+ name = raw_name.strip()
169
+ if not name:
170
+ raise MappingValidationError("column names must be non-empty strings")
171
+ return name
172
+
173
+
174
def _normalized_samples(column_name: str, values: Sequence[object] | None) -> list[str]:
    """Coerce `values` into non-empty strings, raising when nothing usable remains."""
    cleaned = [text for text in map(_coerced_sample, _require_sequence(column_name, values)) if text]
    if not cleaned:
        raise MappingValidationError(f"column '{column_name}' must include at least one non-empty sample value")
    return cleaned
180
+
181
+
182
+ def _require_sequence(column_name: str, values: Sequence[object] | None) -> Sequence[object]:
183
+ if values is None or isinstance(values, (str, bytes)):
184
+ raise MappingValidationError(f"column '{column_name}' values must be a sequence of samples")
185
+ return values
186
+
187
+
188
+ def _coerced_sample(value: object) -> str | None:
189
+ if value is None:
190
+ return None
191
+ text = str(value).strip()
192
+ return text or None
@@ -0,0 +1,313 @@
1
+ """Coordinate project command entry points.
2
+
3
+ 'why': centralize developer workflows for `uv run` execution
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import argparse
8
+ import logging
9
+ import os
10
+ import re
11
+ import shutil
12
+ import subprocess
13
+ import sys
14
+ import tempfile
15
+ from collections.abc import Mapping, Sequence
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Final, cast
19
+
20
+ _LOGGER = logging.getLogger("netrias_client.scripts")
21
+ _COMMANDS: Final[tuple[tuple[str, ...], ...]] = (
22
+ ("pytest",),
23
+ ("ruff", "check", "."),
24
+ ("basedpyright", "."),
25
+ )
26
+ _REPO_ROOT: Final[Path] = Path(__file__).resolve().parents[2]
27
+ _PYPROJECT_PATH: Final[Path] = _REPO_ROOT / "pyproject.toml"
28
+ _PACKAGE_INIT_PATH: Final[Path] = Path(__file__).resolve().parent / "__init__.py"
29
+ _DIST_PATH: Final[Path] = _REPO_ROOT / "dist"
30
+ _VERSION_PATTERN: Final[re.Pattern[str]] = re.compile(r'^version\s*=\s*"(?P<value>[^"]+)"$', re.MULTILINE)
31
+ _INIT_VERSION_PATTERN: Final[re.Pattern[str]] = re.compile(r'^__version__\s*=\s*"(?P<value>[^"]+)"$', re.MULTILINE)
32
+ _REPOSITORY_CONFIG: Final[dict[str, tuple[str, str | None]]] = {
33
+ "testpypi": ("TEST_PYPI_TOKEN", "https://test.pypi.org/legacy/"),
34
+ "pypi": ("PYPI_TOKEN", None),
35
+ }
36
+
37
+
38
@dataclass(frozen=True)
class ReleaseOptions:
    """Structured view of the release CLI arguments.

    'why': decouple argument parsing from the release orchestration
    """

    version: str | None
    bump: str
    repository: str | None
48
+
49
+
50
def check() -> None:
    """Run the test, lint, and type-check pipeline behind `uv run check`.

    'why': one entry point that stops at the first failing tool
    """
    _ensure_logging()
    for pipeline_step in _COMMANDS:
        _run_command_or_raise(pipeline_step)
59
+
60
+
61
def live_check() -> None:
    """Run the live-service smoke test module.

    'why': hit production endpoints without re-implementing CLI plumbing
    """
    _run_command_or_raise(("python", "-m", "netrias_client.live_test.test"))
68
+
69
+
70
def release(argv: Sequence[str] | None = None) -> None:
    """Bump the version, validate, build, and optionally publish.

    'why': one script covers the whole TestPyPI/PyPI release flow
    """
    options = _parse_release_args(argv)
    _ensure_logging()
    next_version = _determine_target_version(options)
    _update_versions(next_version)
    _LOGGER.info("version synchronized → %s", next_version)
    check()
    built = _build_distributions()
    _verify_artifacts(built)
    if options.repository:
        _publish_artifacts(options.repository)
86
+
87
+
88
def _ensure_logging() -> None:
    """Install a bare-bones logging setup once per process."""
    if _LOGGER.handlers:
        return
    logging.basicConfig(level=logging.INFO, format="%(message)s")
93
+
94
+
95
def _run_command(command: Sequence[str], *, env: Mapping[str, str] | None = None, display_command: Sequence[str] | None = None) -> int:
    """Echo and run `command`, returning its exit code without raising.

    'why': keep subprocess logging in one place; callers decide how to react

    NOTE(review): a non-None `env` replaces the whole environment (including
    PATH) rather than augmenting it — confirm callers intend that.
    """
    echoed = display_command or command
    _LOGGER.info("→ %s", " ".join(echoed))
    completed = subprocess.run(command, check=False, env=dict(env) if env else None)
    return completed.returncode
105
+
106
+
107
def _run_command_or_raise(
    command: Sequence[str],
    *,
    env: Mapping[str, str] | None = None,
    display_command: Sequence[str] | None = None,
) -> None:
    """Run `command` and exit with its status when it fails.

    'why': composite workflows must halt on the first broken step
    """
    status = _run_command(command, env=env, display_command=display_command)
    if status != 0:
        raise SystemExit(status)
121
+
122
+
123
def _parse_release_args(argv: Sequence[str] | None) -> ReleaseOptions:
    """Translate CLI arguments into `ReleaseOptions`.

    'why': keep argparse wiring testable and out of the orchestration path
    """
    parser = argparse.ArgumentParser(prog="uv run release")
    exclusive = parser.add_mutually_exclusive_group()
    _ = exclusive.add_argument("--version", help="Explicit semantic version to publish")
    _ = exclusive.add_argument("--bump", choices=("patch", "minor", "major"), default="patch", help="Increment the current version")
    _ = parser.add_argument("--publish", choices=("testpypi", "pypi"), dest="repository", help="Publish artifacts after verification")
    namespace = parser.parse_args(sys.argv[1:] if argv is None else list(argv))
    explicit_version = cast(str | None, getattr(namespace, "version", None))
    requested_bump = cast(str | None, getattr(namespace, "bump", None))
    target_repo = cast(str | None, getattr(namespace, "repository", None))
    return ReleaseOptions(version=explicit_version, bump=requested_bump or "patch", repository=target_repo)
140
+
141
+
142
def _determine_target_version(options: ReleaseOptions) -> str:
    """Choose the release version from an explicit value or a bump of the current one.

    'why': pyproject and package metadata must agree before any release edit
    """
    current_version = _read_version(_PYPROJECT_PATH, _VERSION_PATTERN)
    _assert_versions_match(current_version)
    if options.version:
        return options.version
    return _bump_semver(current_version, options.bump)
153
+
154
+
155
def _assert_versions_match(expected: str) -> None:
    """Fail loudly when pyproject.toml and the package __init__ disagree on version.

    'why': shipping mismatched metadata confuses installers and users alike
    """
    init_version = _read_version(_PACKAGE_INIT_PATH, _INIT_VERSION_PATTERN)
    if init_version == expected:
        return
    raise RuntimeError(
        f"Package version mismatch: pyproject.toml has {expected} "
        f"but src/netrias_client/__init__.py has {init_version}"
    )
170
+
171
+
172
+ def _read_version(path: Path, pattern: re.Pattern[str]) -> str:
173
+ """Extract a version string from `path` using `pattern`.
174
+
175
+ 'why': share parsing logic between pyproject and package metadata
176
+ """
177
+
178
+ text = path.read_text(encoding="utf-8")
179
+ match = pattern.search(text)
180
+ if match is None:
181
+ raise RuntimeError(f"Could not locate version string in {path}")
182
+ return match.group("value")
183
+
184
+
185
+ def _bump_semver(version: str, bump: str) -> str:
186
+ """Return a new semantic version string incremented by `bump` type.
187
+
188
+ 'why': keep release increments predictable without external tooling
189
+ """
190
+
191
+ major_str, minor_str, patch_str = version.split(".")
192
+ major, minor, patch = int(major_str), int(minor_str), int(patch_str)
193
+ if bump == "major":
194
+ return f"{major + 1}.0.0"
195
+ if bump == "minor":
196
+ return f"{major}.{minor + 1}.0"
197
+ return f"{major}.{minor}.{patch + 1}"
198
+
199
+
200
def _update_versions(version: str) -> None:
    """Stamp `version` into pyproject.toml and the package __init__.

    'why': the wheel's metadata must match the importable __version__
    """
    _replace_version(_PYPROJECT_PATH, _VERSION_PATTERN, f'version = "{version}"')
    _replace_version(_PACKAGE_INIT_PATH, _INIT_VERSION_PATTERN, f'__version__ = "{version}"')
208
+
209
+
210
+ def _replace_version(path: Path, pattern: re.Pattern[str], replacement: str) -> None:
211
+ """Swap the first version match in `path` with `replacement`.
212
+
213
+ 'why': avoid manual editing and keep formatting stable
214
+ """
215
+
216
+ text = path.read_text(encoding="utf-8")
217
+ updated, count = pattern.subn(replacement, text, count=1)
218
+ if count != 1:
219
+ raise RuntimeError(f"Failed to update version in {path}")
220
+ _ = path.write_text(updated, encoding="utf-8")
221
+
222
+
223
def _build_distributions() -> list[Path]:
    """Rebuild dist/ from scratch and return the produced artifact paths.

    'why': stale artifacts must never sneak into a verification run
    """
    if _DIST_PATH.exists():
        shutil.rmtree(_DIST_PATH)
    _run_command_or_raise(("uv", "build"))
    return sorted(_DIST_PATH.glob("*"))
233
+
234
+
235
def _verify_artifacts(artifacts: Sequence[Path]) -> None:
    """Twine-check and smoke-test the built wheel/sdist set.

    'why': catch packaging breakage locally instead of on the remote index
    """
    def _is_dist(candidate: Path) -> bool:
        # A distribution is either a wheel or a .tar.gz source archive.
        return candidate.suffix == ".whl" or tuple(candidate.suffixes[-2:]) == (".tar", ".gz")

    dist_files = [candidate for candidate in artifacts if _is_dist(candidate)]
    if not dist_files:
        raise RuntimeError("No distribution artifacts produced; run `uv build` first")
    _run_command_or_raise(("uv", "run", "twine", "check", *(str(candidate) for candidate in dist_files)))
    _smoke_test_artifacts(dist_files)
250
+
251
+
252
def _smoke_test_artifacts(artifacts: Sequence[Path]) -> None:
    """Install the built wheel into a throwaway venv and import the package.

    'why': proves the wheel installs cleanly and exposes __version__
    """
    wheel = next((artifact for artifact in artifacts if artifact.suffix == ".whl"), None)
    if wheel is None:
        raise RuntimeError("Wheel artifact missing; expected *.whl after build")
    with tempfile.TemporaryDirectory() as scratch:
        venv_root = Path(scratch) / "venv"
        _create_virtualenv(venv_root)
        interpreter = _resolve_python(venv_root)
        if not interpreter.exists():
            raise RuntimeError(f"Smoke test interpreter missing: {interpreter}")
        _LOGGER.info("smoke test interpreter → %s", interpreter)
        _run_command_or_raise((str(interpreter), "-m", "pip", "install", str(wheel)))
        _run_command_or_raise((str(interpreter), "-c", "import netrias_client as pkg; print(pkg.__version__)"))
270
+
271
+
272
def _create_virtualenv(target: Path) -> None:
    """Build a seeded (pip-equipped) virtualenv at `target` via `uv venv`.

    'why': smoke tests must not inherit the developer's site-packages
    """
    _run_command_or_raise(("uv", "venv", str(target), "--seed"))
279
+
280
+
281
+ def _resolve_python(env_path: Path) -> Path:
282
+ """Locate the Python executable underneath `env_path`.
283
+
284
+ 'why': handle platform differences without duplicating logic
285
+ """
286
+
287
+ bin_dir = env_path / ("Scripts" if sys.platform == "win32" else "bin")
288
+ candidates = ("python", "python3", "python.exe")
289
+ for candidate in candidates:
290
+ python_path = bin_dir / candidate
291
+ if python_path.exists():
292
+ return python_path
293
+ raise RuntimeError(f"Python executable not found under {bin_dir}")
294
+
295
+
296
def _publish_artifacts(repository: str) -> None:
    """Upload dist artifacts with `uv publish`, masking the token in the echoed command.

    'why': keep credential handling opinionated yet minimal

    NOTE(review): the real token is still passed on the process command line
    (visible in `ps` on shared hosts) — consider an env-var hand-off if that
    exposure matters.
    """
    if repository not in _REPOSITORY_CONFIG:
        raise RuntimeError(f"Unsupported repository '{repository}'")
    env_var, publish_url = _REPOSITORY_CONFIG[repository]
    token = os.environ.get(env_var)
    if not token:
        raise RuntimeError(f"Set {env_var} before publishing to {repository}")
    command: list[str] = ["uv", "publish", "--username", "__token__", "--password", token]
    display: list[str] = ["uv", "publish", "--username", "__token__", "--password", "******"]
    if publish_url:
        command += ["--publish-url", publish_url]
        display += ["--publish-url", publish_url]
    _run_command_or_raise(tuple(command), display_command=tuple(display))
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: netrias_client
3
+ Version: 0.1.0
4
+ Summary: Python client for the Netrias harmonization API
5
+ Project-URL: Homepage, https://github.com/netrias/netrias_client
6
+ Project-URL: Repository, https://github.com/netrias/netrias_client
7
+ Project-URL: Documentation, https://github.com/netrias/netrias_client#readme
8
+ Author-email: Chris Harman <charman@netrias.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2025 Netrias
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: api,cde,client,harmonization,netrias
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Operating System :: OS Independent
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3 :: Only
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Requires-Python: >=3.10
41
+ Requires-Dist: boto3
42
+ Requires-Dist: httpx
43
+ Provides-Extra: dev
44
+ Requires-Dist: basedpyright; extra == 'dev'
45
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
46
+ Requires-Dist: pytest>=7; extra == 'dev'
47
+ Requires-Dist: python-dotenv>=1.0; extra == 'dev'
48
+ Requires-Dist: ruff>=0.5.0; extra == 'dev'
49
+ Requires-Dist: twine>=5.0; extra == 'dev'
50
+ Requires-Dist: ty; extra == 'dev'
51
+ Requires-Dist: typing-extensions; extra == 'dev'
52
+ Description-Content-Type: text/markdown
53
+
54
+ # Netrias Client
55
+
56
+ """Explain how to install and exercise the Netrias harmonization client."""
57
+
58
+ ## Install with `uv`
59
+ - Install `uv` once (or update): `curl -LsSf https://astral.sh/uv/install.sh | sh`
60
+ - Sync dependencies for a project that consumes the client:
61
+ ```bash
62
+ uv add netrias_client
63
+ uv add python-dotenv # optional helper for loading .env files
64
+ ```
65
+ - Prefer `uv run <command>` for executing scripts so the managed environment is reused automatically.
66
+
67
+ ### Alternative: `pip`
68
+ ```bash
69
+ python -m pip install netrias_client
70
+ python -m pip install python-dotenv # optional
71
+ ```
72
+
73
+ ## Quickstart Script
74
+ Reference script (save as `main.py`) showing a full harmonization round-trip:
75
+
76
+ ```python
77
+ #!/usr/bin/env -S uv run python
78
+ # /// script
79
+ # requires-python = ">=3.13"
80
+ # dependencies = ["netrias_client", "python-dotenv"]
81
+ # ///
82
+
83
+ """Exercise the packaged Netrias client against the live APIs."""
84
+
85
+ import asyncio
86
+ import os
87
+ from pathlib import Path
88
+ from typing import Final
89
+
90
+ from dotenv import load_dotenv
91
+ from netrias_client import NetriasClient, __version__ as CLIENT_VERSION
92
+
93
+ load_dotenv(override=True)
94
+
95
+ CSV_PATH: Final[Path] = Path("data/primary_diagnosis_1.csv")
96
+
97
+
98
+ async def main() -> None:
99
+ client = NetriasClient()
100
+ client.configure(api_key=_resolve_api_key())
101
+
102
+ manifest = client.discover_cde_mapping(
103
+ source_csv=CSV_PATH,
104
+ target_schema="ccdi",
105
+ )
106
+
107
+ result = await client.harmonize_async(
108
+ source_path=CSV_PATH,
109
+ manifest=manifest,
110
+ )
111
+
112
+ print(f"netrias_client version: {CLIENT_VERSION}")
113
+ print(f"Harmonize status: {result.status}")
114
+ print(f"Harmonized file: {result.file_path}")
115
+
116
+
117
+ def _resolve_api_key() -> str:
118
+ api_key = os.getenv("NETRIAS_API_KEY")
119
+ if api_key:
120
+ return api_key
121
+ msg = "Set NETRIAS_API_KEY in your environment or .env file"
122
+ raise RuntimeError(msg)
123
+
124
+
125
+ if __name__ == "__main__":
126
+ asyncio.run(main())
127
+ ```
128
+
129
+ ### Steps
130
+ 1. Install or update `uv` (see above).
131
+ 2. Export `NETRIAS_API_KEY` (or add it to a local `.env`).
132
+ 3. Adjust `CSV_PATH` to point at the source CSV you want to harmonize.
133
+ 4. Run `uv run python main.py`.
134
+
135
+ ## `configure()` Options
136
+ `NetriasClient.configure(...)` accepts additional tuning knobs. You can mix and match the ones you need:
137
+
138
+ | Parameter | Type | Purpose |
139
+ | --- | --- | --- |
140
+ | `api_key` | `str` | **Required.** Bearer token for authenticating with the Netrias services. |
141
+ | `timeout` | `float \| None` | Override the default 6-hour timeout for long-running harmonization jobs. |
142
+ | `log_level` | `LogLevel \| str \| None` | Control verbosity (`INFO` by default). Accepts enum members or string names. |
143
+ | `confidence_threshold` | `float \| None` | Minimum score (0–1) for keeping discovery recommendations; lower it to capture more tentative matches. |
144
+ | `discovery_use_gateway_bypass` | `bool \| None` | Toggle the temporary AWS Lambda bypass path for discovery (defaults to `True`). Set to `False` once API Gateway limits are sufficient. |
145
+ | `log_directory` | `Path \| str \| None` | Directory for per-client log files. When omitted, logs stay on stdout. |
146
+
147
+ Configure only the options you need; unspecified values fall back to sensible defaults.
148
+
149
+ ## Usage Notes
150
+ - `discover_cde_mapping(...)` samples CSV values and returns a manifest-ready payload; use the async variant if you’re already in an event loop.
151
+ - Call `harmonize(...)` (sync) or `harmonize_async(...)` (async) with the manifest to download a harmonized CSV. The result object reports status, description, and the output path.
152
+ - The package exposes `__version__` so callers can assert the installed release.
153
+ - Optional extras (`netrias_client[aws]`) add boto3 helpers for the temporary gateway bypass.
154
+
155
+ ## Data Model Store (Validation)
156
+ Query reference data for validation use cases:
157
+
158
+ ```python
159
+ from netrias_client import NetriasClient
160
+
161
+ client = NetriasClient()
162
+ client.configure(api_key="...")
163
+
164
+ # List available data models
165
+ models = client.list_data_models()
166
+
167
+ # List CDEs for a model version
168
+ cdes = client.list_cdes("ccdi", "v1")
169
+
170
+ # Validate a value against permissible values
171
+ is_valid = client.validate_value("Male", "ccdi", "v1", "sex_at_birth")
172
+
173
+ # Or get the full PV set for repeated lookups
174
+ pv_set = client.get_pv_set("ccdi", "v1", "sex_at_birth")
175
+ assert "Male" in pv_set
176
+ ```
177
+
178
+ All methods have async variants (`list_data_models_async`, `validate_value_async`, etc.).