data360-autodoc 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ """data360-autodoc: auto-generate documentation for Salesforce Data 360 orgs."""
@@ -0,0 +1 @@
1
+ """Command-line interface for data360-autodoc."""
@@ -0,0 +1,209 @@
1
+ """``data360-autodoc`` command-line entry point.
2
+
3
+ Orchestrates the one-shot pipeline::
4
+
5
+ auth (JWT bearer) -> fetch metadata -> render outputs -> write files
6
+
7
+ Outputs are selected with ``--format``:
8
+
9
+ - ``markdown`` — human-readable ``.md`` plus a ``.mmd`` Mermaid diagram
10
+ - ``json`` — deterministic ``.json`` snapshot (the drift-detection seam)
11
+ - ``pdf`` — not yet implemented (stub; warns and skips)
12
+ - ``all`` — markdown + json (+ pdf stub warning)
13
+
14
+ The ``.json`` snapshot is a first-class output, not a debug artifact: the paid
15
+ drift tier loads a prior snapshot and diffs it against a fresh fetch.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from pathlib import Path
22
+
23
+ import click
24
+
25
+ from data360_autodoc.fetcher.auth import (
26
+ AUD_PRODUCTION,
27
+ AUD_SANDBOX,
28
+ DEFAULT_TOKEN_URL,
29
+ SANDBOX_TOKEN_URL,
30
+ AuthError,
31
+ get_access_token,
32
+ )
33
+ from data360_autodoc.fetcher.metadata import MetadataError, fetch_metadata
34
+ from data360_autodoc.generator.markdown import render_markdown
35
+ from data360_autodoc.generator.mermaid import render_mermaid
36
+ from data360_autodoc.generator.snapshot import render_json
37
+
38
#: Valid values for the ``--format`` option. ``pdf`` is accepted but only
#: triggers a "not yet implemented" notice; ``all`` selects the markdown and
#: json outputs plus that same pdf notice.
FORMATS = ["markdown", "json", "pdf", "all"]
40
+
41
+
42
# Root command group: subcommands attach to it with ``@cli.command()``.
# click shows the docstring as the group's ``--help`` text, so it is kept
# short and user-facing.
@click.group()
def cli() -> None:
    """Auto-generate documentation for Salesforce Data 360 orgs."""
45
+
46
+
47
@cli.command()
@click.option("--instance-url", required=True, help="Org base URL.")
@click.option(
    "--access-token",
    default=None,
    help="Use a pre-obtained OAuth access token and skip JWT auth. When set, "
    "--client-id / --private-key / --username are not needed.",
)
@click.option(
    "--client-id", default=None, help="Connected app consumer key (JWT auth)."
)
@click.option(
    "--private-key",
    "private_key_path",
    default=None,
    type=click.Path(exists=True, dir_okay=False),
    help="Path to the connected app's PEM private key (JWT auth).",
)
@click.option(
    "--username", default=None, help="Salesforce username to impersonate (JWT auth)."
)
@click.option(
    "--output",
    "output_dir",
    default=".",
    type=click.Path(file_okay=False),
    help="Directory to write output files into (created if missing).",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(FORMATS),
    default="all",
    show_default=True,
    help="Which artifacts to generate.",
)
@click.option(
    "--sandbox",
    is_flag=True,
    default=False,
    help="Authenticate against test.salesforce.com (sandbox/scratch orgs).",
)
@click.option(
    "--api-version",
    "api_version",
    default=None,
    help="Data API version (e.g. v62.0). Default: auto-detect the org's highest.",
)
@click.option(
    "--timeout",
    default=120.0,
    show_default=True,
    type=float,
    help="Per-request timeout (seconds) for metadata calls. Data Cloud is slow.",
)
def generate(
    instance_url: str,
    access_token: str | None,
    client_id: str | None,
    private_key_path: str | None,
    username: str | None,
    output_dir: str,
    output_format: str,
    sandbox: bool,
    api_version: str | None,
    timeout: float,
) -> None:
    """Fetch an org's metadata and write documentation artifacts."""
    # NOTE: the docstring above is the command's ``--help`` text, so the
    # implementation notes live in comments instead.
    #
    # Pipeline: resolve credentials -> fetch metadata -> write the selected
    # artifacts -> print a summary. Every expected failure is converted to a
    # click error so the user sees a one-line message and a non-zero exit.
    if access_token:
        # A pre-obtained token bypasses the JWT exchange entirely; --sandbox
        # and the JWT flags are not consulted on this path.
        token = access_token
        org_url = instance_url
    else:
        missing = [
            flag
            for flag, value in (
                ("--client-id", client_id),
                ("--private-key", private_key_path),
                ("--username", username),
            )
            if not value
        ]
        if missing:
            raise click.UsageError(
                "Provide --access-token, or all of --client-id, --private-key, "
                f"--username for JWT auth. Missing: {', '.join(missing)}."
            )
        token_url = SANDBOX_TOKEN_URL if sandbox else DEFAULT_TOKEN_URL
        audience = AUD_SANDBOX if sandbox else AUD_PRODUCTION
        try:
            auth = get_access_token(
                instance_url=instance_url,
                client_id=client_id,
                private_key_path=private_key_path,
                username=username,
                token_url=token_url,
                audience=audience,
            )
        except (AuthError, OSError) as exc:
            # OSError covers a private key that vanished or became unreadable
            # after click validated the path.
            raise click.ClickException(str(exc)) from exc
        token = auth["access_token"]
        # Prefer the instance URL reported by the token exchange.
        org_url = auth["instance_url"]

    def _progress(message: str, inline: bool) -> None:
        # Progress goes to stderr so it never pollutes the doc output. inline
        # uses a carriage return to update the DMO counter in place.
        if inline:
            click.echo(f"\r{message}", nl=False, err=True)
        else:
            click.echo(message, err=True)

    try:
        schema = fetch_metadata(
            instance_url=org_url,
            access_token=token,
            api_version=api_version,
            timeout=timeout,
            progress=_progress,
        )
    except MetadataError as exc:
        # Surface a clean one-line error and exit non-zero — never a traceback.
        raise click.ClickException(str(exc)) from exc

    out_dir = Path(output_dir)
    # Fall back to a fixed stem when the org name is missing or slugs to "".
    base = _slug(schema.org_name or "") or "data360"

    written: list[str] = []
    want_markdown = output_format in ("markdown", "all")
    want_json = output_format in ("json", "all")
    want_pdf = output_format in ("pdf", "all")

    try:
        out_dir.mkdir(parents=True, exist_ok=True)

        if want_markdown:
            # Markdown output always comes with its companion Mermaid diagram.
            md_path = out_dir / f"{base}.md"
            md_path.write_text(render_markdown(schema), encoding="utf-8")
            written.append(md_path.name)
            mmd_path = out_dir / f"{base}.mmd"
            mmd_path.write_text(render_mermaid(schema) + "\n", encoding="utf-8")
            written.append(mmd_path.name)

        if want_json:
            json_path = out_dir / f"{base}.json"
            json_path.write_text(render_json(schema), encoding="utf-8")
            written.append(json_path.name)
    except OSError as exc:
        # Unwritable directory, full disk, etc.: report it like every other
        # failure instead of letting a traceback escape.
        raise click.ClickException(
            f"Could not write output files to {out_dir}: {exc}"
        ) from exc

    if want_pdf:
        click.echo("PDF output is not yet implemented (stub) — skipping.", err=True)

    for name in written:
        click.echo(f"Wrote {name}")
    click.echo(
        f"Generated docs for {len(schema.dmos)} DMOs, "
        f"{len(schema.dlos)} DLOs, "
        f"{len(schema.identity_rulesets)} Identity Rulesets"
    )
201
+
202
+
203
+ def _slug(value: str) -> str:
204
+ """Slugify an org name for use as an output filename stem."""
205
+ return re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
206
+
207
+
208
# Allow running this module directly as a script by invoking the click group.
# The pragma excludes the guard from coverage measurement.
if __name__ == "__main__":  # pragma: no cover
    cli()
@@ -0,0 +1 @@
1
+ """Salesforce Data 360 API client package."""
@@ -0,0 +1,110 @@
1
+ """Shared HTTP helpers for the Data 360 Connect REST API clients.
2
+
3
+ All ``/ssot/*`` fetchers share the same needs: a bearer-authenticated GET with
4
+ exponential-backoff retry, and ``nextPageUrl`` pagination with a cycle guard.
5
+ Centralizing them here keeps the per-endpoint clients (``metadata.py``,
6
+ ``streams.py``) small and consistent.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import time
12
+ from typing import Any, Final, Iterator
13
+
14
+ import requests
15
+
16
# Total number of GET attempts ``get_json`` makes before giving up.
_MAX_ATTEMPTS: Final = 3
# Base delay in seconds for the backoff between attempts: the wait after
# 0-based attempt ``i`` is ``base * 2**i``, and no wait follows the last one.
_BACKOFF_BASE_SECONDS: Final = 1.0
18
+
19
+
20
class FetchError(RuntimeError):
    """Raised when a Data 360 API request cannot be completed.

    Covers 4xx rejections, exhausted retries, a 200 response whose body is
    not JSON, and a detected pagination cycle.
    """
22
+
23
+
24
def get_json(url: str, *, access_token: str, timeout: float) -> Any:
    """Fetch ``url`` with a bearer token and return its decoded JSON body.

    Makes up to ``_MAX_ATTEMPTS`` (three) attempts. Transport errors and any
    response that is neither 200 nor 4xx are retried, with exponentially
    growing waits between attempts (1s, then 2s); no wait follows the final
    attempt. A 4xx response ends the loop immediately.

    Args:
        url: Absolute URL to request.
        access_token: OAuth bearer token sent in the ``Authorization`` header.
        timeout: Per-request timeout in seconds.

    Returns:
        The decoded JSON body of the 200 response.

    Raises:
        FetchError: On a 4xx response, on a 200 response whose body is not
            valid JSON, or once every attempt has failed.
    """
    request_headers = {
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json",
    }
    failure: str | None = None
    final_index = _MAX_ATTEMPTS - 1

    for attempt_index in range(_MAX_ATTEMPTS):
        try:
            reply = requests.get(url, headers=request_headers, timeout=timeout)
        except requests.RequestException as exc:
            # Transport-level problem: remember it and fall through to retry.
            failure = str(exc)
        else:
            status = reply.status_code
            if status == 200:
                try:
                    return reply.json()
                except ValueError as exc:
                    # A 200 carrying a non-JSON body (e.g. a proxy or login
                    # HTML page) is reported as a FetchError so callers never
                    # see a raw JSON decoding exception.
                    raise FetchError(
                        f"Non-JSON 200 response from {url}: {exc}"
                    ) from exc
            if 400 <= status < 500:
                # Client errors are terminal; retrying would not help.
                raise FetchError(
                    f"Request rejected ({status}) for {url}: "
                    f"{reply.text[:500]}"
                )
            failure = f"HTTP {status}: {reply.text[:500]}"

        # Wait before the next attempt, but not after the last one.
        if attempt_index != final_index:
            time.sleep(_BACKOFF_BASE_SECONDS * (2**attempt_index))

    raise FetchError(
        f"Request to {url} failed after {_MAX_ATTEMPTS} attempts: {failure}"
    )
70
+
71
+
72
def iter_pages(
    first_url: str, *, base_url: str, access_token: str, timeout: float
) -> Iterator[dict[str, Any]]:
    """Yield each page of a paginated ``/ssot/*`` response.

    Follows each page's ``nextPageUrl`` (absolute or relative) until a page
    has none, guarding against a self-referential or repeating link that
    would otherwise loop forever.

    Args:
        first_url: Absolute URL of the first page.
        base_url: Org base URL, used to resolve relative ``nextPageUrl`` values.
        access_token: OAuth bearer token.
        timeout: Per-request timeout in seconds.

    Yields:
        Each page's decoded JSON body (always a JSON object).

    Raises:
        FetchError: On a request failure, a page whose body is not a JSON
            object, or a detected pagination cycle.
    """
    seen: set[str] = set()
    next_url: str | None = first_url
    while next_url:
        if next_url in seen:
            raise FetchError(f"Pagination cycle detected at {next_url}")
        seen.add(next_url)
        page = get_json(next_url, access_token=access_token, timeout=timeout)
        # Valid JSON is not necessarily an object. Reject lists/scalars here
        # so callers get the documented FetchError rather than an
        # AttributeError from ``page.get`` below.
        if not isinstance(page, dict):
            raise FetchError(
                f"Unexpected {type(page).__name__} response from {next_url}; "
                "expected a JSON object"
            )
        yield page
        next_url = _resolve_next(page.get("nextPageUrl"), base_url)
102
+
103
+
104
+ def _resolve_next(raw: Any, base_url: str) -> str | None:
105
+ """Normalize a ``nextPageUrl`` value to an absolute URL, or ``None``."""
106
+ if not raw or not isinstance(raw, str):
107
+ return None
108
+ if raw.startswith("http://") or raw.startswith("https://"):
109
+ return raw
110
+ return f"{base_url.rstrip('/')}/{raw.lstrip('/')}"
@@ -0,0 +1,161 @@
1
+ """OAuth 2.0 JWT Bearer Flow for Salesforce Data 360.
2
+
3
+ This module performs the server-to-server JWT bearer flow only (per project
4
+ policy: never store user passwords or client secrets). The caller supplies a
5
+ connected-app ``client_id``, the path to the app's private key, and the
6
+ ``username`` to impersonate; we mint a short-lived signed assertion and exchange
7
+ it for an access token.
8
+
9
+ Reference: https://help.salesforce.com/s/articleView?id=sf.remoteaccess_oauth_jwt_flow.htm
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import time
15
+ from pathlib import Path
16
+ from typing import Final
17
+
18
+ import jwt
19
+ import requests
20
+
21
#: OAuth grant type identifier for the JWT bearer flow.
_GRANT_TYPE: Final = "urn:ietf:params:oauth:grant-type:jwt-bearer"
#: Audience for production logins.
AUD_PRODUCTION: Final = "https://login.salesforce.com"
#: Audience for sandbox / scratch-org logins.
AUD_SANDBOX: Final = "https://test.salesforce.com"
#: Default token endpoint for production JWT bearer exchange.
DEFAULT_TOKEN_URL: Final = "https://login.salesforce.com/services/oauth2/token"
#: Token endpoint for sandbox / scratch-org JWT bearer exchange.
SANDBOX_TOKEN_URL: Final = "https://test.salesforce.com/services/oauth2/token"
#: Assertion lifetime in seconds (Salesforce caps this at 3 minutes).
_ASSERTION_TTL_SECONDS: Final = 180
#: Number of token-exchange attempts before giving up.
_MAX_ATTEMPTS: Final = 3
#: Base delay (seconds) for exponential backoff between attempts. With three
#: attempts only two waits occur: 1s, then 2s.
_BACKOFF_BASE_SECONDS: Final = 1.0
37
+
38
+
39
class AuthError(RuntimeError):
    """Raised when the JWT bearer token exchange ultimately fails.

    Covers a 4xx rejection from the token endpoint, exhausted retries, and a
    success response that lacks ``access_token``.
    """
41
+
42
+
43
def build_assertion(
    *,
    client_id: str,
    username: str,
    private_key: str,
    audience: str = AUD_PRODUCTION,
    now: int | None = None,
) -> str:
    """Create the RS256-signed JWT assertion used by the bearer flow.

    Args:
        client_id: Connected app consumer key; becomes the ``iss`` claim.
        username: Salesforce username to impersonate; becomes the ``sub``
            claim.
        private_key: PEM-encoded private key contents used for signing.
        audience: Value of the ``aud`` claim; :data:`AUD_PRODUCTION` or
            :data:`AUD_SANDBOX`.
        now: Epoch seconds to treat as the current time. When ``None`` the
            system clock is used; tests pass a fixed value to get a
            deterministic ``exp`` claim.

    Returns:
        The signed assertion in compact JWT form.
    """
    if now is None:
        reference_time = int(time.time())
    else:
        reference_time = now
    expires_at = reference_time + _ASSERTION_TTL_SECONDS

    payload = {
        "iss": client_id,
        "sub": username,
        "aud": audience,
        "exp": expires_at,
    }
    return jwt.encode(payload, private_key, algorithm="RS256")
73
+
74
+
75
def get_access_token(
    *,
    instance_url: str,
    client_id: str,
    private_key_path: str,
    username: str,
    token_url: str = DEFAULT_TOKEN_URL,
    audience: str = AUD_PRODUCTION,
    timeout: float = 30.0,
) -> dict[str, str]:
    """Obtain an access token via the JWT bearer flow.

    Makes up to three token-exchange attempts, waiting 1s and then 2s between
    them, when the failure is a transport error or a non-4xx error response.
    Client errors (4xx) are treated as terminal and surfaced immediately.

    Args:
        instance_url: Base URL of the Salesforce org (e.g.
            ``https://mydomain.my.salesforce.com``). Used as the fallback
            ``instance_url`` in the return value when the org does not echo
            a usable one.
        client_id: The connected app's consumer key.
        private_key_path: Filesystem path to the PEM private key.
        username: The Salesforce username to impersonate.
        token_url: The OAuth token endpoint to POST the assertion to. Defaults
            to :data:`DEFAULT_TOKEN_URL`
            (``https://login.salesforce.com/services/oauth2/token``). **Sandbox
            and scratch orgs must override this** with
            :data:`SANDBOX_TOKEN_URL`
            (``https://test.salesforce.com/services/oauth2/token``).
        audience: JWT ``aud`` claim; :data:`AUD_PRODUCTION` (default) or
            :data:`AUD_SANDBOX` for sandboxes. Should match ``token_url``.
        timeout: Per-request timeout in seconds.

    Returns:
        A dict with ``access_token`` and ``instance_url`` keys. The
        ``instance_url`` reflects the value returned by Salesforce when one
        is present and non-empty (it may differ from the input), otherwise
        the input value.

    Raises:
        FileNotFoundError: If ``private_key_path`` does not exist.
        AuthError: If the token exchange fails after all retries, the org
            returns a non-retryable error, the success response is not a
            JSON object, or it is missing a usable ``access_token``.
    """
    # Cap on how much of an error response body is echoed into messages, so
    # an HTML error page cannot flood the terminal.
    error_body_limit = 500

    private_key = Path(private_key_path).read_text(encoding="utf-8")
    assertion = build_assertion(
        client_id=client_id,
        username=username,
        private_key=private_key,
        audience=audience,
    )
    data = {"grant_type": _GRANT_TYPE, "assertion": assertion}

    last_error: str | None = None
    for attempt in range(_MAX_ATTEMPTS):
        try:
            response = requests.post(token_url, data=data, timeout=timeout)
        except requests.RequestException as exc:  # transport-level failure
            last_error = str(exc)
        else:
            if response.status_code == 200:
                try:
                    body = response.json()
                except ValueError as exc:
                    # A 200 with a non-JSON body (e.g. a proxy/login HTML
                    # page) must surface as AuthError, not as a raw JSON
                    # decoding exception.
                    raise AuthError(
                        f"Non-JSON 200 response from {token_url}: {exc}"
                    ) from exc
                access_token = (
                    body.get("access_token") if isinstance(body, dict) else None
                )
                # A missing, null, empty, or non-string token is unusable.
                if not isinstance(access_token, str) or not access_token:
                    raise AuthError("Unexpected response: access_token not found")
                return {
                    "access_token": access_token,
                    # ``or`` also covers an explicit null / empty value.
                    "instance_url": body.get("instance_url") or instance_url,
                }
            # 4xx: bad assertion / config — retrying will not help.
            if 400 <= response.status_code < 500:
                raise AuthError(
                    f"JWT bearer exchange rejected ({response.status_code}): "
                    f"{response.text[:error_body_limit]}"
                )
            last_error = (
                f"HTTP {response.status_code}: "
                f"{response.text[:error_body_limit]}"
            )

        # Back off before the next attempt, but never after the final one.
        if attempt < _MAX_ATTEMPTS - 1:
            time.sleep(_BACKOFF_BASE_SECONDS * (2**attempt))

    raise AuthError(
        f"JWT bearer exchange failed after {_MAX_ATTEMPTS} attempts: {last_error}"
    )