data360-autodoc 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. data360_autodoc-0.2.0/LICENSE +21 -0
  2. data360_autodoc-0.2.0/PKG-INFO +162 -0
  3. data360_autodoc-0.2.0/README.md +132 -0
  4. data360_autodoc-0.2.0/data360_autodoc/__init__.py +1 -0
  5. data360_autodoc-0.2.0/data360_autodoc/cli/__init__.py +1 -0
  6. data360_autodoc-0.2.0/data360_autodoc/cli/main.py +209 -0
  7. data360_autodoc-0.2.0/data360_autodoc/fetcher/__init__.py +1 -0
  8. data360_autodoc-0.2.0/data360_autodoc/fetcher/_http.py +110 -0
  9. data360_autodoc-0.2.0/data360_autodoc/fetcher/auth.py +161 -0
  10. data360_autodoc-0.2.0/data360_autodoc/fetcher/metadata.py +661 -0
  11. data360_autodoc-0.2.0/data360_autodoc/fetcher/streams.py +240 -0
  12. data360_autodoc-0.2.0/data360_autodoc/generator/__init__.py +1 -0
  13. data360_autodoc-0.2.0/data360_autodoc/generator/markdown.py +413 -0
  14. data360_autodoc-0.2.0/data360_autodoc/generator/mermaid.py +141 -0
  15. data360_autodoc-0.2.0/data360_autodoc/generator/snapshot.py +300 -0
  16. data360_autodoc-0.2.0/data360_autodoc/models.py +277 -0
  17. data360_autodoc-0.2.0/data360_autodoc.egg-info/PKG-INFO +162 -0
  18. data360_autodoc-0.2.0/data360_autodoc.egg-info/SOURCES.txt +34 -0
  19. data360_autodoc-0.2.0/data360_autodoc.egg-info/dependency_links.txt +1 -0
  20. data360_autodoc-0.2.0/data360_autodoc.egg-info/entry_points.txt +2 -0
  21. data360_autodoc-0.2.0/data360_autodoc.egg-info/requires.txt +9 -0
  22. data360_autodoc-0.2.0/data360_autodoc.egg-info/top_level.txt +1 -0
  23. data360_autodoc-0.2.0/pyproject.toml +53 -0
  24. data360_autodoc-0.2.0/setup.cfg +4 -0
  25. data360_autodoc-0.2.0/tests/test_api_version.py +91 -0
  26. data360_autodoc-0.2.0/tests/test_auth.py +141 -0
  27. data360_autodoc-0.2.0/tests/test_cli.py +196 -0
  28. data360_autodoc-0.2.0/tests/test_dmo_enrichment.py +86 -0
  29. data360_autodoc-0.2.0/tests/test_dmo_resilience.py +93 -0
  30. data360_autodoc-0.2.0/tests/test_http.py +25 -0
  31. data360_autodoc-0.2.0/tests/test_markdown.py +388 -0
  32. data360_autodoc-0.2.0/tests/test_mermaid.py +167 -0
  33. data360_autodoc-0.2.0/tests/test_metadata.py +545 -0
  34. data360_autodoc-0.2.0/tests/test_metadata_errors.py +39 -0
  35. data360_autodoc-0.2.0/tests/test_snapshot.py +291 -0
  36. data360_autodoc-0.2.0/tests/test_streams.py +354 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Valentina Tihova
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,162 @@
1
+ Metadata-Version: 2.4
2
+ Name: data360-autodoc
3
+ Version: 0.2.0
4
+ Summary: Auto-generate human-readable documentation for Salesforce Data 360 orgs.
5
+ Author: data360-autodoc contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/valentinatihova/data360-autodoc
8
+ Project-URL: Repository, https://github.com/valentinatihova/data360-autodoc
9
+ Project-URL: Issues, https://github.com/valentinatihova/data360-autodoc/issues
10
+ Keywords: salesforce,data-cloud,data-360,documentation,cli
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Topic :: Documentation
17
+ Classifier: Topic :: Software Development :: Documentation
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: click>=8.1
22
+ Requires-Dist: PyJWT>=2.8
23
+ Requires-Dist: cryptography>=42.0
24
+ Requires-Dist: requests>=2.31
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=8.0; extra == "dev"
27
+ Requires-Dist: responses>=0.25; extra == "dev"
28
+ Requires-Dist: black>=24.0; extra == "dev"
29
+ Dynamic: license-file
30
+
31
+ # data360-autodoc
32
+
33
+ [![PyPI version](https://img.shields.io/badge/pypi-coming%20soon-blue)](https://pypi.org/project/data360-autodoc/)
34
+ [![Tests](https://img.shields.io/badge/tests-passing-brightgreen)](#)
35
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE)
36
+
37
+ **Auto-generate human-readable documentation for Salesforce Data 360 (Data Cloud) orgs — in seconds, not days.**
38
+
39
+ Point it at an org and it produces a full data dictionary (DMOs, DLOs, fields, keys), the data streams and field-level mappings behind them, the DMO relationship graph, an ERD, and a deterministic JSON snapshot.
40
+
41
+ - 📓 **Data dictionary** — every DMO and DLO as clean Markdown tables, with field names, types, and keys.
42
+ - 🌊 **Data streams + field mappings** — per-stream source/refresh metadata, the Stream → DLO field map, and the DLO → DMO field map (with real labels, not just API names).
43
+ - 🔗 **Relationships + ERD** — DMO-to-DMO relationships with cardinality and status, plus a Mermaid graph of DLO → DMO mappings and relationship edges.
44
+ - 🧊 **JSON snapshot** — a deterministic, diff-friendly export of your whole org schema (the foundation for drift detection — see below).
45
+
46
+ ## Who it's for
47
+
48
+ Built for **Salesforce SI consultants and Data Cloud practitioners** who lose days hand-writing org documentation for every engagement. Works against any Data 360 org you can authenticate to with a connected app — including **Developer Edition / Data Cloud Dev orgs**, so you can try it on a sandbox before pointing it at a client.
49
+
50
+ ## Quick start
51
+
52
+ ```bash
53
+ pip install data360-autodoc
54
+
55
+ data360-autodoc generate \
56
+ --instance-url https://mydomain.my.salesforce.com \
57
+ --client-id <connected-app-consumer-key> \
58
+ --private-key ./server.pem \
59
+ --username admin@myorg.com \
60
+ --output ./docs \
61
+ --format all
62
+ ```
63
+
64
+ ```
65
+ Wrote acme-data-cloud.md
66
+ Wrote acme-data-cloud.mmd
67
+ Wrote acme-data-cloud.json
68
+ Generated docs for 24 DMOs, 11 DLOs, 0 Identity Rulesets
69
+ ```
70
+
71
+ Authentication uses the **OAuth 2.0 JWT Bearer flow** (connected app + private key — no passwords stored).
72
+
73
+ **Options that affect the metadata fetch:**
74
+
75
+ - `--sandbox` — authenticate against `test.salesforce.com` (sandbox / scratch orgs).
76
+ - `--api-version` — the Salesforce REST API version used for the `/ssot/*` metadata calls (e.g. `v62.0`). **By default the tool auto-detects your org's highest supported version** (from `GET /services/data/`), so you normally don't set this. Force it only if auto-detection picks a version where a Data Cloud endpoint misbehaves, or to pin output to a specific version. It must be a valid Salesforce REST API version your org supports.
77
+
78
+ (The `Identity Rulesets` count is currently always `0` — see "Not supported yet" below.)
79
+
80
+ ## What you get
81
+
82
+ `--format` controls the output:
83
+
84
+ | Format | Files | What it is |
85
+ |--------|-------|------------|
86
+ | `markdown` | `.md` + `.mmd` | Data dictionary + Mermaid ERD |
87
+ | `json` | `.json` | Deterministic org-schema snapshot |
88
+ | `pdf` | — | _Coming soon_ |
89
+ | `all` | all of the above | Everything |
90
+
91
+ ### Example output
92
+
93
+ The Markdown data dictionary. DMO field types come from the org's relationships metadata, and fall back to the mapped DLO field type when the DMO endpoint returns a generic type (shown as `(via DLO)`); DLO keys come from the data streams:
94
+
95
+ ```markdown
96
+ ## Data Model Objects (DMOs)
97
+
98
+ ### Individual (`Individual__dmo`)
99
+
100
+ | Name | Type | Key |
101
+ | --- | --- | --- |
102
+ | Email__c | EmailAddress | |
103
+ | Id__c | Text | |
104
+
105
+ ## Data Lake Objects (DLOs)
106
+
107
+ ### Order (Home) (`Order_Home__dll`)
108
+
109
+ | Name | Type | Key |
110
+ | --- | --- | --- |
111
+ | Amount | Number | |
112
+ | OrderId | Text | PrimaryKey |
113
+ ```
114
+
115
+ Beyond the dictionary, the document includes (in this order):
116
+
117
+ - **Data Streams** — one row per stream: data source, category, primary key, schedule, refresh mode.
118
+ - **Field Mapping (Streams → DLO)** — every Data Lake field with its source field, DLO label, type, and a `KQ_`-prefix foreign-key flag.
119
+ - **DLO → DMO Field Mappings** — field-level source → target mappings, grouped by DLO → DMO pairing, with real labels joined from DLO metadata.
120
+ - **Relationships** — DMO-to-DMO links with cardinality and status (inactive standard relationships stay visible, never dropped):
121
+
122
+ ```markdown
123
+ ## Relationships
124
+
125
+ | Object | Field | Cardinality | Related Object | Related Field | Status |
126
+ | --- | --- | --- | --- | --- | --- |
127
+ | Account | ssot__PrimarySalesContactPointId__c | N:1 | ssot__ContactPointEmail__dlm | ssot__Id__c | INACTIVE |
128
+ ```
129
+
130
+ The ERD (renders natively in GitHub). Solid arrows are DLO → DMO mappings; dashed, cardinality-labeled arrows are active DMO → DMO relationships:
131
+
132
+ ```mermaid
133
+ graph LR
134
+ Order_Home__dll["Order (Home)"]
135
+ Individual__dmo["Individual"]
136
+ Order_Home__dll --> Individual__dmo
137
+ ContactPointEmail__dlm["Contact Point Email"] -.->|N:1| Individual__dmo
138
+ ```
139
+
140
+ Output is **deterministic** — the same org always produces byte-identical docs (collections are sorted alphabetically). That makes the output safe to commit and easy to diff.
141
+
142
+ ### What it reads — and what it doesn't yet
143
+
144
+ Under the hood it calls the **Data 360 Connect REST API** (`/services/data/v…/ssot/*`): `data-model-objects` (DMOs), `data-model-object-mappings` (DLO→DMO mappings + field names), `…/{dmo}/relationships` (DMO field types **and** the DMO-to-DMO relationship graph), and `data-streams` (DLOs + their fields + the per-stream and field-level mappings, including primary keys). Full request/response shapes are in [`agent_docs/api_reference.md`](agent_docs/api_reference.md).
145
+
146
+ **Not supported yet.** Calculated Insights and Identity Resolution rulesets are **not fetched** — those sections render as empty placeholders (e.g. `_No Calculated Insights found._`) and the `Identity Rulesets` count stays `0`. Documenting them is on the roadmap. (Profile and Engagement DMOs *are* covered — those are DMO categories, not separate entities.)
147
+
148
+ **Resilient by default.** If one DMO's metadata can't be read, that DMO is skipped with a warning and the rest of the document is still produced. If the org has more than 500 DMOs, the list is capped (with a warning). A failure fetching the DMO list or the data streams stops the run with a clean one-line error — never a stack trace.
149
+
150
+ ## Future: drift monitoring (paid tier)
151
+
152
+ The open-source CLI documents your org once. The thing that actually bites consultants is when an org **changes** after you've documented it — a client admin adds a DLO, a field type changes, an identity rule shifts — and your beautiful docs quietly go stale.
153
+
154
+ A hosted tier (planned) will turn the deterministic JSON snapshot into **drift monitoring**: re-run on a schedule, diff today's snapshot against the last one, and get a client-ready changelog of exactly what changed — without ever handing over your org credentials (drift runs in your own environment; the hosted service only stores snapshots and sends alerts). The CLI stays free forever; the recurring watching, history, and multi-org dashboard are the paid layer.
155
+
156
+ ## Hosted version
157
+
158
+ A hosted web UI is in the works at **[data360doc.com](https://data360doc.com)** _(placeholder)_ — same docs, plus scheduled drift alerts and a multi-org dashboard for agencies.
159
+
160
+ ## License
161
+
162
+ MIT
@@ -0,0 +1,132 @@
1
+ # data360-autodoc
2
+
3
+ [![PyPI version](https://img.shields.io/badge/pypi-coming%20soon-blue)](https://pypi.org/project/data360-autodoc/)
4
+ [![Tests](https://img.shields.io/badge/tests-passing-brightgreen)](#)
5
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE)
6
+
7
+ **Auto-generate human-readable documentation for Salesforce Data 360 (Data Cloud) orgs — in seconds, not days.**
8
+
9
+ Point it at an org and it produces a full data dictionary (DMOs, DLOs, fields, keys), the data streams and field-level mappings behind them, the DMO relationship graph, an ERD, and a deterministic JSON snapshot.
10
+
11
+ - 📓 **Data dictionary** — every DMO and DLO as clean Markdown tables, with field names, types, and keys.
12
+ - 🌊 **Data streams + field mappings** — per-stream source/refresh metadata, the Stream → DLO field map, and the DLO → DMO field map (with real labels, not just API names).
13
+ - 🔗 **Relationships + ERD** — DMO-to-DMO relationships with cardinality and status, plus a Mermaid graph of DLO → DMO mappings and relationship edges.
14
+ - 🧊 **JSON snapshot** — a deterministic, diff-friendly export of your whole org schema (the foundation for drift detection — see below).
15
+
16
+ ## Who it's for
17
+
18
+ Built for **Salesforce SI consultants and Data Cloud practitioners** who lose days hand-writing org documentation for every engagement. Works against any Data 360 org you can authenticate to with a connected app — including **Developer Edition / Data Cloud Dev orgs**, so you can try it on a sandbox before pointing it at a client.
19
+
20
+ ## Quick start
21
+
22
+ ```bash
23
+ pip install data360-autodoc
24
+
25
+ data360-autodoc generate \
26
+ --instance-url https://mydomain.my.salesforce.com \
27
+ --client-id <connected-app-consumer-key> \
28
+ --private-key ./server.pem \
29
+ --username admin@myorg.com \
30
+ --output ./docs \
31
+ --format all
32
+ ```
33
+
34
+ ```
35
+ Wrote acme-data-cloud.md
36
+ Wrote acme-data-cloud.mmd
37
+ Wrote acme-data-cloud.json
38
+ Generated docs for 24 DMOs, 11 DLOs, 0 Identity Rulesets
39
+ ```
40
+
41
+ Authentication uses the **OAuth 2.0 JWT Bearer flow** (connected app + private key — no passwords stored).
42
+
43
+ **Options that affect the metadata fetch:**
44
+
45
+ - `--sandbox` — authenticate against `test.salesforce.com` (sandbox / scratch orgs).
46
+ - `--api-version` — the Salesforce REST API version used for the `/ssot/*` metadata calls (e.g. `v62.0`). **By default the tool auto-detects your org's highest supported version** (from `GET /services/data/`), so you normally don't set this. Force it only if auto-detection picks a version where a Data Cloud endpoint misbehaves, or to pin output to a specific version. It must be a valid Salesforce REST API version your org supports.
47
+
48
+ (The `Identity Rulesets` count is currently always `0` — see "Not supported yet" below.)
49
+
50
+ ## What you get
51
+
52
+ `--format` controls the output:
53
+
54
+ | Format | Files | What it is |
55
+ |--------|-------|------------|
56
+ | `markdown` | `.md` + `.mmd` | Data dictionary + Mermaid ERD |
57
+ | `json` | `.json` | Deterministic org-schema snapshot |
58
+ | `pdf` | — | _Coming soon_ |
59
+ | `all` | all of the above | Everything |
60
+
61
+ ### Example output
62
+
63
+ The Markdown data dictionary. DMO field types come from the org's relationships metadata, and fall back to the mapped DLO field type when the DMO endpoint returns a generic type (shown as `(via DLO)`); DLO keys come from the data streams:
64
+
65
+ ```markdown
66
+ ## Data Model Objects (DMOs)
67
+
68
+ ### Individual (`Individual__dmo`)
69
+
70
+ | Name | Type | Key |
71
+ | --- | --- | --- |
72
+ | Email__c | EmailAddress | |
73
+ | Id__c | Text | |
74
+
75
+ ## Data Lake Objects (DLOs)
76
+
77
+ ### Order (Home) (`Order_Home__dll`)
78
+
79
+ | Name | Type | Key |
80
+ | --- | --- | --- |
81
+ | Amount | Number | |
82
+ | OrderId | Text | PrimaryKey |
83
+ ```
84
+
85
+ Beyond the dictionary, the document includes (in this order):
86
+
87
+ - **Data Streams** — one row per stream: data source, category, primary key, schedule, refresh mode.
88
+ - **Field Mapping (Streams → DLO)** — every Data Lake field with its source field, DLO label, type, and a `KQ_`-prefix foreign-key flag.
89
+ - **DLO → DMO Field Mappings** — field-level source → target mappings, grouped by DLO → DMO pairing, with real labels joined from DLO metadata.
90
+ - **Relationships** — DMO-to-DMO links with cardinality and status (inactive standard relationships stay visible, never dropped):
91
+
92
+ ```markdown
93
+ ## Relationships
94
+
95
+ | Object | Field | Cardinality | Related Object | Related Field | Status |
96
+ | --- | --- | --- | --- | --- | --- |
97
+ | Account | ssot__PrimarySalesContactPointId__c | N:1 | ssot__ContactPointEmail__dlm | ssot__Id__c | INACTIVE |
98
+ ```
99
+
100
+ The ERD (renders natively in GitHub). Solid arrows are DLO → DMO mappings; dashed, cardinality-labeled arrows are active DMO → DMO relationships:
101
+
102
+ ```mermaid
103
+ graph LR
104
+ Order_Home__dll["Order (Home)"]
105
+ Individual__dmo["Individual"]
106
+ Order_Home__dll --> Individual__dmo
107
+ ContactPointEmail__dlm["Contact Point Email"] -.->|N:1| Individual__dmo
108
+ ```
109
+
110
+ Output is **deterministic** — the same org always produces byte-identical docs (collections are sorted alphabetically). That makes the output safe to commit and easy to diff.
111
+
112
+ ### What it reads — and what it doesn't yet
113
+
114
+ Under the hood it calls the **Data 360 Connect REST API** (`/services/data/v…/ssot/*`): `data-model-objects` (DMOs), `data-model-object-mappings` (DLO→DMO mappings + field names), `…/{dmo}/relationships` (DMO field types **and** the DMO-to-DMO relationship graph), and `data-streams` (DLOs + their fields + the per-stream and field-level mappings, including primary keys). Full request/response shapes are in [`agent_docs/api_reference.md`](agent_docs/api_reference.md).
115
+
116
+ **Not supported yet.** Calculated Insights and Identity Resolution rulesets are **not fetched** — those sections render as empty placeholders (e.g. `_No Calculated Insights found._`) and the `Identity Rulesets` count stays `0`. Documenting them is on the roadmap. (Profile and Engagement DMOs *are* covered — those are DMO categories, not separate entities.)
117
+
118
+ **Resilient by default.** If one DMO's metadata can't be read, that DMO is skipped with a warning and the rest of the document is still produced. If the org has more than 500 DMOs, the list is capped (with a warning). A failure fetching the DMO list or the data streams stops the run with a clean one-line error — never a stack trace.
119
+
120
+ ## Future: drift monitoring (paid tier)
121
+
122
+ The open-source CLI documents your org once. The thing that actually bites consultants is when an org **changes** after you've documented it — a client admin adds a DLO, a field type changes, an identity rule shifts — and your beautiful docs quietly go stale.
123
+
124
+ A hosted tier (planned) will turn the deterministic JSON snapshot into **drift monitoring**: re-run on a schedule, diff today's snapshot against the last one, and get a client-ready changelog of exactly what changed — without ever handing over your org credentials (drift runs in your own environment; the hosted service only stores snapshots and sends alerts). The CLI stays free forever; the recurring watching, history, and multi-org dashboard are the paid layer.
125
+
126
+ ## Hosted version
127
+
128
+ A hosted web UI is in the works at **[data360doc.com](https://data360doc.com)** _(placeholder)_ — same docs, plus scheduled drift alerts and a multi-org dashboard for agencies.
129
+
130
+ ## License
131
+
132
+ MIT
@@ -0,0 +1 @@
1
+ """data360-autodoc: auto-generate documentation for Salesforce Data 360 orgs."""
@@ -0,0 +1 @@
1
+ """Command-line interface for data360-autodoc."""
@@ -0,0 +1,209 @@
1
+ """``data360-autodoc`` command-line entry point.
2
+
3
+ Orchestrates the one-shot pipeline::
4
+
5
+ auth (JWT bearer) -> fetch metadata -> render outputs -> write files
6
+
7
+ Outputs are selected with ``--format``:
8
+
9
+ - ``markdown`` — human-readable ``.md`` plus a ``.mmd`` Mermaid diagram
10
+ - ``json`` — deterministic ``.json`` snapshot (the drift-detection seam)
11
+ - ``pdf`` — not yet implemented (stub; warns and skips)
12
+ - ``all`` — markdown + json (+ pdf stub warning)
13
+
14
+ The ``.json`` snapshot is a first-class output, not a debug artifact: the paid
15
+ drift tier loads a prior snapshot and diffs it against a fresh fetch.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from pathlib import Path
22
+
23
+ import click
24
+
25
+ from data360_autodoc.fetcher.auth import (
26
+ AUD_PRODUCTION,
27
+ AUD_SANDBOX,
28
+ DEFAULT_TOKEN_URL,
29
+ SANDBOX_TOKEN_URL,
30
+ AuthError,
31
+ get_access_token,
32
+ )
33
+ from data360_autodoc.fetcher.metadata import MetadataError, fetch_metadata
34
+ from data360_autodoc.generator.markdown import render_markdown
35
+ from data360_autodoc.generator.mermaid import render_mermaid
36
+ from data360_autodoc.generator.snapshot import render_json
37
+
38
#: Valid values for the ``--format`` option. ``pdf`` is accepted but currently
#: only prints a "not yet implemented" notice; ``all`` selects every format.
FORMATS = ["markdown", "json", "pdf", "all"]
40
+
41
+
42
# Root command group: subcommands attach to it via ``@cli.command()``. The
# docstring below doubles as the group's ``--help`` text, so keep it user-facing.
@click.group()
def cli() -> None:
    """Auto-generate documentation for Salesforce Data 360 orgs."""
45
+
46
+
47
# ``generate`` subcommand. Pipeline: resolve a bearer token (either supplied
# directly or obtained via the JWT bearer flow) -> fetch the org metadata ->
# render the requested artifacts -> write them into the output directory.
# The docstring is the command's ``--help`` text, so it stays short.
@cli.command()
@click.option("--instance-url", required=True, help="Org base URL.")
@click.option(
    "--access-token",
    default=None,
    help="Use a pre-obtained OAuth access token and skip JWT auth. When set, "
    "--client-id / --private-key / --username are not needed.",
)
@click.option(
    "--client-id", default=None, help="Connected app consumer key (JWT auth)."
)
@click.option(
    "--private-key",
    "private_key_path",
    default=None,
    type=click.Path(exists=True, dir_okay=False),
    help="Path to the connected app's PEM private key (JWT auth).",
)
@click.option(
    "--username", default=None, help="Salesforce username to impersonate (JWT auth)."
)
@click.option(
    "--output",
    "output_dir",
    default=".",
    type=click.Path(file_okay=False),
    help="Directory to write output files into (created if missing).",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(FORMATS),
    default="all",
    show_default=True,
    help="Which artifacts to generate.",
)
@click.option(
    "--sandbox",
    is_flag=True,
    default=False,
    help="Authenticate against test.salesforce.com (sandbox/scratch orgs).",
)
@click.option(
    "--api-version",
    "api_version",
    default=None,
    help="Data API version (e.g. v62.0). Default: auto-detect the org's highest.",
)
@click.option(
    "--timeout",
    default=120.0,
    show_default=True,
    type=float,
    help="Per-request timeout (seconds) for metadata calls. Data Cloud is slow.",
)
def generate(
    instance_url: str,
    access_token: str | None,
    client_id: str | None,
    private_key_path: str | None,
    username: str | None,
    output_dir: str,
    output_format: str,
    sandbox: bool,
    api_version: str | None,
    timeout: float,
) -> None:
    """Fetch an org's metadata and write documentation artifacts."""
    if access_token:
        # A pre-obtained token short-circuits JWT auth entirely; the caller's
        # --instance-url is used as-is and --sandbox has no effect on this path.
        token = access_token
        org_url = instance_url
    else:
        # JWT bearer flow needs all three credentials; report every missing
        # flag in one usage error instead of failing on the first.
        missing = [
            flag
            for flag, value in (
                ("--client-id", client_id),
                ("--private-key", private_key_path),
                ("--username", username),
            )
            if not value
        ]
        if missing:
            raise click.UsageError(
                "Provide --access-token, or all of --client-id, --private-key, "
                f"--username for JWT auth. Missing: {', '.join(missing)}."
            )
        # --sandbox switches both the token endpoint and the JWT audience.
        token_url = SANDBOX_TOKEN_URL if sandbox else DEFAULT_TOKEN_URL
        audience = AUD_SANDBOX if sandbox else AUD_PRODUCTION
        try:
            auth = get_access_token(
                instance_url=instance_url,
                client_id=client_id,
                private_key_path=private_key_path,
                username=username,
                token_url=token_url,
                audience=audience,
            )
        except AuthError as exc:
            # ClickException prints a one-line error and exits non-zero.
            raise click.ClickException(str(exc)) from exc
        token = auth["access_token"]
        # Subsequent metadata calls use the instance URL from the auth result,
        # not the one passed on the command line.
        org_url = auth["instance_url"]

    def _progress(message: str, inline: bool) -> None:
        # Progress goes to stderr so it never pollutes the doc output. inline
        # uses a carriage return to update the DMO counter in place.
        if inline:
            click.echo(f"\r{message}", nl=False, err=True)
        else:
            click.echo(message, err=True)

    try:
        schema = fetch_metadata(
            instance_url=org_url,
            access_token=token,
            api_version=api_version,
            timeout=timeout,
            progress=_progress,
        )
    except MetadataError as exc:
        # Surface a clean one-line error and exit non-zero — never a traceback.
        raise click.ClickException(str(exc)) from exc

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    # Filename stem comes from the org name; fall back to a fixed stem when the
    # slug is empty (e.g. the name has no ASCII letters or digits).
    base = _slug(schema.org_name) or "data360"

    written: list[str] = []
    want_markdown = output_format in ("markdown", "all")
    want_json = output_format in ("json", "all")
    want_pdf = output_format in ("pdf", "all")

    if want_markdown:
        # The markdown format always produces two files: the .md document and
        # the .mmd Mermaid diagram (written with a trailing newline appended).
        md_path = out_dir / f"{base}.md"
        md_path.write_text(render_markdown(schema), encoding="utf-8")
        written.append(md_path.name)
        mmd_path = out_dir / f"{base}.mmd"
        mmd_path.write_text(render_mermaid(schema) + "\n", encoding="utf-8")
        written.append(mmd_path.name)

    if want_json:
        json_path = out_dir / f"{base}.json"
        json_path.write_text(render_json(schema), encoding="utf-8")
        written.append(json_path.name)

    if want_pdf:
        # PDF is a stub: warn on stderr and write nothing.
        click.echo("PDF output is not yet implemented (stub) — skipping.", err=True)

    # Final report: one line per file written (in write order), then a summary
    # of the entity counts taken from the fetched schema.
    for name in written:
        click.echo(f"Wrote {name}")
    click.echo(
        f"Generated docs for {len(schema.dmos)} DMOs, "
        f"{len(schema.dlos)} DLOs, "
        f"{len(schema.identity_rulesets)} Identity Rulesets"
    )
201
+
202
+
203
+ def _slug(value: str) -> str:
204
+ """Slugify an org name for use as an output filename stem."""
205
+ return re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
206
+
207
+
208
# Allow running this module directly as a script; it just invokes the ``cli``
# click group defined in this module.
if __name__ == "__main__":  # pragma: no cover
    cli()
@@ -0,0 +1 @@
1
+ """Salesforce Data 360 API client package."""
@@ -0,0 +1,110 @@
1
+ """Shared HTTP helpers for the Data 360 Connect REST API clients.
2
+
3
+ All ``/ssot/*`` fetchers share the same needs: a bearer-authenticated GET with
4
+ exponential-backoff retry, and ``nextPageUrl`` pagination with a cycle guard.
5
+ Centralizing them here keeps the per-endpoint clients (``metadata.py``,
6
+ ``streams.py``) small and consistent.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import time
12
+ from typing import Any, Final, Iterator
13
+
14
+ import requests
15
+
16
# Total number of GET attempts per request (the first try plus any retries).
_MAX_ATTEMPTS: Final = 3
# Sleep before the first retry, in seconds; doubled for each later retry.
_BACKOFF_BASE_SECONDS: Final = 1.0
18
+
19
+
20
class FetchError(RuntimeError):
    """Raised when a Data 360 API request fails.

    Covers a 4xx response, exhausted retries, a 200 response whose body is not
    valid JSON, and a detected pagination cycle.
    """
22
+
23
+
24
def get_json(url: str, *, access_token: str, timeout: float) -> Any:
    """Fetch ``url`` with bearer auth and return the decoded JSON body.

    Makes up to ``_MAX_ATTEMPTS`` attempts in total. A transport error, or any
    response that is neither 200 nor 4xx (in practice 5xx), counts as transient:
    the function sleeps ``_BACKOFF_BASE_SECONDS * 2**attempt`` and tries again
    (1s, then 2s with the current constants; no sleep after the final attempt).
    A 4xx response, or a 200 whose body is not JSON, fails immediately.

    Args:
        url: Absolute URL to request.
        access_token: OAuth bearer token.
        timeout: Per-request timeout in seconds.

    Returns:
        The decoded JSON body of the first 200 response.

    Raises:
        FetchError: On a 4xx response, a non-JSON 200 body, or once all
            attempts have failed.
    """
    request_headers = {
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json",
    }
    failure: str | None = None
    for attempt in range(_MAX_ATTEMPTS):
        response = None
        try:
            response = requests.get(url, headers=request_headers, timeout=timeout)
        except requests.RequestException as exc:
            failure = str(exc)

        # Response handling stays outside the try block on purpose, so a JSON
        # decode error is never mistaken for a retryable transport failure.
        if response is not None:
            status = response.status_code
            if status == 200:
                try:
                    return response.json()
                except ValueError as exc:
                    # A 200 with a non-JSON body (e.g. a proxy/login HTML page)
                    # must surface as FetchError, not a raw JSONDecodeError.
                    raise FetchError(
                        f"Non-JSON 200 response from {url}: {exc}"
                    ) from exc
            if 400 <= status < 500:
                # Client errors will not fix themselves: fail without retrying.
                raise FetchError(
                    f"Request rejected ({status}) for {url}: "
                    f"{response.text[:500]}"
                )
            failure = f"HTTP {status}: {response.text[:500]}"

        # Back off before the next attempt, but not after the last one.
        if attempt < _MAX_ATTEMPTS - 1:
            time.sleep(_BACKOFF_BASE_SECONDS * (2**attempt))

    raise FetchError(
        f"Request to {url} failed after {_MAX_ATTEMPTS} attempts: {failure}"
    )
70
+
71
+
72
def iter_pages(
    first_url: str, *, base_url: str, access_token: str, timeout: float
) -> Iterator[dict[str, Any]]:
    """Yield each page of a paginated ``/ssot/*`` response.

    Follows each page's ``nextPageUrl`` (absolute or relative) until exhausted,
    guarding against a self-referential or repeating link that would otherwise
    loop forever.

    Args:
        first_url: Absolute URL of the first page.
        base_url: Org base URL, used to resolve relative ``nextPageUrl`` values.
        access_token: OAuth bearer token.
        timeout: Per-request timeout in seconds.

    Yields:
        Each page's decoded JSON body (always a JSON object).

    Raises:
        FetchError: On a request failure, a detected pagination cycle, or a
            page whose JSON body is not an object.
    """
    seen: set[str] = set()
    next_url: str | None = first_url
    while next_url:
        if next_url in seen:
            raise FetchError(f"Pagination cycle detected at {next_url}")
        seen.add(next_url)
        page = get_json(next_url, access_token=access_token, timeout=timeout)
        if not isinstance(page, dict):
            # A valid-JSON body that is a list or scalar has no ``nextPageUrl``
            # to read; fail with FetchError rather than leaking a raw
            # AttributeError from ``page.get`` to the caller.
            raise FetchError(
                f"Unexpected {type(page).__name__} JSON body from {next_url}; "
                "expected an object"
            )
        yield page
        next_url = _resolve_next(page.get("nextPageUrl"), base_url)
102
+
103
+
104
+ def _resolve_next(raw: Any, base_url: str) -> str | None:
105
+ """Normalize a ``nextPageUrl`` value to an absolute URL, or ``None``."""
106
+ if not raw or not isinstance(raw, str):
107
+ return None
108
+ if raw.startswith("http://") or raw.startswith("https://"):
109
+ return raw
110
+ return f"{base_url.rstrip('/')}/{raw.lstrip('/')}"