datamasque-cli 1.2.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/CHANGELOG.md +13 -0
  2. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/CONTRIBUTING.md +44 -0
  3. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/PKG-INFO +4 -2
  4. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/README.md +3 -1
  5. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/datamasque-cli/skills/datamasque-cli/SKILL.md +1 -1
  6. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/ruleset-builder/.claude-plugin/plugin.json +1 -1
  7. datamasque_cli-1.4.0/claude-skills/ruleset-builder/skills/ruleset-builder/SKILL.md +176 -0
  8. datamasque_cli-1.4.0/claude-skills/ruleset-builder/skills/ruleset-builder/references/fk-cascade.md +109 -0
  9. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/ruleset-builder/skills/ruleset-builder/references/hash-columns-guide.md +8 -8
  10. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/ruleset-builder/skills/ruleset-builder/references/mask-definitions-guide.md +7 -7
  11. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/ruleset-builder/skills/ruleset-builder/references/ruleset-libraries-guide.md +7 -7
  12. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/ruleset-builder/skills/ruleset-builder/references/ruleset-yaml-reference.md +24 -0
  13. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/pyproject.toml +1 -1
  14. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/client.py +68 -4
  15. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/connections.py +14 -1
  16. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/runs.py +2 -3
  17. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/system.py +70 -8
  18. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_connections.py +60 -0
  19. datamasque_cli-1.4.0/tests/commands/test_system.py +192 -0
  20. datamasque_cli-1.4.0/tests/integration/test_system.py +97 -0
  21. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/test_client_auth.py +22 -1
  22. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/uv.lock +4 -4
  23. datamasque_cli-1.2.0/claude-skills/ruleset-builder/skills/ruleset-builder/SKILL.md +0 -175
  24. datamasque_cli-1.2.0/tests/commands/test_system.py +0 -38
  25. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/.claude-plugin/marketplace.json +0 -0
  26. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/.github/workflows/ci.yml +0 -0
  27. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/.github/workflows/release-testpypi.yml +0 -0
  28. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/.github/workflows/release.yml +0 -0
  29. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/.gitignore +0 -0
  30. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/LICENSE +0 -0
  31. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/Makefile +0 -0
  32. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/NOTICE +0 -0
  33. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/assets/demo.gif +0 -0
  34. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/README.md +0 -0
  35. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/datamasque-cli/.claude-plugin/plugin.json +0 -0
  36. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/ruleset-splitter/.claude-plugin/plugin.json +0 -0
  37. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/claude-skills/ruleset-splitter/skills/ruleset-splitter/SKILL.md +0 -0
  38. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/scripts/active_profile_env.py +0 -0
  39. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/scripts/bump_version.py +0 -0
  40. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/__init__.py +0 -0
  41. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/__init__.py +0 -0
  42. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/auth.py +0 -0
  43. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/discovery.py +0 -0
  44. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/files.py +0 -0
  45. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/ifm.py +0 -0
  46. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/ruleset_libraries.py +0 -0
  47. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/rulesets.py +0 -0
  48. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/seeds.py +0 -0
  49. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/commands/users.py +0 -0
  50. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/config.py +0 -0
  51. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/main.py +0 -0
  52. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/output.py +0 -0
  53. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/src/datamasque_cli/py.typed +0 -0
  54. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/__init__.py +0 -0
  55. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/__init__.py +0 -0
  56. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_auth.py +0 -0
  57. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_catalog.py +0 -0
  58. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_discovery.py +0 -0
  59. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_files.py +0 -0
  60. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_ifm.py +0 -0
  61. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_ruleset_libraries.py +0 -0
  62. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_rulesets.py +0 -0
  63. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_runs.py +0 -0
  64. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_seeds.py +0 -0
  65. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/commands/test_users.py +0 -0
  66. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/conftest.py +0 -0
  67. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/integration/README.md +0 -0
  68. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/integration/__init__.py +0 -0
  69. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/integration/conftest.py +0 -0
  70. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/integration/test_connections.py +0 -0
  71. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/integration/test_delete_safety.py +0 -0
  72. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/integration/test_rulesets.py +0 -0
  73. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/integration/test_runs.py +0 -0
  74. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/test_client_env.py +0 -0
  75. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/test_client_ifm.py +0 -0
  76. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/test_client_profile.py +0 -0
  77. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/test_config.py +0 -0
  78. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/test_output.py +0 -0
  79. {datamasque_cli-1.2.0 → datamasque_cli-1.4.0}/tests/test_version.py +0 -0
@@ -1,5 +1,18 @@
1
1
  # Changelog
2
2
 
3
+ ## v1.4.0
4
+
5
+ ### Added
6
+ - `dm connections create --file` now supports Databricks SQL Warehouse
7
+ (`"type": "databricks"`) and MongoDB (`"type": "mongodb"`) connections.
8
+ Both work with `list`, `get`, `create`, and `delete` like the existing connection types.
9
+
10
+ ## v1.3.0
11
+
12
+ ### Added
13
+ - `dm system ai-engine show` and `dm system ai-engine set <URL>` — view and
14
+ configure the AI Engine URL.
15
+
3
16
  ## v1.2.0
4
17
 
5
18
  ### Added
@@ -41,6 +41,50 @@ uv sync
41
41
  Then either activate the venv (`source .venv/bin/activate`)
42
42
  or prefix commands with `uv run`.
43
43
 
44
+ ## Running `dm` locally
45
+
46
+ `uv sync` installs the CLI in editable mode,
47
+ so the `dm` entry point in the venv reflects your working tree —
48
+ no reinstall after each edit.
49
+
50
+ ```console
51
+ uv run dm --version # one-shot, no venv activation needed
52
+ source .venv/bin/activate && dm --version # or activate once per shell
53
+ ```
54
+
55
+ Point it at a DataMasque instance.
56
+ For ad-hoc development, env vars are the lowest-friction path
57
+ (no `~/.config/datamasque-cli/config.toml` to clean up afterwards):
58
+
59
+ ```console
60
+ export DATAMASQUE_URL=http://127.0.0.1:8000
61
+ export DATAMASQUE_USERNAME=admin
62
+ export DATAMASQUE_PASSWORD='P@ssword12'
63
+ export DATAMASQUE_VERIFY_SSL=false # for self-signed local builds
64
+ dm system health
65
+ dm connections list
66
+ ```
67
+
68
+ For longer-lived work, save a profile with `dm auth login`
69
+ (stored at `~/.config/datamasque-cli/config.toml`, mode 600).
70
+
71
+ ### Pairing with a local `datamasque-python` checkout
72
+
73
+ `datamasque-cli` depends on the `datamasque-python` package
74
+ for its actual API client.
75
+ If you're changing both repos at once
76
+ (for example, adding a new endpoint that needs a CLI surface),
77
+ install the sibling checkout in editable mode against the CLI's venv:
78
+
79
+ ```console
80
+ uv pip install -e ../datamasque-python
81
+ ```
82
+
83
+ The dependency is satisfied by the local checkout
84
+ and edits to either repo are picked up immediately by `dm`.
85
+ A subsequent `uv sync` will replace it with the version pinned in `uv.lock` —
86
+ re-run the `uv pip install -e` if you want the local override back.
87
+
44
88
  ## Running the tests
45
89
 
46
90
  ```console
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamasque-cli
3
- Version: 1.2.0
3
+ Version: 1.4.0
4
4
  Summary: Official command-line interface for the DataMasque data-masking platform.
5
5
  Project-URL: Homepage, https://datamasque.com/
6
6
  Project-URL: Repository, https://github.com/datamasque/datamasque-cli
@@ -200,7 +200,7 @@ dm libraries usage <name> # Show rulesets using it
200
200
  ### In-flight masking
201
201
 
202
202
  The IFM service runs alongside the admin server,
203
- reached at `<DataMasque URL>/ifm` via the standard nginx topology.
203
+ reached at `<DataMasque URL>/ifm`.
204
204
 
205
205
  ```console
206
206
  dm ifm list # List ruleset plans
@@ -278,6 +278,8 @@ dm system upload-licence ./licence.lic # Upload a licence file
278
278
  dm system logs -o logs.tar.gz # Download application logs
279
279
  dm system admin-install --email admin@co.com # Initial admin setup
280
280
  dm system set-locality AU # Set system locality
281
+ dm system ai-engine show # Show the configured AI Engine URL
282
+ dm system ai-engine set <URL> # Point DataMasque at an AI Engine
281
283
  ```
282
284
 
283
285
  ## JSON output
@@ -170,7 +170,7 @@ dm libraries usage <name> # Show rulesets using it
170
170
  ### In-flight masking
171
171
 
172
172
  The IFM service runs alongside the admin server,
173
- reached at `<DataMasque URL>/ifm` via the standard nginx topology.
173
+ reached at `<DataMasque URL>/ifm`.
174
174
 
175
175
  ```console
176
176
  dm ifm list # List ruleset plans
@@ -248,6 +248,8 @@ dm system upload-licence ./licence.lic # Upload a licence file
248
248
  dm system logs -o logs.tar.gz # Download application logs
249
249
  dm system admin-install --email admin@co.com # Initial admin setup
250
250
  dm system set-locality AU # Set system locality
251
+ dm system ai-engine show # Show the configured AI Engine URL
252
+ dm system ai-engine set <URL> # Point DataMasque at an AI Engine
251
253
  ```
252
254
 
253
255
  ## JSON output
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: datamasque-cli
3
- description: Use when the user wants to interact with a DataMasque instance — start masking runs, check run status, list connections or rulesets, manage seeds, manage ruleset libraries, check system health, or any task involving the DataMasque API. Triggers on "mask the data", "start a run", "check the run", "list connections", "list rulesets", "upload a seed", "check DataMasque health", "dm status", "ruleset library", or any request to operate DataMasque programmatically.
3
+ description: Use when the user wants to interact with a DataMasque instance — start masking runs, check run status, list connections or rulesets, manage seeds, manage ruleset libraries, check system health, configure the AI Engine, or any task involving the DataMasque API. Triggers on "mask the data", "start a run", "check the run", "list connections", "list rulesets", "upload a seed", "check DataMasque health", "dm status", "ruleset library", "configure the AI Engine", "set the AI Engine URL", or any request to operate DataMasque programmatically.
4
4
  argument-hint: e.g. "start a run with docx_masking on var_input_docx"
5
5
  user-invocable: true
6
6
  ---
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ruleset-builder",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Convert auto-generated DataMasque rulesets into production-ready form. Validate and iterate.",
5
5
  "author": { "name": "DataMasque Ltd" },
6
6
  "repository": "https://github.com/datamasque/datamasque-cli",
@@ -0,0 +1,176 @@
1
+ ---
2
+ name: ruleset-builder
3
+ description: Use when the user wants to turn auto-generated DataMasque rulesets into production-ready ones — extract a `ruleset_library`, add `hash_columns`, refine a ruleset, or clean up generated YAML. Triggers on "ruleset builder", "build ruleset", "refine ruleset", "add hash columns", "add ruleset library", "production ruleset", "clean up ruleset".
4
+ argument-hint: e.g. "build a ruleset from these generated files"
5
+ user-invocable: true
6
+ ---
7
+
8
+ # Ruleset Builder
9
+
10
+ Transform auto-generated DataMasque rulesets into production-ready rulesets with three improvements:
11
+ 1. **`ruleset_library` references** — `$ref` links replacing every repeated inline mask
12
+ 2. **`hash_columns`** — on every applicable `mask_table` task for deterministic consistency
13
+ 3. **Clean structure** — `skip_defaults`, no doc blocks, validated
14
+
15
+ FK cascade is automatic: mask the parent PK with `imitate_unique` (or `imitate_uuid` / `imitate_nz_ird`) and the engine replicates the rule onto every FK column referencing it. **Do NOT add explicit rules for FK columns.** Avoid `from_unique_imitate` and `mask_unique_key` (both deprecated). Never skip IDs.
16
+
17
+ 5-step process (1–5). Use `TaskCreate` to track all 5; report after each step before proceeding. The prompt must include business domain and application type — ask if missing.
18
+
19
+ ---
20
+
21
+ ## Step 1: Report versions
22
+
23
+ Report the Ruleset Builder version (from `plugin.json`) and `dm version` so the operator can correlate output with releases.
24
+
25
+ ---
26
+
27
+ ## Step 2: Read reference docs
28
+
29
+ Canonical mask reference:
30
+ <https://portal.datamasque.com/portal/documentation/latest/masking-functions-overview.html>
31
+
32
+ Read all of these before any other work:
33
+ ```
34
+ ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/fk-cascade.md
35
+ ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/mask-definitions-guide.md
36
+ ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/hash-columns-guide.md
37
+ ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/ruleset-yaml-reference.md
38
+ ```
39
+
40
+ ---
41
+
42
+ ## Step 3: Extract ruleset_library
43
+
44
+ Write a Python script using `ruamel.yaml` (`uv pip install ruamel.yaml`).
45
+
46
+ Process the input YAML. For each `mask_table` task, replace every inline mask with a `$ref` to a rule in `ruleset_library.yaml`. Build the library progressively — read its current state at the start of each iteration, create it if absent.
47
+
48
+ The library `masks` section structure:
49
+ ```yaml
50
+ version: "1.0"
51
+ masks:
52
+ rule_name:
53
+ type: rule_type
54
+ ...params
55
+ ```
56
+
57
+ ### Classification rules (apply in order)
58
+
59
+ **1. ID columns** — any column ending in `_ID`, `_NO`, `_NR`, or `_NBR` is an entity identifier.
60
+ - **FK side: drop the rule entirely.** If an ID column is a foreign key (the table's `Foreign Keys` metadata in the discovery CSV has an entry for it), do NOT emit a rule for it. The engine cascades automatically from the parent PK rule. See `fk-cascade.md`.
61
+ - **PK side: use `imitate_unique` with `seed:`.** Strip adjective/verb prefixes before the noun: `PREVIOUS_`, `OLD_`, `TRANSFERRED_`, `PRIOR_`, `CURR_`, `NEW_`, `NEXT_`, `ALT_`, `PARENT_`, `CHILD_`, `SOURCE_`, `TARGET_`, `ORIG_`, `PENDING_`, `ARCHIVED_`, `DELETED_`. Extract the core entity (`PREVIOUS_INVOICE_ID` → `invoice`).
62
+ - Library entry name: `{entity}_id`. Reference it as `$ref: "Global/RuleLib#masks/{entity}_id"`.
63
+ - Library entry body: `type: imitate_unique`, `seed: "{entity}"`. The `seed` is optional but recommended: it namespaces by entity so unrelated IDs don't collide (e.g. `customer.id=42` doesn't mask to the same value as `product.id=42`). Doesn't affect FK cascade.
64
+ - This overrides whatever mask was originally generated (even `from_random_number`).
65
+
66
+ **2. Named patterns** — detect by mask structure:
67
+
68
+ | Pattern | Detection | Library rule |
69
+ |-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|
70
+ | Email | `chain(concat(concat(firstName+lastName, glue='.')+email_suffix)+transform_case(lower))` | `email_address` |
71
+ | Full name | `chain(concat(firstName+lastName, glue=' ')+take_substring)` OR plain `concat(firstName+lastName, glue=' ')` — column not containing USERNAME/LOGIN | `full_name` |
72
+ | Username | Same mask as full_name but column name contains USERNAME, USER_NAME, LOGIN, LOGON | `username` |
73
+ | First name only | `from_file` with firstNames seed | `name_first` |
74
+ | Last name only | `from_file` with lastNames seed | `name_last` |
75
+ | DOB | Column name contains DOB/BIRTH/DATE_OF_BIRTH — use `retain_age` regardless of original type | `dob` |
76
+ | Company | `chain(from_file(companies)+take_substring)` | `company_name` |
77
+ | Country name | `from_file(country_codes, seed_column=name)` | `country_name` |
78
+ | Country alpha-2 | `from_file(country_codes, seed_column=alpha_2)` | `country_code_2` |
79
+ | Country alpha-3 | `from_file(country_codes, seed_column=alpha_3)` | `country_code_3` |
80
+ | Phone/fax | `imitate` on column name containing PHONE, TEL, FAX, MOBILE, CELL | `phone` |
81
+ | Address line 1 | `from_file(addresses, seed_column=street_address)` on LINE_1/ADDRESS_LINE_1 columns | `address_line1` |
82
+ | Address line N | Same for LINE_2, LINE_3 etc. | `address_lineN` |
83
+ | Address full | `from_file(addresses, seed_column=street_address)` on non-line-numbered columns | `address_full` |
84
+ | Address expr | `concat(address+city+state+postcode, glue=', ')` | `network_address_expr` |
85
+ | City | `from_file(addresses, seed_column=city)` | `city` |
86
+ | Postcode | `from_file(addresses, seed_column=postcode)` | `post_code` |
87
+ | Suburb | `from_file(addresses, seed_column=suburb)` | `suburb` |
88
+ | Occupation | `from_file(occupations)` | `occupation` |
89
+
90
+ **3. Remaining** — group by column name concept. Where column names share a root (e.g., `RESULT3_VALUE`, `RESULT5_VALUE` → `result_value`; `GENERAL_2`, `GENERAL_6` → `general`), use one shared rule. Strip adjective prefixes. Use first occurrence's parameters.
91
+
92
+ - `imitate_unique` (non-ID cols) → `{col_group}: type: imitate_unique, seed: "{col_group}"` (seed recommended for namespacing; see ID columns section).
93
+ - `from_random_date` → `{col_group}: type: from_random_date, min/max from first occurrence`
94
+ - `from_random_number` → `{col_group}: type: from_random_number, min/max from first occurrence`
95
+ - String catch-all → `{col_group}: type: imitate_unique, seed: "{col_group}"` (use `imitate` only for types `imitate_unique` can't handle, e.g. datetime, bool).
96
+ - Complex chains → keep structure, group by column name
97
+
98
+ ### Output format
99
+
100
+ `Global/RuleLib` below is a placeholder for `<namespace>/<library_name>` — substitute the operator's real values, and create the library with `dm libraries create` before running the ruleset.
101
+
102
+ ```yaml
103
+ version: '1.0'
104
+ skip_defaults:
105
+ - ''
106
+ - null
107
+ imports:
108
+ - Global/RuleLib
109
+
110
+ tasks:
111
+ - type: mask_table
112
+ table: '"SCHEMA"."TABLE"'
113
+ key: '"ROWID"'
114
+ rules:
115
+ - column: '"FIRST_NAME"'
116
+ masks:
117
+ - $ref: "Global/RuleLib#masks/name_first"
118
+ ```
119
+
120
+ Do NOT write a custom YAML serializer. Use the `ruamel.yaml` round-trip dumper. Use `DoubleQuotedScalarString` for `$ref` values.
121
+
122
+ **Report:** "Step 3 done — extracted N rule library definitions: [list each name and usage count]."
123
+
124
+ ---
125
+
126
+ ## Step 4: Add hash_columns
127
+
128
+ Write a Python script that:
129
+
130
+ **Parse the discovery CSV** (comma-separated):
131
+ `Selected`, `Table schema`, `Table name`, `Column name`, `Data Type`, `Constraint`, `Foreign Keys`, `Max Length`, `Numeric Precision`, `Numeric Scale`, `Reason for flag`, `Flagged by`, `Data classifications`
132
+
133
+ Build a lookup of `(schema, table)` → columns with constraint and FK metadata:
134
+ - `Constraint` patterns: `Primary(COL)`, `Unique(COL)`, `Foreign(COL)`
135
+ - `Foreign Keys` JSON: `["FK_NAME", "SCHEMA.TABLE.COLUMN"]` — index 1 is the fully qualified referenced column; its `SCHEMA.TABLE` prefix is the referenced table
136
+
137
+ **For each `mask_table` task:**
138
+
139
+ 1. **Pick hash column** using this priority:
140
+ - **Parent-entity FK first**: find FK columns where the referenced table is the parent of the current table — i.e., the current table name *starts with* the referenced table name (e.g., `ACCOUNT_HISTORY` starts with `ACCOUNT` → use `ACCOUNT_ID`). This avoids choosing lookup-table FKs (e.g., don't choose `ACCOUNT_TYPE_ID` in `ACCOUNT` just because it has a FK).
141
+ - **PK fallback**: if no parent-entity FK found, use the Primary Key column (never `ROWID`)
142
+ - **Archive table fallback**: if no PK in the CSV (archive tables `_A`, `_A_R`, `_R` often lack explicit keys), strip the suffix and look up the base table recursively
143
+ - **Composite PKs**: prefer `*_ID` or `*_NO` columns; deduplicate derivatives (`ACCOUNT_ID` + `PREVIOUS_ACCOUNT_ID` → keep `ACCOUNT_ID`)
144
+ - **Skip** if no suitable column found
145
+
146
+ 2. Insert `hash_columns: ["COLUMN_NAME"]` after the `key:` field
147
+
148
+ 3. Verify all rules in output are `$ref` — fix any remaining inline masks
149
+
150
+ 4. Write to output file
151
+
152
+ **Report:** "Step 4 done — added hash_columns to N tables, skipped M (all-unique), skipped K (no suitable key). Top hash columns: [column → count]."
153
+
154
+ ---
155
+
156
+ ## Step 5: Validate and clean up
157
+
158
+ Remove any comment lines containing `ROWID`.
159
+
160
+ Run `dm rulesets validate --file <output_file> --type database`
161
+ (use `--type file` for file-masking rulesets).
162
+
163
+ Fix any errors and re-validate until passing.
164
+
165
+ ---
166
+
167
+ ## Summary
168
+
169
+ | Metric | Value |
170
+ |----------------------------|----------------|
171
+ | Total tables | N |
172
+ | Mask definitions extracted | N (list names) |
173
+ | Tables with hash_columns | N |
174
+ | Tables skipped (no key) | N |
175
+ | Validation | passed/failed |
176
+ | Output file | path |
@@ -0,0 +1,109 @@
1
+ # FK Cascade Invariant
2
+
3
+ This is the most important rule when refining a DataMasque ruleset that spans
4
+ related tables. Get this wrong and you either leak identity (by skipping
5
+ IDs entirely) or break the engine (by adding rules for FK columns).
6
+
7
+ ## The rule
8
+
9
+ **Mask only the parent PK column. The engine cascades the same masked value
10
+ to every FK column referencing it.**
11
+
12
+ Three masks support this cascade:
13
+
14
+ - `imitate_unique` — recommended for new work.
15
+ - `imitate_uuid` — for UUID-shaped IDs.
16
+ - `imitate_nz_ird` — for NZ IRD numbers.
17
+
18
+ (`from_unique_imitate` and `mask_unique_key` are deprecated; do not emit.)
19
+
20
+ When `mask_table` runs and a rule on a referenced column uses one of these
21
+ masks, the engine:
22
+
23
+ 1. Discovers child tables with FKs referencing this column.
24
+ 2. Auto-replicates the parent's rule onto every FK column.
25
+ 3. Same mask config → same masked output → joins survive.
26
+
27
+ This is documented at
28
+ <https://portal.datamasque.com/portal/documentation/latest/unique-masks.html>:
29
+
30
+ > "You can apply an `imitate_unique` mask to a primary key column or a
31
+ > column that is used as a foreign key in another table. References will be
32
+ > updated automatically. Composite primary keys are supported."
33
+
34
+ ## Worked example
35
+
36
+ Schema:
37
+ - `customers.id` (PK), `customers.email`
38
+ - `orders.id` (PK), `orders.customer_id` (FK → `customers.id`), `orders.tracking_number`
39
+
40
+ Correct ruleset:
41
+
42
+ ```yaml
43
+ - type: mask_table
44
+ table: customers
45
+ key: id
46
+ rules:
47
+ - column: id
48
+ masks:
49
+ - type: imitate_unique
50
+ seed: customer
51
+ - column: email
52
+ masks:
53
+ - type: from_file
54
+ seed_file: DataMasque_emails.csv
55
+ seed_column: email
56
+
57
+ - type: mask_table
58
+ table: orders
59
+ key: id
60
+ rules:
61
+ # customer_id is intentionally absent — the engine replicates the
62
+ # `customers.id` rule onto it automatically. Adding it here would
63
+ # be rejected by the runtime FK check.
64
+ - column: tracking_number
65
+ masks:
66
+ - type: imitate_unique
67
+ seed: tracking
68
+ ```
69
+
70
+ After the run, `orders.customer_id` holds the same masked values as
71
+ `customers.id`, joins remain intact, and `tracking_number` is independently
72
+ masked with its own seed.
73
+
74
+ ## Anti-patterns to refuse
75
+
76
+ - **Adding explicit FK rules** ("I'll mask both PK and FK with shared
77
+ `$ref` so the cascade works"). The runtime rejects this by default with
78
+ the error:
79
+ *"To preserve referential integrity, the following foreign key columns
80
+ cannot be directly masked by this task."*
81
+ The engine will replicate the rule for you; adding your own conflicts.
82
+ - **Skipping IDs to "preserve FK joins"**. Leaves identifiers in plain
83
+ sight. Mask the parent PK with `imitate_unique` — joins survive via
84
+ the auto-cascade.
85
+ - **Inventing linking parameters** (`source_table`, `source_column`,
86
+ `parent_column`, `link_to`). None of these exist on any DataMasque mask.
87
+ - **Inventing a hashing mask** (`hash_text`, `hash`, `link`, `match_id`).
88
+ None of these exist. `imitate_unique` is the deterministic mask.
89
+ - **Using `from_unique_imitate` or `mask_unique_key`**. Both deprecated.
90
+ `imitate_unique` replaces both.
91
+
92
+ ## Cross-run consistency requires `run_secret`
93
+
94
+ Within a single run, `imitate_unique` is deterministic via a per-run
95
+ `insecure_seed`. Across runs, masked values stay consistent only if every
96
+ run is invoked with the same `run_secret`. Without it, the same input maps to a
97
+ different masked value next run. If cross-run consistency matters, flag
98
+ this in the final summary.
99
+
100
+ ## Self-check before finishing
101
+
102
+ For each FK relationship in the schema:
103
+
104
+ 1. Is the parent PK masked with `imitate_unique`, `imitate_uuid`, or
105
+ `imitate_nz_ird`?
106
+ 2. Is the FK column **absent** from your output (no explicit rule)?
107
+ 3. Are `from_unique_imitate` and `mask_unique_key` absent from your output?
108
+
109
+ If any answer is "no", fix it before validation.
@@ -71,14 +71,14 @@ hash_columns:
71
71
 
72
72
  Every table belongs to a domain entity. Find the column that identifies that entity:
73
73
 
74
- | Domain | Typical hash column | Examples |
75
- |--------|-------------------|----------|
76
- | Customer | `cust_id`, `customer_id`, `client_id` | CUST_MASTER, CUST_ADDRESS |
77
- | Account | `acc_id`, `account_id`, `account_no` | DEP_ACCOUNT, DEP_EMAIL_ALERT |
78
- | Card | `card_id`, `card_no` | CARD_MASTER, CARD_INSURANCE |
79
- | Loan | `loan_id`, `loan_no` | LOAN_COLLATERAL, LOAN_GUARANTOR |
80
- | Employee | `emp_id`, `emp_no`, `employee_id` | COM_EMPLOYEE, COM_EMP_ROLE |
81
- | Transaction | `tx_id`, `trf_id`, `fx_tx_id` | TRF_MASTER, FX_RECEIPT |
74
+ | Domain | Typical hash column | Examples |
75
+ |-------------|---------------------------------------|---------------------------------|
76
+ | Customer | `cust_id`, `customer_id`, `client_id` | CUST_MASTER, CUST_ADDRESS |
77
+ | Account | `acc_id`, `account_id`, `account_no` | DEP_ACCOUNT, DEP_EMAIL_ALERT |
78
+ | Card | `card_id`, `card_no` | CARD_MASTER, CARD_INSURANCE |
79
+ | Loan | `loan_id`, `loan_no` | LOAN_COLLATERAL, LOAN_GUARANTOR |
80
+ | Employee | `emp_id`, `emp_no`, `employee_id` | COM_EMPLOYEE, COM_EMP_ROLE |
81
+ | Transaction | `tx_id`, `trf_id`, `fx_tx_id` | TRF_MASTER, FX_RECEIPT |
82
82
 
83
83
  ### Step 2: Check foreign keys in the DDL
84
84
 
@@ -175,14 +175,14 @@ tasks:
175
175
 
176
176
  Common seed files for `from_file` masks:
177
177
 
178
- | Category | Files |
179
- |----------|-------|
180
- | Names | `DataMasque_firstNames_mixed.csv`, `DataMasque_lastNames_v2.csv` |
178
+ | Category | Files |
179
+ |-----------|-------------------------------------------------------------------------------------------------------|
180
+ | Names | `DataMasque_firstNames_mixed.csv`, `DataMasque_lastNames_v2.csv` |
181
181
  | Addresses | `DataMasque_US_addresses.csv`, `DataMasque_AU_addresses_real.csv`, `DataMasque_NZ_addresses_real.csv` |
182
- | Companies | `DataMasque_companies.csv`, `DataMasque_NZ_companies.csv`, `DataMasque_AU_companies.csv` |
183
- | Email | `DataMasque_fake_email_suffixes.csv`, `DataMasque_email_suffixes.csv` |
184
- | Reference | `DataMasque_country_codes.csv`, `DataMasque_occupations.csv` |
185
- | Cards | `DataMasque_credit_card_numbers.csv`, `DataMasque_credit_card_prefixes.csv` |
182
+ | Companies | `DataMasque_companies.csv`, `DataMasque_NZ_companies.csv`, `DataMasque_AU_companies.csv` |
183
+ | Email | `DataMasque_fake_email_suffixes.csv`, `DataMasque_email_suffixes.csv` |
184
+ | Reference | `DataMasque_country_codes.csv`, `DataMasque_occupations.csv` |
185
+ | Cards | `DataMasque_credit_card_numbers.csv`, `DataMasque_credit_card_prefixes.csv` |
186
186
 
187
187
  Regional variants exist for BR, IN, AU, NZ, US.
188
188
  Use `from_file` when there are more than ~50 distinct values;
@@ -143,13 +143,13 @@ tasks: [...]
143
143
 
144
144
  ## Libraries vs YAML Anchors
145
145
 
146
- | Feature | YAML Anchors (`&`/`*`) | Libraries (`$ref`) |
147
- |---------|----------------------|-------------------|
148
- | Scope | Within one ruleset | Across multiple rulesets |
149
- | Management | Inline in YAML | Managed via API/CLI, versioned |
150
- | Syntax | `<<: *anchor_name` | `$ref: "lib#path"` |
151
- | Override | `<<:` merge key | Not supported (use as-is) |
152
- | Best for | Single-ruleset reuse | Organisation-wide standards |
146
+ | Feature | YAML Anchors (`&`/`*`) | Libraries (`$ref`) |
147
+ |------------|------------------------|--------------------------------|
148
+ | Scope | Within one ruleset | Across multiple rulesets |
149
+ | Management | Inline in YAML | Managed via API/CLI, versioned |
150
+ | Syntax | `<<: *anchor_name` | `$ref: "lib#path"` |
151
+ | Override | `<<:` merge key | Not supported (use as-is) |
152
+ | Best for | Single-ruleset reuse | Organisation-wide standards |
153
153
 
154
154
  **Recommendation:**
155
155
  - Start with YAML anchors (`mask_definitions`) for within-ruleset deduplication
@@ -72,6 +72,16 @@ For PostgreSQL/MySQL, plain names work: `table: users`, `key: id`.
72
72
 
73
73
  ## Mask Types Quick Reference
74
74
 
75
+ This is the **closed list** of every `type:` value DataMasque accepts. Do
76
+ not invent mask types or parameters (no `source_table`, no `link_to`, no
77
+ `parent_column` — none exist). For per-mask parameter details, see the
78
+ canonical source:
79
+ <https://portal.datamasque.com/portal/documentation/latest/masking-functions-overview.html>.
80
+
81
+ For a deterministic hash, use `imitate_unique` (or `imitate_uuid` for UUIDs)
82
+ optionally with `seed:` to namespace. The cascade is automatic; no
83
+ cross-table reference parameter exists. See `fk-cascade.md`.
84
+
75
85
  ### Generic
76
86
  - `from_fixed` — fixed replacement value
77
87
  - `from_column` — copy from another column
@@ -124,6 +134,20 @@ For PostgreSQL/MySQL, plain names work: `table: users`, `key: id`.
124
134
  ### Document
125
135
  - `json` — mask JSON fields within a column
126
136
  - `xml` — mask XML elements within a column
137
+ - `unstructured_text` — mask entities inside free text
138
+
139
+ ### Commonly-hallucinated names that do NOT exist
140
+
141
+ These plausible-sounding names are not in DataMasque. Refuse to emit them:
142
+
143
+ | Hallucinated name | What was wanted | Use instead |
144
+ |-------------------------------------------------------------|-----------------------------------|---------------------------------------------------------------|
145
+ | `hash_text`, `hash` | deterministic hash of a value | `imitate_unique` (or `imitate_uuid` for UUIDs) |
146
+ | `link`, `match_id`, `link_to` | join two columns after masking | shared `imitate_unique` config on both sides |
147
+ | `from_random_words` | random words / short text | `from_random_text` (random chars) or `from_file` |
148
+ | `from_random_string` | random string | `from_random_text` |
149
+ | `redact`, `mask_value` | constant placeholder | `from_fixed` with `value:` |
150
+ | `source_table`, `source_column`, `parent_column`, `link_to` | param to point a FK at its parent | does not exist — cascade is automatic with shared mask config |
127
151
 
128
152
  ## skip_defaults
129
153
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datamasque-cli"
3
- version = "1.2.0"
3
+ version = "1.4.0"
4
4
  description = "Official command-line interface for the DataMasque data-masking platform."
5
5
  authors = [
6
6
  { name = "DataMasque Ltd" },
@@ -48,6 +48,25 @@ def profile_from_env() -> Profile | None:
48
48
  return None
49
49
 
50
50
 
51
+ def _profile_from_env_url_only() -> Profile | None:
52
+ """Build a URL-only profile from `DATAMASQUE_URL`, with empty username/password.
53
+
54
+ Used by the unauthenticated client factory so callers can hit anonymous
55
+ endpoints (admin-install, health) without setting `DATAMASQUE_USERNAME`
56
+ and `DATAMASQUE_PASSWORD` -- those fields aren't read by anonymous calls
57
+ and demanding them is friction for the first-run setup workflow.
58
+ """
59
+ url = os.environ.get(ENV_URL)
60
+ if not url:
61
+ return None
62
+ return Profile(
63
+ url=url.rstrip("/"),
64
+ username="",
65
+ password="",
66
+ verify_ssl=_verify_ssl_from_env(default=True),
67
+ )
68
+
69
+
51
70
  def _resolve_profile(config: Config, profile_name: str | None) -> Profile:
52
71
  profile = config.get_profile(profile_name)
53
72
  if not profile.is_configured:
@@ -60,6 +79,25 @@ def _resolve_profile(config: Config, profile_name: str | None) -> Profile:
60
79
  return profile
61
80
 
62
81
 
82
+ def _resolve_profile_for_unauthenticated(profile_name: str | None) -> Profile:
83
+ """Resolve a profile for an unauthenticated call -- only the URL is required.
84
+
85
+ Order: explicit `--profile`, env vars (URL-only is sufficient here),
86
+ saved active profile. If none yield a URL, abort with a clear hint.
87
+ """
88
+ if profile_name is not None:
89
+ profile = load_config().get_profile(profile_name)
90
+ else:
91
+ profile = _profile_from_env_url_only() or load_config().get_profile()
92
+ if not profile.url:
93
+ abort(
94
+ "No DataMasque URL configured.",
95
+ code=ErrorCode.AUTH_REQUIRED,
96
+ hint=f"Set {ENV_URL} or run: dm auth login",
97
+ )
98
+ return profile
99
+
100
+
63
101
  def _resolve_profile_with_verify(profile_name: str | None) -> tuple[Profile, bool]:
64
102
  """Resolve the active `Profile` and apply env-var overrides for `verify_ssl`."""
65
103
  env_profile = profile_from_env() if profile_name is None else None
@@ -98,17 +136,43 @@ def get_client(profile_name: str | None = None) -> DataMasqueClient:
98
136
  `DATAMASQUE_VERIFY_SSL` always wins over the stored profile so you can
99
137
  flip TLS verification per-call without re-running `dm auth login`.
100
138
  """
101
- profile, verify_ssl = _resolve_profile_with_verify(profile_name)
139
+ client, profile, verify_ssl = _build_client(profile_name)
140
+ _authenticate_or_abort(client, profile.url, verify_ssl=verify_ssl)
141
+ return client
142
+
143
+
144
+ def get_unauthenticated_client(profile_name: str | None = None) -> DataMasqueClient:
145
+ """Build a `DataMasqueClient` without performing the up-front login handshake.
146
+
147
+ Used by commands that hit endpoints which don't require — or can't yet
148
+ use — a token. `admin-install` is the canonical example: on a fresh
149
+ server there's no user to authenticate as, so `client.authenticate()`
150
+ would always fail before the command ran.
151
+
152
+ Only `DATAMASQUE_URL` (or a profile with a URL) is required — username
153
+ and password aren't read by anonymous endpoints, so demanding them
154
+ would be unnecessary friction for first-run setup.
155
+ """
156
+ profile = _resolve_profile_for_unauthenticated(profile_name)
157
+ verify_ssl = _verify_ssl_from_env(default=profile.verify_ssl)
102
158
  instance_config = DataMasqueInstanceConfig(
103
159
  base_url=profile.url,
104
160
  username=profile.username,
105
161
  password=profile.password,
106
162
  verify_ssl=verify_ssl,
107
163
  )
164
+ return DataMasqueClient(instance_config)
108
165
 
109
- client = DataMasqueClient(instance_config)
110
- _authenticate_or_abort(client, profile.url, verify_ssl=verify_ssl)
111
- return client
166
+
167
+ def _build_client(profile_name: str | None) -> tuple[DataMasqueClient, Profile, bool]:
168
+ profile, verify_ssl = _resolve_profile_with_verify(profile_name)
169
+ instance_config = DataMasqueInstanceConfig(
170
+ base_url=profile.url,
171
+ username=profile.username,
172
+ password=profile.password,
173
+ verify_ssl=verify_ssl,
174
+ )
175
+ return DataMasqueClient(instance_config), profile, verify_ssl
112
176
 
113
177
 
114
178
  # Substrings that suggest the underlying error was a TLS failure rather than