datamasque-cli 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/CHANGELOG.md +6 -0
  2. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/PKG-INFO +4 -2
  3. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/README.md +3 -1
  4. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/datamasque-cli/skills/datamasque-cli/SKILL.md +1 -1
  5. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/ruleset-builder/.claude-plugin/plugin.json +1 -1
  6. datamasque_cli-1.3.0/claude-skills/ruleset-builder/skills/ruleset-builder/SKILL.md +176 -0
  7. datamasque_cli-1.3.0/claude-skills/ruleset-builder/skills/ruleset-builder/references/fk-cascade.md +109 -0
  8. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/ruleset-builder/skills/ruleset-builder/references/hash-columns-guide.md +8 -8
  9. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/ruleset-builder/skills/ruleset-builder/references/mask-definitions-guide.md +7 -7
  10. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/ruleset-builder/skills/ruleset-builder/references/ruleset-libraries-guide.md +7 -7
  11. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/ruleset-builder/skills/ruleset-builder/references/ruleset-yaml-reference.md +24 -0
  12. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/pyproject.toml +1 -1
  13. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/system.py +32 -1
  14. datamasque_cli-1.3.0/tests/commands/test_system.py +82 -0
  15. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/uv.lock +1 -1
  16. datamasque_cli-1.2.0/claude-skills/ruleset-builder/skills/ruleset-builder/SKILL.md +0 -175
  17. datamasque_cli-1.2.0/tests/commands/test_system.py +0 -38
  18. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/.claude-plugin/marketplace.json +0 -0
  19. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/.github/workflows/ci.yml +0 -0
  20. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/.github/workflows/release-testpypi.yml +0 -0
  21. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/.github/workflows/release.yml +0 -0
  22. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/.gitignore +0 -0
  23. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/CONTRIBUTING.md +0 -0
  24. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/LICENSE +0 -0
  25. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/Makefile +0 -0
  26. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/NOTICE +0 -0
  27. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/assets/demo.gif +0 -0
  28. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/README.md +0 -0
  29. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/datamasque-cli/.claude-plugin/plugin.json +0 -0
  30. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/ruleset-splitter/.claude-plugin/plugin.json +0 -0
  31. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/claude-skills/ruleset-splitter/skills/ruleset-splitter/SKILL.md +0 -0
  32. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/scripts/active_profile_env.py +0 -0
  33. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/scripts/bump_version.py +0 -0
  34. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/__init__.py +0 -0
  35. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/client.py +0 -0
  36. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/__init__.py +0 -0
  37. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/auth.py +0 -0
  38. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/connections.py +0 -0
  39. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/discovery.py +0 -0
  40. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/files.py +0 -0
  41. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/ifm.py +0 -0
  42. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/ruleset_libraries.py +0 -0
  43. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/rulesets.py +0 -0
  44. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/runs.py +0 -0
  45. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/seeds.py +0 -0
  46. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/commands/users.py +0 -0
  47. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/config.py +0 -0
  48. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/main.py +0 -0
  49. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/output.py +0 -0
  50. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/src/datamasque_cli/py.typed +0 -0
  51. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/__init__.py +0 -0
  52. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/__init__.py +0 -0
  53. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_auth.py +0 -0
  54. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_catalog.py +0 -0
  55. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_connections.py +0 -0
  56. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_discovery.py +0 -0
  57. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_files.py +0 -0
  58. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_ifm.py +0 -0
  59. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_ruleset_libraries.py +0 -0
  60. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_rulesets.py +0 -0
  61. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_runs.py +0 -0
  62. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_seeds.py +0 -0
  63. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/commands/test_users.py +0 -0
  64. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/conftest.py +0 -0
  65. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/integration/README.md +0 -0
  66. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/integration/__init__.py +0 -0
  67. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/integration/conftest.py +0 -0
  68. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/integration/test_connections.py +0 -0
  69. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/integration/test_delete_safety.py +0 -0
  70. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/integration/test_rulesets.py +0 -0
  71. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/integration/test_runs.py +0 -0
  72. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/test_client_auth.py +0 -0
  73. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/test_client_env.py +0 -0
  74. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/test_client_ifm.py +0 -0
  75. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/test_client_profile.py +0 -0
  76. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/test_config.py +0 -0
  77. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/test_output.py +0 -0
  78. {datamasque_cli-1.2.0 → datamasque_cli-1.3.0}/tests/test_version.py +0 -0
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## v1.3.0
4
+
5
+ ### Added
6
+ - `dm system ai-engine show` and `dm system ai-engine set <URL>` — view and
7
+ configure the AI Engine URL.
8
+
3
9
  ## v1.2.0
4
10
 
5
11
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamasque-cli
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Official command-line interface for the DataMasque data-masking platform.
5
5
  Project-URL: Homepage, https://datamasque.com/
6
6
  Project-URL: Repository, https://github.com/datamasque/datamasque-cli
@@ -200,7 +200,7 @@ dm libraries usage <name> # Show rulesets using it
200
200
  ### In-flight masking
201
201
 
202
202
  The IFM service runs alongside the admin server,
203
- reached at `<DataMasque URL>/ifm` via the standard nginx topology.
203
+ reached at `<DataMasque URL>/ifm`.
204
204
 
205
205
  ```console
206
206
  dm ifm list # List ruleset plans
@@ -278,6 +278,8 @@ dm system upload-licence ./licence.lic # Upload a licence file
278
278
  dm system logs -o logs.tar.gz # Download application logs
279
279
  dm system admin-install --email admin@co.com # Initial admin setup
280
280
  dm system set-locality AU # Set system locality
281
+ dm system ai-engine show # Show the configured AI Engine URL
282
+ dm system ai-engine set <URL> # Point DataMasque at an AI Engine
281
283
  ```
282
284
 
283
285
  ## JSON output
@@ -170,7 +170,7 @@ dm libraries usage <name> # Show rulesets using it
170
170
  ### In-flight masking
171
171
 
172
172
  The IFM service runs alongside the admin server,
173
- reached at `<DataMasque URL>/ifm` via the standard nginx topology.
173
+ reached at `<DataMasque URL>/ifm`.
174
174
 
175
175
  ```console
176
176
  dm ifm list # List ruleset plans
@@ -248,6 +248,8 @@ dm system upload-licence ./licence.lic # Upload a licence file
248
248
  dm system logs -o logs.tar.gz # Download application logs
249
249
  dm system admin-install --email admin@co.com # Initial admin setup
250
250
  dm system set-locality AU # Set system locality
251
+ dm system ai-engine show # Show the configured AI Engine URL
252
+ dm system ai-engine set <URL> # Point DataMasque at an AI Engine
251
253
  ```
252
254
 
253
255
  ## JSON output
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: datamasque-cli
3
- description: Use when the user wants to interact with a DataMasque instance — start masking runs, check run status, list connections or rulesets, manage seeds, manage ruleset libraries, check system health, or any task involving the DataMasque API. Triggers on "mask the data", "start a run", "check the run", "list connections", "list rulesets", "upload a seed", "check DataMasque health", "dm status", "ruleset library", or any request to operate DataMasque programmatically.
3
+ description: Use when the user wants to interact with a DataMasque instance — start masking runs, check run status, list connections or rulesets, manage seeds, manage ruleset libraries, check system health, configure the AI Engine, or any task involving the DataMasque API. Triggers on "mask the data", "start a run", "check the run", "list connections", "list rulesets", "upload a seed", "check DataMasque health", "dm status", "ruleset library", "configure the AI Engine", "set the AI Engine URL", or any request to operate DataMasque programmatically.
4
4
  argument-hint: e.g. "start a run with docx_masking on var_input_docx"
5
5
  user-invocable: true
6
6
  ---
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ruleset-builder",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Convert auto-generated DataMasque rulesets into production-ready form. Validate and iterate.",
5
5
  "author": { "name": "DataMasque Ltd" },
6
6
  "repository": "https://github.com/datamasque/datamasque-cli",
@@ -0,0 +1,176 @@
1
+ ---
2
+ name: ruleset-builder
3
+ description: Use when the user wants to turn auto-generated DataMasque rulesets into production-ready ones — extract a `ruleset_library`, add `hash_columns`, refine a ruleset, or clean up generated YAML. Triggers on "ruleset builder", "build ruleset", "refine ruleset", "add hash columns", "add ruleset library", "production ruleset", "clean up ruleset".
4
+ argument-hint: e.g. "build a ruleset from these generated files"
5
+ user-invocable: true
6
+ ---
7
+
8
+ # Ruleset Builder
9
+
10
+ Transform auto-generated DataMasque rulesets into production-ready rulesets with three improvements:
11
+ 1. **`ruleset_library` references** — `$ref` links replacing every repeated inline mask
12
+ 2. **`hash_columns`** — on every applicable `mask_table` task for deterministic consistency
13
+ 3. **Clean structure** — `skip_defaults`, no doc blocks, validated
14
+
15
+ FK cascade is automatic: mask the parent PK with `imitate_unique` (or `imitate_uuid` / `imitate_nz_ird`) and the engine replicates the rule onto every FK column referencing it. **Do NOT add explicit rules for FK columns.** Avoid `from_unique_imitate` and `mask_unique_key` (both deprecated). Never skip IDs.
16
+
17
+ 5-step process (1–5). Use `TaskCreate` to track all 5; report after each step before proceeding. The prompt must include business domain and application type — ask if missing.
18
+
19
+ ---
20
+
21
+ ## Step 1: Report versions
22
+
23
+ Report the Ruleset Builder version (from `plugin.json`) and `dm version` so the operator can correlate output with releases.
24
+
25
+ ---
26
+
27
+ ## Step 2: Read reference docs
28
+
29
+ Canonical mask reference:
30
+ <https://portal.datamasque.com/portal/documentation/latest/masking-functions-overview.html>
31
+
32
+ Read all of these before any other work:
33
+ ```
34
+ ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/fk-cascade.md
35
+ ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/mask-definitions-guide.md
36
+ ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/hash-columns-guide.md
37
+ ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/ruleset-yaml-reference.md
38
+ ```
39
+
40
+ ---
41
+
42
+ ## Step 3: Extract ruleset_library
43
+
44
+ Write a Python script using `ruamel.yaml` (`uv pip install ruamel.yaml`).
45
+
46
+ Process the input YAML. For each `mask_table` task, replace every inline mask with a `$ref` to a rule in `ruleset_library.yaml`. Build the library progressively — read its current state at the start of each iteration, create it if absent.
47
+
48
+ The library `masks` section structure:
49
+ ```yaml
50
+ version: "1.0"
51
+ masks:
52
+ rule_name:
53
+ type: rule_type
54
+ ...params
55
+ ```
56
+
57
+ ### Classification rules (apply in order)
58
+
59
+ **1. ID columns** — any column ending in `_ID`, `_NO`, `_NR`, or `_NBR` is an entity identifier.
60
+ - **FK side: drop the rule entirely.** If an ID column is a foreign key (the table's `Foreign Keys` metadata in the discovery CSV has an entry for it), do NOT emit a rule for it. The engine cascades automatically from the parent PK rule. See `fk-cascade.md`.
61
+ - **PK side: use `imitate_unique` with `seed:`.** Strip adjective/verb prefixes before the noun: `PREVIOUS_`, `OLD_`, `TRANSFERRED_`, `PRIOR_`, `CURR_`, `NEW_`, `NEXT_`, `ALT_`, `PARENT_`, `CHILD_`, `SOURCE_`, `TARGET_`, `ORIG_`, `PENDING_`, `ARCHIVED_`, `DELETED_`. Extract the core entity (`PREVIOUS_INVOICE_ID` → `invoice`).
62
+ - Library entry name: `{entity}_id`. Reference it as `$ref: "Global/RuleLib#masks/{entity}_id"`.
63
+ - Library entry body: `type: imitate_unique`, `seed: "{entity}"`. The `seed` is optional but recommended: it namespaces by entity so unrelated IDs don't collide (e.g. `customer.id=42` doesn't mask to the same value as `product.id=42`). Doesn't affect FK cascade.
64
+ - This overrides whatever mask was originally generated (even `from_random_number`).
65
+
66
+ **2. Named patterns** — detect by mask structure:
67
+
68
+ | Pattern | Detection | Library rule |
69
+ |-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|
70
+ | Email | `chain(concat(concat(firstName+lastName, glue='.')+email_suffix)+transform_case(lower))` | `email_address` |
71
+ | Full name | `chain(concat(firstName+lastName, glue=' ')+take_substring)` OR plain `concat(firstName+lastName, glue=' ')` — column not containing USERNAME/LOGIN | `full_name` |
72
+ | Username | Same mask as full_name but column name contains USERNAME, USER_NAME, LOGIN, LOGON | `username` |
73
+ | First name only | `from_file` with firstNames seed | `name_first` |
74
+ | Last name only | `from_file` with lastNames seed | `name_last` |
75
+ | DOB | Column name contains DOB/BIRTH/DATE_OF_BIRTH — use `retain_age` regardless of original type | `dob` |
76
+ | Company | `chain(from_file(companies)+take_substring)` | `company_name` |
77
+ | Country name | `from_file(country_codes, seed_column=name)` | `country_name` |
78
+ | Country alpha-2 | `from_file(country_codes, seed_column=alpha_2)` | `country_code_2` |
79
+ | Country alpha-3 | `from_file(country_codes, seed_column=alpha_3)` | `country_code_3` |
80
+ | Phone/fax | `imitate` on column name containing PHONE, TEL, FAX, MOBILE, CELL | `phone` |
81
+ | Address line 1 | `from_file(addresses, seed_column=street_address)` on LINE_1/ADDRESS_LINE_1 columns | `address_line1` |
82
+ | Address line N | Same for LINE_2, LINE_3 etc. | `address_lineN` |
83
+ | Address full | `from_file(addresses, seed_column=street_address)` on non-line-numbered columns | `address_full` |
84
+ | Address expr | `concat(address+city+state+postcode, glue=', ')` | `network_address_expr` |
85
+ | City | `from_file(addresses, seed_column=city)` | `city` |
86
+ | Postcode | `from_file(addresses, seed_column=postcode)` | `post_code` |
87
+ | Suburb | `from_file(addresses, seed_column=suburb)` | `suburb` |
88
+ | Occupation | `from_file(occupations)` | `occupation` |
89
+
90
+ **3. Remaining** — group by column name concept. Where column names share a root (e.g., `RESULT3_VALUE`, `RESULT5_VALUE` → `result_value`; `GENERAL_2`, `GENERAL_6` → `general`), use one shared rule. Strip adjective prefixes. Use first occurrence's parameters.
91
+
92
+ - `imitate_unique` (non-ID cols) → `{col_group}: type: imitate_unique, seed: "{col_group}"` (seed recommended for namespacing; see ID columns section).
93
+ - `from_random_date` → `{col_group}: type: from_random_date, min/max from first occurrence`
94
+ - `from_random_number` → `{col_group}: type: from_random_number, min/max from first occurrence`
95
+ - String catch-all → `{col_group}: type: imitate_unique, seed: "{col_group}"` (use `imitate` only for types `imitate_unique` can't handle, e.g. datetime, bool).
96
+ - Complex chains → keep structure, group by column name
97
+
98
+ ### Output format
99
+
100
+ `Global/RuleLib` below is a placeholder for `<namespace>/<library_name>` — substitute the operator's real values, and create the library with `dm libraries create` before running the ruleset.
101
+
102
+ ```yaml
103
+ version: '1.0'
104
+ skip_defaults:
105
+ - ''
106
+ - null
107
+ imports:
108
+ - Global/RuleLib
109
+
110
+ tasks:
111
+ - type: mask_table
112
+ table: '"SCHEMA"."TABLE"'
113
+ key: '"ROWID"'
114
+ rules:
115
+ - column: '"FIRST_NAME"'
116
+ masks:
117
+ - $ref: "Global/RuleLib#masks/name_first"
118
+ ```
119
+
120
+ Do NOT write a custom YAML serializer. Use `ruamel.yaml` round-trip dumper. Use `DoubleQuotedScalarString` for `$ref` values.
121
+
122
+ **Report:** "Step 3 done — extracted N rule library definitions: [list each name and usage count]."
123
+
124
+ ---
125
+
126
+ ## Step 4: Add hash_columns
127
+
128
+ Write a Python script that:
129
+
130
+ **Parse the discovery CSV** (comma-separated):
131
+ `Selected`, `Table schema`, `Table name`, `Column name`, `Data Type`, `Constraint`, `Foreign Keys`, `Max Length`, `Numeric Precision`, `Numeric Scale`, `Reason for flag`, `Flagged by`, `Data classifications`
132
+
133
+ Build a lookup of `(schema, table)` → columns with constraint and FK metadata:
134
+ - `Constraint` patterns: `Primary(COL)`, `Unique(COL)`, `Foreign(COL)`
135
+ - `Foreign Keys` JSON: `["FK_NAME", "SCHEMA.TABLE.COLUMN"]` — index 1 gives the referenced column; its `SCHEMA.TABLE` prefix is the referenced table
136
+
137
+ **For each `mask_table` task:**
138
+
139
+ 1. **Pick hash column** using this priority:
140
+ - **Parent-entity FK first**: find FK columns where the referenced table is the parent of the current table — i.e., the current table name *starts with* the referenced table name (e.g., `ACCOUNT_HISTORY` starts with `ACCOUNT` → use `ACCOUNT_ID`). This avoids choosing lookup-table FKs (e.g., don't choose `ACCOUNT_TYPE_ID` in `ACCOUNT` just because it has a FK).
141
+ - **PK fallback**: if no parent-entity FK found, use the Primary Key column (never `ROWID`)
142
+ - **Archive table fallback**: if no PK in the CSV (archive tables `_A`, `_A_R`, `_R` often lack explicit keys), strip the suffix and look up the base table recursively
143
+ - **Composite PKs**: prefer `*_ID` or `*_NO` columns; deduplicate derivatives (`ACCOUNT_ID` + `PREVIOUS_ACCOUNT_ID` → keep `ACCOUNT_ID`)
144
+ - **Skip** if no suitable column found
145
+
146
+ 2. Insert `hash_columns: ["COLUMN_NAME"]` after the `key:` field
147
+
148
+ 3. Verify all rules in output are `$ref` — fix any remaining inline masks
149
+
150
+ 4. Write to output file
151
+
152
+ **Report:** "Step 4 done — added hash_columns to N tables, skipped M (all-unique), skipped K (no suitable key). Top hash columns: [column → count]."
153
+
154
+ ---
155
+
156
+ ## Step 5: Validate and clean up
157
+
158
+ Remove any comment lines containing `ROWID`.
159
+
160
+ Run `dm rulesets validate --file <output_file> --type database`
161
+ (use `--type file` for file-masking rulesets).
162
+
163
+ Fix any errors and re-validate until passing.
164
+
165
+ ---
166
+
167
+ ## Summary
168
+
169
+ | Metric | Value |
170
+ |----------------------------|----------------|
171
+ | Total tables | N |
172
+ | Mask definitions extracted | N (list names) |
173
+ | Tables with hash_columns | N |
174
+ | Tables skipped (no key) | N |
175
+ | Validation | passed/failed |
176
+ | Output file | path |
@@ -0,0 +1,109 @@
1
+ # FK Cascade Invariant
2
+
3
+ This is the most important rule when refining a DataMasque ruleset that spans
4
+ related tables. Get this wrong and you either leak identity (by skipping
5
+ IDs entirely) or break the engine (by adding rules for FK columns).
6
+
7
+ ## The rule
8
+
9
+ **Mask only the parent PK column. The engine cascades the same masked value
10
+ to every FK column referencing it.**
11
+
12
+ Three masks support this cascade:
13
+
14
+ - `imitate_unique` — recommended for new work.
15
+ - `imitate_uuid` — for UUID-shaped IDs.
16
+ - `imitate_nz_ird` — for NZ IRD numbers.
17
+
18
+ (`from_unique_imitate` and `mask_unique_key` are deprecated; do not emit.)
19
+
20
+ When `mask_table` runs and a rule on a referenced column uses one of these
21
+ masks, the engine:
22
+
23
+ 1. Discovers child tables with FKs referencing this column.
24
+ 2. Auto-replicates the parent's rule onto every FK column.
25
+ 3. Same mask config → same masked output → joins survive.
26
+
27
+ This is documented at
28
+ <https://portal.datamasque.com/portal/documentation/latest/unique-masks.html>:
29
+
30
+ > "You can apply an `imitate_unique` mask to a primary key column or a
31
+ > column that is used as a foreign key in another table. References will be
32
+ > updated automatically. Composite primary keys are supported."
33
+
34
+ ## Worked example
35
+
36
+ Schema:
37
+ - `customers.id` (PK), `customers.email`
38
+ - `orders.id` (PK), `orders.customer_id` (FK → `customers.id`), `orders.tracking_number`
39
+
40
+ Correct ruleset:
41
+
42
+ ```yaml
43
+ - type: mask_table
44
+ table: customers
45
+ key: id
46
+ rules:
47
+ - column: id
48
+ masks:
49
+ - type: imitate_unique
50
+ seed: customer
51
+ - column: email
52
+ masks:
53
+ - type: from_file
54
+ seed_file: DataMasque_emails.csv
55
+ seed_column: email
56
+
57
+ - type: mask_table
58
+ table: orders
59
+ key: id
60
+ rules:
61
+ # customer_id is intentionally absent — the engine replicates the
62
+ # `customers.id` rule onto it automatically. Adding it here would
63
+ # be rejected by the runtime FK check.
64
+ - column: tracking_number
65
+ masks:
66
+ - type: imitate_unique
67
+ seed: tracking
68
+ ```
69
+
70
+ After the run, `orders.customer_id` holds the same masked values as
71
+ `customers.id`, joins remain intact, and `tracking_number` is independently
72
+ masked with its own seed.
73
+
74
+ ## Anti-patterns to refuse
75
+
76
+ - **Adding explicit FK rules** ("I'll mask both PK and FK with shared
77
+ `$ref` so the cascade works"). The runtime rejects this by default with
78
+ the error:
79
+ *"To preserve referential integrity, the following foreign key columns
80
+ cannot be directly masked by this task."*
81
+ The engine will replicate the rule for you; adding your own conflicts.
82
+ - **Skipping IDs to "preserve FK joins"**. Leaves identifiers in plain
83
+ sight. Mask the parent PK with `imitate_unique` — joins survive via
84
+ the auto-cascade.
85
+ - **Inventing linking parameters** (`source_table`, `source_column`,
86
+ `parent_column`, `link_to`). None of these exist on any DataMasque mask.
87
+ - **Inventing a hashing mask** (`hash_text`, `hash`, `link`, `match_id`).
88
+ None of these exist. `imitate_unique` is the deterministic mask.
89
+ - **Using `from_unique_imitate` or `mask_unique_key`**. Both deprecated.
90
+ `imitate_unique` replaces both.
91
+
92
+ ## Cross-run consistency requires `run_secret`
93
+
94
+ Within a single run, `imitate_unique` is deterministic via a per-run
95
+ `insecure_seed`. Across runs, masked values only stay consistent if the run is
96
+ invoked with a `run_secret`. Without it, the same input maps to a
97
+ different masked value next run. If cross-run consistency matters, flag
98
+ this in the final summary.
99
+
100
+ ## Self-check before finishing
101
+
102
+ For each FK relationship in the schema:
103
+
104
+ 1. Is the parent PK masked with `imitate_unique`, `imitate_uuid`, or
105
+ `imitate_nz_ird`?
106
+ 2. Is the FK column **absent** from your output (no explicit rule)?
107
+ 3. Are `from_unique_imitate` and `mask_unique_key` absent from your output?
108
+
109
+ If any answer is "no", fix it before validation.
@@ -71,14 +71,14 @@ hash_columns:
71
71
 
72
72
  Every table belongs to a domain entity. Find the column that identifies that entity:
73
73
 
74
- | Domain | Typical hash column | Examples |
75
- |--------|-------------------|----------|
76
- | Customer | `cust_id`, `customer_id`, `client_id` | CUST_MASTER, CUST_ADDRESS |
77
- | Account | `acc_id`, `account_id`, `account_no` | DEP_ACCOUNT, DEP_EMAIL_ALERT |
78
- | Card | `card_id`, `card_no` | CARD_MASTER, CARD_INSURANCE |
79
- | Loan | `loan_id`, `loan_no` | LOAN_COLLATERAL, LOAN_GUARANTOR |
80
- | Employee | `emp_id`, `emp_no`, `employee_id` | COM_EMPLOYEE, COM_EMP_ROLE |
81
- | Transaction | `tx_id`, `trf_id`, `fx_tx_id` | TRF_MASTER, FX_RECEIPT |
74
+ | Domain | Typical hash column | Examples |
75
+ |-------------|---------------------------------------|---------------------------------|
76
+ | Customer | `cust_id`, `customer_id`, `client_id` | CUST_MASTER, CUST_ADDRESS |
77
+ | Account | `acc_id`, `account_id`, `account_no` | DEP_ACCOUNT, DEP_EMAIL_ALERT |
78
+ | Card | `card_id`, `card_no` | CARD_MASTER, CARD_INSURANCE |
79
+ | Loan | `loan_id`, `loan_no` | LOAN_COLLATERAL, LOAN_GUARANTOR |
80
+ | Employee | `emp_id`, `emp_no`, `employee_id` | COM_EMPLOYEE, COM_EMP_ROLE |
81
+ | Transaction | `tx_id`, `trf_id`, `fx_tx_id` | TRF_MASTER, FX_RECEIPT |
82
82
 
83
83
  ### Step 2: Check foreign keys in the DDL
84
84
 
@@ -175,14 +175,14 @@ tasks:
175
175
 
176
176
  Common seed files for `from_file` masks:
177
177
 
178
- | Category | Files |
179
- |----------|-------|
180
- | Names | `DataMasque_firstNames_mixed.csv`, `DataMasque_lastNames_v2.csv` |
178
+ | Category | Files |
179
+ |-----------|-------------------------------------------------------------------------------------------------------|
180
+ | Names | `DataMasque_firstNames_mixed.csv`, `DataMasque_lastNames_v2.csv` |
181
181
  | Addresses | `DataMasque_US_addresses.csv`, `DataMasque_AU_addresses_real.csv`, `DataMasque_NZ_addresses_real.csv` |
182
- | Companies | `DataMasque_companies.csv`, `DataMasque_NZ_companies.csv`, `DataMasque_AU_companies.csv` |
183
- | Email | `DataMasque_fake_email_suffixes.csv`, `DataMasque_email_suffixes.csv` |
184
- | Reference | `DataMasque_country_codes.csv`, `DataMasque_occupations.csv` |
185
- | Cards | `DataMasque_credit_card_numbers.csv`, `DataMasque_credit_card_prefixes.csv` |
182
+ | Companies | `DataMasque_companies.csv`, `DataMasque_NZ_companies.csv`, `DataMasque_AU_companies.csv` |
183
+ | Email | `DataMasque_fake_email_suffixes.csv`, `DataMasque_email_suffixes.csv` |
184
+ | Reference | `DataMasque_country_codes.csv`, `DataMasque_occupations.csv` |
185
+ | Cards | `DataMasque_credit_card_numbers.csv`, `DataMasque_credit_card_prefixes.csv` |
186
186
 
187
187
  Regional variants exist for BR, IN, AU, NZ, US.
188
188
  Use `from_file` when there are more than ~50 distinct values;
@@ -143,13 +143,13 @@ tasks: [...]
143
143
 
144
144
  ## Libraries vs YAML Anchors
145
145
 
146
- | Feature | YAML Anchors (`&`/`*`) | Libraries (`$ref`) |
147
- |---------|----------------------|-------------------|
148
- | Scope | Within one ruleset | Across multiple rulesets |
149
- | Management | Inline in YAML | Managed via API/CLI, versioned |
150
- | Syntax | `<<: *anchor_name` | `$ref: "lib#path"` |
151
- | Override | `<<:` merge key | Not supported (use as-is) |
152
- | Best for | Single-ruleset reuse | Organisation-wide standards |
146
+ | Feature | YAML Anchors (`&`/`*`) | Libraries (`$ref`) |
147
+ |------------|------------------------|--------------------------------|
148
+ | Scope | Within one ruleset | Across multiple rulesets |
149
+ | Management | Inline in YAML | Managed via API/CLI, versioned |
150
+ | Syntax | `<<: *anchor_name` | `$ref: "lib#path"` |
151
+ | Override | `<<:` merge key | Not supported (use as-is) |
152
+ | Best for | Single-ruleset reuse | Organisation-wide standards |
153
153
 
154
154
  **Recommendation:**
155
155
  - Start with YAML anchors (`mask_definitions`) for within-ruleset deduplication
@@ -72,6 +72,16 @@ For PostgreSQL/MySQL, plain names work: `table: users`, `key: id`.
72
72
 
73
73
  ## Mask Types Quick Reference
74
74
 
75
+ This is the **closed list** of every `type:` value DataMasque accepts. Do
76
+ not invent mask types or parameters (no `source_table`, no `link_to`, no
77
+ `parent_column` — none exist). For per-mask parameter details, see the
78
+ canonical source:
79
+ <https://portal.datamasque.com/portal/documentation/latest/masking-functions-overview.html>.
80
+
81
+ For a deterministic hash, use `imitate_unique` (or `imitate_uuid` for UUIDs)
82
+ optionally with `seed:` to namespace. The cascade is automatic; no
83
+ cross-table reference parameter exists. See `fk-cascade.md`.
84
+
75
85
  ### Generic
76
86
  - `from_fixed` — fixed replacement value
77
87
  - `from_column` — copy from another column
@@ -124,6 +134,20 @@ For PostgreSQL/MySQL, plain names work: `table: users`, `key: id`.
124
134
  ### Document
125
135
  - `json` — mask JSON fields within a column
126
136
  - `xml` — mask XML elements within a column
137
+ - `unstructured_text` — mask entities inside free text
138
+
139
+ ### Commonly-hallucinated names that do NOT exist
140
+
141
+ These plausible-sounding names are not in DataMasque. Refuse to emit them:
142
+
143
+ | Hallucinated name | What was wanted | Use instead |
144
+ |-------------------------------------------------------------|-----------------------------------|---------------------------------------------------------------|
145
+ | `hash_text`, `hash` | deterministic hash of a value | `imitate_unique` (or `imitate_uuid` for UUIDs) |
146
+ | `link`, `match_id`, `link_to` | join two columns after masking | shared `imitate_unique` config on both sides |
147
+ | `from_random_words` | random words / short text | `from_random_text` (random chars) or `from_file` |
148
+ | `from_random_string` | random string | `from_random_text` |
149
+ | `redact`, `mask_value` | constant placeholder | `from_fixed` with `value:` |
150
+ | `source_table`, `source_column`, `parent_column`, `link_to` | param to point a FK at its parent | does not exist — cascade is automatic with shared mask config |
127
151
 
128
152
  ## skip_defaults
129
153
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datamasque-cli"
3
- version = "1.2.0"
3
+ version = "1.3.0"
4
4
  description = "Official command-line interface for the DataMasque data-masking platform."
5
5
  authors = [
6
6
  { name = "DataMasque Ltd" },
@@ -1,4 +1,4 @@
1
- """System-level commands: health, licence, logs, admin-install."""
1
+ """System administration commands."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -116,3 +116,34 @@ def set_locality(
116
116
  client = get_client(profile)
117
117
  client.set_locality(locality)
118
118
  print_success(f"Locality set to '{locality}'.")
119
+
120
+
121
+ ai_engine_app = typer.Typer(help="Configure the AI Engine.", no_args_is_help=True)
122
+ app.add_typer(ai_engine_app, name="ai-engine")
123
+
124
+
125
+ @ai_engine_app.command("show")
126
+ def ai_engine_show(
127
+ profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
128
+ is_json: bool = typer.Option(False, "--json", help="Output as JSON"),
129
+ ) -> None:
130
+ """Show the configured AI Engine URL."""
131
+ client = get_client(profile)
132
+ response = client.make_request("GET", "/api/settings/")
133
+ url = response.json().get("dm_ai_engine_url") or None
134
+ if should_emit_json(is_json):
135
+ print_json({"dm_ai_engine_url": url})
136
+ return
137
+ # An empty table cell would look like a rendering bug.
138
+ render_output({"dm_ai_engine_url": url or "<not configured>"}, is_json=False, title="AI Engine")
139
+
140
+
141
+ @ai_engine_app.command("set")
142
+ def ai_engine_set(
143
+ url: str = typer.Argument(help="AI Engine base URL"),
144
+ profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
145
+ ) -> None:
146
+ """Point DataMasque at an AI Engine."""
147
+ client = get_client(profile)
148
+ client.make_request("PATCH", "/api/settings/", data={"dm_ai_engine_url": url})
149
+ print_success(f"AI Engine URL set to '{url}'.")
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import UTC, datetime
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ import pytest
7
+ from datamasque.client.models.license import LicenseInfo, SwitchableLicenseMetadata
8
+ from typer.testing import CliRunner
9
+
10
+ from datamasque_cli.main import app
11
+
12
+ MODULE = "datamasque_cli.commands.system"
13
+
14
+
15
+ @patch(f"{MODULE}.get_client")
16
+ def test_licence_projects_to_user_facing_fields(mock_get_client: MagicMock, runner: CliRunner) -> None:
17
+ client = MagicMock()
18
+ mock_get_client.return_value = client
19
+ client.get_current_license_info.return_value = LicenseInfo(
20
+ uuid="lic-123",
21
+ name="Test Licence",
22
+ type="standard",
23
+ is_expired=False,
24
+ uploadable=True,
25
+ expiry_date=datetime(2027, 6, 1, tzinfo=UTC),
26
+ days_until_expiry=400,
27
+ platform_name="DataMasque",
28
+ # Noisy nested field that should NOT appear in the projected output.
29
+ switchable_license_metadata=SwitchableLicenseMetadata(license_source="aws"),
30
+ )
31
+
32
+ result = runner.invoke(app, ["system", "licence", "--json"])
33
+
34
+ assert result.exit_code == 0
35
+ assert '"uuid": "lic-123"' in result.stdout
36
+ assert '"days_until_expiry": 400' in result.stdout
37
+ assert '"platform_name": "DataMasque"' in result.stdout
38
+ assert "switchable_license_metadata" not in result.stdout
39
+ assert "license_source" not in result.stdout
40
+
41
+
42
+ @pytest.mark.parametrize(
43
+ ("extra_args", "settings_url", "expected_output"),
44
+ [
45
+ (["--json"], "http://engine.example.com:9021", '"dm_ai_engine_url": "http://engine.example.com:9021"'),
46
+ ([], "http://engine.example.com:9021", "http://engine.example.com:9021"),
47
+ ([], None, "<not configured>"),
48
+ ([], "", "<not configured>"),
49
+ ],
50
+ )
51
+ @patch(f"{MODULE}.get_client")
52
+ def test_ai_engine_show(
53
+ mock_get_client: MagicMock,
54
+ runner: CliRunner,
55
+ extra_args: list[str],
56
+ settings_url: str | None,
57
+ expected_output: str,
58
+ ) -> None:
59
+ client = MagicMock()
60
+ mock_get_client.return_value = client
61
+ response = MagicMock()
62
+ response.json.return_value = {"dm_ai_engine_url": settings_url}
63
+ client.make_request.return_value = response
64
+
65
+ result = runner.invoke(app, ["system", "ai-engine", "show", *extra_args])
66
+
67
+ assert result.exit_code == 0
68
+ client.make_request.assert_called_once_with("GET", "/api/settings/")
69
+ assert expected_output in result.stdout
70
+
71
+
72
+ @patch(f"{MODULE}.get_client")
73
+ def test_ai_engine_set_patches_settings_with_url(mock_get_client: MagicMock, runner: CliRunner) -> None:
74
+ client = MagicMock()
75
+ mock_get_client.return_value = client
76
+
77
+ result = runner.invoke(app, ["system", "ai-engine", "set", "http://engine.example.com:9021"])
78
+
79
+ assert result.exit_code == 0
80
+ client.make_request.assert_called_once_with(
81
+ "PATCH", "/api/settings/", data={"dm_ai_engine_url": "http://engine.example.com:9021"}
82
+ )
@@ -141,7 +141,7 @@ wheels = [
141
141
 
142
142
  [[package]]
143
143
  name = "datamasque-cli"
144
- version = "1.2.0"
144
+ version = "1.3.0"
145
145
  source = { editable = "." }
146
146
  dependencies = [
147
147
  { name = "datamasque-python" },
@@ -1,175 +0,0 @@
1
- ---
2
- name: ruleset-builder
3
- description: Use when the user wants to turn auto-generated DataMasque rulesets into production-ready ones — extract a `ruleset_library`, add `hash_columns`, refine a ruleset, or clean up generated YAML. Triggers on "ruleset builder", "build ruleset", "refine ruleset", "add hash columns", "add ruleset library", "production ruleset", "clean up ruleset".
4
- argument-hint: e.g. "build a ruleset from these generated files"
5
- user-invocable: true
6
- ---
7
-
8
- # Ruleset Builder
9
-
10
- Transform auto-generated DataMasque rulesets into production-ready rulesets with three improvements:
11
- 1. **`ruleset_library` references** — `$ref` links replacing every repeated inline mask
12
- 2. **`hash_columns`** — on every applicable `mask_table` task for deterministic consistency
13
- 3. **Clean structure** — `skip_defaults`, no doc blocks, validated
14
-
15
- **4-step process. Complete all 4 steps. Report after each step before proceeding.**
16
-
17
- Use `TaskCreate` for all 4 steps before starting. The prompt must include business domain and application type — ask if missing.
18
-
19
- ---
20
-
21
- ## Step 0: Report version
22
- Report: **Version 1.5**
23
-
24
- ---
25
-
26
- ## Step 1: Read reference docs
27
-
28
- Read all three before any other work:
29
- ```
30
- ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/mask-definitions-guide.md
31
- ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/hash-columns-guide.md
32
- ${CLAUDE_PLUGIN_ROOT}/skills/ruleset-builder/references/ruleset-yaml-reference.md
33
- ```
34
-
35
- ---
36
-
37
- ## Step 2: Extract ruleset_library
38
-
39
- Write a Python script using `ruamel.yaml` (`uv pip install ruamel.yaml`).
40
-
41
- Process the input YAML. For each `mask_table` task, replace every inline mask with a `$ref` to a rule in `ruleset_library.yaml`. Build the library progressively — read its current state at the start of each iteration, create it if absent.
42
-
43
- The library `masks` section structure:
44
- ```yaml
45
- version: "1.0"
46
- masks:
47
- rule_name:
48
- type: rule_type
49
- ...params
50
- ```
51
-
52
- ### Classification rules (apply in order)
53
-
54
- **1. ID columns** — any column ending in `_ID`, `_NO`, `_NR`, `_NBR` is an entity identifier.
55
- - Strip adjective/verb prefixes before the noun: `PREVIOUS_`, `OLD_`, `TRANSFERRED_`, `PRIOR_`, `CURR_`, `NEW_`, `NEXT_`, `ALT_`, `PARENT_`, `CHILD_`, `SOURCE_`, `TARGET_`, `ORIG_`, `PENDING_`, `ARCHIVED_`, `DELETED_`
56
- - Extract the core entity: `PREVIOUS_INVOICE_ID` → `invoice`, `TRANSFERRED_ACCOUNT_ID` → `account`, `INVOICE_ACCOUNT_ID` → `invoice_account` (compound kept — no prefix stripped)
57
- - Group all derivatives to one rule: `$ref: "Global/RuleLib#masks/{entity}_id"`
58
- - Library entry: `type: imitate_unique`, `seed: "{entity}"` — **seed is required**
59
- - This overrides whatever mask was originally generated (even `imitate_unique`, `from_random_number`, etc.)
60
-
61
- **2. Named patterns** — detect by mask structure:
62
-
63
- | Pattern | Detection | Library rule |
64
- |---------|-----------|--------------|
65
- | Email | `chain(concat(concat(firstName+lastName, glue='.')+email_suffix)+transform_case(lower))` | `email_address` |
66
- | Full name | `chain(concat(firstName+lastName, glue=' ')+take_substring)` OR plain `concat(firstName+lastName, glue=' ')` — column not containing USERNAME/LOGIN | `full_name` |
67
- | Username | Same mask as full_name but column name contains USERNAME, USER_NAME, LOGIN, LOGON | `username` |
68
- | First name only | `from_file` with firstNames seed | `name_first` |
69
- | Last name only | `from_file` with lastNames seed | `name_last` |
70
- | DOB | Column name contains DOB/BIRTH/DATE_OF_BIRTH — use `retain_age` regardless of original type | `dob` |
71
- | Company | `chain(from_file(companies)+take_substring)` | `company_name` |
72
- | Country name | `from_file(country_codes, seed_column=name)` | `country_name` |
73
- | Country alpha-2 | `from_file(country_codes, seed_column=alpha_2)` | `country_code_2` |
74
- | Country alpha-3 | `from_file(country_codes, seed_column=alpha_3)` | `country_code_3` |
75
- | Phone/fax | `imitate` on column name containing PHONE, TEL, FAX, MOBILE, CELL | `phone` |
76
- | Address line 1 | `from_file(addresses, seed_column=street_address)` on LINE_1/ADDRESS_LINE_1 columns | `address_line1` |
77
- | Address line N | Same for LINE_2, LINE_3 etc. | `address_lineN` |
78
- | Address full | `from_file(addresses, seed_column=street_address)` on non-line-numbered columns | `address_full` |
79
- | Address expr | `concat(address+city+state+postcode, glue=', ')` | `network_address_expr` |
80
- | City | `from_file(addresses, seed_column=city)` | `city` |
81
- | Postcode | `from_file(addresses, seed_column=postcode)` | `post_code` |
82
- | Suburb | `from_file(addresses, seed_column=suburb)` | `suburb` |
83
- | Occupation | `from_file(occupations)` | `occupation` |
84
-
85
- **3. Remaining** — group by column name concept. Where column names share a root (e.g., `RESULT3_VALUE`, `RESULT5_VALUE` → `result_value`; `GENERAL_2`, `GENERAL_6` → `general`), use one shared rule. Strip adjective prefixes. Use first occurrence's parameters.
86
-
87
- - `imitate_unique` (non-ID cols) → `{col_group}: type: imitate_unique, seed: "{col_group}"` — **seed is required**
88
- - `from_random_date` → `{col_group}: type: from_random_date, min/max from first occurrence`
89
- - `from_random_number` → `{col_group}: type: from_random_number, min/max from first occurrence`
90
- - `imitate` (non-phone) → `{col_group}: type: imitate`
91
- - Complex chains → keep structure, group by column name
92
-
93
- > **Critical rule:** Every `imitate_unique` entry in `ruleset_library.yaml` MUST have a `seed` value.
94
- > - Entity ID rules: `seed: "{entity_name}"` (e.g., `account_id` → `seed: "account"`)
95
- > - All other `imitate_unique` rules: `seed: "{rule_name}"` (e.g., `field_name` → `seed: "field_name"`)
96
-
97
- ### Output format
98
-
99
- ```yaml
100
- version: '1.0'
101
- skip_defaults:
102
- - ''
103
- - null
104
- imports:
105
- - Global/RuleLib
106
-
107
- tasks:
108
- - type: mask_table
109
- table: '"SCHEMA"."TABLE"'
110
- key: '"ROWID"'
111
- rules:
112
- - column: '"FIRST_NAME"'
113
- masks:
114
- - $ref: "Global/RuleLib#masks/name_first"
115
- ```
116
-
117
- Do NOT write a custom YAML serializer. Use `ruamel.yaml` round-trip dumper. Use `DoubleQuotedScalarString` for `$ref` values.
118
-
119
- **Report:** "Step 2 done — extracted N rule library definitions: [list each name and usage count]."
120
-
121
- ---
122
-
123
- ## Step 3: Add hash_columns
124
-
125
- Write a Python script that:
126
-
127
- **Parse the discovery CSV** (comma-separated):
128
- `Selected`, `Table schema`, `Table name`, `Column name`, `Data Type`, `Constraint`, `Foreign Keys`, `Max Length`, `Numeric Precision`, `Numeric Scale`, `Reason for flag`, `Flagged by`, `Data classifications`
129
-
130
- Build a lookup of `(schema, table)` → columns with constraint and FK metadata:
131
- - `Constraint` patterns: `Primary(COL)`, `Unique(COL)`, `Foreign(COL)`
132
- - `Foreign Keys` JSON: `["FK_NAME", "SCHEMA.TABLE.COLUMN"]` — index 1 is the referenced column path; its `SCHEMA.TABLE` part identifies the referenced table
133
-
134
- **For each `mask_table` task:**
135
-
136
- 1. **Pick hash column** using this priority:
137
- - **Parent-entity FK first**: find FK columns where the referenced table is the parent of the current table — i.e., the current table name *starts with* the referenced table name (e.g., `ACCOUNT_HISTORY` starts with `ACCOUNT` → use `ACCOUNT_ID`). This avoids choosing lookup-table FKs (e.g., don't choose `ACCOUNT_TYPE_ID` in `ACCOUNT` just because it has a FK).
138
- - **PK fallback**: if no parent-entity FK found, use the Primary Key column (never `ROWID`)
139
- - **Archive table fallback**: if no PK in the CSV (archive tables `_A`, `_A_R`, `_R` often lack explicit keys), strip the suffix and look up the base table recursively
140
- - **Composite PKs**: prefer `*_ID` or `*_NO` columns; deduplicate derivatives (`ACCOUNT_ID` + `PREVIOUS_ACCOUNT_ID` → keep `ACCOUNT_ID`)
141
- - **Skip** if no suitable column found
142
-
143
- 2. Insert `hash_columns: ["COLUMN_NAME"]` after the `key:` field
144
-
145
- 3. Verify all rules in output are `$ref` — fix any remaining inline masks
146
-
147
- 4. Write to output file
148
-
149
- **Report:** "Step 3 done — added hash_columns to N tables, skipped M (all-unique), skipped K (no suitable key). Top hash columns: [column → count]."
150
-
151
- ---
152
-
153
- ## Step 4: Validate and clean up
154
-
155
- Remove any comment lines containing `ROWID`.
156
-
157
- Run:
158
- ```bash
159
- dm rulesets validate --file <output_file>
160
- ```
161
-
162
- Fix any errors and re-validate until passing.
163
-
164
- ---
165
-
166
- ## Summary
167
-
168
- | Metric | Value |
169
- |--------|-------|
170
- | Total tables | N |
171
- | Mask definitions extracted | N (list names) |
172
- | Tables with hash_columns | N |
173
- | Tables skipped (no key) | N |
174
- | Validation | passed/failed |
175
- | Output file | path |
@@ -1,38 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from datetime import UTC, datetime
4
- from unittest.mock import MagicMock, patch
5
-
6
- from datamasque.client.models.license import LicenseInfo, SwitchableLicenseMetadata
7
- from typer.testing import CliRunner
8
-
9
- from datamasque_cli.main import app
10
-
11
- MODULE = "datamasque_cli.commands.system"
12
-
13
-
14
- @patch(f"{MODULE}.get_client")
15
- def test_licence_projects_to_user_facing_fields(mock_get_client: MagicMock, runner: CliRunner) -> None:
16
- client = MagicMock()
17
- mock_get_client.return_value = client
18
- client.get_current_license_info.return_value = LicenseInfo(
19
- uuid="lic-123",
20
- name="Test Licence",
21
- type="standard",
22
- is_expired=False,
23
- uploadable=True,
24
- expiry_date=datetime(2027, 6, 1, tzinfo=UTC),
25
- days_until_expiry=400,
26
- platform_name="DataMasque",
27
- # Noisy nested field that should NOT appear in the projected output.
28
- switchable_license_metadata=SwitchableLicenseMetadata(license_source="aws"),
29
- )
30
-
31
- result = runner.invoke(app, ["system", "licence", "--json"])
32
-
33
- assert result.exit_code == 0
34
- assert '"uuid": "lic-123"' in result.stdout
35
- assert '"days_until_expiry": 400' in result.stdout
36
- assert '"platform_name": "DataMasque"' in result.stdout
37
- assert "switchable_license_metadata" not in result.stdout
38
- assert "license_source" not in result.stdout
File without changes
File without changes
File without changes