data-boar 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. analysis/__init__.py +0 -0
  2. api/__init__.py +0 -0
  3. api/app.py +8 -0
  4. api/locale_i18n.py +209 -0
  5. api/locales/en.json +254 -0
  6. api/locales/pt-BR.json +254 -0
  7. api/rbac.py +190 -0
  8. api/routes.py +2014 -0
  9. api/static/app.js +2 -0
  10. api/static/chart.umd.min.js +20 -0
  11. api/static/dashboard/en/config.png +0 -0
  12. api/static/dashboard/en/reports-download.png +0 -0
  13. api/static/dashboard/en/scan-start.png +0 -0
  14. api/static/dashboard/pt-br/config.png +0 -0
  15. api/static/dashboard/pt-br/reports-download.png +0 -0
  16. api/static/dashboard/pt-br/scan-start.png +0 -0
  17. api/static/dashboard.js +269 -0
  18. api/static/favicon.ico +0 -0
  19. api/static/logo-candidates/v0-original.svg +23 -0
  20. api/static/logo-candidates/v1-cream-notepad-radar.svg +38 -0
  21. api/static/logo-candidates/v2-cream-laser-db.svg +17 -0
  22. api/static/logo-candidates/v3-cream-crawler-db.svg +20 -0
  23. api/static/logo-candidates/v4-cream-radar-table.svg +16 -0
  24. api/static/logo-candidates/v5-cream-scan-docs-floppy.svg +38 -0
  25. api/static/logo-candidates/v6-leitao-piglet.svg +65 -0
  26. api/static/logo-candidates/v7-javali-data-soup.svg +78 -0
  27. api/static/mascot/data_boar_mascot_original.png +0 -0
  28. api/static/mascot/data_boar_mascot_original_half.png +0 -0
  29. api/static/mascot/data_boar_mascote_bw.svg +174 -0
  30. api/static/mascot/data_boar_mascote_bw_core.png +0 -0
  31. api/static/mascot/data_boar_mascote_bw_core_half.png +0 -0
  32. api/static/mascot/data_boar_mascote_bw_full.png +0 -0
  33. api/static/mascot/data_boar_mascote_bw_full_half.png +0 -0
  34. api/static/mascot/data_boar_mascote_bw_translucent.png +0 -0
  35. api/static/mascot/data_boar_mascote_bw_translucent_half.png +0 -0
  36. api/static/mascot/data_boar_mascote_color.svg +151 -0
  37. api/static/mascot/data_boar_mascote_color_full.png +0 -0
  38. api/static/mascot/data_boar_mascote_color_full_haf.png +0 -0
  39. api/static/mascot/data_boar_mascote_color_translucent.png +0 -0
  40. api/static/mascot/data_boar_mascote_color_translucent_half.png +0 -0
  41. api/static/mascot/data_boar_mascote_color_translucent_not_zombie.png +0 -0
  42. api/static/mascot/data_boar_mascote_color_translucent_not_zombie_half.png +0 -0
  43. api/static/style.css +100 -0
  44. api/static/webauthn-login.js +198 -0
  45. api/templates/about.html +39 -0
  46. api/templates/assessment_placeholder.html +109 -0
  47. api/templates/base.html +48 -0
  48. api/templates/config.html +17 -0
  49. api/templates/dashboard.html +146 -0
  50. api/templates/help.html +191 -0
  51. api/templates/login.html +17 -0
  52. api/templates/reports.html +41 -0
  53. api/webauthn_html_gate.py +159 -0
  54. api/webauthn_routes.py +278 -0
  55. app/__init__.py +1 -0
  56. app/dashboard.py +178 -0
  57. app/grc_dashboard_model.py +135 -0
  58. cli/__init__.py +1 -0
  59. cli/reporter.py +346 -0
  60. config/README.md +14 -0
  61. config/README.pt_BR.md +14 -0
  62. config/boar_config.yaml +13 -0
  63. config/config.json +28 -0
  64. config/file_scan_config.yaml +0 -0
  65. config/loader.py +1088 -0
  66. config/plugin_schema.yaml +140 -0
  67. config/plugin_validator.py +369 -0
  68. config/redact_config.py +150 -0
  69. config/regex_overrides.example.yaml +27 -0
  70. config/scan_defaults.py +25 -0
  71. config/scope_import_csv.py +335 -0
  72. config/scope_import_glpi.py +260 -0
  73. connectors/data_soup_formats.py +148 -0
  74. connectors/dataverse_connector.py +315 -0
  75. connectors/file_connector.py +31 -0
  76. connectors/filesystem_connector.py +891 -0
  77. connectors/mongodb_connector.py +184 -0
  78. connectors/nfs_connector.py +75 -0
  79. connectors/powerbi_connector.py +356 -0
  80. connectors/redis_connector.py +163 -0
  81. connectors/rest_connector.py +412 -0
  82. connectors/rich_media_sample.py +168 -0
  83. connectors/sharepoint_connector.py +285 -0
  84. connectors/smb_connector.py +286 -0
  85. connectors/snowflake_connector.py +314 -0
  86. connectors/sql_connector.py +565 -0
  87. connectors/sql_sampling.py +664 -0
  88. connectors/sql_table_row_estimate.py +119 -0
  89. connectors/stego_hint.py +53 -0
  90. connectors/url_guard.py +126 -0
  91. connectors/webdav_connector.py +315 -0
  92. core/about.py +54 -0
  93. core/advisor.py +52 -0
  94. core/aggregated_identification.py +404 -0
  95. core/archives.py +306 -0
  96. core/audit_export.py +92 -0
  97. core/brazilian_cpf.py +272 -0
  98. core/column_name_normalize.py +43 -0
  99. core/connector_registry.py +203 -0
  100. core/content_type.py +123 -0
  101. core/crypto_audit.py +91 -0
  102. core/dashboard_transport.py +123 -0
  103. core/database.py +1556 -0
  104. core/database_manager.py +142 -0
  105. core/detector.py +1456 -0
  106. core/discovery_orchestrator.py +216 -0
  107. core/dl_backend.py +175 -0
  108. core/dsar_export.py +136 -0
  109. core/embedding_prototype_hint.py +50 -0
  110. core/engine.py +466 -0
  111. core/enterprise_surface_posture.py +110 -0
  112. core/fuzzy_column_match.py +117 -0
  113. core/host_resolution.py +86 -0
  114. core/integrity_anchor.py +331 -0
  115. core/intelligence.py +238 -0
  116. core/learned_patterns.py +247 -0
  117. core/licensing/__init__.py +21 -0
  118. core/licensing/audit.py +63 -0
  119. core/licensing/errors.py +29 -0
  120. core/licensing/feature_gate.py +86 -0
  121. core/licensing/fingerprint.py +28 -0
  122. core/licensing/guard.py +564 -0
  123. core/licensing/integrity.py +76 -0
  124. core/licensing/license-pub-v1.pem +3 -0
  125. core/licensing/runtime_feature_tier.py +126 -0
  126. core/licensing/tier_features.py +168 -0
  127. core/licensing/verify.py +65 -0
  128. core/maturity_assessment/__init__.py +14 -0
  129. core/maturity_assessment/export_render.py +117 -0
  130. core/maturity_assessment/integrity.py +122 -0
  131. core/maturity_assessment/pack.py +131 -0
  132. core/maturity_assessment/scoring.py +118 -0
  133. core/ml_engine.py +54 -0
  134. core/prefilter.py +50 -0
  135. core/rbac_settings.py +24 -0
  136. core/recommendations.py +106 -0
  137. core/rich_media_magic.py +103 -0
  138. core/runtime_trust.py +52 -0
  139. core/sampling.py +23 -0
  140. core/sampling_policy.py +126 -0
  141. core/scan_audit_log.py +157 -0
  142. core/scanner.py +85 -0
  143. core/session.py +20 -0
  144. core/suggested_review.py +75 -0
  145. core/throttler.py +69 -0
  146. core/validation.py +146 -0
  147. core/webauthn_rp/__init__.py +7 -0
  148. core/webauthn_rp/challenges.py +61 -0
  149. core/webauthn_rp/html_csrf.py +25 -0
  150. core/webauthn_rp/session_cookie.py +27 -0
  151. core/webauthn_rp/settings.py +54 -0
  152. data_boar-1.7.4.dist-info/METADATA +224 -0
  153. data_boar-1.7.4.dist-info/RECORD +200 -0
  154. data_boar-1.7.4.dist-info/WHEEL +4 -0
  155. data_boar-1.7.4.dist-info/entry_points.txt +3 -0
  156. data_boar-1.7.4.dist-info/licenses/LICENSE +28 -0
  157. database/README.md +9 -0
  158. database/__init__.py +0 -0
  159. database/connectors.py +20 -0
  160. database/scanner.py +29 -0
  161. db/database.py +57 -0
  162. file_scan/__init__.py +0 -0
  163. file_scan/text_extractor.py +27 -0
  164. logging_custom/__init__.py +0 -0
  165. logging_custom/logger.py +17 -0
  166. main.py +957 -0
  167. pro/__init__.py +1 -0
  168. pro/engine.py +127 -0
  169. pro/orchestrator.py +134 -0
  170. pro/prefilter.py +67 -0
  171. pro/state_tracker.py +71 -0
  172. pro/worker_logic.py +153 -0
  173. report/__init__.py +0 -0
  174. report/evidence_collector.py +129 -0
  175. report/executive_report.py +344 -0
  176. report/generator.py +1352 -0
  177. report/grc_export_multiformat.py +156 -0
  178. report/grc_reporter.py +453 -0
  179. report/grc_risk_taxonomy.py +133 -0
  180. report/jurisdiction_hints.py +211 -0
  181. report/nist_csf_hints.py +110 -0
  182. report/nist_csf_mapping.yaml +87 -0
  183. report/recommendation_engine.py +126 -0
  184. report/safe_prefix.py +26 -0
  185. report/scan_evidence.py +402 -0
  186. report/sqlite_reporter.py +40 -0
  187. scanners/README.md +9 -0
  188. scanners/data_scanner.py +57 -0
  189. scanners/db_connector.py +17 -0
  190. scanners/db_scanner.py +148 -0
  191. scanners/report_generator.py +23 -0
  192. scanners/scanner_factory.py +20 -0
  193. utils/audit_log_display.py +101 -0
  194. utils/file_encoding.py +68 -0
  195. utils/logger.py +82 -0
  196. utils/ml_classifier.py +18 -0
  197. utils/notify.py +509 -0
  198. utils/regex_patterns.py +29 -0
  199. utils/report_gen.py +114 -0
  200. utils/subtitle_text.py +127 -0
analysis/__init__.py ADDED
File without changes
api/__init__.py ADDED
File without changes
api/app.py ADDED
@@ -0,0 +1,8 @@
1
+ """
2
+ Re-export the single FastAPI app from api.routes for backward compatibility.
3
+ Use: from api.routes import app (or from api.app import app).
4
+ """
5
+
6
+ from api.routes import app
7
+
8
+ __all__ = ["app"]
api/locale_i18n.py ADDED
@@ -0,0 +1,209 @@
1
+ """
2
+ Dashboard HTML locale: JSON catalogs (no gettext in v1), slug negotiation, and t(key).
3
+
4
+ See docs/plans/completed/PLAN_DASHBOARD_I18N.md (M-LOCALE-V1).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import re
11
+ from functools import lru_cache
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from starlette.requests import Request
16
+
17
# BCP 47 tag -> URL path segment (slug). Tags keep standard BCP 47 casing
# (e.g. pt-BR) and match api/locales/<tag>.json and config; slugs are the
# lowercase form used in URL paths.
LOCALE_SLUG_BY_TAG: dict[str, str] = {"en": "en", "pt-BR": "pt-br"}
# Reverse lookup: URL slug -> BCP 47 tag.
LOCALE_TAG_BY_SLUG: dict[str, str] = {
    slug: tag for tag, slug in LOCALE_SLUG_BY_TAG.items()
}
# Every slug recognised as a locale path segment.
VALID_SLUGS = frozenset(LOCALE_TAG_BY_SLUG)

# Directory with the per-locale JSON catalogs, located next to this module.
_LOCALES_DIR = Path(__file__).resolve().with_name("locales")
24
+
25
+
26
def _flatten_keys(obj: Any, prefix: str = "") -> set[str]:
    """
    Collect dot-separated key paths of every string leaf under ``obj``.

    Used for catalog parity checks. Nested dicts are walked recursively and
    ``prefix`` is prepended to each path. Anything that is not a dict yields
    no keys, so non-string leaves (numbers, lists, None) are ignored.
    """
    if not isinstance(obj, dict):
        return set()
    found: set[str] = set()
    for name, value in obj.items():
        dotted = f"{prefix}.{name}" if prefix else str(name)
        if isinstance(value, str):
            found.add(dotted)
        else:
            # Dicts contribute their own leaves; other types contribute nothing.
            found.update(_flatten_keys(value, dotted))
    return found
39
+
40
+
41
@lru_cache(maxsize=8)
def _load_locale_json(tag: str) -> dict[str, Any]:
    """
    Load and cache the JSON catalog ``<tag>.json`` from the locales directory.

    Returns an empty dict when the file does not exist or when its top-level
    JSON value is not an object. The result is memoised per tag, so callers
    receive the same dict object on repeated calls.
    """
    catalog_file = _LOCALES_DIR / f"{tag}.json"
    if not catalog_file.is_file():
        return {}
    parsed = json.loads(catalog_file.read_text(encoding="utf-8"))
    if isinstance(parsed, dict):
        return parsed
    return {}
49
+
50
+
51
def locale_catalog_keys(tag: str) -> set[str]:
    """Return the flattened (dot-separated) translation keys of one locale catalog."""
    catalog = _load_locale_json(tag)
    return _flatten_keys(catalog)
54
+
55
+
56
def get_fallback_chain(supported_locales: list[str], default_locale: str) -> list[str]:
    """
    Build the ordered list of locales to try when a key is missing.

    The order is the supported locales followed by the default locale, with
    empty entries and duplicates removed (first occurrence wins). English
    ("en") is appended only when it is not already in the list.
    """
    candidates = [*supported_locales, default_locale]
    # dict.fromkeys de-duplicates while keeping first-seen order.
    ordered = list(dict.fromkeys(tag for tag in candidates if tag))
    if "en" not in ordered:
        ordered.append("en")
    return ordered
67
+
68
+
69
def _get_leaf(catalog: dict[str, Any], key: str) -> str | None:
    """
    Resolve a dot-separated ``key`` inside a nested catalog.

    Returns the string found at that path, or None when any segment is
    missing, an intermediate value is not a dict, or the final value is not a
    string.
    """
    node: Any = catalog
    for segment in key.split("."):
        if isinstance(node, dict) and segment in node:
            node = node[segment]
        else:
            return None
    if isinstance(node, str):
        return node
    return None
77
+
78
+
79
def translate(
    catalogs: dict[str, dict[str, Any]],
    key: str,
    locale_tag: str,
    fallback_chain: list[str],
) -> str:
    """
    Look up ``key`` in the active locale, then in each fallback locale in order.

    ``catalogs`` acts as a per-caller cache: a catalog that is absent (or
    stored as None) is loaded from disk and written back into the mapping.
    When no locale provides a string for ``key``, the key itself is returned.
    """
    # Active locale first, then the fallbacks, without repeating a tag.
    search_order = list(dict.fromkeys([locale_tag, *fallback_chain]))
    for tag in search_order:
        catalog = catalogs.get(tag)
        if catalog is None:
            catalog = catalogs[tag] = _load_locale_json(tag)
        found = _get_leaf(catalog, key)
        if found is not None:
            return found
    return key
99
+
100
+
101
def make_t(
    locale_tag: str,
    supported_locales: list[str],
    default_locale: str,
    catalogs: dict[str, dict[str, Any]] | None = None,
):
    """
    Build the ``t(key) -> str`` translation callable for Jinja.

    The returned function translates against ``locale_tag`` and falls back
    through the chain derived from ``supported_locales`` and
    ``default_locale``. ``catalogs`` is an optional catalog cache; a fresh dict
    is used when none is supplied.
    """
    if catalogs is None:
        catalogs = {}
    fallback_order = get_fallback_chain(supported_locales, default_locale)

    def t(key: str) -> str:
        return translate(catalogs, key, locale_tag, fallback_order)

    return t
115
+
116
+
117
def _normalize_tag(tag: str) -> str:
    """
    Normalise a locale tag: trim whitespace and turn underscores into hyphens.

    Any casing of "pt-br" is canonicalised to "pt-BR"; every other tag keeps
    its original casing.
    """
    cleaned = tag.strip().replace("_", "-")
    # Underscores are already replaced, so only the hyphenated form can occur.
    return "pt-BR" if cleaned.lower() == "pt-br" else cleaned
122
+
123
+
124
def parse_accept_language(header: str | None, supported: list[str]) -> str | None:
    """
    Pick the best supported locale from an Accept-Language header (RFC 7231-ish).

    Args:
        header: Raw header value, e.g. "en-US,en;q=0.9,pt-BR;q=0.8".
        supported: Supported locale tags, e.g. ['en', 'pt-BR'].

    Returns:
        The matching entry of ``supported`` (in its original spelling), or None
        when the header is empty, ``supported`` is empty, or nothing matches.

    Each language range is weighted by its ``q`` parameter (default 1.0; an
    unparsable value also counts as 1.0). A range with a region subtag also
    contributes its base language at 0.9 times the weight, so "en-US" can
    match a supported "en". Ranges with a weight of zero are skipped: in HTTP,
    q=0 means "not acceptable", so they must never be selected.
    """
    if not header or not supported:
        return None
    supported_norm = {_normalize_tag(x): x for x in supported}
    # Parse "en-US,en;q=0.9,pt-BR;q=0.8" into (language, weight) candidates.
    entries: list[tuple[str, float]] = []
    for part in header.split(","):
        lang, _, params = part.strip().partition(";")
        lang = lang.strip()
        if not lang:
            continue
        q = 1.0
        q_match = re.search(r"q=([\d.]+)", params)
        if q_match is not None:
            try:
                q = float(q_match.group(1))
            except ValueError:
                # Malformed weight such as "q=1.2.3": treat as unweighted.
                q = 1.0
        if q <= 0:
            # Explicitly refused by the client; also suppresses the base entry.
            continue
        entries.append((lang, q))
        base = lang.split("-")[0].lower()
        if base and base != lang.lower():
            entries.append((base, q * 0.9))
    # Stable sort: equal weights keep header order.
    entries.sort(key=lambda item: -item[1])
    for lang, _ in entries:
        nt = _normalize_tag(lang)
        if nt in supported_norm:
            return supported_norm[nt]
        # en-US -> en
        base = lang.split("-")[0].lower()
        for sup in supported:
            if sup.lower() == base:
                return sup
    return None
165
+
166
+
167
def negotiate_locale_tag(request: Request, cfg: dict[str, Any]) -> str:
    """
    Choose the locale tag for a request.

    Precedence: (1) the locale cookie, (2) the Accept-Language header,
    (3) ``default_locale`` from the ``locale`` section of ``cfg``. When the
    default is not among the supported locales, the first supported locale is
    used. Missing settings fall back to supported ['en', 'pt-BR'], default
    'en' and cookie name 'db_locale'.
    """
    locale_cfg = cfg.get("locale") or {}
    supported = list(locale_cfg.get("supported_locales") or ["en", "pt-BR"])
    default_locale = str(locale_cfg.get("default_locale") or "en")
    if default_locale.lower() in ("pt-br", "pt_br"):
        default_locale = "pt-BR"

    cookie_name = str(locale_cfg.get("cookie_name") or "db_locale")
    cookie_value = request.cookies.get(cookie_name)
    if cookie_value:
        from_cookie = _normalize_tag(cookie_value)
        if from_cookie in supported:
            return from_cookie
        # An English cookie is honoured even when "en" is not listed as supported.
        if from_cookie.lower() == "en":
            return "en"

    from_header = parse_accept_language(
        request.headers.get("accept-language"), supported
    )
    if from_header:
        return from_header

    if default_locale in supported:
        return default_locale
    return supported[0]
189
+
190
+
191
def html_base_path(locale_slug: str) -> str:
    """
    Return the link prefix for dashboard HTML pages, e.g. /en or /pt-br.

    Trailing slashes are removed; an empty slug yields "/".
    """
    prefix = f"/{locale_slug}".rstrip("/")
    if prefix:
        return prefix
    return "/"
194
+
195
+
196
def strip_locale_prefix(path: str) -> tuple[str | None, str]:
    """
    Split a leading locale segment off a URL path.

    When the first non-empty segment (compared case-insensitively) is a known
    locale slug, return ``(tag, remainder)``. ``tag`` is the value mapped to
    that slug in LOCALE_TAG_BY_SLUG (e.g. "pt-BR" for "/pt-br/..."), and
    ``remainder`` is the rest of the path rebuilt from its non-empty segments,
    starting with "/". Otherwise return ``(None, path)`` with the path
    unchanged.
    """
    parts = [segment for segment in path.split("/") if segment]
    if parts:
        tag = LOCALE_TAG_BY_SLUG.get(parts[0].lower())
        if tag is not None:
            # Joining an empty tail gives "", so a bare "/<slug>" becomes "/".
            return tag, "/" + "/".join(parts[1:])
    return None, path
api/locales/en.json ADDED
@@ -0,0 +1,254 @@
1
+ {
2
+ "nav": {
3
+ "dashboard": "Dashboard (dashBOARd)",
4
+ "reports": "Reports",
5
+ "config": "Configuration",
6
+ "help": "Help & Docs",
7
+ "about": "About",
8
+ "assessment": "Self-assessment (POC)",
9
+ "locale_en": "English",
10
+ "locale_pt": "Portuguese (Brazil)"
11
+ },
12
+ "banner": {
13
+ "insecure_title": "Plaintext HTTP",
14
+ "insecure_body": "this dashboard is not using TLS. Traffic can be read or modified on the network path. Use --https-cert-file / --https-key-file or terminate TLS on a reverse proxy. See GET /status (dashboard_transport).",
15
+ "gov_title": "Governance / trust",
16
+ "gov_body": "severity {severity} (license {license_state}). Global API key surface: {mode}. Optional per-route RBAC (api.rbac, Pro+ dashboard_rbac) — see GET /status (enterprise_surface.access_surface.rbac)."
17
+ },
18
+ "meta": {
19
+ "title_suffix": "Data Boar"
20
+ },
21
+ "assessment": {
22
+ "page_title": "Self-assessment (POC) — Data Boar",
23
+ "h1": "Organizational self-assessment",
24
+ "pack_intro": "Below is a YAML-loaded placeholder questionnaire (generic copy in the public repo). Replace with a private or licensed pack via config.",
25
+ "pack_note": "Submitted answers are stored in the local SQLite database (same file as scan results) for this POC.",
26
+ "saved_ok": "Responses saved.",
27
+ "summary_title": "Submission summary",
28
+ "summary_rows_stored": "Stored {count} answer row(s) for this submission.",
29
+ "summary_score": "Rubric score: {total} / {maximum} ({pct}%).",
30
+ "summary_score_none": "No rubric weights are defined in the YAML pack (all scores zero); add optional per-question scores to enable a percentage.",
31
+ "summary_disclaimer": "Outputs are self-reported signals only — not legal advice, audit, or certification. Keep proprietary questionnaire text in a private pack.",
32
+ "export_csv": "Download CSV",
33
+ "export_md": "Download Markdown",
34
+ "export_contract": "Exports use this authenticated download URL (attachment). There is no separate on-disk export path in the product; redirect curl or your browser to save a file. When api.require_api_key is enabled, send X-API-Key or Authorization: Bearer like other dashboard requests.",
35
+ "history_title": "Recent submissions",
36
+ "history_intro": "Stored batches in this SQLite database (newest first). Open a summary or export; highlighted row matches the batch in the URL.",
37
+ "history_th_submitted": "Submitted (UTC)",
38
+ "history_th_batch": "Batch id",
39
+ "history_th_locale": "Locale",
40
+ "history_th_pack": "Pack",
41
+ "history_th_rows": "Rows",
42
+ "history_th_actions": "Actions",
43
+ "history_view": "Summary",
44
+ "summary_integrity_heading": "Integrity (POC — HMAC)",
45
+ "summary_integrity_detail": "{ok} verified · {mismatch} failed verification · {unsealed} without seal · {unknown} sealed but key unavailable",
46
+ "summary_integrity_none": "No HMAC secret was configured when saving; rows are stored without seals.",
47
+ "form_submit": "Save responses",
48
+ "answer_unset": "—",
49
+ "answer_yes": "Yes",
50
+ "answer_no": "No",
51
+ "answer_na": "Not applicable",
52
+ "lead": "This page is an optional product placeholder. The open repository does not ship proprietary questionnaire items.",
53
+ "body_p1": "Scope and architecture options are described in docs/plans/PLAN_MATURITY_SELF_ASSESSMENT_GRC_QUESTIONNAIRE.md.",
54
+ "body_p2": "Any future output is a self-reported maturity signal only—not legal advice, audit, or certification."
55
+ },
56
+ "dashboard": {
57
+ "page_title": "Dashboard — Data Boar",
58
+ "h1": "Audit Dashboard",
59
+ "license_h2": "License status",
60
+ "scan_h2": "Scan status",
61
+ "state": "State:",
62
+ "running": "Running",
63
+ "idle": "Idle",
64
+ "current_session": "Current session:",
65
+ "findings_run": "Findings this run:",
66
+ "tenant_label": "Customer / tenant name (optional)",
67
+ "tenant_ph": "e.g. Acme Corp",
68
+ "tech_label": "Technician / operator (optional)",
69
+ "tech_ph": "e.g. Colleague-W Colleague-V",
70
+ "cli_parity": "CLI parity for one-shot scans:",
71
+ "cli_parity_link": "Help & Docs",
72
+ "cli_parity_tail": "From the CLI (--scan-compressed, --content-type-check, --scan-stego, --jurisdiction-hint).",
73
+ "scan_compressed": "Also scan inside compressed files (zip, tar, 7z, …)",
74
+ "scan_compressed_hint": "May significantly increase run time and I/O. Use only when you need to inspect contents of archives.",
75
+ "content_type": "Use content-type detection (magic bytes) for file format",
76
+ "content_type_hint": "Helps find renamed or cloaked files (e.g. PDF saved as .txt). Reads the start of each file—may increase I/O and run time.",
77
+ "scan_stego": "Steganography hints on rich media (entropy heuristic)",
78
+ "scan_stego_hint": "Optional byte-entropy hint for images/audio/video—not proof of hidden data. Extra read per file; use only when investigating concealment.",
79
+ "jurisdiction": "Heuristic jurisdiction hints (Report info — DPO / counsel)",
80
+ "jurisdiction_hint": "Optional notes when metadata may suggest CCPA/CPRA, Colorado, or Japan APPI scope. Not a legal conclusion; configure report.jurisdiction_hints for defaults.",
81
+ "start_scan": "Start scan",
82
+ "discovery_h2": "Data discovery (last run)",
83
+ "stat_db": "Database findings",
84
+ "stat_fs": "Filesystem findings",
85
+ "stat_fail": "Scan failures",
86
+ "stat_total": "Total findings",
87
+ "session_line": "Session:",
88
+ "tenant_kv": "Tenant:",
89
+ "tech_kv": "Technician:",
90
+ "chart_h2": "Progress over time",
91
+ "chart_intro": "Total findings and risk score (0–100) per scan, in chronological order.",
92
+ "chart_empty": "Run at least one scan to see the progress graph.",
93
+ "chart_aria": "Findings and score over sequential scans",
94
+ "sessions_h2": "Recent sessions",
95
+ "view_reports": "View all reports",
96
+ "th_session": "Session",
97
+ "th_started": "Started",
98
+ "th_tenant": "Tenant / Customer",
99
+ "th_technician": "Technician / Operator",
100
+ "th_db": "DB",
101
+ "th_fs": "FS",
102
+ "th_failures": "Failures",
103
+ "th_report": "Report",
104
+ "download": "Download",
105
+ "no_sessions_row": "No sessions yet. Start a scan.",
106
+ "about_app_h2": "About this application",
107
+ "about_app_p": "Reports (Excel and heatmap) and this dashboard are generated by this application; see About for full details.",
108
+ "about_link": "About"
109
+ },
110
+ "config": {
111
+ "page_title": "Configuration — Data Boar",
112
+ "h1": "Scan configuration",
113
+ "intro": "Edit the audit configuration (YAML). Saved file:",
114
+ "intro_tail": "Changes apply to the next scan.",
115
+ "error_prefix": "Error:",
116
+ "no_yaml_error": "No YAML content provided.",
117
+ "saved_ok": "Configuration saved successfully.",
118
+ "yaml_label": "YAML configuration",
119
+ "save": "Save configuration",
120
+ "readme_blurb": "See README and docs/USAGE.md for target types (database, filesystem, api, powerbi, dataverse, shares) and options."
121
+ },
122
+ "reports": {
123
+ "page_title": "Reports — Data Boar",
124
+ "h1": "Reports",
125
+ "intro": "Download Excel reports (and heatmap) for each scan session. “Download” regenerates the report for that session.",
126
+ "sort_newest": "Newest first",
127
+ "sort_oldest": "Oldest first",
128
+ "order_label": "Order by date:",
129
+ "th_session": "Session ID",
130
+ "th_started": "Started",
131
+ "th_finished": "Finished",
132
+ "th_status": "Status",
133
+ "th_tenant": "Tenant / Customer",
134
+ "th_technician": "Technician / Operator",
135
+ "th_db": "DB findings",
136
+ "th_fs": "FS findings",
137
+ "th_failures": "Failures",
138
+ "th_report": "Report",
139
+ "download": "Download",
140
+ "no_sessions": "No sessions yet. Run a scan from the",
141
+ "dashboard_link": "dashboard"
142
+ },
143
+ "about": {
144
+ "page_title": "About — Data Boar (dashBOARd)",
145
+ "h1": "About",
146
+ "dashmuted": "(dashBOARd)",
147
+ "app_h2": "Application",
148
+ "web_dash": "Web dashboard experience:",
149
+ "author_h2": "Author",
150
+ "license_h2": "Subscription / runtime license status",
151
+ "license_intro": "Operational status of optional commercial enforcement (default: open distribution). See docs/LICENSING_SPEC.md.",
152
+ "state": "State:",
153
+ "mode": "Mode:",
154
+ "licensed_customer": "Licensed customer (token):",
155
+ "expires": "Expiration (UTC):",
156
+ "issuer": "Issuer (token):"
157
+ },
158
+ "help": {
159
+ "page_title": "Help & Docs — Data Boar",
160
+ "h1": "Help & documentation",
161
+ "what_h2": "What Data Boar does",
162
+ "what_p1": "Data Boar is an enterprise data discovery and risk governance engine: it scans configured data sources (databases, filesystems, APIs and shares) for personal and sensitive data, stores only metadata in a local SQLite database, and generates Excel reports, heatmaps, executive Markdown (APG + evidence), and optional GRC-oriented exports.",
163
+ "what_p2": "Each run is a scan session with optional tenant / customer and technician / operator tags, so you can attribute results.",
164
+ "run_h2": "How to run a scan",
165
+ "run_web_h3": "From the web dashboard",
166
+ "run_web_li1": "Go to the Dashboard page.",
167
+ "run_web_li2": "Ensure your Configuration (see Config page) has the desired targets (databases, filesystems, APIs, etc.). The scan will run exactly those targets.",
168
+ "run_web_li3": "(Optional) Fill in Customer / tenant and Technician / operator.",
169
+ "run_web_li4": "Click Start scan. This triggers a full audit of all targets in the current config. The status panel will update and the page will auto-refresh when the run finishes.",
170
+ "run_web_tail": "Recent sessions are listed at the bottom of the dashboard and on the Reports page, with download links.",
171
+ "run_web_shot_alt": "Dashboard scan form with Start scan button highlighted",
172
+ "run_web_shot_caption": "Dashboard — optional tenant/technician fields and Start scan (no shell required).",
173
+ "run_auto_h3": "For automation / scripting",
174
+ "run_cli_h3": "From the CLI",
175
+ "run_cli_p1": "From the repo root, prefer uv run python main.py (or activate your venv and run python main.py). Full flag list: python main.py --help. Packaged installs: man 1 data-boar (command) and man 5 data-boar (config file format).",
176
+ "run_cli_oneshot": "One-shot audit (same targets as a dashboard scan):",
177
+ "run_cli_api": "Start the API / dashBOARd (no scan until you use the browser or HTTP):",
178
+ "run_cli_tls": "You must either terminate TLS at the process (--https-cert-file + --https-key-file, or the same under api: in config) or explicitly accept plaintext with --allow-insecure-http (or api.allow_insecure_http: true). The official Docker image passes --allow-insecure-http by default; mount certs and remove it for HTTPS.",
179
+ "run_cli_bind": "Bind order when using --web: --host overrides api.host in config and API_HOST; if none are set, the default is 127.0.0.1 (Docker image often sets API_HOST=0.0.0.0). Port: api.port in config vs --port. Transport: GET /status and GET /health include dashboard_transport — see docs/USAGE.md.",
180
+ "run_cli_danger": "Dangerous maintenance (lab/demo only): --reset-data wipes SQLite sessions/findings and deletes generated reports under report.output_dir. See SECURITY.md before using in production.",
181
+ "run_cli_audit": "Audit trail export (CLI only): --export-audit-trail or --export-audit-trail path.json writes a JSON snapshot (wipe log, session summary; future: integrity rows). Does not modify the database. Omit the path or use - for stdout.",
182
+ "dl_h2": "Downloading reports, heatmaps and logs",
183
+ "dl_web_h3": "From the web dashboard",
184
+ "dl_web_li1": "Open Reports from the top menu (or go to /en/reports or /pt-br/reports).",
185
+ "dl_web_li2": "Click Download on the session row to save the Excel report (the heatmap is generated with the workbook).",
186
+ "dl_web_shot_alt": "Reports table with Download button on each session row",
187
+ "dl_web_shot_caption": "Reports — click Download on the row you need (no curl required).",
188
+ "dl_auto_h3": "For automation / scripting",
189
+ "dl_auto_p": "Use curl or your HTTP client when you need scripted downloads (CI, cron, integrations):",
190
+ "dl_li1": "Use the Reports page to see all sessions and click Download to get the Excel report for a specific run.",
191
+ "dl_li2_intro": "Use the API to script downloads:",
192
+ "findings_h3": "Findings export (JSON / CSV)",
193
+ "findings_p": "Download unified findings (same schema as the dashboard) for the latest session or a specific session_id. UTF-8 CSV uses attachment headers on /findings/csv and /findings/<session_id>/csv.",
194
+ "sql_env_h2": "Database sampling (optional env overrides)",
195
+ "sql_env_p": "Break-glass knobs without editing YAML: DATA_BOAR_SQL_SAMPLE_LIMIT (integer, clamped 1–10000) replaces file_scan.sample_limit for SQLAlchemy and Snowflake column sampling; DATA_BOAR_SAMPLE_STATEMENT_TIMEOUT_MS overrides the short-read timeout budget on database targets. See docs/USAGE.md (Relational database sampling; SRE sampling knobs).",
196
+ "cfg_h2": "Configuration examples",
197
+ "cfg_p1": "The configuration file (YAML or JSON) defines which targets are scanned. Minimal YAML example:",
198
+ "cfg_tail": "Place this file alongside the application (default name config.yaml) or set the CONFIG_PATH environment variable to point to a different location.",
199
+ "rate_h2": "Rate limiting & safety",
200
+ "rate_p1": "To avoid accidental overload (for example, many scans triggered in a row or too many sessions in parallel via the API/dashboard), you can enable a rate_limit block in the main config:",
201
+ "rate_tail": "When enabled, API endpoints that start scans may return HTTP 429 with a JSON body indicating why the request was rate-limited (too many running scans, or minimum interval not elapsed). The CLI only prints warnings using the same rules. See docs/USAGE.md (EN), docs/USAGE.pt_BR.md (pt-BR) and docs/data_boar.5 for details.",
202
+ "sense_h2": "Sensitivity detection and reporting",
203
+ "sense_p1": "The app detects personal and sensitive data using regex, ML (TF-IDF + classifier), and optional DL (sentence embeddings). Out-of-the-box it recognises PII (CPF, email, phone, etc.) and sensitive categories (health, religion, political affiliation, gender, biometric, genetic, race, union, PEP, sex life). You can:",
204
+ "sense_li1": "Add your own terms via ml_patterns_file or sensitivity_detection.ml_terms in config.",
205
+ "sense_li2": "Use the example file docs/sensitivity_terms_sensitive_categories.example.yaml and set ml_patterns_file (or dl_patterns_file) to it.",
206
+ "sense_li3": "Customise the Recommendations sheet with report.recommendation_overrides: map norm_tag patterns (e.g. health, religious, political) to Base legal, Risk, Priority, and “Relevante para”.",
207
+ "sense_tail": "See docs/SENSITIVITY_DETECTION.md (EN) and docs/SENSITIVITY_DETECTION.pt_BR.md (pt-BR); docs/plans/completed/PLAN_SENSITIVE_CATEGORIES_ML_DL.md for design; docs/USAGE.md (EN) and docs/USAGE.pt_BR.md (pt-BR) for full recommendation_overrides examples.",
208
+ "sec_h2": "Security headers and hardening",
209
+ "sec_p1": "The API and dashboard send security headers on every response (Content-Security-Policy, X-Frame-Options, Referrer-Policy, Permissions-Policy, HSTS when HTTPS). For deployment hardening (Docker, Kubernetes, non-root, NetworkPolicy, reverse proxy), see:",
210
+ "sec_li1": "SECURITY.md – HTTP security headers and optional API key",
211
+ "sec_li2": "docs/deploy/DEPLOY.md – section \"Security and hardening (optional)\"",
212
+ "sec_h3": "What to keep an eye out for (technicians)",
213
+ "sec_t1": "Passwords with special characters: Database and MongoDB passwords containing @, :, / or # are supported; the app encodes them in connection URLs automatically.",
214
+ "sec_t2": "Protecting the Configuration page: The config file may contain credentials. If the API is exposed to untrusted users or networks, set api.require_api_key: true and use a strong API key so only authenticated requests can access /config.",
215
+ "sec_t3": "Deployment: Run behind a reverse proxy with TLS when the dashboard or API is exposed to the internet; use the optional API key and rate limiting as an extra layer.",
216
+ "sec_tail": "For a full list of security fixes, regression tests, and recommendations, see docs/SECURITY.md (EN) and docs/SECURITY.pt_BR.md (pt-BR).",
217
+ "more_h2": "More documentation",
218
+ "more_p1": "For full details, including optional connectors (Power BI, Dataverse, REST APIs, shares) and advanced tuning, see:",
219
+ "more_li1": "README.md and docs/USAGE.md (English) · docs/USAGE.pt_BR.md (Português – Brasil)",
220
+ "more_li2": "docs/SENSITIVITY_DETECTION.md (EN) · docs/SENSITIVITY_DETECTION.pt_BR.md (pt-BR) – ML/DL terms and config",
221
+ "more_li3": "docs/plans/completed/PLAN_SENSITIVE_CATEGORIES_ML_DL.md – sensitive categories (health, religion, political, etc.)",
222
+ "more_tail": "These files live in the project repository and should be kept in sync with the behaviour you see here."
223
+ },
224
+ "login": {
225
+ "page_title": "Sign in — Data Boar",
226
+ "h1": "Sign in",
227
+ "disabled": "WebAuthn is not enabled in this deployment. Set api.webauthn.enabled and the token secret environment variable before starting the process.",
228
+ "help_link": "Help & Docs",
229
+ "intro": "Use a passkey (WebAuthn) to access the dashboard after the first passkey is registered. If none exist yet, open this page and choose Register passkey.",
230
+ "button_placeholder": "…",
231
+ "hint_https": "WebAuthn requires HTTPS or localhost. Match api.webauthn.origin and rp_id to the browser URL."
232
+ },
233
+ "js": {
234
+ "chart_total": "Total findings",
235
+ "chart_risk": "Risk score (0–100)",
236
+ "chart_y_total": "Total findings",
237
+ "chart_y_risk": "Risk score",
238
+ "status_running": "Running",
239
+ "status_idle": "Idle",
240
+ "starting": "Starting…",
241
+ "started_prefix": "Started:",
242
+ "err_scan_progress": "Scan already in progress.",
243
+ "err_scan_progress_guide": "Wait for the current scan to finish, or restart the API if it is stuck.",
244
+ "err_rate": "Rate limited; try again shortly.",
245
+ "err_rate_guide": "Wait and try again, or adjust rate_limit.max_concurrent_scans and min_interval_seconds in config.",
246
+ "err_auth": "Not authorized ({code}).",
247
+ "err_auth_guide": "Check API key or auth configuration if the API is protected.",
248
+ "err_network_guide": "Request did not reach the server. Check network, CORS, or ad-blockers; ensure the API is running.",
249
+ "err_server_guide": "Server error. Check API logs and try again.",
250
+ "err_what": "What to do:",
251
+ "err_prefix": "Error:",
252
+ "retry_after": "Retry after {n} seconds, or adjust rate_limit in config."
253
+ }
254
+ }