borderlint 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
borderlint/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """borderlint — map and govern where your AI data and traffic flow."""
2
+
3
+ __version__ = "0.2.0"
borderlint/__main__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .cli import main
2
+
3
+ raise SystemExit(main())
borderlint/cli.py ADDED
@@ -0,0 +1,52 @@
1
+ """borderlint command-line interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+
8
+ from . import report
9
+ from .detect import scan
10
+ from .kb import load_kb
11
+ from .policy import Finding, evaluate, load_policy
12
+
13
+
14
def main(argv=None) -> int:
    """Run the borderlint command line and return the process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv[1:]``.

    Returns:
        0 when no finding fails (or only the help text is printed), 1 when at
        least one finding has severity ``"fail"``, 2 on a usage or input error.
    """
    ap = argparse.ArgumentParser(prog="borderlint", description="Map and govern where your AI data flows.")
    sub = ap.add_subparsers(dest="cmd")
    s = sub.add_parser("scan", help="Scan a path for AI data flows and check a residency policy.")
    s.add_argument("path", nargs="?", default=".")
    s.add_argument("-p", "--policy", help="residency policy JSON (omit for inventory mode)")
    s.add_argument("-c", "--classification", help="data class on the scanned path (required with --policy)")
    s.add_argument("-f", "--format", choices=["text", "json", "mermaid"], default="text")
    s.add_argument("--providers", help="custom provider knowledge base JSON")
    a = ap.parse_args(argv)

    if a.cmd != "scan":
        ap.print_help()
        return 0

    # Reject a bad flag combination before spending time on the scan.
    if a.policy and not a.classification:
        print("error: --classification is required when --policy is given", file=sys.stderr)
        return 2

    # Unreadable or malformed input files are user errors, not crashes.
    # json.JSONDecodeError is a ValueError subclass.
    try:
        kb = load_kb(a.providers)
        policy = load_policy(a.policy) if a.policy else None
    except (OSError, ValueError) as e:
        print(f"error: {e}", file=sys.stderr)
        return 2

    try:
        detections = scan(a.path, kb)
    except OSError as e:
        print(f"error: {e}", file=sys.stderr)
        return 2

    if policy is None:
        findings = [Finding(d, "ok", []) for d in detections]  # inventory mode
    else:
        try:
            findings = evaluate(detections, policy, a.classification, kb)
        except KeyError as e:
            # str(KeyError) is the repr of its argument and would add stray quotes.
            message = e.args[0] if e.args else e
            print(f"error: {message}", file=sys.stderr)
            return 2

    renderers = {"text": report.text, "json": report.as_json, "mermaid": report.mermaid}
    print(renderers[a.format](findings, kb, policy))
    return 1 if any(f.severity == "fail" for f in findings) else 0
49
+
50
+
51
+ if __name__ == "__main__":
52
+ raise SystemExit(main())
@@ -0,0 +1,23 @@
1
+ {
2
+ "providers": [
3
+ {"id": "openai", "name": "OpenAI", "sdks": ["openai"], "npm": ["openai", "@ai-sdk/openai"], "endpoints": ["api.openai.com"], "jurisdiction": "us"},
4
+ {"id": "anthropic", "name": "Anthropic", "sdks": ["anthropic"], "npm": ["@anthropic-ai/sdk", "@ai-sdk/anthropic"], "endpoints": ["api.anthropic.com"], "jurisdiction": "us"},
5
+ {"id": "google_gemini", "name": "Google Gemini", "sdks": ["google.generativeai", "google.genai"], "npm": ["@google/generative-ai", "@google/genai", "@ai-sdk/google", "@ai-sdk/google-vertex"], "endpoints": ["generativelanguage.googleapis.com"], "jurisdiction": "us"},
6
+ {"id": "azure_openai", "name": "Azure OpenAI", "sdks": [], "npm": ["@azure/openai", "@ai-sdk/azure"], "endpoints": ["openai.azure.com", "api.cognitive.microsoft.com", "inference.ai.azure.com"], "jurisdiction": "unknown", "region_scheme": "azure", "note": "regional hosts (<region>.api.cognitive.microsoft.com, *.<region>.inference.ai.azure.com) resolve; the standard openai.azure.com does not carry a region"},
7
+ {"id": "aws_bedrock", "name": "AWS Bedrock", "sdks": [], "npm": ["@aws-sdk/client-bedrock-runtime", "@ai-sdk/amazon-bedrock"], "endpoints": ["bedrock-runtime"], "jurisdiction": "unknown", "region_scheme": "aws", "note": "region is in the host: bedrock-runtime.<region>.amazonaws.com"},
8
+ {"id": "mistral", "name": "Mistral AI", "sdks": ["mistralai"], "npm": ["@mistralai/mistralai", "@ai-sdk/mistral"], "endpoints": ["api.mistral.ai"], "jurisdiction": "eu"},
9
+ {"id": "cohere", "name": "Cohere", "sdks": ["cohere"], "npm": ["cohere-ai", "@ai-sdk/cohere"], "endpoints": ["api.cohere.com", "api.cohere.ai"], "jurisdiction": "us"},
10
+ {"id": "deepseek", "name": "DeepSeek", "sdks": [], "npm": ["@ai-sdk/deepseek"], "endpoints": ["api.deepseek.com"], "jurisdiction": "cn"},
11
+ {"id": "tencent_hunyuan", "name": "Tencent Hunyuan", "sdks": ["tencentcloud"], "npm": [], "endpoints": ["hunyuan.tencentcloudapi.com"], "jurisdiction": "cn"},
12
+ {"id": "alibaba_dashscope", "name": "Alibaba DashScope", "sdks": ["dashscope"], "npm": [], "endpoints": ["dashscope-intl.aliyuncs.com", "dashscope.aliyuncs.com"], "jurisdiction": "cn", "endpoint_jurisdictions": {"dashscope-intl.aliyuncs.com": "sg"}},
13
+ {"id": "moonshot", "name": "Moonshot (Kimi)", "sdks": [], "npm": [], "endpoints": ["api.moonshot.cn"], "jurisdiction": "cn"},
14
+ {"id": "zhipu", "name": "Zhipu GLM", "sdks": ["zhipuai"], "npm": [], "endpoints": ["open.bigmodel.cn"], "jurisdiction": "cn"},
15
+ {"id": "baidu_ernie", "name": "Baidu ERNIE", "sdks": [], "npm": [], "endpoints": ["aip.baidubce.com"], "jurisdiction": "cn"},
16
+
17
+ {"id": "litellm", "name": "LiteLLM (router)", "kind": "aggregator", "sdks": ["litellm"], "npm": [], "endpoints": [], "jurisdiction": "unknown", "note": "multi-provider router; destination chosen at runtime"},
18
+ {"id": "langchain", "name": "LangChain (router)", "kind": "aggregator", "sdks": ["langchain", "langchain_openai", "langchain_anthropic", "langchain_community", "langchain_google_genai", "langchain_aws", "langchain_mistralai"], "npm": ["langchain", "@langchain/openai", "@langchain/anthropic", "@langchain/community", "@langchain/google-genai", "@langchain/aws", "@langchain/mistralai"], "endpoints": [], "jurisdiction": "unknown", "note": "multi-provider router; destination chosen at runtime"},
19
+ {"id": "llama_index", "name": "LlamaIndex (router)", "kind": "aggregator", "sdks": ["llama_index"], "npm": ["llamaindex"], "endpoints": [], "jurisdiction": "unknown", "note": "multi-provider router; destination chosen at runtime"},
20
+ {"id": "aisuite", "name": "aisuite (router)", "kind": "aggregator", "sdks": ["aisuite"], "npm": ["aisuite"], "endpoints": [], "jurisdiction": "unknown", "note": "multi-provider router; destination chosen at runtime"},
21
+ {"id": "vercel_ai", "name": "Vercel AI SDK (router)", "kind": "aggregator", "sdks": [], "npm": ["ai"], "endpoints": [], "jurisdiction": "unknown", "note": "provider-agnostic core; the @ai-sdk/<provider> adapter package determines the provider"}
22
+ ]
23
+ }
borderlint/detect.py ADDED
@@ -0,0 +1,105 @@
1
+ """Scan a path for AI provider usage (SDK imports + endpoint references)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ import re
7
+ import warnings
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+
11
+ IGNORE = {".git", "node_modules", "__pycache__", ".venv", "venv", "build", "dist",
12
+ ".mypy_cache", ".pytest_cache", ".tox", ".ruff_cache"}
13
+ TEXT_EXT = {".env", ".ts", ".tsx", ".js", ".jsx", ".yaml", ".yml", ".toml", ".json", ".ini", ".cfg", ".sh"}
14
+ JS_EXT = {".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"}
15
+
16
+ # Capture the module specifier from: `import X from "pkg"`, `import "pkg"`, `export ... from "pkg"`,
17
+ # `require("pkg")`, dynamic `import("pkg")`. Regex over tree-sitter keeps borderlint zero-dependency.
18
+ _JS_IMPORT = re.compile(
19
+ r'''(?:^[ \t]*import\b[^'"\n]*?\bfrom[ \t]*|^[ \t]*import[ \t]*|^[ \t]*export\b[^'"\n]*?\bfrom[ \t]*|\brequire[ \t]*\([ \t]*|\bimport[ \t]*\([ \t]*)['"]([^'"]+)['"]''',
20
+ re.M)
21
+
22
+
23
@dataclass(frozen=True)
class Detection:
    """One piece of evidence that the scanned code uses an AI provider."""

    provider_id: str  # provider id returned by the knowledge-base matchers
    kind: str  # "sdk_import" | "endpoint_reference"
    evidence: str  # the module, package or host that matched
    file: str  # path of the file the evidence was found in
    line: int  # 1-based line number of the evidence
    jurisdiction: str  # jurisdiction code, or "unknown"
31
+
32
+
33
+ def _scan_py(path: str, src: str, kb) -> list[Detection]:
34
+ out: list[Detection] = []
35
+ try:
36
+ with warnings.catch_warnings(): # ponytail: hush the scanned file's own warnings, not ours
37
+ warnings.simplefilter("ignore")
38
+ tree = ast.parse(src)
39
+ except SyntaxError:
40
+ return out # resilient: skip unparseable files
41
+ for n in ast.walk(tree):
42
+ if isinstance(n, ast.Import):
43
+ for a in n.names:
44
+ pid = kb.match_sdk(a.name)
45
+ if pid:
46
+ out.append(Detection(pid, "sdk_import", a.name, path, n.lineno, kb.default_jurisdiction(pid)))
47
+ elif isinstance(n, ast.ImportFrom):
48
+ pid = kb.match_sdk(n.module or "")
49
+ if pid:
50
+ out.append(Detection(pid, "sdk_import", n.module, path, n.lineno, kb.default_jurisdiction(pid)))
51
+ elif isinstance(n, ast.Constant) and isinstance(n.value, str):
52
+ m = kb.match_endpoint(n.value)
53
+ if m:
54
+ out.append(Detection(m[0], "endpoint_reference", m[1], path, n.lineno, m[2]))
55
+ return out
56
+
57
+
58
+ def _scan_text(path: str, src: str, kb) -> list[Detection]:
59
+ out: list[Detection] = []
60
+ for i, line in enumerate(src.splitlines(), 1):
61
+ m = kb.match_endpoint(line)
62
+ if m:
63
+ out.append(Detection(m[0], "endpoint_reference", m[1], path, i, m[2]))
64
+ return out
65
+
66
+
67
def _scan_js(path: str, src: str, kb) -> list[Detection]:
    """Report JS/TS module specifiers in ``src`` that name a known npm package."""
    found: list[Detection] = []
    for match in _JS_IMPORT.finditer(src):
        specifier = match.group(1)
        pid = kb.match_npm(specifier)
        if not pid:
            continue
        # Line number = newlines before the start of the match, 1-based.
        lineno = 1 + src.count("\n", 0, match.start())
        found.append(Detection(pid, "sdk_import", specifier, path, lineno, kb.default_jurisdiction(pid)))
    return found
75
+
76
+
77
def scan(root, kb) -> list[Detection]:
    """Scan a file or directory tree and return de-duplicated detections.

    Args:
        root: File or directory to scan.
        kb: Knowledge base handed to the per-language scanners.

    Returns:
        Detections in path-sorted order, de-duplicated on
        (provider, kind, evidence, file, line).

    Raises:
        FileNotFoundError: If ``root`` does not exist. A mistyped path must not
            look like a clean scan to a CI gate.
    """
    root = Path(root)
    if not root.exists():
        raise FileNotFoundError(f"scan path does not exist: {root}")
    if root.is_file():
        paths = [root]
    else:
        # Sorted so the report does not depend on filesystem enumeration order.
        paths = sorted(p for p in root.rglob("*") if p.is_file())
    seen, out = set(), []
    for p in paths:
        # Only components below the scan root are ignorable; a root that itself
        # lives under e.g. "build/" was asked for explicitly.
        if any(part in IGNORE for part in p.relative_to(root).parts):
            continue
        suffix = p.suffix
        is_py = suffix == ".py"
        is_js = suffix in JS_EXT
        is_text = suffix in TEXT_EXT or p.name == ".env"
        if not (is_py or is_js or is_text):
            continue
        try:
            src = p.read_text("utf-8", errors="ignore")
        except OSError:
            continue
        if is_py:
            dets = _scan_py(str(p), src, kb)
        elif is_js:  # imports plus endpoint literals
            dets = _scan_js(str(p), src, kb) + _scan_text(str(p), src, kb)
        else:
            dets = _scan_text(str(p), src, kb)
        for d in dets:
            key = (d.provider_id, d.kind, d.evidence, d.file, d.line)
            if key not in seen:
                seen.add(key)
                out.append(d)
    return out
borderlint/kb.py ADDED
@@ -0,0 +1,105 @@
1
+ """Provider knowledge base: load and resolve a provider/endpoint to a jurisdiction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from importlib.resources import files
8
+
9
+ # Region-coded endpoints (the host carries the region) → ccTLD jurisdiction.
10
+ _AWS_RE = re.compile(r"\b([a-z]{2}(?:-gov)?-[a-z]+-\d)\b")
11
+ _AWS_REGION = {
12
+ "ap-east-1": "hk", "ap-east-2": "hk", "cn-north-1": "cn", "cn-northwest-1": "cn",
13
+ "ap-southeast-1": "sg", "ap-southeast-2": "au", "ap-southeast-3": "id",
14
+ "ap-southeast-5": "my", "ap-southeast-7": "th", "ap-south-1": "in", "ap-south-2": "in",
15
+ "ap-northeast-1": "jp", "ap-northeast-2": "kr", "ap-northeast-3": "jp",
16
+ "eu-west-1": "ie", "eu-west-2": "gb", "eu-west-3": "fr", "eu-central-1": "de",
17
+ "eu-central-2": "ch", "eu-north-1": "se", "eu-south-1": "it", "eu-south-2": "es",
18
+ "me-south-1": "bh", "me-central-1": "ae", "af-south-1": "za", "il-central-1": "il",
19
+ }
20
+ _AZURE_RE = re.compile(
21
+ r"\b(eastus2?|westus[123]?|centralus|southcentralus|northcentralus|canadacentral|canadaeast|"
22
+ r"brazilsouth|northeurope|westeurope|uksouth|ukwest|francecentral|germanywestcentral|"
23
+ r"switzerlandnorth|swedencentral|norwayeast|polandcentral|italynorth|spaincentral|eastasia|"
24
+ r"southeastasia|japaneast|japanwest|koreacentral|australiaeast|australiasoutheast|centralindia|"
25
+ r"southindia|uaenorth|qatarcentral|southafricanorth|israelcentral|chinaeast2?|chinanorth[23]?)\b")
26
+ _AZURE_REGION = {
27
+ "eastus": "us", "eastus2": "us", "westus": "us", "westus2": "us", "westus3": "us",
28
+ "centralus": "us", "southcentralus": "us", "northcentralus": "us", "canadacentral": "ca",
29
+ "canadaeast": "ca", "brazilsouth": "br", "northeurope": "ie", "westeurope": "nl",
30
+ "uksouth": "gb", "ukwest": "gb", "francecentral": "fr", "germanywestcentral": "de",
31
+ "switzerlandnorth": "ch", "swedencentral": "se", "norwayeast": "no", "polandcentral": "pl",
32
+ "italynorth": "it", "spaincentral": "es", "eastasia": "hk", "southeastasia": "sg",
33
+ "japaneast": "jp", "japanwest": "jp", "koreacentral": "kr", "australiaeast": "au",
34
+ "australiasoutheast": "au", "centralindia": "in", "southindia": "in", "uaenorth": "ae",
35
+ "qatarcentral": "qa", "southafricanorth": "za", "israelcentral": "il",
36
+ "chinaeast": "cn", "chinaeast2": "cn", "chinanorth": "cn", "chinanorth2": "cn", "chinanorth3": "cn",
37
+ }
38
+
39
+
40
+ def _region_jurisdiction(text: str, scheme: str):
41
+ if scheme == "aws":
42
+ m = _AWS_RE.search(text)
43
+ if not m:
44
+ return None
45
+ r = m.group(1)
46
+ return _AWS_REGION.get(r) or {"us": "us", "ca": "ca", "sa": "br", "cn": "cn"}.get(r.split("-")[0])
47
+ if scheme == "azure":
48
+ m = _AZURE_RE.search(text)
49
+ return _AZURE_REGION.get(m.group(1)) if m else None
50
+ return None
51
+
52
+
53
def load_kb(path: str | None = None) -> "KB":
    """Build the knowledge base from ``path``, or from the bundled ``data/providers.json``."""
    if not path:
        bundled = files("borderlint").joinpath("data/providers.json")
        data = json.loads(bundled.read_text("utf-8"))
    else:
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    providers = data.get("providers", [])
    return KB(providers)
60
+
61
+
62
class KB:
    """Provider knowledge base with SDK, npm-package and endpoint matchers."""

    def __init__(self, providers: list[dict]):
        """Index ``providers`` (dicts with at least an ``"id"`` key) for matching."""
        self.by_id = {p["id"]: p for p in providers}
        sdks, npm, eps = [], [], []
        for p in providers:
            pid = p["id"]
            # Empty names are dropped: "" is a substring of every text and
            # would turn a malformed entry into a match-everything rule.
            sdks.extend((s, pid) for s in p.get("sdks", []) if s)
            npm.extend((n, pid) for n in p.get("npm", []) if n)
            ej = p.get("endpoint_jurisdictions", {})
            fallback = p.get("jurisdiction", "unknown")
            eps.extend((h, pid, ej.get(h, fallback)) for h in p.get("endpoints", []) if h)
        # Longest match first so specific SDKs/hosts win over shorter ones.
        self._sdks = sorted(sdks, key=lambda x: -len(x[0]))
        self._npm = sorted(npm, key=lambda x: -len(x[0]))
        self._eps = sorted(eps, key=lambda x: -len(x[0]))
        # Lower-cased host next to the original spelling, for case-insensitive matching.
        self._eps_folded = [(h.lower(), h, pid, juris) for h, pid, juris in self._eps]
        self.region_scheme = {p["id"]: p["region_scheme"] for p in providers if p.get("region_scheme")}

    def name(self, pid: str) -> str:
        """Return the provider's display name, or ``pid`` itself when unknown."""
        return self.by_id.get(pid, {}).get("name", pid)

    def default_jurisdiction(self, pid: str) -> str:
        """Return the provider's declared jurisdiction, or ``"unknown"``."""
        return self.by_id.get(pid, {}).get("jurisdiction", "unknown")

    def match_sdk(self, module: str) -> str | None:
        """Return the provider id whose SDK is ``module`` or a parent package of it."""
        for s, pid in self._sdks:
            if module == s or module.startswith(s + "."):
                return pid
        return None

    def match_npm(self, pkg: str) -> str | None:
        """Return the provider id whose npm package is ``pkg`` or a parent path of it."""
        for name, pid in self._npm:
            if pkg == name or pkg.startswith(name + "/"):
                return pid
        return None

    def match_endpoint(self, text: str):
        """Find a known endpoint host inside ``text``.

        Returns:
            ``(provider_id, host, jurisdiction)`` for the longest matching host,
            or ``None``. For providers with a region scheme, the jurisdiction
            comes from the region named in ``text`` when one can be resolved.
        """
        haystack = text.lower()  # host names are case-insensitive
        for folded, h, pid, juris in self._eps_folded:
            if folded in haystack:
                scheme = self.region_scheme.get(pid)
                if scheme:
                    juris = _region_jurisdiction(haystack, scheme) or juris
                return pid, h, juris
        return None
borderlint/policy.py ADDED
@@ -0,0 +1,61 @@
1
+ """Load a residency policy and evaluate detections (deny-by-default)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass, field
7
+
8
+
9
@dataclass
class Finding:
    """A detection together with the verdict reached for it."""

    detection: object  # the detection being judged
    severity: str  # "ok" | "warn" | "fail"
    reasons: list = field(default_factory=list)  # reason codes behind the severity
14
+
15
+
16
def load_policy(path: str) -> dict:
    """Read a residency policy from a JSON file.

    Args:
        path: Path of the policy file.

    Returns:
        The policy dict. A bare ``{classification: [jurisdictions]}`` map is
        wrapped as ``{"classifications": ...}``.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``)
            or its top level is not a JSON object.
    """
    with open(path, encoding="utf-8") as fh:
        data = json.load(fh)
    if not isinstance(data, dict):
        # Without this check a list or scalar fails below with an opaque AttributeError.
        raise ValueError(f"policy must be a JSON object, got {type(data).__name__}")
    # Shorthand: a bare {classification: [jurisdictions]} map is the classifications block.
    if "classifications" not in data and data and all(isinstance(v, list) for v in data.values()):
        data = {"classifications": data}
    return data
23
+
24
+
25
+ def _allowed(allow: list[str]) -> set[str]:
26
+ s = set(allow)
27
+ if "GBA" in s: # GBA alias = hk + the nine Mainland GBA cities
28
+ s.update({"hk", "CN-GBA"})
29
+ return s
30
+
31
+
32
def evaluate(detections, policy: dict, classification: str, kb=None) -> list[Finding]:
    """Judge each detection against ``policy`` for the given data class.

    Raises:
        KeyError: If ``classification`` is not defined in the policy.
    """
    classes = policy.get("classifications", {})
    if classification not in classes:
        raise KeyError(f"classification '{classification}' not defined in policy")
    permitted_codes = _allowed(classes[classification])
    denied = set(policy.get("providers", {}).get("deny", []))
    permitted_providers = set(policy.get("providers", {}).get("allow", []))
    on_unknown = policy.get("on_unknown", "warn")
    fail_on = set(policy.get("fail_on", ["residency", "denied_provider"]))

    def _reasons_for(d) -> list:
        # A non-empty provider allow-list denies every provider it omits.
        why = []
        blocked = d.provider_id in denied or (permitted_providers and d.provider_id not in permitted_providers)
        if blocked:
            why.append("denied_provider")
        if d.jurisdiction == "unknown":
            why.append("unknown")
        elif d.jurisdiction not in permitted_codes:
            why.append("residency")
        return why

    findings = []
    for d in detections:
        why = _reasons_for(d)
        findings.append(Finding(d, _severity(why, fail_on, on_unknown), why))
    return findings
53
+
54
+
55
+ def _severity(reasons: list[str], fail_on: set[str], on_unknown: str) -> str:
56
+ if not reasons:
57
+ return "ok"
58
+ fail = (("denied_provider" in reasons and "denied_provider" in fail_on)
59
+ or ("residency" in reasons and "residency" in fail_on)
60
+ or ("unknown" in reasons and on_unknown == "fail"))
61
+ return "fail" if fail else "warn"
borderlint/report.py ADDED
@@ -0,0 +1,79 @@
1
+ """Render findings as text, JSON, or a Mermaid flow map (grouped by jurisdiction)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+ JURIS = {"us": "United States", "eu": "European Union", "cn": "Mainland China", "hk": "Hong Kong",
8
+ "sg": "Singapore", "gb": "United Kingdom", "mo": "Macao", "my": "Malaysia",
9
+ "CN-GBA": "Mainland GBA", "GBA": "Greater Bay Area", "unknown": "Unknown (region-dependent)"}
10
+ REASON = {"denied_provider": "provider denied by policy",
11
+ "residency": "jurisdiction outside the allow-list for this data class",
12
+ "unknown": "jurisdiction could not be determined"}
13
+ GBA_REF = ("GBA Standard Contract — https://www.digitalpolicy.gov.hk/en/our_work/"
14
+ "digital_infrastructure/mainland/gbacbdf/cross-boundary_data_flow/index.html")
15
+ _RANK = {"ok": 0, "warn": 1, "fail": 2}
16
+
17
+
18
+ def juris(j: str) -> str:
19
+ return JURIS.get(j, j)
20
+
21
+
22
+ def _arrangements(findings, policy) -> list[str]:
23
+ regime = (policy or {}).get("home_regime")
24
+ flagged_china = any(f.severity != "ok" and f.detection.jurisdiction in ("cn", "CN-GBA") for f in findings)
25
+ if regime in ("pdpo", "pipl") and flagged_china:
26
+ return [f"Reference ({regime}): {GBA_REF}"]
27
+ return []
28
+
29
+
30
def text(findings, kb, policy=None) -> str:
    """Render findings as a plain-text report grouped by provider id."""
    if not findings:
        return "borderlint: no AI provider usage detected."

    grouped = {}
    for finding in findings:
        grouped.setdefault(finding.detection.provider_id, []).append(finding)

    badges = {"ok": " OK ", "warn": "WARN", "fail": "FAIL"}
    report = ["borderlint — AI data-flow & residency report", "=" * 46]
    for pid in sorted(grouped):
        group = grouped[pid]
        # A provider is headed by the worst severity among its findings.
        worst = max((f.severity for f in group), key=_RANK.__getitem__)
        places = sorted({f.detection.jurisdiction for f in group})
        where = ", ".join(juris(code) for code in places)
        report.append(f"[{badges[worst]}] {kb.name(pid)} -> {where}")
        for f in group:
            d = f.detection
            report.append(f" {d.file}:{d.line} ({d.kind}: {d.evidence})")
            report.extend(f" ! {REASON.get(r, r)}" for r in f.reasons)

    n_fail = sum(1 for f in findings if f.severity == "fail")
    n_warn = sum(1 for f in findings if f.severity == "warn")
    report.append("")
    report.extend(_arrangements(findings, policy))
    report.append(f"Summary: {n_fail} fail, {n_warn} warn, {len(findings) - n_fail - n_warn} ok")
    return "\n".join(report)
54
+
55
+
56
def as_json(findings, kb, policy=None) -> str:
    """Render findings and reference links as an indented JSON document."""
    rows = []
    for f in findings:
        d = f.detection
        rows.append({
            "provider": d.provider_id,
            "name": kb.name(d.provider_id),
            "jurisdiction": d.jurisdiction,
            "severity": f.severity,
            "reasons": f.reasons,
            "kind": d.kind,
            "evidence": d.evidence,
            "file": d.file,
            "line": d.line,
        })
    payload = {"findings": rows, "references": _arrangements(findings, policy)}
    return json.dumps(payload, indent=2)
64
+
65
+
66
def mermaid(findings, kb, policy=None) -> str:
    """Render findings as a Mermaid flowchart with one subgraph per jurisdiction.

    Args:
        findings: Findings whose detections carry ``jurisdiction`` and ``provider_id``.
        kb: Knowledge base used for provider display names.
        policy: Accepted for signature parity with the other renderers; unused.

    Returns:
        Mermaid ``flowchart LR`` source.
    """
    def _ident(raw: str) -> str:
        # Keep node/subgraph ids to ASCII letters, digits and underscores.
        return "".join(ch if ch.isascii() and ch.isalnum() else "_" for ch in raw)

    def _label(raw: str) -> str:
        # Labels with parentheses ("Moonshot (Kimi)") only parse when quoted;
        # a literal double quote has to be written as an entity code.
        return '"' + str(raw).replace('"', "#quot;") + '"'

    by_j = {}
    for f in findings:
        by_j.setdefault(f.detection.jurisdiction, set()).add(f.detection.provider_id)
    lines = ["flowchart LR", " app([Your application])"]
    for j, pids in by_j.items():
        jid = "j_" + _ident(j)
        # Node ids are scoped by jurisdiction so a provider seen in two
        # jurisdictions is drawn in both subgraphs, not moved to the last one.
        nodes = [(f"{jid}__{_ident(pid)}", pid) for pid in sorted(pids)]
        lines.append(f" subgraph {jid}[{_label(juris(j))}]")
        for node_id, pid in nodes:
            lines.append(f" {node_id}[{_label(kb.name(pid))}]")
        lines.append(" end")
        for node_id, _pid in nodes:
            lines.append(f" app --> {node_id}")
    return "\n".join(lines)
@@ -0,0 +1,87 @@
1
+ Metadata-Version: 2.4
2
+ Name: borderlint
3
+ Version: 0.2.0
4
+ Summary: Map and govern where your AI data and traffic flow — east-west / APAC lens.
5
+ Author: Iolaire McKinnon
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: ai,compliance,data-residency,governance,llm,sovereignty
9
+ Requires-Python: >=3.10
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=7; extra == 'dev'
12
+ Description-Content-Type: text/markdown
13
+
14
+ # borderlint
15
+
16
+ **Map and govern where your AI data and traffic flow — east-west / APAC lens.**
17
+
18
+ A static, in-CI check for **HK / GBA entities**: does your AI data stay within the jurisdictions
19
+ your PDPO / PIPL policy allows? borderlint statically scans your repo (**Python and
20
+ TypeScript/JavaScript**) for AI provider usage, resolves each flow to a jurisdiction (ccTLD codes
21
+ plus the `CN-GBA` / `GBA` tokens), and fails the build on any flow outside the allow-list for the
22
+ data class you declare. Western and Chinese providers are treated evenly. **Zero runtime dependencies.**
23
+
24
+ ## Use
25
+
26
+ ```bash
27
+ python -m borderlint scan ./service --policy residency.json --classification customer-pii
28
+ ```
29
+
30
+ - No `--policy` → **inventory mode** (lists flows + jurisdictions, exits 0).
31
+ - `--format json|mermaid` for machine output or a flow map.
32
+ - Exit code is non-zero on a violation, so it gates CI.
33
+
34
+ ## Policy (the eval-set)
35
+
36
+ `residency.json` maps each data class to the jurisdictions you accept:
37
+
38
+ ```json
39
+ {
40
+ "home_regime": "pdpo",
41
+ "classifications": {
42
+ "customer-pii": ["hk", "CN-GBA", "sg"],
43
+ "employee-pii": ["hk", "CN-GBA"],
44
+ "non-pii": ["hk", "CN-GBA", "cn", "mo", "sg", "us", "gb"]
45
+ }
46
+ }
47
+ ```
48
+
49
+ **Deny-by-default**: a flow to any code not on the list for the declared class fails — so `sg` is
50
+ allowed but `my` is not, matching a PDPO agreed-locations EULA. `GBA` is shorthand for `hk` +
51
+ `CN-GBA`. Cross-border arrangements (e.g. the GBA Standard Contract) are surfaced as reference
52
+ links, never adjudicated.
53
+
54
+ ## Capabilities
55
+
56
+ - **Languages:** Python (AST) and TypeScript/JavaScript (`import` / `require` / dynamic `import()`),
57
+ plus endpoint references in config/text files.
58
+ - **Providers:** 13+ across the east-west boundary (OpenAI, Anthropic, Google, Azure, Bedrock,
59
+ Mistral, Cohere + Tencent, Alibaba, DeepSeek, Moonshot, Zhipu, Baidu), with Python and JS/TS
60
+ package names and the **Vercel AI SDK** (`@ai-sdk/*`).
61
+ - **Aggregators:** litellm, langchain, LlamaIndex, aisuite, Vercel AI core (`ai`) → `unknown`
62
+ (runtime-routed), so `on_unknown: fail` blocks them for sensitive classes.
63
+ - **Jurisdictions:** ccTLD/ISO codes + `CN-GBA` / `GBA`; **AWS/Azure region resolved from the
64
+ endpoint host** where present (e.g. `bedrock-runtime.ap-east-1…` → `hk`).
65
+ - **Policy:** classification-keyed JSON eval-set, deny-by-default, provider allow/deny, configurable
66
+ failure set, declared home regime.
67
+ - **Output & CI:** text / JSON / Mermaid, exit codes, GitHub Action + Jenkins.
68
+
69
+ ## Scope
70
+
71
+ For HK / GBA home bases under PDPO / PIPL / GBA. Not yet: SARIF output, container/SCA mode, LLM
72
+ enrichment, and dynamic / `base_url` endpoint resolution. Full roadmap in `CAPABILITIES.md`.
73
+
74
+ ## CI
75
+
76
+ Same command in any pipeline. GitHub Actions (composite action):
77
+
78
+ ```yaml
79
+ - uses: iolairus/borderlint@v0.2.0
80
+ with: { path: ., policy: residency.json, classification: customer-pii }
81
+ ```
82
+
83
+ Jenkins / anything else: `pip install borderlint && borderlint scan . --policy residency.json --classification customer-pii` — a non-zero exit fails the stage. Full examples in `examples/ci/`.
84
+
85
+ ## License
86
+
87
+ MIT © 2026 Iolaire McKinnon. Vendor-neutral by design.
@@ -0,0 +1,13 @@
1
+ borderlint/__init__.py,sha256=ZYDwkT4abi5MhAa6LCVYCF2BQ4x56ye41e92S2EHluA,96
2
+ borderlint/__main__.py,sha256=k1ocEWawweo1qCJWNFAAvyxz3tcY13dzvCenHszij30,48
3
+ borderlint/cli.py,sha256=ngdFutjBo_g5tYp1t9veSLOBGegzcSUmv89mx4FWmGw,1856
4
+ borderlint/detect.py,sha256=_YDxFeTwk7aci80HMObUKROnkpKxGhEzxJZq6uJiG7g,3943
5
+ borderlint/kb.py,sha256=rMy2rL-5w0dbGZTu4qhrT6fh7evtacIvYxJ4czA_1CY,4865
6
+ borderlint/policy.py,sha256=KFiXxmhe2gD0wx6P_lbNgIcVFnEeNQKM5-t-QtrPXjQ,2230
7
+ borderlint/report.py,sha256=gwLZRMQBbqos2PfBvd-MURtYAtDfsbvTHOhal5C61eA,3491
8
+ borderlint/data/providers.json,sha256=A1JL9nvP5jux2UPvmwltevU2--13wVroCFLmexvCi28,4050
9
+ borderlint-0.2.0.dist-info/METADATA,sha256=6jV0tJrnI1Fj0xhXYgGimdF_uXsZeP0myBZOlM-xBA8,3561
10
+ borderlint-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
11
+ borderlint-0.2.0.dist-info/entry_points.txt,sha256=MPBR-FwC2fgMOXIcPZZ8dkYu30p7jRzzJDdwnxskRqE,51
12
+ borderlint-0.2.0.dist-info/licenses/LICENSE,sha256=feLNgoCutHpNXMNV5ZNI3KymqMNJ5XC3DJB_YOry6Dw,1073
13
+ borderlint-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ borderlint = borderlint.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Iolaire McKinnon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.