borderlint 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- borderlint/__init__.py +3 -0
- borderlint/__main__.py +3 -0
- borderlint/cli.py +52 -0
- borderlint/data/providers.json +23 -0
- borderlint/detect.py +105 -0
- borderlint/kb.py +105 -0
- borderlint/policy.py +61 -0
- borderlint/report.py +79 -0
- borderlint-0.2.0.dist-info/METADATA +87 -0
- borderlint-0.2.0.dist-info/RECORD +13 -0
- borderlint-0.2.0.dist-info/WHEEL +4 -0
- borderlint-0.2.0.dist-info/entry_points.txt +2 -0
- borderlint-0.2.0.dist-info/licenses/LICENSE +21 -0
borderlint/__init__.py
ADDED
borderlint/__main__.py
ADDED
borderlint/cli.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""borderlint command-line interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from . import report
|
|
9
|
+
from .detect import scan
|
|
10
|
+
from .kb import load_kb
|
|
11
|
+
from .policy import Finding, evaluate, load_policy
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main(argv=None) -> int:
    """Run the borderlint command line and return a process exit code.

    Args:
        argv: Argument list handed to ``argparse``; ``None`` lets argparse
            read ``sys.argv[1:]``.

    Returns:
        0 when no finding has severity ``"fail"`` (or when no sub-command was
        given and the help text was printed), 1 when at least one finding
        fails, 2 on a usage or input error (missing scan path, unreadable or
        invalid JSON, missing ``--classification``, unknown classification).
    """
    import os  # local import: only needed for the path-existence check below

    ap = argparse.ArgumentParser(prog="borderlint", description="Map and govern where your AI data flows.")
    sub = ap.add_subparsers(dest="cmd")
    s = sub.add_parser("scan", help="Scan a path for AI data flows and check a residency policy.")
    s.add_argument("path", nargs="?", default=".")
    s.add_argument("-p", "--policy", help="residency policy JSON (omit for inventory mode)")
    s.add_argument("-c", "--classification", help="data class on the scanned path (required with --policy)")
    s.add_argument("-f", "--format", choices=["text", "json", "mermaid"], default="text")
    s.add_argument("--providers", help="custom provider knowledge base JSON")
    a = ap.parse_args(argv)

    if a.cmd != "scan":
        ap.print_help()
        return 0

    # A mistyped path would otherwise scan nothing and report a clean, passing
    # result, which is the wrong default for a CI gate.
    if not os.path.exists(a.path):
        print(f"error: path not found: {a.path}", file=sys.stderr)
        return 2

    try:
        kb = load_kb(a.providers)
    except (OSError, ValueError) as e:  # json.JSONDecodeError is a ValueError
        print(f"error: cannot load provider knowledge base: {e}", file=sys.stderr)
        return 2
    detections = scan(a.path, kb)

    policy = None
    if a.policy:
        if not a.classification:
            print("error: --classification is required when --policy is given", file=sys.stderr)
            return 2
        try:
            policy = load_policy(a.policy)
        except (OSError, ValueError) as e:
            print(f"error: cannot load policy: {e}", file=sys.stderr)
            return 2
        try:
            findings = evaluate(detections, policy, a.classification, kb)
        except KeyError as e:
            # str(KeyError) is the repr of its argument (wrapped in quotes);
            # print the message itself.
            print(f"error: {e.args[0] if e.args else e}", file=sys.stderr)
            return 2
    else:
        findings = [Finding(d, "ok", []) for d in detections]  # inventory mode

    renderers = {"text": report.text, "json": report.as_json, "mermaid": report.mermaid}
    print(renderers[a.format](findings, kb, policy))
    return 1 if any(f.severity == "fail" for f in findings) else 0
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Script entry point: hand main()'s return value to the interpreter as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"providers": [
|
|
3
|
+
{"id": "openai", "name": "OpenAI", "sdks": ["openai"], "npm": ["openai", "@ai-sdk/openai"], "endpoints": ["api.openai.com"], "jurisdiction": "us"},
|
|
4
|
+
{"id": "anthropic", "name": "Anthropic", "sdks": ["anthropic"], "npm": ["@anthropic-ai/sdk", "@ai-sdk/anthropic"], "endpoints": ["api.anthropic.com"], "jurisdiction": "us"},
|
|
5
|
+
{"id": "google_gemini", "name": "Google Gemini", "sdks": ["google.generativeai", "google.genai"], "npm": ["@google/generative-ai", "@google/genai", "@ai-sdk/google", "@ai-sdk/google-vertex"], "endpoints": ["generativelanguage.googleapis.com"], "jurisdiction": "us"},
|
|
6
|
+
{"id": "azure_openai", "name": "Azure OpenAI", "sdks": [], "npm": ["@azure/openai", "@ai-sdk/azure"], "endpoints": ["openai.azure.com", "api.cognitive.microsoft.com", "inference.ai.azure.com"], "jurisdiction": "unknown", "region_scheme": "azure", "note": "regional hosts (<region>.api.cognitive.microsoft.com, *.<region>.inference.ai.azure.com) resolve; the standard openai.azure.com does not carry a region"},
|
|
7
|
+
{"id": "aws_bedrock", "name": "AWS Bedrock", "sdks": [], "npm": ["@aws-sdk/client-bedrock-runtime", "@ai-sdk/amazon-bedrock"], "endpoints": ["bedrock-runtime"], "jurisdiction": "unknown", "region_scheme": "aws", "note": "region is in the host: bedrock-runtime.<region>.amazonaws.com"},
|
|
8
|
+
{"id": "mistral", "name": "Mistral AI", "sdks": ["mistralai"], "npm": ["@mistralai/mistralai", "@ai-sdk/mistral"], "endpoints": ["api.mistral.ai"], "jurisdiction": "eu"},
|
|
9
|
+
{"id": "cohere", "name": "Cohere", "sdks": ["cohere"], "npm": ["cohere-ai", "@ai-sdk/cohere"], "endpoints": ["api.cohere.com", "api.cohere.ai"], "jurisdiction": "us"},
|
|
10
|
+
{"id": "deepseek", "name": "DeepSeek", "sdks": [], "npm": ["@ai-sdk/deepseek"], "endpoints": ["api.deepseek.com"], "jurisdiction": "cn"},
|
|
11
|
+
{"id": "tencent_hunyuan", "name": "Tencent Hunyuan", "sdks": ["tencentcloud"], "npm": [], "endpoints": ["hunyuan.tencentcloudapi.com"], "jurisdiction": "cn"},
|
|
12
|
+
{"id": "alibaba_dashscope", "name": "Alibaba DashScope", "sdks": ["dashscope"], "npm": [], "endpoints": ["dashscope-intl.aliyuncs.com", "dashscope.aliyuncs.com"], "jurisdiction": "cn", "endpoint_jurisdictions": {"dashscope-intl.aliyuncs.com": "sg"}},
|
|
13
|
+
{"id": "moonshot", "name": "Moonshot (Kimi)", "sdks": [], "npm": [], "endpoints": ["api.moonshot.cn"], "jurisdiction": "cn"},
|
|
14
|
+
{"id": "zhipu", "name": "Zhipu GLM", "sdks": ["zhipuai"], "npm": [], "endpoints": ["open.bigmodel.cn"], "jurisdiction": "cn"},
|
|
15
|
+
{"id": "baidu_ernie", "name": "Baidu ERNIE", "sdks": [], "npm": [], "endpoints": ["aip.baidubce.com"], "jurisdiction": "cn"},
|
|
16
|
+
|
|
17
|
+
{"id": "litellm", "name": "LiteLLM (router)", "kind": "aggregator", "sdks": ["litellm"], "npm": [], "endpoints": [], "jurisdiction": "unknown", "note": "multi-provider router; destination chosen at runtime"},
|
|
18
|
+
{"id": "langchain", "name": "LangChain (router)", "kind": "aggregator", "sdks": ["langchain", "langchain_openai", "langchain_anthropic", "langchain_community", "langchain_google_genai", "langchain_aws", "langchain_mistralai"], "npm": ["langchain", "@langchain/openai", "@langchain/anthropic", "@langchain/community", "@langchain/google-genai", "@langchain/aws", "@langchain/mistralai"], "endpoints": [], "jurisdiction": "unknown", "note": "multi-provider router; destination chosen at runtime"},
|
|
19
|
+
{"id": "llama_index", "name": "LlamaIndex (router)", "kind": "aggregator", "sdks": ["llama_index"], "npm": ["llamaindex"], "endpoints": [], "jurisdiction": "unknown", "note": "multi-provider router; destination chosen at runtime"},
|
|
20
|
+
{"id": "aisuite", "name": "aisuite (router)", "kind": "aggregator", "sdks": ["aisuite"], "npm": ["aisuite"], "endpoints": [], "jurisdiction": "unknown", "note": "multi-provider router; destination chosen at runtime"},
|
|
21
|
+
{"id": "vercel_ai", "name": "Vercel AI SDK (router)", "kind": "aggregator", "sdks": [], "npm": ["ai"], "endpoints": [], "jurisdiction": "unknown", "note": "provider-agnostic core; the @ai-sdk/<provider> adapter package determines the provider"}
|
|
22
|
+
]
|
|
23
|
+
}
|
borderlint/detect.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Scan a path for AI provider usage (SDK imports + endpoint references)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
import re
|
|
7
|
+
import warnings
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# Path components that exclude a file from scanning (VCS metadata, dependency
# trees, virtualenvs, build output and tool caches).
IGNORE = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    "build",
    "dist",
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
    ".ruff_cache",
}

# File suffixes scanned line by line for endpoint references.
TEXT_EXT = {
    ".env",
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".yaml",
    ".yml",
    ".toml",
    ".json",
    ".ini",
    ".cfg",
    ".sh",
}

# File suffixes scanned for JS/TS module imports.
JS_EXT = {
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".mjs",
    ".cjs",
}
|
|
15
|
+
|
|
16
|
+
# Capture the module specifier from: `import X from "pkg"`, `import "pkg"`, `export ... from "pkg"`,
|
|
17
|
+
# `require("pkg")`, dynamic `import("pkg")`. Regex over tree-sitter keeps borderlint zero-dependency.
|
|
18
|
+
_JS_IMPORT = re.compile(
|
|
19
|
+
r'''(?:^[ \t]*import\b[^'"\n]*?\bfrom[ \t]*|^[ \t]*import[ \t]*|^[ \t]*export\b[^'"\n]*?\bfrom[ \t]*|\brequire[ \t]*\([ \t]*|\bimport[ \t]*\([ \t]*)['"]([^'"]+)['"]''',
|
|
20
|
+
re.M)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(frozen=True)
class Detection:
    """One piece of evidence that a scanned file references an AI provider.

    Instances are immutable and hashable (``frozen=True``).
    """

    # Provider id as returned by the knowledge base.
    provider_id: str
    # "sdk_import" | "endpoint_reference"
    kind: str
    # The matched module/package name or endpoint host.
    evidence: str
    # Path of the file the evidence was found in.
    file: str
    # 1-based line number of the evidence.
    line: int
    # Jurisdiction code resolved for this flow ("unknown" when undetermined).
    jurisdiction: str
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _scan_py(path: str, src: str, kb) -> list[Detection]:
|
|
34
|
+
out: list[Detection] = []
|
|
35
|
+
try:
|
|
36
|
+
with warnings.catch_warnings(): # ponytail: hush the scanned file's own warnings, not ours
|
|
37
|
+
warnings.simplefilter("ignore")
|
|
38
|
+
tree = ast.parse(src)
|
|
39
|
+
except SyntaxError:
|
|
40
|
+
return out # resilient: skip unparseable files
|
|
41
|
+
for n in ast.walk(tree):
|
|
42
|
+
if isinstance(n, ast.Import):
|
|
43
|
+
for a in n.names:
|
|
44
|
+
pid = kb.match_sdk(a.name)
|
|
45
|
+
if pid:
|
|
46
|
+
out.append(Detection(pid, "sdk_import", a.name, path, n.lineno, kb.default_jurisdiction(pid)))
|
|
47
|
+
elif isinstance(n, ast.ImportFrom):
|
|
48
|
+
pid = kb.match_sdk(n.module or "")
|
|
49
|
+
if pid:
|
|
50
|
+
out.append(Detection(pid, "sdk_import", n.module, path, n.lineno, kb.default_jurisdiction(pid)))
|
|
51
|
+
elif isinstance(n, ast.Constant) and isinstance(n.value, str):
|
|
52
|
+
m = kb.match_endpoint(n.value)
|
|
53
|
+
if m:
|
|
54
|
+
out.append(Detection(m[0], "endpoint_reference", m[1], path, n.lineno, m[2]))
|
|
55
|
+
return out
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _scan_text(path: str, src: str, kb) -> list[Detection]:
|
|
59
|
+
out: list[Detection] = []
|
|
60
|
+
for i, line in enumerate(src.splitlines(), 1):
|
|
61
|
+
m = kb.match_endpoint(line)
|
|
62
|
+
if m:
|
|
63
|
+
out.append(Detection(m[0], "endpoint_reference", m[1], path, i, m[2]))
|
|
64
|
+
return out
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _scan_js(path: str, src: str, kb) -> "list[Detection]":
    """Report an SDK import for every JS/TS module specifier the knowledge base knows.

    Specifiers come from ``_JS_IMPORT``; the reported line is the 1-based line
    on which the regex match starts.
    """
    found: "list[Detection]" = []
    for hit in _JS_IMPORT.finditer(src):
        specifier = hit.group(1)
        provider = kb.match_npm(specifier)
        if not provider:
            continue
        # Newlines before the match start give the zero-based line index.
        lineno = src.count("\n", 0, hit.start()) + 1
        found.append(
            Detection(provider, "sdk_import", specifier, path, lineno, kb.default_jurisdiction(provider))
        )
    return found
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def scan(root, kb) -> "list[Detection]":
    """Scan a file or directory tree for AI provider usage.

    Args:
        root: File or directory to scan. A directory is walked recursively.
        kb: Knowledge base handed to the per-language scanners.

    Returns:
        De-duplicated detections. Files are visited in sorted path order so
        the result does not depend on filesystem enumeration order.

    Files are selected by suffix: ``.py`` goes to the AST scanner, ``JS_EXT``
    to the import scanner plus the line scanner, and ``TEXT_EXT`` or dotenv
    files to the line scanner. Unreadable files are skipped.
    """
    root = Path(root)
    if root.is_file():
        paths = [root]
    else:
        paths = sorted(p for p in root.rglob("*") if p.is_file())

    seen, out = set(), []
    for p in paths:
        # Judge only the components *below* the scan root. Testing the whole
        # path would drop every file when the project itself is checked out
        # under a directory called e.g. "build" or "venv". For a file root the
        # relative path is empty, so an explicitly named file is always kept.
        if any(part in IGNORE for part in p.relative_to(root).parts):
            continue
        suffix = p.suffix
        is_py = suffix == ".py"
        is_js = suffix in JS_EXT
        # ".env" has no suffix, and variants such as ".env.local" carry the
        # variant as their suffix, so dotenv files are recognised by name.
        is_text = suffix in TEXT_EXT or p.name == ".env" or p.name.startswith(".env.")
        if not (is_py or is_js or is_text):
            continue
        try:
            src = p.read_text("utf-8", errors="ignore")
        except OSError:
            continue
        if is_py:
            dets = _scan_py(str(p), src, kb)
        elif is_js:  # imports + endpoint literals
            dets = _scan_js(str(p), src, kb) + _scan_text(str(p), src, kb)
        else:
            dets = _scan_text(str(p), src, kb)
        for d in dets:
            key = (d.provider_id, d.kind, d.evidence, d.file, d.line)
            if key not in seen:
                seen.add(key)
                out.append(d)
    return out
|
borderlint/kb.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Provider knowledge base: load and resolve a provider/endpoint to a jurisdiction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from importlib.resources import files
|
|
8
|
+
|
|
9
|
+
# Region-coded endpoints (the host carries the region) → ccTLD jurisdiction.
|
|
10
|
+
_AWS_RE = re.compile(r"\b([a-z]{2}(?:-gov)?-[a-z]+-\d)\b")
|
|
11
|
+
_AWS_REGION = {
|
|
12
|
+
"ap-east-1": "hk", "ap-east-2": "hk", "cn-north-1": "cn", "cn-northwest-1": "cn",
|
|
13
|
+
"ap-southeast-1": "sg", "ap-southeast-2": "au", "ap-southeast-3": "id",
|
|
14
|
+
"ap-southeast-5": "my", "ap-southeast-7": "th", "ap-south-1": "in", "ap-south-2": "in",
|
|
15
|
+
"ap-northeast-1": "jp", "ap-northeast-2": "kr", "ap-northeast-3": "jp",
|
|
16
|
+
"eu-west-1": "ie", "eu-west-2": "gb", "eu-west-3": "fr", "eu-central-1": "de",
|
|
17
|
+
"eu-central-2": "ch", "eu-north-1": "se", "eu-south-1": "it", "eu-south-2": "es",
|
|
18
|
+
"me-south-1": "bh", "me-central-1": "ae", "af-south-1": "za", "il-central-1": "il",
|
|
19
|
+
}
|
|
20
|
+
_AZURE_RE = re.compile(
|
|
21
|
+
r"\b(eastus2?|westus[123]?|centralus|southcentralus|northcentralus|canadacentral|canadaeast|"
|
|
22
|
+
r"brazilsouth|northeurope|westeurope|uksouth|ukwest|francecentral|germanywestcentral|"
|
|
23
|
+
r"switzerlandnorth|swedencentral|norwayeast|polandcentral|italynorth|spaincentral|eastasia|"
|
|
24
|
+
r"southeastasia|japaneast|japanwest|koreacentral|australiaeast|australiasoutheast|centralindia|"
|
|
25
|
+
r"southindia|uaenorth|qatarcentral|southafricanorth|israelcentral|chinaeast2?|chinanorth[23]?)\b")
|
|
26
|
+
_AZURE_REGION = {
|
|
27
|
+
"eastus": "us", "eastus2": "us", "westus": "us", "westus2": "us", "westus3": "us",
|
|
28
|
+
"centralus": "us", "southcentralus": "us", "northcentralus": "us", "canadacentral": "ca",
|
|
29
|
+
"canadaeast": "ca", "brazilsouth": "br", "northeurope": "ie", "westeurope": "nl",
|
|
30
|
+
"uksouth": "gb", "ukwest": "gb", "francecentral": "fr", "germanywestcentral": "de",
|
|
31
|
+
"switzerlandnorth": "ch", "swedencentral": "se", "norwayeast": "no", "polandcentral": "pl",
|
|
32
|
+
"italynorth": "it", "spaincentral": "es", "eastasia": "hk", "southeastasia": "sg",
|
|
33
|
+
"japaneast": "jp", "japanwest": "jp", "koreacentral": "kr", "australiaeast": "au",
|
|
34
|
+
"australiasoutheast": "au", "centralindia": "in", "southindia": "in", "uaenorth": "ae",
|
|
35
|
+
"qatarcentral": "qa", "southafricanorth": "za", "israelcentral": "il",
|
|
36
|
+
"chinaeast": "cn", "chinaeast2": "cn", "chinanorth": "cn", "chinanorth2": "cn", "chinanorth3": "cn",
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _region_jurisdiction(text: str, scheme: str):
|
|
41
|
+
if scheme == "aws":
|
|
42
|
+
m = _AWS_RE.search(text)
|
|
43
|
+
if not m:
|
|
44
|
+
return None
|
|
45
|
+
r = m.group(1)
|
|
46
|
+
return _AWS_REGION.get(r) or {"us": "us", "ca": "ca", "sa": "br", "cn": "cn"}.get(r.split("-")[0])
|
|
47
|
+
if scheme == "azure":
|
|
48
|
+
m = _AZURE_RE.search(text)
|
|
49
|
+
return _AZURE_REGION.get(m.group(1)) if m else None
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def load_kb(path: str | None = None) -> "KB":
    """Build a ``KB`` from a provider JSON document.

    With a truthy ``path`` the file at that path is read as UTF-8 JSON;
    otherwise the ``data/providers.json`` resource of the ``borderlint``
    package is used. Only the document's ``"providers"`` list is consumed
    (an absent key means an empty knowledge base).
    """
    if not path:
        bundled = files("borderlint").joinpath("data/providers.json")
        data = json.loads(bundled.read_text("utf-8"))
    else:
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    providers = data.get("providers", [])
    return KB(providers)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class KB:
|
|
63
|
+
def __init__(self, providers: list[dict]):
|
|
64
|
+
self.by_id = {p["id"]: p for p in providers}
|
|
65
|
+
sdks, npm, eps = [], [], []
|
|
66
|
+
for p in providers:
|
|
67
|
+
for s in p.get("sdks", []):
|
|
68
|
+
sdks.append((s, p["id"]))
|
|
69
|
+
for n in p.get("npm", []):
|
|
70
|
+
npm.append((n, p["id"]))
|
|
71
|
+
ej = p.get("endpoint_jurisdictions", {})
|
|
72
|
+
for h in p.get("endpoints", []):
|
|
73
|
+
eps.append((h, p["id"], ej.get(h, p.get("jurisdiction", "unknown"))))
|
|
74
|
+
# Longest match first so specific SDKs/hosts win over shorter ones.
|
|
75
|
+
self._sdks = sorted(sdks, key=lambda x: -len(x[0]))
|
|
76
|
+
self._npm = sorted(npm, key=lambda x: -len(x[0]))
|
|
77
|
+
self._eps = sorted(eps, key=lambda x: -len(x[0]))
|
|
78
|
+
self.region_scheme = {p["id"]: p["region_scheme"] for p in providers if p.get("region_scheme")}
|
|
79
|
+
|
|
80
|
+
def name(self, pid: str) -> str:
|
|
81
|
+
return self.by_id.get(pid, {}).get("name", pid)
|
|
82
|
+
|
|
83
|
+
def default_jurisdiction(self, pid: str) -> str:
|
|
84
|
+
return self.by_id.get(pid, {}).get("jurisdiction", "unknown")
|
|
85
|
+
|
|
86
|
+
def match_sdk(self, module: str) -> str | None:
|
|
87
|
+
for s, pid in self._sdks:
|
|
88
|
+
if module == s or module.startswith(s + "."):
|
|
89
|
+
return pid
|
|
90
|
+
return None
|
|
91
|
+
|
|
92
|
+
def match_npm(self, pkg: str) -> str | None:
|
|
93
|
+
for name, pid in self._npm:
|
|
94
|
+
if pkg == name or pkg.startswith(name + "/"):
|
|
95
|
+
return pid
|
|
96
|
+
return None
|
|
97
|
+
|
|
98
|
+
def match_endpoint(self, text: str):
|
|
99
|
+
for h, pid, juris in self._eps:
|
|
100
|
+
if h in text:
|
|
101
|
+
scheme = self.region_scheme.get(pid)
|
|
102
|
+
if scheme:
|
|
103
|
+
juris = _region_jurisdiction(text, scheme) or juris
|
|
104
|
+
return pid, h, juris
|
|
105
|
+
return None
|
borderlint/policy.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Load a residency policy and evaluate detections (deny-by-default)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Finding:
    """A detection paired with the verdict a policy evaluation gave it."""

    # The detection this verdict is about.
    detection: object
    # "ok" | "warn" | "fail"
    severity: str
    # Reason codes behind the verdict; a fresh empty list per instance by default.
    reasons: list = field(default_factory=list)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_policy(path: str) -> dict:
    """Read a residency policy from a UTF-8 JSON file.

    Args:
        path: Location of the policy file.

    Returns:
        The policy mapping. A bare ``{classification: [jurisdictions]}`` map
        is wrapped as ``{"classifications": ...}``.

    Raises:
        OSError: The file cannot be opened.
        ValueError: The file is not valid JSON (``json.JSONDecodeError``) or
            its top-level value is not a JSON object.
    """
    with open(path, encoding="utf-8") as fh:
        data = json.load(fh)
    # Everything downstream calls dict methods on the policy; reject other
    # JSON values here instead of failing later with an AttributeError.
    if not isinstance(data, dict):
        raise ValueError(f"policy file {path!r} must contain a JSON object, got {type(data).__name__}")
    # Shorthand: a bare {classification: [jurisdictions]} map is the classifications block.
    if "classifications" not in data and data and all(isinstance(v, list) for v in data.values()):
        data = {"classifications": data}
    return data
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _allowed(allow: list[str]) -> set[str]:
|
|
26
|
+
s = set(allow)
|
|
27
|
+
if "GBA" in s: # GBA alias = hk + the nine Mainland GBA cities
|
|
28
|
+
s.update({"hk", "CN-GBA"})
|
|
29
|
+
return s
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def evaluate(detections, policy: dict, classification: str, kb=None) -> "list[Finding]":
    """Judge every detection against ``policy`` for the given data class.

    Policy keys read: ``classifications`` (data class to allowed
    jurisdictions), ``providers.deny`` / ``providers.allow``, ``on_unknown``
    (default ``"warn"``) and ``fail_on`` (default residency and
    denied_provider). ``kb`` is accepted but not used here.

    Returns one ``Finding`` per detection, in input order.

    Raises:
        KeyError: ``classification`` is not defined in the policy.
    """
    classes = policy.get("classifications", {})
    if classification not in classes:
        raise KeyError(f"classification '{classification}' not defined in policy")

    allow = _allowed(classes[classification])
    denied = set(policy.get("providers", {}).get("deny", []))
    permitted = set(policy.get("providers", {}).get("allow", []))
    on_unknown = policy.get("on_unknown", "warn")
    fail_on = set(policy.get("fail_on", ["residency", "denied_provider"]))

    def reasons_for(d) -> list:
        # A provider is blocked when denied outright, or when a provider
        # allow-list exists and does not include it.
        collected = []
        if d.provider_id in denied or (permitted and d.provider_id not in permitted):
            collected.append("denied_provider")
        if d.jurisdiction == "unknown":
            collected.append("unknown")
        elif d.jurisdiction not in allow:
            collected.append("residency")
        return collected

    findings = []
    for d in detections:
        reasons = reasons_for(d)
        findings.append(Finding(d, _severity(reasons, fail_on, on_unknown), reasons))
    return findings
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _severity(reasons: list[str], fail_on: set[str], on_unknown: str) -> str:
|
|
56
|
+
if not reasons:
|
|
57
|
+
return "ok"
|
|
58
|
+
fail = (("denied_provider" in reasons and "denied_provider" in fail_on)
|
|
59
|
+
or ("residency" in reasons and "residency" in fail_on)
|
|
60
|
+
or ("unknown" in reasons and on_unknown == "fail"))
|
|
61
|
+
return "fail" if fail else "warn"
|
borderlint/report.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Render findings as text, JSON, or a Mermaid flow map (grouped by jurisdiction)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
JURIS = {"us": "United States", "eu": "European Union", "cn": "Mainland China", "hk": "Hong Kong",
|
|
8
|
+
"sg": "Singapore", "gb": "United Kingdom", "mo": "Macao", "my": "Malaysia",
|
|
9
|
+
"CN-GBA": "Mainland GBA", "GBA": "Greater Bay Area", "unknown": "Unknown (region-dependent)"}
|
|
10
|
+
REASON = {"denied_provider": "provider denied by policy",
|
|
11
|
+
"residency": "jurisdiction outside the allow-list for this data class",
|
|
12
|
+
"unknown": "jurisdiction could not be determined"}
|
|
13
|
+
GBA_REF = ("GBA Standard Contract — https://www.digitalpolicy.gov.hk/en/our_work/"
|
|
14
|
+
"digital_infrastructure/mainland/gbacbdf/cross-boundary_data_flow/index.html")
|
|
15
|
+
_RANK = {"ok": 0, "warn": 1, "fail": 2}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def juris(j: str) -> str:
    """Return the display name for jurisdiction code ``j``, or ``j`` itself when it has none."""
    if j in JURIS:
        return JURIS[j]
    return j
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _arrangements(findings, policy) -> list[str]:
|
|
23
|
+
regime = (policy or {}).get("home_regime")
|
|
24
|
+
flagged_china = any(f.severity != "ok" and f.detection.jurisdiction in ("cn", "CN-GBA") for f in findings)
|
|
25
|
+
if regime in ("pdpo", "pipl") and flagged_china:
|
|
26
|
+
return [f"Reference ({regime}): {GBA_REF}"]
|
|
27
|
+
return []
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def text(findings, kb, policy=None) -> str:
    """Render findings as a plain-text report grouped by provider.

    Providers are listed in sorted id order, each headed by its worst
    severity and the display names of the jurisdictions seen, followed by one
    line per detection and one line per reason. Reference lines and a summary
    count close the report. With no findings a one-line notice is returned.
    """
    if not findings:
        return "borderlint: no AI provider usage detected."

    grouped = {}
    for f in findings:
        grouped.setdefault(f.detection.provider_id, []).append(f)

    badges = {"ok": " OK ", "warn": "WARN", "fail": "FAIL"}
    lines = ["borderlint — AI data-flow & residency report", "=" * 46]
    for pid in sorted(grouped):
        group = grouped[pid]
        worst = max((f.severity for f in group), key=_RANK.__getitem__)
        mark = badges[worst]
        codes = sorted({f.detection.jurisdiction for f in group})
        js = ", ".join(juris(code) for code in codes)
        lines.append(f"[{mark}] {kb.name(pid)} -> {js}")
        for f in group:
            d = f.detection
            lines.append(f" {d.file}:{d.line} ({d.kind}: {d.evidence})")
            lines.extend(f" ! {REASON.get(r, r)}" for r in f.reasons)

    fails = sum(1 for f in findings if f.severity == "fail")
    warns = sum(1 for f in findings if f.severity == "warn")
    lines.append("")
    lines.extend(_arrangements(findings, policy))
    lines.append(f"Summary: {fails} fail, {warns} warn, {len(findings) - fails - warns} ok")
    return "\n".join(lines)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def as_json(findings, kb, policy=None) -> str:
    """Render findings as a JSON document (2-space indent).

    The document has a ``"findings"`` list, one object per finding in input
    order, and a ``"references"`` list of cross-border reference lines.
    """
    rows = []
    for f in findings:
        d = f.detection
        rows.append({
            "provider": d.provider_id,
            "name": kb.name(d.provider_id),
            "jurisdiction": d.jurisdiction,
            "severity": f.severity,
            "reasons": f.reasons,
            "kind": d.kind,
            "evidence": d.evidence,
            "file": d.file,
            "line": d.line,
        })
    document = {
        "findings": rows,
        "references": _arrangements(findings, policy),
    }
    return json.dumps(document, indent=2)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def mermaid(findings, kb, policy=None) -> str:
    """Render findings as a Mermaid flowchart with one subgraph per jurisdiction.

    Args:
        findings: Findings whose detections supply provider id and jurisdiction.
        kb: Knowledge base used for provider display names.
        policy: Accepted for signature parity with the other renderers; unused.

    Returns:
        Mermaid ``flowchart LR`` source. Every label is emitted as a quoted
        string, because display names such as "LiteLLM (router)" contain
        parentheses that Mermaid would otherwise parse as shape syntax. Node
        ids are prefixed with the jurisdiction id, so a provider seen in
        several jurisdictions is drawn once in each subgraph rather than
        sharing one global node.
    """
    def quoted(label) -> str:
        # Mermaid entity code for a double quote inside a quoted label.
        return '"' + str(label).replace('"', "#quot;") + '"'

    def ident(raw) -> str:
        # Keep ids to ASCII letters, digits and underscores.
        return "".join(ch if (ch.isascii() and ch.isalnum()) or ch == "_" else "_" for ch in str(raw))

    by_j = {}
    for f in findings:
        by_j.setdefault(f.detection.jurisdiction, set()).add(f.detection.provider_id)

    lines = ["flowchart LR", " app([Your application])"]
    for j, pids in by_j.items():
        jid = "j_" + ident(j)
        nodes = [(f"{jid}__{ident(pid)}", pid) for pid in sorted(pids)]
        lines.append(f" subgraph {jid}[{quoted(juris(j))}]")
        for node, pid in nodes:
            lines.append(f" {node}[{quoted(kb.name(pid))}]")
        lines.append(" end")
        for node, _pid in nodes:
            lines.append(f" app --> {node}")
    return "\n".join(lines)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: borderlint
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Map and govern where your AI data and traffic flow — east-west / APAC lens.
|
|
5
|
+
Author: Iolaire McKinnon
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: ai,compliance,data-residency,governance,llm,sovereignty
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# borderlint
|
|
15
|
+
|
|
16
|
+
**Map and govern where your AI data and traffic flow — east-west / APAC lens.**
|
|
17
|
+
|
|
18
|
+
A static, in-CI check for **HK / GBA entities**: does your AI data stay within the jurisdictions
|
|
19
|
+
your PDPO / PIPL policy allows? borderlint statically scans your repo (**Python and
|
|
20
|
+
TypeScript/JavaScript**) for AI provider usage, resolves each flow to a jurisdiction (ccTLD codes
|
|
21
|
+
plus the `CN-GBA` / `GBA` tokens), and fails the build on any flow outside the allow-list for the
|
|
22
|
+
data class you declare. Western and Chinese providers are treated evenly. **Zero runtime dependencies.**
|
|
23
|
+
|
|
24
|
+
## Use
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
python -m borderlint scan ./service --policy residency.json --classification customer-pii
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
- No `--policy` → **inventory mode** (lists flows + jurisdictions, exits 0).
|
|
31
|
+
- `--format json|mermaid` for machine output or a flow map.
|
|
32
|
+
- Exit code is non-zero on a violation, so it gates CI.
|
|
33
|
+
|
|
34
|
+
## Policy (the eval-set)
|
|
35
|
+
|
|
36
|
+
`residency.json` maps each data class to the jurisdictions you accept:
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{
|
|
40
|
+
"home_regime": "pdpo",
|
|
41
|
+
"classifications": {
|
|
42
|
+
"customer-pii": ["hk", "CN-GBA", "sg"],
|
|
43
|
+
"employee-pii": ["hk", "CN-GBA"],
|
|
44
|
+
"non-pii": ["hk", "CN-GBA", "cn", "mo", "sg", "us", "gb"]
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Deny-by-default**: a flow to any code not on the list for the declared class fails — so `sg` is
|
|
50
|
+
allowed but `my` is not, matching a PDPO agreed-locations EULA. `GBA` is shorthand for `hk` +
|
|
51
|
+
`CN-GBA`. Cross-border arrangements (e.g. the GBA Standard Contract) are surfaced as reference
|
|
52
|
+
links, never adjudicated.
|
|
53
|
+
|
|
54
|
+
## Capabilities
|
|
55
|
+
|
|
56
|
+
- **Languages:** Python (AST) and TypeScript/JavaScript (`import` / `require` / dynamic `import()`),
|
|
57
|
+
plus endpoint references in config/text files.
|
|
58
|
+
- **Providers:** 13+ across the east-west boundary (OpenAI, Anthropic, Google, Azure, Bedrock,
|
|
59
|
+
Mistral, Cohere + Tencent, Alibaba, DeepSeek, Moonshot, Zhipu, Baidu), with Python and JS/TS
|
|
60
|
+
package names and the **Vercel AI SDK** (`@ai-sdk/*`).
|
|
61
|
+
- **Aggregators:** litellm, langchain, LlamaIndex, aisuite, Vercel AI core (`ai`) → `unknown`
|
|
62
|
+
(runtime-routed), so `on_unknown: fail` blocks them for sensitive classes.
|
|
63
|
+
- **Jurisdictions:** ccTLD/ISO codes + `CN-GBA` / `GBA`; **AWS/Azure region resolved from the
|
|
64
|
+
endpoint host** where present (e.g. `bedrock-runtime.ap-east-1…` → `hk`).
|
|
65
|
+
- **Policy:** classification-keyed JSON eval-set, deny-by-default, provider allow/deny, configurable
|
|
66
|
+
failure set, declared home regime.
|
|
67
|
+
- **Output & CI:** text / JSON / Mermaid, exit codes, GitHub Action + Jenkins.
|
|
68
|
+
|
|
69
|
+
## Scope
|
|
70
|
+
|
|
71
|
+
For HK / GBA home bases under PDPO / PIPL / GBA. Not yet: SARIF output, container/SCA mode, LLM
|
|
72
|
+
enrichment, and dynamic / `base_url` endpoint resolution. Full roadmap in `CAPABILITIES.md`.
|
|
73
|
+
|
|
74
|
+
## CI
|
|
75
|
+
|
|
76
|
+
Same command in any pipeline. GitHub Actions (composite action):
|
|
77
|
+
|
|
78
|
+
```yaml
|
|
79
|
+
- uses: iolairus/borderlint@v0.2.0
|
|
80
|
+
with: { path: ., policy: residency.json, classification: customer-pii }
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Jenkins / anything else: `pip install borderlint && borderlint scan . --policy residency.json --classification customer-pii` — a non-zero exit fails the stage. Full examples in `examples/ci/`.
|
|
84
|
+
|
|
85
|
+
## License
|
|
86
|
+
|
|
87
|
+
MIT © 2026 Iolaire McKinnon. Vendor-neutral by design.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
borderlint/__init__.py,sha256=ZYDwkT4abi5MhAa6LCVYCF2BQ4x56ye41e92S2EHluA,96
|
|
2
|
+
borderlint/__main__.py,sha256=k1ocEWawweo1qCJWNFAAvyxz3tcY13dzvCenHszij30,48
|
|
3
|
+
borderlint/cli.py,sha256=ngdFutjBo_g5tYp1t9veSLOBGegzcSUmv89mx4FWmGw,1856
|
|
4
|
+
borderlint/detect.py,sha256=_YDxFeTwk7aci80HMObUKROnkpKxGhEzxJZq6uJiG7g,3943
|
|
5
|
+
borderlint/kb.py,sha256=rMy2rL-5w0dbGZTu4qhrT6fh7evtacIvYxJ4czA_1CY,4865
|
|
6
|
+
borderlint/policy.py,sha256=KFiXxmhe2gD0wx6P_lbNgIcVFnEeNQKM5-t-QtrPXjQ,2230
|
|
7
|
+
borderlint/report.py,sha256=gwLZRMQBbqos2PfBvd-MURtYAtDfsbvTHOhal5C61eA,3491
|
|
8
|
+
borderlint/data/providers.json,sha256=A1JL9nvP5jux2UPvmwltevU2--13wVroCFLmexvCi28,4050
|
|
9
|
+
borderlint-0.2.0.dist-info/METADATA,sha256=6jV0tJrnI1Fj0xhXYgGimdF_uXsZeP0myBZOlM-xBA8,3561
|
|
10
|
+
borderlint-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
11
|
+
borderlint-0.2.0.dist-info/entry_points.txt,sha256=MPBR-FwC2fgMOXIcPZZ8dkYu30p7jRzzJDdwnxskRqE,51
|
|
12
|
+
borderlint-0.2.0.dist-info/licenses/LICENSE,sha256=feLNgoCutHpNXMNV5ZNI3KymqMNJ5XC3DJB_YOry6Dw,1073
|
|
13
|
+
borderlint-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Iolaire McKinnon
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|